diff options
author | Matt Kangas <matt.kangas@mongodb.com> | 2014-12-17 10:30:34 -0500 |
---|---|---|
committer | Matt Kangas <matt.kangas@mongodb.com> | 2014-12-17 10:30:34 -0500 |
commit | 09a1b0ca18b5340f5383d541abe96beb186a515a (patch) | |
tree | 0f3ee4393728b4e7d79b2aa23fe87f9a4e3593a1 /src | |
parent | 3077abf30145174c4cab2779de7a621c2dd42062 (diff) | |
download | mongo-09a1b0ca18b5340f5383d541abe96beb186a515a.tar.gz |
Import wiredtiger-wiredtiger-2.8-rc2-253-g379ffab.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src')
76 files changed, 931 insertions, 784 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 42ec64ff344..8737c497140 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -1,36 +1,5 @@ # This file is a python script that describes the WiredTiger API. -class Error: - def __init__(self, name, desc, long_desc=None, **flags): - self.name = name - self.desc = desc - self.long_desc = long_desc - self.flags = flags - -errors = [ - Error('WT_DUPLICATE_KEY', 'attempt to insert an existing key', ''' - This error is generated when the application attempts to insert - a record with the same key as an existing record without the - 'overwrite' configuration to WT_SESSION::open_cursor.'''), - Error('WT_ERROR', 'non-specific WiredTiger error', ''' - This error is returned when an error is not covered by a - specific error return.'''), - Error('WT_NOTFOUND', 'item not found', ''' - This error indicates an operation did not find a value to - return. This includes cursor search and other operations - where no record matched the cursor's search key such as - WT_CURSOR::update or WT_CURSOR::remove.'''), - Error('WT_PANIC', 'WiredTiger library panic', ''' - This error indicates an underlying problem that requires the - application exit and restart.'''), - Error('WT_RESTART', 'restart the operation (internal)', undoc=True), - Error('WT_ROLLBACK', 'conflict between concurrent operations', ''' - This error is generated when an operation cannot be completed - due to a conflict with concurrent operations. The operation - may be retried; if a transaction is in progress, it should be - rolled back and the operation retried in a new transaction.'''), -] - class Method: def __init__(self, config, **flags): self.config = config @@ -207,17 +176,26 @@ file_config = format_meta + [ block compression is done''', min='512B', max='512MB'), Config('internal_item_max', '0', r''' - the largest key stored within an internal node, in bytes. If - non-zero, any key larger than the specified size will be - stored as an overflow item (which may require additional I/O - to access). If zero, a default size is chosen that permits at - least 8 keys per internal page''', - min=0), + historic term for internal_key_max''', + min=0, undoc=True), + Config('internal_key_max', '0', r''' + the largest key stored in an internal node, in bytes. If set, keys + larger than the specified size are stored as overflow items (which + may require additional I/O to access). The default and the maximum + allowed value are both one-tenth the size of a newly split internal + page''', + min='0'), Config('key_gap', '10', r''' the maximum gap between instantiated keys in a Btree leaf page, constraining the number of keys processed to instantiate a random Btree leaf page key''', min='0', undoc=True), + Config('leaf_key_max', '0', r''' + the largest key stored in a leaf node, in bytes. If set, keys + larger than the specified size are stored as overflow items (which + may require additional I/O to access). The default value is + one-tenth the size of a newly split leaf page''', + min='0'), Config('leaf_page_max', '32KB', r''' the maximum page size for leaf nodes, in bytes; the size must be a multiple of the allocation size, and is significant for @@ -226,13 +204,17 @@ file_config = format_meta + [ data, that is, the limit is applied before any block compression is done''', min='512B', max='512MB'), + Config('leaf_value_max', '0', r''' + the largest value stored in a leaf node, in bytes. If set, values + larger than the specified size are stored as overflow items (which + may require additional I/O to access). If the size is larger than + the maximum leaf page size, the page size is temporarily ignored + when large values are written. The default is one-half the size of + a newly split leaf page''', + min='0'), Config('leaf_item_max', '0', r''' - the largest key or value stored within a leaf node, in bytes. - If non-zero, any key or value larger than the specified size - will be stored as an overflow item (which may require additional - I/O to access). If zero, a default size is chosen that permits - at least 4 key and value pairs per leaf page''', - min=0), + historic term for leaf_key_max and leaf_value_max''', + min=0, undoc=True), Config('memory_page_max', '5MB', r''' the maximum size a page can grow to in memory before being reconciled to disk. The specified size will be adjusted to a lower @@ -366,7 +348,8 @@ connection_runtime_config = [ Config('worker_thread_max', '4', r''' Configure a set of threads to manage merging LSM trees in the database.''', - min='3', max='20'), # !!! Must match WT_LSM_MAX_WORKERS + min='3', # !!! Must match WT_LSM_MIN_WORKERS + max='20'), # !!! Must match WT_LSM_MAX_WORKERS Config('merge', 'true', r''' merge LSM chunks where possible''', type='boolean') @@ -399,8 +382,9 @@ connection_runtime_config = [ amount of cache this database is guaranteed to have available from the shared cache. This setting is per database. Defaults to the chunk size''', type='int'), - Config('name', '', r''' - name of a cache that is shared between databases'''), + Config('name', 'none', r''' + the name of a cache that is shared between databases or + \c "none" when no shared cache is configured'''), Config('size', '500MB', r''' maximum memory to allocate for the shared cache. Setting this will update the value if one is already set''', @@ -527,7 +511,7 @@ common_wiredtiger_open = [ type='boolean'), Config('compressor', '', r''' configure a compressor for log records. Permitted values are - empty (off) or \c "bzip2", \c "snappy" or custom compression + \c "none" or \c "bzip2", \c "snappy" or custom compression engine \c "name" created with WT_CONNECTION::add_compressor. See @ref compression for more information'''), Config('enabled', 'false', r''' @@ -536,7 +520,7 @@ common_wiredtiger_open = [ Config('file_max', '100MB', r''' the maximum size of log files''', min='100KB', max='2GB'), - Config('path', '""', r''' + Config('path', '', r''' the path to a directory into which the log files are written. If the value is not an absolute path name, the files are created relative to the database home'''), diff --git a/src/third_party/wiredtiger/dist/api_err.py b/src/third_party/wiredtiger/dist/api_err.py index 0c61a41ff28..cb2c8cc588e 100644 --- a/src/third_party/wiredtiger/dist/api_err.py +++ b/src/third_party/wiredtiger/dist/api_err.py @@ -42,7 +42,9 @@ errors = [ Error('WT_PANIC', -31804, 'WiredTiger library panic', ''' This error indicates an underlying problem that requires the - application exit and restart.'''), + application exit and restart. The application can exit + immediately when \c WT_PANIC is returned from a WiredTiger + interface, no further WiredTiger calls are required.'''), Error('WT_RESTART', -31805, 'restart the operation (internal)', undoc=True), ] diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 0bd9cfc6a8c..aa5d65bcc8b 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -394,6 +394,7 @@ agc alfred alloc allocator +allocfile allocsize amd ao diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index bd628e7418a..23243227892 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -186,7 +186,9 @@ connection_stats = [ # Dhandle statistics ########################################## DhandleStat('dh_conn_handles', 'connection dhandles swept'), + DhandleStat('dh_conn_ref', 'connection candidate referenced'), DhandleStat('dh_conn_sweeps', 'connection sweeps'), + DhandleStat('dh_conn_tod', 'connection time-of-death sets'), DhandleStat('dh_session_handles', 'session dhandles swept'), DhandleStat('dh_session_sweeps', 'session sweep attempts'), @@ -348,14 +350,16 @@ dsrc_stats = [ BtreeStat('btree_fixed_len', 'fixed-record size', 'no_aggregate,no_scale'), BtreeStat('btree_maximum_depth', 'maximum tree depth', 'max_aggregate,no_scale'), - BtreeStat('btree_maxintlitem', - 'maximum internal page item size', 'no_aggregate,no_scale'), + BtreeStat('btree_maxintlkey', + 'maximum internal page key size', 'no_aggregate,no_scale'), BtreeStat('btree_maxintlpage', 'maximum internal page size', 'no_aggregate,no_scale'), - BtreeStat('btree_maxleafitem', - 'maximum leaf page item size', 'no_aggregate,no_scale'), + BtreeStat('btree_maxleafkey', + 'maximum leaf page key size', 'no_aggregate,no_scale'), BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'no_aggregate,no_scale'), + BtreeStat('btree_maxleafvalue', + 'maximum leaf page value size', 'no_aggregate,no_scale'), BtreeStat('btree_overflow', 'overflow pages', 'no_scale'), BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale'), BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale'), diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index 3cb78e80b09..6aeb404bccd 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -54,7 +54,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri, WT_RET( __wt_open_internal_session(conn, "async-cursor", 1, 1, &session)); __wt_spin_lock(session, &async->ops_lock); - WT_ERR(__wt_calloc_def(session, 1, &af)); + WT_ERR(__wt_calloc_one(session, &af)); WT_ERR(__wt_strdup(session, uri, &af->uri)); WT_ERR(__wt_strdup(session, config, &af->config)); af->uri_hash = uri_hash; @@ -232,7 +232,7 @@ __async_start(WT_SESSION_IMPL *session) /* * Async is on, allocate the WT_ASYNC structure and initialize the ops. */ - WT_RET(__wt_calloc(session, 1, sizeof(WT_ASYNC), &conn->async)); + WT_RET(__wt_calloc_one(session, &conn->async)); async = conn->async; STAILQ_INIT(&async->formatqh); WT_RET(__wt_spin_init(session, &async->ops_lock, "ops")); diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c index 7a88ac9dd6e..ecf052fc3bf 100644 --- a/src/third_party/wiredtiger/src/async/async_worker.c +++ b/src/third_party/wiredtiger/src/async/async_worker.c @@ -150,7 +150,7 @@ __async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op, * We didn't find one in our cache. Open one and cache it. * Insert it at the head expecting LRU usage. */ - WT_RET(__wt_calloc_def(session, 1, &ac)); + WT_RET(__wt_calloc_one(session, &ac)); WT_ERR(wt_session->open_cursor( wt_session, op->format->uri, NULL, op->format->config, &c)); ac->cfg_hash = op->format->cfg_hash; diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 4f7f2898de5..a9b3b07904d 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -419,7 +419,7 @@ __wt_block_manager_open(WT_SESSION_IMPL *session, *bmp = NULL; - WT_RET(__wt_calloc_def(session, 1, &bm)); + WT_RET(__wt_calloc_one(session, &bm)); __bm_method_set(bm, 0); WT_ERR(__wt_block_open(session, filename, cfg, diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 7b68c59c766..0abe9cffc5f 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -128,7 +128,7 @@ __wt_block_open(WT_SESSION_IMPL *session, } /* Basic structure allocation, initialization. */ - WT_ERR(__wt_calloc_def(session, 1, &block)); + WT_ERR(__wt_calloc_one(session, &block)); block->ref = 1; TAILQ_INSERT_HEAD(&conn->blockqh, block, q); diff --git a/src/third_party/wiredtiger/src/block/block_session.c b/src/third_party/wiredtiger/src/block/block_session.c index fa56b72f49b..90fe0af562a 100644 --- a/src/third_party/wiredtiger/src/block/block_session.c +++ b/src/third_party/wiredtiger/src/block/block_session.c @@ -152,7 +152,7 @@ __block_ext_discard(WT_SESSION_IMPL *session, u_int max) static int __block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) { - return (__wt_calloc(session, 1, sizeof(WT_SIZE), szp)); + return (__wt_calloc_one(session, szp)); } /* diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c index b8fecfe0efd..5f7a8f47c21 100644 --- a/src/third_party/wiredtiger/src/bloom/bloom.c +++ b/src/third_party/wiredtiger/src/bloom/bloom.c @@ -28,7 +28,7 @@ __bloom_init(WT_SESSION_IMPL *session, *bloomp = NULL; - WT_RET(__wt_calloc_def(session, 1, &bloom)); + WT_RET(__wt_calloc_one(session, &bloom)); WT_ERR(__wt_strdup(session, uri, &bloom->uri)); len = strlen(WT_BLOOM_TABLE_CONFIG) + 2; diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index f189760c7dd..390c6819ca2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -505,6 +505,34 @@ err: if (ret == WT_RESTART) } /* + * __curfile_update_check -- + * Check whether an update would conflict. + * + * This function expects the cursor to already be positioned. It should + * be called before deciding whether to skip an update operation based on + * existence of a visible update for a key -- even if there is no value + * visible to the transaction, an update could still conflict. + */ +static int +__curfile_update_check(WT_CURSOR_BTREE *cbt) +{ + WT_BTREE *btree; + WT_SESSION_IMPL *session; + + btree = cbt->btree; + session = (WT_SESSION_IMPL *)cbt->iface.session; + + if (cbt->compare != 0) + return (0); + if (cbt->ins != NULL) + return (__wt_txn_update_check(session, cbt->ins->upd)); + if (btree->type == BTREE_ROW && cbt->ref->page->pg_row_upd != NULL) + return (__wt_txn_update_check( + session, cbt->ref->page->pg_row_upd[cbt->slot])); + return (0); +} + +/* * __wt_btcur_update_check -- * Check whether an update would conflict. * @@ -532,10 +560,9 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); WT_ERR(__cursor_row_search(session, cbt, 1)); /* - * We are only interested in checking for conflicts. + * Just check for conflicts. */ - if (cbt->compare == 0 && cbt->ins != NULL) - ret = __wt_txn_update_check(session, cbt->ins->upd); + ret = __curfile_update_check(cbt); break; case BTREE_COL_FIX: case BTREE_COL_VAR: @@ -580,6 +607,13 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); case BTREE_COL_VAR: WT_ERR(__cursor_col_search(session, cbt)); + /* + * If we find a matching record, check whether an update would + * conflict. Do this before checking if the update is visible + * in __cursor_valid, or we can miss conflict. + */ + WT_ERR(__curfile_update_check(cbt)); + /* Remove the record if it exists. */ if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) { if (!__cursor_fix_implicit(btree, cbt)) @@ -601,6 +635,10 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); case BTREE_ROW: /* Remove the record if it exists. */ WT_ERR(__cursor_row_search(session, cbt, 0)); + + /* Check whether an update would conflict. */ + WT_ERR(__curfile_update_check(cbt)); + if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); @@ -666,26 +704,32 @@ retry: WT_RET(__cursor_func_init(cbt, 1)); WT_ERR(__cursor_col_search(session, cbt)); /* - * If not overwriting, fail if the key doesn't exist. Update - * the record if it exists. Creating a record past the end of - * the tree in a fixed-length column-store implicitly fills the - * gap with empty records. Update the record in that case, the + * If not overwriting, fail if the key doesn't exist. If we + * find an update for the key, check for conflicts. Update the + * record if it exists. Creating a record past the end of the + * tree in a fixed-length column-store implicitly fills the gap + * with empty records. Update the record in that case, the * record exists. */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) && - !__cursor_fix_implicit(btree, cbt)) - WT_ERR(WT_NOTFOUND); + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { + WT_ERR(__curfile_update_check(cbt)); + if ((cbt->compare != 0 || !__cursor_valid(cbt, NULL)) && + !__cursor_fix_implicit(btree, cbt)) + WT_ERR(WT_NOTFOUND); + } ret = __cursor_col_modify(session, cbt, 0); break; case BTREE_ROW: WT_ERR(__cursor_row_search(session, cbt, 1)); /* - * If not overwriting, fail if the key does not exist. + * If not overwriting, check for conflicts and fail if the key + * does not exist. */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - (cbt->compare != 0 || !__cursor_valid(cbt, NULL))) - WT_ERR(WT_NOTFOUND); + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { + WT_ERR(__curfile_update_check(cbt)); + if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + } ret = __cursor_row_modify(session, cbt, 0); break; WT_ILLEGAL_VALUE_ERR(session); diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c index 2fc1b0d5460..a58ed5d66e9 100644 --- a/src/third_party/wiredtiger/src/btree/bt_delete.c +++ b/src/third_party/wiredtiger/src/btree/bt_delete.c @@ -117,7 +117,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, int *skipp) * Record the change in the transaction structure and set the change's * transaction ID. */ - WT_ERR(__wt_calloc_def(session, 1, &ref->page_del)); + WT_ERR(__wt_calloc_one(session, &ref->page_del)); ref->page_del->txnid = session->txn.id; WT_ERR(__wt_txn_modify_ref(session, ref)); @@ -306,7 +306,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) * deleted items. */ for (i = 0; i < page->pg_row_entries; ++i) { - WT_ERR(__wt_calloc_def(session, 1, &upd)); + WT_ERR(__wt_calloc_one(session, &upd)); WT_UPDATE_DELETED_SET(upd); if (page_del == NULL) diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index fe2623b055b..6d69bd8fc74 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -13,9 +13,6 @@ static int __btree_page_sizes(WT_SESSION_IMPL *); static int __btree_preload(WT_SESSION_IMPL *); static int __btree_tree_open_empty(WT_SESSION_IMPL *, int, int); -static int pse1(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t); -static int pse2(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t, int); - /* * __wt_btree_open -- * Open a Btree. @@ -307,7 +304,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) break; } - WT_RET(__wt_config_gets(session, cfg, "block_compressor", &cval)); + WT_RET(__wt_config_gets_none(session, cfg, "block_compressor", &cval)); if (cval.len > 0) { TAILQ_FOREACH(ncomp, &conn->compqh, q) if (WT_STRING_MATCH(ncomp->name, cval.str, cval.len)) { @@ -623,153 +620,98 @@ __btree_page_sizes(WT_SESSION_IMPL *session) btree = S2BT(session); cfg = btree->dhandle->cfg; + /* + * Get the allocation size. Allocation sizes must be a power-of-two, + * nothing else makes sense. + */ WT_RET(__wt_direct_io_size_check( session, cfg, "allocation_size", &btree->allocsize)); + if (!__wt_ispo2(btree->allocsize)) + WT_RET_MSG(session, + EINVAL, "the allocation size must be a power of two"); + + /* + * Get the internal/leaf page sizes. + * All page sizes must be in units of the allocation size. + */ WT_RET(__wt_direct_io_size_check( session, cfg, "internal_page_max", &btree->maxintlpage)); - WT_RET(__wt_config_gets(session, cfg, "internal_item_max", &cval)); - btree->maxintlitem = (uint32_t)cval.val; WT_RET(__wt_direct_io_size_check( session, cfg, "leaf_page_max", &btree->maxleafpage)); - WT_RET(__wt_config_gets(session, cfg, "leaf_item_max", &cval)); - btree->maxleafitem = (uint32_t)cval.val; - - WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); - btree->split_pct = (int)cval.val; + if (btree->maxintlpage < btree->allocsize || + btree->maxintlpage % btree->allocsize != 0 || + btree->maxleafpage < btree->allocsize || + btree->maxleafpage % btree->allocsize != 0) + WT_RET_MSG(session, EINVAL, + "page sizes must be a multiple of the page allocation " + "size (%" PRIu32 "B)", btree->allocsize); /* * When a page is forced to split, we want at least 50 entries on its * parent. - */ - WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); - btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage); - - /* + * * Don't let pages grow to more than half the cache size. Otherwise, * with very small caches, we can end up in a situation where nothing * can be evicted. Take care getting the cache size: with a shared * cache, it may not have been set. */ + WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); + btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage); cache_size = S2C(session)->cache_size; if (cache_size > 0) btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 2); - /* Allocation sizes must be a power-of-two, nothing else makes sense. */ - if (!__wt_ispo2(btree->allocsize)) - WT_RET_MSG(session, - EINVAL, "the allocation size must be a power of two"); - - /* All page sizes must be in units of the allocation size. */ - if (btree->maxintlpage < btree->allocsize || - btree->maxintlpage % btree->allocsize != 0 || - btree->maxleafpage < btree->allocsize || - btree->maxleafpage % btree->allocsize != 0) - WT_RET_MSG(session, EINVAL, - "page sizes must be a multiple of the page allocation " - "size (%" PRIu32 "B)", btree->allocsize); - /* - * Set the split percentage: reconciliation splits to a smaller-than- - * maximum page size so we don't split every time a new entry is added. + * Get the split percentage (reconciliation splits pages into smaller + * than the maximum page size chunks so we don't split every time a + * new entry is added). Determine how large newly split pages will be. */ + WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); + btree->split_pct = (int)cval.val; intl_split_size = __wt_split_page_size(btree, btree->maxintlpage); leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage); /* - * Default values for internal and leaf page items: make sure at least - * 8 items fit on split pages. - */ - if (btree->maxintlitem == 0) - btree->maxintlitem = intl_split_size / 8; - if (btree->maxleafitem == 0) - btree->maxleafitem = leaf_split_size / 8; - - /* - * If raw compression is configured, the application owns page layout, - * it's not our problem. Hopefully the application chose well. + * Get the maximum internal/leaf page key/value sizes. + * + * In historic versions of WiredTiger, the maximum internal/leaf page + * key/value sizes were set by the internal_item_max and leaf_item_max + * configuration strings. Look for those strings if we don't find the + * newer ones. */ - if (btree->compressor != NULL && - btree->compressor->compress_raw != NULL) - return (0); - - /* Check we can fit at least 2 items on a page. */ - if (btree->maxintlitem > btree->maxintlpage / 2) - return (pse1(session, "internal", - btree->maxintlpage, btree->maxintlitem)); - if (btree->maxleafitem > btree->maxleafpage / 2) - return (pse1(session, "leaf", - btree->maxleafpage, btree->maxleafitem)); + WT_RET(__wt_config_gets(session, cfg, "internal_key_max", &cval)); + btree->maxintlkey = (uint32_t)cval.val; + if (btree->maxintlkey == 0) { + WT_RET( + __wt_config_gets(session, cfg, "internal_item_max", &cval)); + btree->maxintlkey = (uint32_t)cval.val; + } + WT_RET(__wt_config_gets(session, cfg, "leaf_key_max", &cval)); + btree->maxleafkey = (uint32_t)cval.val; + WT_RET(__wt_config_gets(session, cfg, "leaf_value_max", &cval)); + btree->maxleafvalue = (uint32_t)cval.val; + if (btree->maxleafkey == 0 && btree->maxleafvalue == 0) { + WT_RET(__wt_config_gets(session, cfg, "leaf_item_max", &cval)); + btree->maxleafkey = (uint32_t)cval.val; + btree->maxleafvalue = (uint32_t)cval.val; + } /* - * Take into account the size of a split page: + * Default/maximum for internal and leaf page keys: split-page / 10. + * Default for leaf page values: split-page / 2. * - * Make it a separate error message so it's clear what went wrong. + * It's difficult for applications to configure this in any exact way as + * they have to duplicate our calculation of how many keys must fit on a + * page, and given a split-percentage and page header, that isn't easy + * to do. If the maximum internal key value is too large for the page, + * reset it to the default. */ - if (btree->maxintlitem > intl_split_size / 2) - return (pse2(session, "internal", - btree->maxintlpage, btree->maxintlitem, btree->split_pct)); - if (btree->maxleafitem > leaf_split_size / 2) - return (pse2(session, "leaf", - btree->maxleafpage, btree->maxleafitem, btree->split_pct)); + if (btree->maxintlkey == 0 || btree->maxintlkey > intl_split_size / 10) + btree->maxintlkey = intl_split_size / 10; + if (btree->maxleafkey == 0) + btree->maxleafkey = leaf_split_size / 10; + if (btree->maxleafvalue == 0) + btree->maxleafvalue = leaf_split_size / 2; return (0); } - -/* - * __wt_split_page_size -- - * Split page size calculation: we don't want to repeatedly split every - * time a new entry is added, so we split to a smaller-than-maximum page size. - */ -uint32_t -__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ - uintmax_t a; - uint32_t split_size; - - /* - * Ideally, the split page size is some percentage of the maximum page - * size rounded to an allocation unit (round to an allocation unit so - * we don't waste space when we write). - */ - a = maxpagesize; /* Don't overflow. */ - split_size = (uint32_t) - WT_ALIGN((a * (u_int)btree->split_pct) / 100, btree->allocsize); - - /* - * If the result of that calculation is the same as the allocation unit - * (that happens if the maximum size is the same size as an allocation - * unit, use a percentage of the maximum page size). - */ - if (split_size == btree->allocsize) - split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); - - return (split_size); -} - -/* - * pse1 -- - * Page size error message 1. - */ -static int -pse1(WT_SESSION_IMPL *session, const char *type, uint32_t max, uint32_t ovfl) -{ - WT_RET_MSG(session, EINVAL, - "%s page size (%" PRIu32 "B) too small for the maximum item size " - "(%" PRIu32 "B); the page must be able to hold at least 2 items", - type, max, ovfl); -} - -/* - * pse2 -- - * Page size error message 2. - */ -static int -pse2(WT_SESSION_IMPL *session, - const char *type, uint32_t max, uint32_t ovfl, int pct) -{ - WT_RET_MSG(session, EINVAL, - "%s page size (%" PRIu32 "B) too small for the maximum item size " - "(%" PRIu32 "B), because of the split percentage (%d %%); a split " - "page must be able to hold at least 2 items", - type, max, ovfl, pct); -} diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index aa6e7c36451..80da16f2a62 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -144,8 +144,9 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) btree = S2BT(session); cfg = btree->dhandle->cfg; - WT_RET(__wt_config_gets(session, cfg, "huffman_key", &key_conf)); - WT_RET(__wt_config_gets(session, cfg, "huffman_value", &value_conf)); + WT_RET(__wt_config_gets_none(session, cfg, "huffman_key", &key_conf)); + WT_RET( + __wt_config_gets_none(session, cfg, "huffman_value", &value_conf)); if (key_conf.len == 0 && value_conf.len == 0) return (0); @@ -153,6 +154,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) case BTREE_COL_FIX: WT_RET_MSG(session, EINVAL, "fixed-size column-store files may not be Huffman encoded"); + /* NOTREACHED */ case BTREE_COL_VAR: if (key_conf.len != 0) WT_RET_MSG(session, EINVAL, @@ -163,18 +165,20 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) break; } - if (strncasecmp(key_conf.str, "english", key_conf.len) == 0) { + if (key_conf.len == 0) { + ; + } else if (strncasecmp(key_conf.str, "english", key_conf.len) == 0) { struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)]; memcpy(copy, __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish)); - WT_RET(__wt_huffman_open(session, copy, - WT_ELEMENTS(__wt_huffman_nytenglish), + WT_RET(__wt_huffman_open( + session, copy, WT_ELEMENTS(__wt_huffman_nytenglish), 1, &btree->huffman_key)); /* Check for a shared key/value table. */ - if (strncasecmp( + if (value_conf.len != 0 && strncasecmp( value_conf.str, "english", value_conf.len) == 0) { btree->huffman_value = btree->huffman_key; return (0); @@ -182,8 +186,8 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) } else { WT_RET(__wt_huffman_read( session, &key_conf, &table, &entries, &numbytes)); - ret = __wt_huffman_open(session, table, - entries, numbytes, &btree->huffman_key); + ret = __wt_huffman_open( + session, table, entries, numbytes, &btree->huffman_key); __wt_free(session, table); if (ret != 0) return (ret); @@ -195,20 +199,24 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session) return (0); } } - if (strncasecmp(value_conf.str, "english", value_conf.len) == 0) { + + if (value_conf.len == 0) { + ; + } else if ( + strncasecmp(value_conf.str, "english", value_conf.len) == 0) { struct __wt_huffman_table copy[WT_ELEMENTS(__wt_huffman_nytenglish)]; memcpy(copy, __wt_huffman_nytenglish, sizeof(__wt_huffman_nytenglish)); - WT_RET(__wt_huffman_open(session, copy, - WT_ELEMENTS(__wt_huffman_nytenglish), + WT_RET(__wt_huffman_open( + session, copy, WT_ELEMENTS(__wt_huffman_nytenglish), 1, &btree->huffman_value)); } else { WT_RET(__wt_huffman_read( session, &value_conf, &table, &entries, &numbytes)); - ret = __wt_huffman_open(session, table, - entries, numbytes, &btree->huffman_value); + ret = __wt_huffman_open( + session, table, entries, numbytes, &btree->huffman_value); __wt_free(session, table); if (ret != 0) return (ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index b2767e74bac..799f0cca3ee 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -227,8 +227,8 @@ __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, WT_INTL_INDEX_SET(page, pindex); if (alloc_refs) for (i = 0; i < pindex->entries; ++i) { - WT_ERR(__wt_calloc_def( - session, 1, &pindex->index[i])); + WT_ERR(__wt_calloc_one( + session, &pindex->index[i])); size += sizeof(WT_REF); } if (0) { diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index 6e70c9ea2b6..9d98d0db739 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -235,10 +235,8 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) * Add unreferenced overflow page blocks to the free list so they are * reused immediately. */ - if (ss->ovfl_next != 0) { - WT_ERR(__slvg_ovfl_reconcile(session, ss)); - WT_ERR(__slvg_ovfl_discard(session, ss)); - } + WT_ERR(__slvg_ovfl_reconcile(session, ss)); + WT_ERR(__slvg_ovfl_discard(session, ss)); /* * Step 5: @@ -491,8 +489,8 @@ __slvg_trk_init(WT_SESSION_IMPL *session, WT_DECL_RET; WT_TRACK *trk; - WT_RET(__wt_calloc_def(session, 1, &trk)); - WT_ERR(__wt_calloc_def(session, 1, &trk->shared)); + WT_RET(__wt_calloc_one(session, &trk)); + WT_ERR(__wt_calloc_one(session, &trk->shared)); trk->shared->ref = 1; trk->ss = ss; @@ -519,7 +517,7 @@ __slvg_trk_split(WT_SESSION_IMPL *session, WT_TRACK *orig, WT_TRACK **newp) { WT_TRACK *trk; - WT_RET(__wt_calloc_def(session, 1, &trk)); + WT_RET(__wt_calloc_one(session, &trk)); trk->shared = orig->shared; trk->ss = orig->ss; @@ -1181,7 +1179,7 @@ __slvg_col_build_internal( ref->home = page; ref->page = NULL; - WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr)); + WT_ERR(__wt_calloc_one(session, &addr)); WT_ERR(__wt_strndup( session, trk->trk_addr, trk->trk_addr_size, &addr->addr)); addr->size = trk->trk_addr_size; @@ -1826,7 +1824,7 @@ __slvg_row_build_internal( ref->home = page; ref->page = NULL; - WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr)); + WT_ERR(__wt_calloc_one(session, &addr)); WT_ERR(__wt_strndup( session, trk->trk_addr, trk->trk_addr_size, &addr->addr)); addr->size = trk->trk_addr_size; diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index e25f0b73e01..c6b97733b69 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -310,7 +310,7 @@ __split_ref_instantiate(WT_SESSION_IMPL *session, sizeof(WT_ADDR) + addr->size); else { __wt_cell_unpack((WT_CELL *)ref->addr, &unpack); - WT_RET(__wt_calloc_def(session, 1, &addr)); + WT_RET(__wt_calloc_one(session, &addr)); if ((ret = __wt_strndup( session, unpack.data, unpack.size, &addr->addr)) != 0) { __wt_free(session, addr); @@ -444,7 +444,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent) pindex->index[pindex->entries - 1]; for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1, i = 0; i < children; ++alloc_refp, ++i) { - WT_ERR(__wt_calloc_def(session, 1, alloc_refp)); + WT_ERR(__wt_calloc_one(session, alloc_refp)); WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF)); } @@ -747,7 +747,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, /* In some cases, the underlying WT_REF has not yet been allocated. */ if (*refp == NULL) { - WT_RET(__wt_calloc_def(session, 1, refp)); + WT_RET(__wt_calloc_one(session, refp)); WT_MEMSIZE_ADD(incr, sizeof(WT_REF)); } ref = *refp; @@ -768,7 +768,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, * would have to avoid freeing the memory, and it's not worth * the confusion. */ - WT_RET(__wt_calloc_def(session, 1, &addr)); + WT_RET(__wt_calloc_one(session, &addr)); WT_MEMSIZE_ADD(incr, sizeof(WT_ADDR)); ref->addr = addr; addr->size = multi->addr.size; @@ -1081,7 +1081,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) * * The new reference is visible to readers once the split completes. */ - WT_ERR(__wt_calloc_def(session, 1, &split_ref[0])); + WT_ERR(__wt_calloc_one(session, &split_ref[0])); child = split_ref[0]; *child = *ref; child->state = WT_REF_MEM; @@ -1112,12 +1112,12 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) * The second page in the split is a new WT_REF/page pair. */ WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right)); - WT_ERR(__wt_calloc_def(session, 1, &right->pg_row_ins)); - WT_ERR(__wt_calloc_def(session, 1, &right->pg_row_ins[0])); + WT_ERR(__wt_calloc_one(session, &right->pg_row_ins)); + WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0])); WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD)); WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *)); - WT_ERR(__wt_calloc_def(session, 1, &split_ref[1])); + WT_ERR(__wt_calloc_one(session, &split_ref[1])); child = split_ref[1]; child->page = right; child->state = WT_REF_MEM; diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 3da0bcf346c..c08e9d9218b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -32,10 +32,11 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(stats, btree_fixed_len, btree->bitcnt); WT_STAT_SET(stats, btree_maximum_depth, btree->maximum_depth); - WT_STAT_SET(stats, btree_maxintlitem, btree->maxintlitem); WT_STAT_SET(stats, btree_maxintlpage, btree->maxintlpage); - WT_STAT_SET(stats, btree_maxleafitem, btree->maxleafitem); + WT_STAT_SET(stats, btree_maxintlkey, btree->maxintlkey); WT_STAT_SET(stats, btree_maxleafpage, btree->maxleafpage); + WT_STAT_SET(stats, btree_maxleafkey, btree->maxleafkey); + WT_STAT_SET(stats, btree_maxleafvalue, btree->maxleafvalue); /* Everything else is really, really expensive. */ if (!F_ISSET(cst, WT_CONN_STAT_ALL)) diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c index e0036d14cbb..e7fb75dc8cb 100644 --- a/src/third_party/wiredtiger/src/btree/row_modify.c +++ b/src/third_party/wiredtiger/src/btree/row_modify.c @@ -19,7 +19,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) conn = S2C(session); - WT_RET(__wt_calloc_def(session, 1, &modify)); + WT_RET(__wt_calloc_one(session, &modify)); /* * Select a spinlock for the page; let the barrier immediately below diff --git a/src/third_party/wiredtiger/src/config/config.c b/src/third_party/wiredtiger/src/config/config.c index 4285b23be23..c33cae594da 100644 --- a/src/third_party/wiredtiger/src/config/config.c +++ b/src/third_party/wiredtiger/src/config/config.c @@ -700,6 +700,21 @@ __wt_config_getones(WT_SESSION_IMPL *session, } /* + * __wt_config_getones_none -- + * Get the value for a given string key from a single config string. + * Treat "none" as empty. + */ +int +__wt_config_getones_none(WT_SESSION_IMPL *session, + const char *config, const char *key, WT_CONFIG_ITEM *value) +{ + WT_RET(__wt_config_getones(session, config, key, value)); + if (WT_STRING_CASE_MATCH("none", value->str, value->len)) + value->len = 0; + return (0); +} + +/* * __wt_config_gets_def -- * Performance hack: skip parsing config strings by hard-coding defaults. * diff --git a/src/third_party/wiredtiger/src/config/config_api.c b/src/third_party/wiredtiger/src/config/config_api.c index 42f4c117b81..0c920af0d0e 100644 --- a/src/third_party/wiredtiger/src/config/config_api.c +++ b/src/third_party/wiredtiger/src/config/config_api.c @@ -84,7 +84,7 @@ wiredtiger_config_parser_open(WT_SESSION *wt_session, *config_parserp = NULL; session = (WT_SESSION_IMPL *)wt_session; - WT_RET(__wt_calloc_def(session, 1, &config_parser)); + WT_RET(__wt_calloc_one(session, &config_parser)); config_parser->iface = stds; config_parser->session = session; diff --git a/src/third_party/wiredtiger/src/config/config_check.c b/src/third_party/wiredtiger/src/config/config_check.c index c6fd6bbd75b..18300da8282 100644 --- a/src/third_party/wiredtiger/src/config/config_check.c +++ b/src/third_party/wiredtiger/src/config/config_check.c @@ -122,7 +122,7 @@ __wt_configure_method(WT_SESSION_IMPL *session, * The new base value is the previous base value, a separator and the * new configuration string. */ - WT_ERR(__wt_calloc_def(session, 1, &entry)); + WT_ERR(__wt_calloc_one(session, &entry)); entry->method = (*epp)->method; WT_ERR(__wt_calloc_def(session, strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p)); diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 7751a4ece40..6bd4edd3543 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -131,12 +131,15 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "huffman_value", "string", NULL, NULL }, { "id", "string", NULL, NULL }, { "internal_item_max", "int", "min=0", NULL }, + { "internal_key_max", "int", "min=0", NULL }, { "internal_key_truncate", "boolean", NULL, NULL }, { "internal_page_max", "int", "min=512B,max=512MB", NULL }, { "key_format", "format", NULL, NULL }, { "key_gap", "int", "min=0", NULL }, { "leaf_item_max", "int", "min=0", NULL }, + { "leaf_key_max", "int", "min=0", NULL }, { "leaf_page_max", "int", "min=512B,max=512MB", NULL }, + { "leaf_value_max", "int", "min=0", NULL }, { "memory_page_max", "int", "min=512B,max=10TB", NULL }, { "os_cache_dirty_max", "int", "min=0", NULL }, { "os_cache_max", "int", "min=0", NULL }, @@ -220,12 +223,15 @@ static const WT_CONFIG_CHECK confchk_session_create[] = { { "huffman_value", "string", NULL, NULL }, { "immutable", "boolean", NULL, NULL }, { "internal_item_max", "int", "min=0", NULL }, + { "internal_key_max", "int", "min=0", NULL }, { "internal_key_truncate", "boolean", NULL, NULL }, { "internal_page_max", "int", "min=512B,max=512MB", NULL }, { "key_format", "format", NULL, NULL }, { "key_gap", "int", "min=0", NULL }, { "leaf_item_max", "int", "min=0", NULL }, + { "leaf_key_max", "int", "min=0", NULL }, { "leaf_page_max", "int", "min=512B,max=512MB", NULL }, + { "leaf_value_max", "int", "min=0", NULL }, { "lsm", "category", NULL, confchk_lsm_subconfigs }, { "memory_page_max", "int", "min=512B,max=10TB", NULL }, { "os_cache_dirty_max", "int", "min=0", NULL }, @@ -541,7 +547,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "error_prefix=,eviction=(threads_max=1,threads_min=1)," "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95," "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=," - "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," + "shared_cache=(chunk=10MB,name=none,reserve=0,size=500MB)," "statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", @@ -556,11 +562,12 @@ static const WT_CONFIG_ENTRY config_entries[] = { "block_compressor=,cache_resident=0,checkpoint=,checkpoint_lsn=," "checksum=uncompressed,collator=,columns=,dictionary=0," "format=btree,huffman_key=,huffman_value=,id=,internal_item_max=0" - ",internal_key_truncate=,internal_page_max=4KB,key_format=u," - "key_gap=10,leaf_item_max=0,leaf_page_max=32KB," - "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," - "prefix_compression=0,prefix_compression_min=4,split_pct=75," - "value_format=u,version=(major=0,minor=0)", + ",internal_key_max=0,internal_key_truncate=,internal_page_max=4KB" + ",key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0," + "leaf_page_max=32KB,leaf_value_max=0,memory_page_max=5MB," + "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0," + "prefix_compression_min=4,split_pct=75,value_format=u," + "version=(major=0,minor=0)", confchk_file_meta }, { "index.meta", @@ -593,8 +600,9 @@ static const WT_CONFIG_ENTRY config_entries[] = { "block_compressor=,cache_resident=0,checksum=uncompressed," "colgroups=,collator=,columns=,dictionary=0,exclusive=0," "extractor=,format=btree,huffman_key=,huffman_value=,immutable=0," - "internal_item_max=0,internal_key_truncate=,internal_page_max=4KB" - ",key_format=u,key_gap=10,leaf_item_max=0,leaf_page_max=32KB," + "internal_item_max=0,internal_key_max=0,internal_key_truncate=," + "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0," + "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," "lsm=(auto_throttle=,bloom=,bloom_bit_count=16,bloom_config=," "bloom_hash_count=8,bloom_oldest=0,chunk_max=5GB,chunk_size=10MB," "merge_max=15,merge_min=0),memory_page_max=5MB," @@ -656,9 +664,9 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," "file_extend=,hazard_max=1000,log=(archive=,compressor=,enabled=0" - ",file_max=100MB,path=\"\",prealloc=),lsm_manager=(merge=," + ",file_max=100MB,path=,prealloc=),lsm_manager=(merge=," "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0," - "session_max=100,shared_cache=(chunk=10MB,name=,reserve=0," + "session_max=100,shared_cache=(chunk=10MB,name=none,reserve=0," "size=500MB),statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" @@ -673,9 +681,9 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," "file_extend=,hazard_max=1000,log=(archive=,compressor=,enabled=0" - ",file_max=100MB,path=\"\",prealloc=),lsm_manager=(merge=," + ",file_max=100MB,path=,prealloc=),lsm_manager=(merge=," "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0," - "session_max=100,shared_cache=(chunk=10MB,name=,reserve=0," + "session_max=100,shared_cache=(chunk=10MB,name=none,reserve=0," "size=500MB),statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" @@ -690,13 +698,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { "direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1)," "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95," "extensions=,file_extend=,hazard_max=1000,log=(archive=," - "compressor=,enabled=0,file_max=100MB,path=\"\",prealloc=)," + "compressor=,enabled=0,file_max=100MB,path=,prealloc=)," "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=," - "multiprocess=0,session_max=100,shared_cache=(chunk=10MB,name=," - "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" - ",path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),verbose=,version=(major=0,minor=0)", + "multiprocess=0,session_max=100,shared_cache=(chunk=10MB," + "name=none,reserve=0,size=500MB),statistics=none," + "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," + "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "transaction_sync=(enabled=0,method=fsync),verbose=," + "version=(major=0,minor=0)", confchk_wiredtiger_open_basecfg }, { "wiredtiger_open_usercfg", @@ -706,13 +715,13 @@ static const WT_CONFIG_ENTRY config_entries[] = { "direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1)," "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95," "extensions=,file_extend=,hazard_max=1000,log=(archive=," - "compressor=,enabled=0,file_max=100MB,path=\"\",prealloc=)," + "compressor=,enabled=0,file_max=100MB,path=,prealloc=)," "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=," - "multiprocess=0,session_max=100,shared_cache=(chunk=10MB,name=," - "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" - ",path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),verbose=", + "multiprocess=0,session_max=100,shared_cache=(chunk=10MB," + "name=none,reserve=0,size=500MB),statistics=none," + "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," + "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "transaction_sync=(enabled=0,method=fsync),verbose=", confchk_wiredtiger_open_usercfg }, { NULL, NULL, NULL } diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 8d104729733..cc88f848861 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -292,7 +292,7 @@ __conn_add_collator(WT_CONNECTION *wt_conn, WT_ERR_MSG(session, EINVAL, "invalid name for a collator: %s", name); - WT_ERR(__wt_calloc_def(session, 1, &ncoll)); + WT_ERR(__wt_calloc_one(session, &ncoll)); WT_ERR(__wt_strdup(session, name, &ncoll->name)); ncoll->collator = collator; @@ -363,7 +363,7 @@ __conn_add_compressor(WT_CONNECTION *wt_conn, WT_ERR_MSG(session, EINVAL, "invalid name for a compressor: %s", name); - WT_ERR(__wt_calloc_def(session, 1, &ncomp)); + WT_ERR(__wt_calloc_one(session, &ncomp)); WT_ERR(__wt_strdup(session, name, &ncomp->name)); ncomp->compressor = compressor; @@ -428,7 +428,7 @@ __conn_add_data_source(WT_CONNECTION *wt_conn, CONNECTION_API_CALL(conn, session, add_data_source, config, cfg); WT_UNUSED(cfg); - WT_ERR(__wt_calloc_def(session, 1, &ndsrc)); + WT_ERR(__wt_calloc_one(session, &ndsrc)); WT_ERR(__wt_strdup(session, prefix, &ndsrc->prefix)); ndsrc->dsrc = dsrc; @@ -497,7 +497,7 @@ __conn_add_extractor(WT_CONNECTION *wt_conn, WT_ERR_MSG(session, EINVAL, "invalid name for an extractor: %s", name); - WT_ERR(__wt_calloc_def(session, 1, &nextractor)); + WT_ERR(__wt_calloc_one(session, &nextractor)); WT_ERR(__wt_strdup(session, name, &nextractor->name)); nextractor->extractor = extractor; @@ -533,8 +533,8 @@ __wt_extractor_config(WT_SESSION_IMPL *session, const char *config, conn = S2C(session); if ((ret = - __wt_config_getones(session, config, "extractor", &cval)) != 0) - return (ret == WT_NOTFOUND ? 0 : ret); + __wt_config_getones_none(session, config, "extractor", &cval)) != 0) + return (ret == WT_NOTFOUND || cval.len == 0 ? 0 : ret); if (cval.len > 0) { TAILQ_FOREACH(nextractor, &conn->extractorqh, q) @@ -1490,7 +1490,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_RET(__wt_library_init()); - WT_RET(__wt_calloc_def(NULL, 1, &conn)); + WT_RET(__wt_calloc_one(NULL, &conn)); conn->iface = stdc; /* diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 079bd05ff1e..61bd4447abf 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -83,7 +83,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, conn->cache == NULL || (F_ISSET(conn, WT_CONN_CACHE_POOL) && conn->cache != NULL)); - WT_RET(__wt_calloc_def(session, 1, &conn->cache)); + WT_RET(__wt_calloc_one(session, &conn->cache)); cache = conn->cache; diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c index c7558eea5fb..edd6f01d52c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c @@ -51,16 +51,17 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) session, cfg, "shared_cache.name", &cval)); if (cval.len == 0) { /* - * Tell the user if they configured some shared cache - * settings, but didn't enable it by naming it. + * Tell the user if they configured a cache pool + * size but didn't enable it by naming the pool. */ - if (__wt_config_gets(session, - &cfg[1], "shared_cache", &cval) != WT_NOTFOUND) + if (__wt_config_gets(session, &cfg[1], + "shared_cache.size", &cval) != WT_NOTFOUND) WT_RET_MSG(session, EINVAL, "Shared cache configuration requires a " "pool name"); return (0); } + if (__wt_config_gets(session, &cfg[1], "cache_size", &cval) != WT_NOTFOUND) WT_RET_MSG(session, EINVAL, @@ -81,7 +82,7 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) if (__wt_process.cache_pool == NULL) { WT_ASSERT(session, !reconfiguring); /* Create a cache pool. */ - WT_ERR(__wt_calloc_def(session, 1, &cp)); + WT_ERR(__wt_calloc_one(session, &cp)); created = 1; cp->name = pool_name; pool_name = NULL; /* Belongs to the cache pool now. */ diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 088ff2f3d2c..5d5a67ccac8 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -187,7 +187,7 @@ __conn_dhandle_get(WT_SESSION_IMPL *session, * then initialize the data handle. Exclusively lock the data handle * before inserting it in the list. */ - WT_RET(__wt_calloc_def(session, 1, &dhandle)); + WT_RET(__wt_calloc_one(session, &dhandle)); WT_ERR(__wt_rwlock_alloc(session, &dhandle->rwlock, "data handle")); @@ -196,7 +196,7 @@ __conn_dhandle_get(WT_SESSION_IMPL *session, if (ckpt != NULL) WT_ERR(__wt_strdup(session, ckpt, &dhandle->checkpoint)); - WT_ERR(__wt_calloc_def(session, 1, &btree)); + WT_ERR(__wt_calloc_one(session, &btree)); dhandle->handle = btree; btree->dhandle = dhandle; @@ -600,13 +600,15 @@ __wt_conn_dhandle_close_all( WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; + uint64_t bucket; conn = S2C(session); WT_ASSERT(session, F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED)); WT_ASSERT(session, session->dhandle == NULL); - SLIST_FOREACH(dhandle, &conn->dhlh, l) { + bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; + SLIST_FOREACH(dhandle, &conn->dhhash[bucket], l) { if (strcmp(dhandle->name, name) != 0) continue; diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index 618a0934ce1..a0e46c96291 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -54,15 +54,13 @@ __logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp) */ WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); *runp = cval.val != 0; - if (*runp == 0) - return (0); - - WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval)); - if (cval.val != 0) - FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE); + /* + * Setup a log path and compression even if logging is disabled in + * case we are going to print a log. + */ conn->log_compressor = NULL; - WT_RET(__wt_config_gets(session, cfg, "log.compressor", &cval)); + WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &cval)); if (cval.len > 0) { TAILQ_FOREACH(ncomp, &conn->compqh, q) if (WT_STRING_MATCH(ncomp->name, cval.str, cval.len)) { @@ -75,13 +73,21 @@ __logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp) (int)cval.len, cval.str); } + WT_RET(__wt_config_gets(session, cfg, "log.path", &cval)); + WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->log_path)); + + /* We are done if logging isn't enabled. */ + if (*runp == 0) + return (0); + + WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval)); + if (cval.val != 0) + FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE); + WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval)); conn->log_file_max = (wt_off_t)cval.val; WT_STAT_FAST_CONN_SET(session, log_max_filesize, conn->log_file_max); - WT_RET(__wt_config_gets(session, cfg, "log.path", &cval)); - WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->log_path)); - WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval)); /* * If pre-allocation is configured, set the initial number to one. @@ -215,8 +221,11 @@ __log_prealloc_once(WT_SESSION_IMPL *session) /* * Allocate up to the maximum number that we just computed and detected. */ - for (i = reccount; i < (u_int)conn->log_prealloc; i++) - WT_ERR(__wt_log_prealloc(session, ++log->prep_fileid)); + for (i = reccount; i < (u_int)conn->log_prealloc; i++) { + WT_ERR(__wt_log_allocfile( + session, ++log->prep_fileid, WT_LOG_PREPNAME)); + WT_STAT_FAST_CONN_INCR(session, log_prealloc_files); + } if (0) err: __wt_err(session, ret, "log pre-alloc server error"); @@ -341,7 +350,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) /* * Logging is on, allocate the WT_LOG structure and open the log file. */ - WT_RET(__wt_calloc(session, 1, sizeof(WT_LOG), &conn->log)); + WT_RET(__wt_calloc_one(session, &conn->log)); log = conn->log; WT_RET(__wt_spin_init(session, &log->log_lock, "log")); WT_RET(__wt_spin_init(session, &log->log_slot_lock, "log slot")); @@ -415,8 +424,15 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn = S2C(session); - if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { + /* + * We always set up the log_path so printlog can work without + * recovery. Therefore, always free it, even if logging isn't + * on. + */ + __wt_free(session, conn->log_path); return (0); + } if (conn->log_tid_set) { WT_TRET(__wt_cond_signal(session, conn->log_cond)); WT_TRET(__wt_thread_join(session, conn->log_tid)); @@ -426,8 +442,6 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_log_close(session)); - __wt_free(session, conn->log_path); - /* Close the server thread's session. */ if (conn->log_session != NULL) { wt_session = &conn->log_session->iface; @@ -441,6 +455,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) __wt_spin_destroy(session, &conn->log->log_lock); __wt_spin_destroy(session, &conn->log->log_slot_lock); __wt_spin_destroy(session, &conn->log->log_sync_lock); + __wt_free(session, conn->log_path); __wt_free(session, conn->log); return (ret); diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index f77f38ee01c..97ced7d5263 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -122,13 +122,14 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* * Now that all data handles are closed, tell logging that a checkpoint * has completed then shut down the log manager (only after closing - * data handles). + * data handles). The call to destroy the log manager is outside the + * conditional because we allocate the log path so that printlog can + * run without running logging or recovery. */ - if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) WT_TRET(__wt_txn_checkpoint_log( session, 1, WT_TXN_LOG_CKPT_STOP, NULL)); - WT_TRET(__wt_logmgr_destroy(session)); - } + WT_TRET(__wt_logmgr_destroy(session)); /* Free memory for collators, compressors, data sources. */ WT_TRET(__wt_conn_remove_collator(session)); diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c index 3671dfde5ef..a1ed9ca11e5 100644 --- a/src/third_party/wiredtiger/src/conn/conn_sweep.c +++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c @@ -33,6 +33,7 @@ __sweep(WT_SESSION_IMPL *session) continue; if (dhandle->session_inuse == 0 && dhandle->timeofdeath == 0) { dhandle->timeofdeath = now; + WT_STAT_FAST_CONN_INCR(session, dh_conn_tod); continue; } if (dhandle->session_inuse != 0 || @@ -75,9 +76,13 @@ __sweep(WT_SESSION_IMPL *session) locked = 1; /* If the handle is open, try to close it. */ - if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) + if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { WT_WITH_DHANDLE(session, dhandle, ret = __wt_conn_btree_sync_and_close(session, 0)); + if (ret == 0) + WT_STAT_FAST_CONN_INCR( + session, dh_conn_handles); + } /* * If there are no longer any references to the handle in any @@ -86,14 +91,14 @@ __sweep(WT_SESSION_IMPL *session) * don't do any special handling of EBUSY returns above. */ if (ret == 0 && dhandle->session_ref == 0) { - WT_STAT_FAST_CONN_INCR(session, dh_conn_handles); WT_WITH_DHANDLE(session, dhandle, ret = __wt_conn_dhandle_discard_single(session, 0)); /* If the handle was discarded, it isn't locked. */ if (ret == 0) locked = 0; - } + } else + WT_STAT_FAST_CONN_INCR(session, dh_conn_ref); if (locked) WT_TRET(__wt_writeunlock(session, dhandle->rwlock)); @@ -124,8 +129,8 @@ __sweep_server(void *arg) F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { /* Wait until the next event. */ - WT_ERR( - __wt_cond_wait(session, conn->sweep_cond, 30 * WT_MILLION)); + WT_ERR(__wt_cond_wait(session, + conn->sweep_cond, WT_DHANDLE_SWEEP_PERIOD * WT_MILLION)); /* Sweep the handles. */ WT_ERR(__sweep(session)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index b993e3f1af4..2ff6e614ae1 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -125,7 +125,7 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, cb = NULL; - WT_RET(__wt_calloc_def(session, 1, &cb)); + WT_RET(__wt_calloc_one(session, &cb)); cursor = &cb->iface; *cursor = iface; cursor->session = &session->iface; @@ -412,7 +412,7 @@ __backup_uri(WT_SESSION_IMPL *session, * If we find a non-empty target configuration string, we have a job, * otherwise it's not our problem. */ - WT_RET(__wt_config_gets_none(session, cfg, "target", &cval)); + WT_RET(__wt_config_gets(session, cfg, "target", &cval)); WT_RET(__wt_config_subinit(session, &targetconf, &cval)); for (cb->list_next = 0, target_list = 0; (ret = __wt_config_next(&targetconf, &k, &v)) == 0; ++target_list) { diff --git a/src/third_party/wiredtiger/src/cursor/cur_config.c b/src/third_party/wiredtiger/src/cursor/cur_config.c index 868b144efc1..297e030c6dc 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_config.c +++ b/src/third_party/wiredtiger/src/cursor/cur_config.c @@ -48,7 +48,7 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, WT_UNUSED(uri); - WT_RET(__wt_calloc_def(session, 1, &cconfig)); + WT_RET(__wt_calloc_one(session, &cconfig)); cursor = &cconfig->iface; *cursor = iface; diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c index fc742ae7c3d..6dd3fa76e23 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_ds.c +++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c @@ -473,7 +473,7 @@ __wt_curds_open( data_source = NULL; metaconf = NULL; - WT_RET(__wt_calloc_def(session, 1, &data_source)); + WT_RET(__wt_calloc_one(session, &data_source)); cursor = &data_source->iface; *cursor = iface; cursor->session = &session->iface; diff --git a/src/third_party/wiredtiger/src/cursor/cur_dump.c b/src/third_party/wiredtiger/src/cursor/cur_dump.c index 003b7e1f961..d632607de29 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_dump.c +++ b/src/third_party/wiredtiger/src/cursor/cur_dump.c @@ -371,7 +371,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) session = (WT_SESSION_IMPL *)child->session; - WT_RET(__wt_calloc_def(session, 1, &cdump)); + WT_RET(__wt_calloc_one(session, &cdump)); cursor = &cdump->iface; *cursor = iface; cursor->session = child->session; @@ -384,7 +384,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) F_SET(cursor, F_ISSET(child, WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_JSON | WT_CURSTD_DUMP_PRINT)); if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) { - WT_ERR(__wt_calloc_def(session, 1, &json)); + WT_ERR(__wt_calloc_one(session, &json)); cursor->json_private = child->json_private = json; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c index 936337047b8..d9b6815274e 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_index.c +++ b/src/third_party/wiredtiger/src/cursor/cur_index.c @@ -382,7 +382,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session, namesize = (size_t)(columns - idxname); WT_RET(__wt_schema_open_index(session, table, idxname, namesize, &idx)); - WT_RET(__wt_calloc_def(session, 1, &cindex)); + WT_RET(__wt_calloc_one(session, &cindex)); cursor = &cindex->iface; *cursor = iface; diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c index 1c8371fb9b5..00352bd8d3b 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_log.c +++ b/src/third_party/wiredtiger/src/cursor/cur_log.c @@ -335,12 +335,12 @@ __wt_curlog_open(WT_SESSION_IMPL *session, log = conn->log; cl = NULL; - WT_RET(__wt_calloc_def(session, 1, &cl)); + WT_RET(__wt_calloc_one(session, &cl)); cursor = &cl->iface; *cursor = iface; cursor->session = &session->iface; - WT_ERR(__wt_calloc_def(session, 1, &cl->cur_lsn)); - WT_ERR(__wt_calloc_def(session, 1, &cl->next_lsn)); + WT_ERR(__wt_calloc_one(session, &cl->cur_lsn)); + WT_ERR(__wt_calloc_one(session, &cl->next_lsn)); WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec)); WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey)); WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 5543dccec30..df7c7af2ce0 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -421,7 +421,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, WT_CURSOR_METADATA *mdc; WT_DECL_RET; - WT_RET(__wt_calloc(session, 1, sizeof(WT_CURSOR_METADATA), &mdc)); + WT_RET(__wt_calloc_one(session, &mdc)); cursor = &mdc->iface; *cursor = iface; @@ -438,7 +438,9 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, WT_ERR(__wt_cursor_config_readonly(cursor, cfg, 1)); if (0) { -err: __wt_free(session, mdc); +err: if (mdc->file_cursor != NULL) + WT_TRET(mdc->file_cursor->close(mdc->file_cursor)); + __wt_free(session, mdc); } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index b9bc4039619..cbea3e50a56 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -502,7 +502,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, conn = S2C(session); - WT_ERR(__wt_calloc_def(session, 1, &cst)); + WT_ERR(__wt_calloc_one(session, &cst)); cursor = &cst->iface; *cursor = iface; cursor->session = &session->iface; diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 25479a166b1..69e5a00df08 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -876,7 +876,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session, return (ret); } - WT_RET(__wt_calloc_def(session, 1, &ctable)); + WT_RET(__wt_calloc_one(session, &ctable)); cursor = &ctable->iface; *cursor = iface; diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index fa3bfa50eb0..bc791de6d0f 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -206,7 +206,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) * Publish: a barrier to ensure the structure fields are set * before the state change makes the page available to readers. */ - WT_RET(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr)); + WT_RET(__wt_calloc_one(session, &addr)); *addr = mod->mod_replace; mod->mod_replace.addr = NULL; mod->mod_replace.size = 0; diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 907b36c9ed4..e7c1826bda9 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -83,9 +83,10 @@ struct __wt_btree { uint32_t allocsize; /* Allocation size */ uint32_t maxintlpage; /* Internal page max size */ - uint32_t maxintlitem; /* Internal page max item size */ + uint32_t maxintlkey; /* Internal page max key size */ uint32_t maxleafpage; /* Leaf page max size */ - uint32_t maxleafitem; /* Leaf page max item size */ + uint32_t maxleafkey; /* Leaf page max key size */ + uint32_t maxleafvalue; /* Leaf page max value size */ uint64_t maxmempage; /* In memory page max size */ void *huffman_key; /* Key huffman encoding */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index bc75a6eda26..bb7caa991d6 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -122,7 +122,6 @@ extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, int is_recno); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep); extern void __wt_btree_evictable(WT_SESSION_IMPL *session, int on); -extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); @@ -183,6 +182,7 @@ extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const ch extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value); extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value); +extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value); extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value); extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value); @@ -304,7 +304,7 @@ extern void __wt_log_written_reset(WT_SESSION_IMPL *session); extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, int active_only); extern void __wt_log_files_free(WT_SESSION_IMPL *session, char **files, u_int count); extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id); -extern int __wt_log_prealloc(WT_SESSION_IMPL *session, uint32_t lognum); +extern int __wt_log_allocfile(WT_SESSION_IMPL *session, uint32_t lognum, const char *dest); extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum); extern int __wt_log_open(WT_SESSION_IMPL *session); extern int __wt_log_close(WT_SESSION_IMPL *session); @@ -496,6 +496,7 @@ extern void __wt_ovfl_txnc_free(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags); +extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize); extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk); diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h index 7055de7b264..cf923236278 100644 --- a/src/third_party/wiredtiger/src/include/lsm.h +++ b/src/third_party/wiredtiger/src/include/lsm.h @@ -151,6 +151,7 @@ struct __wt_lsm_manager { uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; #define WT_LSM_MAX_WORKERS 20 +#define WT_LSM_MIN_WORKERS 3 WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; }; diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index c861dff18bc..c2abaa08057 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -65,11 +65,13 @@ #define WT_SKIP_PROBABILITY (UINT32_MAX >> 2) /* - * __wt_calloc_def -- - * Simple calls don't need separate sizeof arguments. + * __wt_calloc_def, __wt_calloc_one -- + * Most calloc calls don't need separate count or sizeof arguments. */ #define __wt_calloc_def(session, number, addr) \ __wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr) +#define __wt_calloc_one(session, addr) \ + __wt_calloc(session, (size_t)1, sizeof(**(addr)), addr) /* * __wt_realloc_def -- diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 47c8987ec51..e381ded8d16 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -67,8 +67,8 @@ struct __wt_session_impl { */ /* Session handle reference list */ SLIST_HEAD(__dhandles, __wt_data_handle_cache) dhandles; -#define WT_DHANDLE_SWEEP_WAIT 60 /* Wait before discarding */ -#define WT_DHANDLE_SWEEP_PERIOD 20 /* Only sweep every 20 seconds */ +#define WT_DHANDLE_SWEEP_WAIT 30 /* Idle wait before discarding */ +#define WT_DHANDLE_SWEEP_PERIOD 10 /* Sweep interim */ time_t last_sweep; /* Last sweep for dead handles */ WT_CURSOR *cursor; /* Current cursor */ @@ -78,7 +78,7 @@ struct __wt_session_impl { WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ WT_COMPACT *compact; /* Compact state */ - WT_BTREE *metafile; /* Metadata file */ + WT_DATA_HANDLE *meta_dhandle; /* Metadata file */ void *meta_track; /* Metadata operation tracking */ void *meta_track_next; /* Current position */ void *meta_track_sub; /* Child transaction / save point */ diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 37df43adfee..20b7ff9c1d4 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -189,7 +189,9 @@ struct __wt_connection_stats { WT_STATS cursor_search_near; WT_STATS cursor_update; WT_STATS dh_conn_handles; + WT_STATS dh_conn_ref; WT_STATS dh_conn_sweeps; + WT_STATS dh_conn_tod; WT_STATS dh_session_handles; WT_STATS dh_session_sweeps; WT_STATS file_open; @@ -287,10 +289,11 @@ struct __wt_dsrc_stats { WT_STATS btree_entries; WT_STATS btree_fixed_len; WT_STATS btree_maximum_depth; - WT_STATS btree_maxintlitem; + WT_STATS btree_maxintlkey; WT_STATS btree_maxintlpage; - WT_STATS btree_maxleafitem; + WT_STATS btree_maxleafkey; WT_STATS btree_maxleafpage; + WT_STATS btree_maxleafvalue; WT_STATS btree_overflow; WT_STATS btree_row_internal; WT_STATS btree_row_leaf; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 127176c67ea..40ccf0ee59c 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -138,7 +138,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) * schema and metadata locks) to protect access to in-flight updates. */ if (txn->isolation == TXN_ISO_READ_UNCOMMITTED || - S2BT_SAFE(session) == session->metafile) + session->dhandle == session->meta_dhandle) return (1); /* Transactions see their own changes. */ diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 4f092127620..df2872dcb4d 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -996,12 +996,12 @@ struct __wt_session { * @config{immutable, configure the index to be immutable - that is an * index is not changed by any update to a record in the table., a * boolean flag; default \c false.} - * @config{internal_item_max, the largest key stored within an internal - * node\, in bytes. If non-zero\, any key larger than the specified - * size will be stored as an overflow item (which may require additional - * I/O to access). If zero\, a default size is chosen that permits at - * least 8 keys per internal page., an integer greater than or equal to - * 0; default \c 0.} + * @config{internal_key_max, the largest key stored in an internal + * node\, in bytes. If set\, keys larger than the specified size are + * stored as overflow items (which may require additional I/O to + * access). The default and the maximum allowed value are both one-tenth + * the size of a newly split internal page., an integer greater than or + * equal to 0; default \c 0.} * @config{internal_key_truncate, configure internal key truncation\, * discarding unnecessary trailing bytes on internal keys (ignored for * custom collators)., a boolean flag; default \c true.} @@ -1019,12 +1019,11 @@ struct __wt_session { * row-store files: keys of type \c 'r' are record numbers and records * referenced by record number are stored in column-store files., a * format string; default \c u.} - * @config{leaf_item_max, the largest key or value stored within a leaf - * node\, in bytes. If non-zero\, any key or value larger than the - * specified size will be stored as an overflow item (which may require - * additional I/O to access). If zero\, a default size is chosen that - * permits at least 4 key and value pairs per leaf page., an integer - * greater than or equal to 0; default \c 0.} + * @config{leaf_key_max, the largest key stored in a leaf node\, in + * bytes. If set\, keys larger than the specified size are stored as + * overflow items (which may require additional I/O to access). The + * default value is one-tenth the size of a newly split leaf page., an + * integer greater than or equal to 0; default \c 0.} * @config{leaf_page_max, the maximum page size for leaf nodes\, in * bytes; the size must be a multiple of the allocation size\, and is * significant for applications wanting to maximize sequential data @@ -1032,6 +1031,13 @@ struct __wt_session { * uncompressed data\, that is\, the limit is applied before any block * compression is done., an integer between 512B and 512MB; default \c * 32KB.} + * @config{leaf_value_max, the largest value stored in a leaf node\, in + * bytes. If set\, values larger than the specified size are stored as + * overflow items (which may require additional I/O to access). If the + * size is larger than the maximum leaf page size\, the page size is + * temporarily ignored when large values are written. The default is + * one-half the size of a newly split leaf page., an integer greater + * than or equal to 0; default \c 0.} * @config{lsm = (, options only relevant for LSM data sources., a set * of related configuration options defined below.} * @config{ auto_throttle, Throttle inserts into @@ -1549,8 +1555,9 @@ struct __wt_connection { * @config{ chunk, the granularity that a shared * cache is redistributed., an integer between 1MB and 10TB; default \c * 10MB.} - * @config{ name, name of a cache that is - * shared between databases., a string; default empty.} + * @config{ name, the name of a cache that + * is shared between databases or \c "none" when no shared cache is + * configured., a string; default \c none.} * @config{ reserve, amount of cache this * database is guaranteed to have available from the shared cache. This * setting is per database. Defaults to the chunk size., an integer; @@ -1895,7 +1902,7 @@ struct __wt_connection { * @config{ archive, automatically * archive unneeded log files., a boolean flag; default \c true.} * @config{ compressor, configure a compressor for log - * records. Permitted values are empty (off) or \c "bzip2"\, \c "snappy" or + * records. Permitted values are \c "none" or \c "bzip2"\, \c "snappy" or * custom compression engine \c "name" created with * WT_CONNECTION::add_compressor. See @ref compression for more information., a * string; default empty.} @@ -1905,7 +1912,7 @@ struct __wt_connection { * integer between 100KB and 2GB; default \c 100MB.} * @config{ path, the path to a directory into which the * log files are written. If the value is not an absolute path name\, the files - * are created relative to the database home., a string; default \c "".} + * are created relative to the database home., a string; default empty.} * @config{ prealloc, pre-allocate log files., a boolean * flag; default \c true.} * @config{ ),,} @@ -1930,11 +1937,13 @@ struct __wt_connection { * related configuration options defined below.} * @config{ chunk, the granularity that a shared cache is * redistributed., an integer between 1MB and 10TB; default \c 10MB.} - * @config{ name, name of a cache that is shared between - * databases., a string; default empty.} - * @config{ reserve, amount of cache this database is - * guaranteed to have available from the shared cache. This setting is per - * database. Defaults to the chunk size., an integer; default \c 0.} + * @config{ name, the name of a cache that is shared + * between databases or \c "none" when no shared cache is configured., a string; + * default \c none.} + * @config{ reserve, amount of cache + * this database is guaranteed to have available from the shared cache. This + * setting is per database. Defaults to the chunk size., an integer; default \c + * 0.} * @config{ size, maximum memory to allocate for the * shared cache. Setting this will update the value if one is already set., an * integer between 1MB and 10TB; default \c 500MB.} @@ -2055,6 +2064,11 @@ struct __wt_event_handler { * Callback to handle error messages; by default, error messages are * written to the stderr stream. * + * Errors that require the application to exit and restart will have + * their \c error value set to \c WT_PANIC. The application can exit + * immediately when \c WT_PANIC is passed to an error handler, there + * is no reason to return into WiredTiger. + * * Error handler returns are not ignored: if the handler returns * non-zero, the error may cause the WiredTiger function posting the * event to fail, and may even cause operation or library failure. @@ -2525,7 +2539,9 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); /*! * WiredTiger library panic. * This error indicates an underlying problem that requires the application exit - * and restart. + * and restart. The application can exit immediately when \c WT_PANIC is + * returned from a WiredTiger interface, no further WiredTiger calls are + * required. */ #define WT_PANIC -31804 /*! @cond internal */ @@ -2641,7 +2657,7 @@ struct __wt_compressor { * of \c dst_len. If the WT_COMPRESSOR::pre_size method is specified, * the destination buffer will be at least the size returned by that * method; otherwise, the destination buffer will be at least as large - * as \c src_len. + * as the length of the data to compress. * * If compression would not shrink the data or the \c dst buffer is not * large enough to hold the compressed data, the callback should set @@ -2711,10 +2727,8 @@ struct __wt_compressor { * On entry, \c dst points to the destination buffer with a length * of \c dst_len. If the WT_COMPRESSOR::pre_size method is specified, * the destination buffer will be at least the size returned by that - * method; otherwise, the destination buffer will be at least the - * maximum size for the page being written (that is, when writing a - * row-store leaf page, the destination buffer will be at least as - * large as the \c leaf_page_max configuration value). + * method; otherwise, the destination buffer will be at least as large + * as the length of the data to compress. * * After successful completion, the callback should return \c 0, and * set \c result_slotsp to the number of byte strings encoded and @@ -3188,138 +3202,142 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CURSOR_UPDATE 1056 /*! data-handle: connection dhandles swept */ #define WT_STAT_CONN_DH_CONN_HANDLES 1057 +/*! data-handle: connection candidate referenced */ +#define WT_STAT_CONN_DH_CONN_REF 1058 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_CONN_SWEEPS 1058 +#define WT_STAT_CONN_DH_CONN_SWEEPS 1059 +/*! data-handle: connection time-of-death sets */ +#define WT_STAT_CONN_DH_CONN_TOD 1060 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1059 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1061 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1060 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1062 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1061 +#define WT_STAT_CONN_FILE_OPEN 1063 /*! log: log buffer size increases */ -#define WT_STAT_CONN_LOG_BUFFER_GROW 1062 +#define WT_STAT_CONN_LOG_BUFFER_GROW 1064 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1063 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1065 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1064 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1066 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1065 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1067 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1066 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1068 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1067 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1069 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1068 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1070 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1069 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1071 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1070 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1072 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1071 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1073 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1072 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1074 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1073 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1075 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1074 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1076 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1075 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1077 /*! log: log read operations */ -#define WT_STAT_CONN_LOG_READS 1076 +#define WT_STAT_CONN_LOG_READS 1078 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1077 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1079 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1078 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1080 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1079 +#define WT_STAT_CONN_LOG_SCANS 1081 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1080 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1082 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1081 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1083 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1082 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1084 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1083 +#define WT_STAT_CONN_LOG_SLOT_RACES 1085 /*! log: slots selected for switching that were unavailable */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1084 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1086 /*! log: record size exceeded maximum */ -#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1085 +#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1087 /*! log: failed to find a slot large enough for record */ -#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1086 +#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1088 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1087 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1089 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1088 +#define WT_STAT_CONN_LOG_SYNC 1090 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1089 +#define WT_STAT_CONN_LOG_WRITES 1091 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1090 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1092 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1091 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1093 /*! LSM: rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1092 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1094 /*! LSM: application work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1093 +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1095 /*! LSM: merge work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1094 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1096 /*! LSM: tree queue hit maximum */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1095 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1097 /*! LSM: switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1096 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1098 /*! LSM: tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1097 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1099 /*! LSM: tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1098 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1100 /*! LSM: tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1099 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1101 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1100 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1102 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1101 +#define WT_STAT_CONN_MEMORY_FREE 1103 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1102 +#define WT_STAT_CONN_MEMORY_GROW 1104 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1103 +#define WT_STAT_CONN_READ_IO 1105 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1104 +#define WT_STAT_CONN_REC_PAGES 1106 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1105 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1107 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1106 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1108 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1107 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1109 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1108 +#define WT_STAT_CONN_RWLOCK_READ 1110 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1109 +#define WT_STAT_CONN_RWLOCK_WRITE 1111 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1110 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1112 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1111 +#define WT_STAT_CONN_SESSION_OPEN 1113 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1112 +#define WT_STAT_CONN_TXN_BEGIN 1114 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1113 +#define WT_STAT_CONN_TXN_CHECKPOINT 1115 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1114 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1116 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1115 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1117 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1116 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1118 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1117 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1119 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1118 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1120 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1119 +#define WT_STAT_CONN_TXN_COMMIT 1121 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1120 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1122 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1121 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1123 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1122 +#define WT_STAT_CONN_TXN_ROLLBACK 1124 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1123 +#define WT_STAT_CONN_WRITE_IO 1125 /*! * @} @@ -3377,130 +3395,132 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_BTREE_FIXED_LEN 2023 /*! btree: maximum tree depth */ #define WT_STAT_DSRC_BTREE_MAXIMUM_DEPTH 2024 -/*! btree: maximum internal page item size */ -#define WT_STAT_DSRC_BTREE_MAXINTLITEM 2025 +/*! btree: maximum internal page key size */ +#define WT_STAT_DSRC_BTREE_MAXINTLKEY 2025 /*! btree: maximum internal page size */ #define WT_STAT_DSRC_BTREE_MAXINTLPAGE 2026 -/*! btree: maximum leaf page item size */ -#define WT_STAT_DSRC_BTREE_MAXLEAFITEM 2027 +/*! btree: maximum leaf page key size */ +#define WT_STAT_DSRC_BTREE_MAXLEAFKEY 2027 /*! btree: maximum leaf page size */ #define WT_STAT_DSRC_BTREE_MAXLEAFPAGE 2028 +/*! btree: maximum leaf page value size */ +#define WT_STAT_DSRC_BTREE_MAXLEAFVALUE 2029 /*! btree: overflow pages */ -#define WT_STAT_DSRC_BTREE_OVERFLOW 2029 +#define WT_STAT_DSRC_BTREE_OVERFLOW 2030 /*! btree: row-store internal pages */ -#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2030 +#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2031 /*! btree: row-store leaf pages */ -#define WT_STAT_DSRC_BTREE_ROW_LEAF 2031 +#define WT_STAT_DSRC_BTREE_ROW_LEAF 2032 /*! cache: bytes read into cache */ -#define WT_STAT_DSRC_CACHE_BYTES_READ 2032 +#define WT_STAT_DSRC_CACHE_BYTES_READ 2033 /*! cache: bytes written from cache */ -#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2033 +#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2034 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2034 +#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2035 /*! cache: unmodified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2035 +#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2036 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2036 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2037 /*! cache: data source pages selected for eviction unable to be evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2037 +#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2038 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2038 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2039 /*! cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2039 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2040 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2040 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2041 /*! cache: overflow values cached in memory */ -#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2041 +#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2042 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2042 +#define WT_STAT_DSRC_CACHE_READ 2043 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2043 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2044 /*! cache: pages written from cache */ -#define WT_STAT_DSRC_CACHE_WRITE 2044 +#define WT_STAT_DSRC_CACHE_WRITE 2045 /*! compression: raw compression call failed, no additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2045 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2046 /*! compression: raw compression call failed, additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2046 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2047 /*! compression: raw compression call succeeded */ -#define WT_STAT_DSRC_COMPRESS_RAW_OK 2047 +#define WT_STAT_DSRC_COMPRESS_RAW_OK 2048 /*! compression: compressed pages read */ -#define WT_STAT_DSRC_COMPRESS_READ 2048 +#define WT_STAT_DSRC_COMPRESS_READ 2049 /*! compression: compressed pages written */ -#define WT_STAT_DSRC_COMPRESS_WRITE 2049 +#define WT_STAT_DSRC_COMPRESS_WRITE 2050 /*! compression: page written failed to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2050 +#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2051 /*! compression: page written was too small to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2051 +#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2052 /*! cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2052 +#define WT_STAT_DSRC_CURSOR_CREATE 2053 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2053 +#define WT_STAT_DSRC_CURSOR_INSERT 2054 /*! cursor: bulk-loaded cursor-insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2054 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2055 /*! cursor: cursor-insert key and value bytes inserted */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2055 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2056 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2056 +#define WT_STAT_DSRC_CURSOR_NEXT 2057 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2057 +#define WT_STAT_DSRC_CURSOR_PREV 2058 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2058 +#define WT_STAT_DSRC_CURSOR_REMOVE 2059 /*! cursor: cursor-remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2059 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2060 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2060 +#define WT_STAT_DSRC_CURSOR_RESET 2061 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2061 +#define WT_STAT_DSRC_CURSOR_SEARCH 2062 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2062 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2063 /*! cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2063 +#define WT_STAT_DSRC_CURSOR_UPDATE 2064 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2064 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2065 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2065 +#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2066 /*! LSM: chunks in the LSM tree */ -#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2066 +#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2067 /*! LSM: highest merge generation in the LSM tree */ -#define WT_STAT_DSRC_LSM_GENERATION_MAX 2067 +#define WT_STAT_DSRC_LSM_GENERATION_MAX 2068 /*! LSM: queries that could have benefited from a Bloom filter that did * not exist */ -#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2068 +#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2069 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2069 +#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2070 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2070 +#define WT_STAT_DSRC_REC_DICTIONARY 2071 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2071 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2072 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2072 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2073 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2073 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2074 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2074 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2075 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2075 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2076 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2076 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2077 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2077 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2078 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2078 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2079 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2079 +#define WT_STAT_DSRC_REC_PAGES 2080 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2080 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2081 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2081 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2082 /*! reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2082 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2083 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2083 +#define WT_STAT_DSRC_SESSION_COMPACT 2084 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2084 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2085 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2085 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2086 /*! @} */ /* * Statistics section: END diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index ec09bce5035..e7a7998f47d 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -564,12 +564,12 @@ err: if (log_fh != NULL) } /* - * __wt_log_prealloc -- - * Given an old log number of an archived file, create a new pre-allocated - * log file by resetting its header and pre-allocating it. + * __wt_log_allocfile -- + * Given a log number, create a new log file by writing the header, + * pre-allocating the file and moving it to the destination name. */ int -__wt_log_prealloc(WT_SESSION_IMPL *session, uint32_t lognum) +__wt_log_allocfile(WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) { WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(from_path); @@ -591,7 +591,7 @@ __wt_log_prealloc(WT_SESSION_IMPL *session, uint32_t lognum) WT_RET(__wt_scr_alloc(session, 0, &from_path)); WT_ERR(__wt_scr_alloc(session, 0, &to_path)); WT_ERR(__log_filename(session, lognum, WT_LOG_TMPNAME, from_path)); - WT_ERR(__log_filename(session, lognum, WT_LOG_PREPNAME, to_path)); + WT_ERR(__log_filename(session, lognum, dest, to_path)); /* * Set up the temporary file. */ @@ -599,7 +599,6 @@ __wt_log_prealloc(WT_SESSION_IMPL *session, uint32_t lognum) WT_ERR(__log_file_header(session, log_fh, NULL, 1)); WT_ERR(__wt_ftruncate(session, log_fh, LOG_FIRST_RECORD)); WT_ERR(__log_prealloc(session, log_fh)); - WT_STAT_FAST_CONN_INCR(session, log_prealloc_files); tmp_fh = log_fh; log_fh = NULL; WT_ERR(__wt_close(session, tmp_fh)); @@ -1008,8 +1007,7 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created) if (conn->log_prealloc) { ret = __log_alloc_prealloc(session, log->fileid); /* - * If ret is 0 it means we reused a file and we don't send the - * create flag to __log_openfile. + * If ret is 0 it means we found a pre-allocated file. * If ret is non-zero but not WT_NOTFOUND, we return the error. * If ret is WT_NOTFOUND, we leave create_log set and create * the new log file. @@ -1023,24 +1021,21 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created) return (ret); ret = 0; } + /* + * If we need to create the log file, do so now. + */ + if (create_log && (ret = __wt_log_allocfile( + session, log->fileid, WT_LOG_FILENAME)) != 0) + return (ret); WT_RET(__log_openfile(session, - create_log, &log->log_fh, WT_LOG_FILENAME, log->fileid)); + 0, &log->log_fh, WT_LOG_FILENAME, log->fileid)); /* - * If we created the log, write a header. Otherwise it's already there. - * We need to setup the LSNs. If we're using a pre-allocated log file, - * set the end LSN and alloc LSN to the end of the header because the - * header is already in the file. Otherwise it will be filled in - * when writing to the log file and the LSN values will be updated. + * We need to setup the LSNs. Set the end LSN and alloc LSN to + * the end of the header. */ - if (create_log) { - log->alloc_lsn.file = log->fileid; - log->alloc_lsn.offset = log->log_fh->size; - WT_RET(__log_file_header(session, NULL, &end_lsn, 0)); - } else { - log->alloc_lsn.file = log->fileid; - log->alloc_lsn.offset = LOG_FIRST_RECORD; - end_lsn = log->alloc_lsn; - } + log->alloc_lsn.file = log->fileid; + log->alloc_lsn.offset = LOG_FIRST_RECORD; + end_lsn = log->alloc_lsn; /* * If we're called from connection creation code, we need to update diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 0a8b54197a6..b7104eecb9d 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -1460,7 +1460,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)); WT_RET(ret); - WT_ERR(__wt_calloc_def(session, 1, &clsm)); + WT_ERR(__wt_calloc_one(session, &clsm)); cursor = &clsm->iface; *cursor = iface; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index 1356d336f6e..319bc734d6d 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -93,12 +93,17 @@ __lsm_general_worker_start(WT_SESSION_IMPL *session) } /* - * If there are only two threads handling work units let them - * both do flushes otherwise a single merge can lead to switched - * chunks filling up the cache. + * Setup the first worker properly - if there are only a minimal + * number of workers allow the first worker to flush. Otherwise a + * single merge can lead to switched chunks filling up the cache. + * This is separate to the main loop so that it is applied on startup + * and reconfigure. */ - if (manager->lsm_workers_max == 3) + if (manager->lsm_workers_max == WT_LSM_MIN_WORKERS) FLD_SET(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH); + else + FLD_CLR(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH); + return (0); } @@ -137,6 +142,15 @@ __lsm_stop_workers(WT_SESSION_IMPL *session) * statically when the connection was opened. */ } + + /* + * Setup the first worker properly - if there are only a minimal + * number of workers it should flush. Since the number of threads + * is being reduced the field can't already be set. + */ + if (manager->lsm_workers_max == WT_LSM_MIN_WORKERS) + FLD_SET(manager->lsm_worker_cookies[1].type, WT_LSM_WORK_FLUSH); + return (0); } @@ -645,7 +659,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, WT_RET(__wt_epoch(session, &lsm_tree->work_push_ts)); - WT_RET(__wt_calloc_def(session, 1, &entry)); + WT_RET(__wt_calloc_one(session, &entry)); entry->type = type; entry->flags = flags; entry->lsm_tree = lsm_tree; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c index 9ed605724ce..8989e979a44 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c @@ -311,7 +311,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) lsm_tree->name, verb, lsm_tree->chunk[verb]->id)); } - WT_ERR(__wt_calloc_def(session, 1, &chunk)); + WT_ERR(__wt_calloc_one(session, &chunk)); created_chunk = 1; chunk->id = dest_id; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c index bf03588c066..7fd77b64720 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c @@ -91,8 +91,8 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ERR(__wt_realloc_def(session, &lsm_tree->chunk_alloc, nchunks + 1, &lsm_tree->chunk)); - WT_ERR(__wt_calloc_def( - session, 1, &chunk)); + WT_ERR( + __wt_calloc_one(session, &chunk)); lsm_tree->chunk[nchunks++] = chunk; chunk->id = (uint32_t)lv.val; WT_ERR(__wt_lsm_tree_chunk_name(session, @@ -136,7 +136,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ERR(__wt_realloc_def(session, &lsm_tree->old_alloc, nchunks + 1, &lsm_tree->old_chunks)); - WT_ERR(__wt_calloc_def(session, 1, &chunk)); + WT_ERR(__wt_calloc_one(session, &chunk)); lsm_tree->old_chunks[nchunks++] = chunk; WT_ERR(__wt_strndup(session, lk.str, lk.len, &chunk->uri)); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index 888f12bdd94..2e61bcbacc8 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -332,7 +332,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, WT_RET_MSG(session, EINVAL, "LSM trees cannot be configured as column stores"); - WT_RET(__wt_calloc_def(session, 1, &lsm_tree)); + WT_RET(__wt_calloc_one(session, &lsm_tree)); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); @@ -343,7 +343,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, WT_ERR(__wt_strndup( session, cval.str, cval.len, &lsm_tree->value_format)); - WT_ERR(__wt_config_gets(session, cfg, "collator", &cval)); + WT_ERR(__wt_config_gets_none(session, cfg, "collator", &cval)); WT_ERR(__wt_strndup( session, cval.str, cval.len, &lsm_tree->collator_name)); @@ -551,7 +551,7 @@ __lsm_tree_open( return (ret); /* Try to open the tree. */ - WT_RET(__wt_calloc_def(session, 1, &lsm_tree)); + WT_RET(__wt_calloc_one(session, &lsm_tree)); WT_ERR(__wt_rwlock_alloc(session, &lsm_tree->rwlock, "lsm tree")); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); @@ -820,7 +820,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) "merge throttle %ld", lsm_tree->name, new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle)); - WT_ERR(__wt_calloc_def(session, 1, &chunk)); + WT_ERR(__wt_calloc_one(session, &chunk)); chunk->id = new_id; chunk->switch_txn = WT_TXN_NONE; lsm_tree->chunk[lsm_tree->nchunks++] = chunk; @@ -1011,7 +1011,7 @@ __wt_lsm_tree_truncate( locked = 1; /* Create the new chunk. */ - WT_ERR(__wt_calloc_def(session, 1, &chunk)); + WT_ERR(__wt_calloc_one(session, &chunk)); chunk->id = WT_ATOMIC_ADD4(lsm_tree->last, 1); WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk)); diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index 0f2d37a2e55..2df6e252f5b 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -31,20 +31,20 @@ __metadata_turtle(const char *key) /* * __wt_metadata_open -- - * Opens the metadata file, sets session->metafile. + * Opens the metadata file, sets session->meta_dhandle. */ int __wt_metadata_open(WT_SESSION_IMPL *session) { - if (session->metafile != NULL) + if (session->meta_dhandle != NULL) return (0); WT_RET(__wt_session_get_btree(session, WT_METAFILE_URI, NULL, NULL, 0)); - session->metafile = S2BT(session); - WT_ASSERT(session, session->metafile != NULL); + session->meta_dhandle = session->dhandle; + WT_ASSERT(session, session->meta_dhandle != NULL); - /* The metafile doesn't need to stay locked -- release it. */ + /* The meta_dhandle doesn't need to stay locked -- release it. */ return (__wt_session_release_btree(session)); } @@ -64,7 +64,7 @@ __wt_metadata_cursor( saved_dhandle = session->dhandle; WT_ERR(__wt_metadata_open(session)); - WT_SET_BTREE_IN_SESSION(session, session->metafile); + session->dhandle = session->meta_dhandle; /* * We use the metadata a lot, so we have a handle cached; lock it and diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c index a240ee76cb8..8cc8822b268 100644 --- a/src/third_party/wiredtiger/src/meta/meta_track.c +++ b/src/third_party/wiredtiger/src/meta/meta_track.c @@ -220,9 +220,9 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, int unroll) * If the operation succeeded and we aren't relying on the log for * durability, checkpoint the metadata. */ - if (!unroll && ret == 0 && session->metafile != NULL && + if (!unroll && ret == 0 && session->meta_dhandle != NULL && !FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) - WT_WITH_BTREE(session, session->metafile, + WT_WITH_DHANDLE(session, session->meta_dhandle, ret = __wt_checkpoint(session, NULL)); return (ret); diff --git a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c index 91410c54c04..cb9fe314beb 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_dlopen.c +++ b/src/third_party/wiredtiger/src/os_posix/os_dlopen.c @@ -17,7 +17,7 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_DECL_RET; WT_DLH *dlh; - WT_RET(__wt_calloc_def(session, 1, &dlh)); + WT_RET(__wt_calloc_one(session, &dlh)); WT_ERR(__wt_strdup(session, path, &dlh->name)); if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL) diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c index 3a76cceb3f0..479a61db795 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c @@ -22,7 +22,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, * !!! * This function MUST handle a NULL session handle. */ - WT_RET(__wt_calloc(session, 1, sizeof(WT_CONDVAR), &cond)); + WT_RET(__wt_calloc_one(session, &cond)); WT_ERR(pthread_mutex_init(&cond->mtx, NULL)); diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c index 1a692f71dce..c6cfa9412a7 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c +++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c @@ -53,7 +53,7 @@ __wt_rwlock_alloc( WT_RET(__wt_verbose(session, WT_VERB_MUTEX, "rwlock: alloc %s", name)); - WT_RET(__wt_calloc_def(session, 1, &rwlock)); + WT_RET(__wt_calloc_one(session, &rwlock)); rwlock->name = name; diff --git a/src/third_party/wiredtiger/src/os_posix/os_open.c b/src/third_party/wiredtiger/src/os_posix/os_open.c index 736ed2be377..a0da1952101 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_open.c +++ b/src/third_party/wiredtiger/src/os_posix/os_open.c @@ -145,7 +145,7 @@ setupfh: WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM)); #endif - WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh)); + WT_ERR(__wt_calloc_one(session, &fh)); WT_ERR(__wt_strdup(session, name, &fh->name)); fh->fd = fd; fh->ref = 1; diff --git a/src/third_party/wiredtiger/src/os_win/os_dir.c b/src/third_party/wiredtiger/src/os_win/os_dir.c index 076c64670d4..ab332e01186 100644 --- a/src/third_party/wiredtiger/src/os_win/os_dir.c +++ b/src/third_party/wiredtiger/src/os_win/os_dir.c @@ -38,7 +38,7 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, path[pathlen - 1] = '\0'; } - WT_ERR(__wt_scr_alloc(session, 0, &pathbuf)); + WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf)); WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path)); dirallocsz = 0; @@ -96,7 +96,7 @@ err: if (findhandle != INVALID_HANDLE_VALUE) (void)FindClose(findhandle); __wt_free(session, path); - __wt_buf_free(session, pathbuf); + __wt_scr_free(&pathbuf); if (ret == 0) return (0); diff --git a/src/third_party/wiredtiger/src/os_win/os_dlopen.c b/src/third_party/wiredtiger/src/os_win/os_dlopen.c index ebc90edd2b2..3fdd0c74b1f 100644 --- a/src/third_party/wiredtiger/src/os_win/os_dlopen.c +++ b/src/third_party/wiredtiger/src/os_win/os_dlopen.c @@ -17,7 +17,7 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_DECL_RET; WT_DLH *dlh; - WT_RET(__wt_calloc_def(session, 1, &dlh)); + WT_RET(__wt_calloc_one(session, &dlh)); WT_ERR(__wt_strdup(session, path, &dlh->name)); /* NULL means load from the current binary */ diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c index 9c9907bd8be..a33ab4e5c37 100644 --- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c @@ -21,7 +21,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, * !!! * This function MUST handle a NULL session handle. */ - WT_RET(__wt_calloc(session, 1, sizeof(WT_CONDVAR), &cond)); + WT_RET(__wt_calloc_one(session, &cond)); InitializeCriticalSection(&cond->mtx); diff --git a/src/third_party/wiredtiger/src/os_win/os_open.c b/src/third_party/wiredtiger/src/os_win/os_open.c index 6bdbaa3f065..f9d47c5be5d 100644 --- a/src/third_party/wiredtiger/src/os_win/os_open.c +++ b/src/third_party/wiredtiger/src/os_win/os_open.c @@ -130,7 +130,7 @@ __wt_open(WT_SESSION_IMPL *session, "open failed for secondary handle: %s", path); setupfh: - WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh)); + WT_ERR(__wt_calloc_one(session, &fh)); WT_ERR(__wt_strdup(session, name, &fh->name)); fh->filehandle = filehandle; fh->filehandle_secondary = filehandle_secondary; diff --git a/src/third_party/wiredtiger/src/packing/pack_stream.c b/src/third_party/wiredtiger/src/packing/pack_stream.c index efbbd5d9adb..a35a3555458 100644 --- a/src/third_party/wiredtiger/src/packing/pack_stream.c +++ b/src/third_party/wiredtiger/src/packing/pack_stream.c @@ -30,7 +30,7 @@ wiredtiger_pack_start(WT_SESSION *wt_session, WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; - WT_RET(__wt_calloc_def(session, 1, &ps)); + WT_RET(__wt_calloc_one(session, &ps)); WT_ERR(__pack_init(session, &ps->pack, format)); ps->p = ps->start = buffer; ps->end = ps->p + len; diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c index 92282393a23..fdf8ee6d68b 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_track.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c @@ -21,7 +21,7 @@ static int __ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page) { - return (__wt_calloc_def(session, 1, &page->modify->ovfl_track)); + return (__wt_calloc_one(session, &page->modify->ovfl_track)); } /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index c72447ae841..839ab028afd 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -96,16 +96,15 @@ typedef struct { * image size. * * First, the sizes of the page we're building. If WiredTiger is doing - * page layout, page_size is the same as page_size_max. We accumulate - * the maximum page size of raw data and when we reach that size, we - * split the page into multiple chunks, eventually compressing those - * chunks. When the application is doing page layout (raw compression - * is configured), page_size can continue to grow past page_size_max, - * and we keep accumulating raw data until the raw compression callback - * accepts it. + * page layout, page_size is the same as page_size_orig. We accumulate + * a "page size" of raw data and when we reach that size, we split the + * page into multiple chunks, eventually compressing those chunks. When + * the application is doing page layout (raw compression is configured), + * page_size can continue to grow past page_size_orig, and we keep + * accumulating raw data until the raw compression callback accepts it. */ - uint32_t page_size; /* Current page size */ - uint32_t page_size_max; /* Maximum on-disk page size */ + uint32_t page_size; /* Set page size */ + uint32_t page_size_orig; /* Saved set page size */ /* * Second, the split size: if we're doing the page layout, split to a @@ -202,9 +201,8 @@ typedef struct { * because we've already been forced to split. */ enum { SPLIT_BOUNDARY=0, /* Next: a split page boundary */ - SPLIT_MAX=1, /* Next: the maximum page boundary */ - SPLIT_TRACKING_OFF=2, /* No boundary checks */ - SPLIT_TRACKING_RAW=3 } /* Underlying compression decides */ + SPLIT_TRACKING_OFF=1, /* No boundary checks */ + SPLIT_TRACKING_RAW=2 } /* Underlying compression decides */ bnd_state; /* @@ -591,7 +589,7 @@ __rec_write_init(WT_SESSION_IMPL *session, page = ref->page; if ((r = *(WT_RECONCILE **)reconcilep) == NULL) { - WT_RET(__wt_calloc_def(session, 1, &r)); + WT_RET(__wt_calloc_one(session, &r)); *(WT_RECONCILE **)reconcilep = r; session->reconcile_cleanup = __rec_destroy_session; @@ -1284,7 +1282,7 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) */ WT_ASSERT(session, r->space_avail >= size); WT_ASSERT(session, - WT_BLOCK_FITS(r->first_free, size, r->dsk.mem, r->page_size)); + WT_BLOCK_FITS(r->first_free, size, r->dsk.mem, r->dsk.memsize)); r->entries += v; r->space_avail -= size; @@ -1543,6 +1541,37 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* + * __wt_split_page_size -- + * Split page size calculation: we don't want to repeatedly split every + * time a new entry is added, so we split to a smaller-than-maximum page size. + */ +uint32_t +__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + uintmax_t a; + uint32_t split_size; + + /* + * Ideally, the split page size is some percentage of the maximum page + * size rounded to an allocation unit (round to an allocation unit so + * we don't waste space when we write). + */ + a = maxpagesize; /* Don't overflow. */ + split_size = (uint32_t) + WT_ALIGN((a * (u_int)btree->split_pct) / 100, btree->allocsize); + + /* + * If the result of that calculation is the same as the allocation unit + * (that happens if the maximum size is the same size as an allocation + * unit, use a percentage of the maximum page size). + */ + if (split_size == btree->allocsize) + split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); + + return (split_size); +} + +/* * __rec_split_init -- * Initialization for the reconciliation split functions. */ @@ -1576,7 +1605,7 @@ __rec_split_init(WT_SESSION_IMPL *session, * we don't want to increment our way up to the amount of data needed by * the application to successfully compress to the target page size. */ - r->page_size = r->page_size_max = max; + r->page_size = r->page_size_orig = max; if (r->raw_compression) r->page_size *= 10; @@ -1632,11 +1661,11 @@ __rec_split_init(WT_SESSION_IMPL *session, r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } else if (page->type == WT_PAGE_COL_FIX) { - r->split_size = r->page_size_max; + r->split_size = r->page_size; r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } else { - r->split_size = __wt_split_page_size(btree, r->page_size_max); + r->split_size = __wt_split_page_size(btree, r->page_size); r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } @@ -1853,17 +1882,45 @@ err: __wt_scr_free(&update); } /* + * __rec_split_grow -- + * Grow the split buffer. + */ +static int +__rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) +{ + WT_BM *bm; + WT_BTREE *btree; + size_t corrected_page_size, len; + + btree = S2BT(session); + bm = btree->bm; + + len = WT_PTRDIFF(r->first_free, r->dsk.mem); + corrected_page_size = len + add_len; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); + WT_RET(__wt_buf_grow(session, &r->dsk, corrected_page_size)); + r->first_free = (uint8_t *)r->dsk.mem + len; + WT_ASSERT(session, corrected_page_size >= len); + r->space_avail = corrected_page_size - len; + WT_ASSERT(session, r->space_avail >= add_len); + return (0); +} + +/* * __rec_split -- * Handle the page reconciliation bookkeeping. (Did you know "bookkeeper" * has 3 doubled letters in a row? Sweet-tooth does, too.) */ static int -__rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r) +__rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) { - WT_BTREE *btree; WT_BOUNDARY *last, *next; + WT_BTREE *btree; WT_PAGE_HEADER *dsk; - uint32_t len; + size_t len; + + btree = S2BT(session); + dsk = r->dsk.mem; /* * We should never split during salvage, and we're about to drop core @@ -1874,45 +1931,20 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - /* - * Handle page-buffer size tracking; we have to do this work in every - * reconciliation loop, and I don't want to repeat the code that many - * times. - */ - btree = S2BT(session); - dsk = r->dsk.mem; - /* Hitting a page boundary resets the dictionary, in all cases. */ __rec_dictionary_reset(r); - /* - * There are 3 cases we have to handle. - * - * #1 - * About to cross a split boundary: save current boundary information - * and return. - * - * #2 - * About to cross the maximum boundary: use saved boundary information - * to write all of the split pages. - * - * #3 - * About to cross a split boundary, but we've either already done the - * split thing when we approached the maximum boundary, in which - * case we write the page and keep going, or we were never tracking - * split boundaries at all. - * - * Cases #1 and #2 are the hard ones: we're called when we're about to - * cross each split boundary, and we save information away so we can - * split if we have to. We're also called when we're about to cross - * the maximum page boundary: in that case, we do the actual split and - * clean up all the previous boundaries, then keep going. - */ switch (r->bnd_state) { - case SPLIT_BOUNDARY: /* Case #1 */ + case SPLIT_BOUNDARY: + /* We can get here if the first key/value pair won't fit. */ + if (r->entries == 0) + break; + /* - * Save the information about where we are when the split would - * have happened. + * About to cross a split boundary but not yet forced to split + * into multiple pages. If we have to split, this is one of the + * split points, save information about where we are when the + * split would have happened. */ WT_RET(__rec_split_bnd_grow(session, r)); last = &r->bnd[r->bnd_next++]; @@ -1939,37 +1971,50 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r) /* * Set the space available to another split-size chunk, if we * have one. If we don't have room for another split chunk, - * add whatever space remains in the maximum page size, and - * hope it's enough. + * add whatever space remains in this page. */ len = WT_PTRDIFF32(r->first_free, dsk); if (len + r->split_size <= r->page_size) r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); else { - r->bnd_state = SPLIT_MAX; + WT_ASSERT(session, r->page_size >= + (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); r->space_avail = r->page_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); } - break; - case SPLIT_MAX: /* Case #2 */ + + /* If the next object fits into this page, we're good to go. */ + if (r->space_avail >= next_len) + return (0); + /* - * It didn't all fit into a single page. + * We're going to have to split and create multiple pages. * * Cycle through the saved split-point information, writing the - * split chunks we have tracked. + * split chunks we have tracked. The underlying fixup function + * sets the space available and other information, and copied + * any unwritten chunk of data to the beginning of the buffer. */ WT_RET(__rec_split_fixup(session, r)); - - /* We're done saving split chunks. */ - r->bnd_state = SPLIT_TRACKING_OFF; break; - case SPLIT_TRACKING_OFF: /* Case #3 */ + case SPLIT_TRACKING_OFF: + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current data if + * we haven't already consumed a reasonable portion of the page. + */ + if (r->entries == 0) + break; + if (WT_PTRDIFF(r->first_free, r->dsk.mem) < r->page_size / 2) + break; + /* - * It didn't all fit, but either we've already noticed it and - * are now processing the rest of the page at the split-size - * boundaries, or the split size was the same as the page size, - * so we never bothered with saving split-point information. + * The key/value pairs didn't fit into a single page, but either + * we've already noticed that and are now processing the rest of + * the pairs at split size boundaries, or the split size was the + * same as the page size, and we never bothered with split point + * information at all. */ WT_RET(__rec_split_bnd_grow(session, r)); last = &r->bnd[r->bnd_next++]; @@ -2007,6 +2052,24 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r) case SPLIT_TRACKING_RAW: WT_ILLEGAL_VALUE(session); } + + /* + * Overflow values can be larger than the maximum page size but still be + * "on-page". If the next key/value pair is larger than space available + * after a split has happened (in other words, larger than the maximum + * page size), create a page sized to hold that one key/value pair. This + * generally splits the page into key/value pairs before a large object, + * the object, and key/value pairs after the object. It's possible other + * key/value pairs will also be aggregated onto the bigger page before + * or after, if the page happens to hold them, but it won't necessarily + * happen that way. + */ + if (r->space_avail < next_len) + WT_RET(__rec_split_grow(session, r, next_len)); + + /* We're done saving split chunks. */ + r->bnd_state = SPLIT_TRACKING_OFF; + return (0); } @@ -2015,8 +2078,8 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r) * Handle the raw compression page reconciliation bookkeeping. */ static int -__rec_split_raw_worker( - WT_SESSION_IMPL *session, WT_RECONCILE *r, int no_more_rows) +__rec_split_raw_worker(WT_SESSION_IMPL *session, + WT_RECONCILE *r, size_t next_len, int no_more_rows) { WT_BM *bm; WT_BOUNDARY *last, *next; @@ -2048,6 +2111,12 @@ __rec_split_raw_worker( next = last + 1; /* + * We can get here if the first key/value pair won't fit. + */ + if (r->entries == 0) + goto split_grow; + + /* * Build arrays of offsets and cumulative counts of cells and rows in * the page: the offset is the byte offset to the possible split-point * (adjusted for an initial chunk that cannot be compressed), entries @@ -2150,27 +2219,29 @@ __rec_split_raw_worker( WT_STORE_SIZE(WT_PTRDIFF(cell, dsk) - WT_BLOCK_COMPRESS_SKIP); /* - * Allocate a destination buffer. If there's a pre-size function, use - * it to determine the destination buffer's minimum size, otherwise the - * destination buffer is documented to be at least the maximum object - * size. + * Allocate a destination buffer. If there's a pre-size function, call + * it to determine the destination buffer's size, else the destination + * buffer is documented to be at least the source size. (We can't use + * the target page size, any single key/value could be larger than the + * page size. Don't bother figuring out a minimum, just use the source + * size.) * - * The destination buffer really only needs to be large enough for the - * target block size, corrected for the requirements of the underlying - * block manager. If the target block size is 8KB, that's a multiple - * of 512B and so the underlying block manager is fine with it. But... - * we don't control what the pre_size method returns us as a required - * size, and we don't want to document the compress_raw method has to - * skip bytes in the buffer because that's confusing, so do something - * more complicated. First, find out how much space the compress_raw - * function might need, either the value returned from pre_size, or the - * maximum object size. Add the compress-skip bytes, and then correct - * that value for the underlying block manager. As a result, we have - * a destination buffer that's the right "object" size when calling the - * compress_raw method, and there are bytes in the header just for us. + * The destination buffer needs to be large enough for the final block + * size, corrected for the requirements of the underlying block manager. + * If the final block size is 8KB, that's a multiple of 512B and so the + * underlying block manager is fine with it. But... we don't control + * what the pre_size method returns us as a required size, and we don't + * want to document the compress_raw method has to skip bytes in the + * buffer because that's confusing, so do something more complicated. + * First, find out how much space the compress_raw function might need, + * either the value returned from pre_size, or the initial source size. + * Add the compress-skip bytes, and then correct that value for the + * underlying block manager. As a result, we have a destination buffer + * that's large enough when calling the compress_raw method, and there + * are bytes in the header just for us. */ if (compressor->pre_size == NULL) - result_len = r->page_size_max; + result_len = (size_t)r->raw_offsets[slots]; else WT_RET(compressor->pre_size(compressor, wt_session, (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, @@ -2185,7 +2256,7 @@ __rec_split_raw_worker( */ memcpy(dst->mem, dsk, WT_BLOCK_COMPRESS_SKIP); ret = compressor->compress_raw(compressor, wt_session, - r->page_size_max, btree->split_pct, + r->page_size_orig, btree->split_pct, WT_BLOCK_COMPRESS_SKIP, (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets, slots, (uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP, @@ -2296,15 +2367,16 @@ no_slots: * Note use of memmove, the source and destination buffers can * overlap. */ - len = WT_PTRDIFF(r->first_free, (uint8_t *)dsk + - r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP); + len = WT_PTRDIFF( + r->first_free, (uint8_t *)dsk + dsk_dst->mem_size); dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); (void)memmove(dsk_start, (uint8_t *)r->first_free - len, len); r->entries -= r->raw_entries[result_slots - 1]; r->first_free = dsk_start + len; - r->space_avail = - r->page_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); + r->space_avail += r->raw_offsets[result_slots]; + WT_ASSERT(session, r->first_free + r->space_avail <= + (uint8_t *)r->dsk.mem + r->dsk.memsize); /* * Set the key for the next block (before writing the block, a @@ -2358,15 +2430,14 @@ no_slots: */ WT_STAT_FAST_DATA_INCR(session, compress_raw_fail_temporary); - len = WT_PTRDIFF(r->first_free, r->dsk.mem); - corrected_page_size = r->page_size * 2; - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, &r->dsk, corrected_page_size)); +split_grow: /* + * Double the page size and make sure we accommodate at least + * one more record. The reason for the latter is that we may + * be here because there's a large key/value pair that won't + * fit in our initial page buffer, even at its expanded size. + */ r->page_size *= 2; - r->first_free = (uint8_t *)r->dsk.mem + len; - r->space_avail = - r->page_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); - return (0); + return (__rec_split_grow(session, r, r->page_size + next_len)); } /* We have a block, update the boundary counter. */ @@ -2438,9 +2509,9 @@ err: __wt_scr_free(&tmp); * Raw compression split routine. */ static inline int -__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r) +__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) { - return (__rec_split_raw_worker(session, r, 0)); + return (__rec_split_raw_worker(session, r, next_len, 0)); } /* @@ -2456,7 +2527,6 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) /* Adjust the boundary information based on our split status. */ switch (r->bnd_state) { case SPLIT_BOUNDARY: - case SPLIT_MAX: /* * We never split, the reconciled page fit into a maximum page * size. Change the first boundary slot to represent the full @@ -2516,7 +2586,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) /* We're done reconciling - write the final page */ if (r->raw_compression && r->entries != 0) { while (r->entries != 0) - WT_RET(__rec_split_raw_worker(session, r, 1)); + WT_RET(__rec_split_raw_worker(session, r, 0, 1)); } else WT_RET(__rec_split_finish_std(session, r)); @@ -2553,7 +2623,7 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) * WT_PAGE_HEADER header onto the scratch buffer, most of the header * information remains unchanged between the pages. */ - WT_RET(__wt_scr_alloc(session, r->page_size_max, &tmp)); + WT_RET(__wt_scr_alloc(session, r->page_size, &tmp)); dsk = tmp->mem; memcpy(dsk, r->dsk.mem, WT_PAGE_HEADER_SIZE); @@ -2595,8 +2665,10 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) r->entries -= r->total_entries; r->first_free = dsk_start + len; + WT_ASSERT(session, + r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); r->space_avail = - (r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) - len; + r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); err: __wt_scr_free(&tmp); return (ret); @@ -2905,17 +2977,17 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. */ - while (key->len + val->len > r->space_avail) + if (key->len + val->len > r->space_avail) { if (r->raw_compression) - WT_RET(__rec_split_raw(session, r)); + WT_RET( + __rec_split_raw(session, r, key->len + val->len)); else { - WT_RET(__rec_split(session, r)); + WT_RET(__rec_split(session, r, key->len + val->len)); /* * Turn off prefix compression until a full key written - * to the new page, and (unless we're already working - * with an overflow key), rebuild the key without prefix - * compression. + * to the new page, and (unless already working with an + * overflow key), rebuild the key without compression. */ if (r->key_pfx_compress_conf) { r->key_pfx_compress = 0; @@ -2924,6 +2996,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) session, r, NULL, 0, &ovfl_key)); } } + } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -2968,7 +3041,7 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) __rec_incr(session, r, cbulk->entry, __bitstr_size( (size_t)cbulk->entry * btree->bitcnt)); - WT_RET(__rec_split(session, r)); + WT_RET(__rec_split(session, r, 0)); } cbulk->entry = 0; cbulk->nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail); @@ -3048,11 +3121,10 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) session, r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - while (val->len > r->space_avail) - if (r->raw_compression) - WT_RET(__rec_split_raw(session, r)); - else - WT_RET(__rec_split(session, r)); + if (val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -3171,11 +3243,10 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ - while (val->len > r->space_avail) - if (r->raw_compression) - WT_ERR(__rec_split_raw(session, r)); - else - WT_ERR(__rec_split(session, r)); + if (val->len > r->space_avail) + WT_ERR(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -3217,11 +3288,10 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - while (val->len > r->space_avail) - if (r->raw_compression) - WT_RET(__rec_split_raw(session, r)); - else - WT_RET(__rec_split(session, r)); + if (val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -3298,7 +3368,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) */ __rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt)); - WT_RET(__rec_split(session, r)); + WT_RET(__rec_split(session, r, 0)); /* Calculate the number of entries per page. */ entry = 0; @@ -3442,11 +3512,10 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - while (val->len > r->space_avail) - if (r->raw_compression) - WT_RET(__rec_split_raw(session, r)); - else - WT_RET(__rec_split(session, r)); + if (val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) @@ -4034,24 +4103,25 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = 0; /* Boundary: split or write the page. */ - while (key->len + val->len > r->space_avail) { - if (r->raw_compression) { - WT_ERR(__rec_split_raw(session, r)); - continue; - } - - /* - * In one path above, we copied address blocks from the - * page rather than building the actual key. In that - * case, we have to build the actual key now because we - * are about to promote it. - */ - if (key_onpage_ovfl) { - WT_ERR(__wt_buf_set(session, - r->cur, WT_IKEY_DATA(ikey), ikey->size)); - key_onpage_ovfl = 0; + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) + WT_ERR(__rec_split_raw( + session, r, key->len + val->len)); + else { + /* + * In one path above, we copied address blocks + * from the page rather than building the actual + * key. In that case, we have to build the key + * now because we are about to promote it. + */ + if (key_onpage_ovfl) { + WT_ERR(__wt_buf_set(session, r->cur, + WT_IKEY_DATA(ikey), ikey->size)); + key_onpage_ovfl = 0; + } + WT_ERR(__rec_split( + session, r, key->len + val->len)); } - WT_ERR(__rec_split(session, r)); } /* Copy the key and value onto the page. */ @@ -4102,11 +4172,10 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r, addr->addr, addr->size, __rec_vtype(addr), 0); /* Boundary: split or write the page. */ - while (key->len + val->len > r->space_avail) - if (r->raw_compression) - WT_RET(__rec_split_raw(session, r)); - else - WT_RET(__rec_split(session, r)); + if (key->len + val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, key->len + val->len) : + __rec_split(session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -4140,7 +4209,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, size_t size; uint64_t slvg_skip; uint32_t i; - int dictionary, onpage_ovfl, ovfl_key; + int dictionary, key_onpage_ovfl, ovfl_key; const void *p; void *copy; @@ -4369,9 +4438,9 @@ __rec_row_leaf(WT_SESSION_IMPL *session, * If the key is an overflow key that hasn't been removed, use * the original backing blocks. */ - onpage_ovfl = kpack != NULL && + key_onpage_ovfl = kpack != NULL && kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM; - if (onpage_ovfl) { + if (key_onpage_ovfl) { key->buf.data = cell; key->buf.size = __wt_cell_total_len(kpack); key->cell_len = 0; @@ -4435,36 +4504,39 @@ build: } /* Boundary: split or write the page. */ - while (key->len + val->len > r->space_avail) { - if (r->raw_compression) { - WT_ERR(__rec_split_raw(session, r)); - continue; - } - - /* - * In one path above, we copied address blocks from the - * page rather than building the actual key. In that - * case, we have to build the actual key now because we - * are about to promote it. - */ - if (onpage_ovfl) { - WT_ERR(__wt_dsk_cell_data_ref( - session, WT_PAGE_ROW_LEAF, kpack, r->cur)); - onpage_ovfl = 0; - } - WT_ERR(__rec_split(session, r)); + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) + WT_ERR(__rec_split_raw( + session, r, key->len + val->len)); + else { + /* + * In one path above, we copied address blocks + * from the page rather than building the actual + * key. In that case, we have to build the key + * now because we are about to promote it. + */ + if (key_onpage_ovfl) { + WT_ERR(__wt_dsk_cell_data_ref(session, + WT_PAGE_ROW_LEAF, kpack, r->cur)); + key_onpage_ovfl = 0; + } + WT_ERR(__rec_split( + session, r, key->len + val->len)); - /* - * Turn off prefix compression until a full key written - * to the new page, and (unless we're already working - * with an overflow key), rebuild the key without prefix - * compression. - */ - if (r->key_pfx_compress_conf) { - r->key_pfx_compress = 0; - if (!ovfl_key) - WT_ERR(__rec_cell_build_leaf_key( - session, r, NULL, 0, &ovfl_key)); + /* + * Turn off prefix compression until a full key + * written to the new page, and (unless already + * working with an overflow key), rebuild the + * key without compression. + */ + if (r->key_pfx_compress_conf) { + r->key_pfx_compress = 0; + if (!ovfl_key) + WT_ERR( + __rec_cell_build_leaf_key( + session, + r, NULL, 0, &ovfl_key)); + } } } @@ -4529,24 +4601,28 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key)); /* Boundary: split or write the page. */ - while (key->len + val->len > r->space_avail) { - if (r->raw_compression) { - WT_RET(__rec_split_raw(session, r)); - continue; - } - WT_RET(__rec_split(session, r)); + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + else { + WT_RET(__rec_split( + session, r, key->len + val->len)); - /* - * Turn off prefix compression until a full key written - * to the new page, and (unless we're already working - * with an overflow key), rebuild the key without prefix - * compression. - */ - if (r->key_pfx_compress_conf) { - r->key_pfx_compress = 0; - if (!ovfl_key) - WT_RET(__rec_cell_build_leaf_key( - session, r, NULL, 0, &ovfl_key)); + /* + * Turn off prefix compression until a full key + * written to the new page, and (unless already + * working with an overflow key), rebuild the + * key without compression. + */ + if (r->key_pfx_compress_conf) { + r->key_pfx_compress = 0; + if (!ovfl_key) + WT_RET( + __rec_cell_build_leaf_key( + session, + r, NULL, 0, &ovfl_key)); + } } } @@ -5064,7 +5140,7 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session, WT_RET(__wt_buf_set(session, &key->buf, data, size)); /* Create an overflow object if the data won't fit. */ - if (size > btree->maxintlitem) { + if (size > btree->maxintlkey) { WT_STAT_FAST_DATA_INCR(session, rec_overflow_key_internal); *is_ovflp = 1; @@ -5159,7 +5235,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, key->buf.data, (uint32_t)key->buf.size, &key->buf)); /* Create an overflow object if the data won't fit. */ - if (key->buf.size > btree->maxleafitem) { + if (key->buf.size > btree->maxleafkey) { /* * Overflow objects aren't prefix compressed -- rebuild any * object that was prefix compressed. @@ -5246,7 +5322,7 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, val->buf.data, (uint32_t)val->buf.size, &val->buf)); /* Create an overflow object if the data won't fit. */ - if (val->buf.size > btree->maxleafitem) { + if (val->buf.size > btree->maxleafvalue) { WT_STAT_FAST_DATA_INCR(session, rec_overflow_value); return (__rec_cell_build_ovfl( diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index a2348adb95f..7a1cf757093 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -367,8 +367,8 @@ __create_index(WT_SESSION_IMPL *session, ",source=\"%s\"", source)); } - if (__wt_config_getones(session, config, "extractor", &cval) == 0 && - cval.len != 0) { + if (__wt_config_getones_none( + session, config, "extractor", &cval) == 0 && cval.len != 0) { have_extractor = 1; /* Custom extractors must supply a key format. */ if ((ret = __wt_config_getones( @@ -586,8 +586,8 @@ __create_data_source(WT_SESSION_IMPL *session, /* * User-specified collators aren't supported for data-source objects. */ - if (__wt_config_getones( - session, config, "collator", &cval) != WT_NOTFOUND) + if (__wt_config_getones_none( + session, config, "collator", &cval) != WT_NOTFOUND && cval.len != 0) WT_RET_MSG(session, EINVAL, "WT_DATA_SOURCE objects do not support WT_COLLATOR " "ordering"); diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c index f5937381cbb..4699fdeee02 100644 --- a/src/third_party/wiredtiger/src/schema/schema_open.c +++ b/src/third_party/wiredtiger/src/schema/schema_open.c @@ -83,7 +83,7 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) goto err; } - WT_ERR(__wt_calloc_def(session, 1, &colgroup)); + WT_ERR(__wt_calloc_one(session, &colgroup)); WT_ERR(__wt_strndup( session, buf->data, buf->size, &colgroup->name)); colgroup->config = cgconfig; @@ -319,7 +319,7 @@ __wt_schema_open_index(WT_SESSION_IMPL *session, if (table->indices[i] == NULL) { WT_ERR(cursor->get_value(cursor, &idxconf)); - WT_ERR(__wt_calloc_def(session, 1, &idx)); + WT_ERR(__wt_calloc_one(session, &idx)); WT_ERR(__wt_strdup(session, uri, &idx->name)); WT_ERR(__wt_strdup(session, idxconf, &idx->config)); WT_ERR(__open_index(session, table, idx)); @@ -392,7 +392,7 @@ __wt_schema_open_table(WT_SESSION_IMPL *session, WT_ERR(cursor->search(cursor)); WT_ERR(cursor->get_value(cursor, &tconfig)); - WT_ERR(__wt_calloc_def(session, 1, &table)); + WT_ERR(__wt_calloc_one(session, &table)); table->name = tablename; tablename = NULL; table->name_hash = __wt_hash_city64(name, namelen); diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c index e28e277d5f6..85483c7c8ae 100644 --- a/src/third_party/wiredtiger/src/session/session_dhandle.c +++ b/src/third_party/wiredtiger/src/session/session_dhandle.c @@ -20,7 +20,7 @@ __session_add_dhandle( WT_DATA_HANDLE_CACHE *dhandle_cache; uint64_t bucket; - WT_RET(__wt_calloc_def(session, 1, &dhandle_cache)); + WT_RET(__wt_calloc_one(session, &dhandle_cache)); dhandle_cache->dhandle = session->dhandle; bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE; diff --git a/src/third_party/wiredtiger/src/support/huffman.c b/src/third_party/wiredtiger/src/support/huffman.c index 5a06b72d33e..9625e879381 100644 --- a/src/third_party/wiredtiger/src/support/huffman.c +++ b/src/third_party/wiredtiger/src/support/huffman.c @@ -306,7 +306,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session, combined_nodes = leaves = NULL; node = node2 = tempnode = NULL; - WT_RET(__wt_calloc_def(session, 1, &huffman)); + WT_RET(__wt_calloc_one(session, &huffman)); /* * The frequency table is 4B pairs of symbol and frequency. The symbol @@ -381,8 +381,8 @@ __wt_huffman_open(WT_SESSION_IMPL *session, symcnt, sizeof(INDEXED_SYMBOL), indexed_freq_compare); /* We need two node queues to build the tree. */ - WT_ERR(__wt_calloc_def(session, 1, &leaves)); - WT_ERR(__wt_calloc_def(session, 1, &combined_nodes)); + WT_ERR(__wt_calloc_one(session, &leaves)); + WT_ERR(__wt_calloc_one(session, &combined_nodes)); /* * Adding the leaves to the queue. @@ -393,7 +393,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session, */ for (i = 0; i < symcnt; ++i) if (indexed_freqs[i].frequency > 0) { - WT_ERR(__wt_calloc_def(session, 1, &tempnode)); + WT_ERR(__wt_calloc_one(session, &tempnode)); tempnode->symbol = (uint8_t)indexed_freqs[i].symbol; tempnode->weight = indexed_freqs[i].frequency; WT_ERR(node_queue_enqueue(session, leaves, tempnode)); @@ -431,7 +431,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session, * In every second run, we have both node and node2 initialized. */ if (node != NULL && node2 != NULL) { - WT_ERR(__wt_calloc_def(session, 1, &tempnode)); + WT_ERR(__wt_calloc_one(session, &tempnode)); /* The new weight is the sum of the two weights. */ tempnode->weight = node->weight + node2->weight; @@ -845,7 +845,7 @@ node_queue_enqueue( NODE_QUEUE_ELEM *elem; /* Allocating a new linked list element */ - WT_RET(__wt_calloc_def(session, 1, &elem)); + WT_RET(__wt_calloc_one(session, &elem)); /* It holds the tree node, and has no next element yet */ elem->node = node; diff --git a/src/third_party/wiredtiger/src/support/scratch.c b/src/third_party/wiredtiger/src/support/scratch.c index ca2cdac8377..e4df04a36ed 100644 --- a/src/third_party/wiredtiger/src/support/scratch.c +++ b/src/third_party/wiredtiger/src/support/scratch.c @@ -216,7 +216,7 @@ __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp WT_ASSERT(session, slot != NULL); best = slot; - WT_ERR(__wt_calloc_def(session, 1, best)); + WT_ERR(__wt_calloc_one(session, best)); /* Scratch buffers must be aligned. */ F_SET(*best, WT_ITEM_ALIGNED); diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 21d56238f4a..265a2673d7c 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -30,11 +30,11 @@ __wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats) stats->btree_column_variable.desc = "btree: column-store variable-size leaf pages"; stats->btree_fixed_len.desc = "btree: fixed-record size"; - stats->btree_maxintlitem.desc = - "btree: maximum internal page item size"; + stats->btree_maxintlkey.desc = "btree: maximum internal page key size"; stats->btree_maxintlpage.desc = "btree: maximum internal page size"; - stats->btree_maxleafitem.desc = "btree: maximum leaf page item size"; + stats->btree_maxleafkey.desc = "btree: maximum leaf page key size"; stats->btree_maxleafpage.desc = "btree: maximum leaf page size"; + stats->btree_maxleafvalue.desc = "btree: maximum leaf page value size"; stats->btree_maximum_depth.desc = "btree: maximum tree depth"; stats->btree_entries.desc = "btree: number of key/value pairs"; stats->btree_overflow.desc = "btree: overflow pages"; @@ -154,10 +154,11 @@ __wt_stat_refresh_dsrc_stats(void *stats_arg) stats->btree_column_deleted.v = 0; stats->btree_column_variable.v = 0; stats->btree_fixed_len.v = 0; - stats->btree_maxintlitem.v = 0; + stats->btree_maxintlkey.v = 0; stats->btree_maxintlpage.v = 0; - stats->btree_maxleafitem.v = 0; + stats->btree_maxleafkey.v = 0; stats->btree_maxleafpage.v = 0; + stats->btree_maxleafvalue.v = 0; stats->btree_maximum_depth.v = 0; stats->btree_entries.v = 0; stats->btree_overflow.v = 0; @@ -408,8 +409,11 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) stats->cursor_search.desc = "cursor: cursor search calls"; stats->cursor_search_near.desc = "cursor: cursor search near calls"; stats->cursor_update.desc = "cursor: cursor update calls"; + stats->dh_conn_ref.desc = + "data-handle: connection candidate referenced"; stats->dh_conn_handles.desc = "data-handle: connection dhandles swept"; stats->dh_conn_sweeps.desc = "data-handle: connection sweeps"; + stats->dh_conn_tod.desc = "data-handle: connection time-of-death sets"; stats->dh_session_handles.desc = "data-handle: session dhandles swept"; stats->dh_session_sweeps.desc = "data-handle: session sweep attempts"; stats->log_slot_closes.desc = "log: consolidated slot closures"; @@ -563,8 +567,10 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->cursor_search.v = 0; stats->cursor_search_near.v = 0; stats->cursor_update.v = 0; + stats->dh_conn_ref.v = 0; stats->dh_conn_handles.v = 0; stats->dh_conn_sweeps.v = 0; + stats->dh_conn_tod.v = 0; stats->dh_session_handles.v = 0; stats->dh_session_sweeps.v = 0; stats->log_slot_closes.v = 0; diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 15cce6eb098..fe9c9eaf89e 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -311,7 +311,6 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { struct timespec start, stop; WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle; WT_DECL_RET; WT_TXN *txn; WT_TXN_ISOLATION saved_isolation; @@ -327,6 +326,9 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) txn = &session->txn; full = logging = tracking = 0; + /* Ensure the metadata table is open before taking any locks. */ + WT_RET(__wt_metadata_open(session)); + /* * Do a pass over the configuration arguments and figure out what kind * kind of checkpoint this is. @@ -410,16 +412,6 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) if (F_ISSET(conn, WT_CONN_CKPT_SYNC)) WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); - /* Checkpoint the metadata file. */ - SLIST_FOREACH(dhandle, &conn->dhlh, l) { - if (WT_IS_METADATA(dhandle) || - !WT_PREFIX_MATCH(dhandle->name, "file:")) - break; - } - if (dhandle == NULL) - WT_ERR_MSG(session, EINVAL, - "checkpoint unable to find open meta-data handle"); - /* * Disable metadata tracking during the metadata checkpoint. * @@ -430,11 +422,12 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED; saved_meta_next = session->meta_track_next; session->meta_track_next = NULL; - WT_WITH_DHANDLE(session, dhandle, ret = __wt_checkpoint(session, cfg)); + WT_WITH_DHANDLE(session, + session->meta_dhandle, ret = __wt_checkpoint(session, cfg)); session->meta_track_next = saved_meta_next; WT_ERR(ret); if (F_ISSET(conn, WT_CONN_CKPT_SYNC)) { - WT_WITH_DHANDLE(session, dhandle, + WT_WITH_DHANDLE(session, session->meta_dhandle, ret = __wt_checkpoint_sync(session, NULL)); WT_ERR(ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 73fe35284b5..ae7641020be 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -430,7 +430,7 @@ __txn_printlog( p = LOG_SKIP_HEADER(rawrec->data); end = (const uint8_t *)rawrec->data + rawrec->size; logrec = (WT_LOG_RECORD *)rawrec->data; - compressed = F_ISSET(rawrec, WT_LOG_RECORD_COMPRESSED); + compressed = F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED); /* First, peek at the log record type. */ WT_RET(__wt_logrec_read(session, &p, end, &rectype)); diff --git a/src/third_party/wiredtiger/tools/stat_data.py b/src/third_party/wiredtiger/tools/stat_data.py index 50528dbd26a..3d192be7566 100644 --- a/src/third_party/wiredtiger/tools/stat_data.py +++ b/src/third_party/wiredtiger/tools/stat_data.py @@ -34,10 +34,11 @@ no_scale_per_second_list = [ 'btree: column-store variable-size deleted values', 'btree: column-store variable-size leaf pages', 'btree: fixed-record size', - 'btree: maximum internal page item size', + 'btree: maximum internal page key size', 'btree: maximum internal page size', - 'btree: maximum leaf page item size', + 'btree: maximum leaf page key size', 'btree: maximum leaf page size', + 'btree: maximum leaf page value size', 'btree: maximum tree depth', 'btree: number of key/value pairs', 'btree: overflow pages', |