diff options
author | Luke Chen <luke.chen@mongodb.com> | 2018-05-04 13:57:22 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2018-05-04 13:57:22 +1000 |
commit | 11d837751b0da2cf7948ee17e2f6438f38c93693 (patch) | |
tree | 508b815763591b0d61f457b845a7d49909f1dd8d | |
parent | 381acc4baa6d5f730fb77be5acc39f8473a16b88 (diff) | |
download | mongo-11d837751b0da2cf7948ee17e2f6438f38c93693.tar.gz |
Import wiredtiger: ba76f72622a52024382effb7296e9f8f5cac224b from branch mongodb-3.8
ref: aa6646fd0a..ba76f72622
for: 3.7.10
WT-4023 Add messages to a few places that return errors
WT-4033 Add ability to alter tables without taking exclusive access
WT-4058 Make slot switch quicker when I/O is slow
WT-4064 Relax checking of lookaside entry count
WT-4069 Commit of a truncate can leave a page permanently locked
WT-4070 WiredTiger recovery can checkpoint data referencing pieces of a transaction
23 files changed, 289 insertions, 186 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 9ecd76d76ec..68865afaee3 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -938,7 +938,15 @@ methods = { 'WT_CURSOR.reconfigure' : Method(cursor_runtime_config), -'WT_SESSION.alter' : Method(file_runtime_config), +'WT_SESSION.alter' : Method(file_runtime_config + [ + Config('exclusive_refreshed', 'true', r''' + refresh the in memory state and flush the metadata change to disk, + disabling this flag is dangerous - it will only re-write the + metadata without refreshing the in-memory information or creating + a checkpoint. The update will also only be applied to table URI + entries in the metadata, not their sub-entries.''', + type='boolean', undoc=True), +]), 'WT_SESSION.close' : Method([]), diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 3dcc44b3f0a..aba792cccb7 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "aa6646fd0a1394793edfcf799f5f41f1d073bc5d", + "commit": "ba76f72622a52024382effb7296e9f8f5cac224b", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-3.8" diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c index edc68169365..cf4743009ee 100644 --- a/src/third_party/wiredtiger/src/bloom/bloom.c +++ b/src/third_party/wiredtiger/src/bloom/bloom.c @@ -299,13 +299,13 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) WT_ERR(c->reset(c)); return (result); -err: /* Don't return WT_NOTFOUND from a failed search. 
*/ - if (ret == WT_NOTFOUND) - ret = WT_ERROR; - if (c != NULL) - (void)c->reset(c); - __wt_err(bloom->session, ret, "Failed lookup in bloom filter"); - return (ret); +err: if (c != NULL) + WT_TRET(c->reset(c)); + + /* Don't return WT_NOTFOUND from a failed cursor open or search. */ + WT_RET_MSG(bloom->session, + ret == WT_NOTFOUND ? WT_ERROR : ret, + "Failed lookup in bloom filter"); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index ed3cf6b5943..7dd5177dbf6 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -441,9 +441,9 @@ __cursor_row_modify( */ static void __cursor_restart( - WT_SESSION_IMPL *session, uint64_t *yield_count, uint64_t *sleep_count) + WT_SESSION_IMPL *session, uint64_t *yield_count, uint64_t *sleep_usecs) { - __wt_state_yield_sleep(yield_count, sleep_count); + __wt_state_yield_sleep(yield_count, sleep_usecs); WT_STAT_CONN_INCR(session, cursor_restart); WT_STAT_DATA_INCR(session, cursor_restart); @@ -733,13 +733,13 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; - uint64_t yield_count, sleep_count; + uint64_t yield_count, sleep_usecs; bool append_key, valid; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - yield_count = sleep_count = 0; + yield_count = sleep_usecs = 0; WT_STAT_CONN_INCR(session, cursor_insert); WT_STAT_DATA_INCR(session, cursor_insert); @@ -856,7 +856,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); } err: if (ret == WT_RESTART) { - __cursor_restart(session, &yield_count, &sleep_count); + __cursor_restart(session, &yield_count, &sleep_usecs); goto retry; } @@ -919,12 +919,12 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; - uint64_t yield_count, sleep_count; + uint64_t yield_count, sleep_usecs; cursor = &cbt->iface; btree = cbt->btree; 
session = (WT_SESSION_IMPL *)cursor->session; - yield_count = sleep_count = 0; + yield_count = sleep_usecs = 0; /* * The pinned page goes away if we do a search, get a local copy of any @@ -946,7 +946,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(__wt_illegal_value(session, NULL)); err: if (ret == WT_RESTART) { - __cursor_restart(session, &yield_count, &sleep_count); + __cursor_restart(session, &yield_count, &sleep_usecs); goto retry; } @@ -971,13 +971,13 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; - uint64_t yield_count, sleep_count; + uint64_t yield_count, sleep_usecs; bool iterating, valid; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - yield_count = sleep_count = 0; + yield_count = sleep_usecs = 0; iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV); WT_STAT_CONN_INCR(session, cursor_remove); @@ -1110,7 +1110,7 @@ retry: if (positioned == POSITIONED) } err: if (ret == WT_RESTART) { - __cursor_restart(session, &yield_count, &sleep_count); + __cursor_restart(session, &yield_count, &sleep_usecs); goto retry; } @@ -1189,13 +1189,13 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; - uint64_t yield_count, sleep_count; + uint64_t yield_count, sleep_usecs; bool valid; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - yield_count = sleep_count = 0; + yield_count = sleep_usecs = 0; /* It's no longer possible to bulk-load into the tree. 
*/ __cursor_disable_bulk(session, btree); @@ -1287,7 +1287,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); } err: if (ret == WT_RESTART) { - __cursor_restart(session, &yield_count, &sleep_count); + __cursor_restart(session, &yield_count, &sleep_usecs); goto retry; } @@ -1626,9 +1626,9 @@ __cursor_truncate(WT_SESSION_IMPL *session, int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; - uint64_t yield_count, sleep_count; + uint64_t yield_count, sleep_usecs; - yield_count = sleep_count = 0; + yield_count = sleep_usecs = 0; /* * First, call the cursor search method to re-position the cursor: we @@ -1665,7 +1665,7 @@ retry: WT_ERR(__wt_btcur_search(start)); } err: if (ret == WT_RESTART) { - __cursor_restart(session, &yield_count, &sleep_count); + __cursor_restart(session, &yield_count, &sleep_usecs); goto retry; } @@ -1683,10 +1683,10 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; - uint64_t yield_count, sleep_count; + uint64_t yield_count, sleep_usecs; const uint8_t *value; - yield_count = sleep_count = 0; + yield_count = sleep_usecs = 0; /* * Handle fixed-length column-store objects separately: for row-store @@ -1725,7 +1725,7 @@ retry: WT_ERR(__wt_btcur_search(start)); } err: if (ret == WT_RESTART) { - __cursor_restart(session, &yield_count, &sleep_count); + __cursor_restart(session, &yield_count, &sleep_usecs); goto retry; } diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c index cb283fc0f92..533e0990918 100644 --- a/src/third_party/wiredtiger/src/btree/bt_delete.c +++ b/src/third_party/wiredtiger/src/btree/bt_delete.c @@ -173,7 +173,7 @@ int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) { WT_UPDATE **updp; - uint64_t sleep_count, yield_count; + uint64_t sleep_usecs, yield_count; uint32_t current_state; bool locked; @@ -183,7 +183,7 @@ __wt_delete_page_rollback(WT_SESSION_IMPL 
*session, WT_REF *ref) * instantiated or being instantiated. Loop because it's possible for * the page to return to the deleted state if instantiation fails. */ - for (locked = false, sleep_count = yield_count = 0;;) { + for (locked = false, sleep_usecs = yield_count = 0;;) { switch (current_state = ref->state) { case WT_REF_DELETED: /* @@ -222,9 +222,9 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) * and if we've yielded enough times, start sleeping so we * don't burn CPU to no purpose. */ - __wt_state_yield_sleep(&yield_count, &sleep_count); + __wt_state_yield_sleep(&yield_count, &sleep_usecs); WT_STAT_CONN_INCRV(session, - page_del_rollback_blocked, sleep_count); + page_del_rollback_blocked, sleep_usecs); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 32de6d60566..1299c0ab4e8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -552,7 +552,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; - uint64_t sleep_cnt, wait_cnt; + uint64_t sleep_usecs, yield_cnt; uint32_t current_state; int force_attempts; bool busy, cache_work, did_read, stalled, wont_need; @@ -579,7 +579,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags } for (did_read = wont_need = stalled = false, - force_attempts = 0, sleep_cnt = wait_cnt = 0;;) { + force_attempts = 0, sleep_usecs = yield_cnt = 0;;) { switch (current_state = ref->state) { case WT_REF_DELETED: if (LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT)) @@ -788,11 +788,13 @@ skip_evict: /* * we've yielded enough times, start sleeping so we don't burn * CPU to no purpose. 
*/ - if (stalled) - wait_cnt += WT_THOUSAND; - else if (++wait_cnt < WT_THOUSAND) { - __wt_yield(); - continue; + if (yield_cnt < WT_THOUSAND) { + if (!stalled) { + ++yield_cnt; + __wt_yield(); + continue; + } + yield_cnt = WT_THOUSAND; } /* @@ -808,7 +810,7 @@ skip_evict: /* if (cache_work) continue; } - __wt_state_yield_sleep(&wait_cnt, &sleep_cnt); - WT_STAT_CONN_INCRV(session, page_sleep, sleep_cnt); + __wt_state_yield_sleep(&yield_cnt, &sleep_usecs); + WT_STAT_CONN_INCRV(session, page_sleep, sleep_usecs); } } diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index a800d896023..56e5ae62fe8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -18,7 +18,7 @@ __ref_index_slot(WT_SESSION_IMPL *session, { WT_PAGE_INDEX *pindex; WT_REF **start, **stop, **p, **t; - uint64_t sleep_count, yield_count; + uint64_t sleep_usecs, yield_count; uint32_t entries, slot; /* @@ -27,7 +27,7 @@ __ref_index_slot(WT_SESSION_IMPL *session, * split, their WT_REF structure home values are updated; yield * and wait for that to happen. */ - for (sleep_count = yield_count = 0;;) { + for (sleep_usecs = yield_count = 0;;) { /* * Copy the parent page's index value: the page can split at * any time, but the index's value is always valid, even if @@ -70,9 +70,9 @@ __ref_index_slot(WT_SESSION_IMPL *session, * before retrying, and if we've yielded enough times, start * sleeping so we don't burn CPU to no purpose. 
*/ - __wt_state_yield_sleep(&yield_count, &sleep_count); + __wt_state_yield_sleep(&yield_count, &sleep_usecs); WT_STAT_CONN_INCRV(session, page_index_slot_ref_blocked, - sleep_count); + sleep_usecs); } found: WT_ASSERT(session, pindex->index[slot] == ref); @@ -230,13 +230,13 @@ __tree_walk_internal(WT_SESSION_IMPL *session, WT_DECL_RET; WT_PAGE_INDEX *pindex; WT_REF *couple, *couple_orig, *ref; - uint64_t sleep_count, yield_count; + uint64_t sleep_usecs, yield_count; uint32_t current_state, slot; bool empty_internal, initial_descent, prev, skip; btree = S2BT(session); pindex = NULL; - sleep_count = yield_count = 0; + sleep_usecs = yield_count = 0; empty_internal = initial_descent = false; /* @@ -477,7 +477,7 @@ restart: /* * CPU to no purpose. */ __wt_state_yield_sleep( - &yield_count, &sleep_count); + &yield_count, &sleep_usecs); /* * If a cursor is setting up at the end of the diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index 1d9078d6aee..4e5a27a7229 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -42,17 +42,28 @@ __las_restore_isolation( } /* - * __wt_las_nonempty -- + * __las_entry_count -- * Return when there are entries in the lookaside table. */ -bool -__wt_las_nonempty(WT_SESSION_IMPL *session) +static uint64_t +__las_entry_count(WT_CACHE *cache) { - WT_CACHE *cache; + uint64_t insert_cnt, remove_cnt; - cache = S2C(session)->cache; + insert_cnt = cache->las_insert_count; + WT_ORDERED_READ(remove_cnt, cache->las_remove_count); - return (cache->las_entry_count > 0); + return (insert_cnt > remove_cnt ? insert_cnt - remove_cnt : 0); +} + +/* + * __wt_las_empty -- + * Return when there are entries in the lookaside table. 
+ */ +bool +__wt_las_empty(WT_SESSION_IMPL *session) +{ + return (__las_entry_count(S2C(session)->cache) == 0); } /* @@ -81,8 +92,9 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) /* Set the connection-wide statistics. */ cstats = conn->stats; - WT_STAT_SET( - session, cstats, cache_lookaside_entries, cache->las_entry_count); + + WT_STAT_SET(session, cstats, + cache_lookaside_entries, __las_entry_count(cache)); /* * We have a cursor, and we need the underlying data handle; we can get @@ -726,24 +738,15 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, err: /* Resolve the transaction. */ if (local_txn) { - if (ret == 0) { - /* - * Adjust the entry count. - * - * For inserts, we increment before committing. As - * soon as we commit, sweep could catch up and remove - * the block, and we don't want the count to underflow. - * In the unlikely event that the commit fails, roll - * back the increment. - */ - __wt_atomic_add64( - &conn->cache->las_entry_count, insert_cnt); - if ((ret = __wt_txn_commit(las_session, NULL)) != 0) - __wt_cache_decr_check_uint64(session, - &conn->cache->las_entry_count, - insert_cnt, "lookaside entry count"); - } else + if (ret == 0) + WT_TRET(__wt_txn_commit(las_session, NULL)); + else WT_TRET(__wt_txn_rollback(las_session, NULL)); + + /* Adjust the entry count. 
*/ + if (ret == 0) + (void)__wt_atomic_add64( + &conn->cache->las_insert_count, insert_cnt); } __las_restore_isolation(las_session, saved_isolation); @@ -853,9 +856,8 @@ __wt_las_remove_block( else WT_TRET(__wt_txn_rollback(las_session, NULL)); if (ret == 0) - __wt_cache_decr_check_uint64(session, - &conn->cache->las_entry_count, - remove_cnt, "lookaside entry count"); + (void)__wt_atomic_add64( + &conn->cache->las_remove_count, remove_cnt); err: __las_restore_isolation(las_session, saved_isolation); WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); @@ -892,6 +894,8 @@ err: __wt_spin_unlock(session, &cache->las_sweep_lock); static inline uint64_t __las_sweep_count(WT_CACHE *cache) { + uint64_t las_entry_count; + /* * The sweep server is a slow moving thread. Try to review the entire * lookaside table once every 5 minutes. @@ -908,8 +912,9 @@ __las_sweep_count(WT_CACHE *cache) * with lookaside entries are blocked during sweep, make sure we do * some work but don't block reads for too long. */ - return ((uint64_t)WT_MAX(100, - cache->las_entry_count / (WT_MINUTE * 5 / WT_LAS_SWEEP_SEC))); + las_entry_count = __las_entry_count(cache); + return ((uint64_t)WT_MAX(WT_LAS_SWEEP_ENTRIES, + las_entry_count / (5 * WT_MINUTE / WT_LAS_SWEEP_SEC))); } /* @@ -932,7 +937,7 @@ __las_sweep_init(WT_SESSION_IMPL *session) * there's nothing to do. */ if (cache->las_dropped_next == 0) { - if (cache->las_entry_count == 0) + if (__wt_las_empty(session)) ret = WT_NOTFOUND; goto err; } @@ -1040,16 +1045,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session) if (ret != 0) goto srch_notfound; - /* - * Walk at least the number we calculated at the beginning of the - * sweep, or more if there have been additional records inserted in the - * meantime. Don't just repeat the calculation here since sweep - * removes entries and that would cause sweep to do less and less work - * rather than driving the lookaside table to empty. 
- */ cnt = __las_sweep_count(cache); - if (cnt < WT_LAS_SWEEP_ENTRIES) - cnt = WT_LAS_SWEEP_ENTRIES; visit_cnt = 0; /* Walk the file. */ @@ -1188,9 +1184,8 @@ err: __wt_buf_free(session, sweep_key); else WT_TRET(__wt_txn_rollback(session, NULL)); if (ret == 0) - __wt_cache_decr_check_uint64(session, - &S2C(session)->cache->las_entry_count, - remove_cnt, "lookaside entry count"); + (void)__wt_atomic_add64( + &cache->las_remove_count, remove_cnt); } if (locked) __wt_writeunlock(session, &cache->las_sweepwalk_lock); diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 09f031a1828..1fe38c79e37 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -244,6 +244,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = { NULL, NULL, confchk_assert_subconfigs, 2 }, { "cache_resident", "boolean", NULL, NULL, NULL, 0 }, + { "exclusive_refreshed", "boolean", NULL, NULL, NULL, 0 }, { "log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1 }, @@ -1281,8 +1282,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { { "WT_SESSION.alter", "access_pattern_hint=none,app_metadata=," "assert=(commit_timestamp=none,read_timestamp=none)," - "cache_resident=false,log=(enabled=true)", - confchk_WT_SESSION_alter, 5 + "cache_resident=false,exclusive_refreshed=true,log=(enabled=true)", + confchk_WT_SESSION_alter, 6 }, { "WT_SESSION.begin_transaction", "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp=" diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c index 52a4a5424b4..33342fb4873 100644 --- a/src/third_party/wiredtiger/src/conn/conn_sweep.c +++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c @@ -313,7 +313,7 @@ __sweep_server(void *arg) * which will stop the cache from moving into the stuck state. 
*/ if (now - last >= WT_LAS_SWEEP_SEC && - __wt_las_nonempty(session) && + !__wt_las_empty(session) && !__wt_cache_stuck(session)) { oldest_id = __wt_txn_oldest_id(session); if (WT_TXNID_LT(last_las_sweep_id, oldest_id)) { diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c index 8f80ecba5f8..af11ced4ff1 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_join.c +++ b/src/third_party/wiredtiger/src/cursor/cur_join.c @@ -1117,9 +1117,11 @@ __curjoin_next(WT_CURSOR *cursor) * generic error. */ iter->entry->stats.main_access++; - if ((ret = c->search(c)) == WT_NOTFOUND) - ret = WT_ERROR; - WT_ERR(ret); + if ((ret = c->search(c)) != 0) { + if (ret == WT_NOTFOUND) + ret = WT_ERROR; + WT_ERR_MSG(session, ret, "join cursor failed search"); + } F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if (ret == WT_NOTFOUND && diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 5d2d4bbf6cb..1299d3e90e3 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -200,7 +200,8 @@ struct __wt_cache { bool las_session_inuse[WT_LAS_NUM_SESSIONS]; uint32_t las_fileid; /* Lookaside table file ID */ - uint64_t las_entry_count; /* Count of entries in lookaside */ + uint64_t las_insert_count; /* Count of inserts to lookaside */ + uint64_t las_remove_count; /* Count of removes from lookaside */ uint64_t las_pageid; /* Lookaside table page ID counter */ bool las_reader; /* Indicate an LAS reader to sweep */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index cce787e46bc..2fde75ea763 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -201,7 +201,7 @@ extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_ extern WT_UPDATE 
*__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern bool __wt_las_nonempty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_las_empty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_las_stats_update(WT_SESSION_IMPL *session); extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -540,7 +540,6 @@ extern int __wt_turtle_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_optrack_record_funcid(WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp); -extern int __wt_optrack_open_file(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_optrack_flush_buffer(WT_SESSION_IMPL *s); extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_nfilename(WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i index 102d4f0cce0..2a196f1da68 
100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -229,7 +229,7 @@ __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) * Sleep while waiting, after a thousand yields. */ static inline void -__wt_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_count) +__wt_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_usecs) { /* * We yield before retrying, and if we've yielded enough times, start @@ -241,6 +241,6 @@ __wt_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_count) return; } - (*sleep_count) = WT_MIN((*sleep_count) + 100, WT_THOUSAND); - __wt_sleep(0, (*sleep_count)); + (*sleep_usecs) = WT_MIN((*sleep_usecs) + 100, WT_THOUSAND); + __wt_sleep(0, (*sleep_usecs)); } diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 167297c5c80..87ed32d80de 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -521,7 +521,8 @@ __wt_log_extract_lognum( const char *p; if (id == NULL || name == NULL) - return (WT_ERROR); + WT_RET_MSG(session, EINVAL, + "%s: unexpected usage: no id or no name", __func__); if ((p = strrchr(name, '.')) == NULL || sscanf(++p, "%" SCNu32, id) != 1) WT_RET_MSG(session, WT_ERROR, "Bad log file name '%s'", name); @@ -706,8 +707,8 @@ __log_decompress(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM *out) compressor = conn->log_compressor; if (compressor == NULL || compressor->decompress == NULL) WT_RET_MSG(session, WT_ERROR, - "log_decompress: Compressed record with " - "no configured compressor"); + "%s: Compressed record with no configured compressor", + __func__); uncompressed_size = logrec->mem_len; WT_RET(__wt_buf_initsize(session, out, uncompressed_size)); memcpy(out->mem, in->mem, skip); @@ -723,7 +724,8 @@ __log_decompress(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM *out) * it's OK, otherwise it's really, really bad. 
*/ if (result_len != uncompressed_size - WT_LOG_COMPRESS_SKIP) - return (WT_ERROR); + WT_RET_MSG(session, WT_ERROR, + "%s: decompression failed with incorrect size", __func__); return (0); } @@ -745,8 +747,8 @@ __log_decrypt(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM *out) (encryptor = kencryptor->encryptor) == NULL || encryptor->decrypt == NULL) WT_RET_MSG(session, WT_ERROR, - "log_decrypt: Encrypted record with " - "no configured decrypt method"); + "%s: Encrypted record with no configured decrypt method", + __func__); return (__wt_decrypt(session, encryptor, WT_LOG_ENCRYPT_SKIP, in, out)); } @@ -1950,7 +1952,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, if (LF_ISSET(WT_LOGSCAN_FROM_CKP)) start_lsn = log->ckpt_lsn; else if (!LF_ISSET(WT_LOGSCAN_FIRST)) - return (WT_ERROR); /* Illegal usage */ + WT_RET_MSG(session, WT_ERROR, + "%s: WT_LOGSCAN_FIRST not set", __func__); } lastlog = log->fileid; } else { diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index fc8181e2460..d0369194cbb 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -297,7 +297,7 @@ __log_slot_switch_internal( WT_LOG *log; WT_LOGSLOT *slot; uint32_t joined; - bool free_slot, release; + bool release; log = S2C(session)->log; release = false; @@ -355,12 +355,6 @@ __log_slot_switch_internal( */ WT_RET(__log_slot_new(session)); F_CLR(myslot, WT_MYSLOT_CLOSE); - if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) { - WT_RET(__wt_log_release(session, slot, &free_slot)); - F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE); - if (free_slot) - __wt_log_slot_free(session, slot); - } return (ret); } @@ -374,6 +368,7 @@ __wt_log_slot_switch(WT_SESSION_IMPL *session, { WT_DECL_RET; WT_LOG *log; + bool free_slot; log = S2C(session)->log; @@ -392,7 +387,18 @@ __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_WITH_SLOT_LOCK(session, log, ret = __log_slot_switch_internal( session, 
myslot, forced, did_work)); - if (ret == EBUSY) { + /* + * If we need to release the slot we can do it now, after + * we release the lock. + */ + if (ret == 0 && + F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) { + WT_RET(__wt_log_release( + session, myslot->slot, &free_slot)); + F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE); + if (free_slot) + __wt_log_slot_free(session, myslot->slot); + } else if (ret == EBUSY) { WT_STAT_CONN_INCR(session, log_slot_switch_busy); __wt_yield(); } diff --git a/src/third_party/wiredtiger/src/optrack/optrack.c b/src/third_party/wiredtiger/src/optrack/optrack.c index 1be466d179e..ccec13d433b 100644 --- a/src/third_party/wiredtiger/src/optrack/optrack.c +++ b/src/third_party/wiredtiger/src/optrack/optrack.c @@ -46,11 +46,11 @@ err: WT_PANIC_MSG(session, ret, "%s", __func__); } /* - * __wt_optrack_open_file -- + * __optrack_open_file -- * Open the per-session operation-tracking file. */ -int -__wt_optrack_open_file(WT_SESSION_IMPL *session) +static int +__optrack_open_file(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(buf); @@ -61,7 +61,8 @@ __wt_optrack_open_file(WT_SESSION_IMPL *session) conn = S2C(session); if (!F_ISSET(conn, WT_CONN_OPTRACK)) - return (WT_ERROR); + WT_RET_MSG(session, WT_ERROR, + "%s: WT_CONN_OPTRACK not set", __func__); WT_RET(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_filename_construct(session, conn->optrack_path, @@ -103,9 +104,8 @@ err: WT_TRET(__wt_close(session, &session->optrack_fh)); void __wt_optrack_flush_buffer(WT_SESSION_IMPL *s) { - if (s->optrack_fh == NULL) - if (__wt_optrack_open_file(s)) - return; + if (s->optrack_fh == NULL && __optrack_open_file(s) != 0) + return; /* * We're not using the standard write path deliberately, that's quite diff --git a/src/third_party/wiredtiger/src/schema/schema_alter.c b/src/third_party/wiredtiger/src/schema/schema_alter.c index 2ebfcfc5d9d..e880cb415c8 100644 --- a/src/third_party/wiredtiger/src/schema/schema_alter.c +++ 
b/src/third_party/wiredtiger/src/schema/schema_alter.c @@ -133,7 +133,8 @@ err: __wt_scr_free(session, &data_source); * Alter a table. */ static int -__alter_table(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) +__alter_table(WT_SESSION_IMPL *session, + const char *uri, const char *newcfg[], bool exclusive_refreshed) { WT_COLGROUP *colgroup; WT_DECL_RET; @@ -148,30 +149,39 @@ __alter_table(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) WT_PREFIX_SKIP_REQUIRED(session, name, "table:"); /* - * Open the table so we can alter its column groups and indexes, keeping - * the table locked exclusive across the alter. + * If we have exclusive access update all objects in the schema for + * this table and reopen the handle to update the in-memory state. */ - WT_RET(__wt_schema_get_table_uri(session, uri, true, - WT_DHANDLE_EXCLUSIVE, &table)); - /* Meta tracking needs to be used because alter needs to be atomic. */ - WT_ASSERT(session, WT_META_TRACKING(session)); - WT_WITH_DHANDLE(session, &table->iface, - ret = __wt_meta_track_handle_lock(session, false)); - WT_RET(ret); - - /* Alter the column groups. */ - for (i = 0; i < WT_COLGROUPS(table); i++) { - if ((colgroup = table->cgroups[i]) == NULL) - continue; - WT_RET(__alter_tree(session, colgroup->name, newcfg)); - } - - /* Alter the indices. */ - WT_RET(__wt_schema_open_indices(session, table)); - for (i = 0; i < table->nindices; i++) { - if ((idx = table->indices[i]) == NULL) - continue; - WT_RET(__alter_tree(session, idx->name, newcfg)); + if (exclusive_refreshed) { + /* + * Open the table so we can alter its column groups and indexes, + * keeping the table locked exclusive across the alter. + */ + WT_RET(__wt_schema_get_table_uri(session, uri, true, + WT_DHANDLE_EXCLUSIVE, &table)); + /* + * Meta tracking needs to be used because alter needs to be + * atomic. 
+ */ + WT_ASSERT(session, WT_META_TRACKING(session)); + WT_WITH_DHANDLE(session, &table->iface, + ret = __wt_meta_track_handle_lock(session, false)); + WT_RET(ret); + + /* Alter the column groups. */ + for (i = 0; i < WT_COLGROUPS(table); i++) { + if ((colgroup = table->cgroups[i]) == NULL) + continue; + WT_RET(__alter_tree(session, colgroup->name, newcfg)); + } + + /* Alter the indices. */ + WT_RET(__wt_schema_open_indices(session, table)); + for (i = 0; i < table->nindices; i++) { + if ((idx = table->indices[i]) == NULL) + continue; + WT_RET(__alter_tree(session, idx->name, newcfg)); + } } /* Alter the table */ @@ -188,7 +198,24 @@ __alter_table(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) static int __schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) { + WT_CONFIG_ITEM cv; uint32_t flags; + bool exclusive_refreshed; + const char *cfg[] = { + WT_CONFIG_BASE(session, WT_SESSION_alter), newcfg[0], NULL}; + + /* + * Determine what configuration says about exclusive access. + * A non exclusive alter that doesn't refresh in-memory configuration is + * only valid for the table objects. 
+ */ + WT_RET(__wt_config_gets(session, cfg, "exclusive_refreshed", &cv)); + exclusive_refreshed = (bool)cv.val; + + if (!exclusive_refreshed && !WT_PREFIX_MATCH(uri, "table:")) + WT_RET_MSG(session, EINVAL, + "option \"exclusive_refreshed\" " + "is applicable only on simple tables"); /* * The alter flag is used so LSM can apply some special logic, the @@ -208,7 +235,8 @@ __schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) return (__wt_lsm_tree_worker(session, uri, __alter_file, NULL, newcfg, flags)); if (WT_PREFIX_MATCH(uri, "table:")) - return (__alter_table(session, uri, newcfg)); + return (__alter_table(session, + uri, newcfg, exclusive_refreshed)); return (__wt_bad_object_type(session, uri)); } diff --git a/src/third_party/wiredtiger/src/support/huffman.c b/src/third_party/wiredtiger/src/support/huffman.c index f784352c981..55910ab835d 100644 --- a/src/third_party/wiredtiger/src/support/huffman.c +++ b/src/third_party/wiredtiger/src/support/huffman.c @@ -520,11 +520,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session, *(void **)retp = huffman; - if (0) { -err: if (ret == 0) - ret = WT_ERROR; - } - __wt_free(session, indexed_freqs); +err: __wt_free(session, indexed_freqs); if (leaves != NULL) node_queue_close(session, leaves); if (combined_nodes != NULL) diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index b43380e97dc..557d48f5401 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -882,9 +882,10 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) if (ref->page_del->update_list == NULL) break; - for (;;) { + for (;; __wt_yield()) { previous_state = ref->state; - if (__wt_atomic_casv32( + if (previous_state != WT_REF_LOCKED && + __wt_atomic_casv32( &ref->state, previous_state, WT_REF_LOCKED)) break; } diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 
78197e838f4..3c12b647ca8 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -1852,6 +1852,15 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) } /* + * Don't flush data from trees when there is a stable timestamp set: + * that can lead to files that are inconsistent on disk after a crash. + */ + if (btree->modified && !bulk && + S2C(session)->txn_global.has_stable_timestamp && + !__wt_btree_immediately_durable(session)) + return (EBUSY); + + /* * Turn on metadata tracking if: * - The session is not already doing metadata tracking. * - The file was not bulk loaded. diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index e0d5beea61a..35a89eeb072 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -79,8 +79,10 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) } } WT_ERR_NOTFOUND_OK(ret); -err: if (ret == 0) - conn->cache->las_entry_count = las_total; +err: if (ret == 0) { + conn->cache->las_insert_count = las_total; + conn->cache->las_remove_count = 0; + } __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock); WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); diff --git a/src/third_party/wiredtiger/test/suite/test_alter03.py b/src/third_party/wiredtiger/test/suite/test_alter03.py index 78d1481f778..268778f5fb5 100644 --- a/src/third_party/wiredtiger/test/suite/test_alter03.py +++ b/src/third_party/wiredtiger/test/suite/test_alter03.py @@ -34,34 +34,35 @@ from wtscenario import make_scenarios class test_alter03(wttest.WiredTigerTestCase): name = "alter03" - def verify_metadata(self, metastr): - if metastr == '': + def verify_metadata(self, table_metastr, file_metastr): + if table_metastr == '' and file_metastr == '': return - cursor = self.session.open_cursor('metadata:', None, 
None) - # - # Walk through all the metadata looking for the entries that are - # the URIs for the named object. - # - found = False - while True: - ret = cursor.next() - if ret != 0: - break - key = cursor.get_key() - if key.find(self.name) != -1: - value = cursor[key] - found = True - self.assertTrue(value.find(metastr) != -1) - cursor.close() - self.assertTrue(found == True) - - # Alter: Change the app_metadata and verify - def test_alter03_app_metadata(self): + c = self.session.open_cursor('metadata:', None, None) + + if table_metastr != '': + # We must find a table type entry for this object and its value + # should contain the provided table meta string. + c.set_key('table:' + self.name) + self.assertNotEqual(c.search(), wiredtiger.WT_NOTFOUND) + value = c.get_value() + self.assertTrue(value.find(table_metastr) != -1) + + if file_metastr != '': + # We must find a file type entry for the object and its value + # should contain the provided file meta string. + c.set_key('file:' + self.name + '.wt') + self.assertNotEqual(c.search(), wiredtiger.WT_NOTFOUND) + value = c.get_value() + self.assertTrue(value.find(file_metastr) != -1) + + c.close() + + # Alter Table: Change the app_metadata and verify + def test_alter03_table_app_metadata(self): uri = "table:" + self.name entries = 100 create_params = 'key_format=i,value_format=i,' app_meta_orig = 'app_metadata="meta_data_1",' - app_meta_new = 'app_metadata="meta_data_2",' self.session.create(uri, create_params + app_meta_orig) @@ -72,11 +73,60 @@ class test_alter03(wttest.WiredTigerTestCase): c.close() # Verify the string in the metadata - self.verify_metadata(app_meta_orig) + self.verify_metadata(app_meta_orig, app_meta_orig) # Alter app metadata and verify - self.session.alter(uri, app_meta_new) - self.verify_metadata(app_meta_new) + self.session.alter(uri, 'app_metadata="meta_data_2",') + self.verify_metadata('app_metadata="meta_data_2",', 'app_metadata="meta_data_2",') + + # Alter app metadata, explicitly asking
for exclusive access and verify + self.session.alter(uri, 'app_metadata="meta_data_3",exclusive_refreshed=true,') + self.verify_metadata('app_metadata="meta_data_3",', 'app_metadata="meta_data_3",') + + # Alter app metadata without taking exclusive lock and verify that only + # table object gets modified + self.session.alter(uri, 'app_metadata="meta_data_4",exclusive_refreshed=false,') + self.verify_metadata('app_metadata="meta_data_4",', 'app_metadata="meta_data_3",') + + # Open a cursor, insert some data and try to alter with session open. + # We should fail unless we ask not to take an exclusive lock + c2 = self.session.open_cursor(uri, None) + for k in range(entries): + c2[k+1] = 2 + + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.session.alter(uri, 'app_metadata="meta_data_5",')) + self.verify_metadata('app_metadata="meta_data_4",', 'app_metadata="meta_data_3",') + + self.assertRaisesException(wiredtiger.WiredTigerError, + lambda: self.session.alter(uri, + 'exclusive_refreshed=true,app_metadata="meta_data_5",')) + self.verify_metadata('app_metadata="meta_data_4",', 'app_metadata="meta_data_3",') + + self.session.alter(uri, 'app_metadata="meta_data_5",exclusive_refreshed=false,') + self.verify_metadata('app_metadata="meta_data_5",', 'app_metadata="meta_data_3",') + + c2.close() + + # Close and reopen the connection. 
+ # Confirm we retain the app_metadata as expected after reopen + self.reopen_conn() + self.verify_metadata('app_metadata="meta_data_5",', 'app_metadata="meta_data_3",') + + # Alter LSM: A non exclusive alter should not be allowed + def test_alter03_lsm_app_metadata(self): + uri = "lsm:" + self.name + create_params = 'key_format=i,value_format=i,' + app_meta_orig = 'app_metadata="meta_data_1",' + + self.session.create(uri, create_params + app_meta_orig) + + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.alter(uri, + 'exclusive_refreshed=false,app_metadata="meta_data_2",'), + '/is applicable only on simple tables/') + + self.session.alter(uri, 'exclusive_refreshed=true,app_metadata="meta_data_2",') if __name__ == '__main__': wttest.run() |