diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-03-24 21:13:36 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-03-24 21:13:36 +1100 |
commit | 43692651196e610b41eaba48c37ac95fefbff686 (patch) | |
tree | 9dab568cf5e039362e9357e0cc194336ffc730a2 /src | |
parent | 3d0720774cf7c623a00fbdd122b4a5aa5f4e3fd3 (diff) | |
download | mongo-43692651196e610b41eaba48c37ac95fefbff686.tar.gz |
Change the sweep server to operate only on clean files. Track the maximum transaction ID seen in the checkpoint of a file, so that sweep can be sure all of the file's pages can be discarded (without dirtying anything in the tree).
Preparation work for SERVER-17587
Diffstat (limited to 'src')
-rw-r--r-- | src/btree/bt_sync.c | 9 | ||||
-rw-r--r-- | src/conn/conn_dhandle.c | 12 | ||||
-rw-r--r-- | src/conn/conn_sweep.c | 11 | ||||
-rw-r--r-- | src/evict/evict_file.c | 16 | ||||
-rw-r--r-- | src/evict/evict_lru.c | 2 | ||||
-rw-r--r-- | src/include/btree.h | 1 | ||||
-rw-r--r-- | src/include/btree.i | 4 | ||||
-rw-r--r-- | src/include/extern.h | 4 | ||||
-rw-r--r-- | src/session/session_dhandle.c | 2 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 34 |
10 files changed, 58 insertions, 37 deletions
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index bc5d1051b1e..a6ad86c888f 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -117,8 +117,12 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) mod = page->modify; /* Skip clean pages. */ - if (!__wt_page_is_modified(page)) + if (!__wt_page_is_modified(page)) { + if (mod != NULL && TXNID_LT( + btree->rec_max_txn, mod->rec_max_txn)) + btree->rec_max_txn = mod->rec_max_txn; continue; + } /* * Write dirty pages, unless we can be sure they only @@ -153,6 +157,9 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) ++leaf_pages; } WT_ERR(__wt_reconcile(session, walk, NULL, 0)); + + if (TXNID_LT(btree->rec_max_txn, mod->rec_max_txn)) + btree->rec_max_txn = mod->rec_max_txn; } break; } diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 7756158594c..e28f18a6fa5 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -234,7 +234,7 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock)); * Sync and close the underlying btree handle. */ int -__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force) +__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force) { WT_BTREE *btree; WT_DATA_HANDLE *dhandle; @@ -273,7 +273,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force) */ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) - WT_ERR(__wt_checkpoint_close(session, force)); + WT_ERR(__wt_checkpoint_close(session, final, force)); if (dhandle->checkpoint == NULL) --S2C(session)->open_btree_count; @@ -392,7 +392,7 @@ __conn_btree_open( * in the tree that can block the close. */ if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) - WT_RET(__wt_conn_btree_sync_and_close(session, 0)); + WT_RET(__wt_conn_btree_sync_and_close(session, 0, 0)); /* Discard any previous configuration, set up the new configuration. 
*/ __conn_btree_config_clear(session); @@ -424,7 +424,7 @@ __conn_btree_open( err: F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); /* If the open failed, close the handle. */ if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) - WT_TRET(__wt_conn_btree_sync_and_close(session, 0)); + WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0)); } return (ret); @@ -670,7 +670,7 @@ __wt_conn_dhandle_close_all( if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { if ((ret = __wt_meta_track_sub_on(session)) == 0) ret = __wt_conn_btree_sync_and_close( - session, force); + session, 0, force); /* * If the close succeeded, drop any locks it acquired. @@ -732,7 +732,7 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final) dhandle = session->dhandle; if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { - tret = __wt_conn_btree_sync_and_close(session, 0); + tret = __wt_conn_btree_sync_and_close(session, final, 0); if (final && tret != 0) { __wt_err(session, tret, "Final close of %s failed", dhandle->name); diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 50812ccfa95..d788060ab17 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -15,6 +15,7 @@ static int __sweep(WT_SESSION_IMPL *session) { + WT_BTREE *btree; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle, *dhandle_next; WT_DECL_RET; @@ -63,10 +64,16 @@ __sweep(WT_SESSION_IMPL *session) WT_RET(ret); locked = 1; + /* Only sweep clean trees where all updates are visible. */ + btree = dhandle->handle; + if (btree->modified || + !__wt_txn_visible_all(session, btree->rec_max_txn)) + goto unlock; + /* If the handle is open, try to close it. 
*/ if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { - WT_WITH_DHANDLE(session, dhandle, - ret = __wt_conn_btree_sync_and_close(session, 0)); + WT_WITH_DHANDLE(session, dhandle, ret = + __wt_conn_btree_sync_and_close(session, 0, 0)); if (ret != 0) goto unlock; diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index f546a5adae1..864c116a380 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -72,23 +72,17 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) WT_READ_CACHE | WT_READ_NO_EVICT)); switch (syncop) { - case WT_SYNC_DISCARD: - /* - * Check that the page is clean: if we see a dirty page - * (including a dirty parent page after evicting a - * child), give up. The higher level can try to - * checkpoint, but during discard we aren't set up to - * manage checkpoints. - */ - if (__wt_page_is_modified(page)) - WT_ERR(EBUSY); - /* FALLTHROUGH */ case WT_SYNC_CLOSE: /* * Evict the page. */ WT_ERR(__wt_evict(session, ref, 1)); break; + case WT_SYNC_DISCARD: + WT_ASSERT(session, + __wt_page_can_evict(session, page, 0)); + __wt_evict_page_clean_update(session, ref); + break; case WT_SYNC_DISCARD_FORCE: /* * Forced discard of the page, whether clean or dirty. diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 2ebd699c579..2b5bd015223 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1211,7 +1211,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) } fast: /* If the page can't be evicted, give up. 
*/ - if (!__wt_page_can_evict(session, page, 0)) + if (!__wt_page_can_evict(session, page, 1)) continue; /* diff --git a/src/include/btree.h b/src/include/btree.h index f00a7ac9a8e..cc571124207 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -124,6 +124,7 @@ struct __wt_btree { u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ uint64_t checkpoint_gen; /* Checkpoint generation */ + uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ WT_REF *evict_ref; /* Eviction thread's location */ diff --git a/src/include/btree.i b/src/include/btree.i index 3b18cb530e9..6c261f3768d 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -970,7 +970,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits) * a transaction value, once that's globally visible, we know we can * evict the created page. */ - if (WT_PAGE_IS_INTERNAL(page) && + if (check_splits && WT_PAGE_IS_INTERNAL(page) && !__wt_txn_visible_all(session, mod->mod_split_txn)) return (0); @@ -1013,7 +1013,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits) /* * If the page was recently split in-memory, don't force it out: we - * hope eviction will find it first. + * hope an eviction thread will find it first. 
*/ if (check_splits && !__wt_txn_visible_all(session, mod->inmem_split_txn)) diff --git a/src/include/extern.h b/src/include/extern.h index e9c37e62bb6..6ac926b494c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -225,7 +225,7 @@ extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *c extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session); extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize); extern int __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *name, const char *ckpt, uint32_t flags); -extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force); +extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force); extern int __wt_conn_btree_get(WT_SESSION_IMPL *session, const char *name, const char *ckpt, const char *cfg[], uint32_t flags); extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]); extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]); @@ -672,7 +672,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int force); +extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force); extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify); 
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 833d098efeb..0825f783ca3 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -168,7 +168,7 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)); F_CLR(dhandle, WT_DHANDLE_DISCARD); - WT_TRET(__wt_conn_btree_sync_and_close(session, 0)); + WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0)); } if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)) diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index fa77d2b5fa5..c910b1cbc90 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1090,27 +1090,39 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) * Checkpoint a single file as part of closing the handle. */ int -__wt_checkpoint_close(WT_SESSION_IMPL *session, int force) +__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force) { - WT_DECL_RET; + WT_BTREE *btree; + + btree = S2BT(session); /* Handle forced discard (when dropping a file). */ if (force) return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE)); - /* If closing an unmodified file, try to evict its pages. */ - if (!S2BT(session)->modified) { - ret = __wt_cache_op(session, NULL, WT_SYNC_DISCARD); - if (ret != EBUSY) - return (ret); + /* + * If closing an unmodified file, check that no update is required + * for active readers. + */ + if (!btree->modified && !F_ISSET(btree, WT_BTREE_BULK)) { + __wt_txn_update_oldest(session); + return (__wt_txn_visible_all(session, btree->rec_max_txn) ? + __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY); } /* - * If closing a modified file, or closing an unmodified file was blocked - * for any reason, checkpoint the file and optionally flush the writes - * (the checkpoint call will discard the blocks, there's no additional - * step needed). 
+ * If closing a modified file, checkpoint the file and optionally flush + * the writes (the checkpoint call will discard the blocks, there's no + * additional step needed). + * + * We should already have the schema lock unless we're finishing a bulk + * load -- the only other paths to closing files (sweep and LSM) have + * already checked for read-only trees. */ + if (!final) + WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) || + F_ISSET(btree, WT_BTREE_BULK)); + WT_RET(__checkpoint_worker(session, NULL, 0)); if (F_ISSET(S2C(session), WT_CONN_CKPT_SYNC)) WT_RET(__wt_checkpoint_sync(session, NULL)); |