summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@wiredtiger.com> 2015-03-24 21:13:36 +1100
committer Michael Cahill <michael.cahill@wiredtiger.com> 2015-03-24 21:13:36 +1100
commit 43692651196e610b41eaba48c37ac95fefbff686 (patch)
tree 9dab568cf5e039362e9357e0cc194336ffc730a2
parent 3d0720774cf7c623a00fbdd122b4a5aa5f4e3fd3 (diff)
download mongo-43692651196e610b41eaba48c37ac95fefbff686.tar.gz
Change the sweep server to only operate on clean files. Track the maximum transaction ID seen in the checkpoint of a file so that we can be sure in sweep that all pages can be discarded (without dirtying anything in the tree).
Preparation work for SERVER-17587
-rw-r--r-- src/btree/bt_sync.c 9
-rw-r--r-- src/conn/conn_dhandle.c 12
-rw-r--r-- src/conn/conn_sweep.c 11
-rw-r--r-- src/evict/evict_file.c 16
-rw-r--r-- src/evict/evict_lru.c 2
-rw-r--r-- src/include/btree.h 1
-rw-r--r-- src/include/btree.i 4
-rw-r--r-- src/include/extern.h 4
-rw-r--r-- src/session/session_dhandle.c 2
-rw-r--r-- src/txn/txn_ckpt.c 34
-rw-r--r-- test/suite/test_sweep01.py 8
11 files changed, 60 insertions, 43 deletions
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index bc5d1051b1e..a6ad86c888f 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -117,8 +117,12 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
mod = page->modify;
/* Skip clean pages. */
- if (!__wt_page_is_modified(page))
+ if (!__wt_page_is_modified(page)) {
+ if (mod != NULL && TXNID_LT(
+ btree->rec_max_txn, mod->rec_max_txn))
+ btree->rec_max_txn = mod->rec_max_txn;
continue;
+ }
/*
* Write dirty pages, unless we can be sure they only
@@ -153,6 +157,9 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
++leaf_pages;
}
WT_ERR(__wt_reconcile(session, walk, NULL, 0));
+
+ if (TXNID_LT(btree->rec_max_txn, mod->rec_max_txn))
+ btree->rec_max_txn = mod->rec_max_txn;
}
break;
}
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 7756158594c..e28f18a6fa5 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -234,7 +234,7 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
* Sync and close the underlying btree handle.
*/
int
-__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
+__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force)
{
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
@@ -273,7 +273,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
*/
if (!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_ERR(__wt_checkpoint_close(session, force));
+ WT_ERR(__wt_checkpoint_close(session, final, force));
if (dhandle->checkpoint == NULL)
--S2C(session)->open_btree_count;
@@ -392,7 +392,7 @@ __conn_btree_open(
* in the tree that can block the close.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_RET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_RET(__wt_conn_btree_sync_and_close(session, 0, 0));
/* Discard any previous configuration, set up the new configuration. */
__conn_btree_config_clear(session);
@@ -424,7 +424,7 @@ __conn_btree_open(
err: F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
/* If the open failed, close the handle. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
return (ret);
@@ -670,7 +670,7 @@ __wt_conn_dhandle_close_all(
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
if ((ret = __wt_meta_track_sub_on(session)) == 0)
ret = __wt_conn_btree_sync_and_close(
- session, force);
+ session, 0, force);
/*
* If the close succeeded, drop any locks it acquired.
@@ -732,7 +732,7 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
dhandle = session->dhandle;
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- tret = __wt_conn_btree_sync_and_close(session, 0);
+ tret = __wt_conn_btree_sync_and_close(session, final, 0);
if (final && tret != 0) {
__wt_err(session, tret,
"Final close of %s failed", dhandle->name);
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 50812ccfa95..d788060ab17 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -15,6 +15,7 @@
static int
__sweep(WT_SESSION_IMPL *session)
{
+ WT_BTREE *btree;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle, *dhandle_next;
WT_DECL_RET;
@@ -63,10 +64,16 @@ __sweep(WT_SESSION_IMPL *session)
WT_RET(ret);
locked = 1;
+ /* Only sweep clean trees where all updates are visible. */
+ btree = dhandle->handle;
+ if (btree->modified ||
+ !__wt_txn_visible_all(session, btree->rec_max_txn))
+ goto unlock;
+
/* If the handle is open, try to close it. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- WT_WITH_DHANDLE(session, dhandle,
- ret = __wt_conn_btree_sync_and_close(session, 0));
+ WT_WITH_DHANDLE(session, dhandle, ret =
+ __wt_conn_btree_sync_and_close(session, 0, 0));
if (ret != 0)
goto unlock;
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index f546a5adae1..864c116a380 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -72,23 +72,17 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
WT_READ_CACHE | WT_READ_NO_EVICT));
switch (syncop) {
- case WT_SYNC_DISCARD:
- /*
- * Check that the page is clean: if we see a dirty page
- * (including a dirty parent page after evicting a
- * child), give up. The higher level can try to
- * checkpoint, but during discard we aren't set up to
- * manage checkpoints.
- */
- if (__wt_page_is_modified(page))
- WT_ERR(EBUSY);
- /* FALLTHROUGH */
case WT_SYNC_CLOSE:
/*
* Evict the page.
*/
WT_ERR(__wt_evict(session, ref, 1));
break;
+ case WT_SYNC_DISCARD:
+ WT_ASSERT(session,
+ __wt_page_can_evict(session, page, 0));
+ __wt_evict_page_clean_update(session, ref);
+ break;
case WT_SYNC_DISCARD_FORCE:
/*
* Forced discard of the page, whether clean or dirty.
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 2ebd699c579..2b5bd015223 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1211,7 +1211,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
}
fast: /* If the page can't be evicted, give up. */
- if (!__wt_page_can_evict(session, page, 0))
+ if (!__wt_page_can_evict(session, page, 1))
continue;
/*
diff --git a/src/include/btree.h b/src/include/btree.h
index f00a7ac9a8e..cc571124207 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -124,6 +124,7 @@ struct __wt_btree {
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
uint64_t checkpoint_gen; /* Checkpoint generation */
+ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
WT_REF *evict_ref; /* Eviction thread's location */
diff --git a/src/include/btree.i b/src/include/btree.i
index 3b18cb530e9..6c261f3768d 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -970,7 +970,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
* a transaction value, once that's globally visible, we know we can
* evict the created page.
*/
- if (WT_PAGE_IS_INTERNAL(page) &&
+ if (check_splits && WT_PAGE_IS_INTERNAL(page) &&
!__wt_txn_visible_all(session, mod->mod_split_txn))
return (0);
@@ -1013,7 +1013,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
/*
* If the page was recently split in-memory, don't force it out: we
- * hope eviction will find it first.
+ * hope an eviction thread will find it first.
*/
if (check_splits &&
!__wt_txn_visible_all(session, mod->inmem_split_txn))
diff --git a/src/include/extern.h b/src/include/extern.h
index e9c37e62bb6..6ac926b494c 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -225,7 +225,7 @@ extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *c
extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session);
extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
extern int __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *name, const char *ckpt, uint32_t flags);
-extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force);
+extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force);
extern int __wt_conn_btree_get(WT_SESSION_IMPL *session, const char *name, const char *ckpt, const char *cfg[], uint32_t flags);
extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
@@ -672,7 +672,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]);
-extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int force);
+extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force);
extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify);
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 833d098efeb..0825f783ca3 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -168,7 +168,7 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
F_CLR(dhandle, WT_DHANDLE_DISCARD);
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index fa77d2b5fa5..c910b1cbc90 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -1090,27 +1090,39 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoint a single file as part of closing the handle.
*/
int
-__wt_checkpoint_close(WT_SESSION_IMPL *session, int force)
+__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
{
- WT_DECL_RET;
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
/* Handle forced discard (when dropping a file). */
if (force)
return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE));
- /* If closing an unmodified file, try to evict its pages. */
- if (!S2BT(session)->modified) {
- ret = __wt_cache_op(session, NULL, WT_SYNC_DISCARD);
- if (ret != EBUSY)
- return (ret);
+ /*
+ * If closing an unmodified file, check that no update is required
+ * for active readers.
+ */
+ if (!btree->modified && !F_ISSET(btree, WT_BTREE_BULK)) {
+ __wt_txn_update_oldest(session);
+ return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
+ __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY);
}
/*
- * If closing a modified file, or closing an unmodified file was blocked
- * for any reason, checkpoint the file and optionally flush the writes
- * (the checkpoint call will discard the blocks, there's no additional
- * step needed).
+ * If closing a modified file, checkpoint the file and optionally flush
+ * the writes (the checkpoint call will discard the blocks, there's no
+ * additional step needed).
+ *
+ * We should already have the schema lock unless we're finishing a bulk
+ * load -- the only other paths to closing files (sweep and LSM) have
+ * already checked for read-only trees.
*/
+ if (!final)
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) ||
+ F_ISSET(btree, WT_BTREE_BULK));
+
WT_RET(__checkpoint_worker(session, NULL, 0));
if (F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
WT_RET(__wt_checkpoint_sync(session, NULL));
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index a1a89c58838..989ffb7a971 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -42,10 +42,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablebase
numfiles = 50
numkv = 1000
- ckpt_list = [
- ('off', dict(ckpt=0)),
- ('on', dict(ckpt=10)),
- ]
+ ckpt=10
types = [
('row', dict(tabletype='row',
@@ -56,8 +53,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = number_scenarios(
- prune_scenarios(multiply_scenarios('.', types, ckpt_list), 1, 100))
+ scenarios = types
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):