summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/btree/bt_curnext.c
diff options
context:
space:
mode:
authorMatt Kangas <matt.kangas@mongodb.com>2014-12-12 10:56:37 -0500
committerMatt Kangas <matt.kangas@mongodb.com>2014-12-12 10:56:37 -0500
commitd3877ceca348713dc4d4347249a3976ee466edd3 (patch)
treea3ebc6344fc7fc8332f4da12057208660bd96d9d /src/third_party/wiredtiger/src/btree/bt_curnext.c
parent6b6471ea34f384e747eeef83dfdfcaee1b332bd0 (diff)
downloadmongo-d3877ceca348713dc4d4347249a3976ee466edd3.tar.gz
Import wiredtiger-wiredtiger-2.8-rc2-171-g378f727.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_curnext.c')
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c61
1 files changed, 36 insertions, 25 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 6a07969d684..9cd7f0241fc 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -120,7 +120,7 @@ new_page:
* Return the next variable-length entry on the append list.
*/
static inline int
-__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
+__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
WT_ITEM *val;
WT_SESSION_IMPL *session;
@@ -140,16 +140,17 @@ new_page: if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL ||
- WT_UPDATE_DELETED_ISSET(upd)) {
- *skipped = 1;
+ if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
+ continue;
+ if (WT_UPDATE_DELETED_ISSET(upd)) {
+ ++cbt->page_deleted_count;
continue;
}
val->data = WT_UPDATE_DATA(upd);
val->size = upd->size;
- break;
+ return (0);
}
- return (0);
+ /* NOTREACHED */
}
/*
@@ -157,7 +158,7 @@ new_page: if (cbt->ins == NULL)
* Move to the next, variable-length column-store item.
*/
static inline int
-__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
+__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage)
{
WT_CELL *cell;
WT_CELL_UNPACK unpack;
@@ -198,7 +199,7 @@ new_page: /* Find the matching WT_COL slot. */
NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd != NULL) {
if (WT_UPDATE_DELETED_ISSET(upd)) {
- *skipped = 1;
+ ++cbt->page_deleted_count;
continue;
}
@@ -237,7 +238,7 @@ new_page: /* Find the matching WT_COL slot. */
* Move to the next row-store item.
*/
static inline int
-__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
+__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage)
{
WT_INSERT *ins;
WT_ITEM *key, *val;
@@ -280,9 +281,10 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
cbt->ins = WT_SKIP_NEXT(cbt->ins);
new_insert: if ((ins = cbt->ins) != NULL) {
- if ((upd = __wt_txn_read(session, ins->upd)) == NULL ||
- WT_UPDATE_DELETED_ISSET(upd)) {
- *skipped = 1;
+ if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
+ continue;
+ if (WT_UPDATE_DELETED_ISSET(upd)) {
+ ++cbt->page_deleted_count;
continue;
}
key->data = WT_INSERT_KEY(ins);
@@ -314,7 +316,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
rip = &page->pg_row_d[cbt->slot];
upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) {
- *skipped = 1;
+ ++cbt->page_deleted_count;
continue;
}
@@ -342,6 +344,11 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt, int next)
F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
/*
+ * Clear the count of deleted items on the page.
+ */
+ cbt->page_deleted_count = 0;
+
+ /*
* If we don't have a search page, then we're done, we're starting at
* the beginning or end of the tree, not as a result of a search.
*/
@@ -393,7 +400,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t flags;
- int skipped, newpage;
+ int newpage;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -418,7 +425,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
* found. Then, move to the next page, until we reach the end of the
* file.
*/
- for (skipped = newpage = 0;; skipped = 0, newpage = 1) {
+ for (newpage = 0;; newpage = 1) {
page = cbt->ref == NULL ? NULL : cbt->ref->page;
WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page));
@@ -428,8 +435,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
ret = __cursor_fix_append_next(cbt, newpage);
break;
case WT_PAGE_COL_VAR:
- ret = __cursor_var_append_next(
- cbt, newpage, &skipped);
+ ret = __cursor_var_append_next(cbt, newpage);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -444,10 +450,10 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
ret = __cursor_fix_next(cbt, newpage);
break;
case WT_PAGE_COL_VAR:
- ret = __cursor_var_next(cbt, newpage, &skipped);
+ ret = __cursor_var_next(cbt, newpage);
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_next(cbt, newpage, &skipped);
+ ret = __cursor_row_next(cbt, newpage);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -467,13 +473,18 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
}
/*
- * If we scanned all the way through a page and only saw
- * deleted records, try to evict the page as we release it.
- * Otherwise repeatedly deleting from the beginning of a tree
- * can have quadratic performance.
+ * If we saw a lot of deleted records on this page, or we went
+ * all the way through a page and only saw deleted records, try
+ * to evict the page when we release it. Otherwise repeatedly
+ * deleting from the beginning of a tree can have quadratic
+ * performance. Take care not to force eviction of pages that
+ * are genuinely empty, in new trees.
*/
- if (newpage && skipped)
- page->read_gen = WT_READGEN_OLDEST;
+ if (page != NULL &&
+ (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
+ (newpage && cbt->page_deleted_count > 0)))
+ __wt_page_evict_soon(page);
+ cbt->page_deleted_count = 0;
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);