diff options
author | Matt Kangas <matt.kangas@mongodb.com> | 2014-12-12 10:56:37 -0500 |
---|---|---|
committer | Matt Kangas <matt.kangas@mongodb.com> | 2014-12-12 10:56:37 -0500 |
commit | d3877ceca348713dc4d4347249a3976ee466edd3 (patch) | |
tree | a3ebc6344fc7fc8332f4da12057208660bd96d9d /src/third_party/wiredtiger/src/btree/bt_curnext.c | |
parent | 6b6471ea34f384e747eeef83dfdfcaee1b332bd0 (diff) | |
download | mongo-d3877ceca348713dc4d4347249a3976ee466edd3.tar.gz |
Import wiredtiger-wiredtiger-2.8-rc2-171-g378f727.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_curnext.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_curnext.c | 61 |
1 files changed, 36 insertions, 25 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 6a07969d684..9cd7f0241fc 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -120,7 +120,7 @@ new_page: * Return the next variable-length entry on the append list. */ static inline int -__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped) +__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage) { WT_ITEM *val; WT_SESSION_IMPL *session; @@ -140,16 +140,17 @@ new_page: if (cbt->ins == NULL) return (WT_NOTFOUND); __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); - if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL || - WT_UPDATE_DELETED_ISSET(upd)) { - *skipped = 1; + if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) + continue; + if (WT_UPDATE_DELETED_ISSET(upd)) { + ++cbt->page_deleted_count; continue; } val->data = WT_UPDATE_DATA(upd); val->size = upd->size; - break; + return (0); } - return (0); + /* NOTREACHED */ } /* @@ -157,7 +158,7 @@ new_page: if (cbt->ins == NULL) * Move to the next, variable-length column-store item. */ static inline int -__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped) +__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage) { WT_CELL *cell; WT_CELL_UNPACK unpack; @@ -198,7 +199,7 @@ new_page: /* Find the matching WT_COL slot. */ NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { if (WT_UPDATE_DELETED_ISSET(upd)) { - *skipped = 1; + ++cbt->page_deleted_count; continue; } @@ -237,7 +238,7 @@ new_page: /* Find the matching WT_COL slot. */ * Move to the next row-store item. */ static inline int -__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped) +__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage) { WT_INSERT *ins; WT_ITEM *key, *val; @@ -280,9 +281,10 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped) cbt->ins = WT_SKIP_NEXT(cbt->ins); new_insert: if ((ins = cbt->ins) != NULL) { - if ((upd = __wt_txn_read(session, ins->upd)) == NULL || - WT_UPDATE_DELETED_ISSET(upd)) { - *skipped = 1; + if ((upd = __wt_txn_read(session, ins->upd)) == NULL) + continue; + if (WT_UPDATE_DELETED_ISSET(upd)) { + ++cbt->page_deleted_count; continue; } key->data = WT_INSERT_KEY(ins); @@ -314,7 +316,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { rip = &page->pg_row_d[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { - *skipped = 1; + ++cbt->page_deleted_count; continue; } @@ -342,6 +344,11 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt, int next) F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV); /* + * Clear the count of deleted items on the page. + */ + cbt->page_deleted_count = 0; + + /* * If we don't have a search page, then we're done, we're starting at * the beginning or end of the tree, not as a result of a search. */ @@ -393,7 +400,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating) WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; - int skipped, newpage; + int newpage; session = (WT_SESSION_IMPL *)cbt->iface.session; @@ -418,7 +425,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating) * found. Then, move to the next page, until we reach the end of the * file. */ - for (skipped = newpage = 0;; skipped = 0, newpage = 1) { + for (newpage = 0;; newpage = 1) { page = cbt->ref == NULL ? NULL : cbt->ref->page; WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page)); @@ -428,8 +435,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating) ret = __cursor_fix_append_next(cbt, newpage); break; case WT_PAGE_COL_VAR: - ret = __cursor_var_append_next( - cbt, newpage, &skipped); + ret = __cursor_var_append_next(cbt, newpage); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -444,10 +450,10 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating) ret = __cursor_fix_next(cbt, newpage); break; case WT_PAGE_COL_VAR: - ret = __cursor_var_next(cbt, newpage, &skipped); + ret = __cursor_var_next(cbt, newpage); break; case WT_PAGE_ROW_LEAF: - ret = __cursor_row_next(cbt, newpage, &skipped); + ret = __cursor_row_next(cbt, newpage); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -467,13 +473,18 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating) } /* - * If we scanned all the way through a page and only saw - * deleted records, try to evict the page as we release it. - * Otherwise repeatedly deleting from the beginning of a tree - * can have quadratic performance. + * If we saw a lot of deleted records on this page, or we went + * all the way through a page and only saw deleted records, try + * to evict the page when we release it. Otherwise repeatedly + * deleting from the beginning of a tree can have quadratic + * performance. Take care not to force eviction of pages that + * are genuinely empty, in new trees. */ - if (newpage && skipped) - page->read_gen = WT_READGEN_OLDEST; + if (page != NULL && + (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || + (newpage && cbt->page_deleted_count > 0))) + __wt_page_evict_soon(page); + cbt->page_deleted_count = 0; WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); |