diff options
author | Keith Bostic <keith@wiredtiger.com> | 2014-02-05 15:45:54 -0500 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2014-02-05 15:45:54 -0500 |
commit | 4adabd868acdd540eb93b9bdafcd2394c73dc99e (patch) | |
tree | d36e65ffc598798eedf99b5657b545729e1185df /src/btree | |
parent | 50d9147a965c0ece7786b53d55bbe1149940136c (diff) | |
download | mongo-4adabd868acdd540eb93b9bdafcd2394c73dc99e.tar.gz |
Add a new level of indirection on internal pages so there's a way to
split them by atomically replacing the WT_REF array with a new one.
Diffstat (limited to 'src/btree')
-rw-r--r-- | src/btree/bt_bulk.c | 2 | ||||
-rw-r--r-- | src/btree/bt_curnext.c | 10 | ||||
-rw-r--r-- | src/btree/bt_curprev.c | 12 | ||||
-rw-r--r-- | src/btree/bt_cursor.c | 23 | ||||
-rw-r--r-- | src/btree/bt_debug.c | 22 | ||||
-rw-r--r-- | src/btree/bt_discard.c | 26 | ||||
-rw-r--r-- | src/btree/bt_handle.c | 62 | ||||
-rw-r--r-- | src/btree/bt_page.c | 89 | ||||
-rw-r--r-- | src/btree/bt_read.c | 6 | ||||
-rw-r--r-- | src/btree/bt_slvg.c | 37 | ||||
-rw-r--r-- | src/btree/bt_stat.c | 6 | ||||
-rw-r--r-- | src/btree/bt_vrfy.c | 14 | ||||
-rw-r--r-- | src/btree/bt_walk.c | 17 | ||||
-rw-r--r-- | src/btree/col_modify.c | 9 | ||||
-rw-r--r-- | src/btree/col_srch.c | 22 | ||||
-rw-r--r-- | src/btree/rec_evict.c | 10 | ||||
-rw-r--r-- | src/btree/rec_merge.c | 30 | ||||
-rw-r--r-- | src/btree/rec_write.c | 39 | ||||
-rw-r--r-- | src/btree/row_key.c | 8 | ||||
-rw-r--r-- | src/btree/row_modify.c | 6 | ||||
-rw-r--r-- | src/btree/row_srch.c | 23 |
21 files changed, 261 insertions, 212 deletions
diff --git a/src/btree/bt_bulk.c b/src/btree/bt_bulk.c index b66f84f5a0c..70457523086 100644 --- a/src/btree/bt_bulk.c +++ b/src/btree/bt_bulk.c @@ -31,7 +31,7 @@ __wt_bulk_init(WT_CURSOR_BULK *cbulk) "bulk-load is only possible for newly created trees"); /* Set a reference to the empty leaf page. */ - cbulk->leaf = btree->root_page->u.intl.t->page; + cbulk->leaf = btree->root_page->pu_intl_index[0]->page; WT_RET(__wt_rec_bulk_init(cbulk)); diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 7f8e04b43e9..b6313535e84 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -84,7 +84,7 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, int newpage) /* Initialize for each new page. */ if (newpage) { - cbt->last_standard_recno = __col_last_recno(cbt->page); + cbt->last_standard_recno = __col_fix_last_recno(cbt->page); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->page->u.col_fix.recno); @@ -167,7 +167,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage) /* Initialize for each new page. */ if (newpage) { - cbt->last_standard_recno = __col_last_recno(cbt->page); + cbt->last_standard_recno = __col_var_last_recno(cbt->page); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->page->u.col_var.recno); @@ -281,7 +281,8 @@ new_insert: if ((ins = cbt->ins) != NULL) { } /* Check for the end of the page. */ - if (cbt->row_iteration_slot >= cbt->page->entries * 2 + 1) + if (cbt->row_iteration_slot >= + cbt->page->pu_row_entries * 2 + 1) return (WT_NOTFOUND); ++cbt->row_iteration_slot; @@ -357,7 +358,8 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt, int next) * For column-store pages, calculate the largest record on the * page. */ - cbt->last_standard_recno = __col_last_recno(page); + cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ? + __col_var_last_recno(page) : __col_fix_last_recno(page); /* If we're traversing the append list, set the reference. 
*/ if (cbt->ins_head != NULL && diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index cdfefb5bd65..d2d3091f82a 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -163,7 +163,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, int newpage) cbt->recno <= WT_INSERT_RECNO(cbt->ins)) WT_RET(__cursor_skip_prev(cbt)); if (cbt->ins == NULL && - (cbt->recno == 1 || __col_last_recno(cbt->page) != 0)) + (cbt->recno == 1 || __col_fix_last_recno(cbt->page) != 0)) return (WT_NOTFOUND); } @@ -219,7 +219,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, int newpage) /* Initialize for each new page. */ if (newpage) { - cbt->last_standard_recno = __col_last_recno(cbt->page); + cbt->last_standard_recno = __col_fix_last_recno(cbt->page); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->last_standard_recno); @@ -302,7 +302,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, int newpage) /* Initialize for each new page. */ if (newpage) { - cbt->last_standard_recno = __col_last_recno(cbt->page); + cbt->last_standard_recno = __col_var_last_recno(cbt->page); if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->last_standard_recno); @@ -396,13 +396,13 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, int newpage) if (!F_ISSET_ATOMIC(cbt->page, WT_PAGE_BUILD_KEYS)) WT_RET(__wt_row_leaf_keys(session, cbt->page)); - if (cbt->page->entries == 0) + if (cbt->page->pu_row_entries == 0) cbt->ins_head = WT_ROW_INSERT_SMALLEST(cbt->page); else cbt->ins_head = WT_ROW_INSERT_SLOT( - cbt->page, cbt->page->entries - 1); + cbt->page, cbt->page->pu_row_entries - 1); cbt->ins = WT_SKIP_LAST(cbt->ins_head); - cbt->row_iteration_slot = cbt->page->entries * 2 + 1; + cbt->row_iteration_slot = cbt->page->pu_row_entries * 2 + 1; goto new_insert; } diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 45a43051e48..53c7b03bf7a 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -94,32 +94,41 @@ 
__cursor_invalid(WT_CURSOR_BTREE *cbt) /* Do we have a position on the page? */ switch (btree->type) { case BTREE_COL_FIX: - if (cbt->recno >= page->u.col_fix.recno + page->entries) + if (cbt->recno >= + page->u.col_fix.recno + page->pu_fix_entries) return (1); break; case BTREE_COL_VAR: + if (cbt->slot > page->pu_var_entries) + return (1); + break; case BTREE_ROW: - if (cbt->slot > page->entries) + if (cbt->slot > page->pu_row_entries) return (1); break; } } - /* The page may be empty, the search routine doesn't check. */ - if (page->entries == 0) - return (1); - - /* Otherwise, check for an update in the page's slots. */ + /* + * Check for empty pages (the page may be empty, the search routine + * doesn't check), otherwise, check for an update in the page's slots. + */ switch (btree->type) { case BTREE_COL_FIX: + if (page->pu_fix_entries == 0) + return (1); break; case BTREE_COL_VAR: + if (page->pu_var_entries == 0) + return (1); cip = &page->u.col_var.d[cbt->slot]; if ((cell = WT_COL_PTR(page, cip)) == NULL || __wt_cell_type(cell) == WT_CELL_DEL) return (1); break; case BTREE_ROW: + if (page->pu_row_entries == 0) + return (1); if (page->u.row.upd != NULL && (upd = __wt_txn_read(session, page->u.row.upd[cbt->slot])) != NULL && WT_UPDATE_DELETED_ISSET(upd)) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index c5e0f33b290..279e5cfdc8f 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -443,6 +443,7 @@ static int __debug_page_hdr(WT_DBG *ds, WT_PAGE *page) { WT_SESSION_IMPL *session; + uint32_t entries; session = ds->session; @@ -452,15 +453,21 @@ __debug_page_hdr(WT_DBG *ds, WT_PAGE *page) switch (page->type) { case WT_PAGE_COL_INT: __dmsg(ds, " recno %" PRIu64, page->u.intl.recno); + entries = page->pu_intl_entries; break; case WT_PAGE_COL_FIX: __dmsg(ds, " recno %" PRIu64, page->u.col_fix.recno); + entries = page->pu_fix_entries; break; case WT_PAGE_COL_VAR: __dmsg(ds, " recno %" PRIu64, page->u.col_var.recno); + entries = 
page->pu_var_entries; break; case WT_PAGE_ROW_INT: + entries = page->pu_intl_entries; + break; case WT_PAGE_ROW_LEAF: + entries = page->pu_row_entries; break; WT_ILLEGAL_VALUE(session); } @@ -486,8 +493,7 @@ __debug_page_hdr(WT_DBG *ds, WT_PAGE *page) __dmsg(ds, "\troot"); else __dmsg(ds, "\tparent %p", page->parent); - __dmsg(ds, - ", disk %p, entries %" PRIu32 "\n", page->dsk, page->entries); + __dmsg(ds, ", disk %p, entries %" PRIu32 "\n", page->dsk, entries); return (0); } @@ -589,16 +595,16 @@ __debug_page_col_fix(WT_DBG *ds, WT_PAGE *page) static int __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) { - WT_REF *ref; + WT_REF **refp, *ref; uint32_t i; - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { __dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno); WT_RET(__debug_ref(ds, ref, page)); } if (LF_ISSET(WT_DEBUG_TREE_WALK)) - WT_REF_FOREACH(page, ref, i) + WT_INTL_FOREACH(page, refp, ref, i) if (ref->state == WT_REF_MEM) { __dmsg(ds, "\n"); WT_RET(__debug_page(ds, ref->page, flags)); @@ -657,19 +663,19 @@ __debug_page_col_var(WT_DBG *ds, WT_PAGE *page) static int __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) { - WT_REF *ref; + WT_REF **refp, *ref; size_t len; uint8_t *p; uint32_t i; - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { __wt_ref_key(page, ref, &p, &len); __debug_item(ds, "K", p, len); WT_RET(__debug_ref(ds, ref, page)); } if (LF_ISSET(WT_DEBUG_TREE_WALK)) - WT_REF_FOREACH(page, ref, i) + WT_INTL_FOREACH(page, refp, ref, i) if (ref->state == WT_REF_MEM) { __dmsg(ds, "\n"); WT_RET(__debug_page(ds, ref->page, flags)); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 0815fe90ca4..8f6f8a40b04 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -129,7 +129,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) /* Free the insert/update array. 
*/ if (mod->update != NULL) __free_skip_array(session, mod->update, - page->type == WT_PAGE_COL_FIX ? 1 : page->entries); + page->type == WT_PAGE_COL_FIX ? 1 : page->pu_var_entries); /* Free the overflow on-page, reuse and transaction-cache skiplists. */ __wt_ovfl_onpage_discard(session, page); @@ -148,14 +148,14 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) static void __free_page_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_REF *ref; + WT_REF **refp, *ref; uint32_t i; /* * For each referenced addr, see if the addr was an allocation, and if * so, free it. */ - WT_REF_FOREACH(page, ref, i) + WT_INTL_FOREACH(page, refp, ref, i) if (ref->addr != NULL && __wt_off_page(page, ref->addr)) { __wt_free(session, ((WT_ADDR *)ref->addr)->addr); @@ -182,20 +182,19 @@ static void __free_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_IKEY *ikey; - WT_REF *ref; + WT_REF **refp, *ref; uint32_t i; /* - * Free any allocated keys. - * - * For each referenced addr, see if the addr was an allocation, and if - * so, free it. + * For each WT_REF referenced addr, see if the key or address was an + * allocation, and if so, free it. */ - WT_REF_FOREACH(page, ref, i) { + if (page->pu_intl_index == NULL) + return; + WT_INTL_FOREACH(page, refp, ref, i) { if ((ikey = __wt_ref_key_instantiated(ref)) != NULL) __wt_free(session, ikey); - if (ref->addr != NULL && - __wt_off_page(page, ref->addr)) { + if (ref->addr != NULL && __wt_off_page(page, ref->addr)) { __wt_free(session, ((WT_ADDR *)ref->addr)->addr); __wt_free(session, ref->addr); } @@ -234,11 +233,12 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) * found on the original page). */ if (page->u.row.ins != NULL) - __free_skip_array(session, page->u.row.ins, page->entries + 1); + __free_skip_array( + session, page->u.row.ins, page->pu_row_entries + 1); /* Free the update array. 
*/ if (page->u.row.upd != NULL) - __free_update(session, page->u.row.upd, page->entries); + __free_update(session, page->u.row.upd, page->pu_row_entries); } /* diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 9525b12ca90..c43a4680dd5 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -390,17 +390,20 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation) switch (btree->type) { case BTREE_COL_FIX: case BTREE_COL_VAR: - WT_ERR(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, &root)); - root->u.intl.recno = 1; - ref = root->u.intl.t; + WT_ERR(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, 1, &root)); + root->parent = NULL; + + ref = root->pu_intl_index[0]; WT_ERR(__wt_btree_new_leaf_page(session, root, ref, &leaf)); ref->addr = NULL; ref->state = WT_REF_MEM; ref->key.recno = 1; break; case BTREE_ROW: - WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_INT, 1, &root)); - ref = root->u.intl.t; + WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_INT, 0, 1, &root)); + root->parent = NULL; + + ref = root->pu_intl_index[0]; WT_ERR(__wt_btree_new_leaf_page(session, root, ref, &leaf)); ref->addr = NULL; ref->state = WT_REF_MEM; @@ -409,8 +412,6 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation) break; WT_ILLEGAL_VALUE_ERR(session); } - root->entries = 1; - root->parent = NULL; /* * Mark the leaf page dirty: we didn't create an entirely valid root @@ -452,36 +453,6 @@ err: if (leaf != NULL) } /* - * __wt_btree_new_modified_page -- - * Create a new in-memory page could be an internal or leaf page. Setup - * the page modify structure. - */ -int -__wt_btree_new_modified_page(WT_SESSION_IMPL *session, - uint8_t type, uint32_t entries, int merge, WT_PAGE **pagep) -{ - WT_DECL_RET; - WT_PAGE *newpage; - - /* Allocate a new page and fill it in. 
*/ - WT_RET(__wt_page_alloc(session, type, entries, &newpage)); - newpage->read_gen = WT_READ_GEN_NOTSET; - newpage->entries = entries; - - WT_ERR(__wt_page_modify_init(session, newpage)); - if (merge) - F_SET(newpage->modify, WT_PM_REC_SPLIT_MERGE); - else - __wt_page_modify_set(session, newpage); - - *pagep = newpage; - return (0); - -err: __wt_page_out(session, &newpage); - return (ret); -} - -/* * __wt_btree_new_leaf_page -- * Create an empty leaf page and link it into a reference in its parent. */ @@ -496,19 +467,16 @@ __wt_btree_new_leaf_page( switch (btree->type) { case BTREE_COL_FIX: - WT_RET(__wt_page_alloc(session, WT_PAGE_COL_FIX, 0, &leaf)); - leaf->u.col_fix.recno = 1; + WT_RET(__wt_page_alloc(session, WT_PAGE_COL_FIX, 1, 0, &leaf)); break; case BTREE_COL_VAR: - WT_RET(__wt_page_alloc(session, WT_PAGE_COL_VAR, 0, &leaf)); - leaf->u.col_var.recno = 1; + WT_RET(__wt_page_alloc(session, WT_PAGE_COL_VAR, 1, 0, &leaf)); break; case BTREE_ROW: - WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, &leaf)); + WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, &leaf)); break; WT_ILLEGAL_VALUE(session); } - leaf->entries = 0; WT_LINK_PAGE(parent, ref, leaf); *pagep = leaf; @@ -537,7 +505,7 @@ __btree_preload(WT_SESSION_IMPL *session) { WT_BM *bm; WT_BTREE *btree; - WT_REF *ref; + WT_REF **refp, *ref; size_t addr_size; uint32_t i; const uint8_t *addr; @@ -546,7 +514,7 @@ __btree_preload(WT_SESSION_IMPL *session) bm = btree->bm; /* Pre-load the second-level internal pages. */ - WT_REF_FOREACH(btree->root_page, ref, i) { + WT_INTL_FOREACH(btree->root_page, refp, ref, i) { WT_RET(__wt_ref_info(session, btree->root_page, ref, &addr, &addr_size, NULL)); if (addr != NULL) @@ -572,7 +540,9 @@ __btree_get_last_recno(WT_SESSION_IMPL *session) if (page == NULL) return (WT_NOTFOUND); - btree->last_recno = __col_last_recno(page); + btree->last_recno = page->type == WT_PAGE_COL_VAR ? 
+ __col_var_last_recno(page) : __col_fix_last_recno(page); + return (__wt_page_release(session, page)); } diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index cfc7137b13a..9d52e71e5fa 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -124,10 +124,13 @@ __wt_page_in_func( */ int __wt_page_alloc(WT_SESSION_IMPL *session, - uint8_t type, uint32_t alloc_entries, WT_PAGE **pagep) + uint8_t type, uint64_t recno, uint32_t alloc_entries, WT_PAGE **pagep) { WT_CACHE *cache; + WT_DECL_RET; WT_PAGE *page; + WT_REF **refp; + uint32_t i; size_t size; void *p; @@ -136,12 +139,18 @@ __wt_page_alloc(WT_SESSION_IMPL *session, cache = S2C(session)->cache; /* - * Allocate a page, and for most page types, the additional information - * it needs to describe the disk image. + * Allocate a page, and for most page types, the additional memory it + * needs to describe the disk image. */ size = sizeof(WT_PAGE); switch (type) { case WT_PAGE_COL_FIX: + /* + * The exception is fixed-length column-store, where we don't + * describe the disk image, it's too expensive at N bits per + * item. Ignore the passed-in value, other than setting the + * number of items on the page. + */ break; case WT_PAGE_COL_INT: case WT_PAGE_ROW_INT: @@ -159,17 +168,46 @@ __wt_page_alloc(WT_SESSION_IMPL *session, WT_RET(__wt_calloc(session, 1, size, &page)); p = (uint8_t *)page + sizeof(WT_PAGE); + page->type = type; + page->read_gen = WT_READ_GEN_NOTSET; + switch (type) { case WT_PAGE_COL_FIX: + page->u.col_fix.recno = recno; + page->pu_fix_entries = alloc_entries; break; case WT_PAGE_COL_INT: + page->u.intl.recno = recno; + /* FALLTHROUGH */ case WT_PAGE_ROW_INT: - page->u.intl.t = p; + page->u.intl.__index = p; + + /* + * Internal pages have an array of WT_REF pointers so they can + * split. Allocate and initialize it to point to the first set + * of slots (even though those slots aren't yet initialized). 
+ */ + if ((ret = __wt_calloc(session, alloc_entries, + sizeof(*page->u.intl.index) + + alloc_entries * sizeof(WT_REF *), + &page->u.intl.index)) != 0) { + __wt_free(session, page); + return (ret); + } + size += sizeof(uint32_t) + alloc_entries * sizeof(WT_REF *); + + page->pu_intl_entries = alloc_entries; + for (i = 0, refp = page->pu_intl_index; i < alloc_entries; ++i) + *refp++ = &page->u.intl.__index[i]; + break; case WT_PAGE_COL_VAR: + page->u.col_var.recno = recno; page->u.col_var.d = p; + page->pu_var_entries = alloc_entries; break; case WT_PAGE_ROW_LEAF: + page->pu_row_entries = alloc_entries; page->u.row.d = p; break; WT_ILLEGAL_VALUE(session); @@ -179,9 +217,6 @@ __wt_page_alloc(WT_SESSION_IMPL *session, __wt_cache_page_inmem_incr(session, page, size); (void)WT_ATOMIC_ADD(cache->pages_inmem, 1); - /* The one page field we set is the type. */ - page->type = type; - *pagep = page; return (0); } @@ -200,37 +235,33 @@ __wt_page_inmem( uint32_t alloc_entries; size_t size; - alloc_entries = 0; *pagep = NULL; + alloc_entries = 0; /* - * Figure out how many underlying objects the page references so - * we can allocate them along with the page. + * Figure out how many underlying objects the page references so we can + * allocate them along with the page. */ switch (dsk->type) { case WT_PAGE_COL_FIX: - break; case WT_PAGE_COL_INT: - /* - * Column-store internal page entries map one-to-one to the - * number of physical entries on the page (each physical entry - * is an offset object). - */ - alloc_entries = dsk->u.entries; - break; case WT_PAGE_COL_VAR: /* * Column-store leaf page entries map one-to-one to the number * of physical entries on the page (each physical entry is a * value item). + * + * Column-store internal page entries map one-to-one to the + * number of physical entries on the page (each entry is a + * location cookie). 
*/ alloc_entries = dsk->u.entries; break; case WT_PAGE_ROW_INT: /* * Row-store internal page entries map one-to-two to the number - * of physical entries on the page (each in-memory entry is a - * key item and location cookie). + * of physical entries on the page (each entry is a key and + * location cookie pair). */ alloc_entries = dsk->u.entries / 2; break; @@ -254,9 +285,9 @@ __wt_page_inmem( } /* Allocate and initialize a new WT_PAGE. */ - WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, &page)); + WT_RET(__wt_page_alloc( + session, dsk->type, dsk->recno, alloc_entries, &page)); page->dsk = dsk; - page->read_gen = WT_READ_GEN_NOTSET; F_SET_ATOMIC(page, flags); /* @@ -267,26 +298,18 @@ __wt_page_inmem( switch (page->type) { case WT_PAGE_COL_FIX: - page->entries = dsk->u.entries; - page->u.col_fix.recno = dsk->recno; __inmem_col_fix(session, page); break; case WT_PAGE_COL_INT: - page->entries = dsk->u.entries; - page->u.intl.recno = dsk->recno; __inmem_col_int(session, page); break; case WT_PAGE_COL_VAR: - page->entries = dsk->u.entries; - page->u.col_var.recno = dsk->recno; WT_ERR(__inmem_col_var(session, page, &size)); break; case WT_PAGE_ROW_INT: - page->entries = dsk->u.entries / 2; WT_ERR(__inmem_row_int(session, page, &size)); break; case WT_PAGE_ROW_LEAF: - page->entries = alloc_entries; WT_ERR(__inmem_row_leaf(session, page)); break; WT_ILLEGAL_VALUE_ERR(session); @@ -344,7 +367,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) * Walk the page, building references: the page contains value items. * The value items are on-page items (WT_CELL_VALUE). 
*/ - ref = page->u.intl.t; + ref = page->u.intl.__index; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { __wt_cell_unpack(cell, unpack); ref->addr = cell; @@ -466,14 +489,14 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) unpack = &_unpack; dsk = page->dsk; - WT_ERR(__wt_scr_alloc(session, 0, &current)); + WT_RET(__wt_scr_alloc(session, 0, &current)); /* * Walk the page, instantiating keys: the page contains sorted key and * location cookie pairs. Keys are on-page/overflow items and location * cookies are WT_CELL_ADDR_XXX items. */ - ref = page->u.intl.t; + ref = page->u.intl.__index; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { __wt_cell_unpack(cell, unpack); switch (unpack->type) { diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 82e972a5541..7a27cc422c6 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -35,11 +35,11 @@ __cache_read_row_deleted( } /* Allocate the update array. */ - WT_RET(__wt_calloc_def(session, page->entries, &upd_array)); + WT_RET(__wt_calloc_def(session, page->pu_row_entries, &upd_array)); page->u.row.upd = upd_array; /* Fill in the update array with deleted items. 
*/ - for (i = 0; i < page->entries; ++i) { + for (i = 0; i < page->pu_row_entries; ++i) { WT_RET(__wt_calloc_def(session, 1, &upd)); upd->next = upd_array[i]; upd_array[i] = upd; @@ -49,7 +49,7 @@ __cache_read_row_deleted( } __wt_cache_page_inmem_incr(session, page, - page->entries * (sizeof(WT_UPDATE *) + sizeof(WT_UPDATE))); + page->pu_row_entries * (sizeof(WT_UPDATE *) + sizeof(WT_UPDATE))); return (0); } diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index daa57d5e935..2d2add6ffff 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -534,8 +534,8 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session, WT_ERR(__wt_page_inmem(session, NULL, NULL, dsk, 0, &page)); WT_ERR(__wt_row_leaf_key_copy(session, page, &page->u.row.d[0], &trk->row_start)); - WT_ERR(__wt_row_leaf_key_copy(session, - page, &page->u.row.d[page->entries - 1], &trk->row_stop)); + WT_ERR(__wt_row_leaf_key_copy(session, page, + &page->u.row.d[page->pu_row_entries - 1], &trk->row_stop)); if (WT_VERBOSE_ISSET(session, salvage)) { WT_ERR(__wt_buf_set_printable(session, ss->tmp1, @@ -1077,14 +1077,11 @@ __slvg_col_build_internal( addr = NULL; /* Allocate a column-store root (internal) page and fill it in. 
*/ - WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT, leaf_cnt, &page)); + WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, leaf_cnt, &page)); page->parent = NULL; /* Root page */ - page->read_gen = WT_READ_GEN_NOTSET; - page->u.intl.recno = 1; - page->entries = leaf_cnt; WT_ERR(__slvg_modify_init(session, page)); - for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) { + for (ref = page->u.intl.__index, i = 0; i < ss->pages_next; ++i) { if ((trk = ss->pages[i]) == NULL) continue; @@ -1140,7 +1137,7 @@ __slvg_col_build_leaf( WT_PAGE *page; WT_SALVAGE_COOKIE *cookie, _cookie; uint64_t skip, take; - uint32_t save_entries; + uint32_t *entriesp, save_entries; cookie = &_cookie; WT_CLEAR(*cookie); @@ -1148,8 +1145,12 @@ __slvg_col_build_leaf( /* Get the original page, including the full in-memory setup. */ WT_RET(__wt_page_in(session, parent, ref)); page = ref->page; + + entriesp = page->type == WT_PAGE_COL_VAR ? + &page->pu_var_entries : &page->pu_fix_entries; + save_col_var = page->u.col_var.d; - save_entries = page->entries; + save_entries = *entriesp; /* * Calculate the number of K/V entries we are going to skip, and @@ -1210,7 +1211,7 @@ __slvg_col_build_leaf( /* Reset the page. */ page->u.col_var.d = save_col_var; - page->entries = save_entries; + *entriesp = save_entries; ret = __wt_page_release(session, page); if (ret == 0) @@ -1657,13 +1658,11 @@ __slvg_row_build_internal( addr = NULL; /* Allocate a row-store root (internal) page and fill it in. 
*/ - WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, leaf_cnt, &page)); + WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, 0, leaf_cnt, &page)); page->parent = NULL; - page->read_gen = WT_READ_GEN_NOTSET; - page->entries = leaf_cnt; WT_ERR(__slvg_modify_init(session, page)); - for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) { + for (ref = page->u.intl.__index, i = 0; i < ss->pages_next; ++i) { if ((trk = ss->pages[i]) == NULL) continue; @@ -1808,7 +1807,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, /* We should have selected some entries, but not the entire page. */ WT_ASSERT(session, skip_start + skip_stop > 0 && - skip_start + skip_stop < page->entries); + skip_start + skip_stop < page->pu_row_entries); /* * Take a copy of this page's first key to define the start of @@ -1825,8 +1824,8 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, * reference overflow pages. */ WT_ERR(__slvg_row_merge_ovfl(session, trk, page, 0, skip_start)); - WT_ERR(__slvg_row_merge_ovfl( - session, trk, page, page->entries - skip_stop, page->entries)); + WT_ERR(__slvg_row_merge_ovfl(session, trk, page, + page->pu_row_entries - skip_stop, page->pu_row_entries)); /* * If we take all of the keys, we don't write the page and we clear the @@ -1843,7 +1842,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, * is no need to copy anything on the page itself, the entries * value limits the number of page items. */ - page->entries -= skip_stop; + page->pu_row_entries -= skip_stop; cookie->skip = skip_start; /* @@ -1866,7 +1865,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session, session, page, cookie, WT_SKIP_UPDATE_ERR)); /* Reset the page. 
*/ - page->entries += skip_stop; + page->pu_row_entries += skip_stop; } /* diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index 266f1ec8031..bffd83282c8 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -61,11 +61,11 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) switch (page->type) { case WT_PAGE_COL_FIX: WT_STAT_INCR(stats, btree_column_fix); - WT_STAT_INCRV(stats, btree_entries, page->entries); + WT_STAT_INCRV(stats, btree_entries, page->pu_fix_entries); break; case WT_PAGE_COL_INT: WT_STAT_INCR(stats, btree_column_internal); - WT_STAT_INCRV(stats, btree_entries, page->entries); + WT_STAT_INCRV(stats, btree_entries, page->pu_intl_entries); break; case WT_PAGE_COL_VAR: WT_RET(__stat_page_col_var(page, stats)); @@ -75,7 +75,7 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats) break; case WT_PAGE_ROW_INT: WT_STAT_INCR(stats, btree_row_internal); - WT_STAT_INCRV(stats, btree_entries, page->entries); + WT_STAT_INCRV(stats, btree_entries, page->pu_intl_entries); break; case WT_PAGE_ROW_LEAF: WT_RET(__stat_page_row_leaf(page, stats)); diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 421cdb79e68..574c703edf8 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -221,7 +221,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_PAGE *page, WT_VSTUFF *vs) WT_CELL_UNPACK *unpack, _unpack; WT_COL *cip; WT_DECL_RET; - WT_REF *ref; + WT_REF **refp, *ref; uint64_t recno; uint32_t entry, i; int found; @@ -297,7 +297,7 @@ recno_chk: if (recno != vs->record_total + 1) } switch (page->type) { case WT_PAGE_COL_FIX: - vs->record_total += page->entries; + vs->record_total += page->pu_fix_entries; break; case WT_PAGE_COL_VAR: recno = 0; @@ -389,7 +389,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, case WT_PAGE_COL_INT: /* For each entry in an internal page, verify the subtree. 
*/ entry = 0; - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { /* * It's a depth-first traversal: this entry's starting * record number should be 1 more than the total records @@ -423,7 +423,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, case WT_PAGE_ROW_INT: /* For each entry in an internal page, verify the subtree. */ entry = 0; - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { /* * It's a depth-first traversal: this entry's starting * key should be larger than the largest key previously @@ -509,7 +509,7 @@ __verify_row_leaf_key_order( * If a tree is empty (just created), it won't have keys; if there * are no keys, we're done. */ - if (page->entries == 0) + if (page->pu_row_entries == 0) return (0); /* @@ -543,8 +543,8 @@ __verify_row_leaf_key_order( } /* Update the largest key we've seen to the last key on this page. */ - WT_RET(__wt_row_leaf_key_copy(session, - page, page->u.row.d + (page->entries - 1), vs->max_key)); + WT_RET(__wt_row_leaf_key_copy(session, page, + page->u.row.d + (page->pu_row_entries - 1), vs->max_key)); (void)__wt_page_addr_string(session, vs->max_addr, page); return (0); diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index a7692b9fd18..f7b62b53596 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -162,7 +162,7 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_PAGE **pagep, uint32_t flags) WT_BTREE *btree; WT_PAGE *couple, *page; WT_REF *ref; - uint32_t slot; + uint32_t page_entries, slot; int cache, compact, discard, eviction, prev, set_read_gen; int skip, skip_intl, skip_leaf; @@ -221,7 +221,8 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_PAGE **pagep, uint32_t flags) if (page == NULL) { if ((page = btree->root_page) == NULL) return (0); - slot = prev ? page->entries - 1 : 0; + page_entries = page->pu_intl_entries; + slot = prev ? 
page_entries - 1 : 0; goto descend; } @@ -236,9 +237,10 @@ ascend: /* * Figure out the current slot in the parent page's WT_REF array and * switch to the parent. */ - ref = __wt_page_ref(session, page); - slot = (uint32_t)(ref - page->parent->u.intl.t); + WT_RET(__wt_page_refp(session, page, &slot)); + ref = page->parent->pu_intl_index[slot]; page = page->parent; + page_entries = page->pu_intl_entries; /* If the eviction thread, clear the page's walk status. * @@ -259,7 +261,7 @@ ascend: /* * next/prev slot and left/right-most element in its subtree. */ if ((prev && slot == 0) || - (!prev && slot == page->entries - 1)) { + (!prev && slot == page_entries - 1)) { /* Optionally skip internal pages. */ if (skip_intl) goto ascend; @@ -295,7 +297,7 @@ ascend: /* descend: for (;;) { if (page->type == WT_PAGE_ROW_INT || page->type == WT_PAGE_COL_INT) - ref = &page->u.intl.t[slot]; + ref = page->pu_intl_index[slot]; else if (skip_leaf) goto ascend; else { @@ -418,7 +420,8 @@ retry: if (ref->state != WT_REF_MEM || } couple = page = ref->page; - slot = prev ? page->entries - 1 : 0; + page_entries = __wt_page_entries(page); + slot = prev ? page_entries - 1 : 0; } } /* NOTREACHED */ diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 97dbc679995..7d18f3ae137 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -51,7 +51,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) * append list, not the update list. In addition, a recno of * 0 implies an append operation, we're allocating a new row. */ - if (recno == 0 || recno > __col_last_recno(page)) + if (recno == 0 || + recno > (btree->type == BTREE_COL_VAR ? 
+ __col_var_last_recno(page) : __col_fix_last_recno(page))) append = 1; } @@ -102,8 +104,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) session, page, page->modify->update, ins_headp, 1); ins_headp = &page->modify->update[0]; } else { - WT_PAGE_ALLOC_AND_SWAP(session, page, - page->modify->update, ins_headp, page->entries); + WT_PAGE_ALLOC_AND_SWAP( + session, page, page->modify->update, + ins_headp, page->pu_var_entries); ins_headp = &page->modify->update[cbt->slot]; } diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index b7af547b756..56589929f59 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -34,20 +34,21 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) /* Search the internal pages of the tree. */ for (depth = 2, page = btree->root_page; page->type == WT_PAGE_COL_INT; ++depth) { - WT_ASSERT(session, ref == NULL || - ref->key.recno == page->u.intl.recno); + WT_ASSERT(session, + ref == NULL || ref->key.recno == page->u.intl.recno); /* Fast path appends. */ - base = page->entries; - ref = &page->u.intl.t[base - 1]; + base = page->pu_intl_entries; + ref = page->pu_intl_index[base - 1]; if (recno >= ref->key.recno) goto descend; /* Binary search of internal pages. */ for (base = 0, ref = NULL, - limit = page->entries - 1; limit != 0; limit >>= 1) { + limit = page->pu_intl_entries - 1; + limit != 0; limit >>= 1) { indx = base + (limit >> 1); - ref = page->u.intl.t + indx; + ref = page->pu_intl_index[indx]; if (recno == ref->key.recno) break; @@ -73,7 +74,7 @@ descend: WT_ASSERT(session, ref != NULL); * starting recno. */ WT_ASSERT(session, base > 0); - ref = page->u.intl.t + (base - 1); + ref = page->pu_intl_index[base - 1]; } /* @@ -101,14 +102,15 @@ descend: WT_ASSERT(session, ref != NULL); * we arrive here with a record that's impossibly large for the page. 
*/ if (page->type == WT_PAGE_COL_FIX) { - if (recno >= page->u.col_fix.recno + page->entries) { - cbt->recno = page->u.col_fix.recno + page->entries; + if (recno >= page->u.col_fix.recno + page->pu_fix_entries) { + cbt->recno = + page->u.col_fix.recno + page->pu_fix_entries; goto past_end; } else ins_head = WT_COL_UPDATE_SINGLE(page); } else if ((cip = __col_var_search(page, recno)) == NULL) { - cbt->recno = __col_last_recno(page); + cbt->recno = __col_var_last_recno(page); goto past_end; } else { cbt->slot = WT_COL_SLOT(page, cip); diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c index 995b219de8c..d3a0d0805bb 100644 --- a/src/btree/rec_evict.c +++ b/src/btree/rec_evict.c @@ -253,14 +253,14 @@ __rec_page_dirty_update( static void __rec_discard_tree(WT_SESSION_IMPL *session, WT_PAGE *page, int exclusive) { - WT_REF *ref; + WT_REF **refp, *ref; uint32_t i; switch (page->type) { case WT_PAGE_COL_INT: case WT_PAGE_ROW_INT: /* For each entry in the page... */ - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { if (ref->state == WT_REF_DISK || ref->state == WT_REF_DELETED) continue; @@ -294,6 +294,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page, WT_DECL_RET; WT_PAGE_MODIFY *mod; WT_PAGE *t; + WT_REF **refp; uint32_t i; btree = S2BT(session); @@ -321,7 +322,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page, * pages after we've written them. 
*/ if (page->type == WT_PAGE_COL_INT || page->type == WT_PAGE_ROW_INT) - WT_REF_FOREACH(page, ref, i) + WT_INTL_FOREACH(page, refp, ref, i) switch (ref->state) { case WT_REF_DISK: /* On-disk */ case WT_REF_DELETED: /* On-disk, deleted */ @@ -340,6 +341,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page, case WT_REF_LOCKED: /* Being evicted */ case WT_REF_READING: /* Being read */ return (EBUSY); + WT_ILLEGAL_VALUE(session); } /* @@ -442,6 +444,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); */ if (__wt_page_is_modified(page) && !F_ISSET(mod, WT_PM_REC_SPLIT_MERGE)) { +#if 0 /* * If the page is larger than the maximum allowed, attempt to * split the page in memory before evicting it. The in-memory @@ -459,6 +462,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); *inmem_split = 1; return (0); } +#endif ret = __wt_rec_write(session, page, NULL, WT_EVICTION_SERVER_LOCKED | WT_SKIP_UPDATE_QUIT); diff --git a/src/btree/rec_merge.c b/src/btree/rec_merge.c index 15f5e643713..3383bcdb61c 100644 --- a/src/btree/rec_merge.c +++ b/src/btree/rec_merge.c @@ -9,6 +9,36 @@ #if 0 /* + * __wt_btree_new_modified_page -- + * Create a new in-memory page could be an internal or leaf page. Setup + * the page modify structure. + */ +int +__wt_btree_new_modified_page(WT_SESSION_IMPL *session, + uint8_t type, uint32_t entries, int merge, WT_PAGE **pagep) +{ + WT_DECL_RET; + WT_PAGE *newpage; + + /* Allocate a new page and fill it in. 
*/ + WT_RET(__wt_page_alloc(session, type, entries, &newpage)); + newpage->read_gen = WT_READ_GEN_NOTSET; + newpage->entries = entries; + + WT_ERR(__wt_page_modify_init(session, newpage)); + if (merge) + F_SET(newpage->modify, WT_PM_REC_SPLIT_MERGE); + else + __wt_page_modify_set(session, newpage); + + *pagep = newpage; + return (0); + +err: __wt_page_out(session, &newpage); + return (ret); +} + +/* * WT_VISIT_STATE -- * The state maintained across calls to the "visit" callback functions: * the number of refs visited, the maximum depth, and the current page and diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 244b0d5c132..0681c43bc0e 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -2358,7 +2358,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_KV *val; WT_CELL_UNPACK *unpack, _unpack; WT_PAGE *rp; - WT_REF *ref; + WT_REF **refp, *ref; uint32_t i; int state; @@ -2368,7 +2368,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) unpack = &_unpack; /* For each entry in the page... */ - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { /* Update the starting record number in case we split. */ r->recno = ref->key.recno; @@ -2469,11 +2469,11 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* Copy the updated, disk-image bytes into place. */ memcpy(r->first_free, page->u.col_fix.bitf, - __bitstr_size((size_t)page->entries * btree->bitcnt)); + __bitstr_size((size_t)page->pu_fix_entries * btree->bitcnt)); /* Calculate the number of entries per page remainder. */ - entry = page->entries; - nrecs = WT_FIX_ENTRIES(btree, r->space_avail) - page->entries; + entry = page->pu_fix_entries; + nrecs = WT_FIX_ENTRIES(btree, r->space_avail) - page->pu_fix_entries; r->recno += entry; /* Walk any append list. 
*/ @@ -2556,7 +2556,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session, page, page->u.col_fix.recno, btree->maxleafpage)); /* We may not be taking all of the entries on the original page. */ - page_take = salvage->take == 0 ? page->entries : salvage->take; + page_take = salvage->take == 0 ? page->pu_fix_entries : salvage->take; page_start = salvage->skip == 0 ? 0 : salvage->skip; for (;;) { /* Calculate the number of entries per page. */ @@ -3035,7 +3035,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_IKEY *ikey; WT_KV *key, *val; WT_PAGE *rp; - WT_REF *ref; + WT_REF **refp, *ref; size_t size; uint32_t i; u_int vtype; @@ -3072,7 +3072,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = 1; /* For each entry in the in-memory page... */ - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { /* * There are different paths if the key is an overflow item vs. * a straight-forward on-page value. If an overflow item, we @@ -3261,7 +3261,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_CELL_UNPACK *vpack, _vpack; WT_KV *key, *val; WT_PAGE *rp; - WT_REF *ref; + WT_REF **refp, *ref; size_t size; uint32_t i; u_int vtype; @@ -3275,7 +3275,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) vpack = &_vpack; /* For each entry in the in-memory page... */ - WT_REF_FOREACH(page, ref, i) { + WT_INTL_FOREACH(page, refp, ref, i) { vtype = 0; addr = ref->addr; rp = ref->page; @@ -3805,7 +3805,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_BM *bm; WT_PAGE_MODIFY *mod; - WT_REF *ref; + WT_REF **refp, *ref; uint32_t i; bm = S2BT(session)->bm; @@ -3820,7 +3820,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) * root splits. In the case of root splits, we potentially have to * cope with the underlying sets of multiple pages. 
*/ - WT_REF_FOREACH(page, ref, i) + WT_INTL_FOREACH(page, refp, ref, i) WT_RET(bm->free(bm, session, ((WT_ADDR *)ref->addr)->addr, ((WT_ADDR *)ref->addr)->size)); @@ -4147,14 +4147,11 @@ __rec_split_merge_new(WT_SESSION_IMPL *session, * Our caller cleans up, make sure we return a valid page reference, * even on error. */ - WT_RET(__wt_page_alloc(session, type, r->bnd_next, pagep)); + WT_RET(__wt_page_alloc(session, type, + type == WT_PAGE_COL_INT ? r->bnd[0].recno : 0, r->bnd_next, pagep)); page = *pagep; page->parent = orig->parent; page->ref_hint = orig->ref_hint; - if (type == WT_PAGE_COL_INT) - page->u.intl.recno = r->bnd[0].recno; - page->read_gen = WT_READ_GEN_NOTSET; - page->entries = r->bnd_next; /* * We don't re-write parent pages when child pages split, which means @@ -4249,13 +4246,13 @@ __rec_split_row( /* Enter each split child page into the new internal page. */ size = 0; - for (ref = page->u.intl.t, - bnd = r->bnd, i = 0; i < r->bnd_next; ++ref, ++bnd, ++i) { + for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) { WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr)); *addr = bnd->addr; bnd->addr.addr = NULL; size += bnd->addr.size; + ref = page->pu_intl_index[i]; ref->page = NULL; WT_ERR(__wt_row_ikey(session, 0, bnd->key.data, bnd->key.size, &ref->key.ikey)); @@ -4296,12 +4293,12 @@ __rec_split_col( WT_ERR(__rec_split_merge_new(session, r, orig, &page, WT_PAGE_COL_INT)); /* Enter each split child page into the new internal page. 
*/ - for (ref = page->u.intl.t, - bnd = r->bnd, i = 0; i < r->bnd_next; ++ref, ++bnd, ++i) { + for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) { WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr)); *addr= bnd->addr; bnd->addr.addr = NULL; + ref = page->pu_intl_index[i]; ref->page = NULL; ref->key.recno = bnd->recno; ref->addr = addr; diff --git a/src/btree/row_key.c b/src/btree/row_key.c index 6eef0ce2189..c79908ff2cd 100644 --- a/src/btree/row_key.c +++ b/src/btree/row_key.c @@ -24,7 +24,7 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) btree = S2BT(session); - if (page->entries == 0) { /* Just checking... */ + if (page->pu_row_entries == 0) { /* Just checking... */ F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS); return (0); } @@ -48,14 +48,14 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) * marking up the array. */ WT_RET(__wt_scr_alloc( - session, (uint32_t)__bitstr_size(page->entries), &tmp)); + session, (uint32_t)__bitstr_size(page->pu_row_entries), &tmp)); if ((gap = btree->key_gap) == 0) gap = 1; - __inmem_row_leaf_slots(tmp->mem, 0, page->entries, gap); + __inmem_row_leaf_slots(tmp->mem, 0, page->pu_row_entries, gap); /* Instantiate the keys. */ - for (rip = page->u.row.d, i = 0; i < page->entries; ++rip, ++i) + for (rip = page->u.row.d, i = 0; i < page->pu_row_entries; ++rip, ++i) if (__bit_test(tmp->mem, i)) WT_ERR(__wt_row_leaf_key_work( session, page, rip, NULL, 1)); diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 525b682bde7..8579af76207 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -50,7 +50,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) if (cbt->ins == NULL) { /* Allocate an update array as necessary. */ WT_PAGE_ALLOC_AND_SWAP(session, page, - page->u.row.upd, upd_entry, page->entries); + page->u.row.upd, upd_entry, page->pu_row_entries); /* Set the WT_UPDATE array reference. 
*/ upd_entry = &page->u.row.upd[cbt->slot]; @@ -88,10 +88,10 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove) * slot. That's hard, so we set a flag. */ WT_PAGE_ALLOC_AND_SWAP(session, page, - page->u.row.ins, ins_headp, page->entries + 1); + page->u.row.ins, ins_headp, page->pu_row_entries + 1); ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ? - page->entries : cbt->slot; + page->pu_row_entries: cbt->slot; ins_headp = &page->u.row.ins[ins_slot]; /* Allocate the WT_INSERT_HEAD structure as necessary. */ diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 052bcff67fb..f044aed6d04 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -153,8 +153,8 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * Fast-path internal pages with one child, a common case for * the root page in new trees. */ - base = page->entries; - ref = &page->u.intl.t[base - 1]; + base = page->pu_intl_entries; + ref = page->pu_intl_index[base - 1]; if (base == 1) goto descend; @@ -173,11 +173,11 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) */ base = 0; ref = NULL; - limit = page->entries - 1; + limit = page->pu_intl_entries - 1; if (btree->collator == NULL) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - ref = page->u.intl.t + indx; + ref = page->pu_intl_index[indx]; /* * If about to compare an application key with @@ -207,7 +207,7 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) else for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - ref = page->u.intl.t + indx; + ref = page->pu_intl_index[indx]; /* * If about to compare an application key with * the 0th index on an internal page, pretend @@ -242,7 +242,7 @@ descend: WT_ASSERT(session, ref != NULL); * for descent is the one before base. 
*/ if (cmp != 0) - ref = page->u.intl.t + (base - 1); + ref = page->pu_intl_index[base - 1]; /* * Swap the parent page for the child page; return on error, @@ -274,7 +274,7 @@ descend: WT_ASSERT(session, ref != NULL); */ cmp = -1; base = 0; - limit = page->entries; + limit = page->pu_row_entries; if (btree->collator == NULL) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); @@ -405,7 +405,8 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) /* Walk the internal pages of the tree. */ for (page = btree->root_page; page->type == WT_PAGE_ROW_INT;) { - ref = page->u.intl.t + __wt_random() % page->entries; + ref = + page->pu_intl_index[__wt_random() % page->pu_intl_entries]; /* * Swap the parent page for the child page; return on error, @@ -415,7 +416,7 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) page = ref->page; } - if (page->entries != 0) { + if (page->pu_row_entries != 0) { /* * The use case for this call is finding a place to split the * tree. Cheat (it's not like this is "random", anyway), and @@ -427,8 +428,8 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) cbt->page = page; cbt->compare = 0; cbt->slot = - btree->root_page->entries < 2 ? - __wt_random() % page->entries : 0; + btree->root_page->pu_intl_entries < 2 ? + __wt_random() % page->pu_row_entries : 0; return (0); } |