summary | refs | log | tree | commit | diff
path: root/src/btree
diff options
context:
space:
mode:
author: Keith Bostic <keith@wiredtiger.com> 2014-02-05 15:45:54 -0500
committer: Keith Bostic <keith@wiredtiger.com> 2014-02-05 15:45:54 -0500
commit: 4adabd868acdd540eb93b9bdafcd2394c73dc99e (patch)
tree: d36e65ffc598798eedf99b5657b545729e1185df /src/btree
parent: 50d9147a965c0ece7786b53d55bbe1149940136c (diff)
download: mongo-4adabd868acdd540eb93b9bdafcd2394c73dc99e.tar.gz
Add a new level of indirection on internal pages so there's a way to
split them by atomically replacing the WT_REF array with a new one.
Diffstat (limited to 'src/btree')
-rw-r--r-- src/btree/bt_bulk.c 2
-rw-r--r-- src/btree/bt_curnext.c 10
-rw-r--r-- src/btree/bt_curprev.c 12
-rw-r--r-- src/btree/bt_cursor.c 23
-rw-r--r-- src/btree/bt_debug.c 22
-rw-r--r-- src/btree/bt_discard.c 26
-rw-r--r-- src/btree/bt_handle.c 62
-rw-r--r-- src/btree/bt_page.c 89
-rw-r--r-- src/btree/bt_read.c 6
-rw-r--r-- src/btree/bt_slvg.c 37
-rw-r--r-- src/btree/bt_stat.c 6
-rw-r--r-- src/btree/bt_vrfy.c 14
-rw-r--r-- src/btree/bt_walk.c 17
-rw-r--r-- src/btree/col_modify.c 9
-rw-r--r-- src/btree/col_srch.c 22
-rw-r--r-- src/btree/rec_evict.c 10
-rw-r--r-- src/btree/rec_merge.c 30
-rw-r--r-- src/btree/rec_write.c 39
-rw-r--r-- src/btree/row_key.c 8
-rw-r--r-- src/btree/row_modify.c 6
-rw-r--r-- src/btree/row_srch.c 23
21 files changed, 261 insertions, 212 deletions
diff --git a/src/btree/bt_bulk.c b/src/btree/bt_bulk.c
index b66f84f5a0c..70457523086 100644
--- a/src/btree/bt_bulk.c
+++ b/src/btree/bt_bulk.c
@@ -31,7 +31,7 @@ __wt_bulk_init(WT_CURSOR_BULK *cbulk)
"bulk-load is only possible for newly created trees");
/* Set a reference to the empty leaf page. */
- cbulk->leaf = btree->root_page->u.intl.t->page;
+ cbulk->leaf = btree->root_page->pu_intl_index[0]->page;
WT_RET(__wt_rec_bulk_init(cbulk));
diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c
index 7f8e04b43e9..b6313535e84 100644
--- a/src/btree/bt_curnext.c
+++ b/src/btree/bt_curnext.c
@@ -84,7 +84,7 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, int newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_last_recno(cbt->page);
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->page);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->page->u.col_fix.recno);
@@ -167,7 +167,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_last_recno(cbt->page);
+ cbt->last_standard_recno = __col_var_last_recno(cbt->page);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->page->u.col_var.recno);
@@ -281,7 +281,8 @@ new_insert: if ((ins = cbt->ins) != NULL) {
}
/* Check for the end of the page. */
- if (cbt->row_iteration_slot >= cbt->page->entries * 2 + 1)
+ if (cbt->row_iteration_slot >=
+ cbt->page->pu_row_entries * 2 + 1)
return (WT_NOTFOUND);
++cbt->row_iteration_slot;
@@ -357,7 +358,8 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt, int next)
* For column-store pages, calculate the largest record on the
* page.
*/
- cbt->last_standard_recno = __col_last_recno(page);
+ cbt->last_standard_recno = page->type == WT_PAGE_COL_VAR ?
+ __col_var_last_recno(page) : __col_fix_last_recno(page);
/* If we're traversing the append list, set the reference. */
if (cbt->ins_head != NULL &&
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index cdfefb5bd65..d2d3091f82a 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -163,7 +163,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, int newpage)
cbt->recno <= WT_INSERT_RECNO(cbt->ins))
WT_RET(__cursor_skip_prev(cbt));
if (cbt->ins == NULL &&
- (cbt->recno == 1 || __col_last_recno(cbt->page) != 0))
+ (cbt->recno == 1 || __col_fix_last_recno(cbt->page) != 0))
return (WT_NOTFOUND);
}
@@ -219,7 +219,7 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, int newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_last_recno(cbt->page);
+ cbt->last_standard_recno = __col_fix_last_recno(cbt->page);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
@@ -302,7 +302,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, int newpage)
/* Initialize for each new page. */
if (newpage) {
- cbt->last_standard_recno = __col_last_recno(cbt->page);
+ cbt->last_standard_recno = __col_var_last_recno(cbt->page);
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
@@ -396,13 +396,13 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, int newpage)
if (!F_ISSET_ATOMIC(cbt->page, WT_PAGE_BUILD_KEYS))
WT_RET(__wt_row_leaf_keys(session, cbt->page));
- if (cbt->page->entries == 0)
+ if (cbt->page->pu_row_entries == 0)
cbt->ins_head = WT_ROW_INSERT_SMALLEST(cbt->page);
else
cbt->ins_head = WT_ROW_INSERT_SLOT(
- cbt->page, cbt->page->entries - 1);
+ cbt->page, cbt->page->pu_row_entries - 1);
cbt->ins = WT_SKIP_LAST(cbt->ins_head);
- cbt->row_iteration_slot = cbt->page->entries * 2 + 1;
+ cbt->row_iteration_slot = cbt->page->pu_row_entries * 2 + 1;
goto new_insert;
}
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index 45a43051e48..53c7b03bf7a 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -94,32 +94,41 @@ __cursor_invalid(WT_CURSOR_BTREE *cbt)
/* Do we have a position on the page? */
switch (btree->type) {
case BTREE_COL_FIX:
- if (cbt->recno >= page->u.col_fix.recno + page->entries)
+ if (cbt->recno >=
+ page->u.col_fix.recno + page->pu_fix_entries)
return (1);
break;
case BTREE_COL_VAR:
+ if (cbt->slot > page->pu_var_entries)
+ return (1);
+ break;
case BTREE_ROW:
- if (cbt->slot > page->entries)
+ if (cbt->slot > page->pu_row_entries)
return (1);
break;
}
}
- /* The page may be empty, the search routine doesn't check. */
- if (page->entries == 0)
- return (1);
-
- /* Otherwise, check for an update in the page's slots. */
+ /*
+ * Check for empty pages (the page may be empty, the search routine
+ * doesn't check), otherwise, check for an update in the page's slots.
+ */
switch (btree->type) {
case BTREE_COL_FIX:
+ if (page->pu_fix_entries == 0)
+ return (1);
break;
case BTREE_COL_VAR:
+ if (page->pu_var_entries == 0)
+ return (1);
cip = &page->u.col_var.d[cbt->slot];
if ((cell = WT_COL_PTR(page, cip)) == NULL ||
__wt_cell_type(cell) == WT_CELL_DEL)
return (1);
break;
case BTREE_ROW:
+ if (page->pu_row_entries == 0)
+ return (1);
if (page->u.row.upd != NULL && (upd = __wt_txn_read(session,
page->u.row.upd[cbt->slot])) != NULL &&
WT_UPDATE_DELETED_ISSET(upd))
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index c5e0f33b290..279e5cfdc8f 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -443,6 +443,7 @@ static int
__debug_page_hdr(WT_DBG *ds, WT_PAGE *page)
{
WT_SESSION_IMPL *session;
+ uint32_t entries;
session = ds->session;
@@ -452,15 +453,21 @@ __debug_page_hdr(WT_DBG *ds, WT_PAGE *page)
switch (page->type) {
case WT_PAGE_COL_INT:
__dmsg(ds, " recno %" PRIu64, page->u.intl.recno);
+ entries = page->pu_intl_entries;
break;
case WT_PAGE_COL_FIX:
__dmsg(ds, " recno %" PRIu64, page->u.col_fix.recno);
+ entries = page->pu_fix_entries;
break;
case WT_PAGE_COL_VAR:
__dmsg(ds, " recno %" PRIu64, page->u.col_var.recno);
+ entries = page->pu_var_entries;
break;
case WT_PAGE_ROW_INT:
+ entries = page->pu_intl_entries;
+ break;
case WT_PAGE_ROW_LEAF:
+ entries = page->pu_row_entries;
break;
WT_ILLEGAL_VALUE(session);
}
@@ -486,8 +493,7 @@ __debug_page_hdr(WT_DBG *ds, WT_PAGE *page)
__dmsg(ds, "\troot");
else
__dmsg(ds, "\tparent %p", page->parent);
- __dmsg(ds,
- ", disk %p, entries %" PRIu32 "\n", page->dsk, page->entries);
+ __dmsg(ds, ", disk %p, entries %" PRIu32 "\n", page->dsk, entries);
return (0);
}
@@ -589,16 +595,16 @@ __debug_page_col_fix(WT_DBG *ds, WT_PAGE *page)
static int
__debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
{
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint32_t i;
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
__dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno);
WT_RET(__debug_ref(ds, ref, page));
}
if (LF_ISSET(WT_DEBUG_TREE_WALK))
- WT_REF_FOREACH(page, ref, i)
+ WT_INTL_FOREACH(page, refp, ref, i)
if (ref->state == WT_REF_MEM) {
__dmsg(ds, "\n");
WT_RET(__debug_page(ds, ref->page, flags));
@@ -657,19 +663,19 @@ __debug_page_col_var(WT_DBG *ds, WT_PAGE *page)
static int
__debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
{
- WT_REF *ref;
+ WT_REF **refp, *ref;
size_t len;
uint8_t *p;
uint32_t i;
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
__wt_ref_key(page, ref, &p, &len);
__debug_item(ds, "K", p, len);
WT_RET(__debug_ref(ds, ref, page));
}
if (LF_ISSET(WT_DEBUG_TREE_WALK))
- WT_REF_FOREACH(page, ref, i)
+ WT_INTL_FOREACH(page, refp, ref, i)
if (ref->state == WT_REF_MEM) {
__dmsg(ds, "\n");
WT_RET(__debug_page(ds, ref->page, flags));
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 0815fe90ca4..8f6f8a40b04 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -129,7 +129,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
/* Free the insert/update array. */
if (mod->update != NULL)
__free_skip_array(session, mod->update,
- page->type == WT_PAGE_COL_FIX ? 1 : page->entries);
+ page->type == WT_PAGE_COL_FIX ? 1 : page->pu_var_entries);
/* Free the overflow on-page, reuse and transaction-cache skiplists. */
__wt_ovfl_onpage_discard(session, page);
@@ -148,14 +148,14 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
static void
__free_page_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint32_t i;
/*
* For each referenced addr, see if the addr was an allocation, and if
* so, free it.
*/
- WT_REF_FOREACH(page, ref, i)
+ WT_INTL_FOREACH(page, refp, ref, i)
if (ref->addr != NULL &&
__wt_off_page(page, ref->addr)) {
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
@@ -182,20 +182,19 @@ static void
__free_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_IKEY *ikey;
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint32_t i;
/*
- * Free any allocated keys.
- *
- * For each referenced addr, see if the addr was an allocation, and if
- * so, free it.
+ * For each WT_REF referenced addr, see if the key or address was an
+ * allocation, and if so, free it.
*/
- WT_REF_FOREACH(page, ref, i) {
+ if (page->pu_intl_index == NULL)
+ return;
+ WT_INTL_FOREACH(page, refp, ref, i) {
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
__wt_free(session, ikey);
- if (ref->addr != NULL &&
- __wt_off_page(page, ref->addr)) {
+ if (ref->addr != NULL && __wt_off_page(page, ref->addr)) {
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
__wt_free(session, ref->addr);
}
@@ -234,11 +233,12 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
* found on the original page).
*/
if (page->u.row.ins != NULL)
- __free_skip_array(session, page->u.row.ins, page->entries + 1);
+ __free_skip_array(
+ session, page->u.row.ins, page->pu_row_entries + 1);
/* Free the update array. */
if (page->u.row.upd != NULL)
- __free_update(session, page->u.row.upd, page->entries);
+ __free_update(session, page->u.row.upd, page->pu_row_entries);
}
/*
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 9525b12ca90..c43a4680dd5 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -390,17 +390,20 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
switch (btree->type) {
case BTREE_COL_FIX:
case BTREE_COL_VAR:
- WT_ERR(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, &root));
- root->u.intl.recno = 1;
- ref = root->u.intl.t;
+ WT_ERR(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, 1, &root));
+ root->parent = NULL;
+
+ ref = root->pu_intl_index[0];
WT_ERR(__wt_btree_new_leaf_page(session, root, ref, &leaf));
ref->addr = NULL;
ref->state = WT_REF_MEM;
ref->key.recno = 1;
break;
case BTREE_ROW:
- WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_INT, 1, &root));
- ref = root->u.intl.t;
+ WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_INT, 0, 1, &root));
+ root->parent = NULL;
+
+ ref = root->pu_intl_index[0];
WT_ERR(__wt_btree_new_leaf_page(session, root, ref, &leaf));
ref->addr = NULL;
ref->state = WT_REF_MEM;
@@ -409,8 +412,6 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
break;
WT_ILLEGAL_VALUE_ERR(session);
}
- root->entries = 1;
- root->parent = NULL;
/*
* Mark the leaf page dirty: we didn't create an entirely valid root
@@ -452,36 +453,6 @@ err: if (leaf != NULL)
}
/*
- * __wt_btree_new_modified_page --
- * Create a new in-memory page could be an internal or leaf page. Setup
- * the page modify structure.
- */
-int
-__wt_btree_new_modified_page(WT_SESSION_IMPL *session,
- uint8_t type, uint32_t entries, int merge, WT_PAGE **pagep)
-{
- WT_DECL_RET;
- WT_PAGE *newpage;
-
- /* Allocate a new page and fill it in. */
- WT_RET(__wt_page_alloc(session, type, entries, &newpage));
- newpage->read_gen = WT_READ_GEN_NOTSET;
- newpage->entries = entries;
-
- WT_ERR(__wt_page_modify_init(session, newpage));
- if (merge)
- F_SET(newpage->modify, WT_PM_REC_SPLIT_MERGE);
- else
- __wt_page_modify_set(session, newpage);
-
- *pagep = newpage;
- return (0);
-
-err: __wt_page_out(session, &newpage);
- return (ret);
-}
-
-/*
* __wt_btree_new_leaf_page --
* Create an empty leaf page and link it into a reference in its parent.
*/
@@ -496,19 +467,16 @@ __wt_btree_new_leaf_page(
switch (btree->type) {
case BTREE_COL_FIX:
- WT_RET(__wt_page_alloc(session, WT_PAGE_COL_FIX, 0, &leaf));
- leaf->u.col_fix.recno = 1;
+ WT_RET(__wt_page_alloc(session, WT_PAGE_COL_FIX, 1, 0, &leaf));
break;
case BTREE_COL_VAR:
- WT_RET(__wt_page_alloc(session, WT_PAGE_COL_VAR, 0, &leaf));
- leaf->u.col_var.recno = 1;
+ WT_RET(__wt_page_alloc(session, WT_PAGE_COL_VAR, 1, 0, &leaf));
break;
case BTREE_ROW:
- WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, &leaf));
+ WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, &leaf));
break;
WT_ILLEGAL_VALUE(session);
}
- leaf->entries = 0;
WT_LINK_PAGE(parent, ref, leaf);
*pagep = leaf;
@@ -537,7 +505,7 @@ __btree_preload(WT_SESSION_IMPL *session)
{
WT_BM *bm;
WT_BTREE *btree;
- WT_REF *ref;
+ WT_REF **refp, *ref;
size_t addr_size;
uint32_t i;
const uint8_t *addr;
@@ -546,7 +514,7 @@ __btree_preload(WT_SESSION_IMPL *session)
bm = btree->bm;
/* Pre-load the second-level internal pages. */
- WT_REF_FOREACH(btree->root_page, ref, i) {
+ WT_INTL_FOREACH(btree->root_page, refp, ref, i) {
WT_RET(__wt_ref_info(session,
btree->root_page, ref, &addr, &addr_size, NULL));
if (addr != NULL)
@@ -572,7 +540,9 @@ __btree_get_last_recno(WT_SESSION_IMPL *session)
if (page == NULL)
return (WT_NOTFOUND);
- btree->last_recno = __col_last_recno(page);
+ btree->last_recno = page->type == WT_PAGE_COL_VAR ?
+ __col_var_last_recno(page) : __col_fix_last_recno(page);
+
return (__wt_page_release(session, page));
}
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index cfc7137b13a..9d52e71e5fa 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -124,10 +124,13 @@ __wt_page_in_func(
*/
int
__wt_page_alloc(WT_SESSION_IMPL *session,
- uint8_t type, uint32_t alloc_entries, WT_PAGE **pagep)
+ uint8_t type, uint64_t recno, uint32_t alloc_entries, WT_PAGE **pagep)
{
WT_CACHE *cache;
+ WT_DECL_RET;
WT_PAGE *page;
+ WT_REF **refp;
+ uint32_t i;
size_t size;
void *p;
@@ -136,12 +139,18 @@ __wt_page_alloc(WT_SESSION_IMPL *session,
cache = S2C(session)->cache;
/*
- * Allocate a page, and for most page types, the additional information
- * it needs to describe the disk image.
+ * Allocate a page, and for most page types, the additional memory it
+ * needs to describe the disk image.
*/
size = sizeof(WT_PAGE);
switch (type) {
case WT_PAGE_COL_FIX:
+ /*
+ * The exception is fixed-length column-store, where we don't
+ * describe the disk image, it's too expensive at N bits per
+ * item. Ignore the passed-in value, other than setting the
+ * number of items on the page.
+ */
break;
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
@@ -159,17 +168,46 @@ __wt_page_alloc(WT_SESSION_IMPL *session,
WT_RET(__wt_calloc(session, 1, size, &page));
p = (uint8_t *)page + sizeof(WT_PAGE);
+ page->type = type;
+ page->read_gen = WT_READ_GEN_NOTSET;
+
switch (type) {
case WT_PAGE_COL_FIX:
+ page->u.col_fix.recno = recno;
+ page->pu_fix_entries = alloc_entries;
break;
case WT_PAGE_COL_INT:
+ page->u.intl.recno = recno;
+ /* FALLTHROUGH */
case WT_PAGE_ROW_INT:
- page->u.intl.t = p;
+ page->u.intl.__index = p;
+
+ /*
+ * Internal pages have an array of WT_REF pointers so they can
+ * split. Allocate and initialize it to point to the first set
+ * of slots (even though those slots aren't yet initialized).
+ */
+ if ((ret = __wt_calloc(session, alloc_entries,
+ sizeof(*page->u.intl.index) +
+ alloc_entries * sizeof(WT_REF *),
+ &page->u.intl.index)) != 0) {
+ __wt_free(session, page);
+ return (ret);
+ }
+ size += sizeof(uint32_t) + alloc_entries * sizeof(WT_REF *);
+
+ page->pu_intl_entries = alloc_entries;
+ for (i = 0, refp = page->pu_intl_index; i < alloc_entries; ++i)
+ *refp++ = &page->u.intl.__index[i];
+
break;
case WT_PAGE_COL_VAR:
+ page->u.col_var.recno = recno;
page->u.col_var.d = p;
+ page->pu_var_entries = alloc_entries;
break;
case WT_PAGE_ROW_LEAF:
+ page->pu_row_entries = alloc_entries;
page->u.row.d = p;
break;
WT_ILLEGAL_VALUE(session);
@@ -179,9 +217,6 @@ __wt_page_alloc(WT_SESSION_IMPL *session,
__wt_cache_page_inmem_incr(session, page, size);
(void)WT_ATOMIC_ADD(cache->pages_inmem, 1);
- /* The one page field we set is the type. */
- page->type = type;
-
*pagep = page;
return (0);
}
@@ -200,37 +235,33 @@ __wt_page_inmem(
uint32_t alloc_entries;
size_t size;
- alloc_entries = 0;
*pagep = NULL;
+ alloc_entries = 0;
/*
- * Figure out how many underlying objects the page references so
- * we can allocate them along with the page.
+ * Figure out how many underlying objects the page references so we can
+ * allocate them along with the page.
*/
switch (dsk->type) {
case WT_PAGE_COL_FIX:
- break;
case WT_PAGE_COL_INT:
- /*
- * Column-store internal page entries map one-to-one to the
- * number of physical entries on the page (each physical entry
- * is an offset object).
- */
- alloc_entries = dsk->u.entries;
- break;
case WT_PAGE_COL_VAR:
/*
* Column-store leaf page entries map one-to-one to the number
* of physical entries on the page (each physical entry is a
* value item).
+ *
+ * Column-store internal page entries map one-to-one to the
+ * number of physical entries on the page (each entry is a
+ * location cookie).
*/
alloc_entries = dsk->u.entries;
break;
case WT_PAGE_ROW_INT:
/*
* Row-store internal page entries map one-to-two to the number
- * of physical entries on the page (each in-memory entry is a
- * key item and location cookie).
+ * of physical entries on the page (each entry is a key and
+ * location cookie pair).
*/
alloc_entries = dsk->u.entries / 2;
break;
@@ -254,9 +285,9 @@ __wt_page_inmem(
}
/* Allocate and initialize a new WT_PAGE. */
- WT_RET(__wt_page_alloc(session, dsk->type, alloc_entries, &page));
+ WT_RET(__wt_page_alloc(
+ session, dsk->type, dsk->recno, alloc_entries, &page));
page->dsk = dsk;
- page->read_gen = WT_READ_GEN_NOTSET;
F_SET_ATOMIC(page, flags);
/*
@@ -267,26 +298,18 @@ __wt_page_inmem(
switch (page->type) {
case WT_PAGE_COL_FIX:
- page->entries = dsk->u.entries;
- page->u.col_fix.recno = dsk->recno;
__inmem_col_fix(session, page);
break;
case WT_PAGE_COL_INT:
- page->entries = dsk->u.entries;
- page->u.intl.recno = dsk->recno;
__inmem_col_int(session, page);
break;
case WT_PAGE_COL_VAR:
- page->entries = dsk->u.entries;
- page->u.col_var.recno = dsk->recno;
WT_ERR(__inmem_col_var(session, page, &size));
break;
case WT_PAGE_ROW_INT:
- page->entries = dsk->u.entries / 2;
WT_ERR(__inmem_row_int(session, page, &size));
break;
case WT_PAGE_ROW_LEAF:
- page->entries = alloc_entries;
WT_ERR(__inmem_row_leaf(session, page));
break;
WT_ILLEGAL_VALUE_ERR(session);
@@ -344,7 +367,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
* Walk the page, building references: the page contains value items.
* The value items are on-page items (WT_CELL_VALUE).
*/
- ref = page->u.intl.t;
+ ref = page->u.intl.__index;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
__wt_cell_unpack(cell, unpack);
ref->addr = cell;
@@ -466,14 +489,14 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
unpack = &_unpack;
dsk = page->dsk;
- WT_ERR(__wt_scr_alloc(session, 0, &current));
+ WT_RET(__wt_scr_alloc(session, 0, &current));
/*
* Walk the page, instantiating keys: the page contains sorted key and
* location cookie pairs. Keys are on-page/overflow items and location
* cookies are WT_CELL_ADDR_XXX items.
*/
- ref = page->u.intl.t;
+ ref = page->u.intl.__index;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
__wt_cell_unpack(cell, unpack);
switch (unpack->type) {
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 82e972a5541..7a27cc422c6 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -35,11 +35,11 @@ __cache_read_row_deleted(
}
/* Allocate the update array. */
- WT_RET(__wt_calloc_def(session, page->entries, &upd_array));
+ WT_RET(__wt_calloc_def(session, page->pu_row_entries, &upd_array));
page->u.row.upd = upd_array;
/* Fill in the update array with deleted items. */
- for (i = 0; i < page->entries; ++i) {
+ for (i = 0; i < page->pu_row_entries; ++i) {
WT_RET(__wt_calloc_def(session, 1, &upd));
upd->next = upd_array[i];
upd_array[i] = upd;
@@ -49,7 +49,7 @@ __cache_read_row_deleted(
}
__wt_cache_page_inmem_incr(session, page,
- page->entries * (sizeof(WT_UPDATE *) + sizeof(WT_UPDATE)));
+ page->pu_row_entries * (sizeof(WT_UPDATE *) + sizeof(WT_UPDATE)));
return (0);
}
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index daa57d5e935..2d2add6ffff 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -534,8 +534,8 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session,
WT_ERR(__wt_page_inmem(session, NULL, NULL, dsk, 0, &page));
WT_ERR(__wt_row_leaf_key_copy(session,
page, &page->u.row.d[0], &trk->row_start));
- WT_ERR(__wt_row_leaf_key_copy(session,
- page, &page->u.row.d[page->entries - 1], &trk->row_stop));
+ WT_ERR(__wt_row_leaf_key_copy(session, page,
+ &page->u.row.d[page->pu_row_entries - 1], &trk->row_stop));
if (WT_VERBOSE_ISSET(session, salvage)) {
WT_ERR(__wt_buf_set_printable(session, ss->tmp1,
@@ -1077,14 +1077,11 @@ __slvg_col_build_internal(
addr = NULL;
/* Allocate a column-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT, leaf_cnt, &page));
+ WT_RET(__wt_page_alloc(session, WT_PAGE_COL_INT, 1, leaf_cnt, &page));
page->parent = NULL; /* Root page */
- page->read_gen = WT_READ_GEN_NOTSET;
- page->u.intl.recno = 1;
- page->entries = leaf_cnt;
WT_ERR(__slvg_modify_init(session, page));
- for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) {
+ for (ref = page->u.intl.__index, i = 0; i < ss->pages_next; ++i) {
if ((trk = ss->pages[i]) == NULL)
continue;
@@ -1140,7 +1137,7 @@ __slvg_col_build_leaf(
WT_PAGE *page;
WT_SALVAGE_COOKIE *cookie, _cookie;
uint64_t skip, take;
- uint32_t save_entries;
+ uint32_t *entriesp, save_entries;
cookie = &_cookie;
WT_CLEAR(*cookie);
@@ -1148,8 +1145,12 @@ __slvg_col_build_leaf(
/* Get the original page, including the full in-memory setup. */
WT_RET(__wt_page_in(session, parent, ref));
page = ref->page;
+
+ entriesp = page->type == WT_PAGE_COL_VAR ?
+ &page->pu_var_entries : &page->pu_fix_entries;
+
save_col_var = page->u.col_var.d;
- save_entries = page->entries;
+ save_entries = *entriesp;
/*
* Calculate the number of K/V entries we are going to skip, and
@@ -1210,7 +1211,7 @@ __slvg_col_build_leaf(
/* Reset the page. */
page->u.col_var.d = save_col_var;
- page->entries = save_entries;
+ *entriesp = save_entries;
ret = __wt_page_release(session, page);
if (ret == 0)
@@ -1657,13 +1658,11 @@ __slvg_row_build_internal(
addr = NULL;
/* Allocate a row-store root (internal) page and fill it in. */
- WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, leaf_cnt, &page));
+ WT_RET(__wt_page_alloc(session, WT_PAGE_ROW_INT, 0, leaf_cnt, &page));
page->parent = NULL;
- page->read_gen = WT_READ_GEN_NOTSET;
- page->entries = leaf_cnt;
WT_ERR(__slvg_modify_init(session, page));
- for (ref = page->u.intl.t, i = 0; i < ss->pages_next; ++i) {
+ for (ref = page->u.intl.__index, i = 0; i < ss->pages_next; ++i) {
if ((trk = ss->pages[i]) == NULL)
continue;
@@ -1808,7 +1807,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session,
/* We should have selected some entries, but not the entire page. */
WT_ASSERT(session,
skip_start + skip_stop > 0 &&
- skip_start + skip_stop < page->entries);
+ skip_start + skip_stop < page->pu_row_entries);
/*
* Take a copy of this page's first key to define the start of
@@ -1825,8 +1824,8 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session,
* reference overflow pages.
*/
WT_ERR(__slvg_row_merge_ovfl(session, trk, page, 0, skip_start));
- WT_ERR(__slvg_row_merge_ovfl(
- session, trk, page, page->entries - skip_stop, page->entries));
+ WT_ERR(__slvg_row_merge_ovfl(session, trk, page,
+ page->pu_row_entries - skip_stop, page->pu_row_entries));
/*
* If we take all of the keys, we don't write the page and we clear the
@@ -1843,7 +1842,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session,
* is no need to copy anything on the page itself, the entries
* value limits the number of page items.
*/
- page->entries -= skip_stop;
+ page->pu_row_entries -= skip_stop;
cookie->skip = skip_start;
/*
@@ -1866,7 +1865,7 @@ __slvg_row_build_leaf(WT_SESSION_IMPL *session,
session, page, cookie, WT_SKIP_UPDATE_ERR));
/* Reset the page. */
- page->entries += skip_stop;
+ page->pu_row_entries += skip_stop;
}
/*
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index 266f1ec8031..bffd83282c8 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -61,11 +61,11 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats)
switch (page->type) {
case WT_PAGE_COL_FIX:
WT_STAT_INCR(stats, btree_column_fix);
- WT_STAT_INCRV(stats, btree_entries, page->entries);
+ WT_STAT_INCRV(stats, btree_entries, page->pu_fix_entries);
break;
case WT_PAGE_COL_INT:
WT_STAT_INCR(stats, btree_column_internal);
- WT_STAT_INCRV(stats, btree_entries, page->entries);
+ WT_STAT_INCRV(stats, btree_entries, page->pu_intl_entries);
break;
case WT_PAGE_COL_VAR:
WT_RET(__stat_page_col_var(page, stats));
@@ -75,7 +75,7 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS *stats)
break;
case WT_PAGE_ROW_INT:
WT_STAT_INCR(stats, btree_row_internal);
- WT_STAT_INCRV(stats, btree_entries, page->entries);
+ WT_STAT_INCRV(stats, btree_entries, page->pu_intl_entries);
break;
case WT_PAGE_ROW_LEAF:
WT_RET(__stat_page_row_leaf(page, stats));
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index 421cdb79e68..574c703edf8 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -221,7 +221,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_PAGE *page, WT_VSTUFF *vs)
WT_CELL_UNPACK *unpack, _unpack;
WT_COL *cip;
WT_DECL_RET;
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint64_t recno;
uint32_t entry, i;
int found;
@@ -297,7 +297,7 @@ recno_chk: if (recno != vs->record_total + 1)
}
switch (page->type) {
case WT_PAGE_COL_FIX:
- vs->record_total += page->entries;
+ vs->record_total += page->pu_fix_entries;
break;
case WT_PAGE_COL_VAR:
recno = 0;
@@ -389,7 +389,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
case WT_PAGE_COL_INT:
/* For each entry in an internal page, verify the subtree. */
entry = 0;
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
/*
* It's a depth-first traversal: this entry's starting
* record number should be 1 more than the total records
@@ -423,7 +423,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
case WT_PAGE_ROW_INT:
/* For each entry in an internal page, verify the subtree. */
entry = 0;
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
/*
* It's a depth-first traversal: this entry's starting
* key should be larger than the largest key previously
@@ -509,7 +509,7 @@ __verify_row_leaf_key_order(
* If a tree is empty (just created), it won't have keys; if there
* are no keys, we're done.
*/
- if (page->entries == 0)
+ if (page->pu_row_entries == 0)
return (0);
/*
@@ -543,8 +543,8 @@ __verify_row_leaf_key_order(
}
/* Update the largest key we've seen to the last key on this page. */
- WT_RET(__wt_row_leaf_key_copy(session,
- page, page->u.row.d + (page->entries - 1), vs->max_key));
+ WT_RET(__wt_row_leaf_key_copy(session, page,
+ page->u.row.d + (page->pu_row_entries - 1), vs->max_key));
(void)__wt_page_addr_string(session, vs->max_addr, page);
return (0);
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index a7692b9fd18..f7b62b53596 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -162,7 +162,7 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_PAGE **pagep, uint32_t flags)
WT_BTREE *btree;
WT_PAGE *couple, *page;
WT_REF *ref;
- uint32_t slot;
+ uint32_t page_entries, slot;
int cache, compact, discard, eviction, prev, set_read_gen;
int skip, skip_intl, skip_leaf;
@@ -221,7 +221,8 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_PAGE **pagep, uint32_t flags)
if (page == NULL) {
if ((page = btree->root_page) == NULL)
return (0);
- slot = prev ? page->entries - 1 : 0;
+ page_entries = page->pu_intl_entries;
+ slot = prev ? page_entries - 1 : 0;
goto descend;
}
@@ -236,9 +237,10 @@ ascend: /*
* Figure out the current slot in the parent page's WT_REF array and
* switch to the parent.
*/
- ref = __wt_page_ref(session, page);
- slot = (uint32_t)(ref - page->parent->u.intl.t);
+ WT_RET(__wt_page_refp(session, page, &slot));
+ ref = page->parent->pu_intl_index[slot];
page = page->parent;
+ page_entries = page->pu_intl_entries;
/* If the eviction thread, clear the page's walk status.
*
@@ -259,7 +261,7 @@ ascend: /*
* next/prev slot and left/right-most element in its subtree.
*/
if ((prev && slot == 0) ||
- (!prev && slot == page->entries - 1)) {
+ (!prev && slot == page_entries - 1)) {
/* Optionally skip internal pages. */
if (skip_intl)
goto ascend;
@@ -295,7 +297,7 @@ ascend: /*
descend: for (;;) {
if (page->type == WT_PAGE_ROW_INT ||
page->type == WT_PAGE_COL_INT)
- ref = &page->u.intl.t[slot];
+ ref = page->pu_intl_index[slot];
else if (skip_leaf)
goto ascend;
else {
@@ -418,7 +420,8 @@ retry: if (ref->state != WT_REF_MEM ||
}
couple = page = ref->page;
- slot = prev ? page->entries - 1 : 0;
+ page_entries = __wt_page_entries(page);
+ slot = prev ? page_entries - 1 : 0;
}
}
/* NOTREACHED */
diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c
index 97dbc679995..7d18f3ae137 100644
--- a/src/btree/col_modify.c
+++ b/src/btree/col_modify.c
@@ -51,7 +51,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
* append list, not the update list. In addition, a recno of
* 0 implies an append operation, we're allocating a new row.
*/
- if (recno == 0 || recno > __col_last_recno(page))
+ if (recno == 0 ||
+ recno > (btree->type == BTREE_COL_VAR ?
+ __col_var_last_recno(page) : __col_fix_last_recno(page)))
append = 1;
}
@@ -102,8 +104,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
session, page, page->modify->update, ins_headp, 1);
ins_headp = &page->modify->update[0];
} else {
- WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->modify->update, ins_headp, page->entries);
+ WT_PAGE_ALLOC_AND_SWAP(
+ session, page, page->modify->update,
+ ins_headp, page->pu_var_entries);
ins_headp = &page->modify->update[cbt->slot];
}
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index b7af547b756..56589929f59 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -34,20 +34,21 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
/* Search the internal pages of the tree. */
for (depth = 2,
page = btree->root_page; page->type == WT_PAGE_COL_INT; ++depth) {
- WT_ASSERT(session, ref == NULL ||
- ref->key.recno == page->u.intl.recno);
+ WT_ASSERT(session,
+ ref == NULL || ref->key.recno == page->u.intl.recno);
/* Fast path appends. */
- base = page->entries;
- ref = &page->u.intl.t[base - 1];
+ base = page->pu_intl_entries;
+ ref = page->pu_intl_index[base - 1];
if (recno >= ref->key.recno)
goto descend;
/* Binary search of internal pages. */
for (base = 0, ref = NULL,
- limit = page->entries - 1; limit != 0; limit >>= 1) {
+ limit = page->pu_intl_entries - 1;
+ limit != 0; limit >>= 1) {
indx = base + (limit >> 1);
- ref = page->u.intl.t + indx;
+ ref = page->pu_intl_index[indx];
if (recno == ref->key.recno)
break;
@@ -73,7 +74,7 @@ descend: WT_ASSERT(session, ref != NULL);
* starting recno.
*/
WT_ASSERT(session, base > 0);
- ref = page->u.intl.t + (base - 1);
+ ref = page->pu_intl_index[base - 1];
}
/*
@@ -101,14 +102,15 @@ descend: WT_ASSERT(session, ref != NULL);
* we arrive here with a record that's impossibly large for the page.
*/
if (page->type == WT_PAGE_COL_FIX) {
- if (recno >= page->u.col_fix.recno + page->entries) {
- cbt->recno = page->u.col_fix.recno + page->entries;
+ if (recno >= page->u.col_fix.recno + page->pu_fix_entries) {
+ cbt->recno =
+ page->u.col_fix.recno + page->pu_fix_entries;
goto past_end;
} else
ins_head = WT_COL_UPDATE_SINGLE(page);
} else
if ((cip = __col_var_search(page, recno)) == NULL) {
- cbt->recno = __col_last_recno(page);
+ cbt->recno = __col_var_last_recno(page);
goto past_end;
} else {
cbt->slot = WT_COL_SLOT(page, cip);
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index 995b219de8c..d3a0d0805bb 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -253,14 +253,14 @@ __rec_page_dirty_update(
static void
__rec_discard_tree(WT_SESSION_IMPL *session, WT_PAGE *page, int exclusive)
{
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint32_t i;
switch (page->type) {
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
/* For each entry in the page... */
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
if (ref->state == WT_REF_DISK ||
ref->state == WT_REF_DELETED)
continue;
@@ -294,6 +294,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page,
WT_DECL_RET;
WT_PAGE_MODIFY *mod;
WT_PAGE *t;
+ WT_REF **refp;
uint32_t i;
btree = S2BT(session);
@@ -321,7 +322,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page,
* pages after we've written them.
*/
if (page->type == WT_PAGE_COL_INT || page->type == WT_PAGE_ROW_INT)
- WT_REF_FOREACH(page, ref, i)
+ WT_INTL_FOREACH(page, refp, ref, i)
switch (ref->state) {
case WT_REF_DISK: /* On-disk */
case WT_REF_DELETED: /* On-disk, deleted */
@@ -340,6 +341,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page,
case WT_REF_LOCKED: /* Being evicted */
case WT_REF_READING: /* Being read */
return (EBUSY);
+ WT_ILLEGAL_VALUE(session);
}
/*
@@ -442,6 +444,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
*/
if (__wt_page_is_modified(page) &&
!F_ISSET(mod, WT_PM_REC_SPLIT_MERGE)) {
+#if 0
/*
* If the page is larger than the maximum allowed, attempt to
* split the page in memory before evicting it. The in-memory
@@ -459,6 +462,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
*inmem_split = 1;
return (0);
}
+#endif
ret = __wt_rec_write(session, page,
NULL, WT_EVICTION_SERVER_LOCKED | WT_SKIP_UPDATE_QUIT);
diff --git a/src/btree/rec_merge.c b/src/btree/rec_merge.c
index 15f5e643713..3383bcdb61c 100644
--- a/src/btree/rec_merge.c
+++ b/src/btree/rec_merge.c
@@ -9,6 +9,36 @@
#if 0
/*
+ * __wt_btree_new_modified_page --
+ * Create a new in-memory page and set up its page-modify structure. The
+ * caller picks the page type, so the page may be an internal or a leaf
+ * page.
+ *
+ * session: the session handle passed to every helper called here.
+ * type: page type, handed unchanged to __wt_page_alloc.
+ * entries: entry count, handed to __wt_page_alloc and also stored in the
+ * new page's entries field.
+ * merge: if non-zero, WT_PM_REC_SPLIT_MERGE is set on the page's modify
+ * structure; otherwise __wt_page_modify_set is called on the page.
+ * pagep: on success, set to the new page; it is not assigned on the
+ * error path.
+ *
+ * Returns 0 on success. If __wt_page_modify_init fails, the page is
+ * released with __wt_page_out and the error is returned (presumably
+ * WT_ERR jumps to the err label and WT_RET returns directly -- confirm
+ * against the macro definitions).
+ *
+ * NOTE(review): this function appears to be compiled out by the "#if 0"
+ * that precedes it. It still uses a four-argument __wt_page_alloc call
+ * and assigns read_gen and entries by hand, whereas the rec_write.c call
+ * changed in this same patch passes an extra record-number argument and
+ * drops those assignments. Confirm the current prototype and page layout
+ * before re-enabling.
+ */
+int
+__wt_btree_new_modified_page(WT_SESSION_IMPL *session,
+ uint8_t type, uint32_t entries, int merge, WT_PAGE **pagep)
+{
+ WT_DECL_RET;
+ WT_PAGE *newpage;
+
+ /* Allocate the page, then set its read generation and entry count. */
+ WT_RET(__wt_page_alloc(session, type, entries, &newpage));
+ newpage->read_gen = WT_READ_GEN_NOTSET;
+ newpage->entries = entries;
+
+ WT_ERR(__wt_page_modify_init(session, newpage));
+ if (merge)
+ F_SET(newpage->modify, WT_PM_REC_SPLIT_MERGE);
+ else
+ __wt_page_modify_set(session, newpage);
+
+ *pagep = newpage;
+ return (0);
+
+err: __wt_page_out(session, &newpage);
+ return (ret);
+}
+
+/*
* WT_VISIT_STATE --
* The state maintained across calls to the "visit" callback functions:
* the number of refs visited, the maximum depth, and the current page and
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 244b0d5c132..0681c43bc0e 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -2358,7 +2358,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_KV *val;
WT_CELL_UNPACK *unpack, _unpack;
WT_PAGE *rp;
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint32_t i;
int state;
@@ -2368,7 +2368,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
unpack = &_unpack;
/* For each entry in the page... */
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
/* Update the starting record number in case we split. */
r->recno = ref->key.recno;
@@ -2469,11 +2469,11 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Copy the updated, disk-image bytes into place. */
memcpy(r->first_free, page->u.col_fix.bitf,
- __bitstr_size((size_t)page->entries * btree->bitcnt));
+ __bitstr_size((size_t)page->pu_fix_entries * btree->bitcnt));
/* Calculate the number of entries per page remainder. */
- entry = page->entries;
- nrecs = WT_FIX_ENTRIES(btree, r->space_avail) - page->entries;
+ entry = page->pu_fix_entries;
+ nrecs = WT_FIX_ENTRIES(btree, r->space_avail) - page->pu_fix_entries;
r->recno += entry;
/* Walk any append list. */
@@ -2556,7 +2556,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session,
page, page->u.col_fix.recno, btree->maxleafpage));
/* We may not be taking all of the entries on the original page. */
- page_take = salvage->take == 0 ? page->entries : salvage->take;
+ page_take = salvage->take == 0 ? page->pu_fix_entries : salvage->take;
page_start = salvage->skip == 0 ? 0 : salvage->skip;
for (;;) {
/* Calculate the number of entries per page. */
@@ -3035,7 +3035,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_IKEY *ikey;
WT_KV *key, *val;
WT_PAGE *rp;
- WT_REF *ref;
+ WT_REF **refp, *ref;
size_t size;
uint32_t i;
u_int vtype;
@@ -3072,7 +3072,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
r->cell_zero = 1;
/* For each entry in the in-memory page... */
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
/*
* There are different paths if the key is an overflow item vs.
* a straight-forward on-page value. If an overflow item, we
@@ -3261,7 +3261,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_CELL_UNPACK *vpack, _vpack;
WT_KV *key, *val;
WT_PAGE *rp;
- WT_REF *ref;
+ WT_REF **refp, *ref;
size_t size;
uint32_t i;
u_int vtype;
@@ -3275,7 +3275,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
vpack = &_vpack;
/* For each entry in the in-memory page... */
- WT_REF_FOREACH(page, ref, i) {
+ WT_INTL_FOREACH(page, refp, ref, i) {
vtype = 0;
addr = ref->addr;
rp = ref->page;
@@ -3805,7 +3805,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BM *bm;
WT_PAGE_MODIFY *mod;
- WT_REF *ref;
+ WT_REF **refp, *ref;
uint32_t i;
bm = S2BT(session)->bm;
@@ -3820,7 +3820,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
* root splits. In the case of root splits, we potentially have to
* cope with the underlying sets of multiple pages.
*/
- WT_REF_FOREACH(page, ref, i)
+ WT_INTL_FOREACH(page, refp, ref, i)
WT_RET(bm->free(bm, session,
((WT_ADDR *)ref->addr)->addr,
((WT_ADDR *)ref->addr)->size));
@@ -4147,14 +4147,11 @@ __rec_split_merge_new(WT_SESSION_IMPL *session,
* Our caller cleans up, make sure we return a valid page reference,
* even on error.
*/
- WT_RET(__wt_page_alloc(session, type, r->bnd_next, pagep));
+ WT_RET(__wt_page_alloc(session, type,
+ type == WT_PAGE_COL_INT ? r->bnd[0].recno : 0, r->bnd_next, pagep));
page = *pagep;
page->parent = orig->parent;
page->ref_hint = orig->ref_hint;
- if (type == WT_PAGE_COL_INT)
- page->u.intl.recno = r->bnd[0].recno;
- page->read_gen = WT_READ_GEN_NOTSET;
- page->entries = r->bnd_next;
/*
* We don't re-write parent pages when child pages split, which means
@@ -4249,13 +4246,13 @@ __rec_split_row(
/* Enter each split child page into the new internal page. */
size = 0;
- for (ref = page->u.intl.t,
- bnd = r->bnd, i = 0; i < r->bnd_next; ++ref, ++bnd, ++i) {
+ for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) {
WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr));
*addr = bnd->addr;
bnd->addr.addr = NULL;
size += bnd->addr.size;
+ ref = page->pu_intl_index[i];
ref->page = NULL;
WT_ERR(__wt_row_ikey(session, 0,
bnd->key.data, bnd->key.size, &ref->key.ikey));
@@ -4296,12 +4293,12 @@ __rec_split_col(
WT_ERR(__rec_split_merge_new(session, r, orig, &page, WT_PAGE_COL_INT));
/* Enter each split child page into the new internal page. */
- for (ref = page->u.intl.t,
- bnd = r->bnd, i = 0; i < r->bnd_next; ++ref, ++bnd, ++i) {
+ for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) {
WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr));
*addr= bnd->addr;
bnd->addr.addr = NULL;
+ ref = page->pu_intl_index[i];
ref->page = NULL;
ref->key.recno = bnd->recno;
ref->addr = addr;
diff --git a/src/btree/row_key.c b/src/btree/row_key.c
index 6eef0ce2189..c79908ff2cd 100644
--- a/src/btree/row_key.c
+++ b/src/btree/row_key.c
@@ -24,7 +24,7 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page)
btree = S2BT(session);
- if (page->entries == 0) { /* Just checking... */
+ if (page->pu_row_entries == 0) { /* Just checking... */
F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS);
return (0);
}
@@ -48,14 +48,14 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page)
* marking up the array.
*/
WT_RET(__wt_scr_alloc(
- session, (uint32_t)__bitstr_size(page->entries), &tmp));
+ session, (uint32_t)__bitstr_size(page->pu_row_entries), &tmp));
if ((gap = btree->key_gap) == 0)
gap = 1;
- __inmem_row_leaf_slots(tmp->mem, 0, page->entries, gap);
+ __inmem_row_leaf_slots(tmp->mem, 0, page->pu_row_entries, gap);
/* Instantiate the keys. */
- for (rip = page->u.row.d, i = 0; i < page->entries; ++rip, ++i)
+ for (rip = page->u.row.d, i = 0; i < page->pu_row_entries; ++rip, ++i)
if (__bit_test(tmp->mem, i))
WT_ERR(__wt_row_leaf_key_work(
session, page, rip, NULL, 1));
diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c
index 525b682bde7..8579af76207 100644
--- a/src/btree/row_modify.c
+++ b/src/btree/row_modify.c
@@ -50,7 +50,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
if (cbt->ins == NULL) {
/* Allocate an update array as necessary. */
WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->u.row.upd, upd_entry, page->entries);
+ page->u.row.upd, upd_entry, page->pu_row_entries);
/* Set the WT_UPDATE array reference. */
upd_entry = &page->u.row.upd[cbt->slot];
@@ -88,10 +88,10 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
* slot. That's hard, so we set a flag.
*/
WT_PAGE_ALLOC_AND_SWAP(session, page,
- page->u.row.ins, ins_headp, page->entries + 1);
+ page->u.row.ins, ins_headp, page->pu_row_entries + 1);
ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ?
- page->entries : cbt->slot;
+ page->pu_row_entries: cbt->slot;
ins_headp = &page->u.row.ins[ins_slot];
/* Allocate the WT_INSERT_HEAD structure as necessary. */
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 052bcff67fb..f044aed6d04 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -153,8 +153,8 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Fast-path internal pages with one child, a common case for
* the root page in new trees.
*/
- base = page->entries;
- ref = &page->u.intl.t[base - 1];
+ base = page->pu_intl_entries;
+ ref = page->pu_intl_index[base - 1];
if (base == 1)
goto descend;
@@ -173,11 +173,11 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
*/
base = 0;
ref = NULL;
- limit = page->entries - 1;
+ limit = page->pu_intl_entries - 1;
if (btree->collator == NULL)
for (; limit != 0; limit >>= 1) {
indx = base + (limit >> 1);
- ref = page->u.intl.t + indx;
+ ref = page->pu_intl_index[indx];
/*
* If about to compare an application key with
@@ -207,7 +207,7 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
else
for (; limit != 0; limit >>= 1) {
indx = base + (limit >> 1);
- ref = page->u.intl.t + indx;
+ ref = page->pu_intl_index[indx];
/*
* If about to compare an application key with
* the 0th index on an internal page, pretend
@@ -242,7 +242,7 @@ descend: WT_ASSERT(session, ref != NULL);
* for descent is the one before base.
*/
if (cmp != 0)
- ref = page->u.intl.t + (base - 1);
+ ref = page->pu_intl_index[base - 1];
/*
* Swap the parent page for the child page; return on error,
@@ -274,7 +274,7 @@ descend: WT_ASSERT(session, ref != NULL);
*/
cmp = -1;
base = 0;
- limit = page->entries;
+ limit = page->pu_row_entries;
if (btree->collator == NULL)
for (; limit != 0; limit >>= 1) {
indx = base + (limit >> 1);
@@ -405,7 +405,8 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
/* Walk the internal pages of the tree. */
for (page = btree->root_page; page->type == WT_PAGE_ROW_INT;) {
- ref = page->u.intl.t + __wt_random() % page->entries;
+ ref =
+ page->pu_intl_index[__wt_random() % page->pu_intl_entries];
/*
* Swap the parent page for the child page; return on error,
@@ -415,7 +416,7 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
page = ref->page;
}
- if (page->entries != 0) {
+ if (page->pu_row_entries != 0) {
/*
* The use case for this call is finding a place to split the
* tree. Cheat (it's not like this is "random", anyway), and
@@ -427,8 +428,8 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
cbt->page = page;
cbt->compare = 0;
cbt->slot =
- btree->root_page->entries < 2 ?
- __wt_random() % page->entries : 0;
+ btree->root_page->pu_intl_entries < 2 ?
+ __wt_random() % page->pu_row_entries : 0;
return (0);
}