summaryrefslogtreecommitdiff
path: root/src/btree/bt_page.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/btree/bt_page.c')
-rw-r--r--src/btree/bt_page.c97
1 files changed, 58 insertions, 39 deletions
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index a4cc355096a..c9200440051 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -7,7 +7,7 @@
#include "wt_internal.h"
-static int __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
+static int __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *);
static int __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
@@ -26,7 +26,8 @@ __wt_page_in_func(
#endif
)
{
- int wake, read_lockout;
+ WT_PAGE *page;
+ int busy, read_lockout, wake;
/*
* Only wake the eviction server the first time through here (if the
@@ -67,17 +68,37 @@ __wt_page_in_func(
* can't get a hazard reference is because the page is
* being evicted; yield and try again.
*/
- if (__wt_hazard_set(session, ref
#ifdef HAVE_DIAGNOSTIC
- , file, line
+ WT_RET(
+ __wt_hazard_set(session, ref, &busy, file, line));
+#else
+ WT_RET(__wt_hazard_set(session, ref, &busy));
#endif
- ) == 0) {
- WT_ASSERT(session, !WT_PAGE_IS_ROOT(ref->page));
- ref->page->read_gen =
- __wt_cache_read_gen(session);
- return (0);
+ if (busy)
+ break;
+
+ page = ref->page;
+ WT_ASSERT(session, !WT_PAGE_IS_ROOT(page));
+
+ /*
+ * Ensure the page doesn't have ancient updates on it.
+ * If it did, reading the page could ignore committed
+ * updates. This should be extremely unlikely in real
+ * applications, force eviction of the page to avoid
+ * the issue.
+ */
+ if (page->modify != NULL &&
+ __wt_txn_ancient(session, page->modify->first_id)) {
+ WT_VERBOSE_RET(session, read,
+ "ancient updates, forcing eviction");
+ __wt_evict_page_request(session, page);
+ __wt_hazard_clear(session, page);
+ __wt_evict_server_wake(session);
+ break;
}
- break;
+
+ page->read_gen = __wt_cache_read_gen(session);
+ return (0);
WT_ILLEGAL_VALUE(session);
}
@@ -97,55 +118,55 @@ __wt_page_in_func(
*/
int
__wt_page_inmem(WT_SESSION_IMPL *session,
- WT_PAGE *parent, WT_REF *parent_ref, WT_PAGE_HEADER *dsk,
- size_t *inmem_sizep, WT_PAGE **pagep)
+ WT_PAGE *parent, WT_REF *parent_ref, WT_PAGE_HEADER *dsk, WT_PAGE **pagep)
{
+ WT_DECL_RET;
WT_PAGE *page;
- int ret;
+ size_t inmem_size;
WT_ASSERT_RET(session, dsk->u.entries > 0);
- *pagep = NULL;
+ *pagep = page = NULL;
/*
* Allocate and initialize the WT_PAGE.
* Set the LRU so the page is not immediately selected for eviction.
+ * Set the read generation (which can't match a search where the write
+ * generation wasn't set, that is, remained 0).
*/
WT_RET(__wt_calloc_def(session, 1, &page));
- if (inmem_sizep != NULL)
- *inmem_sizep = sizeof(*page) + dsk->size;
- page->type = dsk->type;
page->parent = parent;
page->ref = parent_ref;
page->dsk = dsk;
- /*
- * Set the write generation to 1 (which can't match a search where the
- * write generation wasn't set, that is, remained 0).
- */
page->read_gen = __wt_cache_read_gen(session);
+ page->type = dsk->type;
+ inmem_size = 0;
switch (page->type) {
case WT_PAGE_COL_FIX:
page->u.col_fix.recno = dsk->recno;
- WT_ERR(__inmem_col_fix(session, page, inmem_sizep));
+ WT_ERR(__inmem_col_fix(session, page));
break;
case WT_PAGE_COL_INT:
page->u.intl.recno = dsk->recno;
- WT_ERR(__inmem_col_int(session, page, inmem_sizep));
+ WT_ERR(__inmem_col_int(session, page, &inmem_size));
break;
case WT_PAGE_COL_VAR:
page->u.col_var.recno = dsk->recno;
- WT_ERR(__inmem_col_var(session, page, inmem_sizep));
+ WT_ERR(__inmem_col_var(session, page, &inmem_size));
break;
case WT_PAGE_ROW_INT:
- WT_ERR(__inmem_row_int(session, page, inmem_sizep));
+ WT_ERR(__inmem_row_int(session, page, &inmem_size));
break;
case WT_PAGE_ROW_LEAF:
- WT_ERR(__inmem_row_leaf(session, page, inmem_sizep));
+ WT_ERR(__inmem_row_leaf(session, page, &inmem_size));
break;
- WT_ILLEGAL_VALUE(session);
+ WT_ILLEGAL_VALUE_ERR(session);
}
+ __wt_cache_page_read(
+ session, page, sizeof(WT_PAGE) + dsk->size + inmem_size);
+
*pagep = page;
return (0);
@@ -158,13 +179,11 @@ err: __wt_free(session, page);
* Build in-memory index for fixed-length column-store leaf pages.
*/
static int
-__inmem_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
+__inmem_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
WT_PAGE_HEADER *dsk;
- WT_UNUSED(inmem_sizep);
-
btree = session->btree;
dsk = page->dsk;
@@ -298,20 +317,20 @@ static int
__inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
{
WT_BTREE *btree;
- WT_ITEM *current, *last, *tmp;
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
+ WT_DECL_ITEM(current);
+ WT_DECL_ITEM(last);
+ WT_DECL_RET;
+ WT_ITEM *tmp;
WT_PAGE_HEADER *dsk;
WT_REF *ref;
uint32_t i, nindx, prefix;
- int ret;
void *huffman;
btree = session->btree;
- current = last = NULL;
unpack = &_unpack;
dsk = page->dsk;
- ret = 0;
huffman = btree->huffman_key;
WT_ERR(__wt_scr_alloc(session, 0, &current));
@@ -323,7 +342,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
* and location cookie).
*/
nindx = dsk->u.entries / 2;
- WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.intl.t)));
+ WT_ERR((__wt_calloc_def(session, (size_t)nindx, &page->u.intl.t)));
if (inmem_sizep != NULL)
*inmem_sizep += nindx * sizeof(*page->u.intl.t);
@@ -350,7 +369,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
ref->addr = cell;
++ref;
continue;
- WT_ILLEGAL_VALUE(session);
+ WT_ILLEGAL_VALUE_ERR(session);
}
/*
@@ -361,7 +380,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
*/
prefix = unpack->prefix;
if (huffman != NULL || unpack->ovfl) {
- WT_RET(__wt_cell_unpack_copy(session, unpack, current));
+ WT_ERR(__wt_cell_unpack_copy(session, unpack, current));
/*
* If there's a prefix, make sure there's enough buffer
@@ -399,7 +418,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
*/
WT_ERR(__wt_row_ikey_alloc(session,
WT_PAGE_DISK_OFFSET(page, cell),
- current->data, current->size, (WT_IKEY **)&ref->u.key));
+ current->data, current->size, &ref->u.key));
if (inmem_sizep != NULL)
*inmem_sizep += sizeof(WT_IKEY) + current->size;
@@ -474,7 +493,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
switch (unpack->type) {
case WT_CELL_KEY:
case WT_CELL_KEY_OVFL:
- rip->key = cell;
+ WT_ROW_KEY_SET(rip, cell);
++rip;
break;
case WT_CELL_VALUE: