diff options
Diffstat (limited to 'src/btree/bt_page.c')
-rw-r--r-- | src/btree/bt_page.c | 97 |
1 files changed, 58 insertions, 39 deletions
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index a4cc355096a..c9200440051 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -7,7 +7,7 @@ #include "wt_internal.h" -static int __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *, size_t *); +static int __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *); static int __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *); static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, size_t *); static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *); @@ -26,7 +26,8 @@ __wt_page_in_func( #endif ) { - int wake, read_lockout; + WT_PAGE *page; + int busy, read_lockout, wake; /* * Only wake the eviction server the first time through here (if the @@ -67,17 +68,37 @@ __wt_page_in_func( * can't get a hazard reference is because the page is * being evicted; yield and try again. */ - if (__wt_hazard_set(session, ref #ifdef HAVE_DIAGNOSTIC - , file, line + WT_RET( + __wt_hazard_set(session, ref, &busy, file, line)); +#else + WT_RET(__wt_hazard_set(session, ref, &busy)); #endif - ) == 0) { - WT_ASSERT(session, !WT_PAGE_IS_ROOT(ref->page)); - ref->page->read_gen = - __wt_cache_read_gen(session); - return (0); + if (busy) + break; + + page = ref->page; + WT_ASSERT(session, !WT_PAGE_IS_ROOT(page)); + + /* + * Ensure the page doesn't have ancient updates on it. + * If it did, reading the page could ignore committed + * updates. This should be extremely unlikely in real + * applications, force eviction of the page to avoid + * the issue. + */ + if (page->modify != NULL && + __wt_txn_ancient(session, page->modify->first_id)) { + WT_VERBOSE_RET(session, read, + "ancient updates, forcing eviction"); + __wt_evict_page_request(session, page); + __wt_hazard_clear(session, page); + __wt_evict_server_wake(session); + break; } - break; + + page->read_gen = __wt_cache_read_gen(session); + return (0); WT_ILLEGAL_VALUE(session); } @@ -97,55 +118,55 @@ __wt_page_in_func( */ int __wt_page_inmem(WT_SESSION_IMPL *session, - WT_PAGE *parent, WT_REF *parent_ref, WT_PAGE_HEADER *dsk, - size_t *inmem_sizep, WT_PAGE **pagep) + WT_PAGE *parent, WT_REF *parent_ref, WT_PAGE_HEADER *dsk, WT_PAGE **pagep) { + WT_DECL_RET; WT_PAGE *page; - int ret; + size_t inmem_size; WT_ASSERT_RET(session, dsk->u.entries > 0); - *pagep = NULL; + *pagep = page = NULL; /* * Allocate and initialize the WT_PAGE. * Set the LRU so the page is not immediately selected for eviction. + * Set the read generation (which can't match a search where the write + * generation wasn't set, that is, remained 0). */ WT_RET(__wt_calloc_def(session, 1, &page)); - if (inmem_sizep != NULL) - *inmem_sizep = sizeof(*page) + dsk->size; - page->type = dsk->type; page->parent = parent; page->ref = parent_ref; page->dsk = dsk; - /* - * Set the write generation to 1 (which can't match a search where the - * write generation wasn't set, that is, remained 0). - */ page->read_gen = __wt_cache_read_gen(session); + page->type = dsk->type; + inmem_size = 0; switch (page->type) { case WT_PAGE_COL_FIX: page->u.col_fix.recno = dsk->recno; - WT_ERR(__inmem_col_fix(session, page, inmem_sizep)); + WT_ERR(__inmem_col_fix(session, page)); break; case WT_PAGE_COL_INT: page->u.intl.recno = dsk->recno; - WT_ERR(__inmem_col_int(session, page, inmem_sizep)); + WT_ERR(__inmem_col_int(session, page, &inmem_size)); break; case WT_PAGE_COL_VAR: page->u.col_var.recno = dsk->recno; - WT_ERR(__inmem_col_var(session, page, inmem_sizep)); + WT_ERR(__inmem_col_var(session, page, &inmem_size)); break; case WT_PAGE_ROW_INT: - WT_ERR(__inmem_row_int(session, page, inmem_sizep)); + WT_ERR(__inmem_row_int(session, page, &inmem_size)); break; case WT_PAGE_ROW_LEAF: - WT_ERR(__inmem_row_leaf(session, page, inmem_sizep)); + WT_ERR(__inmem_row_leaf(session, page, &inmem_size)); break; - WT_ILLEGAL_VALUE(session); + WT_ILLEGAL_VALUE_ERR(session); } + __wt_cache_page_read( + session, page, sizeof(WT_PAGE) + dsk->size + inmem_size); + *pagep = page; return (0); @@ -158,13 +179,11 @@ err: __wt_free(session, page); * Build in-memory index for fixed-length column-store leaf pages. */ static int -__inmem_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) +__inmem_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_BTREE *btree; WT_PAGE_HEADER *dsk; - WT_UNUSED(inmem_sizep); - btree = session->btree; dsk = page->dsk; @@ -298,20 +317,20 @@ static int __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) { WT_BTREE *btree; - WT_ITEM *current, *last, *tmp; WT_CELL *cell; WT_CELL_UNPACK *unpack, _unpack; + WT_DECL_ITEM(current); + WT_DECL_ITEM(last); + WT_DECL_RET; + WT_ITEM *tmp; WT_PAGE_HEADER *dsk; WT_REF *ref; uint32_t i, nindx, prefix; - int ret; void *huffman; btree = session->btree; - current = last = NULL; unpack = &_unpack; dsk = page->dsk; - ret = 0; huffman = btree->huffman_key; WT_ERR(__wt_scr_alloc(session, 0, ¤t)); @@ -323,7 +342,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) * and location cookie). */ nindx = dsk->u.entries / 2; - WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.intl.t))); + WT_ERR((__wt_calloc_def(session, (size_t)nindx, &page->u.intl.t))); if (inmem_sizep != NULL) *inmem_sizep += nindx * sizeof(*page->u.intl.t); @@ -350,7 +369,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) ref->addr = cell; ++ref; continue; - WT_ILLEGAL_VALUE(session); + WT_ILLEGAL_VALUE_ERR(session); } /* @@ -361,7 +380,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) */ prefix = unpack->prefix; if (huffman != NULL || unpack->ovfl) { - WT_RET(__wt_cell_unpack_copy(session, unpack, current)); + WT_ERR(__wt_cell_unpack_copy(session, unpack, current)); /* * If there's a prefix, make sure there's enough buffer @@ -399,7 +418,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) */ WT_ERR(__wt_row_ikey_alloc(session, WT_PAGE_DISK_OFFSET(page, cell), - current->data, current->size, (WT_IKEY **)&ref->u.key)); + current->data, current->size, &ref->u.key)); if (inmem_sizep != NULL) *inmem_sizep += sizeof(WT_IKEY) + current->size; @@ -474,7 +493,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep) switch (unpack->type) { case WT_CELL_KEY: case WT_CELL_KEY_OVFL: - rip->key = cell; + WT_ROW_KEY_SET(rip, cell); ++rip; break; case WT_CELL_VALUE: |