summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@wiredtiger.com> 2012-02-24 13:53:18 +1100
committer: Michael Cahill <michael.cahill@wiredtiger.com> 2012-02-24 13:53:18 +1100
commit: 471484a9b833f51908e5050928042ba651b11694 (patch)
tree: 29e78f453407905c384369734785af44d1ab47e6
parent: 0047c5d4c75a0d6a03a4fee7866933380aee9d69 (diff)
download: mongo-471484a9b833f51908e5050928042ba651b11694.tar.gz
Don't keep the last page of column stores pinned: it prevents eviction.
-rw-r--r-- dist/serial.py | 1
-rw-r--r-- src/btree/bt_cursor.c | 22
-rw-r--r-- src/btree/bt_handle.c | 28
-rw-r--r-- src/btree/col_modify.c | 25
-rw-r--r-- src/include/btree.h | 6
-rw-r--r-- src/include/serial_funcs.i | 20
6 files changed, 45 insertions, 57 deletions
diff --git a/dist/serial.py b/dist/serial.py
index 9fb8d3fed69..3da4c1fce28 100644
--- a/dist/serial.py
+++ b/dist/serial.py
@@ -17,6 +17,7 @@ class Serial:
msgtypes = [
Serial('col_append', 'WT_SERIAL_FUNC', [
+ SerialArg('WT_PAGE *', 'page'),
SerialArg('WT_INSERT_HEAD **', 'inshead'),
SerialArg('WT_INSERT ***', 'ins_stack'),
SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1),
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index ab38a4e97d6..132c6da755d 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -254,19 +254,21 @@ retry: __cursor_func_init(cbt, 1);
case BTREE_COL_FIX:
case BTREE_COL_VAR:
/*
- * If WT_CURSTD_APPEND set insert a new record (ignoring the
- * application's record number), return the record number.
+ * If WT_CURSTD_APPEND is set, insert a new record (ignoring
+ * the application's record number). First we search for the
+ * maximum possible record number so the search ends on the
+ * last page. The real record number is assigned by the
+ * serialized append operation.
+ * __wt_col_append_serial_func
*/
- if (F_ISSET(cursor, WT_CURSTD_APPEND)) {
- if ((ret =
- __wt_col_modify(session, cbt, 1)) == WT_RESTART)
- goto retry;
- cbt->iface.recno = cbt->recno;
- break;
- }
+ if (F_ISSET(cursor, WT_CURSTD_APPEND))
+ cbt->iface.recno = UINT64_MAX;
WT_ERR(__wt_col_search(session, cbt, 1));
+ if (F_ISSET(cursor, WT_CURSTD_APPEND))
+ cbt->iface.recno = 0;
+
/*
* If WT_CURSTD_OVERWRITE set, insert/update the key/value pair.
*
@@ -284,6 +286,8 @@ retry: __cursor_func_init(cbt, 1);
}
if ((ret = __wt_col_modify(session, cbt, 3)) == WT_RESTART)
goto retry;
+ if (F_ISSET(cursor, WT_CURSTD_APPEND) && ret == 0)
+ cbt->iface.recno = cbt->recno;
break;
case BTREE_ROW:
/*
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index ba30260cf9e..e4b66b442c5 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -396,25 +396,15 @@ __btree_last(WT_SESSION_IMPL *session)
btree = session->btree;
- if (btree->type == BTREE_ROW)
- return (0);
-
- page = NULL;
- WT_RET(__wt_tree_np(session, &page, 0, 0));
- if (page == NULL)
- return (WT_NOTFOUND);
-
- btree->last_page = page;
- btree->last_recno = __col_last_recno(page);
-
- F_SET(page, WT_PAGE_LAST_PAGE | WT_PAGE_PINNED);
-
- /*
- * Publish: there must be a barrier to ensure the pinned flag is set
- * before we discard our hazard reference.
- */
- WT_WRITE_BARRIER();
- __wt_hazard_clear(session, page);
+ if (btree->type != BTREE_ROW) {
+ page = NULL;
+ WT_RET(__wt_tree_np(session, &page, 0, 0));
+ if (page == NULL)
+ return (WT_NOTFOUND);
+
+ btree->last_recno = __col_last_recno(page);
+ __wt_page_release(session, page);
+ }
return (0);
}
diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c
index 468185a0704..12907739024 100644
--- a/src/btree/col_modify.c
+++ b/src/btree/col_modify.c
@@ -30,15 +30,11 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op)
btree = cbt->btree;
page = cbt->page;
+ recno = cbt->iface.recno;
- switch (op) {
- case 1: /* Append */
- page = btree->last_page;
- __cursor_search_clear(cbt);
+ WT_ASSERT(session, op != 1);
- value = &cbt->iface.value;
- recno = 0; /* Engine allocates */
- break;
+ switch (op) {
case 2: /* Remove */
if (btree->type == BTREE_COL_FIX) {
value = &_value;
@@ -46,12 +42,10 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op)
value->size = 1;
} else
value = NULL;
- recno = cbt->iface.recno; /* App specified */
break;
case 3: /* Insert/Update */
default:
value = &cbt->iface.value;
- recno = cbt->iface.recno; /* App specified */
/*
* There's some chance the application specified a record past
@@ -59,7 +53,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op)
* inserting a new WT_INSERT/WT_UPDATE pair, it goes on the
* append list, not the update list.
*/
- if (recno > __col_last_recno(page))
+ if (recno == 0 || recno > __col_last_recno(page))
op = 1;
break;
}
@@ -156,13 +150,12 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op)
ins_copy = ins;
WT_ERR(__wt_col_append_serial(session,
- inshead, cbt->ins_stack,
+ page, inshead, cbt->ins_stack,
&new_inslist, new_inslist_size,
&new_inshead, new_inshead_size,
&ins, ins_size, skipdepth));
- /* Set up the cursor for the inserted page and value. */
- cbt->page = btree->last_page;
+ /* Put the new recno into the cursor. */
cbt->recno = WT_INSERT_RECNO(ins_copy);
} else
WT_ERR(__wt_insert_serial(session,
@@ -228,10 +221,9 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session)
int ret;
btree = session->btree;
- page = btree->last_page;
ret = 0;
- __wt_col_append_unpack(session, &inshead, &ins_stack,
+ __wt_col_append_unpack(session, &page, &inshead, &ins_stack,
&new_inslist, &new_inshead, &new_ins, &skipdepth);
/*
@@ -241,6 +233,7 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session)
if (btree->append == NULL) {
btree->append = new_inslist;
__wt_col_append_new_inslist_taken(session, page);
+ F_SET(page, WT_PAGE_LAST_PAGE);
}
/*
@@ -260,7 +253,7 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session)
* record didn't exist at some point, it can only have been created
* on this list. Search for the record, if specified.
*/
- if ((recno = WT_INSERT_RECNO(new_ins)) == 0)
+ if ((recno = WT_INSERT_RECNO(new_ins)) == 0 || recno == UINT64_MAX)
recno = WT_INSERT_RECNO(new_ins) = ++btree->last_recno;
ins = __col_insert_search(*inshead, ins_stack, recno);
diff --git a/src/include/btree.h b/src/include/btree.h
index d3d7992627b..6a03e46b697 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -90,12 +90,6 @@ struct __wt_btree {
void *huffman_key; /* Key huffman encoding */
void *huffman_value; /* Value huffman encoding */
- /*
- * Column-store: track the last record in the file, and keep the last
- * page pinned in memory for fast appends, to a skiplist of appended
- * entries.
- */
- WT_PAGE *last_page; /* Col-store append, last page */
uint64_t last_recno; /* Col-store append, last recno */
WT_INSERT_HEAD **append; /* Appended items */
diff --git a/src/include/serial_funcs.i b/src/include/serial_funcs.i
index 3f30c199f36..b9a0a15e569 100644
--- a/src/include/serial_funcs.i
+++ b/src/include/serial_funcs.i
@@ -1,6 +1,7 @@
/* DO NOT EDIT: automatically built by dist/serial.py. */
typedef struct {
+ WT_PAGE *page;
WT_INSERT_HEAD **inshead;
WT_INSERT ***ins_stack;
WT_INSERT_HEAD **new_inslist;
@@ -17,14 +18,17 @@ typedef struct {
static inline int
__wt_col_append_serial(
- WT_SESSION_IMPL *session, WT_INSERT_HEAD **inshead, WT_INSERT
- ***ins_stack, WT_INSERT_HEAD ***new_inslistp, size_t new_inslist_size,
- WT_INSERT_HEAD **new_insheadp, size_t new_inshead_size, WT_INSERT
- **new_insp, size_t new_ins_size, u_int skipdepth)
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD **inshead,
+ WT_INSERT ***ins_stack, WT_INSERT_HEAD ***new_inslistp, size_t
+ new_inslist_size, WT_INSERT_HEAD **new_insheadp, size_t
+ new_inshead_size, WT_INSERT **new_insp, size_t new_ins_size, u_int
+ skipdepth)
{
__wt_col_append_args _args, *args = &_args;
int ret;
+ args->page = page;
+
args->inshead = inshead;
args->ins_stack = ins_stack;
@@ -72,13 +76,15 @@ __wt_col_append_serial(
static inline void
__wt_col_append_unpack(
- WT_SESSION_IMPL *session, WT_INSERT_HEAD ***insheadp, WT_INSERT
- ****ins_stackp, WT_INSERT_HEAD ***new_inslistp, WT_INSERT_HEAD
- **new_insheadp, WT_INSERT **new_insp, u_int *skipdepthp)
+ WT_SESSION_IMPL *session, WT_PAGE **pagep, WT_INSERT_HEAD ***insheadp,
+ WT_INSERT ****ins_stackp, WT_INSERT_HEAD ***new_inslistp,
+ WT_INSERT_HEAD **new_insheadp, WT_INSERT **new_insp, u_int
+ *skipdepthp)
{
__wt_col_append_args *args =
(__wt_col_append_args *)session->wq_args;
+ *pagep = args->page;
*insheadp = args->inshead;
*ins_stackp = args->ins_stack;
*new_inslistp = args->new_inslist;