diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-02-24 13:53:18 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-02-24 13:53:18 +1100 |
commit | 471484a9b833f51908e5050928042ba651b11694 (patch) | |
tree | 29e78f453407905c384369734785af44d1ab47e6 | |
parent | 0047c5d4c75a0d6a03a4fee7866933380aee9d69 (diff) | |
download | mongo-471484a9b833f51908e5050928042ba651b11694.tar.gz |
Don't keep the last page of column stores pinned: it prevents eviction.

mode | file | lines changed |
---|---|---|
-rw-r--r-- | dist/serial.py | 1 |
-rw-r--r-- | src/btree/bt_cursor.c | 22 |
-rw-r--r-- | src/btree/bt_handle.c | 28 |
-rw-r--r-- | src/btree/col_modify.c | 25 |
-rw-r--r-- | src/include/btree.h | 6 |
-rw-r--r-- | src/include/serial_funcs.i | 20 |

6 files changed, 45 insertions, 57 deletions
diff --git a/dist/serial.py b/dist/serial.py index 9fb8d3fed69..3da4c1fce28 100644 --- a/dist/serial.py +++ b/dist/serial.py @@ -17,6 +17,7 @@ class Serial: msgtypes = [ Serial('col_append', 'WT_SERIAL_FUNC', [ + SerialArg('WT_PAGE *', 'page'), SerialArg('WT_INSERT_HEAD **', 'inshead'), SerialArg('WT_INSERT ***', 'ins_stack'), SerialArg('WT_INSERT_HEAD **', 'new_inslist', 1), diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index ab38a4e97d6..132c6da755d 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -254,19 +254,21 @@ retry: __cursor_func_init(cbt, 1); case BTREE_COL_FIX: case BTREE_COL_VAR: /* - * If WT_CURSTD_APPEND set insert a new record (ignoring the - * application's record number), return the record number. + * If WT_CURSTD_APPEND is set, insert a new record (ignoring + * the application's record number). First we search for the + * maximum possible record number so the search ends on the + * last page. The real record number is assigned by the + * serialized append operation. + * __wt_col_append_serial_func */ - if (F_ISSET(cursor, WT_CURSTD_APPEND)) { - if ((ret = - __wt_col_modify(session, cbt, 1)) == WT_RESTART) - goto retry; - cbt->iface.recno = cbt->recno; - break; - } + if (F_ISSET(cursor, WT_CURSTD_APPEND)) + cbt->iface.recno = UINT64_MAX; WT_ERR(__wt_col_search(session, cbt, 1)); + if (F_ISSET(cursor, WT_CURSTD_APPEND)) + cbt->iface.recno = 0; + /* * If WT_CURSTD_OVERWRITE set, insert/update the key/value pair. 
* @@ -284,6 +286,8 @@ retry: __cursor_func_init(cbt, 1); } if ((ret = __wt_col_modify(session, cbt, 3)) == WT_RESTART) goto retry; + if (F_ISSET(cursor, WT_CURSTD_APPEND) && ret == 0) + cbt->iface.recno = cbt->recno; break; case BTREE_ROW: /* diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index ba30260cf9e..e4b66b442c5 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -396,25 +396,15 @@ __btree_last(WT_SESSION_IMPL *session) btree = session->btree; - if (btree->type == BTREE_ROW) - return (0); - - page = NULL; - WT_RET(__wt_tree_np(session, &page, 0, 0)); - if (page == NULL) - return (WT_NOTFOUND); - - btree->last_page = page; - btree->last_recno = __col_last_recno(page); - - F_SET(page, WT_PAGE_LAST_PAGE | WT_PAGE_PINNED); - - /* - * Publish: there must be a barrier to ensure the pinned flag is set - * before we discard our hazard reference. - */ - WT_WRITE_BARRIER(); - __wt_hazard_clear(session, page); + if (btree->type != BTREE_ROW) { + page = NULL; + WT_RET(__wt_tree_np(session, &page, 0, 0)); + if (page == NULL) + return (WT_NOTFOUND); + + btree->last_recno = __col_last_recno(page); + __wt_page_release(session, page); + } return (0); } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 468185a0704..12907739024 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -30,15 +30,11 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) btree = cbt->btree; page = cbt->page; + recno = cbt->iface.recno; - switch (op) { - case 1: /* Append */ - page = btree->last_page; - __cursor_search_clear(cbt); + WT_ASSERT(session, op != 1); - value = &cbt->iface.value; - recno = 0; /* Engine allocates */ - break; + switch (op) { case 2: /* Remove */ if (btree->type == BTREE_COL_FIX) { value = &_value; @@ -46,12 +42,10 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) value->size = 1; } else value = NULL; - recno = cbt->iface.recno; /* App specified */ break; case 3: /* 
Insert/Update */ default: value = &cbt->iface.value; - recno = cbt->iface.recno; /* App specified */ /* * There's some chance the application specified a record past @@ -59,7 +53,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) * inserting a new WT_INSERT/WT_UPDATE pair, it goes on the * append list, not the update list. */ - if (recno > __col_last_recno(page)) + if (recno == 0 || recno > __col_last_recno(page)) op = 1; break; } @@ -156,13 +150,12 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op) ins_copy = ins; WT_ERR(__wt_col_append_serial(session, - inshead, cbt->ins_stack, + page, inshead, cbt->ins_stack, &new_inslist, new_inslist_size, &new_inshead, new_inshead_size, &ins, ins_size, skipdepth)); - /* Set up the cursor for the inserted page and value. */ - cbt->page = btree->last_page; + /* Put the new recno into the cursor. */ cbt->recno = WT_INSERT_RECNO(ins_copy); } else WT_ERR(__wt_insert_serial(session, @@ -228,10 +221,9 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session) int ret; btree = session->btree; - page = btree->last_page; ret = 0; - __wt_col_append_unpack(session, &inshead, &ins_stack, + __wt_col_append_unpack(session, &page, &inshead, &ins_stack, &new_inslist, &new_inshead, &new_ins, &skipdepth); /* @@ -241,6 +233,7 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session) if (btree->append == NULL) { btree->append = new_inslist; __wt_col_append_new_inslist_taken(session, page); + F_SET(page, WT_PAGE_LAST_PAGE); } /* @@ -260,7 +253,7 @@ __wt_col_append_serial_func(WT_SESSION_IMPL *session) * record didn't exist at some point, it can only have been created * on this list. Search for the record, if specified. 
*/ - if ((recno = WT_INSERT_RECNO(new_ins)) == 0) + if ((recno = WT_INSERT_RECNO(new_ins)) == 0 || recno == UINT64_MAX) recno = WT_INSERT_RECNO(new_ins) = ++btree->last_recno; ins = __col_insert_search(*inshead, ins_stack, recno); diff --git a/src/include/btree.h b/src/include/btree.h index d3d7992627b..6a03e46b697 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -90,12 +90,6 @@ struct __wt_btree { void *huffman_key; /* Key huffman encoding */ void *huffman_value; /* Value huffman encoding */ - /* - * Column-store: track the last record in the file, and keep the last - * page pinned in memory for fast appends, to a skiplist of appended - * entries. - */ - WT_PAGE *last_page; /* Col-store append, last page */ uint64_t last_recno; /* Col-store append, last recno */ WT_INSERT_HEAD **append; /* Appended items */ diff --git a/src/include/serial_funcs.i b/src/include/serial_funcs.i index 3f30c199f36..b9a0a15e569 100644 --- a/src/include/serial_funcs.i +++ b/src/include/serial_funcs.i @@ -1,6 +1,7 @@ /* DO NOT EDIT: automatically built by dist/serial.py. 
*/ typedef struct { + WT_PAGE *page; WT_INSERT_HEAD **inshead; WT_INSERT ***ins_stack; WT_INSERT_HEAD **new_inslist; @@ -17,14 +18,17 @@ typedef struct { static inline int __wt_col_append_serial( - WT_SESSION_IMPL *session, WT_INSERT_HEAD **inshead, WT_INSERT - ***ins_stack, WT_INSERT_HEAD ***new_inslistp, size_t new_inslist_size, - WT_INSERT_HEAD **new_insheadp, size_t new_inshead_size, WT_INSERT - **new_insp, size_t new_ins_size, u_int skipdepth) + WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD **inshead, + WT_INSERT ***ins_stack, WT_INSERT_HEAD ***new_inslistp, size_t + new_inslist_size, WT_INSERT_HEAD **new_insheadp, size_t + new_inshead_size, WT_INSERT **new_insp, size_t new_ins_size, u_int + skipdepth) { __wt_col_append_args _args, *args = &_args; int ret; + args->page = page; + args->inshead = inshead; args->ins_stack = ins_stack; @@ -72,13 +76,15 @@ __wt_col_append_serial( static inline void __wt_col_append_unpack( - WT_SESSION_IMPL *session, WT_INSERT_HEAD ***insheadp, WT_INSERT - ****ins_stackp, WT_INSERT_HEAD ***new_inslistp, WT_INSERT_HEAD - **new_insheadp, WT_INSERT **new_insp, u_int *skipdepthp) + WT_SESSION_IMPL *session, WT_PAGE **pagep, WT_INSERT_HEAD ***insheadp, + WT_INSERT ****ins_stackp, WT_INSERT_HEAD ***new_inslistp, + WT_INSERT_HEAD **new_insheadp, WT_INSERT **new_insp, u_int + *skipdepthp) { __wt_col_append_args *args = (__wt_col_append_args *)session->wq_args; + *pagep = args->page; *insheadp = args->inshead; *ins_stackp = args->ins_stack; *new_inslistp = args->new_inslist; |