summaryrefslogtreecommitdiff
path: root/src/btree/bt_ret.c
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@wiredtiger.com>2011-09-05 09:54:27 +0000
committerKeith Bostic <keith.bostic@wiredtiger.com>2011-09-05 09:54:27 +0000
commit92b78da376403c68468725352a41e09c677990fa (patch)
tree64bb8762e3c37939fd955c02af606d62776ff945 /src/btree/bt_ret.c
parent9fa0bd1e8b5844fa14742e415d8d56e04ffa11d7 (diff)
downloadmongo-92b78da376403c68468725352a41e09c677990fa.tar.gz
Make the cursor->search_near method's "exact" argument work.
Add a cursor->search function for Btree tables: cursor->search_near has to do quite a bit more work if there's no exact match, and we don't want to slow down normal lookups. Replace WT_CURSOR_BTREE->match with WT_CURSOR_BTREE->compare, which matches the semantics of the exact parameter to cursor->search_near; that is, -1 if it's a smaller key in the tree, 0 for an exact match, and 1 if it's a larger key in the tree. The cursor->search_near function has to return a key if the match is not exact: change __wt_return_value() to optionally return a key, and rename it to __wt_kv_return(). Split read statistics into read and read-near. Move the setting of WT_CURSOR_BTREE->flags(WT_CBT_SEARCH_SET) down into the search routines. Change __cursor_deleted() to be a boolean rather than returning an error value, because we don't necessarily return an error on deleted records. The search routines don't return WT_RESTART (that only happens if the tree is modified): remove the restart loops from search calls and add restart loops to modify calls.
Diffstat (limited to 'src/btree/bt_ret.c')
-rw-r--r--src/btree/bt_ret.c42
1 files changed, 33 insertions, 9 deletions
diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c
index 9536d9a324d..6ba2d5f82b8 100644
--- a/src/btree/bt_ret.c
+++ b/src/btree/bt_ret.c
@@ -8,16 +8,17 @@
#include "wt_internal.h"
/*
- * __wt_return_value --
- * Return a page referenced value item to the application.
+ * __wt_kv_return --
+ * Return a page referenced key/value pair to the application.
*/
int
-__wt_return_value(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int key_ret)
{
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
WT_CURSOR *cursor;
+ WT_IKEY *ikey;
WT_PAGE *page;
WT_ROW *rip;
WT_UPDATE *upd;
@@ -31,6 +32,9 @@ __wt_return_value(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
switch (page->type) {
case WT_PAGE_COL_FIX:
+ if (key_ret)
+ cursor->recno = cbt->recno;
+
/*
* If the cursor references a WT_INSERT item, take the related
* WT_UPDATE item.
@@ -44,6 +48,9 @@ __wt_return_value(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
v = __bit_getv_recno(page, cbt->iface.recno, btree->bitcnt);
return (__wt_buf_set(session, &cursor->value, &v, 1));
case WT_PAGE_COL_VAR:
+ if (key_ret)
+ cursor->recno = cbt->recno;
+
/*
* If the cursor references a WT_INSERT item, take the related
* WT_UPDATE item.
@@ -57,22 +64,39 @@ __wt_return_value(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
cell = WT_COL_PTR(page, &page->u.col_leaf.d[cbt->slot]);
break;
case WT_PAGE_ROW_LEAF:
+ rip = &page->u.row_leaf.d[cbt->slot];
+
/*
- * If the cursor references a WT_INSERT item, or if the original
- * item was updated, take the related WT_UPDATE item.
+ * If the cursor references a WT_INSERT item, take the key and
+ * related WT_UPDATE item. Otherwise, take the key from the
+ * original page, and the value from any related WT_UPDATE item,
+ * or the page if the key was never updated.
*/
- rip = &page->u.row_leaf.d[cbt->slot];
- if (cbt->ins == NULL)
+ if (cbt->ins == NULL) {
+ if (key_ret) {
+ if (__wt_off_page(page, rip->key)) {
+ ikey = rip->key;
+ cursor->key.data = WT_IKEY_DATA(ikey);
+ cursor->key.size = ikey->size;
+ } else
+ WT_RET(__wt_row_key(
+ session, page, rip, &cursor->key));
+ }
upd = WT_ROW_UPDATE(page, rip);
- else
+ } else {
+ if (key_ret) {
+ cursor->key.data = WT_INSERT_KEY(cbt->ins);
+ cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins);
+ }
upd = cbt->ins->upd;
+ }
if (upd != NULL) {
cursor->value.data = WT_UPDATE_DATA(upd);
cursor->value.size = upd->size;
return (0);
}
- /* Otherwise, take the original cell (which may be empty). */
+ /* Take the original cell (which may be empty). */
if ((cell = __wt_row_value(page, rip)) == NULL) {
cursor->value.size = 0;
return (0);