summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/s_string.ok2
-rw-r--r--src/btree/bt_debug.c29
-rw-r--r--src/btree/bt_discard.c14
-rw-r--r--src/btree/bt_page.c62
-rw-r--r--src/btree/bt_ret.c2
-rw-r--r--src/btree/bt_slvg.c22
-rw-r--r--src/btree/bt_vrfy_dsk.c6
-rw-r--r--src/btree/rec_write.c56
-rw-r--r--src/btree/row_key.c198
-rw-r--r--src/include/btmem.h11
-rw-r--r--src/include/btree.i229
-rw-r--r--src/include/cell.i35
-rw-r--r--src/include/cursor.i117
13 files changed, 362 insertions, 421 deletions
diff --git a/dist/s_string.ok b/dist/s_string.ok
index bb4ef607d49..fd98b3b2526 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -127,6 +127,7 @@ INTL
INUSE
ISSET
ITEMs
+Inline
Ippokratis
JPEG
JSON
@@ -571,6 +572,7 @@ init
initn
initsize
inline
+inlined
inmem
insertK
insertV
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index 9e3d87ae4fb..9415f4ad790 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -779,15 +779,17 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
{
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
- WT_IKEY *ikey;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
WT_INSERT_HEAD *insert;
- WT_ITEM key;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
WT_UPDATE *upd;
uint32_t i;
- void *copy;
+ session = ds->session;
unpack = &_unpack;
+ WT_RET(__wt_scr_alloc(session, 256, &key));
/*
* Dump any K/V pairs inserted into the page before the first from-disk
@@ -798,24 +800,14 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
/* Dump the page's K/V pairs. */
WT_ROW_FOREACH(page, rip, i) {
- copy = WT_ROW_KEY_COPY(rip);
- if (F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY)) {
- __wt_row_leaf_direct(page, copy, &key);
- __debug_item(ds, "K", key.data, key.size);
- } else if (__wt_off_page(page, copy)) {
- ikey = copy;
- __debug_item(ds, "K", WT_IKEY_DATA(ikey), ikey->size);
- } else {
- __wt_cell_unpack(copy, unpack);
- WT_RET(__debug_cell_data(
- ds, page, WT_PAGE_ROW_LEAF, "K", unpack));
- }
+ WT_ERR(__wt_row_leaf_key(session, page, rip, key, 0));
+ __debug_item(ds, "K", key->data, key->size);
- if ((cell = __wt_row_leaf_value(page, rip)) == NULL)
+ if ((cell = __wt_row_leaf_value(page, rip, NULL)) == NULL)
__dmsg(ds, "\tV {}\n");
else {
__wt_cell_unpack(cell, unpack);
- WT_RET(__debug_cell_data(
+ WT_ERR(__debug_cell_data(
ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
}
@@ -826,7 +818,8 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
__debug_row_skip(ds, insert);
}
- return (0);
+err: __wt_scr_free(&key);
+ return (ret);
}
/*
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 03d954d9d76..01aa46a89df 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -290,6 +290,7 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_IKEY *ikey;
WT_ROW *rip;
uint32_t i;
+ void *copy;
/*
* Free the in-memory index array.
@@ -298,12 +299,13 @@ __free_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
* points somewhere other than the original page), and if so, free
* the memory.
*/
- if (!F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY))
- WT_ROW_FOREACH(page, rip, i) {
- ikey = WT_ROW_KEY_COPY(rip);
- if (ikey != NULL && __wt_off_page(page, ikey))
- __wt_free(session, ikey);
- }
+ WT_ROW_FOREACH(page, rip, i) {
+ copy = WT_ROW_KEY_COPY(rip);
+ (void)__wt_row_leaf_key_info(
+ page, copy, &ikey, NULL, NULL, NULL);
+ if (ikey != NULL)
+ __wt_free(session, ikey);
+ }
/*
* Free the insert array.
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index a9c77370df4..2a20c642872 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -11,9 +11,9 @@ static void __inmem_col_fix(WT_SESSION_IMPL *, WT_PAGE *);
static void __inmem_col_int(WT_SESSION_IMPL *, WT_PAGE *);
static int __inmem_col_var(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
static int __inmem_row_int(WT_SESSION_IMPL *, WT_PAGE *, size_t *);
-static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *, int);
+static int __inmem_row_leaf(WT_SESSION_IMPL *, WT_PAGE *);
static int __inmem_row_leaf_entries(
- WT_SESSION_IMPL *, const WT_PAGE_HEADER *, int *, uint32_t *);
+ WT_SESSION_IMPL *, const WT_PAGE_HEADER *, uint32_t *);
/*
* __wt_page_in_func --
@@ -231,20 +231,16 @@ int
__wt_page_inmem(WT_SESSION_IMPL *session,
WT_REF *ref, const void *image, uint32_t flags, WT_PAGE **pagep)
{
- WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
const WT_PAGE_HEADER *dsk;
uint32_t alloc_entries;
size_t size;
- int direct_key;
*pagep = NULL;
- btree = S2BT(session);
dsk = image;
alloc_entries = 0;
- direct_key = 0;
/*
* Figure out how many underlying objects the page references so we can
@@ -275,18 +271,6 @@ __wt_page_inmem(WT_SESSION_IMPL *session,
break;
case WT_PAGE_ROW_LEAF:
/*
- * High-performance applications will turn off Huffman encoding
- * and prefix-compression, and won't have overflow keys. In
- * those cases, we'd like to reference the key on the leaf page
- * from our row-store index instead of the cell, then we don't
- * have to unpack the cell every time we look at a key. Assume
- * the fast configuration is more likely (note it's the default
- * configuration), and correct course if we're wrong.
- */
- direct_key =
- btree->huffman_key || btree->prefix_compression ? 0 : 1;
-
- /*
* If the "no empty values" flag is set, row-store leaf page
* entries map one-to-one to the number of physical entries
* on the page (each physical entry is a key or value item).
@@ -299,7 +283,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session,
alloc_entries = dsk->u.entries / 2;
else
WT_RET(__inmem_row_leaf_entries(
- session, dsk, &direct_key, &alloc_entries));
+ session, dsk, &alloc_entries));
break;
WT_ILLEGAL_VALUE(session);
}
@@ -330,7 +314,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session,
WT_ERR(__inmem_row_int(session, page, &size));
break;
case WT_PAGE_ROW_LEAF:
- WT_ERR(__inmem_row_leaf(session, page, direct_key));
+ WT_ERR(__inmem_row_leaf(session, page));
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -608,8 +592,8 @@ err: __wt_scr_free(&current);
* Return the number of entries for row-store leaf pages.
*/
static int
-__inmem_row_leaf_entries(WT_SESSION_IMPL *session,
- const WT_PAGE_HEADER *dsk, int *direct_keyp, uint32_t *nindxp)
+__inmem_row_leaf_entries(
+ WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint32_t *nindxp)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -635,8 +619,6 @@ __inmem_row_leaf_entries(WT_SESSION_IMPL *session,
__wt_cell_unpack(cell, unpack);
switch (unpack->type) {
case WT_CELL_KEY_OVFL:
- *direct_keyp = 0;
- /* FALLTHROUGH */
case WT_CELL_KEY:
++nindx;
break;
@@ -656,7 +638,7 @@ __inmem_row_leaf_entries(WT_SESSION_IMPL *session,
* Build in-memory index for row-store leaf pages.
*/
static int
-__inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, int direct_key)
+__inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -669,28 +651,25 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, int direct_key)
dsk = page->dsk;
unpack = &_unpack;
-restart:
/* Walk the page, building indices. */
rip = page->pg_row_d;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
__wt_cell_unpack(cell, unpack);
switch (unpack->type) {
case WT_CELL_KEY_OVFL:
+ __wt_row_leaf_key_set_cell(page, rip, cell);
+ ++rip;
+ break;
+ case WT_CELL_KEY:
/*
- * If we've been preparing a fast-path to instantiating
- * leaf page keys, we have a problem, overflow keys make
- * that impossible. Restart without direct-key set.
+ * Simple keys without compression (not Huffman encoded
+ * or prefix compressed) can be directly referenced on
+ * the page to avoid repeatedly unpacking their cells.
*/
- if (direct_key) {
- direct_key = 0;
- goto restart;
- }
- /* FALLTHROUGH */
- case WT_CELL_KEY:
- if (direct_key)
- __wt_row_leaf_key_onpage_set(page, rip, unpack);
+ if (!btree->huffman_key && unpack->prefix == 0)
+ __wt_row_leaf_key_set(page, rip, unpack);
else
- __wt_row_leaf_key_onpage_set_cell(rip, cell);
+ __wt_row_leaf_key_set_cell(page, rip, cell);
++rip;
break;
case WT_CELL_VALUE:
@@ -701,13 +680,6 @@ restart:
}
/*
- * Set the direct access flag if we read the page's keys and found no
- * problems.
- */
- if (direct_key)
- F_SET_ATOMIC(page, WT_PAGE_DIRECT_KEY);
-
- /*
* We do not currently instantiate keys on leaf pages when the page is
* loaded, they're instantiated on demand.
*/
diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c
index d7631a972ca..19fff653d3d 100644
--- a/src/btree/bt_ret.c
+++ b/src/btree/bt_ret.c
@@ -93,7 +93,7 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
* Take the value from the original page cell (which may be
* empty).
*/
- if ((cell = __wt_row_leaf_value(page, rip)) == NULL) {
+ if ((cell = __wt_row_leaf_value(page, rip, NULL)) == NULL) {
cursor->value.size = 0;
return (0);
}
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 7d38ee83875..5f898a9151d 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -1891,7 +1891,8 @@ err: WT_TRET(__wt_page_release(session, ref));
/*
* __slvg_row_merge_ovfl --
- * Free file blocks referenced from keys discarded from merged pages.
+ * Free file blocks referenced from key/value pairs discarded from merged
+ * pages.
*/
static int
__slvg_row_merge_ovfl(WT_SESSION_IMPL *session,
@@ -1900,20 +1901,17 @@ __slvg_row_merge_ovfl(WT_SESSION_IMPL *session,
WT_BM *bm;
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
- WT_IKEY *ikey;
WT_ROW *rip;
+ void *copy;
bm = S2BT(session)->bm;
unpack = &_unpack;
for (rip = page->pg_row_d + start; start < stop; ++start) {
- if (!F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY)) {
- ikey = WT_ROW_KEY_COPY(rip);
- if (__wt_off_page(page, ikey))
- cell =
- WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- else
- cell = (WT_CELL *)ikey;
+ copy = WT_ROW_KEY_COPY(rip);
+ (void)__wt_row_leaf_key_info(
+ page, copy, NULL, &cell, NULL, NULL);
+ if (cell != NULL) {
__wt_cell_unpack(cell, unpack);
if (unpack->type == WT_CELL_KEY_OVFL) {
WT_RET(__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1924,12 +1922,12 @@ __slvg_row_merge_ovfl(WT_SESSION_IMPL *session,
__wt_addr_string(session, unpack->data,
unpack->size, trk->ss->tmp2)));
- WT_RET(bm->free(
- bm, session, unpack->data, unpack->size));
+ WT_RET(bm->free(
+ bm, session, unpack->data, unpack->size));
}
}
- if ((cell = __wt_row_leaf_value(page, rip)) == NULL)
+ if ((cell = __wt_row_leaf_value(page, rip, NULL)) == NULL)
continue;
__wt_cell_unpack(cell, unpack);
if (unpack->type == WT_CELL_VALUE_OVFL) {
diff --git a/src/btree/bt_vrfy_dsk.c b/src/btree/bt_vrfy_dsk.c
index 93a872754e4..6c4d28a03ce 100644
--- a/src/btree/bt_vrfy_dsk.c
+++ b/src/btree/bt_vrfy_dsk.c
@@ -200,7 +200,7 @@ __verify_dsk_row(
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(NULL, cell, unpack, end) != 0) {
+ if (__wt_cell_unpack_safe(cell, unpack, end) != 0) {
ret = __err_cell_corrupted(session, cell_num, addr);
goto err;
}
@@ -467,7 +467,7 @@ __verify_dsk_col_int(
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(NULL, cell, unpack, end) != 0)
+ if (__wt_cell_unpack_safe(cell, unpack, end) != 0)
return (__err_cell_corrupted(session, cell_num, addr));
/* Check the raw and collapsed cell types. */
@@ -534,7 +534,7 @@ __verify_dsk_col_var(
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(NULL, cell, unpack, end) != 0)
+ if (__wt_cell_unpack_safe(cell, unpack, end) != 0)
return (__err_cell_corrupted(session, cell_num, addr));
/* Check the raw and collapsed cell types. */
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 417f71d4f72..db90e2119f9 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -4046,28 +4046,21 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
}
/*
- * Set the WT_IKEY reference (if the key was instantiated), and
- * the key cell reference, unpack the key cell.
+ * Figure out the key: set any cell reference (and unpack it),
+ * set any instantiated key reference.
*/
copy = WT_ROW_KEY_COPY(rip);
- if (F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY)) {
- ikey = NULL;
- cell = NULL;
+ (void)__wt_row_leaf_key_info(
+ page, copy, &ikey, &cell, NULL, NULL);
+ if (cell == NULL)
kpack = NULL;
- } else if (__wt_off_page(page, copy)) {
- ikey = copy;
- cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- kpack = &_kpack;
- __wt_cell_unpack(cell, kpack);
- } else {
- ikey = NULL;
- cell = (WT_CELL *)copy;
+ else {
kpack = &_kpack;
__wt_cell_unpack(cell, kpack);
}
/* Unpack the on-page value cell, and look for an update. */
- if ((val_cell = __wt_row_leaf_value(page, rip)) == NULL)
+ if ((val_cell = __wt_row_leaf_value(page, rip, NULL)) == NULL)
vpack = NULL;
else {
vpack = &_vpack;
@@ -4256,22 +4249,17 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
r->ovfl_items = 1;
} else {
/*
- * Use a direct-key from the page, or
- * Use an already instantiated key, or
- * Use the key from the disk image, or
- * Build a key from a previous key, or
- * Instantiate the key from scratch.
+ * Get the key from the page or an instantiated key, or
+ * build the key inline from a previous key (it's a fast
+ * path for simple, prefix-compressed keys), or build the
+ * key from scratch.
*/
- if (kpack == NULL)
- __wt_row_leaf_direct(page, copy, tmpkey);
- else if (ikey != NULL) {
- tmpkey->data = WT_IKEY_DATA(ikey);
- tmpkey->size = ikey->size;
- } else if (btree->huffman_key == NULL &&
- kpack->type == WT_CELL_KEY && kpack->prefix == 0) {
- tmpkey->data = kpack->data;
- tmpkey->size = kpack->size;
- } else if (btree->huffman_key == NULL &&
+ if (__wt_row_leaf_key_info(page, copy,
+ NULL, &cell, &tmpkey->data, &tmpkey->size))
+ goto build;
+
+ __wt_cell_unpack(cell, kpack);
+ if (btree->huffman_key == NULL &&
kpack->type == WT_CELL_KEY &&
tmpkey->size >= kpack->prefix) {
/*
@@ -4283,10 +4271,10 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
WT_ASSERT(session, tmpkey->size != 0);
/*
- * Grow the buffer as necessary as well as
- * ensure data has been copied into local buffer
- * space, then append the suffix to the prefix
- * already in the buffer.
+ * Grow the buffer as necessary, ensuring the
+ * data has been copied into local buffer space,
+ * then append the suffix to the prefix already
+ * in the buffer.
*
* Don't grow the buffer unnecessarily or copy
* data we don't need, truncate the item's data
@@ -4301,7 +4289,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
} else
WT_ERR(__wt_row_leaf_key_copy(
session, page, rip, tmpkey));
-
+build:
WT_ERR(__rec_cell_build_leaf_key(session, r,
tmpkey->data, tmpkey->size, &ovfl_key));
}
diff --git a/src/btree/row_key.c b/src/btree/row_key.c
index e82140d509a..a816747b9cb 100644
--- a/src/btree/row_key.c
+++ b/src/btree/row_key.c
@@ -25,8 +25,7 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page)
btree = S2BT(session);
- if (page->pg_row_entries == 0 || /* Just checking... */
- F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY)) {
+ if (page->pg_row_entries == 0) { /* Just checking... */
F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS);
return (0);
}
@@ -132,6 +131,7 @@ __wt_row_leaf_key_work(WT_SESSION_IMPL *session,
{
enum { FORWARD, BACKWARD } direction;
WT_BTREE *btree;
+ WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
@@ -140,7 +140,7 @@ __wt_row_leaf_key_work(WT_SESSION_IMPL *session,
size_t size;
u_int last_prefix;
int jump_slot_offset, slot_offset;
- void *key;
+ void *copy;
const void *p;
/*
@@ -149,7 +149,6 @@ __wt_row_leaf_key_work(WT_SESSION_IMPL *session,
* front-end, __wt_row_leaf_key, be careful if you're calling this code
* directly.
*/
- WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY));
btree = S2BT(session);
unpack = &_unpack;
@@ -161,51 +160,68 @@ __wt_row_leaf_key_work(WT_SESSION_IMPL *session,
direction = BACKWARD;
for (slot_offset = 0;;) {
-jump_slot: key = WT_ROW_KEY_COPY(rip);
+ if (0) {
+switch_and_jump: /* Switching to a forward roll. */
+ WT_ASSERT(session, direction == BACKWARD);
+ direction = FORWARD;
+
+ /* Skip list of keys with compatible prefixes. */
+ rip = jump_rip;
+ slot_offset = jump_slot_offset;
+ }
+ copy = WT_ROW_KEY_COPY(rip);
/*
- * Key copied.
- *
- * If another thread instantiated the key while we were doing
- * that, we don't have any work to do. Figure this out using
- * the key's value:
- *
- * If the key points off-page, another thread updated the key,
- * we can just use it.
- *
- * If the key points on-page, we have a copy of a WT_CELL value
- * that can be processed, regardless of what any other thread is
- * doing.
- *
- * Overflow keys are not prefix-compressed, we don't want to
- * read/write them during reconciliation simply because their
- * prefix might change. That means we can't use instantiated
- * overflow keys to figure out the prefix for other keys,
- * specifically, in this code when we're looking for a key we
- * can roll-forward to figure out the target key's prefix,
- * instantiated overflow keys aren't useful.
- *
- * 1: the test for an on/off page reference.
+ * Figure out what the key looks like.
*/
- if (__wt_off_page(page, key)) {
-off_page: ikey = key;
+ (void)__wt_row_leaf_key_info(
+ page, copy, &ikey, &cell, &p, &size);
+
+ /* 1: the test for a directly referenced on-page key. */
+ if (cell == NULL) {
+ keyb->data = p;
+ keyb->size = size;
+
+ /*
+ * If this is the key we originally wanted, we don't
+ * care if we're rolling forward or backward, or if
+ * it's an overflow key or not, it's what we wanted.
+ * This shouldn't normally happen, the fast-path code
+ * that front-ends this function will have figured it
+ * out before we were called.
+ *
+ * The key doesn't need to be instantiated, skip past
+ * that test.
+ */
+ if (slot_offset == 0)
+ goto done;
+
+ /*
+ * This key is not an overflow key by definition and
+ * isn't compressed in any way, we can use it to roll
+ * forward.
+ * If rolling backward, switch directions.
+ * If rolling forward: there's a bug somewhere,
+ * we should have hit this key when rolling backward.
+ */
+ goto switch_and_jump;
+ }
+ /* 2: the test for an instantiated off-page key. */
+ if (ikey != NULL) {
/*
* If this is the key we originally wanted, we don't
* care if we're rolling forward or backward, or if
* it's an overflow key or not, it's what we wanted.
* Take a copy and wrap up.
+ *
+ * The key doesn't need to be instantiated, skip past
+ * that test.
*/
if (slot_offset == 0) {
- keyb->data = WT_IKEY_DATA(ikey);
- keyb->size = ikey->size;
-
- /*
- * The key is already instantiated, ignore the
- * caller's suggestion.
- */
- instantiate = 0;
- break;
+ keyb->data = p;
+ keyb->size = size;
+ goto done;
}
/*
@@ -218,8 +234,7 @@ off_page: ikey = key;
* done because prefixes skip overflow keys: keep
* rolling forward.
*/
- if (__wt_cell_type(WT_PAGE_REF_OFFSET(
- page, ikey->cell_offset)) == WT_CELL_KEY_OVFL)
+ if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL)
goto next;
/*
@@ -233,16 +248,18 @@ off_page: ikey = key;
* In short: if it's not an overflow key, take a copy
* and roll forward.
*/
- keyb->data = WT_IKEY_DATA(ikey);
- keyb->size = ikey->size;
+ keyb->data = p;
+ keyb->size = size;
direction = FORWARD;
goto next;
}
- /* Unpack the key's cell. */
- __wt_cell_unpack(key, unpack);
+ /*
+ * It must be an on-page cell, unpack it.
+ */
+ __wt_cell_unpack(cell, unpack);
- /* 2: the test for an on-page reference to an overflow key. */
+ /* 3: the test for an on-page reference to an overflow key. */
if (unpack->type == WT_CELL_KEY_OVFL) {
/*
* If this is the key we wanted from the start, we don't
@@ -260,25 +277,23 @@ off_page: ikey = key;
* the tracking cache.
*/
if (slot_offset == 0) {
- WT_ERR(__wt_readlock(
- session, btree->ovfl_lock));
- key = WT_ROW_KEY_COPY(rip);
- if (__wt_off_page(page, key)) {
- WT_ERR(__wt_rwunlock(
- session, btree->ovfl_lock));
- goto off_page;
+ WT_ERR(
+ __wt_readlock(session, btree->ovfl_lock));
+ copy = WT_ROW_KEY_COPY(rip);
+ if (!__wt_row_leaf_key_info(page, copy,
+ NULL, &cell, &keyb->data, &keyb->size)) {
+ __wt_cell_unpack(cell, unpack);
+ ret = __wt_dsk_cell_data_ref(session,
+ WT_PAGE_ROW_LEAF, unpack, keyb);
}
- ret = __wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_LEAF, unpack, keyb);
- WT_TRET(__wt_rwunlock(
- session, btree->ovfl_lock));
+ WT_TRET(
+ __wt_rwunlock(session, btree->ovfl_lock));
WT_ERR(ret);
break;
}
/*
- * If we wanted a different key and this key is an
- * overflow key:
+ * If we wanted a different key:
* If we're rolling backward, this key is useless
* to us because it doesn't have a valid prefix: keep
* rolling backward.
@@ -290,11 +305,19 @@ off_page: ikey = key;
}
/*
- * 3: the test for an on-page reference to a key that isn't
+ * 4: the test for an on-page reference to a key that isn't
* prefix compressed.
*/
if (unpack->prefix == 0) {
/*
+ * The only reason to be here is a Huffman encoded key,
+ * a non-encoded key with no prefix compression should
+ * have been directly referenced, and we should not have
+ * needed to unpack its cell.
+ */
+ WT_ASSERT(session, btree->huffman_key != NULL);
+
+ /*
* If this is the key we originally wanted, we don't
* care if we're rolling forward or backward, it's
* what we want. Take a copy and wrap up.
@@ -306,47 +329,19 @@ off_page: ikey = key;
* If rolling forward there's a bug, we should have
* found this key while rolling backwards and switched
* directions then.
+ *
+ * The key doesn't need to be instantiated, skip past
+ * that test.
*/
- if (btree->huffman_key == NULL) {
- keyb->data = unpack->data;
- keyb->size = unpack->size;
- } else
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_ROW_LEAF, unpack, keyb));
-
- if (slot_offset == 0) {
- /*
- * If we have an uncompressed, on-page key with
- * no prefix, don't bother instantiating it,
- * regardless of what our caller thought. The
- * memory cost is greater than the performance
- * cost of finding the key each time we need it.
- */
- if (btree->huffman_key == NULL)
- instantiate = 0;
- break;
- }
-
- WT_ASSERT(session, direction == BACKWARD);
- direction = FORWARD;
-
- /*
- * Switching to the forward roll; skip over any list of
- * keys with compatible prefixes.
- */
- rip = jump_rip;
- slot_offset = jump_slot_offset;
-
- /*
- * I'm using an explicit branch instead of a continue,
- * it needs to be obvious that new code at the top of
- * this loop is problematical.
- */
- goto jump_slot;
+ WT_ERR(__wt_dsk_cell_data_ref(
+ session, WT_PAGE_ROW_LEAF, unpack, keyb));
+ if (slot_offset == 0)
+ goto done;
+ goto switch_and_jump;
}
/*
- * 4: an on-page reference to a key that's prefix compressed.
+ * 5: an on-page reference to a key that's prefix compressed.
* If rolling backward, keep looking for something we can
* use.
* If rolling forward, build the full key and keep rolling
@@ -436,10 +431,12 @@ next: switch (direction) {
* that half of the page).
*/
if (instantiate) {
- key = WT_ROW_KEY_COPY(rip_arg);
- if (!__wt_off_page(page, key)) {
+ copy = WT_ROW_KEY_COPY(rip_arg);
+ (void)__wt_row_leaf_key_info(
+ page, copy, &ikey, &cell, NULL, NULL);
+ if (ikey == NULL) {
WT_ERR(__wt_row_ikey(session,
- WT_PAGE_DISK_OFFSET(page, key),
+ WT_PAGE_DISK_OFFSET(page, cell),
keyb->data, keyb->size, &ikey));
/*
@@ -447,7 +444,7 @@ next: switch (direction) {
* update the page's memory footprint, on failure, free
* the allocated memory.
*/
- if (WT_ATOMIC_CAS(WT_ROW_KEY_COPY(rip), key, ikey))
+ if (WT_ATOMIC_CAS(WT_ROW_KEY_COPY(rip), copy, ikey))
__wt_cache_page_inmem_incr(session,
page, sizeof(WT_IKEY) + ikey->size);
else
@@ -455,6 +452,7 @@ next: switch (direction) {
}
}
+done:
err: __wt_scr_free(&tmp);
return (ret);
}
diff --git a/src/include/btmem.h b/src/include/btmem.h
index d9986e38d98..4056ec9ed08 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -531,12 +531,11 @@ struct __wt_page {
uint8_t type; /* Page type */
#define WT_PAGE_BUILD_KEYS 0x01 /* Keys have been built in memory */
-#define WT_PAGE_DIRECT_KEY 0x02 /* Row-store leaf keys direct access */
-#define WT_PAGE_DISK_ALLOC 0x04 /* Disk image in allocated memory */
-#define WT_PAGE_DISK_MAPPED 0x08 /* Disk image in mapped memory */
-#define WT_PAGE_EVICT_LRU 0x10 /* Page is on the LRU queue */
-#define WT_PAGE_SCANNING 0x20 /* Obsolete updates are being scanned */
-#define WT_PAGE_SPLITTING 0x40 /* An internal page is growing. */
+#define WT_PAGE_DISK_ALLOC 0x02 /* Disk image in allocated memory */
+#define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */
+#define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */
+#define WT_PAGE_SCANNING 0x10 /* Obsolete updates are being scanned */
+#define WT_PAGE_SPLITTING 0x20 /* An internal page is growing. */
uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
};
diff --git a/src/include/btree.i b/src/include/btree.i
index 688324f5e45..0f81103862f 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -486,7 +486,7 @@ __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack)
/*
* See the comment in __wt_ref_key for an explanation of the magic.
*/
- v = (uint64_t)unpack->size << 32 |
+ v = (uintptr_t)unpack->size << 32 |
(uint32_t)WT_PAGE_DISK_OFFSET(page, unpack->data) << 1 |
0x01;
ref->key.ikey = (void *)v;
@@ -520,67 +520,124 @@ __wt_ref_key_clear(WT_REF *ref)
}
/*
- * __wt_row_leaf_direct --
- * Return an encoded row-store leaf page key.
+ * __wt_row_leaf_key_info --
+ * Return a row-store leaf page key referenced by a WT_ROW if it can be
+ * had without unpacking a cell, and information about the cell, if the key
+ * isn't cheaply available.
*/
-static inline void
-__wt_row_leaf_direct(WT_PAGE *page, void *ripkey, WT_ITEM *key)
+static inline int
+__wt_row_leaf_key_info(WT_PAGE *page, void *copy,
+ WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep)
{
+ WT_IKEY *ikey;
uintptr_t v;
+ v = (uintptr_t)copy;
+
/*
* A row-store leaf page key is in one of two places: if instantiated,
* the WT_ROW pointer references a WT_IKEY structure, otherwise, it
* references an on-page location. However, on-page keys are in one of
- * two states and the reference is in one of two forms: if a row-store
- * doesn't configure prefix compression or Huffman encoding, and there
- * were no overflow keys found when reading the page into memory (all
- * of which is likely, those are the default configurations), the key's
- * location and size was encoded in the pointer and a page flag was set.
- * If we found overflow keys or one of those features is configured, the
- * reference is to the key's on-page cell, which we'll unpack (we're
- * trying to avoid that cell unpack per key read in the fast path).
- * The test is if the page flag is set, we're done, it's an encoding;
- * otherwise, if the pointer is off-page it's an instantiated key, else
- * an on-page cell.
+ * two states: if the key is a simple key (not an overflow key, prefix
+ * compressed or Huffman encoded, which is the likely case), the key's
+ * offset/size is encoded in the pointer. Otherwise, the offset is to
+ * the key's on-page cell.
+ *
+ * Now the magic: Any allocated memory will have a low-order bit of 0
+ * (the return from malloc must be aligned to store any standard type,
+ * and we assume there's always going to be a standard type requiring
+ * even-byte alignment). An on-page key consists of an offset/length
+ * pair. We can fit the maximum page size into 31 bits, so we use the
+ * low-order bit in the on-page value to flag the next 31 bits as a
+ * page offset and the other 32 bits as the key's length, not a WT_IKEY
+ * pointer. This breaks if allocation chunks aren't even-byte aligned.
*
- * This function cracks an encoded key and returns a real pointer. The
- * encoding magic is simpler than internal page key encoding because we
- * are using the page's flag rather than per-key information to decide
- * if the key is encoded. The key's page offset is the bottom 4B, and
- * the key size is the top 4B.
+ * To distinguish between an on-page key and an on-page cell, we set
+ * the size to 0 in the case of an on-page cell.
+ *
+ * Perform the tests in the order we think most probable, this call is
+ * all about speed.
+ *
+ * This function returns a list of things about the key (instantiation
+ * reference, cell reference, and key/length pair). Our callers
+ * sometimes want some things, and sometimes others, we fill in
+ * the information we have based on the arguments we're passed; since
+ * this is an inlined function, we're depending on the compiler to drop
+ * code we don't need.
*/
- v = (uintptr_t)ripkey;
- key->data = WT_PAGE_REF_OFFSET(page, (v & 0xFFFFFFFF));
- key->size = v >> 32;
+
+ /* On-page key: no instantiated key, no cell. */
+ if (v & 0x01 && (v & 0xFFFFFFFF00000000) != 0) {
+ if (cellp != NULL)
+ *cellp = NULL;
+ if (ikeyp != NULL)
+ *ikeyp = NULL;
+ if (datap != NULL) {
+ *(void **)datap =
+ WT_PAGE_REF_OFFSET(page, (v & 0xFFFFFFFF) >> 1);
+ *sizep = v >> 32;
+ return (1);
+ }
+ return (0);
+ }
+
+ /* On-page cell: no instantiated key. */
+ if (v & 0x01) {
+ if (ikeyp != NULL)
+ *ikeyp = NULL;
+ if (cellp != NULL)
+ *cellp =
+ WT_PAGE_REF_OFFSET(page, (v & 0xFFFFFFFF) >> 1);
+ return (0);
+ }
+
+ /* Instantiated key. */
+ ikey = copy;
+ if (ikeyp != NULL)
+ *ikeyp = copy;
+ if (cellp != NULL)
+ *cellp = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
+ if (datap != NULL) {
+ *(void **)datap = WT_IKEY_DATA(ikey);
+ *sizep = ikey->size;
+ return (1);
+ }
+ return (0);
}
/*
- * __wt_row_leaf_key_onpage_set --
- * Set a WT_ROW to reference an on-page key.
+ * __wt_row_leaf_key_set --
+ * Set a WT_ROW to reference an on-page row-store leaf key.
*/
static inline void
-__wt_row_leaf_key_onpage_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
+__wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
{
uintptr_t v;
/*
- * See the comment in __wt_row_leaf_direct for an explanation of the
+ * See the comment in __wt_row_leaf_key_info for an explanation of the
* magic.
*/
v = (uintptr_t)unpack->size << 32 |
- (uint32_t)WT_PAGE_DISK_OFFSET(page, unpack->data);
+ (uint32_t)WT_PAGE_DISK_OFFSET(page, unpack->data) << 1 | 0x01;
WT_ROW_KEY_SET(rip, v);
}
/*
- * __wt_row_leaf_key_onpage_set_cell --
- * Set a WT_ROW to reference an on-page key's cell.
+ * __wt_row_leaf_key_set_cell --
+ * Set a WT_ROW to reference an on-page row-store leaf cell.
*/
static inline void
-__wt_row_leaf_key_onpage_set_cell(WT_ROW *rip, WT_CELL *cell)
+__wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell)
{
- WT_ROW_KEY_SET(rip, cell);
+ uintptr_t v;
+
+ /*
+ * See the comment in __wt_row_leaf_key_info for an explanation of the
+ * magic.
+ */
+ v = (uintptr_t)WT_PAGE_DISK_OFFSET(page, cell) << 1 | 0x01;
+ WT_ROW_KEY_SET(rip, v);
}
/*
@@ -592,13 +649,8 @@ static inline int
__wt_row_leaf_key(WT_SESSION_IMPL *session,
WT_PAGE *page, WT_ROW *rip, WT_ITEM *key, int instantiate)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
- WT_IKEY *ikey;
void *copy;
- btree = S2BT(session);
-
/*
* A front-end for __wt_row_leaf_key_work, here to inline fast paths.
*
@@ -606,36 +658,19 @@ __wt_row_leaf_key(WT_SESSION_IMPL *session,
*/
copy = WT_ROW_KEY_COPY(rip);
- /* First, check for an encoded key. */
- if (F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY)) {
- __wt_row_leaf_direct(page, copy, key);
- return (0);
- }
-
- /* Second, check for an instantiated key. */
- if (__wt_off_page(page, copy)) {
- ikey = copy;
- key->data = WT_IKEY_DATA(ikey);
- key->size = ikey->size;
- return (0);
- }
-
/*
- * Third, if the key isn't compressed or an overflow, unpack the cell
- * and take it from the page.
+ * All we handle here are on-page keys (which should be a common case),
+ * and instantiated keys (which start out rare, but become more common
+ * as a leaf page is searched, instantiating prefix-compressed keys).
*/
- if (btree->huffman_key == NULL) {
- __wt_cell_unpack(copy, &unpack);
- if (unpack.type == WT_CELL_KEY && unpack.prefix == 0) {
- key->data = unpack.data;
- key->size = unpack.size;
- return (0);
- }
- }
+ if (__wt_row_leaf_key_info(
+ page, copy, NULL, NULL, &key->data, &key->size))
+ return (0);
/*
- * We have to build the key (it's never been instantiated, and it's some
- * kind of compressed or overflow key).
+ * The alternative is an on-page cell with some kind of compressed or
+ * overflow key that's never been instantiated. Call the underlying
+ * worker function to figure it out.
*/
return (__wt_row_leaf_key_work(session, page, rip, key, instantiate));
}
@@ -673,48 +708,42 @@ __wt_cursor_row_leaf_key(WT_CURSOR_BTREE *cbt, WT_ITEM *key)
* NULL if there isn't one.
*/
static inline WT_CELL *
-__wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip)
+__wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *kpack)
{
- WT_CELL *cell;
+ WT_CELL *kcell, *vcell;
WT_CELL_UNPACK unpack;
- WT_IKEY *ikey;
- void *copy;
- uintptr_t v;
+ void *copy, *key;
+ size_t size;
- /*
- * The row-store key can change underfoot; explicitly take a copy.
- */
- copy = WT_ROW_KEY_COPY(rip);
+ /* If we already have an unpacked key cell, use it. */
+ if (kpack != NULL)
+ vcell = (WT_CELL *)
+ ((uint8_t *)kpack->cell + __wt_cell_total_len(kpack));
+ else {
+ /*
+ * The row-store key can change underfoot; explicitly take a
+ * copy.
+ */
+ copy = WT_ROW_KEY_COPY(rip);
- /*
- * See the comment in __wt_row_leaf_direct for an explanation of the
- * magic; we know where the key is, step past it to the value's cell.
- */
- if (F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY)) {
- v = (uintptr_t)copy;
- cell = (WT_CELL *)
- ((uint8_t *)WT_PAGE_REF_OFFSET(page, (v & 0xFFFFFFFF)) +
- (v >> 32));
- return (__wt_cell_leaf_value_parse(page, cell));
+ /*
+ * Figure out where the key is, step past it to the value cell.
+ * The test for a cell not being set tells us that we have an
+ * on-page key; otherwise we're looking at an instantiated key
+ * or on-page cell, both of which require an unpack of the key's
+ * cell to find the value cell that follows.
+ */
+ if (__wt_row_leaf_key_info(
+ page, copy, NULL, &kcell, &key, &size) && kcell == NULL)
+ vcell = (WT_CELL *)((uint8_t *)key + size);
+ else {
+ __wt_cell_unpack(kcell, &unpack);
+ vcell = (WT_CELL *)((uint8_t *)
+ unpack.cell + __wt_cell_total_len(&unpack));
+ }
}
- /*
- * Cell now either references a WT_IKEY structure with a cell offset, or
- * references the on-page key WT_CELL. Both can be processed no matter
- * what other threads are doing. If it's the former, use it to get the
- * latter.
- */
- if (__wt_off_page(page, copy)) {
- ikey = copy;
- cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- } else
- cell = copy;
-
- /* Unpack the key cell, then return its associated value cell. */
- __wt_cell_unpack(cell, &unpack);
- cell = (WT_CELL *)((uint8_t *)cell + __wt_cell_total_len(&unpack));
-
- return (__wt_cell_leaf_value_parse(page, cell));
+ return (__wt_cell_leaf_value_parse(page, vcell));
}
/*
diff --git a/src/include/cell.i b/src/include/cell.i
index 071206c4b49..f5303644bef 100644
--- a/src/include/cell.i
+++ b/src/include/cell.i
@@ -555,8 +555,7 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
* Unpack a WT_CELL into a structure during verification.
*/
static inline int
-__wt_cell_unpack_safe(
- WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack, uint8_t *end)
+__wt_cell_unpack_safe(WT_CELL *cell, WT_CELL_UNPACK *unpack, uint8_t *end)
{
uint64_t saved_v, v;
uint32_t saved_len;
@@ -706,20 +705,6 @@ done: WT_CELL_LEN_CHK(cell, unpack->__len);
unpack->v = saved_v;
}
- /*
- * If we just unpacked a key cell for an in-memory page, set the value
- * field to the next cell, interpreting it as a value cell, so cursors
- * can return a key/value pair without unpacking the key cell multiple
- * times.
- *
- * !!!
- * This function is only called with a non-NULL page when unpacking a
- * row-store leaf page key, which is why we don't check further.
- */
- if (page != NULL) {
- cell = (WT_CELL *)((uint8_t *)cell + unpack->__len);
- unpack->value = __wt_cell_leaf_value_parse(page, cell);
- }
return (0);
}
@@ -730,23 +715,7 @@ done: WT_CELL_LEN_CHK(cell, unpack->__len);
static inline void
__wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
- (void)__wt_cell_unpack_safe(NULL, cell, unpack, NULL);
-}
-
-/*
- * __wt_cell_unpack_with_value --
- * Unpack a WT_CELL into a structure, and check for an associated value.
- */
-static inline void
-__wt_cell_unpack_with_value(
- WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
-{
- /*
- * This routine exists so we don't have pass in a NULL page reference
- * whenever we're unpacking cells from disk images (rather than from
- * in-memory pages).
- */
- (void)__wt_cell_unpack_safe(page, cell, unpack, NULL);
+ (void)__wt_cell_unpack_safe(cell, unpack, NULL);
}
/*
diff --git a/src/include/cursor.i b/src/include/cursor.i
index e3bb225cbcb..e3834fb95ee 100644
--- a/src/include/cursor.i
+++ b/src/include/cursor.i
@@ -178,7 +178,6 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd)
WT_ITEM *kb, *vb;
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
- WT_IKEY *ikey;
WT_PAGE *page;
WT_SESSION_IMPL *session;
int key_unpacked;
@@ -200,83 +199,75 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd)
copy = WT_ROW_KEY_COPY(rip);
/*
- * Get a reference to the key, ideally without doing a copy: we could
- * call __wt_row_leaf_key, but if a cursor is running through the tree,
- * we actually have more information here than that function has, we
- * may have the prefix-compressed key that comes immediately before the
- * one we want.
+ * Get a key: we could just call __wt_row_leaf_key, but as a cursor
+ * is running through the tree, we may have additional information
+ * here (we may have the fully-built key that's immediately before
+ * the prefix-compressed key we want).
*
- * If the key can be accessed directly, or has been instantiated (the
- * key points off-page), we don't have any work to do.
- *
- * If the key points on-page, we have a copy of a WT_CELL value that can
- * be processed, regardless of what any other thread is doing.
+ * First, check for an immediately available key.
*/
- if (F_ISSET_ATOMIC(page, WT_PAGE_DIRECT_KEY))
- __wt_row_leaf_direct(page, copy, kb);
- else if (__wt_off_page(page, copy)) {
- ikey = copy;
- kb->data = WT_IKEY_DATA(ikey);
- kb->size = ikey->size;
+ if (__wt_row_leaf_key_info(
+ page, copy, NULL, &cell, &kb->data, &kb->size))
+ goto value;
+
+ /* Huffman encoded keys are a slow path in all cases. */
+ if (btree->huffman_key != NULL)
+ goto slow;
+
+ /*
+ * Unpack the cell and deal with overflow and prefix-compressed keys.
+ * Inline building simple prefix-compressed keys from a previous key,
+ * otherwise build from scratch.
+ */
+ __wt_cell_unpack(cell, unpack);
+ key_unpacked = 1;
+ if (unpack->type == WT_CELL_KEY &&
+ cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
+ WT_ASSERT(session, cbt->tmp.size >= unpack->prefix);
+
+ /*
+ * Grow the buffer as necessary as well as ensure data has been
+ * copied into local buffer space, then append the suffix to the
+ * prefix already in the buffer.
+ *
+ * Don't grow the buffer unnecessarily or copy data we don't
+ * need, truncate the item's data length to the prefix bytes.
+ */
+ cbt->tmp.size = unpack->prefix;
+ WT_RET(__wt_buf_grow(
+ session, &cbt->tmp, cbt->tmp.size + unpack->size));
+ memcpy((uint8_t *)cbt->tmp.data + cbt->tmp.size,
+ unpack->data, unpack->size);
+ cbt->tmp.size += unpack->size;
} else {
/*
- * If the key is simple and on-page and not prefix-compressed,
- * or we have the previous expanded key in the cursor buffer,
- * reference or build it. Else, call __wt_row_leaf_key_work to
- * do it the hard way.
+ * Call __wt_row_leaf_key_work instead of __wt_row_leaf_key: we
+ * already did __wt_row_leaf_key's fast-path checks inline.
*/
- if (btree->huffman_key != NULL)
- goto slow;
- __wt_cell_unpack_with_value(page, copy, unpack);
- key_unpacked = 1;
- if (unpack->type == WT_CELL_KEY && unpack->prefix == 0) {
- cbt->tmp.data = unpack->data;
- cbt->tmp.size = unpack->size;
- } else if (unpack->type == WT_CELL_KEY &&
- cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
- WT_ASSERT(session, cbt->tmp.size >= unpack->prefix);
-
- /*
- * Grow the buffer as necessary as well as ensure data
- * has been copied into local buffer space, then append
- * the suffix to the prefix already in the buffer.
- *
- * Don't grow the buffer unnecessarily or copy data we
- * don't need, truncate the item's data length to the
- * prefix bytes.
- */
- cbt->tmp.size = unpack->prefix;
- WT_RET(__wt_buf_grow(
- session, &cbt->tmp, cbt->tmp.size + unpack->size));
- memcpy((uint8_t *)cbt->tmp.data + cbt->tmp.size,
- unpack->data, unpack->size);
- cbt->tmp.size += unpack->size;
- } else {
- /*
- * __wt_row_leaf_key_work instead of __wt_row_leaf_key:
- * we do __wt_row_leaf_key's fast-path checks inline.
- */
-slow: WT_RET(__wt_row_leaf_key_work(
- session, page, rip, &cbt->tmp, 0));
- }
- kb->data = cbt->tmp.data;
- kb->size = cbt->tmp.size;
- cbt->rip_saved = rip;
+slow: WT_RET(
+ __wt_row_leaf_key_work(session, page, rip, &cbt->tmp, 0));
}
+ kb->data = cbt->tmp.data;
+ kb->size = cbt->tmp.size;
+ cbt->rip_saved = rip;
+value:
/*
- * If the item was ever modified, use the WT_UPDATE data. Note that
- * the caller passes us the update: it has already resolved which one
+ * If the item was ever modified, use the WT_UPDATE data. Note the
+ * caller passes us the update: it has already resolved which one
* (if any) is visible.
- * Else, check for empty data.
- * Else, use the value from the original disk image.
*/
if (upd != NULL) {
vb->data = WT_UPDATE_DATA(upd);
vb->size = upd->size;
return (0);
}
- cell = key_unpacked ? unpack->value : __wt_row_leaf_value(page, rip);
+
+ /*
+ * Otherwise, find the value cell: if there isn't one, the value is
+ * empty; if there is, use the value from the original disk image.
+ */
+ cell = __wt_row_leaf_value(page, rip, key_unpacked ? unpack : NULL);
if (cell == NULL) {
vb->data = "";
vb->size = 0;