diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-10-19 14:45:07 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2012-10-19 14:45:07 +1100 |
commit | a064c45bfc945152dc178d5c3a35889f87eddc9e (patch) | |
tree | f7d71cf1815d855021b649e0f493128faefde877 | |
parent | 9eb9313ee2594bc1208511b9416e5c52cd0d75af (diff) | |
download | mongo-a064c45bfc945152dc178d5c3a35889f87eddc9e.tar.gz |
Fix LSM index searches.
The main issue was LSM search_near was not always returning the closest key to
the search key, which calling code expects. It now tries hard to find the
smallest cursor larger than the search key, and only if no larger record exists
does it return the largest record smaller than the search key.
While in the area, clean up the index search code and make it slightly more
efficient.
-rw-r--r-- | src/cursor/cur_index.c | 24 | ||||
-rw-r--r-- | src/lsm/lsm_cursor.c | 43 |
2 files changed, 42 insertions, 25 deletions
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index 52f9f01e26c..a3a34105e8c 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -191,31 +191,22 @@ __curindex_reset(WT_CURSOR *cursor) static int __curindex_search(WT_CURSOR *cursor) { + WT_CURSOR *child; WT_CURSOR_INDEX *cindex; WT_DECL_RET; - WT_ITEM *oldkeyp; WT_SESSION_IMPL *session; int exact; cindex = (WT_CURSOR_INDEX *)cursor; + child = cindex->child; CURSOR_API_CALL(cursor, session, search, NULL); /* * We expect partial matches, but we want the smallest item that * matches the prefix. Fail if there is no matching item. */ - - /* - * Take a copy of the search key. - * XXX - * We can avoid this with a cursor flag indicating when the application - * owns the data. - */ - WT_ERR(__wt_scr_alloc(session, cursor->key.size, &oldkeyp)); - memcpy(oldkeyp->mem, cursor->key.data, cursor->key.size); - oldkeyp->size = cursor->key.size; - - WT_ERR(cursor->search_near(cursor, &exact)); + __wt_cursor_set_raw_key(child, &cursor->key); + WT_ERR(child->search_near(child, &exact)); /* * We expect partial matches, and want the smallest record with a key @@ -225,10 +216,10 @@ __curindex_search(WT_CURSOR *cursor) * but we don't disallow that (odd) case. */ if (exact < 0) - WT_ERR(cursor->next(cursor)); + WT_ERR(child->next(child)); - if (cursor->key.size < oldkeyp->size || - memcmp(oldkeyp->data, cursor->key.data, oldkeyp->size) != 0) { + if (child->key.size < cursor->key.size || + memcmp(child->key.data, cursor->key.data, cursor->key.size) != 0) { ret = WT_NOTFOUND; goto err; } @@ -238,7 +229,6 @@ __curindex_search(WT_CURSOR *cursor) if (0) { err: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); } - __wt_scr_free(&oldkeyp); API_END(session); return (ret); diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index cc10837cc81..a7897ba5c6a 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -236,10 +236,10 @@ __clsm_get_current( if (!F_ISSET(c, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET)) continue; if (current == NULL) { - cmp = (smallest ? -1 : 1); - } else - WT_RET(WT_LSM_CURCMP(session, - clsm->lsm_tree, c, current, cmp)); + current = c; + continue; + } + WT_RET(WT_LSM_CURCMP(session, clsm->lsm_tree, c, current, cmp)); if (smallest ? cmp < 0 : cmp > 0) { current = c; multiple = 0; @@ -510,6 +510,7 @@ __clsm_search(WT_CURSOR *cursor) WT_LSM_ENTER(clsm, cursor, session, search); WT_CURSOR_NEEDKEY(cursor); + F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV); FORALL_CURSORS(clsm, c, i) { /* If there is a Bloom filter, see if we can skip the read. */ if ((bloom = clsm->blooms[i]) != NULL) { @@ -568,6 +569,7 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) WT_LSM_ENTER(clsm, cursor, session, search_near); WT_CURSOR_NEEDKEY(cursor); + F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV); /* * search_near is somewhat fiddly: we can't just return a nearby key @@ -599,6 +601,21 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) } /* + * Prefer larger cursors. There are two reasons: (1) we expect + * prefix searches to be a common case (as in our own indices); + * and (2) we need a way to unambiguously know we have the + * "closest" result. + */ + if (cmp < 0) { + if ((ret = c->next(c)) == 0) + cmp = 1; + else if (ret == WT_NOTFOUND) + ret = c->prev(c); + if (ret != 0) + goto err; + } + + /* * If we land on a deleted item, try going forwards or * backwards to find one that isn't deleted. */ @@ -616,6 +633,16 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) WT_ERR_NOTFOUND_OK(ret); if (deleted) continue; + + /* + * We are trying to find the smallest cursor greater than the + * search key, or, if there is no larger key, the largest + * cursor smaller than the search key. + * + * It could happen that one cursor contains both of the closest + * records. In that case, we will track it in "larger", and it + * will be the one we finally choose. + */ if (cmp > 0) { if (larger == NULL) larger = c; @@ -637,12 +664,12 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) } } - if (smaller != NULL) { - clsm->current = smaller; - *exactp = -1; - } else if (larger != NULL) { + if (larger != NULL) { clsm->current = larger; *exactp = 1; + } else if (smaller != NULL) { + clsm->current = smaller; + *exactp = -1; } else ret = WT_NOTFOUND; |