diff options
Diffstat (limited to 'src/btree/row_srch.c')
-rw-r--r-- | src/btree/row_srch.c | 230 |
1 files changed, 9 insertions, 221 deletions
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index d4e82c458d4..9c3d467340e 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -486,14 +486,14 @@ leaf_only: if (insert && descend_right) { cbt->append_tree = 1; - if (page->pg_row_entries == 0) { - cbt->slot = WT_ROW_SLOT(page, page->pg_row_d); + if (page->entries == 0) { + cbt->slot = WT_ROW_SLOT(page, page->pg_row); F_SET(cbt, WT_CBT_SEARCH_SMALLEST); ins_head = WT_ROW_INSERT_SMALLEST(page); } else { cbt->slot = WT_ROW_SLOT(page, - page->pg_row_d + (page->pg_row_entries - 1)); + page->pg_row + (page->entries - 1)); ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot); } @@ -511,11 +511,11 @@ leaf_only: * doing the tests and error handling inside the loop costs about 5%. */ base = 0; - limit = page->pg_row_entries; + limit = page->entries; if (collator == NULL && srch_key->size <= WT_COMPARE_SHORT_MAXLEN) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - rip = page->pg_row_d + indx; + rip = page->pg_row + indx; WT_ERR( __wt_row_leaf_key(session, page, rip, item, true)); @@ -529,7 +529,7 @@ leaf_only: else if (collator == NULL) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - rip = page->pg_row_d + indx; + rip = page->pg_row + indx; WT_ERR( __wt_row_leaf_key(session, page, rip, item, true)); @@ -547,7 +547,7 @@ leaf_only: else for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - rip = page->pg_row_d + indx; + rip = page->pg_row + indx; WT_ERR( __wt_row_leaf_key(session, page, rip, item, true)); @@ -591,13 +591,13 @@ leaf_match: cbt->compare = 0; */ if (base == 0) { cbt->compare = 1; - cbt->slot = WT_ROW_SLOT(page, page->pg_row_d); + cbt->slot = WT_ROW_SLOT(page, page->pg_row); F_SET(cbt, WT_CBT_SEARCH_SMALLEST); ins_head = WT_ROW_INSERT_SMALLEST(page); } else { cbt->compare = -1; - cbt->slot = WT_ROW_SLOT(page, page->pg_row_d + (base - 1)); + cbt->slot = WT_ROW_SLOT(page, page->pg_row + (base - 1)); ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot); } @@ -623,215 +623,3 @@ leaf_match: cbt->compare = 0; err: WT_TRET(__wt_page_release(session, current, 0)); return (ret); } - -/* - * __wt_row_random_leaf -- - * Return a random key from a row-store leaf page. - */ -int -__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) -{ - WT_INSERT *ins, **start, **stop; - WT_INSERT_HEAD *ins_head; - WT_PAGE *page; - uint64_t samples; - uint32_t choice, entries, i; - int level; - - page = cbt->ref->page; - start = stop = NULL; /* [-Wconditional-uninitialized] */ - entries = 0; /* [-Wconditional-uninitialized] */ - - __cursor_pos_clear(cbt); - - /* If the page has disk-based entries, select from them. */ - if (page->pg_row_entries != 0) { - cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; - - /* - * The real row-store search function builds the key, so we - * have to as well. - */ - return (__wt_row_leaf_key(session, - page, page->pg_row_d + cbt->slot, cbt->tmp, false)); - } - - /* - * If the tree is new (and not empty), it might have a large insert - * list. - * - * Walk down the list until we find a level with at least 50 entries, - * that's where we'll start rolling random numbers. The value 50 is - * used to ignore levels with only a few entries, that is, levels which - * are potentially badly skewed. - */ - F_SET(cbt, WT_CBT_SEARCH_SMALLEST); - if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) - return (WT_NOTFOUND); - for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { - start = &ins_head->head[level]; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - - if (entries > 50) - break; - } - - /* - * If it's a tiny list and we went all the way to level 0, correct the - * level; entries is correctly set. - */ - if (level < 0) - level = 0; - - /* - * Step down the skip list levels, selecting a random chunk of the name - * space at each level. - */ - for (samples = entries; level > 0; samples += entries) { - /* - * There are (entries) or (entries + 1) chunks of the name space - * considered at each level. They are: between start and the 1st - * element, between the 1st and 2nd elements, and so on to the - * last chunk which is the name space after the stop element on - * the current level. This last chunk of name space may or may - * not be there: as we descend the levels of the skip list, this - * chunk may appear, depending if the next level down has - * entries logically after the stop point in the current level. - * We can't ignore those entries: because of the algorithm used - * to determine the depth of a skiplist, there may be a large - * number of entries "revealed" by descending a level. - * - * If the next level down has more items after the current stop - * point, there are (entries + 1) chunks to consider, else there - * are (entries) chunks. - */ - if (*(stop - 1) == NULL) - choice = __wt_random(&session->rnd) % entries; - else - choice = __wt_random(&session->rnd) % (entries + 1); - - if (choice == entries) { - /* - * We selected the name space after the stop element on - * this level. Set the start point to the current stop - * point, descend a level and move the stop element to - * the end of the list, that is, the end of the newly - * discovered name space, counting entries as we go. - */ - start = stop; - --start; - --level; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - } else { - /* - * We selected another name space on the level. Move the - * start pointer the selected number of entries forward - * to the start of the selected chunk (if the selected - * number is 0, start won't move). Set the stop pointer - * to the next element in the list and drop both start - * and stop down a level. - */ - for (i = 0; i < choice; ++i) - start = &(*start)->next[level]; - stop = &(*start)->next[level]; - - --start; - --stop; - --level; - - /* Count the entries in the selected name space. */ - for (entries = 0, - ins = *start; ins != *stop; ins = ins->next[level]) - ++entries; - } - } - - /* - * When we reach the bottom level, entries will already be set. Select - * a random entry from the name space and return it. - * - * It should be impossible for the entries count to be 0 at this point, - * but check for it out of paranoia and to quiet static testing tools. - */ - if (entries > 0) - entries = __wt_random(&session->rnd) % entries; - for (ins = *start; entries > 0; --entries) - ins = ins->next[0]; - - cbt->ins = ins; - cbt->ins_head = ins_head; - cbt->compare = 0; - - /* - * Random lookups in newly created collections can be slow if a page - * consists of a large skiplist. Schedule the page for eviction if we - * encounter a large skiplist. This worthwhile because applications - * that take a sample often take many samples, so the overhead of - * traversing the skip list each time accumulates to real time. - */ - if (samples > 5000) - __wt_page_evict_soon(session, cbt->ref); - - return (0); -} - -/* - * __wt_row_random_descent -- - * Find a random leaf page in a row-store tree. - */ -int -__wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - WT_PAGE_INDEX *pindex; - WT_REF *current, *descent; - - btree = S2BT(session); - current = NULL; - - if (0) { -restart: /* - * Discard the currently held page and restart the search from - * the root. - */ - WT_RET(__wt_page_release(session, current, 0)); - } - - /* Search the internal pages of the tree. */ - current = &btree->root; - for (;;) { - page = current->page; - if (page->type != WT_PAGE_ROW_INT) - break; - - WT_INTL_INDEX_GET(session, page, pindex); - descent = pindex->index[ - __wt_random(&session->rnd) % pindex->entries]; - - /* - * Swap the current page for the child page. If the page splits - * while we're retrieving it, restart the search at the root. - * - * On other error, simply return, the swap call ensures we're - * holding nothing on failure. - */ - if ((ret = __wt_page_swap( - session, current, descent, WT_READ_RESTART_OK)) == 0) { - current = descent; - continue; - } - if (ret == WT_RESTART) - goto restart; - return (ret); - } - - cbt->ref = current; - return (0); -} |