From 93ddd8e84235ce3b780f6bd5a834be068a1cbe70 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 6 Apr 2016 17:04:39 -0400 Subject: WT-2505: Review clang analyzer warnings Fix variable set but never read. --- src/cursor/cur_join.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 38a83217933..66c254c181e 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -454,7 +454,6 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) "join cursor has not yet been joined with any other " "cursors"); - je = &cjoin->entries[0]; jeend = &cjoin->entries[cjoin->entries_next]; /* -- cgit v1.2.1 From 1707b539730b25c00e15bc81a3247a200d72d625 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 14 Apr 2016 15:42:51 -0400 Subject: WT-2360 A large set of changes to implement disjunctive and nested joins. The join iterator now manages iteration at 'one level' of the join, as subjoins are iterated, new iterators are created, forming a stack. Added internal documentation in cursor.h. 
--- src/cursor/cur_join.c | 816 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 589 insertions(+), 227 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 38a83217933..020821886cc 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -8,8 +8,35 @@ #include "wt_internal.h" +static int __curjoin_entries_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, + WT_ITEM *, WT_CURSOR_JOIN_ITER *); +static int __curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *); +static int __curjoin_entry_iter_close_all(WT_CURSOR_JOIN_ITER *); +static bool __curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *); +static int __curjoin_entry_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, + WT_ITEM *, WT_CURSOR_JOIN_ITER *); +static int __curjoin_entry_member(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, + WT_ITEM *, WT_CURSOR_JOIN_ITER *); static int __curjoin_insert_endpoint(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **); +static int __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *, u_int); + +#define WT_CURJOIN_ITER_CONSUMED(iter) \ + ((iter)->entry_pos >= (iter)->entry_count) + +/* + * __wt_curjoin_joined -- + * Produce an error that this cursor is being used in a join call. 
+ */ +int +__wt_curjoin_joined(WT_CURSOR *cursor) +{ + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)cursor->session; + __wt_errx(session, "cursor is being used in a join"); + return (ENOTSUP); +} /* * __curjoin_entry_iter_init -- @@ -18,58 +45,92 @@ static int __curjoin_insert_endpoint(WT_SESSION_IMPL *, */ static int __curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ITER **iterp) + WT_CURSOR_JOIN_ITER **iterp) +{ + WT_CURSOR_JOIN_ITER *iter; + + *iterp = NULL; + WT_RET(__wt_calloc_one(session, iterp)); + iter = *iterp; + iter->cjoin = cjoin; + iter->session = session; + cjoin->iter = iter; + WT_RET(__curjoin_iter_set_entry(iter, 0)); + return (0); +} + +/* + * __curjoin_iter_set_entry -- + * Set the current entry for an iterator. + * + */ +static int +__curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos) { - WT_CURSOR *to_dup; + WT_CURSOR *c, *to_dup; + WT_CURSOR_JOIN *cjoin, *topjoin; + WT_CURSOR_JOIN_ENTRY *entry; WT_DECL_RET; + WT_SESSION_IMPL *session; + char *uri; const char *raw_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), "raw", NULL }; + iter->session, WT_SESSION_open_cursor), "raw", NULL }; const char *def_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), NULL }; - const char *urimain, **config; - char *mainbuf, *uri; - WT_CURSOR_JOIN_ITER *iter; + iter->session, WT_SESSION_open_cursor), NULL }; + const char **config; size_t size; - iter = NULL; - mainbuf = uri = NULL; - to_dup = entry->ends[0].cursor; + session = iter->session; + cjoin = iter->cjoin; + uri = NULL; + entry = iter->entry = &cjoin->entries[entry_pos]; + iter->positioned = false; + iter->entry_pos = entry_pos; + iter->end_pos = 0; - if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW)) - config = &raw_cfg[0]; - else - config = &def_cfg[0]; + iter->is_equal = (entry->ends_next == 1 && + WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ); + iter->end_skip = 
(entry->ends_next > 0 && + WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_GE) ? 1 : 0; + + iter->end_count = WT_MIN(1, entry->ends_next); + if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) { + iter->entry_count = cjoin->entries_next; + if (iter->is_equal) + iter->end_count = entry->ends_next; + } else + iter->entry_count = 1; + WT_ASSERT(iter->session, iter->entry_pos < iter->entry_count); - size = strlen(to_dup->internal_uri) + 3; - WT_ERR(__wt_calloc(session, size, 1, &uri)); - snprintf(uri, size, "%s()", to_dup->internal_uri); - urimain = cjoin->table->name; - if (cjoin->projection != NULL) { - size = strlen(urimain) + strlen(cjoin->projection) + 1; - WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); - snprintf(mainbuf, size, "%s%s", urimain, cjoin->projection); - urimain = mainbuf; - } + entry->stats.actual_count = 0; - WT_ERR(__wt_calloc_one(session, &iter)); - WT_ERR(__wt_open_cursor(session, uri, (WT_CURSOR *)cjoin, config, - &iter->cursor)); - WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor)); - WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, - &iter->main)); - iter->cjoin = cjoin; - iter->session = session; - iter->entry = entry; - iter->positioned = false; - iter->isequal = (entry->ends_next == 1 && - WT_CURJOIN_END_RANGE(&entry->ends[0]) == WT_CURJOIN_END_EQ); - *iterp = iter; + if (entry->subjoin == NULL) { + for (topjoin = iter->cjoin; topjoin->parent != NULL; + topjoin = topjoin->parent) + ; + to_dup = entry->ends[0].cursor; - if (0) { -err: __wt_free(session, iter); + if (F_ISSET((WT_CURSOR *)topjoin, WT_CURSTD_RAW)) + config = &raw_cfg[0]; + else + config = &def_cfg[0]; + + size = strlen(to_dup->internal_uri) + 3; + WT_ERR(__wt_calloc(session, size, 1, &uri)); + snprintf(uri, size, "%s()", to_dup->internal_uri); + if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) { + iter->cursor = NULL; + if (c != NULL) + WT_ERR(c->close(c)); + WT_ERR(__wt_open_cursor(session, uri, + (WT_CURSOR *)topjoin, config, 
&iter->cursor)); + } + WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor)); + } else if (iter->cursor != NULL) { + iter->cursor->close(iter->cursor); + iter->cursor = NULL; } - __wt_free(session, mainbuf); - __wt_free(session, uri); +err: __wt_free(session, uri); return (ret); } @@ -94,6 +155,45 @@ __curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, return (0); } +/* + * __curjoin_entry_iter_bump -- + * Called to advance the iterator to the next endpoint, + * which may in turn advance to the next entry. + * + */ +static int +__curjoin_entry_iter_bump(WT_CURSOR_JOIN_ITER *iter) +{ + WT_CURSOR_JOIN_ENTRY *entry; + WT_SESSION_IMPL *session; + + session = iter->session; + iter->positioned = false; + entry = iter->entry; + if (entry->subjoin == NULL && iter->is_equal && + ++iter->end_pos < iter->end_count) { + WT_RET(__wt_cursor_dup_position( + entry->ends[iter->end_pos].cursor, iter->cursor)); + return (0); + } + iter->end_pos = iter->end_count = iter->end_skip = 0; + if (entry->subjoin != NULL && entry->subjoin->iter != NULL) + WT_RET(__curjoin_entry_iter_close_all(entry->subjoin->iter)); + + if (++iter->entry_pos >= iter->entry_count) { + iter->entry = NULL; + return (0); + } + iter->entry = ++entry; + if (entry->subjoin != NULL) { + WT_RET(__curjoin_entry_iter_init(session, entry->subjoin, + &iter->child)); + return (0); + } + WT_RET(__curjoin_iter_set_entry(iter, iter->entry_pos)); + return (0); +} + /* * __curjoin_split_key -- * Copy the primary key from a cursor (either main table or index) @@ -156,11 +256,54 @@ __curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, static int __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) { - if (iter->positioned) - WT_RET(iter->cursor->next(iter->cursor)); - else + WT_CURSOR_JOIN *cjoin; + WT_CURSOR_JOIN_ENTRY *entry; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + cjoin = iter->cjoin; + session = iter->session; + + if (WT_CURJOIN_ITER_CONSUMED(iter)) + return 
(WT_NOTFOUND); +again: + entry = iter->entry; + if (entry->subjoin != NULL) { + if (iter->child == NULL) + WT_RET(__curjoin_entry_iter_init(session, + entry->subjoin, &iter->child)); + ret = __curjoin_entry_iter_next(iter->child, cursor); + if (ret == 0) { + /* The child did the work, we're done. */ + iter->curkey = &cursor->key; + iter->positioned = true; + return (ret); + } + else if (ret == WT_NOTFOUND) { + WT_RET(__curjoin_entry_iter_close_all(iter->child)); + entry->subjoin->iter = NULL; + iter->child = NULL; + WT_RET(__curjoin_entry_iter_bump(iter)); + ret = 0; + } + } else if (iter->positioned) { + ret = iter->cursor->next(iter->cursor); + if (ret == WT_NOTFOUND) { + WT_RET(__curjoin_entry_iter_bump(iter)); + ret = 0; + } else + WT_RET(ret); + } else iter->positioned = true; + if (WT_CURJOIN_ITER_CONSUMED(iter)) + return (WT_NOTFOUND); + + if (!__curjoin_entry_iter_ready(iter)) + goto again; + + WT_RET(ret); + /* * Set our key to the primary key, we'll also need this * to check membership. @@ -182,26 +325,27 @@ __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) static int __curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter) { - if (iter->positioned) { - WT_RET(iter->cursor->reset(iter->cursor)); - WT_RET(iter->main->reset(iter->main)); - WT_RET(__wt_cursor_dup_position( - iter->cjoin->entries[0].ends[0].cursor, iter->cursor)); - iter->positioned = false; - iter->entry->stats.actual_count = 0; - } + if (iter->child != NULL) + WT_RET(__curjoin_entry_iter_close_all(iter->child)); + WT_RET(__curjoin_iter_set_entry(iter, 0)); + iter->positioned = false; return (0); } /* * __curjoin_entry_iter_ready -- - * The iterator is positioned. + * Check the positioned flag for all nested iterators. 
* */ static bool __curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *iter) { - return (iter->positioned); + while (iter != NULL) { + if (!iter->positioned) + return (false); + iter = iter->child; + } + return (true); } /* @@ -216,10 +360,30 @@ __curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *iter) if (iter->cursor != NULL) WT_TRET(iter->cursor->close(iter->cursor)); - if (iter->main != NULL) - WT_TRET(iter->main->close(iter->main)); __wt_free(iter->session, iter); + return (ret); +} + +/* + * __curjoin_entry_iter_close_all -- + * Free the iterator and all of its children recursively. + * + */ +static int +__curjoin_entry_iter_close_all(WT_CURSOR_JOIN_ITER *iter) +{ + WT_CURSOR_JOIN *parent; + WT_DECL_RET; + if (iter->child) + WT_TRET(__curjoin_entry_iter_close_all(iter->child)); + iter->child = NULL; + WT_ASSERT(iter->session, iter->cjoin->parent == NULL || + iter->cjoin->parent->iter->child == iter); + if ((parent = iter->cjoin->parent) != NULL) + parent->iter->child = NULL; + iter->cjoin->iter = NULL; + WT_TRET(__curjoin_entry_iter_close(iter)); return (ret); } @@ -238,10 +402,10 @@ __curjoin_get_key(WT_CURSOR *cursor, ...) cjoin = (WT_CURSOR_JOIN *)cursor; va_start(ap, cursor); - CURSOR_API_CALL(cursor, session, get_key, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL); if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || - !__curjoin_entry_iter_ready(cjoin->iter)) + !cjoin->iter->positioned) WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()"); WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap)); @@ -258,23 +422,21 @@ static int __curjoin_get_value(WT_CURSOR *cursor, ...) 
{ WT_CURSOR_JOIN *cjoin; - WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; WT_SESSION_IMPL *session; va_list ap; cjoin = (WT_CURSOR_JOIN *)cursor; - iter = cjoin->iter; va_start(ap, cursor); - CURSOR_API_CALL(cursor, session, get_value, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL); if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || - !__curjoin_entry_iter_ready(iter)) + !cjoin->iter->positioned) WT_ERR_MSG(session, EINVAL, "join cursor must be advanced with next()"); - WT_ERR(__wt_curtable_get_valuev(iter->main, ap)); + WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap)); err: va_end(ap); API_END_RET(session, ret); @@ -354,7 +516,34 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, for (end = &entry->ends[skip]; end < endmax; end++) { WT_ERR(__wt_compare(session, collator, &curkey, &end->key, &cmp)); - if (!F_ISSET(end, WT_CURJOIN_END_LT)) { + if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) { + /* if condition satisfied, insert immediately */ + switch (WT_CURJOIN_END_RANGE(end)) { + case WT_CURJOIN_END_EQ: + if (cmp == 0) + goto insert; + break; + case WT_CURJOIN_END_GT: + if (cmp > 0) { + /* skip this check next time */ + skip = entry->ends_next; + goto insert; + } + break; + case WT_CURJOIN_END_GE: + if (cmp >= 0) + goto insert; + break; + case WT_CURJOIN_END_LT: + if (cmp < 0) + goto insert; + break; + case WT_CURJOIN_END_LE: + if (cmp <= 0) + goto insert; + break; + } + } else if (!F_ISSET(end, WT_CURJOIN_END_LT)) { if (cmp < 0 || (cmp == 0 && !F_ISSET(end, WT_CURJOIN_END_EQ))) goto advance; @@ -370,6 +559,14 @@ __curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, goto done; } } + /* + * Either it's a disjunction that hasn't satisfied any + * condition, or it's a conjunction that has satisfied all + * conditions. 
+ */ + if (F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) + goto advance; +insert: if (entry->index != NULL) { curvalue.data = (unsigned char *)curkey.data + curkey.size; @@ -432,69 +629,87 @@ __curjoin_endpoint_init_key(WT_SESSION_IMPL *session, } /* - * __curjoin_init_iter -- - * Initialize before any iteration. + * __curjoin_init_next -- + * Initialize the cursor join when the next function is first called. */ static int -__curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) +__curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + bool iterable) { WT_BLOOM *bloom; WT_DECL_RET; WT_CURSOR *origcur; WT_CURSOR_JOIN_ENTRY *je, *jeend, *je2; WT_CURSOR_JOIN_ENDPOINT *end; + char *mainbuf; const char *def_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), NULL }; const char *raw_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), "raw", NULL }; - uint32_t f, k; + const char **config, *proj, *urimain; + uint32_t f, k, size; + mainbuf = NULL; if (cjoin->entries_next == 0) WT_RET_MSG(session, EINVAL, "join cursor has not yet been joined with any other " "cursors"); + if (F_ISSET((WT_CURSOR *)cjoin, WT_CURSTD_RAW)) + config = &raw_cfg[0]; + else + config = &def_cfg[0]; + urimain = cjoin->table->name; + if ((proj = cjoin->projection) != NULL) { + size = strlen(urimain) + strlen(proj) + 1; + WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); + snprintf(mainbuf, size, "%s%s", urimain, proj); + urimain = mainbuf; + } + WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, + &cjoin->main)); + je = &cjoin->entries[0]; jeend = &cjoin->entries[cjoin->entries_next]; - /* - * For a single compare=le endpoint in the first iterated entry, - * construct a companion compare=ge endpoint that will actually - * be iterated. 
- */ - if (((je = cjoin->entries) != jeend) && - je->ends_next == 1 && F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) { - origcur = je->ends[0].cursor; - WT_RET(__curjoin_insert_endpoint(session, je, 0, &end)); - WT_RET(__wt_open_cursor(session, origcur->uri, - (WT_CURSOR *)cjoin, - F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg, - &end->cursor)); - WT_RET(end->cursor->next(end->cursor)); - end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | - WT_CURJOIN_END_OWN_CURSOR; - } - WT_RET(__curjoin_entry_iter_init(session, cjoin, je, &cjoin->iter)); - for (je = cjoin->entries; je < jeend; je++) { + if (je->subjoin != NULL) { + WT_ERR(__curjoin_init_next(session, je->subjoin, + iterable)); + continue; + } __wt_stat_join_init_single(&je->stats); + /* + * For a single compare=le/lt endpoint in any entry that may + * be iterated, construct a companion compare=ge endpoint + * that will actually be iterated. + */ + if (iterable && je->ends_next == 1 && + F_ISSET(&je->ends[0], WT_CURJOIN_END_LT)) { + origcur = je->ends[0].cursor; + WT_ERR(__curjoin_insert_endpoint(session, je, 0, &end)); + WT_ERR(__wt_open_cursor(session, origcur->uri, + (WT_CURSOR *)cjoin, + F_ISSET(origcur, WT_CURSTD_RAW) ? raw_cfg : def_cfg, + &end->cursor)); + end->flags = WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ | + WT_CURJOIN_END_OWN_CURSOR; + WT_ERR(end->cursor->next(end->cursor)); + F_CLR(je, WT_CURJOIN_ENTRY_DISJUNCTION); + } for (end = &je->ends[0]; end < &je->ends[je->ends_next]; end++) - WT_RET(__curjoin_endpoint_init_key(session, je, end)); + WT_ERR(__curjoin_endpoint_init_key(session, je, end)); /* - * The first entry is iterated as the 'outermost' cursor. - * For the common GE case, we don't have to test against - * the left reference key, we know it will be true since - * the btree is ordered. + * Do any needed Bloom filter initialization. Ignore Bloom + * filters for entries that will be iterated. 
They won't + * help since these entries either don't need an inclusion + * check or are doing any needed check during the iteration. */ - if (je == cjoin->entries && je->ends[0].flags == - (WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ)) - F_SET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT); - - if (F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { + if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) - WT_RET_MSG(session, EINVAL, + WT_ERR_MSG(session, EINVAL, "join cursors with Bloom filters cannot be " "used with read-uncommitted isolation"); if (je->bloom == NULL) { @@ -516,10 +731,10 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) } je->bloom_bit_count = f; je->bloom_hash_count = k; - WT_RET(__wt_bloom_create(session, NULL, + WT_ERR(__wt_bloom_create(session, NULL, NULL, je->count, f, k, &je->bloom)); F_SET(je, WT_CURJOIN_ENTRY_OWN_BLOOM); - WT_RET(__curjoin_init_bloom(session, cjoin, + WT_ERR(__curjoin_init_bloom(session, cjoin, je, je->bloom)); /* * Share the Bloom filter, making all @@ -541,22 +756,61 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) * merge into the shared one. The Bloom * parameters of the two filters must match. */ - WT_RET(__wt_bloom_create(session, NULL, + WT_ERR(__wt_bloom_create(session, NULL, NULL, je->count, je->bloom_bit_count, je->bloom_hash_count, &bloom)); - WT_RET(__curjoin_init_bloom(session, cjoin, + WT_ERR(__curjoin_init_bloom(session, cjoin, je, bloom)); - WT_RET(__wt_bloom_intersection(je->bloom, + WT_ERR(__wt_bloom_intersection(je->bloom, bloom)); - WT_RET(__wt_bloom_close(bloom)); + WT_ERR(__wt_bloom_close(bloom)); } } + if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + iterable = false; } - F_SET(cjoin, WT_CURJOIN_INITIALIZED); + +err: __wt_free(session, mainbuf); return (ret); } +/* + * __curjoin_entries_in_range -- + * Check if a key is in the range specified by the remaining entries, + * returning WT_NOTFOUND if not. 
+ */ +static int +__curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg) +{ + WT_CURSOR_JOIN_ENTRY *entry; + WT_CURSOR_JOIN_ITER *iter; + WT_DECL_RET; + int fastret, slowret; + u_int pos; + + iter = iterarg; + if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) { + fastret = 0; + slowret = WT_NOTFOUND; + } else { + fastret = WT_NOTFOUND; + slowret = 0; + } + pos = (iter == NULL ? 0 : iter->entry_pos); + for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next; + entry++, pos++) { + ret = __curjoin_entry_member(session, entry, curkey, iter); + if (ret == fastret) + return (fastret); + if (ret != slowret) + WT_ERR(ret); + iter = NULL; + } +err: return (ret == 0 ? slowret : ret); +} + /* * __curjoin_entry_in_range -- * Check if a key is in the range specified by the entry, returning @@ -564,32 +818,80 @@ __curjoin_init_iter(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin) */ static int __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, - WT_ITEM *curkey, bool skip_left) + WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter) { WT_COLLATOR *collator; WT_CURSOR_JOIN_ENDPOINT *end, *endmax; + bool disjunction, passed; int cmp; + u_int pos; collator = (entry->index != NULL) ? entry->index->collator : NULL; endmax = &entry->ends[entry->ends_next]; - for (end = &entry->ends[skip_left ? 1 : 0]; end < endmax; end++) { + disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION); + + /* + * The iterator may have already satisfied some endpoint conditions. + * If so and we're a disjunction, we're done. If so and we're a + * conjunction, we can start past the satisfied conditions. 
+ */ + if (iter == NULL) + pos = 0; + else { + if (disjunction && iter->end_skip) + return (0); + pos = iter->end_pos + iter->end_skip; + } + + for (end = &entry->ends[pos]; end < endmax; end++) { WT_RET(__wt_compare(session, collator, curkey, &end->key, &cmp)); - if (!F_ISSET(end, WT_CURJOIN_END_LT)) { - if (cmp < 0 || - (cmp == 0 && - !F_ISSET(end, WT_CURJOIN_END_EQ)) || - (cmp > 0 && !F_ISSET(end, WT_CURJOIN_END_GT))) - WT_RET(WT_NOTFOUND); - } else { - if (cmp > 0 || - (cmp == 0 && - !F_ISSET(end, WT_CURJOIN_END_EQ)) || - (cmp < 0 && !F_ISSET(end, WT_CURJOIN_END_LT))) - WT_RET(WT_NOTFOUND); + switch (WT_CURJOIN_END_RANGE(end)) { + case WT_CURJOIN_END_EQ: + passed = (cmp == 0); + break; + + case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ: + passed = (cmp >= 0); + WT_ASSERT(session, iter == NULL); + break; + + case WT_CURJOIN_END_GT: + passed = (cmp > 0); + if (passed && iter != NULL && pos == 0) + iter->end_skip = 1; + break; + + case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ: + passed = (cmp <= 0); + break; + + case WT_CURJOIN_END_LT: + passed = (cmp < 0); + break; + + default: + WT_RET(__wt_illegal_value(session, NULL)); + break; } + + if (!passed) { + if (iter != NULL && + (iter->is_equal || + F_ISSET(end, WT_CURJOIN_END_LT))) { + WT_RET(__curjoin_entry_iter_bump(iter)); + return (WT_NOTFOUND); + } + if (!disjunction) + return (WT_NOTFOUND); + iter = NULL; + } else if (disjunction) + break; } - return (0); + if (disjunction && end == endmax) + return (WT_NOTFOUND); + else + return (0); } typedef struct { @@ -643,8 +945,8 @@ __curjoin_extract_insert(WT_CURSOR *cursor) { * if not a member, returns WT_NOTFOUND. 
*/ static int -__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_CURSOR_JOIN_ENTRY *entry, bool skip_left) +__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter) { WT_CURJOIN_EXTRACTOR extract_cursor; WT_CURSOR *c; @@ -667,12 +969,15 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, __wt_cursor_notsup); /* close */ WT_DECL_RET; WT_INDEX *idx; - WT_ITEM *key, v; + WT_ITEM v; bool bloom_found; - if (skip_left && entry->ends_next == 1) + if (entry->subjoin == NULL && iter != NULL && + (iter->end_pos + iter->end_skip >= entry->ends_next || + (iter->end_skip > 0 && + F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)))) return (0); /* no checks to make */ - key = cjoin->iter->curkey; + entry->stats.accesses++; bloom_found = false; @@ -693,14 +998,26 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_ERR(__wt_bloom_inmem_get(entry->bloom, key)); bloom_found = true; } + if (entry->subjoin != NULL) { + WT_ASSERT(session, + iter == NULL || entry->subjoin == iter->child->cjoin); + ret = __curjoin_entries_in_range(session, entry->subjoin, + key, iter == NULL ? NULL : iter->child); + if (iter != NULL && + WT_CURJOIN_ITER_CONSUMED(iter->child)) { + WT_ERR(__curjoin_entry_iter_bump(iter)); + ret = WT_NOTFOUND; + } + return (ret); + } if (entry->index != NULL) { /* * If this entry is used by the iterator, then we already - * have the index key, and we won't have to do any extraction - * either. + * have the index key, and we won't have to do any + * extraction either. */ - if (entry == cjoin->iter->entry) - WT_ITEM_SET(v, cjoin->iter->idxkey); + if (iter != NULL && entry == iter->entry) + WT_ITEM_SET(v, iter->idxkey); else { memset(&v, 0, sizeof(v)); /* Keep lint quiet. 
*/ c = entry->main; @@ -717,7 +1034,7 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_ITEM_SET(v, *key); if ((idx = entry->index) != NULL && idx->extractor != NULL && - entry != cjoin->iter->entry) { + (iter == NULL || entry != iter->entry)) { WT_CLEAR(extract_cursor); extract_cursor.iface = iface; extract_cursor.iface.session = &session->iface; @@ -729,7 +1046,7 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (!extract_cursor.ismember) WT_ERR(WT_NOTFOUND); } else - WT_ERR(__curjoin_entry_in_range(session, entry, &v, skip_left)); + WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter)); if (0) { err: if (ret == WT_NOTFOUND && bloom_found) @@ -750,48 +1067,29 @@ __curjoin_next(WT_CURSOR *cursor) WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; WT_SESSION_IMPL *session; - bool skip_left; - u_int i; + int tret; cjoin = (WT_CURSOR_JOIN *)cursor; - CURSOR_API_CALL(cursor, session, next, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, next, NULL); if (F_ISSET(cjoin, WT_CURJOIN_ERROR)) WT_ERR_MSG(session, WT_ERROR, "join cursor encountered previous error"); if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED)) - WT_ERR(__curjoin_init_iter(session, cjoin)); - - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_ERR(__curjoin_init_next(session, cjoin, true)); + if (cjoin->iter == NULL) + WT_ERR(__curjoin_entry_iter_init(session, cjoin, &cjoin->iter)); iter = cjoin->iter; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); -nextkey: - if ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) { - F_SET(cursor, WT_CURSTD_KEY_EXT); - - /* - * We may have already established membership for the - * 'left' case for the first entry, since we're - * using that in our iteration. 
- */ - skip_left = F_ISSET(cjoin, WT_CURJOIN_SKIP_FIRST_LEFT); - for (i = 0; i < cjoin->entries_next; i++) { - ret = __curjoin_entry_member(session, cjoin, - &cjoin->entries[i], skip_left); - if (ret == WT_NOTFOUND) { - /* - * If this is compare=eq on our outer iterator, - * and we've moved past it, we're done. - */ - if (iter->isequal && i == 0) - break; - goto nextkey; - } - skip_left = false; - WT_ERR(ret); - } - } else if (ret != WT_NOTFOUND) + while ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) { + if ((ret = __curjoin_entries_in_range(session, cjoin, + iter->curkey, iter)) != WT_NOTFOUND) + break; + } + iter->positioned = (ret == 0); + if (ret != 0 && ret != WT_NOTFOUND) WT_ERR(ret); if (ret == 0) { @@ -799,12 +1097,13 @@ nextkey: * Position the 'main' cursor, this will be used to * retrieve values from the cursor join. */ - c = iter->main; + c = cjoin->main; c->set_key(c, iter->curkey); - if ((ret = c->search(c)) != 0) - WT_ERR(c->search(c)); + WT_ERR(c->search(c)); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); - } + } else if (ret == WT_NOTFOUND && + (tret = __curjoin_entry_iter_close_all(iter)) != 0) + WT_ERR(tret); if (0) { err: F_SET(cjoin, WT_CURJOIN_ERROR); @@ -825,9 +1124,9 @@ __curjoin_reset(WT_CURSOR *cursor) cjoin = (WT_CURSOR_JOIN *)cursor; - CURSOR_API_CALL(cursor, session, reset, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, reset, NULL); - if (F_ISSET(cjoin, WT_CURJOIN_INITIALIZED)) + if (cjoin->iter != NULL) WT_ERR(__curjoin_entry_iter_reset(cjoin->iter)); err: API_END_RET(session, ret); @@ -849,7 +1148,7 @@ __curjoin_close(WT_CURSOR *cursor) cjoin = (WT_CURSOR_JOIN *)cursor; - CURSOR_API_CALL(cursor, session, close, NULL); + JOINABLE_CURSOR_API_CALL(cursor, session, close, NULL); __wt_schema_release_table(session, cjoin->table); /* These are owned by the table */ @@ -862,6 +1161,10 @@ __curjoin_close(WT_CURSOR *cursor) for (entry = cjoin->entries, i = 0; i < cjoin->entries_next; entry++, i++) { + if (entry->subjoin != 
NULL) { + F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED); + entry->subjoin->parent = NULL; + } if (entry->main != NULL) WT_TRET(entry->main->close(entry->main)); if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) @@ -877,7 +1180,10 @@ __curjoin_close(WT_CURSOR *cursor) } if (cjoin->iter != NULL) - WT_TRET(__curjoin_entry_iter_close(cjoin->iter)); + WT_TRET(__curjoin_entry_iter_close_all(cjoin->iter)); + if (cjoin->main != NULL) + WT_TRET(cjoin->main->close(cjoin->main)); + __wt_free(session, cjoin->entries); WT_TRET(__wt_cursor_close(cursor)); @@ -967,6 +1273,52 @@ err: WT_TRET(__curjoin_close(cursor)); return (ret); } +/* + * __curjoin_open_main -- + * For the given index, open the main file with a projection + * that is the index keys. + */ +static int +__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_CURSOR_JOIN_ENTRY *entry) +{ + WT_DECL_RET; + WT_INDEX *idx; + char *main_uri, *newformat; + const char *raw_cfg[] = { WT_CONFIG_BASE( + session, WT_SESSION_open_cursor), "raw", NULL }; + size_t len, newsize; + + main_uri = NULL; + idx = entry->index; + + newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; + WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); + snprintf(main_uri, newsize, "%s%.*s", + cjoin->table->name, (int)idx->colconf.len, + idx->colconf.str); + WT_ERR(__wt_open_cursor(session, main_uri, + (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); + if (idx->extractor == NULL) { + /* + * Add no-op padding so trailing 'u' formats are not + * transformed to 'U'. This matches what happens in + * the index. We don't do this when we have an + * extractor, extractors already use the padding + * byte trick. 
+ */ + len = strlen(entry->main->value_format) + 3; + WT_ERR(__wt_calloc(session, len, 1, &newformat)); + snprintf(newformat, len, "%s0x", + entry->main->value_format); + __wt_free(session, entry->main->value_format); + entry->main->value_format = newformat; + } + +err: __wt_free(session, main_uri); + return (ret); +} + /* * __wt_curjoin_join -- * Add a new join to a join cursor. @@ -979,31 +1331,51 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_INDEX *cindex; WT_CURSOR_JOIN_ENDPOINT *end; WT_CURSOR_JOIN_ENTRY *entry; + WT_CURSOR_JOIN *child; WT_DECL_RET; - bool hasins, needbloom, range_eq; - char *main_uri, *newformat; - const char *raw_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), "raw", NULL }; - size_t len, newsize; + bool hasins, needbloom, nested, range_eq; + size_t len; u_int i, ins, nonbloom; + uint8_t endrange; entry = NULL; hasins = needbloom = false; ins = 0; /* -Wuninitialized */ - main_uri = NULL; nonbloom = 0; /* -Wuninitialized */ - for (i = 0; i < cjoin->entries_next; i++) { - if (cjoin->entries[i].index == idx) { - entry = &cjoin->entries[i]; - break; - } - if (!needbloom && i > 0 && - !F_ISSET(&cjoin->entries[i], WT_CURJOIN_ENTRY_BLOOM)) { - needbloom = true; - nonbloom = i; + if (cjoin->entries_next == 0) { + if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION)) + F_SET(cjoin, WT_CURJOIN_DISJUNCTION); + } else if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && + !F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + WT_ERR_MSG(session, EINVAL, + "operation=or does not match previous operation=and"); + else if (!LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && + F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + WT_ERR_MSG(session, EINVAL, + "operation=and does not match previous operation=or"); + + nested = WT_PREFIX_MATCH(ref_cursor->uri, "join:"); + if (!nested) + for (i = 0; i < cjoin->entries_next; i++) { + if (cjoin->entries[i].index == idx && + cjoin->entries[i].subjoin == NULL) { + entry = &cjoin->entries[i]; + break; + } + if 
(!needbloom && i > 0 && + !F_ISSET(&cjoin->entries[i], + WT_CURJOIN_ENTRY_BLOOM)) { + needbloom = true; + nonbloom = i; + } } + else { + if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)) + WT_ERR_MSG(session, EINVAL, + "Bloom filters cannot be used with subjoins"); } + if (entry == NULL) { WT_ERR(__wt_realloc_def(session, &cjoin->entries_allocated, cjoin->entries_next + 1, &cjoin->entries)); @@ -1063,17 +1435,18 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, for (i = 0; i < entry->ends_next; i++) { end = &entry->ends[i]; range_eq = (range == WT_CURJOIN_END_EQ); + endrange = WT_CURJOIN_END_RANGE(end); if ((F_ISSET(end, WT_CURJOIN_END_GT) && ((range & WT_CURJOIN_END_GT) != 0 || range_eq)) || (F_ISSET(end, WT_CURJOIN_END_LT) && ((range & WT_CURJOIN_END_LT) != 0 || range_eq)) || - (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ && + (endrange == WT_CURJOIN_END_EQ && (range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT)) != 0)) WT_ERR_MSG(session, EINVAL, "join has overlapping ranges"); if (range == WT_CURJOIN_END_EQ && - WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ && + endrange == WT_CURJOIN_END_EQ && !F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) WT_ERR_MSG(session, EINVAL, "compare=eq can only be combined " @@ -1086,6 +1459,7 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (!hasins && ((range & WT_CURJOIN_END_GT) != 0 || (range == WT_CURJOIN_END_EQ && + endrange != WT_CURJOIN_END_EQ && !F_ISSET(end, WT_CURJOIN_END_GT)))) { ins = i; hasins = true; @@ -1098,50 +1472,38 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, entry->bloom_hash_count = WT_MAX(entry->bloom_hash_count, bloom_hash_count); } - WT_ERR(__curjoin_insert_endpoint(session, entry, - hasins ? ins : entry->ends_next, &end)); - end->cursor = ref_cursor; - F_SET(end, range); - - /* Open the main file with a projection of the indexed columns. 
*/ - if (entry->main == NULL && idx != NULL) { - newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; - WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); - snprintf(main_uri, newsize, "%s%.*s", - cjoin->table->name, (int)idx->colconf.len, - idx->colconf.str); - WT_ERR(__wt_open_cursor(session, main_uri, - (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); - if (idx->extractor == NULL) { + if (nested) { + child = (WT_CURSOR_JOIN *)ref_cursor; + entry->subjoin = child; + child->parent = cjoin; + } else { + WT_ERR(__curjoin_insert_endpoint(session, entry, + hasins ? ins : entry->ends_next, &end)); + end->cursor = ref_cursor; + F_SET(end, range); + + if (entry->main == NULL && idx != NULL) { /* - * Add no-op padding so trailing 'u' formats are not - * transformed to 'U'. This matches what happens in - * the index. We don't do this when we have an - * extractor, extractors already use the padding - * byte trick. + * Open the main file with a projection of the + * indexed columns. */ - len = strlen(entry->main->value_format) + 3; - WT_ERR(__wt_calloc(session, len, 1, &newformat)); - snprintf(newformat, len, "%s0x", - entry->main->value_format); - __wt_free(session, entry->main->value_format); - entry->main->value_format = newformat; - } + WT_ERR(__curjoin_open_main(session, cjoin, entry)); - /* - * When we are repacking index keys to remove the primary - * key, we never want to transform trailing 'u'. Use no-op - * padding to force this. - */ - cindex = (WT_CURSOR_INDEX *)ref_cursor; - len = strlen(cindex->iface.key_format) + 3; - WT_ERR(__wt_calloc(session, len, 1, &entry->repack_format)); - snprintf(entry->repack_format, len, "%s0x", - cindex->iface.key_format); + /* + * When we are repacking index keys to remove the + * primary key, we never want to transform trailing + * 'u'. Use no-op padding to force this. 
+ */ + cindex = (WT_CURSOR_INDEX *)ref_cursor; + len = strlen(cindex->iface.key_format) + 3; + WT_ERR(__wt_calloc(session, len, 1, + &entry->repack_format)); + snprintf(entry->repack_format, len, "%s0x", + cindex->iface.key_format); + } } -err: __wt_free(session, main_uri); - return (ret); +err: return (ret); } /* -- cgit v1.2.1 From 7ea2715b83458d2700520603d144185973ee9106 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 14 Apr 2016 16:58:18 -0400 Subject: WT-2360 Removed an unneeded variable. --- src/cursor/cur_join.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 7d0b271f6e7..3d542841026 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -256,12 +256,10 @@ __curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, static int __curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) { - WT_CURSOR_JOIN *cjoin; WT_CURSOR_JOIN_ENTRY *entry; WT_DECL_RET; WT_SESSION_IMPL *session; - cjoin = iter->cjoin; session = iter->session; if (WT_CURJOIN_ITER_CONSUMED(iter)) -- cgit v1.2.1 From 359f88e25d55e0da57c9a858391066b89de13585 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 14 Apr 2016 17:07:57 -0400 Subject: WT-2360 Fixed uninitialized variable. --- src/cursor/cur_join.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 3d542841026..530d60de2c7 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -825,6 +825,7 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, collator = (entry->index != NULL) ? entry->index->collator : NULL; endmax = &entry->ends[entry->ends_next]; disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION); + passed = false; /* * The iterator may have already satisfied some endpoint conditions. 
-- cgit v1.2.1 From d34e4163cc23ad66476acf1e0c16de4af89a7c74 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Mon, 18 Apr 2016 10:16:09 -0400 Subject: WT-2549 In a join cursor, a key that passes the conditions is in 'raw' format, and when the join cursor is not raw, it must be converted to 64bit in the recno case. --- src/cursor/cur_join.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 66c254c181e..1ee26716212 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -751,6 +751,7 @@ __curjoin_next(WT_CURSOR *cursor) WT_SESSION_IMPL *session; bool skip_left; u_int i; + const uint8_t *p; cjoin = (WT_CURSOR_JOIN *)cursor; @@ -799,7 +800,14 @@ nextkey: * retrieve values from the cursor join. */ c = iter->main; - c->set_key(c, iter->curkey); + if (WT_CURSOR_RECNO(cursor) && + !F_ISSET(cursor, WT_CURSTD_RAW)) { + p = (const uint8_t *)iter->curkey->data; + WT_ERR(__wt_vunpack_uint(&p, iter->curkey->size, + &cjoin->iface.recno)); + c->set_key(c, cjoin->iface.recno); + } else + c->set_key(c, iter->curkey); if ((ret = c->search(c)) != 0) WT_ERR(c->search(c)); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); -- cgit v1.2.1 From 235aba3c301a366cc28efe0c862d66b80894233c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 21 Apr 2016 07:16:27 -0400 Subject: WT-2570: Minor lint cleanups. Don't jump to an error label if there's no work to do on exit. 
--- src/cursor/cur_join.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 60d7825249c..55689fee6f0 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -1338,7 +1338,6 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_JOIN_ENDPOINT *end; WT_CURSOR_JOIN_ENTRY *entry; WT_CURSOR_JOIN *child; - WT_DECL_RET; bool hasins, needbloom, nested, range_eq; size_t len; u_int i, ins, nonbloom; @@ -1346,19 +1345,18 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, entry = NULL; hasins = needbloom = false; - ins = 0; /* -Wuninitialized */ - nonbloom = 0; /* -Wuninitialized */ + ins = nonbloom = 0; /* -Wuninitialized */ if (cjoin->entries_next == 0) { if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION)) F_SET(cjoin, WT_CURJOIN_DISJUNCTION); } else if (LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && !F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "operation=or does not match previous operation=and"); else if (!LF_ISSET(WT_CURJOIN_ENTRY_DISJUNCTION) && F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "operation=and does not match previous operation=or"); nested = WT_PREFIX_MATCH(ref_cursor->uri, "join:"); @@ -1378,12 +1376,12 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, } else { if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "Bloom filters cannot be used with subjoins"); } if (entry == NULL) { - WT_ERR(__wt_realloc_def(session, &cjoin->entries_allocated, + WT_RET(__wt_realloc_def(session, &cjoin->entries_allocated, cjoin->entries_next + 1, &cjoin->entries)); if (LF_ISSET(WT_CURJOIN_ENTRY_BLOOM) && needbloom) { /* @@ -1412,13 +1410,13 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, } else { /* Merge the 
join into an existing entry for this index */ if (count != 0 && entry->count != 0 && entry->count != count) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "count=%" PRIu64 " does not match " "previous count=%" PRIu64 " for this index", count, entry->count); if (LF_MASK(WT_CURJOIN_ENTRY_BLOOM) != F_MASK(entry, WT_CURJOIN_ENTRY_BLOOM)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "join has incompatible strategy " "values for the same index"); @@ -1449,12 +1447,12 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, (endrange == WT_CURJOIN_END_EQ && (range & (WT_CURJOIN_END_LT | WT_CURJOIN_END_GT)) != 0)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "join has overlapping ranges"); if (range == WT_CURJOIN_END_EQ && endrange == WT_CURJOIN_END_EQ && !F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)) - WT_ERR_MSG(session, EINVAL, + WT_RET_MSG(session, EINVAL, "compare=eq can only be combined " "using operation=or"); @@ -1483,7 +1481,7 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, entry->subjoin = child; child->parent = cjoin; } else { - WT_ERR(__curjoin_insert_endpoint(session, entry, + WT_RET(__curjoin_insert_endpoint(session, entry, hasins ? ins : entry->ends_next, &end)); end->cursor = ref_cursor; F_SET(end, range); @@ -1493,7 +1491,7 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, * Open the main file with a projection of the * indexed columns. 
*/ - WT_ERR(__curjoin_open_main(session, cjoin, entry)); + WT_RET(__curjoin_open_main(session, cjoin, entry)); /* * When we are repacking index keys to remove the @@ -1502,14 +1500,13 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, */ cindex = (WT_CURSOR_INDEX *)ref_cursor; len = strlen(cindex->iface.key_format) + 3; - WT_ERR(__wt_calloc(session, len, 1, + WT_RET(__wt_calloc(session, len, 1, &entry->repack_format)); snprintf(entry->repack_format, len, "%s0x", cindex->iface.key_format); } } - -err: return (ret); + return (0); } /* -- cgit v1.2.1 From d7b4bfe4daa6a34d34dfd9004a5bac1f99e82915 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 21 Apr 2016 07:18:03 -0400 Subject: WT-2570: Minor lint cleanups. Don't ignore the return from the close function. --- src/cursor/cur_join.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 55689fee6f0..4d0354c9f09 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -127,9 +127,10 @@ __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos) } WT_ERR(__wt_cursor_dup_position(to_dup, iter->cursor)); } else if (iter->cursor != NULL) { - iter->cursor->close(iter->cursor); + WT_ERR(iter->cursor->close(iter->cursor)); iter->cursor = NULL; } + err: __wt_free(session, uri); return (ret); } -- cgit v1.2.1 From 59fd2970abb5169462f18779999f750f7c39d069 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 21 Apr 2016 07:19:36 -0400 Subject: WT-2570: Minor lint cleanups. Don't jump to an error label if there's no work to do (this one isn't so obvious, but it was easy to avoid the lint complaint). 
--- src/cursor/cur_join.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 4d0354c9f09..0ba94079d63 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -796,17 +796,18 @@ __curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, fastret = WT_NOTFOUND; slowret = 0; } - pos = (iter == NULL ? 0 : iter->entry_pos); + pos = iter == NULL ? 0 : iter->entry_pos; for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next; entry++, pos++) { ret = __curjoin_entry_member(session, entry, curkey, iter); if (ret == fastret) return (fastret); if (ret != slowret) - WT_ERR(ret); + break; iter = NULL; } -err: return (ret == 0 ? slowret : ret); + + return (ret == 0 ? slowret : ret); } /* -- cgit v1.2.1 From b5dc11b5ebd4c3ac63c3dbcb4c6cad2448469e89 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 22 Apr 2016 12:15:14 +1000 Subject: WT-2360 Fix a build warning. cursor/cur_join.c:664:41: error: implicit conversion loses integer precision: 'unsigned long' to 'uint32_t' (aka 'unsigned int') [-Werror,-Wshorten-64-to-32] --- src/cursor/cur_join.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 0ba94079d63..0388a05a34b 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -647,7 +647,8 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, const char *raw_cfg[] = { WT_CONFIG_BASE( session, WT_SESSION_open_cursor), "raw", NULL }; const char **config, *proj, *urimain; - uint32_t f, k, size; + size_t size; + uint32_t f, k; mainbuf = NULL; if (cjoin->entries_next == 0) -- cgit v1.2.1 From 76c952043ae961ffe3674408f3ebc6f4ccb612c1 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Fri, 22 Apr 2016 14:15:02 -0400 Subject: WT-2571 Cosmetic changes only. 1) Renamed internal functions to use shorter, clearer names. 
2) Reordered functions more sensibly. --- src/cursor/cur_join.c | 1338 +++++++++++++++++++++++++------------------------ 1 file changed, 671 insertions(+), 667 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 0388a05a34b..d03e53cf5ab 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -10,17 +10,21 @@ static int __curjoin_entries_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, WT_CURSOR_JOIN_ITER *); -static int __curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *); -static int __curjoin_entry_iter_close_all(WT_CURSOR_JOIN_ITER *); -static bool __curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *); static int __curjoin_entry_in_range(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, WT_ITEM *, WT_CURSOR_JOIN_ITER *); static int __curjoin_entry_member(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, WT_ITEM *, WT_CURSOR_JOIN_ITER *); static int __curjoin_insert_endpoint(WT_SESSION_IMPL *, WT_CURSOR_JOIN_ENTRY *, u_int, WT_CURSOR_JOIN_ENDPOINT **); +static int __curjoin_iter_close(WT_CURSOR_JOIN_ITER *); +static int __curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *); +static bool __curjoin_iter_ready(WT_CURSOR_JOIN_ITER *); static int __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *, u_int); - +static int __curjoin_pack_recno(WT_SESSION_IMPL *, uint64_t, uint8_t *, + size_t, WT_ITEM *); +static int __curjoin_split_key(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, + WT_CURSOR *, WT_CURSOR *, const char *, bool); + #define WT_CURJOIN_ITER_CONSUMED(iter) \ ((iter)->entry_pos >= (iter)->entry_count) @@ -39,12 +43,12 @@ __wt_curjoin_joined(WT_CURSOR *cursor) } /* - * __curjoin_entry_iter_init -- + * __curjoin_iter_init -- * Initialize an iteration for the index managed by a join entry. 
* */ static int -__curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, +__curjoin_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_CURSOR_JOIN_ITER **iterp) { WT_CURSOR_JOIN_ITER *iter; @@ -59,6 +63,76 @@ __curjoin_entry_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, return (0); } +/* + * __curjoin_iter_close -- + * Close the iteration, release resources. + * + */ +static int +__curjoin_iter_close(WT_CURSOR_JOIN_ITER *iter) +{ + WT_DECL_RET; + + if (iter->cursor != NULL) + WT_TRET(iter->cursor->close(iter->cursor)); + __wt_free(iter->session, iter); + return (ret); +} + +/* + * __curjoin_iter_close_all -- + * Free the iterator and all of its children recursively. + * + */ +static int +__curjoin_iter_close_all(WT_CURSOR_JOIN_ITER *iter) +{ + WT_CURSOR_JOIN *parent; + WT_DECL_RET; + + if (iter->child) + WT_TRET(__curjoin_iter_close_all(iter->child)); + iter->child = NULL; + WT_ASSERT(iter->session, iter->cjoin->parent == NULL || + iter->cjoin->parent->iter->child == iter); + if ((parent = iter->cjoin->parent) != NULL) + parent->iter->child = NULL; + iter->cjoin->iter = NULL; + WT_TRET(__curjoin_iter_close(iter)); + return (ret); +} + +/* + * __curjoin_iter_reset -- + * Reset an iteration to the starting point. + * + */ +static int +__curjoin_iter_reset(WT_CURSOR_JOIN_ITER *iter) +{ + if (iter->child != NULL) + WT_RET(__curjoin_iter_close_all(iter->child)); + WT_RET(__curjoin_iter_set_entry(iter, 0)); + iter->positioned = false; + return (0); +} + +/* + * __curjoin_iter_ready -- + * Check the positioned flag for all nested iterators. + * + */ +static bool +__curjoin_iter_ready(WT_CURSOR_JOIN_ITER *iter) +{ + while (iter != NULL) { + if (!iter->positioned) + return (false); + iter = iter->child; + } + return (true); +} + /* * __curjoin_iter_set_entry -- * Set the current entry for an iterator. 
@@ -136,34 +210,13 @@ err: __wt_free(session, uri); } /* - * __curjoin_pack_recno -- - * Pack the given recno into a buffer; prepare an item referencing it. - * - */ -static int -__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, - size_t bufsize, WT_ITEM *item) -{ - WT_SESSION *wtsession; - size_t sz; - - wtsession = (WT_SESSION *)session; - WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r)); - WT_ASSERT(session, sz < bufsize); - WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r)); - item->size = sz; - item->data = buf; - return (0); -} - -/* - * __curjoin_entry_iter_bump -- + * __curjoin_iter_bump -- * Called to advance the iterator to the next endpoint, * which may in turn advance to the next entry. * */ static int -__curjoin_entry_iter_bump(WT_CURSOR_JOIN_ITER *iter) +__curjoin_iter_bump(WT_CURSOR_JOIN_ITER *iter) { WT_CURSOR_JOIN_ENTRY *entry; WT_SESSION_IMPL *session; @@ -179,7 +232,7 @@ __curjoin_entry_iter_bump(WT_CURSOR_JOIN_ITER *iter) } iter->end_pos = iter->end_count = iter->end_skip = 0; if (entry->subjoin != NULL && entry->subjoin->iter != NULL) - WT_RET(__curjoin_entry_iter_close_all(entry->subjoin->iter)); + WT_RET(__curjoin_iter_close_all(entry->subjoin->iter)); if (++iter->entry_pos >= iter->entry_count) { iter->entry = NULL; @@ -187,7 +240,7 @@ __curjoin_entry_iter_bump(WT_CURSOR_JOIN_ITER *iter) } iter->entry = ++entry; if (entry->subjoin != NULL) { - WT_RET(__curjoin_entry_iter_init(session, entry->subjoin, + WT_RET(__curjoin_iter_init(session, entry->subjoin, &iter->child)); return (0); } @@ -196,66 +249,12 @@ __curjoin_entry_iter_bump(WT_CURSOR_JOIN_ITER *iter) } /* - * __curjoin_split_key -- - * Copy the primary key from a cursor (either main table or index) - * to another cursor. When copying from an index file, the index - * key is also returned. 
- * - */ -static int -__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur, - const char *repack_fmt, bool isindex) -{ - WT_CURSOR *firstcg_cur; - WT_CURSOR_INDEX *cindex; - WT_ITEM *keyp; - const uint8_t *p; - - if (isindex) { - cindex = ((WT_CURSOR_INDEX *)fromcur); - /* - * Repack tells us where the index key ends; advance past - * that to get where the raw primary key starts. - */ - WT_RET(__wt_struct_repack(session, cindex->child->key_format, - repack_fmt != NULL ? repack_fmt : cindex->iface.key_format, - &cindex->child->key, idxkey)); - WT_ASSERT(session, cindex->child->key.size > idxkey->size); - tocur->key.data = (uint8_t *)idxkey->data + idxkey->size; - tocur->key.size = cindex->child->key.size - idxkey->size; - if (WT_CURSOR_RECNO(tocur)) { - p = (const uint8_t *)tocur->key.data; - WT_RET(__wt_vunpack_uint(&p, tocur->key.size, - &tocur->recno)); - } else - tocur->recno = 0; - } else { - firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0]; - keyp = &firstcg_cur->key; - if (WT_CURSOR_RECNO(tocur)) { - WT_ASSERT(session, keyp->size == sizeof(uint64_t)); - tocur->recno = *(uint64_t *)keyp->data; - WT_RET(__curjoin_pack_recno(session, tocur->recno, - cjoin->recno_buf, sizeof(cjoin->recno_buf), - &tocur->key)); - } else { - WT_ITEM_SET(tocur->key, *keyp); - tocur->recno = 0; - } - idxkey->data = NULL; - idxkey->size = 0; - } - return (0); -} - -/* - * __curjoin_entry_iter_next -- + * __curjoin_iter_next -- * Get the next item in an iteration. 
* */ static int -__curjoin_entry_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) +__curjoin_iter_next(WT_CURSOR_JOIN_ITER *iter, WT_CURSOR *cursor) { WT_CURSOR_JOIN_ENTRY *entry; WT_DECL_RET; @@ -269,9 +268,9 @@ again: entry = iter->entry; if (entry->subjoin != NULL) { if (iter->child == NULL) - WT_RET(__curjoin_entry_iter_init(session, + WT_RET(__curjoin_iter_init(session, entry->subjoin, &iter->child)); - ret = __curjoin_entry_iter_next(iter->child, cursor); + ret = __curjoin_iter_next(iter->child, cursor); if (ret == 0) { /* The child did the work, we're done. */ iter->curkey = &cursor->key; @@ -279,16 +278,16 @@ again: return (ret); } else if (ret == WT_NOTFOUND) { - WT_RET(__curjoin_entry_iter_close_all(iter->child)); + WT_RET(__curjoin_iter_close_all(iter->child)); entry->subjoin->iter = NULL; iter->child = NULL; - WT_RET(__curjoin_entry_iter_bump(iter)); + WT_RET(__curjoin_iter_bump(iter)); ret = 0; } } else if (iter->positioned) { ret = iter->cursor->next(iter->cursor); if (ret == WT_NOTFOUND) { - WT_RET(__curjoin_entry_iter_bump(iter)); + WT_RET(__curjoin_iter_bump(iter)); ret = 0; } else WT_RET(ret); @@ -298,7 +297,7 @@ again: if (WT_CURJOIN_ITER_CONSUMED(iter)) return (WT_NOTFOUND); - if (!__curjoin_entry_iter_ready(iter)) + if (!__curjoin_iter_ready(iter)) goto again; WT_RET(ret); @@ -317,190 +316,498 @@ again: } /* - * __curjoin_entry_iter_reset -- - * Reset an iteration to the starting point. - * + * __curjoin_close -- + * WT_CURSOR::close for join cursors. 
*/ static int -__curjoin_entry_iter_reset(WT_CURSOR_JOIN_ITER *iter) +__curjoin_close(WT_CURSOR *cursor) { - if (iter->child != NULL) - WT_RET(__curjoin_entry_iter_close_all(iter->child)); - WT_RET(__curjoin_iter_set_entry(iter, 0)); - iter->positioned = false; - return (0); -} + WT_CURSOR_JOIN *cjoin; + WT_CURSOR_JOIN_ENDPOINT *end; + WT_CURSOR_JOIN_ENTRY *entry; + WT_DECL_RET; + WT_SESSION_IMPL *session; + u_int i; -/* - * __curjoin_entry_iter_ready -- - * Check the positioned flag for all nested iterators. - * - */ -static bool -__curjoin_entry_iter_ready(WT_CURSOR_JOIN_ITER *iter) -{ - while (iter != NULL) { - if (!iter->positioned) - return (false); - iter = iter->child; + cjoin = (WT_CURSOR_JOIN *)cursor; + + JOINABLE_CURSOR_API_CALL(cursor, session, close, NULL); + + __wt_schema_release_table(session, cjoin->table); + /* These are owned by the table */ + cursor->internal_uri = NULL; + cursor->key_format = NULL; + if (cjoin->projection != NULL) { + __wt_free(session, cjoin->projection); + __wt_free(session, cursor->value_format); } - return (true); + + for (entry = cjoin->entries, i = 0; i < cjoin->entries_next; + entry++, i++) { + if (entry->subjoin != NULL) { + F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED); + entry->subjoin->parent = NULL; + } + if (entry->main != NULL) + WT_TRET(entry->main->close(entry->main)); + if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) + WT_TRET(__wt_bloom_close(entry->bloom)); + for (end = &entry->ends[0]; + end < &entry->ends[entry->ends_next]; end++) { + F_CLR(end->cursor, WT_CURSTD_JOINED); + if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR)) + WT_TRET(end->cursor->close(end->cursor)); + } + __wt_free(session, entry->ends); + __wt_free(session, entry->repack_format); + } + + if (cjoin->iter != NULL) + WT_TRET(__curjoin_iter_close_all(cjoin->iter)); + if (cjoin->main != NULL) + WT_TRET(cjoin->main->close(cjoin->main)); + + __wt_free(session, cjoin->entries); + WT_TRET(__wt_cursor_close(cursor)); + +err: API_END_RET(session, ret); } 
/* - * __curjoin_entry_iter_close -- - * Close the iteration, release resources. - * + * __curjoin_endpoint_init_key -- + * Set the key in the reference endpoint. */ static int -__curjoin_entry_iter_close(WT_CURSOR_JOIN_ITER *iter) +__curjoin_endpoint_init_key(WT_SESSION_IMPL *session, + WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint) { - WT_DECL_RET; + WT_CURSOR *cursor; + WT_CURSOR_INDEX *cindex; + WT_ITEM *k; + uint64_t r; - if (iter->cursor != NULL) - WT_TRET(iter->cursor->close(iter->cursor)); - __wt_free(iter->session, iter); - return (ret); + if ((cursor = endpoint->cursor) != NULL) { + if (entry->index != NULL) { + /* Extract and save the index's logical key. */ + cindex = (WT_CURSOR_INDEX *)endpoint->cursor; + WT_RET(__wt_struct_repack(session, + cindex->child->key_format, + (entry->repack_format != NULL ? + entry->repack_format : cindex->iface.key_format), + &cindex->child->key, &endpoint->key)); + } else { + k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key; + if (WT_CURSOR_RECNO(cursor)) { + r = *(uint64_t *)k->data; + WT_RET(__curjoin_pack_recno(session, r, + endpoint->recno_buf, + sizeof(endpoint->recno_buf), + &endpoint->key)); + } + else + endpoint->key = *k; + } + } + return (0); } /* - * __curjoin_entry_iter_close_all -- - * Free the iterator and all of its children recursively. - * + * __curjoin_entries_in_range -- + * Check if a key is in the range specified by the remaining entries, + * returning WT_NOTFOUND if not. 
*/ static int -__curjoin_entry_iter_close_all(WT_CURSOR_JOIN_ITER *iter) +__curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg) { - WT_CURSOR_JOIN *parent; + WT_CURSOR_JOIN_ENTRY *entry; + WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; + int fastret, slowret; + u_int pos; - if (iter->child) - WT_TRET(__curjoin_entry_iter_close_all(iter->child)); - iter->child = NULL; - WT_ASSERT(iter->session, iter->cjoin->parent == NULL || - iter->cjoin->parent->iter->child == iter); - if ((parent = iter->cjoin->parent) != NULL) - parent->iter->child = NULL; - iter->cjoin->iter = NULL; - WT_TRET(__curjoin_entry_iter_close(iter)); - return (ret); + iter = iterarg; + if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) { + fastret = 0; + slowret = WT_NOTFOUND; + } else { + fastret = WT_NOTFOUND; + slowret = 0; + } + pos = iter == NULL ? 0 : iter->entry_pos; + for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next; + entry++, pos++) { + ret = __curjoin_entry_member(session, entry, curkey, iter); + if (ret == fastret) + return (fastret); + if (ret != slowret) + break; + iter = NULL; + } + + return (ret == 0 ? slowret : ret); } /* - * __curjoin_get_key -- - * WT_CURSOR->get_key for join cursors. + * __curjoin_entry_in_range -- + * Check if a key is in the range specified by the entry, returning + * WT_NOTFOUND if not. */ static int -__curjoin_get_key(WT_CURSOR *cursor, ...) +__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter) { - WT_CURSOR_JOIN *cjoin; - WT_DECL_RET; - WT_SESSION_IMPL *session; - va_list ap; + WT_COLLATOR *collator; + WT_CURSOR_JOIN_ENDPOINT *end, *endmax; + bool disjunction, passed; + int cmp; + u_int pos; - cjoin = (WT_CURSOR_JOIN *)cursor; + collator = (entry->index != NULL) ? 
entry->index->collator : NULL; + endmax = &entry->ends[entry->ends_next]; + disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION); + passed = false; - va_start(ap, cursor); - JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL); + /* + * The iterator may have already satisfied some endpoint conditions. + * If so and we're a disjunction, we're done. If so and we're a + * conjunction, we can start past the satisfied conditions. + */ + if (iter == NULL) + pos = 0; + else { + if (disjunction && iter->end_skip) + return (0); + pos = iter->end_pos + iter->end_skip; + } - if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || - !cjoin->iter->positioned) - WT_ERR_MSG(session, EINVAL, - "join cursor must be advanced with next()"); - WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap)); + for (end = &entry->ends[pos]; end < endmax; end++) { + WT_RET(__wt_compare(session, collator, curkey, &end->key, + &cmp)); + switch (WT_CURJOIN_END_RANGE(end)) { + case WT_CURJOIN_END_EQ: + passed = (cmp == 0); + break; -err: va_end(ap); - API_END_RET(session, ret); + case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ: + passed = (cmp >= 0); + WT_ASSERT(session, iter == NULL); + break; + + case WT_CURJOIN_END_GT: + passed = (cmp > 0); + if (passed && iter != NULL && pos == 0) + iter->end_skip = 1; + break; + + case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ: + passed = (cmp <= 0); + break; + + case WT_CURJOIN_END_LT: + passed = (cmp < 0); + break; + + default: + WT_RET(__wt_illegal_value(session, NULL)); + break; + } + + if (!passed) { + if (iter != NULL && + (iter->is_equal || + F_ISSET(end, WT_CURJOIN_END_LT))) { + WT_RET(__curjoin_iter_bump(iter)); + return (WT_NOTFOUND); + } + if (!disjunction) + return (WT_NOTFOUND); + iter = NULL; + } else if (disjunction) + break; + } + if (disjunction && end == endmax) + return (WT_NOTFOUND); + else + return (0); } +typedef struct { + WT_CURSOR iface; + WT_CURSOR_JOIN_ENTRY *entry; + bool ismember; +} WT_CURJOIN_EXTRACTOR; + /* - * __curjoin_get_value -- - * 
WT_CURSOR->get_value for join cursors. + * __curjoin_extract_insert -- + * Handle a key produced by a custom extractor. */ static int -__curjoin_get_value(WT_CURSOR *cursor, ...) -{ - WT_CURSOR_JOIN *cjoin; +__curjoin_extract_insert(WT_CURSOR *cursor) { + WT_CURJOIN_EXTRACTOR *cextract; WT_DECL_RET; + WT_ITEM ikey; WT_SESSION_IMPL *session; - va_list ap; - cjoin = (WT_CURSOR_JOIN *)cursor; + cextract = (WT_CURJOIN_EXTRACTOR *)cursor; + /* + * This insert method may be called multiple times during a single + * extraction. If we already have a definitive answer to the + * membership question, exit early. + */ + if (cextract->ismember) + return (0); - va_start(ap, cursor); - JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL); + session = (WT_SESSION_IMPL *)cursor->session; - if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || - !cjoin->iter->positioned) - WT_ERR_MSG(session, EINVAL, - "join cursor must be advanced with next()"); + WT_ITEM_SET(ikey, cursor->key); + /* + * We appended a padding byte to the key to avoid rewriting the last + * column. Strip that away here. + */ + WT_ASSERT(session, ikey.size > 0); + --ikey.size; - WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap)); + ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false); + if (ret == WT_NOTFOUND) + ret = 0; + else if (ret == 0) + cextract->ismember = true; -err: va_end(ap); - API_END_RET(session, ret); + return (ret); } /* - * __curjoin_init_bloom -- - * Populate Bloom filters + * __curjoin_entry_member -- + * Do a membership check for a particular index that was joined, + * if not a member, returns WT_NOTFOUND. 
*/ static int -__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom) +__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter) { - WT_COLLATOR *collator; + WT_CURJOIN_EXTRACTOR extract_cursor; WT_CURSOR *c; - WT_CURSOR_JOIN_ENDPOINT *end, *endmax; + WT_CURSOR_STATIC_INIT(iface, + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ + __wt_cursor_compare_notsup, /* compare */ + __wt_cursor_equals_notsup, /* equals */ + __wt_cursor_notsup, /* next */ + __wt_cursor_notsup, /* prev */ + __wt_cursor_notsup, /* reset */ + __wt_cursor_notsup, /* search */ + __wt_cursor_search_near_notsup, /* search-near */ + __curjoin_extract_insert, /* insert */ + __wt_cursor_notsup, /* update */ + __wt_cursor_notsup, /* remove */ + __wt_cursor_reconfigure_notsup, /* reconfigure */ + __wt_cursor_notsup); /* close */ WT_DECL_RET; - WT_DECL_ITEM(uribuf); - WT_ITEM curkey, curvalue; - const char *raw_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), "raw", NULL }; - const char *uri; - size_t size; - u_int skip; - int cmp; + WT_INDEX *idx; + WT_ITEM v; + bool bloom_found; - c = NULL; - skip = 0; + if (entry->subjoin == NULL && iter != NULL && + (iter->end_pos + iter->end_skip >= entry->ends_next || + (iter->end_skip > 0 && + F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)))) + return (0); /* no checks to make */ - if (entry->index != NULL) + entry->stats.accesses++; + bloom_found = false; + + if (entry->bloom != NULL) { /* - * Open the raw index. We're avoiding any references - * to the main table, they may be expensive. + * If we don't own the Bloom filter, we must be sharing one + * in a previous entry. So the shared filter has already + * been checked and passed. 
*/ - uri = entry->index->source; - else { + if (!F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) + return (0); + /* - * For joins on the main table, we just need the primary - * key for comparison, we don't need any values. + * If the item is not in the Bloom filter, we return + * immediately, otherwise, we still need to check the + * long way. */ - size = strlen(cjoin->table->name) + 3; - WT_ERR(__wt_scr_alloc(session, size, &uribuf)); - WT_ERR(__wt_buf_fmt(session, uribuf, "%s()", - cjoin->table->name)); - uri = uribuf->data; - } - WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c)); - - /* Initially position the cursor if necessary. */ - endmax = &entry->ends[entry->ends_next]; - if ((end = &entry->ends[0]) < endmax) { - if (F_ISSET(end, WT_CURJOIN_END_GT) || - WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) { - WT_ERR(__wt_cursor_dup_position(end->cursor, c)); - if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE) - skip = 1; - } else if (F_ISSET(end, WT_CURJOIN_END_LT)) { - if ((ret = c->next(c)) == WT_NOTFOUND) - goto done; - WT_ERR(ret); - } else - WT_ERR(__wt_illegal_value(session, NULL)); + WT_ERR(__wt_bloom_inmem_get(entry->bloom, key)); + bloom_found = true; } - collator = (entry->index == NULL) ? NULL : entry->index->collator; - while (ret == 0) { + if (entry->subjoin != NULL) { + WT_ASSERT(session, + iter == NULL || entry->subjoin == iter->child->cjoin); + ret = __curjoin_entries_in_range(session, entry->subjoin, + key, iter == NULL ? NULL : iter->child); + if (iter != NULL && + WT_CURJOIN_ITER_CONSUMED(iter->child)) { + WT_ERR(__curjoin_iter_bump(iter)); + ret = WT_NOTFOUND; + } + return (ret); + } + if (entry->index != NULL) { + /* + * If this entry is used by the iterator, then we already + * have the index key, and we won't have to do any + * extraction either. + */ + if (iter != NULL && entry == iter->entry) + WT_ITEM_SET(v, iter->idxkey); + else { + memset(&v, 0, sizeof(v)); /* Keep lint quiet. 
*/ + c = entry->main; + c->set_key(c, key); + if ((ret = c->search(c)) == 0) + ret = c->get_value(c, &v); + else if (ret == WT_NOTFOUND) + WT_ERR_MSG(session, WT_ERROR, + "main table for join is missing entry"); + WT_TRET(c->reset(c)); + WT_ERR(ret); + } + } else + WT_ITEM_SET(v, *key); + + if ((idx = entry->index) != NULL && idx->extractor != NULL && + (iter == NULL || entry != iter->entry)) { + WT_CLEAR(extract_cursor); + extract_cursor.iface = iface; + extract_cursor.iface.session = &session->iface; + extract_cursor.iface.key_format = idx->exkey_format; + extract_cursor.ismember = false; + extract_cursor.entry = entry; + WT_ERR(idx->extractor->extract(idx->extractor, + &session->iface, key, &v, &extract_cursor.iface)); + if (!extract_cursor.ismember) + WT_ERR(WT_NOTFOUND); + } else + WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter)); + + if (0) { +err: if (ret == WT_NOTFOUND && bloom_found) + entry->stats.bloom_false_positive++; + } + return (ret); +} + +/* + * __curjoin_get_key -- + * WT_CURSOR->get_key for join cursors. + */ +static int +__curjoin_get_key(WT_CURSOR *cursor, ...) +{ + WT_CURSOR_JOIN *cjoin; + WT_DECL_RET; + WT_SESSION_IMPL *session; + va_list ap; + + cjoin = (WT_CURSOR_JOIN *)cursor; + + va_start(ap, cursor); + JOINABLE_CURSOR_API_CALL(cursor, session, get_key, NULL); + + if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || + !cjoin->iter->positioned) + WT_ERR_MSG(session, EINVAL, + "join cursor must be advanced with next()"); + WT_ERR(__wt_cursor_get_keyv(cursor, cursor->flags, ap)); + +err: va_end(ap); + API_END_RET(session, ret); +} + +/* + * __curjoin_get_value -- + * WT_CURSOR->get_value for join cursors. + */ +static int +__curjoin_get_value(WT_CURSOR *cursor, ...) 
+{ + WT_CURSOR_JOIN *cjoin; + WT_DECL_RET; + WT_SESSION_IMPL *session; + va_list ap; + + cjoin = (WT_CURSOR_JOIN *)cursor; + + va_start(ap, cursor); + JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL); + + if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED) || + !cjoin->iter->positioned) + WT_ERR_MSG(session, EINVAL, + "join cursor must be advanced with next()"); + + WT_ERR(__wt_curtable_get_valuev(cjoin->main, ap)); + +err: va_end(ap); + API_END_RET(session, ret); +} + +/* + * __curjoin_init_bloom -- + * Populate Bloom filters + */ +static int +__curjoin_init_bloom(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_CURSOR_JOIN_ENTRY *entry, WT_BLOOM *bloom) +{ + WT_COLLATOR *collator; + WT_CURSOR *c; + WT_CURSOR_JOIN_ENDPOINT *end, *endmax; + WT_DECL_RET; + WT_DECL_ITEM(uribuf); + WT_ITEM curkey, curvalue; + const char *raw_cfg[] = { WT_CONFIG_BASE( + session, WT_SESSION_open_cursor), "raw", NULL }; + const char *uri; + size_t size; + u_int skip; + int cmp; + + c = NULL; + skip = 0; + + if (entry->index != NULL) + /* + * Open the raw index. We're avoiding any references + * to the main table, they may be expensive. + */ + uri = entry->index->source; + else { + /* + * For joins on the main table, we just need the primary + * key for comparison, we don't need any values. + */ + size = strlen(cjoin->table->name) + 3; + WT_ERR(__wt_scr_alloc(session, size, &uribuf)); + WT_ERR(__wt_buf_fmt(session, uribuf, "%s()", + cjoin->table->name)); + uri = uribuf->data; + } + WT_ERR(__wt_open_cursor(session, uri, &cjoin->iface, raw_cfg, &c)); + + /* Initially position the cursor if necessary. 
*/ + endmax = &entry->ends[entry->ends_next]; + if ((end = &entry->ends[0]) < endmax) { + if (F_ISSET(end, WT_CURJOIN_END_GT) || + WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_EQ) { + WT_ERR(__wt_cursor_dup_position(end->cursor, c)); + if (WT_CURJOIN_END_RANGE(end) == WT_CURJOIN_END_GE) + skip = 1; + } else if (F_ISSET(end, WT_CURJOIN_END_LT)) { + if ((ret = c->next(c)) == WT_NOTFOUND) + goto done; + WT_ERR(ret); + } else + WT_ERR(__wt_illegal_value(session, NULL)); + } + collator = (entry->index == NULL) ? NULL : entry->index->collator; + while (ret == 0) { WT_ERR(c->get_key(c, &curkey)); if (entry->index != NULL) { /* @@ -590,44 +897,6 @@ err: if (c != NULL) return (ret); } -/* - * __curjoin_endpoint_init_key -- - * Set the key in the reference endpoint. - */ -static int -__curjoin_endpoint_init_key(WT_SESSION_IMPL *session, - WT_CURSOR_JOIN_ENTRY *entry, WT_CURSOR_JOIN_ENDPOINT *endpoint) -{ - WT_CURSOR *cursor; - WT_CURSOR_INDEX *cindex; - WT_ITEM *k; - uint64_t r; - - if ((cursor = endpoint->cursor) != NULL) { - if (entry->index != NULL) { - /* Extract and save the index's logical key. */ - cindex = (WT_CURSOR_INDEX *)endpoint->cursor; - WT_RET(__wt_struct_repack(session, - cindex->child->key_format, - (entry->repack_format != NULL ? - entry->repack_format : cindex->iface.key_format), - &cindex->child->key, &endpoint->key)); - } else { - k = &((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]->key; - if (WT_CURSOR_RECNO(cursor)) { - r = *(uint64_t *)k->data; - WT_RET(__curjoin_pack_recno(session, r, - endpoint->recno_buf, - sizeof(endpoint->recno_buf), - &endpoint->key)); - } - else - endpoint->key = *k; - } - } - return (0); -} - /* * __curjoin_init_next -- * Initialize the cursor join when the next function is first called. 
@@ -764,298 +1033,38 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, bloom)); WT_ERR(__wt_bloom_close(bloom)); } - } - if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) - iterable = false; - } - F_SET(cjoin, WT_CURJOIN_INITIALIZED); - -err: __wt_free(session, mainbuf); - return (ret); -} - -/* - * __curjoin_entries_in_range -- - * Check if a key is in the range specified by the remaining entries, - * returning WT_NOTFOUND if not. - */ -static int -__curjoin_entries_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iterarg) -{ - WT_CURSOR_JOIN_ENTRY *entry; - WT_CURSOR_JOIN_ITER *iter; - WT_DECL_RET; - int fastret, slowret; - u_int pos; - - iter = iterarg; - if (F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) { - fastret = 0; - slowret = WT_NOTFOUND; - } else { - fastret = WT_NOTFOUND; - slowret = 0; - } - pos = iter == NULL ? 0 : iter->entry_pos; - for (entry = &cjoin->entries[pos]; pos < cjoin->entries_next; - entry++, pos++) { - ret = __curjoin_entry_member(session, entry, curkey, iter); - if (ret == fastret) - return (fastret); - if (ret != slowret) - break; - iter = NULL; - } - - return (ret == 0 ? slowret : ret); -} - -/* - * __curjoin_entry_in_range -- - * Check if a key is in the range specified by the entry, returning - * WT_NOTFOUND if not. - */ -static int -__curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, - WT_ITEM *curkey, WT_CURSOR_JOIN_ITER *iter) -{ - WT_COLLATOR *collator; - WT_CURSOR_JOIN_ENDPOINT *end, *endmax; - bool disjunction, passed; - int cmp; - u_int pos; - - collator = (entry->index != NULL) ? entry->index->collator : NULL; - endmax = &entry->ends[entry->ends_next]; - disjunction = F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION); - passed = false; - - /* - * The iterator may have already satisfied some endpoint conditions. - * If so and we're a disjunction, we're done. If so and we're a - * conjunction, we can start past the satisfied conditions. 
- */ - if (iter == NULL) - pos = 0; - else { - if (disjunction && iter->end_skip) - return (0); - pos = iter->end_pos + iter->end_skip; - } - - for (end = &entry->ends[pos]; end < endmax; end++) { - WT_RET(__wt_compare(session, collator, curkey, &end->key, - &cmp)); - switch (WT_CURJOIN_END_RANGE(end)) { - case WT_CURJOIN_END_EQ: - passed = (cmp == 0); - break; - - case WT_CURJOIN_END_GT | WT_CURJOIN_END_EQ: - passed = (cmp >= 0); - WT_ASSERT(session, iter == NULL); - break; - - case WT_CURJOIN_END_GT: - passed = (cmp > 0); - if (passed && iter != NULL && pos == 0) - iter->end_skip = 1; - break; - - case WT_CURJOIN_END_LT | WT_CURJOIN_END_EQ: - passed = (cmp <= 0); - break; - - case WT_CURJOIN_END_LT: - passed = (cmp < 0); - break; - - default: - WT_RET(__wt_illegal_value(session, NULL)); - break; - } - - if (!passed) { - if (iter != NULL && - (iter->is_equal || - F_ISSET(end, WT_CURJOIN_END_LT))) { - WT_RET(__curjoin_entry_iter_bump(iter)); - return (WT_NOTFOUND); - } - if (!disjunction) - return (WT_NOTFOUND); - iter = NULL; - } else if (disjunction) - break; - } - if (disjunction && end == endmax) - return (WT_NOTFOUND); - else - return (0); -} - -typedef struct { - WT_CURSOR iface; - WT_CURSOR_JOIN_ENTRY *entry; - bool ismember; -} WT_CURJOIN_EXTRACTOR; - -/* - * __curjoin_extract_insert -- - * Handle a key produced by a custom extractor. - */ -static int -__curjoin_extract_insert(WT_CURSOR *cursor) { - WT_CURJOIN_EXTRACTOR *cextract; - WT_DECL_RET; - WT_ITEM ikey; - WT_SESSION_IMPL *session; - - cextract = (WT_CURJOIN_EXTRACTOR *)cursor; - /* - * This insert method may be called multiple times during a single - * extraction. If we already have a definitive answer to the - * membership question, exit early. - */ - if (cextract->ismember) - return (0); - - session = (WT_SESSION_IMPL *)cursor->session; - - WT_ITEM_SET(ikey, cursor->key); - /* - * We appended a padding byte to the key to avoid rewriting the last - * column. Strip that away here. 
- */ - WT_ASSERT(session, ikey.size > 0); - --ikey.size; - - ret = __curjoin_entry_in_range(session, cextract->entry, &ikey, false); - if (ret == WT_NOTFOUND) - ret = 0; - else if (ret == 0) - cextract->ismember = true; - - return (ret); -} - -/* - * __curjoin_entry_member -- - * Do a membership check for a particular index that was joined, - * if not a member, returns WT_NOTFOUND. - */ -static int -__curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, - WT_ITEM *key, WT_CURSOR_JOIN_ITER *iter) -{ - WT_CURJOIN_EXTRACTOR extract_cursor; - WT_CURSOR *c; - WT_CURSOR_STATIC_INIT(iface, - __wt_cursor_get_key, /* get-key */ - __wt_cursor_get_value, /* get-value */ - __wt_cursor_set_key, /* set-key */ - __wt_cursor_set_value, /* set-value */ - __wt_cursor_compare_notsup, /* compare */ - __wt_cursor_equals_notsup, /* equals */ - __wt_cursor_notsup, /* next */ - __wt_cursor_notsup, /* prev */ - __wt_cursor_notsup, /* reset */ - __wt_cursor_notsup, /* search */ - __wt_cursor_search_near_notsup, /* search-near */ - __curjoin_extract_insert, /* insert */ - __wt_cursor_notsup, /* update */ - __wt_cursor_notsup, /* remove */ - __wt_cursor_reconfigure_notsup, /* reconfigure */ - __wt_cursor_notsup); /* close */ - WT_DECL_RET; - WT_INDEX *idx; - WT_ITEM v; - bool bloom_found; - - if (entry->subjoin == NULL && iter != NULL && - (iter->end_pos + iter->end_skip >= entry->ends_next || - (iter->end_skip > 0 && - F_ISSET(entry, WT_CURJOIN_ENTRY_DISJUNCTION)))) - return (0); /* no checks to make */ - - entry->stats.accesses++; - bloom_found = false; - - if (entry->bloom != NULL) { - /* - * If we don't own the Bloom filter, we must be sharing one - * in a previous entry. So the shared filter has already - * been checked and passed. - */ - if (!F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) - return (0); - - /* - * If the item is not in the Bloom filter, we return - * immediately, otherwise, we still need to check the - * long way. 
- */ - WT_ERR(__wt_bloom_inmem_get(entry->bloom, key)); - bloom_found = true; - } - if (entry->subjoin != NULL) { - WT_ASSERT(session, - iter == NULL || entry->subjoin == iter->child->cjoin); - ret = __curjoin_entries_in_range(session, entry->subjoin, - key, iter == NULL ? NULL : iter->child); - if (iter != NULL && - WT_CURJOIN_ITER_CONSUMED(iter->child)) { - WT_ERR(__curjoin_entry_iter_bump(iter)); - ret = WT_NOTFOUND; - } - return (ret); - } - if (entry->index != NULL) { - /* - * If this entry is used by the iterator, then we already - * have the index key, and we won't have to do any - * extraction either. - */ - if (iter != NULL && entry == iter->entry) - WT_ITEM_SET(v, iter->idxkey); - else { - memset(&v, 0, sizeof(v)); /* Keep lint quiet. */ - c = entry->main; - c->set_key(c, key); - if ((ret = c->search(c)) == 0) - ret = c->get_value(c, &v); - else if (ret == WT_NOTFOUND) - WT_ERR_MSG(session, WT_ERROR, - "main table for join is missing entry"); - WT_TRET(c->reset(c)); - WT_ERR(ret); - } - } else - WT_ITEM_SET(v, *key); - - if ((idx = entry->index) != NULL && idx->extractor != NULL && - (iter == NULL || entry != iter->entry)) { - WT_CLEAR(extract_cursor); - extract_cursor.iface = iface; - extract_cursor.iface.session = &session->iface; - extract_cursor.iface.key_format = idx->exkey_format; - extract_cursor.ismember = false; - extract_cursor.entry = entry; - WT_ERR(idx->extractor->extract(idx->extractor, - &session->iface, key, &v, &extract_cursor.iface)); - if (!extract_cursor.ismember) - WT_ERR(WT_NOTFOUND); - } else - WT_ERR(__curjoin_entry_in_range(session, entry, &v, iter)); - - if (0) { -err: if (ret == WT_NOTFOUND && bloom_found) - entry->stats.bloom_false_positive++; + } + if (!F_ISSET(cjoin, WT_CURJOIN_DISJUNCTION)) + iterable = false; } + F_SET(cjoin, WT_CURJOIN_INITIALIZED); + +err: __wt_free(session, mainbuf); return (ret); } +/* + * __curjoin_insert_endpoint -- + * Insert a new entry into the endpoint array for the join entry. 
+ */ +static int +__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, + u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp) +{ + WT_CURSOR_JOIN_ENDPOINT *newend; + + WT_RET(__wt_realloc_def(session, &entry->ends_allocated, + entry->ends_next + 1, &entry->ends)); + newend = &entry->ends[pos]; + memmove(newend + 1, newend, + (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT)); + memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT)); + entry->ends_next++; + *newendp = newend; + + return (0); +} + /* * __curjoin_next -- * WT_CURSOR::next for join cursors. @@ -1081,11 +1090,11 @@ __curjoin_next(WT_CURSOR *cursor) if (!F_ISSET(cjoin, WT_CURJOIN_INITIALIZED)) WT_ERR(__curjoin_init_next(session, cjoin, true)); if (cjoin->iter == NULL) - WT_ERR(__curjoin_entry_iter_init(session, cjoin, &cjoin->iter)); + WT_ERR(__curjoin_iter_init(session, cjoin, &cjoin->iter)); iter = cjoin->iter; F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - while ((ret = __curjoin_entry_iter_next(iter, cursor)) == 0) { + while ((ret = __curjoin_iter_next(iter, cursor)) == 0) { if ((ret = __curjoin_entries_in_range(session, cjoin, iter->curkey, iter)) != WT_NOTFOUND) break; @@ -1111,7 +1120,7 @@ __curjoin_next(WT_CURSOR *cursor) WT_ERR(c->search(c)); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if (ret == WT_NOTFOUND && - (tret = __curjoin_entry_iter_close_all(iter)) != 0) + (tret = __curjoin_iter_close_all(iter)) != 0) WT_ERR(tret); if (0) { @@ -1120,6 +1129,73 @@ err: F_SET(cjoin, WT_CURJOIN_ERROR); API_END_RET(session, ret); } +/* + * __curjoin_open_main -- + * For the given index, open the main file with a projection + * that is the index keys. 
+ */ +static int +__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_CURSOR_JOIN_ENTRY *entry) +{ + WT_DECL_RET; + WT_INDEX *idx; + char *main_uri, *newformat; + const char *raw_cfg[] = { WT_CONFIG_BASE( + session, WT_SESSION_open_cursor), "raw", NULL }; + size_t len, newsize; + + main_uri = NULL; + idx = entry->index; + + newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; + WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); + snprintf(main_uri, newsize, "%s%.*s", + cjoin->table->name, (int)idx->colconf.len, + idx->colconf.str); + WT_ERR(__wt_open_cursor(session, main_uri, + (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); + if (idx->extractor == NULL) { + /* + * Add no-op padding so trailing 'u' formats are not + * transformed to 'U'. This matches what happens in + * the index. We don't do this when we have an + * extractor, extractors already use the padding + * byte trick. + */ + len = strlen(entry->main->value_format) + 3; + WT_ERR(__wt_calloc(session, len, 1, &newformat)); + snprintf(newformat, len, "%s0x", + entry->main->value_format); + __wt_free(session, entry->main->value_format); + entry->main->value_format = newformat; + } + +err: __wt_free(session, main_uri); + return (ret); +} + +/* + * __curjoin_pack_recno -- + * Pack the given recno into a buffer; prepare an item referencing it. + * + */ +static int +__curjoin_pack_recno(WT_SESSION_IMPL *session, uint64_t r, uint8_t *buf, + size_t bufsize, WT_ITEM *item) +{ + WT_SESSION *wtsession; + size_t sz; + + wtsession = (WT_SESSION *)session; + WT_RET(wiredtiger_struct_size(wtsession, &sz, "r", r)); + WT_ASSERT(session, sz < bufsize); + WT_RET(wiredtiger_struct_pack(wtsession, buf, bufsize, "r", r)); + item->size = sz; + item->data = buf; + return (0); +} + /* * __curjoin_reset -- * WT_CURSOR::reset for join cursors. 
@@ -1136,67 +1212,63 @@ __curjoin_reset(WT_CURSOR *cursor) JOINABLE_CURSOR_API_CALL(cursor, session, reset, NULL); if (cjoin->iter != NULL) - WT_ERR(__curjoin_entry_iter_reset(cjoin->iter)); + WT_ERR(__curjoin_iter_reset(cjoin->iter)); err: API_END_RET(session, ret); } /* - * __curjoin_close -- - * WT_CURSOR::close for join cursors. + * __curjoin_split_key -- + * Copy the primary key from a cursor (either main table or index) + * to another cursor. When copying from an index file, the index + * key is also returned. + * */ static int -__curjoin_close(WT_CURSOR *cursor) +__curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, + WT_ITEM *idxkey, WT_CURSOR *tocur, WT_CURSOR *fromcur, + const char *repack_fmt, bool isindex) { - WT_CURSOR_JOIN *cjoin; - WT_CURSOR_JOIN_ENDPOINT *end; - WT_CURSOR_JOIN_ENTRY *entry; - WT_DECL_RET; - WT_SESSION_IMPL *session; - u_int i; - - cjoin = (WT_CURSOR_JOIN *)cursor; - - JOINABLE_CURSOR_API_CALL(cursor, session, close, NULL); - - __wt_schema_release_table(session, cjoin->table); - /* These are owned by the table */ - cursor->internal_uri = NULL; - cursor->key_format = NULL; - if (cjoin->projection != NULL) { - __wt_free(session, cjoin->projection); - __wt_free(session, cursor->value_format); - } + WT_CURSOR *firstcg_cur; + WT_CURSOR_INDEX *cindex; + WT_ITEM *keyp; + const uint8_t *p; - for (entry = cjoin->entries, i = 0; i < cjoin->entries_next; - entry++, i++) { - if (entry->subjoin != NULL) { - F_CLR(&entry->subjoin->iface, WT_CURSTD_JOINED); - entry->subjoin->parent = NULL; - } - if (entry->main != NULL) - WT_TRET(entry->main->close(entry->main)); - if (F_ISSET(entry, WT_CURJOIN_ENTRY_OWN_BLOOM)) - WT_TRET(__wt_bloom_close(entry->bloom)); - for (end = &entry->ends[0]; - end < &entry->ends[entry->ends_next]; end++) { - F_CLR(end->cursor, WT_CURSTD_JOINED); - if (F_ISSET(end, WT_CURJOIN_END_OWN_CURSOR)) - WT_TRET(end->cursor->close(end->cursor)); + if (isindex) { + cindex = ((WT_CURSOR_INDEX *)fromcur); + /* + * Repack 
tells us where the index key ends; advance past + * that to get where the raw primary key starts. + */ + WT_RET(__wt_struct_repack(session, cindex->child->key_format, + repack_fmt != NULL ? repack_fmt : cindex->iface.key_format, + &cindex->child->key, idxkey)); + WT_ASSERT(session, cindex->child->key.size > idxkey->size); + tocur->key.data = (uint8_t *)idxkey->data + idxkey->size; + tocur->key.size = cindex->child->key.size - idxkey->size; + if (WT_CURSOR_RECNO(tocur)) { + p = (const uint8_t *)tocur->key.data; + WT_RET(__wt_vunpack_uint(&p, tocur->key.size, + &tocur->recno)); + } else + tocur->recno = 0; + } else { + firstcg_cur = ((WT_CURSOR_TABLE *)fromcur)->cg_cursors[0]; + keyp = &firstcg_cur->key; + if (WT_CURSOR_RECNO(tocur)) { + WT_ASSERT(session, keyp->size == sizeof(uint64_t)); + tocur->recno = *(uint64_t *)keyp->data; + WT_RET(__curjoin_pack_recno(session, tocur->recno, + cjoin->recno_buf, sizeof(cjoin->recno_buf), + &tocur->key)); + } else { + WT_ITEM_SET(tocur->key, *keyp); + tocur->recno = 0; } - __wt_free(session, entry->ends); - __wt_free(session, entry->repack_format); + idxkey->data = NULL; + idxkey->size = 0; } - - if (cjoin->iter != NULL) - WT_TRET(__curjoin_entry_iter_close_all(cjoin->iter)); - if (cjoin->main != NULL) - WT_TRET(cjoin->main->close(cjoin->main)); - - __wt_free(session, cjoin->entries); - WT_TRET(__wt_cursor_close(cursor)); - -err: API_END_RET(session, ret); + return (0); } /* @@ -1282,52 +1354,6 @@ err: WT_TRET(__curjoin_close(cursor)); return (ret); } -/* - * __curjoin_open_main -- - * For the given index, open the main file with a projection - * that is the index keys. 
- */ -static int -__curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, - WT_CURSOR_JOIN_ENTRY *entry) -{ - WT_DECL_RET; - WT_INDEX *idx; - char *main_uri, *newformat; - const char *raw_cfg[] = { WT_CONFIG_BASE( - session, WT_SESSION_open_cursor), "raw", NULL }; - size_t len, newsize; - - main_uri = NULL; - idx = entry->index; - - newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; - WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); - snprintf(main_uri, newsize, "%s%.*s", - cjoin->table->name, (int)idx->colconf.len, - idx->colconf.str); - WT_ERR(__wt_open_cursor(session, main_uri, - (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); - if (idx->extractor == NULL) { - /* - * Add no-op padding so trailing 'u' formats are not - * transformed to 'U'. This matches what happens in - * the index. We don't do this when we have an - * extractor, extractors already use the padding - * byte trick. - */ - len = strlen(entry->main->value_format) + 3; - WT_ERR(__wt_calloc(session, len, 1, &newformat)); - snprintf(newformat, len, "%s0x", - entry->main->value_format); - __wt_free(session, entry->main->value_format); - entry->main->value_format = newformat; - } - -err: __wt_free(session, main_uri); - return (ret); -} - /* * __wt_curjoin_join -- * Add a new join to a join cursor. @@ -1511,25 +1537,3 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, } return (0); } - -/* - * __curjoin_insert_endpoint -- - * Insert a new entry into the endpoint array for the join entry. 
- */ -static int -__curjoin_insert_endpoint(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, - u_int pos, WT_CURSOR_JOIN_ENDPOINT **newendp) -{ - WT_CURSOR_JOIN_ENDPOINT *newend; - - WT_RET(__wt_realloc_def(session, &entry->ends_allocated, - entry->ends_next + 1, &entry->ends)); - newend = &entry->ends[pos]; - memmove(newend + 1, newend, - (entry->ends_next - pos) * sizeof(WT_CURSOR_JOIN_ENDPOINT)); - memset(newend, 0, sizeof(WT_CURSOR_JOIN_ENDPOINT)); - entry->ends_next++; - *newendp = newend; - - return (0); -} -- cgit v1.2.1 From 001d362d8043fed9f11dd978b45bde29a6daac4d Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Fri, 22 Apr 2016 14:29:37 -0400 Subject: WT-2571 whitespace fix. --- src/cursor/cur_join.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index d03e53cf5ab..93c1711ef93 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -24,7 +24,7 @@ static int __curjoin_pack_recno(WT_SESSION_IMPL *, uint64_t, uint8_t *, size_t, WT_ITEM *); static int __curjoin_split_key(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, WT_CURSOR *, WT_CURSOR *, const char *, bool); - + #define WT_CURJOIN_ITER_CONSUMED(iter) \ ((iter)->entry_pos >= (iter)->entry_count) -- cgit v1.2.1 From b217c497e38141e8980babd2785c98926867e675 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 28 Apr 2016 21:16:44 +1000 Subject: WT-2552 Add public API for pluggable filesystems (#2671) * WT-2552 Add public API for pluggable filesystems Not yet compiling. The main parts of this change should be here, but it involved extensive parameter re-organization. There are also a number of layering violations between our existing file system implementations and the WT_FH, that aren't possible with the new structure. There are a number of specific todo comments in the code. 
One of the main issues is that the in-memory file system had a special close semantic that relied on WiredTiger handle tracking. The in-memory file-system should do its own tracking of file handles, I've gone part way down that road by adding a queue for closed handles. Need to also add in live handles, and manage the queue as appropriate. I haven't created an example application that uses the new API yet. * WT-2552 Add public API for pluggable filesystems I always forget you have to remove the already-built html files when changing PREDEFINED, add a reminder to the complaint. * WT-2552 Add public API for pluggable filesystems You have to remove the .js files, too. * WT-2552 Add public API for pluggable filesystems Make dist/s_all run cleanly. * WT-2552 Add public API for pluggable filesystems Whitespace. * WT-2552 Add public API for pluggable filesystems Make it compile/build/lint. * WT-2552 Add public API for pluggable filesystems block_write.c: In function '__wt_block_extend': block_write.c:130:71: error: missing terminating ' character [-Werror] * WT-2552 Add public API for pluggable filesystems os_fs_inmemory.c: In function '__im_file_truncate': os_fs_inmemory.c:344:10: error: 'session' is used uninitialized in this function [-Werror=uninitialized] * WT-2552 Add public API for pluggable filesystems os_fs.c: In function '__posix_directory_sync': os_fs.c:92:10: error: 'session' is used uninitialized in this function [-Werror=uninitialized] * WT-2552 Add public API for pluggable filesystems Go back to using bool types in the file-system API, this requires we add to the "standard" wiredtiger.h includes. Consistently use wt_session to represent a WT_SESSION, we were using "wtsession" in some places. Make a pass over the Windows code, but I'm sure it doesn't compile yet. * WT-2552 Add public API for pluggable filesystems Fix up another couple of bool types.
* WT-2552 Add public API for pluggable filesystems Move the file naming work out of the underlying filesystem functions, the calls to __wt_filename are now in the upper-level code, in os_fs.i; that means the filesystem code is no longer responsible for figuring out paths. This is cleaner, although the directory-sync call is a bit of a kluge, and I've committed us to handling NULL filesystem methods. With this set of changes, in-memory runs again. More Windows naming fixes. * WT-2552 Add public API for pluggable filesystems os_fs.c: In function '__posix_directory_sync': os_fs.c:96:3: error: label 'err' used but not defined * WT-2552 Add public API for pluggable filesystems Pull out another call to __wt_filename() from the filesystem-dependent code. * WT-2552 Add public API for pluggable filesystems Consistently check for missing file-system methods when doing file-system calls. Other minor lint & cleanup. * WT-2552 Add public API for pluggable filesystems Change the in-memory code to maintain a complete list of the files it has ever opened, and depend on that list instead of reaching up into the common layer for the WT_FH handle list. This means __wt_handle_search is only used by the common WT_FH handle code, simplify it, and add a __wt_handle_is_open function that can be called for diagnostic purposes (to check for open files that are being renamed or removed, for example). * Fix compiler warning and ignore the file system API in Java * Flesh out the example file system implementation. * Add in some plumbing for set_file_system in wiredtiger_open. * WT-2552 Add public API for pluggable filesystems Whitespace. * WT-2552 Add public API for pluggable filesystems WT_CONFIG_ITEM.val isn't a boolean, don't use boolean types in equal/not-equal comparisons. * WT-2552 Add public API for pluggable filesystems Remove unused #includes. Increment/decrement the DEMO_FILE_SYSTEM.{opened,closed}_file_count. Allocate demo structures, they're larger than the underlying structures.
Swap the number/size calloc arguments, number comes first. Fix a couple of statics. * WT-2552 Add public API for pluggable filesystems Use %u instead of casting to %d. * WT-2552 Add public API for pluggable filesystems Add ex_file_system.c to the list of example programs. * WT-2552 Add public API for pluggable filesystems Change ex_file_system.c to not require : strip down a copy of FreeBSD's for local inclusion, rewrite a few other minor pieces of code. * WT-2552 Add public API for pluggable filesystems Update spell check info * WT-2552 Add public API for pluggable filesystems __conn_load_extensions() shouldn't set the "early" boolean to true. * WT-2552 Add public API for pluggable filesystems Don't indirect through a NULL pointer if "local" was set and no path was specified, always set the name to something useful. * WT-2552 Add public API for pluggable filesystems Don't indirect through a NULL pointer if "local" was set and no path was specified, always set the name to something useful. * WT-2552 Add public API for pluggable filesystems wt_off_t vs. size_t conversion lint. * WT-2552 Add public API for pluggable filesystems Add -rdynamic to the load for ex_file_system, the main executable symbols are not exported by default. * WT-2552 Add public API for pluggable filesystems The underlying handle name includes the enclosing directory, compare against the WT_FH.name field instead. * WT-2552 Add public API for pluggable filesystems demo_fs_rename should return 0 if successful, simplify error handling Don't bother casting arguments to free(), it's not necessary. * WT-2552 Add public API for pluggable filesystems General WT_FILE_SYSTEM cleanup. Move OS initialization into the wiredtiger_open() code (the os_common/os_init.c file is no longer needed). Allow early-load extensions to be part of the environment settings, matching the "in-memory" and "readonly" configurations. 
Syntax check the set of a file-system, remove tests for NULL methods in the file-system structure unless it's legal for them to be NULL. Windows, POSIX and in-memory file systems now set WT_FILE_SYSTEM.terminate, call that function to cleanup when discarding a WT_CONNECTION. Export file-type and open-flags constants for WT_FILE_SYSTEM.open_file, sort the WT_FILE_SYSTEM methods, do an editing pass. Change the WT_FILE_HANDLE type from (const char *) to (char *), it's "owned" by the underlying layer, and it's simpler that way. Minor (untested) cleanup of the Windows WT_FILE_SYSTEM.open-file method. * WT-2552 Add public API for pluggable filesystems Export the advise argument #defines for the WT_FILE_HANDLE.fadvise method. Sort the WT_FILE_HANDLE methods. * WT-2552 Add public API for pluggable filesystems Clean up and simplify WT_FILE_SYSTEM/WT_FILE_HANDLE documentation's description of the handles. * WT-2552 Add public API for pluggable filesystems WT_FILE_HANDLE.close is a required function (at the least, it has to free the memory). WT_FILE_HANDLE.fadvise isn't a required function, if it's not configured, don't call it. * WT-2552 Add public API for pluggable filesystems The WT_FILE_HANDLE.lock function is required. Change the __wt_open() signature to match WT_FILE_SYSTEM.open_file(). * WT-2552 Add public API for pluggable filesystems Rework all of the WT_FILE_HANDLE mapped region methods to be optional. * WT-2552 Add public API for pluggable filesystems The WT_FILE_HANDLE.{read,size} methods are required. The WT_FILE_HANDLE.sync method is not required. Split the WT_FILE_HANDLE.sync method into .sync and .sync_nowait versions, it makes the upper-level code simpler (Windows supports .sync but doesn't support .sync_nowait). * WT-2552 Add public API for pluggable filesystems The WT_FILE_HANDLE.{truncate,write} methods are required IFF the file is not readonly. 
* WT-2552 Add public API for pluggable filesystems POSIX shouldn't declare a no-sync handle function unless the sync_file_range system call is available. * WT-2552 Add public API for pluggable filesystems Typo, missing semi-colon. * Fix a bug in ex_file_system.c * Fix a memory leak in posix file handle implementation * WT-2552 Use the correct flags when opening backup file. * WT-2552 Add public API for pluggable filesystems Simplify open-file error handling by calling the close function on the handle, that way we won't forget to free all of the applicable memory allocations. * WT-2552 Add public API for pluggable filesystems Simplify the directory-list method, don't pass in an include/exclude file, if prefix is non-NULL, it implies we only want files matching the prefix. * WT-2552 Add public API for pluggable filesystems Replace WT_FILE_HANDLE_POSIX.fallocate_{available,requires_locking} with WT_FILE_HANDLE.fallocate and WT_FILE_HANDLE.fallocate_nolock. Example code doesn't need to set WT_FILE_HANDLE methods to NULL, the allocation does that. Free the I/O buffer if open-handle allocation fails in the example code. Remove snippets for WT_FILE_SYSTEM and WT_FILE_HANDLE methods, we're not going to provide example code for them. * WT-2552 Add public API for pluggable filesystems Document we expect either ENOTSUP or EBUSY from optionally supported APIs. Review/cleanups ENOTSUP/EBUSY returns from optionally supported APIs. Make WT_FILE_HANDLE.lock optional. Don't configure or call the POSIX fadvise function on files configured for direct I/O. Rename __wt_filesize_name to __wt_size for consistency. Update the spelling list. * WT-2552 Add public API for pluggable filesystems WT_FILE_HANDLE.truncate requires locking in all known implementations, document it is not called concurrently with other operations. * WT-2552 Add public API for pluggable filesystems Don't terminate the filesystem unless we've actually configured one.
* WT-2552 Add public API for pluggable filesystems Remove WT_FILE_SYSTEM and WT_FILE_HANDLE from SWIG so the test suite can pass again. * WT-2552 Add public API for pluggable filesystems Merge __conn_load_early_extensions() and __conn_load_extensions(). Fix a problem where I moved the early extensions load to where it could include the WiredTiger environment variable, but I didn't pass the built cfg into the function. * WT-2552 Add public API for pluggable filesystems Linux build typo. * WT-2552 Add public API for pluggable filesystems Get rid of the "bool silent" argument to WT_FILE_SYSTEM.size by testing for the file's existence before requesting the size (an extra system call, but guaranteed to hit in the buffer cache at least). * WT-2552 Add public API for pluggable filesystems Naming consistency pass over the WT_FILE_SYSTEM functions. * WT-2552 Add public API for pluggable filesystems Fix a spin lock mismatch. * WT-2552 Add public API for pluggable filesystems Another spinlock mismatch. * Update example pluggable file system. Add a directory list implementation to the example, which uncovered an issue with the API. The directory list API allocates memory that is freed by WiredTiger, which I don't think is kosher. * Change file-directory-sync to use regular fsync. The distinction in os_fs.i doesn't work with the filesystem API. Also add directory_sync application to the example application. * WT-2552 Add public API for pluggable filesystems Whitespace. * WT-2552 Add public API for pluggable filesystems Rewrite __wt_free to not evaluate macro arguments multiple times. * WT-2552 Add public API for pluggable filesystems Simplify the directory-list functions: __wt_realloc_def() already handles scaling the size of the allocations, there's no need to involve a separate constant that increments the allocation size. 
* WT-2552 Add public API for pluggable filesystems Fix a grouping problem in a realloc call, we need to multiply the size times the previously allocated slots + 10. Fix buffer overrun, if "count" has already been incremented, the memset would skip clearing the first slot and clear one slot past the end of the buffer. Remove a comment, realloc requires clearing allocated memory, it's not paranoia. * WT-2552 Add public API for pluggable filesystems Add the mapping-cookie argument to the map-preload and map-discard functions. Change page-discard to stop reaching down through the block manager, instead, provide a block-manager map-discard function that does the work. * WT-2552 Add public API for pluggable filesystems Require a directory-list function. Implement a directory-list function for the in-memory filesystem. Consistency pass, make all the directory-list functions look the same. * WT-2552 Add public API for pluggable filesystems The WT_FILE_SYSTEM.{directory_sync, remove, rename} methods are not required for read-only systems. * WT-2552 Add public API for pluggable filesystems Change the WT_FILE_SYSTEM.open_file file_type argument from a set of constants to an enum. This requires changing how we store connection direct I/O configuration (the constants used to be flags stored in the WT_CONNECTION_IMPL), and requiring all callers of __wt_open() do their own work to figure out if WT_OPEN_DIRECTIO should be specified. * WT-2552 Add public API for pluggable filesystems Make no guarantees WT_FILE_SYSTEM and WT_FILE_HANDLE methods are not called concurrently (except for WT_FILE_HANDLE::fallocate and WT_FILE_HANDLE::fallocate_nolock). Rewrite the in-memory FS code to lock across all methods (for example, WT_FILE_HANDLE.close), that means including a reference to the enclosing WT_FILE_SYSTEM in the WT_FILE_HANDLE structure so we can find a lock without using the WT_CONNECTION_IMPL structure. 
* WT-2552 Add public API for pluggable filesystems Remove __wt_directory_sync_fh, it's no longer useful. * WT-2552 Add public API for pluggable filesystems Rename WT_INMEMORY_FILE_SYSTEM to WT_FILE_SYSTEM_INMEM, matching WT_FILE_HANDLE_INMEM. * WT-2552 Add public API for pluggable filesystems Add WT_FILE_SYSTEM.directory_list_free, to free memory allocated by WT_FILE_SYSTEM.directory_list. Fix a memory leak in __log_archive_once (if __wt_readlock failed, we leaked the directory-list memory). * WT-2552 Add public API for pluggable filesystems Typo, check WT_DIRECT_IO_LOG, not WT_DIRECT_IO_CHECKPOINT. * WT-2552 Add public API for pluggable filesystems Typo, unreachable code. * WT-2552 Add public API for pluggable filesystems We don't require WT_FILE_SYSTEM.{remove,rename} if the system is read-only. * Fix Windows build with pluggable file system. Involved removing u_int from the public API. * Fix line wrapping. * Fix Windows terminate function. * Forgot something in my last commit. * Fix Windows munmap bug. * Add new example to Windows build. Extend example to be more complete. * Fix example loading on Windows * Update documentation * Add missing spell words * Remove old comment. --- src/cursor/cur_join.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 93c1711ef93..8bf7007527b 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -211,9 +211,8 @@ err: __wt_free(session, uri); /* * __curjoin_iter_bump -- - * Called to advance the iterator to the next endpoint, - * which may in turn advance to the next entry. - * + * Called to advance the iterator to the next endpoint, which may in turn + * advance to the next entry. 
*/ static int __curjoin_iter_bump(WT_CURSOR_JOIN_ITER *iter) -- cgit v1.2.1 From 92497038c06e5ef7e640434fe2b4a1166f644174 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Mon, 2 May 2016 11:36:37 -0400 Subject: WT-2592 Fix joins for the non-recno, non-raw case (#2705) WT-2592 Fix joins for the non-recno, non-raw case. Also detect a failed search as the last step of the WT_CURSOR::next() operation, and distinguish it from WT_NOTFOUND. --- src/cursor/cur_join.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 8bf7007527b..675d4851b38 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -1076,7 +1076,6 @@ __curjoin_next(WT_CURSOR *cursor) WT_CURSOR_JOIN_ITER *iter; WT_DECL_RET; WT_SESSION_IMPL *session; - const uint8_t *p; int tret; cjoin = (WT_CURSOR_JOIN *)cursor; @@ -1104,19 +1103,17 @@ __curjoin_next(WT_CURSOR *cursor) if (ret == 0) { /* - * Position the 'main' cursor, this will be used to - * retrieve values from the cursor join. + * Position the 'main' cursor, this will be used to retrieve + * values from the cursor join. The key we have is raw, but + * the main cursor may not be raw. */ c = cjoin->main; - if (WT_CURSOR_RECNO(cursor) && - !F_ISSET(cursor, WT_CURSTD_RAW)) { - p = (const uint8_t *)iter->curkey->data; - WT_ERR(__wt_vunpack_uint(&p, iter->curkey->size, - &cjoin->iface.recno)); - c->set_key(c, cjoin->iface.recno); - } else - c->set_key(c, iter->curkey); - WT_ERR(c->search(c)); + __wt_cursor_set_raw_key(c, iter->curkey); + + /* A failed search is not expected, don't return WT_NOTFOUND. 
*/ + if ((ret = c->search(c)) == WT_NOTFOUND) + ret = WT_ERROR; + WT_ERR(ret); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if (ret == WT_NOTFOUND && (tret = __curjoin_iter_close_all(iter)) != 0) -- cgit v1.2.1 From 6ef4c0c631514916c8b4bd7aed12913647044626 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 3 May 2016 12:00:07 -0400 Subject: WT-2609: Incorrect "skips API_END call" error. (#2713) A comment in cursor/cur_join.c contains "return WT_NOTFOUND", which is triggering a complaint from the scripts that check for returns that skip error labels. --- src/cursor/cur_join.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/cursor/cur_join.c') diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 675d4851b38..fd7de53c981 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -1110,10 +1110,14 @@ __curjoin_next(WT_CURSOR *cursor) c = cjoin->main; __wt_cursor_set_raw_key(c, iter->curkey); - /* A failed search is not expected, don't return WT_NOTFOUND. */ + /* + * A failed search is not expected, convert WT_NOTFOUND into a + * generic error. + */ if ((ret = c->search(c)) == WT_NOTFOUND) ret = WT_ERROR; WT_ERR(ret); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if (ret == WT_NOTFOUND && (tret = __curjoin_iter_close_all(iter)) != 0) -- cgit v1.2.1