summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDon Anderson <dda@mongodb.com>2017-02-12 20:13:24 -0500
committerMichael Cahill <michael.cahill@mongodb.com>2017-02-13 12:13:24 +1100
commita8fe04026ef55b8f59df24ff75ae151c7c370e2a (patch)
treec55934d6410cba13d98e56b03832cd215e6f402c
parent7f5d0f9981214c723f2ed90cf4533887ed406176 (diff)
downloadmongo-a8fe04026ef55b8f59df24ff75ae151c7c370e2a.tar.gz
WT-3135 WT-3159 Fix search_near() with custom collators for index keys of variable length. (#3254)
* For checkpoint logging, use a format that ends in 'u' to be compatible with previously created log files. In previous WT versions, these formats end in 'U', and a final 'U' does have a prefixed size. Now, a 'U' in any position has a prefixed size.
-rw-r--r--dist/s_string.ok1
-rwxr-xr-xdist/s_void4
-rw-r--r--src/cursor/cur_index.c25
-rw-r--r--src/include/packing.i7
-rw-r--r--src/txn/txn_log.c4
-rw-r--r--test/csuite/Makefile.am3
-rw-r--r--test/csuite/wt3135_search_near_collator/main.c360
7 files changed, 398 insertions, 6 deletions
diff --git a/dist/s_string.ok b/dist/s_string.ok
index bb0cacd9d5d..d2e9dffaa48 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -1182,6 +1182,7 @@ txt
typedef
uB
uS
+ui
uint
uintmax
unbare
diff --git a/dist/s_void b/dist/s_void
index 4a6b4ad91a2..947153e730b 100755
--- a/dist/s_void
+++ b/dist/s_void
@@ -87,6 +87,10 @@ func_ok()
-e '/int handle_progress$/d' \
-e '/int helium_cursor_reset$/d' \
-e '/int helium_session_verify$/d' \
+ -e '/int index_compare_primary$/d' \
+ -e '/int index_compare_S$/d' \
+ -e '/int index_compare_u$/d' \
+ -e '/int index_extractor_u$/d' \
-e '/int log_print_err$/d' \
-e '/int lz4_error$/d' \
-e '/int lz4_pre_size$/d' \
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c
index 4786b0524bc..13180efdea4 100644
--- a/src/cursor/cur_index.c
+++ b/src/cursor/cur_index.c
@@ -240,7 +240,16 @@ __curindex_search(WT_CURSOR *cursor)
found_key = child->key;
if (found_key.size < cursor->key.size)
WT_ERR(WT_NOTFOUND);
- found_key.size = cursor->key.size;
+
+ /*
+ * Custom collators expect to see complete keys, pass an item containing
+ * all the visible fields so it unpacks correctly.
+ */
+ if (cindex->index->collator != NULL)
+ WT_ERR(__wt_struct_repack(session, child->key_format,
+ cindex->iface.key_format, &child->key, &found_key));
+ else
+ found_key.size = cursor->key.size;
WT_ERR(__wt_compare(
session, cindex->index->collator, &cursor->key, &found_key, &cmp));
@@ -307,8 +316,18 @@ __curindex_search_near(WT_CURSOR *cursor, int *exact)
* so we flip the sign of the result to match what callers expect.
*/
found_key = child->key;
- if (found_key.size > cursor->key.size)
- found_key.size = cursor->key.size;
+ if (found_key.size > cursor->key.size) {
+ /*
+ * Custom collators expect to see complete keys, pass an item
+ * containing all the visible fields so it unpacks correctly.
+ */
+ if (cindex->index->collator != NULL)
+ WT_ERR(__wt_struct_repack(session,
+ cindex->child->key_format, cindex->iface.key_format,
+ &child->key, &found_key));
+ else
+ found_key.size = cursor->key.size;
+ }
WT_ERR(__wt_compare(
session, cindex->index->collator, &cursor->key, &found_key, exact));
diff --git a/src/include/packing.i b/src/include/packing.i
index 17ca261bcfc..8ba3dd536ac 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -168,10 +168,15 @@ next: if (pack->cur == pack->end)
(int)(pack->end - pack->orig), pack->orig);
return (0);
case 'u':
- case 'U':
/* Special case for items with a size prefix. */
pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u';
return (0);
+ case 'U':
+ /*
+ * Don't change the type. 'U' is used internally, so this type
+ * was already changed to explicitly include the size.
+ */
+ return (0);
case 'b':
case 'h':
case 'i':
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 7ad295f421b..2931dc1ce82 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -269,7 +269,7 @@ __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session,
WT_ITEM ckpt_snapshot_unused;
uint32_t ckpt_file, ckpt_offset;
u_int ckpt_nsnapshot_unused;
- const char *fmt = WT_UNCHECKED_STRING(IIIU);
+ const char *fmt = WT_UNCHECKED_STRING(IIIu);
if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
&ckpt_file, &ckpt_offset,
@@ -297,7 +297,7 @@ __wt_txn_checkpoint_log(
uint8_t *end, *p;
size_t recsize;
uint32_t i, rectype = WT_LOGREC_CHECKPOINT;
- const char *fmt = WT_UNCHECKED_STRING(IIIIU);
+ const char *fmt = WT_UNCHECKED_STRING(IIIIu);
txn = &session->txn;
ckpt_lsn = &txn->ckpt_lsn;
diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am
index bcdbf120d67..5167b42b433 100644
--- a/test/csuite/Makefile.am
+++ b/test/csuite/Makefile.am
@@ -43,6 +43,9 @@ noinst_PROGRAMS += test_wt2999_join_extractor
test_wt3120_filesys_SOURCES = wt3120_filesys/main.c
noinst_PROGRAMS += test_wt3120_filesys
+test_wt3135_search_near_collator_SOURCES = wt3135_search_near_collator/main.c
+noinst_PROGRAMS += test_wt3135_search_near_collator
+
# Run this during a "make check" smoke test.
TESTS = $(noinst_PROGRAMS)
LOG_COMPILER = $(TEST_WRAPPER)
diff --git a/test/csuite/wt3135_search_near_collator/main.c b/test/csuite/wt3135_search_near_collator/main.c
new file mode 100644
index 00000000000..3113d29dfa9
--- /dev/null
+++ b/test/csuite/wt3135_search_near_collator/main.c
@@ -0,0 +1,360 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-3135
+ * Test case description: Each set of data is ordered and contains
+ * five elements (0-4). We insert elements 1 and 3, and then do
+ * search_near and search for each element. For each set of data, we perform
+ * these tests first using a custom collator, and second using a custom collator
+ * and extractor. In each case there are index keys having variable length.
+ * Failure mode: In the reported test case, the custom compare routine is
+ * given a truncated key to compare, and the unpack functions return errors
+ * because the truncation appeared in the middle of a key.
+ */
+
+#define TEST_ENTRY_COUNT 5
+typedef const char *TEST_SET[TEST_ENTRY_COUNT];
+static TEST_SET test_sets[] = {
+ { "0", "01", "012", "0123", "01234" },
+ { "A", "B", "C", "D", "E" },
+ { "5", "54", "543", "5432", "54321" },
+ { "54321", "5433", "544", "55", "6" }
+};
+#define TEST_SET_COUNT (sizeof(test_sets) / sizeof(test_sets[0]))
+
+static bool
+item_str_equal(WT_ITEM *item, const char *str)
+{
+ return (item->size == strlen(str) + 1 && strncmp((char *)item->data,
+ str, item->size) == 0);
+}
+
+static int
+compare_int(int a, int b)
+{
+ return (a < b ? -1 : (a > b ? 1 : 0));
+}
+
+static int
+index_compare_primary(WT_PACK_STREAM *s1, WT_PACK_STREAM *s2, int *cmp)
+{
+ int64_t pkey1, pkey2;
+ int rc1, rc2;
+
+ rc1 = wiredtiger_unpack_int(s1, &pkey1);
+ rc2 = wiredtiger_unpack_int(s2, &pkey2);
+
+ if (rc1 == 0 && rc2 == 0)
+ *cmp = compare_int(pkey1, pkey2);
+ else if (rc1 != 0 && rc2 != 0)
+ *cmp = 0;
+ else if (rc1 != 0)
+ *cmp = -1;
+ else
+ *cmp = 1;
+ return (0);
+}
+
+static int
+index_compare_S(WT_COLLATOR *collator, WT_SESSION *session,
+ const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
+{
+ WT_PACK_STREAM *s1, *s2;
+ const char *skey1, *skey2;
+
+ (void)collator;
+
+ testutil_check(wiredtiger_unpack_start(session, "Si", key1->data,
+ key1->size, &s1));
+ testutil_check(wiredtiger_unpack_start(session, "Si", key2->data,
+ key2->size, &s2));
+
+ testutil_check(wiredtiger_unpack_str(s1, &skey1));
+ testutil_check(wiredtiger_unpack_str(s2, &skey2));
+
+ if ((*cmp = strcmp(skey1, skey2)) == 0)
+ testutil_check(index_compare_primary(s1, s2, cmp));
+
+ testutil_check(wiredtiger_pack_close(s1, NULL));
+ testutil_check(wiredtiger_pack_close(s2, NULL));
+
+ return (0);
+}
+
+static int
+index_compare_u(WT_COLLATOR *collator, WT_SESSION *session,
+ const WT_ITEM *key1, const WT_ITEM *key2, int *cmp)
+{
+ WT_ITEM skey1, skey2;
+ WT_PACK_STREAM *s1, *s2;
+
+ (void)collator;
+
+ testutil_check(wiredtiger_unpack_start(session, "ui", key1->data,
+ key1->size, &s1));
+ testutil_check(wiredtiger_unpack_start(session, "ui", key2->data,
+ key2->size, &s2));
+
+ testutil_check(wiredtiger_unpack_item(s1, &skey1));
+ testutil_check(wiredtiger_unpack_item(s2, &skey2));
+
+ if ((*cmp = strcmp(skey1.data, skey2.data)) == 0)
+ testutil_check(index_compare_primary(s1, s2, cmp));
+
+ testutil_check(wiredtiger_pack_close(s1, NULL));
+ testutil_check(wiredtiger_pack_close(s2, NULL));
+
+ return (0);
+}
+
+static int
+index_extractor_u(WT_EXTRACTOR *extractor, WT_SESSION *session,
+ const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+{
+ (void)extractor;
+ (void)session;
+ (void)key;
+
+ result_cursor->set_key(result_cursor, value);
+ return result_cursor->insert(result_cursor);
+}
+
+static WT_COLLATOR collator_S = { index_compare_S, NULL, NULL };
+static WT_COLLATOR collator_u = { index_compare_u, NULL, NULL };
+static WT_EXTRACTOR extractor_u = { index_extractor_u, NULL, NULL };
+
+/*
+ * Check search() and search_near() using the test string indicated
+ * by test_index.
+ */
+static void
+search_using_str(WT_CURSOR *cursor, TEST_SET test_set, int test_index)
+{
+ int exact, ret;
+ const char *result;
+ const char *str_01, *str_0123, *test_str;
+
+ testutil_assert(test_index >= 0 && test_index <= 4);
+ str_01 = test_set[1];
+ str_0123 = test_set[3];
+ test_str = test_set[test_index];
+
+ cursor->set_key(cursor, test_str);
+ testutil_check(cursor->search_near(cursor, &exact));
+ testutil_check(cursor->get_key(cursor, &result));
+
+ if (test_index == 0)
+ testutil_assert(strcmp(result, str_01) == 0 && exact > 0);
+ else if (test_index == 1)
+ testutil_assert(strcmp(result, str_01) == 0 && exact == 0);
+ else if (test_index == 2)
+ testutil_assert((strcmp(result, str_0123) == 0 && exact > 0) ||
+ (strcmp(result, str_01) == 0 && exact < 0));
+ else if (test_index == 3)
+ testutil_assert(strcmp(result, str_0123) == 0 && exact == 0);
+ else if (test_index == 4)
+ testutil_assert(strcmp(result, str_0123) == 0 && exact < 0);
+
+ cursor->set_key(cursor, test_str);
+ ret = cursor->search(cursor);
+
+ if (test_index == 0 || test_index == 2 || test_index == 4)
+ testutil_assert(ret == WT_NOTFOUND);
+ else if (test_index == 1 || test_index == 3)
+ testutil_assert(ret == 0);
+}
+
+/*
+ * Check search() and search_near() using the test string indicated
+ * by test_index against a table containing a variable sized item.
+ */
+static void
+search_using_item(WT_CURSOR *cursor, TEST_SET test_set, int test_index)
+{
+ WT_ITEM item;
+ size_t testlen;
+ int exact, ret;
+ const char *str_01, *str_0123, *test_str;
+
+ testutil_assert(test_index >= 0 && test_index <= 4);
+ str_01 = test_set[1];
+ str_0123 = test_set[3];
+ test_str = test_set[test_index];
+
+ testlen = strlen(test_str) + 1;
+ item.data = test_str;
+ item.size = testlen;
+ cursor->set_key(cursor, &item);
+ testutil_check(cursor->search_near(cursor, &exact));
+ testutil_check(cursor->get_key(cursor, &item));
+
+ if (test_index == 0)
+ testutil_assert(item_str_equal(&item, str_01) && exact > 0);
+ else if (test_index == 1)
+ testutil_assert(item_str_equal(&item, str_01) && exact == 0);
+ else if (test_index == 2)
+ testutil_assert((item_str_equal(&item, str_0123) && exact > 0)
+ || (item_str_equal(&item, str_01) && exact < 0));
+ else if (test_index == 3)
+ testutil_assert(item_str_equal(&item, str_0123) && exact == 0);
+ else if (test_index == 4)
+ testutil_assert(item_str_equal(&item, str_0123) && exact < 0);
+
+ item.data = test_str;
+ item.size = testlen;
+ cursor->set_key(cursor, &item);
+ ret = cursor->search(cursor);
+
+ if (test_index == 0 || test_index == 2 || test_index == 4)
+ testutil_assert(ret == WT_NOTFOUND);
+ else if (test_index == 1 || test_index == 3)
+ testutil_assert(ret == 0);
+}
+
+/*
+ * For each set of data, perform tests.
+ */
+static void
+test_one_set(WT_SESSION *session, TEST_SET set)
+{
+ WT_CURSOR *cursor;
+ WT_ITEM item;
+ int32_t i;
+
+ /*
+ * Part 1: Using a custom collator, insert some elements
+ * and verify results from search_near.
+ */
+
+ testutil_check(session->create(session,
+ "table:main", "key_format=i,value_format=S,columns=(k,v)"));
+ testutil_check(session->create(session,
+ "index:main:def_collator", "columns=(v)"));
+ testutil_check(session->create(session,
+ "index:main:custom_collator",
+ "columns=(v),collator=collator_S"));
+
+ /* Insert only elements #1 and #3. */
+ testutil_check(session->open_cursor(session,
+ "table:main", NULL, NULL, &cursor));
+ cursor->set_key(cursor, 0);
+ cursor->set_value(cursor, set[1]);
+ testutil_check(cursor->insert(cursor));
+ cursor->set_key(cursor, 1);
+ cursor->set_value(cursor, set[3]);
+ testutil_check(cursor->insert(cursor));
+ testutil_check(cursor->close(cursor));
+
+ /* Check all elements in def_collator index. */
+ testutil_check(session->open_cursor(session,
+ "index:main:def_collator", NULL, NULL, &cursor));
+ for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
+ search_using_str(cursor, set, i);
+ testutil_check(cursor->close(cursor));
+
+ /* Check all elements in custom_collator index */
+ testutil_check(session->open_cursor(session,
+ "index:main:custom_collator", NULL, NULL, &cursor));
+ for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
+ search_using_str(cursor, set, i);
+ testutil_check(cursor->close(cursor));
+
+ /*
+ * Part 2: perform the same checks using a custom collator and
+ * extractor.
+ */
+ testutil_check(session->create(session,
+ "table:main2", "key_format=i,value_format=u,columns=(k,v)"));
+
+ testutil_check(session->create(session, "index:main2:idx_w_coll",
+ "key_format=u,collator=collator_u,extractor=extractor_u"));
+
+ testutil_check(session->open_cursor(session,
+ "table:main2", NULL, NULL, &cursor));
+
+ memset(&item, 0, sizeof(item));
+ item.size = strlen(set[1]) + 1;
+ item.data = set[1];
+ cursor->set_key(cursor, 1);
+ cursor->set_value(cursor, &item);
+ testutil_check(cursor->insert(cursor));
+
+ item.size = strlen(set[3]) + 1;
+ item.data = set[3];
+ cursor->set_key(cursor, 3);
+ cursor->set_value(cursor, &item);
+ testutil_check(cursor->insert(cursor));
+
+ testutil_check(cursor->close(cursor));
+
+ testutil_check(session->open_cursor(session,
+ "index:main2:idx_w_coll", NULL, NULL, &cursor));
+ for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++)
+ search_using_item(cursor, set, i);
+ testutil_check(cursor->close(cursor));
+
+ testutil_check(session->drop(session, "table:main", NULL));
+ testutil_check(session->drop(session, "table:main2", NULL));
+}
+
+int
+main(int argc, char *argv[])
+{
+ TEST_OPTS *opts, _opts;
+ WT_SESSION *session;
+ int32_t i;
+
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, NULL, "create",
+ &opts->conn));
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Add any collators and extractors used by tests */
+ testutil_check(opts->conn->add_collator(opts->conn, "collator_S",
+ &collator_S, NULL));
+ testutil_check(opts->conn->add_collator(opts->conn, "collator_u",
+ &collator_u, NULL));
+ testutil_check(opts->conn->add_extractor(opts->conn, "extractor_u",
+ &extractor_u, NULL));
+
+ for (i = 0; i < (int32_t)TEST_SET_COUNT; i++) {
+ printf("test set %d\n", i);
+ test_one_set(session, test_sets[i]);
+ }
+
+ testutil_check(session->close(session, NULL));
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
+}