Merge pull request #2592 from wiredtiger/wt-2490

WT-2490: search_near() returns wrong key for column-store
author: Michael Cahill <michael.cahill@mongodb.com> 2016-03-17 16:47:51 +1100
committer: Michael Cahill <michael.cahill@mongodb.com> 2016-03-17 16:47:51 +1100
commit: d5a67f2c0f84527a8db92ad17204be02f314366b (patch)
tree: 011ccea0cb627611f3b23e3322811d5bb3a8fae4
parent: 019fb827cd32acba72ec1b01546bd685e6641a5d (diff)
parent: d870b48d42135f0a6ce06a8a174126bb2f55077d (diff)
download: mongo-d5a67f2c0f84527a8db92ad17204be02f314366b.tar.gz
5 files changed, 218 insertions, 74 deletions
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index d64b6064268..1f3ac443495 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -173,13 +173,18 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
 		 */
 		break;
 	case BTREE_COL_VAR:
+		/* The search function doesn't check for empty pages. */
+		if (page->pg_var_entries == 0)
+			return (false);
+		WT_ASSERT(session, cbt->slot < page->pg_var_entries);
+
 		/*
-		 * If search returned an insert object, there may or may not be
-		 * a matching on-page object, we have to check.  Variable-length
-		 * column-store pages don't map one-to-one to keys, but have
-		 * "slots", check if search returned a valid slot.
+		 * Column-store updates aren't stored on the page, instead they
+		 * are stored as "insert" objects. If search returned an insert
+		 * object we can't return, the returned on-page object must be
+		 * checked for a match.
 		 */
-		if (cbt->slot >= page->pg_var_entries)
+		if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
 			return (false);
 
 		/*
@@ -194,6 +199,11 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
 			return (false);
 		break;
 	case BTREE_ROW:
+		/* The search function doesn't check for empty pages. */
+		if (page->pg_row_entries == 0)
+			return (false);
+		WT_ASSERT(session, cbt->slot < page->pg_row_entries);
+
 		/*
 		 * See above: for row-store, no insert object can have the same
 		 * key as an on-page object, we're done.
@@ -201,15 +211,6 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
 		if (cbt->ins != NULL)
 			return (false);
 
-		/*
-		 * Check if searched returned a valid slot (the failure mode is
-		 * an empty page, the search function doesn't check, and so the
-		 * more exact test is "page->pg_row_entries == 0", but this test
-		 * mirrors the column-store test).
-		 */
-		if (cbt->slot >= page->pg_row_entries)
-			return (false);
-
 		/* Updates are stored on the page, check for a delete. */
 		if (page->pg_row_upd != NULL && (upd = __wt_txn_read(
 		    session, page->pg_row_upd[cbt->slot])) != NULL) {
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index 23eae75ec2b..4730267a545 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -211,7 +211,6 @@ descend:	/*
 leaf_only:
 	page = current->page;
 	cbt->ref = current;
-	cbt->recno = recno;
 
 	/* 
 	 * Don't bother searching if the caller is appending a new record where
@@ -225,13 +224,6 @@ leaf_only:
 	}
 
 	/*
-	 * Set the on-page slot to an impossible value larger than any possible
-	 * slot (it's used to interpret the search function's return after the
-	 * search returns an insert list for a page that has no entries).
-	 */
-	cbt->slot = UINT32_MAX;
-
-	/*
 	 * Search the leaf page.
 	 *
 	 * Search after a page is pinned does a search of the pinned page before
@@ -244,28 +236,38 @@ leaf_only:
 	 * that's impossibly large for the page. We do have additional setup to
 	 * do in that case, the record may be appended to the page.
 	 */
-	cbt->compare = 0;
 	if (page->type == WT_PAGE_COL_FIX) {
 		if (recno < page->pg_fix_recno) {
+			cbt->recno = page->pg_fix_recno;
 			cbt->compare = 1;
 			return (0);
 		}
 		if (recno >= page->pg_fix_recno + page->pg_fix_entries) {
 			cbt->recno = page->pg_fix_recno + page->pg_fix_entries;
 			goto past_end;
-		} else
+		} else {
+			cbt->recno = recno;
+			cbt->compare = 0;
 			ins_head = WT_COL_UPDATE_SINGLE(page);
+		}
 	} else {
 		if (recno < page->pg_var_recno) {
+			cbt->recno = page->pg_var_recno;
+			cbt->slot = 0;
 			cbt->compare = 1;
 			return (0);
 		}
 		if ((cip = __col_var_search(page, recno, NULL)) == NULL) {
 			cbt->recno = __col_var_last_recno(page);
+			cbt->slot = page->pg_var_entries == 0 ?
+			    0 : page->pg_var_entries - 1;
 			goto past_end;
 		} else {
+			cbt->recno = recno;
 			cbt->slot = WT_COL_SLOT(page, cip);
+			cbt->compare = 0;
 			ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
+			F_SET(cbt, WT_CBT_VAR_ONPAGE_MATCH);
 		}
 	}
 
diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c
index 5f873812c11..38a83217933 100644
--- a/src/cursor/cur_join.c
+++ b/src/cursor/cur_join.c
@@ -113,7 +113,6 @@ __curjoin_split_key(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
 
 	if (isindex) {
 		cindex = ((WT_CURSOR_INDEX *)fromcur);
-		firstcg_cur = cindex->cg_cursors[0];
 		/*
 		 * Repack tells us where the index key ends; advance past
 		 * that to get where the raw primary key starts.
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 7ce0159c608..4b35daf106e 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -213,10 +213,11 @@ struct __wt_cursor_btree {
 #define	WT_CBT_NO_TXN   	0x10	/* Non-transactional cursor
 					   (e.g. on a checkpoint) */
 #define	WT_CBT_SEARCH_SMALLEST	0x20	/* Row-store: small-key insert list */
+#define	WT_CBT_VAR_ONPAGE_MATCH	0x40	/* Var-store: on-page recno match */
 
 #define	WT_CBT_POSITION_MASK		/* Flags associated with position */ \
 	(WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
-	WT_CBT_SEARCH_SMALLEST)
+	WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH)
 
 	uint8_t flags;
 };
diff --git a/test/suite/test_bug008.py b/test/suite/test_bug008.py
index 8f0526d9cef..0243887e258 100644
--- a/test/suite/test_bug008.py
+++ b/test/suite/test_bug008.py
@@ -33,65 +33,208 @@ import wiredtiger, wttest
 from helper import simple_populate, key_populate, value_populate
 from wtscenario import check_scenarios
 
-# Tests for invisible updates.
+# Test search/search-near operations, including invisible values and keys
+# past the end of the table.
 class test_bug008(wttest.WiredTigerTestCase):
+    uri = 'file:test_bug008'                # This is a btree layer test.
     scenarios = check_scenarios([
-        ('fix', dict(fmt='key_format=r,value_format=8t', empty=1)),
-        ('row', dict(fmt='key_format=S', empty=0)),
-        ('var', dict(fmt='key_format=r', empty=0))
+        ('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)),
+        ('row', dict(fmt='key_format=S', empty=0, colvar=0)),
+        ('var', dict(fmt='key_format=r', empty=0, colvar=1))
     ])
 
+    # Verify cursor search and search-near operations in an empty table.
+    def test_search_empty(self):
+        # Create the object and open a cursor.
+        self.session.create(self.uri, self.fmt)
+        cursor = self.session.open_cursor(self.uri, None)
+
+        # Search for a record past the end of the table, which should fail.
+        cursor.set_key(key_populate(cursor, 100))
+        self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+
+        # Search-near for a record past the end of the table, which should fail.
+        cursor.set_key(key_populate(cursor, 100))
+        self.assertEqual(cursor.search_near(), wiredtiger.WT_NOTFOUND)
+
+    # Verify cursor search and search-near operations at and past the end of
+    # a file, with a set of on-page visible records.
+    def test_search_eot(self):
+        # Populate the tree and reopen the connection, forcing it to disk
+        # and moving the records to an on-page format.
+        simple_populate(self, self.uri, self.fmt, 100)
+        self.reopen_conn()
+
+        # Open a cursor.
+        cursor = self.session.open_cursor(self.uri, None)
+
+        # Search for a record at the end of the table, which should succeed.
+        cursor.set_key(key_populate(cursor, 100))
+        self.assertEqual(cursor.search(), 0)
+        self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
+        self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+
+        # Search-near for a record at the end of the table, which should
+        # succeed, returning the last record.
+        cursor.set_key(key_populate(cursor, 100))
+        self.assertEqual(cursor.search_near(), 0)
+        self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
+        self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+
+        # Search for a record past the end of the table, which should fail.
+        cursor.set_key(key_populate(cursor, 200))
+        self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+
+        # Search-near for a record past the end of the table, which should
+        # succeed, returning the last record.
+        cursor.set_key(key_populate(cursor, 200))
+        self.assertEqual(cursor.search_near(), -1)
+        self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
+        self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+
+    # Verify cursor search-near operations before and after a set of
+    # column-store duplicates.
+    def test_search_duplicate(self):
+        if self.colvar == 0:
+                return
+
+        # Populate the tree.
+        simple_populate(self, self.uri, self.fmt, 105)
+
+        # Set up deleted records before and after a set of duplicate records,
+        # and make sure search/search-near returns the correct record.
+        cursor = self.session.open_cursor(self.uri, None)
+        for i in range(20, 100):
+            cursor[key_populate(cursor, i)] = '=== IDENTICAL VALUE ==='
+        for i in range(15, 25):
+            cursor.set_key(key_populate(cursor, i))
+            self.assertEqual(cursor.remove(), 0)
+        for i in range(95, 106):
+            cursor.set_key(key_populate(cursor, i))
+            self.assertEqual(cursor.remove(), 0)
+        cursor.close()
+
+        # Reopen the connection, forcing it to disk and moving the records to
+        # an on-page format.
+        self.reopen_conn()
+
+        # Open a cursor.
+        cursor = self.session.open_cursor(self.uri, None)
+
+        # Search-near for a record in the deleted set before the duplicate set,
+        # which should succeed, returning the first record in the duplicate set.
+        cursor.set_key(key_populate(cursor, 18))
+        self.assertEqual(cursor.search_near(), 1)
+        self.assertEqual(cursor.get_key(), key_populate(cursor, 25))
+
+        # Search-near for a record in the deleted set after the duplicate set,
+        # which should succeed, returning the last record in the duplicate set.
+        cursor.set_key(key_populate(cursor, 98))
+        self.assertEqual(cursor.search_near(), -1)
+        self.assertEqual(cursor.get_key(), key_populate(cursor, 94))
+
     # Verify cursor search and search-near operations on a file with a set of
     # on-page visible records, and a set of insert-list invisible records.
     def test_search_invisible_one(self):
-        uri = 'file:test_bug008'                # This is a btree layer test.
+        # Populate the tree.
+        simple_populate(self, self.uri, self.fmt, 100)
 
-        # Populate the tree and reopen the connection, forcing it to disk
-        # and moving the records to an on-page format.
-        simple_populate(self, uri, self.fmt, 100)
+        # Delete a range of records.
+        for i in range(5, 10):
+            cursor = self.session.open_cursor(self.uri, None)
+            cursor.set_key(key_populate(cursor, i))
+            self.assertEqual(cursor.remove(), 0)
+
+        # Reopen the connection, forcing it to disk and moving the records to
+        # an on-page format.
         self.reopen_conn()
 
-        # Begin a transaction, and add some additional records.
+        # Add updates to the existing records (in both the deleted an undeleted
+        # range), as well as some new records after the end. Put the updates in
+        # a separate transaction so they're invisible to another cursor.
         self.session.begin_transaction()
-        cursor = self.session.open_cursor(uri, None)
+        cursor = self.session.open_cursor(self.uri, None)
+        for i in range(5, 10):
+            cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000)
+        for i in range(30, 40):
+            cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000)
         for i in range(100, 140):
-            cursor[key_populate(cursor, i)] = value_populate(cursor, i)
+            cursor[key_populate(cursor, i)] = value_populate(cursor, i + 1000)
 
         # Open a separate session and cursor.
         s = self.conn.open_session()
-        cursor = s.open_cursor(uri, None)
+        cursor = s.open_cursor(self.uri, None)
 
-        # Search for an invisible record.
-        cursor.set_key(key_populate(cursor, 130))
-        if self.empty:
-            # Invisible updates to fixed-length column-store objects are
-            # invisible to the reader, but the fact that they exist past
-            # the end of the initial records causes the instantiation of
-            # empty records: confirm successful return of an empty row.
-            cursor.search()
-            self.assertEqual(cursor.get_key(), 130)
-            self.assertEqual(cursor.get_value(), 0)
-        else:
-            # Otherwise, we should not find any matching records.
-            self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+        # Search for an existing record in the deleted range, should not find
+        # it.
+        for i in range(5, 10):
+            cursor.set_key(key_populate(cursor, i))
+            if self.empty:
+                # Fixed-length column-store rows always exist.
+                self.assertEqual(cursor.search(), 0)
+                self.assertEqual(cursor.get_key(), i)
+                self.assertEqual(cursor.get_value(), 0)
+            else:
+                self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
 
-        # Search-near for an invisible record, which should succeed, returning
-        # the last visible record.
-        cursor.set_key(key_populate(cursor, 130))
-        cursor.search_near()
-        if self.empty:
-            # Invisible updates to fixed-length column-store objects are
-            # invisible to the reader, but the fact that they exist past
-            # the end of the initial records causes the instantiation of
-            # empty records: confirm successful return of an empty row.
-            cursor.search()
-            self.assertEqual(cursor.get_key(), 130)
-            self.assertEqual(cursor.get_value(), 0)
-        else:
-            # Otherwise, we should find the closest record for which we can see
-            # the value.
-            self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
-            self.assertEqual(cursor.get_value(), value_populate(cursor, 100))
+        # Search for an existing record in the updated range, should see the
+        # original value.
+        for i in range(30, 40):
+            cursor.set_key(key_populate(cursor, i))
+            self.assertEqual(cursor.search(), 0)
+            self.assertEqual(cursor.get_key(), key_populate(cursor, i))
+
+        # Search for a added record, should not find it.
+        for i in range(120, 130):
+            cursor.set_key(key_populate(cursor, i))
+            if self.empty:
+                # Invisible updates to fixed-length column-store objects are
+                # invisible to the reader, but the fact that they exist past
+                # the end of the initial records causes the instantiation of
+                # empty records: confirm successful return of an empty row.
+                self.assertEqual(cursor.search(), 0)
+                self.assertEqual(cursor.get_key(), i)
+                self.assertEqual(cursor.get_value(), 0)
+            else:
+                # Otherwise, we should not find any matching records.
+                self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+
+        # Search-near for an existing record in the deleted range, should find
+        # the next largest record. (This depends on the implementation behavior
+        # which currently includes a bias to prefix search.)
+        for i in range(5, 10):
+            cursor.set_key(key_populate(cursor, i))
+            if self.empty:
+                # Fixed-length column-store rows always exist.
+                self.assertEqual(cursor.search_near(), 0)
+                self.assertEqual(cursor.get_key(), i)
+                self.assertEqual(cursor.get_value(), 0)
+            else:
+                self.assertEqual(cursor.search_near(), 1)
+                self.assertEqual(cursor.get_key(), key_populate(cursor, 10))
+
+        # Search-near for an existing record in the updated range, should see
+        # the original value.
+        for i in range(30, 40):
+            cursor.set_key(key_populate(cursor, i))
+            self.assertEqual(cursor.search_near(), 0)
+            self.assertEqual(cursor.get_key(), key_populate(cursor, i))
+
+        # Search-near for an added record, should find the previous largest
+        # record.
+        for i in range(120, 130):
+            cursor.set_key(key_populate(cursor, i))
+            if self.empty:
+                # Invisible updates to fixed-length column-store objects are
+                # invisible to the reader, but the fact that they exist past
+                # the end of the initial records causes the instantiation of
+                # empty records: confirm successful return of an empty row.
+                self.assertEqual(cursor.search_near(), 0)
+                self.assertEqual(cursor.get_key(), i)
+                self.assertEqual(cursor.get_value(), 0)
+            else:
+                self.assertEqual(cursor.search_near(), -1)
+                self.assertEqual(cursor.get_key(), key_populate(cursor, 100))
 
     # Verify cursor search and search-near operations on a file with a set of
     # on-page visible records, a set of insert-list visible records, and a set
@@ -101,28 +244,26 @@ class test_bug008(wttest.WiredTigerTestCase):
     # fallback happens, whether the correct position is in the page slots or
     # the insert list.)
     def test_search_invisible_two(self):
-        uri = 'file:test_bug008'                # This is a btree layer test.
-
         # Populate the tree and reopen the connection, forcing it to disk
         # and moving the records to an on-page format.
-        simple_populate(self, uri, self.fmt, 100)
+        simple_populate(self, self.uri, self.fmt, 100)
         self.reopen_conn()
 
         # Add some additional visible records.
-        cursor = self.session.open_cursor(uri, None)
+        cursor = self.session.open_cursor(self.uri, None)
         for i in range(100, 120):
             cursor[key_populate(cursor, i)] = value_populate(cursor, i)
         cursor.close()
 
         # Begin a transaction, and add some additional records.
         self.session.begin_transaction()
-        cursor = self.session.open_cursor(uri, None)
+        cursor = self.session.open_cursor(self.uri, None)
         for i in range(120, 140):
             cursor[key_populate(cursor, i)] = value_populate(cursor, i)
 
         # Open a separate session and cursor.
         s = self.conn.open_session()
-        cursor = s.open_cursor(uri, None)
+        cursor = s.open_cursor(self.uri, None)
 
         # Search for an invisible record.
         cursor.set_key(key_populate(cursor, 130))
author	Michael Cahill <michael.cahill@mongodb.com>	2016-03-17 16:47:51 +1100
committer	Michael Cahill <michael.cahill@mongodb.com>	2016-03-17 16:47:51 +1100
commit	d5a67f2c0f84527a8db92ad17204be02f314366b (patch)
tree	011ccea0cb627611f3b23e3322811d5bb3a8fae4
parent	019fb827cd32acba72ec1b01546bd685e6641a5d (diff)
parent	d870b48d42135f0a6ce06a8a174126bb2f55077d (diff)
download	mongo-d5a67f2c0f84527a8db92ad17204be02f314366b.tar.gz