summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/test/suite/test_search_near01.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/test/suite/test_search_near01.py')
-rw-r--r--src/third_party/wiredtiger/test/suite/test_search_near01.py330
1 files changed, 330 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/test/suite/test_search_near01.py b/src/third_party/wiredtiger/test/suite/test_search_near01.py
new file mode 100644
index 00000000000..2e54671c06c
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_search_near01.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+import time, wiredtiger, wttest, unittest
+from wiredtiger import stat
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_search_near01.py
+# Test various prefix search near scenarios.
+class test_search_near01(wttest.WiredTigerTestCase):
+ conn_config = 'statistics=(all)'
+ session_config = 'isolation=snapshot'
+
+ def get_stat(self, stat, local_session = None):
+ if (local_session != None):
+ stat_cursor = local_session.open_cursor('statistics:')
+ else:
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def unique_insert(self, cursor, prefix, id, keys):
+ key = prefix + ',' + str(id)
+ keys.append(key)
+ cursor.set_key(prefix)
+ cursor.set_value(prefix)
+ self.assertEqual(cursor.insert(), 0)
+ cursor.set_key(prefix)
+ self.assertEqual(cursor.remove(), 0)
+ cursor.set_key(prefix)
+ cursor.search_near()
+ cursor.set_key(key)
+ cursor.set_value(key)
+ self.assertEqual(cursor.insert(), 0)
+
+ def test_base_scenario(self):
+ uri = 'table:test_base_scenario'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ cursor3 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+
+ # Basic character array.
+ l = "abcdefghijklmnopqrstuvwxyz"
+
+ # Start our older reader.
+ session2.begin_transaction()
+
+ key_count = 26*26*26
+ # Insert keys aaa -> zzz.
+ self.session.begin_transaction()
+ for i in range (0, 26):
+ for j in range (0, 26):
+ for k in range (0, 26):
+ cursor[l[i] + l[j] + l[k]] = l[i] + l[j] + l[k]
+ self.session.commit_transaction()
+
+ # Evict the whole range.
+ for i in range (0, 26):
+ for j in range(0, 26):
+ cursor3.set_key(l[i] + l[j] + 'a')
+ cursor3.search()
+ cursor3.reset()
+
+ # Search near for the "aa" part of the range.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('aa')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ # This should be equal to roughly key_count * 2 as we're going to traverse the whole
+ # range forward, and then the whole range backwards.
+ self.assertGreater(skip_count, key_count * 2)
+
+ cursor2.reconfigure("prefix_key=true")
+ cursor2.set_key('aa')
+ cursor2.search_near()
+
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ # We should've skipped ~26*2 here as we're only looking at the "aa" range * 2.
+ self.assertGreaterEqual(prefix_skip_count - skip_count, 26*2)
+ skip_count = prefix_skip_count
+
+ # The prefix code will have come into play at once as we walked to "aba". The prev
+ # traversal will go off the end of the file and as such we don't expect it to increment
+ # this statistic again.
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 1)
+
+ # Search for a key not at the start.
+ cursor2.set_key('bb')
+ cursor2.search_near()
+
+ # Assert it to have only incremented the skipped statistic ~26*2 times.
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertGreaterEqual(prefix_skip_count - skip_count, 26*2)
+ skip_count = prefix_skip_count
+
+ # Here we should've hit the prefix fast path code twice. Plus the time we already did.
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 2+1)
+
+ cursor2.close()
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('bb')
+ cursor2.search_near()
+ # Assert that we've incremented the stat key_count times, as we closed the cursor and
+ # reopened it.
+ #
+ # This validates cursor caching logic, as if we don't clear the flag correctly this will
+ # fail.
+ #
+ # It should be closer to key_count * 2 but this an approximation.
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertGreaterEqual(prefix_skip_count - skip_count, key_count)
+
+ # This test aims to simulate a unique index insertion.
+ def test_unique_index_case(self):
+ uri = 'table:test_unique_index_case'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ cursor3 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+ l = "abcdefghijklmnopqrstuvwxyz"
+
+ # A unique index has the following insertion method:
+ # 1. Insert the prefix
+ # 2. Remove the prefix
+ # 3. Search near for the prefix
+ # 4. Insert the full value
+ # All of these operations are wrapped in the same txn, this test attempts to test scenarios
+ # that could arise from this insertion method.
+
+ # A unique index key has the format (prefix, _id), we'll insert keys that look similar.
+
+ # Start our old reader txn.
+ session2.begin_transaction()
+
+ key_count = 26*26
+ id = 0
+ cc_id = 0
+ keys = []
+
+ # Insert keys aa,1 -> zz,N
+ for i in range (0, 26):
+ for j in range (0, 26):
+ # Skip inserting 'c'.
+ if (i == 2 and j == 2):
+ cc_id = id
+ id = id + 1
+ continue
+ self.session.begin_transaction()
+ prefix = l[i] + l[j]
+ self.unique_insert(cursor, prefix, id, keys)
+ id = id + 1
+ self.session.commit_transaction()
+
+ # Evict the whole range.
+ for i in keys:
+ cursor3.set_key(i)
+ cursor3.search()
+ cursor3.reset()
+
+ # Using our older reader attempt to find a value.
+ # Search near for the "cc" prefix.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('cc')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ # This should be equal to roughly key_count * 2 as we're going to traverse most of the
+ # range forward, and then the whole range backwards.
+ self.assertGreater(skip_count, key_count * 2)
+
+ cursor2.reconfigure("prefix_key=true")
+ cursor2.set_key('cc')
+ cursor2.search_near()
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 2)
+
+ # This still isn't visible to our older reader and as such we expect this statistic to
+ # increment twice.
+ self.unique_insert(cursor2, 'cc', cc_id, keys)
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 4)
+
+ # In order for prefix key fast pathing to work we rely on some guarantees provided by row
+ # search. Test some of the guarantees.
+ def test_row_search(self):
+ uri = 'table:test_row_search'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ l = "abcdefghijklmnopqrstuvwxyz"
+ # Insert keys a -> z, except c
+ self.session.begin_transaction()
+ for i in range (0, 26):
+ if (i == 2):
+ continue
+ cursor[l[i]] = l[i]
+ self.session.commit_transaction()
+ # Start our older reader transaction.
+ session2.begin_transaction()
+ # Insert a few keys in the 'c' range
+ self.session.begin_transaction()
+ cursor['c'] = 'c'
+ cursor['cc'] = 'cc'
+ cursor['ccc'] = 'ccc'
+ self.session.commit_transaction()
+ # Search_near for 'c' and assert we skip 3 entries. Internally the row search is landing on
+ # 'c'.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('c')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertEqual(skip_count, 3)
+ session2.commit_transaction()
+
+ # Perform an insertion and removal of a key next to another key, then search for the
+ # removed key.
+ self.session.begin_transaction()
+ cursor.set_key('dd')
+ cursor.set_value('dd')
+ cursor.insert()
+ cursor.set_key('dd')
+ cursor.remove()
+ cursor.set_key('ddd')
+ cursor.set_value('ddd')
+ cursor.insert()
+ cursor.set_key('dd')
+ cursor.search_near()
+ self.session.commit_transaction()
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertEqual(skip_count, 4)
+
+ # Test a basic prepared scenario.
+ def test_prepared(self):
+ uri = 'table:test_base_scenario'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ cursor3 = session2.open_cursor(uri, None, "debug=(release_evict=true)")
+ # Insert an update without timestamp
+ l = "abcdefghijklmnopqrstuvwxyz"
+ session2.begin_transaction()
+
+ key_count = 26*26
+
+ # Insert 'cc'
+ self.session.begin_transaction()
+ cursor['cc'] = 'cc'
+ self.session.commit_transaction()
+
+ # Prepare keys aa -> zz
+ self.session.begin_transaction()
+ for i in range (0, 26):
+ if (i == 2):
+ continue
+ for j in range (0, 26):
+ cursor[l[i] + l[j]] = l[i] + l[j]
+
+ self.session.prepare_transaction('prepare_timestamp=2')
+
+ # Evict the whole range.
+ for i in range (0, 26):
+ for j in range(0, 26):
+ cursor3.set_key(l[i] + l[j])
+ cursor3.search()
+ cursor3.reset()
+
+ # Search near for the "aa" part of the range.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('c')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100, session2)
+ # This should be equal to roughly key_count * 2 as we're going to traverse the whole
+ # range forward, and then the whole range backwards.
+ self.assertGreater(skip_count, key_count)
+
+ cursor2.reconfigure("prefix_key=true")
+ cursor2.set_key('c')
+ cursor2.search_near()
+
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100, session2)
+ self.assertEqual(prefix_skip_count - skip_count, 3)
+ skip_count = prefix_skip_count
+
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths, session2), 2)
+
+ session2.rollback_transaction()
+ session2.begin_transaction('ignore_prepare=true')
+ cursor4 = session2.open_cursor(uri)
+ cursor4.reconfigure("prefix_key=true")
+ cursor4.set_key('c')
+ cursor4.search_near()
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100, session2)
+ self.assertEqual(prefix_skip_count - skip_count, 2)
+ skip_count = prefix_skip_count
+
+ cursor4.reconfigure("prefix_key=false")
+ cursor4.set_key('c')
+ cursor4.search_near()
+ self.assertEqual(self.get_stat(stat.conn.cursor_next_skip_lt_100, session2) - skip_count, 2)