diff options
-rw-r--r-- | test/suite/test_join01.py | 332 | ||||
-rw-r--r-- | test/suite/test_join02.py | 290 | ||||
-rw-r--r-- | test/suite/test_join03.py | 158 |
3 files changed, 780 insertions, 0 deletions
diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py new file mode 100644 index 00000000000..58a135f3bcd --- /dev/null +++ b/test/suite/test_join01.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest
from wtscenario import check_scenarios, multiply_scenarios, number_scenarios

# test_join01.py
#    Join operations
# Basic tests for join
class test_join01(wttest.WiredTigerTestCase):
    table_name1 = 'test_join01'
    nentries = 100

    # Each test runs twice: once joining on the primary table for the
    # first-column condition, once joining on an index over that column.
    scenarios = [
        ('table', dict(ref='table')),
        ('index', dict(ref='index'))
    ]

    # Record-number key for the i'th entry (record numbers start at 1).
    def gen_key(self, i):
        return [ i + 1 ]

    # Values for the i'th entry: str(i), str(i) reversed, and an integer
    # that sorts entries by (i % 3) first and by i second.
    def gen_values(self, i):
        s = str(i)
        rs = s[::-1]
        sort3 = (self.nentries * (i % 3)) + i    # multiples of 3 sort first
        return [s, rs, sort3]

    # Common function for testing iteration of join cursors.
    # Walks the join cursor to the end and checks that it yields exactly
    # the expected entries, in the expected order, with the right values.
    def iter_common(self, jc, do_proj):
        # See comments in join_common()
        expect = [73, 82, 62, 83, 92]
        while jc.next() == 0:
            [k] = jc.get_keys()
            i = k - 1
            if do_proj:  # our projection test simply reverses the values
                [v2, v1, v0] = jc.get_values()
            else:
                [v0, v1, v2] = jc.get_values()
            self.assertEqual(self.gen_values(i), [v0, v1, v2])
            # Guard against an empty list before looking at expect[0]:
            # a surplus result must be reported as a test failure, not
            # as an IndexError.
            if not expect or i != expect[0]:
                self.tty(' result ' + str(i) + ' is not in: ' + str(expect))
                self.fail('unexpected join result ' + str(i) +
                          ', still expecting: ' + str(expect))
            expect.pop(0)
        self.assertEqual(0, len(expect))

    # Common function for testing the most basic functionality
    # of joins.  joincfg0/joincfg1 are configuration suffixes (possibly
    # empty) appended to the 'compare=' string of the second and third
    # join conditions; do_proj selects a reversed-values projection.
    def join_common(self, joincfg0, joincfg1, do_proj):
        self.session.create('table:join01', 'key_format=r' +
                            ',value_format=SSi,columns=(k,v0,v1,v2)')
        self.session.create('index:join01:index0', 'columns=(v0)')
        self.session.create('index:join01:index1', 'columns=(v1)')
        self.session.create('index:join01:index2', 'columns=(v2)')

        c = self.session.open_cursor('table:join01', None, None)
        for i in range(0, self.nentries):
            c.set_key(*self.gen_key(i))
            c.set_value(*self.gen_values(i))
            c.insert()
        c.close()

        if do_proj:
            proj_suffix = '(v2,v1,v0)'  # Reversed values
        else:
            proj_suffix = ''            # Default projection (v0,v1,v2)

        # We join on index2 first, not using bloom indices.
        # This defines the order that items are returned.
        # index2 sorts multiples of 3 first (see gen_values())
        # and by using 'gt' and key 99, we'll skip multiples of 3,
        # and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97.
        jc = self.session.open_cursor('join:table:join01' + proj_suffix,
                                      None, None)
        c2 = self.session.open_cursor('index:join01:index2', None, None)
        c2.set_key(99)   # skips all entries w/ primary key divisible by three
        self.assertEqual(0, c2.search())
        self.session.join(jc, c2, 'compare=gt')

        # Then select all the numbers 0-99 whose string representation
        # sorts >= '60' (index scenario), or whose record number is >= 60
        # (table scenario).
        if self.ref == 'index':
            c0 = self.session.open_cursor('index:join01:index0', None, None)
            c0.set_key('60')
        else:
            c0 = self.session.open_cursor('table:join01', None, None)
            c0.set_key(60)
        self.assertEqual(0, c0.search())
        self.session.join(jc, c0, 'compare=ge' + joincfg0)

        # Then select all numbers whose reverse string representation
        # lies strictly between '21' and '41'.
        c1a = self.session.open_cursor('index:join01:index1', None, None)
        c1a.set_key('21')
        self.assertEqual(0, c1a.search())
        self.session.join(jc, c1a, 'compare=gt' + joincfg1)

        c1b = self.session.open_cursor('index:join01:index1', None, None)
        c1b.set_key('41')
        self.assertEqual(0, c1b.search())
        self.session.join(jc, c1b, 'compare=lt' + joincfg1)

        # Numbers that satisfy these 3 conditions (with ordering implied by c2):
        #    [73, 82, 62, 83, 92].
        #
        # After iterating, we should be able to reset and iterate again.
        self.iter_common(jc, do_proj)
        jc.reset()
        self.iter_common(jc, do_proj)
        jc.reset()
        self.iter_common(jc, do_proj)

        jc.close()
        c2.close()
        c1a.close()
        c1b.close()
        c0.close()
        self.session.drop('table:join01')

    # Test joins with basic functionality: every combination of
    # no-bloom / bloom configurations for the two later join conditions,
    # with and without a projection.
    def test_join(self):
        bloomcfg1000 = ',strategy=bloom,count=1000'
        bloomcfg10000 = ',strategy=bloom,count=10000'
        for cfga in [ '', bloomcfg1000, bloomcfg10000 ]:
            for cfgb in [ '', bloomcfg1000, bloomcfg10000 ]:
                for do_proj in [ False, True ]:
                    self.join_common(cfga, cfgb, do_proj)

    # Check the error messages produced by misuse of the join API, and
    # that cursors taking part in a join are frozen until it is closed.
    def test_join_errors(self):
        self.session.create('table:join01',
                            'key_format=r,value_format=SS,columns=(k,v0,v1)')
        self.session.create('table:join01B',
                            'key_format=r,value_format=SS,columns=(k,v0,v1)')
        self.session.create('index:join01:index0', 'columns=(v0)')
        self.session.create('index:join01:index1', 'columns=(v1)')
        self.session.create('index:join01B:index0', 'columns=(v0)')
        jc = self.session.open_cursor('join:table:join01', None, None)
        tc = self.session.open_cursor('table:join01', None, None)
        fc = self.session.open_cursor('file:join01.wt', None, None)
        ic0 = self.session.open_cursor('index:join01:index0', None, None)
        ic0again = self.session.open_cursor('index:join01:index0', None, None)
        ic1 = self.session.open_cursor('index:join01:index1', None, None)
        icB = self.session.open_cursor('index:join01B:index0', None, None)
        tcB = self.session.open_cursor('table:join01B', None, None)

        tc.set_key(1)
        tc.set_value('val1', 'val1')
        tc.insert()
        tcB.set_key(1)
        tcB.set_value('val1', 'val1')
        tcB.insert()
        fc.next()

        # Joining using a non join-cursor
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(tc, ic0, 'compare=ge'),
            '/not a join cursor/')
        # Joining a file cursor, which is neither an index nor a table cursor
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, fc, 'compare=ge'),
            '/not an index or table cursor/')
        # Joining a non positioned cursor
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0, 'compare=ge'),
            '/requires key be set/')

        # minimally position the cursors now
        ic0.next()
        ic0again.next()
        icB.next()

        # Joining non matching index
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, icB, 'compare=ge'),
            '/table for join cursor does not match/')

        # The cursor must be positioned
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic1, 'compare=ge'),
            '/requires key be set/')
        ic1.next()

        # The first cursor joined cannot be bloom
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic1,
                                      'compare=ge,strategy=bloom,count=1000'),
            '/first joined cursor cannot specify strategy=bloom/')

        # This succeeds.
        self.session.join(jc, ic1, 'compare=ge')

        # With bloom filters, a count is required
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0, 'compare=ge,strategy=bloom'),
            '/count must be nonzero/')

        # This succeeds.
        self.session.join(jc, ic0, 'compare=ge,strategy=bloom,count=1000')

        bloom_config = ',strategy=bloom,count=1000'
        # Cannot use the same index cursor
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0,
                                      'compare=le' + bloom_config),
            '/index cursor already used in a join/')

        # When joining with the same index, need compatible compares
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0again,
                                      'compare=ge' + bloom_config),
            '/join has overlapping ranges/')

        # Another incompatible compare
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0again,
                                      'compare=gt' + bloom_config),
            '/join has overlapping ranges/')

        # Compare is compatible, but bloom args need to match
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0again, 'compare=le'),
            '/join has incompatible strategy/')

        # Counts need to match for bloom filters
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: self.session.join(jc, ic0again,
                                      'compare=le,strategy=bloom,count=100'),
            '/count.* does not match previous count/')

        # This succeeds
        self.session.join(jc, ic0again, 'compare=le,strategy=bloom,count=1000')

        # Need to do initial next() before getting key/values
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: jc.get_keys(),
            '/join cursor must be advanced with next/')

        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: jc.get_values(),
            '/join cursor must be advanced with next/')

        # Operations on a joined index cursor are frozen until the join
        # cursor is closed.
        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: ic0.next(),
            '/index cursor is being used in a join/')

        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: ic0.prev(),
            '/index cursor is being used in a join/')

        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
            lambda: ic0.reset(),
            '/index cursor is being used in a join/')

        # Only a small number of operations allowed on a join cursor
        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: jc.search())

        self.assertRaises(wiredtiger.WiredTigerError,
            lambda: jc.prev())

        self.assertEqual(jc.next(), 0)
        self.assertEqual(jc.next(), wiredtiger.WT_NOTFOUND)

        # Only after the join cursor is closed can we use the index cursor
        # normally
        jc.close()
        self.assertEqual(ic0.next(), wiredtiger.WT_NOTFOUND)
        self.assertEqual(ic0.prev(), 0)

    # common code for making sure that cursors can be
    # implicitly closed, no matter the order they are created.
    # joinfirst selects whether the join cursor is opened before or
    # after the index cursors; the session is then closed with all of
    # them still open.
    def cursor_close_common(self, joinfirst):
        self.session.create('table:join01', 'key_format=r' +
                            ',value_format=SS,columns=(k,v0,v1)')
        self.session.create('index:join01:index0', 'columns=(v0)')
        self.session.create('index:join01:index1', 'columns=(v1)')
        c = self.session.open_cursor('table:join01', None, None)
        for i in range(0, self.nentries):
            c.set_key(*self.gen_key(i))
            c.set_value(*self.gen_values(i))
            c.insert()
        c.close()

        if joinfirst:
            jc = self.session.open_cursor('join:table:join01', None, None)
        c0 = self.session.open_cursor('index:join01:index0', None, None)
        c1 = self.session.open_cursor('index:join01:index1', None, None)
        c0.next()        # index cursors must be positioned
        c1.next()
        if not joinfirst:
            jc = self.session.open_cursor('join:table:join01', None, None)
        self.session.join(jc, c0, 'compare=ge')
        self.session.join(jc, c1, 'compare=ge')
        # Close the session without closing any cursor explicitly.
        self.session.close()
        self.session = None

    def test_cursor_close1(self):
        self.cursor_close_common(True)

    def test_cursor_close2(self):
        self.cursor_close_common(False)

if __name__ == '__main__':
    wttest.run()
# diff --git
a/test/suite/test_join02.py b/test/suite/test_join02.py new file mode 100644 index 00000000000..16e1fff56df --- /dev/null +++ b/test/suite/test_join02.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest, suite_random
from wtscenario import check_scenarios, multiply_scenarios, number_scenarios

# test_join02.py
#    Join operations
# Join several indices together, trying all comparison combinations
class test_join02(wttest.WiredTigerTestCase):
    table_name1 = 'test_join02'
    nentries = 1000

    keyscen = [
        ('key-r', dict(keyformat='r')),
        ('key-S', dict(keyformat='S')),
        ('key-i', dict(keyformat='i')),
        ('key-iS', dict(keyformat='iS'))
    ]

    bloomscen = [
        ('bloom', dict(usebloom=True)),
        ('nobloom', dict(usebloom=False))
    ]

    scenarios = number_scenarios(multiply_scenarios('.', keyscen, bloomscen))

    # Start our range from 1, since WT record numbers start at 1,
    # it makes things work out nicer.
    def range(self):
        return range(1, self.nentries + 1)

    # Key for entry i, shaped according to the scenario's key format.
    def gen_key(self, i):
        if self.keyformat == 'S':
            return [ 'key{:06}'.format(i) ]  # zero pad so it sorts expectedly
        elif self.keyformat == 'iS':
            return [ i, 'key{:06}'.format(i) ]
        else:
            return [ i ]

    # Values for entry i: i, str(i), i copies of 'x', str(i) reversed,
    # and the first decimal digit of i.
    def gen_values(self, i):
        s = str(i)
        x = 'x' * i
        rs = s[::-1]
        f = int(s[0:1])
        return [i, s, x, rs, f]

    # Start a fresh random sequence; each call uses the next seed.
    def reinit_joinconfig(self):
        self.rand = suite_random.suite_random(self.seed)
        self.seed += 1

    # Return the configuration suffix for a non-first join: empty unless
    # the scenario uses bloom filters.
    def get_joinconfig(self):
        # When we're running the bloom scenario, make it so the
        # bloom filters are often shared.  Make the number of
        # hashes and number of bits per item so they don't always
        # match up; WT should allow it.
        if not self.usebloom:
            return ''
        # The three draws are made in this fixed order (count, hashes,
        # bits) so the generated configurations stay reproducible.
        c = 10000 if (self.rand.rand32() % 3) != 0 else 100000
        k = 8 if (self.rand.rand32() % 10) != 0 else 10
        b = 16 if (self.rand.rand32() % 11) != 0 else 12
        return \
            ',strategy=bloom,count=' + str(c) + \
            ',bloom_bit_count=' + str(b) + \
            ',bloom_hash_count=' + str(k)

    # Join curleft (and, when the choice has a second comparison,
    # curright) into jc, and return mbr narrowed to the entries the new
    # condition(s) allow.  choice is [left_compare] or
    # [left_compare, right_compare]; a left_compare of None means the
    # cursors are not joined at all.
    def do_join(self, jc, curleft, curright, choice, mbr):
        c0 = choice[0]
        # NOTE(review): because of this early return, the [None, 'le']
        # and [None, 'lt'] choices never join the right-hand cursor.
        # Confirm whether upper-bound-only joins were meant to be tested.
        if c0 is None:
            return mbr
        # The first join cannot use a bloom filter
        if jc.first_join:
            joinconfig = ''
            jc.first_join = False
        else:
            joinconfig = self.get_joinconfig()

        curleft.reset()
        curleft.set_key(*curleft.low)
        self.assertEqual(0, curleft.search())
        self.session.join(jc, curleft, 'compare=' + c0 + joinconfig)
        if c0 == 'eq':
            mbr = mbr.intersection(curleft.eqmembers)
        elif c0 == 'ge':
            mbr = mbr.intersection(
                curleft.eqmembers.union(curleft.gtmembers))
        elif c0 == 'gt':
            mbr = mbr.intersection(curleft.gtmembers)

        c1 = choice[1] if len(choice) > 1 else None
        if c1 is not None:
            curright.reset()
            curright.set_key(*curright.high)
            self.assertEqual(0, curright.search())
            self.session.join(jc, curright, 'compare=' + c1 + joinconfig)
            if c1 == 'le':
                mbr = mbr.intersection(
                    curright.eqmembers.union(curright.ltmembers))
            elif c1 == 'lt':
                mbr = mbr.intersection(curright.ltmembers)
        return mbr

    # Walk the join cursor to the end, checking that each returned entry
    # has the right values and belongs to mbr, and that every member of
    # mbr is returned.  mbr is emptied as a side effect.
    def iterate(self, jc, mbr):
        while jc.next() == 0:
            keys = jc.get_keys()
            [v0, v1, v2, v3, v4] = jc.get_values()
            k0 = keys[0]
            k1 = keys[1] if len(keys) > 1 else None
            if self.keyformat == 'S':
                i = int(str(k0[3:]))     # strip the 'key' prefix
            elif self.keyformat == 'iS':
                i = k0
                self.assertEqual(i, int(str(k1[3:])))
            else:
                i = k0
            self.assertEqual(self.gen_values(i), [v0, v1, v2, v3, v4])
            if i not in mbr:
                self.tty(' result ' + str(i) + ' is not in: ' + str(mbr))
            self.assertTrue(i in mbr)
            mbr.remove(i)
        self.assertEqual(0, len(mbr))

    # The set of entry numbers for which expr is true.
    def mkmbr(self, expr):
        return frozenset(x for x in self.range() if expr(x))

    def test_basic_join(self):
        # itertools.product flattens the five nested loops below while
        # keeping the same iteration order.
        from itertools import product

        self.seed = 1
        if self.keyformat == 'iS':
            keycols = 'k0,k1'
        else:
            keycols = 'k'
        self.session.create('table:join02', 'key_format=' + self.keyformat +
                            ',value_format=iSuSi,columns=(' + keycols +
                            ',v0,v1,v2,v3,v4)')
        self.session.create('index:join02:index0', 'columns=(v0)')
        self.session.create('index:join02:index1', 'columns=(v1)')
        self.session.create('index:join02:index2', 'columns=(v2)')
        self.session.create('index:join02:index3', 'columns=(v3)')
        self.session.create('index:join02:index4', 'columns=(v4)')
        c = self.session.open_cursor('table:join02', None, None)
        for i in self.range():
            c.set_key(*self.gen_key(i))
            c.set_value(*self.gen_values(i))
            c.insert()
        c.close()

        # Use the primary table in one of the joins.
        c0a = self.session.open_cursor('table:join02', None, None)
        c0b = self.session.open_cursor('table:join02', None, None)
        c1a = self.session.open_cursor('index:join02:index1', None, None)
        c1b = self.session.open_cursor('index:join02:index1', None, None)
        c2a = self.session.open_cursor('index:join02:index2', None, None)
        c2b = self.session.open_cursor('index:join02:index2', None, None)
        c3a = self.session.open_cursor('index:join02:index3', None, None)
        c3b = self.session.open_cursor('index:join02:index3', None, None)
        c4a = self.session.open_cursor('index:join02:index4', None, None)

        # Attach extra properties to each cursor.  For cursors that
        # may appear on the 'left' side of a range CA < x < CB,
        # we give a low value of the range, and calculate the members
        # of the set we expect to see for a 'gt' comparison, as well
        # as the 'eq' comparison.  For cursors that appear on the
        # 'right' side of the range, we give a high value of the range,
        # and calculate membership sets for 'lt' and 'eq'.
        #
        # We've defined the low/high values so that there's a lot of
        # overlap between the values when we're doing ranges.
        c0a.name = 'c0a'
        c0b.name = 'c0b'
        if self.keyformat in ('i', 'r'):
            c0a.low = [ 205 ]
            c0b.high = [ 990 ]
        elif self.keyformat == 'S':
            c0a.low = [ 'key000205' ]
            c0b.high = [ 'key000990' ]
        elif self.keyformat == 'iS':
            c0a.low = [ 205, 'key000205' ]
            c0b.high = [ 990, 'key000990' ]
        c0a.gtmembers = self.mkmbr(lambda x: x > 205)
        c0a.eqmembers = self.mkmbr(lambda x: x == 205)
        c0b.ltmembers = self.mkmbr(lambda x: x < 990)
        c0b.eqmembers = self.mkmbr(lambda x: x == 990)

        c1a.low = [ '150' ]
        c1a.gtmembers = self.mkmbr(lambda x: str(x) > '150')
        c1a.eqmembers = self.mkmbr(lambda x: str(x) == '150')
        c1a.name = 'c1a'
        c1b.high = [ '733' ]
        c1b.ltmembers = self.mkmbr(lambda x: str(x) < '733')
        c1b.eqmembers = self.mkmbr(lambda x: str(x) == '733')
        c1b.name = 'c1b'

        c2a.low = [ 'x' * 321 ]
        c2a.gtmembers = self.mkmbr(lambda x: x > 321)
        c2a.eqmembers = self.mkmbr(lambda x: x == 321)
        c2a.name = 'c2a'
        c2b.high = [ 'x' * 765 ]
        c2b.ltmembers = self.mkmbr(lambda x: x < 765)
        c2b.eqmembers = self.mkmbr(lambda x: x == 765)
        c2b.name = 'c2b'

        c3a.low = [ '432' ]
        c3a.gtmembers = self.mkmbr(lambda x: str(x)[::-1] > '432')
        c3a.eqmembers = self.mkmbr(lambda x: str(x)[::-1] == '432')
        c3a.name = 'c3a'
        c3b.high = [ '876' ]
        c3b.ltmembers = self.mkmbr(lambda x: str(x)[::-1] < '876')
        c3b.eqmembers = self.mkmbr(lambda x: str(x)[::-1] == '876')
        c3b.name = 'c3b'

        c4a.low = [ 4 ]
        c4a.gtmembers = self.mkmbr(lambda x: str(x)[0:1] > '4')
        c4a.eqmembers = self.mkmbr(lambda x: str(x)[0:1] == '4')
        c4a.name = 'c4a'

        choices = [[None], ['eq'], ['ge'], ['gt'], [None, 'le'], [None, 'lt'],
                   ['ge', 'le' ], ['ge', 'lt' ], ['gt', 'le' ], ['gt', 'lt' ]]
        smallchoices = [[None], ['eq'], ['ge'], ['gt', 'le' ]]
        lastchoices = [[None], ['eq'], ['ge'], ['gt']]
        for i0, i1, i2, i3, i4 in product(
                smallchoices, choices, smallchoices, smallchoices,
                lastchoices):
            # Skip the combination that joins nothing at all.
            if all(ch[0] is None for ch in (i0, i1, i2, i3, i4)):
                continue
            self.reinit_joinconfig()
            jc = self.session.open_cursor('join:table:join02',
                                          None, None)
            jc.first_join = True
            mbr = set(self.range())

            # It shouldn't matter the order of the joins
            mbr = self.do_join(jc, c3a, c3b, i3, mbr)
            mbr = self.do_join(jc, c2a, c2b, i2, mbr)
            mbr = self.do_join(jc, c4a, None, i4, mbr)
            mbr = self.do_join(jc, c1a, c1b, i1, mbr)
            mbr = self.do_join(jc, c0a, c0b, i0, mbr)
            self.iterate(jc, mbr)
            jc.close()

        c0a.close()
        c0b.close()
        c1a.close()
        c1b.close()
        c2a.close()
        c2b.close()
        c3a.close()
        c3b.close()
        c4a.close()

if __name__ == '__main__':
    wttest.run()
# diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py new file mode 100644 index 00000000000..552e3b41748 --- /dev/null +++ b/test/suite/test_join03.py @@ -0,0 +1,158 @@
#!/usr/bin/env python
#
# Public Domain 2014-2015 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import os
import wiredtiger, wttest, run
from wtscenario import check_scenarios, multiply_scenarios, number_scenarios

# test_join03.py
#    Join operations
# Joins with a custom extractor
class test_join03(wttest.WiredTigerTestCase):
    table_name1 = 'test_join03'
    nentries = 100

    # Return the wiredtiger_open extension argument for a shared library.
    # exts is a list of (dirname, name, libname) tuples; entries whose
    # name is None or 'none' are ignored.  The test is skipped if a
    # requested extension library has not been built.
    def extensionArg(self, exts):
        extfiles = []
        for (dirname, name, libname) in exts:
            if name is None or name == 'none':
                continue
            extdir = os.path.join(run.wt_builddir, 'ext', dirname)
            extfile = os.path.join(
                extdir, name, '.libs', 'libwiredtiger_' + libname + '.so')
            if not os.path.exists(extfile):
                self.skipTest('extension "' + extfile + '" not built')
            if extfile not in extfiles:
                extfiles.append(extfile)
        if not extfiles:
            return ''
        return ',extensions=["' + '","'.join(extfiles) + '"]'

    # Override WiredTigerTestCase, we have extensions.
    def setUpConnectionOpen(self, dir):
        extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')])
        connarg = 'create,error_prefix="{0}: ",{1}'.format(
            self.shortid(), extarg)
        conn = wiredtiger.wiredtiger_open(dir, connarg)
        # repr() rather than backticks: backtick-repr is Python 2 only
        # and a syntax error on Python 3.
        self.pr(repr(conn))
        return conn

    # Record-number key for the i'th entry (record numbers start at 1).
    def gen_key(self, i):
        return [ i + 1 ]

    # Single CSV value for the i'th entry: str(i), then str(i) reversed
    # with leading zeros removed.
    def gen_values(self, i):
        s = str(i)
        rs = s[::-1].lstrip('0')
        return [ s + ',' + rs ]

    # Common function for testing iteration of join cursors.
    # Walks the join cursor to the end and checks that it returns
    # exactly the expected set of entries (in any order).
    def iter_common(self, jc):
        mbr = set([62, 63, 72, 73, 82, 83, 92, 93])
        while jc.next() == 0:
            [k] = jc.get_keys()
            i = k - 1
            [v] = jc.get_values()
            self.assertEqual(self.gen_values(i), [v])
            if i not in mbr:
                self.tty(' result ' + str(i) + ' is not in: ' + str(mbr))
            self.assertTrue(i in mbr)
            mbr.remove(i)
        self.assertEqual(0, len(mbr))

    # Common function for testing the most basic functionality
    # of joins.  csvformat is a two-character string giving the key
    # format of each extracted CSV field; args0/args1 are configuration
    # suffixes appended to the join conditions on field 0 and field 1.
    def join(self, csvformat, args0, args1):
        self.session.create('table:join03', 'key_format=r' +
                            ',value_format=S,columns=(k,v)')
        fmt = csvformat[0]
        self.session.create('index:join03:index0', 'key_format=' + fmt + ',' +
                            'extractor=csv,app_metadata={"format" : "' +
                            fmt + '","field" : "0"}')
        fmt = csvformat[1]
        self.session.create('index:join03:index1', 'key_format=' + fmt + ',' +
                            'extractor=csv,app_metadata={"format" : "' +
                            fmt + '","field" : "1"}')

        c = self.session.open_cursor('table:join03', None, None)
        for i in range(0, self.nentries):
            c.set_key(*self.gen_key(i))
            c.set_value(*self.gen_values(i))
            c.insert()
        c.close()

        jc = self.session.open_cursor('join:table:join03', None, None)

        # All the numbers 0-99 whose first CSV field is >= 60 and whose
        # second CSV field lies strictly between 21 and 41 (compared as
        # strings or as integers, depending on csvformat).
        # That is: [62, 63, 72, 73, 82, 83, 92, 93]
        c0 = self.session.open_cursor('index:join03:index0', None, None)
        if csvformat[0] == 'S':
            c0.set_key('60')
        else:
            c0.set_key(60)
        self.assertEqual(0, c0.search())
        self.session.join(jc, c0, 'compare=ge' + args0)

        c1a = self.session.open_cursor('index:join03:index1', None, None)
        if csvformat[1] == 'S':
            c1a.set_key('21')
        else:
            c1a.set_key(21)
        self.assertEqual(0, c1a.search())
        self.session.join(jc, c1a, 'compare=gt' + args1)

        c1b = self.session.open_cursor('index:join03:index1', None, None)
        if csvformat[1] == 'S':
            c1b.set_key('41')
        else:
            c1b.set_key(41)
        self.assertEqual(0, c1b.search())
        self.session.join(jc, c1b, 'compare=lt' + args1)

        # Iterate once over the join and check the result set.
        # NOTE(review): the comment here used to promise a check that
        # reset allows a second iteration, but only a single pass is
        # made; add jc.reset() plus a second pass if that is wanted.
        self.iter_common(jc)

        jc.close()
        c1a.close()
        c1b.close()
        c0.close()
        self.session.drop('table:join03')

    # Test joins using CSV fields that are interpreted as different types
    # to make sure all the extractor plumbing used in joins is working.
    def test_join(self):
        for extraargs in [ '', ',strategy=bloom,count=1000' ]:
            for csvformat in [ 'SS', 'ii', 'Si', 'iS' ]:
                self.join(csvformat, '', extraargs)


if __name__ == '__main__':
    wttest.run()