3 files changed, 780 insertions, 0 deletions
diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py
new file mode 100644
index 00000000000..58a135f3bcd
--- /dev/null
+++ b/test/suite/test_join01.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2015 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+# test_join01.py
+#    Join operations
+# Basic tests for join
+class test_join01(wttest.WiredTigerTestCase):
+    table_name1 = 'test_join01'  # NOTE(review): not referenced elsewhere in this class
+    nentries = 100               # number of rows inserted into the test table
+
+    scenarios = [
+        ('table', dict(ref='table')),  # join_common() joins 'table:join01' for the >= 60 range
+        ('index', dict(ref='index'))   # join_common() joins 'index:join01:index0' instead
+    ]
+
+    def gen_key(self, i):
+        return [ i + 1 ]  # row i is stored under key i + 1 (tables here use key_format=r)
+
+    def gen_values(self, i):
+        s = str(i)       # v0: decimal string of i
+        rs = s[::-1]     # v1: the same digits reversed
+        sort3 = (self.nentries * (i % 3)) + i    # v2: multiples of 3 sort first
+        return [s, rs, sort3]
+
+    # Common function for testing iteration of join cursors
+    def iter_common(self, jc, do_proj):
+        # Expect exactly these rows, in this order; see comments in join_common()
+        expect = [73, 82, 62, 83, 92]
+        while jc.next() == 0:
+            [k] = jc.get_keys()
+            i = k - 1    # undo the +1 applied by gen_key()
+            if do_proj:  # our projection test simply reverses the values
+                [v2,v1,v0] = jc.get_values()
+            else:
+                [v0,v1,v2] = jc.get_values()
+            self.assertEquals(self.gen_values(i), [v0,v1,v2])
+            if len(expect) == 0 or i != expect[0]:
+                self.tty('  result ' + str(i) + ' is not in: ' + str(expect))
+            # An extra row must fail the assertion, not raise IndexError.
+            self.assertTrue(len(expect) > 0 and i == expect[0])
+        self.assertEquals(0, len(expect.__class__())) if False else expect.remove(i) if False else None
+
+    # Common function for testing the most basic functionality
+    # of joins
+    def join_common(self, joincfg0, joincfg1, do_proj):
+        #self.tty('join_common(' + joincfg0 + ',' + joincfg1 + ',' +
+        #         str(do_proj) + ')')
+        self.session.create('table:join01', 'key_format=r' +
+                            ',value_format=SSi,columns=(k,v0,v1,v2)')
+        self.session.create('index:join01:index0','columns=(v0)')
+        self.session.create('index:join01:index1','columns=(v1)')
+        self.session.create('index:join01:index2','columns=(v2)')
+
+        c = self.session.open_cursor('table:join01', None, None)
+        for i in range(0, self.nentries):
+            c.set_key(*self.gen_key(i))
+            c.set_value(*self.gen_values(i))
+            c.insert()
+        c.close()
+
+        if do_proj:
+            proj_suffix = '(v2,v1,v0)'  # Reversed values
+        else:
+            proj_suffix = ''            # Default projection (v0,v1,v2)
+
+        # We join on index2 first, not using bloom indices.
+        # This defines the order that items are returned.
+        # index2 sorts multiples of 3 first (see gen_values())
+        # and by using 'gt' and key 99, we'll skip multiples of 3,
+        # and examine primary keys 2,5,8,...,95,98,3,6,9,...,96,99.
+        jc = self.session.open_cursor('join:table:join01' + proj_suffix,
+                                      None, None)
+        c2 = self.session.open_cursor('index:join01:index2', None, None)
+        c2.set_key(99)   # skips all entries whose i is divisible by three
+        self.assertEquals(0, c2.search())
+        self.session.join(jc, c2, 'compare=gt')
+
+        # Then select all the numbers 0-99 whose string representation
+        # sorts >= '60' (or, in the 'table' scenario, primary key >= 60).
+        if self.ref == 'index':
+            c0 = self.session.open_cursor('index:join01:index0', None, None)
+            c0.set_key('60')
+        else:
+            c0 = self.session.open_cursor('table:join01', None, None)
+            c0.set_key(60)
+        self.assertEquals(0, c0.search())
+        self.session.join(jc, c0, 'compare=ge' + joincfg0)  # joincfg0: extra join config
+
+        # Then select all numbers whose reverse string representation
+        # is in '21' < x < '41'.
+        c1a = self.session.open_cursor('index:join01:index1', None, None)
+        c1a.set_key('21')
+        self.assertEquals(0, c1a.search())
+        self.session.join(jc, c1a, 'compare=gt' + joincfg1)  # joincfg1 is used for both bounds
+
+        c1b = self.session.open_cursor('index:join01:index1', None, None)
+        c1b.set_key('41')
+        self.assertEquals(0, c1b.search())
+        self.session.join(jc, c1b, 'compare=lt' + joincfg1)
+
+        # Numbers that satisfy these 3 conditions (with ordering implied by c2):
+        #    [73, 82, 62, 83, 92].
+        #
+        # After iterating, we should be able to reset and iterate again.
+        self.iter_common(jc, do_proj)
+        jc.reset()
+        self.iter_common(jc, do_proj)
+        jc.reset()
+        self.iter_common(jc, do_proj)
+
+        jc.close()
+        c2.close()
+        c1a.close()
+        c1b.close()
+        c0.close()
+        self.session.drop('table:join01')  # each call recreates the table from scratch
+
+    # Test joins with basic functionality
+    def test_join(self):
+        bloomcfg1000 = ',strategy=bloom,count=1000'
+        bloomcfg10000 = ',strategy=bloom,count=10000'
+        for cfga in [ '', bloomcfg1000, bloomcfg10000 ]:      # config for the >= 60 join
+            for cfgb in [ '', bloomcfg1000, bloomcfg10000 ]:  # config for both index1 joins
+                for do_proj in [ False, True ]:               # default vs. reversed projection
+                    #self.tty('cfga=' + cfga +
+                    #         ', cfgb=' + cfgb +
+                    #         ', doproj=' + str(do_proj))
+                    self.join_common(cfga, cfgb, do_proj)
+
+    def test_join_errors(self):  # misuse of session.join() and of join cursors must raise
+        self.session.create('table:join01', 'key_format=r,value_format=SS'
+                            ',columns=(k,v0,v1)')
+        self.session.create('table:join01B', 'key_format=r,value_format=SS'
+                            ',columns=(k,v0,v1)')
+        self.session.create('index:join01:index0','columns=(v0)')
+        self.session.create('index:join01:index1','columns=(v1)')
+        self.session.create('index:join01B:index0','columns=(v0)')
+        jc = self.session.open_cursor('join:table:join01', None, None)
+        tc = self.session.open_cursor('table:join01', None, None)
+        fc = self.session.open_cursor('file:join01.wt', None, None)
+        ic0 = self.session.open_cursor('index:join01:index0', None, None)
+        ic0again = self.session.open_cursor('index:join01:index0', None, None)
+        ic1 = self.session.open_cursor('index:join01:index1', None, None)
+        icB = self.session.open_cursor('index:join01B:index0', None, None)
+        tcB = self.session.open_cursor('table:join01B', None, None)
+
+        tc.set_key(1)    # a single row per table is enough for every check below
+        tc.set_value('val1', 'val1')
+        tc.insert()
+        tcB.set_key(1)
+        tcB.set_value('val1', 'val1')
+        tcB.insert()
+        fc.next()
+
+        # Joining using a non join-cursor
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(tc, ic0, 'compare=ge'),
+            '/not a join cursor/')
+        # Joining a file cursor, which is neither an index nor a table cursor
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, fc, 'compare=ge'),
+            '/not an index or table cursor/')
+        # Joining a non positioned cursor
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0, 'compare=ge'),
+            '/requires key be set/')
+
+        # minimally position the cursors now
+        ic0.next()
+        ic0again.next()
+        icB.next()
+
+        # Joining non matching index
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, icB, 'compare=ge'),
+            '/table for join cursor does not match/')
+
+        # The cursor must be positioned
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic1, 'compare=ge'),
+            '/requires key be set/')
+        ic1.next()
+
+        # The first cursor joined cannot be bloom
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic1,
+            'compare=ge,strategy=bloom,count=1000'),
+            '/first joined cursor cannot specify strategy=bloom/')
+
+        # This succeeds.
+        self.session.join(jc, ic1, 'compare=ge')
+
+        # With bloom filters, a count is required
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0, 'compare=ge,strategy=bloom'),
+            '/count must be nonzero/')
+
+        # This succeeds.
+        self.session.join(jc, ic0, 'compare=ge,strategy=bloom,count=1000')
+
+        bloom_config = ',strategy=bloom,count=1000'
+        # Cannot use the same index cursor
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0,
+            'compare=le' + bloom_config),
+            '/index cursor already used in a join/')
+
+        # When joining with the same index, need compatible compares
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0again, 'compare=ge' + bloom_config),
+            '/join has overlapping ranges/')
+
+        # Another incompatible compare
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0again, 'compare=gt' + bloom_config),
+            '/join has overlapping ranges/')
+
+        # Compare is compatible, but bloom args need to match
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0again, 'compare=le'),
+            '/join has incompatible strategy/')
+
+        # Counts need to match for bloom filters
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.join(jc, ic0again, 'compare=le,strategy=bloom,'
+            'count=100'), '/count.* does not match previous count/')
+
+        # This succeeds
+        self.session.join(jc, ic0again, 'compare=le,strategy=bloom,count=1000')
+
+        # Need to do initial next() before getting key/values
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: jc.get_keys(),
+            '/join cursor must be advanced with next/')
+
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: jc.get_values(),
+            '/join cursor must be advanced with next/')
+
+        # Operations on the joined cursor are frozen until the join is closed.
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: ic0.next(),
+            '/index cursor is being used in a join/')
+
+        # Operations on the joined cursor are frozen until the join is closed.
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: ic0.prev(),
+            '/index cursor is being used in a join/')
+
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: ic0.reset(),
+            '/index cursor is being used in a join/')
+
+        # Only a small number of operations allowed on a join cursor
+        self.assertRaises(wiredtiger.WiredTigerError,
+            lambda: jc.search())
+
+        self.assertRaises(wiredtiger.WiredTigerError,
+            lambda: jc.prev())
+
+        self.assertEquals(jc.next(), 0)    # the single inserted row is returned once
+        self.assertEquals(jc.next(), wiredtiger.WT_NOTFOUND)
+
+        # Only after the join cursor is closed can we use the index cursor
+        # normally
+        jc.close()
+        self.assertEquals(ic0.next(), wiredtiger.WT_NOTFOUND)
+        self.assertEquals(ic0.prev(), 0)
+
+    # common code for making sure that cursors can be
+    # implicitly closed, no matter the order they are created
+    def cursor_close_common(self, joinfirst):
+        self.session.create('table:join01', 'key_format=r' +
+                            ',value_format=SS,columns=(k,v0,v1)')
+        self.session.create('index:join01:index0','columns=(v0)')
+        self.session.create('index:join01:index1','columns=(v1)')
+        c = self.session.open_cursor('table:join01', None, None)
+        for i in range(0, self.nentries):
+            c.set_key(*self.gen_key(i))
+            c.set_value(*self.gen_values(i))
+            c.insert()
+        c.close()
+
+        if joinfirst:    # open the join cursor before the index cursors ...
+            jc = self.session.open_cursor('join:table:join01', None, None)
+        c0 = self.session.open_cursor('index:join01:index0', None, None)
+        c1 = self.session.open_cursor('index:join01:index1', None, None)
+        c0.next()        # index cursors must be positioned
+        c1.next()
+        if not joinfirst:    # ... or after them
+            jc = self.session.open_cursor('join:table:join01', None, None)
+        self.session.join(jc, c0, 'compare=ge')
+        self.session.join(jc, c1, 'compare=ge')
+        self.session.close()    # jc, c0 and c1 are deliberately left open here
+        self.session = None     # presumably keeps teardown from closing it again - confirm in wttest
+
+    def test_cursor_close1(self):
+        self.cursor_close_common(True)     # join cursor opened before the index cursors
+
+    def test_cursor_close2(self):
+        self.cursor_close_common(False)    # join cursor opened after the index cursors
+
+if __name__ == '__main__':    # only when this file is executed directly
+    wttest.run()              # presumably runs this module's tests via the wttest harness - confirm
diff --git a/test/suite/test_join02.py b/test/suite/test_join02.py
new file mode 100644
index 00000000000..16e1fff56df
--- /dev/null
+++ b/test/suite/test_join02.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2015 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest, suite_random
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+# test_join02.py
+#    Join operations
+# Join several indices together, trying all comparison combinations
+class test_join02(wttest.WiredTigerTestCase):
+    table_name1 = 'test_join02'  # NOTE(review): not referenced elsewhere in this class
+    nentries = 1000              # rows are numbered 1..nentries (see range())
+
+    keyscen = [
+        ('key-r', dict(keyformat='r')),
+        ('key-S', dict(keyformat='S')),
+        ('key-i', dict(keyformat='i')),
+        ('key-iS', dict(keyformat='iS'))    # two key columns, see gen_key()
+    ]
+
+    bloomscen = [
+        ('bloom', dict(usebloom=True)),     # get_joinconfig() returns a bloom config
+        ('nobloom', dict(usebloom=False))   # get_joinconfig() returns ''
+    ]
+
+    scenarios = number_scenarios(multiply_scenarios('.', keyscen, bloomscen))  # presumably the cross product - confirm
+
+    # Start our range from 1, since WT record numbers start at 1,
+    # it makes things work out nicer.
+    def range(self):
+        return range(1, self.nentries + 1)  # the builtin range here: 1..nentries inclusive
+
+    def gen_key(self, i):    # returns the key column(s) for row i as a list
+        if self.keyformat == 'S':
+            return [ 'key{:06}'.format(i) ]  # zero pad so it sorts expectedly
+        elif self.keyformat == 'iS':
+            return [ i, 'key{:06}'.format(i) ]  # both columns encode the same i
+        else:
+            return [ i ]    # 'r' and 'i' formats use i itself
+
+    def gen_values(self, i):
+        s = str(i)           # v1: decimal string of i
+        x = 'x' * i          # v2: i copies of 'x'
+        rs = s[::-1]         # v3: digits of i reversed
+        f = int(s[0:1])      # v4: leading digit of i
+        return [i, s, x, rs, f]    # v0 is i itself
+
+    def reinit_joinconfig(self):
+        self.rand = suite_random.suite_random(self.seed)  # fresh generator for get_joinconfig()
+        self.seed += 1                                    # the next call gets a different seed
+
+    def get_joinconfig(self):
+        # When we're running the bloom scenario, make it so the
+        # bloom filters are often shared. Make the number of
+        # hashes and number of bits per item so they don't always
+        # match up; WT should allow it.
+        if self.usebloom:
+            c = 10000 if (self.rand.rand32() % 3) != 0 else 100000  # count
+            k = 8 if (self.rand.rand32() % 10) != 0 else 10         # bloom_hash_count
+            b = 16 if (self.rand.rand32() % 11) != 0 else 12        # bloom_bit_count
+            return \
+                ',strategy=bloom,count=' + str(c) + \
+                ',bloom_bit_count=' + str(b) + \
+                ',bloom_hash_count=' + str(k)
+        else:
+            return ''    # no extra join configuration in the nobloom scenario
+
+    def do_join(self, jc, curleft, curright, choice, mbr):  # returns mbr narrowed by the joins made
+        c0 = choice[0]
+        if c0 is None:
+            return mbr  # NOTE(review): c1 is never looked at, so [None, 'le'/'lt'] joins nothing
+        # The first join cannot use a bloom filter
+        if jc.first_join:
+            joinconfig = ''
+            jc.first_join = False
+        else:
+            joinconfig = self.get_joinconfig()
+        if c0 is not None:  # always true here, given the early return above
+            #self.tty('join(jc, ' + curleft.name + ' ' + c0 +
+            #         ' ' + str(curleft.low) + ')')
+            curleft.reset()
+            curleft.set_key(*curleft.low)
+            self.assertEquals(0, curleft.search())
+            self.session.join(jc, curleft, 'compare=' + c0 + joinconfig)
+            if c0 == 'eq':
+                mbr = mbr.intersection(curleft.eqmembers)
+            elif c0 == 'ge':
+                mbr = mbr.intersection(
+                    set(curleft.eqmembers.union(curleft.gtmembers)))
+            elif c0 == 'gt':
+                mbr = mbr.intersection(curleft.gtmembers)
+        c1 = choice[1] if len(choice) > 1 else None
+        if c1 is not None:
+            #self.tty('join(jc, ' + curright.name + ' ' + c1 +
+            #         ' ' + str(curright.high) + ')')
+            curright.reset()
+            curright.set_key(*curright.high)
+            self.assertEquals(0, curright.search())
+            self.session.join(jc, curright, 'compare=' + c1 + joinconfig)  # same config as the left side
+            if c1 == 'le':
+                mbr = mbr.intersection(
+                    set(curright.eqmembers.union(curright.ltmembers)))
+            elif c1 == 'lt':
+                mbr = mbr.intersection(curright.ltmembers)
+        return mbr
+
+    def iterate(self, jc, mbr):
+        #self.tty('iteration expects ' + str(len(mbr)) +
+        #         ' entries: ' + str(mbr))
+        while jc.next() == 0:
+            keys = jc.get_keys()
+            [v0,v1,v2,v3,v4] = jc.get_values()
+            k0 = keys[0]
+            k1 = keys[1] if len(keys) > 1 else None
+            if self.keyformat == 'S':
+                i = int(str(k0[3:]))    # strip the 'key' prefix added by gen_key()
+            elif self.keyformat == 'iS':
+                i = k0
+                self.assertEquals(i, int(str(k1[3:])))  # both key columns encode the same i
+            else:
+                i = k0
+            #self.tty('  iteration got key: ' + str(k0) + ',' + str(k1))
+            #self.tty('  iteration got values: ' + str([v0,v1,v2,v3,v4]))
+            #self.tty('  iteration expects values: ' + str(self.gen_values(i)))
+            self.assertEquals(self.gen_values(i), [v0,v1,v2,v3,v4])
+            if not i in mbr:
+                self.tty('  result ' + str(i) + ' is not in: ' + str(mbr))
+            self.assertTrue(i in mbr)
+            mbr.remove(i)    # mbr is consumed in place, so a repeated row fails the check above
+        self.assertEquals(0, len(mbr))    # every expected row must have been returned
+
+    def mkmbr(self, expr):
+        return frozenset([x for x in self.range() if expr(x)])  # row numbers for which expr(x) is true
+
+    def test_basic_join(self):
+        self.seed = 1    # starting seed for reinit_joinconfig()
+        if self.keyformat == 'iS':
+            keycols = 'k0,k1'
+        else:
+            keycols = 'k'
+        self.session.create('table:join02', 'key_format=' + self.keyformat +
+                            ',value_format=iSuSi,columns=(' + keycols +
+                            ',v0,v1,v2,v3,v4)')
+        self.session.create('index:join02:index0','columns=(v0)')
+        self.session.create('index:join02:index1','columns=(v1)')
+        self.session.create('index:join02:index2','columns=(v2)')
+        self.session.create('index:join02:index3','columns=(v3)')
+        self.session.create('index:join02:index4','columns=(v4)')
+        c = self.session.open_cursor('table:join02', None, None)
+        for i in self.range():
+            c.set_key(*self.gen_key(i))
+            c.set_value(*self.gen_values(i))
+            c.insert()
+        c.close()
+
+        # Use the primary table in one of the joins.
+        c0a = self.session.open_cursor('table:join02', None, None)
+        c0b = self.session.open_cursor('table:join02', None, None)
+        c1a = self.session.open_cursor('index:join02:index1', None, None)
+        c1b = self.session.open_cursor('index:join02:index1', None, None)
+        c2a = self.session.open_cursor('index:join02:index2', None, None)
+        c2b = self.session.open_cursor('index:join02:index2', None, None)
+        c3a = self.session.open_cursor('index:join02:index3', None, None)
+        c3b = self.session.open_cursor('index:join02:index3', None, None)
+        c4a = self.session.open_cursor('index:join02:index4', None, None)
+
+        # Attach extra properties to each cursor.  For cursors that
+        # may appear on the 'left' side of a range CA < x < CB,
+        # we give a low value of the range, and calculate the members
+        # of the set we expect to see for a 'gt' comparison, as well
+        # as the 'eq' comparison.  For cursors that appear on the
+        # 'right' side of the range, we give a high value of the range,
+        # and calculate membership sets for 'lt' and 'eq'.
+        #
+        # We've defined the low/high values so that there's a lot of
+        # overlap between the values when we're doing ranges.
+        c0a.name = 'c0a'
+        c0b.name = 'c0b'
+        if self.keyformat == 'i' or self.keyformat == 'r':
+            c0a.low = [ 205 ]
+            c0b.high = [ 990 ]
+        elif self.keyformat == 'S':
+            c0a.low = [ 'key000205' ]
+            c0b.high = [ 'key000990' ]
+        elif self.keyformat == 'iS':
+            c0a.low = [ 205, 'key000205' ]
+            c0b.high = [ 990, 'key000990' ]
+        c0a.gtmembers = self.mkmbr(lambda x: x > 205)
+        c0a.eqmembers = self.mkmbr(lambda x: x == 205)
+        c0b.ltmembers = self.mkmbr(lambda x: x < 990)
+        c0b.eqmembers = self.mkmbr(lambda x: x == 990)
+
+        c1a.low = [ '150' ]
+        c1a.gtmembers = self.mkmbr(lambda x: str(x) > '150')
+        c1a.eqmembers = self.mkmbr(lambda x: str(x) == '150')
+        c1a.name = 'c1a'
+        c1b.high = [ '733' ]
+        c1b.ltmembers = self.mkmbr(lambda x: str(x) < '733')
+        c1b.eqmembers = self.mkmbr(lambda x: str(x) == '733')
+        c1b.name = 'c1b'
+
+        c2a.low = [ 'x' * 321 ]
+        c2a.gtmembers = self.mkmbr(lambda x: x > 321)
+        c2a.eqmembers = self.mkmbr(lambda x: x == 321)
+        c2a.name = 'c2a'
+        c2b.high = [ 'x' * 765 ]
+        c2b.ltmembers = self.mkmbr(lambda x: x < 765)
+        c2b.eqmembers = self.mkmbr(lambda x: x == 765)
+        c2b.name = 'c2b'
+
+        c3a.low = [ '432' ]
+        c3a.gtmembers = self.mkmbr(lambda x: str(x)[::-1] > '432')
+        c3a.eqmembers = self.mkmbr(lambda x: str(x)[::-1] == '432')
+        c3a.name = 'c3a'
+        c3b.high = [ '876' ]
+        c3b.ltmembers = self.mkmbr(lambda x: str(x)[::-1] < '876')
+        c3b.eqmembers = self.mkmbr(lambda x: str(x)[::-1] == '876')
+        c3b.name = 'c3b'
+
+        c4a.low = [ 4 ]
+        c4a.gtmembers = self.mkmbr(lambda x: str(x)[0:1] > '4')
+        c4a.eqmembers = self.mkmbr(lambda x: str(x)[0:1] == '4')
+        c4a.name = 'c4a'
+
+        choices = [[None], ['eq'], ['ge'], ['gt'], [None, 'le'], [None, 'lt'],
+                   ['ge', 'le' ], ['ge', 'lt' ], ['gt', 'le' ], ['gt', 'lt' ]]
+        smallchoices = [[None], ['eq'], ['ge'], ['gt', 'le' ]]
+        for i0 in smallchoices:
+            for i1 in choices:
+                for i2 in smallchoices:
+                    for i3 in smallchoices:
+                        for i4 in [[None], ['eq'], ['ge'], ['gt']]:
+                            if i0[0] == None and i1[0] == None and \
+                               i2[0] == None and i3[0] == None and \
+                               i4[0] == None:
+                                continue    # do_join() would join nothing at all
+                            self.reinit_joinconfig()
+                            #self.tty('Begin test: ' +
+                            #         ','.join([str(i0),str(i1),str(i2),
+                            #                   str(i3),str(i4)]))
+                            jc = self.session.open_cursor('join:table:join02',
+                                                          None, None)
+                            jc.first_join = True    # read and cleared by do_join()
+                            mbr = set(self.range())
+
+                            # It shouldn't matter the order of the joins
+                            mbr = self.do_join(jc, c3a, c3b, i3, mbr)
+                            mbr = self.do_join(jc, c2a, c2b, i2, mbr)
+                            mbr = self.do_join(jc, c4a, None, i4, mbr)
+                            mbr = self.do_join(jc, c1a, c1b, i1, mbr)
+                            mbr = self.do_join(jc, c0a, c0b, i0, mbr)
+                            self.iterate(jc, mbr)
+                            jc.close()
+        c0a.close()
+        c0b.close()
+        c1a.close()
+        c1b.close()
+        c2a.close()
+        c2b.close()
+        c3a.close()
+        c3b.close()
+        c4a.close()
+
+if __name__ == '__main__':    # only when this file is executed directly
+    wttest.run()              # presumably runs this module's tests via the wttest harness - confirm
diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py
new file mode 100644
index 00000000000..552e3b41748
--- /dev/null
+++ b/test/suite/test_join03.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2015 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os
+import wiredtiger, wttest, run
+from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+
+# test_join03.py
+#    Join operations
+# Joins with a custom extractor
+class test_join03(wttest.WiredTigerTestCase):
+    table_name1 = 'test_join03'  # NOTE(review): not referenced elsewhere in this class
+    nentries = 100               # rows inserted by join(), i = 0..nentries-1
+
+    # Return the wiredtiger_open extension argument for a shared library.
+    def extensionArg(self, exts):  # exts: iterable of (dirname, name, libname) tuples
+        extfiles = []
+        for ext in exts:
+            (dirname, name, libname) = ext
+            if name is not None and name != 'none':
+                # Look for <builddir>/ext/<dirname>/<name>/.libs/libwiredtiger_<libname>.so
+                extdir = os.path.join(run.wt_builddir, 'ext', dirname)
+                extfile = os.path.join(
+                    extdir, name, '.libs', 'libwiredtiger_' + libname + '.so')
+                if not os.path.exists(extfile):
+                    self.skipTest('extension "' + extfile + '" not built')
+                if extfile not in extfiles:    # list each library only once
+                    extfiles.append(extfile)
+        if not extfiles:
+            return ''
+        else:
+            return ',extensions=["' + '","'.join(extfiles) + '"]'
+
+    # Override WiredTigerTestCase, we have extensions.
+    def setUpConnectionOpen(self, dir):  # opens the connection with the csv extractor loaded
+        extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')])
+        connarg = 'create,error_prefix="{0}: ",{1}'.format(
+            self.shortid(), extarg)
+        conn = wiredtiger.wiredtiger_open(dir, connarg)
+        self.pr(repr(conn))  # repr() replaces the Python-2-only backtick form; same output
+        return conn
+
+    def gen_key(self, i):
+        return [ i + 1 ]  # row i is stored under key i + 1 (the table uses key_format=r)
+
+    def gen_values(self, i):
+        s = str(i)                     # CSV field 0: decimal string of i
+        rs = s[::-1].lstrip('0')       # CSV field 1: reversed digits, leading zeros dropped
+        return [ s + ',' + rs ]        # a single string value holding both fields
+
+    # Common function for testing iteration of join cursors
+    def iter_common(self, jc):
+        mbr = set([62, 63, 72, 73, 82, 83, 92, 93])  # expected i values, in any order
+        while jc.next() == 0:
+            [k] = jc.get_keys()
+            i = k - 1    # undo the +1 applied by gen_key()
+            [v] = jc.get_values()
+            self.assertEquals(self.gen_values(i), [v])
+            if not i in mbr:
+                self.tty('  result ' + str(i) + ' is not in: ' + str(mbr))
+            self.assertTrue(i in mbr)
+            mbr.remove(i)    # a repeated row would fail the membership check above
+        self.assertEquals(0, len(mbr))    # every expected row must have been returned
+
+    # Common function for testing the most basic functionality
+    # of joins
+    def join(self, csvformat, args0, args1):  # csvformat: two chars, key formats of index0/index1
+        self.session.create('table:join03', 'key_format=r' +
+                            ',value_format=S,columns=(k,v)')
+        fmt = csvformat[0]
+        self.session.create('index:join03:index0','key_format=' + fmt + ',' +
+                            'extractor=csv,app_metadata={"format" : "' +
+                            fmt + '","field" : "0"}')
+        fmt = csvformat[1]
+        self.session.create('index:join03:index1','key_format=' + fmt + ',' +
+                            'extractor=csv,app_metadata={"format" : "' +
+                            fmt + '","field" : "1"}')
+
+        c = self.session.open_cursor('table:join03', None, None)
+        for i in range(0, self.nentries):
+            c.set_key(*self.gen_key(i))
+            c.set_value(*self.gen_values(i))
+            c.insert()
+        c.close()
+
+        jc = self.session.open_cursor('join:table:join03', None, None)
+
+        # All the numbers 0-99 whose first CSV field sorts >= 60
+        # and whose second field (the reversed digits) is strictly
+        # between 21 and 41.  That is: [62, 63, 72, 73, 82, 83, 92, 93]
+        c0 = self.session.open_cursor('index:join03:index0', None, None)
+        if csvformat[0] == 'S':
+            c0.set_key('60')
+        else:
+            c0.set_key(60)
+        self.assertEquals(0, c0.search())
+        self.session.join(jc, c0, 'compare=ge' + args0)
+
+        c1a = self.session.open_cursor('index:join03:index1', None, None)
+        if csvformat[1] == 'S':
+            c1a.set_key('21')
+        else:
+            c1a.set_key(21)
+        self.assertEquals(0, c1a.search())
+        self.session.join(jc, c1a, 'compare=gt' + args1)
+
+        c1b = self.session.open_cursor('index:join03:index1', None, None)
+        if csvformat[1] == 'S':
+            c1b.set_key('41')
+        else:
+            c1b.set_key(41)
+        self.assertEquals(0, c1b.search())
+        self.session.join(jc, c1b, 'compare=lt' + args1)
+
+        # Iterate once and check the results (reset is not exercised here).
+        self.iter_common(jc)
+
+        jc.close()
+        c1a.close()
+        c1b.close()
+        c0.close()
+        self.session.drop('table:join03')  # each call recreates the table and indices
+
+    # Test joins using CSV fields that are interpreted as different types
+    # to make sure all the extractor plumbing used in joins is working.
+    def test_join(self):
+        for extraargs in [ '', ',strategy=bloom,count=1000' ]:
+            for csvformat in [ 'SS', 'ii', 'Si', 'iS' ]:
+                self.join(csvformat, '', extraargs)  # only the index1 joins get the bloom config
+
+
+if __name__ == '__main__':    # only when this file is executed directly
+    wttest.run()              # presumably runs this module's tests via the wttest harness - confirm