summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorDon Anderson <dda@ddanderson.com>2011-12-09 14:13:45 -0500
committerDon Anderson <dda@ddanderson.com>2011-12-09 14:13:45 -0500
commit339d2ecd1a61a570d2b01ed12d04ee750a4111dc (patch)
treed08dabcb5dbb925109f6a95e9b8104af94a50893 /test
parentee127b1cf331319e90f6f7de4d20514b5c37224f (diff)
downloadmongo-339d2ecd1a61a570d2b01ed12d04ee750a4111dc.tar.gz
Added test_schema03, which sets up complex pseudo-randomly
generated schemas and pumps data into them. refs #16.
Diffstat (limited to 'test')
-rw-r--r--test/suite/test_schema03.py538
-rw-r--r--test/suite/wtscenario.py11
-rw-r--r--test/suite/wttest.py3
3 files changed, 552 insertions, 0 deletions
diff --git a/test/suite/test_schema03.py b/test/suite/test_schema03.py
new file mode 100644
index 00000000000..0c552f60f90
--- /dev/null
+++ b/test/suite/test_schema03.py
@@ -0,0 +1,538 @@
+#!/usr/bin/env python
+#
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2008-2011 WiredTiger, Inc.
+# All rights reserved.
+#
+# test_schema03.py
+# Bigger, more 'randomly generated' schemas and data.
+# This test is complex. If it fails, rerun with
+# modified values for SHOW_PYTHON* variables.
+#
+
+import unittest
+import wiredtiger
+from wiredtiger import WiredTigerError
+import wttest
+import wtscenario
+import suite_random
+import resource
+
def extract_random_from_list(rand, list):
    """
    Pick one element of 'list' at a position chosen by 'rand'.

    'rand' must provide rand_range(lo, hi); the returned position is
    used directly as an index into 'list'.  The caller's list is not
    modified: a new list without the chosen element is built.

    Returns a tuple (chosen element, list without that element).
    """
    index = rand.rand_range(0, len(list))
    chosen = list[index]
    remaining = list[:index] + list[index + 1:]
    return (chosen, remaining)
+
class tabconfig:
    """
    Configuration for a table used in the test.

    Holds the table's name/index, its column group and index
    configurations, the number and formats of its key and value
    columns, and a running count of the entries inserted so far.
    """
    def __init__(self):
        self.tableidx = -1
        self.tablename = ''
        self.cglist = []        # list of related cgconfig
        self.idxlist = []       # list of related idxconfig
        self.nkeys = 0          # how many key columns
        self.nvalues = 0        # how many value columns
        self.nentries = 0       # how many entries have been inserted
        self.keyformats = ''    # one format char per key column
        self.valueformats = ''  # one format char per value column

    def gen_keys(self, i):
        """
        Return the list of key column values for entry number 'i'.

        We don't want to insert the keys in order, so generate them
        with backwards digits, e.g. 235 => 532.  However, 100
        backwards is 001, so we append a positive digit to the end
        before reversing; the result is therefore always >= 1.

        Raises ValueError for a key format this test does not know.
        """
        addmod = i * 10 + (i % 7) + 1
        rev = int(str(addmod)[::-1])
        keys = []
        # ASSUME: each format is 1 char
        for fmt in self.keyformats:
            if fmt == 'S':
                keys.append(str(rev))
            elif fmt == 'i' or fmt == 'r':
                # 'r' (record number) keys were previously dropped,
                # producing an empty key; use the scrambled integer.
                keys.append(rev)
            else:
                raise ValueError('unsupported key format: ' + repr(fmt))
        return keys

    def gen_values(self, i):
        """
        Return the list of value column values for entry number 'i'.

        Raises ValueError for a value format this test does not know.
        """
        vals = []
        # ASSUME: each format is 1 char
        for fmt in self.valueformats:
            if fmt == 'S':
                vals.append(str(i))
            elif fmt == 'i':
                vals.append(i)
            else:
                raise ValueError('unsupported value format: ' + repr(fmt))
        return vals

    def columns_for_groups(self, collist):
        """
        Distribute every column number in 'collist' among the column
        groups in self.cglist, appending to each cgconfig.columns.

        Raises ValueError if there are fewer columns than groups.
        """
        totalgroups = len(self.cglist)
        ncolumns = len(collist)
        rand = suite_random.suite_random(ncolumns, totalgroups)

        # Each columngroup must have at least one column, so
        # the only choice about distribution is with the
        # excess columns.
        excess = ncolumns - totalgroups
        if excess < 0:
            raise ValueError('columns_for_groups expects a column list (len=' + str(ncolumns) + ') larger than column group list (len=' + str(totalgroups) + ')')

        # Initially, all groups get one column from the collist
        for cg in self.cglist:
            (colno, collist) = extract_random_from_list(rand, collist)
            cg.columns.append(colno)

        # Then divvy up the remainder of the collist
        for _ in range(0, excess):
            cg = self.cglist[rand.rand_range(0, totalgroups)]
            (colno, collist) = extract_random_from_list(rand, collist)
            cg.columns.append(colno)

        # collist should be emptied
        if len(collist) != 0:
            raise AssertionError('column list did not get emptied')

    def columns_for_indices(self, collist):
        """
        Choose columns from 'collist' for every index in self.idxlist,
        appending to idxconfig.columns and idxconfig.formats.

        'collist' is indexed by position and its entries are used as
        column numbers: values below self.nkeys refer to key columns,
        the rest to value columns.
        """
        totalindices = len(self.idxlist)
        ncolumns = len(collist)
        startcol = 0

        # KNOWN LIMITATION: Indices should not include primary keys
        # Remove this statement when the limitation is fixed.
        startcol = self.nkeys
        # END KNOWN LIMITATION.

        rand = suite_random.suite_random(ncolumns, totalindices)

        eligible = ncolumns - startcol
        if eligible <= 0:
            # Nothing to choose from.
            return

        # The original bound (eligible - 1) gave an index no columns
        # at all when exactly one column was eligible; always allow
        # at least one pick.
        attempts = max(1, eligible - 1)

        # Initially, all indices get one column from the collist.
        # Overlaps are allowed. Then probabilistically, add some
        # more columns.
        for idx in self.idxlist:
            prob = 1.0
            for _ in range(0, attempts):
                if rand.rand_float() > prob:
                    break
                colno = collist[rand.rand_range(startcol, ncolumns)]
                if colno not in idx.columns:
                    idx.columns.append(colno)
                    if colno < self.nkeys:
                        # ASSUME: each format is 1 char
                        idx.formats += self.keyformats[colno]
                    else:
                        # ASSUME: each format is 1 char
                        idx.formats += self.valueformats[colno - self.nkeys]
                prob *= 0.5
+
+
class cgconfig:
    """
    Describes one column group used in the test.
    A tabconfig keeps a list of these in its cglist.
    """
    def __init__(self):
        # 0 or 1, depending on which set the group is created in.
        self.createset = 0
        # Column numbers assigned to this group.
        self.columns = []
        # Name of the column group.
        self.cgname = ''
+
class idxconfig:
    """
    Describes one index used in the test.
    A tabconfig keeps a list of these in its idxlist.
    """
    def __init__(self):
        self.idxname = ''
        self.columns = []     # column numbers covered by the index
        self.createset = 0    # 0 or 1 depending on which set to create them.
        self.formats = ''     # one format char per entry in columns
        self.tab = None       # references the tabconfig

    def gen_keys(self, i):
        """
        Return the list of index key values for entry number 'i'.

        A column that is a key in the primary table contributes the
        digit-reversed value of (i * 10 + (i % 7) + 1); a column that
        is a value in the primary table contributes i itself.  Columns
        with format 'S' are converted to strings.
        """
        scrambled = int(str(i * 10 + (i % 7) + 1)[::-1])
        result = []
        for colpos, fmt in enumerate(self.formats):
            if self.columns[colpos] < self.tab.nkeys:
                # The column is a key in the primary table
                entry = scrambled
            else:
                # The column is a value in the primary table
                entry = i
            result.append(str(entry) if fmt == 'S' else entry)
        return result
+
class test_schema03(wttest.WiredTigerTestCase):
    """
    Test schemas - a 'predictably random' assortment of columns,
    column groups and indices are created within tables, and are
    created in various orders as much as the API allows.  On some runs
    the connection will be closed and reopened at a particular point
    to test that the schemas (and data) are saved and read correctly.

    The test is run multiple times, using scenarios.
    The test always follows these steps:
    - table:      create tables
    - colgroup0:  create (some) colgroups
    - index0:     create (some) indices
    - colgroup1:  create (more) colgroups
    - index1:     create (more) indices
    - populate0:  populate 1st time
    - index2:     create (more) indices
    - populate1:  populate 2nd time (more key/values)
    - check:      check key/values

    The variations represented by scenarios are:
    - how many tables to create
    - how many colgroups to create at each step (may be 0)
    - how many indices to create at each step (may be 0)
    - between each step, whether to close/reopen the connection
    """

    ################################################################
    # These three variables can be altered to help generate
    # and pare down failing test cases.

    # Set to true to get python test program fragment on stdout,
    # used by show_python() below.
    SHOW_PYTHON = False

    # When SHOW_PYTHON is set, we print an enormous amount of output.
    # To only print for a given scenario, set this
    SHOW_PYTHON_ONLY_SCEN = None  # could be e.g. [2] or [0,1]

    # To print verbosely for only a given table, set this
    SHOW_PYTHON_ONLY_TABLE = None  # could be e.g. [2] or [0,1]

    ################################################################

    # Set whenever we are working with a table
    current_table = None

    # TODO: nentries = 1000
    nentries = 15

    # We need to have a large number of open files available
    # to run this test.  We probably don't need quite this many,
    # but boost it up to this limit anyway.
    OPEN_FILE_LIMIT = 4096

    # s_restart lists the step names after which finished_step()
    # reopens the connection.
    restart_scenarios = [('table', dict(s_restart=['table'],P=0.3)),
                         ('colgroup0', dict(s_restart=['colgroup0'],P=0.3)),
                         ('index0', dict(s_restart=['index0'],P=0.3)),
                         ('colgroup1', dict(s_restart=['colgroup1'],P=0.3)),
                         ('index1', dict(s_restart=['index1'],P=0.3)),
                         ('populate0', dict(s_restart=['populate0'],P=0.3)),
                         ('index2', dict(s_restart=['index2'],P=0.3)),
                         ('populate1', dict(s_restart=['populate1'],P=0.3)),
                         ('ipop', dict(s_restart=['index0','populate0'],P=0.3)),
                         ('all', dict(s_restart=['table','colgroup0','index0','colgroup1','index1','populate0','index2','populate1'],P=1.0))]

    ntable_scenarios = wtscenario.quick_scenarios('s_ntable',
        [1,2,7,43], [1.0,0.4,0.5,1.0])
    ncolgroup_scenarios = wtscenario.quick_scenarios('s_colgroup',
        [[1,0],[0,1],[2,4],[18,5]], [1.0,0.2,0.3,1.0])
    nindex_scenarios = wtscenario.quick_scenarios('s_index',
        [[1,1,1],[3,2,4],[15,7,3]], [1.0,0.5,1.0])

    all_scenarios = wtscenario.multiply_scenarios('_', restart_scenarios, ntable_scenarios, ncolgroup_scenarios, nindex_scenarios)

    scenarios = wtscenario.prune_scenarios(all_scenarios)
    scenarios = wtscenario.number_scenarios(scenarios)
    print('test_schema03: running ' + str(len(scenarios)) + ' scenarios')

    # TODO: reduce the set to one for initial debugging
    # scenarios = [ scenarios[0], scenarios[30], scenarios[40], scenarios[50] ]
    scenarios = [ scenarios[40] ]
    # scenarios = [ scenarios[30] ]
    # scenarios = [ scenarios[0] ]

    def setUp(self):
        """
        This test requires a large number of open files.
        Increase our soft resource limit before we start.
        """
        super(test_schema03, self).setUp()
        self.origFileLimit = resource.getrlimit(resource.RLIMIT_NOFILE)
        (soft, hard) = self.origFileLimit
        wanted = self.OPEN_FILE_LIMIT
        # The soft limit may not exceed the hard limit.
        if hard != resource.RLIM_INFINITY and wanted > hard:
            wanted = hard
        # Only ever raise the soft limit, never lower it.
        if soft != resource.RLIM_INFINITY and soft < wanted:
            resource.setrlimit(resource.RLIMIT_NOFILE, (wanted, hard))

    def setUpConnectionOpen(self, dir):
        """
        Open and return a connection on 'dir' with a 10MB cache
        and hazard_max=100.
        """
        cs = 10 * 1024 * 1024
        conn = wiredtiger.wiredtiger_open(dir, 'create,cache_size=' +
                                          str(cs) + ',hazard_max=100')
        self.pr(repr(conn))
        return conn

    def tearDown(self):
        """
        Run the base class tearDown, then restore the file limit
        saved by setUp (even if the base class tearDown raises).
        """
        try:
            super(test_schema03, self).tearDown()
        finally:
            resource.setrlimit(resource.RLIMIT_NOFILE, self.origFileLimit)

    def gen_formats(self, rand, n, iskey):
        """
        Return a format string of 'n' characters, each 'S' or 'i'
        chosen by 'rand'.  For a single key column the result may
        instead be 'r' (record number).
        """
        if iskey and n == 1:
            if rand.rand_range(0, 2) == 0:
                return 'r'      # record number
        return ''.join(['S' if rand.rand_range(0, 2) == 0 else 'i'
                        for _ in range(0, n)])

    def show_python(self, s):
        """
        Print 's' (a fragment of a python test program) when
        SHOW_PYTHON is set and the current table and scenario pass
        the SHOW_PYTHON_ONLY_* filters.
        """
        if not self.SHOW_PYTHON:
            return
        tables = self.SHOW_PYTHON_ONLY_TABLE
        if tables is not None and self.current_table not in tables:
            return
        scens = self.SHOW_PYTHON_ONLY_SCEN
        if scens is not None and self.scenario_number not in scens:
            return
        print(' ' + s)

    def join_names(self, sep, prefix, list):
        """
        Return the entries of 'list', each converted to a string and
        prefixed with 'prefix', joined by 'sep'.
        """
        return sep.join([prefix + str(val) for val in list])

    def create(self, what, tablename, whatname, columnlist):
        """
        Create the object '<what>:<tablename>:<whatname>' over the
        given column numbers and assert that create returns 0.
        """
        createarg = what + ":" + tablename + ":" + whatname
        colarg = self.join_names(',', 'c', columnlist)
        self.show_python("self.session.create('" + createarg + "', 'columns=(" + colarg + ")')")
        result = self.session.create(createarg, "columns=(" + colarg + ")")
        self.assertEqual(result, 0)

    def finished_step(self, name):
        """
        Called after each step; reopen the connection if the current
        scenario lists this step in s_restart.
        """
        # s_restart is a list of step names, so test membership
        # (comparing the list to the name with == is never true).
        if name in self.s_restart:
            print(" # Reopening connection at step: " + name)
            self.reopen_conn()

    def _new_tabconfig(self, rand, i):
        """
        Build the tabconfig for table number 'i': its column groups,
        indices, and number/format of key and value columns.
        """
        tc = tabconfig()
        tc.tablename = 't' + str(i)
        tc.tableidx = i

        # Column groups are created in two different times.
        # We call these two batches 'createsets'.
        # So we don't have exactly the same number of column groups
        # for each table, for tests that indicate >1 colgroup, we
        # increase the number of column groups for each table
        for createset in range(0, 2):
            ncg = self.s_colgroup[createset]
            if ncg > 1:
                ncg += i
            for _ in range(0, ncg):
                thiscg = cgconfig()
                thiscg.createset = createset

                # KNOWN LIMITATION: Column groups created after
                # indices confuses things.  So for now, put all
                # column group creation in the first set.
                # Remove this statement when the limitation is fixed.
                thiscg.createset = 0
                # END KNOWN LIMITATION

                thiscg.cgname = 'g' + str(len(tc.cglist))
                tc.cglist.append(thiscg)

        # The same idea for indices, except that we create them in
        # three sets
        for createset in range(0, 3):
            nindex = self.s_index[createset]
            if nindex > 1:
                nindex += i
            for _ in range(0, nindex):
                thisidx = idxconfig()
                thisidx.createset = createset
                thisidx.idxname = 'i' + str(len(tc.idxlist))
                thisidx.tab = tc
                tc.idxlist.append(thisidx)

        # We'll base the number of key/value columns
        # loosely on the number of column groups and indices.
        colgroups = len(tc.cglist)
        indices = len(tc.idxlist)
        nall = colgroups * 2 + indices
        nkeys = rand.rand_range(1, nall)
        nvalues = rand.rand_range(0, nall)
        # we need at least one value per column group
        if nvalues < colgroups:
            nvalues = colgroups
        tc.nkeys = nkeys
        tc.nvalues = nvalues
        tc.keyformats = self.gen_formats(rand, tc.nkeys, True)
        tc.valueformats = self.gen_formats(rand, tc.nvalues, False)
        return tc

    def _create_table(self, tc):
        """
        Create the table described by 'tc'.

        Simple naming (we'll test odd naming elsewhere):
          tables named 't0' --> 't<N>'
          within each table:
            columns named 'c0' --> 'c<N>'
            colgroups named 'g0' --> 'g<N>'
            indices named 'i0' --> 'i<N>'
        """
        config = "key_format=" + tc.keyformats
        config += ",value_format=" + tc.valueformats
        config += ",columns=("
        config += self.join_names(',', 'c', range(0, tc.nkeys + tc.nvalues))
        config += "),colgroups=("
        config += self.join_names(',', 'g', range(0, len(tc.cglist)))
        config += ")"
        # indices are not declared here
        self.show_python("self.session.create('table:" + tc.tablename + "', '" + config + "')")
        self.session.create("table:" + tc.tablename, config)

    def test_schema(self):
        """
        Run the steps listed in the class docstring for the current
        scenario.
        """
        rand = suite_random.suite_random()
        if self.SHOW_PYTHON:
            print(' ################################################')
            print(' # Running scenario ' + str(self.scenario_number))

        ntables = self.s_ntable

        # Report known limitations in the test,
        # we'll work around these later, in a loop where we don't want to print.
        self.KNOWN_LIMITATION('Indices created after data population will have no entries')
        self.KNOWN_LIMITATION('Column groups created after indices confuses things')
        self.KNOWN_LIMITATION('Indices should not include primary keys')

        tabconfigs = []
        for i in range(0, ntables):
            self.current_table = i
            tc = self._new_tabconfig(rand, i)
            tabconfigs.append(tc)
            self._create_table(tc)

            ncols = tc.nkeys + tc.nvalues
            tc.columns_for_groups(list(range(tc.nkeys, ncols)))
            tc.columns_for_indices(list(range(0, ncols)))

        self.finished_step('table')

        for createset in (0, 1):
            # Create column groups in this set
            # e.g. self.session.create("colgroup:t0:g1", "columns=(c3,c4)")
            for tc in tabconfigs:
                self.current_table = tc.tableidx
                for cg in tc.cglist:
                    if cg.createset == createset:
                        self.create('colgroup', tc.tablename, cg.cgname, cg.columns)

            self.finished_step('colgroup' + str(createset))

            # Create indices in this set
            # e.g. self.session.create("index:t0:i1", "columns=(c3,c4)")
            for tc in tabconfigs:
                self.current_table = tc.tableidx
                for idx in tc.idxlist:
                    if idx.createset == createset:
                        self.create('index', tc.tablename, idx.idxname, idx.columns)

            self.finished_step('index' + str(createset))

        # populate first batch
        for tc in tabconfigs:
            self.current_table = tc.tableidx
            nfirst = rand.rand_range(0, self.nentries)
            self.populate(tc, range(0, nfirst))

        self.finished_step('populate0')

        # TODO: create indices in third set (disabled for now;
        # check_entries skips these indices as well):
        #   for tc in tabconfigs:
        #       for idx in tc.idxlist:
        #           if idx.createset == 2:
        #               self.create('index', tc.tablename, idx.idxname, idx.columns)

        self.finished_step('index2')

        # populate second batch
        for tc in tabconfigs:
            self.current_table = tc.tableidx
            self.populate(tc, range(tc.nentries, self.nentries))

        self.finished_step('populate1')

        for tc in tabconfigs:
            self.current_table = tc.tableidx
            self.check_entries(tc)

    def populate(self, tc, insertrange):
        """
        Insert one entry into the table of 'tc' for each number in
        'insertrange', counting each insert in tc.nentries.
        """
        self.show_python("cursor = self.session.open_cursor('table:" + tc.tablename + "', None, None)")
        cursor = self.session.open_cursor('table:' + tc.tablename, None, None)
        for i in insertrange:
            key = tc.gen_keys(i)
            val = tc.gen_values(i)
            self.show_python("cursor.set_key(*" + str(key) + ")")
            cursor.set_key(*key)
            self.show_python("cursor.set_value(*" + str(val) + ")")
            cursor.set_value(*val)
            self.show_python("cursor.insert()")
            cursor.insert()
            tc.nentries += 1
        self.show_python("cursor.close()")
        cursor.close()

    def check_one(self, name, cursor, key, val):
        """
        Search 'cursor' for 'key' and assert that the search returns
        0 and that the cursor's keys and values equal 'key' and 'val'.
        'name' is only used in the show_python output.
        """
        keystr = str(key)
        valstr = str(val)
        self.show_python('# search[' + name + '](' + keystr + ')')
        self.show_python("cursor.set_key(*" + keystr + ")")
        cursor.set_key(*key)
        self.show_python("ok = cursor.search()")
        ok = cursor.search()
        self.show_python("self.assertEqual(ok, 0)")
        self.assertEqual(ok, 0)
        self.show_python("self.assertEqual(" + keystr + ", cursor.get_keys())")
        self.assertEqual(key, cursor.get_keys())
        self.show_python("self.assertEqual(" + valstr + ", cursor.get_values())")
        self.assertEqual(val, cursor.get_values())

    def _check_cursor(self, label, uri, name, tc, gen_keys):
        """
        Open a cursor on 'uri', assert that it holds tc.nentries
        entries, then look up every entry with check_one(), using
        'gen_keys' to produce the search key for each entry number.
        'label' and 'name' are only used in the show_python output.
        """
        self.show_python('# check_entries: ' + label)
        self.show_python("cursor = self.session.open_cursor('" + uri + "', None, None)")
        cursor = self.session.open_cursor(uri, None, None)
        count = 0
        for _ in cursor:
            count += 1
        self.assertEqual(count, tc.nentries)
        for i in range(0, tc.nentries):
            self.check_one(name, cursor, gen_keys(i), tc.gen_values(i))
        cursor.close()
        self.show_python("cursor.close()")

    def check_entries(self, tc):
        """
        Verify entries in the primary and index table
        related to the tabconfig.
        """
        table_uri = 'table:' + tc.tablename
        self._check_cursor(tc.tablename, table_uri, tc.tablename, tc, tc.gen_keys)

        # Although it's possible to open an index on some partial
        # list of columns, we'll keep it simple here, and always
        # use all (value) columns.
        cols = '(' + self.join_names(',', 'c', range(tc.nkeys, tc.nvalues + tc.nkeys)) + ')'

        # for each index, check each entry
        for idx in tc.idxlist:
            # KNOWN LIMITATION: Indices created after data population
            # will have no entries, so don't bother with them here
            # Remove these statements when the limitation is fixed.
            if idx.createset == 2:
                continue
            # END KNOWN LIMITATION

            full_idxname = 'index:' + tc.tablename + ':' + idx.idxname
            self._check_cursor(full_idxname + cols, full_idxname + cols,
                               full_idxname, tc, idx.gen_keys)
# Allow this test file to be run directly.
if "__main__" == __name__:
    wttest.run()
diff --git a/test/suite/wtscenario.py b/test/suite/wtscenario.py
index 727c7dc6c2c..77128d82b9e 100644
--- a/test/suite/wtscenario.py
+++ b/test/suite/wtscenario.py
@@ -87,6 +87,17 @@ def prune_scenarios(scenes):
result.append(scene)
return result
def number_scenarios(scenes):
    """
    Add a 'scenario_number' variable to each scenario.
    Scenarios are numbered from 0 in list order.
    The hash table for each scenario is altered in place,
    and the same list is returned.
    """
    for number, scene in enumerate(scenes):
        scene[1]['scenario_number'] = number
    return scenes
+
def quick_scenarios(fieldname, values, probabilities):
"""
Quickly build common scenarios, like:
diff --git a/test/suite/wttest.py b/test/suite/wttest.py
index 494c5c82017..fde06187721 100644
--- a/test/suite/wttest.py
+++ b/test/suite/wttest.py
@@ -120,6 +120,9 @@ class WiredTigerTestCase(unittest.TestCase):
print '**** THIS TEST HAS A KNOWN FAILURE: ' + name + ' ****'
self.skipTest('KNOWN FAILURE: ' + name)
def KNOWN_LIMITATION(self, name):
    """
    Print a banner naming a known limitation of the running test.
    """
    banner = '**** THIS TEST HAS A KNOWN LIMITATION: ' + name + ' ****'
    print(banner)
+
def pr(self, s):
"""
print a progress line for testing