2 files changed, 659 insertions, 0 deletions
diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py
new file mode 100644
index 00000000..3826176f
--- /dev/null
+++ b/bench/env.__setitem__.py
@@ -0,0 +1,362 @@
+# __COPYRIGHT__
+#
+# Benchmarks for testing various possible implementations of the
+# env.__setitem__() method(s) in the src/engine/SCons/Environment.py
+# module.
+
+import os.path
+import re
+import string
+import sys
+import timeit
+
+# Utility Timing class and function from:
+# ASPN: Python Cookbook : Timing various python statements
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/544297
+#
+# These wrap the basic timeit function to make it a little more
+# convenient to do side-by-side tests of code.
+
+class Timing:
+    def __init__(self, name, num, init, statement):
+        self.__timer = timeit.Timer(statement, init)    # init is the Timer's setup code
+        self.__num   = num    # how many executions each timeit() call performs
+        self.name    = name    # label that times() prints next to the result
+        self.statement = statement
+        self.__result  = None    # stays None until timeit() has been called
+        
+    def timeit(self):
+        self.__result = self.__timer.timeit(self.__num)    # elapsed time for all __num runs
+        
+    def getResult(self):
+        return self.__result
+
+def times(num=1000000, init='', title='Results:', **statements):
+    # time each statement
+    timings = []
+    for n, s in statements.items():
+        t = Timing(n, num, init, s)
+        t.timeit()
+        timings.append(t)
+    
+    print
+    print title
+    l = []
+    for i in timings: l.append((i.getResult(),i.name))
+    l.sort()
+    for i in l: print "  %9.3f s   %s" % i
+
+# Import the necessary local SCons.* modules used by some of our
+# alternative implementations below, first manipulating sys.path so
+# we pull in the right local modules without forcing the user to set
+# PYTHONPATH.
+
+import __main__
+try:
+    filename = __main__.__file__
+except AttributeError:
+    filename = sys.argv[0]    # __main__ has no __file__ attribute; use the script path instead
+script_dir = os.path.split(filename)[0]
+if script_dir:
+    script_dir = script_dir + '/'    # an empty script_dir is left alone, giving a cwd-relative path below
+sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path    # local SCons modules win
+
+import SCons.Errors
+import SCons.Environment
+
+is_valid_construction_var = SCons.Environment.is_valid_construction_var
+global_valid_var = re.compile(r'[_a-zA-Z]\w*$')
+
+# The classes with different __setitem__() implementations that we're
+# going to horse-race.
+#
+# The base class (Environment) should contain *all* class initialization
+# of anything that will be used by any of the competing sub-class
+# implementations.  Each timing run will create an instance of the class,
+# and all competing sub-classes should share the same initialization
+# overhead so our timing focuses on just the __setitem__() performance.
+#
+# All subclasses should be prefixed with env_, in which case they'll be
+# picked up automatically by the code below for testing.
+#
+# The env_Original subclass contains the original implementation (which
+# actually had the is_valid_construction_var() function in SCons.Util
+# originally).
+#
+# The other subclasses (except for the env_Best_* ones) each contain *one*
+# significant change from the env_Original implementation.  The doc string
+# describes the change, and is what gets displayed in the final timing.
+# The doc strings of these other subclasses are "grouped" informally
+# by a prefix that kind of indicates what specific aspect of __setitem__()
+# is being varied and tested.
+#
+# The env_Best_* subclasses combine the "best practices" from each of
+# the different "groups" of techniques tested in the other subclasses,
+# and are where to experiment with different combinations of techniques.
+# When we're done, one of them should be the implementation that shows
+# up at the top of the list as we run our timings.
+
+class Environment:
+    _special_set = {    # placeholder table: every value is None, there are no handler functions
+        'BUILDERS' : None,
+        'SCANNERS' : None,
+        'TARGET'   : None,
+        'TARGETS'  : None,
+        'SOURCE'   : None,
+        'SOURCES'  : None,
+    }
+    _special_set_keys = _special_set.keys()    # key snapshot used by the "key in attribute" variants
+    _valid_var = re.compile(r'[_a-zA-Z]\w*$')    # same pattern as the module-level global_valid_var
+    def __init__(self, **kw):
+        self._dict = kw    # keyword arguments become the initial variable dictionary, unvalidated
+
+class env_Original(Environment):
+    """Original __setitem__()"""
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)    # always None here: every _special_set value is None
+        if special:
+            special(self, key, value)
+        else:    # baseline: the validator is reached through SCons.Environment on every assignment
+            if not SCons.Environment.is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_Global_is_valid(Environment):
+    """is_valid_construction_var():  use a global function"""
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)
+        if special:
+            special(self, key, value)
+        else:    # variation: call the module-level alias of SCons.Environment.is_valid_construction_var
+            if not is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_Method_is_valid(Environment):
+    """is_valid_construction_var():  use a method"""
+    def is_valid_construction_var(self, varstr):
+        """Return a match object if varstr is a legitimate construction
+        variable name, otherwise None.
+        """
+        return self._valid_var.match(varstr)
+
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)
+        if special:
+            special(self, key, value)
+        else:    # variation: validate through the method defined on this class
+            if not self.is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_regex_attribute_is_valid(Environment):
+    """is_valid_construction_var():  use a regex attribute"""
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)
+        if special:
+            special(self, key, value)
+        else:    # variation: match against the _valid_var class attribute, no wrapper function
+            if not self._valid_var.match(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_global_regex_is_valid(Environment):
+    """is_valid_construction_var():  use a global regex"""
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)
+        if special:
+            special(self, key, value)
+        else:    # variation: match against the module-level compiled regex
+            if not global_valid_var.match(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_special_set_has_key(Environment):
+    """_special_set.get():  use _special_set.has_key() instead"""
+    def __setitem__(self, key, value):
+        if self._special_set.has_key(key):    # variation: membership test replaces the .get() probe
+            self._special_set[key](self, key, value)    # NOTE(review): stored values are None, so a special key would fail here
+        else:
+            if not SCons.Environment.is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_key_in_tuple(Environment):
+    """_special_set.get():  use "key in tuple" instead"""
+    def __setitem__(self, key, value):
+        if key in ('BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES'):    # variation: literal tuple
+            self._special_set[key](self, key, value)
+        else:
+            if not SCons.Environment.is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_key_in_list(Environment):
+    """_special_set.get():  use "key in list" instead"""
+    def __setitem__(self, key, value):
+        if key in ['BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES']:    # variation: literal list
+            self._special_set[key](self, key, value)
+        else:
+            if not SCons.Environment.is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_key_in_attribute(Environment):
+    """_special_set.get():  use "key in attribute" instead"""
+    def __setitem__(self, key, value):
+        if key in self._special_set_keys:    # variation: class attribute holding _special_set.keys()
+            self._special_set[key](self, key, value)
+        else:
+            if not SCons.Environment.is_valid_construction_var(key):
+                raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_try_except(Environment):
+    """avoid is_valid_construction_var():  use try:-except:"""
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)
+        if special:
+            special(self, key, value)
+        else:
+            try:
+                self._dict[key]    # lookup only: a key that is already stored skips validation
+            except KeyError:
+                if not SCons.Environment.is_valid_construction_var(key):    # new names only
+                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_not_has_key(Environment):
+    """avoid is_valid_construction_var():  use not .has_key()"""
+    def __setitem__(self, key, value):
+        special = self._special_set.get(key)
+        if special:
+            special(self, key, value)
+        else:
+            if not self._dict.has_key(key) \
+                and not SCons.Environment.is_valid_construction_var(key):    # validator only runs for names not yet stored
+                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_Best_attribute(Environment):
+    """Best __setitem__(), with an attribute"""
+    def __setitem__(self, key, value):
+        if key in self._special_set_keys:    # special-name test: "key in attribute"
+            self._special_set[key](self, key, value)
+        else:
+            if not self._dict.has_key(key) \
+               and not global_valid_var.match(key):    # global regex, only for names not yet stored
+                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_Best_has_key(Environment):
+    """Best __setitem__(), with has_key"""
+    def __setitem__(self, key, value):
+        if self._special_set.has_key(key):    # special-name test: dictionary has_key()
+            self._special_set[key](self, key, value)
+        else:
+            if not self._dict.has_key(key) \
+               and not global_valid_var.match(key):    # global regex, only for names not yet stored
+                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+class env_Best_list(Environment):
+    """Best __setitem__(), with a list"""
+    def __setitem__(self, key, value):
+        if key in ['BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES']:    # special-name test: literal list
+            self._special_set[key](self, key, value)
+        else:
+            if not self._dict.has_key(key) \
+               and not global_valid_var.match(key):    # global regex, only for names not yet stored
+                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+            self._dict[key] = value
+
+try:
+    ''.isalnum    # probe: does this Python give strings an isalnum method?
+except AttributeError:
+    pass    # no such method, so the isalnum variant is simply not defined
+else:
+    class env_isalnum(Environment):
+        """Greg's Folly: isalnum instead of probe"""
+        def __setitem__(self, key, value):
+            if self._special_set.has_key(key):
+                self._special_set[key](self, key, value)
+            else:
+                if not key.isalnum() and not global_valid_var.match(key):    # NOTE: a digit-leading name like '123' passes isalnum() and bypasses the regex
+                    raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+                self._dict[key] = value
+
+# We'll use the names of all the env_* classes we find later to build
+# the dictionary of statements to be timed, and the import statement
+# that the timer will use to get at these classes.
+
+class_names = []
+for n in locals().keys():    # at module level locals() is the module namespace
+    # Slice instead of n.startswith('env_') (presumably for Pythons without string methods -- TODO confirm).
+    if n[:4] == 'env_':
+        class_names.append(n)
+
+# This is *the* function that gets timed.  It will get called for the
+# specified number of iterations for the cross product of the number of
+# classes we're testing and the number of data sets (defined below).
+
+iterations = 10000
+
+def do_it(names, env_class):
+    e = env_class()
+    for key in names:
+        e[key] = 1
+
+# Build the dictionary of "statements" that will be tested.  For each
+# class we're testing, the doc string describing the class is the key,
+# and the statement we test is a simple "do_it(names, {class})" call.
+
+statements = {}
+
+for class_name in class_names:
+    ec = eval(class_name)    # turn the collected name back into the class object
+    statements[ec.__doc__] = 'do_it(names, %s)' % class_name    # keyed by doc string, so doc strings must be unique
+
+# The common_imports string is used in the initialization of each
+# test run.  The timeit module insulates the test snippets from the
+# global namespace, so we have to import these explicitly from __main__.
+
+common_import_variables = ['do_it'] + class_names
+
+common_imports = """
+from __main__ import %s
+""" % string.join(common_import_variables, ', ')
+
+# The test data (lists of variable names) that we'll use for the runs.
+
+same_variable_names = ['XXX'] * 100    # one name, assigned 100 times
+uniq_variable_names = []
+for i in range(100): uniq_variable_names.append('X%05d' % i)    # 'X00000' through 'X00099'
+mixed_variable_names = uniq_variable_names[:50] + same_variable_names[:50]    # 50 distinct names, then 'XXX' 50 times
+
+# Lastly, put it all together...
+
+def run_it(title, init):
+      s = statements.copy()
+      s['num'] = iterations
+      s['title'] = title
+      s['init'] = init
+      apply(times,(),s)
+
+print 'Environment __setitem__ benchmark using',
+print 'Python', string.split(sys.version)[0],
+print 'on', sys.platform, os.name
+
+run_it('Results for re-adding an existing variable name 100 times:',
+      common_imports + """
+import __main__ ; names = __main__.same_variable_names
+""")
+
+run_it('Results for adding 100 variable names, 50 existing and 50 new:',
+      common_imports + """
+import __main__ ; names = __main__.mixed_variable_names
+""")
+
+run_it('Results for adding 100 new, unique variable names:',
+      common_imports + """
+import __main__ ; names = __main__.uniq_variable_names
+""")
diff --git a/bench/timeit.py b/bench/timeit.py
new file mode 100644
index 00000000..d5e33bb1
--- /dev/null
+++ b/bench/timeit.py
@@ -0,0 +1,297 @@
+#! /usr/bin/env python
+
+"""Tool for measuring execution time of small code snippets.
+
+This module avoids a number of common traps for measuring execution
+times.  See also Tim Peters' introduction to the Algorithms chapter in
+the Python Cookbook, published by O'Reilly.
+
+Library usage: see the Timer class.
+
+Command line usage:
+    python timeit.py [-n N] [-r N] [-s S] [-t] [-c] [-h] [statement]
+
+Options:
+  -n/--number N: how many times to execute 'statement' (default: see below)
+  -r/--repeat N: how many times to repeat the timer (default 3)
+  -s/--setup S: statement to be executed once initially (default 'pass')
+  -t/--time: use time.time() (default on Unix)
+  -c/--clock: use time.clock() (default on Windows)
+  -v/--verbose: print raw timing results; repeat for more digits precision
+  -h/--help: print this usage message and exit
+  statement: statement to be timed (default 'pass')
+
+A multi-line statement may be given by specifying each line as a
+separate argument; indented lines are possible by enclosing an
+argument in quotes and using leading spaces.  Multiple -s options are
+treated similarly.
+
+If -n is not given, a suitable number of loops is calculated by trying
+successive powers of 10 until the total time is at least 0.2 seconds.
+
+The difference in default timer function is because on Windows,
+clock() has microsecond granularity but time()'s granularity is 1/60th
+of a second; on Unix, clock() has 1/100th of a second granularity and
+time() is much more precise.  On either platform, the default timer
+functions measure wall clock time, not the CPU time.  This means that
+other processes running on the same computer may interfere with the
+timing.  The best thing to do when accurate timing is necessary is to
+repeat the timing a few times and use the best time.  The -r option is
+good for this; the default of 3 repetitions is probably enough in most
+cases.  On Unix, you can use clock() to measure CPU time.
+
+Note: there is a certain baseline overhead associated with executing a
+pass statement.  The code here doesn't try to hide it, but you should
+be aware of it.  The baseline overhead can be measured by invoking the
+program without arguments.
+
+The baseline overhead differs between Python versions!  Also, to
+fairly compare older Python versions to Python 2.3, you may want to
+use python -O for the older versions to avoid timing SET_LINENO
+instructions.
+"""
+
+try:
+    import gc
+except ImportError:
+    class _fake_gc:    # no-op stand-in used when the gc module cannot be imported
+        def isenabled(self):
+            return None    # false value, so Timer.timeit() never calls enable()
+        def enable(self):
+            pass
+        def disable(self):
+            pass
+    gc = _fake_gc()
+import sys
+import time
+try:
+    import itertools
+except ImportError:
+    # Must be an older Python version (see timeit() below)
+    itertools = None
+
+import string
+
+__all__ = ["Timer"]
+
+dummy_src_name = "<timeit-src>"
+default_number = 1000000
+default_repeat = 3
+
+if sys.platform == "win32":
+    # On Windows, the best timer is time.clock()
+    default_timer = time.clock
+else:
+    # On most other platforms the best timer is time.time()
+    default_timer = time.time
+
+# Don't change the indentation of the template; the reindent() calls
+# in Timer.__init__() depend on setup being indented 4 spaces and stmt
+# being indented 8 spaces.
+template = """
+def inner(_it, _timer):
+    %(setup)s
+    _t0 = _timer()
+    for _i in _it:
+        %(stmt)s
+    _t1 = _timer()
+    return _t1 - _t0
+"""
+
+def reindent(src, indent):
+    """Helper to reindent a multi-line statement."""
+    return string.replace(src, "\n", "\n" + " "*indent)
+
+class Timer:
+    """Class for timing execution speed of small code snippets.
+
+    The constructor takes a statement to be timed, an additional
+    statement used for setup, and a timer function.  Both statements
+    default to 'pass'; the timer function is platform-dependent (see
+    module doc string).
+
+    To measure the execution time of the first statement, use the
+    timeit() method.  The repeat() method is a convenience to call
+    timeit() multiple times and return a list of results.
+
+    The statements may contain newlines, as long as they don't contain
+    multi-line string literals.
+    """
+
+    def __init__(self, stmt="pass", setup="pass", timer=default_timer):
+        """Constructor.  See class doc string."""
+        self.timer = timer
+        stmt = reindent(stmt, 8)
+        setup = reindent(setup, 4)
+        src = template % {'stmt': stmt, 'setup': setup}
+        self.src = src # Save for traceback display
+        code = compile(src, dummy_src_name, "exec")
+        ns = {}
+        exec code in globals(), ns
+        self.inner = ns["inner"]
+
+    def print_exc(self, file=None):
+        """Helper to print a traceback from the timed code.
+
+        Typical use:
+
+            t = Timer(...)       # outside the try/except
+            try:
+                t.timeit(...)    # or t.repeat(...)
+            except:
+                t.print_exc()
+
+        The advantage over the standard traceback is that source lines
+        in the compiled template will be displayed.
+
+        The optional file argument directs where the traceback is
+        sent; it defaults to sys.stderr.
+        """
+        import linecache, traceback
+        linecache.cache[dummy_src_name] = (len(self.src),
+                                           None,
+                                           self.src.split("\n"),
+                                           dummy_src_name)
+        traceback.print_exc(file=file)
+
+    def timeit(self, number=default_number):
+        """Time 'number' executions of the main statement.
+
+        To be precise, this executes the setup statement once, and
+        then returns the time it takes to execute the main statement
+        a number of times, as a float measured in seconds.  The
+        argument is the number of times through the loop, defaulting
+        to one million.  The main statement, the setup statement and
+        the timer function to be used are passed to the constructor.
+        """
+        if itertools:
+            it = itertools.repeat(None, number)
+        else:
+            it = [None] * number
+        gcold = gc.isenabled()
+        gc.disable()
+        timing = self.inner(it, self.timer)
+        if gcold:
+            gc.enable()
+        return timing
+
+    def repeat(self, repeat=default_repeat, number=default_number):
+        """Call timeit() a few times.
+
+        This is a convenience function that calls the timeit()
+        repeatedly, returning a list of results.  The first argument
+        specifies how many times to call timeit(), defaulting to 3;
+        the second argument specifies the timer argument, defaulting
+        to one million.
+
+        Note: it's tempting to calculate mean and standard deviation
+        from the result vector and report these.  However, this is not
+        very useful.  In a typical case, the lowest value gives a
+        lower bound for how fast your machine can run the given code
+        snippet; higher values in the result vector are typically not
+        caused by variability in Python's speed, but by other
+        processes interfering with your timing accuracy.  So the min()
+        of the result is probably the only number you should be
+        interested in.  After that, you should look at the entire
+        vector and apply common sense rather than statistics.
+        """
+        r = []
+        for i in range(repeat):
+            t = self.timeit(number)
+            r.append(t)
+        return r
+
+def main(args=None):
+    """Main program, used when run as a script.
+
+    The optional argument specifies the command line to be parsed,
+    defaulting to sys.argv[1:].
+
+    The return value is an exit code to be passed to sys.exit(); it
+    may be None to indicate success.
+
+    When an exception happens during timing, a traceback is printed to
+    stderr and the return value is 1.  Exceptions at other times
+    (including the template compilation) are not caught.
+    """
+    if args is None:
+        args = sys.argv[1:]
+    import getopt
+    try:
+        opts, args = getopt.getopt(args, "n:s:r:tcvh",
+                                   ["number=", "setup=", "repeat=",
+                                    "time", "clock", "verbose", "help"])
+    except getopt.error, err:
+        print err
+        print "use -h/--help for command line help"
+        return 2
+    timer = default_timer
+    stmt = string.join(args, "\n") or "pass"
+    number = 0 # auto-determine
+    setup = []
+    repeat = default_repeat
+    verbose = 0
+    precision = 3
+    for o, a in opts:
+        if o in ("-n", "--number"):
+            number = int(a)
+        if o in ("-s", "--setup"):
+            setup.append(a)
+        if o in ("-r", "--repeat"):
+            repeat = int(a)
+            if repeat <= 0:
+                repeat = 1
+        if o in ("-t", "--time"):
+            timer = time.time
+        if o in ("-c", "--clock"):
+            timer = time.clock
+        if o in ("-v", "--verbose"):
+            if verbose:
+                precision = precision + 1
+            verbose = precision + 1
+        if o in ("-h", "--help"):
+            print __doc__,
+            return 0
+    setup = string.join(setup, "\n") or "pass"
+    # Include the current directory, so that local imports work (sys.path
+    # contains the directory of this script, rather than the current
+    # directory)
+    import os
+    sys.path.insert(0, os.curdir)
+    t = Timer(stmt, setup, timer)
+    if number == 0:
+        # determine number so that 0.2 <= total time < 2.0
+        for i in range(1, 10):
+            number = 10**i
+            try:
+                x = t.timeit(number)
+            except:
+                t.print_exc()
+                return 1
+            if verbose:
+                print "%d loops -> %.*g secs" % (number, precision, x)
+            if x >= 0.2:
+                break
+    try:
+        r = t.repeat(repeat, number)
+    except:
+        t.print_exc()
+        return 1
+    best = min(r)
+    if verbose:
+        print "raw times:", string.join(map(lambda x, p=precision: "%.*g" % (p, x), r))
+    print "%d loops," % number,
+    usec = best * 1e6 / number
+    if usec < 1000:
+        print "best of %d: %.*g usec per loop" % (repeat, precision, usec)
+    else:
+        msec = usec / 1000
+        if msec < 1000:
+            print "best of %d: %.*g msec per loop" % (repeat, precision, msec)
+        else:
+            sec = msec / 1000
+            print "best of %d: %.*g sec per loop" % (repeat, precision, sec)
+    return None
+
+if __name__ == "__main__":
+    sys.exit(main())