summaryrefslogtreecommitdiff
path: root/bench
diff options
context:
space:
mode:
Diffstat (limited to 'bench')
-rw-r--r--bench/env.__setitem__.py362
-rw-r--r--bench/timeit.py297
2 files changed, 659 insertions, 0 deletions
diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py
new file mode 100644
index 00000000..3826176f
--- /dev/null
+++ b/bench/env.__setitem__.py
@@ -0,0 +1,362 @@
+# __COPYRIGHT__
+#
+# Benchmarks for testing various possible implementations of the
+# env.__setitem__() method(s) in the src/engine/SCons/Environment.py
+# module.
+
+import os.path
+import re
+import string
+import sys
+import timeit
+
+# Utility Timing class and function from:
+# ASPN: Python Cookbook : Timing various python statements
+# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/544297
+#
+# These wrap the basic timeit function to make it a little more
+# convenient to do side-by-side tests of code.
+
+class Timing:
+    """Bundle one named statement with the timer that measures it.
+
+    name      -- label reported next to the measured time
+    num       -- number of executions requested from the timer
+    init      -- setup code handed to timeit.Timer
+    statement -- the code whose execution is timed
+    """
+
+    def __init__(self, name, num, init, statement):
+        self.name = name
+        self.statement = statement
+        self.__num = num
+        # No measurement has been taken yet.
+        self.__result = None
+        self.__timer = timeit.Timer(statement, init)
+
+    def timeit(self):
+        """Run the statement `num` times and remember the elapsed time."""
+        elapsed = self.__timer.timeit(self.__num)
+        self.__result = elapsed
+
+    def getResult(self):
+        """Return the last measured time, or None before timeit() is called."""
+        return self.__result
+
+def times(num=1000000, init='', title='Results:', **statements):
+    """Time every statement passed by keyword and print a sorted report.
+
+    num        -- how many times each statement is executed
+    init       -- setup code shared by all statements
+    title      -- heading printed above the results
+    statements -- name=code pairs; the name labels the result line
+    """
+    # Take all the measurements before producing any output.
+    results = []
+    for label, code in statements.items():
+        timing = Timing(label, num, init, code)
+        timing.timeit()
+        results.append((timing.getResult(), timing.name))
+
+    # Fastest first; equal times fall back to comparing the names.
+    results.sort()
+
+    print
+    print title
+    for row in results:
+        print " %9.3f s %s" % row
+
+# Import the necessary local SCons.* modules used by some of our
+# alternative implementations below, first manipulating sys.path so
+# we pull in the right local modules without forcing the user to set
+# PYTHONPATH.
+
+import __main__
+
+# Locate the running script: prefer __main__.__file__ and fall back to
+# sys.argv[0] when the attribute is not available.
+if hasattr(__main__, '__file__'):
+    filename = __main__.__file__
+else:
+    filename = sys.argv[0]
+
+# The local SCons modules live in ../src/engine relative to this script.
+script_dir = os.path.dirname(filename)
+if script_dir:
+    script_dir = script_dir + '/'
+sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path
+
+import SCons.Errors
+import SCons.Environment
+
+# Module-level alias for the SCons validator; env_Global_is_valid calls it
+# through this name instead of the dotted SCons.Environment lookup.
+is_valid_construction_var = SCons.Environment.is_valid_construction_var
+# Module-level compiled pattern used by env_global_regex_is_valid, the
+# env_Best_* classes and env_isalnum.
+global_valid_var = re.compile(r'[_a-zA-Z]\w*$')
+
+# The classes with different __setitem__() implementations that we're
+# going to horse-race.
+#
+# The base class (Environment) should contain *all* class initialization
+# of anything that will be used by any of the competing sub-class
+# implementations. Each timing run will create an instance of the class,
+# and all competing sub-classes should share the same initialization
+# overhead so our timing focuses on just the __setitem__() performance.
+#
+# All subclasses should be prefixed with env_, in which case they'll be
+# picked up automatically by the code below for testing.
+#
+# The env_Original subclass contains the original implementation (which
+# actually had the is_valid_construction_var() function in SCons.Util
+# originally).
+#
+# The other subclasses (except for env_Best) each contain *one*
+# significant change from the env_Original implementation. The doc string
+# describes the change, and is what gets displayed in the final timing.
+# The doc strings of these other subclasses are "grouped" informally
+# by a prefix that kind of indicates what specific aspect of __setitem__()
+# is being varied and tested.
+#
+# The env_Best subclass contains the "best practices" from each of
+# the different "groups" of techniques tested in the other subclasses,
+# and is where to experiment with different combinations of techniques.
+# After we're done, it should be the one that shows up at the top of the
+# list as we run our timings.
+
+class Environment:
+ # Shared baseline for every env_* variant: all class-level state that
+ # any variant needs is created here so the variants differ only in
+ # their __setitem__() bodies.
+ #
+ # Names the __setitem__() variants check for before validating. Every
+ # value is None here, so `_special_set.get(key)` is always false; the
+ # variants that index and call the entry would fail on these names,
+ # but the benchmark data below never uses them.
+ _special_set = {
+ 'BUILDERS' : None,
+ 'SCANNERS' : None,
+ 'TARGET' : None,
+ 'TARGETS' : None,
+ 'SOURCE' : None,
+ 'SOURCES' : None,
+ }
+ # Keys of _special_set, for the "key in attribute" variants.
+ _special_set_keys = _special_set.keys()
+ # Class-level pattern used by env_Method_is_valid and
+ # env_regex_attribute_is_valid.
+ _valid_var = re.compile(r'[_a-zA-Z]\w*$')
+ def __init__(self, **kw):
+ # The keyword arguments become the variable dictionary.
+ self._dict = kw
+
+class env_Original(Environment):
+ """Original __setitem__()"""
+ def __setitem__(self, key, value):
+ # Baseline for all comparisons: dict .get() for the special-name
+ # check, then the fully qualified validator call on every store.
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ if not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_Global_is_valid(Environment):
+ """is_valid_construction_var(): use a global function"""
+ def __setitem__(self, key, value):
+ # Differs from env_Original only in how the validator is reached:
+ # through the module-level alias is_valid_construction_var.
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ if not is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_Method_is_valid(Environment):
+ """is_valid_construction_var(): use a method"""
+ def is_valid_construction_var(self, varstr):
+ """Return if the specified string is a legitimate construction
+ variable.
+ """
+ # Returns the match object (or None) from the class-level pattern.
+ return self._valid_var.match(varstr)
+
+ def __setitem__(self, key, value):
+ # Differs from env_Original only in calling the validator as a
+ # method of this instance.
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ if not self.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_regex_attribute_is_valid(Environment):
+ """is_valid_construction_var(): use a regex attribute"""
+ def __setitem__(self, key, value):
+ # No validator function call at all: the class-level pattern
+ # self._valid_var is matched inline.
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ if not self._valid_var.match(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_global_regex_is_valid(Environment):
+ """is_valid_construction_var(): use a global regex"""
+ def __setitem__(self, key, value):
+ # Like env_regex_attribute_is_valid, but the pattern is the
+ # module-level global_valid_var rather than a class attribute.
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ if not global_valid_var.match(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_special_set_has_key(Environment):
+ """_special_set.get(): use _special_set.has_key() instead"""
+ def __setitem__(self, key, value):
+ # Special-name check done with has_key(); the entry is looked up
+ # a second time only when the key is present.
+ if self._special_set.has_key(key):
+ self._special_set[key](self, key, value)
+ else:
+ if not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_key_in_tuple(Environment):
+ """_special_set.get(): use "key in tuple" instead"""
+ def __setitem__(self, key, value):
+ # Special-name check done by membership in a literal tuple that
+ # repeats the keys of _special_set.
+ if key in ('BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES'):
+ self._special_set[key](self, key, value)
+ else:
+ if not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_key_in_list(Environment):
+ """_special_set.get(): use "key in list" instead"""
+ def __setitem__(self, key, value):
+ # Same as env_key_in_tuple, but the literal is a list.
+ if key in ['BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES']:
+ self._special_set[key](self, key, value)
+ else:
+ if not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_key_in_attribute(Environment):
+ """_special_set.get(): use "key in attribute" instead"""
+ def __setitem__(self, key, value):
+ # Membership test against the precomputed class attribute
+ # _special_set_keys instead of a literal sequence.
+ if key in self._special_set_keys:
+ self._special_set[key](self, key, value)
+ else:
+ if not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_try_except(Environment):
+ """avoid is_valid_construction_var(): use try:-except:"""
+ def __setitem__(self, key, value):
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ # Probe the dictionary first: a key that is already stored
+ # skips the validator, which only runs on the KeyError path.
+ try:
+ self._dict[key]
+ except KeyError:
+ if not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_not_has_key(Environment):
+ """avoid is_valid_construction_var(): use not .has_key()"""
+ def __setitem__(self, key, value):
+ special = self._special_set.get(key)
+ if special:
+ special(self, key, value)
+ else:
+ # Short-circuit: the validator is only called for keys that
+ # are not already present in the dictionary.
+ if not self._dict.has_key(key) \
+ and not SCons.Environment.is_valid_construction_var(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_Best_attribute(Environment):
+ """Best __setitem__(), with an attribute"""
+ def __setitem__(self, key, value):
+ # Combination: "key in attribute" special-name check, has_key()
+ # short-circuit for stored keys, and the module-level regex.
+ if key in self._special_set_keys:
+ self._special_set[key](self, key, value)
+ else:
+ if not self._dict.has_key(key) \
+ and not global_valid_var.match(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_Best_has_key(Environment):
+ """Best __setitem__(), with has_key"""
+ def __setitem__(self, key, value):
+ # Combination: has_key() special-name check, has_key()
+ # short-circuit for stored keys, and the module-level regex.
+ if self._special_set.has_key(key):
+ self._special_set[key](self, key, value)
+ else:
+ if not self._dict.has_key(key) \
+ and not global_valid_var.match(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+class env_Best_list(Environment):
+ """Best __setitem__(), with a list"""
+ def __setitem__(self, key, value):
+ # Combination: literal-list special-name check, has_key()
+ # short-circuit for stored keys, and the module-level regex.
+ if key in ['BUILDERS', 'SCANNERS', 'TARGET', 'TARGETS', 'SOURCE', 'SOURCES']:
+ self._special_set[key](self, key, value)
+ else:
+ if not self._dict.has_key(key) \
+ and not global_valid_var.match(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+# Define env_isalnum only when string objects provide an isalnum()
+# method; the bare attribute access raises AttributeError otherwise.
+try:
+ ''.isalnum
+except AttributeError:
+ pass
+else:
+ class env_isalnum(Environment):
+ """Greg's Folly: isalnum instead of probe"""
+ def __setitem__(self, key, value):
+ if self._special_set.has_key(key):
+ self._special_set[key](self, key, value)
+ else:
+ # A purely alphanumeric key skips the regex entirely.
+ # NOTE(review): that also lets through names starting with
+ # a digit, which the regex alone would reject.
+ if not key.isalnum() and not global_valid_var.match(key):
+ raise SCons.Errors.UserError, "Illegal construction variable `%s'" % key
+ self._dict[key] = value
+
+# We'll use the names of all the env_* classes we find later to build
+# the dictionary of statements to be timed, and the import statement
+# that the timer will use to get at these classes.
+
+class_names = []
+# At module level locals() is the module namespace, so this picks up
+# every env_* class defined above. keys() returns a list in Python 2,
+# so binding the loop variable while iterating is safe.
+for n in locals().keys():
+ # Slice comparison instead of startswith(); presumably kept for
+ # Pythons without string methods (compare the isalnum probe above).
+ #if n.startswith('env_'):
+ if n[:4] == 'env_':
+ class_names.append(n)
+
+# This is *the* function that gets timed. It will get called for the
+# specified number of iterations for the cross product of the number of
+# classes we're testing and the number of data sets (defined below).
+
+# Number of times each statement is executed per timing run; passed to
+# times() as `num` by run_it().
+iterations = 10000
+
+def do_it(names, env_class):
+ # Create a fresh, empty environment of the class under test and
+ # store 1 under every name, exercising its __setitem__() once per name.
+ e = env_class()
+ for key in names:
+ e[key] = 1
+
+# Build the list of "statements" that will be tested. For each class
+# we're testing, the doc string describing the class is the key, and
+# the statement we test is a simple "do_it(names, {class})" call.
+
+# Maps each class's doc string (the label shown in the report) to the
+# source text of the call that gets timed for that class.
+statements = {}
+
+for class_name in class_names:
+    # Look the class up by name in the module namespace.
+    ec = globals()[class_name]
+    statements[ec.__doc__] = 'do_it(names, %s)' % class_name
+
+# The common_imports string is used in the initialization of each
+# test run. The timeit module insulates the test snippets from the
+# global namespace, so we have to import these explicitly from __main__.
+
+# Everything the timed snippets need from this script: the driver
+# function plus every class under test.
+common_import_variables = ['do_it'] + class_names
+
+# Setup code prepended to each run's initialization.
+common_imports = "\nfrom __main__ import %s\n" % (
+    string.join(common_import_variables, ', '),)
+
+# The test data (lists of variable names) that we'll use for the runs.
+
+# One name repeated 100 times.
+same_variable_names = ['XXX'] * 100
+
+# 100 distinct names: X00000 ... X00099.
+uniq_variable_names = []
+for i in range(100):
+    uniq_variable_names.append('X%05d' % i)
+
+# 50 distinct names followed by 50 repetitions of the same name.
+mixed_variable_names = uniq_variable_names[:50] + same_variable_names[:50]
+
+# Lastly, put it all together...
+
+def run_it(title, init):
+    """Time every registered statement once and print the report.
+
+    title -- heading for the printed results
+    init  -- setup code executed before each statement is timed
+    """
+    # Work on a copy so the shared statements table is left untouched.
+    kw = statements.copy()
+    kw.update({'num': iterations, 'title': title, 'init': init})
+    apply(times, (), kw)
+
+# One-line banner: interpreter version, platform and OS name.
+print 'Environment __setitem__ benchmark using',
+print 'Python', string.split(sys.version)[0],
+print 'on', sys.platform, os.name
+
+# Three runs over the same statements. Each setup string imports do_it
+# and the env_* classes, then binds `names` to one of the data sets.
+run_it('Results for re-adding an existing variable name 100 times:',
+ common_imports + """
+import __main__ ; names = __main__.same_variable_names
+""")
+
+run_it('Results for adding 100 variable names, 50 existing and 50 new:',
+ common_imports + """
+import __main__ ; names = __main__.mixed_variable_names
+""")
+
+run_it('Results for adding 100 new, unique variable names:',
+ common_imports + """
+import __main__ ; names = __main__.uniq_variable_names
+""")
diff --git a/bench/timeit.py b/bench/timeit.py
new file mode 100644
index 00000000..d5e33bb1
--- /dev/null
+++ b/bench/timeit.py
@@ -0,0 +1,297 @@
+#! /usr/bin/env python
+
+"""Tool for measuring execution time of small code snippets.
+
+This module avoids a number of common traps for measuring execution
+times. See also Tim Peters' introduction to the Algorithms chapter in
+the Python Cookbook, published by O'Reilly.
+
+Library usage: see the Timer class.
+
+Command line usage:
+ python timeit.py [-n N] [-r N] [-s S] [-t] [-c] [-h] [statement]
+
+Options:
+ -n/--number N: how many times to execute 'statement' (default: see below)
+ -r/--repeat N: how many times to repeat the timer (default 3)
+ -s/--setup S: statement to be executed once initially (default 'pass')
+ -t/--time: use time.time() (default on Unix)
+ -c/--clock: use time.clock() (default on Windows)
+ -v/--verbose: print raw timing results; repeat for more digits precision
+ -h/--help: print this usage message and exit
+ statement: statement to be timed (default 'pass')
+
+A multi-line statement may be given by specifying each line as a
+separate argument; indented lines are possible by enclosing an
+argument in quotes and using leading spaces. Multiple -s options are
+treated similarly.
+
+If -n is not given, a suitable number of loops is calculated by trying
+successive powers of 10 until the total time is at least 0.2 seconds.
+
+The difference in default timer function is because on Windows,
+clock() has microsecond granularity but time()'s granularity is 1/60th
+of a second; on Unix, clock() has 1/100th of a second granularity and
+time() is much more precise. On either platform, the default timer
+functions measure wall clock time, not the CPU time. This means that
+other processes running on the same computer may interfere with the
+timing. The best thing to do when accurate timing is necessary is to
+repeat the timing a few times and use the best time. The -r option is
+good for this; the default of 3 repetitions is probably enough in most
+cases. On Unix, you can use clock() to measure CPU time.
+
+Note: there is a certain baseline overhead associated with executing a
+pass statement. The code here doesn't try to hide it, but you should
+be aware of it. The baseline overhead can be measured by invoking the
+program without arguments.
+
+The baseline overhead differs between Python versions! Also, to
+fairly compare older Python versions to Python 2.3, you may want to
+use python -O for the older versions to avoid timing SET_LINENO
+instructions.
+"""
+
+try:
+ import gc
+except ImportError:
+ class _fake_gc:
+ def isenabled(self):
+ return None
+ def enable(self):
+ pass
+ def disable(self):
+ pass
+ gc = _fake_gc()
+import sys
+import time
+try:
+ import itertools
+except ImportError:
+ # Must be an older Python version (see timeit() below)
+ itertools = None
+
+import string
+
+__all__ = ["Timer"]
+
+# Pseudo file name under which Timer compiles its generated source;
+# Timer.print_exc() registers the source under this name in linecache.
+dummy_src_name = "<timeit-src>"
+# Defaults for Timer.timeit() (loop count) and Timer.repeat() (runs).
+default_number = 1000000
+default_repeat = 3
+
+# Platform-dependent default for the Timer constructor's timer argument.
+if sys.platform == "win32":
+ # On Windows, the best timer is time.clock()
+ default_timer = time.clock
+else:
+ # On most other platforms the best timer is time.time()
+ default_timer = time.time
+
+# Don't change the indentation of the template; the reindent() calls
+# in Timer.__init__() depend on setup being indented 4 spaces and stmt
+# being indented 8 spaces.
+template = """
+def inner(_it, _timer):
+ %(setup)s
+ _t0 = _timer()
+ for _i in _it:
+ %(stmt)s
+ _t1 = _timer()
+ return _t1 - _t0
+"""
+
+def reindent(src, indent):
+    """Indent every line of src after the first by `indent` spaces."""
+    # Splitting on newlines and re-joining with "newline + padding"
+    # leaves the first line alone and pads each following one.
+    padding = " " * indent
+    lines = string.split(src, "\n")
+    return string.join(lines, "\n" + padding)
+
+class Timer:
+    """Class for timing execution speed of small code snippets.
+
+    The constructor takes a statement to be timed, an additional
+    statement used for setup, and a timer function.  Both statements
+    default to 'pass'; the timer function is platform-dependent (see
+    module doc string).
+
+    To measure the execution time of the first statement, use the
+    timeit() method.  The repeat() method is a convenience to call
+    timeit() multiple times and return a list of results.
+
+    The statements may contain newlines, as long as they don't contain
+    multi-line string literals.
+    """
+
+    def __init__(self, stmt="pass", setup="pass", timer=default_timer):
+        """Constructor.  See class doc string.
+
+        The statements are substituted into the module-level template,
+        compiled under the pseudo file name dummy_src_name and executed
+        to obtain the inner() timing function.  Errors in either
+        statement that the compiler can detect are raised here.
+        """
+        self.timer = timer
+        stmt = reindent(stmt, 8)
+        setup = reindent(setup, 4)
+        src = template % {'stmt': stmt, 'setup': setup}
+        self.src = src # Save for traceback display
+        code = compile(src, dummy_src_name, "exec")
+        # Run the generated "def inner" in a private local namespace so
+        # it does not clobber anything in this module.
+        ns = {}
+        exec code in globals(), ns
+        self.inner = ns["inner"]
+
+    def print_exc(self, file=None):
+        """Helper to print a traceback from the timed code.
+
+        Typical use:
+
+            t = Timer(...)       # outside the try/except
+            try:
+                t.timeit(...)    # or t.repeat(...)
+            except:
+                t.print_exc()
+
+        The advantage over the standard traceback is that source lines
+        in the compiled template will be displayed.
+
+        The optional file argument directs where the traceback is
+        sent; it defaults to sys.stderr.
+        """
+        import linecache, traceback
+        # Register the generated source under its pseudo file name so
+        # the traceback machinery can show the offending line.
+        linecache.cache[dummy_src_name] = (len(self.src),
+                                           None,
+                                           self.src.split("\n"),
+                                           dummy_src_name)
+        traceback.print_exc(file=file)
+
+    def timeit(self, number=default_number):
+        """Time 'number' executions of the main statement.
+
+        To be precise, this executes the setup statement once, and
+        then returns the time it takes to execute the main statement
+        a number of times, as a float measured in seconds.  The
+        argument is the number of times through the loop, defaulting
+        to one million.  The main statement, the setup statement and
+        the timer function to be used are passed to the constructor.
+
+        Garbage collection is switched off while the statement runs and
+        restored afterwards, including when the timed code raises; the
+        exception itself is propagated to the caller.
+        """
+        if itertools:
+            it = itertools.repeat(None, number)
+        else:
+            # Older Pythons without itertools: materialize the loop list.
+            it = [None] * number
+        gcold = gc.isenabled()
+        gc.disable()
+        try:
+            timing = self.inner(it, self.timer)
+        finally:
+            # Re-enable the collector only if it was on when we started,
+            # even if the timed statement failed.
+            if gcold:
+                gc.enable()
+        return timing
+
+    def repeat(self, repeat=default_repeat, number=default_number):
+        """Call timeit() a few times.
+
+        This is a convenience function that calls the timeit()
+        repeatedly, returning a list of results.  The first argument
+        specifies how many times to call timeit(), defaulting to 3;
+        the second argument specifies the number argument passed to
+        each timeit() call, defaulting to one million.
+
+        Note: it's tempting to calculate mean and standard deviation
+        from the result vector and report these.  However, this is not
+        very useful.  In a typical case, the lowest value gives a
+        lower bound for how fast your machine can run the given code
+        snippet; higher values in the result vector are typically not
+        caused by variability in Python's speed, but by other
+        processes interfering with your timing accuracy.  So the min()
+        of the result is probably the only number you should be
+        interested in.  After that, you should look at the entire
+        vector and apply common sense rather than statistics.
+        """
+        r = []
+        for i in range(repeat):
+            t = self.timeit(number)
+            r.append(t)
+        return r
+
+def main(args=None):
+    """Main program, used when run as a script.
+
+    The optional argument specifies the command line to be parsed,
+    defaulting to sys.argv[1:].
+
+    The return value is an exit code to be passed to sys.exit(); it
+    may be None to indicate success.  It is 2 when the options cannot
+    be parsed and 0 after printing the help text.
+
+    When an exception happens during timing, a traceback is printed to
+    stderr and the return value is 1.  Exceptions at other times
+    (including the template compilation) are not caught.
+    """
+    if args is None:
+        args = sys.argv[1:]
+    import getopt
+    try:
+        opts, args = getopt.getopt(args, "n:s:r:tcvh",
+                                   ["number=", "setup=", "repeat=",
+                                    "time", "clock", "verbose", "help"])
+    except getopt.error, err:
+        print err
+        print "use -h/--help for command line help"
+        return 2
+    timer = default_timer
+    # Each remaining argument is one line of the statement to time.
+    stmt = string.join(args, "\n") or "pass"
+    number = 0 # auto-determine
+    setup = []
+    repeat = default_repeat
+    verbose = 0
+    precision = 3
+    for o, a in opts:
+        if o in ("-n", "--number"):
+            number = int(a)
+        if o in ("-s", "--setup"):
+            setup.append(a)
+        if o in ("-r", "--repeat"):
+            repeat = int(a)
+            if repeat <= 0:
+                repeat = 1
+        if o in ("-t", "--time"):
+            timer = time.time
+        if o in ("-c", "--clock"):
+            timer = time.clock
+        if o in ("-v", "--verbose"):
+            # The first -v turns verbose output on; every further -v
+            # adds one digit of precision.
+            if verbose:
+                precision = precision + 1
+            verbose = verbose + 1
+        if o in ("-h", "--help"):
+            print __doc__,
+            return 0
+    setup = string.join(setup, "\n") or "pass"
+    # Include the current directory, so that local imports work (sys.path
+    # contains the directory of this script, rather than the current
+    # directory)
+    import os
+    sys.path.insert(0, os.curdir)
+    t = Timer(stmt, setup, timer)
+    if number == 0:
+        # determine number so that 0.2 <= total time < 2.0
+        for i in range(1, 10):
+            number = 10**i
+            try:
+                x = t.timeit(number)
+            except:
+                # Deliberately broad: any failure in the user's code is
+                # reported with its traceback and turned into exit code 1.
+                t.print_exc()
+                return 1
+            if verbose:
+                print "%d loops -> %.*g secs" % (number, precision, x)
+            if x >= 0.2:
+                break
+    try:
+        r = t.repeat(repeat, number)
+    except:
+        # Same policy as above for failures during the real runs.
+        t.print_exc()
+        return 1
+    best = min(r)
+    if verbose:
+        print "raw times:", string.join(map(lambda x, p=precision: "%.*g" % (p, x), r))
+    print "%d loops," % number,
+    # Report the best run in the largest unit that keeps it below 1000.
+    usec = best * 1e6 / number
+    if usec < 1000:
+        print "best of %d: %.*g usec per loop" % (repeat, precision, usec)
+    else:
+        msec = usec / 1000
+        if msec < 1000:
+            print "best of %d: %.*g msec per loop" % (repeat, precision, msec)
+        else:
+            sec = msec / 1000
+            print "best of %d: %.*g sec per loop" % (repeat, precision, sec)
+    return None
+
+if __name__ == "__main__":
+ sys.exit(main())