summaryrefslogtreecommitdiff
path: root/bench
diff options
context:
space:
mode:
authorSteven Knight <knight@baldmt.com>2005-09-25 18:12:01 +0000
committerSteven Knight <knight@baldmt.com>2005-09-25 18:12:01 +0000
commit781ba4618da6ab099b213fef0a73960459089558 (patch)
tree8f3f2d46b0ac09fb08f7dc9d9e9729a92fa2e7d0 /bench
parent83d2c54302835180d4f130d75cd69a92b2d7fd9b (diff)
downloadscons-git-781ba4618da6ab099b213fef0a73960459089558.tar.gz
Optimize is_List et al. Add a script harness and scripts for benchmarking Python code snippets.
Diffstat (limited to 'bench')
-rw-r--r--bench/README.txt40
-rw-r--r--bench/bench.py123
-rw-r--r--bench/is_types.py331
-rw-r--r--bench/lvars-gvars.py74
4 files changed, 568 insertions, 0 deletions
diff --git a/bench/README.txt b/bench/README.txt
new file mode 100644
index 000000000..03931076b
--- /dev/null
+++ b/bench/README.txt
@@ -0,0 +1,40 @@
+# __COPYRIGHT__
+
+This subdirectory contains a harness and various timing tests that we've
+used to decide on the most efficient implementation of various pieces
+of the code base. We're checking these in here so that they're always
+available in case we have to revisit these decisions.
+
+NOTE: This harness is for horse-racing specific snippets of Python
+code to select the best implementation to use within a given function
+or subsystem. It's not intended for end-to-end testing of SCons itself.
+
+Contents of the directory:
+
+ README.txt
+
+ What you're reading right now.
+
+ bench.py
+
+ The harness for running the timing tests that make up
+ the rest of the directory's contents. Use it to run
+ one of the timing tests as follows:
+
+ python bench.py FILE
+
+ Various command-line options control the number of runs, the
+ number of iterations on each run, etc. Help for the command-line
+ options is available:
+
+ python bench.py -h
+
+ is_types.py
+ lvars-gvars.py
+ [etc.]
+
+ The rest of the files in this directory should each contain a
+ specific timing test, consisting of various functions to be run
+ against each other, and test data to be passed to the functions.
+
+ Yes, this list of files will get out of date.
diff --git a/bench/bench.py b/bench/bench.py
new file mode 100644
index 000000000..3c5dd50b3
--- /dev/null
+++ b/bench/bench.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+#
+# __COPYRIGHT__
+#
+# A script for timing snippets of Python code.
+#
+# By default, this script will execute a single Python file specified on
+# the command line and time any functions in a list named "FunctionList"
+# set by the Python file under test, or (by default) time any functions
+# in the file whose names begin with "Func".
+#
+# All functions are assumed to get passed the same arguments, and the
+# inputs are specified in a list named "Data," each element of which
+# is a list consisting of a tag name, a list of positional arguments,
+# and a dictionary of keyword arguments.
+#
+# Each function is expected to test a single, comparable snippet of
+# of Python code. IMPORTANT: We want to test the timing of the code
+# itself, not Python function call overhead, so every function should
+# put its code under test within the following block:
+#
+# for i in IterationList:
+#
+# This will allow (as much as possible) us to time just the code itself,
+# not Python function call overhead.
+
+import getopt
+import sys
+import time
+import types
+
+Usage = """\
+Usage: bench.py OPTIONS file.py
+ --clock Use the time.clock function
+ --func PREFIX Test functions whose names begin with PREFIX
+ -h, --help Display this help and exit
+ -i ITER, --iterations ITER Run each code snippet ITER times
+ --time Use the time.time function
+ -r RUNS, --runs RUNS Average times for RUNS invocations of
+"""
+
+# How many times each snippet of code will be (or should be) run by the
+# functions under test to gather the time (the "inner loop").
+
+Iterations = 1000
+
+# How many times we'll run each function to collect its aggregate time
+# and try to average out timing differences induced by system performance
+# (the "outer loop").
+
+Runs = 10
+
+# The prefix of the functions under test. This will be used if
+# there's no explicit list defined in FunctionList.
+
+FunctionPrefix = 'Func'
+
+# The function used to get the current time. The default of time.time is
+# good on most UNIX systems, but time.clock (selectable via the --clock
+# option) is better on Windows and some other UNIX systems.
+
+Now = time.time
+
+
+opts, args = getopt.getopt(sys.argv[1:], 'hi:r:',
+ ['clock', 'func=', 'help',
+ 'iterations=', 'time', 'runs='])
+
+for o, a in opts:
+ if o in ['--clock']:
+ Now = time.clock
+ elif o in ['--func']:
+ FunctionPrefix = a
+ elif o in ['-h', '--help']:
+ sys.stdout.write(Usage)
+ sys.exit(0)
+ elif o in ['-i', '--iterations']:
+ Iterations = int(a)
+ elif o in ['--time']:
+ Now = time.time
+ elif o in ['-r', '--runs']:
+ Runs = int(a)
+
+if len(args) != 1:
+ sys.stderr.write("bench.py: only one file argument must be specified\n")
+ sys.stderr.write(Usage)
+ sys.exit(1)
+
+
+execfile(args[0])
+
+
+try:
+ FunctionList
+except NameError:
+ function_names = filter(lambda x: x[:4] == FunctionPrefix, locals().keys())
+ function_names.sort()
+ l = map(lambda f, l=locals(): l[f], function_names)
+ FunctionList = filter(lambda f: type(f) == types.FunctionType, l)
+
+IterationList = [None] * Iterations
+
+def timer(func, *args, **kw):
+ results = []
+ for i in range(Runs):
+ start = Now()
+ apply(func, args, kw)
+ finish = Now()
+ results.append((finish - start) / Iterations)
+ return results
+
+def display(label, results):
+ total = reduce(lambda x, y: x+y, results, 0.0)
+ print " %8.3f" % ((total * 1e6) / len(results)), ':', label
+
+for func in FunctionList:
+ if func.__doc__: d = ' (' + func.__doc__ + ')'
+ else: d = ''
+ print func.__name__ + d + ':'
+
+ for label, args, kw in Data:
+ r = apply(timer, (func,)+args, kw)
+ display(label, r)
diff --git a/bench/is_types.py b/bench/is_types.py
new file mode 100644
index 000000000..1f4805b99
--- /dev/null
+++ b/bench/is_types.py
@@ -0,0 +1,331 @@
+# __COPYRIGHT__
+#
+# Benchmarks for testing various possible implementations
+# of the is_Dict(), is_List() and is_String() functions in
+# src/engine/SCons/Util.py.
+
+import types
+from UserDict import UserDict
+from UserList import UserList
+
+try:
+ from UserString import UserString
+except ImportError:
+ # "Borrowed" from the Python 2.2 UserString module
+ # and modified slightly for use with SCons.
+ class UserString:
+ def __init__(self, seq):
+ if type(seq) == type(''):
+ self.data = seq
+ elif isinstance(seq, UserString):
+ self.data = seq.data[:]
+ else:
+ self.data = str(seq)
+ def __str__(self): return str(self.data)
+ def __repr__(self): return repr(self.data)
+ def __int__(self): return int(self.data)
+ def __long__(self): return long(self.data)
+ def __float__(self): return float(self.data)
+ def __complex__(self): return complex(self.data)
+ def __hash__(self): return hash(self.data)
+
+ def __cmp__(self, string):
+ if isinstance(string, UserString):
+ return cmp(self.data, string.data)
+ else:
+ return cmp(self.data, string)
+ def __contains__(self, char):
+ return char in self.data
+
+ def __len__(self): return len(self.data)
+ def __getitem__(self, index): return self.__class__(self.data[index])
+ def __getslice__(self, start, end):
+ start = max(start, 0); end = max(end, 0)
+ return self.__class__(self.data[start:end])
+
+ def __add__(self, other):
+ if isinstance(other, UserString):
+ return self.__class__(self.data + other.data)
+ elif is_String(other):
+ return self.__class__(self.data + other)
+ else:
+ return self.__class__(self.data + str(other))
+ def __radd__(self, other):
+ if is_String(other):
+ return self.__class__(other + self.data)
+ else:
+ return self.__class__(str(other) + self.data)
+ def __mul__(self, n):
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+
+InstanceType = types.InstanceType
+DictType = types.DictType
+ListType = types.ListType
+StringType = types.StringType
+if hasattr(types, 'UnicodeType'):
+ UnicodeType = types.UnicodeType
+
+
+# The original implementations, pretty straightforward checks for the
+# type of the object and whether it's an instance of the corresponding
+# User* type.
+
+def original_is_Dict(e):
+ return type(e) is types.DictType or isinstance(e, UserDict)
+
+def original_is_List(e):
+ return type(e) is types.ListType or isinstance(e, UserList)
+
+if hasattr(types, 'UnicodeType'):
+ def original_is_String(e):
+ return type(e) is types.StringType \
+ or type(e) is types.UnicodeType \
+ or isinstance(e, UserString)
+else:
+ def original_is_String(e):
+ return type(e) is types.StringType or isinstance(e, UserString)
+
+
+
+# New candidates that explicitly check for whether the object is an
+# InstanceType before calling isinstance() on the corresponding User*
+# type.
+
+def checkInstanceType_is_Dict(e):
+ return type(e) is types.DictType or \
+ (type(e) is types.InstanceType and isinstance(e, UserDict))
+
+def checkInstanceType_is_List(e):
+ return type(e) is types.ListType \
+ or (type(e) is types.InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+ def checkInstanceType_is_String(e):
+ return type(e) is types.StringType \
+ or type(e) is types.UnicodeType \
+ or (type(e) is types.InstanceType and isinstance(e, UserString))
+else:
+ def checkInstanceType_is_String(e):
+ return type(e) is types.StringType \
+ or (type(e) is types.InstanceType and isinstance(e, UserString))
+
+
+
+# Improved candidates that cache the type(e) result in a variable
+# before doing any checks.
+
+def cache_type_e_is_Dict(e):
+ t = type(e)
+ return t is types.DictType or \
+ (t is types.InstanceType and isinstance(e, UserDict))
+
+def cache_type_e_is_List(e):
+ t = type(e)
+ return t is types.ListType \
+ or (t is types.InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+ def cache_type_e_is_String(e):
+ t = type(e)
+ return t is types.StringType \
+ or t is types.UnicodeType \
+ or (t is types.InstanceType and isinstance(e, UserString))
+else:
+ def cache_type_e_is_String(e):
+ t = type(e)
+ return t is types.StringType \
+ or (t is types.InstanceType and isinstance(e, UserString))
+
+
+
+# Improved candidates that cache the type(e) result in a variable
+# before doing any checks, but using the global names for
+# DictType, ListType and StringType.
+
+def global_cache_type_e_is_Dict(e):
+ t = type(e)
+ return t is DictType or \
+ (t is InstanceType and isinstance(e, UserDict))
+
+def global_cache_type_e_is_List(e):
+ t = type(e)
+ return t is ListType \
+ or (t is InstanceType and isinstance(e, UserList))
+
+if hasattr(types, 'UnicodeType'):
+ def global_cache_type_e_is_String(e):
+ t = type(e)
+ return t is StringType \
+ or t is UnicodeType \
+ or (t is InstanceType and isinstance(e, UserString))
+else:
+ def global_cache_type_e_is_String(e):
+ t = type(e)
+ return t is StringType \
+ or (t is InstanceType and isinstance(e, UserString))
+
+
+
+# Alternative that uses a myType() function to map the User* objects
+# to their corresponding underlying types.
+
+instanceTypeMap = {
+ UserDict : types.DictType,
+ UserList : types.ListType,
+ UserString : types.StringType,
+}
+
+if hasattr(types, 'UnicodeType'):
+ def myType(obj):
+ t = type(obj)
+ if t is types.InstanceType:
+ t = instanceTypeMap.get(obj.__class__, t)
+ elif t is types.UnicodeType:
+ t = types.StringType
+ return t
+else:
+ def myType(obj):
+ t = type(obj)
+ if t is types.InstanceType:
+ t = instanceTypeMap.get(obj.__class__, t)
+ return t
+
+def myType_is_Dict(e):
+ return myType(e) is types.DictType
+
+def myType_is_List(e):
+ return myType(e) is types.ListType
+
+def myType_is_String(e):
+ return myType(e) is types.StringType
+
+
+
+
+def Func01(obj):
+ """original_is_String"""
+ for i in IterationList:
+ original_is_String(obj)
+
+def Func02(obj):
+ """original_is_List"""
+ for i in IterationList:
+ original_is_List(obj)
+
+def Func03(obj):
+ """original_is_Dict"""
+ for i in IterationList:
+ original_is_Dict(obj)
+
+def Func04(obj):
+ """checkInstanceType_is_String"""
+ for i in IterationList:
+ checkInstanceType_is_String(obj)
+
+def Func05(obj):
+ """checkInstanceType_is_List"""
+ for i in IterationList:
+ checkInstanceType_is_List(obj)
+
+def Func06(obj):
+ """checkInstanceType_is_Dict"""
+ for i in IterationList:
+ checkInstanceType_is_Dict(obj)
+
+def Func07(obj):
+ """cache_type_e_is_String"""
+ for i in IterationList:
+ cache_type_e_is_String(obj)
+
+def Func08(obj):
+ """cache_type_e_is_List"""
+ for i in IterationList:
+ cache_type_e_is_List(obj)
+
+def Func09(obj):
+ """cache_type_e_is_Dict"""
+ for i in IterationList:
+ cache_type_e_is_Dict(obj)
+
+def Func10(obj):
+ """global_cache_type_e_is_String"""
+ for i in IterationList:
+ global_cache_type_e_is_String(obj)
+
+def Func11(obj):
+ """global_cache_type_e_is_List"""
+ for i in IterationList:
+ global_cache_type_e_is_List(obj)
+
+def Func12(obj):
+ """global_cache_type_e_is_Dict"""
+ for i in IterationList:
+ global_cache_type_e_is_Dict(obj)
+
+#def Func13(obj):
+# """myType_is_String"""
+# for i in IterationList:
+# myType_is_String(obj)
+#
+#def Func14(obj):
+# """myType_is_List"""
+# for i in IterationList:
+# myType_is_List(obj)
+#
+#def Func15(obj):
+# """myType_is_Dict"""
+# for i in IterationList:
+# myType_is_Dict(obj)
+
+
+
+# Data to pass to the functions on each run. Each entry is a
+# three-element tuple:
+#
+# (
+# "Label to print describing this data run",
+# ('positional', 'arguments'),
+# {'keyword' : 'arguments'},
+# ),
+
+class A:
+ pass
+
+Data = [
+ (
+ "String",
+ ('',),
+ {},
+ ),
+ (
+ "List",
+ ([],),
+ {},
+ ),
+ (
+ "Dict",
+ ({},),
+ {},
+ ),
+ (
+ "UserString",
+ (UserString(''),),
+ {},
+ ),
+ (
+ "UserList",
+ (UserList([]),),
+ {},
+ ),
+ (
+ "UserDict",
+ (UserDict({}),),
+ {},
+ ),
+ (
+ "Object",
+ (A(),),
+ {},
+ ),
+]
diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py
new file mode 100644
index 000000000..efcef2adc
--- /dev/null
+++ b/bench/lvars-gvars.py
@@ -0,0 +1,74 @@
+# __COPYRIGHT__
+#
+# Functions and data for timing different idioms for fetching a keyword
+# value from a pair of dictionaries for localand global values. This was
+# used to select how to most efficiently expand single $KEYWORD strings
+# in src/engine/SCons/Subst.py.
+
+def Func1(var, gvars, lvars):
+ """lvars try:-except:, gvars try:-except:"""
+ for i in IterationList:
+ try:
+ x = lvars[var]
+ except KeyError:
+ try:
+ x = gvars[var]
+ except KeyError:
+ x = ''
+
+def Func2(var, gvars, lvars):
+ """lvars has_key(), gvars try:-except:"""
+ for i in IterationList:
+ if lvars.has_key(var):
+ x = lvars[var]
+ else:
+ try:
+ x = gvars[var]
+ except KeyError:
+ x = ''
+
+def Func3(var, gvars, lvars):
+ """lvars has_key(), gvars has_key()"""
+ for i in IterationList:
+ if lvars.has_key(var):
+ x = lvars[var]
+ elif gvars.has_key(var):
+ x = gvars[var]
+ else:
+ x = ''
+
+def Func4(var, gvars, lvars):
+ """eval()"""
+ for i in IterationList:
+ try:
+ x = eval(var, gvars, lvars)
+ except NameError:
+ x = ''
+
+# Data to pass to the functions on each run. Each entry is a
+# three-element tuple:
+#
+# (
+# "Label to print describing this data run",
+# ('positional', 'arguments'),
+# {'keyword' : 'arguments'},
+# ),
+
+Data = [
+ (
+ "Neither in gvars or lvars",
+ ('x', {}, {}),
+ {},
+ ),
+ (
+ "Missing from lvars, found in gvars",
+ ('x', {'x':1}, {}),
+ {},
+ ),
+ (
+ "Found in lvars",
+ ('x', {'x':1}, {'x':2}),
+ {},
+ ),
+]
+