From 781ba4618da6ab099b213fef0a73960459089558 Mon Sep 17 00:00:00 2001 From: Steven Knight Date: Sun, 25 Sep 2005 18:12:01 +0000 Subject: Optimize is_List et al. Add a script harness and scripts for benchmarking Python code snippets. --- bench/README.txt | 40 +++++++ bench/bench.py | 123 +++++++++++++++++++ bench/is_types.py | 331 +++++++++++++++++++++++++++++++++++++++++++++++++++ bench/lvars-gvars.py | 74 ++++++++++++ 4 files changed, 568 insertions(+) create mode 100644 bench/README.txt create mode 100644 bench/bench.py create mode 100644 bench/is_types.py create mode 100644 bench/lvars-gvars.py (limited to 'bench') diff --git a/bench/README.txt b/bench/README.txt new file mode 100644 index 000000000..03931076b --- /dev/null +++ b/bench/README.txt @@ -0,0 +1,40 @@ +# __COPYRIGHT__ + +This subdirectory contains a harness and various timing tests that we've +used to decide on the most efficient implementation of various pieces +of the code base. We're checking these in here so that they're always +available in case we have to revisit these decisions. + +NOTE: This harness is for horse-racing specific snippets of Python +code to select the best implementation to use within a given function +or subsystem. It's not intended for end-to-end testing of SCons itself. + +Contents of the directory: + + README.txt + + What you're reading right now. + + bench.py + + The harness for running the timing tests that make up + the rest of the directory's contents. Use it to run + one of the timing tests as follows: + + python bench.py FILE + + Various command-line options control the number of runs, the + number of iterations on each run, etc. Help for the command-line + options is available: + + python bench.py -h + + is_types.py + lvars-gvars.py + [etc.] + + The rest of the files in this directory should each contain a + specific timing test, consisting of various functions to be run + against each other, and test data to be passed to the functions. + + Yes, this list of files will get out of date. diff --git a/bench/bench.py b/bench/bench.py new file mode 100644 index 000000000..3c5dd50b3 --- /dev/null +++ b/bench/bench.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# +# __COPYRIGHT__ +# +# A script for timing snippets of Python code. +# +# By default, this script will execute a single Python file specified on +# the command line and time any functions in a list named "FunctionList" +# set by the Python file under test, or (by default) time any functions +# in the file whose names begin with "Func". +# +# All functions are assumed to get passed the same arguments, and the +# inputs are specified in a list named "Data," each element of which +# is a list consisting of a tag name, a list of positional arguments, +# and a dictionary of keyword arguments. +# +# Each function is expected to test a single, comparable snippet of +# of Python code. IMPORTANT: We want to test the timing of the code +# itself, not Python function call overhead, so every function should +# put its code under test within the following block: +# +# for i in IterationList: +# +# This will allow (as much as possible) us to time just the code itself, +# not Python function call overhead. + +import getopt +import sys +import time +import types + +Usage = """\ +Usage: bench.py OPTIONS file.py + --clock Use the time.clock function + --func PREFIX Test functions whose names begin with PREFIX + -h, --help Display this help and exit + -i ITER, --iterations ITER Run each code snippet ITER times + --time Use the time.time function + -r RUNS, --runs RUNS Average times for RUNS invocations of +""" + +# How many times each snippet of code will be (or should be) run by the +# functions under test to gather the time (the "inner loop"). + +Iterations = 1000 + +# How many times we'll run each function to collect its aggregate time +# and try to average out timing differences induced by system performance +# (the "outer loop"). + +Runs = 10 + +# The prefix of the functions under test. This will be used if +# there's no explicit list defined in FunctionList. + +FunctionPrefix = 'Func' + +# The function used to get the current time. The default of time.time is +# good on most UNIX systems, but time.clock (selectable via the --clock +# option) is better on Windows and some other UNIX systems. + +Now = time.time + + +opts, args = getopt.getopt(sys.argv[1:], 'hi:r:', + ['clock', 'func=', 'help', + 'iterations=', 'time', 'runs=']) + +for o, a in opts: + if o in ['--clock']: + Now = time.clock + elif o in ['--func']: + FunctionPrefix = a + elif o in ['-h', '--help']: + sys.stdout.write(Usage) + sys.exit(0) + elif o in ['-i', '--iterations']: + Iterations = int(a) + elif o in ['--time']: + Now = time.time + elif o in ['-r', '--runs']: + Runs = int(a) + +if len(args) != 1: + sys.stderr.write("bench.py: only one file argument must be specified\n") + sys.stderr.write(Usage) + sys.exit(1) + + +execfile(args[0]) + + +try: + FunctionList +except NameError: + function_names = filter(lambda x: x[:4] == FunctionPrefix, locals().keys()) + function_names.sort() + l = map(lambda f, l=locals(): l[f], function_names) + FunctionList = filter(lambda f: type(f) == types.FunctionType, l) + +IterationList = [None] * Iterations + +def timer(func, *args, **kw): + results = [] + for i in range(Runs): + start = Now() + apply(func, args, kw) + finish = Now() + results.append((finish - start) / Iterations) + return results + +def display(label, results): + total = reduce(lambda x, y: x+y, results, 0.0) + print " %8.3f" % ((total * 1e6) / len(results)), ':', label + +for func in FunctionList: + if func.__doc__: d = ' (' + func.__doc__ + ')' + else: d = '' + print func.__name__ + d + ':' + + for label, args, kw in Data: + r = apply(timer, (func,)+args, kw) + display(label, r) diff --git a/bench/is_types.py b/bench/is_types.py new file mode 100644 index 000000000..1f4805b99 --- /dev/null +++ b/bench/is_types.py @@ -0,0 +1,331 @@ +# __COPYRIGHT__ +# +# Benchmarks for testing various possible implementations +# of the is_Dict(), is_List() and is_String() functions in +# src/engine/SCons/Util.py. + +import types +from UserDict import UserDict +from UserList import UserList + +try: + from UserString import UserString +except ImportError: + # "Borrowed" from the Python 2.2 UserString module + # and modified slightly for use with SCons. + class UserString: + def __init__(self, seq): + if type(seq) == type(''): + self.data = seq + elif isinstance(seq, UserString): + self.data = seq.data[:] + else: + self.data = str(seq) + def __str__(self): return str(self.data) + def __repr__(self): return repr(self.data) + def __int__(self): return int(self.data) + def __long__(self): return long(self.data) + def __float__(self): return float(self.data) + def __complex__(self): return complex(self.data) + def __hash__(self): return hash(self.data) + + def __cmp__(self, string): + if isinstance(string, UserString): + return cmp(self.data, string.data) + else: + return cmp(self.data, string) + def __contains__(self, char): + return char in self.data + + def __len__(self): return len(self.data) + def __getitem__(self, index): return self.__class__(self.data[index]) + def __getslice__(self, start, end): + start = max(start, 0); end = max(end, 0) + return self.__class__(self.data[start:end]) + + def __add__(self, other): + if isinstance(other, UserString): + return self.__class__(self.data + other.data) + elif is_String(other): + return self.__class__(self.data + other) + else: + return self.__class__(self.data + str(other)) + def __radd__(self, other): + if is_String(other): + return self.__class__(other + self.data) + else: + return self.__class__(str(other) + self.data) + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + +InstanceType = types.InstanceType +DictType = types.DictType +ListType = types.ListType +StringType = types.StringType +if hasattr(types, 'UnicodeType'): + UnicodeType = types.UnicodeType + + +# The original implementations, pretty straightforward checks for the +# type of the object and whether it's an instance of the corresponding +# User* type. + +def original_is_Dict(e): + return type(e) is types.DictType or isinstance(e, UserDict) + +def original_is_List(e): + return type(e) is types.ListType or isinstance(e, UserList) + +if hasattr(types, 'UnicodeType'): + def original_is_String(e): + return type(e) is types.StringType \ + or type(e) is types.UnicodeType \ + or isinstance(e, UserString) +else: + def original_is_String(e): + return type(e) is types.StringType or isinstance(e, UserString) + + + +# New candidates that explicitly check for whether the object is an +# InstanceType before calling isinstance() on the corresponding User* +# type. + +def checkInstanceType_is_Dict(e): + return type(e) is types.DictType or \ + (type(e) is types.InstanceType and isinstance(e, UserDict)) + +def checkInstanceType_is_List(e): + return type(e) is types.ListType \ + or (type(e) is types.InstanceType and isinstance(e, UserList)) + +if hasattr(types, 'UnicodeType'): + def checkInstanceType_is_String(e): + return type(e) is types.StringType \ + or type(e) is types.UnicodeType \ + or (type(e) is types.InstanceType and isinstance(e, UserString)) +else: + def checkInstanceType_is_String(e): + return type(e) is types.StringType \ + or (type(e) is types.InstanceType and isinstance(e, UserString)) + + + +# Improved candidates that cache the type(e) result in a variable +# before doing any checks. + +def cache_type_e_is_Dict(e): + t = type(e) + return t is types.DictType or \ + (t is types.InstanceType and isinstance(e, UserDict)) + +def cache_type_e_is_List(e): + t = type(e) + return t is types.ListType \ + or (t is types.InstanceType and isinstance(e, UserList)) + +if hasattr(types, 'UnicodeType'): + def cache_type_e_is_String(e): + t = type(e) + return t is types.StringType \ + or t is types.UnicodeType \ + or (t is types.InstanceType and isinstance(e, UserString)) +else: + def cache_type_e_is_String(e): + t = type(e) + return t is types.StringType \ + or (t is types.InstanceType and isinstance(e, UserString)) + + + +# Improved candidates that cache the type(e) result in a variable +# before doing any checks, but using the global names for +# DictType, ListType and StringType. + +def global_cache_type_e_is_Dict(e): + t = type(e) + return t is DictType or \ + (t is InstanceType and isinstance(e, UserDict)) + +def global_cache_type_e_is_List(e): + t = type(e) + return t is ListType \ + or (t is InstanceType and isinstance(e, UserList)) + +if hasattr(types, 'UnicodeType'): + def global_cache_type_e_is_String(e): + t = type(e) + return t is StringType \ + or t is UnicodeType \ + or (t is InstanceType and isinstance(e, UserString)) +else: + def global_cache_type_e_is_String(e): + t = type(e) + return t is StringType \ + or (t is InstanceType and isinstance(e, UserString)) + + + +# Alternative that uses a myType() function to map the User* objects +# to their corresponding underlying types. + +instanceTypeMap = { + UserDict : types.DictType, + UserList : types.ListType, + UserString : types.StringType, +} + +if hasattr(types, 'UnicodeType'): + def myType(obj): + t = type(obj) + if t is types.InstanceType: + t = instanceTypeMap.get(obj.__class__, t) + elif t is types.UnicodeType: + t = types.StringType + return t +else: + def myType(obj): + t = type(obj) + if t is types.InstanceType: + t = instanceTypeMap.get(obj.__class__, t) + return t + +def myType_is_Dict(e): + return myType(e) is types.DictType + +def myType_is_List(e): + return myType(e) is types.ListType + +def myType_is_String(e): + return myType(e) is types.StringType + + + + +def Func01(obj): + """original_is_String""" + for i in IterationList: + original_is_String(obj) + +def Func02(obj): + """original_is_List""" + for i in IterationList: + original_is_List(obj) + +def Func03(obj): + """original_is_Dict""" + for i in IterationList: + original_is_Dict(obj) + +def Func04(obj): + """checkInstanceType_is_String""" + for i in IterationList: + checkInstanceType_is_String(obj) + +def Func05(obj): + """checkInstanceType_is_List""" + for i in IterationList: + checkInstanceType_is_List(obj) + +def Func06(obj): + """checkInstanceType_is_Dict""" + for i in IterationList: + checkInstanceType_is_Dict(obj) + +def Func07(obj): + """cache_type_e_is_String""" + for i in IterationList: + cache_type_e_is_String(obj) + +def Func08(obj): + """cache_type_e_is_List""" + for i in IterationList: + cache_type_e_is_List(obj) + +def Func09(obj): + """cache_type_e_is_Dict""" + for i in IterationList: + cache_type_e_is_Dict(obj) + +def Func10(obj): + """global_cache_type_e_is_String""" + for i in IterationList: + global_cache_type_e_is_String(obj) + +def Func11(obj): + """global_cache_type_e_is_List""" + for i in IterationList: + global_cache_type_e_is_List(obj) + +def Func12(obj): + """global_cache_type_e_is_Dict""" + for i in IterationList: + global_cache_type_e_is_Dict(obj) + +#def Func13(obj): +# """myType_is_String""" +# for i in IterationList: +# myType_is_String(obj) +# +#def Func14(obj): +# """myType_is_List""" +# for i in IterationList: +# myType_is_List(obj) +# +#def Func15(obj): +# """myType_is_Dict""" +# for i in IterationList: +# myType_is_Dict(obj) + + + +# Data to pass to the functions on each run. Each entry is a +# three-element tuple: +# +# ( +# "Label to print describing this data run", +# ('positional', 'arguments'), +# {'keyword' : 'arguments'}, +# ), + +class A: + pass + +Data = [ + ( + "String", + ('',), + {}, + ), + ( + "List", + ([],), + {}, + ), + ( + "Dict", + ({},), + {}, + ), + ( + "UserString", + (UserString(''),), + {}, + ), + ( + "UserList", + (UserList([]),), + {}, + ), + ( + "UserDict", + (UserDict({}),), + {}, + ), + ( + "Object", + (A(),), + {}, + ), +] diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py new file mode 100644 index 000000000..efcef2adc --- /dev/null +++ b/bench/lvars-gvars.py @@ -0,0 +1,74 @@ +# __COPYRIGHT__ +# +# Functions and data for timing different idioms for fetching a keyword +# value from a pair of dictionaries for localand global values. This was +# used to select how to most efficiently expand single $KEYWORD strings +# in src/engine/SCons/Subst.py. + +def Func1(var, gvars, lvars): + """lvars try:-except:, gvars try:-except:""" + for i in IterationList: + try: + x = lvars[var] + except KeyError: + try: + x = gvars[var] + except KeyError: + x = '' + +def Func2(var, gvars, lvars): + """lvars has_key(), gvars try:-except:""" + for i in IterationList: + if lvars.has_key(var): + x = lvars[var] + else: + try: + x = gvars[var] + except KeyError: + x = '' + +def Func3(var, gvars, lvars): + """lvars has_key(), gvars has_key()""" + for i in IterationList: + if lvars.has_key(var): + x = lvars[var] + elif gvars.has_key(var): + x = gvars[var] + else: + x = '' + +def Func4(var, gvars, lvars): + """eval()""" + for i in IterationList: + try: + x = eval(var, gvars, lvars) + except NameError: + x = '' + +# Data to pass to the functions on each run. Each entry is a +# three-element tuple: +# +# ( +# "Label to print describing this data run", +# ('positional', 'arguments'), +# {'keyword' : 'arguments'}, +# ), + +Data = [ + ( + "Neither in gvars or lvars", + ('x', {}, {}), + {}, + ), + ( + "Missing from lvars, found in gvars", + ('x', {'x':1}, {}), + {}, + ), + ( + "Found in lvars", + ('x', {'x':1}, {'x':2}), + {}, + ), +] + -- cgit v1.2.1