diff options
Diffstat (limited to 'tools/dev/benchmarks/suite1/benchmark.py')
-rwxr-xr-x | tools/dev/benchmarks/suite1/benchmark.py | 1561 |
1 files changed, 1116 insertions, 445 deletions
diff --git a/tools/dev/benchmarks/suite1/benchmark.py b/tools/dev/benchmarks/suite1/benchmark.py index 7eb3dd9..fc61848 100755 --- a/tools/dev/benchmarks/suite1/benchmark.py +++ b/tools/dev/benchmarks/suite1/benchmark.py @@ -17,42 +17,171 @@ # specific language governing permissions and limitations # under the License. -""" -usage: benchmark.py run <run_file> <levels> <spread> [N] - benchmark.py show <run_file> - benchmark.py compare <run_file1> <run_file2> - benchmark.py combine <new_file> <run_file1> <run_file2> ... - -Test data is written to run_file. -If a run_file exists, data is added to it. -<levels> is the number of directory levels to create -<spread> is the number of child trees spreading off each dir level +"""Usage: benchmark.py run|list|compare|show|chart <selection> ... + +SELECTING TIMINGS -- B@R,LxS + +In the subcommands below, a timings selection consists of a string with up to +four elements: + <branch>@<revision>,<levels>x<spread> +abbreviated as: + B@R,LxS + +<branch> is a label of an svn branch, e.g. "1.7.x". +<revision> is the last-changed-revision of above branch. +<levels> is the number of directory levels created in the benchmark. +<spread> is the number of child trees spreading off each dir level. + +<branch_name> and <revision> are simply used for labeling. Upon the actual +test runs, you should enter labels matching the selected --svn-bin-dir. +Later, you can select runs individually by using these labels. + +For <revision>, you can provide special keywords: +- 'each' has the same effect as entering each available revision number that + is on record in the db in a separate timings selection. +- 'last' is the same as 'each', but shows only the last 10 revisions. 'last' + can be combined with a number, e.g. 'last12'. + +For all subcommands except 'run', you can omit some or all of the elements of +a timings selection to combine all available timings sets. Try that out with +the 'list' subcommand. + +Examples: + benchmark.py run 1.7.x@12345,5x5 + benchmark.py show trunk@12345 + benchmark.py compare 1.7.0,1x100 trunk@each,1x100 + benchmark.py chart compare 1.7.0,5x5 trunk@last12,5x5 + + +RUN BENCHMARKS + + benchmark.py run B@R,LxS [N] [options] + +Test data is added to an sqlite database created automatically, by default +'benchmark.db' in the current working directory. To specify a different path, +use option -f <path_to_db>. + If <N> is provided, the run is repeated N times. -""" + +<levels> and <spread> control the way the tested working copy is structured: + <levels>: number of directory levels to create. + <spread>: number of files and subdirectories created in each dir. + + +LIST WHAT IS ON RECORD + + benchmark.py list [B@R,LxS] + +Find entries in the database for the given constraints. Any arguments can +be omitted. (To select only a rev, start with a '@', like '@123'; to select +only spread, start with an 'x', like "x100".) + +Call without arguments to get a listing of all available constraints. + + +COMPARE TIMINGS + + benchmark.py compare B@R,LxS B@R,LxS [B@R,LxS [...]] + +Compare any number of timings sets to the first provided set (in text mode). +For example: + benchmark.py compare 1.7.0 trunk@1349903 + Compare the total timings of all combined '1.7.0' branch runs to + all combined runs of 'trunk'-at-revision-1349903. + benchmark.py compare 1.7.0,5x5 trunk@1349903,5x5 + Same as above, but only compare the working copy types with 5 levels + and a spread of 5. + +Use the -c option to limit comparison to specific command names. + + +SHOW TIMINGS + + benchmark.py show B@R,LxS [B@R,LxS [...]] + +Print out a summary of the timings selected from the given constraints. + + +GENERATE CHARTS + + benchmark.py chart compare B@R,LxS B@R,LxS [ B@R,LxS ... ] + +Produce a bar chart that compares any number of sets of timings. Like with +the plain 'compare' command, the first set is taken as a reference point for +100% and +-0 seconds. Each following dataset produces a set of labeled bar +charts, grouped by svn command names. At least two timings sets must be +provided. + +Use the -c option to limit comparison to specific command names. + + +EXAMPLES + +# Run 3 benchmarks on svn 1.7.0 with 5 dir levels and 5 files and subdirs for +# each level (spread). Timings are saved in ./benchmark.db. +# Provide label '1.7.0' and its Last-Changed-Rev for later reference. +./benchmark.py run --svn-bin-dir ~/svn-prefix/1.7.0/bin 1.7.0@1181106,5x5 3 + +# Record 3 benchmark runs on trunk, again naming its Last-Changed-Rev. +# (You may also set your $PATH instead of using --svn-bin-dir.) +./benchmark.py run --svn-bin-dir ~/svn-prefix/trunk/bin trunk@1352725,5x5 3 + +# Work with the results of above two runs +./benchmark.py list +./benchmark.py compare 1.7.0 trunk +./benchmark.py show 1.7.0 trunk +./benchmark.py chart compare 1.7.0 trunk +./benchmark.py chart compare 1.7.0 trunk -c "update,commit,TOTAL RUN" + +# Rebuild r1352598, run it and chart improvements since 1.7.0. +svn up -r1352598 ~/src/trunk +make -C ~/src/trunk dist-clean install +export PATH="$HOME/svn-prefix/trunk/bin:$PATH" +which svn +./benchmark.py run trunk@1352598,5x5 3 +./benchmark.py chart compare 1.7.0 trunk@1352598 trunk@1352725 -o chart.svg + + +GLOBAL OPTIONS""" import os -import sys +import time +import datetime +import sqlite3 +import optparse import tempfile import subprocess -import datetime import random import shutil -import cPickle -import optparse import stat +import string +from copy import copy +IGNORE_COMMANDS = ('--version', ) TOTAL_RUN = 'TOTAL RUN' -timings = None +j = os.path.join + +def bail(msg=None): + if msg: + print msg + exit(1) -def run_cmd(cmd, stdin=None, shell=False): +def time_str(): + return time.strftime('%Y-%m-%d %H:%M:%S'); - if shell: - printable_cmd = 'CMD: ' + cmd - else: - printable_cmd = 'CMD: ' + ' '.join(cmd) +def timedelta_to_seconds(td): + return ( float(td.seconds) + + float(td.microseconds) / (10**6) + + td.days * 24 * 60 * 60 ) + +def run_cmd(cmd, stdin=None, shell=False, verbose=False): if options.verbose: - print printable_cmd + if shell: + printable_cmd = cmd + else: + printable_cmd = ' '.join(cmd) + print 'CMD:', printable_cmd if stdin: stdin_arg = subprocess.PIPE @@ -66,573 +195,1115 @@ def run_cmd(cmd, stdin=None, shell=False): shell=shell) stdout,stderr = p.communicate(input=stdin) - if options.verbose: + if verbose: if (stdout): print "STDOUT: [[[\n%s]]]" % ''.join(stdout) if (stderr): print "STDERR: [[[\n%s]]]" % ''.join(stderr) - return stdout,stderr + return stdout, stderr + + +_next_unique_basename_count = 0 + +def next_unique_basename(prefix): + global _next_unique_basename_count + _next_unique_basename_count += 1 + return '_'.join((prefix, str(_next_unique_basename_count))) + + +si_units = [ + (1000 ** 5, 'P'), + (1000 ** 4, 'T'), + (1000 ** 3, 'G'), + (1000 ** 2, 'M'), + (1000 ** 1, 'K'), + (1000 ** 0, ''), + ] +def n_label(n): + """(stolen from hurry.filesize)""" + for factor, suffix in si_units: + if n >= factor: + break + amount = int(n/factor) + if isinstance(suffix, tuple): + singular, multiple = suffix + if amount == 1: + suffix = singular + else: + suffix = multiple + return str(amount) + suffix + + +def split_arg_once(l_r, sep): + if not l_r: + return (None, None) + if sep in l_r: + l, r = l_r.split(sep) + else: + l = l_r + r = None + if not l: + l = None + if not r: + r = None + return (l, r) + +RUN_KIND_SEPARATORS=('@', ',', 'x') + +class RunKind: + def __init__(self, b_r_l_s): + b_r, l_s = split_arg_once(b_r_l_s, RUN_KIND_SEPARATORS[1]) + self.branch, self.revision = split_arg_once(b_r, RUN_KIND_SEPARATORS[0]) + self.levels, self.spread = split_arg_once(l_s, RUN_KIND_SEPARATORS[2]) + if self.levels: self.levels = int(self.levels) + if self.spread: self.spread = int(self.spread) + + def label(self): + label_parts = [] + if self.branch: + label_parts.append(self.branch) + if self.revision: + label_parts.append(RUN_KIND_SEPARATORS[0]) + label_parts.append(self.revision) + if self.levels or self.spread: + label_parts.append(RUN_KIND_SEPARATORS[1]) + if self.levels: + label_parts.append(str(self.levels)) + if self.spread: + label_parts.append(RUN_KIND_SEPARATORS[2]) + label_parts.append(str(self.spread)) + return ''.join(label_parts) + + def args(self): + return (self.branch, self.revision, self.levels, self.spread) + + +def parse_timings_selections(db, *args): + run_kinds = [] + + for arg in args: + run_kind = RunKind(arg) + + if run_kind.revision == 'each': + run_kind.revision = None + query = TimingQuery(db, run_kind) + for revision in query.get_sorted_revisions(): + revision_run_kind = copy(run_kind) + revision_run_kind.revision = revision + run_kinds.append(revision_run_kind) + elif run_kind.revision and run_kind.revision.startswith('last'): + Nstr = run_kind.revision[4:] + if not Nstr: + N = 10 + else: + N = int(Nstr) + run_kind.revision = None + query = TimingQuery(db, run_kind) + for revision in query.get_sorted_revisions()[-N:]: + revision_run_kind = copy(run_kind) + revision_run_kind.revision = revision + run_kinds.append(revision_run_kind) + else: + run_kinds.append(run_kind) -def timedelta_to_seconds(td): - return ( float(td.seconds) - + float(td.microseconds) / (10**6) - + td.days * 24 * 60 * 60 ) + return run_kinds +def parse_one_timing_selection(db, *args): + run_kinds = parse_timings_selections(db, *args) + if len(run_kinds) != 1: + bail("I need exactly one timings identifier, not '%s'" + % (' '.join(*args))) + return run_kinds[0] -class Timings: - def __init__(self, *ignore_svn_cmds): - self.timings = {} - self.current_name = None + + +PATHNAME_VALID_CHARS = "-_.,@%s%s" % (string.ascii_letters, string.digits) +def filesystem_safe_string(s): + return ''.join(c for c in s if c in PATHNAME_VALID_CHARS) + +def do_div(ref, val): + if ref: + return float(val) / float(ref) + else: + return 0.0 + +def do_diff(ref, val): + return float(val) - float(ref) + + +# ------------------------- database ------------------------- + +class TimingsDb: + def __init__(self, db_path): + self.db_path = db_path; + self.conn = sqlite3.connect(db_path) + self.ensure_tables_created() + + def ensure_tables_created(self): + c = self.conn.cursor() + + c.execute("""SELECT name FROM sqlite_master WHERE type='table' AND + name='batch'""") + if c.fetchone(): + # exists + return + + print 'Creating database tables.' + c.executescript(''' + CREATE TABLE batch ( + batch_id INTEGER PRIMARY KEY AUTOINCREMENT, + started TEXT, + ended TEXT + ); + + CREATE TABLE run_kind ( + run_kind_id INTEGER PRIMARY KEY AUTOINCREMENT, + branch TEXT NOT NULL, + revision TEXT NOT NULL, + wc_levels INTEGER, + wc_spread INTEGER, + UNIQUE(branch, revision, wc_levels, wc_spread) + ); + + CREATE TABLE run ( + run_id INTEGER PRIMARY KEY AUTOINCREMENT, + batch_id INTEGER NOT NULL REFERENCES batch(batch_id), + run_kind_id INTEGER NOT NULL REFERENCES run_kind(run_kind_id), + started TEXT, + ended TEXT, + aborted INTEGER + ); + + CREATE TABLE timings ( + run_id INTEGER NOT NULL REFERENCES run(run_id), + command TEXT NOT NULL, + sequence INTEGER, + timing REAL + );''' + ) + self.conn.commit() + c.close(); + + +class Batch: + def __init__(self, db): + self.db = db + self.started = time_str() + c = db.conn.cursor() + c.execute("INSERT INTO batch (started) values (?)", (self.started,)) + db.conn.commit() + self.id = c.lastrowid + c.close() + + def done(self): + conn = self.db.conn + c = conn.cursor() + c.execute(""" + UPDATE batch + SET ended = ? + WHERE batch_id = ?""", + (time_str(), self.id)) + conn.commit() + c.close() + +class Run: + def __init__(self, batch, run_kind): + self.batch = batch + conn = self.batch.db.conn + c = conn.cursor() + + c.execute(""" + SELECT run_kind_id FROM run_kind + WHERE branch = ? + AND revision = ? + AND wc_levels = ? + AND wc_spread = ?""", + run_kind.args()) + kind_ids = c.fetchone() + if kind_ids: + kind_id = kind_ids[0] + else: + c.execute(""" + INSERT INTO run_kind (branch, revision, wc_levels, wc_spread) + VALUES (?, ?, ?, ?)""", + run_kind.args()) + conn.commit() + kind_id = c.lastrowid + + self.started = time_str() + + c.execute(""" + INSERT INTO run + (batch_id, run_kind_id, started) + VALUES + (?, ?, ?)""", + (self.batch.id, kind_id, self.started)) + conn.commit() + self.id = c.lastrowid + c.close(); self.tic_at = None - self.ignore = ignore_svn_cmds - self.name = None + self.current_command = None + self.timings = [] - def tic(self, name): - if name in self.ignore: + def tic(self, command): + if command in IGNORE_COMMANDS: return self.toc() - self.current_name = name + self.current_command = command self.tic_at = datetime.datetime.now() def toc(self): - if self.current_name and self.tic_at: + if self.current_command and self.tic_at: toc_at = datetime.datetime.now() - self.submit_timing(self.current_name, + self.remember_timing(self.current_command, timedelta_to_seconds(toc_at - self.tic_at)) - self.current_name = None + self.current_command = None self.tic_at = None - def submit_timing(self, name, seconds): - times = self.timings.get(name) - if not times: - times = [] - self.timings[name] = times - times.append(seconds) + def remember_timing(self, command, seconds): + self.timings.append((command, seconds)) + + def submit_timings(self): + conn = self.batch.db.conn + c = conn.cursor() + print 'submitting...' + + c.executemany(""" + INSERT INTO timings + (run_id, command, sequence, timing) + VALUES + (?, ?, ?, ?)""", + [(self.id, t[0], (i + 1), t[1]) for i,t in enumerate(self.timings)]) + + conn.commit() + c.close() + + def done(self, aborted=False): + conn = self.batch.db.conn + c = conn.cursor() + c.execute(""" + UPDATE run + SET ended = ?, aborted = ? + WHERE run_id = ?""", + (time_str(), aborted, self.id)) + conn.commit() + c.close() + + +class TimingQuery: + def __init__(self, db, run_kind): + self.cursor = db.conn.cursor() + self.constraints = [] + self.values = [] + self.timings = None + self.FROM_WHERE = """ + FROM batch AS b, + timings AS t, + run AS r, + run_kind as k + WHERE + t.run_id = r.run_id + AND k.run_kind_id = r.run_kind_id + AND b.batch_id = r.batch_id + AND r.aborted = 0 + """ + self.append_constraint('k.branch', run_kind.branch) + self.each_revision = False + if run_kind.revision == 'each': + self.each_revision = True + else: + self.append_constraint('k.revision', run_kind.revision) + self.append_constraint('k.wc_levels', run_kind.levels) + self.append_constraint('k.wc_spread', run_kind.spread) + self.label = run_kind.label() + + def append_constraint(self, column_name, val): + if val: + self.constraints.append('AND %s = ?' % column_name) + self.values.append(val) + + def remove_last_constraint(self): + del self.constraints[-1] + del self.values[-1] + + def get_sorted_X(self, x, n=1): + query = ['SELECT DISTINCT %s' % x, + self.FROM_WHERE ] + query.extend(self.constraints) + query.append('ORDER BY %s' % x) + c = db.conn.cursor() + try: + c.execute(' '.join(query), self.values) + if n == 1: + return [tpl[0] for tpl in c.fetchall()] + else: + return c.fetchall() + finally: + c.close() + + def get_sorted_command_names(self): + return self.get_sorted_X('t.command') - def min_max_avg(self, name): - ttimings = self.timings.get(name) - return ( min(ttimings), - max(ttimings), - reduce(lambda x,y: x + y, ttimings) / len(ttimings) ) + def get_sorted_branches(self): + return self.get_sorted_X('k.branch') - def summary(self): - s = [] - if self.name: - s.append('Timings for %s' % self.name) - s.append(' N min max avg operation (unit is seconds)') + def get_sorted_revisions(self): + return self.get_sorted_X('k.revision') + + def get_sorted_levels_spread(self): + return self.get_sorted_X('k.wc_levels,k.wc_spread', n = 2) + + def count_runs_batches(self): + query = ["""SELECT + count(DISTINCT r.run_id), + count(DISTINCT b.batch_id)""", + self.FROM_WHERE ] + query.extend(self.constraints) + c = db.conn.cursor() + try: + #print ' '.join(query) + c.execute(' '.join(query), self.values) + return c.fetchone() + finally: + c.close() + + def get_command_timings(self, command): + query = ["""SELECT + count(t.timing), + min(t.timing), + max(t.timing), + avg(t.timing)""", + self.FROM_WHERE ] + self.append_constraint('t.command', command) + try: + query.extend(self.constraints) + c = db.conn.cursor() + try: + c.execute(' '.join(query), self.values) + return c.fetchone() + finally: + c.close() + finally: + self.remove_last_constraint() - names = sorted(self.timings.keys()) + def get_timings(self): + if self.timings: + return self.timings + self.timings = {} + for command_name in self.get_sorted_command_names(): + self.timings[command_name] = self.get_command_timings(command_name) + return self.timings - for name in names: - timings = self.timings.get(name) - if not name or not timings: continue - tmin, tmax, tavg = self.min_max_avg(name) +# ------------------------------------------------------------ run tests - s.append('%5d %7.2f %7.2f %7.2f %s' % ( - len(timings), - tmin, - tmax, - tavg, - name)) - return '\n'.join(s) +def perform_run(batch, run_kind, + svn_bin, svnadmin_bin, verbose): + run = Run(batch, run_kind) - def compare_to(self, other): - def do_div(a, b): - if b: - return float(a) / float(b) - else: - return 0.0 + def create_tree(in_dir, _levels, _spread): + try: + os.mkdir(in_dir) + except: + pass + + for i in range(_spread): + # files + fn = j(in_dir, next_unique_basename('file')) + f = open(fn, 'w') + f.write('This is %s\n' % fn) + f.close() + + # dirs + if (_levels > 1): + dn = j(in_dir, next_unique_basename('dir')) + create_tree(dn, _levels - 1, _spread) + + def svn(*args): + name = args[0] + + cmd = [ svn_bin ] + cmd.extend( list(args) ) + if verbose: + print 'svn cmd:', ' '.join(cmd) + + stdin = None + if stdin: + stdin_arg = subprocess.PIPE + else: + stdin_arg = None + + run.tic(name) + try: + p = subprocess.Popen(cmd, + stdin=stdin_arg, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=False) + stdout,stderr = p.communicate(input=stdin) + except OSError: + stdout = stderr = None + finally: + run.toc() - def do_diff(a, b): - return float(a) - float(b) + if verbose: + if (stdout): + print "STDOUT: [[[\n%s]]]" % ''.join(stdout) + if (stderr): + print "STDERR: [[[\n%s]]]" % ''.join(stderr) - selfname = self.name - if not selfname: - selfname = 'unnamed' - othername = other.name - if not othername: - othername = 'the other' + return stdout,stderr - selftotal = self.min_max_avg(TOTAL_RUN)[2] - othertotal = other.min_max_avg(TOTAL_RUN)[2] - s = ['COMPARE %s to %s' % (othername, selfname)] + def add(*args): + return svn('add', *args) - if TOTAL_RUN in self.timings and TOTAL_RUN in other.timings: - s.append(' %s times: %5.1f seconds avg for %s' % (TOTAL_RUN, - othertotal, othername)) - s.append(' %s %5.1f seconds avg for %s' % (' ' * len(TOTAL_RUN), - selftotal, selfname)) + def ci(*args): + return svn('commit', '-mm', *args) + def up(*args): + return svn('update', *args) - s.append(' min max avg operation') + def st(*args): + return svn('status', *args) - names = sorted(self.timings.keys()) + def info(*args): + return svn('info', *args) - for name in names: - if not name in other.timings: - continue + _chars = [chr(x) for x in range(ord('a'), ord('z') +1)] + def randstr(len=8): + return ''.join( [random.choice(_chars) for i in range(len)] ) - min_me, max_me, avg_me = self.min_max_avg(name) - min_other, max_other, avg_other = other.min_max_avg(name) + def _copy(path): + dest = next_unique_basename(path + '_copied') + svn('copy', path, dest) - s.append('%-16s %-16s %-16s %s' % ( - '%7.2f|%+7.3f' % ( - do_div(min_me, min_other), - do_diff(min_me, min_other) - ), + def _move(path): + dest = path + '_moved' + svn('move', path, dest) - '%7.2f|%+7.3f' % ( - do_div(max_me, max_other), - do_diff(max_me, max_other) - ), + def _propmod(path): + so, se = svn('proplist', path) + propnames = [line.strip() for line in so.strip().split('\n')[1:]] - '%7.2f|%+7.3f' % ( - do_div(avg_me, avg_other), - do_diff(avg_me, avg_other) - ), + # modify? + if len(propnames): + svn('ps', propnames[len(propnames) / 2], randstr(), path) - name)) + # del? + if len(propnames) > 1: + svn('propdel', propnames[len(propnames) / 2], path) - s.extend([ - '("1.23|+0.45" means factor=1.23, difference in seconds = 0.45', - 'factor < 1 or difference < 0 means \'%s\' is faster than \'%s\')' - % (self.name, othername)]) + def _propadd(path): + # set a new one. + svn('propset', randstr(), randstr(), path) + + def _mod(path): + if os.path.isdir(path): + _propmod(path) + return + + f = open(path, 'a') + f.write('\n%s\n' % randstr()) + f.close() - return '\n'.join(s) + def _add(path): + if os.path.isfile(path): + return _mod(path) + + if random.choice((True, False)): + # create a dir + svn('mkdir', j(path, next_unique_basename('new_dir'))) + else: + # create a file + new_path = j(path, next_unique_basename('new_file')) + f = open(new_path, 'w') + f.write(randstr()) + f.close() + svn('add', new_path) + + def _del(path): + svn('delete', path) + + _mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del) + + def modify_tree(in_dir, fraction): + child_names = os.listdir(in_dir) + for child_name in child_names: + if child_name[0] == '.': + continue + if random.random() < fraction: + path = j(in_dir, child_name) + random.choice(_mod_funcs)(path) + for child_name in child_names: + if child_name[0] == '.': continue + path = j(in_dir, child_name) + if os.path.isdir(path): + modify_tree(path, fraction) - def add(self, other): - for name, other_times in other.timings.items(): - my_times = self.timings.get(name) - if not my_times: - my_times = [] - self.timings[name] = my_times - my_times.extend(other_times) + def propadd_tree(in_dir, fraction): + for child_name in os.listdir(in_dir): + if child_name[0] == '.': continue + path = j(in_dir, child_name) + if random.random() < fraction: + _propadd(path) + if os.path.isdir(path): + propadd_tree(path, fraction) + def rmtree_onerror(func, path, exc_info): + """Error handler for ``shutil.rmtree``. + If the error is due to an access error (read only file) + it attempts to add write permission and then retries. -j = os.path.join + If the error is for another reason it re-raises the error. + + Usage : ``shutil.rmtree(path, onerror=onerror)`` + """ + if not os.access(path, os.W_OK): + # Is the error an access error ? + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise -_create_count = 0 + base = tempfile.mkdtemp() -def next_name(prefix): - global _create_count - _create_count += 1 - return '_'.join((prefix, str(_create_count))) + # ensure identical modifications for every run + random.seed(0) + + aborted = True -def create_tree(in_dir, levels, spread=5): try: - os.mkdir(in_dir) - except: - pass - - for i in range(spread): - # files - fn = j(in_dir, next_name('file')) - f = open(fn, 'w') - f.write('This is %s\n' % fn) - f.close() + repos = j(base, 'repos') + repos = repos.replace('\\', '/') + wc = j(base, 'wc') + wc2 = j(base, 'wc2') - # dirs - if (levels > 1): - dn = j(in_dir, next_name('dir')) - create_tree(dn, levels - 1, spread) + if repos.startswith('/'): + file_url = 'file://%s' % repos + else: + file_url = 'file:///%s' % repos + print '\nRunning svn benchmark in', base + print 'dir levels: %s; new files and dirs per leaf: %s' %( + run_kind.levels, run_kind.spread) -def svn(*args): - name = args[0] + started = datetime.datetime.now() - ### options comes from the global namespace; it should be passed - cmd = [options.svn] + list(args) - if options.verbose: - print 'svn cmd:', ' '.join(cmd) + try: + run_cmd([svnadmin_bin, 'create', repos]) + svn('checkout', file_url, wc) + + trunk = j(wc, 'trunk') + create_tree(trunk, run_kind.levels, run_kind.spread) + add(trunk) + st(wc) + ci(wc) + up(wc) + propadd_tree(trunk, 0.05) + ci(wc) + up(wc) + st(wc) + info('-R', wc) + + trunk_url = file_url + '/trunk' + branch_url = file_url + '/branch' + + svn('copy', '-mm', trunk_url, branch_url) + st(wc) + + up(wc) + st(wc) + info('-R', wc) + + svn('checkout', trunk_url, wc2) + st(wc2) + modify_tree(wc2, 0.5) + st(wc2) + ci(wc2) + up(wc2) + up(wc) + + svn('switch', branch_url, wc2) + modify_tree(wc2, 0.5) + st(wc2) + info('-R', wc2) + ci(wc2) + up(wc2) + up(wc) + + modify_tree(trunk, 0.5) + st(wc) + ci(wc) + up(wc2) + up(wc) + + svn('merge', '--accept=postpone', trunk_url, wc2) + st(wc2) + info('-R', wc2) + svn('resolve', '--accept=mine-conflict', wc2) + st(wc2) + svn('resolved', '-R', wc2) + st(wc2) + info('-R', wc2) + ci(wc2) + up(wc2) + up(wc) + + svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk) + st(wc) + svn('resolve', '--accept=mine-conflict', wc) + st(wc) + svn('resolved', '-R', wc) + st(wc) + ci(wc) + up(wc2) + up(wc) + + svn('delete', j(wc, 'branch')) + ci(wc) + up(wc) + + aborted = False - stdin = None - if stdin: - stdin_arg = subprocess.PIPE - else: - stdin_arg = None + finally: + stopped = datetime.datetime.now() + print '\nDone with svn benchmark in', (stopped - started) - ### timings comes from the global namespace; it should be passed - timings.tic(name) - try: - p = subprocess.Popen(cmd, - stdin=stdin_arg, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - shell=False) - stdout,stderr = p.communicate(input=stdin) - except OSError: - stdout = stderr = None + run.remember_timing(TOTAL_RUN, + timedelta_to_seconds(stopped - started)) finally: - timings.toc() + run.done(aborted) + run.submit_timings() + shutil.rmtree(base, onerror=rmtree_onerror) - if options.verbose: - if (stdout): - print "STDOUT: [[[\n%s]]]" % ''.join(stdout) - if (stderr): - print "STDERR: [[[\n%s]]]" % ''.join(stderr) + return aborted + + +# --------------------------------------------------------------------- - return stdout,stderr +def cmdline_run(db, options, run_kind_str, N=1): + run_kind = parse_one_timing_selection(db, run_kind_str) -def add(*args): - return svn('add', *args) + N = int(N) -def ci(*args): - return svn('commit', '-mm', *args) + print 'Hi, going to run a Subversion benchmark series of %d runs...' % N + print 'Label is %s' % run_kind.label() -def up(*args): - return svn('update', *args) + # can we run the svn binaries? + svn_bin = j(options.svn_bin_dir, 'svn') + svnadmin_bin = j(options.svn_bin_dir, 'svnadmin') -def st(*args): - return svn('status', *args) + for b in (svn_bin, svnadmin_bin): + so,se = run_cmd([b, '--version']) + if not so: + bail("Can't run %s" % b) -_chars = [chr(x) for x in range(ord('a'), ord('z') +1)] + print ', '.join([s.strip() for s in so.split('\n')[:2]]) -def randstr(len=8): - return ''.join( [random.choice(_chars) for i in range(len)] ) + batch = Batch(db) -def _copy(path): - dest = next_name(path + '_copied') - svn('copy', path, dest) + for i in range(N): + print 'Run %d of %d' % (i + 1, N) + perform_run(batch, run_kind, + svn_bin, svnadmin_bin, options.verbose) -def _move(path): - dest = path + '_moved' - svn('move', path, dest) + batch.done() -def _propmod(path): - so, se = svn('proplist', path) - propnames = [line.strip() for line in so.strip().split('\n')[1:]] - # modify? - if len(propnames): - svn('ps', propnames[len(propnames) / 2], randstr(), path) +def cmdline_list(db, options, *args): + run_kinds = parse_timings_selections(db, *args) - # del? - if len(propnames) > 1: - svn('propdel', propnames[len(propnames) / 2], path) + for run_kind in run_kinds: + constraints = [] + def add_if_not_none(name, val): + if val: + constraints.append(' %s = %s' % (name, val)) + add_if_not_none('branch', run_kind.branch) + add_if_not_none('revision', run_kind.revision) + add_if_not_none('levels', run_kind.levels) + add_if_not_none('spread', run_kind.spread) + if constraints: + print 'For\n', '\n'.join(constraints) + print 'I found:' -def _propadd(path): - # set a new one. - svn('propset', randstr(), randstr(), path) + d = TimingQuery(db, run_kind) + cmd_names = d.get_sorted_command_names() + if cmd_names: + print '\n%d command names:\n ' % len(cmd_names), '\n '.join(cmd_names) -def _mod(path): - if os.path.isdir(path): - return _propmod(path) + branches = d.get_sorted_branches() + if branches and (len(branches) > 1 or branches[0] != run_kind.branch): + print '\n%d branches:\n ' % len(branches), '\n '.join(branches) - f = open(path, 'a') - f.write('\n%s\n' % randstr()) - f.close() + revisions = d.get_sorted_revisions() + if revisions and (len(revisions) > 1 or revisions[0] != run_kind.revision): + print '\n%d revisions:\n ' % len(revisions), '\n '.join(revisions) -def _add(path): - if os.path.isfile(path): - return _mod(path) + levels_spread = d.get_sorted_levels_spread() + if levels_spread and ( + len(levels_spread) > 1 + or levels_spread[0] != (run_kind.levels, run_kind.spread)): + print '\n%d kinds of levels x spread:\n ' % len(levels_spread), '\n '.join( + [ ('%dx%d' % (l, s)) for l,s in levels_spread ]) - if random.choice((True, False)): - # create a dir - svn('mkdir', j(path, next_name('new_dir'))) - else: - # create a file - new_path = j(path, next_name('new_file')) - f = open(new_path, 'w') - f.write(randstr()) - f.close() - svn('add', new_path) + print "\n%d runs in %d batches.\n" % (d.count_runs_batches()) -def _del(path): - svn('delete', path) -_mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del) +def cmdline_show(db, options, *run_kind_strings): + run_kinds = parse_timings_selections(db, *run_kind_strings) + for run_kind in run_kinds: + q = TimingQuery(db, run_kind) + timings = q.get_timings() -def modify_tree(in_dir, fraction): - child_names = os.listdir(in_dir) - for child_name in child_names: - if child_name[0] == '.': - continue - if random.random() < fraction: - path = j(in_dir, child_name) - random.choice(_mod_funcs)(path) + s = [] + s.append('Timings for %s' % run_kind.label()) + s.append(' N min max avg operation (unit is seconds)') - for child_name in child_names: - if child_name[0] == '.': continue - path = j(in_dir, child_name) - if os.path.isdir(path): - modify_tree(path, fraction) - -def propadd_tree(in_dir, fraction): - for child_name in os.listdir(in_dir): - if child_name[0] == '.': continue - path = j(in_dir, child_name) - if random.random() < fraction: - _propadd(path) - if os.path.isdir(path): - propadd_tree(path, fraction) + for command_name in q.get_sorted_command_names(): + if options.command_names and command_name not in options.command_names: + continue + n, tmin, tmax, tavg = timings[command_name] + s.append('%4s %7.2f %7.2f %7.2f %s' % ( + n_label(n), + tmin, + tmax, + tavg, + command_name)) -def rmtree_onerror(func, path, exc_info): - """Error handler for ``shutil.rmtree``. + print '\n'.join(s) - If the error is due to an access error (read only file) - it attempts to add write permission and then retries. - If the error is for another reason it re-raises the error. +def cmdline_compare(db, options, *args): + run_kinds = parse_timings_selections(db, *args) + if len(run_kinds) < 2: + bail("Need at least two sets of timings to compare.") - Usage : ``shutil.rmtree(path, onerror=onerror)`` - """ - if not os.access(path, os.W_OK): - # Is the error an access error ? - os.chmod(path, stat.S_IWUSR) - func(path) - else: - raise + left_kind = run_kinds[0] + leftq = TimingQuery(db, left_kind) + left = leftq.get_timings() + if not left: + bail("No timings for %s" % left_kind.label()) -def run(levels, spread, N): - for i in range(N): - base = tempfile.mkdtemp() + for run_kind_idx in range(1, len(run_kinds)): + right_kind = run_kinds[run_kind_idx] - # ensure identical modifications for every run - random.seed(0) + rightq = TimingQuery(db, right_kind) + right = rightq.get_timings() + if not right: + print "No timings for %s" % right_kind.label() + continue - try: - repos = j(base, 'repos') - repos = repos.replace('\\', '/') - wc = j(base, 'wc') - wc2 = j(base, 'wc2') + label = 'Compare %s to %s' % (right_kind.label(), left_kind.label()) - if repos.startswith('/'): - file_url = 'file://%s' % repos - else: - file_url = 'file:///%s' % repos + s = [label] - so, se = svn('--version') - if not so: - print "Can't find svn." - exit(1) - version = ', '.join([s.strip() for s in so.split('\n')[:2]]) + verbose = options.verbose + if not verbose: + s.append(' N avg operation') + else: + s.append(' N min max avg operation') - print '\nRunning svn benchmark in', base - print 'dir levels: %s; new files and dirs per leaf: %s; run %d of %d' %( - levels, spread, i + 1, N) + command_names = [name for name in leftq.get_sorted_command_names() + if name in right] + if options.command_names: + command_names = [name for name in command_names + if name in options.command_names] - print version - started = datetime.datetime.now() + for command_name in command_names: + left_N, left_min, left_max, left_avg = left[command_name] + right_N, right_min, right_max, right_avg = right[command_name] - try: - run_cmd(['svnadmin', 'create', repos]) - svn('checkout', file_url, wc) - - trunk = j(wc, 'trunk') - create_tree(trunk, levels, spread) - add(trunk) - st(wc) - ci(wc) - up(wc) - propadd_tree(trunk, 0.5) - ci(wc) - up(wc) - st(wc) - - trunk_url = file_url + '/trunk' - branch_url = file_url + '/branch' - - svn('copy', '-mm', trunk_url, branch_url) - st(wc) - - up(wc) - st(wc) - - svn('checkout', trunk_url, wc2) - st(wc2) - modify_tree(wc2, 0.5) - st(wc2) - ci(wc2) - up(wc2) - up(wc) - - svn('switch', branch_url, wc2) - modify_tree(wc2, 0.5) - st(wc2) - ci(wc2) - up(wc2) - up(wc) - - modify_tree(trunk, 0.5) - st(wc) - ci(wc) - up(wc2) - up(wc) - - svn('merge', '--accept=postpone', trunk_url, wc2) - st(wc2) - svn('resolve', '--accept=mine-conflict', wc2) - st(wc2) - svn('resolved', '-R', wc2) - st(wc2) - ci(wc2) - up(wc2) - up(wc) - - svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk) - st(wc) - svn('resolve', '--accept=mine-conflict', wc) - st(wc) - svn('resolved', '-R', wc) - st(wc) - ci(wc) - up(wc2) - up(wc) - - svn('delete', j(wc, 'branch')) - ci(wc) - up(wc2) - up(wc) + N_str = '%s/%s' % (n_label(left_N), n_label(right_N)) + avg_str = '%7.2f|%+7.3f' % (do_div(left_avg, right_avg), + do_diff(left_avg, right_avg)) + + if not verbose: + s.append('%9s %-16s %s' % (N_str, avg_str, command_name)) + else: + min_str = '%7.2f|%+7.3f' % (do_div(left_min, right_min), + do_diff(left_min, right_min)) + max_str = '%7.2f|%+7.3f' % (do_div(left_max, right_max), + do_diff(left_max, right_max)) + s.append('%9s %-16s %-16s %-16s %s' % (N_str, min_str, max_str, avg_str, + command_name)) - finally: - stopped = datetime.datetime.now() - print '\nDone with svn benchmark in', (stopped - started) - - ### timings comes from the global namespace; it should be passed - timings.submit_timing(TOTAL_RUN, - timedelta_to_seconds(stopped - started)) - - # rename ps to prop mod - if timings.timings.get('ps'): - has = timings.timings.get('prop mod') - if not has: - has = [] - timings.timings['prop mod'] = has - has.extend( timings.timings['ps'] ) - del timings.timings['ps'] - - print timings.summary() - finally: - shutil.rmtree(base, onerror=rmtree_onerror) + s.extend([ + '(legend: "1.23|+0.45" means: slower by factor 1.23 and by 0.45 seconds;', + ' factor < 1 and seconds < 0 means \'%s\' is faster.' + % right_kind.label(), + ' "2/3" means: \'%s\' has 2 timings on record, the other has 3.)' + % left_kind.label() + ]) -def read_from_file(file_path): - f = open(file_path, 'rb') - try: - instance = cPickle.load(f) - instance.name = os.path.basename(file_path) - finally: - f.close() - return instance + print '\n'.join(s) -def write_to_file(file_path, instance): - f = open(file_path, 'wb') - cPickle.dump(instance, f) - f.close() +# ------------------------------------------------------- charts -def cmd_compare(path1, path2): - t1 = read_from_file(path1) - t2 = read_from_file(path2) +def cmdline_chart_compare(db, options, *args): + import matplotlib + matplotlib.use('Agg') + import numpy as np + import matplotlib.pylab as plt - print t1.summary() - print '---' - print t2.summary() - print '---' - print t2.compare_to(t1) + labels = [] + timing_sets = [] + command_names = None -def cmd_combine(dest, *paths): - total = Timings('--version'); + run_kinds = parse_timings_selections(db, *args) - for path in paths: - t = read_from_file(path) - total.add(t) + # iterate the timings selections and accumulate data + for run_kind in run_kinds: + query = TimingQuery(db, run_kind) + timings = query.get_timings() + if not timings: + print "No timings for %s" % run_kind.label() + continue + labels.append(run_kind.label()) + timing_sets.append(timings) - print total.summary() - write_to_file(dest, total) + # it only makes sense to compare those commands that have timings + # in the first selection, because that is the one everything else + # is compared to. Remember the first selection's command names. + if not command_names: + command_names = query.get_sorted_command_names() -def cmd_run(timings_path, levels, spread, N=1): - levels = int(levels) - spread = int(spread) - N = int(N) - print '\n\nHi, going to run a Subversion benchmark series of %d runs...' % N + if len(timing_sets) < 2: + bail("Not enough timings") - ### UGH! should pass to run() - global timings + if options.command_names: + command_names = [name for name in command_names + if name in options.command_names] - if os.path.isfile(timings_path): - print 'Going to add results to existing file', timings_path - timings = read_from_file(timings_path) - else: - print 'Going to write results to new file', timings_path - timings = Timings('--version') + chart_path = options.chart_path + if not chart_path: + chart_path = 'compare_' + '_'.join( + [ filesystem_safe_string(l) for l in labels ] + ) + '.svg' - run(levels, spread, N) + N = len(command_names) + M = len(timing_sets) - 1 + if M < 2: + M = 2 - write_to_file(timings_path, timings) + group_positions = np.arange(N) # the y locations for the groups + dist = 1. / (1. + M) + height = (1. - dist) / M # the height of the bars -def cmd_show(*paths): - for timings_path in paths: - timings = read_from_file(timings_path) - print '---\n%s' % timings_path - print timings.summary() + fig = plt.figure(figsize=(12, 5 + 0.2*N*M)) + plot1 = fig.add_subplot(121) + plot2 = fig.add_subplot(122) + left = timing_sets[0] -def usage(): - print __doc__ + # Iterate timing sets. Each loop produces one bar for each command name + # group. + for label_i,label in enumerate(labels[1:],1): + right = timing_sets[label_i] + if not right: + continue + + for cmd_i, command_name in enumerate(command_names): + if command_name not in right: + #skip + continue + + left_N, left_min, left_max, left_avg = left[command_name] + right_N, right_min, right_max, right_avg = right[command_name] + + div_avg = 100. * (do_div(left_avg, right_avg) - 1.0) + if div_avg <= 0: + col = '#55dd55' + else: + col = '#dd5555' + + diff_val = do_diff(left_avg, right_avg) + + ofs = (dist + height) / 2. + height * (label_i - 1) + + barheight = height * (1.0 - dist) + + y = float(cmd_i) + ofs + + plot1.barh((y, ), + (div_avg, ), + barheight, + color=col, edgecolor='white') + plot1.text(0., y + height/2., + '%s %+5.1f%%' % (label, div_avg), + ha='right', va='center', size='small', + rotation=0, family='monospace') + + plot2.barh((y, ), + (diff_val, ), + barheight, + color=col, edgecolor='white') + plot2.text(0., y + height/2., + '%s %+6.2fs' % (label, diff_val), + ha='right', va='center', size='small', + rotation=0, family='monospace') + + + for p in (plot1, plot2): + xlim = list(p.get_xlim()) + if xlim[1] < 10.: + xlim[1] = 10. + # make sure the zero line is far enough right so that the annotations + # fit inside the chart. About half the width should suffice. + if xlim[0] > -xlim[1]: + xlim[0] = -xlim[1] + p.set_xlim(*xlim) + p.set_xticks((0,)) + p.set_yticks(group_positions + (height / 2.)) + p.set_yticklabels(()) + p.set_ylim((len(command_names), 0)) + p.grid() + + plot1.set_xticklabels(('+-0%',), rotation=0) + plot1.set_title('Average runtime change from %s in %%' % labels[0], + size='medium') + + plot2.set_xticklabels(('+-0s',), rotation=0) + plot2.set_title('Average runtime change from %s in seconds' % labels[0], + size='medium') + + margin = 1./(2 + N*M) + titlemargin = 0 + if options.title: + titlemargin = margin * 1.5 + + fig.subplots_adjust(left=0.005, right=0.995, wspace=0.3, bottom=margin, + top=1.0-margin-titlemargin) + + ystep = (1.0 - 2.*margin - titlemargin) / len(command_names) + + for idx,command_name in enumerate(command_names): + ylabel = '%s\nvs. %.1fs' % ( + command_name, + left[command_name][3]) + + ypos=1.0 - margin - titlemargin - ystep/M - ystep * idx + plt.figtext(0.5, ypos, + command_name, + ha='center', va='top', + size='medium', weight='bold') + plt.figtext(0.5, ypos - ystep/(M+1), + '%s\n= %.2fs' % ( + labels[0], left[command_name][3]), + ha='center', va='top', + size='small') + + if options.title: + plt.figtext(0.5, 1. - titlemargin/2, options.title, ha='center', + va='center', weight='bold') + + plt.savefig(chart_path) + print 'wrote chart file:', chart_path + + +# ------------------------------------------------------------ main + + +# Custom option formatter, keeping newlines in the description. +# adapted from: +# http://groups.google.com/group/comp.lang.python/msg/09f28e26af0699b1 +import textwrap +class IndentedHelpFormatterWithNL(optparse.IndentedHelpFormatter): + def format_description(self, description): + if not description: return "" + desc_width = self.width - self.current_indent + indent = " "*self.current_indent + bits = description.split('\n') + formatted_bits = [ + textwrap.fill(bit, + desc_width, + initial_indent=indent, + subsequent_indent=indent) + for bit in bits] + result = "\n".join(formatted_bits) + "\n" + return result if __name__ == '__main__': - parser = optparse.OptionParser() + parser = optparse.OptionParser(formatter=IndentedHelpFormatterWithNL()) # -h is automatically added. ### should probably expand the help for that. and see about -? parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='Verbose operation') - parser.add_option('--svn', action='store', dest='svn', default='svn', - help='Specify Subversion executable to use') + parser.add_option('-b', '--svn-bin-dir', action='store', dest='svn_bin_dir', + default='', + help='Specify directory to find Subversion binaries in') + parser.add_option('-f', '--db-path', action='store', dest='db_path', + default='benchmark.db', + help='Specify path to SQLite database file') + parser.add_option('-o', '--chart-path', action='store', dest='chart_path', + help='Supply a path for chart output.') + parser.add_option('-c', '--command-names', action='store', + dest='command_names', + help='Comma separated list of command names to limit to.') + parser.add_option('-t', '--title', action='store', + dest='title', + help='For charts, a title to print in the chart graphics.') + + parser.set_description(__doc__) + parser.set_usage('') - ### should start passing this, but for now: make it global - global options options, args = parser.parse_args() + def usage(msg=None): + parser.print_help() + if msg: + print + print msg + bail() + # there should be at least one arg left: the sub-command if not args: - usage() - exit(1) + usage('No command argument supplied.') cmd = args[0] del args[0] - if cmd == 'compare': - if len(args) != 2: - usage() - exit(1) - cmd_compare(*args) + db = TimingsDb(options.db_path) - elif cmd == 'combine': - if len(args) < 3: + if cmd == 'run': + if len(args) < 1 or len(args) > 2: usage() - exit(1) - cmd_combine(*args) + cmdline_run(db, options, *args) - elif cmd == 'run': - if len(args) < 3 or len(args) > 4: + elif cmd == 'compare': + if len(args) < 2: usage() - exit(1) - cmd_run(*args) + cmdline_compare(db, options, *args) + + elif cmd == 'list': + cmdline_list(db, options, *args) elif cmd == 'show': - if not args: + cmdline_show(db, options, *args) + + elif cmd == 'chart': + if 'compare'.startswith(args[0]): + cmdline_chart_compare(db, options, *args[1:]) + else: usage() - exit(1) - cmd_show(*args) else: - usage() + usage('Unknown subcommand argument: %s' % cmd) |