summaryrefslogtreecommitdiff
path: root/tools/dev/benchmarks/suite1/benchmark.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/dev/benchmarks/suite1/benchmark.py')
-rwxr-xr-xtools/dev/benchmarks/suite1/benchmark.py638
1 files changed, 638 insertions, 0 deletions
diff --git a/tools/dev/benchmarks/suite1/benchmark.py b/tools/dev/benchmarks/suite1/benchmark.py
new file mode 100755
index 0000000..7eb3dd9
--- /dev/null
+++ b/tools/dev/benchmarks/suite1/benchmark.py
@@ -0,0 +1,638 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+usage: benchmark.py run <run_file> <levels> <spread> [N]
+ benchmark.py show <run_file>
+ benchmark.py compare <run_file1> <run_file2>
+ benchmark.py combine <new_file> <run_file1> <run_file2> ...
+
+Test data is written to run_file.
+If a run_file exists, data is added to it.
+<levels> is the number of directory levels to create
+<spread> is the number of child trees spreading off each dir level
+If <N> is provided, the run is repeated N times.
+"""
+
+import os
+import sys
+import tempfile
+import subprocess
+import datetime
+import random
+import shutil
+import cPickle
+import optparse
+import stat
+
# Name of the timing bucket that holds the duration of one whole benchmark
# run (as opposed to the per-svn-command buckets).
TOTAL_RUN = 'TOTAL RUN'

# Module-wide Timings instance; cmd_run() assigns it before calling run().
timings = None
+
def run_cmd(cmd, stdin=None, shell=False):
    """Run an external command and return its captured output.

    cmd is an argument list, or a single command string when shell is
    true.  A truthy stdin is fed to the child's standard input.

    Returns the tuple (stdout, stderr).  When options.verbose is set the
    command line and any non-empty output are echoed.
    """
    shown = cmd
    if not shell:
        shown = ' '.join(cmd)
    shown = 'CMD: ' + shown

    ### options comes from the global namespace; it should be passed
    if options.verbose:
        print(shown)

    # Only open a pipe to the child when there is something to send.
    child = subprocess.Popen(cmd,
                             stdin=subprocess.PIPE if stdin else None,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=shell)
    out, err = child.communicate(input=stdin)

    if options.verbose:
        if out:
            print("STDOUT: [[[\n%s]]]" % out)
        if err:
            print("STDERR: [[[\n%s]]]" % err)

    return out, err
+
def timedelta_to_seconds(td):
    """Return the datetime.timedelta td as a float number of seconds."""
    whole_seconds = float(td.seconds)
    fraction = float(td.microseconds) / (10**6)
    day_seconds = td.days * 24 * 60 * 60
    return whole_seconds + fraction + day_seconds
+
+
class Timings:
    """Collect wall-clock timings, grouped by operation name.

    self.timings maps an operation name to the list of measured durations
    in seconds.  Names given to the constructor are ignored by tic().
    self.name is an optional label used in reports (read_from_file() sets
    it to the file's base name).
    """

    def __init__(self, *ignore_svn_cmds):
        self.timings = {}
        self.current_name = None
        self.tic_at = None
        self.ignore = ignore_svn_cmds
        self.name = None

    def tic(self, name):
        """Start timing `name`, first closing any measurement in progress."""
        if name in self.ignore:
            return
        self.toc()
        self.current_name = name
        self.tic_at = datetime.datetime.now()

    def toc(self):
        """Stop the measurement in progress, if any, and record it."""
        if self.current_name and self.tic_at:
            toc_at = datetime.datetime.now()
            self.submit_timing(self.current_name,
                               timedelta_to_seconds(toc_at - self.tic_at))
        self.current_name = None
        self.tic_at = None

    def submit_timing(self, name, seconds):
        """Append one measurement of `seconds` to the bucket `name`."""
        self.timings.setdefault(name, []).append(seconds)

    def min_max_avg(self, name):
        """Return (min, max, average) of the measurements for `name`.

        Raises ValueError if nothing has been recorded under `name`.
        """
        samples = self.timings.get(name)
        if not samples:
            raise ValueError('no timings recorded for %r' % (name,))
        # float() keeps the average exact even if integer samples were
        # submitted on an interpreter with integer division.
        return (min(samples),
                max(samples),
                sum(samples) / float(len(samples)))

    def summary(self):
        """Return a multi-line table with one row per recorded operation."""
        s = []
        if self.name:
            s.append('Timings for %s' % self.name)
        # NOTE(review): the header's column spacing does not line up with
        # the row format below -- confirm the intended layout.
        s.append(' N min max avg operation (unit is seconds)')

        for name in sorted(self.timings.keys()):
            samples = self.timings.get(name)
            if not name or not samples:
                continue

            tmin, tmax, tavg = self.min_max_avg(name)

            s.append('%5d %7.2f %7.2f %7.2f %s' % (
                len(samples),
                tmin,
                tmax,
                tavg,
                name))

        return '\n'.join(s)

    def compare_to(self, other):
        """Return a report comparing these timings against `other`.

        For every operation recorded on both sides the min, max and average
        are shown as "factor|difference" of self relative to other.  The
        TOTAL_RUN averages are listed only when both sides recorded them.
        """
        def do_div(a, b):
            if b:
                return float(a) / float(b)
            return 0.0

        def do_diff(a, b):
            return float(a) - float(b)

        def cell(mine, theirs):
            return '%7.2f|%+7.3f' % (do_div(mine, theirs),
                                     do_diff(mine, theirs))

        selfname = self.name or 'unnamed'
        othername = other.name or 'the other'

        s = ['COMPARE %s to %s' % (othername, selfname)]

        # Only look up the totals once we know both sides have them;
        # min_max_avg() raises for an unknown or empty bucket.
        if self.timings.get(TOTAL_RUN) and other.timings.get(TOTAL_RUN):
            selftotal = self.min_max_avg(TOTAL_RUN)[2]
            othertotal = other.min_max_avg(TOTAL_RUN)[2]
            s.append(' %s times: %5.1f seconds avg for %s' % (
                TOTAL_RUN, othertotal, othername))
            s.append(' %s %5.1f seconds avg for %s' % (
                ' ' * len(TOTAL_RUN), selftotal, selfname))

        s.append(' min max avg operation')

        for name in sorted(self.timings.keys()):
            # Skip operations that either side has no measurements for.
            if not self.timings.get(name) or not other.timings.get(name):
                continue

            min_me, max_me, avg_me = self.min_max_avg(name)
            min_other, max_other, avg_other = other.min_max_avg(name)

            s.append('%-16s %-16s %-16s %s' % (
                cell(min_me, min_other),
                cell(max_me, max_other),
                cell(avg_me, avg_other),
                name))

        s.extend([
            '("1.23|+0.45" means factor=1.23, difference in seconds = 0.45',
            'factor < 1 or difference < 0 means \'%s\' is faster than \'%s\')'
            % (selfname, othername)])

        return '\n'.join(s)

    def add(self, other):
        """Merge all measurements of `other` into this instance."""
        for name, other_times in other.timings.items():
            self.timings.setdefault(name, []).extend(other_times)
+
+
+
+
# Short alias used throughout this file for building paths.
j = os.path.join

# Running counter that makes every generated name unique.
_create_count = 0


def next_name(prefix):
    """Return `prefix` followed by '_' and the next value of the counter."""
    global _create_count
    _create_count = _create_count + 1
    return prefix + '_' + str(_create_count)
+
def create_tree(in_dir, levels, spread=5):
    """Populate in_dir with a tree of generated files and directories.

    in_dir is created if it does not exist yet.  It receives `spread`
    files and, while `levels` is greater than 1, `spread` sub-directories
    that are filled recursively with levels - 1.

    Raises OSError if in_dir cannot be created and is not already a
    directory.
    """
    try:
        os.mkdir(in_dir)
    except OSError:
        # An already existing directory is fine; any other failure would
        # only resurface less clearly when the first file is written.
        if not os.path.isdir(in_dir):
            raise

    for _ in range(spread):
        # files
        fn = j(in_dir, next_name('file'))
        f = open(fn, 'w')
        try:
            f.write('This is %s\n' % fn)
        finally:
            f.close()

        # dirs
        if levels > 1:
            dn = j(in_dir, next_name('dir'))
            create_tree(dn, levels - 1, spread)
+
+
def svn(*args):
    """Run the svn client with `args` and time the invocation.

    The first argument (normally the sub-command) names the timing bucket
    the duration is recorded under.

    Returns the tuple (stdout, stderr); both are None if the executable
    could not be started (OSError).
    """
    name = args[0]

    ### options comes from the global namespace; it should be passed
    cmd = [options.svn] + list(args)
    if options.verbose:
        print('svn cmd: ' + ' '.join(cmd))

    ### timings comes from the global namespace; it should be passed
    timings.tic(name)
    try:
        # Nothing is ever sent to svn, so the child gets no stdin pipe.
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             shell=False)
        stdout, stderr = p.communicate()
    except OSError:
        stdout = stderr = None
    finally:
        timings.toc()

    if options.verbose:
        if stdout:
            print("STDOUT: [[[\n%s]]]" % stdout)
        if stderr:
            print("STDERR: [[[\n%s]]]" % stderr)

    return stdout, stderr
+
+
def add(*args):
    """Run 'svn add' with the given arguments."""
    return svn('add', *args)


def ci(*args):
    """Run 'svn commit' with the log message 'm' and the given arguments."""
    return svn('commit', '-mm', *args)


def up(*args):
    """Run 'svn update' with the given arguments."""
    return svn('update', *args)


def st(*args):
    """Run 'svn status' with the given arguments."""
    return svn('status', *args)
+
# The lowercase letters 'a' through 'z'.
_chars = [chr(code) for code in range(ord('a'), ord('z') + 1)]


def randstr(len=8):
    """Return a string of `len` random lowercase letters."""
    picked = []
    for _ in range(len):
        picked.append(random.choice(_chars))
    return ''.join(picked)
+
def _copy(path):
    """'svn copy' path to a new, uniquely numbered '<path>_copied_N'."""
    svn('copy', path, next_name(path + '_copied'))


def _move(path):
    """'svn move' path to '<path>_moved'."""
    svn('move', path, path + '_moved')
+
def _propmod(path):
    """Change, and possibly delete, one existing property of path.

    The property in the middle of the 'svn proplist' output is given a new
    random value; if path has more than one property, that same property
    is deleted afterwards.  Nothing happens when path has no properties or
    svn produced no output.
    """
    so = svn('proplist', path)[0]
    if not so:
        # svn() returns None for stdout when the executable failed to start.
        return

    # The first line of the proplist output is a heading, not a name.
    propnames = [line.strip() for line in so.strip().split('\n')[1:]]
    if not propnames:
        return

    # Floor division keeps the index an integer on every Python version.
    target = propnames[len(propnames) // 2]

    # modify?
    svn('ps', target, randstr(), path)

    # del?
    if len(propnames) > 1:
        svn('propdel', target, path)
+
+
def _propadd(path):
    """Set a new property with a random name and random value on path."""
    prop_name = randstr()
    prop_value = randstr()
    svn('propset', prop_name, prop_value, path)
+
+
def _mod(path):
    """Modify path: property change for a directory, appended text for a file."""
    if os.path.isdir(path):
        return _propmod(path)

    f = open(path, 'a')
    try:
        f.write('\n%s\n' % randstr())
    finally:
        # Close the handle even if the write fails.
        f.close()
+
def _add(path):
    """Add a new child below path; a file path is modified instead.

    A coin flip decides between creating a directory via 'svn mkdir' and
    writing a new file that is then scheduled with 'svn add'.
    """
    if os.path.isfile(path):
        return _mod(path)

    if random.choice((True, False)):
        # create a dir
        svn('mkdir', j(path, next_name('new_dir')))
    else:
        # create a file
        new_path = j(path, next_name('new_file'))
        f = open(new_path, 'w')
        try:
            f.write(randstr())
        finally:
            # Close the handle even if the write fails.
            f.close()
        svn('add', new_path)
+
def _del(path):
    """Schedule path for deletion with 'svn delete'."""
    svn('delete', path)


# The modifications modify_tree() picks from at random.  _copy, _move and
# _del are currently left out.
_mod_funcs = (_mod, _add, _propmod, _propadd, )#_copy,) # _move, _del)
+
def modify_tree(in_dir, fraction):
    """Randomly modify roughly `fraction` of the entries below in_dir.

    Entries whose name starts with '.' are skipped.  All children of
    in_dir are handled first; only afterwards are the sub-directories
    from that same listing visited recursively.
    """
    child_names = os.listdir(in_dir)

    # First pass: maybe apply one randomly chosen modification per child.
    for child_name in child_names:
        if child_name[0] != '.' and random.random() < fraction:
            modifier = random.choice(_mod_funcs)
            modifier(j(in_dir, child_name))

    # Second pass: descend into the directories that were listed above.
    for child_name in child_names:
        if child_name[0] == '.':
            continue
        path = j(in_dir, child_name)
        if os.path.isdir(path):
            modify_tree(path, fraction)
+
def propadd_tree(in_dir, fraction):
    """Add a random property to roughly `fraction` of the entries below in_dir.

    Entries whose name starts with '.' are skipped; sub-directories are
    visited recursively.
    """
    for child_name in os.listdir(in_dir):
        if child_name[0] == '.':
            continue
        path = j(in_dir, child_name)
        if random.random() < fraction:
            _propadd(path)
        if os.path.isdir(path):
            propadd_tree(path, fraction)
+
+
def rmtree_onerror(func, path, exc_info):
    """Error handler for ``shutil.rmtree``.

    If path is not writable, the mode of path is set to ``stat.S_IWUSR``
    and the failed operation ``func(path)`` is retried.

    If path is writable, the error has another cause and is re-raised.

    Usage : ``shutil.rmtree(path, onerror=rmtree_onerror)``
    """
    if os.access(path, os.W_OK):
        # Not an access problem: propagate the exception currently being
        # handled by the caller.
        raise

    os.chmod(path, stat.S_IWUSR)
    func(path)
+
+
def run(levels, spread, N):
    """Run the svn benchmark scenario N times and record the timings.

    Each run uses a fresh temporary directory holding a new repository and
    two working copies, and replays the same sequence of svn operations
    (the random generator is re-seeded so every run makes identical
    modifications).  The total duration is recorded under TOTAL_RUN in the
    global `timings`, whose summary is printed after each run.  The
    temporary directory is removed afterwards, also on failure.

    levels and spread are passed on to create_tree().  Exits the process
    with status 1 if 'svn --version' produces no output.
    """
    for i in range(N):
        base = tempfile.mkdtemp()

        # ensure identical modifications for every run
        random.seed(0)

        try:
            repos = j(base, 'repos')
            repos = repos.replace('\\', '/')
            wc = j(base, 'wc')
            wc2 = j(base, 'wc2')

            if repos.startswith('/'):
                file_url = 'file://%s' % repos
            else:
                file_url = 'file:///%s' % repos

            so, se = svn('--version')
            if not so:
                print("Can't find svn.")
                # sys.exit() rather than the site-provided exit(), which
                # is not guaranteed to exist.
                sys.exit(1)
            version = ', '.join([s.strip() for s in so.split('\n')[:2]])

            print('\nRunning svn benchmark in %s' % base)
            print('dir levels: %s; new files and dirs per leaf: %s; run %d of %d' % (
                levels, spread, i + 1, N))

            print(version)
            started = datetime.datetime.now()

            try:
                run_cmd(['svnadmin', 'create', repos])
                svn('checkout', file_url, wc)

                trunk = j(wc, 'trunk')
                create_tree(trunk, levels, spread)
                add(trunk)
                st(wc)
                ci(wc)
                up(wc)
                propadd_tree(trunk, 0.5)
                ci(wc)
                up(wc)
                st(wc)

                trunk_url = file_url + '/trunk'
                branch_url = file_url + '/branch'

                svn('copy', '-mm', trunk_url, branch_url)
                st(wc)

                up(wc)
                st(wc)

                svn('checkout', trunk_url, wc2)
                st(wc2)
                modify_tree(wc2, 0.5)
                st(wc2)
                ci(wc2)
                up(wc2)
                up(wc)

                svn('switch', branch_url, wc2)
                modify_tree(wc2, 0.5)
                st(wc2)
                ci(wc2)
                up(wc2)
                up(wc)

                modify_tree(trunk, 0.5)
                st(wc)
                ci(wc)
                up(wc2)
                up(wc)

                svn('merge', '--accept=postpone', trunk_url, wc2)
                st(wc2)
                svn('resolve', '--accept=mine-conflict', wc2)
                st(wc2)
                svn('resolved', '-R', wc2)
                st(wc2)
                ci(wc2)
                up(wc2)
                up(wc)

                svn('merge', '--accept=postpone', '--reintegrate', branch_url, trunk)
                st(wc)
                svn('resolve', '--accept=mine-conflict', wc)
                st(wc)
                svn('resolved', '-R', wc)
                st(wc)
                ci(wc)
                up(wc2)
                up(wc)

                svn('delete', j(wc, 'branch'))
                ci(wc)
                up(wc2)
                up(wc)

            finally:
                stopped = datetime.datetime.now()
                print('\nDone with svn benchmark in %s' % (stopped - started))

                ### timings comes from the global namespace; it should be passed
                timings.submit_timing(TOTAL_RUN,
                                      timedelta_to_seconds(stopped - started))

                # rename ps to prop mod
                ps_times = timings.timings.get('ps')
                if ps_times:
                    timings.timings.setdefault('prop mod', []).extend(ps_times)
                    del timings.timings['ps']

                print(timings.summary())
        finally:
            shutil.rmtree(base, onerror=rmtree_onerror)
+
+
def read_from_file(file_path):
    """Load the pickled object stored in file_path.

    The object's `name` attribute is set to the base name of file_path
    before it is returned.
    """
    # NOTE: unpickling can execute arbitrary code; only load run files
    # that come from a trusted source.
    stream = open(file_path, 'rb')
    try:
        loaded = cPickle.load(stream)
        loaded.name = os.path.basename(file_path)
    finally:
        stream.close()
    return loaded
+
+
def write_to_file(file_path, instance):
    """Pickle `instance` into file_path, replacing any existing content."""
    f = open(file_path, 'wb')
    try:
        cPickle.dump(instance, f)
    finally:
        # Close the handle even if pickling fails part-way.
        f.close()
+
def cmd_compare(path1, path2):
    """Print both run files' summaries, then path2 compared against path1."""
    first = read_from_file(path1)
    second = read_from_file(path2)

    for report in (first.summary(), second.summary()):
        print(report)
        print('---')
    print(second.compare_to(first))
+
def cmd_combine(dest, *paths):
    """Merge the timings of all run files in paths and write them to dest.

    The summary of the combined timings is printed before writing.
    """
    total = Timings('--version')

    for path in paths:
        total.add(read_from_file(path))

    print(total.summary())
    write_to_file(dest, total)
+
def cmd_run(timings_path, levels, spread, N=1):
    """Run the benchmark N times and store the timings in timings_path.

    levels, spread and N may be given as strings; they are converted with
    int().  If timings_path already exists, the new results are added to
    the timings stored there.
    """
    ### UGH! should pass to run()
    global timings

    levels = int(levels)
    spread = int(spread)
    N = int(N)

    print('\n\nHi, going to run a Subversion benchmark series of %d runs...' % N)

    if os.path.isfile(timings_path):
        print('Going to add results to existing file %s' % timings_path)
        timings = read_from_file(timings_path)
    else:
        print('Going to write results to new file %s' % timings_path)
        timings = Timings('--version')

    run(levels, spread, N)

    write_to_file(timings_path, timings)
+
def cmd_show(*paths):
    """Print the path and the timings summary of every given run file."""
    for timings_path in paths:
        loaded = read_from_file(timings_path)
        print('---\n%s' % timings_path)
        print(loaded.summary())
+
+
def usage():
    """Print this module's docstring, which holds the usage text."""
    print(__doc__)
+
if __name__ == '__main__':
    # Parse the global options, then dispatch on the sub-command.  Any
    # usage error prints the usage text and exits with status 1.
    parser = optparse.OptionParser()
    # -h is automatically added.
    ### should probably expand the help for that. and see about -?
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      help='Verbose operation')
    parser.add_option('--svn', action='store', dest='svn', default='svn',
                      help='Specify Subversion executable to use')

    ### should start passing this, but for now: make it global
    # (an assignment at module level already creates a module global, so
    # no 'global' statement is needed here)
    options, args = parser.parse_args()

    # there should be at least one arg left: the sub-command
    if not args:
        usage()
        sys.exit(1)

    cmd = args.pop(0)

    if cmd == 'compare':
        if len(args) != 2:
            usage()
            sys.exit(1)
        cmd_compare(*args)

    elif cmd == 'combine':
        if len(args) < 3:
            usage()
            sys.exit(1)
        cmd_combine(*args)

    elif cmd == 'run':
        if len(args) < 3 or len(args) > 4:
            usage()
            sys.exit(1)
        cmd_run(*args)

    elif cmd == 'show':
        if not args:
            usage()
            sys.exit(1)
        cmd_show(*args)

    else:
        # An unknown sub-command is a usage error like the ones above.
        usage()
        sys.exit(1)