diff options
author | Eliot Horowitz <eliot@10gen.com> | 2014-11-04 15:46:40 -0500 |
---|---|---|
committer | Eliot Horowitz <eliot@10gen.com> | 2014-11-05 11:21:19 -0500 |
commit | 5ca2daf551a2c631a5f573cb054406f5d49fbef5 (patch) | |
tree | b0a23d34ffdb376bac0b79ed17b5619cfc0d9b47 /src/third_party/wiredtiger/tools | |
parent | 017704acdfc7517efadb3fab167bba06c025c01a (diff) | |
download | mongo-5ca2daf551a2c631a5f573cb054406f5d49fbef5.tar.gz |
SERVER-15953: add wiredtiger to third_party
Diffstat (limited to 'src/third_party/wiredtiger/tools')
-rw-r--r-- | src/third_party/wiredtiger/tools/stat_data.py | 66 | ||||
-rw-r--r-- | src/third_party/wiredtiger/tools/statlog.py | 124 | ||||
-rw-r--r-- | src/third_party/wiredtiger/tools/wt_nvd3_util.py | 46 | ||||
-rw-r--r-- | src/third_party/wiredtiger/tools/wtperf_graph.py | 234 | ||||
-rw-r--r-- | src/third_party/wiredtiger/tools/wtperf_stats.py | 174 | ||||
-rw-r--r-- | src/third_party/wiredtiger/tools/wtstats.py | 236 |
6 files changed, 880 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/tools/stat_data.py b/src/third_party/wiredtiger/tools/stat_data.py new file mode 100644 index 00000000000..75a3b577472 --- /dev/null +++ b/src/third_party/wiredtiger/tools/stat_data.py @@ -0,0 +1,66 @@ +# DO NOT EDIT: automatically built by dist/stat.py. */ + +no_scale_per_second_list = [ + 'async: maximum work queue length', + 'cache: tracked dirty bytes in the cache', + 'cache: bytes currently in the cache', + 'cache: maximum bytes configured', + 'cache: tracked dirty pages in the cache', + 'cache: pages currently held in the cache', + 'conn: files currently open', + 'log: total log buffer size', + 'LSM: App work units currently queued', + 'LSM: Merge work units currently queued', + 'LSM: Switch work units currently queued', + 'reconciliation: split bytes currently awaiting free', + 'reconciliation: split objects currently awaiting free', + 'session: open cursor count', + 'session: open session count', + 'txn: transaction checkpoint currently running', + 'txn: transaction range of IDs currently pinned', + 'block manager: file allocation unit size', + 'block manager: checkpoint size', + 'block manager: file magic number', + 'block manager: file major version number', + 'block manager: minor version number', + 'block manager: file size in bytes', + 'LSM: bloom filters in the LSM tree', + 'LSM: total size of bloom filters', + 'btree: column-store variable-size deleted values', + 'btree: column-store fixed-size leaf pages', + 'btree: column-store internal pages', + 'btree: column-store variable-size leaf pages', + 'btree: number of key/value pairs', + 'btree: fixed-record size', + 'btree: maximum tree depth', + 'btree: maximum internal page item size', + 'btree: maximum internal page size', + 'btree: maximum leaf page item size', + 'btree: maximum leaf page size', + 'btree: overflow pages', + 'btree: row-store internal pages', + 'btree: row-store leaf pages', + 'cache: overflow values cached in memory', + 'LSM: chunks in the 
LSM tree', + 'LSM: highest merge generation in the LSM tree', + 'reconciliation: maximum blocks required for a page', + 'session: open cursor count', +] +no_clear_list = [ + 'cache: bytes currently in the cache', + 'cache: maximum bytes configured', + 'cache: pages currently held in the cache', + 'conn: files currently open', + 'log: total log buffer size', + 'log: maximum log file size', + 'LSM: App work units currently queued', + 'LSM: Merge work units currently queued', + 'LSM: Switch work units currently queued', + 'reconciliation: split bytes currently awaiting free', + 'reconciliation: split objects currently awaiting free', + 'session: open cursor count', + 'session: open session count', + 'txn: transaction checkpoint currently running', + 'txn: transaction range of IDs currently pinned', + 'session: open cursor count', +] diff --git a/src/third_party/wiredtiger/tools/statlog.py b/src/third_party/wiredtiger/tools/statlog.py new file mode 100644 index 00000000000..f32b46a9ec7 --- /dev/null +++ b/src/third_party/wiredtiger/tools/statlog.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#

import fileinput, os, shutil, sys, textwrap
from collections import defaultdict
from datetime import datetime
from subprocess import call

# Import the data describing which statistics should not be scaled
from stat_data import no_scale_per_second_list

TIMEFMT = "%b %d %H:%M:%S"
reportno = 0

# Plot a set of entries for a title.
def plot(title, values):
    """Write one gnuplot PNG report for a single statistic.

    title  -- the statistic description; the caller may have wrapped it
              with literal backslash-n separators for gnuplot's benefit.
    values -- list of (timestamp, value) string pairs, in input order.

    Statistics whose value never changes are skipped.  Values are divided
    by the interval between the first two samples unless the statistic is
    listed in no_scale_per_second_list.
    """
    global reportno

    if not values:
        return

    # Ignore entries where the value never changes.
    t0, v0 = values[0]
    if all(v == v0 for _, v in values):
        print('skipping: ' + title)
        return

    print('building: ' + title)
    reportno = reportno + 1
    num = "%03d" % reportno

    # The title was wrapped for display; undo that before looking the
    # statistic up, otherwise long names never match the no-scale list.
    # The first word of the description is dropped to get the statistic
    # name; a one-word description simply has no name to look up.
    words = title.replace('\\n', ' ').split(' ', 1)
    stat_name = words[1] if len(words) > 1 else ''

    ylabel = 'Value'
    if stat_name.split(' ', 1)[0] != 'spinlock' and \
        stat_name in no_scale_per_second_list:
        seconds = 1
    else:
        t1 = values[1][0]
        seconds = (datetime.strptime(t1, TIMEFMT) -
            datetime.strptime(t0, TIMEFMT)).seconds
        if seconds == 0:
            seconds = 1
        ylabel += ' per second'

    # Write the raw data into a file for processing.
    with open("reports/raw/report.%s.raw" % num, "w") as of:
        for t, v in sorted(values):
            of.write("%s %g\n" % (t, float(v) / seconds))

    # Write a command file for gnuplot.  The timestamp occupies the first
    # three space-separated columns, so the value is column 4.
    with open("gnuplot.cmd", "w") as of:
        of.write('''
set terminal png nocrop size 800,600
set autoscale
set grid
set style data linespoints
set title "%(title)s"
set xlabel "Time"
set xtics rotate by -45
set xdata time
set timefmt "%(timefmt)s"
set format x "%(timefmt)s"
set ylabel "%(ylabel)s"
set yrange [0:]
set output 'reports/report.%(num)s.png'
plot "reports/raw/report.%(num)s.raw" using 1:4 notitle''' % {
            'num' : num,
            'timefmt' : TIMEFMT,
            'title' : title,
            'ylabel' : ylabel,
            })

    try:
        # Run gnuplot.
        call(["gnuplot", "gnuplot.cmd"])
    finally:
        # Remove the command file, even if gnuplot could not be started.
        os.remove("gnuplot.cmd")

# Read the input into a dictionary of lists.
if sys.argv[1:] == []:
    print("usage: " + sys.argv[0] + " file ...")
    sys.exit(1)

# Remove and re-create the reports folder.
shutil.rmtree("reports", True)
os.makedirs("reports/raw")

d = defaultdict(list)
for line in fileinput.input(sys.argv[1:]):
    fields = line.strip('\n').split(" ", 4)
    # Each line is "month day time value description"; skip anything else
    # (blank or truncated lines) rather than dying on the unpack.
    if len(fields) != 5:
        continue
    month, day, time, v, desc = fields
    d[desc].append((month + " " + day + " " + time, v))

# Plot each entry in the dictionary, wrapping long titles for gnuplot.
for desc, samples in sorted(d.items()):
    plot('\\n'.join(textwrap.wrap(desc, 60)), samples)

# ----------------------------------------------------------------------
# diff --git a/src/third_party/wiredtiger/tools/wt_nvd3_util.py b/src/third_party/wiredtiger/tools/wt_nvd3_util.py
# new file mode 100644
# index 00000000000..6bf1396b0ff
# --- /dev/null
# +++ b/src/third_party/wiredtiger/tools/wt_nvd3_util.py
# @@ -0,0 +1,46 @@
# ----------------------------------------------------------------------
#!/usr/bin/env python
#
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
from datetime import datetime
from nvd3 import lineChart

# Add a multiChart type so we can overlay line graphs
class multiChart(lineChart):
    """A lineChart with two y axes ('yAxis1' and 'yAxis2')."""

    def __init__(self, **kwargs):
        lineChart.__init__(self, **kwargs)

        # Fix the axes: replace the single y axis with two, both using
        # the caller's y_axis_format (default '.02f').
        y_format = kwargs.get('y_axis_format', '.02f')
        del self.axislist['yAxis']
        self.create_y_axis('yAxis1', format=y_format)
        self.create_y_axis('yAxis2', format=y_format)

TIMEFMT = "%b %d %H:%M:%S"

# The timestamps carry no year, so parsed times are stamped with the year
# in which this module was imported.
thisyear = datetime.today().year
def parsetime(s):
    """Parse a TIMEFMT timestamp string into a datetime in thisyear."""
    return datetime.strptime(s, TIMEFMT).replace(year=thisyear)

# ----------------------------------------------------------------------
# diff --git a/src/third_party/wiredtiger/tools/wtperf_graph.py b/src/third_party/wiredtiger/tools/wtperf_graph.py
# new file mode 100644
# index 00000000000..f45145cf801
# --- /dev/null
# +++ b/src/third_party/wiredtiger/tools/wtperf_graph.py
# @@ -0,0 +1,234 @@
# ----------------------------------------------------------------------
#!/usr/bin/env python
#
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#

import csv, os, sys
from subprocess import call
# Python script to read wtperf monitor output and create a performance
# graph.

TIMEFMT = "%b %d %H:%M:%S"

# Operations in the order their latency columns appear in the monitor file.
OPERATIONS = ('read', 'insert', 'update')

# Run gnuplot on a command file, always removing the command file.
def _run_gnuplot(gcmd):
    try:
        call(["gnuplot", gcmd])
    finally:
        os.remove(gcmd)

# Write one gray background rectangle per (start, stop) checkpoint pair.
def _write_ckpt_rectangles(of, ckptlist):
    it = iter(ckptlist)
    for start, stop in zip(it, it):
        of.write("set object rectangle from first '%s', graph 0 "
            "to first '%s', graph 1 fc rgb \"gray\" back\n" % (start, stop))

def process_monitor(fname, sfx, ckptlist, opdict):
    """Scan a wtperf monitor file and graph operations per second.

    fname    -- path of the monitor CSV file.
    sfx      -- suffix used to name the output file 'monitor<sfx>.png'.
    ckptlist -- list filled IN PLACE with alternating checkpoint start and
                stop timestamps, so the caller can shade them elsewhere.
    opdict   -- dict keyed by 'read', 'insert' and 'update'; an entry is
                set to 1 once a non-zero count is seen for that operation.
    """
    # Read the monitor file and figure out when a checkpoint was running.
    in_ckpt = 'N'
    last_time = None

    # Empty the caller's list in place.  Rebinding the name here would
    # leave the caller with an empty list and no checkpoint shading.
    del ckptlist[:]

    ofname = 'monitor%s.png' % (sfx)
    # Monitor output format currently is:
    # time,totalsec,read,insert,update,ckpt,...latencies...
    csvcol = (2, 3, 4)
    with open(fname, 'r') as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank rows and comment/header rows.
            if not row or row[0].lstrip().startswith('#'):
                continue
            last_time = row[0]
            # Look for checkpoints and operations.
            if row[5] != in_ckpt:
                ckptlist.append(row[0])
                in_ckpt = row[5]
            for op, col in zip(OPERATIONS, csvcol):
                if row[col] != '0' and opdict[op] == 0:
                    opdict[op] = 1

    # A checkpoint still running at end of file is closed at the last
    # sample so the start/stop timestamps always pair up.
    if in_ckpt == 'Y':
        ckptlist.append(last_time)

    # Graph time vs. read, insert and update operations per second.
    gcmd = "gnuplot.mon.cmd"
    with open(gcmd, "w") as of:
        of.write('''
set autoscale
set datafile sep ','
set grid
set style data lines
set terminal png nocrop size 800,600
set timefmt "%(TIMEFMT)s"
set title "read, insert and update operations per second"
set format x "%(TIMEFMT)s"
set xlabel "Time"
set xtics rotate by -45
set xdata time
set ylabel "Operations per second (thousands)"
set yrange [0:]\n''' % {
            'TIMEFMT' : TIMEFMT
            })
        _write_ckpt_rectangles(of, ckptlist)
        of.write('set output "%s"\n' % (ofname))
        of.write("""plot "{name}" using 1:($3/1000) title "Reads", \\
    "{name}" using 1:($4/1000) title "Inserts",\\
    "{name}" using 1:($5/1000) title "Updates"
""".format(name=fname))
    _run_gnuplot(gcmd)

# Graph time vs. average, minimum, maximum latency for an operation.
def plot_latency_operation(name, fname, sfx, ckptlist, col_avg, col_min, col_max):
    """Plot the latency columns of one operation to '<name><sfx>.latency1.png'.

    col_avg, col_min and col_max are the gnuplot column numbers in the
    monitor file fname; checkpoint periods in ckptlist are shaded gray.
    """
    gcmd = "gnuplot." + name + ".l1.cmd"
    with open(gcmd, "w") as of:
        of.write('''
set autoscale
set datafile sep ','
set grid
set style data lines
set terminal png nocrop size 800,600
set timefmt "%(TIMEFMT)s"
set title "%(NAME)s: average, minimum and maximum latency"
set format x "%(TIMEFMT)s"
set xlabel "Time"
set xtics rotate by -45
set xdata time
set ylabel "Latency (us)"
set logscale y
set yrange [1:]\n''' % {
            'NAME' : name,
            'TIMEFMT' : TIMEFMT
            })
        _write_ckpt_rectangles(of, ckptlist)
        ofname = name + sfx + '.latency1.png'
        of.write('set output "' + ofname + '"\n')
        of.write('plot "%s" using 1:($%d) title "Average Latency", '
            '"%s" using 1:($%d) title "Minimum Latency", '
            '"%s" using 1:($%d) title "Maximum Latency"\n' %
            (fname, col_avg, fname, col_min, fname, col_max))
    _run_gnuplot(gcmd)


# Graph latency vs. % operations
def plot_latency_percent(name, dirname, sfx, ckptlist):
    """Plot the latency distribution to '<name><sfx>.latency2.png'.

    Does nothing when 'latency.<name>' is absent from dirname.  ckptlist
    is accepted for symmetry with the other plot functions and is unused.
    """
    lfile = os.path.join(dirname, 'latency.' + name)
    # NOTE(review): existence is checked without the suffix, but the file
    # plotted below is lfile + sfx -- confirm which name wtperf writes.
    if not os.path.exists(lfile):
        return
    gcmd = "gnuplot." + name + ".l2.cmd"
    with open(gcmd, "w") as of:
        of.write('''
set autoscale
set datafile sep ','
set grid
set style data points
set terminal png nocrop size 800,600\n''')
        of.write('set title "' + name + ': latency distribution"\n')
        # This template is not %-formatted, so the percent sign must be
        # written singly or the label comes out as "%% operations".
        of.write('''
set xlabel "Latency (us)"
set xrange [1:]
set xtics rotate by -45
set logscale x
set ylabel "% operations"
set yrange [0:]\n''')
        ofname = name + sfx + '.latency2.png'
        of.write('set output "' + ofname + '"\n')
        of.write('plot "' + lfile + sfx +
            '" using (($2 * 100)/$4) title "' + name + '"\n')
    _run_gnuplot(gcmd)


# Graph latency vs. % operations (cumulative)
def plot_latency_cumulative_percent(name, dirname, sfx, ckptlist):
    """Plot the cumulative latency distribution to '<name><sfx>.latency3.png'.

    Does nothing when 'latency.<name>' is absent from dirname.  ckptlist
    is accepted for symmetry with the other plot functions and is unused.
    """
    lfile = os.path.join(dirname, 'latency.' + name)
    # NOTE(review): same suffix mismatch as in plot_latency_percent.
    if not os.path.exists(lfile):
        return
    # Latency plot: cumulative operations vs. latency
    gcmd = "gnuplot." + name + ".l3.cmd"
    with open(gcmd, "w") as of:
        of.write('''
set autoscale
set datafile sep ','
set grid
set style data lines
set terminal png nocrop size 800,600
set title "%(NAME)s: cumulative latency distribution"
set xlabel "Latency (us)"
set xrange [1:]
set xtics rotate by -45
set logscale x
set ylabel "%% operations"
set yrange [0:]\n''' % {
            'NAME' : name
            })
        ofname = name + sfx + '.latency3.png'
        of.write('set output "' + ofname + '"\n')
        of.write('plot "' + lfile + sfx +
            '" using 1:(($3 * 100)/$4) title "' + name + '"\n')
    _run_gnuplot(gcmd)

def process_file(fname):
    """Generate every graph for one wtperf monitor file."""
    ckptlist = []
    opdict = dict((op, 0) for op in OPERATIONS)

    # This assumes the monitor file has the string "monitor"
    # and any other (optional) characters in the filename are a suffix.
    sfx = os.path.basename(fname).replace('monitor','')
    dirname = os.path.dirname(fname)

    process_monitor(fname, sfx, ckptlist, opdict)
    column = 7 # average, minimum, maximum start in column 7
    # NOTE: The operations must be visited in this exact order to match
    # the operation latency output in the monitor file, so walk the
    # ordered tuple rather than the (unordered) dictionary.
    for op in OPERATIONS:
        if opdict[op] != 0:
            plot_latency_operation(
                op, fname, sfx, ckptlist, column, column + 1, column + 2)
            plot_latency_percent(op, dirname, sfx, ckptlist)
            plot_latency_cumulative_percent(op, dirname, sfx, ckptlist)
        else:
            print(fname + ': no ' + op + ' operations found. Skip.')
        column = column + 3

def main():
    # This program takes a list of monitor files generated by
    # wtperf. If no args are given, it looks for a single file
    # named 'monitor'.
    for fname in sys.argv[1:] or ['monitor']:
        process_file(fname)

if __name__ == '__main__':
    main()

# ----------------------------------------------------------------------
# diff --git a/src/third_party/wiredtiger/tools/wtperf_stats.py b/src/third_party/wiredtiger/tools/wtperf_stats.py
# new file mode 100644
# index 00000000000..6f2f6dda682
# --- /dev/null
# +++ b/src/third_party/wiredtiger/tools/wtperf_stats.py
# @@ -0,0 +1,174 @@
# ----------------------------------------------------------------------
#!/usr/bin/env python
#
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#

import os, csv, operator
import sys
from time import mktime

try:
    from wt_nvd3_util import multiChart, parsetime
except ImportError:
    sys.stderr.write("Could not import wt_nvd3_util.py, it should be "
        "in the same directory as %s\n" % sys.argv[0])
    sys.exit(-1)

def timesort(s):
    """Sort key for a CSV row: the parsed datetime of its '#time' field."""
    # Sort the timestr via its parsetime() value so that the year gets
    # added and it properly sorts. Times are only %b %d %H:%M:%S and
    # may improperly sort if the data crosses a month boundary.
    t = operator.itemgetter('#time')
    timestr = t(s)
    return parsetime(timestr)

# Whole seconds in a timedelta, counting full days.  The timestamps carry
# no year, so a negative difference is kept to its time-of-day component.
def _elapsed_seconds(delta):
    total = delta.days * 86400 + delta.seconds
    return total if total >= 0 else delta.seconds

# Fixup the names and values in a dictionary read in from a csv file. One
# field must be "#time" - which is used to calculate the interval.
# Input is a dictionary, output is a list of dictionaries with a single entry.
def munge_dict(values_dict, abstime):
    """Convert monitor CSV rows into a list of per-sample dictionaries.

    values_dict -- iterable of row dictionaries (e.g. a csv.DictReader),
                   each with a '#time' field.
    abstime     -- if true, 'time' is milliseconds since the epoch;
                   otherwise seconds since the first sample.

    Columns whose value never changes are dropped.  Returns an empty list
    when there are no rows.
    """
    sorted_values = sorted(values_dict, key=timesort)
    if not sorted_values:
        return []
    start_time = parsetime(sorted_values[0]['#time'])

    ret = []
    for v in sorted_values:
        if abstime:
            # Build the time series, milliseconds since the epoch
            v['#time'] = int(mktime(parsetime(v['#time']).timetuple())) * 1000
        else:
            # Build the time series as seconds since the start of the data.
            # Count whole days too, so runs longer than 24 hours do not
            # wrap back to zero.
            v['#time'] = _elapsed_seconds(parsetime(v['#time']) - start_time)
        next_val = {}
        for title, value in v.items():
            if title.find('uS') != -1:
                title = title.replace('uS', 'ms')
                value = float(value) / 1000
            if title == 'totalsec':
                value = 0
            if title == 'checkpoints' and value == 'N':
                value = 0
            elif title.find('time') != -1:
                title = 'time'
            elif title.find('latency') == -1 and \
                title.find('checkpoints') == -1:
                title = title + ' (thousands)'
                value = float(value) / 1000
            next_val[title] = value
        ret.append(next_val)

    # After building the series, eliminate constants
    for t0, v0 in list(ret[0].items()):
        if all(d[t0] == v0 for d in ret):
            for dicts in ret:
                del dicts[t0]

    return ret

def addPlotsToChart(chart, graph_data, wtstat_chart = False):
    """Add one line per field of graph_data to chart.

    graph_data is the list returned by munge_dict.  Unless wtstat_chart
    is true, non-latency fields go on the second y axis.
    """
    if not graph_data:
        return

    # Extract the times - they are the same for all lines.
    times = [v['time'] for v in graph_data]

    # Add a line to the graph for each field in the CSV file in alphabetical
    # order, so the key is sorted.
    for field in sorted(graph_data[0].keys()):
        if field == 'time':
            continue
        # Split the latency and non-latency measurements onto different scales
        axis = "1"
        if not wtstat_chart and field.find('latency') == -1:
            axis="2"
        ydata = [v[field] for v in graph_data]
        chart.add_serie(x=times, y=ydata, name=field, type="line", yaxis=axis)

# Input parameters are a chart populated with WiredTiger statistics and
# the directory where the wtperf monitor file can be found.
def addPlotsToStatsChart(chart, dirname, abstime):
    """Overlay the wtperf 'monitor' file found in dirname onto chart."""
    fname = os.path.join(dirname, 'monitor')
    try:
        with open(fname, 'rb') as csvfile:
            reader = csv.DictReader(csvfile)
            # Transform the data into something NVD3 can digest
            graph_data = munge_dict(reader, abstime)
    except IOError:
        sys.stderr.write("Could not open wtperf monitor file.\n")
        sys.exit(-1)
    addPlotsToChart(chart, graph_data, 1)

def main():
    # Parse the command line
    import argparse

    parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.')
    parser.add_argument('--abstime', action='store_true',
        help='use absolute time on the x axis')
    parser.add_argument('--output', '-o', metavar='file',
        default='wtperf_stats.html', help='HTML output file')
    parser.add_argument('files', metavar='file', nargs='+',
        help='input monitor file generated by WiredTiger wtperf application')
    args = parser.parse_args()

    # Validate before touching the output file so that a usage error does
    # not truncate an existing report.
    if len(args.files) != 1:
        print('Script currently only supports a single monitor file')
        sys.exit(1)

    chart_extra = {}
    # Add in the x axis if the user wants time.
    if args.abstime:
        chart_extra['x_axis_format'] = '%H:%M:%S'

    for f in args.files:
        with open(f, 'rb') as csvfile:
            reader = csv.DictReader(csvfile)
            # Transform the data into something NVD3 can digest
            graph_data = munge_dict(reader, args.abstime)

    if not graph_data:
        sys.stderr.write("No data found in %s\n" % args.files[0])
        sys.exit(1)

    chart = multiChart(name='wtperf',
                      height=450 + 10*len(graph_data[0].keys()),
                      resize=True,
                      x_is_date=args.abstime,
                      assets_directory='http://source.wiredtiger.com/graphs/',
                      **chart_extra)

    addPlotsToChart(chart, graph_data)

    chart.buildhtml()
    with open(args.output, 'w') as output_file:
        output_file.write(chart.htmlcontent)

if __name__ == '__main__':
    main()

# ----------------------------------------------------------------------
# diff --git a/src/third_party/wiredtiger/tools/wtstats.py b/src/third_party/wiredtiger/tools/wtstats.py
# new file mode 100644
# index 00000000000..371af6b4f1a
# --- /dev/null
# +++ b/src/third_party/wiredtiger/tools/wtstats.py
# @@ -0,0 +1,236 @@
# ----------------------------------------------------------------------
#!/usr/bin/env python
#
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +import fileinput, os, re, shutil, sys, textwrap +from collections import defaultdict +from time import mktime +from subprocess import call + +try: + from stat_data import no_scale_per_second_list, no_clear_list +except ImportError: + print >>sys.stderr, "Could not import stat_data.py, it should be\ + in the same directory as %s" % sys.argv[0] + sys.exit(-1) + +try: + from wtperf_stats import addPlotsToStatsChart +except ImportError: + print >>sys.stderr, "Could not import wtperf_stats.py, it should be\ + in the same directory as %s" % sys.argv[0] + sys.exit(-1) + +try: + from wt_nvd3_util import multiChart, parsetime +except ImportError: + print >>sys.stderr, "Could not import wt_nvd3_util.py, it should be\ + in the same directory as %s" % sys.argv[0] + sys.exit(-1) + +try: + from nvd3 import lineChart, lineWithFocusChart +except ImportError: + print >>sys.stderr, "Could not import nvd3. Please install it *from source* (other versions may be missing features that we rely on). Run these commands: git clone https://github.com/areski/python-nvd3.git ; cd python-nvd3 ; sudo python setup.py install" + sys.exit(-1) + +# Plot a set of entries for a title. 
# Whole seconds in a timedelta, counting full days.  The timestamps carry
# no year, so a negative difference is kept to its time-of-day component.
def _elapsed_seconds(delta):
    total = delta.days * 86400 + delta.seconds
    return total if total >= 0 else delta.seconds

def munge(title, values):
    """Turn the raw samples of one statistic into a plottable series.

    title  -- statistic description from the statistics log.
    values -- list of (timestamp, value) string pairs, in input order.

    Returns (ylabel, ydata), where ylabel is the lower-cased title (with
    ' per second' appended when the values are scaled) and ydata maps each
    x-axis value to a y value.  Reads the command-line settings from the
    module-level 'args'.
    """
    t0, v0 = values[0]
    start_time = parsetime(t0)

    # The first word of the title is dropped to get the statistic name
    # used by the stat_data lists; a one-word title has no such name.
    parts = title.split(' ', 1)
    stat_name = parts[1] if len(parts) > 1 else ''

    ylabel = title.lower()
    if stat_name.split(' ', 1)[0] != 'spinlock' and \
        stat_name in no_scale_per_second_list:
        seconds = 1
    else:
        # Scale by the interval between the first two samples.
        seconds = 1
        if len(values) > 1:
            t1, v1 = values[1]
            seconds = (parsetime(t1) - start_time).seconds
        ylabel += ' per second'
        if seconds == 0:
            seconds = 1

    # Values are plotted as reported, not as deltas, when the statistics
    # were gathered with clear set or the statistic is in no_clear_list.
    stats_cleared = bool(args.clear or stat_name in no_clear_list)

    # Split the values into a dictionary of y-axis values keyed by the x axis
    ydata = {}
    last_value = 0.0
    for t, v in sorted(values):
        if args.abstime:
            # Build the time series, milliseconds since the epoch
            x = int(mktime(parsetime(t).timetuple())) * 1000
        else:
            # Build the time series as seconds since the start of the data.
            # Count whole days too, so runs longer than 24 hours do not
            # wrap back to zero.
            x = _elapsed_seconds(parsetime(t) - start_time)

        float_v = float(v)
        if not stats_cleared:
            float_v = float_v - last_value
            # Sometimes WiredTiger stats go backwards without clear, assume
            # that means nothing happened
            if float_v < 0:
                float_v = 0.0
            last_value = float(v)
        ydata[x] = float_v / seconds

    return ylabel, ydata

# Parse the command line
import argparse

parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.')
parser.add_argument('--abstime', action='store_true',
    help='use absolute time on the x axis')
parser.add_argument('--clear', action='store_true',
    help='WiredTiger stats gathered with clear set')
parser.add_argument('--focus', action='store_true',
    help='generate a chart with focus slider')
parser.add_argument('--include', '-I', metavar='regexp',
    type=re.compile, action='append',
    help='include series with titles matching the specified regexp')
parser.add_argument('--list', action='store_true',
    help='list the series that would be displayed')
parser.add_argument('--output', '-o', metavar='file', default='wtstats.html',
    help='HTML output file')
parser.add_argument('--right', '-R', metavar='regexp',
    type=re.compile, action='append',
    help='use the right axis for series with titles matching the specified regexp')
parser.add_argument('--wtperf', '-w', action='store_true',
    help='Plot wtperf statistics on the same graph')
parser.add_argument('files', metavar='file', nargs='+',
    help='input files generated by WiredTiger statistics logging')
args = parser.parse_args()

# The focus chart type has a single y axis, so the two options conflict.
if args.focus and args.right:
    sys.stderr.write("focus charts cannot have a right-hand y-axis\n")
    sys.exit(-1)

# Don't require users to specify regexps twice for right axis
if args.include and args.right:
    args.include += args.right

# Read the input file(s) into a dictionary of lists.
d = defaultdict(list)
for f in args.files:
    with open(f, 'rU') as statfile:
        for line in statfile:
            fields = line.strip('\n').split(" ", 4)
            # Each line is "month day time value title"; skip anything
            # else (blank or truncated lines).
            if len(fields) != 5:
                continue
            month, day, time, v, title = fields
            d[title].append((month + " " + day + " " + time, v))

# Process the series, eliminate constants
for title, values in sorted(d.items()):
    t0, v0 = values[0]
    if all(v == v0 for t, v in values):
        #print "Skipping", title
        del d[title]

# Common prefix / suffix elimination
prefix = suffix = None

def common_prefix(a, b):
    """Return the longest prefix of a that is also a prefix of b."""
    while not b.startswith(a):
        a = a[:-1]
    return a

def common_suffix(a, b):
    """Return the longest suffix of b that is also a suffix of a."""
    while not a.endswith(b):
        b = b[1:]
    return b

# Split out the data, convert timestamps
results = []
for title, values in sorted(d.items()):
    title, ydata = munge(title, values)
    # Ignore entries if a list of regular expressions was given
    if args.include and not [r for r in args.include if r.search(title)]:
        continue
    yaxis = args.right and [r for r in args.right if r.search(title)]
    prefix = title if prefix is None else common_prefix(prefix, title)
    suffix = title if suffix is None else common_suffix(title, suffix)
    results.append((title, yaxis, ydata))

# Process titles, eliminate common prefixes and suffixes.  With a single
# series the "common" prefix is the whole title, so leave it alone rather
# than producing an empty name.
if len(results) > 1 and (prefix or suffix):
    new_results = []
    for title, yaxis, ydata in results:
        title = title[len(prefix):]
        if suffix:
            title = title[:-len(suffix)]
        new_results.append((title, yaxis, ydata))
    results = new_results

# Dump the results as a CSV file
#print '"time", ' + ', '.join('"%s"' % title for title, values in ydata)
#for i in xrange(len(xdata)):
#    print '%d, %s' % (xdata[i], ', '.join('%g' % values[i] for title, values in ydata))

# Are we just listing the results?
if args.list:
    for title, yaxis, ydata in results:
        print(title)
    sys.exit(0)

# Figure out the full set of x axis values: the union of the x values of
# every series, not just those of the last series processed.
xdata = sorted(set(x for title, yaxis, ydata in results for x in ydata))

#---------------------------------------
if args.right:
    charttype = multiChart
elif args.focus:
    charttype = lineWithFocusChart
else:
    charttype = lineChart

chart_extra = {}
# Add in the x axis if the user wants time.
if args.abstime:
    chart_extra['x_axis_format'] = '%H:%M:%S'

# Create the chart, add the series
chart = charttype(name='statlog', height=450+10*len(results), resize=True, x_is_date=args.abstime, y_axis_format='g', assets_directory='http://source.wiredtiger.com/graphs/', **chart_extra)

for title, yaxis, ydata in results:
    # Samples missing from a series are plotted as zero.
    chart.add_serie(x=xdata, y=[ydata.get(x, 0) for x in xdata], name=title,
                    type="line", yaxis="2" if yaxis else "1")

if args.wtperf:
    addPlotsToStatsChart(chart, os.path.dirname(args.files[0]), args.abstime)

chart.buildhtml()

# Write the HTML file.
with open(args.output, 'w') as output_file:
    output_file.write(chart.htmlcontent)