summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Rueckstiess <thomas@rueckstiess.net>2015-02-16 12:50:26 +1100
committerThomas Rueckstiess <thomas@rueckstiess.net>2015-02-16 12:50:26 +1100
commit4733961a3c1fa37988178d1b1dd4eb44d83b63f6 (patch)
tree48a3f68224aa5753b3c34f2a6fb5ffb8b6998690
parent84ab01d54b0b8225af4f78a4fc331a4a973317f0 (diff)
downloadmongo-4733961a3c1fa37988178d1b1dd4eb44d83b63f6.tar.gz
fixes and improvements for wtperf parsing
- convert wtperf microsec to millisec
- don't skip monitor* files when parsing directory
- parsing code reorganization
- renamed wtperf stats fixture
- added tests
-rw-r--r--tools/wtstats/test/monitor.fixture (renamed from tools/wtstats/test/WiredTigerPerf.fixture)0
-rw-r--r--tools/wtstats/test/test_wtstats.py49
-rwxr-xr-xtools/wtstats/wtstats.py333
3 files changed, 237 insertions, 145 deletions
diff --git a/tools/wtstats/test/WiredTigerPerf.fixture b/tools/wtstats/test/monitor.fixture
index 5306a4bd1fd..5306a4bd1fd 100644
--- a/tools/wtstats/test/WiredTigerPerf.fixture
+++ b/tools/wtstats/test/monitor.fixture
diff --git a/tools/wtstats/test/test_wtstats.py b/tools/wtstats/test/test_wtstats.py
index 80078d97667..dd6098fec49 100644
--- a/tools/wtstats/test/test_wtstats.py
+++ b/tools/wtstats/test/test_wtstats.py
@@ -75,7 +75,8 @@ def helper_run_with_fixture(kwargs=None):
# path replacement
kwargs['--output'] = os.path.join(test_dir, kwargs['--output'])
- statsfile = os.path.join(test_dir, 'WiredTigerStat.fixture')
+ statsfile = os.path.join(test_dir, kwargs['files'] if 'files' in kwargs else 'WiredTigerStat.fixture')
+ print "ST", statsfile
arglist = ['./wtstats', statsfile]
for item in kwargs.items():
@@ -175,7 +176,51 @@ def test_output_option():
outfile = '_foo_bar_baz.html'
helper_run_with_fixture({'--output': outfile})
assert os.path.exists(os.path.join(test_dir, outfile))
-
+
+@with_setup(setUp, tearDown)
+def test_monitor_stats_start_with_wtperf():
+ """ wtstats should be able to parse wtperf monitor files """
+
+ outfile = '_foo_bar_baz.html'
+ helper_run_with_fixture({'files': 'monitor.fixture', '--output': outfile})
+ data = helper_get_json_from_file(outfile)
+
+ series_keys = map(lambda x: x['key'], data['series'])
+ for key in series_keys:
+ assert key.startswith('wtperf:')
+
+ assert os.path.exists(os.path.join(test_dir, outfile))
+
+
+@with_setup(setUp, tearDown)
+def test_monitor_stats_convert_us_to_ms():
+ """ wtstats should convert monitor stats us to ms """
+
+ outfile = '_foo_bar_baz.html'
+ helper_run_with_fixture({'files': 'monitor.fixture', '--output': outfile})
+ data = helper_get_json_from_file(outfile)
+
+ series_keys = map(lambda x: x['key'], data['series'])
+ for key in series_keys:
+ assert '(uS)' not in key
+
+ values = (item['values'] for item in data['series'] if item['key'] == 'wtperf: insert maximum latency (ms)').next().values()
+ assert max(values) == 103687 / 1000.
+
+
+
+@with_setup(setUp, tearDown)
+def test_directory_with_wtstats_and_wtperf():
+ """ wtstats should be able to parse directories containing both types """
+
+ outfile = '_test_output_file.html'
+ helper_run_with_fixture({'files': '.', '--output': outfile})
+ data = helper_get_json_from_file(outfile)
+
+ series_keys = map(lambda x: x['key'], data['series'])
+ assert any(map(lambda title: 'block-manager' in title, series_keys))
+ assert any(map(lambda title: 'wtperf' in title, series_keys))
+
@with_setup(setUp, tearDown)
def test_add_ext_if_missing():
diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py
index 4c6880f0409..3749ffd6c63 100755
--- a/tools/wtstats/wtstats.py
+++ b/tools/wtstats/wtstats.py
@@ -88,9 +88,186 @@ def munge(args, title, values):
return ylabel, ydata
+
# Parse the command line
import argparse
+def common_prefix(a, b):
+ """ compute longest common prefix of a and b """
+ while not b.startswith(a):
+ a = a[:-1]
+ return a
+
+
+def common_suffix(a, b):
+ """ compute longest common suffix of a and b """
+ while not a.endswith(b):
+ b = b[1:]
+ return b
+
+
+def parse_wtstats_file(file, result):
+ """ parse wtstats file, one stat per line, example format:
+ Dec 05 14:43:14 0 /data/b block-manager: mapped bytes read
+ """
+ print 'Processing wtstats file: ' + file
+
+ # Parse file
+ for line in open(file, 'rU'):
+ month, day, time, v, title = line.strip('\n').split(" ", 4)
+ result[title].append((month + " " + day + " " + time, v))
+
+
+
+def parse_wtperf_file(file, result):
+ """ parse wtperf file, all stats on single line, example format:
+ Feb 13 17:55:14,0,0,156871,0,N,0,0,0,49,6,6146,0,0,0
+ """
+ print 'Processing wtperf file: ' + file
+ fh = open(file, 'rU')
+
+ # first line contains headings, replace microseconds with milliseconds
+ headings = fh.next().strip('\n').split(',')[1:]
+ headings = map(lambda h: h.replace('(uS)', ' (ms)'), headings)
+
+ # parse rest of file
+ for line in fh:
+ month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3)
+ values = values.split(',')
+ for i, v in enumerate(values):
+ if v == 'N':
+ v = 0
+ # convert us to ms
+ if '(ms)' in headings[i]:
+ v = float(v) / 1000.0
+ result['wtperf: ' + headings[i]].append((month + " " + day + " " + time, v))
+
+
+def skip_constants(result):
+ # Process the series, eliminate constants, delete totalsec for wtperf
+ items = list(result.iteritems())
+
+ for title, values in items:
+ skip = True
+ t0, v0 = values[0]
+ for t, v in values:
+ if v != v0:
+ skip = False
+ break
+
+ if title == 'wtperf: totalsec':
+ skip = True
+
+ if skip:
+ del result[title]
+
+ return result
+
+
+def parse_files(files_or_dir):
+ """ walk through file list or directory and parse according to file type (wtstats / wtperf). """
+
+ result = defaultdict(list)
+
+ for f in files_or_dir:
+ if os.path.isfile(f):
+ # peek at first line to determine type
+ with open(f, 'rU') as fh:
+ line = fh.readline()
+ if line.startswith('#time'):
+ parse_wtperf_file(f, result)
+ else:
+ parse_wtstats_file(f, result)
+
+ elif os.path.isdir(f):
+ for s in glob(os.path.join(f, 'WiredTigerStat*')):
+ parse_wtstats_file(s, result)
+
+ for s in glob(os.path.join(f, 'monitor*')):
+ parse_wtperf_file(s, result)
+
+ return result
+
+
+
+def output_series(results, args, prefix=None, grouplist=[]):
+ """ Write the data into the html template """
+
+ # add .html ending if not present
+ filename, ext = os.path.splitext(args.output)
+ if ext == '':
+ ext = '.html'
+
+ # open the output file based on prefix
+ if prefix == None:
+ outputname = filename + ext
+ elif len(grouplist) == 0:
+ outputname = filename +'.' + prefix + ext
+ else:
+ outputname = filename +'.group.' + prefix + ext
+
+ if prefix != None and len(grouplist) == 0:
+ this_series = []
+ for title, ydata in results:
+ if not prefix in title:
+ continue
+ #print 'Appending to dataset: ' + title
+ this_series.append((title, ydata))
+ elif prefix != None and len(grouplist) > 0:
+ this_series = []
+ for title, ydata in results:
+ for subgroup in grouplist:
+ if not subgroup in title:
+ continue
+ # print 'Appending to dataset: ' + title
+ this_series.append((title, ydata))
+ else:
+ this_series = results
+
+ if len(this_series) == 0:
+ print 'Output: ' + outputname + ' has no data. Do not create.'
+ return
+
+
+ json_output = { "series": [] }
+
+ for title, ydata in this_series:
+ json_output["series"].append({
+ "key": title,
+ "values": ydata,
+ });
+
+ # load template
+ this_path = os.path.dirname(os.path.realpath(__file__))
+ srcfile = os.path.join(this_path, 'wtstats.html.template')
+ try:
+ srcfile = open(srcfile)
+ contents = srcfile.read()
+ except IOError:
+ print >>sys.stderr, "Cannot find template file 'wtstats.html." \
+ "template'. See ./template/README.md for more information."
+ sys.exit(-1)
+
+ srcfile.close()
+
+ # if --json write data to <filename>.json
+ if args.json:
+ jsonfile = filename + '.json'
+ with open(jsonfile, 'w') as f:
+ json.dump(json_output, f)
+ print "created %s" % jsonfile
+
+ # write output file
+ dstfile = open(outputname, 'wt')
+ replaced_contents = contents.replace('"### INSERT DATA HERE ###"',
+ json.dumps(json_output))
+ dstfile.write(replaced_contents)
+ dstfile.close()
+ print "created %s" % dstfile.name
+
+
+
+
def main():
parser = argparse.ArgumentParser(description='Create graphs from' \
'WiredTiger statistics.')
@@ -112,149 +289,17 @@ def main():
'logging')
args = parser.parse_args()
- # Read the input file(s) into a dictionary of lists.
- def getfiles(l):
- for f in l:
- if os.path.isfile(f):
- yield f
- elif os.path.isdir(f):
- for s in glob(os.path.join(f, 'WiredTigerStat*')):
- print 'Processing ' + s
- yield s
-
- d = defaultdict(list)
-
- for f in getfiles(args.files):
- wtperf_mode = False
- for line in open(f, 'rU'):
- if line.startswith('#time'):
- # wtperf file, read headings and switch to wtperf mode
- wtperf_mode = True
- wtperf_headings = line.strip('\n').split(',')[1:]
- continue
-
- elif wtperf_mode:
- # wtperf file, all stats are on a single line
- month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3)
- values = values.split(',')
- for i, v in enumerate(values):
- if v == 'N':
- v = 0
- d['wtperf: ' + wtperf_headings[i]].append((month + " " + day + " " + time, v))
-
- else:
- # wtstats file, one stat per line
- month, day, time, v, title = line.strip('\n').split(" ", 4)
- d[title].append((month + " " + day + " " + time, v))
+ # Parse files or directory and skip constants
+ parsed = skip_constants(parse_files(args.files))
-
- # Process the series, eliminate constants, delete totalsec for wtperf
- for title, values in sorted(d.iteritems()):
- skip = True
- t0, v0 = values[0]
- for t, v in values:
- if v != v0:
- skip = False
- break
-
- if title == 'wtperf: totalsec':
- skip = True
-
- if skip:
- #print "Skipping", title
- del d[title]
-
- # Common prefix / suffix elimination
+ # filter results based on --include, compute common prefix and suffix
+ results = []
prefix = suffix = None
- def common_prefix(a, b):
- while not b.startswith(a):
- a = a[:-1]
- return a
-
- def common_suffix(a, b):
- while not a.endswith(b):
- b = b[1:]
- return b
-
- def output_series(results, prefix=None, grouplist=[]):
- # add .html ending if not present
- filename, ext = os.path.splitext(args.output)
- if ext == '':
- ext = '.html'
-
- # open the output file based on prefix
- if prefix == None:
- outputname = filename + ext
- elif len(grouplist) == 0:
- outputname = filename +'.' + prefix + ext
- else:
- outputname = filename +'.group.' + prefix + ext
-
- if prefix != None and len(grouplist) == 0:
- this_series = []
- for title, ydata in results:
- if not prefix in title:
- continue
- #print 'Appending to dataset: ' + title
- this_series.append((title, ydata))
- elif prefix != None and len(grouplist) > 0:
- this_series = []
- for title, ydata in results:
- for subgroup in grouplist:
- if not subgroup in title:
- continue
- # print 'Appending to dataset: ' + title
- this_series.append((title, ydata))
- else:
- this_series = results
-
- if len(this_series) == 0:
- print 'Output: ' + outputname + ' has no data. Do not create.'
- return
-
-
- json_output = { "series": [] }
-
- for title, ydata in this_series:
- json_output["series"].append({
- "key": title,
- "values": ydata,
- });
-
- # load template
- this_path = os.path.dirname(os.path.realpath(__file__))
- srcfile = os.path.join(this_path, 'wtstats.html.template')
- try:
- srcfile = open(srcfile)
- contents = srcfile.read()
- except IOError:
- print >>sys.stderr, "Cannot find template file 'wtstats.html." \
- "template'. See ./template/README.md for more information."
- sys.exit(-1)
-
- srcfile.close()
-
- # if --json write data to <filename>.json
- if args.json:
- jsonfile = filename + '.json'
- with open(jsonfile, 'w') as f:
- json.dump(json_output, f)
- print "created %s" % jsonfile
-
- # write output file
- dstfile = open(outputname, 'wt')
- replaced_contents = contents.replace('"### INSERT DATA HERE ###"',
- json.dumps(json_output))
- dstfile.write(replaced_contents)
- dstfile.close()
- print "created %s" % dstfile.name
-
- # Split out the data, convert timestamps
- results = []
- for title, values in sorted(d.iteritems()):
+ for title, values in sorted(parsed.iteritems()):
title, ydata = munge(args, title, values)
- # Ignore entries if a list of regular expressions was given
+
+ # ignore entries if a list of regular expressions was given
if args.include and not [r for r in args.include if r.search(title)]:
continue
if not 'wtperf' in title:
@@ -275,18 +320,20 @@ def main():
# Are we just listing the results?
if args.list:
+ print
+ print "Parsed stats:"
for title, ydata in results:
- print title
+ print " ", title
sys.exit(0)
- output_series(results)
+ output_series(results, args)
# If the user wants the stats split up by prefix type do so.
if args.all:
for prefix in prefix_list:
- output_series(results, prefix)
+ output_series(results, args, prefix)
for group in groups.keys():
- output_series(results, group, groups[group])
+ output_series(results, args, group, groups[group])
if __name__ == '__main__':