diff options
author | Thomas Rueckstiess <thomas@rueckstiess.net> | 2015-02-16 12:50:26 +1100 |
---|---|---|
committer | Thomas Rueckstiess <thomas@rueckstiess.net> | 2015-02-16 12:50:26 +1100 |
commit | 4733961a3c1fa37988178d1b1dd4eb44d83b63f6 (patch) | |
tree | 48a3f68224aa5753b3c34f2a6fb5ffb8b6998690 | |
parent | 84ab01d54b0b8225af4f78a4fc331a4a973317f0 (diff) | |
download | mongo-4733961a3c1fa37988178d1b1dd4eb44d83b63f6.tar.gz |
fixes and improvements for wtperf parsing
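- convert wtperf microsecond (uS) columns to milliseconds (ms)
- don't skip monitor* files when parsing a directory
- reorganize the parsing code into module-level functions
- rename the wtperf stats fixture (WiredTigerPerf.fixture -> monitor.fixture)
- add tests for wtperf monitor-file parsing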
-rw-r--r-- | tools/wtstats/test/monitor.fixture (renamed from tools/wtstats/test/WiredTigerPerf.fixture) | 0 | ||||
-rw-r--r-- | tools/wtstats/test/test_wtstats.py | 49 | ||||
-rwxr-xr-x | tools/wtstats/wtstats.py | 333 |
3 files changed, 237 insertions, 145 deletions
diff --git a/tools/wtstats/test/WiredTigerPerf.fixture b/tools/wtstats/test/monitor.fixture index 5306a4bd1fd..5306a4bd1fd 100644 --- a/tools/wtstats/test/WiredTigerPerf.fixture +++ b/tools/wtstats/test/monitor.fixture diff --git a/tools/wtstats/test/test_wtstats.py b/tools/wtstats/test/test_wtstats.py index 80078d97667..dd6098fec49 100644 --- a/tools/wtstats/test/test_wtstats.py +++ b/tools/wtstats/test/test_wtstats.py @@ -75,7 +75,8 @@ def helper_run_with_fixture(kwargs=None): # path replacement kwargs['--output'] = os.path.join(test_dir, kwargs['--output']) - statsfile = os.path.join(test_dir, 'WiredTigerStat.fixture') + statsfile = os.path.join(test_dir, kwargs['files'] if 'files' in kwargs else 'WiredTigerStat.fixture') + print "ST", statsfile arglist = ['./wtstats', statsfile] for item in kwargs.items(): @@ -175,7 +176,51 @@ def test_output_option(): outfile = '_foo_bar_baz.html' helper_run_with_fixture({'--output': outfile}) assert os.path.exists(os.path.join(test_dir, outfile)) - + +@with_setup(setUp, tearDown) +def test_monitor_stats_start_with_wtperf(): + """ wtstats should be able to parse wtperf monitor files """ + + outfile = '_foo_bar_baz.html' + helper_run_with_fixture({'files': 'monitor.fixture', '--output': outfile}) + data = helper_get_json_from_file(outfile) + + series_keys = map(lambda x: x['key'], data['series']) + for key in series_keys: + assert key.startswith('wtperf:') + + assert os.path.exists(os.path.join(test_dir, outfile)) + + +@with_setup(setUp, tearDown) +def test_monitor_stats_convert_us_to_ms(): + """ wtstats should convert monitor stats us to ms """ + + outfile = '_foo_bar_baz.html' + helper_run_with_fixture({'files': 'monitor.fixture', '--output': outfile}) + data = helper_get_json_from_file(outfile) + + series_keys = map(lambda x: x['key'], data['series']) + for key in series_keys: + assert '(uS)' not in key + + values = (item['values'] for item in data['series'] if item['key'] == 'wtperf: insert maximum latency 
(ms)').next().values() + assert max(values) == 103687 / 1000. + + + +@with_setup(setUp, tearDown) +def test_directory_with_wtstats_and_wtperf(): + """ wtstats should be able to parse directories containing both types """ + + outfile = '_test_output_file.html' + helper_run_with_fixture({'files': '.', '--output': outfile}) + data = helper_get_json_from_file(outfile) + + series_keys = map(lambda x: x['key'], data['series']) + assert any(map(lambda title: 'block-manager' in title, series_keys)) + assert any(map(lambda title: 'wtperf' in title, series_keys)) + @with_setup(setUp, tearDown) def test_add_ext_if_missing(): diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py index 4c6880f0409..3749ffd6c63 100755 --- a/tools/wtstats/wtstats.py +++ b/tools/wtstats/wtstats.py @@ -88,9 +88,186 @@ def munge(args, title, values): return ylabel, ydata + # Parse the command line import argparse +def common_prefix(a, b): + """ compute longest common prefix of a and b """ + while not b.startswith(a): + a = a[:-1] + return a + + +def common_suffix(a, b): + """ compute longest common suffix of a and b """ + while not a.endswith(b): + b = b[1:] + return b + + +def parse_wtstats_file(file, result): + """ parse wtstats file, one stat per line, example format: + Dec 05 14:43:14 0 /data/b block-manager: mapped bytes read + """ + print 'Processing wtstats file: ' + file + + # Parse file + for line in open(file, 'rU'): + month, day, time, v, title = line.strip('\n').split(" ", 4) + result[title].append((month + " " + day + " " + time, v)) + + + +def parse_wtperf_file(file, result): + """ parse wtperf file, all stats on single line, example format: + Feb 13 17:55:14,0,0,156871,0,N,0,0,0,49,6,6146,0,0,0 + """ + print 'Processing wtperf file: ' + file + fh = open(file, 'rU') + + # first line contains headings, replace microseconds with milliseconds + headings = fh.next().strip('\n').split(',')[1:] + headings = map(lambda h: h.replace('(uS)', ' (ms)'), headings) + + # parse rest of 
file + for line in fh: + month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3) + values = values.split(',') + for i, v in enumerate(values): + if v == 'N': + v = 0 + # convert us to ms + if '(ms)' in headings[i]: + v = float(v) / 1000.0 + result['wtperf: ' + headings[i]].append((month + " " + day + " " + time, v)) + + +def skip_constants(result): + # Process the series, eliminate constants, delete totalsec for wtperf + items = list(result.iteritems()) + + for title, values in items: + skip = True + t0, v0 = values[0] + for t, v in values: + if v != v0: + skip = False + break + + if title == 'wtperf: totalsec': + skip = True + + if skip: + del result[title] + + return result + + +def parse_files(files_or_dir): + """ walk through file list or directory and parse according to file type (wtstats / wtperf). """ + + result = defaultdict(list) + + for f in files_or_dir: + if os.path.isfile(f): + # peek at first line to determine type + with open(f, 'rU') as fh: + line = fh.readline() + if line.startswith('#time'): + parse_wtperf_file(f, result) + else: + parse_wtstats_file(f, result) + + elif os.path.isdir(f): + for s in glob(os.path.join(f, 'WiredTigerStat*')): + parse_wtstats_file(s, result) + + for s in glob(os.path.join(f, 'monitor*')): + parse_wtperf_file(s, result) + + return result + + + +def output_series(results, args, prefix=None, grouplist=[]): + """ Write the data into the html template """ + + # add .html ending if not present + filename, ext = os.path.splitext(args.output) + if ext == '': + ext = '.html' + + # open the output file based on prefix + if prefix == None: + outputname = filename + ext + elif len(grouplist) == 0: + outputname = filename +'.' + prefix + ext + else: + outputname = filename +'.group.' 
+ prefix + ext + + if prefix != None and len(grouplist) == 0: + this_series = [] + for title, ydata in results: + if not prefix in title: + continue + #print 'Appending to dataset: ' + title + this_series.append((title, ydata)) + elif prefix != None and len(grouplist) > 0: + this_series = [] + for title, ydata in results: + for subgroup in grouplist: + if not subgroup in title: + continue + # print 'Appending to dataset: ' + title + this_series.append((title, ydata)) + else: + this_series = results + + if len(this_series) == 0: + print 'Output: ' + outputname + ' has no data. Do not create.' + return + + + json_output = { "series": [] } + + for title, ydata in this_series: + json_output["series"].append({ + "key": title, + "values": ydata, + }); + + # load template + this_path = os.path.dirname(os.path.realpath(__file__)) + srcfile = os.path.join(this_path, 'wtstats.html.template') + try: + srcfile = open(srcfile) + contents = srcfile.read() + except IOError: + print >>sys.stderr, "Cannot find template file 'wtstats.html." \ + "template'. See ./template/README.md for more information." + sys.exit(-1) + + srcfile.close() + + # if --json write data to <filename>.json + if args.json: + jsonfile = filename + '.json' + with open(jsonfile, 'w') as f: + json.dump(json_output, f) + print "created %s" % jsonfile + + # write output file + dstfile = open(outputname, 'wt') + replaced_contents = contents.replace('"### INSERT DATA HERE ###"', + json.dumps(json_output)) + dstfile.write(replaced_contents) + dstfile.close() + print "created %s" % dstfile.name + + + + def main(): parser = argparse.ArgumentParser(description='Create graphs from' \ 'WiredTiger statistics.') @@ -112,149 +289,17 @@ def main(): 'logging') args = parser.parse_args() - # Read the input file(s) into a dictionary of lists. 
- def getfiles(l): - for f in l: - if os.path.isfile(f): - yield f - elif os.path.isdir(f): - for s in glob(os.path.join(f, 'WiredTigerStat*')): - print 'Processing ' + s - yield s - - d = defaultdict(list) - - for f in getfiles(args.files): - wtperf_mode = False - for line in open(f, 'rU'): - if line.startswith('#time'): - # wtperf file, read headings and switch to wtperf mode - wtperf_mode = True - wtperf_headings = line.strip('\n').split(',')[1:] - continue - - elif wtperf_mode: - # wtperf file, all stats are on a single line - month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3) - values = values.split(',') - for i, v in enumerate(values): - if v == 'N': - v = 0 - d['wtperf: ' + wtperf_headings[i]].append((month + " " + day + " " + time, v)) - - else: - # wtstats file, one stat per line - month, day, time, v, title = line.strip('\n').split(" ", 4) - d[title].append((month + " " + day + " " + time, v)) + # Parse files or directory and skip constants + parsed = skip_constants(parse_files(args.files)) - - # Process the series, eliminate constants, delete totalsec for wtperf - for title, values in sorted(d.iteritems()): - skip = True - t0, v0 = values[0] - for t, v in values: - if v != v0: - skip = False - break - - if title == 'wtperf: totalsec': - skip = True - - if skip: - #print "Skipping", title - del d[title] - - # Common prefix / suffix elimination + # filter results based on --include, compute common prefix and suffix + results = [] prefix = suffix = None - def common_prefix(a, b): - while not b.startswith(a): - a = a[:-1] - return a - - def common_suffix(a, b): - while not a.endswith(b): - b = b[1:] - return b - - def output_series(results, prefix=None, grouplist=[]): - # add .html ending if not present - filename, ext = os.path.splitext(args.output) - if ext == '': - ext = '.html' - - # open the output file based on prefix - if prefix == None: - outputname = filename + ext - elif len(grouplist) == 0: - outputname = filename +'.' 
+ prefix + ext - else: - outputname = filename +'.group.' + prefix + ext - - if prefix != None and len(grouplist) == 0: - this_series = [] - for title, ydata in results: - if not prefix in title: - continue - #print 'Appending to dataset: ' + title - this_series.append((title, ydata)) - elif prefix != None and len(grouplist) > 0: - this_series = [] - for title, ydata in results: - for subgroup in grouplist: - if not subgroup in title: - continue - # print 'Appending to dataset: ' + title - this_series.append((title, ydata)) - else: - this_series = results - - if len(this_series) == 0: - print 'Output: ' + outputname + ' has no data. Do not create.' - return - - - json_output = { "series": [] } - - for title, ydata in this_series: - json_output["series"].append({ - "key": title, - "values": ydata, - }); - - # load template - this_path = os.path.dirname(os.path.realpath(__file__)) - srcfile = os.path.join(this_path, 'wtstats.html.template') - try: - srcfile = open(srcfile) - contents = srcfile.read() - except IOError: - print >>sys.stderr, "Cannot find template file 'wtstats.html." \ - "template'. See ./template/README.md for more information." 
- sys.exit(-1) - - srcfile.close() - - # if --json write data to <filename>.json - if args.json: - jsonfile = filename + '.json' - with open(jsonfile, 'w') as f: - json.dump(json_output, f) - print "created %s" % jsonfile - - # write output file - dstfile = open(outputname, 'wt') - replaced_contents = contents.replace('"### INSERT DATA HERE ###"', - json.dumps(json_output)) - dstfile.write(replaced_contents) - dstfile.close() - print "created %s" % dstfile.name - - # Split out the data, convert timestamps - results = [] - for title, values in sorted(d.iteritems()): + for title, values in sorted(parsed.iteritems()): title, ydata = munge(args, title, values) - # Ignore entries if a list of regular expressions was given + + # ignore entries if a list of regular expressions was given if args.include and not [r for r in args.include if r.search(title)]: continue if not 'wtperf' in title: @@ -275,18 +320,20 @@ def main(): # Are we just listing the results? if args.list: + print + print "Parsed stats:" for title, ydata in results: - print title + print " ", title sys.exit(0) - output_series(results) + output_series(results, args) # If the user wants the stats split up by prefix type do so. if args.all: for prefix in prefix_list: - output_series(results, prefix) + output_series(results, args, prefix) for group in groups.keys(): - output_series(results, group, groups[group]) + output_series(results, args, group, groups[group]) if __name__ == '__main__': |