fixes and improvements for wtperf parsing

- convert wtperf microsec to millisec
- don't skip monitor* files when parsing directory
- parsing code reorganization
- renamed wtperf stats fixture
- added tests
author: Thomas Rueckstiess <thomas@rueckstiess.net> 2015-02-16 12:50:26 +1100
committer: Thomas Rueckstiess <thomas@rueckstiess.net> 2015-02-16 12:50:26 +1100
commit: 4733961a3c1fa37988178d1b1dd4eb44d83b63f6 (patch)
tree: 48a3f68224aa5753b3c34f2a6fb5ffb8b6998690
parent: 84ab01d54b0b8225af4f78a4fc331a4a973317f0 (diff)
download: mongo-4733961a3c1fa37988178d1b1dd4eb44d83b63f6.tar.gz
3 files changed, 237 insertions, 145 deletions
diff --git a/tools/wtstats/test/WiredTigerPerf.fixture b/tools/wtstats/test/monitor.fixture
index 5306a4bd1fd..5306a4bd1fd 100644
--- a/tools/wtstats/test/WiredTigerPerf.fixture
+++ b/tools/wtstats/test/monitor.fixture
diff --git a/tools/wtstats/test/test_wtstats.py b/tools/wtstats/test/test_wtstats.py
index 80078d97667..dd6098fec49 100644
--- a/tools/wtstats/test/test_wtstats.py
+++ b/tools/wtstats/test/test_wtstats.py
@@ -75,7 +75,8 @@ def helper_run_with_fixture(kwargs=None):
     # path replacement
     kwargs['--output'] = os.path.join(test_dir, kwargs['--output'])
 
-    statsfile = os.path.join(test_dir, 'WiredTigerStat.fixture')
+    statsfile = os.path.join(test_dir, kwargs['files'] if 'files' in kwargs else 'WiredTigerStat.fixture')
+    print "ST", statsfile
 
     arglist = ['./wtstats', statsfile]
     for item in kwargs.items():
@@ -175,7 +176,51 @@ def test_output_option():
     outfile = '_foo_bar_baz.html'
     helper_run_with_fixture({'--output': outfile})
     assert os.path.exists(os.path.join(test_dir, outfile))
-    
+
+@with_setup(setUp, tearDown)
+def test_monitor_stats_start_with_wtperf():
+    """ wtstats should be able to parse wtperf monitor files """
+
+    outfile = '_foo_bar_baz.html'
+    helper_run_with_fixture({'files': 'monitor.fixture', '--output': outfile})
+    data = helper_get_json_from_file(outfile)
+
+    series_keys = map(lambda x: x['key'], data['series'])
+    for key in series_keys:
+        assert key.startswith('wtperf:')
+
+    assert os.path.exists(os.path.join(test_dir, outfile))
+
+
+@with_setup(setUp, tearDown)
+def test_monitor_stats_convert_us_to_ms():
+    """ wtstats should convert monitor stats us to ms """
+
+    outfile = '_foo_bar_baz.html'
+    helper_run_with_fixture({'files': 'monitor.fixture', '--output': outfile})
+    data = helper_get_json_from_file(outfile)
+
+    series_keys = map(lambda x: x['key'], data['series'])
+    for key in series_keys:
+        assert '(uS)' not in key
+
+    values = (item['values'] for item in data['series'] if item['key'] == 'wtperf: insert maximum latency (ms)').next().values()
+    assert max(values) == 103687 / 1000.
+
+
+
+@with_setup(setUp, tearDown)
+def test_directory_with_wtstats_and_wtperf():
+    """ wtstats should be able to parse directories containing both types """
+
+    outfile = '_test_output_file.html'
+    helper_run_with_fixture({'files': '.', '--output': outfile})
+    data = helper_get_json_from_file(outfile)
+
+    series_keys = map(lambda x: x['key'], data['series'])
+    assert any(map(lambda title: 'block-manager' in title, series_keys))
+    assert any(map(lambda title: 'wtperf' in title, series_keys))
+
 
 @with_setup(setUp, tearDown)
 def test_add_ext_if_missing():
diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py
index 4c6880f0409..3749ffd6c63 100755
--- a/tools/wtstats/wtstats.py
+++ b/tools/wtstats/wtstats.py
@@ -88,9 +88,186 @@ def munge(args, title, values):
 
     return ylabel, ydata
 
+
 # Parse the command line
 import argparse
 
+def common_prefix(a, b):
+    """ compute longest common prefix of a and b """
+    while not b.startswith(a):
+        a = a[:-1]
+    return a
+
+
+def common_suffix(a, b):
+    """ compute longest common suffix of a and b """
+    while not a.endswith(b):
+        b = b[1:]
+    return b
+
+
+def parse_wtstats_file(file, result):
+    """ parse wtstats file, one stat per line, example format:
+           Dec 05 14:43:14 0 /data/b block-manager: mapped bytes read
+    """
+    print 'Processing wtstats file: ' + file
+
+    # Parse file
+    for line in open(file, 'rU'):
+        month, day, time, v, title = line.strip('\n').split(" ", 4)
+        result[title].append((month + " " + day + " " + time, v))
+
+
+
+def parse_wtperf_file(file, result):
+    """ parse wtperf file, all stats on single line, example format:
+           Feb 13 17:55:14,0,0,156871,0,N,0,0,0,49,6,6146,0,0,0
+    """
+    print 'Processing wtperf file: ' + file
+    fh = open(file, 'rU')
+
+    # first line contains headings, replace microseconds with milliseconds
+    headings = fh.next().strip('\n').split(',')[1:]
+    headings = map(lambda h: h.replace('(uS)', ' (ms)'), headings)
+
+    # parse rest of file
+    for line in fh:
+        month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3)
+        values = values.split(',')
+        for i, v in enumerate(values):
+            if v == 'N': 
+                v = 0
+            # convert us to ms
+            if '(ms)' in headings[i]:
+                v = float(v) / 1000.0
+            result['wtperf: ' + headings[i]].append((month + " " + day + " " + time, v))
+
+
+def skip_constants(result):
+    # Process the series, eliminate constants, delete totalsec for wtperf
+    items = list(result.iteritems())
+
+    for title, values in items:
+        skip = True
+        t0, v0 = values[0]
+        for t, v in values:
+            if v != v0:
+                skip = False
+                break
+        
+        if title == 'wtperf: totalsec':
+            skip = True
+
+        if skip:
+            del result[title]
+
+    return result
+
+
+def parse_files(files_or_dir):
+    """ walk through file list or directory and parse according to file type (wtstats / wtperf). """
+
+    result = defaultdict(list)
+
+    for f in files_or_dir:
+        if os.path.isfile(f):
+            # peek at first line to determine type
+            with open(f, 'rU') as fh:
+                line = fh.readline()
+                if line.startswith('#time'):
+                    parse_wtperf_file(f, result)
+                else: 
+                    parse_wtstats_file(f, result)
+            
+        elif os.path.isdir(f):
+            for s in glob(os.path.join(f, 'WiredTigerStat*')):
+                parse_wtstats_file(s, result)
+
+            for s in glob(os.path.join(f, 'monitor*')):
+                parse_wtperf_file(s, result)
+
+    return result
+
+
+
+def output_series(results, args, prefix=None, grouplist=[]):
+    """ Write the data into the html template """
+
+    # add .html ending if not present
+    filename, ext = os.path.splitext(args.output)
+    if ext == '':
+        ext = '.html'
+
+    # open the output file based on prefix
+    if prefix == None:
+        outputname = filename + ext
+    elif len(grouplist) == 0:
+        outputname = filename +'.' + prefix + ext
+    else:
+        outputname = filename +'.group.' + prefix + ext
+
+    if prefix != None and len(grouplist) == 0:
+        this_series = []
+        for title, ydata in results:
+            if not prefix in title:
+                continue
+            #print 'Appending to dataset: ' + title
+            this_series.append((title, ydata))
+    elif prefix != None and len(grouplist) > 0:
+        this_series = []
+        for title, ydata in results:
+            for subgroup in grouplist:
+                if not subgroup in title:
+                    continue
+                # print 'Appending to dataset: ' + title
+                this_series.append((title, ydata))
+    else:
+        this_series = results
+
+    if len(this_series) == 0:
+        print 'Output: ' + outputname + ' has no data.  Do not create.'
+        return
+
+
+    json_output = { "series": [] }
+
+    for title, ydata in this_series:
+        json_output["series"].append({
+            "key": title,
+            "values": ydata,
+        });
+    
+    # load template
+    this_path = os.path.dirname(os.path.realpath(__file__))
+    srcfile = os.path.join(this_path, 'wtstats.html.template')
+    try: 
+        srcfile = open(srcfile)
+        contents = srcfile.read()
+    except IOError: 
+        print >>sys.stderr, "Cannot find template file 'wtstats.html." \
+            "template'. See ./template/README.md for more information."
+        sys.exit(-1)  
+
+    srcfile.close()
+
+    # if --json write data to <filename>.json
+    if args.json:
+        jsonfile = filename + '.json'
+        with open(jsonfile, 'w') as f:
+            json.dump(json_output, f)
+            print "created %s" % jsonfile
+
+    # write output file
+    dstfile = open(outputname, 'wt')
+    replaced_contents = contents.replace('"### INSERT DATA HERE ###"', 
+        json.dumps(json_output))
+    dstfile.write(replaced_contents)
+    dstfile.close()
+    print "created %s" % dstfile.name
+
+
+
+
 def main():   
     parser = argparse.ArgumentParser(description='Create graphs from' \
         'WiredTiger statistics.')
@@ -112,149 +289,17 @@ def main():
         'logging')
     args = parser.parse_args()
 
-    # Read the input file(s) into a dictionary of lists.
-    def getfiles(l):
-        for f in l:
-            if os.path.isfile(f):
-                yield f
-            elif os.path.isdir(f):
-                for s in glob(os.path.join(f, 'WiredTigerStat*')):
-                    print 'Processing ' + s
-                    yield s
-
-    d = defaultdict(list)
-
-    for f in getfiles(args.files):
-        wtperf_mode = False
-        for line in open(f, 'rU'):
-            if line.startswith('#time'):
-                # wtperf file, read headings and switch to wtperf mode
-                wtperf_mode = True
-                wtperf_headings = line.strip('\n').split(',')[1:]
-                continue
-
-            elif wtperf_mode:
-                # wtperf file, all stats are on a single line
-                month, day, time, values = re.split(r'[ ,]', line.strip('\n'), 3)
-                values = values.split(',')
-                for i, v in enumerate(values):
-                    if v == 'N': 
-                        v = 0
-                    d['wtperf: ' + wtperf_headings[i]].append((month + " " + day + " " + time, v))
-
-            else:
-                # wtstats file, one stat per line
-                month, day, time, v, title = line.strip('\n').split(" ", 4)
-                d[title].append((month + " " + day + " " + time, v))
+    # Parse files or directory and skip constants
+    parsed = skip_constants(parse_files(args.files))
 
-
-    # Process the series, eliminate constants, delete totalsec for wtperf
-    for title, values in sorted(d.iteritems()):
-        skip = True
-        t0, v0 = values[0]
-        for t, v in values:
-            if v != v0:
-                skip = False
-                break
-        
-        if title == 'wtperf: totalsec':
-            skip = True
-
-        if skip:
-            #print "Skipping", title
-            del d[title]
-
-    # Common prefix / suffix elimination
+    # filter results based on --include, compute common prefix and suffix
+    results = []
     prefix = suffix = None
 
-    def common_prefix(a, b):
-        while not b.startswith(a):
-            a = a[:-1]
-        return a
-
-    def common_suffix(a, b):
-        while not a.endswith(b):
-            b = b[1:]
-        return b
-
-    def output_series(results, prefix=None, grouplist=[]):
-        # add .html ending if not present
-        filename, ext = os.path.splitext(args.output)
-        if ext == '':
-            ext = '.html'
-
-        # open the output file based on prefix
-        if prefix == None:
-            outputname = filename + ext
-        elif len(grouplist) == 0:
-            outputname = filename +'.' + prefix + ext
-        else:
-            outputname = filename +'.group.' + prefix + ext
-
-        if prefix != None and len(grouplist) == 0:
-            this_series = []
-            for title, ydata in results:
-                if not prefix in title:
-                    continue
-                #print 'Appending to dataset: ' + title
-                this_series.append((title, ydata))
-        elif prefix != None and len(grouplist) > 0:
-            this_series = []
-            for title, ydata in results:
-                for subgroup in grouplist:
-                    if not subgroup in title:
-                        continue
-                    # print 'Appending to dataset: ' + title
-                    this_series.append((title, ydata))
-        else:
-            this_series = results
-
-        if len(this_series) == 0:
-            print 'Output: ' + outputname + ' has no data.  Do not create.'
-            return
-
-
-        json_output = { "series": [] }
-
-        for title, ydata in this_series:
-            json_output["series"].append({
-                "key": title,
-                "values": ydata,
-            });
-        
-        # load template
-        this_path = os.path.dirname(os.path.realpath(__file__))
-        srcfile = os.path.join(this_path, 'wtstats.html.template')
-        try: 
-            srcfile = open(srcfile)
-            contents = srcfile.read()
-        except IOError: 
-            print >>sys.stderr, "Cannot find template file 'wtstats.html." \
-                "template'. See ./template/README.md for more information."
-            sys.exit(-1)  
-
-        srcfile.close()
-
-        # if --json write data to <filename>.json
-        if args.json:
-            jsonfile = filename + '.json'
-            with open(jsonfile, 'w') as f:
-                json.dump(json_output, f)
-                print "created %s" % jsonfile
-
-        # write output file
-        dstfile = open(outputname, 'wt')
-        replaced_contents = contents.replace('"### INSERT DATA HERE ###"', 
-            json.dumps(json_output))
-        dstfile.write(replaced_contents)
-        dstfile.close()
-        print "created %s" % dstfile.name
-
-    # Split out the data, convert timestamps
-    results = []
-    for title, values in sorted(d.iteritems()):
+    for title, values in sorted(parsed.iteritems()):
         title, ydata = munge(args, title, values)
-        # Ignore entries if a list of regular expressions was given
+
+        # ignore entries if a list of regular expressions was given
         if args.include and not [r for r in args.include if r.search(title)]:
             continue
         if not 'wtperf' in title:
@@ -275,18 +320,20 @@ def main():
 
     # Are we just listing the results?
     if args.list:
+        print 
+        print "Parsed stats:"
         for title, ydata in results:
-            print title
+            print "  ", title
         sys.exit(0)
 
-    output_series(results)
+    output_series(results, args)
 
     # If the user wants the stats split up by prefix type do so.
     if args.all:
         for prefix in prefix_list:
-            output_series(results, prefix)
+            output_series(results, args, prefix)
         for group in groups.keys():
-            output_series(results, group, groups[group])
+            output_series(results, args, group, groups[group])
 
 
 if __name__ == '__main__':
author	Thomas Rueckstiess <thomas@rueckstiess.net>	2015-02-16 12:50:26 +1100
committer	Thomas Rueckstiess <thomas@rueckstiess.net>	2015-02-16 12:50:26 +1100
commit	4733961a3c1fa37988178d1b1dd4eb44d83b63f6 (patch)
tree	48a3f68224aa5753b3c34f2a6fb5ffb8b6998690
parent	84ab01d54b0b8225af4f78a4fc331a4a973317f0 (diff)
download	mongo-4733961a3c1fa37988178d1b1dd4eb44d83b63f6.tar.gz