summaryrefslogtreecommitdiff
path: root/cloudinit/analyze
diff options
context:
space:
mode:
authorChad Smith <chad.smith@canonical.com>2017-08-21 13:46:23 -0600
committerChad Smith <chad.smith@canonical.com>2017-08-21 13:46:23 -0600
commite74d7752f1761c3a8d3c19877de4707d00c49d08 (patch)
tree0627dd5d06d83b770b38196bceabddd06c0b1b77 /cloudinit/analyze
parentdc2bd79949492bccdc1d7df0132f98c354d51943 (diff)
downloadcloud-init-git-e74d7752f1761c3a8d3c19877de4707d00c49d08.tar.gz
tools: Add tooling for basic cloud-init performance analysis.
This branch adds cloudinit-analyze into cloud-init proper. It adds an "analyze" subcommand to the cloud-init command line utility for quick performance assessment of cloud-init stages and events. On a cloud-init configured instance, running "cloud-init analyze blame" will now report which cloud-init events cost the most wall time. This allows for quick assessment of the most costly stages of cloud-init. This functionality is pulled from Ryan Harper's analyze work. The cloudinit-analyze main script itself has been refactored a bit for inclusion as a subcommand of cloud-init CLI. There will be a followup branch at some point which will optionally instrument detailed strace profiling, but that approach needs a bit more discussion first. This branch also adds: * additional debugging topic to the sphinx-generated docs describing cloud-init analyze, dump and show as well as cloud-init single usage. * Updates the Makefile unittests target to include cloudinit directory because we now have unittests within that package. LP: #1709761
Diffstat (limited to 'cloudinit/analyze')
-rw-r--r--cloudinit/analyze/__init__.py0
-rw-r--r--cloudinit/analyze/__main__.py155
-rw-r--r--cloudinit/analyze/dump.py176
-rw-r--r--cloudinit/analyze/show.py207
-rw-r--r--cloudinit/analyze/tests/test_dump.py210
5 files changed, 748 insertions, 0 deletions
diff --git a/cloudinit/analyze/__init__.py b/cloudinit/analyze/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/cloudinit/analyze/__init__.py
diff --git a/cloudinit/analyze/__main__.py b/cloudinit/analyze/__main__.py
new file mode 100644
index 00000000..71cba4f2
--- /dev/null
+++ b/cloudinit/analyze/__main__.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2017 Canonical Ltd.
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import argparse
+import re
+import sys
+
+from . import dump
+from . import show
+
+
+def get_parser(parser=None):
+ if not parser:
+ parser = argparse.ArgumentParser(
+ prog='cloudinit-analyze',
+ description='Devel tool: Analyze cloud-init logs and data')
+ subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
+ subparsers.required = True
+
+ parser_blame = subparsers.add_parser(
+ 'blame', help='Print list of executed stages ordered by time to init')
+ parser_blame.add_argument(
+ '-i', '--infile', action='store', dest='infile',
+ default='/var/log/cloud-init.log',
+ help='specify where to read input.')
+ parser_blame.add_argument(
+ '-o', '--outfile', action='store', dest='outfile', default='-',
+ help='specify where to write output. ')
+ parser_blame.set_defaults(action=('blame', analyze_blame))
+
+ parser_show = subparsers.add_parser(
+ 'show', help='Print list of in-order events during execution')
+ parser_show.add_argument('-f', '--format', action='store',
+ dest='print_format', default='%I%D @%Es +%ds',
+ help='specify formatting of output.')
+ parser_show.add_argument('-i', '--infile', action='store',
+ dest='infile', default='/var/log/cloud-init.log',
+ help='specify where to read input.')
+ parser_show.add_argument('-o', '--outfile', action='store',
+ dest='outfile', default='-',
+ help='specify where to write output.')
+ parser_show.set_defaults(action=('show', analyze_show))
+ parser_dump = subparsers.add_parser(
+ 'dump', help='Dump cloud-init events in JSON format')
+ parser_dump.add_argument('-i', '--infile', action='store',
+ dest='infile', default='/var/log/cloud-init.log',
+ help='specify where to read input. ')
+ parser_dump.add_argument('-o', '--outfile', action='store',
+ dest='outfile', default='-',
+ help='specify where to write output. ')
+ parser_dump.set_defaults(action=('dump', analyze_dump))
+ return parser
+
+
+def analyze_blame(name, args):
+ """Report a list of records sorted by largest time delta.
+
+ For example:
+ 30.210s (init-local) searching for datasource
+ 8.706s (init-network) reading and applying user-data
+ 166ms (modules-config) ....
+ 807us (modules-final) ...
+
+ We generate event records parsing cloud-init logs, formatting the output
+ and sorting by record data ('delta')
+ """
+ (infh, outfh) = configure_io(args)
+ blame_format = ' %ds (%n)'
+ r = re.compile('(^\s+\d+\.\d+)', re.MULTILINE)
+ for idx, record in enumerate(show.show_events(_get_events(infh),
+ blame_format)):
+ srecs = sorted(filter(r.match, record), reverse=True)
+ outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+ outfh.write('\n'.join(srecs) + '\n')
+ outfh.write('\n')
+ outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_show(name, args):
+ """Generate output records using the 'standard' format to printing events.
+
+ Example output follows:
+ Starting stage: (init-local)
+ ...
+ Finished stage: (init-local) 0.105195 seconds
+
+ Starting stage: (init-network)
+ ...
+ Finished stage: (init-network) 0.339024 seconds
+
+ Starting stage: (modules-config)
+ ...
+ Finished stage: (modules-config) 0.NNN seconds
+
+ Starting stage: (modules-final)
+ ...
+ Finished stage: (modules-final) 0.NNN seconds
+ """
+ (infh, outfh) = configure_io(args)
+ for idx, record in enumerate(show.show_events(_get_events(infh),
+ args.print_format)):
+ outfh.write('-- Boot Record %02d --\n' % (idx + 1))
+ outfh.write('The total time elapsed since completing an event is'
+ ' printed after the "@" character.\n')
+ outfh.write('The time the event takes is printed after the "+" '
+ 'character.\n\n')
+ outfh.write('\n'.join(record) + '\n')
+ outfh.write('%d boot records analyzed\n' % (idx + 1))
+
+
+def analyze_dump(name, args):
+ """Dump cloud-init events in json format"""
+ (infh, outfh) = configure_io(args)
+ outfh.write(dump.json_dumps(_get_events(infh)) + '\n')
+
+
+def _get_events(infile):
+ rawdata = None
+ events, rawdata = show.load_events(infile, None)
+ if not events:
+ events, _ = dump.dump_events(rawdata=rawdata)
+ return events
+
+
+def configure_io(args):
+ """Common parsing and setup of input/output files"""
+ if args.infile == '-':
+ infh = sys.stdin
+ else:
+ try:
+ infh = open(args.infile, 'r')
+ except (FileNotFoundError, PermissionError):
+ sys.stderr.write('Cannot open file %s\n' % args.infile)
+ sys.exit(1)
+
+ if args.outfile == '-':
+ outfh = sys.stdout
+ else:
+ try:
+ outfh = open(args.outfile, 'w')
+ except PermissionError:
+ sys.stderr.write('Cannot open file %s\n' % args.outfile)
+ sys.exit(1)
+
+ return (infh, outfh)
+
+
+if __name__ == '__main__':
+ parser = get_parser()
+ args = parser.parse_args()
+ (name, action_functor) = args.action
+ action_functor(name, args)
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/analyze/dump.py b/cloudinit/analyze/dump.py
new file mode 100644
index 00000000..ca4da496
--- /dev/null
+++ b/cloudinit/analyze/dump.py
@@ -0,0 +1,176 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import calendar
+from datetime import datetime
+import json
+import sys
+
+from cloudinit import util
+
+stage_to_description = {
+ 'finished': 'finished running cloud-init',
+ 'init-local': 'starting search for local datasources',
+ 'init-network': 'searching for network datasources',
+ 'init': 'searching for network datasources',
+ 'modules-config': 'running config modules',
+ 'modules-final': 'finalizing modules',
+ 'modules': 'running modules for',
+ 'single': 'running single module ',
+}
+
+# logger's asctime format
+CLOUD_INIT_ASCTIME_FMT = "%Y-%m-%d %H:%M:%S,%f"
+
+# journctl -o short-precise
+CLOUD_INIT_JOURNALCTL_FMT = "%b %d %H:%M:%S.%f %Y"
+
+# other
+DEFAULT_FMT = "%b %d %H:%M:%S %Y"
+
+
+def parse_timestamp(timestampstr):
+ # default syslog time does not include the current year
+ months = [calendar.month_abbr[m] for m in range(1, 13)]
+ if timestampstr.split()[0] in months:
+ # Aug 29 22:55:26
+ FMT = DEFAULT_FMT
+ if '.' in timestampstr:
+ FMT = CLOUD_INIT_JOURNALCTL_FMT
+ dt = datetime.strptime(timestampstr + " " +
+ str(datetime.now().year),
+ FMT)
+ timestamp = dt.strftime("%s.%f")
+ elif "," in timestampstr:
+ # 2016-09-12 14:39:20,839
+ dt = datetime.strptime(timestampstr, CLOUD_INIT_ASCTIME_FMT)
+ timestamp = dt.strftime("%s.%f")
+ else:
+ # allow date(1) to handle other formats we don't expect
+ timestamp = parse_timestamp_from_date(timestampstr)
+
+ return float(timestamp)
+
+
+def parse_timestamp_from_date(timestampstr):
+ out, _ = util.subp(['date', '+%s.%3N', '-d', timestampstr])
+ timestamp = out.strip()
+ return float(timestamp)
+
+
+def parse_ci_logline(line):
+ # Stage Starts:
+ # Cloud-init v. 0.7.7 running 'init-local' at \
+ # Fri, 02 Sep 2016 19:28:07 +0000. Up 1.0 seconds.
+ # Cloud-init v. 0.7.7 running 'init' at \
+ # Fri, 02 Sep 2016 19:28:08 +0000. Up 2.0 seconds.
+ # Cloud-init v. 0.7.7 finished at
+ # Aug 29 22:55:26 test1 [CLOUDINIT] handlers.py[DEBUG]: \
+ # finish: modules-final: SUCCESS: running modules for final
+ # 2016-08-30T21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: \
+ # finish: modules-final: SUCCESS: running modules for final
+ #
+ # Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]: \
+ # Cloud-init v. 0.7.8 running 'init-local' at \
+ # Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.
+ #
+ # 2017-05-22 18:02:01,088 - util.py[DEBUG]: Cloud-init v. 0.7.9 running \
+ # 'init-local' at Mon, 22 May 2017 18:02:01 +0000. Up 2.0 seconds.
+
+ separators = [' - ', ' [CLOUDINIT] ']
+ found = False
+ for sep in separators:
+ if sep in line:
+ found = True
+ break
+
+ if not found:
+ return None
+
+ (timehost, eventstr) = line.split(sep)
+
+ # journalctl -o short-precise
+ if timehost.endswith(":"):
+ timehost = " ".join(timehost.split()[0:-1])
+
+ if "," in timehost:
+ timestampstr, extra = timehost.split(",")
+ timestampstr += ",%s" % extra.split()[0]
+ if ' ' in extra:
+ hostname = extra.split()[-1]
+ else:
+ hostname = timehost.split()[-1]
+ timestampstr = timehost.split(hostname)[0].strip()
+ if 'Cloud-init v.' in eventstr:
+ event_type = 'start'
+ if 'running' in eventstr:
+ stage_and_timestamp = eventstr.split('running')[1].lstrip()
+ event_name, _ = stage_and_timestamp.split(' at ')
+ event_name = event_name.replace("'", "").replace(":", "-")
+ if event_name == "init":
+ event_name = "init-network"
+ else:
+ # don't generate a start for the 'finished at' banner
+ return None
+ event_description = stage_to_description[event_name]
+ else:
+ (pymodloglvl, event_type, event_name) = eventstr.split()[0:3]
+ event_description = eventstr.split(event_name)[1].strip()
+
+ event = {
+ 'name': event_name.rstrip(":"),
+ 'description': event_description,
+ 'timestamp': parse_timestamp(timestampstr),
+ 'origin': 'cloudinit',
+ 'event_type': event_type.rstrip(":"),
+ }
+ if event['event_type'] == "finish":
+ result = event_description.split(":")[0]
+ desc = event_description.split(result)[1].lstrip(':').strip()
+ event['result'] = result
+ event['description'] = desc.strip()
+
+ return event
+
+
+def json_dumps(data):
+ return json.dumps(data, indent=1, sort_keys=True,
+ separators=(',', ': '))
+
+
+def dump_events(cisource=None, rawdata=None):
+ events = []
+ event = None
+ CI_EVENT_MATCHES = ['start:', 'finish:', 'Cloud-init v.']
+
+ if not any([cisource, rawdata]):
+ raise ValueError('Either cisource or rawdata parameters are required')
+
+ if rawdata:
+ data = rawdata.splitlines()
+ else:
+ data = cisource.readlines()
+
+ for line in data:
+ for match in CI_EVENT_MATCHES:
+ if match in line:
+ try:
+ event = parse_ci_logline(line)
+ except ValueError:
+ sys.stderr.write('Skipping invalid entry\n')
+ if event:
+ events.append(event)
+
+ return events, data
+
+
+def main():
+ if len(sys.argv) > 1:
+ cisource = open(sys.argv[1])
+ else:
+ cisource = sys.stdin
+
+ return json_dumps(dump_events(cisource))
+
+
+if __name__ == "__main__":
+ print(main())
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py
new file mode 100644
index 00000000..3b356bb8
--- /dev/null
+++ b/cloudinit/analyze/show.py
@@ -0,0 +1,207 @@
+# Copyright (C) 2016 Canonical Ltd.
+#
+# Author: Ryan Harper <ryan.harper@canonical.com>
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import base64
+import datetime
+import json
+import os
+
+from cloudinit import util
+
+# An event:
+'''
+{
+ "description": "executing late commands",
+ "event_type": "start",
+ "level": "INFO",
+ "name": "cmd-install/stage-late"
+ "origin": "cloudinit",
+ "timestamp": 1461164249.1590767,
+},
+
+ {
+ "description": "executing late commands",
+ "event_type": "finish",
+ "level": "INFO",
+ "name": "cmd-install/stage-late",
+ "origin": "cloudinit",
+ "result": "SUCCESS",
+ "timestamp": 1461164249.1590767
+ }
+
+'''
+format_key = {
+ '%d': 'delta',
+ '%D': 'description',
+ '%E': 'elapsed',
+ '%e': 'event_type',
+ '%I': 'indent',
+ '%l': 'level',
+ '%n': 'name',
+ '%o': 'origin',
+ '%r': 'result',
+ '%t': 'timestamp',
+ '%T': 'total_time',
+}
+
+formatting_help = " ".join(["{0}: {1}".format(k.replace('%', '%%'), v)
+ for k, v in format_key.items()])
+
+
+def format_record(msg, event):
+ for i, j in format_key.items():
+ if i in msg:
+ # ensure consistent formatting of time values
+ if j in ['delta', 'elapsed', 'timestamp']:
+ msg = msg.replace(i, "{%s:08.5f}" % j)
+ else:
+ msg = msg.replace(i, "{%s}" % j)
+ return msg.format(**event)
+
+
+def dump_event_files(event):
+ content = dict((k, v) for k, v in event.items() if k not in ['content'])
+ files = content['files']
+ saved = []
+ for f in files:
+ fname = f['path']
+ fn_local = os.path.basename(fname)
+ fcontent = base64.b64decode(f['content']).decode('ascii')
+ util.write_file(fn_local, fcontent)
+ saved.append(fn_local)
+
+ return saved
+
+
+def event_name(event):
+ if event:
+ return event.get('name')
+ return None
+
+
+def event_type(event):
+ if event:
+ return event.get('event_type')
+ return None
+
+
+def event_parent(event):
+ if event:
+ return event_name(event).split("/")[0]
+ return None
+
+
+def event_timestamp(event):
+ return float(event.get('timestamp'))
+
+
+def event_datetime(event):
+ return datetime.datetime.utcfromtimestamp(event_timestamp(event))
+
+
+def delta_seconds(t1, t2):
+ return (t2 - t1).total_seconds()
+
+
+def event_duration(start, finish):
+ return delta_seconds(event_datetime(start), event_datetime(finish))
+
+
+def event_record(start_time, start, finish):
+ record = finish.copy()
+ record.update({
+ 'delta': event_duration(start, finish),
+ 'elapsed': delta_seconds(start_time, event_datetime(start)),
+ 'indent': '|' + ' ' * (event_name(start).count('/') - 1) + '`->',
+ })
+
+ return record
+
+
+def total_time_record(total_time):
+ return 'Total Time: %3.5f seconds\n' % total_time
+
+
+def generate_records(events, blame_sort=False,
+ print_format="(%n) %d seconds in %I%D",
+ dump_files=False, log_datafiles=False):
+
+ sorted_events = sorted(events, key=lambda x: x['timestamp'])
+ records = []
+ start_time = None
+ total_time = 0.0
+ stage_start_time = {}
+ stages_seen = []
+ boot_records = []
+
+ unprocessed = []
+ for e in range(0, len(sorted_events)):
+ event = events[e]
+ try:
+ next_evt = events[e + 1]
+ except IndexError:
+ next_evt = None
+
+ if event_type(event) == 'start':
+ if event.get('name') in stages_seen:
+ records.append(total_time_record(total_time))
+ boot_records.append(records)
+ records = []
+ start_time = None
+ total_time = 0.0
+
+ if start_time is None:
+ stages_seen = []
+ start_time = event_datetime(event)
+ stage_start_time[event_parent(event)] = start_time
+
+ # see if we have a pair
+ if event_name(event) == event_name(next_evt):
+ if event_type(next_evt) == 'finish':
+ records.append(format_record(print_format,
+ event_record(start_time,
+ event,
+ next_evt)))
+ else:
+ # This is a parent event
+ records.append("Starting stage: %s" % event.get('name'))
+ unprocessed.append(event)
+ stages_seen.append(event.get('name'))
+ continue
+ else:
+ prev_evt = unprocessed.pop()
+ if event_name(event) == event_name(prev_evt):
+ record = event_record(start_time, prev_evt, event)
+ records.append(format_record("Finished stage: "
+ "(%n) %d seconds ",
+ record) + "\n")
+ total_time += record.get('delta')
+ else:
+ # not a match, put it back
+ unprocessed.append(prev_evt)
+
+ records.append(total_time_record(total_time))
+ boot_records.append(records)
+ return boot_records
+
+
+def show_events(events, print_format):
+ return generate_records(events, print_format=print_format)
+
+
+def load_events(infile, rawdata=None):
+ if rawdata:
+ data = rawdata.read()
+ else:
+ data = infile.read()
+
+ j = None
+ try:
+ j = json.loads(data)
+ except json.JSONDecodeError:
+ pass
+
+ return j, data
diff --git a/cloudinit/analyze/tests/test_dump.py b/cloudinit/analyze/tests/test_dump.py
new file mode 100644
index 00000000..2c0885d0
--- /dev/null
+++ b/cloudinit/analyze/tests/test_dump.py
@@ -0,0 +1,210 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from datetime import datetime
+from textwrap import dedent
+
+from cloudinit.analyze.dump import (
+ dump_events, parse_ci_logline, parse_timestamp)
+from cloudinit.util import subp, write_file
+from tests.unittests.helpers import CiTestCase
+
+
+class TestParseTimestamp(CiTestCase):
+
+ def test_parse_timestamp_handles_cloud_init_default_format(self):
+ """Logs with cloud-init detailed formats will be properly parsed."""
+ trusty_fmt = '%Y-%m-%d %H:%M:%S,%f'
+ trusty_stamp = '2016-09-12 14:39:20,839'
+
+ parsed = parse_timestamp(trusty_stamp)
+
+ # convert ourselves
+ dt = datetime.strptime(trusty_stamp, trusty_fmt)
+ expected = float(dt.strftime('%s.%f'))
+
+ # use date(1)
+ out, _err = subp(['date', '+%s.%3N', '-d', trusty_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+ def test_parse_timestamp_handles_syslog_adding_year(self):
+ """Syslog timestamps lack a year. Add year and properly parse."""
+ syslog_fmt = '%b %d %H:%M:%S %Y'
+ syslog_stamp = 'Aug 08 15:12:51'
+
+ # convert stamp ourselves by adding the missing year value
+ year = datetime.now().year
+ dt = datetime.strptime(syslog_stamp + " " + str(year), syslog_fmt)
+ expected = float(dt.strftime('%s.%f'))
+ parsed = parse_timestamp(syslog_stamp)
+
+ # use date(1)
+ out, _ = subp(['date', '+%s.%3N', '-d', syslog_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+ def test_parse_timestamp_handles_journalctl_format_adding_year(self):
+ """Journalctl precise timestamps lack a year. Add year and parse."""
+ journal_fmt = '%b %d %H:%M:%S.%f %Y'
+ journal_stamp = 'Aug 08 17:15:50.606811'
+
+ # convert stamp ourselves by adding the missing year value
+ year = datetime.now().year
+ dt = datetime.strptime(journal_stamp + " " + str(year), journal_fmt)
+ expected = float(dt.strftime('%s.%f'))
+ parsed = parse_timestamp(journal_stamp)
+
+ # use date(1)
+ out, _ = subp(['date', '+%s.%6N', '-d', journal_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+ def test_parse_unexpected_timestamp_format_with_date_command(self):
+ """Dump sends unexpected timestamp formats to data for processing."""
+ new_fmt = '%H:%M %m/%d %Y'
+ new_stamp = '17:15 08/08'
+
+ # convert stamp ourselves by adding the missing year value
+ year = datetime.now().year
+ dt = datetime.strptime(new_stamp + " " + str(year), new_fmt)
+ expected = float(dt.strftime('%s.%f'))
+ parsed = parse_timestamp(new_stamp)
+
+ # use date(1)
+ out, _ = subp(['date', '+%s.%6N', '-d', new_stamp])
+ timestamp = out.strip()
+ date_ts = float(timestamp)
+
+ self.assertEqual(expected, parsed)
+ self.assertEqual(expected, date_ts)
+ self.assertEqual(date_ts, parsed)
+
+
+class TestParseCILogLine(CiTestCase):
+
+ def test_parse_logline_returns_none_without_separators(self):
+ """When no separators are found, parse_ci_logline returns None."""
+ expected_parse_ignores = [
+ '', '-', 'adsf-asdf', '2017-05-22 18:02:01,088', 'CLOUDINIT']
+ for parse_ignores in expected_parse_ignores:
+ self.assertIsNone(parse_ci_logline(parse_ignores))
+
+ def test_parse_logline_returns_event_for_cloud_init_logs(self):
+ """parse_ci_logline returns an event parse from cloud-init format."""
+ line = (
+ "2017-08-08 20:05:07,147 - util.py[DEBUG]: Cloud-init v. 0.7.9"
+ " running 'init-local' at Tue, 08 Aug 2017 20:05:07 +0000. Up"
+ " 6.26 seconds.")
+ dt = datetime.strptime(
+ '2017-08-08 20:05:07,147', '%Y-%m-%d %H:%M:%S,%f')
+ timestamp = float(dt.strftime('%s.%f'))
+ expected = {
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp}
+ self.assertEqual(expected, parse_ci_logline(line))
+
+ def test_parse_logline_returns_event_for_journalctl_logs(self):
+ """parse_ci_logline returns an event parse from journalctl format."""
+ line = ("Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT]"
+ " util.py[DEBUG]: Cloud-init v. 0.7.8 running 'init-local' at"
+ " Thu, 03 Nov 2016 06:51:06 +0000. Up 1.0 seconds.")
+ year = datetime.now().year
+ dt = datetime.strptime(
+ 'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+ timestamp = float(dt.strftime('%s.%f'))
+ expected = {
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp}
+ self.assertEqual(expected, parse_ci_logline(line))
+
+ def test_parse_logline_returns_event_for_finish_events(self):
+ """parse_ci_logline returns a finish event for a parsed log line."""
+ line = ('2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT]'
+ ' handlers.py[DEBUG]: finish: modules-final: SUCCESS: running'
+ ' modules for final')
+ expected = {
+ 'description': 'running modules for final',
+ 'event_type': 'finish',
+ 'name': 'modules-final',
+ 'origin': 'cloudinit',
+ 'result': 'SUCCESS',
+ 'timestamp': 1472594005.972}
+ self.assertEqual(expected, parse_ci_logline(line))
+
+
+SAMPLE_LOGS = dedent("""\
+Nov 03 06:51:06.074410 x2 cloud-init[106]: [CLOUDINIT] util.py[DEBUG]:\
+ Cloud-init v. 0.7.8 running 'init-local' at Thu, 03 Nov 2016\
+ 06:51:06 +0000. Up 1.0 seconds.
+2016-08-30 21:53:25.972325+00:00 y1 [CLOUDINIT] handlers.py[DEBUG]: finish:\
+ modules-final: SUCCESS: running modules for final
+""")
+
+
+class TestDumpEvents(CiTestCase):
+ maxDiff = None
+
+ def test_dump_events_with_rawdata(self):
+ """Rawdata is split and parsed into a tuple of events and data"""
+ events, data = dump_events(rawdata=SAMPLE_LOGS)
+ expected_data = SAMPLE_LOGS.splitlines()
+ year = datetime.now().year
+ dt1 = datetime.strptime(
+ 'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+ timestamp1 = float(dt1.strftime('%s.%f'))
+ expected_events = [{
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp1}, {
+ 'description': 'running modules for final',
+ 'event_type': 'finish',
+ 'name': 'modules-final',
+ 'origin': 'cloudinit',
+ 'result': 'SUCCESS',
+ 'timestamp': 1472594005.972}]
+ self.assertEqual(expected_events, events)
+ self.assertEqual(expected_data, data)
+
+ def test_dump_events_with_cisource(self):
+ """Cisource file is read and parsed into a tuple of events and data."""
+ tmpfile = self.tmp_path('logfile')
+ write_file(tmpfile, SAMPLE_LOGS)
+ events, data = dump_events(cisource=open(tmpfile))
+ year = datetime.now().year
+ dt1 = datetime.strptime(
+ 'Nov 03 06:51:06.074410 %d' % year, '%b %d %H:%M:%S.%f %Y')
+ timestamp1 = float(dt1.strftime('%s.%f'))
+ expected_events = [{
+ 'description': 'starting search for local datasources',
+ 'event_type': 'start',
+ 'name': 'init-local',
+ 'origin': 'cloudinit',
+ 'timestamp': timestamp1}, {
+ 'description': 'running modules for final',
+ 'event_type': 'finish',
+ 'name': 'modules-final',
+ 'origin': 'cloudinit',
+ 'result': 'SUCCESS',
+ 'timestamp': 1472594005.972}]
+ self.assertEqual(expected_events, events)
+ self.assertEqual(SAMPLE_LOGS.splitlines(), [d.strip() for d in data])