summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Kendrick (humdrum) <rob.kendrick@codethink.co.uk>2014-02-18 09:11:30 +0000
committerRob Kendrick (humdrum) <rob.kendrick@codethink.co.uk>2014-02-18 09:11:30 +0000
commit0a7b98adaacf063da0634983997c4e0729551780 (patch)
tree969127065d4e84e6f77f5ec460dd55c32ad57457
parentca9987f399217e6d163a0aaf4b80fb08329e441c (diff)
downloadtrove-overseer-master.tar.gz
This apparently wasn't included last time I pushed; actual body of script (HEAD, master)
-rwxr-xr-xtrove-overseer273
1 files changed, 271 insertions, 2 deletions
diff --git a/trove-overseer b/trove-overseer
index 681e18c..790fe56 100755
--- a/trove-overseer
+++ b/trove-overseer
@@ -15,7 +15,276 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import sys, os, yaml
+from optparse import OptionParser
+from sys import stderr, stdout, exit, argv
+from smtplib import SMTP
+from email.utils import formatdate
+from socket import gethostname
+import tempfile
+import subprocess
-import overseerlib
# Module-wide switch for diagnostic output; utter() stays silent while it is
# false.
verbose = False


def utter(msg):
    """Write msg followed by a newline to stdout when verbose mode is on.

    The stream is flushed after every message.  Nothing is written while
    the module-level ``verbose`` flag is false.
    """
    if not verbose:
        return
    line = msg + "\n"
    stdout.write(line)
    stdout.flush()

class html_output:
    """Write test results as rows of an HTML status page.

    The page is assembled in a temporary file and renamed over the target
    path by finish(), so the target is only replaced once the page is
    complete.
    """

    # Page header plus the opening of the results table.  It is formatted
    # once, when the class body runs, so the host name and the
    # "Tests begin at" time stamp are fixed at that moment.  Literal '%'
    # signs in the CSS are doubled because the text goes through
    # %-formatting.
    html_head = """<html>
<head>
 <title>Trove Health Monitor status for %s</title>
 <style type="text/css">
  table.trove {
   width: 100%%;
   border-width: 1px;
   border-spacing: 2px;
   border-style: solid;
   border-color: #000;
   border-collapse: collapse;
  }
  table.trove th {
   border-width: 1px;
   padding: 5px;
   border-style: solid;
   border-color: #000;
  }
  table.trove td {
   border-width: 1px;
   padding: 5px;
   border-style: solid;
   border-color: #000;
  }
 </style>
</head>
<body>
 <h1>Trove Health Monitor</h1>
 <p>Tests begin at %s</p>
 <table class="trove">
  <thead>
   <th>Test name</th>
   <th>Description</th>
   <th>Threshold</th>
   <th>Current value</th>
   <th>Result</th>
  </thead>
""" % (gethostname(), formatdate())

    def __init__(self, filename):
        """Open a temporary file for the page and write the header to it.

        filename -- path the finished page is renamed to by finish().
        """
        self.filename = filename
        # Create the temporary file in the target's own directory.  Using
        # the whole path as the prefix (as before) placed it in the system
        # temporary directory for relative paths, where the final
        # os.rename() can cross filesystems or hit a missing directory.
        target = os.path.abspath(filename)
        self.tempfile = tempfile.NamedTemporaryFile(
            mode="w",  # text mode, so str can be written on Python 3 too
            dir=os.path.dirname(target),
            prefix=os.path.basename(target),
            delete=False)
        self.fh = self.tempfile.file
        utter("HTML temporary filename is %s" % self.tempfile.name)
        self.fh.write(self.html_head)

    @staticmethod
    def _esc(value):
        """Return value as text with '&', '<' and '>' escaped for HTML."""
        from xml.sax.saxutils import escape
        return escape("%s" % (value,))

    def result(self, testname, testparam, result, success, msg):
        """Append one table row describing a single test outcome.

        testname  -- identifier of the test (first column).
        testparam -- mapping with the test settings; 'name' is required,
                     'threshold' and 'type' are optional.
        result    -- value shown in the "Current value" column.
        success   -- true for a green "OK", false for a red failure cell.
        msg       -- detail text shown under FAILURE; unused on success.
        """
        esc = self._esc
        row = ["<tr><td>%s</td><td>%s</td>"
               % (esc(testname), esc(testparam['name']))]

        if 'threshold' in testparam:
            # The direction of the check lives under 'type' and defaults
            # to "high", matching the email report.
            row.append("<td>%s %s" % (esc(testparam['threshold']),
                                      esc(testparam.get('type', "high"))))
        else:
            row.append("<td>Pass/Fail")
        row.append("</td>")

        row.append("<td>%s</td>" % esc(result))

        if success:
            colour, verdict = "green", "OK"
        else:
            colour = "red"
            verdict = "<strong>FAILURE</strong><br>%s" % esc(msg)
        row.append("<td><font color='%s'>%s</font></td>" % (colour, verdict))
        row.append("</tr>")

        self.fh.write("".join(row))

    def finish(self):
        """Close the table and page, then move the file into place."""
        fh = self.fh
        fh.write("</table></body></html>")
        fh.close()
        # NOTE(review): NamedTemporaryFile creates the file readable by its
        # owner only; confirm the page's consumer can read it after the
        # rename.
        os.rename(self.tempfile.name, self.filename)

class email_output:
    """Collect failed test results and mail them as one plain-text report.

    Successful results are ignored; if no test failed, finish() sends
    nothing.
    """

    # Start of the message text.  Each instance extends its own copy with
    # the remaining headers and the failure descriptions.
    body = "Subject: Trove Overseer failure report from %s\n" % (gethostname())
    # Becomes true on the instance once a failure has been recorded.
    failures = False

    def __init__(self, config):
        """Remember the mail settings and write the message headers.

        config -- mapping with 'mailfrom' and 'rcptto'; finish() also
                  reads 'server'.  'rcptto' may be one address or a
                  list/tuple of addresses.
        """
        self.config = config
        recipients = config['rcptto']
        if isinstance(recipients, (list, tuple)):
            # Several recipients share one comma-separated To: header.
            recipients = ", ".join(recipients)
        headers = "From: %s\nTo: %s\n" % (config['mailfrom'], recipients)
        headers += "Date: %s\n\n" % formatdate()
        # Rebinding creates an instance attribute; the class-level text is
        # left untouched.
        self.body = self.body + headers

    def result(self, testname, testparam, result, success, msg):
        """Append a description of a failed test to the message body.

        testname  -- identifier of the test.
        testparam -- mapping with the test settings; 'name' is required,
                     'threshold' and 'type' are optional.
        result    -- value reported by the test.
        success   -- when true, nothing is recorded.
        msg       -- optional detail text, added on its own line.
        """
        if success:
            return
        self.failures = True

        parts = ["\nTest %s (%s) failed:" % (testparam['name'], testname)]
        if 'threshold' in testparam:
            parts.append("\n\tThreshold: %s " % str(testparam['threshold']))
            parts.append(testparam['type'] if 'type' in testparam else "high")
        parts.append("\n\tResult: %s" % str(result))
        if msg:
            parts.append("\n\t%s" % msg)
        parts.append("\n\n")

        self.body += "".join(parts)

    def finish(self):
        """Send the collected report by SMTP if any failure was recorded."""
        if not self.failures:
            return
        config = self.config
        utter("SMTP server: %s" % config['server'])
        conn = SMTP(host = config['server'])
        try:
            if verbose:
                conn.set_debuglevel(256)
            conn.sendmail(config['mailfrom'], config['rcptto'], self.body)
            conn.quit()
        finally:
            # Release the socket even when sending fails; close() is
            # harmless after a successful quit().
            conn.close()

def human_to_bytes(threshold):
    """Convert a threshold such as "10G" into a plain number.

    threshold -- an int or float, returned unchanged, or a string holding
                 a number optionally followed by one of the
                 case-insensitive suffixes K, M, G or T (powers of 1024).

    Returns a float for string input.  A string without a suffix is
    parsed as an ordinary number.  Raises ValueError when the string is
    neither a number nor a number with a known suffix.
    """
    if isinstance(threshold, (int, float)):
        return threshold

    text = threshold.strip()
    try:
        # A bare number has no suffix to interpret.  Previously its last
        # digit was mistaken for one and reported as unknown.
        return float(text)
    except ValueError:
        pass

    multipliers = {
        "K": 1024,
        "M": 1024 ** 2,
        "G": 1024 ** 3,
        "T": 1024 ** 4,
    }
    suffix = text[-1:].upper()
    if suffix not in multipliers:
        utter("Unknown power suffix %s!" % suffix)
        raise ValueError("invalid threshold: %r" % (threshold,))

    return float(text[:-1]) * multipliers[suffix]

def bytes_to_human(num):
    """Format a number with a K/M/G/T suffix, dividing by 1024 per step.

    num -- the value to format.

    Returns the scaled value rounded to a whole number and padded to at
    least three characters, followed by the suffix (none below 1024).
    Values beyond the terabyte range are expressed in T rather than
    falling through and returning None.  Scaling goes by magnitude, so
    negative values get a suffix as well.
    """
    units = ('', 'K', 'M', 'G', 'T')
    for unit in units[:-1]:
        if abs(num) < 1024.0:
            return "%3.0f%s" % (num, unit)
        num /= 1024.0
    # Largest known unit: report whatever is left, however big.
    return "%3.0f%s" % (num, units[-1])

def report_failure(testname, testparam, msg, result = 0):
    """Announce a failed test and pass it on to the active reports.

    testname  -- identifier of the test.
    testparam -- the test's settings, handed through to the reports.
    msg       -- explanation of the failure.
    result    -- numeric value to display; it is run through
                 bytes_to_human() before being handed to the reports.

    The module-level html_report and email_report objects each receive
    the failure when they are set to a true value.
    """
    utter("Test '%s' %s failed: %s" % (testname, testparam, msg))
    outcome = (testname, testparam, bytes_to_human(result), False, msg)
    if html_report:
        html_report.result(*outcome)
    if email_report:
        email_report.result(*outcome)

def run_command(command):
    """Start command through the shell and return the Popen object.

    The child's stderr is merged into its stdout, which the caller reads
    from the returned object's ``stdout`` pipe.  The function does not
    wait for the child to finish.
    """
    # The string is interpreted by the shell, so it must only ever be
    # built from trusted configuration.
    return subprocess.Popen(
        command,
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        shell=True,
    )

def run_test(testname, testparam):
    """Run one test plugin and record whether it passed.

    testname  -- identifier of the test; also the plugin's file name
                 under /usr/share/trove-overseer/plugins/ unless
                 testparam gives a 'path'.
    testparam -- mapping with the test settings.  Keys read here:
                 'path' (command to run), 'parameters' (appended to the
                 command), 'threshold' (limit for the reported value) and
                 'type' ("low" makes values below the threshold fail;
                 anything else makes values above it fail).

    A non-zero exit status is a failure.  Without a 'threshold' a zero
    exit status is a pass.  Otherwise the first line of output is read as
    a number and compared with the threshold; output that is not a number
    is reported as a failure.  Failures go through report_failure();
    passes are written to the HTML report only.
    """
    utter("processing test '%s'" % testname)
    if 'path' in testparam:
        test_cmd = testparam["path"]
    else:
        test_cmd = "/usr/share/trove-overseer/plugins/" + testname

    if 'parameters' in testparam:
        test_cmd = test_cmd + (" %s" % testparam['parameters'])

    pipe = run_command(test_cmd)
    # communicate() keeps draining the pipe until the child exits.
    # Reading a single line and then calling wait() can block forever
    # once a chatty child has filled the pipe buffer.
    output = pipe.communicate()[0]
    exitcode = pipe.returncode
    if not isinstance(output, str):
        # Python 3 hands back bytes; work with text from here on.
        output = output.decode("utf-8", "replace")

    if exitcode != 0:
        report_failure(testname, testparam,
                       "Test failed with non-zero result: %d, %s" %
                       (exitcode, output), result = exitcode)
        return

    if 'threshold' not in testparam:
        # this is a pass/fail test: assume success
        if html_report:
            html_report.result(testname, testparam, 1, True, "")
        return

    # Only the first line of output carries the measured value.
    first_line = output.split("\n", 1)[0]
    try:
        result = float(first_line)
    except ValueError:
        # A misbehaving plugin must not take the whole run down.
        report_failure(testname, testparam,
                       "Test produced non-numeric output: %r" % first_line)
        return

    hightest = testparam.get('type') != "low"

    threshold = human_to_bytes(testparam['threshold'])
    utter("test returned %d, threshold is %d\n" % (result, threshold))

    if hightest and result > threshold:
        report_failure(testname, testparam,
                       "Result above threshold of %s" % testparam['threshold'],
                       result = result)
        return
    if not hightest and result < threshold:
        report_failure(testname, testparam,
                       "Result below threshold of %s" % testparam['threshold'],
                       result = result)
        return

    if html_report:
        html_report.result(testname, testparam, result, True, "")

def main():
    """Load overseer.yaml, run every configured test and emit the reports.

    An optional first command-line argument names the HTML status page to
    write.  Setting OVERSEER_DEBUG in the environment turns on verbose
    output.  The process exits with status 1 when "git pull" fails, when
    overseer.yaml cannot be opened or parsed, or when it has no
    'overseer' section.
    """
    global verbose, html_report, email_report

    if os.getenv("OVERSEER_DEBUG") is not None:
        verbose = True

    webpage = argv[1] if len(argv) > 1 else None

    if os.system("git pull -q") != 0:
        stderr.write("Unable to execute git pull\n")
        exit(1)

    try:
        config_file = open("overseer.yaml", "r")
    except IOError as e:
        stderr.write("Unable to open overseer.yaml: %s\n" % e.strerror)
        exit(1)

    # The with-block closes the file on every path, including exit().
    with config_file:
        try:
            # safe_load builds plain data only.  yaml.load without an
            # explicit Loader can construct arbitrary Python objects from
            # the file and is rejected by current PyYAML releases.
            config_data = yaml.safe_load(config_file)
        except yaml.scanner.ScannerError as e:
            stderr.write("YAML scanner error: %s\n" % e)
            exit(1)
        except yaml.parser.ParserError as e:
            stderr.write("YAML parser error: %s\n" % e)
            exit(1)

    # An empty file loads as None and a bare scalar as a string; neither
    # can hold an 'overseer' section.
    if not isinstance(config_data, dict) or 'overseer' not in config_data:
        stderr.write("Configuration contains no overseer data.\n")
        exit(1)

    if webpage is not None:
        html_report = html_output(webpage)
    else:
        html_report = False

    if 'email' in config_data['overseer']:
        utter("Creating email report")
        email_report = email_output(config_data['overseer']['email'])
    else:
        utter("Skipping email report")
        email_report = False

    for test in config_data['overseer']['tests']:
        # Each entry is a mapping whose first item is name -> settings.
        (testname, testparam) = next(iter(test.items()))
        run_test(testname, testparam)

    if html_report:
        html_report.finish()

    if email_report:
        email_report.finish()


if __name__ == '__main__':
    main()