diff options
author | Pedro Alvarez <pedro.alvarez@codethink.co.uk> | 2014-10-23 09:49:37 +0100 |
---|---|---|
committer | Pedro Alvarez <pedro.alvarez@codethink.co.uk> | 2014-10-23 09:49:37 +0100 |
commit | d7ca3f2f14ad28437f7332ca387a790fecb8b081 (patch) | |
tree | 90d364dc107b0c032ecc5befc4f36393d9b655b0 | |
parent | f9ea6b7761a5dbfe7b5e5b2adcabf70525184d7e (diff) | |
parent | d91b652951cf9203c8e8cd255a0e0a81af6d2591 (diff) | |
download | lorry-controller-d7ca3f2f14ad28437f7332ca387a790fecb8b081.tar.gz |
Merge branch 'baserock/pedroalvarez/old-jobs-removal'
Reviewed-by: Richard Maw
Reviewed-by: Pedro Alvarez
-rw-r--r-- | ARCH | 16 | ||||
-rwxr-xr-x | lorry-controller-remove-old-jobs | 153 | ||||
-rw-r--r-- | lorrycontroller/showjob.py | 2 | ||||
-rw-r--r-- | setup.py | 3 | ||||
-rw-r--r-- | units/lorry-controller-remove-old-jobs.service | 12 | ||||
-rw-r--r-- | units/lorry-controller-remove-old-jobs.timer | 8 | ||||
-rw-r--r-- | yarns.webapp/040-running-jobs.yarn | 56 | ||||
-rw-r--r-- | yarns.webapp/900-implementations.yarn | 13 |
8 files changed, 260 insertions, 3 deletions
@@ -282,9 +282,12 @@ Running job management: MINION gets around to it.) This request returns as soon as the STATEDB change is done. -* `GET /1.0/list-all-jobs` causes WEBAPP to return a JSON list of ids +* `GET /1.0/list-jobs` causes WEBAPP to return a JSON list of ids of all jobs, running or finished, that it knows about. (RQ/ALLJOBS) +* `GET /1.0/list-jobs-html` is the same as `list-jobs`, but returns an + HTML page instead. + * `POST /1.0/remove-job` with `job_id=jobid` in the body, removes a stopped job from the state database. @@ -369,6 +372,17 @@ The MINION to WEBAPP. * Go back to top to request new job. + +Old job removal +--------------- + +To avoid the STATEDB filling up with logs of old jobs, a systemd timer +unit will run occasionally to remove jobs so old, nobody cares about +them anymore. To make it easier to experiment with the logic of +choosing what to remove (age only? keep failed ones? something else?) +the removal is kept outside the WEBAPP. + + STATEDB ------- diff --git a/lorry-controller-remove-old-jobs b/lorry-controller-remove-old-jobs new file mode 100755 index 0000000..1448649 --- /dev/null +++ b/lorry-controller-remove-old-jobs @@ -0,0 +1,153 @@ +#!/usr/bin/env python +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + +import json +import logging +import time +import urllib2 +import urlparse +import contextlib + +import cliapp + + +class JobInfo(object): + + def __init__(self, job_id, exit_code, exit_timestamp): + self.job_id = job_id + self.exit_code = exit_code + self.exit_timestamp = exit_timestamp + + def __repr__(self): + return 'JobInfo(%s,%s,%s)' % ( + self.job_id, self.exit_code, self.exit_timestamp) + + +class OldJobRemover(cliapp.Application): + + def add_settings(self): + self.settings.string( + ['webapp-host'], + 'address of WEBAPP', + default='localhost') + + self.settings.integer( + ['webapp-port'], + 'port of WEBAPP', + default=12765) + + ONE_MINUTE = 60 + ONE_HOUR = 60 * ONE_MINUTE + ONE_DAY = 24 * ONE_HOUR + ONE_YEAR = 365 * ONE_DAY + + self.settings.integer( + ['max-age-in-seconds', 'max-age'], + 'maximum age of a finished job in seconds', + metavar='SECONDS', + default=ONE_YEAR) + + self.settings.integer( + ['debug-now'], + 'for tests and debugging, ' + 'set current time to SECONDS since the epoch ' + '(set to 0 to use real time', + metavar='SECONDS') + + def process_args(self, args): + logging.info('Removing old jobs from Lorry Controller STATEDB') + + job_ids = self.list_jobs() + job_infos = self.get_job_infos(job_ids) + ids_of_jobs_to_remove = self.select_for_removal(job_infos) + self.remove_jobs(ids_of_jobs_to_remove) + + def list_jobs(self): + data = self.get('/1.0/list-jobs') + obj = json.loads(data) + return obj['job_ids'] + + def get(self, path): + url = self.make_url(path) + with contextlib.closing(urllib2.urlopen(url)) as f: + return f.read() + + def make_url(self, path): + scheme = 'http' + netloc = '%s:%s' % ( + self.settings['webapp-host'], self.settings['webapp-port']) + query = None + fragment = None + parts = (scheme, netloc, path, query, fragment) + return urlparse.urlunsplit(parts) + + def get_job_infos(self, job_ids): + job_infos = [] + for job_id in job_ids: + try: + job_infos.append(self.get_job_info(job_id)) + except urllib2.HTTPError 
as e: + logging.warning( + 'Trouble getting job info for job %s: %s' % + (job_id, str(e))) + return job_infos + + def get_job_info(self, job_id): + data = self.get('/1.0/job/%s' % job_id) + obj = json.loads(data) + exit_code = obj['exit'] + if obj['job_ended']: + exit_timestamp = self.parse_timestamp(obj['job_ended']) + else: + exit_timestamp = None + return JobInfo(job_id, exit_code, exit_timestamp) + + def parse_timestamp(self, timestamp): + return time.mktime(time.strptime(timestamp, '%Y-%m-%d %H:%M:%S UTC')) + + def select_for_removal(self, job_infos): + return [job_info for job_info in job_infos if self.is_old(job_info)] + + def is_old(self, job_info): + if job_info.exit_timestamp is None: + return False + current_time = self.get_current_time() + age_in_seconds = current_time - job_info.exit_timestamp + return age_in_seconds >= self.settings['max-age-in-seconds'] + + def get_current_time(self): + if self.settings['debug-now']: + return self.settings['debug-now'] + return time.time() + + def remove_jobs(self, job_infos): + for job_info in job_infos: + self.remove_job(job_info.job_id) + + def remove_job(self, job_id): + logging.info('Removing job %s', job_id) + self.post('/1.0/remove-job', 'job_id=%s' % job_id) + + def post(self, path, data): + url = self.make_url(path) + f = urllib2.urlopen(url, data) + result = f.read() + f.close() + + +OldJobRemover().run() diff --git a/lorrycontroller/showjob.py b/lorrycontroller/showjob.py index 951ad22..bc82bfe 100644 --- a/lorrycontroller/showjob.py +++ b/lorrycontroller/showjob.py @@ -39,7 +39,7 @@ class JobShower(object): 'path': statedb.get_job_path(job_id), 'exit': 'no' if exit is None else exit, 'disk_usage': disk_usage, - 'disk_usage_nice': self.format_bytesize(disk_usage), + 'disk_usage_nice': self.format_bytesize(disk_usage or 0), 'output': output, 'job_started': self.format_time(started), 'job_ended': '' if ended is None else self.format_time(ended), @@ -12,7 +12,8 @@ setup(name='lorry-controller', 
author='Codethink Ltd', author_email='baserock-dev@baserock.org', url='http://www.baserock.com/', - scripts=['lorry-controller-webapp', 'lorry-controller-minion'], + scripts=['lorry-controller-webapp', 'lorry-controller-minion', + 'lorry-controller-remove-old-jobs'], packages=['lorrycontroller'], data_files=[ ('share/lorry-controller/templates', glob.glob('templates/*')), diff --git a/units/lorry-controller-remove-old-jobs.service b/units/lorry-controller-remove-old-jobs.service new file mode 100644 index 0000000..d1372b4 --- /dev/null +++ b/units/lorry-controller-remove-old-jobs.service @@ -0,0 +1,12 @@ +[Unit] +Description=Lorry Controller remove old jobs +After=lighttpd-lorry-controller-webapp.service + +[Install] +WantedBy=multi-user.target + +[Service] +ExecStart=/usr/bin/lorry-controller-remove-old-jobs +Restart=no +User=lorry +Group=lorry diff --git a/units/lorry-controller-remove-old-jobs.timer b/units/lorry-controller-remove-old-jobs.timer new file mode 100644 index 0000000..508a43f --- /dev/null +++ b/units/lorry-controller-remove-old-jobs.timer @@ -0,0 +1,8 @@ +[Unit] +Description=Lorry Controller remove old jobs + +[Install] +WantedBy=multi-user.target + +[Timer] +OnUnitInactiveSec=60 diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn index 571afd6..cbc8f75 100644 --- a/yarns.webapp/040-running-jobs.yarn +++ b/yarns.webapp/040-running-jobs.yarn @@ -359,3 +359,59 @@ Remove it. Cleanup. FINALLY WEBAPP terminates + + +Remove old terminated jobs with helper program +-------------------------- + +There is a helper program to remove old jobs automatically. + + SCENARIO remove old terminated jobs + +Setup. 
+ + GIVEN a new git repository in CONFGIT + AND an empty lorry-controller.conf in CONFGIT + AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream + AND WEBAPP uses CONFGIT as its configuration directory + AND a running WEBAPP + GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}} + WHEN admin makes request POST /1.0/read-configuration + +Start job 1. We start it at a known time of 100, so that we can control +when jobs become old. + + WHEN admin makes request POST /1.0/pretend-time with now=100 + AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123 + THEN response has job_id set to 1 + +Remove old jobs while job 1 is running, still pretending time is 100 +seconds since epoch. This should leave job 1 running. + + WHEN admin removes old jobs at 100 + AND admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [1] + +Finish the job. + + WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0 + WHEN admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [1] + +Remove old jobs, still at 100 seconds. Job 1 should still remain, as +it just finished. + + WHEN admin removes old jobs at 100 + AND admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [1] + +Let a long time pass, and remove old jobs again. Job 1 should now go +away. + + WHEN admin removes old jobs at 100000000000 + AND admin makes request GET /1.0/list-jobs + THEN response has job_ids set to [] + +Cleanup. + + FINALLY WEBAPP terminates diff --git a/yarns.webapp/900-implementations.yarn b/yarns.webapp/900-implementations.yarn index 54a82a4..455a993 100644 --- a/yarns.webapp/900-implementations.yarn +++ b/yarns.webapp/900-implementations.yarn @@ -406,6 +406,19 @@ Some responses are just plain text, so we match them with a regexp. 
grep "$MATCH_1" "$DATADIR/response.body" +Running the "remove old jobs" helper program +-------------------------------------------- + +Lorry Controller comes with a helper program to remove old jobs from +STATEDB. Tests need to be able to run it. + + IMPLEMENTS WHEN admin removes old jobs at (\d+) + "$SRCDIR/lorry-controller-remove-old-jobs" \ + --log "$DATADIR/remove-old-jobs.log" \ + --webapp-host=127.0.0.1 \ + --webapp-port="$(cat "$DATADIR/webapp.port")" \ + --debug-now="$MATCH_1" + Status web page --------------- |