diff options
author | Lars Wirzenius <lars.wirzenius@codethink.co.uk> | 2014-03-27 16:20:57 +0000 |
---|---|---|
committer | Lars Wirzenius <lars.wirzenius@codethink.co.uk> | 2014-03-27 16:20:57 +0000 |
commit | 2ff07fb828fcf934284d842943a3c4d6ee3320c5 (patch) | |
tree | 9d1761c683a9906b8cdfafe57a92b5c5da001a75 | |
parent | b38763491b70dfee419ec8f387657fcf36770e82 (diff) | |
download | lorry-controller-2ff07fb828fcf934284d842943a3c4d6ee3320c5.tar.gz |
Add timeout for killing jobs
-rw-r--r-- | lorrycontroller/__init__.py | 1 | ||||
-rw-r--r-- | lorrycontroller/givemejob.py | 14 | ||||
-rw-r--r-- | lorrycontroller/jobupdate.py | 30 | ||||
-rw-r--r-- | lorrycontroller/listjobs.py | 3 | ||||
-rw-r--r-- | lorrycontroller/lstroves.py | 12 | ||||
-rw-r--r-- | lorrycontroller/pretendtime.py | 42 | ||||
-rw-r--r-- | lorrycontroller/readconf.py | 8 | ||||
-rw-r--r-- | lorrycontroller/showjob.py | 51 | ||||
-rw-r--r-- | lorrycontroller/showlorry.py | 3 | ||||
-rw-r--r-- | lorrycontroller/statedb.py | 108 | ||||
-rw-r--r-- | lorrycontroller/status.py | 16 | ||||
-rw-r--r-- | yarns.webapp/040-running-jobs.yarn | 53 | ||||
-rw-r--r-- | yarns.webapp/900-implementations.yarn | 26 |
13 files changed, 298 insertions, 69 deletions
diff --git a/lorrycontroller/__init__.py b/lorrycontroller/__init__.py index 2bc0128..99e0d29 100644 --- a/lorrycontroller/__init__.py +++ b/lorrycontroller/__init__.py @@ -34,6 +34,7 @@ from listjobs import ListAllJobs, ListAllJobsHTML from showjob import ShowJob from removejob import RemoveJob from lstroves import LsTroves, ForceLsTrove +from pretendtime import PretendTime from static import StaticFile diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py index 3690b49..0510a26 100644 --- a/lorrycontroller/givemejob.py +++ b/lorrycontroller/givemejob.py @@ -45,12 +45,13 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): statedb = self.open_statedb() with statedb: lorry_infos = statedb.get_all_lorries_info() + now = statedb.get_current_time() for lorry_info in lorry_infos: - if self.ready_to_run(lorry_info): + if self.ready_to_run(lorry_info, now): self.create_repository_in_local_trove(lorry_info) if lorry_info['from_trovehost']: self.copy_repository_metadata(lorry_info) - self.give_job_to_minion(statedb, lorry_info) + self.give_job_to_minion(statedb, lorry_info, now) logging.info( 'Giving job %s to lorry %s to MINION %s:%s', lorry_info['job_id'], @@ -62,9 +63,9 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): logging.info('No job to give MINION') return { 'job_id': None } - def ready_to_run(self, lorry_info): + def ready_to_run(self, lorry_info, now): due = lorry_info['last_run'] + lorry_info['interval'] - return (lorry_info['running_job'] is None and due <= time.time()) + return (lorry_info['running_job'] is None and due <= now) def create_repository_in_local_trove(self, lorry_info): # Create repository on local Trove. 
If it fails, assume @@ -134,12 +135,13 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): if exit: raise GitanoCommandFailure(trovehost, 'config set') - def give_job_to_minion(self, statedb, lorry_info): + def give_job_to_minion(self, statedb, lorry_info, now): path = lorry_info['path'] minion_host = bottle.request.forms.host minion_pid = bottle.request.forms.pid running_job = statedb.get_next_job_id() statedb.set_running_job(path, running_job) - statedb.add_new_job(running_job, minion_host, minion_pid, path) + statedb.add_new_job( + running_job, minion_host, minion_pid, path, int(now)) lorry_info['job_id'] = running_job return lorry_info diff --git a/lorrycontroller/jobupdate.py b/lorrycontroller/jobupdate.py index 4a54c73..b32efdf 100644 --- a/lorrycontroller/jobupdate.py +++ b/lorrycontroller/jobupdate.py @@ -45,9 +45,31 @@ class JobUpdate(lorrycontroller.LorryControllerRoute): statedb.append_to_job_output(job_id, stderr) path = statedb.find_lorry_running_job(job_id) + lorry_info = statedb.get_lorry_info(path) + if exit is not None and exit != 'no': - lorry_info = statedb.get_lorry_info(path) - statedb.set_lorry_last_run(path, int(time.time())) + now = statedb.get_current_time() + statedb.set_lorry_last_run(path, int(now)) statedb.set_running_job(path, None) - statedb.set_job_exit(job_id, exit) - return statedb.get_lorry_info(path) + statedb.set_job_exit(job_id, exit, int(now)) + elif self.time_to_die(statedb, job_id, lorry_info): + logging.warning( + 'Job %r has been running too long, ' + 'marking it to be exterminated', job_id) + statedb.set_kill_job(path, True) + + obj = statedb.get_lorry_info(path) + logging.debug('obj=%r', obj) + return obj + + def time_to_die(self, statedb, job_id, lorry_info): + started, ended = statedb.get_job_started_and_ended(job_id) + lorry_timeout = lorry_info['lorry_timeout'] + now = statedb.get_current_time() + age = now - started + logging.debug('started=%r', started) + logging.debug('ended=%r', ended) + 
logging.debug('lorry_timeout=%r', lorry_timeout) + logging.debug('now=%r', now) + logging.debug('age=%r', age) + return age >= lorry_timeout diff --git a/lorrycontroller/listjobs.py b/lorrycontroller/listjobs.py index c32552d..a2020cc 100644 --- a/lorrycontroller/listjobs.py +++ b/lorrycontroller/listjobs.py @@ -42,9 +42,10 @@ class ListAllJobsHTML(lorrycontroller.LorryControllerRoute): def run(self, **kwargs): logging.info('%s %s called', self.http_method, self.path) statedb = self.open_statedb() + now = statedb.get_current_time() values = { 'job_infos': self.get_jobs(statedb), - 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()), + 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S UTC', now), } return bottle.template(self._templates['list-jobs'], **values) diff --git a/lorrycontroller/lstroves.py b/lorrycontroller/lstroves.py index 60c84bb..79c6969 100644 --- a/lorrycontroller/lstroves.py +++ b/lorrycontroller/lstroves.py @@ -47,7 +47,7 @@ class TroveRepositoryLister(object): with statedb: self.update_lorries_for_trove(statedb, trove_info, repo_map) - now = int(time.time()) + now = statedb.get_current_time() statedb.set_trove_ls_last_run(trove_info['trovehost'], now) def ls(self, trove_info): @@ -139,7 +139,8 @@ class TroveRepositoryLister(object): text=json.dumps(lorry, indent=4), from_trovehost=trovehost, from_path=remote_path, - interval=trove_info['lorry_interval']) + interval=trove_info['lorry_interval'], + timeout=trove_info['lorry_timeout']) all_local_paths = set(statedb.get_lorries_for_trove(trovehost)) wanted_local_paths = set(repo_map.values()) @@ -196,11 +197,12 @@ class LsTroves(lorrycontroller.LorryControllerRoute): trove_infos = [ statedb.get_trove_info(trovehost) for trovehost in statedb.get_troves()] + now = statedb.get_current_time() return [ trove_info for trove_info in trove_infos - if self.is_due(trove_info)] + if self.is_due(trove_info, now)] - def is_due(self, trove_info): + def is_due(self, trove_info, now): ls_due = 
trove_info['ls_last_run'] + trove_info['ls_interval'] - return ls_due <= time.time() + return ls_due <= now diff --git a/lorrycontroller/pretendtime.py b/lorrycontroller/pretendtime.py new file mode 100644 index 0000000..3fd1a70 --- /dev/null +++ b/lorrycontroller/pretendtime.py @@ -0,0 +1,42 @@ +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + +import errno +import glob +import json +import logging +import os +import re + +import bottle +import cliapp + +import lorrycontroller + + +class PretendTime(lorrycontroller.LorryControllerRoute): + + http_method = 'POST' + path = '/1.0/pretend-time' + + def run(self, **kwargs): + logging.info('%s %s called', self.http_method, self.path) + + now = bottle.request.forms.now + + statedb = self.open_statedb() + with statedb: + statedb.set_pretend_time(now) diff --git a/lorrycontroller/readconf.py b/lorrycontroller/readconf.py index ec0ae6d..9bbc579 100644 --- a/lorrycontroller/readconf.py +++ b/lorrycontroller/readconf.py @@ -39,6 +39,8 @@ class ReadConfiguration(lorrycontroller.LorryControllerRoute): http_method = 'POST' path = '/1.0/read-configuration' + DEFAULT_LORRY_TIMEOUT = 3600 # in seconds + def run(self, **kwargs): logging.info('%s %s called', self.http_method, self.path) @@ -187,6 +189,8 @@ class ReadConfiguration(lorrycontroller.LorryControllerRoute): path = self.deduce_repo_path(section, lorry_spec) text = self.serialise_lorry_spec(lorry_spec) interval = section['interval'] + timeout = section.get( + 'lorry-timeout', self.DEFAULT_LORRY_TIMEOUT) try: old_lorry_info = statedb.get_lorry_info(path) @@ -195,7 +199,7 @@ class ReadConfiguration(lorrycontroller.LorryControllerRoute): statedb.add_to_lorries( path=path, text=text, from_trovehost='', from_path='', - interval=interval) + interval=interval, timeout=timeout) added_paths.add(path) @@ -265,6 +269,8 @@ class ReadConfiguration(lorrycontroller.LorryControllerRoute): statedb.add_trove( trovehost=section['trovehost'], lorry_interval=section['interval'], + lorry_timeout=section.get( + 'lorry-timeout', self.DEFAULT_LORRY_TIMEOUT), ls_interval=section['ls-interval'], prefixmap=json.dumps(section['prefixmap']), ignore=json.dumps(section['ignore'])) diff --git a/lorrycontroller/showjob.py b/lorrycontroller/showjob.py index 5ac5a4d..3ebab8d 100644 --- a/lorrycontroller/showjob.py +++ b/lorrycontroller/showjob.py 
@@ -22,30 +22,53 @@ import bottle import lorrycontroller -class ShowJob(lorrycontroller.LorryControllerRoute): - - http_method = 'GET' - path = '/1.0/job/<job_id:int>' - - def run(self, **kwargs): - logging.info('%s %s called', self.http_method, self.path) - job_id = int(kwargs['job_id']) - - statedb = self.open_statedb() +class JobShower(object): + def get_job_as_json(self, statedb, job_id): path = statedb.get_job_path(job_id) exit = statedb.get_job_exit(job_id) output = statedb.get_job_output(job_id) + started, ended = statedb.get_job_started_and_ended(job_id) + now = statedb.get_current_time() - variables = { + return { 'job_id': job_id, 'host': statedb.get_job_minion_host(job_id), 'pid': statedb.get_job_minion_pid(job_id), 'path': statedb.get_job_path(job_id), 'exit': 'no' if exit is None else exit, 'output': output, - 'timestamp': - time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()), + 'job-started': self.format_time(started), + 'job-ended': self.format_time(ended), + 'timestamp': self.format_time(now), } - + + def format_time(self, timestamp): + return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(timestamp)) + + +class ShowJob(lorrycontroller.LorryControllerRoute): + + http_method = 'GET' + path = '/1.0/job/<job_id:int>' + + def run(self, **kwargs): + logging.info('%s %s called', self.http_method, self.path) + job_id = int(kwargs['job_id']) + + statedb = self.open_statedb() + return JobShower().get_job_as_json(statedb, job_id) + + +class ShowJobHTML(lorrycontroller.LorryControllerRoute): + + http_method = 'GET' + path = '/1.0/job-html/<job_id:int>' + + def run(self, **kwargs): + logging.info('%s %s called', self.http_method, self.path) + job_id = int(kwargs['job_id']) + + statedb = self.open_statedb() + variables = JobShower().get_job_as_json(statedb, job_id) return bottle.template(self._templates['job'], **variables) diff --git a/lorrycontroller/showlorry.py b/lorrycontroller/showlorry.py index 79a254e..599878c 100644 --- 
a/lorrycontroller/showlorry.py +++ b/lorrycontroller/showlorry.py @@ -66,7 +66,8 @@ class ShowLorryHTML(lorrycontroller.LorryControllerRoute): due = lorry_info['last_run'] + lorry_info['interval'] lorry_info['due_nice'] = renderer.format_due_nicely(due) - timestamp = time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()) + now = statedb.get_current_time() + timestamp = time.strftime('%Y-%m-%d %H:%M:%S UTC', now) parts = urlparse.urlparse(bottle.request.url) host, port = parts.netloc.split(':', 1) diff --git a/lorrycontroller/statedb.py b/lorrycontroller/statedb.py index 1f45593..a056dc5 100644 --- a/lorrycontroller/statedb.py +++ b/lorrycontroller/statedb.py @@ -64,6 +64,7 @@ class StateDB(object): ('kill_job', 'INT'), ('last_run', 'INT'), ('interval', 'INT'), + ('lorry_timeout', 'INT'), ] self.lorries_booleans = [ 'kill_job', @@ -95,6 +96,7 @@ class StateDB(object): 'CREATE TABLE troves (' 'trovehost TEXT PRIMARY KEY, ' 'lorry_interval INT, ' + 'lorry_timeout INT, ' 'ls_interval INT, ' 'ls_last_run INT, ' 'prefixmap TEXT, ' @@ -119,10 +121,17 @@ class StateDB(object): 'job_id INT PRIMARY KEY, ' 'host TEXT, ' 'pid INT, ' + 'started INT, ' + 'ended INT, ' 'path TEXT, ' 'exit TEXT, ' 'output TEXT)') + # A table to give the current pretended time, if one is set. + # This table is either empty, in which case time.time() is + # used, or has one row, which is used for the current time. + c.execute('CREATE TABLE time (now INT)') + # Stupid table we can always write to to trigger the start of # a transaction. 
c.execute('CREATE TABLE stupid (value INT)') @@ -190,7 +199,7 @@ class StateDB(object): def get_trove_info(self, trovehost): c = self.get_cursor() c.execute( - 'SELECT lorry_interval, ls_interval, ls_last_run, ' + 'SELECT lorry_interval, lorry_timeout, ls_interval, ls_last_run, ' 'prefixmap, ignore ' 'FROM troves WHERE trovehost IS ?', (trovehost,)) @@ -200,20 +209,24 @@ class StateDB(object): return { 'trovehost': trovehost, 'lorry_interval': row[0], - 'ls_interval': row[1], - 'ls_last_run': row[2], - 'prefixmap': row[3], - 'ignore': row[4], + 'lorry_timeout': row[1], + 'ls_interval': row[2], + 'ls_last_run': row[3], + 'prefixmap': row[4], + 'ignore': row[5], } - def add_trove(self, trovehost=None, lorry_interval=None, ls_interval=None, + def add_trove(self, trovehost=None, lorry_interval=None, + lorry_timeout=None, ls_interval=None, prefixmap=None, ignore=None): logging.debug( - 'StateDB.add_trove(%r,%r,%r,%r,%r) called', - trovehost, lorry_interval, ls_interval, prefixmap, ignore) + 'StateDB.add_trove(%r,%r,%r,%r,%r,%r) called', + trovehost, lorry_interval, lorry_timeout, ls_interval, + prefixmap, ignore) assert trovehost is not None assert lorry_interval is not None + assert lorry_timeout is not None assert ls_interval is not None assert prefixmap is not None assert ignore is not None @@ -225,18 +238,21 @@ class StateDB(object): c = self.get_cursor() c.execute( 'INSERT INTO troves ' - '(trovehost, lorry_interval, ls_interval, ls_last_run, ' + '(trovehost, lorry_interval, lorry_timeout, ' + 'ls_interval, ls_last_run, ' 'prefixmap, ignore) ' - 'VALUES (?, ?, ?, ?, ?, ?)', - (trovehost, lorry_interval, ls_interval, 0, + 'VALUES (?, ?, ?, ?, ?, ?, ?)', + (trovehost, lorry_interval, lorry_timeout, ls_interval, 0, prefixmap, ignore)) else: c = self.get_cursor() c.execute( 'UPDATE troves ' - 'SET lorry_interval=?, ls_interval=?, prefixmap=?, ignore=? ' + 'SET lorry_interval=?, lorry_timeout=?, ls_interval=?, ' + 'prefixmap=?, ignore=? 
' 'WHERE trovehost IS ?', - (lorry_interval, ls_interval, prefixmap, ignore, trovehost)) + (lorry_interval, lorry_timeout, ls_interval, prefixmap, + ignore, trovehost)) def remove_trove(self, trovehost): logging.debug('StateDB.remove_trove(%r) called', trovehost) @@ -292,20 +308,23 @@ class StateDB(object): return [row[0] for row in c.fetchall()] def add_to_lorries(self, path=None, text=None, from_trovehost=None, - from_path=None, interval=None): + from_path=None, interval=None, timeout=None): logging.debug( 'StateDB.add_to_lorries(' - 'path=%r, text=%r, from_trovehost=%r, interval=%s called', + 'path=%r, text=%r, from_trovehost=%r, interval=%s, ' + 'timeout=%r called', path, text, from_trovehost, - interval) + interval, + timeout) assert path is not None assert text is not None assert from_trovehost is not None assert from_path is not None assert interval is not None + assert timeout is not None assert self.in_transaction try: @@ -315,16 +334,18 @@ class StateDB(object): c.execute( 'INSERT INTO lorries ' '(path, text, from_trovehost, from_path, last_run, interval, ' - 'running_job, kill_job) ' - 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)', - (path, text, from_trovehost, from_path, 0, interval, None, 0)) + 'lorry_timeout, running_job, kill_job) ' + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', + (path, text, from_trovehost, from_path, 0, + interval, timeout, None, 0)) else: c = self.get_cursor() c.execute( 'UPDATE lorries ' - 'SET text=?, from_trovehost=?, from_path=?, interval=? ' + 'SET text=?, from_trovehost=?, from_path=?, interval=?, ' + 'lorry_timeout=? 
' 'WHERE path IS ?', - (text, from_trovehost, from_path, interval, path)) + (text, from_trovehost, from_path, interval, timeout, path)) def remove_lorry(self, path): logging.debug('StateDB.remove_lorry(%r) called', path) @@ -365,6 +386,7 @@ class StateDB(object): return [row[0] for row in c.fetchall()] def set_kill_job(self, path, value): + logging.debug('StateDB.set_kill_job(%r, %r) called', path, value) assert self.in_transaction if value: value = 1 @@ -399,15 +421,16 @@ class StateDB(object): c.execute('SELECT job_id FROM jobs') return [row[0] for row in c.fetchall()] - def add_new_job(self, job_id, host, pid, path): + def add_new_job(self, job_id, host, pid, path, started): logging.debug( - 'StateDB.add_new_job(%r, %r, %r, %r) called', - job_id, host, pid, path) + 'StateDB.add_new_job(%r, %r, %r, %r, %r) called', + job_id, host, pid, path, started) assert self.in_transaction c = self.get_cursor() c.execute( - 'INSERT INTO jobs (job_id, host, pid, path) VALUES (?, ?, ?, ?)', - (job_id, host, pid, path)) + 'INSERT INTO jobs (job_id, host, pid, path, started) ' + 'VALUES (?, ?, ?, ?, ?)', + (job_id, host, pid, path, started)) def get_job_minion_host(self, job_id): c = self.get_cursor() @@ -433,6 +456,14 @@ class StateDB(object): row = c.fetchone() return row[0] + def get_job_started_and_ended(self, job_id): + c = self.get_cursor() + c.execute( + 'SELECT started, ended FROM jobs WHERE job_id IS ?', + (job_id,)) + row = c.fetchone() + return row[0], row[1] + def get_job_exit(self, job_id): c = self.get_cursor() c.execute( @@ -441,13 +472,14 @@ class StateDB(object): row = c.fetchone() return row[0] - def set_job_exit(self, job_id, exit): - logging.debug('StateDB.set_job_exit(%r, %r) called', job_id, exit) + def set_job_exit(self, job_id, exit, ended): + logging.debug( + 'StateDB.set_job_exit(%r, %r, %r) called', job_id, exit, ended) assert self.in_transaction c = self.get_cursor() c.execute( - 'UPDATE jobs SET exit=? 
WHERE job_id IS ?', - (exit, job_id)) + 'UPDATE jobs SET exit=?, ended=? WHERE job_id IS ?', + (exit, ended, job_id)) def get_job_output(self, job_id): c = self.get_cursor() @@ -473,3 +505,19 @@ class StateDB(object): assert self.in_transaction c = self.get_cursor() c.execute('DELETE FROM jobs WHERE job_id = ?', (job_id,)) + + def set_pretend_time(self, now): + logging.debug('StateDB.set_pretend_time(%r) called', now) + assert self.in_transaction + c = self.get_cursor() + c.execute('DELETE FROM time') + c.execute('INSERT INTO time (now) VALUES (?)', (int(now),)) + + def get_current_time(self): + c = self.get_cursor() + c.execute('SELECT now FROM time') + row = c.fetchone() + if row: + return row[0] + else: + return time.time() diff --git a/lorrycontroller/status.py b/lorrycontroller/status.py index a4fee86..b8ce1c4 100644 --- a/lorrycontroller/status.py +++ b/lorrycontroller/status.py @@ -34,10 +34,12 @@ class StatusRenderer(object): "I'm giving her all she's got, Captain!", ] import random + now = statedb.get_current_time() status = { 'quote': '%s' % random.choice(quotes), 'running_queue': statedb.get_running_queue(), - 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()), + 'timestamp': + time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now)), 'run_queue': self.get_run_queue(statedb), 'troves': self.get_troves(statedb), 'warning_msg': '', @@ -68,15 +70,15 @@ class StatusRenderer(object): def get_run_queue(self, statedb): lorries = statedb.get_all_lorries_info() - now = time.time() + now = statedb.get_current_time() for lorry in lorries: due = lorry['last_run'] + lorry['interval'] lorry['interval_nice'] = self.format_secs_nicely(lorry['interval']) - lorry['due_nice'] = self.format_due_nicely(due) + lorry['due_nice'] = self.format_due_nicely(due, now) return lorries - def format_due_nicely(self, due): - now = int(time.time()) + def format_due_nicely(self, due, now): + now = int(now) if due <= now: return 'now' else: @@ -116,8 +118,8 @@ class 
StatusRenderer(object): trove_info['ls_interval']) ls_due = trove_info['ls_last_run'] + trove_info['ls_interval'] - now = int(time.time()) - trove_info['ls_due_nice'] = self.format_due_nicely(ls_due) + now = int(statedb.get_current_time()) + trove_info['ls_due_nice'] = self.format_due_nicely(ls_due, now) troves.append(trove_info) return troves diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn index 263e032..c6b8a72 100644 --- a/yarns.webapp/040-running-jobs.yarn +++ b/yarns.webapp/040-running-jobs.yarn @@ -115,6 +115,59 @@ Cleanup. FINALLY WEBAPP terminates +Stop a job that runs too long +----------------------------- + +Sometimes a job gets "stuck" and should be killed. The +`lorry-controller.conf` has an optional `lorry-timeout` field for +this, to set the timeout, and WEBAPP will tell MINION to kill a job +when it has been running too long. + +Some setup. Set the `lorry-timeout` to a known value. It doesn't +matter what it is since we'll be telling WEBAPP to fake its sense of +time, so that the test suite is not timing sensitive. We wouldn't want +to have the test suite fail when running on slow devices. + + SCENARIO stop stuck job + GIVEN a new git repository in CONFGIT + AND an empty lorry-controller.conf in CONFGIT + AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream + AND lorry-controller.conf in CONFGIT has lorry-timeout set to 1 for everything + AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}} + AND WEBAPP uses CONFGIT as its configuration directory + AND a running WEBAPP + WHEN admin makes request POST /1.0/read-configuration with dummy=value + +Pretend it is the start of time. + + WHEN admin makes request POST /1.0/pretend-time with now=0 + WHEN admin makes request GET /1.0/status + THEN response has timestamp set to "1970-01-01 00:00:00 UTC" + +Start the job. 
+ + WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123 + THEN response has job_id set to 1 + +Check that the job info contains a start time. + + WHEN admin makes request GET /1.0/job/1 + THEN response has job-started set + +Pretend it is now much later, or at least later than the timeout specified. + + WHEN admin makes request POST /1.0/pretend-time with now=2 + +Pretend to be a MINION that reports an update on the job. WEBAPP +should now be telling us to kill the job. + + WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no + THEN response has kill_job set to true + +Cleanup. + + FINALLY WEBAPP terminates + Remove a terminated jobs ------------------------ diff --git a/yarns.webapp/900-implementations.yarn b/yarns.webapp/900-implementations.yarn index 39c4a63..600f8d0 100644 --- a/yarns.webapp/900-implementations.yarn +++ b/yarns.webapp/900-implementations.yarn @@ -173,6 +173,32 @@ most of the configuration. json.dump(obj, f, indent=4) ' +Set a specific field for all sections in a `lorry-controller.conf` +file. + + IMPLEMENTS GIVEN (\S+) in (\S+) has (\S+) set to (.+) for everything + python -c ' + import os + import json + + DATADIR = os.environ["DATADIR"] + MATCH_1 = os.environ["MATCH_1"] + MATCH_2 = os.environ["MATCH_2"] + MATCH_3 = os.environ["MATCH_3"] + MATCH_4 = os.environ["MATCH_4"] + + filename = os.path.join(DATADIR, MATCH_2, MATCH_1) + + with open(filename, "r") as f: + obj = json.load(f) + + for section in obj: + section[MATCH_3] = json.loads(MATCH_4) + + with open(filename, "w") as f: + json.dump(obj, f, indent=4) + ' + Set a specific field for a `troves` section. IMPLEMENTS GIVEN (\S+) in (\S+) sets (\S+) to (\S+) for trove (\S+) |