From bd214214053de9cf146e75f7ee4ad68c3d4a959c Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 23 Jun 2014 13:35:30 +0000 Subject: Forget "kill_job" flag for future runs of a lorry We do this by moving the "kill_job" column from the lorries table to the jobs table, renaming it to "kill" in the process. It makes no sense to have the flag to kill a specific job in the lorries table. This avoids the need to reset the flag, since it affects only a specific job, instead of all jobs of a lorry. --- lorrycontroller/jobupdate.py | 4 ++-- lorrycontroller/statedb.py | 43 +++++++++++++++++++++++++++----------- lorrycontroller/stopjob.py | 6 +++--- yarns.webapp/040-running-jobs.yarn | 13 ++++++------ 4 files changed, 42 insertions(+), 24 deletions(-) diff --git a/lorrycontroller/jobupdate.py b/lorrycontroller/jobupdate.py index 24a3c4a..3bd0e81 100644 --- a/lorrycontroller/jobupdate.py +++ b/lorrycontroller/jobupdate.py @@ -57,9 +57,9 @@ class JobUpdate(lorrycontroller.LorryControllerRoute): logging.warning( 'Job %r has been running too long, ' 'marking it to be exterminated', job_id) - statedb.set_kill_job(path, True) + statedb.set_kill_job(job_id, True) - obj = statedb.get_lorry_info(path) + obj = statedb.get_job_info(job_id) logging.debug('obj=%r', obj) return obj diff --git a/lorrycontroller/statedb.py b/lorrycontroller/statedb.py index 8316c9a..2d223e0 100644 --- a/lorrycontroller/statedb.py +++ b/lorrycontroller/statedb.py @@ -61,14 +61,12 @@ class StateDB(object): ('from_trovehost', 'TEXT'), ('from_path', 'TEXT'), ('running_job', 'INT'), - ('kill_job', 'INT'), ('last_run', 'INT'), ('interval', 'INT'), ('lorry_timeout', 'INT'), ('disk_usage', 'INT'), ] self.lorries_booleans = [ - 'kill_job', ] if self._conn is None: @@ -131,6 +129,7 @@ class StateDB(object): 'pid INT, ' 'started INT, ' 'ended INT, ' + 'kill INT, ' 'path TEXT, ' 'exit TEXT, ' 'disk_usage INT, ' @@ -356,10 +355,10 @@ class StateDB(object): c.execute( 'INSERT INTO lorries ' '(path, text, from_trovehost, from_path, last_run, interval, ' - 'lorry_timeout, running_job, kill_job) ' - 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', + 'lorry_timeout, running_job) ' + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)', (path, text, from_trovehost, from_path, 0, - interval, timeout, None, 0)) + interval, timeout, None)) else: c = self.get_cursor() c.execute( @@ -407,8 +406,8 @@ class StateDB(object): 'SELECT running_job FROM lorries WHERE running_job IS NOT NULL') return [row[0] for row in c.fetchall()] - def set_kill_job(self, path, value): - logging.debug('StateDB.set_kill_job(%r, %r) called', path, value) + def set_kill_job(self, job_id, value): + logging.debug('StateDB.set_kill_job(%r, %r) called', job_id, value) assert self.in_transaction if value: value = 1 @@ -416,8 +415,8 @@ class StateDB(object): value = 0 c = self.get_cursor() c.execute( - 'UPDATE lorries SET kill_job=? WHERE path=?', - (value, path)) + 'UPDATE jobs SET kill=? WHERE job_id=?', + (value, job_id)) def set_lorry_last_run(self, path, last_run): logging.debug( @@ -452,6 +451,26 @@ class StateDB(object): c.execute('SELECT job_id FROM jobs') return [row[0] for row in c.fetchall()] + def get_job_info(self, job_id): + c = self.get_cursor() + c.execute( + 'SELECT job_id, host, pid, started, ended, kill, path, exit, ' + 'disk_usage, output FROM jobs WHERE job_id=?', + (job_id,)) + row = c.fetchone() + return { + 'job_id': row[0], + 'host': row[1], + 'pid': row[2], + 'started': row[3], + 'ended': row[4], + 'kill': row[5], + 'path': row[6], + 'exit': row[7], + 'disk_usage': row[8], + 'output': row[9], + } + def add_new_job(self, job_id, host, pid, path, started): logging.debug( 'StateDB.add_new_job(%r, %r, %r, %r, %r) called', @@ -459,9 +478,9 @@ class StateDB(object): assert self.in_transaction c = self.get_cursor() c.execute( - 'INSERT INTO jobs (job_id, host, pid, path, started) ' - 'VALUES (?, ?, ?, ?, ?)', - (job_id, host, pid, path, started)) + 'INSERT INTO jobs (job_id, host, pid, path, started, kill) ' + 'VALUES (?, ?, ?, ?, ?, ?)', + (job_id, host, pid, path, started, 0)) def get_job_minion_host(self, job_id): c = self.get_cursor() diff --git a/lorrycontroller/stopjob.py b/lorrycontroller/stopjob.py index 947f733..f2ead87 100644 --- a/lorrycontroller/stopjob.py +++ b/lorrycontroller/stopjob.py @@ -28,14 +28,14 @@ class StopJob(lorrycontroller.LorryControllerRoute): def run(self, **kwargs): logging.info('%s %s called', self.http_method, self.path) + job_id = bottle.request.forms.job_id statedb = self.open_statedb() with statedb: - job_id = bottle.request.forms.job_id try: path = statedb.find_lorry_running_job(job_id) except lorrycontroller.WrongNumberLorriesRunningJob: logging.warning( "Tried to kill job %s which isn't running" % job_id) bottle.abort(409, 'Job is not currently running') - statedb.set_kill_job(path, True) - return statedb.get_lorry_info(path) + statedb.set_kill_job(job_id, True) + return statedb.get_job_info(job_id) diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn index f637b0b..879d9fa 100644 --- a/yarns.webapp/040-running-jobs.yarn +++ b/yarns.webapp/040-running-jobs.yarn @@ -58,7 +58,7 @@ Requesting another job should now again return null. Inform WEBAPP the job is finished. WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0 - THEN response has kill_job set to false + THEN response has kill set to false WHEN admin makes request GET /1.0/lorry/upstream/foo THEN response has running_job set to null WHEN admin makes request GET /1.0/list-running-jobs @@ -140,14 +140,13 @@ Admin will now ask WEBAPP to kill the job. This changes sets a field in the STATEDB only. WHEN admin makes request POST /1.0/stop-job with job_id=1 - AND admin makes request GET /1.0/lorry/upstream/foo - THEN response has kill_job set to true + THEN response has kill set to true Now, when MINION updates the job, WEBAPP will tell it to kill it. MINION will do so, and then update the job again. WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no - THEN response has kill_job set to true + THEN response has kill set to true WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=1 Admin will now see that the job has, indeed, been killed. @@ -166,7 +165,7 @@ successfully run. THEN response has job_id set to 2 AND response has path set to "upstream/foo" WHEN MINION makes request POST /1.0/job-update with job_id=2&exit=no - THEN response has kill_job set to false + THEN response has kill set to false Cleanup. @@ -219,7 +218,7 @@ Pretend to be a MINION that reports an update on the job. WEBAPP should now be telling us to kill the job. WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no - THEN response has kill_job set to true + THEN response has kill set to true Kill the job, as requested. @@ -232,7 +231,7 @@ a lorry that had ever been killed would never run successfully again. WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123 THEN response has job_id set to 2 WHEN MINION makes request POST /1.0/job-update with job_id=2&exit=no - THEN response has kill_job set to false + THEN response has kill set to false Cleanup. -- cgit v1.2.1