From fcce17aa00de4c8a638ee631048dd8b89f1aa65d Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 26 Jun 2014 15:41:27 +0000 Subject: Implement /1.0/remove-ghost-jobs --- lorrycontroller/__init__.py | 1 + lorrycontroller/jobupdate.py | 4 ++- lorrycontroller/removeghostjobs.py | 65 ++++++++++++++++++++++++++++++++++++++ lorrycontroller/statedb.py | 41 ++++++++++++++++++------ 4 files changed, 100 insertions(+), 11 deletions(-) create mode 100644 lorrycontroller/removeghostjobs.py (limited to 'lorrycontroller') diff --git a/lorrycontroller/__init__.py b/lorrycontroller/__init__.py index bc51b88..a65ff02 100644 --- a/lorrycontroller/__init__.py +++ b/lorrycontroller/__init__.py @@ -32,6 +32,7 @@ from movetopbottom import MoveToTop, MoveToBottom from stopjob import StopJob from listjobs import ListAllJobs, ListAllJobsHTML from showjob import ShowJob, ShowJobHTML, JobShower +from removeghostjobs import RemoveGhostJobs from removejob import RemoveJob from lstroves import LsTroves, ForceLsTrove from pretendtime import PretendTime diff --git a/lorrycontroller/jobupdate.py b/lorrycontroller/jobupdate.py index 3bd0e81..efc9ce1 100644 --- a/lorrycontroller/jobupdate.py +++ b/lorrycontroller/jobupdate.py @@ -44,11 +44,13 @@ class JobUpdate(lorrycontroller.LorryControllerRoute): if stderr: statedb.append_to_job_output(job_id, stderr) + now = statedb.get_current_time() + statedb.set_job_updated(job_id, now) + path = statedb.find_lorry_running_job(job_id) lorry_info = statedb.get_lorry_info(path) if exit is not None and exit != 'no': - now = statedb.get_current_time() statedb.set_lorry_last_run(path, int(now)) statedb.set_running_job(path, None) statedb.set_job_exit(job_id, exit, int(now), disk_usage) diff --git a/lorrycontroller/removeghostjobs.py b/lorrycontroller/removeghostjobs.py new file mode 100644 index 0000000..2b2760c --- /dev/null +++ b/lorrycontroller/removeghostjobs.py @@ -0,0 +1,65 @@ +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +import logging +import time + +import bottle + +import lorrycontroller + + +class RemoveGhostJobs(lorrycontroller.LorryControllerRoute): + + http_method = 'POST' + path = '/1.0/remove-ghost-jobs' + + def run(self, **kwargs): + logging.info('%s %s called', self.http_method, self.path) + + ghost_timeout = self.app_settings['ghost-timeout'] + ghosts = [] + with self.open_statedb() as statedb: + for job_id in statedb.get_running_jobs(): + if self.is_ghost_job(statedb, job_id, ghost_timeout): + self.exorcise_ghost_job(statedb, job_id) + ghosts.append(statedb.get_job_info(job_id)) + return { + 'killed-ghost-jobs': ghosts, + } + + def is_ghost_job(self, statedb, job_id, ghost_timeout): + updated = statedb.get_job_updated(job_id) + return self.now(statedb) - updated >= ghost_timeout + + def now(self, statedb): + return statedb.get_current_time() + + def exorcise_ghost_job(self, statedb, job_id): + logging.info('Job %s is a ghost job', job_id) + self.mark_job_to_be_killed_in_case_minion_appears(statedb, job_id) + self.mark_job_as_terminated(statedb, job_id) + + def mark_job_to_be_killed_in_case_minion_appears(self, statedb, job_id): + statedb.set_kill_job(job_id, True) + + def mark_job_as_terminated(self, statedb, job_id): + statedb.append_to_job_output( + job_id, '\nTERMINATED DUE TO GHOST TIMEOUT\n') + statedb.set_job_exit(job_id, 127, self.now(statedb), -1) + + job_info = statedb.get_job_info(job_id) + statedb.set_running_job(job_info['path'], None) diff --git a/lorrycontroller/statedb.py b/lorrycontroller/statedb.py index 2d223e0..fd7857d 100644 --- a/lorrycontroller/statedb.py +++ b/lorrycontroller/statedb.py @@ -129,6 +129,7 @@ class StateDB(object): 'pid INT, ' 'started INT, ' 'ended INT, ' + 'updated INT, ' 'kill INT, ' 'path TEXT, ' 'exit TEXT, ' @@ -454,8 +455,8 @@ class StateDB(object): def get_job_info(self, job_id): c = self.get_cursor() c.execute( - 'SELECT job_id, host, pid, started, ended, kill, path, exit, ' - 'disk_usage, output FROM jobs WHERE job_id=?', + 'SELECT job_id, host, pid, started, ended, updated, kill, ' + 'path, exit, disk_usage, output FROM jobs WHERE job_id=?', (job_id,)) row = c.fetchone() return { @@ -464,11 +465,12 @@ class StateDB(object): 'pid': row[2], 'started': row[3], 'ended': row[4], - 'kill': row[5], - 'path': row[6], - 'exit': row[7], - 'disk_usage': row[8], - 'output': row[9], + 'updated': row[5], + 'kill': row[6], + 'path': row[7], + 'exit': row[8], + 'disk_usage': row[9], + 'output': row[10], } def add_new_job(self, job_id, host, pid, path, started): @@ -478,9 +480,10 @@ class StateDB(object): assert self.in_transaction c = self.get_cursor() c.execute( - 'INSERT INTO jobs (job_id, host, pid, path, started, kill) ' - 'VALUES (?, ?, ?, ?, ?, ?)', - (job_id, host, pid, path, started, 0)) + 'INSERT INTO jobs (job_id, host, pid, path, started, ' + 'updated, kill) ' + 'VALUES (?, ?, ?, ?, ?, ?, ?)', + (job_id, host, pid, path, started, started, 0)) def get_job_minion_host(self, job_id): c = self.get_cursor() @@ -514,6 +517,24 @@ class StateDB(object): row = c.fetchone() return row[0], row[1] + def get_job_updated(self, job_id): + c = self.get_cursor() + c.execute( + 'SELECT updated FROM jobs WHERE job_id IS ?', + (job_id,)) + row = c.fetchone() + return row[0] + + def set_job_updated(self, job_id, updated): + logging.debug( + 'StateDB.set_job_updated(%r, %r) called', + job_id, updated) + assert self.in_transaction + c = self.get_cursor() + c.execute( + 'UPDATE jobs SET updated=? WHERE job_id IS ?', + (updated, job_id)) + def get_job_exit(self, job_id): c = self.get_cursor() c.execute( -- cgit v1.2.1