diff options
author | bst-marge-bot <marge-bot@buildstream.build> | 2019-11-13 16:39:05 +0000 |
---|---|---|
committer | bst-marge-bot <marge-bot@buildstream.build> | 2019-11-13 16:39:05 +0000 |
commit | c3eee615fb5fe957d17151ff655ebebc3d029681 (patch) | |
tree | 9416341236dd216fbed71a4f08c1534aa35ed65f | |
parent | baff3507d2ad5bad2e0a92e3b5ee0805e19504f7 (diff) | |
parent | ecc1f607aa186192e707552f309d78708fd68159 (diff) | |
download | buildstream-c3eee615fb5fe957d17151ff655ebebc3d029681.tar.gz |
Merge branch 'bschubert/graceful-children-sigterm' into 'master'
Gracefully shutdown children on termination
Closes #1185
See merge request BuildStream/buildstream!1692
-rw-r--r-- | src/buildstream/_scheduler/jobs/job.py | 38 | ||||
-rw-r--r-- | src/buildstream/_scheduler/scheduler.py | 21 |
2 files changed, 31 insertions, 28 deletions
diff --git a/src/buildstream/_scheduler/jobs/job.py b/src/buildstream/_scheduler/jobs/job.py
index 4e6199e16..3363d7b60 100644
--- a/src/buildstream/_scheduler/jobs/job.py
+++ b/src/buildstream/_scheduler/jobs/job.py
@@ -45,6 +45,8 @@ class _ReturnCode(FastEnum):
     FAIL = 1
     PERM_FAIL = 2
     SKIPPED = 3
+    TERMINATED = 4
+    KILLED = -9
 
 
 # JobStatus:
@@ -249,22 +251,6 @@ class Job():
     def get_terminated(self):
         return self._terminated
 
-    # terminate_wait()
-    #
-    # Wait for terminated jobs to complete
-    #
-    # Args:
-    #    timeout (float): Seconds to wait
-    #
-    # Returns:
-    #    (bool): True if the process terminated cleanly, otherwise False
-    #
-    def terminate_wait(self, timeout):
-
-        # Join the child process after sending SIGTERM
-        self._process.join(timeout)
-        return self._process.exitcode is not None
-
     # kill()
     #
     # Forcefully kill the process, and any children it might have.
@@ -471,6 +457,20 @@ class Job():
                 status = JobStatus.SKIPPED
             elif returncode in (_ReturnCode.FAIL, _ReturnCode.PERM_FAIL):
                 status = JobStatus.FAIL
+            elif returncode == _ReturnCode.TERMINATED:
+                if self._terminated:
+                    self.message(MessageType.INFO, "Process was terminated")
+                else:
+                    self.message(MessageType.ERROR, "Process was terminated unexpectedly")
+
+                status = JobStatus.FAIL
+            elif returncode == _ReturnCode.KILLED:
+                if self._terminated:
+                    self.message(MessageType.INFO, "Process was killed")
+                else:
+                    self.message(MessageType.ERROR, "Process was killed unexpectedly")
+
+                status = JobStatus.FAIL
             else:
                 status = JobStatus.FAIL
 
@@ -730,6 +730,12 @@ class ChildJob():
         with _signals.suspendable(stop_time, resume_time), \
             self._messenger.recorded_messages(self._logfile, self._logdir) as filename:
 
+            # Graciously handle sigterms.
+            def handle_sigterm(_signum, _sigframe):
+                self._child_shutdown(_ReturnCode.TERMINATED)
+
+            signal.signal(signal.SIGTERM, handle_sigterm)
+
             self.message(MessageType.START, self.action_name, logfile=filename)
 
             try:
diff --git a/src/buildstream/_scheduler/scheduler.py b/src/buildstream/_scheduler/scheduler.py
index 7ef5c5fe3..86e3af021 100644
--- a/src/buildstream/_scheduler/scheduler.py
+++ b/src/buildstream/_scheduler/scheduler.py
@@ -34,6 +34,9 @@ from .._message import Message, MessageType
 from ..plugin import Plugin
 
 
+_MAX_TIMEOUT_TO_KILL_CHILDREN = 20  # in seconds
+
+
 # A decent return code for Scheduler.run()
 class SchedStatus(FastEnum):
     SUCCESS = 0
@@ -526,21 +529,15 @@ class Scheduler():
         self.loop.remove_signal_handler(signal.SIGTERM)
 
     def _terminate_jobs_real(self):
-        # 20 seconds is a long time, it can take a while and sometimes
-        # we still fail, need to look deeper into this again.
-        wait_start = datetime.datetime.now()
-        wait_limit = 20.0
+        def kill_jobs():
+            for job_ in self._active_jobs:
+                job_.kill()
 
-        # First tell all jobs to terminate
-        for job in self._active_jobs:
-            job.terminate()
+        # Schedule all jobs to be killed if they have not exited after timeout
+        self.loop.call_later(_MAX_TIMEOUT_TO_KILL_CHILDREN, kill_jobs)
 
-        # Now wait for them to really terminate
         for job in self._active_jobs:
-            elapsed = datetime.datetime.now() - wait_start
-            timeout = max(wait_limit - elapsed.total_seconds(), 0.0)
-            if not job.terminate_wait(timeout):
-                job.kill()
+            job.terminate()
 
     # Regular timeout for driving status in the UI
     def _tick(self):