summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Benjamin Schubert <contact@benschubert.me> 2019-11-08 14:26:08 +0000
committer: Valentin David <valentin.david@codethink.co.uk> 2020-03-10 13:15:50 +0100
commit: 1e7e90ec4f298a948676fae33b5694f2eaa9381f (patch)
tree: d1eda08c875cb28735c376e7c1ee8246c4115300
parent: c12c7f596f15842028a46fff0ad062b3b4e2988f (diff)
download: buildstream-1e7e90ec4f298a948676fae33b5694f2eaa9381f.tar.gz
job.py: Don't use 'terminate_wait', as it uses waitpid()
Using `join()` on the subprocess calls `waitpid()` under the hood, which breaks our child watcher. Instead, schedule a callback to run 20 seconds later that kills any jobs that are still active. Note that the callback will only be called if we still have active jobs. Otherwise, it will just be skipped and we won't wait as long.
-rw-r--r-- buildstream/_scheduler/jobs/job.py 16
-rw-r--r-- buildstream/_scheduler/scheduler.py 18
2 files changed, 11 insertions, 23 deletions
diff --git a/buildstream/_scheduler/jobs/job.py b/buildstream/_scheduler/jobs/job.py
index b8b4a2c76..bf0646a31 100644
--- a/buildstream/_scheduler/jobs/job.py
+++ b/buildstream/_scheduler/jobs/job.py
@@ -182,21 +182,15 @@ class Job():
self._terminated = True
- # terminate_wait()
+ # get_terminated()
#
- # Wait for terminated jobs to complete
- #
- # Args:
- # timeout (float): Seconds to wait
+ # Check if a job has been terminated.
#
# Returns:
- # (bool): True if the process terminated cleanly, otherwise False
+ # (bool): True in the main process if Job.terminate() was called.
#
- def terminate_wait(self, timeout):
-
- # Join the child process after sending SIGTERM
- self._process.join(timeout)
- return self._process.exitcode is not None
+ def get_terminated(self):
+ return self._terminated
# kill()
#
diff --git a/buildstream/_scheduler/scheduler.py b/buildstream/_scheduler/scheduler.py
index 68c115c1b..101faccce 100644
--- a/buildstream/_scheduler/scheduler.py
+++ b/buildstream/_scheduler/scheduler.py
@@ -516,21 +516,15 @@ class Scheduler():
self.loop.remove_signal_handler(signal.SIGTERM)
def _terminate_jobs_real(self):
- # 20 seconds is a long time, it can take a while and sometimes
- # we still fail, need to look deeper into this again.
- wait_start = datetime.datetime.now()
- wait_limit = 20.0
+ def kill_jobs():
+ for job_ in self._active_jobs:
+ job_.kill()
- # First tell all jobs to terminate
- for job in self._active_jobs:
- job.terminate()
+ # Schedule all jobs to be killed if they have not exited in 20 sec
+ self.loop.call_later(20, kill_jobs)
- # Now wait for them to really terminate
for job in self._active_jobs:
- elapsed = datetime.datetime.now() - wait_start
- timeout = max(wait_limit - elapsed.total_seconds(), 0.0)
- if not job.terminate_wait(timeout):
- job.kill()
+ job.terminate()
# Regular timeout for driving status in the UI
def _tick(self):