summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosh Smith <joshsmith@codethink.co.uk>2018-07-23 11:33:31 +0100
committerTristan Van Berkom <tristan.vanberkom@codethink.co.uk>2018-07-27 13:41:26 +0900
commit9ebd7fe1182a4d6afa027b88fb3d096408d9a912 (patch)
tree5d646889a356fea05f1dccf9b42e3f1ad10b585b
parent8c9fd9e40f5ac2902a500c46d5321ab16f6e30b7 (diff)
downloadbuildstream-9ebd7fe1182a4d6afa027b88fb3d096408d9a912.tar.gz
_exceptions.py: Modify BstError API to allow optional retry
job.py: Changes to the logic surrounding retry attempts and child process return codes element.py, source.py: ElementError and SourceError also implement this change. These exceptions now have an optional parameter of temporary which defaults to false. This will potentially break backwards compatibility where exceptions were previously raised and a retry was intended. To trigger a retry, one must now raise their SourceError or ElementError with temporary=True. This aims to fix #397.
-rw-r--r--buildstream/_exceptions.py7
-rw-r--r--buildstream/_scheduler/jobs/job.py36
-rw-r--r--buildstream/element.py5
-rw-r--r--buildstream/source.py5
4 files changed, 40 insertions, 13 deletions
diff --git a/buildstream/_exceptions.py b/buildstream/_exceptions.py
index 8baf16706..ada697a53 100644
--- a/buildstream/_exceptions.py
+++ b/buildstream/_exceptions.py
@@ -99,7 +99,7 @@ class ErrorDomain(Enum):
#
class BstError(Exception):
- def __init__(self, message, *, detail=None, domain=None, reason=None):
+ def __init__(self, message, *, detail=None, domain=None, reason=None, temporary=False):
global _last_exception
super().__init__(message)
@@ -114,6 +114,11 @@ class BstError(Exception):
#
self.sandbox = None
+ # When this exception occurred during the handling of a job, indicate
+ # whether or not there is any point retrying the job.
+ #
+ self.temporary = temporary
+
# Error domain and reason
#
self.domain = domain
diff --git a/buildstream/_scheduler/jobs/job.py b/buildstream/_scheduler/jobs/job.py
index 6d4b685af..c339a333b 100644
--- a/buildstream/_scheduler/jobs/job.py
+++ b/buildstream/_scheduler/jobs/job.py
@@ -35,6 +35,12 @@ from ..._exceptions import ImplError, BstError, set_last_task_error
from ..._message import Message, MessageType, unconditional_messages
from ... import _signals, utils
+# Return code values shutdown of job handling child processes
+#
+RC_OK = 0
+RC_FAIL = 1
+RC_PERM_FAIL = 2
+
# Used to distinguish between status messages and return values
class Envelope():
@@ -111,6 +117,10 @@ class Job():
self._max_retries = max_retries # Maximum number of automatic retries
self._result = None # Return value of child action in the parent
self._tries = 0 # Try count, for retryable jobs
+
+ # If False, a retry will not be attempted regardless of whether _tries is less than _max_retries.
+ #
+ self._retry_flag = True
self._logfile = logfile
self._task_id = None
@@ -388,8 +398,9 @@ class Job():
result = self.child_process()
except BstError as e:
elapsed = datetime.datetime.now() - starttime
+ self._retry_flag = e.temporary
- if self._tries <= self._max_retries:
+ if self._retry_flag and (self._tries <= self._max_retries):
self.message(MessageType.FAIL,
"Try #{} failed, retrying".format(self._tries),
elapsed=elapsed)
@@ -402,7 +413,10 @@ class Job():
# Report the exception to the parent (for internal testing purposes)
self._child_send_error(e)
- self._child_shutdown(1)
+
+ # Set return code based on whether or not the error was temporary.
+ #
+ self._child_shutdown(RC_FAIL if self._retry_flag else RC_PERM_FAIL)
except Exception as e: # pylint: disable=broad-except
@@ -416,7 +430,7 @@ class Job():
self.message(MessageType.BUG, self.action_name,
elapsed=elapsed, detail=detail,
logfile=filename)
- self._child_shutdown(1)
+ self._child_shutdown(RC_FAIL)
else:
# No exception occurred in the action
@@ -430,7 +444,7 @@ class Job():
# Shutdown needs to stay outside of the above context manager,
# make sure we dont try to handle SIGTERM while the process
# is already busy in sys.exit()
- self._child_shutdown(0)
+ self._child_shutdown(RC_OK)
# _child_send_error()
#
@@ -495,7 +509,8 @@ class Job():
message.action_name = self.action_name
message.task_id = self._task_id
- if message.message_type == MessageType.FAIL and self._tries <= self._max_retries:
+ if (message.message_type == MessageType.FAIL and
+ self._tries <= self._max_retries and self._retry_flag):
# Job will be retried, display failures as warnings in the frontend
message.message_type = MessageType.WARN
@@ -529,12 +544,17 @@ class Job():
def _parent_child_completed(self, pid, returncode):
self._parent_shutdown()
- if returncode != 0 and self._tries <= self._max_retries:
+ # We don't want to retry if we got OK or a permanent fail.
+ # This is set in _child_action but must also be set for the parent.
+ #
+ self._retry_flag = returncode not in (RC_OK, RC_PERM_FAIL)
+
+ if self._retry_flag and (self._tries <= self._max_retries):
self.spawn()
return
- self.parent_complete(returncode == 0, self._result)
- self._scheduler.job_completed(self, returncode == 0)
+ self.parent_complete(returncode == RC_OK, self._result)
+ self._scheduler.job_completed(self, returncode == RC_OK)
# _parent_process_envelope()
#
diff --git a/buildstream/element.py b/buildstream/element.py
index e9cbdb63e..962ba1c78 100644
--- a/buildstream/element.py
+++ b/buildstream/element.py
@@ -140,9 +140,10 @@ class ElementError(BstError):
message (str): The error message to report to the user
detail (str): A possibly multiline, more detailed error message
reason (str): An optional machine readable reason string, used for test cases
+ temporary(bool): An indicator to whether the error may occur if the operation was run again. (*Since: 1.4*)
"""
- def __init__(self, message, *, detail=None, reason=None):
- super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason)
+ def __init__(self, message, *, detail=None, reason=None, temporary=False):
+ super().__init__(message, detail=detail, domain=ErrorDomain.ELEMENT, reason=reason, temporary=temporary)
class Element(Plugin):
diff --git a/buildstream/source.py b/buildstream/source.py
index ec38ae8f2..c7c515e12 100644
--- a/buildstream/source.py
+++ b/buildstream/source.py
@@ -108,9 +108,10 @@ class SourceError(BstError):
message (str): The breif error description to report to the user
detail (str): A possibly multiline, more detailed error message
reason (str): An optional machine readable reason string, used for test cases
+ temporary(bool): An indicator to whether the error may occur if the operation was run again. (*Since: 1.4*)
"""
- def __init__(self, message, *, detail=None, reason=None):
- super().__init__(message, detail=detail, domain=ErrorDomain.SOURCE, reason=reason)
+ def __init__(self, message, *, detail=None, reason=None, temporary=False):
+ super().__init__(message, detail=detail, domain=ErrorDomain.SOURCE, reason=reason, temporary=temporary)
class Source(Plugin):