summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Westphahl <simon.westphahl@bmw.de>2023-03-02 15:23:53 +0100
committerSimon Westphahl <simon.westphahl@bmw.de>2023-03-20 09:01:17 +0100
commit7696fda6cb6a35216b9fde54f37c31267958ee32 (patch)
treee35e8c4dc6e9dbb5f431e035c4e2af715089d31f
parent36e81916f0b3b560ad672f2100daec25c56e3f94 (diff)
downloadzuul-7696fda6cb6a35216b9fde54f37c31267958ee32.tar.gz
Don't discard all cat job results in case of error
So far we've aborted all cat jobs when any of the cat jobs failed. However, since we ignore those exceptions anyways unless we are validating the tenant configuration, we should continue processing the rest of the job results that haven't failed. If an early cat job failed we might otherwise not load configuration from repositories and branches even if those cat jobs were successful. Change-Id: I34f2a23641de9138b1e887df86ae2602ca190277
-rw-r--r--zuul/configloader.py112
1 files changed, 58 insertions, 54 deletions
diff --git a/zuul/configloader.py b/zuul/configloader.py
index fe22fe0f8..4f472cb4e 100644
--- a/zuul/configloader.py
+++ b/zuul/configloader.py
@@ -2151,21 +2151,26 @@ class TenantParser(object):
for future in futures:
future.result()
- try:
- self._processCatJobs(abide, tenant, loading_errors, jobs,
- min_ltimes)
- except Exception:
- self.log.exception("Error processing cat jobs, canceling")
- for job in jobs:
+ for i, job in enumerate(jobs, start=1):
+ try:
try:
- self.log.debug("Canceling cat job %s", job)
+ self._processCatJob(abide, tenant, loading_errors, job,
+ min_ltimes)
+ except TimeoutError:
self.merger.cancel(job)
- except Exception:
- self.log.exception("Unable to cancel job %s", job)
- if not ignore_cat_exception:
- raise
- if not ignore_cat_exception:
- raise
+ raise
+ except Exception:
+ self.log.exception("Error processing cat job")
+ if not ignore_cat_exception:
+ # Cancel remaining jobs
+ for cancel_job in jobs[i:]:
+ self.log.debug("Canceling cat job %s", cancel_job)
+ try:
+ self.merger.cancel(cancel_job)
+ except Exception:
+ self.log.exception(
+ "Unable to cancel job %s", cancel_job)
+ raise
def _cacheTenantYAMLBranch(self, abide, tenant, loading_errors, min_ltimes,
tpc, project, branch, jobs):
@@ -2234,49 +2239,48 @@ class TenantParser(object):
job.source_context = source_context
jobs.append(job)
- def _processCatJobs(self, abide, tenant, loading_errors, jobs, min_ltimes):
+ def _processCatJob(self, abide, tenant, loading_errors, job, min_ltimes):
# Called at the end of _cacheTenantYAML after all cat jobs
# have been submitted
- for job in jobs:
- self.log.debug("Waiting for cat job %s" % (job,))
- res = job.wait(self.merger.git_timeout)
- if not res:
- # We timed out
- raise Exception("Cat job %s timed out; consider setting "
- "merger.git_timeout in zuul.conf" % (job,))
- if not job.updated:
- raise Exception("Cat job %s failed" % (job,))
- self.log.debug("Cat job %s got files %s" %
- (job, job.files.keys()))
-
- self._updateUnparsedBranchCache(abide, tenant, job.source_context,
- job.files, loading_errors,
- job.ltime, min_ltimes)
-
- # Save all config files in Zookeeper (not just for the current tpc)
- files_cache = self.unparsed_config_cache.getFilesCache(
- job.source_context.project_canonical_name,
- job.source_context.branch)
- with self.unparsed_config_cache.writeLock(
- job.source_context.project_canonical_name):
- # Prevent files cache ltime from going backward
- if files_cache.ltime >= job.ltime:
- self.log.info(
- "Discarding job %s result since the files cache was "
- "updated in the meantime", job)
- continue
- # Since the cat job returns all required config files
- # for ALL tenants the project is a part of, we can
- # clear the whole cache and then populate it with the
- # updated content.
- files_cache.clear()
- for fn, content in job.files.items():
- # Cache file in Zookeeper
- if content is not None:
- files_cache[fn] = content
- files_cache.setValidFor(job.extra_config_files,
- job.extra_config_dirs,
- job.ltime)
+ self.log.debug("Waiting for cat job %s" % (job,))
+ res = job.wait(self.merger.git_timeout)
+ if not res:
+ # We timed out
+ raise TimeoutError(f"Cat job {job} timed out; consider setting "
+ "merger.git_timeout in zuul.conf")
+ if not job.updated:
+ raise Exception("Cat job %s failed" % (job,))
+ self.log.debug("Cat job %s got files %s" %
+ (job, job.files.keys()))
+
+ self._updateUnparsedBranchCache(abide, tenant, job.source_context,
+ job.files, loading_errors,
+ job.ltime, min_ltimes)
+
+ # Save all config files in Zookeeper (not just for the current tpc)
+ files_cache = self.unparsed_config_cache.getFilesCache(
+ job.source_context.project_canonical_name,
+ job.source_context.branch)
+ with self.unparsed_config_cache.writeLock(
+ job.source_context.project_canonical_name):
+ # Prevent files cache ltime from going backward
+ if files_cache.ltime >= job.ltime:
+ self.log.info(
+ "Discarding job %s result since the files cache was "
+ "updated in the meantime", job)
+ return
+ # Since the cat job returns all required config files
+ # for ALL tenants the project is a part of, we can
+ # clear the whole cache and then populate it with the
+ # updated content.
+ files_cache.clear()
+ for fn, content in job.files.items():
+ # Cache file in Zookeeper
+ if content is not None:
+ files_cache[fn] = content
+ files_cache.setValidFor(job.extra_config_files,
+ job.extra_config_dirs,
+ job.ltime)
def _updateUnparsedBranchCache(self, abide, tenant, source_context, files,
loading_errors, ltime, min_ltimes):