summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames E. Blair <jeblair@redhat.com>2020-05-05 16:02:02 -0700
committerJames E. Blair <jeblair@redhat.com>2020-05-14 14:05:30 -0700
commit319bbacfa190102442b0c314620dfa3354525b60 (patch)
treeb7642e7016c648041d999a78496be911469df1be
parenta655aee0f18f426df27651226fcc998921bdd115 (diff)
downloadzuul-319bbacfa190102442b0c314620dfa3354525b60.tar.gz
Fix loading_errors bug
This corrects an error Error in dynamic layout Traceback (most recent call last): File "/usr/local/lib/python3.8/site-packages/zuul/manager/__init__.py", line 625, in _loadDynamicLayout relevant_errors = self._findRelevantErrors(item, File "/usr/local/lib/python3.8/site-packages/zuul/manager/__init__.py", line 517, in _findRelevantErrors parent_layout.loading_errors.error_keys) or AttributeError: 'NoneType' object has no attribute 'loading_errors' which can be caused when the following conditions are met: * Tenant A is listed first. * Tenant A holds projects A and B. * Project B references a config object defined in project A. * Tenant A loads without errors. * Tenant B holds projects B and C. * Tenant B has a standing configuration error due to the unknown reference to an object in project A from project B. * Two or more changes to project C which cause dynamic configurations to be created are enqueued in a pipeline. * The merge job for the second change finishes before the first. This is because: We cache configuration objects if they load without error in their first tenant; that means that they can show up as errors in later tenants, but as long as those other tenants aren't proposing changes to that repo, it doesn't matter. But it does mean that every dynamic reconfiguration in this tenant will see errors and will execute the code path that compares the new dynamic configuration to the previous one to see if they are relevant. If a merge job for a dynamic config change arrives out of order, we will compare it to the previous configuration to determine if they are relevant, but since the previous layout had not been calculated yet, the exception above was hit. The solution is to indicate that the layout for the later change is not ready until the layout for the previous change is. Change-Id: Ibe1392a494d42b65080ab7e42f116db3869548ff
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/common-config/playbooks/job.yaml2
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/common-config/zuul.yaml23
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/org_project1/README1
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/org_project2/README1
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/org_project2/zuul.yaml3
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/org_project3/README1
-rw-r--r--tests/fixtures/config/broken-multi-tenant/git/org_project3/zuul.yaml2
-rw-r--r--tests/fixtures/config/broken-multi-tenant/main.yaml19
-rw-r--r--tests/unit/test_v3.py74
-rw-r--r--zuul/manager/__init__.py16
10 files changed, 129 insertions, 13 deletions
diff --git a/tests/fixtures/config/broken-multi-tenant/git/common-config/playbooks/job.yaml b/tests/fixtures/config/broken-multi-tenant/git/common-config/playbooks/job.yaml
new file mode 100644
index 000000000..f679dceae
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/common-config/playbooks/job.yaml
@@ -0,0 +1,2 @@
+- hosts: all
+ tasks: []
diff --git a/tests/fixtures/config/broken-multi-tenant/git/common-config/zuul.yaml b/tests/fixtures/config/broken-multi-tenant/git/common-config/zuul.yaml
new file mode 100644
index 000000000..406a64248
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/common-config/zuul.yaml
@@ -0,0 +1,23 @@
+- pipeline:
+ name: check
+ manager: independent
+ trigger:
+ gerrit:
+ - event: patchset-created
+ success:
+ gerrit:
+ Verified: 1
+ failure:
+ gerrit:
+ Verified: -1
+
+- job:
+ name: base
+ parent: null
+ run: playbooks/job.yaml
+
+- project:
+ name: ^.*
+ check:
+ jobs:
+ - base
diff --git a/tests/fixtures/config/broken-multi-tenant/git/org_project1/README b/tests/fixtures/config/broken-multi-tenant/git/org_project1/README
new file mode 100644
index 000000000..9daeafb98
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/org_project1/README
@@ -0,0 +1 @@
+test
diff --git a/tests/fixtures/config/broken-multi-tenant/git/org_project2/README b/tests/fixtures/config/broken-multi-tenant/git/org_project2/README
new file mode 100644
index 000000000..9daeafb98
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/org_project2/README
@@ -0,0 +1 @@
+test
diff --git a/tests/fixtures/config/broken-multi-tenant/git/org_project2/zuul.yaml b/tests/fixtures/config/broken-multi-tenant/git/org_project2/zuul.yaml
new file mode 100644
index 000000000..40f68b640
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/org_project2/zuul.yaml
@@ -0,0 +1,3 @@
+- job:
+ name: child-job
+ parent: parent-job
diff --git a/tests/fixtures/config/broken-multi-tenant/git/org_project3/README b/tests/fixtures/config/broken-multi-tenant/git/org_project3/README
new file mode 100644
index 000000000..9daeafb98
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/org_project3/README
@@ -0,0 +1 @@
+test
diff --git a/tests/fixtures/config/broken-multi-tenant/git/org_project3/zuul.yaml b/tests/fixtures/config/broken-multi-tenant/git/org_project3/zuul.yaml
new file mode 100644
index 000000000..beef1faa0
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/git/org_project3/zuul.yaml
@@ -0,0 +1,2 @@
+- job:
+ name: parent-job
diff --git a/tests/fixtures/config/broken-multi-tenant/main.yaml b/tests/fixtures/config/broken-multi-tenant/main.yaml
new file mode 100644
index 000000000..053056e9f
--- /dev/null
+++ b/tests/fixtures/config/broken-multi-tenant/main.yaml
@@ -0,0 +1,19 @@
+- tenant:
+ name: tenant-one
+ source:
+ gerrit:
+ config-projects:
+ - common-config
+ untrusted-projects:
+ - org/project2
+ - org/project3
+
+- tenant:
+ name: tenant-two
+ source:
+ gerrit:
+ config-projects:
+ - common-config
+ untrusted-projects:
+ - org/project1
+ - org/project2
diff --git a/tests/unit/test_v3.py b/tests/unit/test_v3.py
index 7753b9633..a80675544 100644
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@@ -3358,6 +3358,80 @@ class TestBrokenConfig(ZuulTestCase):
"A should have failed the check pipeline")
+class TestBrokenMultiTenantConfig(ZuulTestCase):
+ # Test we can deal with a broken multi-tenant config
+
+ tenant_config_file = 'config/broken-multi-tenant/main.yaml'
+
+ def test_loading_errors(self):
+ # This regression test came about when we discovered the following:
+
+ # * We cache configuration objects if they load without error
+ # in their first tenant; that means that they can show up as
+ # errors in later tenants, but as long as those other
+ # tenants aren't proposing changes to that repo (which is
+ # unlikely in this situation; this usually arises if the
+ # tenant just wants to use some foreign jobs), users won't
+ # be blocked by the error.
+ #
+ # * If a merge job for a dynamic config change arrives out of
+ # order, we will build the new configuration and if there
+ # are errors, we will compare it to the previous
+ # configuration to determine if they are relevant, but that
+ # caused an error since the previous layout had not been
+ # calculated yet. It's pretty hard to end up with
+ # irrelevant errors except by virtue of the first point
+ # above, which is why this test relies on a second tenant.
+
+ # This test has two tenants. The first loads project2, and
+ # project3 without errors and all config objects are cached.
+ # The second tenant loads only project1 and project2.
+ # Project2 references a job that is defined in project3, so
+ # the tenant loads with an error, but proceeds.
+
+ # Don't run any merge jobs, so we can run them out of order.
+ self.gearman_server.hold_merge_jobs_in_queue = True
+
+ # Create a first change which modifies the config (and
+ # therefore will require a merge job).
+ in_repo_conf = textwrap.dedent(
+ """
+ - job: {'name': 'foo'}
+ """)
+ file_dict = {'.zuul.yaml': in_repo_conf}
+ A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A',
+ files=file_dict)
+
+ # Create a second change which also modifies the config.
+ B = self.fake_gerrit.addFakeChange('org/project1', 'master', 'B',
+ files=file_dict)
+ B.setDependsOn(A, 1)
+ self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled()
+
+ # There should be a merge job for each change.
+ self.assertEqual(len(self.scheds.first.sched.merger.jobs), 2)
+
+ jobs = [job for job in self.gearman_server.getQueue()
+ if job.name.startswith(b'merger:')]
+ # Release the second merge job.
+ jobs[-1].waiting = False
+ self.gearman_server.wakeConnections()
+ self.waitUntilSettled()
+
+ # At this point we should still be waiting on the first
+ # change's merge job.
+ self.assertHistory([])
+
+ # Proceed.
+ self.gearman_server.hold_merge_jobs_in_queue = False
+ self.gearman_server.release()
+ self.waitUntilSettled()
+ self.assertHistory([
+ dict(name='base', result='SUCCESS', changes='1,1 2,1'),
+ ])
+
+
class TestProjectKeys(ZuulTestCase):
# Test that we can generate project keys
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 469d2c1b3..ee0f2c241 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -563,19 +563,6 @@ class PipelineManager(metaclass=ABCMeta):
zuul_event_id=None)
untrusted_errors = len(untrusted_layout.loading_errors) > 0
- # TODO (jeblair): remove this section of extra verbose
- # debug logging when we have resolved the loading_errors
- # bug.
- log.debug("Dynamic layout: trusted errors: %s layout: %s",
- trusted_errors, trusted_layout)
- if trusted_layout:
- for err in trusted_layout.loading_errors.errors[:10]:
- log.debug(err.error)
- log.debug("Dynamic layout: untrusted errors: %s layout: %s",
- untrusted_errors, untrusted_layout)
- if untrusted_layout:
- for err in untrusted_layout.loading_errors.errors[:10]:
- log.debug(err.error)
# Configuration state handling switchboard. Intentionally verbose
# and repetetive to be exceptionally clear that we handle all
# possible cases correctly. Note we never return trusted_layout
@@ -666,6 +653,9 @@ class PipelineManager(metaclass=ABCMeta):
def getLayout(self, item):
if item.item_ahead:
fallback_layout = item.item_ahead.layout
+ if fallback_layout is None:
+ # We're probably waiting on a merge job for the item ahead.
+ return None
else:
fallback_layout = item.pipeline.tenant.layout
if not item.change.updatesConfig(item.pipeline.tenant):