author      James E. Blair <jeblair@redhat.com>    2017-10-31 16:40:36 -0700
committer   James E. Blair <jeblair@redhat.com>    2017-10-31 17:01:09 -0700
commit      872738fc33c51749af4d364a39b1c3bf4906ab48 (patch)
tree        f14c5b477ac687098094bf8a7ec84a484ee94dc8
parent      a1fa9aa1c624f302a06a91c3bcb2e608ccc93604 (diff)
download    zuul-872738fc33c51749af4d364a39b1c3bf4906ab48.tar.gz
On reconfiguration, re-enqueue items at the same position
Upon reconfiguration, we currently re-enqueue every item back
into its pipeline, in case a difference in the configuration would
change what should occur. In the simple case, this is fine, but
if a dependent pipeline has a branching dependency structure due
to already failing jobs, we would erroneously re-order the changes
back into a linear arrangement. The next pass through the pipeline
manager would move those items back to where they should be, but
in doing so, would reset their builds.
To correct this, after re-enqueueing a change, if the change that
was previously ahead of it in the pipeline was also successfully
re-enqueued, immediately move the change back behind it (or, if the
change ahead was None, meaning the change was its own head, move it
behind no change). This should have the effect of putting changes back
where they were in relation to other failing changes. If the change
ahead was not successfully re-enqueued, the current behavior of
simply putting it behind the nearest change in the queue is preserved.
If anything more complex happens, any errors will be corrected on the
next pass through the pipeline manager.
Change-Id: Ie3771d9bbbc1ca77425cf62751d8e5f70ba1f14c
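To make the ordering logic above concrete, here is a rough sketch in Python. ToyItem, ToyQueue, and reenqueue are hypothetical stand-ins invented for illustration, not Zuul's real QueueItem, ChangeQueue, or scheduler code; only the names enqueueItem, moveItem, item_ahead, items_behind, and items_to_remove mirror the diff below, and the bodies here are simplified assumptions.

# Hypothetical, simplified stand-ins for Zuul's queue objects; only the
# ordering behavior described in the commit message is modeled here.

class ToyItem(object):
    def __init__(self, name):
        self.name = name
        self.item_ahead = None
        self.items_behind = []


class ToyQueue(object):
    def __init__(self):
        self.queue = []

    def enqueueItem(self, item):
        # A plain enqueue attaches the item behind the nearest (last)
        # item in the queue, which is what linearizes a branched queue.
        item.item_ahead = self.queue[-1] if self.queue else None
        if item.item_ahead:
            item.item_ahead.items_behind.append(item)
        self.queue.append(item)

    def moveItem(self, item, item_ahead):
        # Detach the item from whatever it was enqueued behind and
        # re-attach it behind item_ahead (or behind nothing, making it
        # its own head, when item_ahead is None).
        if item.item_ahead is item_ahead:
            return
        if item.item_ahead:
            item.item_ahead.items_behind.remove(item)
        item.item_ahead = item_ahead
        if item_ahead:
            item_ahead.items_behind.append(item)


def reenqueue(old_items, items_to_remove):
    # Re-enqueue items into a fresh queue, restoring each item's old
    # position when the item that used to be ahead of it survived.
    new_queue = ToyQueue()
    for item in old_items:
        if item.item_ahead in items_to_remove:
            old_item_ahead, item_ahead_valid = None, False
        else:
            old_item_ahead, item_ahead_valid = item.item_ahead, True
        item.item_ahead = None
        item.items_behind = []
        new_queue.enqueueItem(item)
        if item_ahead_valid:
            new_queue.moveItem(item, old_item_ahead)
    return new_queue


# A was failing at the head, so B had become its own head with C behind
# it.  Restoring the old item_ahead keeps that shape instead of the
# linear A <- B <- C that a plain re-enqueue would produce.
a, b, c = ToyItem('A'), ToyItem('B'), ToyItem('C')
c.item_ahead = b
b.items_behind = [c]
reenqueue([a, b, c], items_to_remove=[])
assert b.item_ahead is None      # B is still its own head
assert c.item_ahead is b         # C stays behind B, not behind A

The key point is that each item's old item_ahead is captured before it is cleared, and the move is skipped when that item did not survive the reconfiguration, which falls back to the existing behavior of leaving the change behind the nearest item in the queue.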
-rw-r--r--   tests/fixtures/layouts/reconfigure-failed-head.yaml   56
-rwxr-xr-x   tests/unit/test_scheduler.py                          64
-rw-r--r--   zuul/manager/__init__.py                               8
-rw-r--r--   zuul/scheduler.py                                     11
4 files changed, 137 insertions(+), 2 deletions(-)
diff --git a/tests/fixtures/layouts/reconfigure-failed-head.yaml b/tests/fixtures/layouts/reconfigure-failed-head.yaml
new file mode 100644
index 000000000..3347c9b82
--- /dev/null
+++ b/tests/fixtures/layouts/reconfigure-failed-head.yaml
@@ -0,0 +1,56 @@
+- pipeline:
+    name: check
+    manager: independent
+    trigger:
+      gerrit:
+        - event: patchset-created
+    success:
+      gerrit:
+        Verified: 1
+    failure:
+      gerrit:
+        Verified: -1
+
+- pipeline:
+    name: gate
+    manager: dependent
+    success-message: Build succeeded (gate).
+    trigger:
+      gerrit:
+        - event: comment-added
+          approval:
+            - Approved: 1
+    success:
+      gerrit:
+        Verified: 2
+        submit: true
+    failure:
+      gerrit:
+        Verified: -2
+    start:
+      gerrit:
+        Verified: 0
+    precedence: high
+
+- job:
+    name: base
+    parent: null
+
+- job:
+    name: job1
+    run: playbooks/job1.yaml
+
+- job:
+    name: job2
+    run: playbooks/job2.yaml
+
+- project:
+    name: org/project
+    check:
+      jobs:
+        - job1
+        - job2
+    gate:
+      jobs:
+        - job1
+        - job2
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index b0df2b2df..53a20ffe2 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -3034,6 +3034,70 @@ class TestScheduler(ZuulTestCase):
         self.assertEqual(len(tenant.layout.pipelines['check'].queues), 0)
         self.assertIn('Build succeeded', A.messages[0])
 
+    @simple_layout("layouts/reconfigure-failed-head.yaml")
+    def test_live_reconfiguration_failed_change_at_head(self):
+        # Test that if we reconfigure with a failed change at head,
+        # that the change behind it isn't reparented onto it.
+
+        self.executor_server.hold_jobs_in_build = True
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        A.addApproval('Code-Review', 2)
+        B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
+        B.addApproval('Code-Review', 2)
+
+        self.executor_server.failJob('job1', A)
+
+        self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
+        self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
+
+        self.waitUntilSettled()
+
+        self.assertBuilds([
+            dict(name='job1', changes='1,1'),
+            dict(name='job2', changes='1,1'),
+            dict(name='job1', changes='1,1 2,1'),
+            dict(name='job2', changes='1,1 2,1'),
+        ])
+
+        self.release(self.builds[0])
+        self.waitUntilSettled()
+
+        self.assertBuilds([
+            dict(name='job2', changes='1,1'),
+            dict(name='job1', changes='2,1'),
+            dict(name='job2', changes='2,1'),
+        ])
+
+        # Unordered history comparison because the aborts can finish
+        # in any order.
+        self.assertHistory([
+            dict(name='job1', result='FAILURE', changes='1,1'),
+            dict(name='job1', result='ABORTED', changes='1,1 2,1'),
+            dict(name='job2', result='ABORTED', changes='1,1 2,1'),
+        ], ordered=False)
+
+        self.sched.reconfigure(self.config)
+        self.waitUntilSettled()
+
+        self.executor_server.hold_jobs_in_build = False
+        self.executor_server.release()
+        self.waitUntilSettled()
+
+        self.assertBuilds([])
+
+        self.assertHistory([
+            dict(name='job1', result='FAILURE', changes='1,1'),
+            dict(name='job1', result='ABORTED', changes='1,1 2,1'),
+            dict(name='job2', result='ABORTED', changes='1,1 2,1'),
+            dict(name='job2', result='SUCCESS', changes='1,1'),
+            dict(name='job1', result='SUCCESS', changes='2,1'),
+            dict(name='job2', result='SUCCESS', changes='2,1'),
+        ], ordered=False)
+        self.assertEqual(A.data['status'], 'NEW')
+        self.assertEqual(B.data['status'], 'MERGED')
+        self.assertEqual(A.reported, 2)
+        self.assertEqual(B.reported, 2)
+
     def test_delayed_repo_init(self):
         self.init_repo("org/new-project")
         files = {'README': ''}
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 9c30d7475..6c72c2d89 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -223,13 +223,19 @@ class PipelineManager(object):
             if item.change.equals(change):
                 self.removeItem(item)
 
-    def reEnqueueItem(self, item, last_head):
+    def reEnqueueItem(self, item, last_head, old_item_ahead, item_ahead_valid):
         with self.getChangeQueue(item.change, last_head.queue) as change_queue:
             if change_queue:
                 self.log.debug("Re-enqueing change %s in queue %s" %
                                (item.change, change_queue))
                 change_queue.enqueueItem(item)
+                # If the old item ahead was re-enqued, this value will
+                # be true, so we should attempt to move the item back
+                # to where it was in case an item ahead is already
+                # failing.
+                if item_ahead_valid:
+                    change_queue.moveItem(item, old_item_ahead)
 
                 # Get an updated copy of the layout and update the job
                 # graph if necessary.  This resumes the buildset merge
                 # state machine.  If we have an up-to-date layout, it
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index 73fa923a5..a725fcd68 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -614,13 +614,22 @@ class Scheduler(threading.Thread):
                         item.queue = None
                         item.change.project = self._reenqueueGetProject(
                             tenant, item)
+                        # If the old item ahead made it in, re-enqueue
+                        # this one behind it.
+                        if item.item_ahead in items_to_remove:
+                            old_item_ahead = None
+                            item_ahead_valid = False
+                        else:
+                            old_item_ahead = item.item_ahead
+                            item_ahead_valid = True
                         item.item_ahead = None
                         item.items_behind = []
                         reenqueued = False
                         if item.change.project:
                             try:
                                 reenqueued = new_pipeline.manager.reEnqueueItem(
-                                    item, last_head)
+                                    item, last_head, old_item_ahead,
+                                    item_ahead_valid=item_ahead_valid)
                             except Exception:
                                 self.log.exception(
                                     "Exception while re-enqueing item %s",