diff options
author | James E. Blair <jeblair@hp.com> | 2015-07-17 16:20:21 -0700 |
---|---|---|
committer | James E. Blair <jeblair@hp.com> | 2015-07-17 18:06:50 -0700 |
commit | 6bc782de75be6720aa31843e64f4cb57bfe01de9 (patch) | |
tree | 265b63b340cbfc0f0ab597e933efa30114b36fbe | |
parent | 879dafb5b910b4c8e9fc37895495b4ddf2d58ef5 (diff) | |
download | zuul-6bc782de75be6720aa31843e64f4cb57bfe01de9.tar.gz |
Handle adding a job to a failing change during reconfig
During live reconfiguration, if an item was either in a merge
conflict state or had jobs marked as skipped because of a failing
superior job in a job tree, any new jobs added to that item during
the reconfiguration would not have their state updated to
'skipped'. This would cause the item to stay in the queue
indefinitely.
Correct this by re-applying, after an item is re-enqueued, the
state updates that set build statuses to 'skipped'.
Change-Id: I96195cb9996b01075e3705b6cd89a9863528898e
-rw-r--r-- | tests/fixtures/layout-live-reconfiguration-add-job.yaml | 38 | ||||
-rwxr-xr-x | tests/test_scheduler.py | 121 | ||||
-rw-r--r-- | zuul/scheduler.py | 12 |
3 files changed, 171 insertions, 0 deletions
diff --git a/tests/fixtures/layout-live-reconfiguration-add-job.yaml b/tests/fixtures/layout-live-reconfiguration-add-job.yaml new file mode 100644 index 000000000..e4aea6fd0 --- /dev/null +++ b/tests/fixtures/layout-live-reconfiguration-add-job.yaml @@ -0,0 +1,38 @@ +pipelines: + - name: gate + manager: DependentPipelineManager + failure-message: Build failed. For information on how to proceed, see http://wiki.example.org/Test_Failures + trigger: + gerrit: + - event: comment-added + approval: + - approved: 1 + success: + gerrit: + verified: 2 + submit: true + failure: + gerrit: + verified: -2 + start: + gerrit: + verified: 0 + precedence: high + +jobs: + - name: ^.*-merge$ + failure-message: Unable to merge change + hold-following-changes: true + - name: project-testfile + files: + - '.*-requires' + +projects: + - name: org/project + merge-mode: cherry-pick + gate: + - project-merge: + - project-test1 + - project-test2 + - project-test3 + - project-testfile diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 32d515df5..5b9a39dc9 100755 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -2265,6 +2265,127 @@ class TestScheduler(ZuulTestCase): self.assertEqual(A.data['status'], 'MERGED') self.assertEqual(A.reported, 2) + def test_live_reconfiguration_merge_conflict(self): + # A real-world bug: a change in a gate queue has a merge + # conflict and a job is added to its project while it's + # sitting in the queue. The job gets added to the change and + # enqueued and the change gets stuck. + self.worker.registerFunction('build:project-test3') + self.worker.hold_jobs_in_build = True + + # This change is fine. It's here to stop the queue long + # enough for the next change to be subject to the + # reconfiguration, as well as to provide a conflict for the + # next change. This change will succeed and merge. 
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A') + A.addPatchset(['conflict']) + A.addApproval('CRVW', 2) + self.fake_gerrit.addEvent(A.addApproval('APRV', 1)) + + # This change will be in merge conflict. During the + # reconfiguration, we will add a job. We want to make sure + # that doesn't cause it to get stuck. + B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B') + B.addPatchset(['conflict']) + B.addApproval('CRVW', 2) + self.fake_gerrit.addEvent(B.addApproval('APRV', 1)) + + self.waitUntilSettled() + + # No jobs have run yet + self.assertEqual(A.data['status'], 'NEW') + self.assertEqual(A.reported, 1) + self.assertEqual(B.data['status'], 'NEW') + self.assertEqual(B.reported, 1) + self.assertEqual(len(self.history), 0) + + # Add the "project-test3" job. + self.config.set('zuul', 'layout_config', + 'tests/fixtures/layout-live-' + 'reconfiguration-add-job.yaml') + self.sched.reconfigure(self.config) + self.waitUntilSettled() + + self.worker.hold_jobs_in_build = False + self.worker.release() + self.waitUntilSettled() + + self.assertEqual(A.data['status'], 'MERGED') + self.assertEqual(A.reported, 2) + self.assertEqual(B.data['status'], 'NEW') + self.assertEqual(B.reported, 2) + self.assertEqual(self.getJobFromHistory('project-merge').result, + 'SUCCESS') + self.assertEqual(self.getJobFromHistory('project-test1').result, + 'SUCCESS') + self.assertEqual(self.getJobFromHistory('project-test2').result, + 'SUCCESS') + self.assertEqual(self.getJobFromHistory('project-test3').result, + 'SUCCESS') + self.assertEqual(len(self.history), 4) + + def test_live_reconfiguration_failed_job(self): + # An extrapolation of test_live_reconfiguration_merge_conflict + # that tests a job added to a job tree with a failed root does + # not run. + self.worker.registerFunction('build:project-test3') + self.worker.hold_jobs_in_build = True + + # This change is fine. 
It's here to stop the queue long + # enough for the next change to be subject to the + # reconfiguration. This change will succeed and merge. + A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A') + A.addPatchset(['conflict']) + A.addApproval('CRVW', 2) + self.fake_gerrit.addEvent(A.addApproval('APRV', 1)) + self.waitUntilSettled() + self.worker.release('.*-merge') + self.waitUntilSettled() + + B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B') + self.worker.addFailTest('project-merge', B) + B.addApproval('CRVW', 2) + self.fake_gerrit.addEvent(B.addApproval('APRV', 1)) + self.waitUntilSettled() + + self.worker.release('.*-merge') + self.waitUntilSettled() + + # Both -merge jobs have run, but no others. + self.assertEqual(A.data['status'], 'NEW') + self.assertEqual(A.reported, 1) + self.assertEqual(B.data['status'], 'NEW') + self.assertEqual(B.reported, 1) + self.assertEqual(self.history[0].result, 'SUCCESS') + self.assertEqual(self.history[0].name, 'project-merge') + self.assertEqual(self.history[1].result, 'FAILURE') + self.assertEqual(self.history[1].name, 'project-merge') + self.assertEqual(len(self.history), 2) + + # Add the "project-test3" job. 
+ self.config.set('zuul', 'layout_config', + 'tests/fixtures/layout-live-' + 'reconfiguration-add-job.yaml') + self.sched.reconfigure(self.config) + self.waitUntilSettled() + + self.worker.hold_jobs_in_build = False + self.worker.release() + self.waitUntilSettled() + + self.assertEqual(A.data['status'], 'MERGED') + self.assertEqual(A.reported, 2) + self.assertEqual(B.data['status'], 'NEW') + self.assertEqual(B.reported, 2) + self.assertEqual(self.history[0].result, 'SUCCESS') + self.assertEqual(self.history[0].name, 'project-merge') + self.assertEqual(self.history[1].result, 'FAILURE') + self.assertEqual(self.history[1].name, 'project-merge') + self.assertEqual(self.history[2].result, 'SUCCESS') + self.assertEqual(self.history[3].result, 'SUCCESS') + self.assertEqual(self.history[4].result, 'SUCCESS') + self.assertEqual(len(self.history), 5) + def test_live_reconfiguration_functions(self): "Test live reconfiguration with a custom function" self.worker.registerFunction('build:node-project-test1:debian') diff --git a/zuul/scheduler.py b/zuul/scheduler.py index 9620036a1..58e8a96d7 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -1176,6 +1176,18 @@ class BasePipelineManager(object): self.log.debug("Re-enqueing change %s in queue %s" % (item.change, change_queue)) change_queue.enqueueItem(item) + + # Re-set build results in case any new jobs have been + # added to the tree. + for build in item.current_build_set.getBuilds(): + if build.result: + self.pipeline.setResult(item, build) + # Similarly, reset the item state. + if item.current_build_set.unable_to_merge: + self.pipeline.setUnableToMerge(item) + if item.dequeued_needing_change: + self.pipeline.setDequeuedNeedingChange(item) + self.reportStats(item) return True else: |