diff options
-rw-r--r-- | doc/source/config/pipeline.rst | 13 | ||||
-rw-r--r-- | playbooks/zuul-stream/functional.yaml | 5 | ||||
-rw-r--r-- | releasenotes/notes/config-error-reporter-34887223d91544d1.yaml | 6 | ||||
-rw-r--r-- | tests/fixtures/layouts/freeze-job-failure.yaml | 32 | ||||
-rw-r--r-- | tests/fixtures/layouts/timer-freeze-job-failure.yaml | 26 | ||||
-rw-r--r-- | tests/unit/test_scheduler.py | 32 | ||||
-rw-r--r-- | zuul/ansible/base/callback/zuul_stream.py | 33 | ||||
-rw-r--r-- | zuul/configloader.py | 7 | ||||
-rw-r--r-- | zuul/driver/gerrit/gerritreporter.py | 3 | ||||
-rw-r--r-- | zuul/manager/__init__.py | 12 | ||||
-rw-r--r-- | zuul/reporter/__init__.py | 8 |
11 files changed, 149 insertions, 28 deletions
diff --git a/doc/source/config/pipeline.rst b/doc/source/config/pipeline.rst index f1c294775..f4d7cce69 100644 --- a/doc/source/config/pipeline.rst +++ b/doc/source/config/pipeline.rst @@ -332,9 +332,16 @@ success, the pipeline reports back to Gerrit with ``Verified`` vote of .. attr:: merge-conflict These reporters describe what Zuul should do if it is unable to - merge in the patchset. If no merge-conflict reporters are listed - then the ``failure`` reporters will be used to notify of - unsuccessful merges. + merge the patchset into the current state of the target + branch. If no merge-conflict reporters are listed then the + ``failure`` reporters will be used. + + .. attr:: config-error + + These reporters describe what Zuul should do if it encounters a + configuration error while trying to enqueue the item. If no + config-error reporters are listed then the ``failure`` reporters + will be used. .. attr:: enqueue diff --git a/playbooks/zuul-stream/functional.yaml b/playbooks/zuul-stream/functional.yaml index b8a44a87c..cf60d2cf6 100644 --- a/playbooks/zuul-stream/functional.yaml +++ b/playbooks/zuul-stream/functional.yaml @@ -31,11 +31,6 @@ mv job-output.txt job-output-success-19887.txt mv job-output.json job-output-success-19887.json - - name: Check protocol version - assert: - that: - - "'[node1] Reports streaming version: 1' in _success_output.stdout" - # Streamer puts out a line like # [node1] Starting to log 916b2084-4bbb-80e5-248e-000000000016-1-node1 for task TASK: Print binary data # One of the tasks in job-output shows find: results; diff --git a/releasenotes/notes/config-error-reporter-34887223d91544d1.yaml b/releasenotes/notes/config-error-reporter-34887223d91544d1.yaml new file mode 100644 index 000000000..51690f2fa --- /dev/null +++ b/releasenotes/notes/config-error-reporter-34887223d91544d1.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + A new :attr:`pipeline.config-error` pipeline reporter is available + for customizing reporter actions related to Zuul configuration + errors. diff --git a/tests/fixtures/layouts/freeze-job-failure.yaml b/tests/fixtures/layouts/freeze-job-failure.yaml new file mode 100644 index 000000000..ae3f48324 --- /dev/null +++ b/tests/fixtures/layouts/freeze-job-failure.yaml @@ -0,0 +1,32 @@ +- pipeline: + name: check + manager: independent + trigger: + gerrit: + - event: patchset-created + success: + gerrit: + Verified: 1 + failure: + gerrit: + Verified: -1 + +- job: + name: base + parent: null + run: playbooks/base.yaml + +- job: + name: project-test1 + run: playbooks/project-test1.yaml + +- job: + name: project-test2 + run: playbooks/project-test2.yaml + +- project: + name: org/project + check: + jobs: + - project-test2: + dependencies: project-test1 diff --git a/tests/fixtures/layouts/timer-freeze-job-failure.yaml b/tests/fixtures/layouts/timer-freeze-job-failure.yaml new file mode 100644 index 000000000..2e6d709bb --- /dev/null +++ b/tests/fixtures/layouts/timer-freeze-job-failure.yaml @@ -0,0 +1,26 @@ +- pipeline: + name: periodic + manager: independent + trigger: + timer: + - time: '* * * * * */1' + +- job: + name: base + parent: null + run: playbooks/base.yaml + +- job: + name: project-test1 + run: playbooks/project-test1.yaml + +- job: + name: project-test2 + run: playbooks/project-test2.yaml + +- project: + name: org/project + periodic: + jobs: + - project-test2: + dependencies: project-test1 diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py index 66c508fea..978bc00a4 100644 --- a/tests/unit/test_scheduler.py +++ b/tests/unit/test_scheduler.py @@ -5343,6 +5343,11 @@ For CI problems and help debugging, contact ci@example.org""" self.assertIn('Error merging gerrit/org/project', B.messages[0]) self.assertNotIn('logs.example.com', B.messages[0]) self.assertNotIn('SKIPPED', B.messages[0]) + buildsets = list( + self.scheds.first.connections.connections[ + 'database'].getBuildsets()) + self.assertEqual(buildsets[0].result, 'MERGE_CONFLICT') + self.assertIn('This change or one of', buildsets[0].message) def test_submit_failure(self): A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A') @@ -5357,6 +5362,33 @@ For CI problems and help debugging, contact ci@example.org""" 'database'].getBuildsets()) self.assertEqual(buildsets[0].result, 'MERGE_FAILURE') + @simple_layout('layouts/timer-freeze-job-failure.yaml') + def test_periodic_freeze_job_failure(self): + self.waitUntilSettled() + + for x in iterate_timeout(30, 'buildset complete'): + buildsets = list( + self.scheds.first.connections.connections[ + 'database'].getBuildsets()) + if buildsets: + break + self.assertEqual(buildsets[0].result, 'CONFIG_ERROR') + self.assertIn('Job project-test2 depends on project-test1 ' + 'which was not run', buildsets[0].message) + + @simple_layout('layouts/freeze-job-failure.yaml') + def test_freeze_job_failure(self): + A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A') + self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) + self.waitUntilSettled() + + buildsets = list( + self.scheds.first.connections.connections[ + 'database'].getBuildsets()) + self.assertEqual(buildsets[0].result, 'CONFIG_ERROR') + self.assertIn('Job project-test2 depends on project-test1 ' + 'which was not run', buildsets[0].message) + @simple_layout('layouts/nonvoting-pipeline.yaml') def test_nonvoting_pipeline(self): "Test that a nonvoting pipeline (experimental) can still report" diff --git a/zuul/ansible/base/callback/zuul_stream.py b/zuul/ansible/base/callback/zuul_stream.py index f31983ed6..39d3aa953 100644 --- a/zuul/ansible/base/callback/zuul_stream.py +++ b/zuul/ansible/base/callback/zuul_stream.py @@ -121,6 +121,21 @@ class CallbackModule(default.CallbackModule): self._logger = logging.getLogger('zuul.executor.ansible') def _log(self, msg, ts=None, job=True, executor=False, debug=False): + # With the default "linear" strategy (and likely others), + # Ansible will send the on_task_start callback, and then fork + # a worker process to execute that task. Since we spawn a + # thread in the on_task_start callback, we can end up emitting + # a log message in this method while Ansible is forking. If a + # forked process inherits a Python file object (i.e., stdout) + # that is locked by a thread that doesn't exist in the fork + # (i.e., this one), it can deadlock when trying to flush the + # file object. To minimize the chances of that happening, we + # should avoid using _display outside the main thread. + # Therefore: + + # Do not set executor=True from any calls from a thread + # spawned in this callback. + msg = msg.rstrip() if job: now = ts or datetime.datetime.now() @@ -143,10 +158,6 @@ class CallbackModule(default.CallbackModule): s.settimeout(None) return s except socket.timeout: - self._log( - "Timeout exception waiting for the logger. " - "Please check connectivity to [%s:%s]" - % (ip, port), executor=True) self._log_streamline( "localhost", "Timeout exception waiting for the logger. " @@ -155,16 +166,12 @@ class CallbackModule(default.CallbackModule): return None except Exception: if logger_retries % 10 == 0: - self._log("[%s] Waiting on logger" % host, - executor=True, debug=True) + self._log("[%s] Waiting on logger" % host) logger_retries += 1 time.sleep(0.1) continue def _read_log(self, host, ip, port, log_id, task_name, hosts): - self._log("[%s] Starting to log %s for task %s" - % (host, log_id, task_name), job=False, executor=True) - s = self._read_log_connect(host, ip, port) if s is None: # Can't connect; _read_log_connect() already logged an @@ -188,9 +195,6 @@ class CallbackModule(default.CallbackModule): return else: self._zuul_console_version = int(buff) - self._log('[%s] Reports streaming version: %d' % - (host, self._zuul_console_version), - job=False, executor=True) if self._zuul_console_version >= 1: msg = 's:%s\n' % log_id @@ -349,6 +353,9 @@ class CallbackModule(default.CallbackModule): log_id = "%s-%s-%s" % ( self._task._uuid, count, log_host) + self._log("[%s] Starting to log %s for task %s" + % (host, log_id, task_name), + job=False, executor=True) streamer = threading.Thread( target=self._read_log, args=( host, ip, port, log_id, task_name, hosts)) @@ -369,7 +376,7 @@ class CallbackModule(default.CallbackModule): streamer.join(30) if streamer.is_alive(): msg = "[Zuul] Log Stream did not terminate" - self._log(msg, job=True, executor=True) + self._log(msg) self._streamers_stop = False def _process_result_for_localhost(self, result, is_task=True): diff --git a/zuul/configloader.py b/zuul/configloader.py index eb468518f..365967d56 100644 --- a/zuul/configloader.py +++ b/zuul/configloader.py @@ -1176,6 +1176,7 @@ class PipelineParser(object): 'success': 'success_actions', 'failure': 'failure_actions', 'merge-conflict': 'merge_conflict_actions', + 'config-error': 'config_error_actions', 'no-jobs': 'no_jobs_actions', 'disabled': 'disabled_actions', 'dequeue': 'dequeue_actions', @@ -1250,7 +1251,7 @@ class PipelineParser(object): pipeline['trigger'] = vs.Required(self.getDriverSchema('trigger')) for action in ['enqueue', 'start', 'success', 'failure', 'merge-conflict', 'merge-failure', 'no-jobs', - 'disabled', 'dequeue']: + 'disabled', 'dequeue', 'config-error']: pipeline[action] = self.getDriverSchema('reporter') return vs.Schema(pipeline) @@ -1318,6 +1319,10 @@ class PipelineParser(object): if not pipeline.merge_conflict_actions: pipeline.merge_conflict_actions = pipeline.failure_actions + # If config-error actions aren't explicit, use the failure actions + if not pipeline.config_error_actions: + pipeline.config_error_actions = pipeline.failure_actions + pipeline.disable_at = conf.get( 'disable-after-consecutive-failures', None) diff --git a/zuul/driver/gerrit/gerritreporter.py b/zuul/driver/gerrit/gerritreporter.py index b99133dce..c38a9484a 100644 --- a/zuul/driver/gerrit/gerritreporter.py +++ b/zuul/driver/gerrit/gerritreporter.py @@ -36,6 +36,9 @@ class GerritReporter(BaseReporter): self._checks_api = action.pop('checks-api', None) self._labels = action + def __repr__(self): + return f"<GerritReporter: {self._action}>" + def report(self, item, phase1=True, phase2=True): """Send a message to gerrit.""" log = get_annotated_logger(self.log, item.event) diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py index 31c4b54d7..637d31d9f 100644 --- a/zuul/manager/__init__.py +++ b/zuul/manager/__init__.py @@ -322,9 +322,10 @@ class PipelineManager(metaclass=ABCMeta): (item, ret)) def reportNormalBuildsetEnd(self, build_set, action, final, result=None): - # Report a buildset end, but only if there are jobs - if (build_set.job_graph and - len(build_set.job_graph.jobs) > 0): + # Report a buildset end if there are jobs or errors + if ((build_set.job_graph and len(build_set.job_graph.jobs) > 0) or + build_set.config_errors or + build_set.unable_to_merge): self.sql.reportBuildsetEnd(build_set, action, final, result) @@ -2032,9 +2033,8 @@ class PipelineManager(metaclass=ABCMeta): item.setReportedResult('NO_JOBS') elif item.getConfigErrors(): log.debug("Invalid config for change %s", item.change) - # TODOv3(jeblair): consider a new reporter action for this - action = 'merge-conflict' - actions = self.pipeline.merge_conflict_actions + action = 'config-error' + actions = self.pipeline.config_error_actions item.setReportedResult('CONFIG_ERROR') elif item.didMergerFail(): log.debug("Merge conflict") diff --git a/zuul/reporter/__init__.py b/zuul/reporter/__init__.py index 9cd60bff6..5af48abc6 100644 --- a/zuul/reporter/__init__.py +++ b/zuul/reporter/__init__.py @@ -135,6 +135,7 @@ class BaseReporter(object, metaclass=abc.ABCMeta): 'failure': self._formatItemReportFailure, 'merge-conflict': self._formatItemReportMergeConflict, 'merge-failure': self._formatItemReportMergeFailure, + 'config-error': self._formatItemReportConfigError, 'no-jobs': self._formatItemReportNoJobs, 'disabled': self._formatItemReportDisabled, 'dequeue': self._formatItemReportDequeue, @@ -226,6 +227,13 @@ class BaseReporter(object, metaclass=abc.ABCMeta): def _formatItemReportMergeFailure(self, item, with_jobs=True): return 'This change was not merged by the code review system.\n' + def _formatItemReportConfigError(self, item, with_jobs=True): + if item.getConfigErrors(): + msg = str(item.getConfigErrors()[0].error) + else: + msg = "Unknown configuration error" + return msg + def _formatItemReportNoJobs(self, item, with_jobs=True): status_url = get_default(self.connection.sched.config, 'web', 'status_url', '') |