author     James E. Blair <jim@acmegating.com>  2022-09-03 14:03:36 -0700
committer  James E. Blair <jim@acmegating.com>  2022-09-08 13:01:28 -0700
commit     1958bbad03d4c2eb59502df8682bdfcb248400c2 (patch)
tree       ffb6bffdbd41ff93eea1a732aff536db906b2058
parent     64fa9db5799d33838267183f5c6734d4ba5ee7c9 (diff)
download   zuul-1958bbad03d4c2eb59502df8682bdfcb248400c2.tar.gz
Add nodeset alternatives
This feature instructs Zuul to attempt one or more additional node
requests with a different node configuration (i.e., possibly different
labels) if the first request fails. It is intended to address the case
where a cloud provider is unable to supply specialized high-performance
nodes, and the user would like the job to proceed anyway on
lower-performance nodes.

Change-Id: Idede4244eaa3b21a34c20099214fda6ecdc992df
-rw-r--r--  doc/source/config/nodeset.rst                                  |  56
-rw-r--r--  doc/source/developer/model-changelog.rst                       |   7
-rw-r--r--  releasenotes/notes/nodeset-alternatives-7cc39a69ac4f1481.yaml  |   7
-rw-r--r--  tests/fixtures/layouts/nodeset-alternatives.yaml               |  14
-rw-r--r--  tests/fixtures/layouts/nodeset-fallback.yaml                   |  66
-rw-r--r--  tests/unit/test_model_upgrade.py                               |  38
-rw-r--r--  tests/unit/test_scheduler.py                                   |  45
-rw-r--r--  tests/unit/test_v3.py                                          |  63
-rw-r--r--  tests/unit/test_web.py                                         |  82
-rw-r--r--  web/src/containers/job/JobVariant.jsx                          |  13
-rw-r--r--  zuul/configloader.py                                           |  46
-rw-r--r--  zuul/manager/__init__.py                                       |  72
-rw-r--r--  zuul/model.py                                                  | 132
-rw-r--r--  zuul/model_api.py                                              |   2
14 files changed, 587 insertions(+), 56 deletions(-)
diff --git a/doc/source/config/nodeset.rst b/doc/source/config/nodeset.rst
index 84c413c9b..7c1ebbbd2 100644
--- a/doc/source/config/nodeset.rst
+++ b/doc/source/config/nodeset.rst
@@ -40,6 +40,39 @@ branch will not immediately produce a configuration error.
nodes:
- web
+Nodesets may also be used to express that Zuul should use the first of
+multiple alternative node configurations to run a job. When a Nodeset
+specifies a list of :attr:`nodeset.alternatives`, Zuul will request the
+first Nodeset in the series, and if allocation fails for any reason,
+Zuul will re-attempt the request with the subsequent Nodeset and so
+on. The first Nodeset which is successfully supplied by Nodepool will
+be used to run the job. An example of such a configuration follows.
+
+.. code-block:: yaml
+
+ - nodeset:
+ name: fast-nodeset
+ nodes:
+ - label: fast-label
+ name: controller
+
+ - nodeset:
+ name: slow-nodeset
+ nodes:
+ - label: slow-label
+ name: controller
+
+ - nodeset:
+ name: fast-or-slow
+ alternatives:
+ - fast-nodeset
+ - slow-nodeset
+
+In the above example, a job that requested the `fast-or-slow` nodeset
+would receive `fast-label` nodes if a provider was able to supply
+them, otherwise it would receive `slow-label` nodes. A Nodeset may
+specify nodes and groups, or alternative nodesets, but not both.
+
.. attr:: nodeset
A Nodeset requires two attributes:
@@ -54,7 +87,8 @@ branch will not immediately produce a configuration error.
definition, this attribute should be omitted.
.. attr:: nodes
- :required:
+
+   This attribute is required unless `alternatives` is supplied.
A list of node definitions, each of which has the following format:
@@ -89,3 +123,23 @@ branch will not immediately produce a configuration error.
The nodes that shall be part of the group. This is specified as a list
of strings.
+ .. attr:: alternatives
+ :type: list
+
+ A list of alternative nodesets for which requests should be
+ attempted in series. The first request which succeeds will be
+ used for the job.
+
+ The items in the list may be either strings, in which case they
+ refer to other Nodesets within the layout, or they may be a
+ dictionary which is a nested anonymous Nodeset definition. The
+ two types (strings or nested definitions) may be mixed.
+
+ An alternative Nodeset definition may in turn refer to other
+ alternative nodeset definitions. In this case, the tree of
+     definitions will be flattened in a depth-first manner to
+ create the ordered list of alternatives.
+
+ A Nodeset which specifies alternatives may not also specify
+ nodes or groups (this attribute is exclusive with
+     :attr:`nodeset.nodes` and :attr:`nodeset.groups`).
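The ordered flattening and cycle detection described above can be
pictured with a minimal standalone sketch. It assumes nodesets are
plain dictionaries and the layout is a simple name-to-nodeset mapping;
the names here are hypothetical, not Zuul's actual NodeSet and Layout
classes.

.. code-block:: python

   def flatten_alternatives(nodeset, layout, result=None, seen=None):
       # Flatten nested alternatives into an ordered list of concrete
       # nodesets, raising on unknown names and on reference cycles.
       result = [] if result is None else result
       seen = [] if seen is None else seen
       if isinstance(nodeset, str):
           # A string refers to another named nodeset in the layout.
           if nodeset not in layout:
               raise Exception(f'The nodeset "{nodeset}" was not found.')
           nodeset = layout[nodeset]
       if id(nodeset) in seen:
           raise Exception('Nodeset cycle detected')
       seen.append(id(nodeset))
       if nodeset.get('alternatives'):
           for alt in nodeset['alternatives']:
               flatten_alternatives(alt, layout, result, seen)
       else:
           result.append(nodeset)
       return result

   layout = {
       'fast-nodeset': {'nodes': [{'name': 'controller',
                                   'label': 'fast-label'}]},
       'slow-nodeset': {'nodes': [{'name': 'controller',
                                   'label': 'slow-label'}]},
       'fast-or-slow': {'alternatives': ['fast-nodeset', 'slow-nodeset']},
   }
   # Prints ['fast-label', 'slow-label']
   print([ns['nodes'][0]['label']
          for ns in flatten_alternatives('fast-or-slow', layout)])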
diff --git a/doc/source/developer/model-changelog.rst b/doc/source/developer/model-changelog.rst
index 0d4cb5077..d27ec8351 100644
--- a/doc/source/developer/model-changelog.rst
+++ b/doc/source/developer/model-changelog.rst
@@ -86,3 +86,10 @@ Version 8
:Prior Zuul version: 6.0.0
:Description: Deduplicates jobs in dependency cycles. Affects
schedulers only.
+
+Version 9
+---------
+
+:Prior Zuul version: 6.3.0
+:Description: Adds nodeset_alternatives and nodeset_index to frozen job.
+  Removes nodeset from frozen job. Affects schedulers and executors.
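The rolling-upgrade behavior this version implies can be sketched as a
two-sided shim, assuming frozen-job data is a plain dictionary (the
real logic lives in FrozenJob's serialize/deserialize, shown further
below): while any component still runs model API 8, writers keep
emitting the legacy `nodeset` key, and readers upgrade old records on
the fly.

.. code-block:: python

   MODEL_API = 9

   def serialize(data, effective_api):
       # effective_api is the lowest model API across running components.
       data = dict(data)
       if effective_api < 9:
           # Old schedulers/executors still read a single 'nodeset'.
           data['nodeset'] = data['nodeset_alternatives'][0]
       return data

   def deserialize(data):
       data = dict(data)
       if data.get('nodeset'):
           # Upgrade a record written before model API 9.
           data['nodeset_alternatives'] = [data.pop('nodeset')]
           data['nodeset_index'] = 0
       return data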
diff --git a/releasenotes/notes/nodeset-alternatives-7cc39a69ac4f1481.yaml b/releasenotes/notes/nodeset-alternatives-7cc39a69ac4f1481.yaml
new file mode 100644
index 000000000..e4dc4274b
--- /dev/null
+++ b/releasenotes/notes/nodeset-alternatives-7cc39a69ac4f1481.yaml
@@ -0,0 +1,7 @@
+---
+features:
+ - |
+ Nodesets may now express an ordered list of alternatives so that
+ if Nodepool is unable to fulfill a request for certain labels, one
+ or more alternative Nodesets may be attempted instead. See
+ :attr:`nodeset.alternatives` for details.
diff --git a/tests/fixtures/layouts/nodeset-alternatives.yaml b/tests/fixtures/layouts/nodeset-alternatives.yaml
new file mode 100644
index 000000000..21f9f11ae
--- /dev/null
+++ b/tests/fixtures/layouts/nodeset-alternatives.yaml
@@ -0,0 +1,14 @@
+- nodeset:
+ name: fast-nodeset
+ nodes:
+ - name: controller
+ label: fast-label
+
+- job:
+ name: test-job
+ nodeset:
+ alternatives:
+ - fast-nodeset
+ - nodes:
+ - name: controller
+ label: slow-label
diff --git a/tests/fixtures/layouts/nodeset-fallback.yaml b/tests/fixtures/layouts/nodeset-fallback.yaml
new file mode 100644
index 000000000..01869537e
--- /dev/null
+++ b/tests/fixtures/layouts/nodeset-fallback.yaml
@@ -0,0 +1,66 @@
+- pipeline:
+ name: check
+ manager: independent
+ trigger:
+ gerrit:
+ - event: patchset-created
+ success:
+ gerrit:
+ Verified: 1
+ failure:
+ gerrit:
+ Verified: -1
+
+- nodeset:
+ name: fast-nodeset
+ nodes:
+ - label: fast-label
+ name: controller
+
+- nodeset:
+ name: red-nodeset
+ nodes:
+ - label: red-label
+ name: controller
+
+- nodeset:
+ name: blue-nodeset
+ nodes:
+ - label: blue-label
+ name: controller
+
+# This adds an unused second level of alternatives to verify we are
+# able to flatten it.
+- nodeset:
+ name: red-or-blue-nodeset
+ alternatives:
+ - red-nodeset
+ - blue-nodeset
+
+# Test alternatives by name or anonymous nodeset
+- nodeset:
+ name: fast-or-slow
+ alternatives:
+ - fast-nodeset
+ - nodes:
+ label: slow-label
+ name: controller
+ - red-or-blue-nodeset
+
+- job:
+ name: base
+ parent: null
+ run: playbooks/base.yaml
+
+- job:
+ name: check-job
+ nodeset: fast-or-slow
+
+- project:
+ name: org/project
+ check:
+ jobs:
+ - check-job
+ gate:
+ jobs:
+ - check-job
diff --git a/tests/unit/test_model_upgrade.py b/tests/unit/test_model_upgrade.py
index 2004b317b..020045859 100644
--- a/tests/unit/test_model_upgrade.py
+++ b/tests/unit/test_model_upgrade.py
@@ -215,6 +215,44 @@ class TestModelUpgrade(ZuulTestCase):
# code paths are exercised in existing tests since small secrets
# don't use the blob store.
+ @model_version(8)
+ def test_model_8_9(self):
+        # This exercises the upgrade to nodeset_alternatives
+ first = self.scheds.first
+ second = self.createScheduler()
+ second.start()
+ self.assertEqual(len(self.scheds), 2)
+ for _ in iterate_timeout(10, "until priming is complete"):
+ state_one = first.sched.local_layout_state.get("tenant-one")
+ if state_one:
+ break
+
+ for _ in iterate_timeout(
+ 10, "all schedulers to have the same layout state"):
+ if (second.sched.local_layout_state.get(
+ "tenant-one") == state_one):
+ break
+
+ self.fake_nodepool.pause()
+ with second.sched.layout_update_lock, second.sched.run_handler_lock:
+ A = self.fake_gerrit.addFakeChange('org/project1', 'master', 'A')
+ self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled(matcher=[first])
+
+ self.model_test_component_info.model_api = 9
+ with first.sched.layout_update_lock, first.sched.run_handler_lock:
+ self.fake_nodepool.unpause()
+ self.waitUntilSettled(matcher=[second])
+
+ self.waitUntilSettled()
+ self.assertHistory([
+ dict(name='project-merge', result='SUCCESS', changes='1,1'),
+ dict(name='project-test1', result='SUCCESS', changes='1,1'),
+ dict(name='project-test2', result='SUCCESS', changes='1,1'),
+ dict(name='project1-project2-integration',
+ result='SUCCESS', changes='1,1'),
+ ], ordered=False)
+
class TestSemaphoreModelUpgrade(ZuulTestCase):
tenant_config_file = 'config/semaphore/main.yaml'
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 978bc00a4..321905327 100644
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -36,6 +36,7 @@ from zuul.driver.gerrit import gerritreporter
import zuul.scheduler
import zuul.model
import zuul.merger.merger
+from zuul.lib import yamlutil as yaml
from tests.base import (
SSLZuulTestCase,
@@ -6078,6 +6079,50 @@ For CI problems and help debugging, contact ci@example.org"""
self.assertFalse(node['_lock'])
self.assertEqual(node['state'], 'ready')
+ @simple_layout('layouts/nodeset-fallback.yaml')
+ def test_nodeset_fallback(self):
+ # Test that nodeset fallback works
+ self.executor_server.hold_jobs_in_build = True
+
+ # Verify that we get the correct number and order of
+ # alternates from our nested config.
+ tenant = self.scheds.first.sched.abide.tenants.get('tenant-one')
+ job = tenant.layout.getJob('check-job')
+ alts = job.flattenNodesetAlternatives(tenant.layout)
+ self.assertEqual(4, len(alts))
+ self.assertEqual('fast-nodeset', alts[0].name)
+ self.assertEqual('', alts[1].name)
+ self.assertEqual('red-nodeset', alts[2].name)
+ self.assertEqual('blue-nodeset', alts[3].name)
+
+ self.fake_nodepool.pause()
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+ self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled()
+
+ req = self.fake_nodepool.getNodeRequests()[0]
+ self.fake_nodepool.addFailRequest(req)
+
+ self.fake_nodepool.unpause()
+ self.waitUntilSettled()
+
+ build = self.getBuildByName('check-job')
+ inv_path = os.path.join(build.jobdir.root, 'ansible', 'inventory.yaml')
+ inventory = yaml.safe_load(open(inv_path, 'r'))
+ label = inventory['all']['hosts']['controller']['nodepool']['label']
+ self.assertEqual('slow-label', label)
+
+ self.executor_server.hold_jobs_in_build = False
+ self.executor_server.release()
+ self.waitUntilSettled()
+
+ self.assertEqual(A.data['status'], 'NEW')
+ self.assertEqual(A.reported, 1)
+ self.assertNotIn('NODE_FAILURE', A.messages[0])
+ self.assertHistory([
+ dict(name='check-job', result='SUCCESS', changes='1,1'),
+ ], ordered=False)
+
@simple_layout('layouts/multiple-templates.yaml')
def test_multiple_project_templates(self):
# Test that applying multiple project templates to a project
diff --git a/tests/unit/test_v3.py b/tests/unit/test_v3.py
index a89bb3007..d5c59bab0 100644
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@@ -2682,6 +2682,69 @@ class TestInRepoConfig(ZuulTestCase):
self.assertIn('Debug information:',
A.messages[0], "A should have debug info")
+ def test_nodeset_alternates_cycle(self):
+ in_repo_conf = textwrap.dedent(
+ """
+ - nodeset:
+ name: red
+ alternatives: [blue]
+ - nodeset:
+ name: blue
+ alternatives: [red]
+ - job:
+ name: project-test1
+ run: playbooks/project-test1.yaml
+ nodeset: blue
+ """)
+
+ file_dict = {'.zuul.yaml': in_repo_conf}
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
+ files=file_dict)
+ self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled()
+
+ self.assertEqual(A.reported, 1)
+ self.assertIn("cycle detected", A.messages[0])
+
+ def test_nodeset_alternates_missing_from_nodeset(self):
+ in_repo_conf = textwrap.dedent(
+ """
+ - nodeset:
+ name: red
+ alternatives: [blue]
+ - job:
+ name: project-test1
+ run: playbooks/project-test1.yaml
+ """)
+
+ file_dict = {'.zuul.yaml': in_repo_conf}
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
+ files=file_dict)
+ self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled()
+
+ self.assertEqual(A.reported, 1)
+ self.assertIn('nodeset "blue" was not found', A.messages[0])
+
+ def test_nodeset_alternates_missing_from_job(self):
+ in_repo_conf = textwrap.dedent(
+ """
+ - job:
+ name: project-test1
+ run: playbooks/project-test1.yaml
+ nodeset:
+ alternatives: [red]
+ """)
+
+ file_dict = {'.zuul.yaml': in_repo_conf}
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
+ files=file_dict)
+ self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled()
+
+ self.assertEqual(A.reported, 1)
+ self.assertIn('nodeset "red" was not found', A.messages[0])
+
@skipIfMultiScheduler()
# See comment in TestInRepoConfigDir.scheduler_count for further
# details.
diff --git a/tests/unit/test_web.py b/tests/unit/test_web.py
index ba1931436..0fe8b89f7 100644
--- a/tests/unit/test_web.py
+++ b/tests/unit/test_web.py
@@ -341,6 +341,82 @@ class TestWeb(BaseTestWeb):
self.assertEqual(1, len(data), data)
self.assertEqual("org/project1", data[0]['project'], data)
+ @simple_layout('layouts/nodeset-alternatives.yaml')
+ def test_web_find_job_nodeset_alternatives(self):
+ # test a complex nodeset
+ data = self.get_url('api/tenant/tenant-one/job/test-job').json()
+
+ self.assertEqual([
+ {'abstract': False,
+ 'ansible_version': None,
+ 'attempts': 3,
+ 'branches': [],
+ 'cleanup_run': [],
+ 'deduplicate': 'auto',
+ 'dependencies': [],
+ 'description': None,
+ 'extra_variables': {},
+ 'files': [],
+ 'final': False,
+ 'group_variables': {},
+ 'host_variables': {},
+ 'intermediate': False,
+ 'irrelevant_files': [],
+ 'match_on_config_updates': True,
+ 'name': 'test-job',
+ 'nodeset_alternatives': [{'alternatives': [],
+ 'groups': [],
+ 'name': 'fast-nodeset',
+ 'nodes': [{'aliases': [],
+ 'comment': None,
+ 'hold_job': None,
+ 'id': None,
+ 'label': 'fast-label',
+ 'name': 'controller',
+ 'requestor': None,
+ 'state': 'unknown',
+ 'tenant_name': None,
+ 'user_data': None}]},
+ {'alternatives': [],
+ 'groups': [],
+ 'name': '',
+ 'nodes': [{'aliases': [],
+ 'comment': None,
+ 'hold_job': None,
+ 'id': None,
+ 'label': 'slow-label',
+ 'name': 'controller',
+ 'requestor': None,
+ 'state': 'unknown',
+ 'tenant_name': None,
+ 'user_data': None}]}],
+ 'override_checkout': None,
+ 'parent': 'base',
+ 'post_review': None,
+ 'post_run': [],
+ 'pre_run': [],
+ 'protected': None,
+ 'provides': [],
+ 'required_projects': [],
+ 'requires': [],
+ 'roles': [{'implicit': True,
+ 'project_canonical_name':
+ 'review.example.com/org/common-config',
+ 'target_name': 'common-config',
+ 'type': 'zuul'}],
+ 'run': [],
+ 'semaphores': [],
+ 'source_context': {'branch': 'master',
+ 'path': 'zuul.yaml',
+ 'project': 'org/common-config'},
+ 'tags': [],
+ 'timeout': None,
+ 'variables': {},
+ 'variant_description': '',
+ 'voting': True,
+ 'workspace_scheme': 'golang',
+ }], data)
+
def test_web_find_job(self):
# can we fetch the variants for a single job
data = self.get_url('api/tenant/tenant-one/job/project-test1').json()
@@ -384,6 +460,7 @@ class TestWeb(BaseTestWeb):
'match_on_config_updates': True,
'final': False,
'nodeset': {
+ 'alternatives': [],
'groups': [],
'name': '',
'nodes': [{'comment': None,
@@ -435,6 +512,7 @@ class TestWeb(BaseTestWeb):
'match_on_config_updates': True,
'final': False,
'nodeset': {
+ 'alternatives': [],
'groups': [],
'name': '',
'nodes': [{'comment': None,
@@ -1074,6 +1152,7 @@ class TestWeb(BaseTestWeb):
'branch': 'master',
'cleanup_playbooks': [],
'nodeset': {
+ 'alternatives': [],
'groups': [],
'name': '',
'nodes': [
@@ -1171,7 +1250,8 @@ class TestWeb(BaseTestWeb):
'host_vars': {},
'items': [],
'job': 'noop',
- 'nodeset': {'groups': [], 'name': '', 'nodes': []},
+ 'nodeset': {'alternatives': [],
+ 'groups': [], 'name': '', 'nodes': []},
'override_branch': None,
'override_checkout': None,
'post_timeout': None,
diff --git a/web/src/containers/job/JobVariant.jsx b/web/src/containers/job/JobVariant.jsx
index bec2276ef..9621cf333 100644
--- a/web/src/containers/job/JobVariant.jsx
+++ b/web/src/containers/job/JobVariant.jsx
@@ -107,7 +107,8 @@ class JobVariant extends React.Component {
const jobInfos = [
'source_context', 'builds', 'status',
'parent', 'attempts', 'timeout', 'semaphores',
- 'nodeset', 'variables', 'override_checkout',
+ 'nodeset', 'nodeset_alternatives', 'variables',
+ 'override_checkout',
]
jobInfos.forEach(key => {
let label = key
@@ -173,7 +174,15 @@ class JobVariant extends React.Component {
)
nice_label = (<span><ClusterIcon /> Required nodes</span>)
}
-
+ if (label === 'nodeset_alternatives') {
+ value = value.map((alt, idx) => {
+ return (<>
+ {(idx > 0 ? <span>or</span>:<></>)}
+ <Nodeset nodeset={alt} />
+ </>)
+ })
+ nice_label = (<span><ClusterIcon /> Required nodes</span>)
+ }
if (label === 'parent') {
value = (
<Link to={tenant.linkPrefix + '/job/' + value}>
diff --git a/zuul/configloader.py b/zuul/configloader.py
index 365967d56..10fa00751 100644
--- a/zuul/configloader.py
+++ b/zuul/configloader.py
@@ -488,14 +488,26 @@ class NodeSetParser(object):
vs.Required('nodes'): to_list(str),
}
- nodeset = {vs.Required('nodes'): to_list(node),
- 'groups': to_list(group),
- '_source_context': model.SourceContext,
- '_start_mark': model.ZuulMark,
- }
+ real_nodeset = {vs.Required('nodes'): to_list(node),
+ 'groups': to_list(group),
+ }
+
+ alt_nodeset = {vs.Required('alternatives'):
+ [vs.Any(real_nodeset, str)]}
+ top_nodeset = {'_source_context': model.SourceContext,
+ '_start_mark': model.ZuulMark,
+ }
if not anonymous:
- nodeset[vs.Required('name')] = str
+ top_nodeset[vs.Required('name')] = str
+
+ top_real_nodeset = real_nodeset.copy()
+ top_real_nodeset.update(top_nodeset)
+ top_alt_nodeset = alt_nodeset.copy()
+ top_alt_nodeset.update(top_nodeset)
+
+ nodeset = vs.Any(top_real_nodeset, top_alt_nodeset)
+
return vs.Schema(nodeset)
def fromYaml(self, conf, anonymous=False):
@@ -503,6 +515,24 @@ class NodeSetParser(object):
self.anon_schema(conf)
else:
self.schema(conf)
+
+ if 'alternatives' in conf:
+ return self.loadAlternatives(conf)
+ else:
+ return self.loadNodeset(conf)
+
+ def loadAlternatives(self, conf):
+ ns = model.NodeSet(conf.get('name'))
+ ns.source_context = conf.get('_source_context')
+ ns.start_mark = conf.get('_start_mark')
+ for alt in conf['alternatives']:
+ if isinstance(alt, str):
+ ns.addAlternative(alt)
+ else:
+ ns.addAlternative(self.loadNodeset(alt))
+ return ns
+
+ def loadNodeset(self, conf):
ns = model.NodeSet(conf.get('name'))
ns.source_context = conf.get('_source_context')
ns.start_mark = conf.get('_start_mark')
@@ -2400,6 +2430,10 @@ class TenantParser(object):
# Now that all the jobs are loaded, verify references to other
# config objects.
+ for nodeset in layout.nodesets.values():
+ with reference_exceptions('nodeset', nodeset,
+ layout.loading_errors):
+ nodeset.validateReferences(layout)
for jobs in layout.jobs.values():
for job in jobs:
with reference_exceptions('job', job, layout.loading_errors):
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 637d31d9f..a4cfbfae2 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -880,23 +880,28 @@ class PipelineManager(metaclass=ABCMeta):
else:
relative_priority = 0
for job in jobs:
- provider = self._getPausedParentProvider(build_set, job)
- priority = self._calculateNodeRequestPriority(build_set, job)
- tenant_name = build_set.item.pipeline.tenant.name
- pipeline_name = build_set.item.pipeline.name
- req = self.sched.nodepool.requestNodes(
- build_set.uuid, job, tenant_name, pipeline_name, provider,
- priority, relative_priority, event=item.event)
- log.debug("Adding node request %s for job %s to item %s",
- req, job, item)
- build_set.setJobNodeRequestID(job.name, req.id)
- if req.fulfilled:
- nodeset = self.sched.nodepool.getNodeSet(req, job.nodeset)
- build_set.jobNodeRequestComplete(req.job_name, nodeset)
- else:
- job.setWaitingStatus(f'node request: {req.id}')
+ self._makeNodepoolRequest(log, build_set, job, relative_priority)
return True
+ def _makeNodepoolRequest(self, log, build_set, job, relative_priority,
+ alternative=0):
+ provider = self._getPausedParentProvider(build_set, job)
+ priority = self._calculateNodeRequestPriority(build_set, job)
+ tenant_name = build_set.item.pipeline.tenant.name
+ pipeline_name = build_set.item.pipeline.name
+ item = build_set.item
+ req = self.sched.nodepool.requestNodes(
+ build_set.uuid, job, tenant_name, pipeline_name, provider,
+ priority, relative_priority, event=item.event)
+ log.debug("Adding node request %s for job %s to item %s",
+ req, job, item)
+ build_set.setJobNodeRequestID(job.name, req.id)
+ if req.fulfilled:
+ nodeset = self.sched.nodepool.getNodeSet(req, job.nodeset)
+ build_set.jobNodeRequestComplete(req.job_name, nodeset)
+ else:
+ job.setWaitingStatus(f'node request: {req.id}')
+
def _getPausedParent(self, build_set, job):
job_graph = build_set.job_graph
if job_graph:
@@ -1909,17 +1914,46 @@ class PipelineManager(metaclass=ABCMeta):
build_set.setExtraRepoState(event.repo_state)
build_set.repo_state_state = build_set.COMPLETE
+ def _handleNodeRequestFallback(self, log, build_set, job, old_request):
+ if len(job.nodeset_alternatives) <= job.nodeset_index + 1:
+ # No alternatives to fall back upon
+ return False
+
+ # Increment the nodeset index and remove the old request
+ with job.activeContext(self.current_context):
+ job.nodeset_index = job.nodeset_index + 1
+
+ log.info("Re-attempting node request for job "
+ f"{job.name} of item {build_set.item} "
+ f"with nodeset alternative {job.nodeset_index}")
+
+ build_set.removeJobNodeRequestID(job.name)
+
+ # Make a new request
+ if self.sched.globals.use_relative_priority:
+ relative_priority = build_set.item.getNodePriority()
+ else:
+ relative_priority = 0
+ log = build_set.item.annotateLogger(self.log)
+ self._makeNodepoolRequest(log, build_set, job, relative_priority)
+ return True
+
def onNodesProvisioned(self, request, nodeset, build_set):
- # TODOv3(jeblair): handle provisioning failure here
log = get_annotated_logger(self.log, request.event_id)
self.reportPipelineTiming('node_request_time', request.created_time)
- if nodeset is not None:
- build_set.jobNodeRequestComplete(request.job_name, nodeset)
+ job = build_set.item.getJob(request.job_name)
+ # First see if we need to retry the request
if not request.fulfilled:
log.info("Node request %s: failure for %s",
request, request.job_name)
- job = build_set.item.getJob(request.job_name)
+ if self._handleNodeRequestFallback(log, build_set, job, request):
+ return
+ # No more fallbacks -- tell the buildset the request is complete
+ if nodeset is not None:
+ build_set.jobNodeRequestComplete(request.job_name, nodeset)
+ # Put a fake build through the cycle to clean it up.
+ if not request.fulfilled:
fakebuild = build_set.item.setNodeRequestFailure(job)
try:
self.sql.reportBuildEnd(
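The fallback flow above reduces to a small decision: a failed node
request advances the job's nodeset_index and submits a fresh request,
and the failure is only reported once the flattened alternatives are
exhausted. A minimal sketch, with submit_request and report_failure
standing in for Zuul's real request and reporting paths (they are not
part of the PipelineManager API):

.. code-block:: python

   def on_node_request_completed(job, request,
                                 submit_request, report_failure):
       if request.fulfilled:
           return 'nodes-ready'
       if job.nodeset_index + 1 < len(job.nodeset_alternatives):
           job.nodeset_index += 1    # fall back to the next alternative
           submit_request(job)       # re-attempt with the new labels
           return 'retrying'
       report_failure(job)           # no alternatives left
       return 'failed'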
diff --git a/zuul/model.py b/zuul/model.py
index 0d889f557..876a1fbcd 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -1383,6 +1383,7 @@ class NodeSet(ConfigObject):
self.name = name or ''
self.nodes = OrderedDict()
self.groups = OrderedDict()
+ self.alternatives = []
def __ne__(self, other):
return not self.__eq__(other)
@@ -1391,7 +1392,9 @@ class NodeSet(ConfigObject):
if not isinstance(other, NodeSet):
return False
return (self.name == other.name and
- self.nodes == other.nodes)
+ self.nodes == other.nodes and
+ self.groups == other.groups and
+ self.alternatives == other.alternatives)
def toDict(self):
d = {}
@@ -1402,6 +1405,12 @@ class NodeSet(ConfigObject):
d['groups'] = []
for group in self.groups.values():
d['groups'].append(group.toDict())
+ d['alternatives'] = []
+ for alt in self.alternatives:
+ if isinstance(alt, NodeSet):
+ d['alternatives'].append(alt.toDict())
+ else:
+ d['alternatives'].append(alt)
return d
@classmethod
@@ -1411,6 +1420,12 @@ class NodeSet(ConfigObject):
nodeset.addNode(Node.fromDict(node))
for group in data["groups"]:
nodeset.addGroup(Group.fromDict(group))
+ for alt in data.get('alternatives', []):
+            if isinstance(alt, str):
+                nodeset.addAlternative(alt)
+            else:
+                nodeset.addAlternative(NodeSet.fromDict(alt))
return nodeset
def copy(self):
@@ -1419,6 +1434,11 @@ class NodeSet(ConfigObject):
n.addNode(Node(node.name, node.label))
for name, group in self.groups.items():
n.addGroup(Group(group.name, group.nodes[:]))
+ for alt in self.alternatives:
+ if isinstance(alt, str):
+ n.addAlternative(alt)
+ else:
+ n.addAlternative(alt.copy())
return n
def addNode(self, node):
@@ -1438,6 +1458,36 @@ class NodeSet(ConfigObject):
def getGroups(self):
return list(self.groups.values())
+ def addAlternative(self, alt):
+ self.alternatives.append(alt)
+
+ def flattenAlternatives(self, layout):
+ alts = []
+ history = []
+ self._flattenAlternatives(layout, self, alts, history)
+ return alts
+
+ def _flattenAlternatives(self, layout, nodeset,
+ alternatives, history):
+ if isinstance(nodeset, str):
+ # This references an existing named nodeset in the layout.
+ ns = layout.nodesets.get(nodeset)
+ if ns is None:
+ raise Exception(f'The nodeset "{nodeset}" was not found.')
+ else:
+ ns = nodeset
+ if ns in history:
+ raise Exception(f'Nodeset cycle detected on "{nodeset}"')
+ history.append(ns)
+ if ns.alternatives:
+ for alt in ns.alternatives:
+ self._flattenAlternatives(layout, alt, alternatives, history)
+ else:
+ alternatives.append(ns)
+
+ def validateReferences(self, layout):
+ self.flattenAlternatives(layout)
+
def __repr__(self):
if self.name:
name = self.name + ' '
@@ -2038,7 +2088,8 @@ class FrozenJob(zkobject.ZKObject):
'dependencies',
'inheritance_path',
'name',
- 'nodeset',
+ 'nodeset_alternatives',
+ 'nodeset_index',
'override_branch',
'override_checkout',
'post_timeout',
@@ -2149,8 +2200,8 @@ class FrozenJob(zkobject.ZKObject):
if not hasattr(self, k):
continue
v = getattr(self, k)
- if k == 'nodeset':
- v = v.toDict()
+ if k == 'nodeset_alternatives':
+ v = [alt.toDict() for alt in v]
elif k == 'dependencies':
# frozenset of JobDependency
v = [dep.toDict() for dep in v]
@@ -2173,6 +2224,9 @@ class FrozenJob(zkobject.ZKObject):
v = {'storage': 'local', 'data': v}
data[k] = v
+ if (COMPONENT_REGISTRY.model_api < 9):
+ data['nodeset'] = data['nodeset_alternatives'][0]
+
# Use json_dumps to strip any ZuulMark entries
return json_dumps(data, sort_keys=True).encode("utf8")
@@ -2183,13 +2237,18 @@ class FrozenJob(zkobject.ZKObject):
if 'deduplicate' not in data:
data['deduplicate'] = 'auto'
- if hasattr(self, 'nodeset'):
- nodeset = self.nodeset
+ # MODEL_API < 9
+ if data.get('nodeset'):
+ data['nodeset_alternatives'] = [data['nodeset']]
+ data['nodeset_index'] = 0
+ del data['nodeset']
+
+ if hasattr(self, 'nodeset_alternatives'):
+ alts = self.nodeset_alternatives
else:
- nodeset = data.get('nodeset')
- if nodeset:
- nodeset = NodeSet.fromDict(nodeset)
- data['nodeset'] = nodeset
+ alts = data.get('nodeset_alternatives', [])
+ alts = [NodeSet.fromDict(alt) for alt in alts]
+ data['nodeset_alternatives'] = alts
if hasattr(self, 'dependencies'):
data['dependencies'] = self.dependencies
@@ -2250,6 +2309,12 @@ class FrozenJob(zkobject.ZKObject):
return val
@property
+ def nodeset(self):
+ if self.nodeset_alternatives:
+ return self.nodeset_alternatives[self.nodeset_index]
+ return None
+
+ @property
def parent_data(self):
return self._getJobData('_parent_data')
@@ -2459,12 +2524,11 @@ class Job(ConfigObject):
d['parent'] = self.parent
else:
d['parent'] = tenant.default_base_job
- if isinstance(self.nodeset, str):
- ns = tenant.layout.nodesets.get(self.nodeset)
- else:
- ns = self.nodeset
- if ns:
- d['nodeset'] = ns.toDict()
+ alts = self.flattenNodesetAlternatives(tenant.layout)
+ if len(alts) == 1 and len(alts[0]):
+ d['nodeset'] = alts[0].toDict()
+ elif len(alts) > 1:
+ d['nodeset_alternatives'] = [x.toDict() for x in alts]
if self.ansible_version:
d['ansible_version'] = self.ansible_version
else:
@@ -2629,6 +2693,17 @@ class Job(ConfigObject):
secrets.append(secret_value)
playbook['secrets'][secret_key] = len(secrets) - 1
+ def flattenNodesetAlternatives(self, layout):
+ nodeset = self.nodeset
+ if isinstance(nodeset, str):
+ # This references an existing named nodeset in the layout.
+ ns = layout.nodesets.get(nodeset)
+ if ns is None:
+ raise Exception(f'The nodeset "{nodeset}" was not found.')
+ else:
+ ns = nodeset
+ return ns.flattenAlternatives(layout)
+
def freezeJob(self, context, tenant, layout, item,
redact_secrets_and_keys):
buildset = item.current_build_set
@@ -2640,6 +2715,9 @@ class Job(ConfigObject):
attributes.discard('secrets')
attributes.discard('affected_projects')
attributes.discard('config_hash')
+ # Nodeset alternatives are flattened at this point
+ attributes.discard('nodeset_alternatives')
+ attributes.discard('nodeset_index')
secrets = []
for k in attributes:
# If this is a config object, it's frozen, so it's
@@ -2663,6 +2741,8 @@ class Job(ConfigObject):
for pb in v:
self._deduplicateSecrets(context, secrets, pb)
kw[k] = v
+ kw['nodeset_alternatives'] = self.flattenNodesetAlternatives(layout)
+ kw['nodeset_index'] = 0
kw['secrets'] = secrets
kw['affected_projects'] = self._getAffectedProjects(tenant)
kw['config_hash'] = self.getConfigHash(tenant)
@@ -2735,7 +2815,7 @@ class Job(ConfigObject):
if self._get('cleanup_run') is not None:
self.cleanup_run = self.freezePlaybooks(self.cleanup_run, layout)
- def getNodeSet(self, layout):
+ def getNodeset(self, layout):
if isinstance(self.nodeset, str):
# This references an existing named nodeset in the layout.
ns = layout.nodesets.get(self.nodeset)
@@ -2752,14 +2832,14 @@ class Job(ConfigObject):
if not self.isBase() and self.parent:
layout.getJob(self.parent)
- ns = self.getNodeSet(layout)
- if layout.tenant.max_nodes_per_job != -1 and \
- len(ns) > layout.tenant.max_nodes_per_job:
- raise Exception(
- 'The job "{job}" exceeds tenant '
- 'max-nodes-per-job {maxnodes}.'.format(
- job=self.name,
- maxnodes=layout.tenant.max_nodes_per_job))
+ for ns in self.flattenNodesetAlternatives(layout):
+ if layout.tenant.max_nodes_per_job != -1 and \
+ len(ns) > layout.tenant.max_nodes_per_job:
+ raise Exception(
+ 'The job "{job}" exceeds tenant '
+ 'max-nodes-per-job {maxnodes}.'.format(
+ job=self.name,
+ maxnodes=layout.tenant.max_nodes_per_job))
for dependency in self.dependencies:
layout.getJob(dependency.name)
@@ -2956,7 +3036,7 @@ class Job(ConfigObject):
self.addRoles(other.roles)
# Freeze the nodeset
- self.nodeset = self.getNodeSet(layout)
+ self.nodeset = self.getNodeset(layout)
# Pass secrets to parents
secrets_for_parents = [s for s in other.secrets if s.pass_to_parent]
diff --git a/zuul/model_api.py b/zuul/model_api.py
index 0534ee9c4..27d4a2b07 100644
--- a/zuul/model_api.py
+++ b/zuul/model_api.py
@@ -14,4 +14,4 @@
# When making ZK schema changes, increment this and add a record to
# docs/developer/model-changelog.rst
-MODEL_API = 8
+MODEL_API = 9