summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2022-12-14 14:11:40 +0000
committerGerrit Code Review <review@openstack.org>2022-12-14 14:11:40 +0000
commitd7de0c121aa5680f8e836447b111bd5975df306c (patch)
treeee60e8134351c9d1d13408c73127e8078d74cb7f
parent8b4104f9f78d0615720c0ba1e3e8cfced42efcc5 (diff)
parent3c022e968375c1b2eadf3c2dd7190b9434c6d4c1 (diff)
downloadnova-d7de0c121aa5680f8e836447b111bd5975df306c.tar.gz
Merge "Ironic nodes with instance reserved in placement"
-rw-r--r--nova/conf/workarounds.py15
-rw-r--r--nova/tests/unit/virt/ironic/test_driver.py48
-rw-r--r--nova/virt/ironic/driver.py26
-rw-r--r--releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml11
4 files changed, 89 insertions, 11 deletions
diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index 2ec53282cd..1116664d36 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -417,6 +417,21 @@ compatibility on the destination host during live migration.
When this is enabled, it will skip version-checking of hypervisors
during live migration.
"""),
+ cfg.BoolOpt(
+ 'skip_reserve_in_use_ironic_nodes',
+ default=False,
+ help="""
+This may be useful if you use the Ironic driver, but don't have
+automatic cleaning enabled in Ironic. Nova, by default, will mark
+Ironic nodes as reserved as soon as they are in use. When you free
+the Ironic node (by deleting the nova instance) it takes a while
+for Nova to un-reserve that Ironic node in placement. Usually this
+is a good idea, because it avoids placement providing an Ironic
+as a valid candidate when it is still being cleaned.
+Howerver, if you don't use automatic cleaning, it can cause an
+extra delay before and Ironic node is available for building a
+new Nova instance.
+"""),
]
diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py
index 6ac7ca464e..958623f31a 100644
--- a/nova/tests/unit/virt/ironic/test_driver.py
+++ b/nova/tests/unit/virt/ironic/test_driver.py
@@ -935,6 +935,48 @@ class IronicDriverTestCase(test.NoDBTestCase):
expected = {
'CUSTOM_IRON_NFV': {
'total': 1,
+ 'reserved': 1,
+ 'min_unit': 1,
+ 'max_unit': 1,
+ 'step_size': 1,
+ 'allocation_ratio': 1.0,
+ },
+ }
+ mock_nfc.assert_called_once_with(mock.sentinel.nodename)
+ mock_nr.assert_called_once_with(mock_nfc.return_value)
+ mock_res_used.assert_called_once_with(mock_nfc.return_value)
+ mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
+ result = self.ptree.data(mock.sentinel.nodename).inventory
+ self.assertEqual(expected, result)
+
+ @mock.patch.object(ironic_driver.IronicDriver,
+ '_node_resources_used', return_value=True)
+ @mock.patch.object(ironic_driver.IronicDriver,
+ '_node_resources_unavailable', return_value=False)
+ @mock.patch.object(ironic_driver.IronicDriver, '_node_resource')
+ @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache')
+ def test_update_provider_tree_with_rc_occupied_workaround(self,
+ mock_nfc, mock_nr, mock_res_unavail, mock_res_used):
+ """Ensure that when a node is used, we report the inventory matching
+ the consumed resources.
+ """
+ self.flags(skip_reserve_in_use_ironic_nodes=True,
+ group="workarounds")
+ mock_nr.return_value = {
+ 'vcpus': 24,
+ 'vcpus_used': 24,
+ 'memory_mb': 1024,
+ 'memory_mb_used': 1024,
+ 'local_gb': 100,
+ 'local_gb_used': 100,
+ 'resource_class': 'iron-nfv',
+ }
+
+ self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)
+
+ expected = {
+ 'CUSTOM_IRON_NFV': {
+ 'total': 1,
'reserved': 0,
'min_unit': 1,
'max_unit': 1,
@@ -945,7 +987,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
- self.assertFalse(mock_res_unavail.called)
+ mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).inventory
self.assertEqual(expected, result)
@@ -1016,7 +1058,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
- self.assertFalse(mock_res_unavail.called)
+ mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).traits
self.assertEqual(set(), result)
@@ -1048,7 +1090,7 @@ class IronicDriverTestCase(test.NoDBTestCase):
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
- self.assertFalse(mock_res_unavail.called)
+ mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).traits
self.assertEqual(set(traits), result)
diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py
index 7496db5a7c..5583ac5236 100644
--- a/nova/virt/ironic/driver.py
+++ b/nova/virt/ironic/driver.py
@@ -874,15 +874,25 @@ class IronicDriver(virt_driver.ComputeDriver):
"""
# nodename is the ironic node's UUID.
node = self._node_from_cache(nodename)
+
reserved = False
- if (not self._node_resources_used(node) and
- self._node_resources_unavailable(node)):
- LOG.debug('Node %(node)s is not ready for a deployment, '
- 'reporting resources as reserved for it. Node\'s '
- 'provision state is %(prov)s, power state is '
- '%(power)s and maintenance is %(maint)s.',
- {'node': node.uuid, 'prov': node.provision_state,
- 'power': node.power_state, 'maint': node.maintenance})
+ if self._node_resources_unavailable(node):
+ # Operators might mark a node as in maintainance,
+ # even when an instance is on the node,
+ # either way lets mark this as reserved
+ reserved = True
+
+ if (self._node_resources_used(node) and
+ not CONF.workarounds.skip_reserve_in_use_ironic_nodes):
+ # Make resources as reserved once we have
+ # and instance here.
+ # When the allocation is deleted, most likely
+ # automatic clean will start, so we keep the node
+ # reserved until it becomes available again.
+ # In the case without automatic clean, once
+ # the allocation is removed in placement it
+ # also stays as reserved until we notice on
+ # the next periodic its actually available.
reserved = True
info = self._node_resource(node)
diff --git a/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
new file mode 100644
index 0000000000..4fd2cc1ca9
--- /dev/null
+++ b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+ - |
+ Fixed when placement returns ironic nodes that have just started automatic
+ cleaning as possible valid candidates. This is done by marking all ironic
+ nodes with an instance on them as reserved, such that nova only makes them
+ available once we have double checked Ironic reports the node as available.
+ If you don't have automatic cleaning on, this might mean it takes longer
+ than normal for Ironic nodes to become available for new instances.
+ If you want the old behaviour use the following workaround config:
+ `[workarounds]skip_reserve_in_use_ironic_nodes=true`