summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/source/admin/troubleshooting.rst77
-rw-r--r--doc/source/install/enrollment.rst7
-rw-r--r--ironic/conductor/manager.py2
-rw-r--r--ironic/conductor/utils.py3
-rw-r--r--ironic/tests/unit/conductor/test_manager.py6
-rw-r--r--ironic/tests/unit/conductor/test_utils.py6
6 files changed, 96 insertions, 5 deletions
diff --git a/doc/source/admin/troubleshooting.rst b/doc/source/admin/troubleshooting.rst
index 1ac680e1f..0c29343c8 100644
--- a/doc/source/admin/troubleshooting.rst
+++ b/doc/source/admin/troubleshooting.rst
@@ -559,3 +559,80 @@ waiting for an event that is never happening. In these cases, it might be
helpful to connect to the IPA and inspect its logs, see the trouble shooting
guide of the :ironic-python-agent-doc:`ironic-python-agent (IPA) <>` on how
to do this.
+
+Deployments fail with "failed to update MAC address"
+====================================================
+
+The design of the integration with the Networking service (neutron) is such
+that once virtual ports have been created in the API, their MAC address must
+be updated in order for the DHCP server to be able to appropriately reply.
+
+This can sometimes result in errors being raised indicating that the MAC
+address is already in use. This is because at some point in the past, a
+virtual interface was orphaned either by accident or by some unexpected
+glitch, and a previous entry is still present in Neutron.
+
+This error looks something like this when reported in the ironic-conductor
+log output::
+
+ Failed to update MAC address on Neutron port 305beda7-0dd0-4fec-b4d2-78b7aa4e8e6a.: MacAddressInUseClient: Unable to complete operation for network 1e252627-6223-4076-a2b9-6f56493c9bac. The mac address 52:54:00:7c:c4:56 is in use.
+
+Because we have no idea about this entry, we fail the deployment process
+as we can't make a number of assumptions in order to attempt to automatically
+resolve the conflict.
+
+How did I get here?
+-------------------
+
+Originally, this was a fairly easy issue to encounter. The retry logic
+between the Orchestration (heat) and Compute (nova) services could
+sometimes result in additional unnecessary ports being created.
+
+Bugs of this class have been largely resolved since the Rocky development
+cycle. Since then, this issue is typically encountered when Networking
+(neutron) VIF attachments are not removed or deleted prior to deleting a
+port in the Bare Metal service.
+
+Ultimately, the key of this is that the port is being deleted. Under most
+operating circumstances, there really is no need to delete the port, and
+VIF attachments are stored on the port object, so deleting the port
+*CAN* result in the VIF not being cleaned up from Neutron.
+
+Under normal circumstances, when deleting ports, a node should be in a
+stable state, and the node should not be provisioned. If the
+``openstack baremetal port delete`` command fails, this may indicate that
+a known VIF is still attached. Generally, if the attachments are transitory
+from cleaning, provisioning, rescuing, or even inspection, getting the node
+to the ``available`` state will unblock your delete operation, unless there
+is a tenant VIF attachment. In that case, the VIF will need to be removed
+from within the Bare Metal service using the
+``openstack baremetal node vif detach`` command.
+
+A port can also be checked to see if there is a VIF attachment by consulting
+the port's ``internal_info`` field.
+
+.. warning::
+ The ``maintenance`` flag can be used to force the node's port to be
+ deleted, however this will disable any check that would normally block
+ the user from issuing a delete and accidentally orphaning the VIF attachment
+ record.
+
+How do I resolve this?
+----------------------
+
+Generally, you need to identify the port with the offending MAC address.
+Example::
+
+ openstack port list --mac-address 52:54:00:7c:c4:56
+
+From the command's output, you should be able to identify the ``id`` field.
+Using that, you can delete the port. Example::
+
+ openstack port delete <id>
+
+.. warning::
+ Before deleting a port, you should always verify that it is no longer in
+ use or no longer seems applicable/operable. If multiple deployments of
+ the Bare Metal service share a single Neutron, it is possible that an
+ inventory typo, or even a duplicate MAC address, exists, which
+ could also produce the same basic error message.
diff --git a/doc/source/install/enrollment.rst b/doc/source/install/enrollment.rst
index 4d6b0a4b2..1e0f9957e 100644
--- a/doc/source/install/enrollment.rst
+++ b/doc/source/install/enrollment.rst
@@ -250,6 +250,13 @@ and may be combined if desired.
$ openstack baremetal port create $MAC_ADDRESS --node $NODE_UUID
+ .. note::
+ When it is time to remove the node from the Bare Metal service, the
+ command used to remove the port is ``openstack baremetal port delete
+ <port uuid>``. When doing so, it is important to ensure that the
+ baremetal node is not in ``maintenance``, as the guarding logic that prevents
+ orphaning Neutron Virtual Interfaces (VIFs) will be overridden.
+
.. _enrollment-scheduling:
Adding scheduling information
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 128160211..88cc083b0 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -2074,7 +2074,7 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, port.node_id,
purpose='port deletion') as task:
vif, vif_use = utils.get_attached_vif(port)
- if vif:
+ if vif and not task.node.maintenance:
msg = _("Cannot delete the port %(port)s as it is bound "
"to VIF %(vif)s for %(use)s use.")
raise exception.InvalidState(
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 60bc4a542..b4d7bf076 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -1196,4 +1196,7 @@ def get_attached_vif(port):
rescue_vif = port.internal_info.get('rescuing_vif_port_id')
if rescue_vif:
return (rescue_vif, 'rescuing')
+ inspection_vif = port.internal_info.get('inspection_vif_port_id')
+ if inspection_vif:
+ return (inspection_vif, 'inspecting')
return (None, None)
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index 67ee3c570..3556d4df2 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -6519,10 +6519,8 @@ class DestroyPortTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.context,
node_id=node.id,
internal_info={'tenant_vif_port_id': 'fake-id'})
- exc = self.assertRaises(messaging.rpc.ExpectedException,
- self.service.destroy_port,
- self.context, port)
- self.assertEqual(exception.InvalidState, exc.exc_info[0])
+ self.service.destroy_port(self.context, port)
+ self.assertRaises(exception.PortNotFound, port.refresh)
def test_destroy_port_node_active_and_maintenance_no_vif(self):
instance_uuid = uuidutils.generate_uuid()
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index 98eb0b9f4..0dea519e2 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -2127,3 +2127,9 @@ class GetAttachedVifTestCase(db_base.DbTestCase):
vif, use = conductor_utils.get_attached_vif(self.port)
self.assertEqual('1', vif)
self.assertEqual('rescuing', use)
+
+ def test_get_attached_vif_inspecting(self):
+ self.port.internal_info = {'inspection_vif_port_id': '1'}
+ vif, use = conductor_utils.get_attached_vif(self.port)
+ self.assertEqual('1', vif)
+ self.assertEqual('inspecting', use)