diff options
author | Balazs Gibizer <gibi@redhat.com> | 2022-04-19 18:36:50 +0200 |
---|---|---|
committer | Balazs Gibizer <gibi@redhat.com> | 2022-05-04 10:57:18 +0200 |
commit | 3af2ecc13fa9334de8418accaed4fffefefb41da (patch) | |
tree | 5bcea1eb0249c28e89883affb7d163abc8376077 /nova/objects | |
parent | 9ee5d2c66255f83cc8a66f1b5648fa13e1d73f47 (diff) | |
download | nova-3af2ecc13fa9334de8418accaed4fffefefb41da.tar.gz |
Allow claiming PCI PF if child VF is unavailable
As If9ab424cc7375a1f0d41b03f01c4a823216b3eb8 stated, there is a way for
the pci_device table to become inconsistent. Parent PF can be in
'available' state while children VFs are still in 'unavailable' state.
In this situation the PF is schedulable, but the PCI claim will fail
when trying to mark the dependent VFs unavailable.
This patch changes the PCI claim logic to allow claiming the parent PF
in the inconsistent situation as we assume that it is safe to do so.
This claim also fixes the inconsistency, so that when the parent PF is
freed the children VFs become available again.
Closes-Bug: #1969496
Change-Id: I575ce06bcc913add7db0849f85728371da2032fc
Diffstat (limited to 'nova/objects')
-rw-r--r-- | nova/objects/pci_device.py | 38 |
1 files changed, 34 insertions, 4 deletions
diff --git a/nova/objects/pci_device.py b/nova/objects/pci_device.py index b675641a06..b0d5b75826 100644 --- a/nova/objects/pci_device.py +++ b/nova/objects/pci_device.py @@ -346,10 +346,40 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject): # Update PF status to CLAIMED if all of it dependants are free # and set their status to UNCLAIMABLE vfs_list = self.child_devices - if not all([vf.is_available() for vf in vfs_list]): - raise exception.PciDeviceVFInvalidStatus( - compute_node_id=self.compute_node_id, - address=self.address) + non_free_dependants = [ + vf for vf in vfs_list if not vf.is_available()] + if non_free_dependants: + # NOTE(gibi): There should not be any dependent devices that + # are UNCLAIMABLE or UNAVAILABLE as the parent is AVAILABLE, + # but we got reports in bug 1969496 that this inconsistency + # can happen. So check if the only non-free devices are in + # state UNCLAIMABLE or UNAVAILABLE then we log a warning but + # allow to claim the parent. + actual_statuses = { + child.status for child in non_free_dependants} + allowed_non_free_statues = { + fields.PciDeviceStatus.UNCLAIMABLE, + fields.PciDeviceStatus.UNAVAILABLE, + } + if actual_statuses - allowed_non_free_statues == set(): + LOG.warning( + "Some child device of parent %s is in an inconsistent " + "state. If you can reproduce this warning then please " + "report a bug at " + "https://bugs.launchpad.net/nova/+filebug with " + "reproduction steps. Inconsistent children with " + "state: %s", + self.address, + ",".join( + "%s - %s" % (child.address, child.status) + for child in non_free_dependants + ), + ) + + else: + raise exception.PciDeviceVFInvalidStatus( + compute_node_id=self.compute_node_id, + address=self.address) self._bulk_update_status(vfs_list, fields.PciDeviceStatus.UNCLAIMABLE) |