Map PCI pools to RP UUIDs

Nova's PCI scheduling (and the PCI claim) works based on PCI device pools, to which similar available PCI devices are assigned. The PCI devices are now represented in placement as RPs, and the allocation candidates during scheduling and the allocation after scheduling now contain PCI devices. This information needs to affect the PCI scheduling and the PCI claim. To be able to do that we need to map PCI device pools to RPs. We achieve that here by first mapping PciDevice objects to RPs during placement PCI inventory reporting, and then mapping pools to RPs based on the PCI devices assigned to the pools. Also, because the ResourceTracker._update_to_placement() call now updates the PCI device pools, the sequence of events in the ResourceTracker needed to change to: 1) run _update_to_placement() 2) copy the pools to the ComputeNode object 3) save the compute node to the DB 4) save the PCI tracker. blueprint: pci-device-tracking-in-placement Change-Id: I9bb450ac235ab72ff0d8078635e7a11c04ff6c1e
author: Balazs Gibizer <gibi@redhat.com> 2022-08-19 15:32:00 +0200
committer: Balazs Gibizer <gibi@redhat.com> 2022-10-17 13:56:18 +0200
commit: e96601c606059c862e8066f91b49093f98ad46d2 (patch)
tree: c01b2b6d0b5974dacbcbde959bf2d7359e36ba1e /nova/compute
parent: c2f48412b57080ca0950e47f35ba6cc926596a85 (diff)
download: nova-e96601c606059c862e8066f91b49093f98ad46d2.tar.gz
2 files changed, 22 insertions, 5 deletions
diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py
index 8aa7a1f4a9..3ee52e303c 100644
--- a/nova/compute/pci_placement_translator.py
+++ b/nova/compute/pci_placement_translator.py
@@ -261,6 +261,12 @@ class PciResourceProvider:
         )
         provider_tree.update_traits(self.name, self.traits)
 
+        # Here we are sure the RP exists in the provider_tree. So, we can
+        # record the RP UUID in each PciDevice this RP represents
+        rp_uuid = provider_tree.data(self.name).uuid
+        for dev in self.devs:
+            dev.extra_info['rp_uuid'] = rp_uuid
+
     def update_allocations(
         self,
         allocations: dict,
@@ -598,6 +604,11 @@ def update_provider_tree_for_pci(
 
     pv.update_provider_tree(provider_tree)
     old_alloc = copy.deepcopy(allocations)
+    # update_provider_tree correlated the PciDevice objects with RPs in
+    # placement and recorded the RP UUID in the PciDevice object. We need to
+    # trigger an update on the device pools in the tracker to get the device
+    # RP UUID mapped to the device pools
+    pci_tracker.stats.populate_pools_metadata_from_assigned_devices()
     updated = pv.update_allocations(allocations, provider_tree)
 
     if updated:
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index ffbc7ed03f..ab60c77d96 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -991,8 +991,6 @@ class ResourceTracker(object):
         # notified when instances are deleted, we need remove all usages
         # from deleted instances.
         self.pci_tracker.clean_usage(instances, migrations)
-        dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
-        cn.pci_device_pools = dev_pools_obj
 
         self._report_final_resource_view(nodename)
 
@@ -1314,13 +1312,23 @@ class ResourceTracker(object):
 
     def _update(self, context, compute_node, startup=False):
         """Update partial stats locally and populate them to Scheduler."""
+
+        self._update_to_placement(context, compute_node, startup)
+
+        if self.pci_tracker:
+            # sync PCI device pool state stored in the compute node with
+            # the actual state from the PCI tracker as we commit changes in
+            # the DB and in the PCI tracker below
+            dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
+            compute_node.pci_device_pools = dev_pools_obj
+
         # _resource_change will update self.old_resources if it detects changes
         # but we want to restore those if compute_node.save() fails.
         nodename = compute_node.hypervisor_hostname
         old_compute = self.old_resources[nodename]
         if self._resource_change(compute_node):
             # If the compute_node's resource changed, update to DB. Note that
-            # _update_to_placement below does not supersede the need to do this
+            # _update_to_placement above does not supersede the need to do this
             # because there are stats-related fields in the ComputeNode object
             # which could have changed and still need to be reported to the
             # scheduler filters/weighers (which could be out of tree as well).
@@ -1333,8 +1341,6 @@ class ResourceTracker(object):
                 with excutils.save_and_reraise_exception(logger=LOG):
                     self.old_resources[nodename] = old_compute
 
-        self._update_to_placement(context, compute_node, startup)
-
         if self.pci_tracker:
             self.pci_tracker.save(context)
author	Balazs Gibizer <gibi@redhat.com>	2022-08-19 15:32:00 +0200
committer	Balazs Gibizer <gibi@redhat.com>	2022-10-17 13:56:18 +0200
commit	e96601c606059c862e8066f91b49093f98ad46d2 (patch)
tree	c01b2b6d0b5974dacbcbde959bf2d7359e36ba1e /nova/compute
parent	c2f48412b57080ca0950e47f35ba6cc926596a85 (diff)
download	nova-e96601c606059c862e8066f91b49093f98ad46d2.tar.gz