summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBalazs Gibizer <gibi@redhat.com>2022-08-26 18:02:59 +0200
committerBalazs Gibizer <gibi@redhat.com>2022-08-26 19:05:23 +0200
commitab439dadb174f28ae87aea3a9265cf5adcf96eb4 (patch)
tree213ef5929807326f219f3c8d310a00a89595b0b5
parent98e9989cad0708c9254ec612a9eca249690b1b80 (diff)
downloadnova-ab439dadb174f28ae87aea3a9265cf5adcf96eb4.tar.gz
Heal allocation for same host resize
Same host resize needs special handling in the allocation healing logic as both the source and the dest host PCI devices are visible to the healing code as the PciDevice.instance_uuid points to the healed instance in both cases. blueprint: pci-device-tracking-in-placement Change-Id: Id02e445c55fc956965b7d725f0260876d42422f2
-rw-r--r--nova/compute/pci_placement_translator.py36
-rw-r--r--nova/compute/resource_tracker.py12
-rw-r--r--nova/objects/migration.py4
-rw-r--r--nova/tests/functional/libvirt/test_pci_in_placement.py110
-rw-r--r--nova/tests/unit/compute/test_pci_placement_translator.py11
-rw-r--r--nova/tests/unit/compute/test_resource_tracker.py63
6 files changed, 227 insertions, 9 deletions
diff --git a/nova/compute/pci_placement_translator.py b/nova/compute/pci_placement_translator.py
index fa74d9cb18..c6ccbf27ae 100644
--- a/nova/compute/pci_placement_translator.py
+++ b/nova/compute/pci_placement_translator.py
@@ -254,7 +254,8 @@ class PciResourceProvider:
def update_allocations(
self,
allocations: dict,
- provider_tree: provider_tree.ProviderTree
+ provider_tree: provider_tree.ProviderTree,
+ same_host_instances: ty.List[str],
) -> bool:
updated = False
@@ -293,6 +294,21 @@ class PciResourceProvider:
# heal_allocation CLI instead.
continue
+ if consumer in same_host_instances:
+ # This is a nasty special case. This instance is undergoing
+ # a same host resize. So in Placement the source host
+ # allocation is held by the migration UUID *but* the
+ # PciDevice.instance_uuid is set for the instance UUID both
+ # on the source and on the destination host. As the source and
+ # dest are the same for this migration we will see PciDevice
+ # objects assigned to this instance that should not be
+ # allocated to the instance UUID in placement.
+ # As noted above we don't want to take care of in-progress
+ # migrations during healing. So we simply ignore this instance.
+ # If the instance needs healing then it will be healed after
+ # the migration is confirmed or reverted.
+ continue
+
current_allocs = allocations[consumer]['allocations']
current_rp_allocs = current_allocs.get(rp_uuid)
@@ -326,9 +342,14 @@ class PciResourceProvider:
class PlacementView:
"""The PCI Placement view"""
- def __init__(self, hypervisor_hostname: str) -> None:
+ def __init__(
+ self,
+ hypervisor_hostname: str,
+ instances_under_same_host_resize: ty.List[str],
+ ) -> None:
self.rps: ty.Dict[str, PciResourceProvider] = {}
self.root_rp_name = hypervisor_hostname
+ self.same_host_instances = instances_under_same_host_resize
def _get_rp_name_for_address(self, addr: str) -> str:
return f"{self.root_rp_name}_{addr.upper()}"
@@ -459,7 +480,11 @@ class PlacementView:
"""
updated = False
for rp in self.rps.values():
- updated |= rp.update_allocations(allocations, provider_tree)
+ updated |= rp.update_allocations(
+ allocations,
+ provider_tree,
+ self.same_host_instances,
+ )
return updated
@@ -500,6 +525,7 @@ def update_provider_tree_for_pci(
nodename: str,
pci_tracker: pci_manager.PciDevTracker,
allocations: dict,
+ instances_under_same_host_resize: ty.List[str],
) -> bool:
"""Based on the PciDevice objects in the pci_tracker it calculates what
inventories and allocations needs to exist in placement and create the
@@ -529,6 +555,8 @@ def update_provider_tree_for_pci(
},
...
}
+ :param instances_under_same_host_resize: A list of instance UUIDs that
+ are undergoing same host resize on this host.
"""
if not _is_placement_tracking_enabled():
ensure_tracking_was_not_enabled_before(provider_tree)
@@ -541,7 +569,7 @@ def update_provider_tree_for_pci(
'Collecting PCI inventories and allocations to track them in Placement'
)
- pv = PlacementView(nodename)
+ pv = PlacementView(nodename, instances_under_same_host_resize)
for dev in pci_tracker.pci_devs:
# match the PCI device with the [pci]dev_spec config to access
# the configuration metadata tags
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 15b2735e15..f311b11442 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -1260,6 +1260,11 @@ class ResourceTracker(object):
context, nodename, provider_tree=prov_tree)
prov_tree.update_traits(nodename, traits)
+ instances_under_same_host_resize = [
+ migration.instance_uuid
+ for migration in self.tracked_migrations.values()
+ if migration.is_same_host_resize
+ ]
# NOTE(gibi): Tracking PCI in placement is different from other
# resources.
#
@@ -1278,7 +1283,12 @@ class ResourceTracker(object):
# is enabled. So we need to be ready to heal PCI allocations at
# every call not just at startup.
pci_reshaped = pci_placement_translator.update_provider_tree_for_pci(
- prov_tree, nodename, self.pci_tracker, allocs)
+ prov_tree,
+ nodename,
+ self.pci_tracker,
+ allocs,
+ instances_under_same_host_resize,
+ )
self.provider_tree = prov_tree
diff --git a/nova/objects/migration.py b/nova/objects/migration.py
index 7e340ceb78..6f5f217b80 100644
--- a/nova/objects/migration.py
+++ b/nova/objects/migration.py
@@ -215,6 +215,10 @@ class Migration(base.NovaPersistentObject, base.NovaObject):
def is_resize(self):
return self.migration_type == fields.MigrationType.RESIZE
+ @property
+ def is_same_host_resize(self):
+ return self.is_resize and self.source_node == self.dest_node
+
@base.NovaObjectRegistry.register
class MigrationList(base.ObjectListBase, base.NovaObject):
diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py
index 22b3ca16e7..233941cd56 100644
--- a/nova/tests/functional/libvirt/test_pci_in_placement.py
+++ b/nova/tests/functional/libvirt/test_pci_in_placement.py
@@ -1346,3 +1346,113 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
del compute1_expected_placement_view["allocations"][server["id"]]
self.assert_placement_pci_view(
"compute1", **compute1_expected_placement_view)
+
+ def test_heal_allocation_during_same_host_resize(self):
+ self.flags(allow_resize_to_same_host=True)
+ # The fake libvirt will emulate on the host:
+ # * one type-PF (slot 0) with 3 type-VFs
+ compute1_pci_info = fakelibvirt.HostPCIDevicesInfo(
+ num_pci=0, num_pfs=1, num_vfs=3)
+ # the config matches just the VFs
+ compute1_device_spec = self._to_device_spec_conf(
+ [
+ {
+ "vendor_id": fakelibvirt.PCI_VEND_ID,
+ "product_id": fakelibvirt.VF_PROD_ID,
+ "address": "0000:81:00.*",
+ },
+ ]
+ )
+ self.flags(group='pci', device_spec=compute1_device_spec)
+ # Start a compute with PCI tracking in placement
+ self.mock_pci_report_in_placement.return_value = True
+ self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
+ self.assertPCIDeviceCounts("compute1", total=3, free=3)
+ compute1_expected_placement_view = {
+ "inventories": {
+ "0000:81:00.0": {self.VF_RC: 3},
+ },
+ "traits": {
+ "0000:81:00.0": [],
+ },
+ "usages": {
+ "0000:81:00.0": {self.VF_RC: 0},
+ },
+ "allocations": {},
+ }
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ # Create an instance consuming one VFs
+ extra_spec = {"pci_passthrough:alias": "a-vf:1"}
+ flavor_id = self._create_flavor(extra_spec=extra_spec)
+ server = self._create_server(flavor_id=flavor_id, networks=[])
+ self.assertPCIDeviceCounts("compute1", total=3, free=2)
+ # As scheduling does not support PCI in placement yet no allocation
+ # is created for the PCI consumption by the scheduler. BUT the resource
+ # tracker in the compute will heal the missing PCI allocation
+ compute1_expected_placement_view[
+ "usages"]["0000:81:00.0"][self.VF_RC] = 1
+ compute1_expected_placement_view["allocations"][server["id"]] = {
+ "0000:81:00.0": {self.VF_RC: 1}
+ }
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+
+ # resize the server to consume 2 VFs on the same host
+ extra_spec = {"pci_passthrough:alias": "a-vf:2"}
+ flavor_id = self._create_flavor(extra_spec=extra_spec)
+ server = self._resize_server(server, flavor_id)
+ # during resize both the source and the dest allocations are kept
+ # and in same host resize that means both consumed from the same host
+ self.assertPCIDeviceCounts("compute1", total=3, free=0)
+ # the source side of the allocation held by the migration
+ self._move_server_allocation(
+ compute1_expected_placement_view["allocations"], server['id'])
+ # NOTE(gibi): we intentionally don't heal allocation for the instance
+ # while it is being resized. See the comment in the
+ # pci_placement_translator about the reasoning.
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+
+ # revert the resize
+ self._revert_resize(server)
+ self.assertPCIDeviceCounts("compute1", total=3, free=2)
+ # the original allocations are restored
+ self._move_server_allocation(
+ compute1_expected_placement_view["allocations"],
+ server["id"],
+ revert=True,
+ )
+ compute1_expected_placement_view[
+ "usages"]["0000:81:00.0"][self.VF_RC] = 1
+ compute1_expected_placement_view["allocations"][server["id"]] = {
+ "0000:81:00.0": {self.VF_RC: 1}
+ }
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+
+ # now resize and then confirm it
+ self._resize_server(server, flavor_id)
+ self._confirm_resize(server)
+
+ # we expect that the consumption is according to the new flavor
+ self.assertPCIDeviceCounts("compute1", total=3, free=1)
+ compute1_expected_placement_view[
+ "usages"]["0000:81:00.0"][self.VF_RC] = 2
+ compute1_expected_placement_view["allocations"][server["id"]] = {
+ "0000:81:00.0": {self.VF_RC: 2}
+ }
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
+ self._run_periodics()
+ self.assert_placement_pci_view(
+ "compute1", **compute1_expected_placement_view)
diff --git a/nova/tests/unit/compute/test_pci_placement_translator.py b/nova/tests/unit/compute/test_pci_placement_translator.py
index b8fbb6a752..23fa7e1b7e 100644
--- a/nova/tests/unit/compute/test_pci_placement_translator.py
+++ b/nova/tests/unit/compute/test_pci_placement_translator.py
@@ -61,7 +61,7 @@ class TestTranslator(test.NoDBTestCase):
provider_tree = mock.NonCallableMock()
ppt.update_provider_tree_for_pci(
- provider_tree, "fake-node", pci_tracker, {})
+ provider_tree, "fake-node", pci_tracker, {}, [])
self.assertIn(
"Device spec is not found for device 0000:81:00.0 in "
@@ -113,7 +113,8 @@ class TestTranslator(test.NoDBTestCase):
)
def test_dependent_device_pf_then_vf(self):
- pv = ppt.PlacementView("fake-node")
+ pv = ppt.PlacementView(
+ "fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -140,7 +141,8 @@ class TestTranslator(test.NoDBTestCase):
)
def test_dependent_device_vf_then_pf(self):
- pv = ppt.PlacementView("fake-node")
+ pv = ppt.PlacementView(
+ "fake-node", instances_under_same_host_resize=[])
pf = pci_device.PciDevice(
address="0000:81:00.0",
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -173,7 +175,8 @@ class TestTranslator(test.NoDBTestCase):
)
def test_mixed_rc_for_sibling_vfs(self):
- pv = ppt.PlacementView("fake-node")
+ pv = ppt.PlacementView(
+ "fake-node", instances_under_same_host_resize=[])
vf1, vf2, vf3, vf4 = [
pci_device.PciDevice(
address="0000:81:00.%d" % f,
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index 2e7b04fbb5..d31ec97f33 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -1808,6 +1808,7 @@ class TestUpdateComputeNode(BaseTestCase):
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
+ [],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@@ -1847,6 +1848,7 @@ class TestUpdateComputeNode(BaseTestCase):
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
+ [],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(
@@ -1892,11 +1894,72 @@ class TestUpdateComputeNode(BaseTestCase):
compute_obj.hypervisor_hostname,
self.rt.pci_tracker,
mock_get_allocs.return_value,
+ [],
)
upt = self.rt.reportclient.update_from_provider_tree
upt.assert_called_once_with(
mock.sentinel.ctx, ptree, allocations=mock_get_allocs.return_value)
+ @mock.patch(
+ 'nova.compute.resource_tracker.ResourceTracker.'
+ '_sync_compute_service_disabled_trait',
+ new=mock.Mock()
+ )
+ @mock.patch(
+ 'nova.compute.resource_tracker.ResourceTracker._resource_change',
+ new=mock.Mock(return_value=False)
+ )
+ @mock.patch(
+ 'nova.compute.pci_placement_translator.update_provider_tree_for_pci')
+ def test_update_pci_reporting_same_host_resize(
+ self, mock_update_provider_tree_for_pci
+ ):
+ """Assert that the resource tracker calls update_provider_tree_for_pci
+ with the list of instances that are being resized to the same
+ host.
+ """
+ compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
+ self._setup_rt()
+ ptree = self._setup_ptree(compute_obj)
+ # simulate that pci reporting did not touch allocations
+ mock_update_provider_tree_for_pci.return_value = False
+ self.rt.tracked_migrations = {
+ uuids.inst1: objects.Migration(
+ migration_type="resize",
+ source_node="fake-node",
+ dest_node="fake-node",
+ instance_uuid=uuids.inst1,
+ ),
+ uuids.inst2: objects.Migration(
+ migration_type="evacuation",
+ source_node="fake-node",
+ dest_node="fake-node",
+ instance_uuid=uuids.inst2,
+ ),
+ uuids.inst3: objects.Migration(
+ migration_type="resize",
+ source_node="fake-node1",
+ dest_node="fake-node2",
+ instance_uuid=uuids.inst3,
+ ),
+ }
+
+ self.rt._update(mock.sentinel.ctx, compute_obj)
+
+ mock_get_allocs = (
+ self.report_client_mock.get_allocations_for_provider_tree)
+ mock_get_allocs.assert_called_once_with(
+ mock.sentinel.ctx, compute_obj.hypervisor_hostname)
+ mock_update_provider_tree_for_pci.assert_called_once_with(
+ ptree,
+ compute_obj.hypervisor_hostname,
+ self.rt.pci_tracker,
+ mock_get_allocs.return_value,
+ [uuids.inst1],
+ )
+ upt = self.rt.reportclient.update_from_provider_tree
+ upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
+
@mock.patch('nova.objects.Service.get_by_compute_host',
return_value=objects.Service(disabled=True))
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):