diff options
author | Zuul <zuul@review.opendev.org> | 2023-01-18 21:01:24 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2023-01-18 21:01:24 +0000 |
commit | 3eb23d3d71221582bf924f895d47c7179b9edd78 (patch) | |
tree | 1161f7b51d3eb6bff6765253f07439eeca50c2db | |
parent | 171d11eb25a79bb4cf1831d6eeb046d13c874de0 (diff) | |
parent | d53a4922a0eea40f5dd5911af751976d83ff915b (diff) | |
download | nova-3eb23d3d71221582bf924f895d47c7179b9edd78.tar.gz |
Merge "FUP for the scheduler part of PCI in placement"
-rw-r--r-- | nova/compute/manager.py | 54 | ||||
-rw-r--r-- | nova/conf/pci.py | 24 | ||||
-rw-r--r-- | nova/pci/stats.py | 22 | ||||
-rw-r--r-- | nova/tests/functional/libvirt/test_pci_in_placement.py | 92 | ||||
-rw-r--r-- | nova/tests/functional/libvirt/test_pci_sriov_servers.py | 7 | ||||
-rw-r--r-- | nova/tests/unit/scheduler/test_host_manager.py | 14 | ||||
-rw-r--r-- | nova/tests/unit/scheduler/test_manager.py | 5 |
7 files changed, 114 insertions, 104 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 5ebc278b29..3c3d66e32c 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2471,10 +2471,12 @@ class ComputeManager(manager.Manager): if provider_mapping: try: - compute_utils\ - .update_pci_request_with_placement_allocations( - context, self.reportclient, - instance.pci_requests.requests, provider_mapping) + compute_utils.update_pci_request_with_placement_allocations( + context, + self.reportclient, + instance.pci_requests.requests, + provider_mapping, + ) except (exception.AmbiguousResourceProviderForPCIRequest, exception.UnexpectedResourceProviderNameForPCIRequest ) as e: @@ -3789,10 +3791,12 @@ class ComputeManager(manager.Manager): provider_mapping = self._get_request_group_mapping(request_spec) if provider_mapping: - compute_utils.\ - update_pci_request_with_placement_allocations( - context, self.reportclient, - instance.pci_requests.requests, provider_mapping) + compute_utils.update_pci_request_with_placement_allocations( + context, + self.reportclient, + instance.pci_requests.requests, + provider_mapping, + ) claim_context = rebuild_claim( context, instance, scheduled_node, allocations, @@ -5415,10 +5419,12 @@ class ComputeManager(manager.Manager): if provider_mapping: try: - compute_utils.\ - update_pci_request_with_placement_allocations( - context, self.reportclient, - instance.pci_requests.requests, provider_mapping) + compute_utils.update_pci_request_with_placement_allocations( + context, + self.reportclient, + instance.pci_requests.requests, + provider_mapping, + ) except (exception.AmbiguousResourceProviderForPCIRequest, exception.UnexpectedResourceProviderNameForPCIRequest ) as e: @@ -6899,12 +6905,12 @@ class ComputeManager(manager.Manager): try: if provider_mappings: - update = ( - compute_utils. - update_pci_request_with_placement_allocations) - update( - context, self.reportclient, instance.pci_requests.requests, - provider_mappings) + compute_utils.update_pci_request_with_placement_allocations( + context, + self.reportclient, + instance.pci_requests.requests, + provider_mappings, + ) accel_info = [] if accel_uuids: @@ -7984,12 +7990,12 @@ class ComputeManager(manager.Manager): instance_uuid=instance.uuid) from e try: - update = ( - compute_utils. - update_pci_request_with_placement_allocations) - update( - context, self.reportclient, pci_reqs.requests, - provider_mappings) + compute_utils.update_pci_request_with_placement_allocations( + context, + self.reportclient, + pci_reqs.requests, + provider_mappings, + ) except ( exception.AmbiguousResourceProviderForPCIRequest, exception.UnexpectedResourceProviderNameForPCIRequest diff --git a/nova/conf/pci.py b/nova/conf/pci.py index 4a2f1aef32..533bf52ead 100644 --- a/nova/conf/pci.py +++ b/nova/conf/pci.py @@ -70,8 +70,9 @@ Possible Values: ``resource_class`` The optional Placement resource class name that is used to track the requested PCI devices in Placement. It can be a standard - resource class from the ``os-resource-classes`` lib. Or can be any string. - In that case Nova will normalize it to a proper Placement resource class by + resource class from the ``os-resource-classes`` lib. Or it can be an + arbitrary string. If it is an non-standard resource class then Nova will + normalize it to a proper Placement resource class by making it upper case, replacing any consecutive character outside of ``[A-Z0-9_]`` with a single '_', and prefixing the name with ``CUSTOM_`` if not yet prefixed. The maximum allowed length is 255 character including the @@ -85,15 +86,16 @@ Possible Values: ``traits`` An optional comma separated list of Placement trait names requested to be present on the resource provider that fulfills this alias. Each trait can - be a standard trait from ``os-traits`` lib or can be any string. If it is - not a standard trait then Nova will normalize the trait name by making it - upper case, replacing any consecutive character outside of ``[A-Z0-9_]`` - with a single '_', and prefixing the name with ``CUSTOM_`` if not yet - prefixed. The maximum allowed length of a trait name is 255 character - including the prefix. Every trait in ``traits`` requested in the alias - ensured to be in the list of traits provided in the ``traits`` field of - the ``[pci]device_spec`` when scheduling the request. This field can only - be used only if ``[filter_scheduler]pci_in_placement`` is enabled. + be a standard trait from ``os-traits`` lib or it can be an arbitrary + string. If it is a non-standard trait then Nova will normalize the + trait name by making it upper case, replacing any consecutive character + outside of ``[A-Z0-9_]`` with a single '_', and prefixing the name + with ``CUSTOM_`` if not yet prefixed. The maximum allowed length of a + trait name is 255 character including the prefix. Every trait in + ``traits`` requested in the alias ensured to be in the list of traits + provided in the ``traits`` field of the ``[pci]device_spec`` when + scheduling the request. This field can only be used only if + ``[filter_scheduler]pci_in_placement`` is enabled. * Supports multiple aliases by repeating the option (not by specifying a list value):: diff --git a/nova/pci/stats.py b/nova/pci/stats.py index a64baced00..b7b79661be 100644 --- a/nova/pci/stats.py +++ b/nova/pci/stats.py @@ -153,8 +153,7 @@ class PciDeviceStats(object): # to split the pools by PCI or PF address. We can still keep # the VFs from the same parent PF in a single pool though as they # are equivalent from placement perspective. - address = dev.parent_addr or dev.address - pool['address'] = address + pool['address'] = dev.parent_addr or dev.address # NOTE(gibi): parent_ifname acts like a tag during pci claim but # not provided as part of the whitelist spec as it is auto detected @@ -579,18 +578,13 @@ class PciDeviceStats(object): for pool in pools: rp_uuid = pool.get('rp_uuid') if rp_uuid is None: - # NOTE(gibi): There can be pools without rp_uuid field if the - # [pci]report_in_placement is not enabled for a compute with - # viable PCI devices. We have a non-empty rp_uuids, so we know - # that the [filter_scheduler]pci_in_placement is enabled. This - # is a configuration error. - LOG.warning( - "The PCI pool %s isn't mapped to an RP UUID but the " - "scheduler is configured to create PCI allocations in " - "placement. This should not happen. Please enable " - "[pci]report_in_placement on all compute hosts before " - "enabling [filter_scheduler]pci_in_placement in the " - "scheduler. This pool is ignored now.", pool) + # NOTE(gibi): As rp_uuids is not empty the scheduler allocated + # PCI resources on this host, so we know that + # [pci]report_in_placement is enabled on this host. But this + # pool has no RP mapping which can only happen if the pool + # contains PCI devices with physical_network tag, as those + # devices not yet reported in placement. But if they are not + # reported then we can ignore them here too. continue if ( diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py index 5cf554c3fe..41d6c8e008 100644 --- a/nova/tests/functional/libvirt/test_pci_in_placement.py +++ b/nova/tests/functional/libvirt/test_pci_in_placement.py @@ -13,6 +13,7 @@ # under the License. from unittest import mock +import ddt import fixtures import os_resource_classes import os_traits @@ -1613,12 +1614,12 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests): "compute1", **compute1_expected_placement_view) -class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): +@ddt.ddt +class SimpleRCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): def setUp(self): super().setUp() self.flags(group='filter_scheduler', pci_in_placement=True) - def test_boot_with_custom_rc_and_traits(self): # The fake libvirt will emulate on the host: # * one type-PCI in slot 0 pci_info = fakelibvirt.HostPCIDevicesInfo( @@ -1642,7 +1643,7 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): self.start_compute(hostname="compute1", pci_info=pci_info) self.assertPCIDeviceCounts("compute1", total=1, free=1) - compute1_expected_placement_view = { + self.compute1_expected_placement_view = { "inventories": { "0000:81:00.0": {"CUSTOM_GPU": 1}, }, @@ -1659,73 +1660,74 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): "allocations": {}, } self.assert_placement_pci_view( - "compute1", **compute1_expected_placement_view) + "compute1", **self.compute1_expected_placement_view) - pci_alias_wrong_rc = { + @ddt.data( + { "vendor_id": fakelibvirt.PCI_VEND_ID, "product_id": fakelibvirt.PCI_PROD_ID, "name": "a-gpu-wrong-rc", - } - pci_alias_wrong_rc_2 = { + }, + { "resource_class": os_resource_classes.PGPU, "name": "a-gpu-wrong-rc-2", - } - pci_alias_asking_for_missing_trait = { + }, + { "resource_class": "GPU", # NOTE(gibi): "big" is missing from device spec "traits": "purple,big", "name": "a-gpu-missing-trait", - } + }, + ) + def test_boot_with_custom_rc_and_traits_no_matching_device( + self, pci_alias + ): + self.flags(group="pci", alias=self._to_list_of_json_str([pci_alias])) + extra_spec = {"pci_passthrough:alias": f"{pci_alias['name']}:1"} + flavor_id = self._create_flavor(extra_spec=extra_spec) + server = self._create_server( + flavor_id=flavor_id, networks=[], expected_state="ERROR" + ) + self.assertIn("fault", server) + self.assertIn("No valid host", server["fault"]["message"]) + + self.assertPCIDeviceCounts("compute1", total=1, free=1) + self.assert_placement_pci_view( + "compute1", **self.compute1_expected_placement_view + ) + + def test_boot_with_custom_rc_and_traits_succeeds(self): pci_alias_gpu = { "resource_class": "GPU", "traits": "HW_GPU_API_VULKAN,PURPLE", "name": "a-gpu", } self.flags( - group="pci", - alias=self._to_list_of_json_str( - [ - pci_alias_wrong_rc, - pci_alias_wrong_rc_2, - pci_alias_asking_for_missing_trait, - pci_alias_gpu, - ] - ), - ) - - # try to boot with each alias that does not match - for alias in [ - "a-gpu-wrong-rc", - "a-gpu-wrong-rc-2", - "a-gpu-missing-trait", - ]: - extra_spec = {"pci_passthrough:alias": f"{alias}:1"} - flavor_id = self._create_flavor(extra_spec=extra_spec) - server = self._create_server( - flavor_id=flavor_id, networks=[], expected_state='ERROR') - self.assertIn('fault', server) - self.assertIn('No valid host', server['fault']['message']) - - self.assertPCIDeviceCounts("compute1", total=1, free=1) - self.assert_placement_pci_view( - "compute1", **compute1_expected_placement_view) - - # then boot with the matching alias + group="pci", alias=self._to_list_of_json_str([pci_alias_gpu]) + ) + extra_spec = {"pci_passthrough:alias": "a-gpu:1"} flavor_id = self._create_flavor(extra_spec=extra_spec) - server = self._create_server( - flavor_id=flavor_id, networks=[]) + server = self._create_server(flavor_id=flavor_id, networks=[]) self.assertPCIDeviceCounts("compute1", total=1, free=0) - compute1_expected_placement_view[ - "usages"]["0000:81:00.0"]["CUSTOM_GPU"] = 1 - compute1_expected_placement_view["allocations"][server["id"]] = { + self.compute1_expected_placement_view["usages"]["0000:81:00.0"][ + "CUSTOM_GPU" + ] = 1 + self.compute1_expected_placement_view["allocations"][server["id"]] = { "0000:81:00.0": {"CUSTOM_GPU": 1} } self.assert_placement_pci_view( - "compute1", **compute1_expected_placement_view) + "compute1", **self.compute1_expected_placement_view + ) self.assert_no_pci_healing("compute1") + +class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests): + def setUp(self): + super().setUp() + self.flags(group='filter_scheduler', pci_in_placement=True) + def test_device_claim_consistent_with_placement_allocation(self): """As soon as [filter_scheduler]pci_in_placement is enabled the nova-scheduler will allocate PCI devices in placement. Then on the diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 3f301dbd9a..6b8b254af9 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -1956,7 +1956,8 @@ class PCIServersTest(_PCIServersTestBase): def test_create_server_with_pci_dev_and_numa(self): """Verifies that an instance can be booted with cpu pinning and with an - assigned pci device. + assigned pci device with legacy policy and numa info for the pci + device. """ self.flags(cpu_dedicated_set='0-7', group='compute') @@ -1991,9 +1992,9 @@ class PCIServersTest(_PCIServersTestBase): def test_create_server_with_pci_dev_and_numa_fails(self): """This test ensures that it is not possible to allocated CPU and - memory resources from one NUMA node and a PCI device from another. + memory resources from one NUMA node and a PCI device from another + if we use the legacy policy and the pci device reports numa info. """ - self.flags(cpu_dedicated_set='0-7', group='compute') pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=0) diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py index 6c30e73571..1a7daa515f 100644 --- a/nova/tests/unit/scheduler/test_host_manager.py +++ b/nova/tests/unit/scheduler/test_host_manager.py @@ -1562,12 +1562,14 @@ class HostStateTestCase(test.NoDBTestCase): self.assertIsNone(host.updated) host.consume_from_request(spec_obj) - numa_fit_mock.assert_called_once_with(fake_host_numa_topology, - fake_numa_topology, - limits=None, pci_requests=None, - pci_stats=None, - provider_mapping=None, - ) + numa_fit_mock.assert_called_once_with( + fake_host_numa_topology, + fake_numa_topology, + limits=None, + pci_requests=None, + pci_stats=None, + provider_mapping=None, + ) numa_usage_mock.assert_called_once_with(fake_host_numa_topology, fake_numa_topology) sync_mock.assert_called_once_with(("fakehost", "fakenode")) diff --git a/nova/tests/unit/scheduler/test_manager.py b/nova/tests/unit/scheduler/test_manager.py index 4e7c0dc008..e7866069b3 100644 --- a/nova/tests/unit/scheduler/test_manager.py +++ b/nova/tests/unit/scheduler/test_manager.py @@ -2047,6 +2047,8 @@ class SchedulerManagerAllocationCandidateTestCase(test.NoDBTestCase): alloc_reqs_by_rp_uuid[uuids.host1] = [ { "mappings": { + # This is odd but the un-name request group uses "" as the + # name of the group. "": [uuids.host1], uuids.group_req1: [getattr(uuids, f"host1_child{i}")], } @@ -2071,7 +2073,8 @@ class SchedulerManagerAllocationCandidateTestCase(test.NoDBTestCase): self.assertEqual(1, len(selections)) selection = selections[0] self.assertEqual(uuids.host1, selection.compute_node_uuid) - # we expect that host1_child2 candidate is selected + # we expect that host1_child2 candidate is selected as the + # DropFirstFilter will drop host1_child1 expected_a_c = { "mappings": { "": [uuids.host1], |