summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2023-01-18 21:01:24 +0000
committerGerrit Code Review <review@openstack.org>2023-01-18 21:01:24 +0000
commit3eb23d3d71221582bf924f895d47c7179b9edd78 (patch)
tree1161f7b51d3eb6bff6765253f07439eeca50c2db
parent171d11eb25a79bb4cf1831d6eeb046d13c874de0 (diff)
parentd53a4922a0eea40f5dd5911af751976d83ff915b (diff)
downloadnova-3eb23d3d71221582bf924f895d47c7179b9edd78.tar.gz
Merge "FUP for the scheduler part of PCI in placement"
-rw-r--r--nova/compute/manager.py54
-rw-r--r--nova/conf/pci.py24
-rw-r--r--nova/pci/stats.py22
-rw-r--r--nova/tests/functional/libvirt/test_pci_in_placement.py92
-rw-r--r--nova/tests/functional/libvirt/test_pci_sriov_servers.py7
-rw-r--r--nova/tests/unit/scheduler/test_host_manager.py14
-rw-r--r--nova/tests/unit/scheduler/test_manager.py5
7 files changed, 114 insertions, 104 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5ebc278b29..3c3d66e32c 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -2471,10 +2471,12 @@ class ComputeManager(manager.Manager):
if provider_mapping:
try:
- compute_utils\
- .update_pci_request_with_placement_allocations(
- context, self.reportclient,
- instance.pci_requests.requests, provider_mapping)
+ compute_utils.update_pci_request_with_placement_allocations(
+ context,
+ self.reportclient,
+ instance.pci_requests.requests,
+ provider_mapping,
+ )
except (exception.AmbiguousResourceProviderForPCIRequest,
exception.UnexpectedResourceProviderNameForPCIRequest
) as e:
@@ -3789,10 +3791,12 @@ class ComputeManager(manager.Manager):
provider_mapping = self._get_request_group_mapping(request_spec)
if provider_mapping:
- compute_utils.\
- update_pci_request_with_placement_allocations(
- context, self.reportclient,
- instance.pci_requests.requests, provider_mapping)
+ compute_utils.update_pci_request_with_placement_allocations(
+ context,
+ self.reportclient,
+ instance.pci_requests.requests,
+ provider_mapping,
+ )
claim_context = rebuild_claim(
context, instance, scheduled_node, allocations,
@@ -5415,10 +5419,12 @@ class ComputeManager(manager.Manager):
if provider_mapping:
try:
- compute_utils.\
- update_pci_request_with_placement_allocations(
- context, self.reportclient,
- instance.pci_requests.requests, provider_mapping)
+ compute_utils.update_pci_request_with_placement_allocations(
+ context,
+ self.reportclient,
+ instance.pci_requests.requests,
+ provider_mapping,
+ )
except (exception.AmbiguousResourceProviderForPCIRequest,
exception.UnexpectedResourceProviderNameForPCIRequest
) as e:
@@ -6899,12 +6905,12 @@ class ComputeManager(manager.Manager):
try:
if provider_mappings:
- update = (
- compute_utils.
- update_pci_request_with_placement_allocations)
- update(
- context, self.reportclient, instance.pci_requests.requests,
- provider_mappings)
+ compute_utils.update_pci_request_with_placement_allocations(
+ context,
+ self.reportclient,
+ instance.pci_requests.requests,
+ provider_mappings,
+ )
accel_info = []
if accel_uuids:
@@ -7984,12 +7990,12 @@ class ComputeManager(manager.Manager):
instance_uuid=instance.uuid) from e
try:
- update = (
- compute_utils.
- update_pci_request_with_placement_allocations)
- update(
- context, self.reportclient, pci_reqs.requests,
- provider_mappings)
+ compute_utils.update_pci_request_with_placement_allocations(
+ context,
+ self.reportclient,
+ pci_reqs.requests,
+ provider_mappings,
+ )
except (
exception.AmbiguousResourceProviderForPCIRequest,
exception.UnexpectedResourceProviderNameForPCIRequest
diff --git a/nova/conf/pci.py b/nova/conf/pci.py
index 4a2f1aef32..533bf52ead 100644
--- a/nova/conf/pci.py
+++ b/nova/conf/pci.py
@@ -70,8 +70,9 @@ Possible Values:
``resource_class``
The optional Placement resource class name that is used
to track the requested PCI devices in Placement. It can be a standard
- resource class from the ``os-resource-classes`` lib. Or can be any string.
- In that case Nova will normalize it to a proper Placement resource class by
+ resource class from the ``os-resource-classes`` lib. Or it can be an
+ arbitrary string. If it is an non-standard resource class then Nova will
+ normalize it to a proper Placement resource class by
making it upper case, replacing any consecutive character outside of
``[A-Z0-9_]`` with a single '_', and prefixing the name with ``CUSTOM_`` if
not yet prefixed. The maximum allowed length is 255 character including the
@@ -85,15 +86,16 @@ Possible Values:
``traits``
An optional comma separated list of Placement trait names requested to be
present on the resource provider that fulfills this alias. Each trait can
- be a standard trait from ``os-traits`` lib or can be any string. If it is
- not a standard trait then Nova will normalize the trait name by making it
- upper case, replacing any consecutive character outside of ``[A-Z0-9_]``
- with a single '_', and prefixing the name with ``CUSTOM_`` if not yet
- prefixed. The maximum allowed length of a trait name is 255 character
- including the prefix. Every trait in ``traits`` requested in the alias
- ensured to be in the list of traits provided in the ``traits`` field of
- the ``[pci]device_spec`` when scheduling the request. This field can only
- be used only if ``[filter_scheduler]pci_in_placement`` is enabled.
+ be a standard trait from ``os-traits`` lib or it can be an arbitrary
+ string. If it is a non-standard trait then Nova will normalize the
+ trait name by making it upper case, replacing any consecutive character
+ outside of ``[A-Z0-9_]`` with a single '_', and prefixing the name
+ with ``CUSTOM_`` if not yet prefixed. The maximum allowed length of a
+ trait name is 255 character including the prefix. Every trait in
+ ``traits`` requested in the alias ensured to be in the list of traits
+ provided in the ``traits`` field of the ``[pci]device_spec`` when
+ scheduling the request. This field can only be used only if
+ ``[filter_scheduler]pci_in_placement`` is enabled.
* Supports multiple aliases by repeating the option (not by specifying
a list value)::
diff --git a/nova/pci/stats.py b/nova/pci/stats.py
index a64baced00..b7b79661be 100644
--- a/nova/pci/stats.py
+++ b/nova/pci/stats.py
@@ -153,8 +153,7 @@ class PciDeviceStats(object):
# to split the pools by PCI or PF address. We can still keep
# the VFs from the same parent PF in a single pool though as they
# are equivalent from placement perspective.
- address = dev.parent_addr or dev.address
- pool['address'] = address
+ pool['address'] = dev.parent_addr or dev.address
# NOTE(gibi): parent_ifname acts like a tag during pci claim but
# not provided as part of the whitelist spec as it is auto detected
@@ -579,18 +578,13 @@ class PciDeviceStats(object):
for pool in pools:
rp_uuid = pool.get('rp_uuid')
if rp_uuid is None:
- # NOTE(gibi): There can be pools without rp_uuid field if the
- # [pci]report_in_placement is not enabled for a compute with
- # viable PCI devices. We have a non-empty rp_uuids, so we know
- # that the [filter_scheduler]pci_in_placement is enabled. This
- # is a configuration error.
- LOG.warning(
- "The PCI pool %s isn't mapped to an RP UUID but the "
- "scheduler is configured to create PCI allocations in "
- "placement. This should not happen. Please enable "
- "[pci]report_in_placement on all compute hosts before "
- "enabling [filter_scheduler]pci_in_placement in the "
- "scheduler. This pool is ignored now.", pool)
+ # NOTE(gibi): As rp_uuids is not empty the scheduler allocated
+ # PCI resources on this host, so we know that
+ # [pci]report_in_placement is enabled on this host. But this
+ # pool has no RP mapping which can only happen if the pool
+ # contains PCI devices with physical_network tag, as those
+ # devices not yet reported in placement. But if they are not
+ # reported then we can ignore them here too.
continue
if (
diff --git a/nova/tests/functional/libvirt/test_pci_in_placement.py b/nova/tests/functional/libvirt/test_pci_in_placement.py
index 5cf554c3fe..41d6c8e008 100644
--- a/nova/tests/functional/libvirt/test_pci_in_placement.py
+++ b/nova/tests/functional/libvirt/test_pci_in_placement.py
@@ -13,6 +13,7 @@
# under the License.
from unittest import mock
+import ddt
import fixtures
import os_resource_classes
import os_traits
@@ -1613,12 +1614,12 @@ class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
"compute1", **compute1_expected_placement_view)
-class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
+@ddt.ddt
+class SimpleRCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
def setUp(self):
super().setUp()
self.flags(group='filter_scheduler', pci_in_placement=True)
- def test_boot_with_custom_rc_and_traits(self):
# The fake libvirt will emulate on the host:
# * one type-PCI in slot 0
pci_info = fakelibvirt.HostPCIDevicesInfo(
@@ -1642,7 +1643,7 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
self.start_compute(hostname="compute1", pci_info=pci_info)
self.assertPCIDeviceCounts("compute1", total=1, free=1)
- compute1_expected_placement_view = {
+ self.compute1_expected_placement_view = {
"inventories": {
"0000:81:00.0": {"CUSTOM_GPU": 1},
},
@@ -1659,73 +1660,74 @@ class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
"allocations": {},
}
self.assert_placement_pci_view(
- "compute1", **compute1_expected_placement_view)
+ "compute1", **self.compute1_expected_placement_view)
- pci_alias_wrong_rc = {
+ @ddt.data(
+ {
"vendor_id": fakelibvirt.PCI_VEND_ID,
"product_id": fakelibvirt.PCI_PROD_ID,
"name": "a-gpu-wrong-rc",
- }
- pci_alias_wrong_rc_2 = {
+ },
+ {
"resource_class": os_resource_classes.PGPU,
"name": "a-gpu-wrong-rc-2",
- }
- pci_alias_asking_for_missing_trait = {
+ },
+ {
"resource_class": "GPU",
# NOTE(gibi): "big" is missing from device spec
"traits": "purple,big",
"name": "a-gpu-missing-trait",
- }
+ },
+ )
+ def test_boot_with_custom_rc_and_traits_no_matching_device(
+ self, pci_alias
+ ):
+ self.flags(group="pci", alias=self._to_list_of_json_str([pci_alias]))
+ extra_spec = {"pci_passthrough:alias": f"{pci_alias['name']}:1"}
+ flavor_id = self._create_flavor(extra_spec=extra_spec)
+ server = self._create_server(
+ flavor_id=flavor_id, networks=[], expected_state="ERROR"
+ )
+ self.assertIn("fault", server)
+ self.assertIn("No valid host", server["fault"]["message"])
+
+ self.assertPCIDeviceCounts("compute1", total=1, free=1)
+ self.assert_placement_pci_view(
+ "compute1", **self.compute1_expected_placement_view
+ )
+
+ def test_boot_with_custom_rc_and_traits_succeeds(self):
pci_alias_gpu = {
"resource_class": "GPU",
"traits": "HW_GPU_API_VULKAN,PURPLE",
"name": "a-gpu",
}
self.flags(
- group="pci",
- alias=self._to_list_of_json_str(
- [
- pci_alias_wrong_rc,
- pci_alias_wrong_rc_2,
- pci_alias_asking_for_missing_trait,
- pci_alias_gpu,
- ]
- ),
- )
-
- # try to boot with each alias that does not match
- for alias in [
- "a-gpu-wrong-rc",
- "a-gpu-wrong-rc-2",
- "a-gpu-missing-trait",
- ]:
- extra_spec = {"pci_passthrough:alias": f"{alias}:1"}
- flavor_id = self._create_flavor(extra_spec=extra_spec)
- server = self._create_server(
- flavor_id=flavor_id, networks=[], expected_state='ERROR')
- self.assertIn('fault', server)
- self.assertIn('No valid host', server['fault']['message'])
-
- self.assertPCIDeviceCounts("compute1", total=1, free=1)
- self.assert_placement_pci_view(
- "compute1", **compute1_expected_placement_view)
-
- # then boot with the matching alias
+ group="pci", alias=self._to_list_of_json_str([pci_alias_gpu])
+ )
+
extra_spec = {"pci_passthrough:alias": "a-gpu:1"}
flavor_id = self._create_flavor(extra_spec=extra_spec)
- server = self._create_server(
- flavor_id=flavor_id, networks=[])
+ server = self._create_server(flavor_id=flavor_id, networks=[])
self.assertPCIDeviceCounts("compute1", total=1, free=0)
- compute1_expected_placement_view[
- "usages"]["0000:81:00.0"]["CUSTOM_GPU"] = 1
- compute1_expected_placement_view["allocations"][server["id"]] = {
+ self.compute1_expected_placement_view["usages"]["0000:81:00.0"][
+ "CUSTOM_GPU"
+ ] = 1
+ self.compute1_expected_placement_view["allocations"][server["id"]] = {
"0000:81:00.0": {"CUSTOM_GPU": 1}
}
self.assert_placement_pci_view(
- "compute1", **compute1_expected_placement_view)
+ "compute1", **self.compute1_expected_placement_view
+ )
self.assert_no_pci_healing("compute1")
+
+class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
+ def setUp(self):
+ super().setUp()
+ self.flags(group='filter_scheduler', pci_in_placement=True)
+
def test_device_claim_consistent_with_placement_allocation(self):
"""As soon as [filter_scheduler]pci_in_placement is enabled the
nova-scheduler will allocate PCI devices in placement. Then on the
diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
index 3f301dbd9a..6b8b254af9 100644
--- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py
+++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py
@@ -1956,7 +1956,8 @@ class PCIServersTest(_PCIServersTestBase):
def test_create_server_with_pci_dev_and_numa(self):
"""Verifies that an instance can be booted with cpu pinning and with an
- assigned pci device.
+ assigned pci device with legacy policy and numa info for the pci
+ device.
"""
self.flags(cpu_dedicated_set='0-7', group='compute')
@@ -1991,9 +1992,9 @@ class PCIServersTest(_PCIServersTestBase):
def test_create_server_with_pci_dev_and_numa_fails(self):
"""This test ensures that it is not possible to allocated CPU and
- memory resources from one NUMA node and a PCI device from another.
+ memory resources from one NUMA node and a PCI device from another
+ if we use the legacy policy and the pci device reports numa info.
"""
-
self.flags(cpu_dedicated_set='0-7', group='compute')
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=0)
diff --git a/nova/tests/unit/scheduler/test_host_manager.py b/nova/tests/unit/scheduler/test_host_manager.py
index 6c30e73571..1a7daa515f 100644
--- a/nova/tests/unit/scheduler/test_host_manager.py
+++ b/nova/tests/unit/scheduler/test_host_manager.py
@@ -1562,12 +1562,14 @@ class HostStateTestCase(test.NoDBTestCase):
self.assertIsNone(host.updated)
host.consume_from_request(spec_obj)
- numa_fit_mock.assert_called_once_with(fake_host_numa_topology,
- fake_numa_topology,
- limits=None, pci_requests=None,
- pci_stats=None,
- provider_mapping=None,
- )
+ numa_fit_mock.assert_called_once_with(
+ fake_host_numa_topology,
+ fake_numa_topology,
+ limits=None,
+ pci_requests=None,
+ pci_stats=None,
+ provider_mapping=None,
+ )
numa_usage_mock.assert_called_once_with(fake_host_numa_topology,
fake_numa_topology)
sync_mock.assert_called_once_with(("fakehost", "fakenode"))
diff --git a/nova/tests/unit/scheduler/test_manager.py b/nova/tests/unit/scheduler/test_manager.py
index 4e7c0dc008..e7866069b3 100644
--- a/nova/tests/unit/scheduler/test_manager.py
+++ b/nova/tests/unit/scheduler/test_manager.py
@@ -2047,6 +2047,8 @@ class SchedulerManagerAllocationCandidateTestCase(test.NoDBTestCase):
alloc_reqs_by_rp_uuid[uuids.host1] = [
{
"mappings": {
+ # This is odd but the un-name request group uses "" as the
+ # name of the group.
"": [uuids.host1],
uuids.group_req1: [getattr(uuids, f"host1_child{i}")],
}
@@ -2071,7 +2073,8 @@ class SchedulerManagerAllocationCandidateTestCase(test.NoDBTestCase):
self.assertEqual(1, len(selections))
selection = selections[0]
self.assertEqual(uuids.host1, selection.compute_node_uuid)
- # we expect that host1_child2 candidate is selected
+ # we expect that host1_child2 candidate is selected as the
+ # DropFirstFilter will drop host1_child1
expected_a_c = {
"mappings": {
"": [uuids.host1],