summaryrefslogtreecommitdiff
path: root/nova/pci
diff options
context:
space:
mode:
authorSergey Nikitin <snikitin@mirantis.com>2016-10-25 17:24:36 +0300
committerStephen Finucane <sfinucan@redhat.com>2017-12-22 15:59:54 +0000
commit6b3b04d2113915729fd9aced9839338e429e1a01 (patch)
tree4c9ce05a8636336f04b3fc15d9dd44c3cb0ec8bf /nova/pci
parent0cd858b4ab6a500cb6c20447bd7bc1c7136e1bc5 (diff)
downloadnova-6b3b04d2113915729fd9aced9839338e429e1a01.tar.gz
Add PCI NUMA policies
This patch adds new policies for PCI device allocation. There are 3 policies: - legacy - this is the default value and it describes the current nova behavior. Nova will boot VMs with a PCI device if the PCI device is associated with at least one NUMA node on which the instance should be booted or there is no information about PCI-NUMA association - required - nova will boot VMs with PCI devices *only* if at least one of the VM's NUMA nodes is associated with these PCI devices. - preferred - nova will boot VMs using best effort NUMA affinity bp share-pci-between-numa-nodes Change-Id: I46d483f9de6776db1b025f925890624e5e682ada Co-Authored-By: Stephen Finucane <stephenfin@redhat.com>
Diffstat (limited to 'nova/pci')
-rw-r--r--nova/pci/request.py141
-rw-r--r--nova/pci/stats.py152
2 files changed, 213 insertions, 80 deletions
diff --git a/nova/pci/request.py b/nova/pci/request.py
index 84465982fe..d37fcc6a36 100644
--- a/nova/pci/request.py
+++ b/nova/pci/request.py
@@ -21,9 +21,10 @@
| "product_id": "0443",
| "vendor_id": "8086",
| "device_type": "type-PCI",
+ | "numa_policy": "legacy"
| }'
- Aliases with the same name and the same device_type are ORed::
+ Aliases with the same name, device_type and numa_policy are ORed::
| [pci]
| alias = '{
@@ -35,11 +36,8 @@
These two aliases define a device request meaning: vendor_id is "8086" and
product_id is "0442" or "0443".
-
"""
-import copy
-
import jsonschema
from oslo_serialization import jsonutils
import six
@@ -59,13 +57,8 @@ DEVICE_TYPE_FOR_VNIC_TYPE = {
network_model.VNIC_TYPE_DIRECT_PHYSICAL: obj_fields.PciDeviceType.SRIOV_PF
}
-
CONF = nova.conf.CONF
-
-_ALIAS_DEV_TYPE = [obj_fields.PciDeviceType.STANDARD,
- obj_fields.PciDeviceType.SRIOV_PF,
- obj_fields.PciDeviceType.SRIOV_VF]
_ALIAS_CAP_TYPE = ['pci']
_ALIAS_SCHEMA = {
"type": "object",
@@ -76,6 +69,8 @@ _ALIAS_SCHEMA = {
"minLength": 1,
"maxLength": 256,
},
+ # TODO(stephenfin): This isn't used anywhere outside of tests and
+ # should probably be removed.
"capability_type": {
"type": "string",
"enum": _ALIAS_CAP_TYPE,
@@ -90,7 +85,11 @@ _ALIAS_SCHEMA = {
},
"device_type": {
"type": "string",
- "enum": _ALIAS_DEV_TYPE,
+ "enum": list(obj_fields.PciDeviceType.ALL),
+ },
+ "numa_policy": {
+ "type": "string",
+ "enum": list(obj_fields.PCINUMAAffinityPolicy.ALL),
},
},
"required": ["name"],
@@ -98,92 +97,124 @@ _ALIAS_SCHEMA = {
def _get_alias_from_config():
- """Parse and validate PCI aliases from the nova config."""
+ """Parse and validate PCI aliases from the nova config.
+
+ :returns: A dictionary where the keys are device names and the values are
+ tuples of form ``(numa_policy, specs)``. ``specs`` is a list of PCI
+ device specs, while ``numa_policy`` describes the required NUMA
+ affinity of the device(s).
+ :raises: exception.PciInvalidAlias if two aliases with the same name have
+ different device types or different NUMA policies.
+ """
jaliases = CONF.pci.alias
aliases = {} # map alias name to alias spec list
try:
for jsonspecs in jaliases:
spec = jsonutils.loads(jsonspecs)
jsonschema.validate(spec, _ALIAS_SCHEMA)
- # It should keep consistent behaviour in configuration
- # and extra specs to call strip() function.
- name = spec.pop("name").strip()
+
+ name = spec.pop('name').strip()
+ numa_policy = spec.pop('numa_policy', None)
+ if not numa_policy:
+ numa_policy = obj_fields.PCINUMAAffinityPolicy.LEGACY
+
dev_type = spec.pop('device_type', None)
if dev_type:
spec['dev_type'] = dev_type
+
if name not in aliases:
- aliases[name] = [spec]
- else:
- if aliases[name][0]["dev_type"] == spec["dev_type"]:
- aliases[name].append(spec)
- else:
- reason = _("Device type mismatch for alias '%s'") % name
- raise exception.PciInvalidAlias(reason=reason)
+ aliases[name] = (numa_policy, [spec])
+ continue
+ if aliases[name][0] != numa_policy:
+ reason = _("NUMA policy mismatch for alias '%s'") % name
+ raise exception.PciInvalidAlias(reason=reason)
+
+ if aliases[name][1][0]['dev_type'] != spec['dev_type']:
+ reason = _("Device type mismatch for alias '%s'") % name
+ raise exception.PciInvalidAlias(reason=reason)
+
+ aliases[name][1].append(spec)
except exception.PciInvalidAlias:
raise
- except Exception as e:
- raise exception.PciInvalidAlias(reason=six.text_type(e))
+ except jsonschema.exceptions.ValidationError as exc:
+ raise exception.PciInvalidAlias(reason=exc.message)
+ except Exception as exc:
+ raise exception.PciInvalidAlias(reason=six.text_type(exc))
return aliases
def _translate_alias_to_requests(alias_spec):
"""Generate complete pci requests from pci aliases in extra_spec."""
-
pci_aliases = _get_alias_from_config()
- pci_requests = [] # list of a specs dict
+ pci_requests = []
for name, count in [spec.split(':') for spec in alias_spec.split(',')]:
name = name.strip()
if name not in pci_aliases:
raise exception.PciRequestAliasNotDefined(alias=name)
- else:
- request = objects.InstancePCIRequest(
- count=int(count),
- spec=copy.deepcopy(pci_aliases[name]),
- alias_name=name)
- pci_requests.append(request)
+
+ count = int(count)
+ numa_policy, spec = pci_aliases[name]
+
+ pci_requests.append(objects.InstancePCIRequest(
+ count=count,
+ spec=spec,
+ alias_name=name,
+ numa_policy=numa_policy))
return pci_requests
def get_pci_requests_from_flavor(flavor):
- """Get flavor's pci request.
+ """Validate and return PCI requests.
+
+ The ``pci_passthrough:alias`` extra spec describes the flavor's PCI
+ requests. The extra spec's value is a comma-separated list of format
+ ``alias_name_x:count, alias_name_y:count, ... ``, where ``alias_name`` is
+ defined in ``pci.alias`` configurations.
- The pci_passthrough:alias scope in flavor extra_specs
- describes the flavor's pci requests, the key is
- 'pci_passthrough:alias' and the value has format
- 'alias_name_x:count, alias_name_y:count, ... '. The alias_name is
- defined in 'pci.alias' configurations.
+ The flavor's requirement is translated into a PCI requests list. Each
+ entry in the list is an instance of nova.objects.InstancePCIRequest with
+ four keys/attributes.
- The flavor's requirement is translated into a pci requests list.
- Each entry in the list is a dict-ish object with three keys/attributes.
- The 'specs' gives the pci device properties requirement, the 'count' gives
- the number of devices, and the optional 'alias_name' is the corresponding
- alias definition name.
+ - 'spec' states the PCI device properties requirement
+ - 'count' states the number of devices
+ - 'alias_name' (optional) is the corresponding alias definition name
+ - 'numa_policy' (optional) states the required NUMA affinity of the devices
- Example:
- Assume alias configuration is::
+ For example, assume alias configuration is::
- | {'vendor_id':'8086',
- | 'device_id':'1502',
- | 'name':'alias_1'}
+ {
+ 'vendor_id':'8086',
+ 'device_id':'1502',
+ 'name':'alias_1'
+ }
- The flavor extra specs includes: 'pci_passthrough:alias': 'alias_1:2'.
+ While flavor extra specs includes::
- The returned pci_requests are::
+ 'pci_passthrough:alias': 'alias_1:2'
- | pci_requests = [{'count':2,
- | 'specs': [{'vendor_id':'8086',
- | 'device_id':'1502'}],
- | 'alias_name': 'alias_1'}]
+ The returned ``pci_requests`` are::
- :param flavor: the flavor to be checked
- :returns: a list of pci requests
+ [{
+ 'count':2,
+ 'spec': [{'vendor_id':'8086', 'device_id':'1502'}],
+ 'alias_name': 'alias_1'
+ }]
+
+ :param flavor: The flavor to be checked
+ :returns: A list of PCI requests
+ :rtype: nova.objects.InstancePCIRequests
+ :raises: exception.PciRequestAliasNotDefined if an invalid PCI alias is
+ provided
+ :raises: exception.PciInvalidAlias if the configuration contains invalid
+ aliases.
"""
pci_requests = []
if ('extra_specs' in flavor and
'pci_passthrough:alias' in flavor['extra_specs']):
pci_requests = _translate_alias_to_requests(
flavor['extra_specs']['pci_passthrough:alias'])
+
return objects.InstancePCIRequests(requests=pci_requests)
diff --git a/nova/pci/stats.py b/nova/pci/stats.py
index 7754c068a9..4e7bc58996 100644
--- a/nova/pci/stats.py
+++ b/nova/pci/stats.py
@@ -151,7 +151,11 @@ class PciDeviceStats(object):
# a spec may be able to match multiple pools.
pools = self._filter_pools_for_spec(self.pools, spec)
if numa_cells:
- pools = self._filter_pools_for_numa_cells(pools, numa_cells)
+ numa_policy = None
+ if 'numa_policy' in request:
+ numa_policy = request.numa_policy
+ pools = self._filter_pools_for_numa_cells(
+ pools, numa_cells, numa_policy, count)
pools = self._filter_non_requested_pfs(request, pools)
# Failed to allocate the required number of devices
# Return the devices already allocated back to their pools
@@ -209,17 +213,68 @@ class PciDeviceStats(object):
return [pool for pool in pools
if utils.pci_device_prop_match(pool, request_specs)]
- @staticmethod
- def _filter_pools_for_numa_cells(pools, numa_cells):
- # Some systems don't report numa node info for pci devices, in
- # that case None is reported in pci_device.numa_node, by adding None
- # to numa_cells we allow assigning those devices to instances with
- # numa topology
- numa_cells = [None] + [cell.id for cell in numa_cells]
- # filter out pools which numa_node is not included in numa_cells
- return [pool for pool in pools if any(utils.pci_device_prop_match(
- pool, [{'numa_node': cell}])
- for cell in numa_cells)]
+ @classmethod
+ def _filter_pools_for_numa_cells(cls, pools, numa_cells, numa_policy,
+ requested_count):
+ """Filter out pools with the wrong NUMA affinity, if required.
+
+ Exclude pools that do not have *suitable* PCI NUMA affinity.
+ ``numa_policy`` determines what *suitable* means, being one of
+ PREFERRED (nice-to-have), LEGACY (must-have-if-available) and REQUIRED
+ (must-have). We iterate through the various policies in order of
+ strictness. This means that even if we only *prefer* PCI-NUMA affinity,
+ we will still attempt to provide it if possible.
+
+ :param pools: A list of PCI device pool dicts
+ :param numa_cells: A list of InstanceNUMACell objects whose ``id``
+ corresponds to the ``id`` of host NUMACells.
+ :param numa_policy: The PCI NUMA affinity policy to apply.
+ :param requested_count: The number of PCI devices requested.
+ :returns: A list of pools that can, together, provide at least
+ ``requested_count`` PCI devices with the level of NUMA affinity
+ required by ``numa_policy``.
+ """
+ # NOTE(stephenfin): We may wish to change the default policy at a later
+ # date
+ requested_policy = numa_policy or fields.PCINUMAAffinityPolicy.LEGACY
+ numa_cell_ids = [cell.id for cell in numa_cells]
+
+ # filter out pools whose numa_node is not included in numa_cell_ids
+ filtered_pools = [
+ pool for pool in pools if any(utils.pci_device_prop_match(
+ pool, [{'numa_node': cell}]) for cell in numa_cell_ids)]
+
+ # we can't apply a less strict policy than the one requested, so we
+ # need to return if we've demanded a NUMA affinity of REQUIRED.
+ # However, NUMA affinity is a good thing. If we can get enough devices
+ # with the stricter policy then we will use them.
+ if requested_policy == fields.PCINUMAAffinityPolicy.REQUIRED or sum(
+ pool['count'] for pool in filtered_pools) >= requested_count:
+ return filtered_pools
+
+ # some systems don't report NUMA node info for PCI devices, in which
+ # case None is reported in 'pci_device.numa_node'. The LEGACY policy
+ # allows us to use these devices so we include None in the list of
+ # suitable NUMA cells.
+ numa_cell_ids.append(None)
+
+ # filter out pools whose numa_node is not included in numa_cell_ids
+ filtered_pools = [
+ pool for pool in pools if any(utils.pci_device_prop_match(
+ pool, [{'numa_node': cell}]) for cell in numa_cell_ids)]
+
+ # once again, we can't apply a less strict policy than the one
+ # requested, so we need to return if we've demanded a NUMA affinity of
+ # LEGACY. Similarly, we will also return if we have enough devices to
+ # satisfy this somewhat strict policy.
+ if requested_policy == fields.PCINUMAAffinityPolicy.LEGACY or sum(
+ pool['count'] for pool in filtered_pools) >= requested_count:
+ return filtered_pools
+
+ # if we've got here, we're using the PREFERRED policy and weren't able
+ # to provide anything with stricter affinity. Use whatever devices you
+ # can, folks.
+ return pools
def _filter_non_requested_pfs(self, request, matching_pools):
# Remove SRIOV_PFs from pools, unless it has been explicitly requested
@@ -236,16 +291,46 @@ class PciDeviceStats(object):
if not pool.get('dev_type') == fields.PciDeviceType.SRIOV_PF]
def _apply_request(self, pools, request, numa_cells=None):
+ """Apply a PCI request.
+
+ Apply a PCI request against a given set of PCI device pools, which are
+ collections of devices with similar traits.
+
+ If ``numa_cells`` is provided then NUMA locality may be taken into
+ account, depending on the value of ``request.numa_policy``.
+
+ :param pools: A list of PCI device pool dicts
+ :param request: An InstancePCIRequest object describing the type,
+ quantity and required NUMA affinity of device(s) we want.
+ :param numa_cells: A list of InstanceNUMACell objects whose ``id``
+ corresponds to the ``id`` of host NUMACells.
+ """
# NOTE(vladikr): This code maybe open to race conditions.
# Two concurrent requests may succeed when called support_requests
# because this method does not remove related devices from the pools
count = request.count
+
+ # Firstly, let's exclude all devices that don't match our spec (e.g.
+ # they've got different PCI IDs or something)
matching_pools = self._filter_pools_for_spec(pools, request.spec)
+
+ # Next, let's exclude all devices that aren't on the correct NUMA node
+ # *assuming* we have devices and care about that, as determined by
+ # policy
if numa_cells:
+ numa_policy = None
+ if 'numa_policy' in request:
+ numa_policy = request.numa_policy
+
matching_pools = self._filter_pools_for_numa_cells(matching_pools,
- numa_cells)
+ numa_cells, numa_policy, count)
+
+ # Finally, if we're not requesting PFs then we should not use these.
+ # Exclude them.
matching_pools = self._filter_non_requested_pfs(request,
matching_pools)
+
+ # Do we still have any devices left?
if sum([pool['count'] for pool in matching_pools]) < count:
return False
else:
@@ -256,30 +341,47 @@ class PciDeviceStats(object):
return True
def support_requests(self, requests, numa_cells=None):
- """Check if the pci requests can be met.
+ """Determine if the PCI requests can be met.
- Scheduler checks compute node's PCI stats to decide if an
- instance can be scheduled into the node. Support does not
- mean real allocation.
- If numa_cells is provided then only devices contained in
- those nodes are considered.
+ Determine, based on a compute node's PCI stats, if an instance can be
+ scheduled on the node. **Support does not mean real allocation**.
+
+ If ``numa_cells`` is provided then NUMA locality may be taken into
+ account, depending on each request's ``numa_policy``.
+
+ :param requests: A list of InstancePCIRequest objects describing the
+ types, quantities and required NUMA affinities of devices we want.
+ :type requests: nova.objects.InstancePCIRequests
+ :param numa_cells: A list of InstanceNUMACell objects whose ``id``
+ corresponds to the ``id`` of host NUMACells, or None.
+ :returns: Whether this compute node can satisfy the given request.
"""
# note (yjiang5): this function has high possibility to fail,
# so no exception should be triggered for performance reason.
pools = copy.deepcopy(self.pools)
- return all([self._apply_request(pools, r, numa_cells)
- for r in requests])
+ return all(self._apply_request(pools, r, numa_cells) for r in requests)
def apply_requests(self, requests, numa_cells=None):
"""Apply PCI requests to the PCI stats.
This is used in multiple instance creation, when the scheduler has to
maintain how the resources are consumed by the instances.
- If numa_cells is provided then only devices contained in
- those nodes are considered.
+
+ If ``numa_cells`` is provided then NUMA locality may be taken into
+ account, depending on each request's ``numa_policy``.
+
+ :param requests: A list of InstancePCIRequest objects describing the
+ types, quantities and required NUMA affinities of devices we want.
+ :type requests: nova.objects.InstancePCIRequests
+ :param numa_cells: A list of InstanceNUMACell objects whose ``id``
+ corresponds to the ``id`` of host NUMACells, or None.
+ The PCI NUMA affinity policy applied when filtering devices from
+ ``numa_cells`` is read from each request's ``numa_policy``, if set.
+ :raises: exception.PciDeviceRequestFailed if this compute node cannot
+ satisfy the given request.
"""
- if not all([self._apply_request(self.pools, r, numa_cells)
- for r in requests]):
+ if not all(self._apply_request(self.pools, r, numa_cells)
+ for r in requests):
raise exception.PciDeviceRequestFailed(requests=requests)
def __iter__(self):