Diffstat (limited to 'nova/scheduler/manager.py')
-rw-r--r-- | nova/scheduler/manager.py | 140
1 file changed, 119 insertions, 21 deletions
diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py
index 10b330653d..620519d403 100644
--- a/nova/scheduler/manager.py
+++ b/nova/scheduler/manager.py
@@ -20,8 +20,10 @@ Scheduler Service
 """
 
 import collections
+import copy
 import random
 
+from keystoneauth1 import exceptions as ks_exc
 from oslo_log import log as logging
 import oslo_messaging as messaging
 from oslo_serialization import jsonutils
@@ -66,10 +68,42 @@ class SchedulerManager(manager.Manager):
         self.host_manager = host_manager.HostManager()
         self.servicegroup_api = servicegroup.API()
         self.notifier = rpc.get_notifier('scheduler')
-        self.placement_client = report.report_client_singleton()
+        self._placement_client = None
+
+        try:
+            # Test our placement client during initialization
+            self.placement_client
+        except (ks_exc.EndpointNotFound,
+                ks_exc.DiscoveryFailure,
+                ks_exc.RequestTimeout,
+                ks_exc.GatewayTimeout,
+                ks_exc.ConnectFailure) as e:
+            # Non-fatal, likely transient (although not definitely);
+            # continue startup but log the warning so that when things
+            # fail later, it will be clear why we cannot do certain
+            # things.
+            LOG.warning('Unable to initialize placement client (%s); '
+                        'Continuing with startup, but scheduling '
+                        'will not be possible.', e)
+        except (ks_exc.MissingAuthPlugin,
+                ks_exc.Unauthorized) as e:
+            # This is almost definitely fatal mis-configuration. The
+            # Unauthorized error might be transient, but it is
+            # probably reasonable to consider it fatal.
+            LOG.error('Fatal error initializing placement client; '
+                      'config is incorrect or incomplete: %s', e)
+            raise
+        except Exception as e:
+            # Unknown/unexpected errors here are fatal
+            LOG.error('Fatal error initializing placement client: %s', e)
+            raise
 
         super().__init__(service_name='scheduler', *args, **kwargs)
 
+    @property
+    def placement_client(self):
+        return report.report_client_singleton()
+
     @periodic_task.periodic_task(
         spacing=CONF.scheduler.discover_hosts_in_cells_interval,
         run_immediately=True)
@@ -299,12 +333,29 @@ class SchedulerManager(manager.Manager):
         # host, we virtually consume resources on it so subsequent
         # selections can adjust accordingly.
 
+        def hosts_with_alloc_reqs(hosts_gen):
+            """Extend the HostState objects returned by the generator with
+            the allocation requests of that host
+            """
+            for host in hosts_gen:
+                host.allocation_candidates = copy.deepcopy(
+                    alloc_reqs_by_rp_uuid[host.uuid])
+                yield host
+
         # Note: remember, we are using a generator-iterator here. So only
         # traverse this list once. This can bite you if the hosts
         # are being scanned in a filter or weighing function.
         hosts = self._get_all_host_states(
             elevated, spec_obj, provider_summaries)
 
+        # alloc_reqs_by_rp_uuid is None during rebuild, so this means we
+        # cannot run filters that use allocation candidates during rebuild
+        if alloc_reqs_by_rp_uuid is not None:
+            # wrap the generator to extend the HostState objects with the
+            # allocation requests for that given host. This is needed to
+            # support scheduler filters filtering on allocation candidates.
+            hosts = hosts_with_alloc_reqs(hosts)
+
         # NOTE(sbauza): The RequestSpec.num_instances field contains the number
         # of instances created when the RequestSpec was used to first boot some
         # instances. This is incorrect when doing a move or resize operation,
@@ -332,6 +383,13 @@
             # the older dict format representing HostState objects.
             # TODO(stephenfin): Remove this when we bump the scheduler RPC API
             # version to 5.0
+            # NOTE(gibi): We cannot remove this branch as it is actively used
+            # when nova calls the scheduler during rebuild (not evacuate) to
+            # check if the current host is still good for the new image used
+            # for the rebuild. In this case placement cannot be used to
+            # generate candidates as that would require space on the current
+            # compute for double allocation. So no allocation candidates for
+            # rebuild and therefore alloc_reqs_by_rp_uuid is None
             return self._legacy_find_hosts(
                 context, num_instances, spec_obj, hosts, num_alts,
                 instance_uuids=instance_uuids)
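For context, the hosts_with_alloc_reqs wrapper added above decorates each HostState as it is pulled from the generator, so the candidates are attached lazily and the generator stays single-pass. A minimal standalone sketch of this pattern (the names below are illustrative, not Nova's):

    import copy

    class Host:
        def __init__(self, uuid):
            self.uuid = uuid
            self.allocation_candidates = None

    def with_candidates(hosts_gen, candidates_by_uuid):
        # Enrich each host lazily; callers must still traverse only once.
        for host in hosts_gen:
            # deepcopy so later per-host mutation (e.g. dropping a consumed
            # candidate) cannot corrupt the shared source mapping
            host.allocation_candidates = copy.deepcopy(
                candidates_by_uuid[host.uuid])
            yield host

    hosts = (Host(u) for u in ('rp-1', 'rp-2'))
    cands = {'rp-1': [{'allocations': {}}], 'rp-2': [{'allocations': {}}]}
    for host in with_candidates(hosts, cands):
        print(host.uuid, len(host.allocation_candidates))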
@@ -345,6 +403,9 @@
         # The list of hosts that have been selected (and claimed).
         claimed_hosts = []
 
+        # The allocation request allocated on the given claimed host
+        claimed_alloc_reqs = []
+
         for num, instance_uuid in enumerate(instance_uuids):
             # In a multi-create request, the first request spec from the list
             # is passed to the scheduler and that request spec's instance_uuid
@@ -371,21 +432,20 @@
             # resource provider UUID
             claimed_host = None
             for host in hosts:
-                cn_uuid = host.uuid
-                if cn_uuid not in alloc_reqs_by_rp_uuid:
-                    msg = ("A host state with uuid = '%s' that did not have a "
-                           "matching allocation_request was encountered while "
-                           "scheduling. This host was skipped.")
-                    LOG.debug(msg, cn_uuid)
+                if not host.allocation_candidates:
+                    LOG.debug(
+                        "The nova scheduler removed every allocation "
+                        "candidate for host %s so this host was skipped.",
+                        host
+                    )
                     continue
 
-                alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
                 # TODO(jaypipes): Loop through all allocation_requests instead
                 # of just trying the first one. For now, since we'll likely
                 # want to order the allocation_requests in the future based on
                 # information in the provider summaries, we'll just try to
                 # claim resources using the first allocation_request
-                alloc_req = alloc_reqs[0]
+                alloc_req = host.allocation_candidates[0]
                 if utils.claim_resources(
                     elevated, self.placement_client, spec_obj, instance_uuid,
                     alloc_req,
@@ -405,6 +465,15 @@
 
             claimed_instance_uuids.append(instance_uuid)
             claimed_hosts.append(claimed_host)
+            claimed_alloc_reqs.append(alloc_req)
+
+            # update the provider mapping in the request spec based
+            # on the allocated candidate as the _consume_selected_host depends
+            # on this information to temporarily consume PCI devices tracked
+            # in placement
+            for request_group in spec_obj.requested_resources:
+                request_group.provider_uuids = alloc_req[
+                    'mappings'][request_group.requester_id]
 
             # Now consume the resources so the filter/weights will change for
             # the next instance.
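The provider-mapping update above indexes the claimed candidate's 'mappings' key. Placement returns this mapping (in recent API microversions) as a dict from request group suffix to the list of resource provider UUIDs that satisfied that group; the sketch below shows the assumed shape, with hypothetical values:

    # Hypothetical allocation request, shaped like a placement
    # GET /allocation_candidates entry (values are made up)
    alloc_req = {
        'allocations': {
            'compute-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
            'pci-rp-uuid': {'resources': {'CUSTOM_PCI_DEV': 1}},
        },
        # request group suffix -> providers that satisfied that group
        'mappings': {'_pci-0': ['pci-rp-uuid']},
    }

    class RequestGroup:          # stand-in for Nova's request group object
        requester_id = '_pci-0'
        provider_uuids = []

    group = RequestGroup()
    group.provider_uuids = alloc_req['mappings'][group.requester_id]
    print(group.provider_uuids)  # ['pci-rp-uuid']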
@@ -416,11 +485,19 @@
         self._ensure_sufficient_hosts(
             context, claimed_hosts, num_instances, claimed_instance_uuids)
 
-        # We have selected and claimed hosts for each instance. Now we need to
-        # find alternates for each host.
+        # We have selected and claimed hosts for each instance along with a
+        # claimed allocation request. Now we need to find alternates for each
+        # host.
         return self._get_alternate_hosts(
-            claimed_hosts, spec_obj, hosts, num, num_alts,
-            alloc_reqs_by_rp_uuid, allocation_request_version)
+            claimed_hosts,
+            spec_obj,
+            hosts,
+            num,
+            num_alts,
+            alloc_reqs_by_rp_uuid,
+            allocation_request_version,
+            claimed_alloc_reqs,
+        )
 
     def _ensure_sufficient_hosts(
         self, context, hosts, required_count, claimed_uuids=None,
     ):
@@ -532,7 +609,21 @@
     def _get_alternate_hosts(
         self, selected_hosts, spec_obj, hosts, index, num_alts,
         alloc_reqs_by_rp_uuid=None, allocation_request_version=None,
+        selected_alloc_reqs=None,
     ):
+        """Generate the main Selection and possible alternate Selection
+        objects for each "instance".
+
+        :param selected_hosts: This is a list of HostState objects. Each
+            HostState represents the main selection for a given instance being
+            scheduled (we can have multiple instances during multi create).
+        :param selected_alloc_reqs: This is a list of allocation requests that
+            are already allocated in placement for the main Selection for each
+            instance. This list matches selected_hosts by index, so
+            for the first instance the selected host is selected_hosts[0] and
+            the already allocated placement candidate is
+            selected_alloc_reqs[0].
+        """
         # We only need to filter/weigh the hosts again if we're dealing with
         # more than one instance and are going to be picking alternates.
         if index > 0 and num_alts > 0:
@@ -546,11 +637,10 @@
         # representing the selected host along with alternates from the same
         # cell.
         selections_to_return = []
-        for selected_host in selected_hosts:
+        for i, selected_host in enumerate(selected_hosts):
             # This is the list of hosts for one particular instance.
             if alloc_reqs_by_rp_uuid:
-                selected_alloc_req = alloc_reqs_by_rp_uuid.get(
-                    selected_host.uuid)[0]
+                selected_alloc_req = selected_alloc_reqs[i]
             else:
                 selected_alloc_req = None
 
@@ -571,15 +661,17 @@
                 if len(selected_plus_alts) >= num_alts + 1:
                     break
 
+                # TODO(gibi): In theory we could generate alternatives on the
+                # same host if that host has different possible allocation
+                # candidates for the request. But we don't do that today.
                 if host.cell_uuid == cell_uuid and host not in selected_hosts:
                     if alloc_reqs_by_rp_uuid is not None:
-                        alt_uuid = host.uuid
-                        if alt_uuid not in alloc_reqs_by_rp_uuid:
+                        if not host.allocation_candidates:
                             msg = ("A host state with uuid = '%s' that did "
-                                   "not have a matching allocation_request "
+                                   "not have any remaining allocation_request "
                                    "was encountered while scheduling. This "
                                    "host was skipped.")
-                            LOG.debug(msg, alt_uuid)
+                            LOG.debug(msg, host.uuid)
                             continue
 
                         # TODO(jaypipes): Loop through all allocation_requests
@@ -588,7 +680,13 @@
                         # the future based on information in the provider
                         # summaries, we'll just try to claim resources using
                         # the first allocation_request
-                        alloc_req = alloc_reqs_by_rp_uuid[alt_uuid][0]
+                        # NOTE(gibi): we are using, and re-using, allocation
+                        # candidates for alternatives here. This is OK as
+                        # these candidates are not yet allocated in placement
+                        # and we don't know if an alternate will ever be used.
+                        # To increase our success we could try to use a
+                        # different candidate for each alternative, though.
+                        alloc_req = host.allocation_candidates[0]
                         alt_selection = objects.Selection.from_host_state(
                             host, alloc_req, allocation_request_version)
                     else:
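Taken together, the claimed hosts and their claimed allocation requests stay aligned by index, while alternates reuse the first remaining candidate of other hosts without claiming it in placement. A simplified sketch of that relationship (helper and names are illustrative, not Nova's actual Selection handling):

    def build_selections(selected_hosts, selected_alloc_reqs, hosts, num_alts):
        selections = []
        for i, selected_host in enumerate(selected_hosts):
            # The main selection: host and already-claimed candidate are
            # matched by index across the two lists.
            chosen = [(selected_host, selected_alloc_reqs[i])]
            for host in hosts:
                if len(chosen) >= num_alts + 1:
                    break
                if host is selected_host or not host.allocation_candidates:
                    continue
                # Alternates are not claimed in placement yet, so reusing a
                # host's first candidate across instances is safe here.
                chosen.append((host, host.allocation_candidates[0]))
            selections.append(chosen)
        return selections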