-rw-r--r--  nova/scheduler/filter_scheduler.py                          138
-rw-r--r--  nova/tests/unit/scheduler/test_filter_scheduler.py          283
-rw-r--r--  releasenotes/notes/placement-claims-844540aa7bf52b33.yaml    14
3 files changed, 420 insertions(+), 15 deletions(-)
diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py
index 3bdac14c58..dfca443b6f 100644
--- a/nova/scheduler/filter_scheduler.py
+++ b/nova/scheduler/filter_scheduler.py
@@ -28,6 +28,7 @@ import nova.conf
from nova import exception
from nova.i18n import _
from nova import rpc
+from nova.scheduler import client
from nova.scheduler import driver
CONF = nova.conf.CONF
@@ -39,12 +40,16 @@ class FilterScheduler(driver.Scheduler):
def __init__(self, *args, **kwargs):
super(FilterScheduler, self).__init__(*args, **kwargs)
self.notifier = rpc.get_notifier('scheduler')
+ scheduler_client = client.SchedulerClient()
+ self.placement_client = scheduler_client.reportclient
def select_destinations(self, context, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, provider_summaries):
"""Returns a sorted list of HostState objects that satisfy the
supplied request_spec.
+ These hosts will have already had their resources claimed in Placement.
+
:param context: The RequestContext object
:param spec_obj: The RequestSpec object
:param instance_uuids: List of UUIDs, one for each value of the spec
@@ -108,6 +113,8 @@ class FilterScheduler(driver.Scheduler):
"""Returns a list of hosts that meet the required specs, ordered by
their fitness.
+ These hosts will have already had their resources claimed in Placement.
+
:param context: The RequestContext object
:param spec_obj: The RequestSpec object
:param instance_uuids: List of UUIDs, one for each value of the spec
@@ -145,27 +152,144 @@ class FilterScheduler(driver.Scheduler):
hosts = self._get_all_host_states(elevated, spec_obj,
provider_summaries)
+ # A list of the instance UUIDs that were successfully claimed against
+ # in the placement API. If we are not able to successfully claim for
+ # all involved instances, we use this list to remove those allocations
+ # before returning
+ claimed_instance_uuids = []
+
selected_hosts = []
num_instances = spec_obj.num_instances
for num in range(num_instances):
hosts = self._get_sorted_hosts(spec_obj, hosts, num)
if not hosts:
+ # NOTE(jaypipes): If we get here, that means not all instances
+ # in instance_uuids were able to be matched to a selected host.
+ # So, let's clean up any already-claimed allocations here
+ # before breaking and returning
+ self._cleanup_allocations(claimed_instance_uuids)
break
- chosen_host = hosts[0]
+ if (instance_uuids is None or
+ not self.USES_ALLOCATION_CANDIDATES or
+ alloc_reqs_by_rp_uuid is None):
+ # Unfortunately, we still need to deal with older conductors
+ # that may not be passing in a list of instance_uuids. In those
+ # cases, obviously we can't claim resources because we don't
+ # have instance UUIDs to claim with, so we just grab the first
+ # host in the list of sorted hosts. In addition to older
+ # conductors, we need to support the caching scheduler, which
+ # doesn't use the placement API (and has
+ # USES_ALLOCATION_CANDIDATES = False) and therefore we skip all
+ # the claiming logic for that scheduler driver. Finally, if
+ # there was a problem communicating with the placement API,
+ # alloc_reqs_by_rp_uuid will be None, so we skip claiming in
+ # that case as well.
+ claimed_host = hosts[0]
+ else:
+ instance_uuid = instance_uuids[num]
+
+ # Attempt to claim the resources against one or more resource
+ # providers, looping over the sorted list of possible hosts
+ # looking for an allocation request that contains that host's
+ # resource provider UUID
+ claimed_host = None
+ for host in hosts:
+ cn_uuid = host.uuid
+ if cn_uuid not in alloc_reqs_by_rp_uuid:
+ LOG.debug("Found host state %s that wasn't in "
+ "allocation requests. Skipping.", cn_uuid)
+ continue
+
+ alloc_reqs = alloc_reqs_by_rp_uuid[cn_uuid]
+ if self._claim_resources(elevated, spec_obj, instance_uuid,
+ alloc_reqs):
+ claimed_host = host
+ break
- LOG.debug("Selected host: %(host)s", {'host': chosen_host})
- selected_hosts.append(chosen_host)
+ if claimed_host is None:
+ # We weren't able to claim resources in the placement API
+ # for any of the sorted hosts identified. So, clean up any
+ # successfully-claimed resources for prior instances in
+ # this request and return an empty list which will cause
+ # select_destinations() to raise NoValidHost
+ LOG.debug("Unable to successfully claim against any host.")
+ self._cleanup_allocations(claimed_instance_uuids)
+ return []
- # Now consume the resources so the filter/weights
- # will change for the next instance.
- chosen_host.consume_from_request(spec_obj)
+ claimed_instance_uuids.append(instance_uuid)
+
+ LOG.debug("Selected host: %(host)s", {'host': claimed_host})
+ selected_hosts.append(claimed_host)
+
+ # Now consume the resources so the filter/weights will change for
+ # the next instance.
+ claimed_host.consume_from_request(spec_obj)
if spec_obj.instance_group is not None:
- spec_obj.instance_group.hosts.append(chosen_host.host)
+ spec_obj.instance_group.hosts.append(claimed_host.host)
# hosts has to be not part of the updates when saving
spec_obj.instance_group.obj_reset_changes(['hosts'])
return selected_hosts
+ def _cleanup_allocations(self, instance_uuids):
+ """Removes allocations for the supplied instance UUIDs."""
+ if not instance_uuids:
+ return
+ LOG.debug("Cleaning up allocations for %s", instance_uuids)
+ for uuid in instance_uuids:
+ self.placement_client.delete_allocation_for_instance(uuid)
+
+ def _claim_resources(self, ctx, spec_obj, instance_uuid, alloc_reqs):
+ """Given an instance UUID (representing the consumer of resources), the
+ HostState object for the host that was chosen for the instance, and a
+ list of allocation request JSON objects, attempt to claim resources for
+ the instance in the placement API. Returns True if the claim process
+ was successful, False otherwise.
+
+ :param ctx: The RequestContext object
+ :param spec_obj: The RequestSpec object
+ :param instance_uuid: The UUID of the consuming instance
+ :param cn_uuid: UUID of the host to allocate against
+ :param alloc_reqs: A list of allocation request JSON objects that
+ allocate against (at least) the compute host
+ selected by the _schedule() method. These allocation
+ requests were constructed from a call to the GET
+ /allocation_candidates placement API call. Each
+ allocation_request satisfies the original request
+ for resources and can be supplied as-is (along with
+ the project and user ID to the placement API's
+ PUT /allocations/{consumer_uuid} call to claim
+ resources for the instance
+ """
+ LOG.debug("Attempting to claim resources in the placement API for "
+ "instance %s", instance_uuid)
+
+ project_id = spec_obj.project_id
+
+ # NOTE(jaypipes): So, the RequestSpec doesn't store the user_id,
+ # only the project_id, so we need to grab the user information from
+ # the context. Perhaps we should consider putting the user ID in
+ # the spec object?
+ user_id = ctx.user_id
+
+ # TODO(jaypipes): Loop through all allocation requests instead of just
+ # trying the first one. For now, since we'll likely want to order the
+ # allocation requests in the future based on information in the
+ # provider summaries, we'll just try to claim resources using the first
+ # allocation request
+ alloc_req = alloc_reqs[0]
+
+ claimed = self.placement_client.claim_resources(instance_uuid,
+ alloc_req, project_id, user_id)
+
+ if not claimed:
+ return False
+
+ LOG.debug("Successfully claimed resources for instance %s using "
+ "allocation request %s", instance_uuid, alloc_req)
+
+ return True
+
def _get_sorted_hosts(self, spec_obj, host_states, index):
"""Returns a list of HostState objects that match the required
scheduling constraints for the request spec object and have been sorted
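
The comments added to _schedule() above walk through the new per-instance claim loop: iterate over the sorted hosts, skip any host whose resource provider UUID has no allocation request, attempt a claim, fall back to the next host on failure, and clean up earlier claims if no host can be claimed at all. A minimal standalone sketch of that control flow, purely illustrative, using hypothetical stand-ins rather than the real Nova objects:

```python
# Illustrative sketch only, not the Nova implementation. claim_resources()
# stands in for FilterScheduler._claim_resources(), the host objects for
# HostState, and alloc_reqs_by_rp_uuid maps resource provider UUIDs to the
# allocation requests returned by GET /allocation_candidates.

def pick_and_claim(sorted_hosts, alloc_reqs_by_rp_uuid, instance_uuid,
                   claim_resources, claimed_instance_uuids):
    """Return the first host we could claim against, or None."""
    for host in sorted_hosts:
        alloc_reqs = alloc_reqs_by_rp_uuid.get(host.uuid)
        if not alloc_reqs:
            # Host state was not among the allocation candidates; skip it.
            continue
        if claim_resources(instance_uuid, alloc_reqs):
            # Record the claim so it can be rolled back if a later
            # instance in the same request cannot be placed.
            claimed_instance_uuids.append(instance_uuid)
            return host
    # No claim succeeded; the caller cleans up earlier claims and
    # select_destinations() eventually raises NoValidHost.
    return None
```
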
diff --git a/nova/tests/unit/scheduler/test_filter_scheduler.py b/nova/tests/unit/scheduler/test_filter_scheduler.py
index 2dcec4abee..1d40cc6315 100644
--- a/nova/tests/unit/scheduler/test_filter_scheduler.py
+++ b/nova/tests/unit/scheduler/test_filter_scheduler.py
@@ -20,6 +20,8 @@ import mock
from nova import exception
from nova import objects
+from nova.scheduler import client
+from nova.scheduler.client import report
from nova.scheduler import filter_scheduler
from nova.scheduler import host_manager
from nova.scheduler import utils as scheduler_utils
@@ -34,11 +36,27 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
driver_cls = filter_scheduler.FilterScheduler
+ @mock.patch('nova.scheduler.client.SchedulerClient')
+ def setUp(self, mock_client):
+ pc_client = mock.Mock(spec=report.SchedulerReportClient)
+ sched_client = mock.Mock(spec=client.SchedulerClient)
+ sched_client.reportclient = pc_client
+ mock_client.return_value = sched_client
+ self.placement_client = pc_client
+ super(FilterSchedulerTestCase, self).setUp()
+
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_claim_resources')
@mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
'_get_all_host_states')
@mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
'_get_sorted_hosts')
- def test_schedule(self, mock_get_hosts, mock_get_all_states):
+ def test_schedule_placement_bad_comms(self, mock_get_hosts,
+ mock_get_all_states, mock_claim):
+ """If there was a problem communicating with the Placement service,
+ alloc_reqs_by_rp_uuid will be None and we need to avoid trying to claim
+ in the Placement API.
+ """
spec_obj = objects.RequestSpec(
num_instances=1,
flavor=objects.Flavor(memory_mb=512,
@@ -50,11 +68,60 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
instance_group=None)
host_state = mock.Mock(spec=host_manager.HostState,
- host=mock.sentinel.host)
+ host=mock.sentinel.host, uuid=uuids.cn1)
all_host_states = [host_state]
mock_get_all_states.return_value = all_host_states
mock_get_hosts.return_value = all_host_states
- instance_uuids = [uuids.instance]
+
+ instance_uuids = [uuids.instance]
+ ctx = mock.Mock()
+ selected_hosts = self.driver._schedule(ctx, spec_obj,
+ instance_uuids, None, mock.sentinel.provider_summaries)
+
+ mock_get_all_states.assert_called_once_with(
+ ctx.elevated.return_value, spec_obj,
+ mock.sentinel.provider_summaries)
+ mock_get_hosts.assert_called_once_with(spec_obj, all_host_states, 0)
+
+ self.assertEqual(len(selected_hosts), 1)
+ self.assertEqual([host_state], selected_hosts)
+
+ # Ensure that we have consumed the resources on the chosen host states
+ host_state.consume_from_request.assert_called_once_with(spec_obj)
+
+ # And ensure we never called _claim_resources()
+ self.assertFalse(mock_claim.called)
+
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_claim_resources')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_all_host_states')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_sorted_hosts')
+ def test_schedule_old_conductor(self, mock_get_hosts,
+ mock_get_all_states, mock_claim):
+ """Old conductor can call scheduler without the instance_uuids
+ parameter. When this happens, we need to ensure we do not attempt to
+ claim resources in the placement API since obviously we need instance
+ UUIDs to perform those claims.
+ """
+ spec_obj = objects.RequestSpec(
+ num_instances=1,
+ flavor=objects.Flavor(memory_mb=512,
+ root_gb=512,
+ ephemeral_gb=0,
+ swap=0,
+ vcpus=1),
+ project_id=uuids.project_id,
+ instance_group=None)
+
+ host_state = mock.Mock(spec=host_manager.HostState,
+ host=mock.sentinel.host, uuid=uuids.cn1)
+ all_host_states = [host_state]
+ mock_get_all_states.return_value = all_host_states
+ mock_get_hosts.return_value = all_host_states
+
+ instance_uuids = None
ctx = mock.Mock()
selected_hosts = self.driver._schedule(ctx, spec_obj,
instance_uuids, mock.sentinel.alloc_reqs_by_rp_uuid,
@@ -71,12 +138,161 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
# Ensure that we have consumed the resources on the chosen host states
host_state.consume_from_request.assert_called_once_with(spec_obj)
+ # And ensure we never called _claim_resources()
+ self.assertFalse(mock_claim.called)
+
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_claim_resources')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_all_host_states')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_sorted_hosts')
+ def test_schedule_successful_claim(self, mock_get_hosts,
+ mock_get_all_states, mock_claim):
+ spec_obj = objects.RequestSpec(
+ num_instances=1,
+ flavor=objects.Flavor(memory_mb=512,
+ root_gb=512,
+ ephemeral_gb=0,
+ swap=0,
+ vcpus=1),
+ project_id=uuids.project_id,
+ instance_group=None)
+
+ host_state = mock.Mock(spec=host_manager.HostState,
+ host=mock.sentinel.host, uuid=uuids.cn1)
+ all_host_states = [host_state]
+ mock_get_all_states.return_value = all_host_states
+ mock_get_hosts.return_value = all_host_states
+ mock_claim.return_value = True
+
+ instance_uuids = [uuids.instance]
+ alloc_reqs_by_rp_uuid = {
+ uuids.cn1: [mock.sentinel.alloc_req],
+ }
+ ctx = mock.Mock()
+ selected_hosts = self.driver._schedule(ctx, spec_obj,
+ instance_uuids, alloc_reqs_by_rp_uuid,
+ mock.sentinel.provider_summaries)
+
+ mock_get_all_states.assert_called_once_with(
+ ctx.elevated.return_value, spec_obj,
+ mock.sentinel.provider_summaries)
+ mock_get_hosts.assert_called_once_with(spec_obj, all_host_states, 0)
+ mock_claim.assert_called_once_with(ctx.elevated.return_value, spec_obj,
+ uuids.instance, [mock.sentinel.alloc_req])
+
+ self.assertEqual(len(selected_hosts), 1)
+ self.assertEqual([host_state], selected_hosts)
+
+ # Ensure that we have consumed the resources on the chosen host states
+ host_state.consume_from_request.assert_called_once_with(spec_obj)
+
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_cleanup_allocations')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_claim_resources')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_all_host_states')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_sorted_hosts')
+ def test_schedule_unsuccessful_claim(self, mock_get_hosts,
+ mock_get_all_states, mock_claim, mock_cleanup):
+ """Tests that we return an empty list if we are unable to successfully
+ claim resources for the instance
+ """
+ spec_obj = objects.RequestSpec(
+ num_instances=1,
+ flavor=objects.Flavor(memory_mb=512,
+ root_gb=512,
+ ephemeral_gb=0,
+ swap=0,
+ vcpus=1),
+ project_id=uuids.project_id,
+ instance_group=None)
+
+ host_state = mock.Mock(spec=host_manager.HostState,
+ host=mock.sentinel.host, uuid=uuids.cn1)
+ all_host_states = [host_state]
+ mock_get_all_states.return_value = all_host_states
+ mock_get_hosts.return_value = all_host_states
+ mock_claim.return_value = False
+
+ instance_uuids = [uuids.instance]
+ alloc_reqs_by_rp_uuid = {
+ uuids.cn1: [mock.sentinel.alloc_req],
+ }
+ ctx = mock.Mock()
+ selected_hosts = self.driver._schedule(ctx, spec_obj,
+ instance_uuids, alloc_reqs_by_rp_uuid,
+ mock.sentinel.provider_summaries)
+
+ mock_get_all_states.assert_called_once_with(
+ ctx.elevated.return_value, spec_obj,
+ mock.sentinel.provider_summaries)
+ mock_get_hosts.assert_called_once_with(spec_obj, all_host_states, 0)
+ mock_claim.assert_called_once_with(ctx.elevated.return_value, spec_obj,
+ uuids.instance, [mock.sentinel.alloc_req])
+
+ self.assertEqual([], selected_hosts)
+
+ mock_cleanup.assert_called_once_with([])
+
+ # Ensure we did not consume resources on the host state since no
+ # claim succeeded
+ self.assertFalse(host_state.consume_from_request.called)
+
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_cleanup_allocations')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_claim_resources')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_all_host_states')
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_get_sorted_hosts')
+ def test_schedule_not_all_instance_clean_claimed(self, mock_get_hosts,
+ mock_get_all_states, mock_claim, mock_cleanup):
+ """Tests that we clean up previously-allocated instances if not all
+ instances could be scheduled
+ """
+ spec_obj = objects.RequestSpec(
+ num_instances=2,
+ flavor=objects.Flavor(memory_mb=512,
+ root_gb=512,
+ ephemeral_gb=0,
+ swap=0,
+ vcpus=1),
+ project_id=uuids.project_id,
+ instance_group=None)
+
+ host_state = mock.Mock(spec=host_manager.HostState,
+ host=mock.sentinel.host, uuid=uuids.cn1)
+ all_host_states = [host_state]
+ mock_get_all_states.return_value = all_host_states
+ mock_get_hosts.side_effect = [
+ all_host_states, # first return all the hosts (only one)
+ [], # then act as if no more hosts were found that meet criteria
+ ]
+ mock_claim.return_value = True
+
+ instance_uuids = [uuids.instance1, uuids.instance2]
+ alloc_reqs_by_rp_uuid = {
+ uuids.cn1: [mock.sentinel.alloc_req],
+ }
+ ctx = mock.Mock()
+ self.driver._schedule(ctx, spec_obj, instance_uuids,
+ alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries)
+
+ # Ensure we cleaned up the first successfully-claimed instance
+ mock_cleanup.assert_called_once_with([uuids.instance1])
+
+ @mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
+ '_claim_resources')
@mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
'_get_all_host_states')
@mock.patch('nova.scheduler.filter_scheduler.FilterScheduler.'
'_get_sorted_hosts')
def test_schedule_instance_group(self, mock_get_hosts,
- mock_get_all_states):
+ mock_get_all_states, mock_claim):
"""Test that since the request spec object contains an instance group
object, that upon choosing a host in the primary schedule loop,
that we update the request spec's instance group information
@@ -93,10 +309,18 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
project_id=uuids.project_id,
instance_group=ig)
- hs1 = mock.Mock(spec=host_manager.HostState, host='host1')
- hs2 = mock.Mock(spec=host_manager.HostState, host='host2')
+ hs1 = mock.Mock(spec=host_manager.HostState, host='host1',
+ uuid=uuids.cn1)
+ hs2 = mock.Mock(spec=host_manager.HostState, host='host2',
+ uuid=uuids.cn2)
all_host_states = [hs1, hs2]
mock_get_all_states.return_value = all_host_states
+ mock_claim.return_value = True
+
+ alloc_reqs_by_rp_uuid = {
+ uuids.cn1: [mock.sentinel.alloc_req_cn1],
+ uuids.cn2: [mock.sentinel.alloc_req_cn2],
+ }
# Simulate host 1 and host 2 being randomly returned first by
# _get_sorted_hosts() in the two iterations for each instance in
@@ -107,8 +331,17 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
]
ctx = mock.Mock()
self.driver._schedule(ctx, spec_obj, instance_uuids,
- mock.sentinel.alloc_reqs_by_rp_uuid,
- mock.sentinel.provider_summaries)
+ alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries)
+
+ # Check that we called _claim_resources() for both the first and second
+ # host state
+ claim_calls = [
+ mock.call(ctx.elevated.return_value, spec_obj,
+ uuids.instance0, [mock.sentinel.alloc_req_cn2]),
+ mock.call(ctx.elevated.return_value, spec_obj,
+ uuids.instance1, [mock.sentinel.alloc_req_cn1]),
+ ]
+ mock_claim.assert_has_calls(claim_calls)
# Check that _get_sorted_hosts() is called twice and that the
# second time, we pass it the hosts that were returned from
@@ -218,6 +451,40 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
# weighed hosts and thus return [hs1, hs2]
self.assertEqual([hs1, hs2], results)
+ def test_cleanup_allocations(self):
+ instance_uuids = []
+ # Check we don't do anything if there are no instance UUIDs to clean
+ # up allocations for
+ pc = self.placement_client
+
+ self.driver._cleanup_allocations(instance_uuids)
+ self.assertFalse(pc.delete_allocation_for_instance.called)
+
+ instance_uuids = [uuids.instance1, uuids.instance2]
+ self.driver._cleanup_allocations(instance_uuids)
+
+ exp_calls = [mock.call(uuids.instance1), mock.call(uuids.instance2)]
+ pc.delete_allocation_for_instance.assert_has_calls(exp_calls)
+
+ def test_claim_resources(self):
+ """Tests that when _schedule() calls _claim_resources(), that we
+ appropriately call the placement client to claim resources for the
+ instance.
+ """
+ ctx = mock.Mock(user_id=uuids.user_id)
+ spec_obj = mock.Mock(project_id=uuids.project_id)
+ instance_uuid = uuids.instance
+ alloc_reqs = [mock.sentinel.alloc_req]
+
+ pc = self.placement_client
+ pc.claim_resources.return_value = True
+
+ res = self.driver._claim_resources(ctx, spec_obj, instance_uuid,
+ alloc_reqs)
+
+ pc.claim_resources.assert_called_once_with(uuids.instance,
+ mock.sentinel.alloc_req, uuids.project_id, uuids.user_id)
+ self.assertTrue(res)
+
def test_add_retry_host(self):
retry = dict(num_attempts=1, hosts=[])
filter_properties = dict(retry=retry)
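
test_claim_resources() above checks that the driver hands the first allocation request, plus the project and user IDs, to the placement report client. As a rough, hedged illustration of what such a claim amounts to at the HTTP level: the `session` object and `PLACEMENT_URL` below are assumptions standing in for the report client's authenticated session and endpoint handling, and the allocation request layout is simply whatever GET /allocation_candidates returned.

```python
# Hedged sketch of a placement claim, not the real SchedulerReportClient
# code. `session` is assumed to be a requests.Session-like object with
# authentication already configured; PLACEMENT_URL is a hypothetical
# placement endpoint.

PLACEMENT_URL = 'http://placement.example.com'


def claim_resources(session, instance_uuid, alloc_req, project_id, user_id):
    payload = dict(alloc_req)  # allocation request from the candidates call
    payload['project_id'] = project_id
    payload['user_id'] = user_id
    resp = session.put(
        '%s/allocations/%s' % (PLACEMENT_URL, instance_uuid), json=payload)
    # 204 No Content means the claim succeeded. A conflict (typically
    # another scheduler process claiming the same resources first) fails
    # the claim, and _schedule() moves on to the next candidate host.
    return resp.status_code == 204
```

This conflict-and-retry behaviour is what the release note below describes.
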
diff --git a/releasenotes/notes/placement-claims-844540aa7bf52b33.yaml b/releasenotes/notes/placement-claims-844540aa7bf52b33.yaml
new file mode 100644
index 0000000000..b065198359
--- /dev/null
+++ b/releasenotes/notes/placement-claims-844540aa7bf52b33.yaml
@@ -0,0 +1,14 @@
+---
+other:
+ - |
+ The filter scheduler will now attempt to claim resources in the
+ placement API after determining a list of potential hosts. We attempt
+ to claim these resources for each instance in the build request, and
+ if a claim does not succeed, we retry the claim against the next
+ potential host the scheduler selected. This retry process can attempt
+ claims against a large number of hosts; we do not limit the number of
+ hosts that may be tried. Claims for resources may fail because another
+ scheduler process has concurrently claimed resources against the same
+ compute node. Such concurrent claims are normal, and the resulting
+ retry of a claim request should be unusual but harmless.