-rw-r--r--  .zuul.yaml  13
-rw-r--r--  doc/source/admin/availability-zones.rst  2
-rw-r--r--  doc/source/admin/scheduling.rst  31
-rw-r--r--  doc/source/reference/isolate-aggregates.rst  2
-rw-r--r--  nova/compute/manager.py  14
-rw-r--r--  nova/compute/resource_tracker.py  38
-rw-r--r--  nova/conf/scheduler.py  43
-rw-r--r--  nova/scheduler/weights/hypervisor_version.py  39
-rw-r--r--  nova/tests/unit/compute/test_compute.py  19
-rw-r--r--  nova/tests/unit/compute/test_compute_mgr.py  43
-rw-r--r--  nova/tests/unit/compute/test_resource_tracker.py  19
-rw-r--r--  nova/tests/unit/compute/test_shelve.py  4
-rw-r--r--  nova/tests/unit/scheduler/weights/test_weights_hypervisor_version.py  97
-rw-r--r--  releasenotes/notes/hypervisor-version-weigher-d0bba77e720edafe.yaml  20
14 files changed, 325 insertions, 59 deletions
diff --git a/.zuul.yaml b/.zuul.yaml
index abe4d2fa4a..9c41476e68 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -658,15 +658,6 @@
image_conversion:
output_format: raw
-# TODO(gmann): As per the 2023.1 testing runtime, we need to run at least
-# one job on Focal. This job can be removed as per the future testing
-# runtime (whenever we drop the Ubuntu Focal testing).
-- job:
- name: tempest-integrated-compute-ubuntu-focal
- description: This is integrated compute job testing on Ubuntu Focal(20.04)
- parent: tempest-integrated-compute
- nodeset: openstack-single-node-focal
-
# TODO(gmann): Remove these jobs once all the required services for the integrated
# compute gate (Cinder, Glance, Neutron) by default enable scope and new
# defaults which means all the nova jobs will be tested with new RBAC in
@@ -753,8 +744,6 @@
- ^setup.cfg$
- ^tools/.*$
- ^tox.ini$
- - tempest-integrated-compute-ubuntu-focal:
- irrelevant-files: *policies-irrelevant-files
- tempest-integrated-compute-enforce-scope-new-defaults:
irrelevant-files: *policies-irrelevant-files
- grenade-skip-level-always:
@@ -790,8 +779,6 @@
- ^(?!nova/network/.*)(?!nova/virt/libvirt/vif.py).*$
- tempest-integrated-compute:
irrelevant-files: *policies-irrelevant-files
- - tempest-integrated-compute-ubuntu-focal:
- irrelevant-files: *policies-irrelevant-files
- tempest-integrated-compute-enforce-scope-new-defaults:
irrelevant-files: *policies-irrelevant-files
- grenade-skip-level-always:
diff --git a/doc/source/admin/availability-zones.rst b/doc/source/admin/availability-zones.rst
index aff8a0ab31..28c4451b60 100644
--- a/doc/source/admin/availability-zones.rst
+++ b/doc/source/admin/availability-zones.rst
@@ -39,7 +39,7 @@ when comparing availability zones and host aggregates:
The use of the default availability zone name in requests can be very
error-prone. Since the user can see the list of availability zones, they
have no way to know whether the default availability zone name (currently
- ``nova``) is provided because an host belongs to an aggregate whose AZ
+ ``nova``) is provided because a host belongs to an aggregate whose AZ
metadata key is set to ``nova``, or because there is at least one host
not belonging to any aggregate. Consequently, it is highly recommended
for users to never ever ask for booting an instance by specifying an
diff --git a/doc/source/admin/scheduling.rst b/doc/source/admin/scheduling.rst
index 9071c92ac9..353514ab55 100644
--- a/doc/source/admin/scheduling.rst
+++ b/doc/source/admin/scheduling.rst
@@ -1049,6 +1049,37 @@ Otherwise, it will fall back to the
more than one value is found for a host in aggregate metadata, the minimum
value will be used.
+``HypervisorVersionWeigher``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 28.0.0 (Bobcat)
+
+Weigh hosts by their relative hypervisor version reported by the virt driver.
+
+While the hypervisor_version field is an int for all virt drivers,
+each nova virt driver uses a different algorithm to convert the hypervisor-specific
+version sequence into an int. As such, the values are not directly comparable between
+hosts with different hypervisors.
+
+For example, the ironic virt driver uses the ironic API micro-version as the hypervisor
+version for a given node, while the libvirt driver uses the libvirt version itself:
+libvirt ``7.1.123`` becomes ``700100123``, Ironic ``1.82`` becomes ``1``, and
+Hyper-V ``6.3`` becomes ``6003``.
+
+If you have a mixed ironic and non-ironic deployment, nothing special needs to
+be done: ironic nodes are scheduled using custom resource classes, so ironic
+flavors will never match non-ironic compute nodes.
+
+If a deployment has multiple non-ironic virt drivers, it is recommended to use
+aggregates to group hosts by virt driver. While this is not strictly required,
+it is desirable to avoid biasing the scheduler towards one virt driver. See
+:ref:`filtering_hosts_by_isolating_aggregates` and :ref:`AggregateImagePropertiesIsolation`
+for more information.
+
+The default behavior of the HypervisorVersionWeigher is to prefer newer hosts.
+If you prefer to invert the behavior, set the
+:oslo.config:option:`filter_scheduler.hypervisor_version_weight_multiplier` option
+to a negative number; the weighing then has the opposite effect of the default.
Utilization-aware scheduling
----------------------------
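As a rough, self-contained illustration of the ``HypervisorVersionWeigher`` behaviour documented above: the scheduler normalizes each weigher's raw values across the candidate hosts to the 0..1 range and scales them by that weigher's multiplier before combining them with the other weighers. The helper names below are invented for this sketch and are not nova's API; only the resulting ordering is meant to match the documentation (the host values mirror the unit tests added in this change).

.. code-block:: python

   # Illustrative model of the weighing step; not nova code.
   def normalize(values):
       """Min-max normalize raw hypervisor versions to the 0..1 range."""
       lo, hi = min(values), max(values)
       if lo == hi:
           return [0.0 for _ in values]
       return [(v - lo) / (hi - lo) for v in values]

   def rank(hosts, multiplier=1.0):
       """Return host names ordered best-first by hypervisor version weight."""
       raw = [version or 0 for _, version in hosts]   # None is treated as 0
       scores = [multiplier * w for w in normalize(raw)]
       ranked = sorted(zip(scores, (name for name, _ in hosts)), reverse=True)
       return [name for _, name in ranked]

   hosts = [('host1', 1), ('host2', 200), ('host3', 100), ('host4', 1000)]
   print(rank(hosts))        # ['host4', 'host2', 'host3', 'host1'] -- newer wins
   print(rank(hosts, -1.0))  # ['host1', ...] -- a negative multiplier inverts it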
diff --git a/doc/source/reference/isolate-aggregates.rst b/doc/source/reference/isolate-aggregates.rst
index f5487df912..7b493f4db9 100644
--- a/doc/source/reference/isolate-aggregates.rst
+++ b/doc/source/reference/isolate-aggregates.rst
@@ -13,6 +13,8 @@
License for the specific language governing permissions and limitations
under the License.
+.. _filtering_hosts_by_isolating_aggregates:
+
Filtering hosts by isolating aggregates
=======================================
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 5ea71827fc..5c42aa4d89 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -3791,9 +3791,21 @@ class ComputeManager(manager.Manager):
try:
compute_node = self._get_compute_info(context, self.host)
scheduled_node = compute_node.hypervisor_hostname
- except exception.ComputeHostNotFound:
+ except exception.ComputeHostNotFound as e:
+ # This means we were asked to rebuild one of our own
+ # instances, or another instance as a target of an
+ # evacuation, but we are unable to find a matching compute
+ # node.
LOG.exception('Failed to get compute_info for %s',
self.host)
+ self._set_migration_status(migration, 'failed')
+ self._notify_instance_rebuild_error(context, instance, e,
+ bdms)
+ raise exception.InstanceFaultRollback(
+ inner_exception=exception.BuildAbortException(
+ instance_uuid=instance.uuid,
+ reason=e.format_message()))
+
else:
scheduled_node = instance.node
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 3f911f3708..9ee6670c17 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -146,16 +146,20 @@ class ResourceTracker(object):
during the instance build.
"""
if self.disabled(nodename):
- # instance_claim() was called before update_available_resource()
- # (which ensures that a compute node exists for nodename). We
- # shouldn't get here but in case we do, just set the instance's
- # host and nodename attribute (probably incorrect) and return a
- # NoopClaim.
- # TODO(jaypipes): Remove all the disabled junk from the resource
- # tracker. Servicegroup API-level active-checking belongs in the
- # nova-compute manager.
- self._set_instance_host_and_node(instance, nodename)
- return claims.NopClaim()
+ # If we get here, it means we are trying to claim for an instance
+ # that was scheduled to a node that we do not have in our list,
+ # or is in some other way unmanageable by this node. This would
+ # mean that we are unable to account for resources, create
+ # allocations in placement, or do any of the other accounting
+ # necessary for this to work. In the past, this situation was
+ # effectively ignored silently, but in a world where we track
+ # resources with placement and instance assignment to compute nodes
+ # by service, we can no longer be leaky.
+ raise exception.ComputeResourcesUnavailable(
+ ('Attempt to claim resources for instance %(inst)s '
+ 'on unknown node %(node)s failed') % {
+ 'inst': instance.uuid,
+ 'node': nodename})
# sanity checks:
if instance.host:
@@ -280,9 +284,17 @@ class ResourceTracker(object):
context, instance, new_flavor, nodename, move_type)
if self.disabled(nodename):
- # compute_driver doesn't support resource tracking, just
- # generate the migration record and continue the resize:
- return claims.NopClaim(migration=migration)
+ # This means we were asked to accept an incoming migration to a
+ # node that we do not own or track. We really should not get here,
+ # but if we do, we must refuse to continue with the migration
+ # process, since we cannot account for those resources, create
+ # allocations in placement, etc. This has been a silent resource
+ # leak in the past, but it must be a hard failure now.
+ raise exception.ComputeResourcesUnavailable(
+ ('Attempt to claim move resources for instance %(inst)s on '
+ 'unknown node %(node)s failed') % {
+ 'inst': instance.uuid,
+                    'node': nodename})
cn = self.compute_nodes[nodename]
diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py
index c75bd07c5b..c7aa2ad76d 100644
--- a/nova/conf/scheduler.py
+++ b/nova/conf/scheduler.py
@@ -464,6 +464,49 @@ Possible values:
* An integer or float value, where the value corresponds to the multiplier
ratio for this weigher.
"""),
+ cfg.FloatOpt("hypervisor_version_weight_multiplier",
+ default=1.0,
+ help="""
+Hypervisor Version weight multiplier ratio.
+
+The multiplier is used for weighting hosts based on the reported
+hypervisor version.
+Negative numbers indicate preferring older hosts; the default is to prefer
+newer hosts to aid with upgrades.
+
+Possible values:
+
+* An integer or float value, where the value corresponds to the multiplier
+ ratio for this weigher.
+
+Example:
+
+* Strongly prefer older hosts
+
+ .. code-block:: ini
+
+ [filter_scheduler]
+ hypervisor_version_weight_multiplier=-1000
+
+
+* Moderately prefer newer hosts
+
+ .. code-block:: ini
+
+ [filter_scheduler]
+ hypervisor_version_weight_multiplier=2.5
+
+* Disable weigher influence
+
+ .. code-block:: ini
+
+ [filter_scheduler]
+ hypervisor_version_weight_multiplier=0
+
+Related options:
+
+* ``[filter_scheduler] weight_classes``
+"""),
cfg.FloatOpt("io_ops_weight_multiplier",
default=-1.0,
help="""
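A brief aside on the magnitudes used in the examples above: roughly speaking, each enabled weigher contributes ``multiplier * normalized_value`` (with the normalized value in the 0..1 range) to a host's total score, and the per-weigher contributions are summed, so the size of ``hypervisor_version_weight_multiplier`` decides how strongly this weigher competes with the others. A back-of-the-envelope sketch with invented numbers, not nova code:

.. code-block:: python

   # Invented numbers, simplified math: one host that has the newest
   # hypervisor (normalized hypervisor_version weight 1.0) but comparatively
   # little free RAM (normalized RAM weight 0.2), scored under the example
   # multipliers from the help text above.
   def total_score(hv_multiplier, ram_multiplier=1.0):
       return hv_multiplier * 1.0 + ram_multiplier * 0.2

   for m in (1.0, 2.5, 0.0, -1000.0):
       print(m, total_score(m))
   # 1.0 and 2.5 favour the host increasingly strongly, 0.0 removes this
   # weigher's influence entirely, and -1000.0 heavily penalizes the newest
   # host.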
diff --git a/nova/scheduler/weights/hypervisor_version.py b/nova/scheduler/weights/hypervisor_version.py
new file mode 100644
index 0000000000..0cd7b0a824
--- /dev/null
+++ b/nova/scheduler/weights/hypervisor_version.py
@@ -0,0 +1,39 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+Hypervisor Version Weigher. Weigh hosts by their relative hypervisor version.
+
+The default is to select newer hosts. If you prefer to invert the behavior,
+set the 'hypervisor_version_weight_multiplier' option to a negative number;
+the weighing then has the opposite effect of the default.
+"""
+
+import nova.conf
+from nova.scheduler import utils
+from nova.scheduler import weights
+
+CONF = nova.conf.CONF
+
+
+class HypervisorVersionWeigher(weights.BaseHostWeigher):
+
+ def weight_multiplier(self, host_state):
+ """Override the weight multiplier."""
+ return utils.get_weight_multiplier(
+ host_state, 'hypervisor_version_weight_multiplier',
+ CONF.filter_scheduler.hypervisor_version_weight_multiplier)
+
+ def _weigh_object(self, host_state, weight_properties):
+ """Higher weights win. We want newer hosts by default."""
+ # convert None to 0
+ return host_state.hypervisor_version or 0
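One behavioural note on the module above: because ``weight_multiplier()`` delegates to ``utils.get_weight_multiplier``, the multiplier can also be overridden per host aggregate by setting aggregate metadata with the same key as the config option, with the config value acting as the fallback (this mirrors how the other weighers that use this helper behave). A minimal sketch of that lookup, assuming a development environment where nova is importable; the mocked host state is a stand-in for the scheduler's real ``HostState``:

.. code-block:: python

   # Sketch only: exercising the per-aggregate override path used by
   # weight_multiplier() above. In nova the HostState's aggregates are
   # populated by the scheduler; a mock stands in for it here.
   from unittest import mock

   from nova.scheduler import utils

   host_state = mock.Mock()
   host_state.aggregates = [
       mock.Mock(metadata={'hypervisor_version_weight_multiplier': '-1.0'}),
   ]

   # Aggregate metadata, when present and parseable, overrides the config
   # value, which is passed in as the fallback.
   print(utils.get_weight_multiplier(
       host_state, 'hypervisor_version_weight_multiplier', 1.0))  # -1.0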
diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py
index f62468544d..36bcd368dc 100644
--- a/nova/tests/unit/compute/test_compute.py
+++ b/nova/tests/unit/compute/test_compute.py
@@ -13515,7 +13515,8 @@ class EvacuateHostTestCase(BaseTestCase):
super(EvacuateHostTestCase, self).tearDown()
def _rebuild(self, on_shared_storage=True, migration=None,
- send_node=False, vm_states_is_stopped=False):
+ send_node=False, vm_states_is_stopped=False,
+ expect_error=False):
network_api = self.compute.network_api
ctxt = context.get_admin_context()
@@ -13562,6 +13563,11 @@ class EvacuateHostTestCase(BaseTestCase):
action='power_off', phase='start'),
mock.call(ctxt, self.inst, self.inst.host,
action='power_off', phase='end')])
+ elif expect_error:
+ mock_notify_rebuild.assert_has_calls([
+ mock.call(ctxt, self.inst, self.compute.host,
+ phase='error', exception=mock.ANY, bdms=bdms)])
+ return
else:
mock_notify_rebuild.assert_has_calls([
mock.call(ctxt, self.inst, self.inst.host, phase='start',
@@ -13616,14 +13622,15 @@ class EvacuateHostTestCase(BaseTestCase):
mock.patch.object(self.compute, '_get_compute_info',
side_effect=fake_get_compute_info)
) as (mock_inst, mock_get):
- self._rebuild()
+ self.assertRaises(exception.InstanceFaultRollback,
+ self._rebuild, expect_error=True)
# Should be on destination host
instance = db.instance_get(self.context, self.inst.id)
- self.assertEqual(instance['host'], self.compute.host)
- self.assertIsNone(instance['node'])
- self.assertTrue(mock_inst.called)
- self.assertTrue(mock_get.called)
+ self.assertEqual('fake_host_2', instance['host'])
+ self.assertEqual('fakenode2', instance['node'])
+ mock_inst.assert_not_called()
+ mock_get.assert_called_once_with(mock.ANY, self.compute.host)
def test_rebuild_on_host_node_passed(self):
patch_get_info = mock.patch.object(self.compute, '_get_compute_info')
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 1c69cd8f1c..73c9d32197 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -2560,10 +2560,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
self.assertFalse(mock_get_info.called)
self.assertFalse(mock_sync_power_state.called)
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch('nova.compute.manager.ComputeManager.'
'_sync_instance_power_state')
def test_query_driver_power_state_and_sync_not_found_driver(
- self, mock_sync_power_state):
+ self, mock_sync_power_state, mock_claim):
error = exception.InstanceNotFound(instance_id=1)
with mock.patch.object(self.compute.driver,
'get_info', side_effect=error) as mock_get_info:
@@ -6568,6 +6569,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
fake_rt = fake_resource_tracker.FakeResourceTracker(self.compute.host,
self.compute.driver)
self.compute.rt = fake_rt
+ self.compute.driver._set_nodes([self.node])
+ self.compute.rt.compute_nodes = {self.node: objects.ComputeNode()}
self.allocations = {
uuids.provider1: {
@@ -6857,6 +6860,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_get_arqs.assert_called_once_with(
self.instance.uuid, only_resolved=True)
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch.object(fake_driver.FakeDriver, 'spawn')
@mock.patch('nova.objects.Instance.save')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -6868,7 +6872,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
def test_spawn_called_with_accel_info(self, mock_ins_usage,
mock_ins_create, mock_dev_tag, mock_certs, mock_req_group_map,
- mock_get_allocations, mock_ins_save, mock_spawn):
+ mock_get_allocations, mock_ins_save, mock_spawn, mock_claim):
accel_info = [{'k1': 'v1', 'k2': 'v2'}]
@@ -7142,13 +7146,15 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.security_groups, self.block_device_mapping,
request_spec={}, host_lists=[fake_host_list])
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch.object(manager.ComputeManager, '_shutdown_instance')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@mock.patch.object(fake_driver.FakeDriver, 'spawn')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
def test_rescheduled_exception_with_non_ascii_exception(self,
- mock_notify, mock_save, mock_spawn, mock_build, mock_shutdown):
+ mock_notify, mock_save, mock_spawn, mock_build, mock_shutdown,
+ mock_claim):
exc = exception.NovaException(u's\xe9quence')
mock_build.return_value = self.network_info
@@ -7164,7 +7170,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.accel_uuids)
mock_save.assert_has_calls([
mock.call(),
- mock.call(),
mock.call(expected_task_state='block_device_mapping'),
])
mock_notify.assert_has_calls([
@@ -7670,6 +7675,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.assertEqual(10, mock_failed.call_count)
mock_succeeded.assert_not_called()
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch.object(manager.ComputeManager, '_shutdown_instance')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@mock.patch.object(fake_driver.FakeDriver, 'spawn')
@@ -7677,7 +7683,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
def _test_instance_exception(self, exc, raised_exc,
mock_notify, mock_save, mock_spawn,
- mock_build, mock_shutdown):
+ mock_build, mock_shutdown, mock_claim):
"""This method test the instance related InstanceNotFound
and reschedule on exception errors. The test cases get from
arguments.
@@ -7700,7 +7706,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock_save.assert_has_calls([
mock.call(),
- mock.call(),
mock.call(expected_task_state='block_device_mapping')])
mock_notify.assert_has_calls([
mock.call(self.context, self.instance, 'create.start',
@@ -7811,11 +7816,12 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
'_shutdown_instance'),
mock.patch.object(self.compute,
'_validate_instance_group_policy'),
+ mock.patch.object(self.compute.rt, 'instance_claim'),
mock.patch('nova.compute.utils.notify_about_instance_create')
) as (spawn, save,
_build_networks_for_instance, _notify_about_instance_usage,
_shutdown_instance, _validate_instance_group_policy,
- mock_notify):
+ mock_claim, mock_notify):
self.assertRaises(exception.BuildAbortException,
self.compute._build_and_run_instance, self.context,
@@ -7846,7 +7852,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
save.assert_has_calls([
mock.call(),
- mock.call(),
mock.call(
expected_task_state=task_states.BLOCK_DEVICE_MAPPING)])
@@ -7908,11 +7913,12 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
request_spec={}, host_lists=[fake_host_list])
mock_nil.assert_called_once_with(self.instance)
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch.object(manager.ComputeManager, '_build_resources')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
def test_build_resources_buildabort_reraise(self, mock_notify, mock_save,
- mock_build):
+ mock_build, mock_claim):
exc = exception.BuildAbortException(
instance_uuid=self.instance.uuid, reason='')
mock_build.side_effect = exc
@@ -7926,7 +7932,6 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.node, self.limits, self.filter_properties,
request_spec=[], accel_uuids=self.accel_uuids)
- mock_save.assert_called_once_with()
mock_notify.assert_has_calls([
mock.call(self.context, self.instance, 'create.start',
extra_usage_info={'image_name': self.image.get('name')}),
@@ -8581,10 +8586,11 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
ctxt, instance, req_networks)
warning_mock.assert_not_called()
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch('nova.compute.utils.notify_about_instance_create')
@mock.patch.object(manager.ComputeManager, '_instance_update')
def test_launched_at_in_create_end_notification(self,
- mock_instance_update, mock_notify_instance_create):
+ mock_instance_update, mock_notify_instance_create, mock_claim):
def fake_notify(*args, **kwargs):
if args[2] == 'create.end':
@@ -8624,6 +8630,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.flags(default_access_ip_network_name='test1')
instance = fake_instance.fake_db_instance()
+ @mock.patch.object(self.compute.rt, 'instance_claim')
@mock.patch.object(db, 'instance_update_and_get_original',
return_value=({}, instance))
@mock.patch.object(self.compute.driver, 'spawn')
@@ -8632,7 +8639,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
@mock.patch.object(db, 'instance_extra_update_by_uuid')
@mock.patch.object(self.compute, '_notify_about_instance_usage')
def _check_access_ip(mock_notify, mock_extra, mock_networks,
- mock_spawn, mock_db_update):
+ mock_spawn, mock_db_update, mock_claim):
self.compute._build_and_run_instance(self.context, self.instance,
self.image, self.injected_files, self.admin_pass,
self.requested_networks, self.security_groups,
@@ -8653,8 +8660,10 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
_check_access_ip()
+ @mock.patch('nova.compute.resource_tracker.ResourceTracker.instance_claim')
@mock.patch.object(manager.ComputeManager, '_instance_update')
- def test_create_error_on_instance_delete(self, mock_instance_update):
+ def test_create_error_on_instance_delete(self, mock_instance_update,
+ mock_claim):
def fake_notify(*args, **kwargs):
if args[2] == 'create.error':
@@ -8668,7 +8677,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock.patch.object(self.compute,
'_build_networks_for_instance', return_value=[]),
mock.patch.object(self.instance, 'save',
- side_effect=[None, None, None, exc]),
+ side_effect=[None, None, exc]),
mock.patch.object(self.compute, '_notify_about_instance_usage',
side_effect=fake_notify)
) as (mock_spawn, mock_networks, mock_save, mock_notify):
@@ -8697,7 +8706,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock.patch.object(
self.compute, '_build_networks_for_instance', return_value=[]),
mock.patch.object(self.instance, 'save'),
- ) as (mock_spawn, mock_networks, mock_save):
+ mock.patch.object(self.compute.rt, 'instance_claim'),
+ ) as (mock_spawn, mock_networks, mock_save, mock_claim):
self.compute._build_and_run_instance(
self.context,
self.instance, self.image, self.injected_files,
@@ -8747,7 +8757,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
mock.patch.object(self.instance, 'save'),
mock.patch('nova.scheduler.client.report.'
'SchedulerReportClient._get_resource_provider'),
- ) as (mock_spawn, mock_networks, mock_save, mock_get_rp):
+ mock.patch.object(self.compute.rt, 'instance_claim'),
+ ) as (mock_spawn, mock_networks, mock_save, mock_get_rp, mock_claim):
mock_get_rp.return_value = {
'uuid': uuids.rp1,
'name': 'compute1:sriov-agent:ens3'
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index 101e96f83f..919dcb8334 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -2233,14 +2233,19 @@ class TestInstanceClaim(BaseTestCase):
self.rt.compute_nodes = {}
self.assertTrue(self.rt.disabled(_NODENAME))
- with mock.patch.object(self.instance, 'save'):
- claim = self.rt.instance_claim(mock.sentinel.ctx, self.instance,
- _NODENAME, self.allocations, None)
+ # Reset all changes to the instance to make sure that we can detect
+ # any manipulation after the failure.
+ self.instance.obj_reset_changes(recursive=True)
- self.assertEqual(self.rt.host, self.instance.host)
- self.assertEqual(self.rt.host, self.instance.launched_on)
- self.assertEqual(_NODENAME, self.instance.node)
- self.assertIsInstance(claim, claims.NopClaim)
+ with mock.patch.object(self.instance, 'save') as mock_save:
+ self.assertRaises(exc.ComputeResourcesUnavailable,
+ self.rt.instance_claim,
+ mock.sentinel.ctx, self.instance,
+ _NODENAME, self.allocations, None)
+ mock_save.assert_not_called()
+
+ # Make sure the instance was not touched by the failed claim process
+ self.assertEqual(set(), self.instance.obj_what_changed())
@mock.patch('nova.compute.utils.is_volume_backed_instance')
@mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
diff --git a/nova/tests/unit/compute/test_shelve.py b/nova/tests/unit/compute/test_shelve.py
index 0a1e3f54fc..62321bddec 100644
--- a/nova/tests/unit/compute/test_shelve.py
+++ b/nova/tests/unit/compute/test_shelve.py
@@ -646,7 +646,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
self.compute.unshelve_instance(
self.context, instance, image=None,
- filter_properties={}, node='fake-node', request_spec=request_spec,
+ filter_properties={}, node='fakenode2', request_spec=request_spec,
accel_uuids=[])
mock_update_pci.assert_called_once_with(
@@ -700,7 +700,7 @@ class ShelveComputeManagerTestCase(test_compute.BaseTestCase):
self.assertRaises(test.TestingException,
self.compute.unshelve_instance, self.context, instance,
image=shelved_image, filter_properties={},
- node='fake-node', request_spec=fake_spec, accel_uuids=[])
+ node='fakenode2', request_spec=fake_spec, accel_uuids=[])
self.assertEqual(instance.image_ref, initial_image_ref)
@mock.patch.object(objects.InstanceList, 'get_by_filters')
diff --git a/nova/tests/unit/scheduler/weights/test_weights_hypervisor_version.py b/nova/tests/unit/scheduler/weights/test_weights_hypervisor_version.py
new file mode 100644
index 0000000000..c6e4abd4cd
--- /dev/null
+++ b/nova/tests/unit/scheduler/weights/test_weights_hypervisor_version.py
@@ -0,0 +1,97 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+"""
+Tests For Scheduler hypervisor version weights.
+"""
+
+from nova.scheduler import weights
+from nova.scheduler.weights import hypervisor_version
+from nova import test
+from nova.tests.unit.scheduler import fakes
+
+
+class HypervisorVersionWeigherTestCase(test.NoDBTestCase):
+ def setUp(self):
+ super().setUp()
+ self.weight_handler = weights.HostWeightHandler()
+ self.weighers = [hypervisor_version.HypervisorVersionWeigher()]
+
+ def _get_weighed_host(self, hosts, weight_properties=None):
+ if weight_properties is None:
+ weight_properties = {}
+ return self.weight_handler.get_weighed_objects(self.weighers,
+ hosts, weight_properties)[0]
+
+ def _get_all_hosts(self):
+ host_values = [
+ ('host1', 'node1', {'hypervisor_version': 1}),
+ ('host2', 'node2', {'hypervisor_version': 200}),
+ ('host3', 'node3', {'hypervisor_version': 100}),
+ ('host4', 'node4', {'hypervisor_version': 1000}),
+ ]
+ return [fakes.FakeHostState(host, node, values)
+ for host, node, values in host_values]
+
+ def test_multiplier_default(self):
+ hostinfo_list = self._get_all_hosts()
+ weighed_host = self._get_weighed_host(hostinfo_list)
+ self.assertEqual(1.0, weighed_host.weight)
+ self.assertEqual('host4', weighed_host.obj.host)
+
+ def test_multiplier_default_full_ordering(self):
+ hostinfo_list = self._get_all_hosts()
+ weighed_hosts = self.weight_handler.get_weighed_objects(
+ self.weighers, hostinfo_list, {}
+ )
+ expected_hosts = [fakes.FakeHostState(host, node, values)
+ for host, node, values in [
+ ('host4', 'node4', {'hypervisor_version': 1000}),
+ ('host2', 'node2', {'hypervisor_version': 200}),
+ ('host3', 'node3', {'hypervisor_version': 100}),
+ ('host1', 'node1', {'hypervisor_version': 1}),
+ ]]
+ for actual, expected in zip(
+ weighed_hosts,
+ expected_hosts
+ ):
+ self.assertEqual(actual.obj.host, expected.host)
+
+ def test_multiplier_none(self):
+ multi = 0.0
+ self.flags(
+ hypervisor_version_weight_multiplier=multi,
+ group='filter_scheduler'
+ )
+ hostinfo_list = self._get_all_hosts()
+ weighed_host = self._get_weighed_host(hostinfo_list)
+ self.assertEqual(multi, weighed_host.weight)
+
+ def test_multiplier_positive(self):
+ multi = 2.0
+ self.flags(
+ hypervisor_version_weight_multiplier=multi,
+ group='filter_scheduler'
+ )
+ hostinfo_list = self._get_all_hosts()
+ weighed_host = self._get_weighed_host(hostinfo_list)
+ self.assertEqual(1.0 * multi, weighed_host.weight)
+ self.assertEqual('host4', weighed_host.obj.host)
+
+ def test_multiplier_negative(self):
+ multi = -1.0
+ self.flags(
+ hypervisor_version_weight_multiplier=multi,
+ group='filter_scheduler'
+ )
+ hostinfo_list = self._get_all_hosts()
+ weighed_host = self._get_weighed_host(hostinfo_list)
+ self.assertEqual('host1', weighed_host.obj.host)
diff --git a/releasenotes/notes/hypervisor-version-weigher-d0bba77e720edafe.yaml b/releasenotes/notes/hypervisor-version-weigher-d0bba77e720edafe.yaml
new file mode 100644
index 0000000000..31f2c70926
--- /dev/null
+++ b/releasenotes/notes/hypervisor-version-weigher-d0bba77e720edafe.yaml
@@ -0,0 +1,20 @@
+---
+features:
+ - |
+    A new hypervisor version weigher has been added to prefer selecting hosts
+    with newer hypervisors installed. For the libvirt driver, this is the
+    version of libvirt on the compute node, not the version of QEMU. As with
+    all weighers, it is enabled by default and its behavior can be modified
+    using the new ``hypervisor_version_weight_multiplier`` config option in
+    the ``filter_scheduler`` section.
+upgrade:
+ - |
+    A new hypervisor version weigher has been added that will prefer selecting
+    hosts with a newer hypervisor installed. This can help simplify rolling
+    upgrades by preferring the already upgraded hosts when moving workloads
+    around using live or cold migration. To restore the old behavior, either
+    remove the weigher from the list of enabled weighers or set
+    ``[filter_scheduler] hypervisor_version_weight_multiplier=0``. The default
+    value of ``hypervisor_version_weight_multiplier`` is 1, so only a mild
+    preference is given to newer hosts; higher values make the effect more
+    pronounced and negative values prefer older hosts.