summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJenkins <jenkins@review.openstack.org>2017-08-11 07:06:48 +0000
committerGerrit Code Review <review@openstack.org>2017-08-11 07:06:48 +0000
commit4cd6a3a1b473795a63f076c7986d1b48d83af99d (patch)
tree0095c11d7daa078cfbd4c85f81655b311c6683f4
parentc58d4422d87af8b1e8583972d3eef5c7db327264 (diff)
parentddef9dcfc0f7b8f80c6f67f2bfcaf752558a7788 (diff)
downloadnova-16.0.0.0rc1.tar.gz
Merge "Resource tracker compatibility with Ocata and Pike"16.0.0.0rc1
-rw-r--r--doc/notification_samples/service-update.json2
-rw-r--r--nova/cmd/compute.py2
-rw-r--r--nova/compute/resource_tracker.py75
-rw-r--r--nova/objects/service.py5
-rw-r--r--nova/test.py1
-rw-r--r--nova/tests/functional/compute/test_resource_tracker.py11
-rw-r--r--nova/tests/functional/regressions/test_bug_1679750.py3
-rw-r--r--nova/tests/functional/test_servers.py34
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py5
-rw-r--r--nova/tests/unit/compute/test_resource_tracker.py41
10 files changed, 136 insertions, 43 deletions
diff --git a/doc/notification_samples/service-update.json b/doc/notification_samples/service-update.json
index e080ab9c65..9e7220e73a 100644
--- a/doc/notification_samples/service-update.json
+++ b/doc/notification_samples/service-update.json
@@ -13,7 +13,7 @@
"disabled_reason": null,
"report_count": 1,
"forced_down": false,
- "version": 21,
+ "version": 22,
"availability_zone": null,
"uuid": "fa69c544-906b-4a6a-a9c6-c1f7a8078c73"
}
diff --git a/nova/cmd/compute.py b/nova/cmd/compute.py
index 53342e2f07..eb48853d45 100644
--- a/nova/cmd/compute.py
+++ b/nova/cmd/compute.py
@@ -52,7 +52,7 @@ def main():
cmd_common.block_db_access('nova-compute')
objects_base.NovaObject.indirection_api = conductor_rpcapi.ConductorAPI()
-
+ objects.Service.enable_min_version_cache()
server = service.Service.create(binary='nova-compute',
topic=compute_rpcapi.RPC_TOPIC)
service.serve(server)
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index 2ab30161ca..994f1ca82f 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -1010,7 +1010,7 @@ class ResourceTracker(object):
continue
def _update_usage_from_instance(self, context, instance, nodename,
- is_removed=False):
+ is_removed=False, has_ocata_computes=False):
"""Update usage for a single instance."""
uuid = instance['uuid']
@@ -1038,7 +1038,40 @@ class ResourceTracker(object):
self.pci_tracker.update_pci_for_instance(context,
instance,
sign=sign)
- self.reportclient.update_instance_allocation(cn, instance, sign)
+ if has_ocata_computes:
+ LOG.debug("We're on a Pike compute host in a deployment "
+ "with Ocata compute hosts. Auto-correcting "
+ "allocations to handle Ocata-style assumptions.")
+ self.reportclient.update_instance_allocation(cn, instance,
+ sign)
+ else:
+ # NOTE(jaypipes): We're on a Pike compute host or later in
+ # a deployment with all compute hosts upgraded to Pike or
+ # later
+ #
+ # If that is the case, then we know that the scheduler will
+ # have properly created an allocation and that the compute
+ # hosts have not attempted to overwrite allocations
+ # **during the periodic update_available_resource() call**.
+ # However, Pike compute hosts may still rework an
+ # allocation for an instance in a move operation during
+ # confirm_resize() on the source host which will remove the
+ # source resource provider from any allocation for an
+ # instance.
+ #
+ # In Queens and beyond, the scheduler will understand when
+ # a move operation has been requested and instead of
+ # creating a doubled-up allocation that contains both the
+ # source and destination host, the scheduler will take the
+ # original allocation (against the source host) and change
+ # the consumer ID of that allocation to be the migration
+ # UUID and not the instance UUID. The scheduler will
+ # allocate the resources for the destination host to the
+ # instance UUID.
+ LOG.debug("We're on a Pike compute host in a deployment "
+ "with all Pike compute hosts. Skipping "
+ "auto-correction of allocations.")
+
# new instance, update compute node resource usage:
self._update_usage(self._get_usage_dict(instance), nodename,
sign=sign)
@@ -1068,9 +1101,44 @@ class ResourceTracker(object):
cn.current_workload = 0
cn.running_vms = 0
+ # NOTE(jaypipes): In Pike, we need to be tolerant of Ocata compute
+ # nodes that overwrite placement allocations to look like what the
+ # resource tracker *thinks* is correct. When an instance is
+ # migrated from an Ocata compute node to a Pike compute node, the
+ # Pike scheduler will have created a "doubled-up" allocation that
+ # contains allocated resources against both the source and
+ # destination hosts. The Ocata source compute host, during its
+ # update_available_resource() periodic call will find the instance
+ # in its list of known instances and will call
+ # update_instance_allocation() in the report client. That call will
+ # pull the allocations for the instance UUID which will contain
+ # both the source and destination host providers in the allocation
+ # set. Seeing that this is different from what the Ocata source
+ # host thinks it should be, it will overwrite the allocation to
+ # only be an allocation against itself.
+ #
+ # And therefore, here we need to have Pike compute hosts
+ # "correct" the improper healing that the Ocata source host did
+ # during its periodic interval. When the instance is fully migrated
+ # to the Pike compute host, the Ocata compute host will find an
+ # allocation that refers to itself for an instance it no longer
+ # controls and will *delete* all allocations that refer to that
+ # instance UUID, assuming that the instance has been deleted. We
+ # need the destination Pike compute host to recreate that
+ # allocation to refer to its own resource provider UUID.
+ #
+ # For Pike compute nodes that migrate to either a Pike compute host
+ # or a Queens compute host, we do NOT want the Pike compute host to
+ # be "healing" allocation information. Instead, we rely on the Pike
+ # scheduler to properly create allocations during scheduling.
+ compute_version = objects.Service.get_minimum_version(
+ context, 'nova-compute')
+ has_ocata_computes = compute_version < 22
+
for instance in instances:
if instance.vm_state not in vm_states.ALLOW_RESOURCE_REMOVAL:
- self._update_usage_from_instance(context, instance, nodename)
+ self._update_usage_from_instance(context, instance, nodename,
+ has_ocata_computes=has_ocata_computes)
self._remove_deleted_instances_allocations(context, cn)
@@ -1141,7 +1209,6 @@ class ResourceTracker(object):
"There are allocations remaining against the source "
"host that might need to be removed: %s.",
instance_uuid, instance.host, instance.node, alloc)
- continue
def _find_orphaned_instances(self):
"""Given the set of instances and migrations already account for
diff --git a/nova/objects/service.py b/nova/objects/service.py
index 6a7c8ea8a5..ff9058910b 100644
--- a/nova/objects/service.py
+++ b/nova/objects/service.py
@@ -32,7 +32,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
-SERVICE_VERSION = 21
+SERVICE_VERSION = 22
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -110,6 +110,9 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '4.16'},
# Version 21: Compute RPC version 4.17
{'compute_rpc': '4.17'},
+ # Version 22: A marker for the behaviour change of auto-healing code on the
+ # compute host regarding allocations against an instance
+ {'compute_rpc': '4.17'},
)
diff --git a/nova/test.py b/nova/test.py
index c4fc62a10f..24d65105db 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -260,6 +260,7 @@ class TestCase(testtools.TestCase):
self._base_test_obj_backup = copy.copy(
objects_base.NovaObjectRegistry._registry._obj_classes)
self.addCleanup(self._restore_obj_registry)
+ objects.Service.clear_min_version_cache()
# NOTE(danms): Reset the cached list of cells
from nova.compute import api
diff --git a/nova/tests/functional/compute/test_resource_tracker.py b/nova/tests/functional/compute/test_resource_tracker.py
index ee7d98e472..9559ceaee2 100644
--- a/nova/tests/functional/compute/test_resource_tracker.py
+++ b/nova/tests/functional/compute/test_resource_tracker.py
@@ -290,12 +290,17 @@ class IronicResourceTrackerTest(test.TestCase):
self.assertEqual(3, len(inv))
# Now "spawn" an instance to the first compute node by calling the
- # RT's instance_claim(), which should, in the case of an Ironic
- # instance, grab the full compute node for the instance and write
- # allocation records for VCPU, MEMORY_MB, and DISK_GB
+ # RT's instance_claim().
cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1]
cn1_nodename = cn1_obj.hypervisor_hostname
inst = self.INSTANCE_FIXTURES[uuids.instance1]
+ # Since we're on Pike, the scheduler would have created our
+ # allocation for us. So, we can use our old update routine
+ # here to mimic that before we go do the compute RT claim,
+ # and then the checks below.
+ self.rt.reportclient.update_instance_allocation(cn1_obj,
+ inst,
+ 1)
with self.rt.instance_claim(self.ctx, inst, cn1_nodename):
pass
diff --git a/nova/tests/functional/regressions/test_bug_1679750.py b/nova/tests/functional/regressions/test_bug_1679750.py
index 6cbaf9934f..b93ce5107c 100644
--- a/nova/tests/functional/regressions/test_bug_1679750.py
+++ b/nova/tests/functional/regressions/test_bug_1679750.py
@@ -41,7 +41,8 @@ class TestLocalDeleteAllocations(test.TestCase,
self.start_service('conductor')
self.start_service('consoleauth')
- self.flags(group='scheduler', driver='chance_scheduler')
+ self.flags(enabled_filters=['RetryFilter', 'ComputeFilter'],
+ group='filter_scheduler')
self.start_service('scheduler')
self.compute = self.start_service('compute')
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index 503c9da25b..7c2433dc6f 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -1081,11 +1081,7 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
self.useFixture(policy_fixture.RealPolicyFixture())
self.useFixture(nova_fixtures.NeutronFixture(self))
-
- # NOTE(gibi): After fix 1707071 we need to set the service version to
- # pike to test pike only interactions. We need separate tests for
- # ocata - pike interactions
- # self.useFixture(nova_fixtures.AllServicesCurrent())
+ self.useFixture(nova_fixtures.AllServicesCurrent())
placement = self.useFixture(nova_fixtures.PlacementFixture())
self.placement_api = placement.api
@@ -1308,7 +1304,6 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, source_allocation)
- self.assertEqual(2, len(allocations))
dest_allocation = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, dest_allocation)
@@ -1394,6 +1389,7 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
# destination host
allocations = self._get_allocations_by_server_uuid(server['id'])
+ # and the server allocates only from the target host
self.assertEqual(1, len(allocations))
source_usages = self._get_provider_usages(source_rp_uuid)
@@ -1407,6 +1403,12 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
'The source host %s still has usages after the '
'resize has been confirmed' % source_hostname)
+ # and the target host allocation should be according to the new flavor
+ self.assertFlavorMatchesAllocation(self.flavor2, dest_usages)
+
+ dest_allocation = allocations[dest_rp_uuid]['resources']
+ self.assertFlavorMatchesAllocation(self.flavor2, dest_allocation)
+
self._run_periodics()
# Check we're still accurate after running the periodics
@@ -1462,29 +1464,13 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
usages = self._get_provider_usages(rp_uuid)
- # NOTE(gibi): This is bug 1707071 where the compute "healing" periodic
- # tramples on the doubled allocations created in the scheduler.
- self.assertFlavorMatchesAllocation(new_flavor, usages)
-
- # NOTE(gibi): After fixing bug 1707252 the following is expected
- # self.assertEqual(old_flavor['vcpus'] + new_flavor['vcpus'],
- # usages['VCPU'])
- # self.assertEqual(old_flavor['ram'] + new_flavor['ram'],
- # usages['MEMORY_MB'])
- # self.assertEqual(old_flavor['disk'] + new_flavor['disk'],
- # usages['DISK_GB'])
+ self.assertFlavorsMatchAllocation(old_flavor, new_flavor, usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
- # NOTE(gibi): After fixing bug 1707252 the following is expected
- # self.assertEqual(old_flavor['vcpus'] + new_flavor['vcpus'],
- # allocation['VCPU'])
- # self.assertEqual(old_flavor['ram'] + new_flavor['ram'],
- # allocation['MEMORY_MB'])
- # self.assertEqual(old_flavor['disk'] + new_flavor['disk'],
- # allocation['DISK_GB'])
+ self.assertFlavorsMatchAllocation(old_flavor, new_flavor, allocation)
def test_resize_revert_same_host(self):
# make sure that the test only uses a single host
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index a66182d067..beb955d2ab 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -5643,6 +5643,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
self.instance, self.instance.node)
self.assertIsNotNone(self.instance.migration_context)
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
@mock.patch.object(self.compute, "_notify_about_instance_usage")
@mock.patch.object(self.compute, "_set_instance_info")
@mock.patch.object(self.instance, 'save')
@@ -5665,7 +5667,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
mock_mig_save,
mock_inst_save,
mock_set,
- mock_notify):
+ mock_notify,
+ mock_version):
self.compute.finish_revert_resize(context=self.context,
instance=self.instance,
reservations=None,
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index 6c456b7e4f..e2c726c2bc 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -31,6 +31,7 @@ from nova.objects import base as obj_base
from nova.objects import fields as obj_fields
from nova.objects import pci_device
from nova.pci import manager as pci_manager
+from nova.scheduler import utils as sched_utils
from nova import test
from nova.tests.unit.objects import test_pci_device as fake_pci_device
from nova.tests import uuidsentinel as uuids
@@ -467,7 +468,9 @@ class BaseTestCase(test.NoDBTestCase):
class TestUpdateAvailableResources(BaseTestCase):
- def _update_available_resources(self):
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
+ def _update_available_resources(self, version_mock):
# We test RT._update separately, since the complexity
# of the update_available_resource() function is high enough as
# it is, we just want to focus here on testing the resources
@@ -1688,6 +1691,8 @@ class TestInstanceClaim(BaseTestCase):
class TestResize(BaseTestCase):
@mock.patch('nova.compute.utils.is_volume_backed_instance',
return_value=False)
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
return_value=objects.InstancePCIRequests(requests=[]))
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -1698,7 +1703,7 @@ class TestResize(BaseTestCase):
@mock.patch('nova.objects.ComputeNode.save')
def test_resize_claim_same_host(self, save_mock, get_mock, migr_mock,
get_cn_mock, pci_mock, instance_pci_mock,
- is_bfv_mock):
+ version_mock, is_bfv_mock):
# Resize an existing instance from its current flavor (instance type
# 1) to a new flavor (instance type 2) and verify that the compute
# node's resources are appropriately updated to account for the new
@@ -1788,6 +1793,8 @@ class TestResize(BaseTestCase):
@mock.patch('nova.compute.utils.is_volume_backed_instance',
return_value=False)
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid',
return_value=objects.InstancePCIRequests(requests=[]))
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
@@ -1809,8 +1816,10 @@ class TestResize(BaseTestCase):
pci_get_by_compute_node_mock,
pci_get_by_instance_mock,
pci_get_by_instance_uuid_mock,
+ version_mock,
is_bfv_mock,
revert=False):
+
self.flags(reserved_host_disk_mb=0,
reserved_host_memory_mb=0)
virt_resources = copy.deepcopy(_VIRT_DRIVER_AVAIL_RESOURCES)
@@ -1893,12 +1902,23 @@ class TestResize(BaseTestCase):
ignore=['stats']
))
- def test_instance_build_resize_confirm(self):
- self._test_instance_build_resize()
+ cn = self.rt.compute_nodes[_NODENAME]
+ cn_uuid = cn.uuid
+ rc = self.sched_client_mock.reportclient
+ remove_method = rc.remove_provider_from_instance_allocation
+ expected_resources = sched_utils.resources_from_flavor(instance,
+ flavor)
+ remove_method.assert_called_once_with(instance.uuid, cn_uuid,
+ instance.user_id, instance.project_id, expected_resources)
def test_instance_build_resize_revert(self):
self._test_instance_build_resize(revert=True)
+ def test_instance_build_resize_confirm(self):
+ self._test_instance_build_resize()
+
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
@mock.patch('nova.pci.stats.PciDeviceStats.support_requests',
return_value=True)
@mock.patch('nova.objects.PciDevice.save')
@@ -1911,7 +1931,7 @@ class TestResize(BaseTestCase):
@mock.patch('nova.objects.ComputeNode.save')
def test_resize_claim_dest_host_with_pci(self, save_mock, get_mock,
migr_mock, get_cn_mock, pci_mock, pci_req_mock, pci_claim_mock,
- pci_dev_save_mock, pci_supports_mock):
+ pci_dev_save_mock, pci_supports_mock, version_mock):
# Starting from an empty destination compute node, perform a resize
# operation for an instance containing SR-IOV PCI devices on the
# original host.
@@ -2038,7 +2058,12 @@ class TestResize(BaseTestCase):
self.rt.drop_move_claim(ctx, instance, _NODENAME)
mock_pci_free_device.assert_called_once_with(
pci_dev, mock.ANY)
+ # Check that we grabbed resources for the right flavor...
+ mock_resources.assert_called_once_with(instance,
+ instance.flavor)
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
return_value=objects.InstancePCIRequests(requests=[]))
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -2048,7 +2073,7 @@ class TestResize(BaseTestCase):
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.objects.ComputeNode.save')
def test_resize_claim_two_instances(self, save_mock, get_mock, migr_mock,
- get_cn_mock, pci_mock, instance_pci_mock):
+ get_cn_mock, pci_mock, instance_pci_mock, version_mock):
# Issue two resize claims against a destination host with no prior
# instances on it and validate that the accounting for resources is
# correct.
@@ -2163,6 +2188,8 @@ class TestResize(BaseTestCase):
class TestRebuild(BaseTestCase):
+ @mock.patch('nova.objects.Service.get_minimum_version',
+ return_value=22)
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
return_value=objects.InstancePCIRequests(requests=[]))
@mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -2172,7 +2199,7 @@ class TestRebuild(BaseTestCase):
@mock.patch('nova.objects.InstanceList.get_by_host_and_node')
@mock.patch('nova.objects.ComputeNode.save')
def test_rebuild_claim(self, save_mock, get_mock, migr_mock, get_cn_mock,
- pci_mock, instance_pci_mock):
+ pci_mock, instance_pci_mock, version_mock):
# Rebuild an instance, emulating an evacuate command issued against the
# original instance. The rebuild operation uses the resource tracker's
# _move_claim() method, but unlike with resize_claim(), rebuild_claim()