author    | Jenkins <jenkins@review.openstack.org>    | 2014-09-19 11:42:52 +0000
committer | Gerrit Code Review <review@openstack.org> | 2014-09-19 11:42:52 +0000
commit    | d95f767c670e021c5ebb7cc466f55e2ec665f902 (patch)
tree      | 3f08a33bc5769a91debd479a57518320270ee8cc
parent    | 13bab4c9aa64f47d33f3158ebd1e14ed924d2060 (diff)
parent    | 2afe82ea73069befb281508361f16462e71d6fc0 (diff)
Merge "Don't refresh network cache for instances building or deleting" into stable/havana
-rw-r--r-- | nova/compute/manager.py            | 88 |
-rw-r--r-- | nova/tests/compute/test_compute.py | 48 |
2 files changed, 92 insertions, 44 deletions
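
The diff is easier to follow with the new control flow in mind: the periodic task now either rebuilds its work list from the database (skipping instances that are building or deleting) or pops the next still-valid UUID from the saved list, and refreshes the network info cache for at most one instance per run. The sketch below is a minimal, dependency-free illustration of that flow; `FakeInstance`, `InMemoryDB`, `Healer` and `refresh_nw_info` are hypothetical stand-ins, not Nova APIs (the real code uses `instance_obj.InstanceList` and `_get_instance_nw_info`).

```python
# Minimal sketch of the heal logic this patch introduces.  All names
# here are illustrative stand-ins for the Nova objects and network API.


class FakeInstance(object):
    def __init__(self, uuid, host, vm_state='active', task_state=None):
        self.uuid = uuid
        self.host = host
        self.vm_state = vm_state
        self.task_state = task_state


class InMemoryDB(object):
    """Trivial, ordered stand-in for the instance table."""

    def __init__(self, instances):
        self._instances = list(instances)

    def get_by_host(self, host):
        return [i for i in self._instances if i.host == host]

    def get(self, uuid):
        for i in self._instances:
            if i.uuid == uuid:
                return i
        return None


def refresh_nw_info(instance):
    # Stand-in for the network API call that rebuilds the info_cache.
    print('refreshed info_cache for %s' % instance.uuid)


class Healer(object):
    def __init__(self, host, db):
        self.host = host
        self.db = db
        self._uuids_to_heal = []

    def heal_one(self):
        """Refresh the network info cache for at most one instance."""
        instance = None
        if not self._uuids_to_heal:
            # Phase 1: the work list is empty, so rebuild it from the
            # DB, skipping instances that are building or being deleted.
            for inst in self.db.get_by_host(self.host):
                if inst.vm_state == 'building' or inst.task_state == 'deleting':
                    continue
                if instance is None:
                    instance = inst          # heal the first valid hit now
                else:
                    self._uuids_to_heal.append(inst.uuid)
        else:
            # Phase 2: pop saved UUIDs until one is still valid.
            while self._uuids_to_heal:
                inst = self.db.get(self._uuids_to_heal.pop(0))
                if inst is None:
                    continue                 # instance is gone, grab another
                if inst.host != self.host or inst.task_state == 'deleting':
                    continue                 # migrated away or being deleted
                instance = inst
                break
        if instance is not None:
            refresh_nw_info(instance)        # at most one refresh per run
```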
```diff
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 27d61a92c2..d54e39c025 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -4341,38 +4341,76 @@ class ComputeManager(manager.SchedulerDependentManager):
             return
         self._last_info_cache_heal = curr_time
 
-        instance_uuids = getattr(self, '_instance_uuids_to_heal', None)
+        instance_uuids = getattr(self, '_instance_uuids_to_heal', [])
         instance = None
-        while not instance or instance['host'] != self.host:
-            if instance_uuids:
+        LOG.debug(_('Starting heal instance info cache'))
+
+        if not instance_uuids:
+            # The list of instances to heal is empty so rebuild it
+            LOG.debug(_('Rebuilding the list of instances to heal'))
+            db_instances = instance_obj.InstanceList.get_by_host(
+                context, self.host, expected_attrs=[])
+            for inst in db_instances:
+                # We don't want to refresh the cache for instances
+                # which are building or deleting so don't put them
+                # in the list. If they are building they will get
+                # added to the list next time we build it.
+                if (inst.vm_state == vm_states.BUILDING):
+                    LOG.debug(_('Skipping network cache update for instance '
+                                'because it is Building.'), instance=inst)
+                    continue
+                if (inst.task_state == task_states.DELETING):
+                    LOG.debug(_('Skipping network cache update for instance '
+                                'because it is being deleted.'), instance=inst)
+                    continue
+
+                if not instance:
+                    # Save the first one we find so we don't
+                    # have to get it again
+                    instance = inst
+                else:
+                    instance_uuids.append(inst['uuid'])
+
+            self._instance_uuids_to_heal = instance_uuids
+        else:
+            # Find the next valid instance on the list
+            while instance_uuids:
                 try:
-                    instance = instance_obj.Instance.get_by_uuid(
-                        context, instance_uuids.pop(0),
-                        expected_attrs=['system_metadata', 'info_cache'])
+                    inst = instance_obj.Instance.get_by_uuid(
+                        context, instance_uuids.pop(0),
+                        expected_attrs=['system_metadata', 'info_cache'])
                 except exception.InstanceNotFound:
                     # Instance is gone.  Try to grab another.
                     continue
-            else:
-                # No more in our copy of uuids.  Pull from the DB.
-                db_instances = instance_obj.InstanceList.get_by_host(
-                    context, self.host, expected_attrs=[])
-                if not db_instances:
-                    # None.. just return.
-                    return
-                instance = db_instances[0]
-                instance_uuids = [inst['uuid'] for inst in db_instances[1:]]
-                self._instance_uuids_to_heal = instance_uuids
 
-        # We have an instance now and it's ours
-        try:
-            # Call to network API to get instance info.. this will
-            # force an update to the instance's info_cache
-            self._get_instance_nw_info(context, instance)
-            LOG.debug(_('Updated the info_cache for instance'),
-                      instance=instance)
-        except Exception as e:
-            LOG.debug(_("An error occurred: %s"), e)
+                # Check the instance hasn't been migrated
+                if inst.host != self.host:
+                    LOG.debug(_('Skipping network cache update for instance '
+                                'because it has been migrated to another '
+                                'host.'), instance=inst)
+                # Check the instance isn't being deleted
+                elif inst.task_state == task_states.DELETING:
+                    LOG.debug(_('Skipping network cache update for instance '
+                                'because it is being deleted.'), instance=inst)
+                else:
+                    instance = inst
+                    break
+
+        if instance:
+            # We have an instance now to refresh
+            try:
+                # Call to network API to get instance info.. this will
+                # force an update to the instance's info_cache
+                self._get_instance_nw_info(context, instance)
+                LOG.debug(_('Updated the network info_cache for instance'),
+                          instance=instance)
+            except Exception:
+                LOG.error(_('An error occurred while refreshing the network '
+                            'cache.'), instance=instance, exc_info=True)
+        else:
+            LOG.debug(_("Didn't find any instances for network info cache "
+                        "update."))
 
     @periodic_task.periodic_task
     def _poll_rebooting_instances(self, context):
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 44fb0bb914..9fcf841720 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -5010,7 +5010,7 @@ class ComputeTestCase(BaseTestCase):
 
         instance_map = {}
         instances = []
-        for x in xrange(5):
+        for x in xrange(8):
             inst_uuid = 'fake-uuid-%s' % x
             instance_map[inst_uuid] = fake_instance.fake_db_instance(
                 uuid=inst_uuid, host=CONF.host, created_at=None)
@@ -5050,40 +5050,50 @@ class ComputeTestCase(BaseTestCase):
         self.stubs.Set(self.compute, '_get_instance_nw_info',
                 fake_get_instance_nw_info)
 
-        call_info['expected_instance'] = instances[0]
+        # Make an instance appear to be still Building
+        instances[0]['vm_state'] = vm_states.BUILDING
+        # Make an instance appear to be Deleting
+        instances[1]['task_state'] = task_states.DELETING
+        # '0', '1' should be skipped..
+        call_info['expected_instance'] = instances[2]
         self.compute._heal_instance_info_cache(ctxt)
         self.assertEqual(1, call_info['get_all_by_host'])
         self.assertEqual(0, call_info['get_by_uuid'])
         self.assertEqual(1, call_info['get_nw_info'])
 
-        call_info['expected_instance'] = instances[1]
+        call_info['expected_instance'] = instances[3]
         self.compute._heal_instance_info_cache(ctxt)
         self.assertEqual(1, call_info['get_all_by_host'])
         self.assertEqual(1, call_info['get_by_uuid'])
         self.assertEqual(2, call_info['get_nw_info'])
 
         # Make an instance switch hosts
-        instances[2]['host'] = 'not-me'
+        instances[4]['host'] = 'not-me'
         # Make an instance disappear
-        instance_map.pop(instances[3]['uuid'])
-        # '2' and '3' should be skipped..
-        call_info['expected_instance'] = instances[4]
+        instance_map.pop(instances[5]['uuid'])
+        # Make an instance switch to be Deleting
+        instances[6]['task_state'] = task_states.DELETING
+        # '4', '5', and '6' should be skipped..
+        call_info['expected_instance'] = instances[7]
         self.compute._heal_instance_info_cache(ctxt)
-        self.assertEqual(call_info['get_all_by_host'], 1)
-        # Incremented for '2' and '4'.. '3' caused a raise above.
-        self.assertEqual(call_info['get_by_uuid'], 3)
-        self.assertEqual(call_info['get_nw_info'], 3)
+        self.assertEqual(1, call_info['get_all_by_host'])
+        self.assertEqual(4, call_info['get_by_uuid'])
+        self.assertEqual(3, call_info['get_nw_info'])
 
         # Should be no more left.
-        self.assertEqual(len(self.compute._instance_uuids_to_heal), 0)
+        self.assertEqual(0, len(self.compute._instance_uuids_to_heal))
+
+        # This should cause a DB query now, so get a list of instances
+        # where none can be processed to make sure we handle that case
+        # cleanly.  Use just '0' (Building) and '1' (Deleting)
+        instances = instances[0:2]
 
-        # This should cause a DB query now so we get first instance
-        # back again
-        call_info['expected_instance'] = instances[0]
         self.compute._heal_instance_info_cache(ctxt)
-        self.assertEqual(call_info['get_all_by_host'], 2)
-        # Stays the same, because the instance came from the DB
-        self.assertEqual(call_info['get_by_uuid'], 3)
-        self.assertEqual(call_info['get_nw_info'], 4)
+        # Should have called the list once more
+        self.assertEqual(2, call_info['get_all_by_host'])
+        # Stays the same because we remove invalid entries from the list
+        self.assertEqual(4, call_info['get_by_uuid'])
+        # Stays the same because we didn't find anything to process
+        self.assertEqual(3, call_info['get_nw_info'])
 
     def test_poll_rescued_instances(self):
         timed_out_time = timeutils.utcnow() - datetime.timedelta(minutes=5)
```
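
As a rough usage example for the sketch above (again illustrative only, not part of the patch), the scenario exercised by the updated test looks like this: the building instance is never queued, the first run heals the first valid instance immediately, and later runs drain the saved list.

```python
# Exercises the Healer sketch above; all names are hypothetical stand-ins.
db = InMemoryDB([
    FakeInstance('fake-uuid-0', 'host-a', vm_state='building'),
    FakeInstance('fake-uuid-1', 'host-a'),
    FakeInstance('fake-uuid-2', 'host-a'),
])
healer = Healer('host-a', db)
healer.heal_one()   # rebuilds the list: skips '0', refreshes '1', queues '2'
healer.heal_one()   # pops '2' from the saved list and refreshes it
healer.heal_one()   # list empty again, so it rebuilds from the DB
```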