Merge "Reset stack status even when lock engine_id is None" into stable/kilo

author: Jenkins <jenkins@review.openstack.org> 2015-11-24 22:04:03 +0000
committer: Gerrit Code Review <review@openstack.org> 2015-11-24 22:04:03 +0000
commit: f1081b678ea8f93630a3f628197814a80b01dfc6 (patch)
tree: fe2ee26bf3003b20f7510704145b7ace3a6e1cee
parent: e0c8202622333873588cab245e2085cea1863bfd (diff)
parent: 20f6676c8b8db371c7264d3c3c9510640a896a55 (diff)
download: heat-f1081b678ea8f93630a3f628197814a80b01dfc6.tar.gz
3 files changed, 80 insertions, 30 deletions
diff --git a/heat/engine/service.py b/heat/engine/service.py
index 069d6d3ef..50e769279 100644
--- a/heat/engine/service.py
+++ b/heat/engine/service.py
@@ -1597,30 +1597,50 @@ class EngineService(service.Service):
 
     def reset_stack_status(self):
         cnxt = context.get_admin_context()
-        filters = {'status': parser.Stack.IN_PROGRESS}
+        filters = {
+            'status': parser.Stack.IN_PROGRESS,
+            'convergence': False
+        }
         stacks = stack_object.Stack.get_all(cnxt,
                                             filters=filters,
                                             tenant_safe=False,
                                             show_nested=True) or []
         for s in stacks:
+            stack_id = s.id
             stk = parser.Stack.load(cnxt, stack=s,
                                     use_stored_context=True)
             lock = stack_lock.StackLock(cnxt, stk, self.engine_id)
-            # If stacklock is released, means stack status may changed.
             engine_id = lock.get_engine_id()
-            if not engine_id:
-                continue
-            # Try to steal the lock and set status to failed.
             try:
-                lock.acquire(retry=False)
+                with lock.thread_lock(retry=False):
+
+                    # refetch stack and confirm it is still IN_PROGRESS
+                    s = stack_object.Stack.get_by_id(
+                        cnxt,
+                        stack_id,
+                        tenant_safe=False,
+                        eager_load=True)
+                    if s.status != parser.Stack.IN_PROGRESS:
+                        lock.release()
+                        continue
+
+                stk = parser.Stack.load(cnxt, stack=s,
+                                        use_stored_context=True)
+                LOG.info(_LI('Engine %(engine)s went down when stack '
+                             '%(stack_id)s was in action %(action)s'),
+                         {'engine': engine_id, 'action': stk.action,
+                          'stack_id': stk.id})
+
+                # Set stack and resources status to FAILED in sub thread
+                self.thread_group_mgr.start_with_acquired_lock(
+                    stk,
+                    lock,
+                    self.set_stack_and_resource_to_failed,
+                    stk
+                )
             except exception.ActionInProgress:
                 continue
-            LOG.info(_LI('Engine %(engine)s went down when stack %(stack_id)s'
-                         ' was in action %(action)s'),
-                     {'engine': engine_id, 'action': stk.action,
-                      'stack_id': stk.id})
-
-            # Set stack and resources status to FAILED in sub thread
-            self.thread_group_mgr.start_with_acquired_lock(
-                stk, lock, self.set_stack_and_resource_to_failed, stk
-            )
+            except Exception:
+                LOG.exception(_LE('Error while resetting stack: %s')
+                              % stack_id)
+                continue
diff --git a/heat/engine/stack_lock.py b/heat/engine/stack_lock.py
index c66758cda..627083b75 100644
--- a/heat/engine/stack_lock.py
+++ b/heat/engine/stack_lock.py
@@ -132,14 +132,17 @@ class StackLock(object):
                                      'stack': stack_id})
 
     @contextlib.contextmanager
-    def thread_lock(self, stack_id):
+    def thread_lock(self, stack_id, retry=True):
         """
         Acquire a lock and release it only if there is an exception.  The
         release method still needs to be scheduled to be run at the
         end of the thread using the Thread.link method.
+
+        :param retry: When True, retry if lock was released while stealing.
+        :type retry: boolean
         """
         try:
-            self.acquire()
+            self.acquire(retry)
             yield
         except exception.ActionInProgress:
             raise
diff --git a/heat/tests/test_engine_service.py b/heat/tests/test_engine_service.py
index b35a7f793..95ffc9476 100644
--- a/heat/tests/test_engine_service.py
+++ b/heat/tests/test_engine_service.py
@@ -924,7 +924,7 @@ class StackServiceCreateUpdateDeleteTest(common.HeatTestCase):
         stack_lock.StackLock.try_acquire().AndReturn(self.man.engine_id)
         # this is to simulate lock release on DummyThreadGroup stop
         self.m.StubOutWithMock(stack_lock.StackLock, 'acquire')
-        stack_lock.StackLock.acquire().AndReturn(None)
+        stack_lock.StackLock.acquire(True).AndReturn(None)
 
         self.m.StubOutWithMock(self.man.thread_group_mgr, 'stop')
         self.man.thread_group_mgr.stop(stack.id).AndReturn(None)
@@ -992,7 +992,7 @@ class StackServiceCreateUpdateDeleteTest(common.HeatTestCase):
             stack_identity=mox.IgnoreArg()).AndReturn(None)
 
         self.m.StubOutWithMock(stack_lock.StackLock, 'acquire')
-        stack_lock.StackLock.acquire().AndReturn(None)
+        stack_lock.StackLock.acquire(True).AndReturn(None)
         self.m.ReplayAll()
 
         self.assertIsNone(self.man.delete_stack(self.ctx, stack.identifier()))
@@ -1019,7 +1019,7 @@ class StackServiceCreateUpdateDeleteTest(common.HeatTestCase):
             self.ctx, "other-engine-fake-uuid").AndReturn(False)
 
         self.m.StubOutWithMock(stack_lock.StackLock, 'acquire')
-        stack_lock.StackLock.acquire().AndReturn(None)
+        stack_lock.StackLock.acquire(True).AndReturn(None)
         self.m.ReplayAll()
 
         self.assertIsNone(self.man.delete_stack(self.ctx, stack.identifier()))
@@ -3492,6 +3492,7 @@ class StackServiceTest(common.HeatTestCase):
     @mock.patch('heat.engine.service.ThreadGroupManager',
                 return_value=mock.Mock())
     @mock.patch.object(stack_object.Stack, 'get_all')
+    @mock.patch.object(stack_object.Stack, 'get_by_id')
     @mock.patch('heat.engine.stack_lock.StackLock',
                 return_value=mock.Mock())
     @mock.patch.object(parser.Stack, 'load')
@@ -3501,6 +3502,7 @@ class StackServiceTest(common.HeatTestCase):
             mock_admin_context,
             mock_stack_load,
             mock_stacklock,
+            mock_get_by_id,
             mock_get_all,
             mock_thread):
         mock_admin_context.return_value = self.ctx
@@ -3509,7 +3511,19 @@ class StackServiceTest(common.HeatTestCase):
         db_stack.id = 'foo'
         db_stack.status = 'IN_PROGRESS'
         db_stack.status_reason = None
-        mock_get_all.return_value = [db_stack]
+
+        unlocked_stack = mock.MagicMock()
+        unlocked_stack.id = 'bar'
+        unlocked_stack.status = 'IN_PROGRESS'
+        unlocked_stack.status_reason = None
+
+        unlocked_stack_failed = mock.MagicMock()
+        unlocked_stack_failed.id = 'bar'
+        unlocked_stack_failed.status = 'FAILED'
+        unlocked_stack_failed.status_reason = 'because'
+
+        mock_get_all.return_value = [db_stack, unlocked_stack]
+        mock_get_by_id.side_effect = [db_stack, unlocked_stack_failed]
 
         fake_stack = mock.MagicMock()
         fake_stack.action = 'CREATE'
@@ -3518,26 +3532,39 @@ class StackServiceTest(common.HeatTestCase):
 
         mock_stack_load.return_value = fake_stack
 
-        fake_lock = mock.MagicMock()
-        fake_lock.get_engine_id.return_value = 'old-engine'
-        fake_lock.acquire.return_value = None
-        mock_stacklock.return_value = fake_lock
+        lock1 = mock.MagicMock()
+        lock1.get_engine_id.return_value = 'old-engine'
+        lock1.acquire.return_value = None
+        lock2 = mock.MagicMock()
+        lock2.acquire.return_value = None
+        mock_stacklock.side_effect = [lock1, lock2]
 
         self.eng.thread_group_mgr = mock_thread
 
         self.eng.reset_stack_status()
 
         mock_admin_context.assert_called_once_with()
-        filters = {'status': parser.Stack.IN_PROGRESS}
+        filters = {
+            'status': parser.Stack.IN_PROGRESS,
+            'convergence': False
+        }
         mock_get_all.assert_called_once_with(self.ctx,
                                              filters=filters,
                                              tenant_safe=False,
                                              show_nested=True)
-        mock_stack_load.assert_called_once_with(self.ctx,
-                                                stack=db_stack,
-                                                use_stored_context=True)
+        mock_get_by_id.assert_has_calls([
+            mock.call(self.ctx, 'foo', tenant_safe=False, eager_load=True),
+            mock.call(self.ctx, 'bar', tenant_safe=False, eager_load=True),
+        ])
+        mock_stack_load.assert_has_calls([
+            mock.call(self.ctx, stack=db_stack, use_stored_context=True),
+            mock.call(self.ctx, stack=db_stack, use_stored_context=True),
+            mock.call(self.ctx, stack=unlocked_stack,
+                      use_stored_context=True),
+        ])
+        self.assertTrue(lock2.release.called)
         mock_thread.start_with_acquired_lock.assert_called_once_with(
-            fake_stack, fake_lock,
+            fake_stack, lock1,
             self.eng.set_stack_and_resource_to_failed, fake_stack
         )
author	Jenkins <jenkins@review.openstack.org>	2015-11-24 22:04:03 +0000
committer	Gerrit Code Review <review@openstack.org>	2015-11-24 22:04:03 +0000
commit	f1081b678ea8f93630a3f628197814a80b01dfc6 (patch)
tree	fe2ee26bf3003b20f7510704145b7ace3a6e1cee
parent	e0c8202622333873588cab245e2085cea1863bfd (diff)
parent	20f6676c8b8db371c7264d3c3c9510640a896a55 (diff)
download	heat-f1081b678ea8f93630a3f628197814a80b01dfc6.tar.gz