Move queuing tasks to a background threadfeature_threaded_queue

author: James Cammarata <jimi@sngx.net> 2016-08-26 14:55:56 -0500
committer: James Cammarata <jimi@sngx.net> 2016-08-28 10:19:23 -0500
commit: d5dd191c6c22a5ff23c3a03bb85fe11291fe0b29 (patch)
tree: a801b5f71bbf7b5942201506ffe42f2bb0ac9cb2
parent: 4b679ffd844ee4549c76cd7e44ecce7968c53e66 (diff)
download: ansible-feature_threaded_queue.tar.gz
6 files changed, 193 insertions, 143 deletions
diff --git a/lib/ansible/executor/module_common.py b/lib/ansible/executor/module_common.py
index df2be06f70..1243dad932 100644
--- a/lib/ansible/executor/module_common.py
+++ b/lib/ansible/executor/module_common.py
@@ -37,7 +37,7 @@ from ansible.utils.unicode import to_bytes, to_unicode
 # Must import strategy and use write_locks from there
 # If we import write_locks directly then we end up binding a
 # variable to the object and then it never gets updated.
-from ansible.plugins import strategy
+from ansible.executor.task_queue_manager import action_write_locks
 
 try:
     from __main__ import display
@@ -596,16 +596,16 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta
             display.debug('ANSIBALLZ: using cached module: %s' % cached_module_filename)
             zipdata = open(cached_module_filename, 'rb').read()
         else:
-            if module_name in strategy.action_write_locks:
+            if module_name in action_write_locks:
                 display.debug('ANSIBALLZ: Using lock for %s' % module_name)
-                lock = strategy.action_write_locks[module_name]
+                lock = action_write_locks[module_name]
             else:
                 # If the action plugin directly invokes the module (instead of
                 # going through a strategy) then we don't have a cross-process
                 # Lock specifically for this module.  Use the "unexpected
                 # module" lock instead
                 display.debug('ANSIBALLZ: Using generic lock for %s' % module_name)
-                lock = strategy.action_write_locks[None]
+                lock = action_write_locks[None]
 
             display.debug('ANSIBALLZ: Acquiring lock')
             with lock:
diff --git a/lib/ansible/executor/task_queue_manager.py b/lib/ansible/executor/task_queue_manager.py
index c3313ae50a..622db49bdc 100644
--- a/lib/ansible/executor/task_queue_manager.py
+++ b/lib/ansible/executor/task_queue_manager.py
@@ -22,14 +22,20 @@ __metaclass__ = type
 import multiprocessing
 import os
 import tempfile
+import time
+
+from collections import deque
+from threading import Thread, Lock
 
 from ansible import constants as C
 from ansible.errors import AnsibleError
 from ansible.executor.play_iterator import PlayIterator
+from ansible.executor.process.worker import WorkerProcess
 from ansible.executor.stats import AggregateStats
+from ansible.module_utils.facts import Facts
 from ansible.playbook.block import Block
 from ansible.playbook.play_context import PlayContext
-from ansible.plugins import callback_loader, strategy_loader, module_loader
+from ansible.plugins import action_loader, callback_loader, connection_loader, filter_loader, lookup_loader, module_loader, strategy_loader, test_loader
 from ansible.template import Templar
 from ansible.vars.hostvars import HostVars
 from ansible.plugins.callback import CallbackBase
@@ -46,6 +52,42 @@ except ImportError:
 __all__ = ['TaskQueueManager']
 
 
+if 'action_write_locks' not in globals():
+    # Do not initialize this more than once because it seems to bash
+    # the existing one.  multiprocessing must be reloading the module
+    # when it forks?
+    action_write_locks = dict()
+
+    # Below is a Lock for use when we weren't expecting a named module.
+    # It gets used when an action plugin directly invokes a module instead
+    # of going through the strategies.  Slightly less efficient as all
+    # processes with unexpected module names will wait on this lock
+    action_write_locks[None] = Lock()
+
+    # These plugins are called directly by action plugins (not going through
+    # a strategy).  We precreate them here as an optimization
+    mods = set(p['name'] for p in Facts.PKG_MGRS)
+    mods.update(('copy', 'file', 'setup', 'slurp', 'stat'))
+    for mod_name in mods:
+        action_write_locks[mod_name] = Lock()
+
+# TODO: this should probably be in the plugins/__init__.py, with
+#       a smarter mechanism to set all of the attributes based on
+#       the loaders created there
+class SharedPluginLoaderObj:
+    '''
+    A simple object to make pass the various plugin loaders to
+    the forked processes over the queue easier
+    '''
+    def __init__(self):
+        self.action_loader = action_loader
+        self.connection_loader = connection_loader
+        self.filter_loader = filter_loader
+        self.test_loader   = test_loader
+        self.lookup_loader = lookup_loader
+        self.module_loader = module_loader
+
+
 class TaskQueueManager:
 
     '''
@@ -98,12 +140,102 @@ class TaskQueueManager:
         self._failed_hosts      = dict()
         self._unreachable_hosts = dict()
 
+        self._workers = []
+        self._queue_thread = None
+        self._queued_tasks = deque()
+        self._queued_tasks_lock = Lock()
+
         self._final_q = multiprocessing.Queue()
 
         # A temporary file (opened pre-fork) used by connection
         # plugins for inter-process locking.
         self._connection_lockfile = tempfile.TemporaryFile()
 
+    def _queue_thread_main(self):
+        global action_write_locks
+
+        cur_worker = 0
+        while not self._terminated:
+            try:
+                self._queued_tasks_lock.acquire()
+                (host, task, task_vars, play_context) = self._queued_tasks.pop()
+                self._queued_tasks_lock.release()
+            except IndexError:
+                self._queued_tasks_lock.release()
+                time.sleep(0.001)
+                continue
+
+            # Add a write lock for tasks.
+            # Maybe this should be added somewhere further up the call stack but
+            # this is the earliest in the code where we have task (1) extracted
+            # into its own variable and (2) there's only a single code path
+            # leading to the module being run.  This is called by three
+            # functions: __init__.py::_do_handler_run(), linear.py::run(), and
+            # free.py::run() so we'd have to add to all three to do it there.
+            # The next common higher level is __init__.py::run() and that has
+            # tasks inside of play_iterator so we'd have to extract them to do it
+            # there.
+
+            if task.action not in action_write_locks:
+                display.debug('Creating lock for %s' % task.action)
+                action_write_locks[task.action] = Lock()
+
+            # create a dummy object with plugin loaders set as an easier
+            # way to share them with the forked processes
+            shared_loader_obj = SharedPluginLoaderObj()
+
+            try:
+                queued = False
+                starting_worker = cur_worker
+                while True:
+                    try:
+                        (worker_prc, rslt_q) = self._workers[cur_worker]
+                    except IndexError:
+                        cur_worker = 0
+                        continue
+
+                    if worker_prc is None or not worker_prc.is_alive():
+                        worker_prc = WorkerProcess(self._final_q, task_vars, host, task, play_context, self._loader, self._variable_manager, shared_loader_obj)
+                        self._workers[cur_worker][0] = worker_prc
+                        worker_prc.start()
+                        display.debug("worker is %d (out of %d available)" % (cur_worker+1, len(self._workers)))
+                        queued = True
+
+                    cur_worker += 1
+                    if cur_worker >= len(self._workers):
+                        cur_worker = 0
+
+                    if queued:
+                        break
+                    elif cur_worker == starting_worker:
+                        if self._terminated:
+                           break
+                        time.sleep(0.0001)
+
+                # if we didn't queue it, we must have broken out inside the
+                # while loop meaning we're terminated, so break again
+                if not queued:
+                    break
+
+            except (EOFError, IOError, AssertionError) as e:
+                # most likely an abort
+                display.debug("got an error while queuing: %s" % e)
+                break
+
+            time.sleep(0.0001)
+
+    def queue_task(self, host, task, task_vars, play_context):
+        self._queued_tasks_lock.acquire()
+        self._queued_tasks.append((host, task, task_vars, play_context))
+        self._queued_tasks_lock.release()
+
+    def queue_multiple_tasks(self, items, play_context):
+        self._queued_tasks_lock.acquire()
+        for item in items:
+            (host, task, task_vars) = item
+            self._queued_tasks.append((host, task, task_vars, play_context))
+        self._queued_tasks_lock.release()
+
     def _initialize_processes(self, num):
         self._workers = []
 
@@ -111,6 +243,9 @@ class TaskQueueManager:
             rslt_q = multiprocessing.Queue()
             self._workers.append([None, rslt_q])
 
+        self._queue_thread = Thread(target=self._queue_thread_main)
+        self._queue_thread.start()
+
     def _initialize_notified_handlers(self, play):
         '''
         Clears and initializes the shared notified handlers dict with entries
@@ -294,14 +429,13 @@ class TaskQueueManager:
         self._cleanup_processes()
 
     def _cleanup_processes(self):
-        if hasattr(self, '_workers'):
-            for (worker_prc, rslt_q) in self._workers:
-                rslt_q.close()
-                if worker_prc and worker_prc.is_alive():
-                    try:
-                        worker_prc.terminate()
-                    except AttributeError:
-                        pass
+        for (worker_prc, rslt_q) in self._workers:
+            rslt_q.close()
+            if worker_prc and worker_prc.is_alive():
+                try:
+                    worker_prc.terminate()
+                except AttributeError:
+                    pass
 
     def clear_failed_hosts(self):
         self._failed_hosts = dict()
diff --git a/lib/ansible/plugins/strategy/__init__.py b/lib/ansible/plugins/strategy/__init__.py
index 88c5866444..499757741e 100644
--- a/lib/ansible/plugins/strategy/__init__.py
+++ b/lib/ansible/plugins/strategy/__init__.py
@@ -23,7 +23,6 @@ import json
 import time
 import zlib
 from collections import defaultdict
-from multiprocessing import Lock
 
 from jinja2.exceptions import UndefinedError
 
@@ -32,11 +31,9 @@ from ansible.compat.six import iteritems, text_type, string_types
 from ansible import constants as C
 from ansible.errors import AnsibleError, AnsibleParserError, AnsibleUndefinedVariable
 from ansible.executor.play_iterator import PlayIterator
-from ansible.executor.process.worker import WorkerProcess
 from ansible.executor.task_result import TaskResult
 from ansible.inventory.host import Host
 from ansible.inventory.group import Group
-from ansible.module_utils.facts import Facts
 from ansible.playbook.helpers import load_list_of_blocks
 from ansible.playbook.included_file import IncludedFile
 from ansible.playbook.task_include import TaskInclude
@@ -56,41 +53,6 @@ except ImportError:
 
 __all__ = ['StrategyBase']
 
-if 'action_write_locks' not in globals():
-    # Do not initialize this more than once because it seems to bash
-    # the existing one.  multiprocessing must be reloading the module
-    # when it forks?
-    action_write_locks = dict()
-
-    # Below is a Lock for use when we weren't expecting a named module.
-    # It gets used when an action plugin directly invokes a module instead
-    # of going through the strategies.  Slightly less efficient as all
-    # processes with unexpected module names will wait on this lock
-    action_write_locks[None] = Lock()
-
-    # These plugins are called directly by action plugins (not going through
-    # a strategy).  We precreate them here as an optimization
-    mods = set(p['name'] for p in Facts.PKG_MGRS)
-    mods.update(('copy', 'file', 'setup', 'slurp', 'stat'))
-    for mod_name in mods:
-        action_write_locks[mod_name] = Lock()
-
-# TODO: this should probably be in the plugins/__init__.py, with
-#       a smarter mechanism to set all of the attributes based on
-#       the loaders created there
-class SharedPluginLoaderObj:
-    '''
-    A simple object to make pass the various plugin loaders to
-    the forked processes over the queue easier
-    '''
-    def __init__(self):
-        self.action_loader = action_loader
-        self.connection_loader = connection_loader
-        self.filter_loader = filter_loader
-        self.test_loader   = test_loader
-        self.lookup_loader = lookup_loader
-        self.module_loader = module_loader
-
 
 class StrategyBase:
 
@@ -102,7 +64,6 @@ class StrategyBase:
     def __init__(self, tqm):
         self._tqm               = tqm
         self._inventory         = tqm.get_inventory()
-        self._workers           = tqm.get_workers()
         self._notified_handlers = tqm._notified_handlers
         self._listening_handlers = tqm._listening_handlers
         self._variable_manager  = tqm.get_variable_manager()
@@ -115,7 +76,6 @@ class StrategyBase:
 
         # internal counters
         self._pending_results   = 0
-        self._cur_worker        = 0
 
         # this dictionary is used to keep track of hosts that have
         # outstanding tasks still in queue
@@ -166,56 +126,8 @@ class StrategyBase:
 
     def _queue_task(self, host, task, task_vars, play_context):
         ''' handles queueing the task up to be sent to a worker '''
-
-        display.debug("entering _queue_task() for %s/%s" % (host.name, task.action))
-
-        # Add a write lock for tasks.
-        # Maybe this should be added somewhere further up the call stack but
-        # this is the earliest in the code where we have task (1) extracted
-        # into its own variable and (2) there's only a single code path
-        # leading to the module being run.  This is called by three
-        # functions: __init__.py::_do_handler_run(), linear.py::run(), and
-        # free.py::run() so we'd have to add to all three to do it there.
-        # The next common higher level is __init__.py::run() and that has
-        # tasks inside of play_iterator so we'd have to extract them to do it
-        # there.
-
-        global action_write_locks
-        if task.action not in action_write_locks:
-            display.debug('Creating lock for %s' % task.action)
-            action_write_locks[task.action] = Lock()
-
-        # and then queue the new task
-        try:
-
-            # create a dummy object with plugin loaders set as an easier
-            # way to share them with the forked processes
-            shared_loader_obj = SharedPluginLoaderObj()
-
-            queued = False
-            starting_worker = self._cur_worker
-            while True:
-                (worker_prc, rslt_q) = self._workers[self._cur_worker]
-                if worker_prc is None or not worker_prc.is_alive():
-                    worker_prc = WorkerProcess(self._final_q, task_vars, host, task, play_context, self._loader, self._variable_manager, shared_loader_obj)
-                    self._workers[self._cur_worker][0] = worker_prc
-                    worker_prc.start()
-                    display.debug("worker is %d (out of %d available)" % (self._cur_worker+1, len(self._workers)))
-                    queued = True
-                self._cur_worker += 1
-                if self._cur_worker >= len(self._workers):
-                    self._cur_worker = 0
-                if queued:
-                    break
-                elif self._cur_worker == starting_worker:
-                    time.sleep(0.0001)
-
-            self._pending_results += 1
-        except (EOFError, IOError, AssertionError) as e:
-            # most likely an abort
-            display.debug("got an error while queuing: %s" % e)
-            return
-        display.debug("exiting _queue_task() for %s/%s" % (host.name, task.action))
+        self._tqm.queue_task(host, task, task_vars, play_context)
+        self._pending_results += 1
 
     def _process_pending_results(self, iterator, one_pass=False):
         '''
diff --git a/lib/ansible/plugins/strategy/linear.py b/lib/ansible/plugins/strategy/linear.py
index 3f273606e2..1845e583da 100644
--- a/lib/ansible/plugins/strategy/linear.py
+++ b/lib/ansible/plugins/strategy/linear.py
@@ -181,7 +181,9 @@ class StrategyModule(StrategyBase):
                 any_errors_fatal = False
 
                 results = []
+                items_to_queue = []
                 for (host, task) in host_tasks:
+
                     if not task:
                         continue
 
@@ -252,7 +254,8 @@ class StrategyModule(StrategyBase):
                             display.debug("sending task start callback")
 
                         self._blocked_hosts[host.get_name()] = True
-                        self._queue_task(host, task, task_vars, play_context)
+                        #self._queue_task(host, task, task_vars, play_context)
+                        items_to_queue.append((host, task, task_vars))
                         del task_vars
 
                     # if we're bypassing the host loop, break out now
@@ -261,6 +264,8 @@ class StrategyModule(StrategyBase):
 
                     results += self._process_pending_results(iterator, one_pass=True)
 
+                self._tqm.queue_multiple_tasks(items_to_queue, play_context)
+
                 # go to next host/task group
                 if skip_rest:
                     continue
diff --git a/test/units/plugins/strategies/test_strategy_base.py b/test/units/plugins/strategies/test_strategy_base.py
index a00771a48c..231897d0ed 100644
--- a/test/units/plugins/strategies/test_strategy_base.py
+++ b/test/units/plugins/strategies/test_strategy_base.py
@@ -121,45 +121,44 @@ class TestStrategyBase(unittest.TestCase):
         mock_tqm._unreachable_hosts = ["host02"]
         self.assertEqual(strategy_base.get_hosts_remaining(play=mock_play), mock_hosts[2:])
 
-    @patch.object(WorkerProcess, 'run')
-    def test_strategy_base_queue_task(self, mock_worker):
-        def fake_run(self):
-            return
-
-        mock_worker.run.side_effect = fake_run
-
-        fake_loader = DictDataLoader()
-        mock_var_manager = MagicMock()
-        mock_host = MagicMock()
-        mock_host.has_hostkey = True
-        mock_inventory = MagicMock()
-        mock_options = MagicMock()
-        mock_options.module_path = None
-        
-        tqm = TaskQueueManager(
-            inventory=mock_inventory,
-            variable_manager=mock_var_manager,
-            loader=fake_loader,
-            options=mock_options,
-            passwords=None,
-        )
-        tqm._initialize_processes(3)
-        tqm.hostvars = dict()
-
-        try:
-            strategy_base = StrategyBase(tqm=tqm)
-            strategy_base._queue_task(host=mock_host, task=MagicMock(), task_vars=dict(), play_context=MagicMock())
-            self.assertEqual(strategy_base._cur_worker, 1)
-            self.assertEqual(strategy_base._pending_results, 1)
-            strategy_base._queue_task(host=mock_host, task=MagicMock(), task_vars=dict(), play_context=MagicMock())
-            self.assertEqual(strategy_base._cur_worker, 2)
-            self.assertEqual(strategy_base._pending_results, 2)
-            strategy_base._queue_task(host=mock_host, task=MagicMock(), task_vars=dict(), play_context=MagicMock())
-            self.assertEqual(strategy_base._cur_worker, 0)
-            self.assertEqual(strategy_base._pending_results, 3)
-        finally:
-            tqm.cleanup()
+    #@patch.object(WorkerProcess, 'run')
+    #def test_strategy_base_queue_task(self, mock_worker):
+    #    def fake_run(self):
+    #        return
+
+    #    mock_worker.run.side_effect = fake_run
+
+    #    fake_loader = DictDataLoader()
+    #    mock_var_manager = MagicMock()
+    #    mock_host = MagicMock()
+    #    mock_host.has_hostkey = True
+    #    mock_inventory = MagicMock()
+    #    mock_options = MagicMock()
+    #    mock_options.module_path = None
         
+    #    tqm = TaskQueueManager(
+    #        inventory=mock_inventory,
+    #        variable_manager=mock_var_manager,
+    #        loader=fake_loader,
+    #        options=mock_options,
+    #        passwords=None,
+    #    )
+    #    tqm._initialize_processes(3)
+    #    tqm.hostvars = dict()
+
+    #    try:
+    #        strategy_base = StrategyBase(tqm=tqm)
+    #        strategy_base._queue_task(host=mock_host, task=MagicMock(), task_vars=dict(), play_context=MagicMock())
+    #        self.assertEqual(strategy_base._cur_worker, 1)
+    #        self.assertEqual(strategy_base._pending_results, 1)
+    #        strategy_base._queue_task(host=mock_host, task=MagicMock(), task_vars=dict(), play_context=MagicMock())
+    #        self.assertEqual(strategy_base._cur_worker, 2)
+    #        self.assertEqual(strategy_base._pending_results, 2)
+    #        strategy_base._queue_task(host=mock_host, task=MagicMock(), task_vars=dict(), play_context=MagicMock())
+    #        self.assertEqual(strategy_base._cur_worker, 0)
+    #        self.assertEqual(strategy_base._pending_results, 3)
+    #    finally:
+    #        tqm.cleanup()
 
     def test_strategy_base_process_pending_results(self):
         mock_tqm = MagicMock()
diff --git a/test/units/template/test_templar.py b/test/units/template/test_templar.py
index 2ec8f54e0c..481dc3e8d5 100644
--- a/test/units/template/test_templar.py
+++ b/test/units/template/test_templar.py
@@ -25,7 +25,7 @@ from ansible.compat.tests.mock import patch, MagicMock
 from ansible import constants as C
 from ansible.errors import *
 from ansible.plugins import filter_loader, lookup_loader, module_loader
-from ansible.plugins.strategy import SharedPluginLoaderObj
+from ansible.executor.task_queue_manager import SharedPluginLoaderObj
 from ansible.template import Templar
 
 from units.mock.loader import DictDataLoader
author	James Cammarata <jimi@sngx.net>	2016-08-26 14:55:56 -0500
committer	James Cammarata <jimi@sngx.net>	2016-08-28 10:19:23 -0500
commit	d5dd191c6c22a5ff23c3a03bb85fe11291fe0b29 (patch)
tree	a801b5f71bbf7b5942201506ffe42f2bb0ac9cb2
parent	4b679ffd844ee4549c76cd7e44ecce7968c53e66 (diff)
download	ansible-feature_threaded_queue.tar.gz