Refactoring split_args into sub-functions

author: James Cammarata <jimi@sngx.net> 2014-07-24 20:00:57 -0500
committer: James Cammarata <jimi@sngx.net> 2014-07-24 22:00:26 -0500
commit: fb738bd7b144c502d931d0fc0330c3135c6cbb42 (patch)
tree: 5591c98c6f0573f4050812b17d1236341f875415
parent: a4ce0b04639d37f1fef4f8a2d3db19267702e595 (diff)
download: ansible-fb738bd7b144c502d931d0fc0330c3135c6cbb42.tar.gz
1 files changed, 73 insertions, 86 deletions
diff --git a/lib/ansible/utils/splitter.py b/lib/ansible/utils/splitter.py
index 973c6e8ed2..ca2c37cd00 100644
--- a/lib/ansible/utils/splitter.py
+++ b/lib/ansible/utils/splitter.py
@@ -15,6 +15,39 @@
 # You should have received a copy of the GNU General Public License
 # along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
 
+def _get_quote_state(token, quote_char):
+    '''
+    the goal of this function is to determine if the quoted string
+    is unterminated, in which case it needs to be put back together
+    '''
+    # the char before the current one, used to see if
+    # the current character is escaped
+    prev_char = None
+    for idx, cur_char in enumerate(token):
+        if idx > 0:
+            prev_char = token[idx-1]
+        if cur_char in '"\'':
+            if quote_char:
+                if cur_char == quote_char and prev_char != '\\':
+                    quote_char = None
+            else:
+                quote_char = cur_char
+    return quote_char
+
+def _count_jinja2_blocks(token, cur_depth, open_token, close_token):
+    '''
+    this function counts the number of opening/closing blocks for a
+    given opening/closing type and adjusts the current depth for that
+    block based on the difference
+    '''
+    num_open  = token.count(open_token)
+    num_close = token.count(close_token)
+    if num_open != num_close:
+        cur_depth += (num_open - num_close)
+        if cur_depth < 0:
+            cur_depth = 0
+    return cur_depth
+
 def split_args(args):
     '''
     Splits args on whitespace, but intelligently reassembles
@@ -24,15 +57,13 @@ def split_args(args):
     jinja2 blocks, however this function is/will be used in the
     core portions as well before the args are templated.
 
-    example input: a=b c=d
-    example output: dict(a='b', c='d')
+    example input: a=b c="foo bar"
+    example output: ['a=b', 'c="foo bar"']
 
     Basically this is a variation of shlex that has some more intelligence for
     how Ansible needs to use it.
     '''
 
-    # FIXME: refactoring into smaller functions
-
     # the list of params parsed out of the arg string
     # this is going to be the result value when we are done
     params = []
@@ -40,52 +71,32 @@ def split_args(args):
     # here we encode the args, so we have a uniform charset to
     # work with, and split on white space
     args = args.encode('utf-8')
-    items = args.split()
+    tokens = args.split()
 
-    # iterate over the items, and reassemble any that may have been
-    # split on a space inside a jinja2 block. 
+    # iterate over the tokens, and reassemble any that may have been
+    # split on a space inside a jinja2 block.
     # ex if tokens are "{{", "foo", "}}" these go together
 
     # These variables are used
     # to keep track of the state of the parsing, since blocks and quotes
     # may be nested within each other.
 
-    inside_quotes = False
     quote_char = None
-    split_print_depth = 0
-    split_block_depth = 0
-    split_comment_depth = 0
+    inside_quotes = False
+    print_depth   = 0 # used to count nested jinja2 {{ }} blocks
+    block_depth   = 0 # used to count nested jinja2 {% %} blocks
+    comment_depth = 0 # used to count nested jinja2 {# #} blocks
 
-    # now we loop over each split item, coalescing items if the white space
+    # now we loop over each split token, coalescing tokens if the white space
     # split occurred within quotes or a jinja2 block of some kind
+    for token in tokens:
 
-    for item in items:
-
-        item = item.strip()
+        token = token.strip()
 
         # store the previous quoting state for checking later
         was_inside_quotes = inside_quotes
-
-        # determine the current quoting state
-        # the goal of this block is to determine if the quoted string
-        # is unterminated in which case it needs to be put back together
-
-        bc = None # before_char
-        for i in range(0, len(item)):  # use enumerate
-
-            c = item[i]  # current_char
-
-            if i > 0:
-                bc = item[i-1]
-
-            if c in ('"', "'"):
-                if inside_quotes:
-                    if c == quote_char and bc != '\\':
-                        inside_quotes = False
-                        quote_char = None
-                else:
-                    inside_quotes = True
-                    quote_char = c
+        quote_char = _get_quote_state(token, quote_char)
+        inside_quotes = quote_char is not None
 
         # multiple conditions may append a token to the list of params,
         # so we keep track with this flag to make sure it only happens once
@@ -93,69 +104,45 @@ def split_args(args):
         # it to the end of the last token
         appended = False
 
-        # if we're inside quotes now, but weren't before, append the item
+        # if we're inside quotes now, but weren't before, append the token 
         # to the end of the list, since we'll tack on more to it later
-
+        # otherwise, if we're inside any jinja2 block, inside quotes, or we were
+        # inside quotes (but aren't now) concat this token to the last param
         if inside_quotes and not was_inside_quotes:
-            params.append(item)
+            params.append(token)
             appended = True
-
-        # otherwise, if we're inside any jinja2 block, inside quotes, or we were
-        # inside quotes (but aren't now) concat this item to the last param
-        # FIXME: just or these all together
-        elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes):
-            params[-1] = "%s %s" % (params[-1], item)
+        elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes:
+            params[-1] = "%s %s" % (params[-1], token)
             appended = True
 
-        # these variables are used to determine the current depth of each jinja2
-        # block type, by counting the number of openings and closing tags
-        # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM)
+        # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
+        # and may append the current token to the params (if we haven't previously done so)
+        prev_print_depth = print_depth
+        print_depth = _count_jinja2_blocks(token, print_depth, "{{", "}}")
+        if print_depth != prev_print_depth and not appended:
+            params.append(token)
+            appended = True
 
-        num_print_open    = item.count('{{')
-        num_print_close   = item.count('}}')
-        num_block_open    = item.count('{%')
-        num_block_close   = item.count('%}')
-        num_comment_open  = item.count('{#')
-        num_comment_close = item.count('#}')
+        prev_block_depth = block_depth
+        block_depth = _count_jinja2_blocks(token, block_depth, "{%", "%}")
+        if block_depth != prev_block_depth and not appended:
+            params.append(token)
+            appended = True
 
-        # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
-        # and may append the current item to the params (if we haven't previously done so)
-
-        # FIXME: DRY a bit
-        if num_print_open != num_print_close:
-            split_print_depth += (num_print_open - num_print_close)
-            if not appended:
-                params.append(item)
-                appended = True
-            if split_print_depth < 0:
-                split_print_depth = 0
-
-        if num_block_open != num_block_close:
-            split_block_depth += (num_block_open - num_block_close)
-            if not appended:
-                params.append(item)
-                appended = True
-            if split_block_depth < 0:
-                split_block_depth = 0
-
-        if num_comment_open != num_comment_close:
-            split_comment_depth += (num_comment_open - num_comment_close)
-            if not appended:
-                params.append(item)
-                appended = True
-            if split_comment_depth < 0:
-                split_comment_depth = 0
+        prev_comment_depth = comment_depth
+        comment_depth = _count_jinja2_blocks(token, comment_depth, "{#", "#}")
+        if comment_depth != prev_comment_depth and not appended:
+            params.append(token)
+            appended = True
 
         # finally, if we're at zero depth for all blocks and not inside quotes, and have not
         # yet appended anything to the list of params, we do so now
-
-        if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended:
-            params.append(item)
+        if not (print_depth or block_depth or comment_depth) and not inside_quotes and not appended:
+            params.append(token)
 
     # If we're done and things are not at zero depth or we're still inside quotes,
     # raise an error to indicate that the args were unbalanced
-
-    if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes:
+    if print_depth or block_depth or comment_depth or inside_quotes:
         raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
 
     # finally, we decode each param back to the unicode it was in the arg string
author	James Cammarata <jimi@sngx.net>	2014-07-24 20:00:57 -0500
committer	James Cammarata <jimi@sngx.net>	2014-07-24 22:00:26 -0500
commit	fb738bd7b144c502d931d0fc0330c3135c6cbb42 (patch)
tree	5591c98c6f0573f4050812b17d1236341f875415
parent	a4ce0b04639d37f1fef4f8a2d3db19267702e595 (diff)
download	ansible-fb738bd7b144c502d931d0fc0330c3135c6cbb42.tar.gz