diff options
author | Toshio Kuratomi <a.badger@gmail.com> | 2017-06-28 18:10:26 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-28 18:10:26 -0700 |
commit | e27c2860e7b4d24cdff9fbd75a706794551e56c3 (patch) | |
tree | ab985c5c29822417bce8b05879a75ed314e556b7 /lib/ansible/parsing/metadata.py | |
parent | 6908038036667a725aac09c62fadb4fe03cfda7e (diff) | |
download | ansible-e27c2860e7b4d24cdff9fbd75a706794551e56c3.tar.gz |
Move metadata extraction into a library function (#26198)
* Move metadata extraction into a library function
* fix the string cases to throw NotImplementedError
* Some python3 fixes
Diffstat (limited to 'lib/ansible/parsing/metadata.py')
-rw-r--r-- | lib/ansible/parsing/metadata.py | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/lib/ansible/parsing/metadata.py b/lib/ansible/parsing/metadata.py new file mode 100644 index 0000000000..691453c9bf --- /dev/null +++ b/lib/ansible/parsing/metadata.py @@ -0,0 +1,221 @@ +# (c) 2017, Toshio Kuratomi <tkuratomi@ansible.com> +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see <http://www.gnu.org/licenses/>. + +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import ast + +import yaml + +from ansible.module_utils._text import to_text + + +class ParseError(Exception): + """Thrown when parsing a file fails""" + pass + + +def seek_end_of_dict(module_data, start_line, start_col, next_node_line, next_node_col): + """Look for the end of a dict in a set of lines + + We know the starting position of the dict and we know the start of the + next code node but in between there may be multiple newlines and comments. + There may also be multiple python statements on the same line (separated + by semicolons) + + Examples:: + ANSIBLE_METADATA = {[..]} + DOCUMENTATION = [..] + + ANSIBLE_METADATA = {[..]} # Optional comments with confusing junk => {} + # Optional comments {} + DOCUMENTATION = [..] + + ANSIBLE_METADATA = { + [..] + } + # Optional comments {} + DOCUMENTATION = [..] + + ANSIBLE_METADATA = {[..]} ; DOCUMENTATION = [..] + + ANSIBLE_METADATA = {}EOF + """ + if next_node_line is None: + # The dict is the last statement in the file + snippet = module_data.splitlines()[start_line:] + next_node_col = 0 + # Include the last line in the file + last_line_offset = 0 + else: + # It's somewhere in the middle so we need to separate it from the rest + snippet = module_data.splitlines()[start_line:next_node_line] + # Do not include the last line because that's where the next node + # starts + last_line_offset = 1 + + if next_node_col == 0: + # This handles all variants where there are only comments and blank + # lines between the dict and the next code node + + # Step backwards through all the lines in the snippet + for line_idx, line in tuple(reversed(tuple(enumerate(snippet))))[last_line_offset:]: + end_col = None + # Step backwards through all the characters in the line + for col_idx, char in reversed(tuple(enumerate(c for c in line))): + if not isinstance(char, bytes): + # Python3 wart. slicing a byte string yields integers + char = bytes((char,)) + if char == b'}' and end_col is None: + # Potentially found the end of the dict + end_col = col_idx + + elif char == b'#' and end_col is not None: + # The previous '}' was part of a comment. Keep trying + end_col = None + + if end_col is not None: + # Found the end! + end_line = start_line + line_idx + break + else: + raise ParseError('Unable to find the end of dictionary') + else: + # Harder cases involving multiple statements on one line + # Good Ansible Module style doesn't do this so we're just going to + # treat this as an error for now: + raise ParseError('Multiple statements per line confuses the module metadata parser.') + + return end_line, end_col + + +def seek_end_of_string(module_data, start_line, start_col, next_node_line, next_node_col): + """ + This is much trickier than finding the end of a dict. A dict has only one + ending character, "}". Strings have four potential ending characters. We + have to parse the beginning of the string to determine what the ending + character will be. + + Examples: + ANSIBLE_METADATA = '''[..]''' # Optional comment with confusing chars ''' + # Optional comment with confusing chars ''' + DOCUMENTATION = [..] + + ANSIBLE_METADATA = ''' + [..] + ''' + DOCUMENTATIONS = [..] + + ANSIBLE_METADATA = '''[..]''' ; DOCUMENTATION = [..] + + SHORT_NAME = ANSIBLE_METADATA = '''[..]''' ; DOCUMENTATION = [..] + + String marker variants: + * '[..]' + * "[..]" + * '''[..]''' + * \"\"\"[..]\"\"\" + + Each of these come in u, r, and b variants: + * '[..]' + * u'[..]' + * b'[..]' + * r'[..]' + * ur'[..]' + * ru'[..]' + * br'[..]' + * b'[..]' + * rb'[..]' + """ + raise NotImplementedError('Finding end of string not yet implemented') + + +def extract_metadata(module_data): + """Extract the metadata from a module + + :arg module_data: Byte string containing a module's code + :returns: a tuple of metadata (a dict), line the metadata starts on, + column the metadata starts on, line the metadata ends on, column the + metadata ends on, and the names the metadata is assigned to. One of + the names the metadata is assigned to will be ANSIBLE_METADATA If no + metadata is found, the tuple will be (None, -1, -1, -1, -1, None) + """ + metadata = None + start_line = -1 + start_col = -1 + end_line = -1 + end_col = -1 + targets = None + mod_ast_tree = ast.parse(module_data) + for root_idx, child in enumerate(mod_ast_tree.body): + if isinstance(child, ast.Assign): + for target in child.targets: + if target.id == 'ANSIBLE_METADATA': + metadata = ast.literal_eval(child.value) + + try: + # Determine where the next node starts + next_node = mod_ast_tree.body[root_idx + 1] + next_lineno = next_node.lineno + next_col_offset = next_node.col_offset + except IndexError: + # Metadata is defined in the last node of the file + next_lineno = None + next_col_offset = None + + if isinstance(child.value, ast.Dict): + # Determine where the current metadata ends + end_line, end_col = seek_end_of_dict(module_data, + child.lineno - 1, + child.col_offset, + next_lineno, + next_col_offset) + + elif isinstance(child.value, ast.Str): + metadata = yaml.safe_load(child.value.s) + end_line, end_col = seek_end_of_string(module_data, + child.lineno - 1, + child.col_offset, + next_lineno, + next_col_offset) + elif isinstance(child.value, ast.Bytes): + metadata = yaml.safe_load(to_text(child.value.s, errors='surrogate_or_strict')) + end_line, end_col = seek_end_of_string(module_data, + child.lineno - 1, + child.col_offset, + next_lineno, + next_col_offset) + else: + # Example: + # ANSIBLE_METADATA = 'junk' + # ANSIBLE_METADATA = { [..the real metadata..] } + continue + + # Do these after the if-else so we don't pollute them in + # case this was a false positive + start_line = child.lineno - 1 + start_col = child.col_offset + targets = [t.id for t in child.targets] + break + + if metadata is not None: + # Once we've found the metadata we're done + break + + return metadata, start_line, start_col, end_line, end_col, targets |