From 386029ed647ca7fd0209506285f02365c347eef9 Mon Sep 17 00:00:00 2001 From: Anthon van der Neut Date: Fri, 28 Aug 2015 08:03:59 +0200 Subject: - main problem in moving stuff from yaml/py to yaml was that parser.py clashes with the built-in parser module (CPython, C-module) which is included from pkg_resources/__init__.py - no C compile yet --- .hgignore | 1 + __init__.py | 55 ++ comments.py | 393 ++++++++++++ compat.py | 106 ++++ composer.py | 169 +++++ configobjwalker.py | 65 ++ constructor.py | 1116 +++++++++++++++++++++++++++++++++ cyaml.py | 88 +++ dumper.py | 84 +++ emitter.py | 1237 ++++++++++++++++++++++++++++++++++++ error.py | 80 +++ events.py | 105 ++++ loader.py | 53 ++ main.py | 352 +++++++++++ nodes.py | 84 +++ parser_.py | 656 ++++++++++++++++++++ py/__init__.py | 43 -- py/comments.py | 393 ------------ py/compat.py | 106 ---- py/composer.py | 169 ----- py/configobjwalker.py | 65 -- py/constructor.py | 1116 --------------------------------- py/cyaml.py | 88 --- py/dumper.py | 84 --- py/emitter.py | 1237 ------------------------------------ py/error.py | 80 --- py/events.py | 105 ---- py/loader.py | 53 -- py/main.py | 352 ----------- py/nodes.py | 84 --- py/parser.py | 656 -------------------- py/reader.py | 207 ------- py/representer.py | 808 ------------------------ py/resolver.py | 233 ------- py/scalarstring.py | 44 -- py/scanner.py | 1655 ------------------------------------------------- py/serializer.py | 166 ----- py/tokens.py | 188 ------ py/yaml.py | 527 ---------------- reader.py | 207 +++++++ representer.py | 808 ++++++++++++++++++++++++ resolver.py | 233 +++++++ scalarstring.py | 44 ++ scanner.py | 1655 +++++++++++++++++++++++++++++++++++++++++++++++++ serializer.py | 166 +++++ setup.py | 455 +++++++++----- tokens.py | 188 ++++++ 47 files changed, 8253 insertions(+), 8606 deletions(-) create mode 100644 __init__.py create mode 100644 comments.py create mode 100644 compat.py create mode 100644 composer.py create mode 100644 configobjwalker.py create mode 100644 constructor.py create mode 100644 cyaml.py create mode 100644 dumper.py create mode 100644 emitter.py create mode 100644 error.py create mode 100644 events.py create mode 100644 loader.py create mode 100644 main.py create mode 100644 nodes.py create mode 100644 parser_.py delete mode 100644 py/__init__.py delete mode 100644 py/comments.py delete mode 100644 py/compat.py delete mode 100644 py/composer.py delete mode 100644 py/configobjwalker.py delete mode 100644 py/constructor.py delete mode 100644 py/cyaml.py delete mode 100644 py/dumper.py delete mode 100644 py/emitter.py delete mode 100644 py/error.py delete mode 100644 py/events.py delete mode 100644 py/loader.py delete mode 100644 py/main.py delete mode 100644 py/nodes.py delete mode 100644 py/parser.py delete mode 100644 py/reader.py delete mode 100644 py/representer.py delete mode 100644 py/resolver.py delete mode 100644 py/scalarstring.py delete mode 100644 py/scanner.py delete mode 100644 py/serializer.py delete mode 100644 py/tokens.py delete mode 100644 py/yaml.py create mode 100644 reader.py create mode 100644 representer.py create mode 100644 resolver.py create mode 100644 scalarstring.py create mode 100644 scanner.py create mode 100644 serializer.py create mode 100644 tokens.py diff --git a/.hgignore b/.hgignore index 9025cc9..4f52074 100644 --- a/.hgignore +++ b/.hgignore @@ -11,6 +11,7 @@ build *.egg-info .tox .cache +_yaml.so README.pdf ruamel convert diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..0da3598 --- 
/dev/null +++ b/__init__.py @@ -0,0 +1,55 @@ +# coding: utf-8 + +from __future__ import absolute_import + +_package_data = dict( + full_package_name='ruamel.yaml', + version_info=(0, 10, 7), + author='Anthon van der Neut', + author_email='a.van.der.neut@ruamel.eu', + description="ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order", # NOQA + entry_points=None, + install_requires=['ruamel.base'] +) + +def _convert_version(tup): + """create a PEP 386 pseudo-format conformant string from tuple tup""" + ret_val = str(tup[0]) # first is always a digit + next_sep = "." # separator for next extension, can be "" or "." + for x in tup[1:]: + if isinstance(x, int): + ret_val += next_sep + str(x) + next_sep = '.' + continue + first_letter = x[0].lower() + next_sep = '' + if first_letter in 'abcr': + ret_val += 'rc' if first_letter == 'r' else first_letter + elif first_letter in 'pd': + ret_val += '.post' if first_letter == 'p' else '.dev' + return ret_val + + +# < +version_info = _package_data['version_info'] +__version__ = _convert_version(version_info) + +del _convert_version + +try: + from .cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +import sys + +# body extracted to main.py +from .main import * + +def main(): + # No direct import of yaml in order not to pollute the namespace. + # If other utility 'bodies' exist in this directory, a module level + # import here would get you all of their initialisations/imports as well + from ruamel.yaml import main as util_main + util_main() diff --git a/comments.py b/comments.py new file mode 100644 index 0000000..0db9457 --- /dev/null +++ b/comments.py @@ -0,0 +1,393 @@ +# coding: utf-8 + +from __future__ import absolute_import +from __future__ import print_function + +__all__ = ["CommentedSeq", "CommentedMap", "CommentedOrderedMap", + "CommentedSet", 'comment_attrib', 'merge_attrib'] + +""" +stuff to deal with comments and formatting on dict/list/ordereddict/set +these are not really related, formatting could be factored out as +a separate base +""" + +from collections import MutableSet + +from .compat import ordereddict + +comment_attrib = '_yaml_comment' +format_attrib = '_yaml_format' +line_col_attrib = '_yaml_line_col' +anchor_attrib = '_yaml_anchor' +merge_attrib = '_yaml_merge' + +class Comment(object): + # sys.getsizeof tested the Comment objects: __slots__ makes them bigger + # and adding self.end did not matter + attrib = comment_attrib + + def __init__(self): + self.comment = None # [post, [pre]] + # map key (mapping/omap/dict) or index (sequence/list) to a list of + # dict: post_key, pre_key, post_value, pre_value + # list: pre item, post item + self._items = {} + # self._start = [] # should not put these on first item + self._end = [] # end of document comments + + def __str__(self): + if self._end: + end = ',\n end=' + str(self._end) + else: + end = '' + return "Comment(comment={0},\n items={1}{2})".format( + self.comment, self._items, end) + + @property + def items(self): + return self._items + + @property + def end(self): + return self._end + + @end.setter + def end(self, value): + self._end = value + + @property + def start(self): + return self._start + + @start.setter + def start(self, value): + self._start = value + + +# to distinguish a key from None +def NoComment(): + pass + + +class Format(object): + attrib = format_attrib + + def __init__(self): + self._flow_style = None + + def set_flow_style(self): + self._flow_style = True + + def 
set_block_style(self): + self._flow_style = False + + def flow_style(self, default=None): + """if default (the flow_style) is None, the flow style tacked on to + the object explicitly will be taken. If that is None as well, the + default flow style rules the format down the line, or the type + of the constituent values (simple -> flow, map/list -> block)""" + if self._flow_style is None: + return default + return self._flow_style + + +class LineCol(object): + attrib = line_col_attrib + + def __init__(self): + self.line = None + self.col = None + +class Anchor(object): + attrib = anchor_attrib + + def __init__(self): + self.value = None + self.always_dump = False + +class CommentedBase(object): + @property + def ca(self): + if not hasattr(self, Comment.attrib): + setattr(self, Comment.attrib, Comment()) + return getattr(self, Comment.attrib) + + def yaml_end_comment_extend(self, comment, clear=False): + if clear: + self.ca.end = [] + self.ca.end.extend(comment) + + def yaml_key_comment_extend(self, key, comment, clear=False): + l = self.ca._items.setdefault(key, [None, None, None, None]) + if clear or l[1] is None: + if comment[1] is not None: + assert isinstance(comment[1], list) + l[1] = comment[1] + else: + l[1].extend(comment[0]) + l[0] = comment[0] + + def yaml_value_comment_extend(self, key, comment, clear=False): + l = self.ca._items.setdefault(key, [None, None, None, None]) + if clear or l[3] is None: + if comment[1] is not None: + assert isinstance(comment[1], list) + l[3] = comment[1] + else: + l[3].extend(comment[0]) + l[2] = comment[0] + + def yaml_set_start_comment(self, comment, indent=0): + """overwrites any preceding comment lines on an object + expects comment to be without `#` and possibly have multiple lines + """ + from .error import Mark + from .tokens import CommentToken + pre_comments = self._yaml_get_pre_comment() + if comment[-1] == '\n': + comment = comment[:-1] # strip final newline if present + start_mark = Mark(None, None, None, indent, None, None) + for com in comment.split('\n'): + pre_comments.append(CommentToken('# ' + com + '\n', start_mark, None)) + + @property + def fa(self): + if not hasattr(self, Format.attrib): + setattr(self, Format.attrib, Format()) + return getattr(self, Format.attrib) + + def yaml_add_eol_comment(self, comment, key=NoComment, column=None): + """ + there is a problem as eol comments should start with ' #' + (but at the beginning of the line the space doesn't have to be before + the #). 
The column index is for the # mark + """ + from .tokens import CommentToken + from .error import Mark + if column is None: + column = self._yaml_get_column(key) + if comment[0] != '#': + comment = '# ' + comment + if column is None: + if comment[0] == '#': + comment = ' ' + comment + column = 0 + start_mark = Mark(None, None, None, column, None, None) + ct = [CommentToken(comment, start_mark, None), None] + self._yaml_add_eol_comment(ct, key=key) + + @property + def lc(self): + if not hasattr(self, LineCol.attrib): + setattr(self, LineCol.attrib, LineCol()) + return getattr(self, LineCol.attrib) + + def _yaml_set_line_col(self, line, col): + self.lc.line = line + self.lc.col = col + + @property + def anchor(self): + if not hasattr(self, Anchor.attrib): + setattr(self, Anchor.attrib, Anchor()) + return getattr(self, Anchor.attrib) + + def yaml_anchor(self): + if not hasattr(self, Anchor.attrib): + return None + return self.anchor + + def yaml_set_anchor(self, value, always_dump=False): + self.anchor.value = value + self.anchor.always_dump = always_dump + +class CommentedSeq(list, CommentedBase): + __slots__ = [Comment.attrib, ] + + def _yaml_add_comment(self, comment, key=NoComment): + if key is not NoComment: + self.yaml_key_comment_extend(key, comment) + else: + self.ca.comment = comment + + def _yaml_add_eol_comment(self, comment, key): + self._yaml_add_comment(comment, key=key) + + def _yaml_get_columnX(self, key): + return self.ca.items[key][0].start_mark.column + + def _yaml_get_column(self, key): + column = None + sel_idx = None + pre, post = key-1, key+1 + if pre in self.ca.items: + sel_idx = pre + elif post in self.ca.items: + sel_idx = post + else: + # self.ca.items is not ordered + for row_idx, k1 in enumerate(self): + if row_idx >= key: + break + if row_idx not in self.ca.items: + continue + sel_idx = row_idx + if sel_idx is not None: + column = self._yaml_get_columnX(sel_idx) + return column + + def _yaml_get_pre_comment(self): + if self.ca.comment is None: + pre_comments = [] + self.ca.comment = [None, pre_comments] + else: + pre_comments = self.ca.comment[1] = [] + return pre_comments + + +class CommentedMap(ordereddict, CommentedBase): + __slots__ = [Comment.attrib, ] + + def _yaml_add_comment(self, comment, key=NoComment, value=NoComment): + """values is set to key to indicate a value attachment of comment""" + if key is not NoComment: + self.yaml_key_comment_extend(key, comment) + return + if value is not NoComment: + self.yaml_value_comment_extend(value, comment) + else: + self.ca.comment = comment + + def _yaml_add_eol_comment(self, comment, key): + """add on the value line, with value specified by the key""" + self._yaml_add_comment(comment, value=key) + + def _yaml_get_columnX(self, key): + return self.ca.items[key][2].start_mark.column + + def _yaml_get_column(self, key): + column = None + sel_idx = None + pre, post, last = None, None, None + for x in self: + if pre is not None and x != key: + post = x + break + if x == key: + pre = last + last = x + if pre in self.ca.items: + sel_idx = pre + elif post in self.ca.items: + sel_idx = post + else: + # self.ca.items is not ordered + for row_idx, k1 in enumerate(self): + if k1 >= key: + break + if k1 not in self.ca.items: + continue + sel_idx = k1 + if sel_idx is not None: + column = self._yaml_get_columnX(sel_idx) + return column + + def _yaml_get_pre_comment(self): + if self.ca.comment is None: + pre_comments = [] + self.ca.comment = [None, pre_comments] + else: + pre_comments = self.ca.comment[1] = [] + return 
pre_comments + + def update(self, vals): + try: + ordereddict.update(self, vals) + except TypeError: + # probably a plain dict is being used + for x in vals: + self[x] = vals[x] + + def mlget(self, key, default=None, list_ok=False): + """multi-level get that expects dicts within dicts""" + if not isinstance(key, list): + return self.get(key, default) + # assume that the key is a list of recursively accessible dicts + def get_one_level(key_list, level, d): + if not list_ok: + assert isinstance(d, dict) + if level >= len(key_list): + if level > len(key_list): + raise IndexError + return d[key_list[level-1]] + return get_one_level(key_list, level+1, d[key_list[level-1]]) + + try: + return get_one_level(key, 1, self) + except KeyError: + return default + except (TypeError, IndexError): + if not list_ok: + raise + return default + + def __getitem__(self, key): + try: + return ordereddict.__getitem__(self, key) + except KeyError: + for merged in getattr(self, merge_attrib, []): + if key in merged[1]: + return merged[1][key] + raise + + def get(self, key, default=None): + try: + return self.__getitem__(key) + except: + return default + + @property + def merge(self): + if not hasattr(self, merge_attrib): + setattr(self, merge_attrib, []) + return getattr(self, merge_attrib) + + def add_yaml_merge(self, value): + self.merge.extend(value) + + + +class CommentedOrderedMap(CommentedMap): + __slots__ = [Comment.attrib, ] + + +class CommentedSet(MutableSet, CommentedMap): + __slots__ = [Comment.attrib, 'odict'] + + def __init__(self, values=None): + self.odict = ordereddict() + MutableSet.__init__(self) + if values is not None: + self |= values + + def add(self, value): + """Add an element.""" + self.odict[value] = None + + def discard(self, value): + """Remove an element. Do not raise an exception if absent.""" + del self.odict[value] + + def __contains__(self, x): + return x in self.odict + + def __iter__(self): + for x in self.odict: + yield x + + def __len__(self): + return len(self.odict) + + def __repr__(self): + return 'set({0!r})'.format(self.odict.keys()) diff --git a/compat.py b/compat.py new file mode 100644 index 0000000..dc0c51c --- /dev/null +++ b/compat.py @@ -0,0 +1,106 @@ + +from __future__ import print_function + +# partially from package six by Benjamin Peterson + +import sys +import os +import types + +try: + from ruamel.ordereddict import ordereddict +except: + try: + from collections import OrderedDict + except ImportError: + from ordereddict import OrderedDict + # to get the right name; import ... 
as ordereddict doesn't do that + + class ordereddict(OrderedDict): + pass + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + +if PY3: + def utf8(s): + return s + + def to_str(s): + return s + + def to_unicode(s): + return s + +else: + def utf8(s): + return s.encode('utf-8') + + def to_str(s): + return str(s) + + def to_unicode(s): + return unicode(s) + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + MAXSIZE = sys.maxsize + unichr = chr + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + unichr = unichr # to allow importing + import StringIO + StringIO = StringIO.StringIO + import cStringIO + BytesIO = cStringIO.StringIO + +if PY3: + builtins_module = 'builtins' +else: + builtins_module = '__builtin__' + + +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + return meta("NewBase", bases, {}) + +DBG_TOKEN = 1 +DBG_EVENT = 2 +DBG_NODE = 4 + + +_debug = None + + +# used from yaml util when testing +def dbg(val=None): + global _debug + if _debug is None: + # set to true or false + _debug = os.environ.get('YAMLDEBUG') + if _debug is None: + _debug = 0 + else: + _debug = int(_debug) + if val is None: + return _debug + return _debug & val + + +def nprint(*args, **kw): + if dbg(): # dbg is a function, call it to read the debug flag + print(*args, **kw) diff --git a/composer.py b/composer.py new file mode 100644 index 0000000..13436c8 --- /dev/null +++ b/composer.py @@ -0,0 +1,169 @@ +from __future__ import absolute_import +from __future__ import print_function + +__all__ = ['Composer', 'ComposerError'] + +from .error import MarkedYAMLError +from .events import * +from .nodes import * +from .compat import utf8 + + +class ComposerError(MarkedYAMLError): + pass + + +class Composer(object): + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # Are there more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. + document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError( + "expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. 
+ self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + alias = event.anchor + if alias not in self.anchors: + raise ComposerError( + None, None, "found undefined alias %r" + % utf8(alias), event.start_mark) + return self.anchors[alias] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: # have an anchor + if anchor in self.anchors: + raise ComposerError( + "found duplicate anchor %r; first occurrence" + % utf8(anchor), self.anchors[anchor].start_mark, + "second occurrence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == u'!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style, + comment=event.comment) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style, + comment=start_event.comment, anchor=anchor) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + if node.flow_style is True and end_event.comment is not None: + if node.comment is not None: + print('Warning: unexpected end_event comment in sequence ' + 'node {}'.format(node.flow_style)) + node.comment = end_event.comment + node.end_mark = end_event.end_mark + self.check_end_doc_comment(end_event, node) + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style, + comment=start_event.comment, anchor=anchor) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + # key_event = self.peek_event() + item_key = self.compose_node(node, None) + # if item_key in node.value: + # raise ComposerError("while composing a mapping", + # start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + # node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + if node.flow_style is True and end_event.comment is not None: + node.comment = end_event.comment + node.end_mark = end_event.end_mark + self.check_end_doc_comment(end_event, node) + return node + + def check_end_doc_comment(self, end_event, node): + if end_event.comment and end_event.comment[1]: + # pre comments on an end_event, no following to move to + if node.comment is None: + node.comment = [None, None] + assert not isinstance(node, ScalarEvent) + # this is a post comment on a mapping 
node, add as third element + # in the list + node.comment.append(end_event.comment[1]) + end_event.comment[1] = None diff --git a/configobjwalker.py b/configobjwalker.py new file mode 100644 index 0000000..576adcd --- /dev/null +++ b/configobjwalker.py @@ -0,0 +1,65 @@ + + +def configobj_walker(cfg): + """ + walks over a ConfigObj (INI file with comments) generating + corresponding YAML output (including comments + """ + from configobj import ConfigObj + assert isinstance(cfg, ConfigObj) + for c in cfg.initial_comment: + if c.strip(): + yield c + for s in _walk_section(cfg): + if s.strip(): + yield s + for c in cfg.final_comment: + if c.strip(): + yield c + + +def _walk_section(s, level=0): + from configobj import Section + assert isinstance(s, Section) + indent = u' ' * level + for name in s.scalars: + for c in s.comments[name]: + yield indent + c.strip() + x = s[name] + if u'\n' in x: + i = indent + u' ' + x = u'|\n' + i + x.strip().replace(u'\n', u'\n' + i) + elif ':' in x: + x = u"'" + x.replace(u"'", u"''") + u"'" + line = u'{0}{1}: {2}'.format(indent, name, x) + c = s.inline_comments[name] + if c: + line += u' ' + c + yield line + for name in s.sections: + for c in s.comments[name]: + yield indent + c.strip() + line = u'{0}{1}:'.format(indent, name) + c = s.inline_comments[name] + if c: + line += u' ' + c + yield line + for val in _walk_section(s[name], level=level+1): + yield val + +##def config_obj_2_rt_yaml(cfg): +## from .comments import CommentedMap, CommentedSeq +## from configobj import ConfigObj +## assert isinstance(cfg, ConfigObj) +## #for c in cfg.initial_comment: +## # if c.strip(): +## # pass +## cm = CommentedMap() +## for name in s.sections: +## cm[name] = d = CommentedMap() +## +## +## #for c in cfg.final_comment: +## # if c.strip(): +## # yield c +## return cm diff --git a/constructor.py b/constructor.py new file mode 100644 index 0000000..5ce8974 --- /dev/null +++ b/constructor.py @@ -0,0 +1,1116 @@ +from __future__ import absolute_import +from __future__ import print_function + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError', 'RoundTripConstructor'] + +import collections +import datetime +import base64 +import binascii +import re +import sys +import types + +from .error import * +from .nodes import * +from .compat import (utf8, builtins_module, to_str, PY2, PY3, ordereddict, + text_type) +from .comments import * +from .scalarstring import * + + +class ConstructorError(MarkedYAMLError): + pass + + +class BaseConstructor(object): + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. 
+ node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if node in self.constructed_objects: + return self.constructed_objects[node] + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.recursive_objects: + raise ConstructorError( + None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = next(generator) + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError( + None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError( + None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + # keys can be list -> deep + key = self.construct_object(key_node, deep=True) + # lists are not hashable, but tuples are + if not isinstance(key, collections.Hashable): + if isinstance(key, list): + key = tuple(key) + if PY2: + try: + hash(key) + except TypeError as exc: + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % + exc, key_node.start_mark) + else: + if not isinstance(key, collections.Hashable): + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def 
construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + @classmethod + def add_constructor(cls, tag, constructor): + if 'yaml_constructors' not in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + + @classmethod + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if 'yaml_multi_constructors' not in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + + +class SafeConstructor(BaseConstructor): + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == u'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node): + """ + This implements the merge key feature http://yaml.org/type/merge.html + by inserting keys from the merge dict/list of dicts if not yet + available in this node + """ + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == u'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + "while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, " + "but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == u'tag:yaml.org,2002:value': + key_node.tag = u'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return BaseConstructor.construct_mapping(self, node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + # YAML 1.2 spec doesn't mention yes/no etc any more, 1.1 does + bool_values = { + u'yes': True, + u'no': False, + u'true': True, + u'false': False, + u'on': True, + u'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = to_str(self.construct_scalar(node)) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value.startswith('0o'): + return sign*int(value[2:], 8) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: 
+ digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). + + def construct_yaml_float(self, node): + value = to_str(self.construct_scalar(node)) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + if PY3: + def construct_yaml_binary(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError( + None, None, + "failed to convert base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError( + None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + else: + def construct_yaml_binary(self, node): + value = self.construct_scalar(node) + try: + return to_str(value).decode('base64') + except (binascii.Error, UnicodeEncodeError) as exc: + raise ConstructorError( + None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + u'''^(?P[0-9][0-9][0-9][0-9]) + -(?P[0-9][0-9]?) + -(?P[0-9][0-9]?) + (?:(?:[Tt]|[ \\t]+) + (?P[0-9][0-9]?) + :(?P[0-9][0-9]) + :(?P[0-9][0-9]) + (?:\\.(?P[0-9]*))? + (?:[ \\t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) 
+ (?::(?P[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, + fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do now check for duplicate keys + omap = ordereddict() + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError( + "while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + "while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % + subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError( + "while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % + len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + assert key not in omap + value = self.construct_object(value_node) + omap[key] = value + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. 
+ pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError( + "while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + "while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % + subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError( + "while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % + len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + value = self.construct_scalar(node) + if PY3: + return value + try: + return value.encode('ascii') + except UnicodeEncodeError: + return value + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError( + None, None, + "could not determine a constructor for the tag %r" % + utf8(node.tag), + node.start_mark) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor( + None, SafeConstructor.construct_undefined) + + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return utf8(self.construct_scalar(node)) + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + if PY3: + def construct_python_bytes(self, node): + try: + value = self.construct_scalar(node).encode('ascii') + except UnicodeEncodeError as exc: + raise ConstructorError( + None, None, + "failed to convert 
base64 data into ascii: %s" % exc, + node.start_mark) + try: + if hasattr(base64, 'decodebytes'): + return base64.decodebytes(value) + else: + return base64.decodestring(value) + except binascii.Error as exc: + raise ConstructorError( + None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + def construct_python_long(self, node): + val = self.construct_yaml_int(node) + if PY3: + return val + return int(val) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError( + "while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError as exc: + raise ConstructorError( + "while constructing a Python module", mark, + "cannot find module %r (%s)" % (utf8(name), exc), mark) + return sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError( + "while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if u'.' in name: + module_name, object_name = name.rsplit('.', 1) + else: + module_name = builtins_module + object_name = name + try: + __import__(module_name) + except ImportError as exc: + raise ConstructorError( + "while constructing a Python object", mark, + "cannot find module %r (%s)" % (utf8(module_name), exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError( + "while constructing a Python object", mark, + "cannot find %r in the module %r" % (utf8(object_name), + module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError( + "while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % utf8(value), + node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError( + "while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % utf8(value), + node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + if PY2: + class classobj: + pass + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if PY3: + if newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + else: + if newobj and isinstance(cls, type(self.classobj)) \ + and not args and not kwds: + instance = self.classobj() + instance.__class__ = cls + return instance + elif newobj and isinstance(cls, type): + return cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + 
# !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. + if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +if PY3: + Constructor.add_constructor( + u'tag:yaml.org,2002:python/bytes', + Constructor.construct_python_bytes) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + + +class RoundTripConstructor(SafeConstructor): + """need to store the comments on the node itself, + as well as on 
the items + """ + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError( + None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + + if node.style == '|' and isinstance(node.value, text_type): + return PreservedScalarString(node.value) + return node.value + + def construct_yaml_str(self, node): + value = self.construct_scalar(node) + if isinstance(value, ScalarString): + return value + if PY3: + return value + try: + return value.encode('ascii') + except AttributeError: + # in case you replace the node dynamically e.g. with a dict + return value + except UnicodeEncodeError: + return value + + def construct_sequence(self, node, seqtyp, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError( + None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + ret_val = [] + if node.comment: + seqtyp._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + seqtyp.yaml_end_comment_extend(node.comment[2], clear=True) + if node.anchor: + from ruamel.yaml.serializer import templated_id + if not templated_id(node.anchor): + seqtyp.yaml_set_anchor(node.anchor) + for idx, child in enumerate(node.value): + ret_val.append(self.construct_object(child, deep=deep)) + if child.comment: + seqtyp._yaml_add_comment(child.comment, key=idx) + return ret_val + + def flatten_mapping(self, node): + """ + This implements the merge key feature http://yaml.org/type/merge.html + by inserting keys from the merge dict/list of dicts if not yet + available in this node + """ + + def constructed(value_node): + # If the contents of a merge are defined within the + # merge marker, then they won't have been constructed + # yet. But if they were already constructed, we need to use + # the existing object. 
+ if value_node in self.constructed_objects: + value = self.constructed_objects[value_node] + else: + value = self.construct_object(value_node, deep=False) + return value + + #merge = [] + merge_map_list = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == u'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + merge_map_list.append( + (index, constructed(value_node))) + #self.flatten_mapping(value_node) + #merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + #submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + "while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + merge_map_list.append( + (index, constructed(subnode))) + # self.flatten_mapping(subnode) + # submerge.append(subnode.value) + #submerge.reverse() + #for value in submerge: + # merge.extend(value) + else: + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, " + "but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == u'tag:yaml.org,2002:value': + key_node.tag = u'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + #print ('merge_map_list', merge_map_list) + return merge_map_list + #if merge: + # node.value = merge + node.value + + def construct_mapping(self, node, maptyp, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + if isinstance(node, MappingNode): + merge_map = self.flatten_mapping(node) + if merge_map: + maptyp.add_yaml_merge(merge_map) + # mapping = {} + if node.comment: + maptyp._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + maptyp.yaml_end_comment_extend(node.comment[2], clear=True) + if node.anchor: + from ruamel.yaml.serializer import templated_id + if not templated_id(node.anchor): + maptyp.yaml_set_anchor(node.anchor) + for key_node, value_node in node.value: + # keys can be list -> deep + key = self.construct_object(key_node, deep=True) + # lists are not hashable, but tuples are + if not isinstance(key, collections.Hashable): + if isinstance(key, list): + key = tuple(key) + if PY2: + try: + hash(key) + except TypeError as exc: + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % + exc, key_node.start_mark) + else: + if not isinstance(key, collections.Hashable): + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + if key_node.comment: + maptyp._yaml_add_comment(key_node.comment, key=key) + if value_node.comment: + maptyp._yaml_add_comment(value_node.comment, value=key) + maptyp[key] = value + + def construct_setting(self, node, typ, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError( + None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + if node.comment: + typ._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + typ.yaml_end_comment_extend(node.comment[2], clear=True) + if node.anchor: + from ruamel.yaml.serializer import templated_id + if not templated_id(node.anchor): + typ.yaml_set_anchor(node.anchor) + for key_node, value_node in node.value: + 
# keys can be list -> deep + key = self.construct_object(key_node, deep=True) + # lists are not hashable, but tuples are + if not isinstance(key, collections.Hashable): + if isinstance(key, list): + key = tuple(key) + if PY2: + try: + hash(key) + except TypeError as exc: + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % + exc, key_node.start_mark) + else: + if not isinstance(key, collections.Hashable): + raise ConstructorError( + "while constructing a mapping", node.start_mark, + "found unhashable key", key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + if key_node.comment: + typ._yaml_add_comment(key_node.comment, key=key) + if value_node.comment: + typ._yaml_add_comment(value_node.comment, value=key) + typ.add(key) + + def construct_yaml_seq(self, node): + data = CommentedSeq() + data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) + if node.flow_style is True: + data.fa.set_flow_style() + elif node.flow_style is False: + data.fa.set_block_style() + if node.comment: + data._yaml_add_comment(node.comment) + yield data + data.extend(self.construct_sequence(node, data)) + + def construct_yaml_map(self, node): + data = CommentedMap() + data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) + if node.flow_style is True: + data.fa.set_flow_style() + elif node.flow_style is False: + data.fa.set_block_style() + yield data + self.construct_mapping(node, data) + + def construct_yaml_omap(self, node): + # Note: we do now check for duplicate keys + omap = CommentedOrderedMap() + omap._yaml_set_line_col(node.start_mark.line, node.start_mark.column) + if node.flow_style is True: + omap.fa.set_flow_style() + elif node.flow_style is False: + omap.fa.set_block_style() + yield omap + if node.comment: + omap._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + omap.yaml_end_comment_extend(node.comment[2], clear=True) + if not isinstance(node, SequenceNode): + raise ConstructorError( + "while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError( + "while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % + subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError( + "while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % + len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + assert key not in omap + value = self.construct_object(value_node) + if key_node.comment: + omap._yaml_add_comment(key_node.comment, key=key) + if subnode.comment: + omap._yaml_add_comment(subnode.comment, key=key) + if value_node.comment: + omap._yaml_add_comment(value_node.comment, value=key) + omap[key] = value + + def construct_yaml_set(self, node): + data = CommentedSet() + data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) + yield data + self.construct_setting(node, data) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:null', + RoundTripConstructor.construct_yaml_null) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:bool', + RoundTripConstructor.construct_yaml_bool) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:int', + RoundTripConstructor.construct_yaml_int) + 
+RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:float', + RoundTripConstructor.construct_yaml_float) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:binary', + RoundTripConstructor.construct_yaml_binary) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:timestamp', + RoundTripConstructor.construct_yaml_timestamp) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:omap', + RoundTripConstructor.construct_yaml_omap) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:pairs', + RoundTripConstructor.construct_yaml_pairs) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:set', + RoundTripConstructor.construct_yaml_set) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:str', + RoundTripConstructor.construct_yaml_str) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:seq', + RoundTripConstructor.construct_yaml_seq) + +RoundTripConstructor.add_constructor( + u'tag:yaml.org,2002:map', + RoundTripConstructor.construct_yaml_map) + +RoundTripConstructor.add_constructor( + None, RoundTripConstructor.construct_undefined) diff --git a/cyaml.py b/cyaml.py new file mode 100644 index 0000000..1c9036c --- /dev/null +++ b/cyaml.py @@ -0,0 +1,88 @@ +from __future__ import absolute_import + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from .constructor import * + +from .serializer import * +from .representer import * + +from .resolver import * + + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + + +class CSafeLoader(CParser, SafeConstructor, Resolver): + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + + +class CLoader(CParser, Constructor, Resolver): + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + + +class CDumper(CEmitter, Serializer, Representer, Resolver): + def __init__(self, stream, + 
default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) diff --git a/dumper.py b/dumper.py new file mode 100644 index 0000000..f83228a --- /dev/null +++ b/dumper.py @@ -0,0 +1,84 @@ +from __future__ import absolute_import + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper', 'RoundTripDumper'] + +from .emitter import * +from .serializer import * +from .representer import * +from .resolver import * + + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + + +class Dumper(Emitter, Serializer, Representer, Resolver): + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + + +class RoundTripDumper(Emitter, Serializer, RoundTripRepresenter, Resolver): + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + 
Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + RoundTripRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) diff --git a/emitter.py b/emitter.py new file mode 100644 index 0000000..2e5ebdc --- /dev/null +++ b/emitter.py @@ -0,0 +1,1237 @@ +from __future__ import absolute_import +from __future__ import print_function + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from .error import YAMLError +from .events import * +from .compat import utf8, text_type, PY2, nprint, dbg, DBG_EVENT + + +class EmitterError(YAMLError): + pass + + +class ScalarAnalysis(object): + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + + +class Emitter(object): + DEFAULT_TAG_PREFIXES = { + u'!': u'!', + u'tag:yaml.org,2002:': u'!!', + } + + MAX_SIMPLE_KEY_LENGTH = 128 + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. + self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = u'\n' + if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. 
+ self.analysis = None + self.style = None + + def dispose(self): + # Reset the state attributes (to clear self-references) + self.states = [] + self.state = None + + def emit(self, event): + if dbg(DBG_EVENT): + nprint(event) + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. + + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if PY2: + if self.event.encoding \ + and not getattr(self.stream, 'encoding', None): + self.encoding = self.event.encoding + else: + if self.event.encoding \ + and not hasattr(self.stream, 'encoding'): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" % + self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. 
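    # A sketch of how these document handlers surface through the public
    # API (hypothetical session, assuming dump_all from main.py further
    # below; not part of this changeset):
    #
    #     import ruamel.yaml
    #     print(ruamel.yaml.dump_all([{'a': 1}, {'b': 2}],
    #                                explicit_start=True))
    #     # --- {a: 1}
    #     # --- {b: 2}
    #
    # With explicit_start the DocumentStartEvent is not implicit, so
    # expect_document_start() writes the '---' indicator for every
    # document instead of omitting it for the first one.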
+ + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = sorted(self.event.tags.keys()) + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit + and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator(u'---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" % + self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator(u'...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" % + self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. 
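    # A sketch of the event stream that expect_node() below dispatches
    # on, driving the Emitter directly (hypothetical, not part of this
    # changeset; assumes the package imports as ruamel.yaml):
    #
    #     import io
    #     from ruamel.yaml.emitter import Emitter
    #     from ruamel.yaml.events import (
    #         StreamStartEvent, DocumentStartEvent, MappingStartEvent,
    #         ScalarEvent, MappingEndEvent, DocumentEndEvent,
    #         StreamEndEvent)
    #
    #     out = io.StringIO()
    #     emitter = Emitter(out)
    #     for ev in [StreamStartEvent(), DocumentStartEvent(),
    #                MappingStartEvent(None, None, True),
    #                ScalarEvent(None, None, (True, True), u'key'),
    #                ScalarEvent(None, None, (True, True), u'value'),
    #                MappingEndEvent(), DocumentEndEvent(),
    #                StreamEndEvent()]:
    #         emitter.emit(ev)
    #     assert out.getvalue() == u'key: value\n'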
+ + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor(u'&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.event.comment: + self.write_pre_comment(self.event) + if self.event.flow_style is False and self.event.comment: + self.write_post_comment(self.event) + # print('seq event', self.event) + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.event.flow_style is False and self.event.comment: + self.write_post_comment(self.event) + if self.event.comment and self.event.comment[1]: + self.write_pre_comment(self.event) + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor(u'*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. + + def expect_flow_sequence(self): + self.write_indicator(u'[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u']', False) + if self.event.comment and self.event.comment[0]: + # eol comment on flow sequence + self.write_post_comment(self.event) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. 
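    # The flow collection handlers (sequences above, mappings below)
    # emit the bracketed single-line forms; a hypothetical session, not
    # part of this changeset:
    #
    #     import ruamel.yaml
    #     ruamel.yaml.dump({'a': 1, 'b': 2}, default_flow_style=True)
    #     # -> '{a: 1, b: 2}\n'
    #     ruamel.yaml.dump([1, 2, 3], default_flow_style=True)
    #     # -> '[1, 2, 3]\n'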
+ + def expect_flow_mapping(self): + self.write_indicator(u'{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u'}', False) + # if self.event.comment and self.event.comment[0]: + # # eol comment on flow sequence + # self.write_post_comment(self.event) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + # if self.event.comment and self.event.comment[1]: + # self.write_pre_comment(self.event) + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u'}', False) + if self.event.comment and self.event.comment[0]: + # eol comment on flow mapping + self.write_post_comment(self.event) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(u':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. + + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + if self.event.comment and self.event.comment[1]: + # final comments from a doc + self.write_pre_comment(self.event) + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.event.comment and self.event.comment[1]: + self.write_pre_comment(self.event) + self.write_indent() + self.write_indicator(u'-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. 
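    # Two consequences of the block handlers (sequences above, mappings
    # below), shown through the public API (hypothetical session, not
    # part of this changeset):
    #
    #     import ruamel.yaml
    #     print(ruamel.yaml.dump({'xs': [1, 2]}, default_flow_style=False))
    #     # xs:
    #     # - 1
    #     # - 2
    #
    # The sequence is written indentless because expect_block_sequence()
    # sees mapping_context without indention. And a key that fails
    # check_simple_key(), e.g. one longer than MAX_SIMPLE_KEY_LENGTH
    # (128) characters, is emitted in the explicit '?' form:
    #
    #     print(ruamel.yaml.dump({'k' * 129: 0}, default_flow_style=False))
    #     # ? kkkk...kkkk
    #     # : 0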
+ + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + if self.event.comment and self.event.comment[1]: + # final comments from a doc + self.write_pre_comment(self.event) + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + if self.event.comment and self.event.comment[1]: + # final comments from a doc + self.write_pre_comment(self.event) + self.write_indent() + if self.check_simple_key(): + if self.event.style == '?': + self.write_indicator(u'?', True, indention=True) + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + if getattr(self.event, 'style', None) != '?': + self.write_indicator(u':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(u':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. + + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and + event.value == u'') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < self.MAX_SIMPLE_KEY_LENGTH and ( + isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. 
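    # process_tag() below only omits the tag when the chosen scalar
    # style keeps the value implicitly resolvable; forcing a quoted
    # style therefore makes tags explicit (hypothetical session, not
    # part of this changeset):
    #
    #     import ruamel.yaml
    #     ruamel.yaml.dump({'a': 1}, default_style='"')
    #     # -> '"a": !!int "1"\n'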
+ + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = u'!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if (not self.event.style or self.event.style == '?') and \ + self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and + self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + # if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + if self.event.comment: + self.write_post_comment(self.event) + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % + (major, minor)) + return u'%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != u'!' 
or handle[-1] != u'!': + raise EmitterError("tag handle must start and end with '!': %r" + % (utf8(handle))) + for ch in handle[1:-1]: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or + u'a' <= ch <= u'z' or ch in u'-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (utf8(ch), utf8(handle))) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == u'!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = utf8(ch) + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return u''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == u'!': + return tag + handle = None + suffix = tag + prefixes = sorted(self.tag_prefixes.keys()) + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == u'!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.~*\'()[]' \ + or (ch == u'!' and handle != u'!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = utf8(ch) + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = u''.join(chunks) + if handle: + return u'%s%s' % (handle, suffix_text) + else: + return u'!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or + u'a' <= ch <= u'z' or ch in u'-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (utf8(ch), utf8(anchor))) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis( + scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith(u'---') or scalar.startswith(u'...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. 
+ if ch in u'#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in u'?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in u',?[]{}': + flow_indicators = True + if ch == u':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in u'\n\x85\u2028\u2029': + line_breaks = True + if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): + if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == u' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in u'\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = ( + index+1 >= len(scalar) or + scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. + if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. 
+ if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write(u'\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = u' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = u' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = u'%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = u'%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. + + def write_single_quoted(self, text, split=True): + self.write_indicator(u'\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != u' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == u'\'': + data = u'\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + self.write_indicator(u'\'', False) + + ESCAPE_REPLACEMENTS = { + u'\0': u'0', + u'\x07': u'a', + u'\x08': u'b', + u'\x09': u't', + u'\x0A': u'n', + u'\x0B': u'v', + u'\x0C': u'f', + u'\x0D': u'r', + u'\x1B': u'e', + u'\"': u'\"', + u'\\': u'\\', + u'\x85': u'N', + u'\xA0': u'_', + u'\u2028': u'L', + u'\u2029': u'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator(u'"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ + or not (u'\x20' <= ch <= u'\x7E' + or 
(self.allow_unicode + and (u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD'))): + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= u'\xFF': + data = u'\\x%02X' % ord(ch) + elif ch <= u'\uFFFF': + data = u'\\u%04X' % ord(ch) + else: + data = u'\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+u'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == u' ': + data = u'\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator(u'"', False) + + def determine_block_hints(self, text): + hints = u'' + if text: + if text[0] in u' \n\x85\u2028\u2029': + hints += text_type(self.best_indent) + if text[-1] not in u'\n\x85\u2028\u2029': + hints += u'-' + elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': + hints += u'+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'>'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != u' ' \ + and text[start] == u'\n': + self.write_line_break() + leading_space = (ch == u' ') + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == u' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'|'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in u'\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + 
self.write_line_break()
+                    start = end
+            if ch is not None:
+                breaks = (ch in u'\n\x85\u2028\u2029')
+            end += 1
+
+    def write_plain(self, text, split=True):
+        if self.root_context:
+            self.open_ended = True
+        if not text:
+            return
+        if not self.whitespace:
+            data = u' '
+            self.column += len(data)
+            if self.encoding:
+                data = data.encode(self.encoding)
+            self.stream.write(data)
+        self.whitespace = False
+        self.indention = False
+        spaces = False
+        breaks = False
+        start = end = 0
+        while end <= len(text):
+            ch = None
+            if end < len(text):
+                ch = text[end]
+            if spaces:
+                if ch != u' ':
+                    if start+1 == end and self.column > self.best_width \
+                            and split:
+                        self.write_indent()
+                        self.whitespace = False
+                        self.indention = False
+                    else:
+                        data = text[start:end]
+                        self.column += len(data)
+                        if self.encoding:
+                            data = data.encode(self.encoding)
+                        self.stream.write(data)
+                    start = end
+            elif breaks:
+                if ch not in u'\n\x85\u2028\u2029':
+                    if text[start] == u'\n':
+                        self.write_line_break()
+                    for br in text[start:end]:
+                        if br == u'\n':
+                            self.write_line_break()
+                        else:
+                            self.write_line_break(br)
+                    self.write_indent()
+                    self.whitespace = False
+                    self.indention = False
+                    start = end
+            else:
+                if ch is None or ch in u' \n\x85\u2028\u2029':
+                    data = text[start:end]
+                    self.column += len(data)
+                    if self.encoding:
+                        data = data.encode(self.encoding)
+                    self.stream.write(data)
+                    start = end
+            if ch is not None:
+                spaces = (ch == u' ')
+                breaks = (ch in u'\n\x85\u2028\u2029')
+            end += 1
+
+    def write_comment(self, comment):
+        value = comment.value
+        if value[-1] == '\n':
+            value = value[:-1]
+        try:
+            # get original column position
+            col = comment.start_mark.column
+            if col < self.column + 1:
+                raise ValueError  # fall back to the column after current
+        except ValueError:
+            col = self.column + 1
+        # print('post_comment', self.line, self.column, value)
+        self.stream.write(' ' * (col - self.column) + value)
+        self.write_line_break()
+
+    def write_pre_comment(self, event):
+        comments = event.comment[1]
+        if comments is None:
+            return
+        try:
+            for comment in comments:
+                if isinstance(event, MappingStartEvent) and \
+                        getattr(comment, 'pre_done', None):
+                    continue
+                if self.column != 0:
+                    self.write_line_break()
+                self.write_comment(comment)
+                if isinstance(event, MappingStartEvent):
+                    comment.pre_done = True
+        except TypeError:
+            print('eventtt', type(event), event)
+            raise
+
+    def write_post_comment(self, event):
+        if self.event.comment[0] is None:
+            return
+        comment = event.comment[0]
+        self.write_comment(comment)
diff --git a/error.py b/error.py
new file mode 100644
index 0000000..2b76904
--- /dev/null
+++ b/error.py
@@ -0,0 +1,80 @@
+from __future__ import absolute_import
+
+__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
+
+from .compat import utf8
+
+
+class Mark(object):
+    def __init__(self, name, index, line, column, buffer, pointer):
+        self.name = name
+        self.index = index
+        self.line = line
+        self.column = column
+        self.buffer = buffer
+        self.pointer = pointer
+
+    def get_snippet(self, indent=4, max_length=75):
+        if self.buffer is None:
+            return None
+        head = ''
+        start = self.pointer
+        while (start > 0 and
+               self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029'):
+            start -= 1
+            if self.pointer-start > max_length/2-1:
+                head = ' ... '
+                start += 5
+                break
+        tail = ''
+        end = self.pointer
+        while (end < len(self.buffer) and
+               self.buffer[end] not in u'\0\r\n\x85\u2028\u2029'):
+            end += 1
+            if end-self.pointer > max_length/2-1:
+                tail = ' ... 
' + end -= 5 + break + snippet = utf8(self.buffer[start:end]) + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + + +class YAMLError(Exception): + pass + + +class MarkedYAMLError(YAMLError): + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) diff --git a/events.py b/events.py new file mode 100644 index 0000000..e1b9c62 --- /dev/null +++ b/events.py @@ -0,0 +1,105 @@ + +# Abstract classes. + + +def CommentCheck(): + pass + + +class Event(object): + def __init__(self, start_mark=None, end_mark=None, comment=CommentCheck): + self.start_mark = start_mark + self.end_mark = end_mark + # assert comment is not CommentCheck + if comment is CommentCheck: + comment = None + self.comment = comment + + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value', + 'flow_style', 'style'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + if self.comment not in [None, CommentCheck]: + arguments += ', comment={!r}'.format(self.comment) + return '%s(%s)' % (self.__class__.__name__, arguments) + + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None, comment=None): + Event.__init__(self, start_mark, end_mark, comment) + self.anchor = anchor + + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None, comment=None): + Event.__init__(self, start_mark, end_mark, comment) + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.flow_style = flow_style + + +class CollectionEndEvent(Event): + pass + +# Implementations. 
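# A sketch of the concrete events defined below as produced by parse()
# from main.py (hypothetical session, not part of this changeset; reprs
# abridged, u'' prefix as shown on Python 2):
#
#     import ruamel.yaml
#     for event in ruamel.yaml.parse(u'- a\n'):
#         print(event)
#     # StreamStartEvent()
#     # DocumentStartEvent()
#     # SequenceStartEvent(anchor=None, tag=None, implicit=True,
#     #                    flow_style=False)
#     # ScalarEvent(anchor=None, tag=None, implicit=(True, False),
#     #             value=u'a', style=None)
#     # SequenceEndEvent()
#     # DocumentEndEvent()
#     # StreamEndEvent()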
+ + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None, + comment=None): + Event.__init__(self, start_mark, end_mark, comment) + self.encoding = encoding + + +class StreamEndEvent(Event): + pass + + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None, comment=None): + Event.__init__(self, start_mark, end_mark, comment) + self.explicit = explicit + self.version = version + self.tags = tags + + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, comment=None): + Event.__init__(self, start_mark, end_mark, comment) + self.explicit = explicit + + +class AliasEvent(NodeEvent): + pass + + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None, comment=None): + NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) + self.tag = tag + self.implicit = implicit + self.value = value + self.style = style + + +class SequenceStartEvent(CollectionStartEvent): + pass + + +class SequenceEndEvent(CollectionEndEvent): + pass + + +class MappingStartEvent(CollectionStartEvent): + pass + + +class MappingEndEvent(CollectionEndEvent): + pass diff --git a/loader.py b/loader.py new file mode 100644 index 0000000..16f01b6 --- /dev/null +++ b/loader.py @@ -0,0 +1,53 @@ +from __future__ import absolute_import + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader', 'RoundTripLoader'] + +from .reader import * +from .scanner import * +from .parser_ import * +from .composer import * +from .constructor import * +from .resolver import * + + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, + BaseResolver): + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + + +class RoundTripLoader(Reader, RoundTripScanner, Parser, + Composer, RoundTripConstructor, Resolver): + def __init__(self, stream): + Reader.__init__(self, stream) + RoundTripScanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + RoundTripConstructor.__init__(self) + Resolver.__init__(self) diff --git a/main.py b/main.py new file mode 100644 index 0000000..90ee4d5 --- /dev/null +++ b/main.py @@ -0,0 +1,352 @@ +# coding: utf-8 + +from __future__ import absolute_import + + +from ruamel.yaml.error import * + +from ruamel.yaml.tokens import * +from ruamel.yaml.events import * +from ruamel.yaml.nodes import * + +from ruamel.yaml.loader import * +from ruamel.yaml.dumper import * +from ruamel.yaml.compat import StringIO, BytesIO, with_metaclass, PY3 + +import io + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. 
+ """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +enc = None if PY3 else 'utf-8' + + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=enc, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = StringIO() + else: + stream = BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. 
+ """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=enc, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = StringIO() + else: + stream = BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + + +def dump(data, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=enc, explicit_start=None, explicit_end=None, + version=None, tags=None): + + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + + default_style ∈ None, '', '"', "'", '|', '>' + + """ + return dump_all([data], stream, Dumper=Dumper, + default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, + line_break=line_break, + encoding=encoding, explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. 
+ """ + Loader.add_constructor(tag, constructor) + + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + + +class YAMLObject(with_metaclass(YAMLObjectMetaclass)): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + + diff --git a/nodes.py b/nodes.py new file mode 100644 index 0000000..382b492 --- /dev/null +++ b/nodes.py @@ -0,0 +1,84 @@ +from __future__ import print_function + + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark, comment=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.comment = comment + self.anchor = None + + def __repr__(self): + value = self.value + # if isinstance(value, list): + # if len(value) == 0: + # value = '' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + # else: + # if len(value) > 75: + # value = repr(value[:70]+u' ... 
') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, + self.tag, value) + + def dump(self, indent=0): + if isinstance(self.value, basestring): + print('{}{}(tag={!r}, value={!r})'.format( + ' ' * indent, self.__class__.__name__, self.tag, self.value)) + if self.comment: + print(' {}comment: {})'.format( + ' ' * indent, self.comment)) + return + print('{}{}(tag={!r})'.format( + ' ' * indent, self.__class__.__name__, self.tag)) + if self.comment: + print(' {}comment: {})'.format( + ' ' * indent, self.comment)) + for v in self.value: + if isinstance(v, tuple): + for v1 in v: + v1.dump(indent+1) + elif isinstance(v, Node): + v.dump(indent+1) + else: + print('Node value type?', type(v)) + + +class ScalarNode(Node): + """ + styles: + ? -> set() ? key, no value + " -> double quoted + ' -> single quoted + | -> literal style + > -> + """ + id = 'scalar' + + def __init__(self, tag, value, start_mark=None, end_mark=None, style=None, + comment=None): + Node.__init__(self, tag, value, start_mark, end_mark, comment=comment) + self.style = style + + +class CollectionNode(Node): + def __init__(self, tag, value, start_mark=None, end_mark=None, + flow_style=None, comment=None, anchor=None): + Node.__init__(self, tag, value, start_mark, end_mark, comment=comment) + self.flow_style = flow_style + self.anchor = anchor + + +class SequenceNode(CollectionNode): + id = 'sequence' + + +class MappingNode(CollectionNode): + id = 'mapping' diff --git a/parser_.py b/parser_.py new file mode 100644 index 0000000..0afb596 --- /dev/null +++ b/parser_.py @@ -0,0 +1,656 @@ +from __future__ import absolute_import + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* +# STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | +# indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? +# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* +# BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+#
+# FIRST sets:
+#
+# stream: { STREAM-START }
+# explicit_document: { DIRECTIVE DOCUMENT-START }
+# implicit_document: FIRST(block_node)
+# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START
+#               BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START
+#                  FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_sequence: { BLOCK-SEQUENCE-START }
+# block_mapping: { BLOCK-MAPPING-START }
+# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR
+#               BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START
+#               FLOW-MAPPING-START BLOCK-ENTRY }
+# indentless_sequence: { ENTRY }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_sequence: { FLOW-SEQUENCE-START }
+# flow_mapping: { FLOW-MAPPING-START }
+# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
+#                        FLOW-MAPPING-START KEY }
+# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
+#                       FLOW-MAPPING-START KEY }
+
+__all__ = ['Parser', 'ParserError']
+
+from .error import MarkedYAMLError
+from .tokens import *
+from .events import *
+from .scanner import *
+from .compat import utf8
+
+
+class ParserError(MarkedYAMLError):
+    pass
+
+
+class Parser(object):
+    # Since writing a recursive descent parser is a straightforward task, we
+    # do not give many comments here.
+
+    DEFAULT_TAGS = {
+        u'!': u'!',
+        u'!!': u'tag:yaml.org,2002:',
+    }
+
+    def __init__(self):
+        self.current_event = None
+        self.yaml_version = None
+        self.tag_handles = {}
+        self.states = []
+        self.marks = []
+        self.state = self.parse_stream_start
+
+    def dispose(self):
+        # Reset the state attributes (to clear self-references)
+        self.states = []
+        self.state = None
+
+    def check_event(self, *choices):
+        # Check the type of the next event.
+        if self.current_event is None:
+            if self.state:
+                self.current_event = self.state()
+        if self.current_event is not None:
+            if not choices:
+                return True
+            for choice in choices:
+                if isinstance(self.current_event, choice):
+                    return True
+        return False
+
+    def peek_event(self):
+        # Get the next event.
+        if self.current_event is None:
+            if self.state:
+                self.current_event = self.state()
+        return self.current_event
+
+    def get_event(self):
+        # Get the next event and proceed further.
+        if self.current_event is None:
+            if self.state:
+                self.current_event = self.state()
+        value = self.current_event
+        self.current_event = None
+        return value
+
+    # stream ::= STREAM-START implicit_document? explicit_document*
+    #            STREAM-END
+    # implicit_document ::= block_node DOCUMENT-END*
+    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+
+    def parse_stream_start(self):
+
+        # Parse the stream start.
+        token = self.get_token()
+        token.move_comment(self.peek_token())
+        event = StreamStartEvent(token.start_mark, token.end_mark,
+                                 encoding=token.encoding)
+
+        # Prepare the next state.
+        self.state = self.parse_implicit_document_start
+
+        return event
+
+    def parse_implicit_document_start(self):
+
+        # Parse an implicit document.
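+        # For example, an implicit document begins directly with its
+        # content:
+        #     a: 1
+        # whereas an explicit document is introduced by '---' and may be
+        # preceded by directives:
+        #     %YAML 1.1
+        #     ---
+        #     a: 1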
+        if not self.check_token(DirectiveToken, DocumentStartToken,
+                                StreamEndToken):
+            self.tag_handles = self.DEFAULT_TAGS
+            token = self.peek_token()
+            start_mark = end_mark = token.start_mark
+            event = DocumentStartEvent(start_mark, end_mark,
+                                       explicit=False)
+
+            # Prepare the next state.
+            self.states.append(self.parse_document_end)
+            self.state = self.parse_block_node
+
+            return event
+
+        else:
+            return self.parse_document_start()
+
+    def parse_document_start(self):
+
+        # Parse any extra document end indicators.
+        while self.check_token(DocumentEndToken):
+            self.get_token()
+
+        # Parse an explicit document.
+        if not self.check_token(StreamEndToken):
+            token = self.peek_token()
+            start_mark = token.start_mark
+            version, tags = self.process_directives()
+            if not self.check_token(DocumentStartToken):
+                raise ParserError(None, None,
+                                  "expected '<document start>', but found %r"
+                                  % self.peek_token().id,
+                                  self.peek_token().start_mark)
+            token = self.get_token()
+            end_mark = token.end_mark
+            event = DocumentStartEvent(
+                start_mark, end_mark,
+                explicit=True, version=version, tags=tags)
+            self.states.append(self.parse_document_end)
+            self.state = self.parse_document_content
+        else:
+            # Parse the end of the stream.
+            token = self.get_token()
+            event = StreamEndEvent(token.start_mark, token.end_mark,
+                                   comment=token.comment)
+            assert not self.states
+            assert not self.marks
+            self.state = None
+        return event
+
+    def parse_document_end(self):
+
+        # Parse the document end.
+        token = self.peek_token()
+        start_mark = end_mark = token.start_mark
+        explicit = False
+        if self.check_token(DocumentEndToken):
+            token = self.get_token()
+            end_mark = token.end_mark
+            explicit = True
+        event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)
+
+        # Prepare the next state.
+        self.state = self.parse_document_start
+
+        return event
+
+    def parse_document_content(self):
+        if self.check_token(
+                DirectiveToken,
+                DocumentStartToken, DocumentEndToken, StreamEndToken):
+            event = self.process_empty_scalar(self.peek_token().start_mark)
+            self.state = self.states.pop()
+            return event
+        else:
+            return self.parse_block_node()
+
+    def process_directives(self):
+        self.yaml_version = None
+        self.tag_handles = {}
+        while self.check_token(DirectiveToken):
+            token = self.get_token()
+            if token.name == u'YAML':
+                if self.yaml_version is not None:
+                    raise ParserError(
+                        None, None,
+                        "found duplicate YAML directive", token.start_mark)
+                major, minor = token.value
+                if major != 1:
+                    raise ParserError(
+                        None, None,
+                        "found incompatible YAML document (version 1.* is "
+                        "required)",
+                        token.start_mark)
+                self.yaml_version = token.value
+            elif token.name == u'TAG':
+                handle, prefix = token.value
+                if handle in self.tag_handles:
+                    raise ParserError(None, None,
+                                      "duplicate tag handle %r" % utf8(handle),
+                                      token.start_mark)
+                self.tag_handles[handle] = prefix
+        if self.tag_handles:
+            value = self.yaml_version, self.tag_handles.copy()
+        else:
+            value = self.yaml_version, None
+        for key in self.DEFAULT_TAGS:
+            if key not in self.tag_handles:
+                self.tag_handles[key] = self.DEFAULT_TAGS[key]
+        return value
+
+    # block_node_or_indentless_sequence ::= ALIAS
+    #               | properties (block_content | indentless_block_sequence)?
+    #               | block_content
+    #               | indentless_block_sequence
+    # block_node ::= ALIAS
+    #               | properties block_content?
+    #               | block_content
+    # flow_node ::= ALIAS
+    #               | properties flow_content?
+    #               | flow_content
+    # properties ::= TAG ANCHOR? | ANCHOR TAG?
+ # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError( + "while parsing a node", start_mark, + "found undefined tag handle %r" % utf8(handle), + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + # if tag == u'!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' + # and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == u'!') + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == u'!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent( + anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style, + comment=token.comment + ) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent( + anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent( + anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + # should inserting the comment be dependent on the + # indentation? 
+                pt = self.peek_token()
+                comment = pt.comment
+                # print('pt0', type(pt))
+                if comment is None or comment[1] is None:
+                    comment = pt.split_comment()
+                # print('pt1', comment)
+                event = SequenceStartEvent(
+                    anchor, tag, implicit, start_mark, end_mark,
+                    flow_style=False,
+                    comment=comment,
+                )
+                self.state = self.parse_block_sequence_first_entry
+            elif block and self.check_token(BlockMappingStartToken):
+                end_mark = self.peek_token().start_mark
+                comment = self.peek_token().comment
+                event = MappingStartEvent(
+                    anchor, tag, implicit, start_mark, end_mark,
+                    flow_style=False, comment=comment)
+                self.state = self.parse_block_mapping_first_key
+            elif anchor is not None or tag is not None:
+                # Empty scalars are allowed even if a tag or an anchor is
+                # specified.
+                event = ScalarEvent(anchor, tag, (implicit, False), u'',
+                                    start_mark, end_mark)
+                self.state = self.states.pop()
+            else:
+                if block:
+                    node = 'block'
+                else:
+                    node = 'flow'
+                token = self.peek_token()
+                raise ParserError(
+                    "while parsing a %s node" % node, start_mark,
+                    "expected the node content, but found %r" % token.id,
+                    token.start_mark)
+        return event
+
+    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)*
+    #                    BLOCK-END
+
+    def parse_block_sequence_first_entry(self):
+        token = self.get_token()
+        # move any comment from start token
+        # token.move_comment(self.peek_token())
+        self.marks.append(token.start_mark)
+        return self.parse_block_sequence_entry()
+
+    def parse_block_sequence_entry(self):
+        if self.check_token(BlockEntryToken):
+            token = self.get_token()
+            token.move_comment(self.peek_token())
+            if not self.check_token(BlockEntryToken, BlockEndToken):
+                self.states.append(self.parse_block_sequence_entry)
+                return self.parse_block_node()
+            else:
+                self.state = self.parse_block_sequence_entry
+                return self.process_empty_scalar(token.end_mark)
+        if not self.check_token(BlockEndToken):
+            token = self.peek_token()
+            raise ParserError(
+                "while parsing a block collection", self.marks[-1],
+                "expected <block end>, but found %r" %
+                token.id, token.start_mark)
+        token = self.get_token()  # BlockEndToken
+        event = SequenceEndEvent(token.start_mark, token.end_mark,
+                                 comment=token.comment)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
+
+    # indentless_sequence?
+    # sequence:
+    # - entry
+    # - nested
+
+    def parse_indentless_sequence_entry(self):
+        if self.check_token(BlockEntryToken):
+            token = self.get_token()
+            token.move_comment(self.peek_token())
+            if not self.check_token(BlockEntryToken,
+                                    KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_indentless_sequence_entry)
+                return self.parse_block_node()
+            else:
+                self.state = self.parse_indentless_sequence_entry
+                return self.process_empty_scalar(token.end_mark)
+        token = self.peek_token()
+        event = SequenceEndEvent(token.start_mark, token.start_mark,
+                                 comment=token.comment)
+        self.state = self.states.pop()
+        return event
+
+    # block_mapping ::= BLOCK-MAPPING-START
+    #                   ((KEY block_node_or_indentless_sequence?)?
+    #                    (VALUE block_node_or_indentless_sequence?)?)*
+    #                   BLOCK-END
+
+    def parse_block_mapping_first_key(self):
+        token = self.get_token()
+        self.marks.append(token.start_mark)
+        return self.parse_block_mapping_key()
+
+    def parse_block_mapping_key(self):
+        if self.check_token(KeyToken):
+            token = self.get_token()
+            token.move_comment(self.peek_token())
+            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_block_mapping_value)
+                return self.parse_block_node_or_indentless_sequence()
+            else:
+                self.state = self.parse_block_mapping_value
+                return self.process_empty_scalar(token.end_mark)
+        if not self.check_token(BlockEndToken):
+            token = self.peek_token()
+            raise ParserError(
+                "while parsing a block mapping", self.marks[-1],
+                "expected <block end>, but found %r" % token.id,
+                token.start_mark)
+        token = self.get_token()
+        token.move_comment(self.peek_token())
+        event = MappingEndEvent(token.start_mark, token.end_mark,
+                                comment=token.comment)
+        self.state = self.states.pop()
+        self.marks.pop()
+        return event
+
+    def parse_block_mapping_value(self):
+        if self.check_token(ValueToken):
+            token = self.get_token()
+            # the value token might have a post comment; move it to
+            # e.g. the following block
+            token.move_comment(self.peek_token())
+            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+                self.states.append(self.parse_block_mapping_key)
+                return self.parse_block_node_or_indentless_sequence()
+            else:
+                self.state = self.parse_block_mapping_key
+                return self.process_empty_scalar(token.end_mark)
+        else:
+            self.state = self.parse_block_mapping_key
+            token = self.peek_token()
+            return self.process_empty_scalar(token.start_mark)
+
+    # flow_sequence ::= FLOW-SEQUENCE-START
+    #                   (flow_sequence_entry FLOW-ENTRY)*
+    #                   flow_sequence_entry?
+    #                   FLOW-SEQUENCE-END
+    # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+    #
+    # Note that while production rules for both flow_sequence_entry and
+    # flow_mapping_entry are equal, their interpretations are different.
+    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
+    # generates an inline mapping (set syntax).
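+    # For example (a sketch, assuming the top-level load() helper from
+    # main.py):
+    #
+    #     import ruamel.yaml as yaml
+    #     yaml.load(u"[a: 1, b]")   # -> [{'a': 1}, 'b']
+    #
+    # i.e. a key/value pair directly inside a flow sequence comes back as
+    # an inline single-pair mapping.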
+ + def parse_flow_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_sequence_entry(first=True) + + def parse_flow_sequence_entry(self, first=False): + if not self.check_token(FlowSequenceEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError( + "while parsing a flow sequence", self.marks[-1], + "expected ',' or ']', but got %r" % token.id, + token.start_mark) + + if self.check_token(KeyToken): + token = self.peek_token() + event = MappingStartEvent(None, None, True, + token.start_mark, token.end_mark, + flow_style=True) + self.state = self.parse_flow_sequence_entry_mapping_key + return event + elif not self.check_token(FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry) + return self.parse_flow_node() + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark, + comment=token.comment) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_sequence_entry_mapping_key(self): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_value + return self.process_empty_scalar(token.end_mark) + + def parse_flow_sequence_entry_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + self.states.append(self.parse_flow_sequence_entry_mapping_end) + return self.parse_flow_node() + else: + self.state = self.parse_flow_sequence_entry_mapping_end + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_sequence_entry_mapping_end + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_sequence_entry_mapping_end(self): + self.state = self.parse_flow_sequence_entry + token = self.peek_token() + return MappingEndEvent(token.start_mark, token.start_mark) + + # flow_mapping ::= FLOW-MAPPING-START + # (flow_mapping_entry FLOW-ENTRY)* + # flow_mapping_entry? + # FLOW-MAPPING-END + # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
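+    #
+    # A pair whose value is omitted is given an empty scalar by
+    # parse_flow_mapping_empty_value() below, so (a sketch, using the same
+    # load() helper as above):
+    #
+    #     yaml.load(u"{a: 1, b}")   # -> {'a': 1, 'b': None}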
+ + def parse_flow_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_flow_mapping_key(first=True) + + def parse_flow_mapping_key(self, first=False): + if not self.check_token(FlowMappingEndToken): + if not first: + if self.check_token(FlowEntryToken): + self.get_token() + else: + token = self.peek_token() + raise ParserError( + "while parsing a flow mapping", self.marks[-1], + "expected ',' or '}', but got %r" % token.id, + token.start_mark) + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, + FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_value) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_value + return self.process_empty_scalar(token.end_mark) + elif not self.check_token(FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_empty_value) + return self.parse_flow_node() + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark, + comment=token.comment) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_flow_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + self.states.append(self.parse_flow_mapping_key) + return self.parse_flow_node() + else: + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_flow_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + def parse_flow_mapping_empty_value(self): + self.state = self.parse_flow_mapping_key + return self.process_empty_scalar(self.peek_token().start_mark) + + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, (True, False), u'', mark, mark) diff --git a/py/__init__.py b/py/__init__.py deleted file mode 100644 index ac41035..0000000 --- a/py/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -# coding: utf-8 - -from __future__ import absolute_import - - -def _convert_version(tup): - """create a PEP 386 pseudo-format conformant string from tuple tup""" - ret_val = str(tup[0]) # first is always digit - next_sep = "." # separator for next extension, can be "" or "." - for x in tup[1:]: - if isinstance(x, int): - ret_val += next_sep + str(x) - next_sep = '.' - continue - first_letter = x[0].lower() - next_sep = '' - if first_letter in 'abcr': - ret_val += 'rc' if first_letter == 'r' else first_letter - elif first_letter in 'pd': - ret_val += '.post' if first_letter == 'p' else '.dev' - return ret_val - - -version_info = (0, 10, 6) -__version__ = _convert_version(version_info) - -del _convert_version - -try: - from .cyaml import * - __with_libyaml__ = True -except ImportError: - __with_libyaml__ = False - -# body extracted to main.py -from .main import * - -def main(): - # No direct import of yaml in order not to pollute namespace. 
- # If other utility 'bodies' exist in this directory a module level - # import here, would get you all of its initialisations/imports as well - from ruamel.yaml.yaml import main as util_main - util_main() diff --git a/py/comments.py b/py/comments.py deleted file mode 100644 index 0db9457..0000000 --- a/py/comments.py +++ /dev/null @@ -1,393 +0,0 @@ -# coding: utf-8 - -from __future__ import absolute_import -from __future__ import print_function - -__all__ = ["CommentedSeq", "CommentedMap", "CommentedOrderedMap", - "CommentedSet", 'comment_attrib', 'merge_attrib'] - -""" -stuff to deal with comments and formatting on dict/list/ordereddict/set -these are not really related, formatting could be factored out as -a separate base -""" - -from collections import MutableSet - -from .compat import ordereddict - -comment_attrib = '_yaml_comment' -format_attrib = '_yaml_format' -line_col_attrib = '_yaml_line_col' -anchor_attrib = '_yaml_anchor' -merge_attrib = '_yaml_merge' - -class Comment(object): - # sys.getsize tested the Comment objects, __slots__ make them bigger - # and adding self.end did not matter - attrib = comment_attrib - - def __init__(self): - self.comment = None # [post, [pre]] - # map key (mapping/omap/dict) or index (sequence/list) to a list of - # dict: post_key, pre_key, post_value, pre_value - # list: pre item, post item - self._items = {} - # self._start = [] # should not put these on first item - self._end = [] # end of document comments - - def __str__(self): - if self._end: - end = ',\n end=' + str(self._end) - else: - end = '' - return "Comment(comment={0},\n items={1}{2})".format( - self.comment, self._items, end) - - @property - def items(self): - return self._items - - @property - def end(self): - return self._end - - @end.setter - def end(self, value): - self._end = value - - @property - def start(self): - return self._start - - @end.setter - def start(self, value): - self._start = value - - -# to distinguish key from None -def NoComment(): - pass - - -class Format(object): - attrib = format_attrib - - def __init__(self): - self._flow_style = None - - def set_flow_style(self): - self._flow_style = True - - def set_block_style(self): - self._flow_style = False - - def flow_style(self, default=None): - """if default (the flow_style) is None, the flow style tacked on to - the object explicitly will be taken. 
If that is None as well the - default flow style rules the format down the line, or the type - of the constituent values (simple -> flow, map/list -> block)""" - if self._flow_style is None: - return default - return self._flow_style - - -class LineCol(object): - attrib = line_col_attrib - - def __init__(self): - self.line = None - self.col = None - -class Anchor(object): - attrib = anchor_attrib - - def __init__(self): - self.value = None - self.always_dump = False - -class CommentedBase(object): - @property - def ca(self): - if not hasattr(self, Comment.attrib): - setattr(self, Comment.attrib, Comment()) - return getattr(self, Comment.attrib) - - def yaml_end_comment_extend(self, comment, clear=False): - if clear: - self.ca.end = [] - self.ca.end.extend(comment) - - def yaml_key_comment_extend(self, key, comment, clear=False): - l = self.ca._items.setdefault(key, [None, None, None, None]) - if clear or l[1] is None: - if comment[1] is not None: - assert isinstance(comment[1], list) - l[1] = comment[1] - else: - l[1].extend(comment[0]) - l[0] = comment[0] - - def yaml_value_comment_extend(self, key, comment, clear=False): - l = self.ca._items.setdefault(key, [None, None, None, None]) - if clear or l[3] is None: - if comment[1] is not None: - assert isinstance(comment[1], list) - l[3] = comment[1] - else: - l[3].extend(comment[0]) - l[2] = comment[0] - - def yaml_set_start_comment(self, comment, indent=0): - """overwrites any preceding comment lines on an object - expects comment to be without `#` and possible have mutlple lines - """ - from .error import Mark - from .tokens import CommentToken - pre_comments = self._yaml_get_pre_comment() - if comment[-1] == '\n': - comment = comment[:-1] # strip final newline if there - start_mark = Mark(None, None, None, indent, None, None) - for com in comment.split('\n'): - pre_comments.append(CommentToken('# ' + com + '\n', start_mark, None)) - - @property - def fa(self): - if not hasattr(self, Format.attrib): - setattr(self, Format.attrib, Format()) - return getattr(self, Format.attrib) - - def yaml_add_eol_comment(self, comment, key=NoComment, column=None): - """ - there is a problem as eol comments should start with ' #' - (but at the beginning of the line the space doesn't have to be before - the #. 
The column index is for the # mark - """ - from .tokens import CommentToken - from .error import Mark - if column is None: - column = self._yaml_get_column(key) - if comment[0] != '#': - comment = '# ' + comment - if column is None: - if comment[0] == '#': - comment = ' ' + comment - column = 0 - start_mark = Mark(None, None, None, column, None, None) - ct = [CommentToken(comment, start_mark, None), None] - self._yaml_add_eol_comment(ct, key=key) - - @property - def lc(self): - if not hasattr(self, LineCol.attrib): - setattr(self, LineCol.attrib, LineCol()) - return getattr(self, LineCol.attrib) - - def _yaml_set_line_col(self, line, col): - self.lc.line = line - self.lc.col = col - - @property - def anchor(self): - if not hasattr(self, Anchor.attrib): - setattr(self, Anchor.attrib, Anchor()) - return getattr(self, Anchor.attrib) - - def yaml_anchor(self): - if not hasattr(self, Anchor.attrib): - return None - return self.anchor - - def yaml_set_anchor(self, value, always_dump=False): - self.anchor.value = value - self.anchor.always_dump = always_dump - -class CommentedSeq(list, CommentedBase): - __slots__ = [Comment.attrib, ] - - def _yaml_add_comment(self, comment, key=NoComment): - if key is not NoComment: - self.yaml_key_comment_extend(key, comment) - else: - self.ca.comment = comment - - def _yaml_add_eol_comment(self, comment, key): - self._yaml_add_comment(comment, key=key) - - def _yaml_get_columnX(self, key): - return self.ca.items[key][0].start_mark.column - - def _yaml_get_column(self, key): - column = None - sel_idx = None - pre, post = key-1, key+1 - if pre in self.ca.items: - sel_idx = pre - elif post in self.ca.items: - sel_idx = post - else: - # self.ca.items is not ordered - for row_idx, k1 in enumerate(self): - if row_idx >= key: - break - if row_idx not in self.ca.items: - continue - sel_idx = row_idx - if sel_idx is not None: - column = self._yaml_get_columnX(sel_idx) - return column - - def _yaml_get_pre_comment(self): - if self.ca.comment is None: - pre_comments = [] - self.ca.comment = [None, pre_comments] - else: - pre_comments = self.ca.comment[1] = [] - return pre_comments - - -class CommentedMap(ordereddict, CommentedBase): - __slots__ = [Comment.attrib, ] - - def _yaml_add_comment(self, comment, key=NoComment, value=NoComment): - """values is set to key to indicate a value attachment of comment""" - if key is not NoComment: - self.yaml_key_comment_extend(key, comment) - return - if value is not NoComment: - self.yaml_value_comment_extend(value, comment) - else: - self.ca.comment = comment - - def _yaml_add_eol_comment(self, comment, key): - """add on the value line, with value specified by the key""" - self._yaml_add_comment(comment, value=key) - - def _yaml_get_columnX(self, key): - return self.ca.items[key][2].start_mark.column - - def _yaml_get_column(self, key): - column = None - sel_idx = None - pre, post, last = None, None, None - for x in self: - if pre is not None and x != key: - post = x - break - if x == key: - pre = last - last = x - if pre in self.ca.items: - sel_idx = pre - elif post in self.ca.items: - sel_idx = post - else: - # self.ca.items is not ordered - for row_idx, k1 in enumerate(self): - if k1 >= key: - break - if k1 not in self.ca.items: - continue - sel_idx = k1 - if sel_idx is not None: - column = self._yaml_get_columnX(sel_idx) - return column - - def _yaml_get_pre_comment(self): - if self.ca.comment is None: - pre_comments = [] - self.ca.comment = [None, pre_comments] - else: - pre_comments = self.ca.comment[1] = [] - return 
pre_comments - - def update(self, vals): - try: - ordereddict.update(self, vals) - except TypeError: - # probably a dict that is used - for x in vals: - self[x] = vals[x] - - def mlget(self, key, default=None, list_ok=False): - """multi-level get that expects dicts within dicts""" - if not isinstance(key, list): - return self.get(key, default) - # assume that the key is a list of recursively accessible dicts - def get_one_level(key_list, level, d): - if not list_ok: - assert isinstance(d, dict) - if level >= len(key_list): - if level > len(key_list): - raise IndexError - return d[key_list[level-1]] - return get_one_level(key_list, level+1, d[key_list[level-1]]) - - try: - return get_one_level(key, 1, self) - except KeyError: - return default - except (TypeError, IndexError): - if not list_ok: - Raise - return default - - def __getitem__(self, key): - try: - return ordereddict.__getitem__(self, key) - except KeyError: - for merged in getattr(self, merge_attrib, []): - if key in merged[1]: - return merged[1][key] - raise - - def get(self, key, default=None): - try: - return self.__getitem__(key) - except: - return default - - @property - def merge(self): - if not hasattr(self, merge_attrib): - setattr(self, merge_attrib, []) - return getattr(self, merge_attrib) - - def add_yaml_merge(self, value): - self.merge.extend(value) - - - -class CommentedOrderedMap(CommentedMap): - __slots__ = [Comment.attrib, ] - - -class CommentedSet(MutableSet, CommentedMap): - __slots__ = [Comment.attrib, 'odict'] - - def __init__(self, values=None): - self.odict = ordereddict() - MutableSet.__init__(self) - if values is not None: - self |= values - - def add(self, value): - """Add an element.""" - self.odict[value] = None - - def discard(self, value): - """Remove an element. Do not raise an exception if absent.""" - del self.odict[value] - - def __contains__(self, x): - return x in self.odict - - def __iter__(self): - for x in self.odict: - yield x - - def __len__(self): - return len(self.odict) - - def __repr__(self): - return 'set({0!r})'.format(self.odict.keys()) diff --git a/py/compat.py b/py/compat.py deleted file mode 100644 index dc0c51c..0000000 --- a/py/compat.py +++ /dev/null @@ -1,106 +0,0 @@ - -from __future__ import print_function - -# partially from package six by Benjamin Peterson - -import sys -import os -import types - -try: - from ruamel.ordereddict import ordereddict -except: - try: - from collections import OrderedDict - except ImportError: - from orderddict import OrderedDict - # to get the right name import ... 
as ordereddict doesn't do that - - class ordereddict(OrderedDict): - pass - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - -if PY3: - def utf8(s): - return s - - def to_str(s): - return s - - def to_unicode(s): - return s - -else: - def utf8(s): - return s.encode('utf-8') - - def to_str(s): - return str(s) - - def to_unicode(s): - return unicode(s) - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - MAXSIZE = sys.maxsize - unichr = chr - import io - StringIO = io.StringIO - BytesIO = io.BytesIO - -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - unichr = unichr # to allow importing - import StringIO - StringIO = StringIO.StringIO - import cStringIO - BytesIO = cStringIO.StringIO - -if PY3: - builtins_module = 'builtins' -else: - builtins_module = '__builtin__' - - -def with_metaclass(meta, *bases): - """Create a base class with a metaclass.""" - return meta("NewBase", bases, {}) - -DBG_TOKEN = 1 -DBG_EVENT = 2 -DBG_NODE = 4 - - -_debug = None - - -# used from yaml util when testing -def dbg(val=None): - global _debug - if _debug is None: - # set to true or false - _debug = os.environ.get('YAMLDEBUG') - if _debug is None: - _debug = 0 - else: - _debug = int(_debug) - if val is None: - return _debug - return _debug & val - - -def nprint(*args, **kw): - if dbg: - print(*args, **kw) diff --git a/py/composer.py b/py/composer.py deleted file mode 100644 index 13436c8..0000000 --- a/py/composer.py +++ /dev/null @@ -1,169 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -__all__ = ['Composer', 'ComposerError'] - -from .error import MarkedYAMLError -from .events import * -from .nodes import * -from .compat import utf8 - - -class ComposerError(MarkedYAMLError): - pass - - -class Composer(object): - def __init__(self): - self.anchors = {} - - def check_node(self): - # Drop the STREAM-START event. - if self.check_event(StreamStartEvent): - self.get_event() - - # If there are more documents available? - return not self.check_event(StreamEndEvent) - - def get_node(self): - # Get the root node of the next document. - if not self.check_event(StreamEndEvent): - return self.compose_document() - - def get_single_node(self): - # Drop the STREAM-START event. - self.get_event() - - # Compose a document if the stream is not empty. - document = None - if not self.check_event(StreamEndEvent): - document = self.compose_document() - - # Ensure that the stream contains no more documents. - if not self.check_event(StreamEndEvent): - event = self.get_event() - raise ComposerError( - "expected a single document in the stream", - document.start_mark, "but found another document", - event.start_mark) - - # Drop the STREAM-END event. - self.get_event() - - return document - - def compose_document(self): - # Drop the DOCUMENT-START event. - self.get_event() - - # Compose the root node. - node = self.compose_node(None, None) - - # Drop the DOCUMENT-END event. 
- self.get_event() - - self.anchors = {} - return node - - def compose_node(self, parent, index): - if self.check_event(AliasEvent): - event = self.get_event() - alias = event.anchor - if alias not in self.anchors: - raise ComposerError( - None, None, "found undefined alias %r" - % utf8(alias), event.start_mark) - return self.anchors[alias] - event = self.peek_event() - anchor = event.anchor - if anchor is not None: # have an anchor - if anchor in self.anchors: - raise ComposerError( - "found duplicate anchor %r; first occurence" - % utf8(anchor), self.anchors[anchor].start_mark, - "second occurence", event.start_mark) - self.descend_resolver(parent, index) - if self.check_event(ScalarEvent): - node = self.compose_scalar_node(anchor) - elif self.check_event(SequenceStartEvent): - node = self.compose_sequence_node(anchor) - elif self.check_event(MappingStartEvent): - node = self.compose_mapping_node(anchor) - self.ascend_resolver() - return node - - def compose_scalar_node(self, anchor): - event = self.get_event() - tag = event.tag - if tag is None or tag == u'!': - tag = self.resolve(ScalarNode, event.value, event.implicit) - node = ScalarNode(tag, event.value, - event.start_mark, event.end_mark, style=event.style, - comment=event.comment) - if anchor is not None: - self.anchors[anchor] = node - return node - - def compose_sequence_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == u'!': - tag = self.resolve(SequenceNode, None, start_event.implicit) - node = SequenceNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style, - comment=start_event.comment, anchor=anchor) - if anchor is not None: - self.anchors[anchor] = node - index = 0 - while not self.check_event(SequenceEndEvent): - node.value.append(self.compose_node(node, index)) - index += 1 - end_event = self.get_event() - if node.flow_style is True and end_event.comment is not None: - if node.comment is not None: - print('Warning: unexpected end_event commment in sequence ' - 'node {}'.format(node.flow_style)) - node.comment = end_event.comment - node.end_mark = end_event.end_mark - self.check_end_doc_comment(end_event, node) - return node - - def compose_mapping_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == u'!': - tag = self.resolve(MappingNode, None, start_event.implicit) - node = MappingNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style, - comment=start_event.comment, anchor=anchor) - if anchor is not None: - self.anchors[anchor] = node - while not self.check_event(MappingEndEvent): - # key_event = self.peek_event() - item_key = self.compose_node(node, None) - # if item_key in node.value: - # raise ComposerError("while composing a mapping", - # start_event.start_mark, - # "found duplicate key", key_event.start_mark) - item_value = self.compose_node(node, item_key) - # node.value[item_key] = item_value - node.value.append((item_key, item_value)) - end_event = self.get_event() - if node.flow_style is True and end_event.comment is not None: - node.comment = end_event.comment - node.end_mark = end_event.end_mark - self.check_end_doc_comment(end_event, node) - return node - - def check_end_doc_comment(self, end_event, node): - if end_event.comment and end_event.comment[1]: - # pre comments on an end_event, no following to move to - if node.comment is None: - node.comment = [None, None] - assert not isinstance(node, ScalarEvent) - # this is a post comment on a mapping 
node, add as third element - # in the list - node.comment.append(end_event.comment[1]) - end_event.comment[1] = None diff --git a/py/configobjwalker.py b/py/configobjwalker.py deleted file mode 100644 index 576adcd..0000000 --- a/py/configobjwalker.py +++ /dev/null @@ -1,65 +0,0 @@ - - -def configobj_walker(cfg): - """ - walks over a ConfigObj (INI file with comments) generating - corresponding YAML output (including comments - """ - from configobj import ConfigObj - assert isinstance(cfg, ConfigObj) - for c in cfg.initial_comment: - if c.strip(): - yield c - for s in _walk_section(cfg): - if s.strip(): - yield s - for c in cfg.final_comment: - if c.strip(): - yield c - - -def _walk_section(s, level=0): - from configobj import Section - assert isinstance(s, Section) - indent = u' ' * level - for name in s.scalars: - for c in s.comments[name]: - yield indent + c.strip() - x = s[name] - if u'\n' in x: - i = indent + u' ' - x = u'|\n' + i + x.strip().replace(u'\n', u'\n' + i) - elif ':' in x: - x = u"'" + x.replace(u"'", u"''") + u"'" - line = u'{0}{1}: {2}'.format(indent, name, x) - c = s.inline_comments[name] - if c: - line += u' ' + c - yield line - for name in s.sections: - for c in s.comments[name]: - yield indent + c.strip() - line = u'{0}{1}:'.format(indent, name) - c = s.inline_comments[name] - if c: - line += u' ' + c - yield line - for val in _walk_section(s[name], level=level+1): - yield val - -##def config_obj_2_rt_yaml(cfg): -## from .comments import CommentedMap, CommentedSeq -## from configobj import ConfigObj -## assert isinstance(cfg, ConfigObj) -## #for c in cfg.initial_comment: -## # if c.strip(): -## # pass -## cm = CommentedMap() -## for name in s.sections: -## cm[name] = d = CommentedMap() -## -## -## #for c in cfg.final_comment: -## # if c.strip(): -## # yield c -## return cm diff --git a/py/constructor.py b/py/constructor.py deleted file mode 100644 index 5ce8974..0000000 --- a/py/constructor.py +++ /dev/null @@ -1,1116 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', - 'ConstructorError', 'RoundTripConstructor'] - -import collections -import datetime -import base64 -import binascii -import re -import sys -import types - -from .error import * -from .nodes import * -from .compat import (utf8, builtins_module, to_str, PY2, PY3, ordereddict, - text_type) -from .comments import * -from .scalarstring import * - - -class ConstructorError(MarkedYAMLError): - pass - - -class BaseConstructor(object): - - yaml_constructors = {} - yaml_multi_constructors = {} - - def __init__(self): - self.constructed_objects = {} - self.recursive_objects = {} - self.state_generators = [] - self.deep_construct = False - - def check_data(self): - # If there are more documents available? - return self.check_node() - - def get_data(self): - # Construct and return the next document. - if self.check_node(): - return self.construct_document(self.get_node()) - - def get_single_data(self): - # Ensure that the stream contains a single document and construct it. 
- node = self.get_single_node() - if node is not None: - return self.construct_document(node) - return None - - def construct_document(self, node): - data = self.construct_object(node) - while self.state_generators: - state_generators = self.state_generators - self.state_generators = [] - for generator in state_generators: - for dummy in generator: - pass - self.constructed_objects = {} - self.recursive_objects = {} - self.deep_construct = False - return data - - def construct_object(self, node, deep=False): - if node in self.constructed_objects: - return self.constructed_objects[node] - if deep: - old_deep = self.deep_construct - self.deep_construct = True - if node in self.recursive_objects: - raise ConstructorError( - None, None, - "found unconstructable recursive node", node.start_mark) - self.recursive_objects[node] = None - constructor = None - tag_suffix = None - if node.tag in self.yaml_constructors: - constructor = self.yaml_constructors[node.tag] - else: - for tag_prefix in self.yaml_multi_constructors: - if node.tag.startswith(tag_prefix): - tag_suffix = node.tag[len(tag_prefix):] - constructor = self.yaml_multi_constructors[tag_prefix] - break - else: - if None in self.yaml_multi_constructors: - tag_suffix = node.tag - constructor = self.yaml_multi_constructors[None] - elif None in self.yaml_constructors: - constructor = self.yaml_constructors[None] - elif isinstance(node, ScalarNode): - constructor = self.__class__.construct_scalar - elif isinstance(node, SequenceNode): - constructor = self.__class__.construct_sequence - elif isinstance(node, MappingNode): - constructor = self.__class__.construct_mapping - if tag_suffix is None: - data = constructor(self, node) - else: - data = constructor(self, tag_suffix, node) - if isinstance(data, types.GeneratorType): - generator = data - data = next(generator) - if self.deep_construct: - for dummy in generator: - pass - else: - self.state_generators.append(generator) - self.constructed_objects[node] = data - del self.recursive_objects[node] - if deep: - self.deep_construct = old_deep - return data - - def construct_scalar(self, node): - if not isinstance(node, ScalarNode): - raise ConstructorError( - None, None, - "expected a scalar node, but found %s" % node.id, - node.start_mark) - return node.value - - def construct_sequence(self, node, deep=False): - if not isinstance(node, SequenceNode): - raise ConstructorError( - None, None, - "expected a sequence node, but found %s" % node.id, - node.start_mark) - return [self.construct_object(child, deep=deep) - for child in node.value] - - def construct_mapping(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError( - None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - mapping = {} - for key_node, value_node in node.value: - # keys can be list -> deep - key = self.construct_object(key_node, deep=True) - # lists are not hashable, but tuples are - if not isinstance(key, collections.Hashable): - if isinstance(key, list): - key = tuple(key) - if PY2: - try: - hash(key) - except TypeError as exc: - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % - exc, key_node.start_mark) - else: - if not isinstance(key, collections.Hashable): - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "found unhashable key", key_node.start_mark) - - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - def 
construct_pairs(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError( - None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - pairs = [] - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - value = self.construct_object(value_node, deep=deep) - pairs.append((key, value)) - return pairs - - @classmethod - def add_constructor(cls, tag, constructor): - if 'yaml_constructors' not in cls.__dict__: - cls.yaml_constructors = cls.yaml_constructors.copy() - cls.yaml_constructors[tag] = constructor - - @classmethod - def add_multi_constructor(cls, tag_prefix, multi_constructor): - if 'yaml_multi_constructors' not in cls.__dict__: - cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() - cls.yaml_multi_constructors[tag_prefix] = multi_constructor - - -class SafeConstructor(BaseConstructor): - def construct_scalar(self, node): - if isinstance(node, MappingNode): - for key_node, value_node in node.value: - if key_node.tag == u'tag:yaml.org,2002:value': - return self.construct_scalar(value_node) - return BaseConstructor.construct_scalar(self, node) - - def flatten_mapping(self, node): - """ - This implements the merge key feature http://yaml.org/type/merge.html - by inserting keys from the merge dict/list of dicts if not yet - available in this node - """ - merge = [] - index = 0 - while index < len(node.value): - key_node, value_node = node.value[index] - if key_node.tag == u'tag:yaml.org,2002:merge': - del node.value[index] - if isinstance(value_node, MappingNode): - self.flatten_mapping(value_node) - merge.extend(value_node.value) - elif isinstance(value_node, SequenceNode): - submerge = [] - for subnode in value_node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError( - "while constructing a mapping", - node.start_mark, - "expected a mapping for merging, but found %s" - % subnode.id, subnode.start_mark) - self.flatten_mapping(subnode) - submerge.append(subnode.value) - submerge.reverse() - for value in submerge: - merge.extend(value) - else: - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "expected a mapping or list of mappings for merging, " - "but found %s" - % value_node.id, value_node.start_mark) - elif key_node.tag == u'tag:yaml.org,2002:value': - key_node.tag = u'tag:yaml.org,2002:str' - index += 1 - else: - index += 1 - if merge: - node.value = merge + node.value - - def construct_mapping(self, node, deep=False): - if isinstance(node, MappingNode): - self.flatten_mapping(node) - return BaseConstructor.construct_mapping(self, node, deep=deep) - - def construct_yaml_null(self, node): - self.construct_scalar(node) - return None - - # YAML 1.2 spec doesn't mention yes/no etc any more, 1.1 does - bool_values = { - u'yes': True, - u'no': False, - u'true': True, - u'false': False, - u'on': True, - u'off': False, - } - - def construct_yaml_bool(self, node): - value = self.construct_scalar(node) - return self.bool_values[value.lower()] - - def construct_yaml_int(self, node): - value = to_str(self.construct_scalar(node)) - value = value.replace('_', '') - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '0': - return 0 - elif value.startswith('0b'): - return sign*int(value[2:], 2) - elif value.startswith('0x'): - return sign*int(value[2:], 16) - elif value.startswith('0o'): - return sign*int(value[2:], 8) - elif value[0] == '0': - return sign*int(value, 8) - elif ':' in value: 
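-            # sexagesimal (base 60), e.g. u'190:20:30'
-            #   -> 190*3600 + 20*60 + 30 == 685230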
-            digits = [int(part) for part in value.split(':')]
-            digits.reverse()
-            base = 1
-            value = 0
-            for digit in digits:
-                value += digit*base
-                base *= 60
-            return sign*value
-        else:
-            return sign*int(value)
-
-    inf_value = 1e300
-    while inf_value != inf_value*inf_value:
-        inf_value *= inf_value
-    nan_value = -inf_value/inf_value  # Trying to make a quiet NaN (like C99).
-
-    def construct_yaml_float(self, node):
-        value = to_str(self.construct_scalar(node))
-        value = value.replace('_', '').lower()
-        sign = +1
-        if value[0] == '-':
-            sign = -1
-        if value[0] in '+-':
-            value = value[1:]
-        if value == '.inf':
-            return sign*self.inf_value
-        elif value == '.nan':
-            return self.nan_value
-        elif ':' in value:
-            digits = [float(part) for part in value.split(':')]
-            digits.reverse()
-            base = 1
-            value = 0.0
-            for digit in digits:
-                value += digit*base
-                base *= 60
-            return sign*value
-        else:
-            return sign*float(value)
-
-    if PY3:
-        def construct_yaml_binary(self, node):
-            try:
-                value = self.construct_scalar(node).encode('ascii')
-            except UnicodeEncodeError as exc:
-                raise ConstructorError(
-                    None, None,
-                    "failed to convert base64 data into ascii: %s" % exc,
-                    node.start_mark)
-            try:
-                if hasattr(base64, 'decodebytes'):
-                    return base64.decodebytes(value)
-                else:
-                    return base64.decodestring(value)
-            except binascii.Error as exc:
-                raise ConstructorError(
-                    None, None,
-                    "failed to decode base64 data: %s" % exc, node.start_mark)
-    else:
-        def construct_yaml_binary(self, node):
-            value = self.construct_scalar(node)
-            try:
-                return to_str(value).decode('base64')
-            except (binascii.Error, UnicodeEncodeError) as exc:
-                raise ConstructorError(
-                    None, None,
-                    "failed to decode base64 data: %s" % exc, node.start_mark)
-
-    timestamp_regexp = re.compile(
-        u'''^(?P<year>[0-9][0-9][0-9][0-9])
-        -(?P<month>[0-9][0-9]?)
-        -(?P<day>[0-9][0-9]?)
-        (?:(?:[Tt]|[ \\t]+)
-        (?P<hour>[0-9][0-9]?)
-        :(?P<minute>[0-9][0-9])
-        :(?P<second>[0-9][0-9])
-        (?:\\.(?P<fraction>[0-9]*))?
-        (?:[ \\t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
-        (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
-
-    def construct_yaml_timestamp(self, node):
-        value = self.construct_scalar(node)
-        match = self.timestamp_regexp.match(node.value)
-        values = match.groupdict()
-        year = int(values['year'])
-        month = int(values['month'])
-        day = int(values['day'])
-        if not values['hour']:
-            return datetime.date(year, month, day)
-        hour = int(values['hour'])
-        minute = int(values['minute'])
-        second = int(values['second'])
-        fraction = 0
-        if values['fraction']:
-            fraction = values['fraction'][:6]
-            while len(fraction) < 6:
-                fraction += '0'
-            fraction = int(fraction)
-        delta = None
-        if values['tz_sign']:
-            tz_hour = int(values['tz_hour'])
-            tz_minute = int(values['tz_minute'] or 0)
-            delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
-            if values['tz_sign'] == '-':
-                delta = -delta
-        data = datetime.datetime(year, month, day, hour, minute, second,
-                                 fraction)
-        if delta:
-            data -= delta
-        return data
-
-    def construct_yaml_omap(self, node):
-        # Note: we now check for duplicate keys
-        omap = ordereddict()
-        yield omap
-        if not isinstance(node, SequenceNode):
-            raise ConstructorError(
-                "while constructing an ordered map", node.start_mark,
-                "expected a sequence, but found %s" % node.id, node.start_mark)
-        for subnode in node.value:
-            if not isinstance(subnode, MappingNode):
-                raise ConstructorError(
-                    "while constructing an ordered map", node.start_mark,
-                    "expected a mapping of length 1, but found %s" %
-                    subnode.id,
-                    subnode.start_mark)
-            if len(subnode.value) != 1:
-                raise ConstructorError(
-                    "while constructing an ordered map", node.start_mark,
-                    "expected a single mapping item, but found %d items" %
-                    len(subnode.value),
-                    subnode.start_mark)
-            key_node, value_node = subnode.value[0]
-            key = self.construct_object(key_node)
-            assert key not in omap
-            value = self.construct_object(value_node)
-            omap[key] = value
-
-    def construct_yaml_pairs(self, node):
-        # Note: the same code as `construct_yaml_omap`.
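-        # e.g. !!pairs [a: 1, a: 2] -> [('a', 1), ('a', 2)]; unlike
-        # !!omap, duplicate keys are not rejected here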
- pairs = [] - yield pairs - if not isinstance(node, SequenceNode): - raise ConstructorError( - "while constructing pairs", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError( - "while constructing pairs", node.start_mark, - "expected a mapping of length 1, but found %s" % - subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError( - "while constructing pairs", node.start_mark, - "expected a single mapping item, but found %d items" % - len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - pairs.append((key, value)) - - def construct_yaml_set(self, node): - data = set() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_str(self, node): - value = self.construct_scalar(node) - if PY3: - return value - try: - return value.encode('ascii') - except UnicodeEncodeError: - return value - - def construct_yaml_seq(self, node): - data = [] - yield data - data.extend(self.construct_sequence(node)) - - def construct_yaml_map(self, node): - data = {} - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_object(self, node, cls): - data = cls.__new__(cls) - yield data - if hasattr(data, '__setstate__'): - state = self.construct_mapping(node, deep=True) - data.__setstate__(state) - else: - state = self.construct_mapping(node) - data.__dict__.update(state) - - def construct_undefined(self, node): - raise ConstructorError( - None, None, - "could not determine a constructor for the tag %r" % - utf8(node.tag), - node.start_mark) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:null', - SafeConstructor.construct_yaml_null) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:bool', - SafeConstructor.construct_yaml_bool) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:int', - SafeConstructor.construct_yaml_int) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:float', - SafeConstructor.construct_yaml_float) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:binary', - SafeConstructor.construct_yaml_binary) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:timestamp', - SafeConstructor.construct_yaml_timestamp) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:omap', - SafeConstructor.construct_yaml_omap) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:pairs', - SafeConstructor.construct_yaml_pairs) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:set', - SafeConstructor.construct_yaml_set) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:str', - SafeConstructor.construct_yaml_str) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:seq', - SafeConstructor.construct_yaml_seq) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:map', - SafeConstructor.construct_yaml_map) - -SafeConstructor.add_constructor( - None, SafeConstructor.construct_undefined) - - -class Constructor(SafeConstructor): - - def construct_python_str(self, node): - return utf8(self.construct_scalar(node)) - - def construct_python_unicode(self, node): - return self.construct_scalar(node) - - if PY3: - def construct_python_bytes(self, node): - try: - value = self.construct_scalar(node).encode('ascii') - except UnicodeEncodeError as exc: - raise ConstructorError( - None, None, - "failed to convert 
base64 data into ascii: %s" % exc, - node.start_mark) - try: - if hasattr(base64, 'decodebytes'): - return base64.decodebytes(value) - else: - return base64.decodestring(value) - except binascii.Error as exc: - raise ConstructorError( - None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - def construct_python_long(self, node): - val = self.construct_yaml_int(node) - if PY3: - return val - return int(val) - - def construct_python_complex(self, node): - return complex(self.construct_scalar(node)) - - def construct_python_tuple(self, node): - return tuple(self.construct_sequence(node)) - - def find_python_module(self, name, mark): - if not name: - raise ConstructorError( - "while constructing a Python module", mark, - "expected non-empty name appended to the tag", mark) - try: - __import__(name) - except ImportError as exc: - raise ConstructorError( - "while constructing a Python module", mark, - "cannot find module %r (%s)" % (utf8(name), exc), mark) - return sys.modules[name] - - def find_python_name(self, name, mark): - if not name: - raise ConstructorError( - "while constructing a Python object", mark, - "expected non-empty name appended to the tag", mark) - if u'.' in name: - module_name, object_name = name.rsplit('.', 1) - else: - module_name = builtins_module - object_name = name - try: - __import__(module_name) - except ImportError as exc: - raise ConstructorError( - "while constructing a Python object", mark, - "cannot find module %r (%s)" % (utf8(module_name), exc), mark) - module = sys.modules[module_name] - if not hasattr(module, object_name): - raise ConstructorError( - "while constructing a Python object", mark, - "cannot find %r in the module %r" % (utf8(object_name), - module.__name__), mark) - return getattr(module, object_name) - - def construct_python_name(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError( - "while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % utf8(value), - node.start_mark) - return self.find_python_name(suffix, node.start_mark) - - def construct_python_module(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError( - "while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % utf8(value), - node.start_mark) - return self.find_python_module(suffix, node.start_mark) - - if PY2: - class classobj: - pass - - def make_python_instance(self, suffix, node, - args=None, kwds=None, newobj=False): - if not args: - args = [] - if not kwds: - kwds = {} - cls = self.find_python_name(suffix, node.start_mark) - if PY3: - if newobj and isinstance(cls, type): - return cls.__new__(cls, *args, **kwds) - else: - return cls(*args, **kwds) - else: - if newobj and isinstance(cls, type(self.classobj)) \ - and not args and not kwds: - instance = self.classobj() - instance.__class__ = cls - return instance - elif newobj and isinstance(cls, type): - return cls.__new__(cls, *args, **kwds) - else: - return cls(*args, **kwds) - - def set_python_instance_state(self, instance, state): - if hasattr(instance, '__setstate__'): - instance.__setstate__(state) - else: - slotstate = {} - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - if hasattr(instance, '__dict__'): - instance.__dict__.update(state) - elif state: - slotstate.update(state) - for key, value in slotstate.items(): - setattr(object, key, value) - - def construct_python_object(self, suffix, node): - # Format: - 
# !!python/object:module.name { ... state ... } - instance = self.make_python_instance(suffix, node, newobj=True) - yield instance - deep = hasattr(instance, '__setstate__') - state = self.construct_mapping(node, deep=deep) - self.set_python_instance_state(instance, state) - - def construct_python_object_apply(self, suffix, node, newobj=False): - # Format: - # !!python/object/apply # (or !!python/object/new) - # args: [ ... arguments ... ] - # kwds: { ... keywords ... } - # state: ... state ... - # listitems: [ ... listitems ... ] - # dictitems: { ... dictitems ... } - # or short format: - # !!python/object/apply [ ... arguments ... ] - # The difference between !!python/object/apply and !!python/object/new - # is how an object is created, check make_python_instance for details. - if isinstance(node, SequenceNode): - args = self.construct_sequence(node, deep=True) - kwds = {} - state = {} - listitems = [] - dictitems = {} - else: - value = self.construct_mapping(node, deep=True) - args = value.get('args', []) - kwds = value.get('kwds', {}) - state = value.get('state', {}) - listitems = value.get('listitems', []) - dictitems = value.get('dictitems', {}) - instance = self.make_python_instance(suffix, node, args, kwds, newobj) - if state: - self.set_python_instance_state(instance, state) - if listitems: - instance.extend(listitems) - if dictitems: - for key in dictitems: - instance[key] = dictitems[key] - return instance - - def construct_python_object_new(self, suffix, node): - return self.construct_python_object_apply(suffix, node, newobj=True) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/none', - Constructor.construct_yaml_null) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/bool', - Constructor.construct_yaml_bool) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/str', - Constructor.construct_python_str) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/unicode', - Constructor.construct_python_unicode) - -if PY3: - Constructor.add_constructor( - u'tag:yaml.org,2002:python/bytes', - Constructor.construct_python_bytes) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/int', - Constructor.construct_yaml_int) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/long', - Constructor.construct_python_long) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/float', - Constructor.construct_yaml_float) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/complex', - Constructor.construct_python_complex) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/list', - Constructor.construct_yaml_seq) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/tuple', - Constructor.construct_python_tuple) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/dict', - Constructor.construct_yaml_map) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/name:', - Constructor.construct_python_name) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/module:', - Constructor.construct_python_module) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object:', - Constructor.construct_python_object) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/apply:', - Constructor.construct_python_object_apply) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/new:', - Constructor.construct_python_object_new) - - -class RoundTripConstructor(SafeConstructor): - """need to store the comments on the node itself, - as well as on 
the items - """ - - def construct_scalar(self, node): - if not isinstance(node, ScalarNode): - raise ConstructorError( - None, None, - "expected a scalar node, but found %s" % node.id, - node.start_mark) - - if node.style == '|' and isinstance(node.value, text_type): - return PreservedScalarString(node.value) - return node.value - - def construct_yaml_str(self, node): - value = self.construct_scalar(node) - if isinstance(value, ScalarString): - return value - if PY3: - return value - try: - return value.encode('ascii') - except AttributeError: - # in case you replace the node dynamically e.g. with a dict - return value - except UnicodeEncodeError: - return value - - def construct_sequence(self, node, seqtyp, deep=False): - if not isinstance(node, SequenceNode): - raise ConstructorError( - None, None, - "expected a sequence node, but found %s" % node.id, - node.start_mark) - ret_val = [] - if node.comment: - seqtyp._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - seqtyp.yaml_end_comment_extend(node.comment[2], clear=True) - if node.anchor: - from ruamel.yaml.serializer import templated_id - if not templated_id(node.anchor): - seqtyp.yaml_set_anchor(node.anchor) - for idx, child in enumerate(node.value): - ret_val.append(self.construct_object(child, deep=deep)) - if child.comment: - seqtyp._yaml_add_comment(child.comment, key=idx) - return ret_val - - def flatten_mapping(self, node): - """ - This implements the merge key feature http://yaml.org/type/merge.html - by inserting keys from the merge dict/list of dicts if not yet - available in this node - """ - - def constructed(value_node): - # If the contents of a merge are defined within the - # merge marker, then they won't have been constructed - # yet. But if they were already constructed, we need to use - # the existing object. 
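# A minimal, self-contained sketch (not part of this changeset) of the
# merge-key input (http://yaml.org/type/merge.html) that flatten_mapping
# deals with. safe_load is used because SafeConstructor implements the
# same feature on plain dicts; the expected values follow the spec and
# are assumptions, not output captured from this commit.
import ruamel.yaml

buf = """\
defaults: &defaults
  retries: 3
  timeout: 10
merged:
  <<: *defaults
  timeout: 30
"""
data = ruamel.yaml.safe_load(buf)
assert data['merged']['timeout'] == 30  # an explicit key wins over the merge
assert data['merged']['retries'] == 3   # inserted from *defaults by the merge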
- if value_node in self.constructed_objects: - value = self.constructed_objects[value_node] - else: - value = self.construct_object(value_node, deep=False) - return value - - #merge = [] - merge_map_list = [] - index = 0 - while index < len(node.value): - key_node, value_node = node.value[index] - if key_node.tag == u'tag:yaml.org,2002:merge': - del node.value[index] - if isinstance(value_node, MappingNode): - merge_map_list.append( - (index, constructed(value_node))) - #self.flatten_mapping(value_node) - #merge.extend(value_node.value) - elif isinstance(value_node, SequenceNode): - #submerge = [] - for subnode in value_node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError( - "while constructing a mapping", - node.start_mark, - "expected a mapping for merging, but found %s" - % subnode.id, subnode.start_mark) - merge_map_list.append( - (index, constructed(subnode))) - # self.flatten_mapping(subnode) - # submerge.append(subnode.value) - #submerge.reverse() - #for value in submerge: - # merge.extend(value) - else: - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "expected a mapping or list of mappings for merging, " - "but found %s" - % value_node.id, value_node.start_mark) - elif key_node.tag == u'tag:yaml.org,2002:value': - key_node.tag = u'tag:yaml.org,2002:str' - index += 1 - else: - index += 1 - #print ('merge_map_list', merge_map_list) - return merge_map_list - #if merge: - # node.value = merge + node.value - - def construct_mapping(self, node, maptyp, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError( - None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - if isinstance(node, MappingNode): - merge_map = self.flatten_mapping(node) - if merge_map: - maptyp.add_yaml_merge(merge_map) - # mapping = {} - if node.comment: - maptyp._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - maptyp.yaml_end_comment_extend(node.comment[2], clear=True) - if node.anchor: - from ruamel.yaml.serializer import templated_id - if not templated_id(node.anchor): - maptyp.yaml_set_anchor(node.anchor) - for key_node, value_node in node.value: - # keys can be list -> deep - key = self.construct_object(key_node, deep=True) - # lists are not hashable, but tuples are - if not isinstance(key, collections.Hashable): - if isinstance(key, list): - key = tuple(key) - if PY2: - try: - hash(key) - except TypeError as exc: - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % - exc, key_node.start_mark) - else: - if not isinstance(key, collections.Hashable): - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "found unhashable key", key_node.start_mark) - value = self.construct_object(value_node, deep=deep) - if key_node.comment: - maptyp._yaml_add_comment(key_node.comment, key=key) - if value_node.comment: - maptyp._yaml_add_comment(value_node.comment, value=key) - maptyp[key] = value - - def construct_setting(self, node, typ, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError( - None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - if node.comment: - typ._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - typ.yaml_end_comment_extend(node.comment[2], clear=True) - if node.anchor: - from ruamel.yaml.serializer import templated_id - if not templated_id(node.anchor): - typ.yaml_set_anchor(node.anchor) - for key_node, value_node in node.value: - 
# keys can be list -> deep - key = self.construct_object(key_node, deep=True) - # lists are not hashable, but tuples are - if not isinstance(key, collections.Hashable): - if isinstance(key, list): - key = tuple(key) - if PY2: - try: - hash(key) - except TypeError as exc: - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % - exc, key_node.start_mark) - else: - if not isinstance(key, collections.Hashable): - raise ConstructorError( - "while constructing a mapping", node.start_mark, - "found unhashable key", key_node.start_mark) - value = self.construct_object(value_node, deep=deep) - if key_node.comment: - typ._yaml_add_comment(key_node.comment, key=key) - if value_node.comment: - typ._yaml_add_comment(value_node.comment, value=key) - typ.add(key) - - def construct_yaml_seq(self, node): - data = CommentedSeq() - data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) - if node.flow_style is True: - data.fa.set_flow_style() - elif node.flow_style is False: - data.fa.set_block_style() - if node.comment: - data._yaml_add_comment(node.comment) - yield data - data.extend(self.construct_sequence(node, data)) - - def construct_yaml_map(self, node): - data = CommentedMap() - data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) - if node.flow_style is True: - data.fa.set_flow_style() - elif node.flow_style is False: - data.fa.set_block_style() - yield data - self.construct_mapping(node, data) - - def construct_yaml_omap(self, node): - # Note: we do now check for duplicate keys - omap = CommentedOrderedMap() - omap._yaml_set_line_col(node.start_mark.line, node.start_mark.column) - if node.flow_style is True: - omap.fa.set_flow_style() - elif node.flow_style is False: - omap.fa.set_block_style() - yield omap - if node.comment: - omap._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - omap.yaml_end_comment_extend(node.comment[2], clear=True) - if not isinstance(node, SequenceNode): - raise ConstructorError( - "while constructing an ordered map", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError( - "while constructing an ordered map", node.start_mark, - "expected a mapping of length 1, but found %s" % - subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError( - "while constructing an ordered map", node.start_mark, - "expected a single mapping item, but found %d items" % - len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - assert key not in omap - value = self.construct_object(value_node) - if key_node.comment: - omap._yaml_add_comment(key_node.comment, key=key) - if subnode.comment: - omap._yaml_add_comment(subnode.comment, key=key) - if value_node.comment: - omap._yaml_add_comment(value_node.comment, value=key) - omap[key] = value - - def construct_yaml_set(self, node): - data = CommentedSet() - data._yaml_set_line_col(node.start_mark.line, node.start_mark.column) - yield data - self.construct_setting(node, data) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:null', - RoundTripConstructor.construct_yaml_null) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:bool', - RoundTripConstructor.construct_yaml_bool) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:int', - RoundTripConstructor.construct_yaml_int) - 
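# A sketch (not part of this changeset) of what the construct_yaml_seq /
# construct_yaml_map generators above make possible: comments, key order
# and flow style survive a load/dump cycle. Exact byte-for-byte equality
# is the package's stated goal and is assumed here rather than proven for
# arbitrary input.
import ruamel.yaml

src = """\
# inventory
fruit: [apple, pear]  # flow style is remembered
veg:
- carrot
"""
data = ruamel.yaml.load(src, Loader=ruamel.yaml.RoundTripLoader)
out = ruamel.yaml.dump(data, Dumper=ruamel.yaml.RoundTripDumper)
assert out == src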
-RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:float', - RoundTripConstructor.construct_yaml_float) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:binary', - RoundTripConstructor.construct_yaml_binary) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:timestamp', - RoundTripConstructor.construct_yaml_timestamp) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:omap', - RoundTripConstructor.construct_yaml_omap) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:pairs', - RoundTripConstructor.construct_yaml_pairs) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:set', - RoundTripConstructor.construct_yaml_set) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:str', - RoundTripConstructor.construct_yaml_str) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:seq', - RoundTripConstructor.construct_yaml_seq) - -RoundTripConstructor.add_constructor( - u'tag:yaml.org,2002:map', - RoundTripConstructor.construct_yaml_map) - -RoundTripConstructor.add_constructor( - None, RoundTripConstructor.construct_undefined) diff --git a/py/cyaml.py b/py/cyaml.py deleted file mode 100644 index 1c9036c..0000000 --- a/py/cyaml.py +++ /dev/null @@ -1,88 +0,0 @@ -from __future__ import absolute_import - -__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', - 'CBaseDumper', 'CSafeDumper', 'CDumper'] - -from _yaml import CParser, CEmitter - -from .constructor import * - -from .serializer import * -from .representer import * - -from .resolver import * - - -class CBaseLoader(CParser, BaseConstructor, BaseResolver): - def __init__(self, stream): - CParser.__init__(self, stream) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - - -class CSafeLoader(CParser, SafeConstructor, Resolver): - def __init__(self, stream): - CParser.__init__(self, stream) - SafeConstructor.__init__(self) - Resolver.__init__(self) - - -class CLoader(CParser, Constructor, Resolver): - def __init__(self, stream): - CParser.__init__(self, stream) - Constructor.__init__(self) - Resolver.__init__(self) - - -class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - - -class CSafeDumper(CEmitter, SafeRepresenter, Resolver): - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - - -class CDumper(CEmitter, Serializer, Representer, Resolver): - def __init__(self, stream, 
- default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) diff --git a/py/dumper.py b/py/dumper.py deleted file mode 100644 index f83228a..0000000 --- a/py/dumper.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import absolute_import - -__all__ = ['BaseDumper', 'SafeDumper', 'Dumper', 'RoundTripDumper'] - -from .emitter import * -from .serializer import * -from .representer import * -from .resolver import * - - -class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - - -class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - - -class Dumper(Emitter, Serializer, Representer, Resolver): - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - - -class RoundTripDumper(Emitter, Serializer, RoundTripRepresenter, Resolver): - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) 
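# A sketch (not part of this changeset) extending the pattern above: the
# four dumpers differ only in which Representer they mix in, and the same
# composition accepts new types via add_representer. Point, PointDumper
# and represent_point are illustrative names, not part of the library.
import ruamel.yaml
from ruamel.yaml.dumper import SafeDumper


class Point(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y


class PointDumper(SafeDumper):
    # subclass so the stock SafeDumper's representer table stays untouched
    pass


def represent_point(dumper, data):
    # emit Point as a tagged two-key mapping
    return dumper.represent_mapping(u'!point', {'x': data.x, 'y': data.y})

PointDumper.add_representer(Point, represent_point)
print(ruamel.yaml.dump(Point(1, 2), Dumper=PointDumper))
# assumed output: "!point\nx: 1\ny: 2\n" (keys sorted, block style default)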
- Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - RoundTripRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) diff --git a/py/emitter.py b/py/emitter.py deleted file mode 100644 index 2e5ebdc..0000000 --- a/py/emitter.py +++ /dev/null @@ -1,1237 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -# Emitter expects events obeying the following grammar: -# stream ::= STREAM-START document* STREAM-END -# document ::= DOCUMENT-START node DOCUMENT-END -# node ::= SCALAR | sequence | mapping -# sequence ::= SEQUENCE-START node* SEQUENCE-END -# mapping ::= MAPPING-START (node node)* MAPPING-END - -__all__ = ['Emitter', 'EmitterError'] - -from .error import YAMLError -from .events import * -from .compat import utf8, text_type, PY2, nprint, dbg, DBG_EVENT - - -class EmitterError(YAMLError): - pass - - -class ScalarAnalysis(object): - def __init__(self, scalar, empty, multiline, - allow_flow_plain, allow_block_plain, - allow_single_quoted, allow_double_quoted, - allow_block): - self.scalar = scalar - self.empty = empty - self.multiline = multiline - self.allow_flow_plain = allow_flow_plain - self.allow_block_plain = allow_block_plain - self.allow_single_quoted = allow_single_quoted - self.allow_double_quoted = allow_double_quoted - self.allow_block = allow_block - - -class Emitter(object): - DEFAULT_TAG_PREFIXES = { - u'!': u'!', - u'tag:yaml.org,2002:': u'!!', - } - - MAX_SIMPLE_KEY_LENGTH = 128 - - def __init__(self, stream, canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - - # The stream should have the methods `write` and possibly `flush`. - self.stream = stream - - # Encoding can be overriden by STREAM-START. - self.encoding = None - - # Emitter is a state machine with a stack of states to handle nested - # structures. - self.states = [] - self.state = self.expect_stream_start - - # Current event and the event queue. - self.events = [] - self.event = None - - # The current indentation level and the stack of previous indents. - self.indents = [] - self.indent = None - - # Flow level. - self.flow_level = 0 - - # Contexts. - self.root_context = False - self.sequence_context = False - self.mapping_context = False - self.simple_key_context = False - - # Characteristics of the last emitted character: - # - current position. - # - is it a whitespace? - # - is it an indention character - # (indentation space, '-', '?', or ':')? - self.line = 0 - self.column = 0 - self.whitespace = True - self.indention = True - - # Whether the document requires an explicit document indicator - self.open_ended = False - - # Formatting details. - self.canonical = canonical - self.allow_unicode = allow_unicode - self.best_indent = 2 - if indent and 1 < indent < 10: - self.best_indent = indent - self.best_width = 80 - if width and width > self.best_indent*2: - self.best_width = width - self.best_line_break = u'\n' - if line_break in [u'\r', u'\n', u'\r\n']: - self.best_line_break = line_break - - # Tag prefixes. - self.tag_prefixes = None - - # Prepared anchor and tag. - self.prepared_anchor = None - self.prepared_tag = None - - # Scalar analysis and style. 
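# A sketch (not part of this changeset) of the clamping above, as seen
# through the top-level dump(): indent is honoured only if 1 < indent < 10,
# width only if it exceeds best_indent*2; otherwise 2 and 80 are kept.
import ruamel.yaml

data = {'outer': {'inner': [1, 2, 3]}}
print(ruamel.yaml.dump(data, indent=4))   # within range, used as-is
print(ruamel.yaml.dump(data, indent=40))  # out of range, falls back to 2
print(ruamel.yaml.dump(data, width=1))    # too small, falls back to 80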
- self.analysis = None - self.style = None - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def emit(self, event): - if dbg(DBG_EVENT): - nprint(event) - self.events.append(event) - while not self.need_more_events(): - self.event = self.events.pop(0) - self.state() - self.event = None - - # In some cases, we wait for a few next events before emitting. - - def need_more_events(self): - if not self.events: - return True - event = self.events[0] - if isinstance(event, DocumentStartEvent): - return self.need_events(1) - elif isinstance(event, SequenceStartEvent): - return self.need_events(2) - elif isinstance(event, MappingStartEvent): - return self.need_events(3) - else: - return False - - def need_events(self, count): - level = 0 - for event in self.events[1:]: - if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): - level += 1 - elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): - level -= 1 - elif isinstance(event, StreamEndEvent): - level = -1 - if level < 0: - return False - return (len(self.events) < count+1) - - def increase_indent(self, flow=False, indentless=False): - self.indents.append(self.indent) - if self.indent is None: - if flow: - self.indent = self.best_indent - else: - self.indent = 0 - elif not indentless: - self.indent += self.best_indent - - # States. - - # Stream handlers. - - def expect_stream_start(self): - if isinstance(self.event, StreamStartEvent): - if PY2: - if self.event.encoding \ - and not getattr(self.stream, 'encoding', None): - self.encoding = self.event.encoding - else: - if self.event.encoding \ - and not hasattr(self.stream, 'encoding'): - self.encoding = self.event.encoding - self.write_stream_start() - self.state = self.expect_first_document_start - else: - raise EmitterError("expected StreamStartEvent, but got %s" % - self.event) - - def expect_nothing(self): - raise EmitterError("expected nothing, but got %s" % self.event) - - # Document handlers. 
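# A sketch (not part of this changeset): the grammar in the file header
# comment driven by hand through the emit() helper from main.py. The
# expected output is an assumption based on the default block-style choices.
import ruamel.yaml
from ruamel.yaml.events import (
    StreamStartEvent, StreamEndEvent, DocumentStartEvent, DocumentEndEvent,
    SequenceStartEvent, SequenceEndEvent, ScalarEvent)

events = [
    StreamStartEvent(),
    DocumentStartEvent(),
    SequenceStartEvent(anchor=None, tag=None, implicit=True),
    ScalarEvent(anchor=None, tag=None, implicit=(True, True), value=u'a'),
    ScalarEvent(anchor=None, tag=None, implicit=(True, True), value=u'b'),
    SequenceEndEvent(),
    DocumentEndEvent(),
    StreamEndEvent(),
]
print(ruamel.yaml.emit(events))  # assumed to print "- a\n- b\n"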
- - def expect_first_document_start(self): - return self.expect_document_start(first=True) - - def expect_document_start(self, first=False): - if isinstance(self.event, DocumentStartEvent): - if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator(u'...', True) - self.write_indent() - if self.event.version: - version_text = self.prepare_version(self.event.version) - self.write_version_directive(version_text) - self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() - if self.event.tags: - handles = sorted(self.event.tags.keys()) - for handle in handles: - prefix = self.event.tags[handle] - self.tag_prefixes[prefix] = handle - handle_text = self.prepare_tag_handle(handle) - prefix_text = self.prepare_tag_prefix(prefix) - self.write_tag_directive(handle_text, prefix_text) - implicit = (first and not self.event.explicit - and not self.canonical - and not self.event.version and not self.event.tags - and not self.check_empty_document()) - if not implicit: - self.write_indent() - self.write_indicator(u'---', True) - if self.canonical: - self.write_indent() - self.state = self.expect_document_root - elif isinstance(self.event, StreamEndEvent): - if self.open_ended: - self.write_indicator(u'...', True) - self.write_indent() - self.write_stream_end() - self.state = self.expect_nothing - else: - raise EmitterError("expected DocumentStartEvent, but got %s" % - self.event) - - def expect_document_end(self): - if isinstance(self.event, DocumentEndEvent): - self.write_indent() - if self.event.explicit: - self.write_indicator(u'...', True) - self.write_indent() - self.flush_stream() - self.state = self.expect_document_start - else: - raise EmitterError("expected DocumentEndEvent, but got %s" % - self.event) - - def expect_document_root(self): - self.states.append(self.expect_document_end) - self.expect_node(root=True) - - # Node handlers. 
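# A sketch (not part of this changeset) of the '---' and '...' indicators
# the document handlers above write, requested via the public API:
import ruamel.yaml

docs = [{'a': 1}, {'b': 2}]
print(ruamel.yaml.dump_all(docs, explicit_start=True))
# assumed:
# ---
# a: 1
# ---
# b: 2
print(ruamel.yaml.dump_all(docs, explicit_start=True, explicit_end=True))
# as above, with a closing '...' after each document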
- - def expect_node(self, root=False, sequence=False, mapping=False, - simple_key=False): - self.root_context = root - self.sequence_context = sequence - self.mapping_context = mapping - self.simple_key_context = simple_key - if isinstance(self.event, AliasEvent): - self.expect_alias() - elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor(u'&') - self.process_tag() - if isinstance(self.event, ScalarEvent): - self.expect_scalar() - elif isinstance(self.event, SequenceStartEvent): - if self.event.comment: - self.write_pre_comment(self.event) - if self.event.flow_style is False and self.event.comment: - self.write_post_comment(self.event) - # print('seq event', self.event) - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_sequence(): - self.expect_flow_sequence() - else: - self.expect_block_sequence() - elif isinstance(self.event, MappingStartEvent): - if self.event.flow_style is False and self.event.comment: - self.write_post_comment(self.event) - if self.event.comment and self.event.comment[1]: - self.write_pre_comment(self.event) - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_mapping(): - self.expect_flow_mapping() - else: - self.expect_block_mapping() - else: - raise EmitterError("expected NodeEvent, but got %s" % self.event) - - def expect_alias(self): - if self.event.anchor is None: - raise EmitterError("anchor is not specified for alias") - self.process_anchor(u'*') - self.state = self.states.pop() - - def expect_scalar(self): - self.increase_indent(flow=True) - self.process_scalar() - self.indent = self.indents.pop() - self.state = self.states.pop() - - # Flow sequence handlers. - - def expect_flow_sequence(self): - self.write_indicator(u'[', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_sequence_item - - def expect_first_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(u']', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - def expect_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(u',', False) - self.write_indent() - self.write_indicator(u']', False) - if self.event.comment and self.event.comment[0]: - # eol comment on flow sequence - self.write_post_comment(self.event) - self.state = self.states.pop() - else: - self.write_indicator(u',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - # Flow mapping handlers. 
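# A sketch (not part of this changeset) of expect_alias in action: dumping
# the same object twice makes the serializer assign an anchor and emit an
# alias; the generated name id001 is the default generator's, assumed here.
import ruamel.yaml

shared = ['x', 'y']
print(ruamel.yaml.dump({'first': shared, 'second': shared}))
# assumed:
# first: &id001
# - x
# - y
# second: *id001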
- - def expect_flow_mapping(self): - self.write_indicator(u'{', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_mapping_key - - def expect_first_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(u'}', False) - # if self.event.comment and self.event.comment[0]: - # # eol comment on flow sequence - # self.write_post_comment(self.event) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - # if self.event.comment and self.event.comment[1]: - # self.write_pre_comment(self.event) - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(u',', False) - self.write_indent() - self.write_indicator(u'}', False) - if self.event.comment and self.event.comment[0]: - # eol comment on flow mapping - self.write_post_comment(self.event) - self.state = self.states.pop() - else: - self.write_indicator(u',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_simple_value(self): - self.write_indicator(u':', False) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - def expect_flow_mapping_value(self): - if self.canonical or self.column > self.best_width: - self.write_indent() - self.write_indicator(u':', True) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - # Block sequence handlers. - - def expect_block_sequence(self): - indentless = (self.mapping_context and not self.indention) - self.increase_indent(flow=False, indentless=indentless) - self.state = self.expect_first_block_sequence_item - - def expect_first_block_sequence_item(self): - return self.expect_block_sequence_item(first=True) - - def expect_block_sequence_item(self, first=False): - if not first and isinstance(self.event, SequenceEndEvent): - if self.event.comment and self.event.comment[1]: - # final comments from a doc - self.write_pre_comment(self.event) - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - if self.event.comment and self.event.comment[1]: - self.write_pre_comment(self.event) - self.write_indent() - self.write_indicator(u'-', True, indention=True) - self.states.append(self.expect_block_sequence_item) - self.expect_node(sequence=True) - - # Block mapping handlers. 
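# A sketch (not part of this changeset): the same data routed through the
# flow handlers versus the block handlers, selected by default_flow_style.
import ruamel.yaml

data = {'seq': [1, 2], 'map': {'a': 1}}
print(ruamel.yaml.dump(data, default_flow_style=True))
# assumed: {map: {a: 1}, seq: [1, 2]}
print(ruamel.yaml.dump(data, default_flow_style=False))
# assumed:
# map:
#   a: 1
# seq:
# - 1
# - 2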
- - def expect_block_mapping(self): - self.increase_indent(flow=False) - self.state = self.expect_first_block_mapping_key - - def expect_first_block_mapping_key(self): - return self.expect_block_mapping_key(first=True) - - def expect_block_mapping_key(self, first=False): - if not first and isinstance(self.event, MappingEndEvent): - if self.event.comment and self.event.comment[1]: - # final comments from a doc - self.write_pre_comment(self.event) - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - if self.event.comment and self.event.comment[1]: - # final comments from a doc - self.write_pre_comment(self.event) - self.write_indent() - if self.check_simple_key(): - if self.event.style == '?': - self.write_indicator(u'?', True, indention=True) - self.states.append(self.expect_block_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True, indention=True) - self.states.append(self.expect_block_mapping_value) - self.expect_node(mapping=True) - - def expect_block_mapping_simple_value(self): - if getattr(self.event, 'style', None) != '?': - self.write_indicator(u':', False) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - def expect_block_mapping_value(self): - self.write_indent() - self.write_indicator(u':', True, indention=True) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - # Checkers. - - def check_empty_sequence(self): - return (isinstance(self.event, SequenceStartEvent) and self.events - and isinstance(self.events[0], SequenceEndEvent)) - - def check_empty_mapping(self): - return (isinstance(self.event, MappingStartEvent) and self.events - and isinstance(self.events[0], MappingEndEvent)) - - def check_empty_document(self): - if not isinstance(self.event, DocumentStartEvent) or not self.events: - return False - event = self.events[0] - return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and - event.value == u'') - - def check_simple_key(self): - length = 0 - if isinstance(self.event, NodeEvent) and self.event.anchor is not None: - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - length += len(self.prepared_anchor) - if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ - and self.event.tag is not None: - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(self.event.tag) - length += len(self.prepared_tag) - if isinstance(self.event, ScalarEvent): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - length += len(self.analysis.scalar) - return (length < self.MAX_SIMPLE_KEY_LENGTH and ( - isinstance(self.event, AliasEvent) - or (isinstance(self.event, ScalarEvent) - and not self.analysis.empty and not self.analysis.multiline) - or self.check_empty_sequence() or self.check_empty_mapping())) - - # Anchor, Tag, and Scalar processors. 
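# A sketch (not part of this changeset) of check_simple_key failing: a key
# scalar of MAX_SIMPLE_KEY_LENGTH (128) characters or more is written in
# the explicit '? key' / ': value' form by the block mapping handlers.
import ruamel.yaml

print(ruamel.yaml.dump({'k' * 130: 1}))
# assumed:
# ? kkkk...kkkk           (the 130-character key)
# : 1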
- - def process_anchor(self, indicator): - if self.event.anchor is None: - self.prepared_anchor = None - return - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - if self.prepared_anchor: - self.write_indicator(indicator+self.prepared_anchor, True) - self.prepared_anchor = None - - def process_tag(self): - tag = self.event.tag - if isinstance(self.event, ScalarEvent): - if self.style is None: - self.style = self.choose_scalar_style() - if ((not self.canonical or tag is None) and - ((self.style == '' and self.event.implicit[0]) - or (self.style != '' and self.event.implicit[1]))): - self.prepared_tag = None - return - if self.event.implicit[0] and tag is None: - tag = u'!' - self.prepared_tag = None - else: - if (not self.canonical or tag is None) and self.event.implicit: - self.prepared_tag = None - return - if tag is None: - raise EmitterError("tag is not specified") - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(tag) - if self.prepared_tag: - self.write_indicator(self.prepared_tag, True) - self.prepared_tag = None - - def choose_scalar_style(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.event.style == '"' or self.canonical: - return '"' - if (not self.event.style or self.event.style == '?') and \ - self.event.implicit[0]: - if (not (self.simple_key_context and - (self.analysis.empty or self.analysis.multiline)) - and (self.flow_level and self.analysis.allow_flow_plain - or (not self.flow_level and - self.analysis.allow_block_plain))): - return '' - if self.event.style and self.event.style in '|>': - if (not self.flow_level and not self.simple_key_context - and self.analysis.allow_block): - return self.event.style - if not self.event.style or self.event.style == '\'': - if (self.analysis.allow_single_quoted and - not (self.simple_key_context and self.analysis.multiline)): - return '\'' - return '"' - - def process_scalar(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.style is None: - self.style = self.choose_scalar_style() - split = (not self.simple_key_context) - # if self.analysis.multiline and split \ - # and (not self.style or self.style in '\'\"'): - # self.write_indent() - if self.style == '"': - self.write_double_quoted(self.analysis.scalar, split) - elif self.style == '\'': - self.write_single_quoted(self.analysis.scalar, split) - elif self.style == '>': - self.write_folded(self.analysis.scalar) - elif self.style == '|': - self.write_literal(self.analysis.scalar) - else: - self.write_plain(self.analysis.scalar, split) - self.analysis = None - self.style = None - if self.event.comment: - self.write_post_comment(self.event) - - # Analyzers. - - def prepare_version(self, version): - major, minor = version - if major != 1: - raise EmitterError("unsupported YAML version: %d.%d" % - (major, minor)) - return u'%d.%d' % (major, minor) - - def prepare_tag_handle(self, handle): - if not handle: - raise EmitterError("tag handle must not be empty") - if handle[0] != u'!' 
or handle[-1] != u'!': - raise EmitterError("tag handle must start and end with '!': %r" - % (utf8(handle))) - for ch in handle[1:-1]: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or - u'a' <= ch <= u'z' or ch in u'-_'): - raise EmitterError("invalid character %r in the tag handle: %r" - % (utf8(ch), utf8(handle))) - return handle - - def prepare_tag_prefix(self, prefix): - if not prefix: - raise EmitterError("tag prefix must not be empty") - chunks = [] - start = end = 0 - if prefix[0] == u'!': - end = 1 - while end < len(prefix): - ch = prefix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?!:@&=+$,_.~*\'()[]': - end += 1 - else: - if start < end: - chunks.append(prefix[start:end]) - start = end = end+1 - data = utf8(ch) - for ch in data: - chunks.append(u'%%%02X' % ord(ch)) - if start < end: - chunks.append(prefix[start:end]) - return u''.join(chunks) - - def prepare_tag(self, tag): - if not tag: - raise EmitterError("tag must not be empty") - if tag == u'!': - return tag - handle = None - suffix = tag - prefixes = sorted(self.tag_prefixes.keys()) - for prefix in prefixes: - if tag.startswith(prefix) \ - and (prefix == u'!' or len(prefix) < len(tag)): - handle = self.tag_prefixes[prefix] - suffix = tag[len(prefix):] - chunks = [] - start = end = 0 - while end < len(suffix): - ch = suffix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.~*\'()[]' \ - or (ch == u'!' and handle != u'!'): - end += 1 - else: - if start < end: - chunks.append(suffix[start:end]) - start = end = end+1 - data = utf8(ch) - for ch in data: - chunks.append(u'%%%02X' % ord(ch)) - if start < end: - chunks.append(suffix[start:end]) - suffix_text = u''.join(chunks) - if handle: - return u'%s%s' % (handle, suffix_text) - else: - return u'!<%s>' % suffix_text - - def prepare_anchor(self, anchor): - if not anchor: - raise EmitterError("anchor must not be empty") - for ch in anchor: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or - u'a' <= ch <= u'z' or ch in u'-_'): - raise EmitterError("invalid character %r in the anchor: %r" - % (utf8(ch), utf8(anchor))) - return anchor - - def analyze_scalar(self, scalar): - - # Empty scalar is a special case. - if not scalar: - return ScalarAnalysis( - scalar=scalar, empty=True, multiline=False, - allow_flow_plain=False, allow_block_plain=True, - allow_single_quoted=True, allow_double_quoted=True, - allow_block=False) - - # Indicators and special characters. - block_indicators = False - flow_indicators = False - line_breaks = False - special_characters = False - - # Important whitespace combinations. - leading_space = False - leading_break = False - trailing_space = False - trailing_break = False - break_space = False - space_break = False - - # Check document indicators. - if scalar.startswith(u'---') or scalar.startswith(u'...'): - block_indicators = True - flow_indicators = True - - # First character or preceded by a whitespace. - preceeded_by_whitespace = True - - # Last character or followed by a whitespace. - followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') - - # The previous character is a space. - previous_space = False - - # The previous character is a break. - previous_break = False - - index = 0 - while index < len(scalar): - ch = scalar[index] - - # Check for indicators. - if index == 0: - # Leading indicators are special characters. 
- if ch in u'#,[]{}&*!|>\'\"%@`': - flow_indicators = True - block_indicators = True - if ch in u'?:': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == u'-' and followed_by_whitespace: - flow_indicators = True - block_indicators = True - else: - # Some indicators cannot appear within a scalar as well. - if ch in u',?[]{}': - flow_indicators = True - if ch == u':': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == u'#' and preceeded_by_whitespace: - flow_indicators = True - block_indicators = True - - # Check for line breaks, special, and unicode characters. - if ch in u'\n\x85\u2028\u2029': - line_breaks = True - if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': - unicode_characters = True - if not self.allow_unicode: - special_characters = True - else: - special_characters = True - - # Detect important whitespace combinations. - if ch == u' ': - if index == 0: - leading_space = True - if index == len(scalar)-1: - trailing_space = True - if previous_break: - break_space = True - previous_space = True - previous_break = False - elif ch in u'\n\x85\u2028\u2029': - if index == 0: - leading_break = True - if index == len(scalar)-1: - trailing_break = True - if previous_space: - space_break = True - previous_space = False - previous_break = True - else: - previous_space = False - previous_break = False - - # Prepare for the next character. - index += 1 - preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') - followed_by_whitespace = ( - index+1 >= len(scalar) or - scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') - - # Let's decide what styles are allowed. - allow_flow_plain = True - allow_block_plain = True - allow_single_quoted = True - allow_double_quoted = True - allow_block = True - - # Leading and trailing whitespaces are bad for plain scalars. - if (leading_space or leading_break - or trailing_space or trailing_break): - allow_flow_plain = allow_block_plain = False - - # We do not permit trailing spaces for block scalars. - if trailing_space: - allow_block = False - - # Spaces at the beginning of a new line are only acceptable for block - # scalars. - if break_space: - allow_flow_plain = allow_block_plain = allow_single_quoted = False - - # Spaces followed by breaks, as well as special character are only - # allowed for double quoted scalars. - if space_break or special_characters: - allow_flow_plain = allow_block_plain = \ - allow_single_quoted = allow_block = False - - # Although the plain scalar writer supports breaks, we never emit - # multiline plain scalars. - if line_breaks: - allow_flow_plain = allow_block_plain = False - - # Flow indicators are forbidden for flow plain scalars. - if flow_indicators: - allow_flow_plain = False - - # Block indicators are forbidden for block plain scalars. - if block_indicators: - allow_block_plain = False - - return ScalarAnalysis(scalar=scalar, - empty=False, multiline=line_breaks, - allow_flow_plain=allow_flow_plain, - allow_block_plain=allow_block_plain, - allow_single_quoted=allow_single_quoted, - allow_double_quoted=allow_double_quoted, - allow_block=allow_block) - - # Writers. - - def flush_stream(self): - if hasattr(self.stream, 'flush'): - self.stream.flush() - - def write_stream_start(self): - # Write BOM if needed. 
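# A sketch (not part of this changeset) of the permissions computed by
# analyze_scalar surfacing through dump(): plain style where allowed,
# single quotes where the rules above forbid plain. The expected quoting
# is an assumption derived from those rules.
import ruamel.yaml

print(ruamel.yaml.dump(
    ['plain', '*leading indicator', 'colon: inside', 'trailing space ']))
# assumed:
# - plain
# - '*leading indicator'
# - 'colon: inside'
# - 'trailing space '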
- if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write(u'\uFEFF'.encode(self.encoding)) - - def write_stream_end(self): - self.flush_stream() - - def write_indicator(self, indicator, need_whitespace, - whitespace=False, indention=False): - if self.whitespace or not need_whitespace: - data = indicator - else: - data = u' '+indicator - self.whitespace = whitespace - self.indention = self.indention and indention - self.column += len(data) - self.open_ended = False - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_indent(self): - indent = self.indent or 0 - if not self.indention or self.column > indent \ - or (self.column == indent and not self.whitespace): - self.write_line_break() - if self.column < indent: - self.whitespace = True - data = u' '*(indent-self.column) - self.column = indent - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_line_break(self, data=None): - if data is None: - data = self.best_line_break - self.whitespace = True - self.indention = True - self.line += 1 - self.column = 0 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_version_directive(self, version_text): - data = u'%%YAML %s' % version_text - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - def write_tag_directive(self, handle_text, prefix_text): - data = u'%%TAG %s %s' % (handle_text, prefix_text) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - # Scalar streams. - - def write_single_quoted(self, text, split=True): - self.write_indicator(u'\'', True) - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch is None or ch != u' ': - if start+1 == end and self.column > self.best_width and split \ - and start != 0 and end != len(text): - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': - self.write_line_break() - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch == u'\'': - data = u'\'\'' - self.column += 2 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end + 1 - if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - self.write_indicator(u'\'', False) - - ESCAPE_REPLACEMENTS = { - u'\0': u'0', - u'\x07': u'a', - u'\x08': u'b', - u'\x09': u't', - u'\x0A': u'n', - u'\x0B': u'v', - u'\x0C': u'f', - u'\x0D': u'r', - u'\x1B': u'e', - u'\"': u'\"', - u'\\': u'\\', - u'\x85': u'N', - u'\xA0': u'_', - u'\u2028': u'L', - u'\u2029': u'P', - } - - def write_double_quoted(self, text, split=True): - self.write_indicator(u'"', True) - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ - or not (u'\x20' <= ch <= u'\x7E' - or 
(self.allow_unicode - and (u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD'))): - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - if ch in self.ESCAPE_REPLACEMENTS: - data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= u'\xFF': - data = u'\\x%02X' % ord(ch) - elif ch <= u'\uFFFF': - data = u'\\u%04X' % ord(ch) - else: - data = u'\\U%08X' % ord(ch) - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end+1 - if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ - and self.column+(end-start) > self.best_width and split: - data = text[start:end]+u'\\' - if start < end: - start = end - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_indent() - self.whitespace = False - self.indention = False - if text[start] == u' ': - data = u'\\' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - end += 1 - self.write_indicator(u'"', False) - - def determine_block_hints(self, text): - hints = u'' - if text: - if text[0] in u' \n\x85\u2028\u2029': - hints += text_type(self.best_indent) - if text[-1] not in u'\n\x85\u2028\u2029': - hints += u'-' - elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': - hints += u'+' - return hints - - def write_folded(self, text): - hints = self.determine_block_hints(text) - self.write_indicator(u'>'+hints, True) - if hints[-1:] == u'+': - self.open_ended = True - self.write_line_break() - leading_space = True - spaces = False - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != u' ' \ - and text[start] == u'\n': - self.write_line_break() - leading_space = (ch == u' ') - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - elif spaces: - if ch != u' ': - if start+1 == end and self.column > self.best_width: - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - spaces = (ch == u' ') - end += 1 - - def write_literal(self, text): - hints = self.determine_block_hints(text) - self.write_indicator(u'|'+hints, True) - if hints[-1:] == u'+': - self.open_ended = True - self.write_line_break() - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - else: - if ch is None or ch in u'\n\x85\u2028\u2029': - data = text[start:end] - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - 
self.write_line_break() - start = end - if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - - def write_plain(self, text, split=True): - if self.root_context: - self.open_ended = True - if not text: - return - if not self.whitespace: - data = u' ' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.whitespace = False - self.indention = False - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch != u' ': - if start+1 == end and self.column > self.best_width \ - and split: - self.write_indent() - self.whitespace = False - self.indention = False - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': - self.write_line_break() - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - self.whitespace = False - self.indention = False - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - - def write_comment(self, comment): - value = comment.value - if value[-1] == '\n': - value = value[:-1] - try: - # get original column position - col = comment.start_mark.column - if col < self.column + 1: - ValueError - except ValueError: - col = self.column + 1 - # print('post_comment', self.line, self.column, value) - self.stream.write(' ' * (col - self.column) + value) - self.write_line_break() - - def write_pre_comment(self, event): - comments = event.comment[1] - if comments is None: - return - try: - for comment in comments: - if isinstance(event, MappingStartEvent) and \ - getattr(comment, 'pre_done', None): - continue - if self.column != 0: - self.write_line_break() - self.write_comment(comment) - if isinstance(event, MappingStartEvent): - comment.pre_done = True - except TypeError: - print ('eventtt', type(event), event) - raise - - def write_post_comment(self, event): - if self.event.comment[0] is None: - return - comment = event.comment[0] - self.write_comment(comment) diff --git a/py/error.py b/py/error.py deleted file mode 100644 index 2b76904..0000000 --- a/py/error.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import absolute_import - -__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] - -from .compat import utf8 - - -class Mark(object): - def __init__(self, name, index, line, column, buffer, pointer): - self.name = name - self.index = index - self.line = line - self.column = column - self.buffer = buffer - self.pointer = pointer - - def get_snippet(self, indent=4, max_length=75): - if self.buffer is None: - return None - head = '' - start = self.pointer - while (start > 0 and - self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029'): - start -= 1 - if self.pointer-start > max_length/2-1: - head = ' ... ' - start += 5 - break - tail = '' - end = self.pointer - while (end < len(self.buffer) and - self.buffer[end] not in u'\0\r\n\x85\u2028\u2029'): - end += 1 - if end-self.pointer > max_length/2-1: - tail = ' ... 
' - end -= 5 - break - snippet = utf8(self.buffer[start:end]) - return ' '*indent + head + snippet + tail + '\n' \ - + ' '*(indent+self.pointer-start+len(head)) + '^' - - def __str__(self): - snippet = self.get_snippet() - where = " in \"%s\", line %d, column %d" \ - % (self.name, self.line+1, self.column+1) - if snippet is not None: - where += ":\n"+snippet - return where - - -class YAMLError(Exception): - pass - - -class MarkedYAMLError(YAMLError): - def __init__(self, context=None, context_mark=None, - problem=None, problem_mark=None, note=None): - self.context = context - self.context_mark = context_mark - self.problem = problem - self.problem_mark = problem_mark - self.note = note - - def __str__(self): - lines = [] - if self.context is not None: - lines.append(self.context) - if self.context_mark is not None \ - and (self.problem is None or self.problem_mark is None - or self.context_mark.name != self.problem_mark.name - or self.context_mark.line != self.problem_mark.line - or self.context_mark.column != self.problem_mark.column): - lines.append(str(self.context_mark)) - if self.problem is not None: - lines.append(self.problem) - if self.problem_mark is not None: - lines.append(str(self.problem_mark)) - if self.note is not None: - lines.append(self.note) - return '\n'.join(lines) diff --git a/py/events.py b/py/events.py deleted file mode 100644 index e1b9c62..0000000 --- a/py/events.py +++ /dev/null @@ -1,105 +0,0 @@ - -# Abstract classes. - - -def CommentCheck(): - pass - - -class Event(object): - def __init__(self, start_mark=None, end_mark=None, comment=CommentCheck): - self.start_mark = start_mark - self.end_mark = end_mark - # assert comment is not CommentCheck - if comment is CommentCheck: - comment = None - self.comment = comment - - def __repr__(self): - attributes = [key for key in ['anchor', 'tag', 'implicit', 'value', - 'flow_style', 'style'] - if hasattr(self, key)] - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - if self.comment not in [None, CommentCheck]: - arguments += ', comment={!r}'.format(self.comment) - return '%s(%s)' % (self.__class__.__name__, arguments) - - -class NodeEvent(Event): - def __init__(self, anchor, start_mark=None, end_mark=None, comment=None): - Event.__init__(self, start_mark, end_mark, comment) - self.anchor = anchor - - -class CollectionStartEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, - flow_style=None, comment=None): - Event.__init__(self, start_mark, end_mark, comment) - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.flow_style = flow_style - - -class CollectionEndEvent(Event): - pass - -# Implementations. 
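# A sketch (not part of this changeset): the concrete event classes above,
# observed through the parse() helper defined further down in main.py.
import ruamel.yaml

for event in ruamel.yaml.parse('a: 1'):
    print(event)
# assumed sequence: StreamStartEvent, DocumentStartEvent, MappingStartEvent,
# ScalarEvent(value=u'a'), ScalarEvent(value=u'1'), MappingEndEvent,
# DocumentEndEvent, StreamEndEvent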
- - -class StreamStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, encoding=None, - comment=None): - Event.__init__(self, start_mark, end_mark, comment) - self.encoding = encoding - - -class StreamEndEvent(Event): - pass - - -class DocumentStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None, version=None, tags=None, comment=None): - Event.__init__(self, start_mark, end_mark, comment) - self.explicit = explicit - self.version = version - self.tags = tags - - -class DocumentEndEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None, comment=None): - Event.__init__(self, start_mark, end_mark, comment) - self.explicit = explicit - - -class AliasEvent(NodeEvent): - pass - - -class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, value, - start_mark=None, end_mark=None, style=None, comment=None): - NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) - self.tag = tag - self.implicit = implicit - self.value = value - self.style = style - - -class SequenceStartEvent(CollectionStartEvent): - pass - - -class SequenceEndEvent(CollectionEndEvent): - pass - - -class MappingStartEvent(CollectionStartEvent): - pass - - -class MappingEndEvent(CollectionEndEvent): - pass diff --git a/py/loader.py b/py/loader.py deleted file mode 100644 index 9c17d9b..0000000 --- a/py/loader.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import absolute_import - -__all__ = ['BaseLoader', 'SafeLoader', 'Loader', 'RoundTripLoader'] - -from .reader import * -from .scanner import * -from .parser import * -from .composer import * -from .constructor import * -from .resolver import * - - -class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, - BaseResolver): - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - - -class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - SafeConstructor.__init__(self) - Resolver.__init__(self) - - -class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) - - -class RoundTripLoader(Reader, RoundTripScanner, Parser, - Composer, RoundTripConstructor, Resolver): - def __init__(self, stream): - Reader.__init__(self, stream) - RoundTripScanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - RoundTripConstructor.__init__(self) - Resolver.__init__(self) diff --git a/py/main.py b/py/main.py deleted file mode 100644 index 90ee4d5..0000000 --- a/py/main.py +++ /dev/null @@ -1,352 +0,0 @@ -# coding: utf-8 - -from __future__ import absolute_import - - -from ruamel.yaml.error import * - -from ruamel.yaml.tokens import * -from ruamel.yaml.events import * -from ruamel.yaml.nodes import * - -from ruamel.yaml.loader import * -from ruamel.yaml.dumper import * -from ruamel.yaml.compat import StringIO, BytesIO, with_metaclass, PY3 - -import io - -def scan(stream, Loader=Loader): - """ - Scan a YAML stream and produce scanning tokens. 
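    A rough usage sketch (the token classes are defined in tokens.py):

        for token in scan('a: 1'):
            print(token)
        # StreamStartToken, BlockMappingStartToken, KeyToken, ScalarToken, ...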
- """ - loader = Loader(stream) - try: - while loader.check_token(): - yield loader.get_token() - finally: - loader.dispose() - - -def parse(stream, Loader=Loader): - """ - Parse a YAML stream and produce parsing events. - """ - loader = Loader(stream) - try: - while loader.check_event(): - yield loader.get_event() - finally: - loader.dispose() - - -def compose(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding representation tree. - """ - loader = Loader(stream) - try: - return loader.get_single_node() - finally: - loader.dispose() - - -def compose_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding representation trees. - """ - loader = Loader(stream) - try: - while loader.check_node(): - yield loader.get_node() - finally: - loader.dispose() - - -def load(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - """ - loader = Loader(stream) - try: - return loader.get_single_data() - finally: - loader.dispose() - - -def load_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - """ - loader = Loader(stream) - try: - while loader.check_data(): - yield loader.get_data() - finally: - loader.dispose() - - -def safe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - Resolve only basic YAML tags. - """ - return load(stream, SafeLoader) - - -def safe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - Resolve only basic YAML tags. - """ - return load_all(stream, SafeLoader) - - -def emit(events, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - """ - Emit YAML parsing events into a stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - try: - for event in events: - dumper.emit(event) - finally: - dumper.dispose() - if getvalue: - return getvalue() - -enc = None if PY3 else 'utf-8' - - -def serialize_all(nodes, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=enc, explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of representation trees into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - stream = StringIO() - else: - stream = BytesIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - try: - dumper.open() - for node in nodes: - dumper.serialize(node) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - - -def serialize(node, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a representation tree into a YAML stream. - If stream is None, return the produced string instead. 
- """ - return serialize_all([node], stream, Dumper=Dumper, **kwds) - - -def dump_all(documents, stream=None, Dumper=Dumper, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=enc, explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of Python objects into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - stream = StringIO() - else: - stream = BytesIO() - getvalue = stream.getvalue - dumper = Dumper(stream, default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - try: - dumper.open() - for data in documents: - dumper.represent(data) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - - -def dump(data, stream=None, Dumper=Dumper, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=enc, explicit_start=None, explicit_end=None, - version=None, tags=None): - - """ - Serialize a Python object into a YAML stream. - If stream is None, return the produced string instead. - - default_style ∈ None, '', '"', "'", '|', '>' - - """ - return dump_all([data], stream, Dumper=Dumper, - default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, - line_break=line_break, - encoding=encoding, explicit_start=explicit_start, - explicit_end=explicit_end, - version=version, tags=tags) - - -def safe_dump_all(documents, stream=None, **kwds): - """ - Serialize a sequence of Python objects into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all(documents, stream, Dumper=SafeDumper, **kwds) - - -def safe_dump(data, stream=None, **kwds): - """ - Serialize a Python object into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=SafeDumper, **kwds) - - -def add_implicit_resolver(tag, regexp, first=None, - Loader=Loader, Dumper=Dumper): - """ - Add an implicit scalar detector. - If an implicit scalar value matches the given regexp, - the corresponding tag is assigned to the scalar. - first is a sequence of possible initial characters or None. - """ - Loader.add_implicit_resolver(tag, regexp, first) - Dumper.add_implicit_resolver(tag, regexp, first) - - -def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): - """ - Add a path based resolver for the given tag. - A path is a list of keys that forms a path - to a node in the representation tree. - Keys can be string values, integers, or None. - """ - Loader.add_path_resolver(tag, path, kind) - Dumper.add_path_resolver(tag, path, kind) - - -def add_constructor(tag, constructor, Loader=Loader): - """ - Add a constructor for the given tag. - Constructor is a function that accepts a Loader instance - and a node object and produces the corresponding Python object. 
- """ - Loader.add_constructor(tag, constructor) - - -def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): - """ - Add a multi-constructor for the given tag prefix. - Multi-constructor is called for a node if its tag starts with tag_prefix. - Multi-constructor accepts a Loader instance, a tag suffix, - and a node object and produces the corresponding Python object. - """ - Loader.add_multi_constructor(tag_prefix, multi_constructor) - - -def add_representer(data_type, representer, Dumper=Dumper): - """ - Add a representer for the given type. - Representer is a function accepting a Dumper instance - and an instance of the given data type - and producing the corresponding representation node. - """ - Dumper.add_representer(data_type, representer) - - -def add_multi_representer(data_type, multi_representer, Dumper=Dumper): - """ - Add a representer for the given type. - Multi-representer is a function accepting a Dumper instance - and an instance of the given data type or subtype - and producing the corresponding representation node. - """ - Dumper.add_multi_representer(data_type, multi_representer) - - -class YAMLObjectMetaclass(type): - """ - The metaclass for YAMLObject. - """ - def __init__(cls, name, bases, kwds): - super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) - if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: - cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) - cls.yaml_dumper.add_representer(cls, cls.to_yaml) - - -class YAMLObject(with_metaclass(YAMLObjectMetaclass)): - """ - An object that can dump itself to a YAML stream - and load itself from a YAML stream. - """ - __slots__ = () # no direct instantiation, so allow immutable subclasses - - yaml_loader = Loader - yaml_dumper = Dumper - - yaml_tag = None - yaml_flow_style = None - - @classmethod - def from_yaml(cls, loader, node): - """ - Convert a representation node to a Python object. - """ - return loader.construct_yaml_object(node, cls) - - @classmethod - def to_yaml(cls, dumper, data): - """ - Convert a Python object to a representation node. - """ - return dumper.represent_yaml_object(cls.yaml_tag, data, cls, - flow_style=cls.yaml_flow_style) - - diff --git a/py/nodes.py b/py/nodes.py deleted file mode 100644 index 382b492..0000000 --- a/py/nodes.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import print_function - - -class Node(object): - def __init__(self, tag, value, start_mark, end_mark, comment=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.comment = comment - self.anchor = None - - def __repr__(self): - value = self.value - # if isinstance(value, list): - # if len(value) == 0: - # value = '' - # elif len(value) == 1: - # value = '<1 item>' - # else: - # value = '<%d items>' % len(value) - # else: - # if len(value) > 75: - # value = repr(value[:70]+u' ... 
') - # else: - # value = repr(value) - value = repr(value) - return '%s(tag=%r, value=%s)' % (self.__class__.__name__, - self.tag, value) - - def dump(self, indent=0): - if isinstance(self.value, basestring): - print('{}{}(tag={!r}, value={!r})'.format( - ' ' * indent, self.__class__.__name__, self.tag, self.value)) - if self.comment: - print(' {}comment: {})'.format( - ' ' * indent, self.comment)) - return - print('{}{}(tag={!r})'.format( - ' ' * indent, self.__class__.__name__, self.tag)) - if self.comment: - print(' {}comment: {})'.format( - ' ' * indent, self.comment)) - for v in self.value: - if isinstance(v, tuple): - for v1 in v: - v1.dump(indent+1) - elif isinstance(v, Node): - v.dump(indent+1) - else: - print('Node value type?', type(v)) - - -class ScalarNode(Node): - """ - styles: - ? -> set() ? key, no value - " -> double quoted - ' -> single quoted - | -> literal style - > -> - """ - id = 'scalar' - - def __init__(self, tag, value, start_mark=None, end_mark=None, style=None, - comment=None): - Node.__init__(self, tag, value, start_mark, end_mark, comment=comment) - self.style = style - - -class CollectionNode(Node): - def __init__(self, tag, value, start_mark=None, end_mark=None, - flow_style=None, comment=None, anchor=None): - Node.__init__(self, tag, value, start_mark, end_mark, comment=comment) - self.flow_style = flow_style - self.anchor = anchor - - -class SequenceNode(CollectionNode): - id = 'sequence' - - -class MappingNode(CollectionNode): - id = 'mapping' diff --git a/py/parser.py b/py/parser.py deleted file mode 100644 index 0afb596..0000000 --- a/py/parser.py +++ /dev/null @@ -1,656 +0,0 @@ -from __future__ import absolute_import - -# The following YAML grammar is LL(1) and is parsed by a recursive descent -# parser. -# -# stream ::= STREAM-START implicit_document? explicit_document* -# STREAM-END -# implicit_document ::= block_node DOCUMENT-END* -# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -# block_node_or_indentless_sequence ::= -# ALIAS -# | properties (block_content | -# indentless_block_sequence)? -# | block_content -# | indentless_block_sequence -# block_node ::= ALIAS -# | properties block_content? -# | block_content -# flow_node ::= ALIAS -# | properties flow_content? -# | flow_content -# properties ::= TAG ANCHOR? | ANCHOR TAG? -# block_content ::= block_collection | flow_collection | SCALAR -# flow_content ::= flow_collection | SCALAR -# block_collection ::= block_sequence | block_mapping -# flow_collection ::= flow_sequence | flow_mapping -# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* -# BLOCK-END -# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -# block_mapping ::= BLOCK-MAPPING_START -# ((KEY block_node_or_indentless_sequence?)? -# (VALUE block_node_or_indentless_sequence?)?)* -# BLOCK-END -# flow_sequence ::= FLOW-SEQUENCE-START -# (flow_sequence_entry FLOW-ENTRY)* -# flow_sequence_entry? -# FLOW-SEQUENCE-END -# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# flow_mapping ::= FLOW-MAPPING-START -# (flow_mapping_entry FLOW-ENTRY)* -# flow_mapping_entry? -# FLOW-MAPPING-END -# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
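#
# As a small illustration, the document below exercises block_mapping,
# flow_mapping and flow_sequence from the grammar above:
#
#   key: {a: 1}
#   seq: [1, 2]
#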
-# -# FIRST sets: -# -# stream: { STREAM-START } -# explicit_document: { DIRECTIVE DOCUMENT-START } -# implicit_document: FIRST(block_node) -# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START -# BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START -# FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_sequence: { BLOCK-SEQUENCE-START } -# block_mapping: { BLOCK-MAPPING-START } -# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR -# BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START -# FLOW-MAPPING-START BLOCK-ENTRY } -# indentless_sequence: { ENTRY } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_sequence: { FLOW-SEQUENCE-START } -# flow_mapping: { FLOW-MAPPING-START } -# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START -# FLOW-MAPPING-START KEY } -# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START -# FLOW-MAPPING-START KEY } - -__all__ = ['Parser', 'ParserError'] - -from .error import MarkedYAMLError -from .tokens import * -from .events import * -from .scanner import * -from .compat import utf8 - - -class ParserError(MarkedYAMLError): - pass - - -class Parser(object): - # Since writing a recursive-descendant parser is a straightforward task, we - # do not give many comments here. - - DEFAULT_TAGS = { - u'!': u'!', - u'!!': u'tag:yaml.org,2002:', - } - - def __init__(self): - self.current_event = None - self.yaml_version = None - self.tag_handles = {} - self.states = [] - self.marks = [] - self.state = self.parse_stream_start - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def check_event(self, *choices): - # Check the type of the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - if self.current_event is not None: - if not choices: - return True - for choice in choices: - if isinstance(self.current_event, choice): - return True - return False - - def peek_event(self): - # Get the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - return self.current_event - - def get_event(self): - # Get the next event and proceed further. - if self.current_event is None: - if self.state: - self.current_event = self.state() - value = self.current_event - self.current_event = None - return value - - # stream ::= STREAM-START implicit_document? explicit_document* - # STREAM-END - # implicit_document ::= block_node DOCUMENT-END* - # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - - def parse_stream_start(self): - - # Parse the stream start. - token = self.get_token() - token.move_comment(self.peek_token()) - event = StreamStartEvent(token.start_mark, token.end_mark, - encoding=token.encoding) - - # Prepare the next state. - self.state = self.parse_implicit_document_start - - return event - - def parse_implicit_document_start(self): - - # Parse an implicit document. 
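        # no DirectiveToken and no '---': synthesise a DocumentStartEvent
        # with explicit=False at the position of the first content token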
- if not self.check_token(DirectiveToken, DocumentStartToken, - StreamEndToken): - self.tag_handles = self.DEFAULT_TAGS - token = self.peek_token() - start_mark = end_mark = token.start_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=False) - - # Prepare the next state. - self.states.append(self.parse_document_end) - self.state = self.parse_block_node - - return event - - else: - return self.parse_document_start() - - def parse_document_start(self): - - # Parse any extra document end indicators. - while self.check_token(DocumentEndToken): - self.get_token() - - # Parse an explicit document. - if not self.check_token(StreamEndToken): - token = self.peek_token() - start_mark = token.start_mark - version, tags = self.process_directives() - if not self.check_token(DocumentStartToken): - raise ParserError(None, None, - "expected '<document start>', but found %r" - % self.peek_token().id, - self.peek_token().start_mark) - token = self.get_token() - end_mark = token.end_mark - event = DocumentStartEvent( - start_mark, end_mark, - explicit=True, version=version, tags=tags) - self.states.append(self.parse_document_end) - self.state = self.parse_document_content - else: - # Parse the end of the stream. - token = self.get_token() - event = StreamEndEvent(token.start_mark, token.end_mark, - comment=token.comment) - assert not self.states - assert not self.marks - self.state = None - return event - - def parse_document_end(self): - - # Parse the document end. - token = self.peek_token() - start_mark = end_mark = token.start_mark - explicit = False - if self.check_token(DocumentEndToken): - token = self.get_token() - end_mark = token.end_mark - explicit = True - event = DocumentEndEvent(start_mark, end_mark, explicit=explicit) - - # Prepare the next state. - self.state = self.parse_document_start - - return event - - def parse_document_content(self): - if self.check_token( - DirectiveToken, - DocumentStartToken, DocumentEndToken, StreamEndToken): - event = self.process_empty_scalar(self.peek_token().start_mark) - self.state = self.states.pop() - return event - else: - return self.parse_block_node() - - def process_directives(self): - self.yaml_version = None - self.tag_handles = {} - while self.check_token(DirectiveToken): - token = self.get_token() - if token.name == u'YAML': - if self.yaml_version is not None: - raise ParserError( - None, None, - "found duplicate YAML directive", token.start_mark) - major, minor = token.value - if major != 1: - raise ParserError( - None, None, - "found incompatible YAML document (version 1.* is " - "required)", - token.start_mark) - self.yaml_version = token.value - elif token.name == u'TAG': - handle, prefix = token.value - if handle in self.tag_handles: - raise ParserError(None, None, - "duplicate tag handle %r" % utf8(handle), - token.start_mark) - self.tag_handles[handle] = prefix - if self.tag_handles: - value = self.yaml_version, self.tag_handles.copy() - else: - value = self.yaml_version, None - for key in self.DEFAULT_TAGS: - if key not in self.tag_handles: - self.tag_handles[key] = self.DEFAULT_TAGS[key] - return value - - # block_node_or_indentless_sequence ::= ALIAS - # | properties (block_content | indentless_block_sequence)? - # | block_content - # | indentless_block_sequence - # block_node ::= ALIAS - # | properties block_content? - # | block_content - # flow_node ::= ALIAS - # | properties flow_content? - # | flow_content - # properties ::= TAG ANCHOR? | ANCHOR TAG? 
- # block_content ::= block_collection | flow_collection | SCALAR - # flow_content ::= flow_collection | SCALAR - # block_collection ::= block_sequence | block_mapping - # flow_collection ::= flow_sequence | flow_mapping - - def parse_block_node(self): - return self.parse_node(block=True) - - def parse_flow_node(self): - return self.parse_node() - - def parse_block_node_or_indentless_sequence(self): - return self.parse_node(block=True, indentless_sequence=True) - - def parse_node(self, block=False, indentless_sequence=False): - if self.check_token(AliasToken): - token = self.get_token() - event = AliasEvent(token.value, token.start_mark, token.end_mark) - self.state = self.states.pop() - else: - anchor = None - tag = None - start_mark = end_mark = tag_mark = None - if self.check_token(AnchorToken): - token = self.get_token() - start_mark = token.start_mark - end_mark = token.end_mark - anchor = token.value - if self.check_token(TagToken): - token = self.get_token() - tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - elif self.check_token(TagToken): - token = self.get_token() - start_mark = tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - if self.check_token(AnchorToken): - token = self.get_token() - end_mark = token.end_mark - anchor = token.value - if tag is not None: - handle, suffix = tag - if handle is not None: - if handle not in self.tag_handles: - raise ParserError( - "while parsing a node", start_mark, - "found undefined tag handle %r" % utf8(handle), - tag_mark) - tag = self.tag_handles[handle]+suffix - else: - tag = suffix - # if tag == u'!': - # raise ParserError("while parsing a node", start_mark, - # "found non-specific tag '!'", tag_mark, - # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' - # and share your opinion.") - if start_mark is None: - start_mark = end_mark = self.peek_token().start_mark - event = None - implicit = (tag is None or tag == u'!') - if indentless_sequence and self.check_token(BlockEntryToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark) - self.state = self.parse_indentless_sequence_entry - else: - if self.check_token(ScalarToken): - token = self.get_token() - end_mark = token.end_mark - if (token.plain and tag is None) or tag == u'!': - implicit = (True, False) - elif tag is None: - implicit = (False, True) - else: - implicit = (False, False) - event = ScalarEvent( - anchor, tag, implicit, token.value, - start_mark, end_mark, style=token.style, - comment=token.comment - ) - self.state = self.states.pop() - elif self.check_token(FlowSequenceStartToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent( - anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_sequence_first_entry - elif self.check_token(FlowMappingStartToken): - end_mark = self.peek_token().end_mark - event = MappingStartEvent( - anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_mapping_first_key - elif block and self.check_token(BlockSequenceStartToken): - end_mark = self.peek_token().start_mark - # should inserting the comment be dependent on the - # indentation? 
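            # use the comment already attached to the BLOCK-SEQUENCE-START
            # token; if it lacks a pre-comment part (comment[1]), ask the
            # token to split one off via split_comment()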
- pt = self.peek_token() - comment = pt.comment - # print('pt0', type(pt)) - if comment is None or comment[1] is None: - comment = pt.split_comment() - # print('pt1', comment) - event = SequenceStartEvent( - anchor, tag, implicit, start_mark, end_mark, - flow_style=False, - comment=comment, - ) - self.state = self.parse_block_sequence_first_entry - elif block and self.check_token(BlockMappingStartToken): - end_mark = self.peek_token().start_mark - comment = self.peek_token().comment - event = MappingStartEvent( - anchor, tag, implicit, start_mark, end_mark, - flow_style=False, comment=comment) - self.state = self.parse_block_mapping_first_key - elif anchor is not None or tag is not None: - # Empty scalars are allowed even if a tag or an anchor is - # specified. - event = ScalarEvent(anchor, tag, (implicit, False), u'', - start_mark, end_mark) - self.state = self.states.pop() - else: - if block: - node = 'block' - else: - node = 'flow' - token = self.peek_token() - raise ParserError( - "while parsing a %s node" % node, start_mark, - "expected the node content, but found %r" % token.id, - token.start_mark) - return event - - # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* - # BLOCK-END - - def parse_block_sequence_first_entry(self): - token = self.get_token() - # move any comment from start token - # token.move_comment(self.peek_token()) - self.marks.append(token.start_mark) - return self.parse_block_sequence_entry() - - def parse_block_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - token.move_comment(self.peek_token()) - if not self.check_token(BlockEntryToken, BlockEndToken): - self.states.append(self.parse_block_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_block_sequence_entry - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError( - "while parsing a block collection", self.marks[-1], - "expected <block end>, but found %r" % - token.id, token.start_mark) - token = self.get_token() # BlockEndToken - event = SequenceEndEvent(token.start_mark, token.end_mark, - comment=token.comment) - self.state = self.states.pop() - self.marks.pop() - return event - - # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - - # indentless_sequence? - # sequence: - # - entry - # - nested - - def parse_indentless_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - token.move_comment(self.peek_token()) - if not self.check_token(BlockEntryToken, - KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_indentless_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_indentless_sequence_entry - return self.process_empty_scalar(token.end_mark) - token = self.peek_token() - event = SequenceEndEvent(token.start_mark, token.start_mark, - comment=token.comment) - self.state = self.states.pop() - return event - - # block_mapping ::= BLOCK-MAPPING_START - # ((KEY block_node_or_indentless_sequence?)? 
- # (VALUE block_node_or_indentless_sequence?)?)* - # BLOCK-END - - def parse_block_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_mapping_key() - - def parse_block_mapping_key(self): - if self.check_token(KeyToken): - token = self.get_token() - token.move_comment(self.peek_token()) - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_value) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_value - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError( - "while parsing a block mapping", self.marks[-1], - "expected <block end>, but found %r" % token.id, - token.start_mark) - token = self.get_token() - token.move_comment(self.peek_token()) - event = MappingEndEvent(token.start_mark, token.end_mark, - comment=token.comment) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_block_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - # a value token might have a post comment; move it to e.g. the block - token.move_comment(self.peek_token()) - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_key) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_block_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - # flow_sequence ::= FLOW-SEQUENCE-START - # (flow_sequence_entry FLOW-ENTRY)* - # flow_sequence_entry? - # FLOW-SEQUENCE-END - # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - # - # Note that while production rules for both flow_sequence_entry and - # flow_mapping_entry are equal, their interpretations are different. - # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` - # generates an inline mapping (set syntax). 
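    # For example: in the flow sequence `[a: 1, b]` the first entry is the
    # single-pair mapping {a: 1}; the bare KEY token (without any
    # FLOW-MAPPING-START) triggers the inline MappingStartEvent below.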
- - def parse_flow_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_sequence_entry(first=True) - - def parse_flow_sequence_entry(self, first=False): - if not self.check_token(FlowSequenceEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError( - "while parsing a flow sequence", self.marks[-1], - "expected ',' or ']', but got %r" % token.id, - token.start_mark) - - if self.check_token(KeyToken): - token = self.peek_token() - event = MappingStartEvent(None, None, True, - token.start_mark, token.end_mark, - flow_style=True) - self.state = self.parse_flow_sequence_entry_mapping_key - return event - elif not self.check_token(FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry) - return self.parse_flow_node() - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark, - comment=token.comment) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_sequence_entry_mapping_key(self): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_value - return self.process_empty_scalar(token.end_mark) - - def parse_flow_sequence_entry_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_end) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_end - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_sequence_entry_mapping_end - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_sequence_entry_mapping_end(self): - self.state = self.parse_flow_sequence_entry - token = self.peek_token() - return MappingEndEvent(token.start_mark, token.start_mark) - - # flow_mapping ::= FLOW-MAPPING-START - # (flow_mapping_entry FLOW-ENTRY)* - # flow_mapping_entry? - # FLOW-MAPPING-END - # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
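    # A hedged walk-through for `{a: 1}`: parse_flow_mapping_first_key
    # consumes FLOW-MAPPING-START, parse_flow_mapping_key emits the key
    # scalar, parse_flow_mapping_value the value scalar, and the next
    # parse_flow_mapping_key call sees FLOW-MAPPING-END and emits the
    # closing MappingEndEvent.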
- - def parse_flow_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_mapping_key(first=True) - - def parse_flow_mapping_key(self, first=False): - if not self.check_token(FlowMappingEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError( - "while parsing a flow mapping", self.marks[-1], - "expected ',' or '}', but got %r" % token.id, - token.start_mark) - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_value - return self.process_empty_scalar(token.end_mark) - elif not self.check_token(FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_empty_value) - return self.parse_flow_node() - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark, - comment=token.comment) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_key) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_mapping_empty_value(self): - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(self.peek_token().start_mark) - - def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), u'', mark, mark) diff --git a/py/reader.py b/py/reader.py deleted file mode 100644 index 7bbec42..0000000 --- a/py/reader.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import absolute_import -# This module contains abstractions for the input stream. You don't have to -# look further; the code is not pretty. -# -# We define two classes here. -# -# Mark(source, line, column) -# It's just a record and its only use is producing nice error messages. -# Parser does not use it for any other purposes. -# -# Reader(source, data) -# Reader determines the encoding of `data` and converts it to unicode. -# Reader provides the following methods and attributes: -# reader.peek(length=1) - return the next `length` characters -# reader.forward(length=1) - move the current position `length` -# characters forward. -# reader.index - the number of the current character. -# reader.line, reader.column - the line and the column of the current -# character. 
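#
# A rough usage sketch (unicode input, so no decoding is involved):
#
#     reader = Reader(u'a: 1\n')
#     reader.peek()        # -> u'a'
#     reader.forward(3)    # step over u'a: '
#     reader.get_mark()    # Mark at line 0, column 3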
- -__all__ = ['Reader', 'ReaderError'] - -import codecs -import re - -from .error import YAMLError, Mark -from .compat import text_type, binary_type, PY3 - - -class ReaderError(YAMLError): - - def __init__(self, name, position, character, encoding, reason): - self.name = name - self.character = character - self.position = position - self.encoding = encoding - self.reason = reason - - def __str__(self): - if isinstance(self.character, binary_type): - return "'%s' codec can't decode byte #x%02x: %s\n" \ - " in \"%s\", position %d" \ - % (self.encoding, ord(self.character), self.reason, - self.name, self.position) - else: - return "unacceptable character #x%04x: %s\n" \ - " in \"%s\", position %d" \ - % (self.character, self.reason, - self.name, self.position) - - -class Reader(object): - # Reader: - # - determines the data encoding and converts it to a unicode string, - # - checks if characters are in allowed range, - # - adds '\0' to the end. - - # Reader accepts - # - a `str` object (PY2) / a `bytes` object (PY3), - # - a `unicode` object (PY2) / a `str` object (PY3), - # - a file-like object with its `read` method returning `str`, - # - a file-like object with its `read` method returning `unicode`. - - # Yeah, it's ugly and slow. - - def __init__(self, stream): - self.name = None - self.stream = None - self.stream_pointer = 0 - self.eof = True - self.buffer = u'' - self.pointer = 0 - self.raw_buffer = None - self.raw_decode = None - self.encoding = None - self.index = 0 - self.line = 0 - self.column = 0 - if isinstance(stream, text_type): - self.name = "<unicode string>" - self.check_printable(stream) - self.buffer = stream+u'\0' - elif isinstance(stream, binary_type): - self.name = "<byte string>" - self.raw_buffer = stream - self.determine_encoding() - else: - self.stream = stream - self.name = getattr(stream, 'name', "<file>") - self.eof = False - self.raw_buffer = None - self.determine_encoding() - - def peek(self, index=0): - try: - return self.buffer[self.pointer+index] - except IndexError: - self.update(index+1) - return self.buffer[self.pointer+index] - - def prefix(self, length=1): - if self.pointer+length >= len(self.buffer): - self.update(length) - return self.buffer[self.pointer:self.pointer+length] - - def forward(self, length=1): - if self.pointer+length+1 >= len(self.buffer): - self.update(length+1) - while length: - ch = self.buffer[self.pointer] - self.pointer += 1 - self.index += 1 - if ch in u'\n\x85\u2028\u2029' \ - or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): - self.line += 1 - self.column = 0 - elif ch != u'\uFEFF': - self.column += 1 - length -= 1 - - def get_mark(self): - if self.stream is None: - return Mark(self.name, self.index, self.line, self.column, - self.buffer, self.pointer) - else: - return Mark(self.name, self.index, self.line, self.column, - None, None) - - def determine_encoding(self): - while not self.eof and (self.raw_buffer is None or - len(self.raw_buffer) < 2): - self.update_raw() - if isinstance(self.raw_buffer, binary_type): - if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): - self.raw_decode = codecs.utf_16_le_decode - self.encoding = 'utf-16-le' - elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): - self.raw_decode = codecs.utf_16_be_decode - self.encoding = 'utf-16-be' - else: - self.raw_decode = codecs.utf_8_decode - self.encoding = 'utf-8' - self.update(1) - - NON_PRINTABLE = re.compile( - u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') - - def check_printable(self, data): - match = self.NON_PRINTABLE.search(data) - if match: - character = 
match.group() - position = self.index+(len(self.buffer)-self.pointer)+match.start() - raise ReaderError(self.name, position, ord(character), - 'unicode', "special characters are not allowed") - - def update(self, length): - if self.raw_buffer is None: - return - self.buffer = self.buffer[self.pointer:] - self.pointer = 0 - while len(self.buffer) < length: - if not self.eof: - self.update_raw() - if self.raw_decode is not None: - try: - data, converted = self.raw_decode(self.raw_buffer, - 'strict', self.eof) - except UnicodeDecodeError as exc: - if PY3: - character = self.raw_buffer[exc.start] - else: - character = exc.object[exc.start] - if self.stream is not None: - position = self.stream_pointer - \ - len(self.raw_buffer) + exc.start - else: - position = exc.start - raise ReaderError(self.name, position, character, - exc.encoding, exc.reason) - else: - data = self.raw_buffer - converted = len(data) - self.check_printable(data) - self.buffer += data - self.raw_buffer = self.raw_buffer[converted:] - if self.eof: - self.buffer += u'\0' - self.raw_buffer = None - break - - def update_raw(self, size=None): - if size is None: - size = 4096 if PY3 else 1024 - data = self.stream.read(size) - if self.raw_buffer is None: - self.raw_buffer = data - else: - self.raw_buffer += data - self.stream_pointer += len(data) - if not data: - self.eof = True - -# try: -# import psyco -# psyco.bind(Reader) -# except ImportError: -# pass diff --git a/py/representer.py b/py/representer.py deleted file mode 100644 index 63c0d0d..0000000 --- a/py/representer.py +++ /dev/null @@ -1,808 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', - 'RepresenterError', 'RoundTripRepresenter'] - -from .error import * -from .nodes import * -from .compat import text_type, binary_type, to_unicode, PY2, PY3, \ - ordereddict, nprint -from .scalarstring import * - -import datetime -import sys -import types -if PY3: - import copyreg - import base64 -else: - import copy_reg as copyreg - - -class RepresenterError(YAMLError): - pass - - -class BaseRepresenter(object): - - yaml_representers = {} - yaml_multi_representers = {} - - def __init__(self, default_style=None, default_flow_style=None): - self.default_style = default_style - self.default_flow_style = default_flow_style - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent(self, data): - node = self.represent_data(data) - self.serialize(node) - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - if PY2: - def get_classobj_bases(self, cls): - bases = [cls] - for base in cls.__bases__: - bases.extend(self.get_classobj_bases(base)) - return bases - - def represent_data(self, data): - if self.ignore_aliases(data): - self.alias_key = None - else: - self.alias_key = id(data) - if self.alias_key is not None: - if self.alias_key in self.represented_objects: - node = self.represented_objects[self.alias_key] - # if node is None: - # raise RepresenterError( - # "recursive objects are not allowed: %r" % data) - return node - # self.represented_objects[alias_key] = None - self.object_keeper.append(data) - data_types = type(data).__mro__ - if PY2: - # if type(data) is types.InstanceType: - if isinstance(data, types.InstanceType): - data_types = self.get_classobj_bases(data.__class__) + \ - list(data_types) - if data_types[0] in self.yaml_representers: - node = self.yaml_representers[data_types[0]](self, data) - else: - 
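                # no exact match on the object's own type: walk the MRO for
                # a multi-representer, then fall back to the catch-all None
                # entries, and finally to a plain str() ScalarNode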
for data_type in data_types: - if data_type in self.yaml_multi_representers: - node = self.yaml_multi_representers[data_type](self, data) - break - else: - if None in self.yaml_multi_representers: - node = self.yaml_multi_representers[None](self, data) - elif None in self.yaml_representers: - node = self.yaml_representers[None](self, data) - else: - node = ScalarNode(None, text_type(data)) - # if alias_key is not None: - # self.represented_objects[alias_key] = node - return node - - def represent_key(self, data): - """ - David Fraser: Extract a method to represent keys in mappings, so that - a subclass can choose not to quote them (for example) - used in represent_mapping - https://bitbucket.org/davidfraser/pyyaml/commits/d81df6eb95f20cac4a79eed95ae553b5c6f77b8c - """ - return self.represent_data(data) - - @classmethod - def add_representer(cls, data_type, representer): - if 'yaml_representers' not in cls.__dict__: - cls.yaml_representers = cls.yaml_representers.copy() - cls.yaml_representers[data_type] = representer - - @classmethod - def add_multi_representer(cls, data_type, representer): - if 'yaml_multi_representers' not in cls.__dict__: - cls.yaml_multi_representers = cls.yaml_multi_representers.copy() - cls.yaml_multi_representers[data_type] = representer - - def represent_scalar(self, tag, value, style=None): - if style is None: - style = self.default_style - node = ScalarNode(tag, value, style=style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - return node - - def represent_sequence(self, tag, sequence, flow_style=None): - value = [] - node = SequenceNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - for item in sequence: - node_item = self.represent_data(item) - if not (isinstance(node_item, ScalarNode) and not node_item.style): - best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_omap(self, tag, omap, flow_style=None): - value = [] - node = SequenceNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - for item_key in omap: - item_val = omap[item_key] - node_item = self.represent_data({item_key: item_val}) - # if not (isinstance(node_item, ScalarNode) \ - # and not node_item.style): - # best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - try: - mapping = sorted(mapping) - except TypeError: - pass - for item_key, item_value in mapping: - node_key = self.represent_key(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, ScalarNode) and not node_key.style): - best_style = False - if not (isinstance(node_value, ScalarNode) and not - node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = 
self.default_flow_style - else: - node.flow_style = best_style - return node - - def ignore_aliases(self, data): - return False - - -class SafeRepresenter(BaseRepresenter): - - def ignore_aliases(self, data): - if data in [None, ()]: - return True - if isinstance(data, (binary_type, text_type, bool, int, float)): - return True - - def represent_none(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:null', - u'null') - - if PY3: - def represent_str(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:str', data) - - def represent_binary(self, data): - if hasattr(base64, 'encodebytes'): - data = base64.encodebytes(data).decode('ascii') - else: - data = base64.encodestring(data).decode('ascii') - return self.represent_scalar(u'tag:yaml.org,2002:binary', data, - style='|') - else: - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:str', data) - - def represent_bool(self, data): - if data: - value = u'true' - else: - value = u'false' - return self.represent_scalar(u'tag:yaml.org,2002:bool', value) - - def represent_int(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', text_type(data)) - - if PY2: - def represent_long(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', - text_type(data)) - - inf_value = 1e300 - while repr(inf_value) != repr(inf_value*inf_value): - inf_value *= inf_value - - def represent_float(self, data): - if data != data or (data == 0.0 and data == 1.0): - value = u'.nan' - elif data == self.inf_value: - value = u'.inf' - elif data == -self.inf_value: - value = u'-.inf' - else: - value = to_unicode(repr(data)).lower() - # Note that in some cases `repr(data)` represents a float number - # without the decimal parts. For instance: - # >>> repr(1e17) - # '1e17' - # Unfortunately, this is not a valid float representation according - # to the definition of the `!!float` tag. We fix this by adding - # '.0' before the 'e' symbol. - if u'.' 
not in value and u'e' in value: - value = value.replace(u'e', u'.0e', 1) - return self.represent_scalar(u'tag:yaml.org,2002:float', value) - - def represent_list(self, data): - # pairs = (len(data) > 0 and isinstance(data, list)) - # if pairs: - # for item in data: - # if not isinstance(item, tuple) or len(item) != 2: - # pairs = False - # break - # if not pairs: - return self.represent_sequence(u'tag:yaml.org,2002:seq', data) - # value = [] - # for item_key, item_value in data: - # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', - # [(item_key, item_value)])) - # return SequenceNode(u'tag:yaml.org,2002:pairs', value) - - def represent_dict(self, data): - return self.represent_mapping(u'tag:yaml.org,2002:map', data) - - def represent_ordereddict(self, data): - return self.represent_omap(u'tag:yaml.org,2002:omap', data) - - def represent_set(self, data): - value = {} - for key in data: - value[key] = None - return self.represent_mapping(u'tag:yaml.org,2002:set', value) - - def represent_date(self, data): - value = to_unicode(data.isoformat()) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - - def represent_datetime(self, data): - value = to_unicode(data.isoformat(' ')) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - - def represent_yaml_object(self, tag, data, cls, flow_style=None): - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__.copy() - return self.represent_mapping(tag, state, flow_style=flow_style) - - def represent_undefined(self, data): - raise RepresenterError("cannot represent an object: %s" % data) - -SafeRepresenter.add_representer(type(None), - SafeRepresenter.represent_none) - -SafeRepresenter.add_representer(str, - SafeRepresenter.represent_str) - -if PY2: - SafeRepresenter.add_representer(unicode, - SafeRepresenter.represent_unicode) -else: - SafeRepresenter.add_representer(bytes, - SafeRepresenter.represent_binary) - -SafeRepresenter.add_representer(bool, - SafeRepresenter.represent_bool) - -SafeRepresenter.add_representer(int, - SafeRepresenter.represent_int) - -if PY2: - SafeRepresenter.add_representer(long, - SafeRepresenter.represent_long) - -SafeRepresenter.add_representer(float, - SafeRepresenter.represent_float) - -SafeRepresenter.add_representer(list, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(tuple, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(dict, - SafeRepresenter.represent_dict) - -SafeRepresenter.add_representer(set, - SafeRepresenter.represent_set) - -SafeRepresenter.add_representer(ordereddict, - SafeRepresenter.represent_ordereddict) - -SafeRepresenter.add_representer(datetime.date, - SafeRepresenter.represent_date) - -SafeRepresenter.add_representer(datetime.datetime, - SafeRepresenter.represent_datetime) - -SafeRepresenter.add_representer(None, - SafeRepresenter.represent_undefined) - - -class Representer(SafeRepresenter): - if PY2: - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:python/str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - tag = None - try: - data.encode('ascii') - tag = u'tag:yaml.org,2002:python/unicode' - except UnicodeEncodeError: - tag = 
u'tag:yaml.org,2002:str' - return self.represent_scalar(tag, data) - - def represent_long(self, data): - tag = u'tag:yaml.org,2002:int' - if int(data) is not data: - tag = u'tag:yaml.org,2002:python/long' - return self.represent_scalar(tag, to_unicode(data)) - - def represent_complex(self, data): - if data.imag == 0.0: - data = u'%r' % data.real - elif data.real == 0.0: - data = u'%rj' % data.imag - elif data.imag > 0: - data = u'%r+%rj' % (data.real, data.imag) - else: - data = u'%r%rj' % (data.real, data.imag) - return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) - - def represent_tuple(self, data): - return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) - - def represent_name(self, data): - name = u'%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar(u'tag:yaml.org,2002:python/name:' + - name, u'') - - def represent_module(self, data): - return self.represent_scalar( - u'tag:yaml.org,2002:python/module:'+data.__name__, u'') - - if PY2: - def represent_instance(self, data): - # For instances of classic classes, we use __getinitargs__ and - # __getstate__ to serialize the data. - - # If data.__getinitargs__ exists, the object must be reconstructed - # by calling cls(**args), where args is a tuple returned by - # __getinitargs__. Otherwise, the cls.__init__ method should never - # be called and the class instance is created by instantiating a - # trivial class and assigning to the instance's __class__ variable. - - # If data.__getstate__ exists, it returns the state of the object. - # Otherwise, the state of the object is data.__dict__. - - # We produce either a !!python/object or !!python/object/new node. - # If data.__getinitargs__ does not exist and state is a dictionary, - # we produce a !!python/object node . Otherwise we produce a - # !!python/object/new node. - - cls = data.__class__ - class_name = u'%s.%s' % (cls.__module__, cls.__name__) - args = None - state = None - if hasattr(data, '__getinitargs__'): - args = list(data.__getinitargs__()) - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__ - if args is None and isinstance(state, dict): - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+class_name, state) - if isinstance(state, dict) and not state: - return self.represent_sequence( - u'tag:yaml.org,2002:python/object/new:' + - class_name, args) - value = {} - if args: - value['args'] = args - value['state'] = state - return self.represent_mapping( - u'tag:yaml.org,2002:python/object/new:'+class_name, value) - - def represent_object(self, data): - # We use __reduce__ API to save the data. data.__reduce__ returns - # a tuple of length 2-5: - # (function, args, state, listitems, dictitems) - - # For reconstructing, we calls function(*args), then set its state, - # listitems, and dictitems if they are not None. - - # A special case is when function.__name__ == '__newobj__'. In this - # case we create the object with args[0].__new__(*args). - - # Another special case is when __reduce__ returns a string - we don't - # support it. - - # We produce a !!python/object, !!python/object/new or - # !!python/object/apply node. 
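        # Hedged example: for a plain new-style instance, __reduce_ex__(2)
        # usually returns (copyreg.__newobj__, (cls,), state), which the
        # code below emits as !!python/object:module.Class with the state
        # dict as the mapping.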
- - cls = type(data) - if cls in copyreg.dispatch_table: - reduce = copyreg.dispatch_table[cls](data) - elif hasattr(data, '__reduce_ex__'): - reduce = data.__reduce_ex__(2) - elif hasattr(data, '__reduce__'): - reduce = data.__reduce__() - else: - raise RepresenterError("cannot represent object: %r" % data) - reduce = (list(reduce)+[None]*5)[:5] - function, args, state, listitems, dictitems = reduce - args = list(args) - if state is None: - state = {} - if listitems is not None: - listitems = list(listitems) - if dictitems is not None: - dictitems = dict(dictitems) - if function.__name__ == '__newobj__': - function = args[0] - args = args[1:] - tag = u'tag:yaml.org,2002:python/object/new:' - newobj = True - else: - tag = u'tag:yaml.org,2002:python/object/apply:' - newobj = False - function_name = u'%s.%s' % (function.__module__, function.__name__) - if not args and not listitems and not dictitems \ - and isinstance(state, dict) and newobj: - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+function_name, state) - if not listitems and not dictitems \ - and isinstance(state, dict) and not state: - return self.represent_sequence(tag+function_name, args) - value = {} - if args: - value['args'] = args - if state or not isinstance(state, dict): - value['state'] = state - if listitems: - value['listitems'] = listitems - if dictitems: - value['dictitems'] = dictitems - return self.represent_mapping(tag+function_name, value) - -if PY2: - Representer.add_representer(str, - Representer.represent_str) - - Representer.add_representer(unicode, - Representer.represent_unicode) - - Representer.add_representer(long, - Representer.represent_long) - -Representer.add_representer(complex, - Representer.represent_complex) - -Representer.add_representer(tuple, - Representer.represent_tuple) - -Representer.add_representer(type, - Representer.represent_name) - -if PY2: - Representer.add_representer(types.ClassType, - Representer.represent_name) - -Representer.add_representer(types.FunctionType, - Representer.represent_name) - -Representer.add_representer(types.BuiltinFunctionType, - Representer.represent_name) - -Representer.add_representer(types.ModuleType, - Representer.represent_module) - -if PY2: - Representer.add_multi_representer(types.InstanceType, - Representer.represent_instance) - -Representer.add_multi_representer(object, - Representer.represent_object) - - -from .comments import CommentedMap, CommentedOrderedMap, CommentedSeq, \ - CommentedSet, comment_attrib, merge_attrib - - -class RoundTripRepresenter(SafeRepresenter): - # need to add type here and write out the .comment - # in serializer and emitter - - def __init__(self, default_style=None, default_flow_style=None): - if default_flow_style is None: - default_flow_style = False - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - - def represent_none(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:null', - u'') - - def represent_preserved_scalarstring(self, data): - tag = None - style = '|' - if PY2 and not isinstance(data, unicode): - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - return self.represent_scalar(tag, data, style=style) - - def represent_sequence(self, tag, sequence, flow_style=None): - value = [] - # if the flow_style is None, the flow style tacked on to the object - # explicitly will be taken. 
If that is None as well the default flow
-        # style rules apply.
-        try:
-            flow_style = sequence.fa.flow_style(flow_style)
-        except AttributeError:
-            flow_style = flow_style
-        try:
-            anchor = sequence.yaml_anchor()
-        except AttributeError:
-            anchor = None
-        node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor)
-        if self.alias_key is not None:
-            self.represented_objects[self.alias_key] = node
-        best_style = True
-        try:
-            comment = getattr(sequence, comment_attrib)
-            item_comments = comment.items
-            node.comment = comment.comment
-            try:
-                node.comment.append(comment.end)
-            except AttributeError:
-                pass
-        except AttributeError:
-            item_comments = {}
-        for idx, item in enumerate(sequence):
-            node_item = self.represent_data(item)
-            node_item.comment = item_comments.get(idx)
-            if not (isinstance(node_item, ScalarNode) and not node_item.style):
-                best_style = False
-            value.append(node_item)
-        if flow_style is None:
-            if self.default_flow_style is not None:
-                node.flow_style = self.default_flow_style
-            else:
-                node.flow_style = best_style
-        return node
-
-    def represent_mapping(self, tag, mapping, flow_style=None):
-        value = []
-        try:
-            flow_style = mapping.fa.flow_style(flow_style)
-        except AttributeError:
-            flow_style = flow_style
-        try:
-            anchor = mapping.yaml_anchor()
-        except AttributeError:
-            anchor = None
-        node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor)
-        if self.alias_key is not None:
-            self.represented_objects[self.alias_key] = node
-        best_style = True
-        # no sorting!
-        try:
-            comment = getattr(mapping, comment_attrib)
-            node.comment = comment.comment
-            item_comments = comment.items
-            try:
-                node.comment.append(comment.end)
-            except AttributeError:
-                pass
-        except AttributeError:
-            item_comments = {}
-        for item_key, item_value in mapping.items():
-            node_key = self.represent_key(item_key)
-            node_value = self.represent_data(item_value)
-            item_comment = item_comments.get(item_key)
-            if item_comment:
-                assert getattr(node_key, 'comment', None) is None
-                node_key.comment = item_comment[:2]
-                nvc = getattr(node_value, 'comment', None)
-                if nvc is not None:  # end comment already there
-                    nvc[0] = item_comment[2]
-                    nvc[1] = item_comment[3]
-                else:
-                    node_value.comment = item_comment[2:]
-            if not (isinstance(node_key, ScalarNode) and not node_key.style):
-                best_style = False
-            if not (isinstance(node_value, ScalarNode) and not
-                    node_value.style):
-                best_style = False
-            value.append((node_key, node_value))
-        if flow_style is None:
-            if self.default_flow_style is not None:
-                node.flow_style = self.default_flow_style
-            else:
-                node.flow_style = best_style
-        merge_list = [m[1] for m in getattr(mapping, merge_attrib, [])]
-        if merge_list:
-            # because of the call to represent_data here, the anchors
-            # are marked as being used and thereby created
-            if len(merge_list) == 1:
-                arg = self.represent_data(merge_list[0])
-            else:
-                arg = self.represent_data(merge_list)
-            arg.flow_style = True
-            value.insert(0,
-                         (ScalarNode(u'tag:yaml.org,2002:merge', '<<'), arg))
-        return node
-
-    def represent_omap(self, tag, omap, flow_style=None):
-        value = []
-        try:
-            flow_style = omap.fa.flow_style(flow_style)
-        except AttributeError:
-            flow_style = flow_style
-        try:
-            anchor = omap.yaml_anchor()
-        except AttributeError:
-            anchor = None
-        node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor)
-        if self.alias_key is not None:
-            self.represented_objects[self.alias_key] = node
-        best_style = True
-        try:
-            comment = getattr(omap, comment_attrib)
-            node.comment = comment.comment
-
item_comments = comment.items - try: - node.comment.append(comment.end) - except AttributeError: - pass - except AttributeError: - item_comments = {} - for item_key in omap: - item_val = omap[item_key] - node_item = self.represent_data({item_key: item_val}) - # node item has two scalars in value: node_key and node_value - item_comment = item_comments.get(item_key) - if item_comment: - if item_comment[1]: - node_item.comment = [None, item_comment[1]] - assert getattr(node_item.value[0][0], 'comment', None) is None - node_item.value[0][0].comment = [item_comment[0], None] - nvc = getattr(node_item.value[0][1], 'comment', None) - if nvc is not None: # end comment already there - nvc[0] = item_comment[2] - nvc[1] = item_comment[3] - else: - node_item.value[0][1].comment = item_comment[2:] - # if not (isinstance(node_item, ScalarNode) \ - # and not node_item.style): - # best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_set(self, setting): - flow_style = False - tag = u'tag:yaml.org,2002:set' - # return self.represent_mapping(tag, value) - value = [] - flow_style = setting.fa.flow_style(flow_style) - try: - anchor = setting.yaml_anchor() - except AttributeError: - anchor = None - node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - # no sorting! !! - try: - comment = getattr(setting, comment_attrib) - node.comment = comment.comment - item_comments = comment.items - try: - node.comment.append(comment.end) - except AttributeError: - pass - except AttributeError: - item_comments = {} - for item_key in setting.odict: - node_key = self.represent_key(item_key) - node_value = self.represent_data(None) - item_comment = item_comments.get(item_key) - if item_comment: - assert getattr(node_key, 'comment', None) is None - node_key.comment = item_comment[:2] - node_key.style = node_value.style = "?" 
- if not (isinstance(node_key, ScalarNode) and not node_key.style): - best_style = False - if not (isinstance(node_value, ScalarNode) and not - node_value.style): - best_style = False - value.append((node_key, node_value)) - return node - - -RoundTripRepresenter.add_representer(type(None), - RoundTripRepresenter.represent_none) - -RoundTripRepresenter.add_representer( - PreservedScalarString, - RoundTripRepresenter.represent_preserved_scalarstring) - -RoundTripRepresenter.add_representer(CommentedSeq, - RoundTripRepresenter.represent_list) - -RoundTripRepresenter.add_representer(CommentedMap, - RoundTripRepresenter.represent_dict) - -RoundTripRepresenter.add_representer( - CommentedOrderedMap, - RoundTripRepresenter.represent_ordereddict) - -RoundTripRepresenter.add_representer(CommentedSet, - RoundTripRepresenter.represent_set) diff --git a/py/resolver.py b/py/resolver.py deleted file mode 100644 index d385b0e..0000000 --- a/py/resolver.py +++ /dev/null @@ -1,233 +0,0 @@ -from __future__ import absolute_import - -__all__ = ['BaseResolver', 'Resolver'] - -from .error import * -from .nodes import * -from .compat import string_types - -import re - - -class ResolverError(YAMLError): - pass - - -class BaseResolver(object): - - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' - - yaml_implicit_resolvers = {} - yaml_path_resolvers = {} - - def __init__(self): - self.resolver_exact_paths = [] - self.resolver_prefix_paths = [] - - @classmethod - def add_implicit_resolver(cls, tag, regexp, first): - if 'yaml_implicit_resolvers' not in cls.__dict__: - cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() - if first is None: - first = [None] - for ch in first: - cls.yaml_implicit_resolvers.setdefault(ch, []).append( - (tag, regexp)) - - @classmethod - def add_path_resolver(cls, tag, path, kind=None): - # Note: `add_path_resolver` is experimental. The API could be changed. - # `new_path` is a pattern that is matched against the path from the - # root to the node that is being considered. `node_path` elements are - # tuples `(node_check, index_check)`. `node_check` is a node class: - # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` - # matches any kind of a node. `index_check` could be `None`, a boolean - # value, a string value, or a number. `None` and `False` match against - # any _value_ of sequence and mapping nodes. `True` matches against - # any _key_ of a mapping node. A string `index_check` matches against - # a mapping value that corresponds to a scalar key which content is - # equal to the `index_check` value. An integer `index_check` matches - # against a sequence value with the index equal to `index_check`. 
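        # Illustration (hypothetical tag and path, not from this patch): to
        # resolve the scalar value of a top-level 'port' key to !!int one
        # could register
        #
        #     BaseResolver.add_path_resolver(
        #         u'tag:yaml.org,2002:int',
        #         [(MappingNode, u'port')],
        #         kind=ScalarNode)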
- if 'yaml_path_resolvers' not in cls.__dict__: - cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() - new_path = [] - for element in path: - if isinstance(element, (list, tuple)): - if len(element) == 2: - node_check, index_check = element - elif len(element) == 1: - node_check = element[0] - index_check = True - else: - raise ResolverError("Invalid path element: %s" % element) - else: - node_check = None - index_check = element - if node_check is str: - node_check = ScalarNode - elif node_check is list: - node_check = SequenceNode - elif node_check is dict: - node_check = MappingNode - elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, string_types) \ - and node_check is not None: - raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (string_types, int)) \ - and index_check is not None: - raise ResolverError("Invalid index checker: %s" % index_check) - new_path.append((node_check, index_check)) - if kind is str: - kind = ScalarNode - elif kind is list: - kind = SequenceNode - elif kind is dict: - kind = MappingNode - elif kind not in [ScalarNode, SequenceNode, MappingNode] \ - and kind is not None: - raise ResolverError("Invalid node kind: %s" % kind) - cls.yaml_path_resolvers[tuple(new_path), kind] = tag - - def descend_resolver(self, current_node, current_index): - if not self.yaml_path_resolvers: - return - exact_paths = {} - prefix_paths = [] - if current_node: - depth = len(self.resolver_prefix_paths) - for path, kind in self.resolver_prefix_paths[-1]: - if self.check_resolver_prefix(depth, path, kind, - current_node, current_index): - if len(path) > depth: - prefix_paths.append((path, kind)) - else: - exact_paths[kind] = self.yaml_path_resolvers[path, - kind] - else: - for path, kind in self.yaml_path_resolvers: - if not path: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - prefix_paths.append((path, kind)) - self.resolver_exact_paths.append(exact_paths) - self.resolver_prefix_paths.append(prefix_paths) - - def ascend_resolver(self): - if not self.yaml_path_resolvers: - return - self.resolver_exact_paths.pop() - self.resolver_prefix_paths.pop() - - def check_resolver_prefix(self, depth, path, kind, - current_node, current_index): - node_check, index_check = path[depth-1] - if isinstance(node_check, string_types): - if current_node.tag != node_check: - return - elif node_check is not None: - if not isinstance(current_node, node_check): - return - if index_check is True and current_index is not None: - return - if (index_check is False or index_check is None) \ - and current_index is None: - return - if isinstance(index_check, string_types): - if not (isinstance(current_index, ScalarNode) - and index_check == current_index.value): - return - elif isinstance(index_check, int) and not isinstance(index_check, - bool): - if index_check != current_index: - return - return True - - def resolve(self, kind, value, implicit): - if kind is ScalarNode and implicit[0]: - if value == u'': - resolvers = self.yaml_implicit_resolvers.get(u'', []) - else: - resolvers = self.yaml_implicit_resolvers.get(value[0], []) - resolvers += self.yaml_implicit_resolvers.get(None, []) - for tag, regexp in resolvers: - if regexp.match(value): - return tag - implicit = implicit[1] - if self.yaml_path_resolvers: - exact_paths = self.resolver_exact_paths[-1] - if kind in exact_paths: - return exact_paths[kind] - if None in exact_paths: - return exact_paths[None] - if kind is ScalarNode: - return 
self.DEFAULT_SCALAR_TAG - elif kind is SequenceNode: - return self.DEFAULT_SEQUENCE_TAG - elif kind is MappingNode: - return self.DEFAULT_MAPPING_TAG - - -class Resolver(BaseResolver): - pass - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:bool', - re.compile(u'''^(?:yes|Yes|YES|no|No|NO - |true|True|TRUE|false|False|FALSE - |on|On|ON|off|Off|OFF)$''', re.X), - list(u'yYnNtTfFoO')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:float', - re.compile(u'''^(?: - [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? - |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) - |\\.[0-9_]+(?:[eE][-+][0-9]+)? - |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* - |[-+]?\\.(?:inf|Inf|INF) - |\\.(?:nan|NaN|NAN))$''', re.X), - list(u'-+0123456789.')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:int', - re.compile(u'''^(?:[-+]?0b[0-1_]+ - |[-+]?0o?[0-7_]+ - |[-+]?(?:0|[1-9][0-9_]*) - |[-+]?0x[0-9a-fA-F_]+ - |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list(u'-+0123456789')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:merge', - re.compile(u'^(?:<<)$'), - [u'<']) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:null', - re.compile(u'''^(?: ~ - |null|Null|NULL - | )$''', re.X), - [u'~', u'n', u'N', u'']) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:timestamp', - re.compile(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] - |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? - (?:[Tt]|[ \\t]+)[0-9][0-9]? - :[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)? - (?:[ \\t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list(u'0123456789')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:value', - re.compile(u'^(?:=)$'), - [u'=']) - -# The following resolver is only for documentation purposes. It cannot work -# because plain scalars cannot start with '!', '&', or '*'. 
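# (Illustration, not from the original source: in `key: !foo bar`, the
# '!foo' is consumed by the scanner as a tag token before implicit
# resolution ever sees a scalar, so this pattern can never match.)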
-Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:yaml', - re.compile(u'^(?:!|&|\\*)$'), - list(u'!&*')) diff --git a/py/scalarstring.py b/py/scalarstring.py deleted file mode 100644 index a628396..0000000 --- a/py/scalarstring.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -__all__ = ["ScalarString", "PreservedScalarString"] - -from .compat import text_type - - -class ScalarString(text_type): - def __new__(cls, *args, **kw): - return text_type.__new__(cls, *args, **kw) - - -class PreservedScalarString(ScalarString): - def __new__(cls, value): - return ScalarString.__new__(cls, value) - -def preserve_literal(s): - return PreservedScalarString(s.replace('\r\n', '\n').replace('\r', '\n')) - - -def walk_tree(base): - """ - the routine here walks over a simple yaml tree (recursing in - dict values and list items) and converts strings that - have multiple lines to literal scalars - """ - from ruamel.yaml.compat import string_types - - - if isinstance(base, dict): - for k in base: - v = base[k] - if isinstance(v, string_types) and '\n' in v: - base[k] = preserve_literal(v) - else: - walk_tree(v) - elif isinstance(base, list): - for idx, elem in enumerate(base): - if isinstance(elem, string_types) and '\n' in elem: - print(elem) - base[idx] = preserve_literal(elem) - else: - walk_tree(elem) diff --git a/py/scanner.py b/py/scanner.py deleted file mode 100644 index 24a13a8..0000000 --- a/py/scanner.py +++ /dev/null @@ -1,1655 +0,0 @@ -from __future__ import absolute_import -from __future__ import print_function - -# Scanner produces tokens of the following types: -# STREAM-START -# STREAM-END -# DIRECTIVE(name, value) -# DOCUMENT-START -# DOCUMENT-END -# BLOCK-SEQUENCE-START -# BLOCK-MAPPING-START -# BLOCK-END -# FLOW-SEQUENCE-START -# FLOW-MAPPING-START -# FLOW-SEQUENCE-END -# FLOW-MAPPING-END -# BLOCK-ENTRY -# FLOW-ENTRY -# KEY -# VALUE -# ALIAS(value) -# ANCHOR(value) -# TAG(value) -# SCALAR(value, plain, style) -# -# RoundTripScanner -# COMMENT(value) -# -# Read comments in the Scanner code for more details. -# - -__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError'] - -from .error import MarkedYAMLError -from .tokens import * -from .compat import utf8, unichr, PY3 - - -class ScannerError(MarkedYAMLError): - pass - - -class SimpleKey(object): - # See below simple keys treatment. - - def __init__(self, token_number, required, index, line, column, mark): - self.token_number = token_number - self.required = required - self.index = index - self.line = line - self.column = column - self.mark = mark - - -class Scanner(object): - - def __init__(self): - """Initialize the scanner.""" - # It is assumed that Scanner and Reader will have a common descendant. - # Reader do the dirty work of checking for BOM and converting the - # input data to Unicode. It also adds NUL to the end. - # - # Reader supports the following methods - # self.peek(i=0) # peek the next i-th character - # self.prefix(l=1) # peek the next l characters - # self.forward(l=1) # read the next l characters and move the pointer - - # Had we reached the end of the stream? - self.done = False - - # The number of unclosed '{' and '['. `flow_level == 0` means block - # context. - self.flow_level = 0 - - # List of processed tokens that are not yet emitted. - self.tokens = [] - - # Add the STREAM-START token. - self.fetch_stream_start() - - # Number of tokens that were emitted through the `get_token` method. - self.tokens_taken = 0 - - # The current indentation level. 
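        # Illustration (not from the original source): while scanning
        #
        #     a:
        #       b: 1
        #
        # `indent` moves -1 -> 0 -> 2 as the block mappings open, and the
        # outer values wait on `indents` until BLOCK-END tokens unwind them.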
-        self.indent = -1
-
-        # Past indentation levels.
-        self.indents = []
-
-        # Variables related to simple keys treatment.
-
-        # A simple key is a key that is not denoted by the '?' indicator.
-        # Example of simple keys:
-        #   ---
-        #   block simple key: value
-        #   ? not a simple key:
-        #   : { flow simple key: value }
-        # We emit the KEY token before all keys, so when we find a potential
-        # simple key, we try to locate the corresponding ':' indicator.
-        # Simple keys should be limited to a single line and 1024 characters.
-
-        # Can a simple key start at the current position? A simple key may
-        # start:
-        # - at the beginning of the line, not counting indentation spaces
-        #       (in block context),
-        # - after '{', '[', ',' (in the flow context),
-        # - after '?', ':', '-' (in the block context).
-        # In the block context, this flag also signifies if a block collection
-        # may start at the current position.
-        self.allow_simple_key = True
-
-        # Keep track of possible simple keys. This is a dictionary. The key
-        # is `flow_level`; there can be no more than one possible simple key
-        # for each level. The value is a SimpleKey record:
-        #   (token_number, required, index, line, column, mark)
-        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
-        # '[', or '{' tokens.
-        self.possible_simple_keys = {}
-
-    # Public methods.
-
-    def check_token(self, *choices):
-        # Check if the next token is one of the given types.
-        while self.need_more_tokens():
-            self.fetch_more_tokens()
-        if self.tokens:
-            if not choices:
-                return True
-            for choice in choices:
-                if isinstance(self.tokens[0], choice):
-                    return True
-        return False
-
-    def peek_token(self):
-        # Return the next token, but do not delete it from the queue.
-        while self.need_more_tokens():
-            self.fetch_more_tokens()
-        if self.tokens:
-            return self.tokens[0]
-
-    def get_token(self):
-        # Return the next token.
-        while self.need_more_tokens():
-            self.fetch_more_tokens()
-        if self.tokens:
-            self.tokens_taken += 1
-            return self.tokens.pop(0)
-
-    # Private methods.
-
-    def need_more_tokens(self):
-        if self.done:
-            return False
-        if not self.tokens:
-            return True
-        # The current token may be a potential simple key, so we
-        # need to look further.
-        self.stale_possible_simple_keys()
-        if self.next_possible_simple_key() == self.tokens_taken:
-            return True
-
-    def fetch_more_tokens(self):
-
-        # Eat whitespaces and comments until we reach the next token.
-        comment = self.scan_to_next_token()
-
-        if comment is not None:  # never happens for base scanner
-            return self.fetch_comment(comment)
-
-        # Remove obsolete possible simple keys.
-        self.stale_possible_simple_keys()
-
-        # Compare the current indentation and column. It may add some tokens
-        # and decrease the current indentation level.
-        self.unwind_indent(self.column)
-
-        # Peek the next character.
-        ch = self.peek()
-
-        # Is it the end of stream?
-        if ch == u'\0':
-            return self.fetch_stream_end()
-
-        # Is it a directive?
-        if ch == u'%' and self.check_directive():
-            return self.fetch_directive()
-
-        # Is it the document start?
-        if ch == u'-' and self.check_document_start():
-            return self.fetch_document_start()
-
-        # Is it the document end?
-        if ch == u'.' and self.check_document_end():
-            return self.fetch_document_end()
-
-        # TODO: support for BOM within a stream.
-        # if ch == u'\uFEFF':
-        #     return self.fetch_bom()     <-- issue BOMToken
-
-        # Note: the order of the following checks is NOT significant.
-
-        # Is it the flow sequence start indicator?
- if ch == u'[': - return self.fetch_flow_sequence_start() - - # Is it the flow mapping start indicator? - if ch == u'{': - return self.fetch_flow_mapping_start() - - # Is it the flow sequence end indicator? - if ch == u']': - return self.fetch_flow_sequence_end() - - # Is it the flow mapping end indicator? - if ch == u'}': - return self.fetch_flow_mapping_end() - - # Is it the flow entry indicator? - if ch == u',': - return self.fetch_flow_entry() - - # Is it the block entry indicator? - if ch == u'-' and self.check_block_entry(): - return self.fetch_block_entry() - - # Is it the key indicator? - if ch == u'?' and self.check_key(): - return self.fetch_key() - - # Is it the value indicator? - if ch == u':' and self.check_value(): - return self.fetch_value() - - # Is it an alias? - if ch == u'*': - return self.fetch_alias() - - # Is it an anchor? - if ch == u'&': - return self.fetch_anchor() - - # Is it a tag? - if ch == u'!': - return self.fetch_tag() - - # Is it a literal scalar? - if ch == u'|' and not self.flow_level: - return self.fetch_literal() - - # Is it a folded scalar? - if ch == u'>' and not self.flow_level: - return self.fetch_folded() - - # Is it a single quoted scalar? - if ch == u'\'': - return self.fetch_single() - - # Is it a double quoted scalar? - if ch == u'\"': - return self.fetch_double() - - # It must be a plain scalar then. - if self.check_plain(): - return self.fetch_plain() - - # No? It's an error. Let's produce a nice error message. - raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" - % utf8(ch), self.get_mark()) - - # Simple keys treatment. - - def next_possible_simple_key(self): - # Return the number of the nearest possible simple key. Actually we - # don't need to loop through the whole dictionary. We may replace it - # with the following code: - # if not self.possible_simple_keys: - # return None - # return self.possible_simple_keys[ - # min(self.possible_simple_keys.keys())].token_number - min_token_number = None - for level in self.possible_simple_keys: - key = self.possible_simple_keys[level] - if min_token_number is None or key.token_number < min_token_number: - min_token_number = key.token_number - return min_token_number - - def stale_possible_simple_keys(self): - # Remove entries that are no longer possible simple keys. According to - # the YAML specification, simple keys - # - should be limited to a single line, - # - should be no longer than 1024 characters. - # Disabling this procedure will allow simple keys of any length and - # height (may cause problems if indentation is broken though). - for level in list(self.possible_simple_keys): - key = self.possible_simple_keys[level] - if key.line != self.line \ - or self.index-key.index > 1024: - if key.required: - raise ScannerError( - "while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - del self.possible_simple_keys[level] - - def save_possible_simple_key(self): - # The next token may start a simple key. We check if it's possible - # and save its position. This function is called for - # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. - - # Check if a simple key is required at the current position. - required = not self.flow_level and self.indent == self.column - - # The next token might be a simple key. Let's save it's number and - # position. 
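        # Illustration (not from the original source), block context:
        #
        #     plain key: value   # saved here as a *possible* simple key;
        #                        # KEY is only inserted once ':' is found
        #     ? explicit key     # announced by '?', never saved here
        #     : value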
-        if self.allow_simple_key:
-            self.remove_possible_simple_key()
-            token_number = self.tokens_taken+len(self.tokens)
-            key = SimpleKey(
-                token_number, required,
-                self.index, self.line, self.column, self.get_mark())
-            self.possible_simple_keys[self.flow_level] = key
-
-    def remove_possible_simple_key(self):
-        # Remove the saved possible key position at the current flow level.
-        if self.flow_level in self.possible_simple_keys:
-            key = self.possible_simple_keys[self.flow_level]
-
-            if key.required:
-                raise ScannerError(
-                    "while scanning a simple key", key.mark,
-                    "could not find expected ':'", self.get_mark())
-
-            del self.possible_simple_keys[self.flow_level]
-
-    # Indentation functions.
-
-    def unwind_indent(self, column):
-
-        # In flow context, tokens should respect indentation.
-        # Actually the condition should be `self.indent >= column` according to
-        # the spec. But this condition will prohibit intuitively correct
-        # constructions such as
-        # key : {
-        # }
-        # ####
-        # if self.flow_level and self.indent > column:
-        #     raise ScannerError(None, None,
-        #         "invalid indentation or unclosed '[' or '{'",
-        #         self.get_mark())
-
-        # In the flow context, indentation is ignored. We make the scanner less
-        # restrictive than the specification requires.
-        if self.flow_level:
-            return
-
-        # In block context, we may need to issue the BLOCK-END tokens.
-        while self.indent > column:
-            mark = self.get_mark()
-            self.indent = self.indents.pop()
-            self.tokens.append(BlockEndToken(mark, mark))
-
-    def add_indent(self, column):
-        # Check if we need to increase indentation.
-        if self.indent < column:
-            self.indents.append(self.indent)
-            self.indent = column
-            return True
-        return False
-
-    # Fetchers.
-
-    def fetch_stream_start(self):
-        # We always add STREAM-START as the first token and STREAM-END as the
-        # last token.
-
-        # Read the token.
-        mark = self.get_mark()
-
-        # Add STREAM-START.
-        self.tokens.append(StreamStartToken(mark, mark,
-                                            encoding=self.encoding))
-
-    def fetch_stream_end(self):
-
-        # Set the current indentation to -1.
-        self.unwind_indent(-1)
-
-        # Reset simple keys.
-        self.remove_possible_simple_key()
-        self.allow_simple_key = False
-        self.possible_simple_keys = {}
-
-        # Read the token.
-        mark = self.get_mark()
-
-        # Add STREAM-END.
-        self.tokens.append(StreamEndToken(mark, mark))
-
-        # The stream is finished.
-        self.done = True
-
-    def fetch_directive(self):
-
-        # Set the current indentation to -1.
-        self.unwind_indent(-1)
-
-        # Reset simple keys.
-        self.remove_possible_simple_key()
-        self.allow_simple_key = False
-
-        # Scan and add DIRECTIVE.
-        self.tokens.append(self.scan_directive())
-
-    def fetch_document_start(self):
-        self.fetch_document_indicator(DocumentStartToken)
-
-    def fetch_document_end(self):
-        self.fetch_document_indicator(DocumentEndToken)
-
-    def fetch_document_indicator(self, TokenClass):
-
-        # Set the current indentation to -1.
-        self.unwind_indent(-1)
-
-        # Reset simple keys. Note that there could not be a block collection
-        # after '---'.
-        self.remove_possible_simple_key()
-        self.allow_simple_key = False
-
-        # Add DOCUMENT-START or DOCUMENT-END.
-        start_mark = self.get_mark()
-        self.forward(3)
-        end_mark = self.get_mark()
-        self.tokens.append(TokenClass(start_mark, end_mark))
-
-    def fetch_flow_sequence_start(self):
-        self.fetch_flow_collection_start(FlowSequenceStartToken)
-
-    def fetch_flow_mapping_start(self):
-        self.fetch_flow_collection_start(FlowMappingStartToken)
-
-    def fetch_flow_collection_start(self, TokenClass):
-
-        # '[' and '{' may start a simple key.
- self.save_possible_simple_key() - - # Increase the flow level. - self.flow_level += 1 - - # Simple keys are allowed after '[' and '{'. - self.allow_simple_key = True - - # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_end(self): - self.fetch_flow_collection_end(FlowSequenceEndToken) - - def fetch_flow_mapping_end(self): - self.fetch_flow_collection_end(FlowMappingEndToken) - - def fetch_flow_collection_end(self, TokenClass): - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Decrease the flow level. - self.flow_level -= 1 - - # No simple keys after ']' or '}'. - self.allow_simple_key = False - - # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_entry(self): - - # Simple keys are allowed after ','. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add FLOW-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(FlowEntryToken(start_mark, end_mark)) - - def fetch_block_entry(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a new entry? - if not self.allow_simple_key: - raise ScannerError(None, None, - "sequence entries are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-SEQUENCE-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockSequenceStartToken(mark, mark)) - - # It's an error for the block entry to occur in the flow context, - # but we let the parser detect this. - else: - pass - - # Simple keys are allowed after '-'. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add BLOCK-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(BlockEntryToken(start_mark, end_mark)) - - def fetch_key(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a key (not nessesary a simple)? - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping keys are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-MAPPING-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after '?' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add KEY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(KeyToken(start_mark, end_mark)) - - def fetch_value(self): - - # Do we determine a simple key? - if self.flow_level in self.possible_simple_keys: - # Add KEY. - key = self.possible_simple_keys[self.flow_level] - del self.possible_simple_keys[self.flow_level] - self.tokens.insert(key.token_number-self.tokens_taken, - KeyToken(key.mark, key.mark)) - - # If this key starts a new block mapping, we need to add - # BLOCK-MAPPING-START. 
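            # Illustration (not from the original source): while scanning
            # "a: 1" at the top level, 'a' was merely recorded as a possible
            # simple key; this ':' is what retroactively inserts KEY (and,
            # below, BLOCK-MAPPING-START) at the saved token position.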
- if not self.flow_level: - if self.add_indent(key.column): - self.tokens.insert( - key.token_number-self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) - - # There cannot be two simple keys one after another. - self.allow_simple_key = False - - # It must be a part of a complex key. - else: - - # Block context needs additional checks. - # (Do we really need them? They will be catched by the parser - # anyway.) - if not self.flow_level: - - # We are allowed to start a complex value if and only if - # we can start a simple key. - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping values are not allowed here", - self.get_mark()) - - # If this value starts a new block mapping, we need to add - # BLOCK-MAPPING-START. It will be detected as an error later by - # the parser. - if not self.flow_level: - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after ':' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add VALUE. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(ValueToken(start_mark, end_mark)) - - def fetch_alias(self): - - # ALIAS could be a simple key. - self.save_possible_simple_key() - - # No simple keys after ALIAS. - self.allow_simple_key = False - - # Scan and add ALIAS. - self.tokens.append(self.scan_anchor(AliasToken)) - - def fetch_anchor(self): - - # ANCHOR could start a simple key. - self.save_possible_simple_key() - - # No simple keys after ANCHOR. - self.allow_simple_key = False - - # Scan and add ANCHOR. - self.tokens.append(self.scan_anchor(AnchorToken)) - - def fetch_tag(self): - - # TAG could start a simple key. - self.save_possible_simple_key() - - # No simple keys after TAG. - self.allow_simple_key = False - - # Scan and add TAG. - self.tokens.append(self.scan_tag()) - - def fetch_literal(self): - self.fetch_block_scalar(style='|') - - def fetch_folded(self): - self.fetch_block_scalar(style='>') - - def fetch_block_scalar(self, style): - - # A simple key may follow a block scalar. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Scan and add SCALAR. - self.tokens.append(self.scan_block_scalar(style)) - - def fetch_single(self): - self.fetch_flow_scalar(style='\'') - - def fetch_double(self): - self.fetch_flow_scalar(style='"') - - def fetch_flow_scalar(self, style): - - # A flow scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after flow scalars. - self.allow_simple_key = False - - # Scan and add SCALAR. - self.tokens.append(self.scan_flow_scalar(style)) - - def fetch_plain(self): - - # A plain scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after plain scalars. But note that `scan_plain` will - # change this flag if the scan is finished at the beginning of the - # line. - self.allow_simple_key = False - - # Scan and add SCALAR. May change `allow_simple_key`. - self.tokens.append(self.scan_plain()) - - # Checkers. - - def check_directive(self): - - # DIRECTIVE: ^ '%' ... - # The '%' indicator is already checked. 
- if self.column == 0: - return True - - def check_document_start(self): - - # DOCUMENT-START: ^ '---' (' '|'\n') - if self.column == 0: - if self.prefix(3) == u'---' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return True - - def check_document_end(self): - - # DOCUMENT-END: ^ '...' (' '|'\n') - if self.column == 0: - if self.prefix(3) == u'...' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return True - - def check_block_entry(self): - - # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_key(self): - - # KEY(flow context): '?' - if self.flow_level: - return True - - # KEY(block context): '?' (' '|'\n') - else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_value(self): - - # VALUE(flow context): ':' - if self.flow_level: - return True - - # VALUE(block context): ':' (' '|'\n') - else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_plain(self): - # A plain scalar may start with any non-space character except: - # '-', '?', ':', ',', '[', ']', '{', '}', - # '#', '&', '*', '!', '|', '>', '\'', '\"', - # '%', '@', '`'. - # - # It may also start with - # '-', '?', ':' - # if it is followed by a non-space character. - # - # Note that we limit the last rule to the block context (except the - # '-' character) because we want the flow context to be space - # independent. - ch = self.peek() - return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' - and (ch == u'-' or (not self.flow_level and ch in u'?:'))) - - # Scanners. - - def scan_to_next_token(self): - # We ignore spaces, line breaks and comments. - # If we find a line break in the block context, we set the flag - # `allow_simple_key` on. - # The byte order mark is stripped if it's the first character in the - # stream. We do not yet support BOM inside the stream as the - # specification requires. Any such mark will be considered as a part - # of the document. - # - # TODO: We need to make tab handling rules more sane. A good rule is - # Tabs cannot precede tokens - # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - # KEY(block), VALUE(block), BLOCK-ENTRY - # So the checking code is - # if : - # self.allow_simple_keys = False - # We also need to add the check for `allow_simple_keys == True` to - # `unwind_indent` before issuing BLOCK-END. - # Scanners for block, flow, and plain scalars need to be modified. - - if self.index == 0 and self.peek() == u'\uFEFF': - self.forward() - found = False - while not found: - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - if self.scan_line_break(): - if not self.flow_level: - self.allow_simple_key = True - else: - found = True - - def scan_directive(self): - # See the specification for details. - start_mark = self.get_mark() - self.forward() - name = self.scan_directive_name(start_mark) - value = None - if name == u'YAML': - value = self.scan_yaml_directive_value(start_mark) - end_mark = self.get_mark() - elif name == u'TAG': - value = self.scan_tag_directive_value(start_mark) - end_mark = self.get_mark() - else: - end_mark = self.get_mark() - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - self.scan_directive_ignored_line(start_mark) - return DirectiveToken(name, value, start_mark, end_mark) - - def scan_directive_name(self, start_mark): - # See the specification for details. 
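        # Illustration (not from the original source): in "%YAML 1.1" this
        # consumes u'YAML'; in "%TAG !e! tag:example.com,2002:" it consumes
        # u'TAG'; unknown directive names are skipped by the caller above.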
- length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_:.': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError( - "while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError( - "while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.get_mark()) - return value - - def scan_yaml_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - major = self.scan_yaml_directive_number(start_mark) - if self.peek() != '.': - raise ScannerError( - "while scanning a directive", start_mark, - "expected a digit or '.', but found %r" - % utf8(self.peek()), - self.get_mark()) - self.forward() - minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError( - "while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" - % utf8(self.peek()), - self.get_mark()) - return (major, minor) - - def scan_yaml_directive_number(self, start_mark): - # See the specification for details. - ch = self.peek() - if not (u'0' <= ch <= u'9'): - raise ScannerError( - "while scanning a directive", start_mark, - "expected a digit, but found %r" % utf8(ch), - self.get_mark()) - length = 0 - while u'0' <= self.peek(length) <= u'9': - length += 1 - value = int(self.prefix(length)) - self.forward(length) - return value - - def scan_tag_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == u' ': - self.forward() - prefix = self.scan_tag_directive_prefix(start_mark) - return (handle, prefix) - - def scan_tag_directive_handle(self, start_mark): - # See the specification for details. - value = self.scan_tag_handle('directive', start_mark) - ch = self.peek() - if ch != u' ': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % utf8(ch), - self.get_mark()) - return value - - def scan_tag_directive_prefix(self, start_mark): - # See the specification for details. - value = self.scan_tag_uri('directive', start_mark) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % utf8(ch), - self.get_mark()) - return value - - def scan_directive_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError( - "while scanning a directive", start_mark, - "expected a comment or a line break, but found %r" - % utf8(ch), self.get_mark()) - self.scan_line_break() - - def scan_anchor(self, TokenClass): - # The specification does not restrict characters for anchors and - # aliases. This may lead to problems, for instance, the document: - # [ *alias, value ] - # can be interpteted in two ways, as - # [ "value" ] - # and - # [ *alias , "value" ] - # Therefore we restrict aliases to numbers and ASCII letters. 
- start_mark = self.get_mark() - indicator = self.peek() - if indicator == u'*': - name = 'alias' - else: - name = 'anchor' - self.forward() - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError( - "while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': - raise ScannerError( - "while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.get_mark()) - end_mark = self.get_mark() - return TokenClass(value, start_mark, end_mark) - - def scan_tag(self): - # See the specification for details. - start_mark = self.get_mark() - ch = self.peek(1) - if ch == u'<': - handle = None - self.forward(2) - suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != u'>': - raise ScannerError( - "while parsing a tag", start_mark, - "expected '>', but found %r" % utf8(self.peek()), - self.get_mark()) - self.forward() - elif ch in u'\0 \t\r\n\x85\u2028\u2029': - handle = None - suffix = u'!' - self.forward() - else: - length = 1 - use_handle = False - while ch not in u'\0 \r\n\x85\u2028\u2029': - if ch == u'!': - use_handle = True - break - length += 1 - ch = self.peek(length) - handle = u'!' - if use_handle: - handle = self.scan_tag_handle('tag', start_mark) - else: - handle = u'!' - self.forward() - suffix = self.scan_tag_uri('tag', start_mark) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % utf8(ch), - self.get_mark()) - value = (handle, suffix) - end_mark = self.get_mark() - return TagToken(value, start_mark, end_mark) - - def scan_block_scalar(self, style): - # See the specification for details. - - if style == '>': - folded = True - else: - folded = False - - chunks = [] - start_mark = self.get_mark() - - # Scan the header. - self.forward() - chomping, increment = self.scan_block_scalar_indicators(start_mark) - self.scan_block_scalar_ignored_line(start_mark) - - # Determine the indentation level and go to the first non-empty line. - min_indent = self.indent+1 - if min_indent < 1: - min_indent = 1 - if increment is None: - breaks, max_indent, end_mark = self.scan_block_scalar_indentation() - indent = max(min_indent, max_indent) - else: - indent = min_indent+increment-1 - breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = u'' - - # Scan the inner part of the block scalar. - while self.column == indent and self.peek() != u'\0': - chunks.extend(breaks) - leading_non_space = self.peek() not in u' \t' - length = 0 - while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': - length += 1 - chunks.append(self.prefix(length)) - self.forward(length) - line_break = self.scan_line_break() - breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != u'\0': - - # Unfortunately, folding rules are ambiguous. 
- # - # This is the folding according to the specification: - - if folded and line_break == u'\n' \ - and leading_non_space and self.peek() not in u' \t': - if not breaks: - chunks.append(u' ') - else: - chunks.append(line_break) - - # This is Clark Evans's interpretation (also in the spec - # examples): - # - # if folded and line_break == u'\n': - # if not breaks: - # if self.peek() not in ' \t': - # chunks.append(u' ') - # else: - # chunks.append(line_break) - # else: - # chunks.append(line_break) - else: - break - - # Process trailing line breaks. The 'chomping' setting determines - # whether they are included in the value. - comment = [] - if chomping in [None, True]: - chunks.append(line_break) - if chomping is True: - chunks.extend(breaks) - elif chomping in [None, False]: - comment.extend(breaks) - - # We are done. - token = ScalarToken(u''.join(chunks), False, start_mark, end_mark, - style) - if len(comment) > 0: - # Keep track of the trailing whitespace as a comment token, if - # isn't all included in the actual value. - comment_end_mark = self.get_mark() - comment = CommentToken(''.join(comment), end_mark, - comment_end_mark) - token.add_post_comment(comment) - return token - - def scan_block_scalar_indicators(self, start_mark): - # See the specification for details. - chomping = None - increment = None - ch = self.peek() - if ch in u'+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch in u'0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError( - "while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, " - "but found 0", self.get_mark()) - self.forward() - elif ch in u'0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError( - "while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, " - "but found 0", - self.get_mark()) - self.forward() - ch = self.peek() - if ch in u'+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError( - "while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found %r" - % utf8(ch), self.get_mark()) - return chomping, increment - - def scan_block_scalar_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError( - "while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" - % utf8(ch), self.get_mark()) - self.scan_line_break() - - def scan_block_scalar_indentation(self): - # See the specification for details. - chunks = [] - max_indent = 0 - end_mark = self.get_mark() - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() != u' ': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - else: - self.forward() - if self.column > max_indent: - max_indent = self.column - return chunks, max_indent, end_mark - - def scan_block_scalar_breaks(self, indent): - # See the specification for details. 
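        # Illustration (not from the original source): called with the block
        # scalar's computed indent, this skips up to `indent` leading spaces
        # on each empty line and collects the line breaks themselves, so
        # trailing blank lines end up in `chunks` for the chomping logic.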
-        chunks = []
-        end_mark = self.get_mark()
-        while self.column < indent and self.peek() == u' ':
-            self.forward()
-        while self.peek() in u'\r\n\x85\u2028\u2029':
-            chunks.append(self.scan_line_break())
-            end_mark = self.get_mark()
-            while self.column < indent and self.peek() == u' ':
-                self.forward()
-        return chunks, end_mark
-
-    def scan_flow_scalar(self, style):
-        # See the specification for details.
-        # Note that we lose indentation rules for quoted scalars. Quoted
-        # scalars don't need to adhere to indentation because " and ' clearly
-        # mark the beginning and the end of them. Therefore we are less
-        # restrictive than the specification requires. We only need to check
-        # that document separators are not included in scalars.
-        if style == '"':
-            double = True
-        else:
-            double = False
-        chunks = []
-        start_mark = self.get_mark()
-        quote = self.peek()
-        self.forward()
-        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
-        while self.peek() != quote:
-            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
-            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
-        self.forward()
-        end_mark = self.get_mark()
-        return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
-                           style)
-
-    ESCAPE_REPLACEMENTS = {
-        u'0': u'\0',
-        u'a': u'\x07',
-        u'b': u'\x08',
-        u't': u'\x09',
-        u'\t': u'\x09',
-        u'n': u'\x0A',
-        u'v': u'\x0B',
-        u'f': u'\x0C',
-        u'r': u'\x0D',
-        u'e': u'\x1B',
-        u' ': u'\x20',
-        u'\"': u'\"',
-        u'/': u'/',      # as per http://www.json.org/
-        u'\\': u'\\',
-        u'N': u'\x85',
-        u'_': u'\xA0',
-        u'L': u'\u2028',
-        u'P': u'\u2029',
-    }
-
-    ESCAPE_CODES = {
-        u'x': 2,
-        u'u': 4,
-        u'U': 8,
-    }
-
-    def scan_flow_scalar_non_spaces(self, double, start_mark):
-        # See the specification for details.
-        chunks = []
-        while True:
-            length = 0
-            while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
-                length += 1
-            if length:
-                chunks.append(self.prefix(length))
-                self.forward(length)
-            ch = self.peek()
-            if not double and ch == u'\'' and self.peek(1) == u'\'':
-                chunks.append(u'\'')
-                self.forward(2)
-            elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
-                chunks.append(ch)
-                self.forward()
-            elif double and ch == u'\\':
-                self.forward()
-                ch = self.peek()
-                if ch in self.ESCAPE_REPLACEMENTS:
-                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
-                    self.forward()
-                elif ch in self.ESCAPE_CODES:
-                    length = self.ESCAPE_CODES[ch]
-                    self.forward()
-                    for k in range(length):
-                        if self.peek(k) not in u'0123456789ABCDEFabcdef':
-                            raise ScannerError(
-                                "while scanning a double-quoted scalar",
-                                start_mark,
-                                "expected escape sequence of %d hexadecimal "
-                                "numbers, but found %r" %
-                                (length, utf8(self.peek(k))), self.get_mark())
-                    code = int(self.prefix(length), 16)
-                    chunks.append(unichr(code))
-                    self.forward(length)
-                elif ch in u'\r\n\x85\u2028\u2029':
-                    self.scan_line_break()
-                    chunks.extend(self.scan_flow_scalar_breaks(
-                        double, start_mark))
-                else:
-                    raise ScannerError(
-                        "while scanning a double-quoted scalar", start_mark,
-                        "found unknown escape character %r" % utf8(ch),
-                        self.get_mark())
-            else:
-                return chunks
-
-    def scan_flow_scalar_spaces(self, double, start_mark):
-        # See the specification for details.
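        # Illustration (not from the original source): a single line break
        # inside a quoted scalar folds to one space, so "a\n   b" loads as
        # u'a b', while an additional blank line produces an explicit u'\n'.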
- chunks = [] - length = 0 - while self.peek(length) in u' \t': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch == u'\0': - raise ScannerError( - "while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.get_mark()) - elif ch in u'\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != u'\n': - chunks.append(line_break) - elif not breaks: - chunks.append(u' ') - chunks.extend(breaks) - else: - chunks.append(whitespaces) - return chunks - - def scan_flow_scalar_breaks(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - # Instead of checking indentation, we check for document - # separators. - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a quoted scalar", - start_mark, - "found unexpected document separator", - self.get_mark()) - while self.peek() in u' \t': - self.forward() - if self.peek() in u'\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - else: - return chunks - - def scan_plain(self): - # See the specification for details. - # We add an additional restriction for the flow context: - # plain scalars in the flow context cannot contain ',', ':' and '?'. - # We also keep track of the `allow_simple_key` flag here. - # Indentation rules are loosed for the flow context. - chunks = [] - start_mark = self.get_mark() - end_mark = start_mark - indent = self.indent+1 - # We allow zero indentation for scalars, but then we need to check for - # document separators at the beginning of the line. - # if indent == 0: - # indent = 1 - spaces = [] - while True: - length = 0 - if self.peek() == u'#': - break - while True: - ch = self.peek(length) - if ch in u'\0 \t\r\n\x85\u2028\u2029' \ - or (not self.flow_level and ch == u':' and - self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ - or (self.flow_level and ch in u',:?[]{}'): - break - length += 1 - # It's not clear what we should do with ':' in the flow context. - if (self.flow_level and ch == u':' and - self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): - self.forward(length) - raise ScannerError( - "while scanning a plain scalar", start_mark, - "found unexpected ':'", self.get_mark(), - "Please check " - "http://pyyaml.org/wiki/YAMLColonInFlowContext " - "for details.") - if length == 0: - break - self.allow_simple_key = False - chunks.extend(spaces) - chunks.append(self.prefix(length)) - self.forward(length) - end_mark = self.get_mark() - spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == u'#' \ - or (not self.flow_level and self.column < indent): - break - - token = ScalarToken(u''.join(chunks), True, start_mark, end_mark) - if spaces and spaces[0] == '\n': - # Create a comment token to preserve the trailing line breaks. - comment = CommentToken(''.join(spaces) + '\n', start_mark, end_mark) - token.add_post_comment(comment) - return token - - - def scan_plain_spaces(self, indent, start_mark): - # See the specification for details. - # The specification is really confusing about tabs in plain scalars. - # We just forbid them completely. Do not use tabs in YAML! 
- chunks = [] - length = 0 - while self.peek(length) in u' ': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch in u'\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - self.allow_simple_key = True - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return - breaks = [] - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() == ' ': - self.forward() - else: - breaks.append(self.scan_line_break()) - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return - if line_break != u'\n': - chunks.append(line_break) - elif not breaks: - chunks.append(u' ') - chunks.extend(breaks) - elif whitespaces: - chunks.append(whitespaces) - return chunks - - def scan_tag_handle(self, name, start_mark): - # See the specification for details. - # For some strange reasons, the specification does not allow '_' in - # tag handles. I have allowed it anyway. - ch = self.peek() - if ch != u'!': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % utf8(ch), - self.get_mark()) - length = 1 - ch = self.peek(length) - if ch != u' ': - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' \ - or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if ch != u'!': - self.forward(length) - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % utf8(ch), - self.get_mark()) - length += 1 - value = self.prefix(length) - self.forward(length) - return value - - def scan_tag_uri(self, name, start_mark): - # See the specification for details. - # Note: we do not check if URI is well-formed. - chunks = [] - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.!~*\'()[]%': - if ch == u'%': - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - chunks.append(self.scan_uri_escapes(name, start_mark)) - else: - length += 1 - ch = self.peek(length) - if length: - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - if not chunks: - raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % utf8(ch), - self.get_mark()) - return u''.join(chunks) - - def scan_uri_escapes(self, name, start_mark): - # See the specification for details. 
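        # Illustration (not from the original source): in a tag such as
        # !<tag:example.com,2002:a%20b> the '%20' collected here is decoded
        # via UTF-8 to a space, yielding u'tag:example.com,2002:a b'.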
- code_bytes = [] - mark = self.get_mark() - while self.peek() == u'%': - self.forward() - for k in range(2): - if self.peek(k) not in u'0123456789ABCDEFabcdef': - raise ScannerError( - "while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers," - " but found %r" - % utf8(self.peek(k)), self.get_mark()) - if PY3: - code_bytes.append(int(self.prefix(2), 16)) - else: - code_bytes.append(chr(int(self.prefix(2), 16))) - self.forward(2) - try: - if PY3: - value = bytes(code_bytes).decode('utf-8') - else: - value = unicode(''.join(code_bytes), 'utf-8') - except UnicodeDecodeError as exc: - raise ScannerError("while scanning a %s" % name, start_mark, - str(exc), mark) - return value - - def scan_line_break(self): - # Transforms: - # '\r\n' : '\n' - # '\r' : '\n' - # '\n' : '\n' - # '\x85' : '\n' - # '\u2028' : '\u2028' - # '\u2029 : '\u2029' - # default : '' - ch = self.peek() - if ch in u'\r\n\x85': - if self.prefix(2) == u'\r\n': - self.forward(2) - else: - self.forward() - return u'\n' - elif ch in u'\u2028\u2029': - self.forward() - return ch - return u'' - - -class RoundTripScanner(Scanner): - def check_token(self, *choices): - # Check if the next token is one of the given types. - while self.need_more_tokens(): - self.fetch_more_tokens() - self._gather_comments() - if self.tokens: - if not choices: - return True - for choice in choices: - if isinstance(self.tokens[0], choice): - return True - return False - - def peek_token(self): - # Return the next token, but do not delete if from the queue. - while self.need_more_tokens(): - self.fetch_more_tokens() - self._gather_comments() - if self.tokens: - return self.tokens[0] - - def _gather_comments(self): - """combine multiple comment lines""" - comments = [] - if not self.tokens: - return comments - if isinstance(self.tokens[0], CommentToken): - comment = self.tokens.pop(0) - self.tokens_taken += 1 - # print('################ dropping', comment) - comments.append(comment) - while self.need_more_tokens(): - self.fetch_more_tokens() - if not self.tokens: - return comments - if isinstance(self.tokens[0], CommentToken): - self.tokens_taken += 1 - comment = self.tokens.pop(0) - # print 'dropping2', comment - comments.append(comment) - if len(comments) >= 1: - # print(' len', len(comments), comments) - # print(' com', comments[0], comments[0].start_mark.line) - # print(' tok', self.tokens[0].end_mark.line) - self.tokens[0].add_pre_comments(comments) - # pull in post comment on e.g. ':' - if not self.done and len(self.tokens) < 2: - self.fetch_more_tokens() - - def get_token(self): - # Return the next token. - while self.need_more_tokens(): - self.fetch_more_tokens() - self._gather_comments() - if self.tokens: - # only add post comment to single line tokens: - # scalar, value token. 
FlowXEndToken, otherwise - # hidden streamtokens could get them (leave them and they will be - # pre comments for the next map/seq - if len(self.tokens) > 1 and \ - isinstance(self.tokens[0], ( - ScalarToken, - ValueToken, - FlowSequenceEndToken, - FlowMappingEndToken, - )) and \ - isinstance(self.tokens[1], CommentToken) and \ - self.tokens[0].end_mark.line == self.tokens[1].start_mark.line: - self.tokens_taken += 1 - self.tokens[0].add_post_comment(self.tokens.pop(1)) - self.tokens_taken += 1 - return self.tokens.pop(0) - - def fetch_comment(self, comment): # XXXX - value, start_mark, end_mark = comment - self.tokens.append(CommentToken(value, start_mark, end_mark)) - - # scanner - - def scan_to_next_token(self): - # We ignore spaces, line breaks and comments. - # If we find a line break in the block context, we set the flag - # `allow_simple_key` on. - # The byte order mark is stripped if it's the first character in the - # stream. We do not yet support BOM inside the stream as the - # specification requires. Any such mark will be considered as a part - # of the document. - # - # TODO: We need to make tab handling rules more sane. A good rule is - # Tabs cannot precede tokens - # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - # KEY(block), VALUE(block), BLOCK-ENTRY - # So the checking code is - # if : - # self.allow_simple_keys = False - # We also need to add the check for `allow_simple_keys == True` to - # `unwind_indent` before issuing BLOCK-END. - # Scanners for block, flow, and plain scalars need to be modified. - - if self.index == 0 and self.peek() == u'\uFEFF': - self.forward() - found = False - while not found: - while self.peek() == u' ': - self.forward() - ch = self.peek() - if ch == u'#': - start_mark = self.get_mark() - comment = ch - self.forward() - while ch not in u'\0\r\n\x85\u2028\u2029': - ch = self.peek() - if ch == u'\0': # don't gobble the end-of-stream character - break - comment += ch - self.forward() - # gather any blank lines following the comment too - ch = self.scan_line_break() - while len(ch) > 0: - comment += ch - ch = self.scan_line_break() - end_mark = self.get_mark() - if not self.flow_level: - self.allow_simple_key = True - return comment, start_mark, end_mark - if self.scan_line_break(): - if not self.flow_level: - self.allow_simple_key = True - else: - found = True - -# try: -# import psyco -# psyco.bind(Scanner) -# except ImportError: -# pass diff --git a/py/serializer.py b/py/serializer.py deleted file mode 100644 index 0bdc558..0000000 --- a/py/serializer.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import absolute_import - -__all__ = ['Serializer', 'SerializerError'] - -import re - -from .error import YAMLError -from .events import * -from .nodes import * - -from .compat import nprint, DBG_NODE, dbg - - -class SerializerError(YAMLError): - pass - - -class Serializer(object): - - # 'id' and 3+ numbers, but not 000 - ANCHOR_TEMPLATE = u'id%03d' - ANCHOR_RE = re.compile(u'id(?!000$)\\d{3,}') - - - def __init__(self, encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - self.use_encoding = encoding - self.use_explicit_start = explicit_start - self.use_explicit_end = explicit_end - self.use_version = version - self.use_tags = tags - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - self.closed = None - self._templated_id = None - - def open(self): - if self.closed is None: - self.emit(StreamStartEvent(encoding=self.use_encoding)) - self.closed = False - elif self.closed: - raise 
SerializerError("serializer is closed") - else: - raise SerializerError("serializer is already opened") - - def close(self): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif not self.closed: - self.emit(StreamEndEvent()) - self.closed = True - - # def __del__(self): - # self.close() - - def serialize(self, node): - if dbg(DBG_NODE): - nprint('Serializing nodes') - node.dump() - if self.closed is None: - raise SerializerError("serializer is not opened") - elif self.closed: - raise SerializerError("serializer is closed") - self.emit(DocumentStartEvent(explicit=self.use_explicit_start, - version=self.use_version, - tags=self.use_tags)) - self.anchor_node(node) - self.serialize_node(node, None, None) - self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - - def anchor_node(self, node): - if node in self.anchors: - if self.anchors[node] is None: - self.anchors[node] = self.generate_anchor(node) - else: - anchor = None - try: - if node.anchor.always_dump: - anchor = node.anchor.value - except: - pass - self.anchors[node] = anchor - if isinstance(node, SequenceNode): - for item in node.value: - self.anchor_node(item) - elif isinstance(node, MappingNode): - for key, value in node.value: - self.anchor_node(key) - self.anchor_node(value) - - def generate_anchor(self, node): - try: - anchor = node.anchor.value - except: - anchor = None - if anchor is None: - self.last_anchor_id += 1 - return self.ANCHOR_TEMPLATE % self.last_anchor_id - return anchor - - def serialize_node(self, node, parent, index): - alias = self.anchors[node] - if node in self.serialized_nodes: - self.emit(AliasEvent(alias)) - else: - self.serialized_nodes[node] = True - self.descend_resolver(parent, index) - if isinstance(node, ScalarNode): - detected_tag = self.resolve(ScalarNode, node.value, - (True, False)) - default_tag = self.resolve(ScalarNode, node.value, - (False, True)) - implicit = \ - (node.tag == detected_tag), (node.tag == default_tag) - self.emit(ScalarEvent(alias, node.tag, implicit, node.value, - style=node.style, comment=node.comment)) - elif isinstance(node, SequenceNode): - implicit = (node.tag - == self.resolve(SequenceNode, node.value, True)) - comment = node.comment - # print('comment >>>>>>>>>>>>>.', comment, node.flow_style) - end_comment = None - seq_comment = None - if node.flow_style is True: - if comment: # eol comment on flow style sequence - seq_comment = comment[0] - # comment[0] = None - if comment and len(comment) > 2: - end_comment = comment[2] - else: - end_comment = None - self.emit(SequenceStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style, - comment=node.comment)) - index = 0 - for item in node.value: - self.serialize_node(item, node, index) - index += 1 - self.emit(SequenceEndEvent(comment=[seq_comment, end_comment])) - elif isinstance(node, MappingNode): - implicit = (node.tag - == self.resolve(MappingNode, node.value, True)) - comment = node.comment - end_comment = None - map_comment = None - if node.flow_style is True: - if comment: # eol comment on flow style sequence - map_comment = comment[0] - # comment[0] = None - if comment and len(comment) > 2: - end_comment = comment[2] - self.emit(MappingStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style, - comment=node.comment)) - for key, value in node.value: - self.serialize_node(key, node, None) - self.serialize_node(value, node, key) - self.emit(MappingEndEvent(comment=[map_comment, 
end_comment])) - self.ascend_resolver() - -def templated_id(s): - return Serializer.ANCHOR_RE.match(s) \ No newline at end of file diff --git a/py/tokens.py b/py/tokens.py deleted file mode 100644 index 452803f..0000000 --- a/py/tokens.py +++ /dev/null @@ -1,188 +0,0 @@ - -class Token(object): - def __init__(self, start_mark, end_mark): - self.start_mark = start_mark - self.end_mark = end_mark - - def __repr__(self): - attributes = [key for key in self.__dict__ - if not key.endswith('_mark')] - attributes.sort() - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - - def add_post_comment(self, comment): - if not hasattr(self, '_comment'): - self._comment = [None, None] - self._comment[0] = comment - - def add_pre_comments(self, comments): - if not hasattr(self, '_comment'): - self._comment = [None, None] - assert self._comment[1] is None - self._comment[1] = comments - - def get_comment(self): - return getattr(self, '_comment', None) - - @property - def comment(self): - return getattr(self, '_comment', None) - - def move_comment(self, target): - """move a comment from this token to target (normally next token) - used to combine e.g. comments before a BlockEntryToken to the - ScalarToken that follows it - """ - c = self.comment - if c is None: - return - # don't push beyond last element - if isinstance(target, StreamEndToken): - return - delattr(self, '_comment') - tc = target.comment - if not tc: # target comment, just insert - target._comment = c - return self - if c[0] and tc[0] or c[1] and tc[1]: - raise NotImplementedError('overlap in comment %r %r' % c, tc) - if c[0]: - tc[0] = c[0] - if c[1]: - tc[1] = c[1] - return self - - def split_comment(self): - """ split the post part of a comment, and return it - as comment to be added. Delete second part if [None, None] - abc: # this goes to sequence - # this goes to first element - - first element - """ - comment = self.comment - if comment is None or comment[0] is None: - return None # nothing to do - ret_val = [comment[0], None] - if comment[1] is None: - delattr(self, '_comment') - return ret_val - - -# class BOMToken(Token): -# id = '' - -class DirectiveToken(Token): - id = '' - - def __init__(self, name, value, start_mark, end_mark): - Token.__init__(self, start_mark, end_mark) - self.name = name - self.value = value - - -class DocumentStartToken(Token): - id = '' - - -class DocumentEndToken(Token): - id = '' - - -class StreamStartToken(Token): - id = '' - - def __init__(self, start_mark=None, end_mark=None, encoding=None): - Token.__init__(self, start_mark, end_mark) - self.encoding = encoding - - -class StreamEndToken(Token): - id = '' - - -class BlockSequenceStartToken(Token): - id = '' - - -class BlockMappingStartToken(Token): - id = '' - - -class BlockEndToken(Token): - id = '' - - -class FlowSequenceStartToken(Token): - id = '[' - - -class FlowMappingStartToken(Token): - id = '{' - - -class FlowSequenceEndToken(Token): - id = ']' - - -class FlowMappingEndToken(Token): - id = '}' - - -class KeyToken(Token): - id = '?' 
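How the comment plumbing above is meant to be used: move_comment() hands a comment from a structural token to the content token that follows it. A sketch using the token classes defined in this module, with None marks and a plain string standing in for the CommentToken the scanner really attaches:

entry = BlockEntryToken(None, None)
entry.add_post_comment('# belongs to the scalar, not to the dash')
scalar = ScalarToken('value', True, None, None)
entry.move_comment(scalar)  # comment travels to the following scalar
assert entry.comment is None
assert scalar.comment == ['# belongs to the scalar, not to the dash', None]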
- - -class ValueToken(Token): - id = ':' - - -class BlockEntryToken(Token): - id = '-' - - -class FlowEntryToken(Token): - id = ',' - - -class AliasToken(Token): - id = '' - - def __init__(self, value, start_mark, end_mark): - Token.__init__(self, start_mark, end_mark) - self.value = value - - -class AnchorToken(Token): - id = '' - - def __init__(self, value, start_mark, end_mark): - Token.__init__(self, start_mark, end_mark) - self.value = value - - -class TagToken(Token): - id = '' - - def __init__(self, value, start_mark, end_mark): - Token.__init__(self, start_mark, end_mark) - self.value = value - - -class ScalarToken(Token): - id = '' - - def __init__(self, value, plain, start_mark, end_mark, style=None): - Token.__init__(self, start_mark, end_mark) - self.value = value - self.plain = plain - self.style = style - - -class CommentToken(Token): - id = '' - - def __init__(self, value, start_mark, end_mark): - Token.__init__(self, start_mark, end_mark) - self.value = value diff --git a/py/yaml.py b/py/yaml.py deleted file mode 100644 index 6eda0ab..0000000 --- a/py/yaml.py +++ /dev/null @@ -1,527 +0,0 @@ -# coding: utf-8 - -""" -this is the source for the yaml utility -""" - -from __future__ import print_function -from __future__ import absolute_import - - -import sys -import os -import io -from textwrap import dedent - -from ruamel.std.argparse import ProgramBase, option, sub_parser, version, \ - CountAction, SmartFormatter -# from ruamel.appconfig import AppConfig -from . import __version__ - -import ruamel.yaml -from ruamel.yaml.compat import ordereddict, DBG_TOKEN, DBG_EVENT, DBG_NODE -from ruamel.yaml.configobjwalker import configobj_walker - - -def yaml_to_html2(code): - buf = io.StringIO() - buf.write(u'\n') - buf.write(u'\n') - buf.write(u'\n') - buf.write(u'\n') - buf.write(u'\n') - if isinstance(code, dict): - for k in code: - buf.write(u' \n') - for x in [k] + code[k]: - buf.write(u' \n'.format(x)) - buf.write(u' \n') - buf.write(u'
<td>{0}</td>
\n') - buf.write(u'\n') - buf.write(u'\n') - return buf.getvalue() - - -def yaml_to_html(code, level): - if level == 2: - return yaml_to_html2(code) - elif level == 3: - return yaml_to_html3(code) - raise NotImplementedError - - -class YAML: - def __init__(self, args, config): - self._args = args - self._config = config - - def from_ini(self): - try: - from configobj import ConfigObj - except ImportError: - print("to convert from .ini you need to install configobj:") - print(" pip install configobj:") - sys.exit(1) - errors = 0 - doc = [] - cfg = ConfigObj(open(self._args.file)) - if self._args.test: - print(ruamel.yaml.dump(cfg)) - return - for line in configobj_walker(cfg): - doc.append(line) - joined = '\n'.join(doc) - rto = self.round_trip_single(joined) - if self._args.basename: - out_fn = os.path.splitext(self._args.file)[0] + '.yaml' - if self._args.verbose > 0: - print('writing', out_fn) - with open(out_fn, 'w') as fp: - print(rto, end='', file=fp) # already has eol at eof - else: - print(rto, end='') # already has eol at eof - # print() - # if rto != joined: - # self.diff(joined, rto, "test.ini") - return 1 if errors else 0 - - def test(self): - self._args.event = self._args.node = True - dbg = 0 - if self._args.event: - dbg |= DBG_EVENT - if self._args.node: - dbg |= DBG_NODE - os.environ['YAMLDEBUG'] = str(dbg) - if False: - x = ruamel.yaml.comment.Comment() - print(sys.getsizeof(x)) - return - - def print_input(input): - print(input, end='') - print('-' * 15) - - def print_tokens(input): - print('Tokens (from scanner) ' + '#' * 50) - tokens = ruamel.yaml.scan(input, ruamel.yaml.RoundTripLoader) - for idx, token in enumerate(tokens): - # print(token.start_mark) - # print(token.end_mark) - print("{0:2} {1}".format(idx, token)) - - def rt_events(input): - dumper = ruamel.yaml.RoundTripDumper - events = ruamel.yaml.parse(input, ruamel.yaml.RoundTripLoader) - print(ruamel.yaml.emit(events, indent=False, Dumper=dumper)) - - def rt_nodes(input): - dumper = ruamel.yaml.RoundTripDumper - nodes = ruamel.yaml.compose(input, ruamel.yaml.RoundTripLoader) - print(ruamel.yaml.serialize(nodes, indent=False, Dumper=dumper)) - - def print_events(input): - print('Events (from parser) ' + '#' * 50) - events = ruamel.yaml.parse(input, ruamel.yaml.RoundTripLoader) - for idx, event in enumerate(events): - print("{0:2} {1}".format(idx, event)) - - def print_nodes(input): - print('Nodes (from composer) ' + '#' * 50) - x = ruamel.yaml.compose(input, ruamel.yaml.RoundTripLoader) - x.dump() # dump the node - - def scan_file(file_name): - inp = open(file_name).read() - print('---------\n', file_name) - print('---', repr(self.first_non_empty_line(inp))) - print('<<<', repr(self.last_non_empty_line(inp))) - - if False: - for x in self._args.file: - scan_file(x) - return - - if True: - import pickle - lines = 0 - for x in self._args.file: - print(x, end=' ') - if x.endswith('.yaml'): - data = ruamel.yaml.load(open(x)) - print(len(data), end=' ') - lines += len(data) - out_name = x.replace('.yaml', '.pickle') - with open(out_name, 'w') as fp: - pickle.dump(data, fp) - elif x.endswith('.pickle'): - with open(x) as fp: - data = pickle.load(fp) - print(len(data), end=' ') - lines += len(data) - print() - print('lines', lines) - return - - input = dedent(""" - application: web2py - version: 1 - runtime: python27 - api_version: 1 - threadsafe: false - - default_expiration: "24h" - - handlers: - - url: /(?P.+?)/static/(?P.+) - static_files: 'applications/\\1/static/\\2' - upload: applications/(.+?)/static/(.+) - 
secure: optional - """) - - input = dedent("""\ - a: - b: foo - c: bar - """) - - print_input(input) - print_tokens(input) - print_events(input) - # rt_events(input) - print_nodes(input) - # rt_nodes(input) - - data = ruamel.yaml.load(input, ruamel.yaml.RoundTripLoader) - print('data', data) - if False: - data['american'][0] = 'Fijenoord' - l = data['american'] - l = data - if True: - # print type(l), '\n', dir(l) - comment = getattr(l, '_yaml_comment', None) - print('comment_1', comment) - dumper = ruamel.yaml.RoundTripDumper - print('>>>>>>>>>>') - # print(ruamel.yaml.dump(data, default_flow_style=False, - # Dumper=dumper), '===========') - print("{0}=========".format(ruamel.yaml.dump( - data, - indent=4, - Dumper=dumper))) - comment = getattr(l, '_yaml_comment', None) - print('comment_2', comment) - - # test end - - def from_json(self): - # use roundtrip to preserve order - errors = 0 - docs = [] - for file_name in self._args.file: - if file_name == '-': - inp = sys.stdin.read() - else: - inp = open(file_name).read() - loader = ruamel.yaml.Loader # RoundTripLoader - docs.append(ruamel.yaml.load(inp, loader)) - #if self._args.literal: - # from ruamel.yaml.convert.literal import walk_tree - # for doc in docs: - # walk_tree(doc) - dumper = ruamel.yaml.RoundTripDumper - print(ruamel.yaml.dump_all( - docs, Dumper=dumper, - default_flow_style=self._args.flow)) - return 1 if errors else 0 - - def to_htmltable(self): - def vals(x): - if isinstance(x, list): - return x - if isinstance(x, (dict, ordereddict)): - return x.values() - return [] - - def seek_levels(x, count=0): - my_level = count - sub_level = 0 - for v in vals(x): - if v is None: - continue - sub_level = max(sub_level, seek_levels(v, my_level+1)) - return max(my_level, sub_level) - - inp = open(self._args.file).read() - loader = ruamel.yaml.RoundTripLoader - code = ruamel.yaml.load(inp, loader) - # assert isinstance(code, [ruamel.yaml.comments.CommentedMap]) - assert isinstance(code, (dict, list)) - levels = seek_levels(code) - if self._args.level: - print("levels:", levels) - return - print(yaml_to_html(code, levels)) - - def from_html(self): - from .convert.html import HTML2YAML - h2y = HTML2YAML(self._args) - with open(self._args.file) as fp: - print(h2y(fp.read())) - - def from_csv(self): - from .convert.from_csv import CSV2YAML - c2y = CSV2YAML(self._args) - c2y(self._args.file) - - def round_trip(self): - errors = 0 - warnings = 0 - for file_name in self._args.file: - inp = open(file_name).read() - e, w, stabilize, outp = self.round_trip_input(inp) - if w == 0: - if self._args.verbose > 0: - print(u"{0}: ok".format(file_name)) - continue - if self._args.save: - backup_file_name = file_name + '.orig' - if not os.path.exists(backup_file_name): - os.rename(file_name, backup_file_name) - with open(file_name, 'w') as ofp: - ofp.write(outp) - if not self._args.save or self._args.verbose > 0: - print("{0}:\n {1}".format(file_name, u', '.join(stabilize))) - self.diff(inp, outp, file_name) - errors += e - warnings += w - if errors > 0: - return 2 - if warnings > 0: - return 1 - return 0 - - def round_trip_input(self, inp): - errors = 0 - warnings = 0 - stabilize = [] - outp = self.round_trip_single(inp) - if inp == outp: - return errors, warnings, stabilize, outp - warnings += 1 - if inp.split() != outp.split(): - errors += 1 - stabilize.append(u"drops info on round trip") - else: - if self.round_trip_single(outp) == outp: - stabilize.append(u"stabilizes on second round trip") - else: - errors += 1 - ncoutp = 
self.round_trip_single(inp, drop_comment=True) - if self.round_trip_single(ncoutp, drop_comment=True) == ncoutp: - stabilize.append(u"ok without comments") - return errors, warnings, stabilize, outp - - def round_trip_single(self, inp, drop_comment=False): - explicit_start=self.first_non_empty_line(inp) == '---' - explicit_end=self.last_non_empty_line(inp) == '...' - indent = self._args.indent - loader = ruamel.yaml.SafeLoader if drop_comment else \ - ruamel.yaml.RoundTripLoader - code = ruamel.yaml.load(inp, loader) - dumper = ruamel.yaml.SafeDumper if drop_comment else \ - ruamel.yaml.RoundTripDumper - return ruamel.yaml.dump( - code, - Dumper=dumper, - indent=indent, - explicit_start=explicit_start, - explicit_end=explicit_end, - ) - - def first_non_empty_line(self, txt): - """return the first non-empty line of a block of text (stripped) - do not split or strip the complete txt - """ - pos = txt.find('\n') - prev_pos = 0 - while pos >= 0: - segment = txt[prev_pos:pos].strip() - if segment: - break - # print (pos, repr(segment)) - prev_pos = pos - pos = txt.find('\n', pos+1) - return segment - - def last_non_empty_line(self, txt): - """return the last non-empty line of a block of text (stripped) - do not split or strip the complete txt - """ - pos = txt.rfind('\n') - prev_pos = len(txt) - maxloop = 10 - while pos >= 0: - segment = txt[pos:prev_pos].strip() - if segment: - break - # print (pos, repr(segment)) - prev_pos = pos - pos = txt.rfind('\n', 0, pos-1) - maxloop -= 1 - if maxloop < 0: - break - return segment - - def diff(self, inp, outp, file_name): - import difflib - inl = inp.splitlines(True) # True for keepends - outl = outp.splitlines(True) - diff = difflib.unified_diff(inl, outl, file_name, 'round trip YAML') - # 2.6 difflib has trailing space on filename lines %-) - strip_trailing_space = sys.version_info < (2, 7) - for line in diff: - if strip_trailing_space and line[:4] in ['--- ', '+++ ']: - line = line.rstrip() + '\n' - sys.stdout.write(line) - - -def to_stdout(*args): - sys.stdout.write(' '.join(args)) - - -class YAML_Cmd(ProgramBase): - def __init__(self): - super(YAML_Cmd, self).__init__( - formatter_class=SmartFormatter, - aliases=True, - ) - self._config = None - - # you can put these on __init__, but subclassing YAML_Cmd - # will cause that to break - @option('--verbose', '-v', - help='increase verbosity level', action=CountAction, - const=1, nargs=0, default=0, global_option=True) - @option('--indent', type=int, global_option=True) - @version('version: ' + __version__) - def _pb_init(self): - # special name for which attribs are included in help - pass - - def run(self): - self._yaml = YAML(self._args, self._config) - if self._args.func: - return self._args.func() - - def parse_args(self): - # self._config = AppConfig( - # 'yaml', - # filename=AppConfig.check, - # parser=self._parser, # sets --config option - # warning=to_stdout, - # add_save=False, # add a --save-defaults (to config) option - # ) - # self._config._file_name can be handed to objects that need - # to get other information from the configuration directory - # self._config.set_defaults() - self._parse_args() - - @sub_parser( - aliases=['round-trip'], - help='test round trip on YAML data', - description='test round trip on YAML data', - ) - @option('--save', action='store_true', help="""save the rewritten data back - to the input file (if it doesn't exist a '.orig' backup will be made) - """) - @option('file', nargs='+') - def rt(self): - return self._yaml.round_trip() - - @sub_parser( - 
aliases=['from-json'], - help='convert json to block YAML', - description='convert json to block YAML', - ) - @option('--flow', action='store_true', - help='use flow instead of block style') - #@option('--literal', action='store_true', - # help='convert scalars with newlines to literal block style') - @option('file', nargs='+') - def json(self): - return self._yaml.from_json() - - @sub_parser( - aliases=['from-ini'], - help='convert .ini/config to block YAML', - description='convert .ini/config to block YAML', - ) - @option('--basename', '-b', action='store_true', - help='re-use basename of file for .yaml file, instead of writing' - ' to stdout') - @option('--test', action='store_true') - @option('file') - def ini(self): - return self._yaml.from_ini() - - @sub_parser( - #aliases=['to-html'], - help='convert YAML to html tables', - description="""convert YAML to html tables. If hierarchy is two deep ( - sequence/mapping over sequence/mapping) this is mapped to one table - If the hierarchy is three deep, a list of 2 deep tables is assumed, but - any non-list/mapp second level items are considered text. - Row level keys are inserted in first column (unless --no-row-key), - item level keys are used as classes for the TD. - """, - ) - @option("--level", action='store_true', help="print # levels and exit") - @option('file') - def htmltable(self): - return self._yaml.to_htmltable() - - @sub_parser('from-html', - help='convert HTML to YAML', - description="""convert HTML to YAML. Tags become keys with as - value a list. The first item in the list is a key value pair with - key ".attribute" if attributes are available followed by tag and string - segment items. Lists with one item are by default flattened. - """, - ) - @option("--no-body", action='store_true', - help="drop top level html and body from HTML code segments") - @option("--strip", action='store_true', - help="strip whitespace surrounding strings") - @option('file') - def from_html(self): - return self._yaml.from_html() - - @sub_parser('from-csv', - aliases=['csv'], - help='convert CSV to YAML', - description="""convert CSV to YAML. - By default generates a list of rows, with the items in a 2nd level - list. - """, - ) - @option('--delimiter') - #@option('--quotechar') - @option('file') - def from_csv(self): - return self._yaml.from_csv() - - if 'test' in sys.argv: - @sub_parser( - description='internal test function', - ) - @option('file', nargs='*') - def test(self): - return self._yaml.test() - - -def main(): - n = YAML_Cmd() - n.parse_args() - sys.exit(n.run()) diff --git a/reader.py b/reader.py new file mode 100644 index 0000000..7bbec42 --- /dev/null +++ b/reader.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import +# This module contains abstractions for the input stream. You don't have to +# looks further, there are no pretty code. +# +# We define two classes here. +# +# Mark(source, line, column) +# It's just a record and its only use is producing nice error messages. +# Parser does not use it for any other purposes. +# +# Reader(source, data) +# Reader determines the encoding of `data` and converts it to unicode. +# Reader provides the following methods and attributes: +# reader.peek(length=1) - return the next `length` characters +# reader.forward(length=1) - move the current position to `length` +# characters. +# reader.index - the number of the current character. +# reader.line, stream.column - the line and the column of the current +# character. 
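A quick usage sketch of the Reader interface documented above, assuming this package layout (module ruamel.yaml.reader):

from ruamel.yaml.reader import Reader

r = Reader(u'abc')
assert r.peek() == u'a' and r.peek(1) == u'b'  # peek() never consumes
assert r.prefix(2) == u'ab'
r.forward(2)                  # forward() advances index, line and column
assert (r.index, r.line, r.column) == (2, 0, 2)
assert r.peek() == u'c'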
+ +__all__ = ['Reader', 'ReaderError'] + +import codecs +import re + +from .error import YAMLError, Mark +from .compat import text_type, binary_type, PY3 + + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, binary_type): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to a unicode string, + # - checks if characters are in allowed range, + # - adds '\0' to the end. + + # Reader accepts + # - a `str` object (PY2) / a `bytes` object (PY3), + # - a `unicode` object (PY2) / a `str` object (PY3), + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = u'' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, text_type): + self.name = "" + self.check_printable(stream) + self.buffer = stream+u'\0' + elif isinstance(stream, binary_type): + self.name = "" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "") + self.eof = False + self.raw_buffer = None + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in u'\n\x85\u2028\u2029' \ + or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + self.line += 1 + self.column = 0 + elif ch != u'\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and (self.raw_buffer is None or + len(self.raw_buffer) < 2): + self.update_raw() + if isinstance(self.raw_buffer, binary_type): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + self.raw_decode = codecs.utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = codecs.utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = codecs.utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile( + u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = 
match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError as exc: + if PY3: + character = self.raw_buffer[exc.start] + else: + character = exc.object[exc.start] + if self.stream is not None: + position = self.stream_pointer - \ + len(self.raw_buffer) + exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += u'\0' + self.raw_buffer = None + break + + def update_raw(self, size=None): + if size is None: + size = 4096 if PY3 else 1024 + data = self.stream.read(size) + if self.raw_buffer is None: + self.raw_buffer = data + else: + self.raw_buffer += data + self.stream_pointer += len(data) + if not data: + self.eof = True + +# try: +# import psyco +# psyco.bind(Reader) +# except ImportError: +# pass diff --git a/representer.py b/representer.py new file mode 100644 index 0000000..63c0d0d --- /dev/null +++ b/representer.py @@ -0,0 +1,808 @@ +from __future__ import absolute_import +from __future__ import print_function + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 'RepresenterError', 'RoundTripRepresenter'] + +from .error import * +from .nodes import * +from .compat import text_type, binary_type, to_unicode, PY2, PY3, \ + ordereddict, nprint +from .scalarstring import * + +import datetime +import sys +import types +if PY3: + import copyreg + import base64 +else: + import copy_reg as copyreg + + +class RepresenterError(YAMLError): + pass + + +class BaseRepresenter(object): + + yaml_representers = {} + yaml_multi_representers = {} + + def __init__(self, default_style=None, default_flow_style=None): + self.default_style = default_style + self.default_flow_style = default_flow_style + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + def represent(self, data): + node = self.represent_data(data) + self.serialize(node) + self.represented_objects = {} + self.object_keeper = [] + self.alias_key = None + + if PY2: + def get_classobj_bases(self, cls): + bases = [cls] + for base in cls.__bases__: + bases.extend(self.get_classobj_bases(base)) + return bases + + def represent_data(self, data): + if self.ignore_aliases(data): + self.alias_key = None + else: + self.alias_key = id(data) + if self.alias_key is not None: + if self.alias_key in self.represented_objects: + node = self.represented_objects[self.alias_key] + # if node is None: + # raise RepresenterError( + # "recursive objects are not allowed: %r" % data) + return node + # self.represented_objects[alias_key] = None + self.object_keeper.append(data) + data_types = type(data).__mro__ + if PY2: + # if type(data) is types.InstanceType: + if isinstance(data, types.InstanceType): + data_types = self.get_classobj_bases(data.__class__) + \ + list(data_types) + if data_types[0] in self.yaml_representers: + node = self.yaml_representers[data_types[0]](self, data) + else: + for data_type 
in data_types: + if data_type in self.yaml_multi_representers: + node = self.yaml_multi_representers[data_type](self, data) + break + else: + if None in self.yaml_multi_representers: + node = self.yaml_multi_representers[None](self, data) + elif None in self.yaml_representers: + node = self.yaml_representers[None](self, data) + else: + node = ScalarNode(None, text_type(data)) + # if alias_key is not None: + # self.represented_objects[alias_key] = node + return node + + def represent_key(self, data): + """ + David Fraser: Extract a method to represent keys in mappings, so that + a subclass can choose not to quote them (for example) + used in repesent_mapping + https://bitbucket.org/davidfraser/pyyaml/commits/d81df6eb95f20cac4a79eed95ae553b5c6f77b8c + """ + return self.represent_data(data) + + @classmethod + def add_representer(cls, data_type, representer): + if 'yaml_representers' not in cls.__dict__: + cls.yaml_representers = cls.yaml_representers.copy() + cls.yaml_representers[data_type] = representer + + @classmethod + def add_multi_representer(cls, data_type, representer): + if 'yaml_multi_representers' not in cls.__dict__: + cls.yaml_multi_representers = cls.yaml_multi_representers.copy() + cls.yaml_multi_representers[data_type] = representer + + def represent_scalar(self, tag, value, style=None): + if style is None: + style = self.default_style + node = ScalarNode(tag, value, style=style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + return node + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item in sequence: + node_item = self.represent_data(item) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_omap(self, tag, omap, flow_style=None): + value = [] + node = SequenceNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + for item_key in omap: + item_val = omap[item_key] + node_item = self.represent_data({item_key: item_val}) + # if not (isinstance(node_item, ScalarNode) \ + # and not node_item.style): + # best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + node = MappingNode(tag, value, flow_style=flow_style) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + if hasattr(mapping, 'items'): + mapping = list(mapping.items()) + try: + mapping = sorted(mapping) + except TypeError: + pass + for item_key, item_value in mapping: + node_key = self.represent_key(item_key) + node_value = self.represent_data(item_value) + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not + node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style 
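What the add_representer/add_multi_representer hooks above enable, as a hedged sketch (assuming the module-level add_representer helper from main.py, as in PyYAML; the exact scalar rendering may differ):

import ruamel.yaml

class Celsius(float):
    pass

def represent_celsius(dumper, data):
    return dumper.represent_scalar(u'!celsius', u'%.1f' % data)

ruamel.yaml.add_representer(Celsius, represent_celsius)
ruamel.yaml.dump(Celsius(21.5))  # roughly "!celsius 21.5"; the exact-type
                                 # entry wins over the float representer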
+ else: + node.flow_style = best_style + return node + + def ignore_aliases(self, data): + return False + + +class SafeRepresenter(BaseRepresenter): + + def ignore_aliases(self, data): + if data in [None, ()]: + return True + if isinstance(data, (binary_type, text_type, bool, int, float)): + return True + + def represent_none(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:null', + u'null') + + if PY3: + def represent_str(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:str', data) + + def represent_binary(self, data): + if hasattr(base64, 'encodebytes'): + data = base64.encodebytes(data).decode('ascii') + else: + data = base64.encodestring(data).decode('ascii') + return self.represent_scalar(u'tag:yaml.org,2002:binary', data, + style='|') + else: + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:str', data) + + def represent_bool(self, data): + if data: + value = u'true' + else: + value = u'false' + return self.represent_scalar(u'tag:yaml.org,2002:bool', value) + + def represent_int(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', text_type(data)) + + if PY2: + def represent_long(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', + text_type(data)) + + inf_value = 1e300 + while repr(inf_value) != repr(inf_value*inf_value): + inf_value *= inf_value + + def represent_float(self, data): + if data != data or (data == 0.0 and data == 1.0): + value = u'.nan' + elif data == self.inf_value: + value = u'.inf' + elif data == -self.inf_value: + value = u'-.inf' + else: + value = to_unicode(repr(data)).lower() + # Note that in some cases `repr(data)` represents a float number + # without the decimal parts. For instance: + # >>> repr(1e17) + # '1e17' + # Unfortunately, this is not a valid float representation according + # to the definition of the `!!float` tag. We fix this by adding + # '.0' before the 'e' symbol. + if u'.' 
not in value and u'e' in value: + value = value.replace(u'e', u'.0e', 1) + return self.represent_scalar(u'tag:yaml.org,2002:float', value) + + def represent_list(self, data): + # pairs = (len(data) > 0 and isinstance(data, list)) + # if pairs: + # for item in data: + # if not isinstance(item, tuple) or len(item) != 2: + # pairs = False + # break + # if not pairs: + return self.represent_sequence(u'tag:yaml.org,2002:seq', data) + # value = [] + # for item_key, item_value in data: + # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', + # [(item_key, item_value)])) + # return SequenceNode(u'tag:yaml.org,2002:pairs', value) + + def represent_dict(self, data): + return self.represent_mapping(u'tag:yaml.org,2002:map', data) + + def represent_ordereddict(self, data): + return self.represent_omap(u'tag:yaml.org,2002:omap', data) + + def represent_set(self, data): + value = {} + for key in data: + value[key] = None + return self.represent_mapping(u'tag:yaml.org,2002:set', value) + + def represent_date(self, data): + value = to_unicode(data.isoformat()) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_datetime(self, data): + value = to_unicode(data.isoformat(' ')) + return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) + + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + return self.represent_mapping(tag, state, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) + +SafeRepresenter.add_representer(type(None), + SafeRepresenter.represent_none) + +SafeRepresenter.add_representer(str, + SafeRepresenter.represent_str) + +if PY2: + SafeRepresenter.add_representer(unicode, + SafeRepresenter.represent_unicode) +else: + SafeRepresenter.add_representer(bytes, + SafeRepresenter.represent_binary) + +SafeRepresenter.add_representer(bool, + SafeRepresenter.represent_bool) + +SafeRepresenter.add_representer(int, + SafeRepresenter.represent_int) + +if PY2: + SafeRepresenter.add_representer(long, + SafeRepresenter.represent_long) + +SafeRepresenter.add_representer(float, + SafeRepresenter.represent_float) + +SafeRepresenter.add_representer(list, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(tuple, + SafeRepresenter.represent_list) + +SafeRepresenter.add_representer(dict, + SafeRepresenter.represent_dict) + +SafeRepresenter.add_representer(set, + SafeRepresenter.represent_set) + +SafeRepresenter.add_representer(ordereddict, + SafeRepresenter.represent_ordereddict) + +SafeRepresenter.add_representer(datetime.date, + SafeRepresenter.represent_date) + +SafeRepresenter.add_representer(datetime.datetime, + SafeRepresenter.represent_datetime) + +SafeRepresenter.add_representer(None, + SafeRepresenter.represent_undefined) + + +class Representer(SafeRepresenter): + if PY2: + def represent_str(self, data): + tag = None + style = None + try: + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + except UnicodeDecodeError: + try: + data = unicode(data, 'utf-8') + tag = u'tag:yaml.org,2002:python/str' + except UnicodeDecodeError: + data = data.encode('base64') + tag = u'tag:yaml.org,2002:binary' + style = '|' + return self.represent_scalar(tag, data, style=style) + + def represent_unicode(self, data): + tag = None + try: + data.encode('ascii') + tag = u'tag:yaml.org,2002:python/unicode' + except UnicodeEncodeError: + tag = 
u'tag:yaml.org,2002:str' + return self.represent_scalar(tag, data) + + def represent_long(self, data): + tag = u'tag:yaml.org,2002:int' + if int(data) is not data: + tag = u'tag:yaml.org,2002:python/long' + return self.represent_scalar(tag, to_unicode(data)) + + def represent_complex(self, data): + if data.imag == 0.0: + data = u'%r' % data.real + elif data.real == 0.0: + data = u'%rj' % data.imag + elif data.imag > 0: + data = u'%r+%rj' % (data.real, data.imag) + else: + data = u'%r%rj' % (data.real, data.imag) + return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) + + def represent_tuple(self, data): + return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) + + def represent_name(self, data): + name = u'%s.%s' % (data.__module__, data.__name__) + return self.represent_scalar(u'tag:yaml.org,2002:python/name:' + + name, u'') + + def represent_module(self, data): + return self.represent_scalar( + u'tag:yaml.org,2002:python/module:'+data.__name__, u'') + + if PY2: + def represent_instance(self, data): + # For instances of classic classes, we use __getinitargs__ and + # __getstate__ to serialize the data. + + # If data.__getinitargs__ exists, the object must be reconstructed + # by calling cls(**args), where args is a tuple returned by + # __getinitargs__. Otherwise, the cls.__init__ method should never + # be called and the class instance is created by instantiating a + # trivial class and assigning to the instance's __class__ variable. + + # If data.__getstate__ exists, it returns the state of the object. + # Otherwise, the state of the object is data.__dict__. + + # We produce either a !!python/object or !!python/object/new node. + # If data.__getinitargs__ does not exist and state is a dictionary, + # we produce a !!python/object node . Otherwise we produce a + # !!python/object/new node. + + cls = data.__class__ + class_name = u'%s.%s' % (cls.__module__, cls.__name__) + args = None + state = None + if hasattr(data, '__getinitargs__'): + args = list(data.__getinitargs__()) + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__ + if args is None and isinstance(state, dict): + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+class_name, state) + if isinstance(state, dict) and not state: + return self.represent_sequence( + u'tag:yaml.org,2002:python/object/new:' + + class_name, args) + value = {} + if args: + value['args'] = args + value['state'] = state + return self.represent_mapping( + u'tag:yaml.org,2002:python/object/new:'+class_name, value) + + def represent_object(self, data): + # We use __reduce__ API to save the data. data.__reduce__ returns + # a tuple of length 2-5: + # (function, args, state, listitems, dictitems) + + # For reconstructing, we calls function(*args), then set its state, + # listitems, and dictitems if they are not None. + + # A special case is when function.__name__ == '__newobj__'. In this + # case we create the object with args[0].__new__(*args). + + # Another special case is when __reduce__ returns a string - we don't + # support it. + + # We produce a !!python/object, !!python/object/new or + # !!python/object/apply node. 
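For instance, a new-style object whose state is just its __dict__ comes out under the !!python/object tag. A hedged sketch (flow/block layout may differ; loading it back needs the unsafe Loader):

import ruamel.yaml

class Point(object):
    def __init__(self, x, y):
        self.x, self.y = x, y

ruamel.yaml.dump(Point(1, 2))
# expected shape: "!!python/object:__main__.Point {x: 1, y: 2}\n"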
+ + cls = type(data) + if cls in copyreg.dispatch_table: + reduce = copyreg.dispatch_table[cls](data) + elif hasattr(data, '__reduce_ex__'): + reduce = data.__reduce_ex__(2) + elif hasattr(data, '__reduce__'): + reduce = data.__reduce__() + else: + raise RepresenterError("cannot represent object: %r" % data) + reduce = (list(reduce)+[None]*5)[:5] + function, args, state, listitems, dictitems = reduce + args = list(args) + if state is None: + state = {} + if listitems is not None: + listitems = list(listitems) + if dictitems is not None: + dictitems = dict(dictitems) + if function.__name__ == '__newobj__': + function = args[0] + args = args[1:] + tag = u'tag:yaml.org,2002:python/object/new:' + newobj = True + else: + tag = u'tag:yaml.org,2002:python/object/apply:' + newobj = False + function_name = u'%s.%s' % (function.__module__, function.__name__) + if not args and not listitems and not dictitems \ + and isinstance(state, dict) and newobj: + return self.represent_mapping( + u'tag:yaml.org,2002:python/object:'+function_name, state) + if not listitems and not dictitems \ + and isinstance(state, dict) and not state: + return self.represent_sequence(tag+function_name, args) + value = {} + if args: + value['args'] = args + if state or not isinstance(state, dict): + value['state'] = state + if listitems: + value['listitems'] = listitems + if dictitems: + value['dictitems'] = dictitems + return self.represent_mapping(tag+function_name, value) + +if PY2: + Representer.add_representer(str, + Representer.represent_str) + + Representer.add_representer(unicode, + Representer.represent_unicode) + + Representer.add_representer(long, + Representer.represent_long) + +Representer.add_representer(complex, + Representer.represent_complex) + +Representer.add_representer(tuple, + Representer.represent_tuple) + +Representer.add_representer(type, + Representer.represent_name) + +if PY2: + Representer.add_representer(types.ClassType, + Representer.represent_name) + +Representer.add_representer(types.FunctionType, + Representer.represent_name) + +Representer.add_representer(types.BuiltinFunctionType, + Representer.represent_name) + +Representer.add_representer(types.ModuleType, + Representer.represent_module) + +if PY2: + Representer.add_multi_representer(types.InstanceType, + Representer.represent_instance) + +Representer.add_multi_representer(object, + Representer.represent_object) + + +from .comments import CommentedMap, CommentedOrderedMap, CommentedSeq, \ + CommentedSet, comment_attrib, merge_attrib + + +class RoundTripRepresenter(SafeRepresenter): + # need to add type here and write out the .comment + # in serializer and emitter + + def __init__(self, default_style=None, default_flow_style=None): + if default_flow_style is None: + default_flow_style = False + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + + def represent_none(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:null', + u'') + + def represent_preserved_scalarstring(self, data): + tag = None + style = '|' + if PY2 and not isinstance(data, unicode): + data = unicode(data, 'ascii') + tag = u'tag:yaml.org,2002:str' + return self.represent_scalar(tag, data, style=style) + + def represent_sequence(self, tag, sequence, flow_style=None): + value = [] + # if the flow_style is None, the flow style tacked on to the object + # explicitly will be taken. 
If that is None as well the default flow + # style rules + try: + flow_style = sequence.fa.flow_style(flow_style) + except AttributeError: + flow_style = flow_style + try: + anchor = sequence.yaml_anchor() + except AttributeError: + anchor = None + node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + try: + comment = getattr(sequence, comment_attrib) + item_comments = comment.items + node.comment = comment.comment + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for idx, item in enumerate(sequence): + node_item = self.represent_data(item) + node_item.comment = item_comments.get(idx) + if not (isinstance(node_item, ScalarNode) and not node_item.style): + best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_mapping(self, tag, mapping, flow_style=None): + value = [] + try: + flow_style = mapping.fa.flow_style(flow_style) + except AttributeError: + flow_style = flow_style + try: + anchor = mapping.yaml_anchor() + except AttributeError: + anchor = None + node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + # no sorting! !! + try: + comment = getattr(mapping, comment_attrib) + node.comment = comment.comment + item_comments = comment.items + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for item_key, item_value in mapping.items(): + node_key = self.represent_key(item_key) + node_value = self.represent_data(item_value) + item_comment = item_comments.get(item_key) + if item_comment: + assert getattr(node_key, 'comment', None) is None + node_key.comment = item_comment[:2] + nvc = getattr(node_value, 'comment', None) + if nvc is not None: # end comment already there + nvc[0] = item_comment[2] + nvc[1] = item_comment[3] + else: + node_value.comment = item_comment[2:] + if not (isinstance(node_key, ScalarNode) and not node_key.style): + best_style = False + if not (isinstance(node_value, ScalarNode) and not + node_value.style): + best_style = False + value.append((node_key, node_value)) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + merge_list = [m[1] for m in getattr(mapping, merge_attrib, [])] + if merge_list: + # because of the call to represent_data here, the anchors + # are marked as being used and thereby created + if len(merge_list) == 1: + arg = self.represent_data(merge_list[0]) + else: + arg = self.represent_data(merge_list) + arg.flow_style = True + value.insert(0, + (ScalarNode(u'tag:yaml.org,2002:merge', '<<'), arg)) + return node + + def represent_omap(self, tag, omap, flow_style=None): + value = [] + try: + flow_style = omap.fa.flow_style(flow_style) + except AttributeError: + flow_style = flow_style + try: + anchor = omap.yaml_anchor() + except AttributeError: + anchor = None + node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + try: + comment = getattr(omap, comment_attrib) + node.comment = comment.comment + 
item_comments = comment.items + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for item_key in omap: + item_val = omap[item_key] + node_item = self.represent_data({item_key: item_val}) + # node item has two scalars in value: node_key and node_value + item_comment = item_comments.get(item_key) + if item_comment: + if item_comment[1]: + node_item.comment = [None, item_comment[1]] + assert getattr(node_item.value[0][0], 'comment', None) is None + node_item.value[0][0].comment = [item_comment[0], None] + nvc = getattr(node_item.value[0][1], 'comment', None) + if nvc is not None: # end comment already there + nvc[0] = item_comment[2] + nvc[1] = item_comment[3] + else: + node_item.value[0][1].comment = item_comment[2:] + # if not (isinstance(node_item, ScalarNode) \ + # and not node_item.style): + # best_style = False + value.append(node_item) + if flow_style is None: + if self.default_flow_style is not None: + node.flow_style = self.default_flow_style + else: + node.flow_style = best_style + return node + + def represent_set(self, setting): + flow_style = False + tag = u'tag:yaml.org,2002:set' + # return self.represent_mapping(tag, value) + value = [] + flow_style = setting.fa.flow_style(flow_style) + try: + anchor = setting.yaml_anchor() + except AttributeError: + anchor = None + node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor) + if self.alias_key is not None: + self.represented_objects[self.alias_key] = node + best_style = True + # no sorting! !! + try: + comment = getattr(setting, comment_attrib) + node.comment = comment.comment + item_comments = comment.items + try: + node.comment.append(comment.end) + except AttributeError: + pass + except AttributeError: + item_comments = {} + for item_key in setting.odict: + node_key = self.represent_key(item_key) + node_value = self.represent_data(None) + item_comment = item_comments.get(item_key) + if item_comment: + assert getattr(node_key, 'comment', None) is None + node_key.comment = item_comment[:2] + node_key.style = node_value.style = "?" 
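The net effect of these round-trip representers, as a short hedged check using the same load/dump pattern as round_trip_single in the yaml utility above:

import ruamel.yaml

src = u'a: 1  # keep me\nb: 2\n'
data = ruamel.yaml.load(src, ruamel.yaml.RoundTripLoader)
out = ruamel.yaml.dump(data, Dumper=ruamel.yaml.RoundTripDumper)
assert out == src  # the end-of-line comment survives the round trip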
+            if not (isinstance(node_key, ScalarNode) and not node_key.style):
+                best_style = False
+            if not (isinstance(node_value, ScalarNode) and not
+                    node_value.style):
+                best_style = False
+            value.append((node_key, node_value))
+        return node
+
+
+RoundTripRepresenter.add_representer(type(None),
+                                     RoundTripRepresenter.represent_none)
+
+RoundTripRepresenter.add_representer(
+    PreservedScalarString,
+    RoundTripRepresenter.represent_preserved_scalarstring)
+
+RoundTripRepresenter.add_representer(CommentedSeq,
+                                     RoundTripRepresenter.represent_list)
+
+RoundTripRepresenter.add_representer(CommentedMap,
+                                     RoundTripRepresenter.represent_dict)
+
+RoundTripRepresenter.add_representer(
+    CommentedOrderedMap,
+    RoundTripRepresenter.represent_ordereddict)
+
+RoundTripRepresenter.add_representer(CommentedSet,
+                                     RoundTripRepresenter.represent_set)
diff --git a/resolver.py b/resolver.py
new file mode 100644
index 0000000..d385b0e
--- /dev/null
+++ b/resolver.py
@@ -0,0 +1,233 @@
+from __future__ import absolute_import
+
+__all__ = ['BaseResolver', 'Resolver']
+
+from .error import *
+from .nodes import *
+from .compat import string_types
+
+import re
+
+
+class ResolverError(YAMLError):
+    pass
+
+
+class BaseResolver(object):
+
+    DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
+    DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
+    DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
+
+    yaml_implicit_resolvers = {}
+    yaml_path_resolvers = {}
+
+    def __init__(self):
+        self.resolver_exact_paths = []
+        self.resolver_prefix_paths = []
+
+    @classmethod
+    def add_implicit_resolver(cls, tag, regexp, first):
+        if 'yaml_implicit_resolvers' not in cls.__dict__:
+            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
+        if first is None:
+            first = [None]
+        for ch in first:
+            cls.yaml_implicit_resolvers.setdefault(ch, []).append(
+                (tag, regexp))
+
+    @classmethod
+    def add_path_resolver(cls, tag, path, kind=None):
+        # Note: `add_path_resolver` is experimental. The API could be changed.
+        # `path` is a pattern that is matched against the path from the
+        # root to the node that is being considered. `path` elements are
+        # tuples `(node_check, index_check)`. `node_check` is a node class:
+        # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None`
+        # matches any kind of node. `index_check` could be `None`, a boolean
+        # value, a string value, or a number. `None` and `False` match against
+        # any _value_ of sequence and mapping nodes. `True` matches against
+        # any _key_ of a mapping node. A string `index_check` matches against
+        # a mapping value that corresponds to a scalar key whose content is
+        # equal to the `index_check` value. An integer `index_check` matches
+        # against a sequence value with the index equal to `index_check`.
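+        # A hypothetical example (nothing in this changeset registers it):
+        #   cls.add_path_resolver(u'!job',
+        #                         [(MappingNode, u'jobs'),
+        #                          (SequenceNode, None)],
+        #                         kind=MappingNode)
+        # would tag every mapping that appears as an item of the sequence
+        # under the top-level key 'jobs' with '!job'.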
+ if 'yaml_path_resolvers' not in cls.__dict__: + cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() + new_path = [] + for element in path: + if isinstance(element, (list, tuple)): + if len(element) == 2: + node_check, index_check = element + elif len(element) == 1: + node_check = element[0] + index_check = True + else: + raise ResolverError("Invalid path element: %s" % element) + else: + node_check = None + index_check = element + if node_check is str: + node_check = ScalarNode + elif node_check is list: + node_check = SequenceNode + elif node_check is dict: + node_check = MappingNode + elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ + and not isinstance(node_check, string_types) \ + and node_check is not None: + raise ResolverError("Invalid node checker: %s" % node_check) + if not isinstance(index_check, (string_types, int)) \ + and index_check is not None: + raise ResolverError("Invalid index checker: %s" % index_check) + new_path.append((node_check, index_check)) + if kind is str: + kind = ScalarNode + elif kind is list: + kind = SequenceNode + elif kind is dict: + kind = MappingNode + elif kind not in [ScalarNode, SequenceNode, MappingNode] \ + and kind is not None: + raise ResolverError("Invalid node kind: %s" % kind) + cls.yaml_path_resolvers[tuple(new_path), kind] = tag + + def descend_resolver(self, current_node, current_index): + if not self.yaml_path_resolvers: + return + exact_paths = {} + prefix_paths = [] + if current_node: + depth = len(self.resolver_prefix_paths) + for path, kind in self.resolver_prefix_paths[-1]: + if self.check_resolver_prefix(depth, path, kind, + current_node, current_index): + if len(path) > depth: + prefix_paths.append((path, kind)) + else: + exact_paths[kind] = self.yaml_path_resolvers[path, + kind] + else: + for path, kind in self.yaml_path_resolvers: + if not path: + exact_paths[kind] = self.yaml_path_resolvers[path, kind] + else: + prefix_paths.append((path, kind)) + self.resolver_exact_paths.append(exact_paths) + self.resolver_prefix_paths.append(prefix_paths) + + def ascend_resolver(self): + if not self.yaml_path_resolvers: + return + self.resolver_exact_paths.pop() + self.resolver_prefix_paths.pop() + + def check_resolver_prefix(self, depth, path, kind, + current_node, current_index): + node_check, index_check = path[depth-1] + if isinstance(node_check, string_types): + if current_node.tag != node_check: + return + elif node_check is not None: + if not isinstance(current_node, node_check): + return + if index_check is True and current_index is not None: + return + if (index_check is False or index_check is None) \ + and current_index is None: + return + if isinstance(index_check, string_types): + if not (isinstance(current_index, ScalarNode) + and index_check == current_index.value): + return + elif isinstance(index_check, int) and not isinstance(index_check, + bool): + if index_check != current_index: + return + return True + + def resolve(self, kind, value, implicit): + if kind is ScalarNode and implicit[0]: + if value == u'': + resolvers = self.yaml_implicit_resolvers.get(u'', []) + else: + resolvers = self.yaml_implicit_resolvers.get(value[0], []) + resolvers += self.yaml_implicit_resolvers.get(None, []) + for tag, regexp in resolvers: + if regexp.match(value): + return tag + implicit = implicit[1] + if self.yaml_path_resolvers: + exact_paths = self.resolver_exact_paths[-1] + if kind in exact_paths: + return exact_paths[kind] + if None in exact_paths: + return exact_paths[None] + if kind is ScalarNode: + return 
self.DEFAULT_SCALAR_TAG + elif kind is SequenceNode: + return self.DEFAULT_SEQUENCE_TAG + elif kind is MappingNode: + return self.DEFAULT_MAPPING_TAG + + +class Resolver(BaseResolver): + pass + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:bool', + re.compile(u'''^(?:yes|Yes|YES|no|No|NO + |true|True|TRUE|false|False|FALSE + |on|On|ON|off|Off|OFF)$''', re.X), + list(u'yYnNtTfFoO')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:float', + re.compile(u'''^(?: + [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? + |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) + |\\.[0-9_]+(?:[eE][-+][0-9]+)? + |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* + |[-+]?\\.(?:inf|Inf|INF) + |\\.(?:nan|NaN|NAN))$''', re.X), + list(u'-+0123456789.')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:int', + re.compile(u'''^(?:[-+]?0b[0-1_]+ + |[-+]?0o?[0-7_]+ + |[-+]?(?:0|[1-9][0-9_]*) + |[-+]?0x[0-9a-fA-F_]+ + |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), + list(u'-+0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:merge', + re.compile(u'^(?:<<)$'), + [u'<']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:null', + re.compile(u'''^(?: ~ + |null|Null|NULL + | )$''', re.X), + [u'~', u'n', u'N', u'']) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:timestamp', + re.compile(u'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] + |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? + (?:[Tt]|[ \\t]+)[0-9][0-9]? + :[0-9][0-9] :[0-9][0-9] (?:\\.[0-9]*)? + (?:[ \\t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), + list(u'0123456789')) + +Resolver.add_implicit_resolver( + u'tag:yaml.org,2002:value', + re.compile(u'^(?:=)$'), + [u'=']) + +# The following resolver is only for documentation purposes. It cannot work +# because plain scalars cannot start with '!', '&', or '*'. 
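+# (the scanner always turns '!', '&' and '*' into tag, anchor and alias
+# tokens, so a plain scalar never reaches the resolver with such a prefix)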
+Resolver.add_implicit_resolver(
+    u'tag:yaml.org,2002:yaml',
+    re.compile(u'^(?:!|&|\\*)$'),
+    list(u'!&*'))
diff --git a/scalarstring.py b/scalarstring.py
new file mode 100644
index 0000000..a628396
--- /dev/null
+++ b/scalarstring.py
@@ -0,0 +1,44 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+__all__ = ["ScalarString", "PreservedScalarString"]
+
+from .compat import text_type
+
+
+class ScalarString(text_type):
+    def __new__(cls, *args, **kw):
+        return text_type.__new__(cls, *args, **kw)
+
+
+class PreservedScalarString(ScalarString):
+    def __new__(cls, value):
+        return ScalarString.__new__(cls, value)
+
+
+def preserve_literal(s):
+    return PreservedScalarString(s.replace('\r\n', '\n').replace('\r', '\n'))
+
+
+def walk_tree(base):
+    """
+    the routine here walks over a simple yaml tree (recursing into
+    dict values and list items) and converts strings that
+    have multiple lines to literal scalars
+    """
+    from ruamel.yaml.compat import string_types
+
+
+    if isinstance(base, dict):
+        for k in base:
+            v = base[k]
+            if isinstance(v, string_types) and '\n' in v:
+                base[k] = preserve_literal(v)
+            else:
+                walk_tree(v)
+    elif isinstance(base, list):
+        for idx, elem in enumerate(base):
+            if isinstance(elem, string_types) and '\n' in elem:
+                base[idx] = preserve_literal(elem)
+            else:
+                walk_tree(elem)
diff --git a/scanner.py b/scanner.py
new file mode 100644
index 0000000..24a13a8
--- /dev/null
+++ b/scanner.py
@@ -0,0 +1,1655 @@
+from __future__ import absolute_import
+from __future__ import print_function
+
+# Scanner produces tokens of the following types:
+# STREAM-START
+# STREAM-END
+# DIRECTIVE(name, value)
+# DOCUMENT-START
+# DOCUMENT-END
+# BLOCK-SEQUENCE-START
+# BLOCK-MAPPING-START
+# BLOCK-END
+# FLOW-SEQUENCE-START
+# FLOW-MAPPING-START
+# FLOW-SEQUENCE-END
+# FLOW-MAPPING-END
+# BLOCK-ENTRY
+# FLOW-ENTRY
+# KEY
+# VALUE
+# ALIAS(value)
+# ANCHOR(value)
+# TAG(value)
+# SCALAR(value, plain, style)
+#
+# RoundTripScanner
+# COMMENT(value)
+#
+# Read comments in the Scanner code for more details.
+#
+
+__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError']
+
+from .error import MarkedYAMLError
+from .tokens import *
+from .compat import utf8, unichr, PY3
+
+
+class ScannerError(MarkedYAMLError):
+    pass
+
+
+class SimpleKey(object):
+    # See simple keys treatment below.
+
+    def __init__(self, token_number, required, index, line, column, mark):
+        self.token_number = token_number
+        self.required = required
+        self.index = index
+        self.line = line
+        self.column = column
+        self.mark = mark
+
+
+class Scanner(object):
+
+    def __init__(self):
+        """Initialize the scanner."""
+        # It is assumed that Scanner and Reader will have a common descendant.
+        # Reader does the dirty work of checking for BOM and converting the
+        # input data to Unicode. It also adds NUL to the end.
+        #
+        # Reader supports the following methods
+        #   self.peek(i=0)     # peek the next i-th character
+        #   self.prefix(l=1)   # peek the next l characters
+        #   self.forward(l=1)  # read the next l characters and move the pointer
+
+        # Have we reached the end of the stream?
+        self.done = False
+
+        # The number of unclosed '{' and '['. `flow_level == 0` means block
+        # context.
+        self.flow_level = 0
+
+        # List of processed tokens that are not yet emitted.
+        self.tokens = []
+
+        # Add the STREAM-START token.
+        self.fetch_stream_start()
+
+        # Number of tokens that were emitted through the `get_token` method.
+        self.tokens_taken = 0
+
+        # The current indentation level.
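+        # (-1 until the first block construct establishes an indent)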
+ self.indent = -1 + + # Past indentation levels. + self.indents = [] + + # Variables related to simple keys treatment. + + # A simple key is a key that is not denoted by the '?' indicator. + # Example of simple keys: + # --- + # block simple key: value + # ? not a simple key: + # : { flow simple key: value } + # We emit the KEY token before all keys, so when we find a potential + # simple key, we try to locate the corresponding ':' indicator. + # Simple keys should be limited to a single line and 1024 characters. + + # Can a simple key start at the current position? A simple key may + # start: + # - at the beginning of the line, not counting indentation spaces + # (in block context), + # - after '{', '[', ',' (in the flow context), + # - after '?', ':', '-' (in the block context). + # In the block context, this flag also signifies if a block collection + # may start at the current position. + self.allow_simple_key = True + + # Keep track of possible simple keys. This is a dictionary. The key + # is `flow_level`; there can be no more that one possible simple key + # for each level. The value is a SimpleKey record: + # (token_number, required, index, line, column, mark) + # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), + # '[', or '{' tokens. + self.possible_simple_keys = {} + + # Public methods. + + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + return self.tokens[0] + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if self.tokens: + self.tokens_taken += 1 + return self.tokens.pop(0) + + # Private methods. + + def need_more_tokens(self): + if self.done: + return False + if not self.tokens: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + + def fetch_more_tokens(self): + + # Eat whitespaces and comments until we reach the next token. + comment = self.scan_to_next_token() + + if comment is not None: # never happens for base scanner + return self.fetch_comment(comment) + + # Remove obsolete possible simple keys. + self.stale_possible_simple_keys() + + # Compare the current indentation and column. It may add some tokens + # and decrease the current indentation level. + self.unwind_indent(self.column) + + # Peek the next character. + ch = self.peek() + + # Is it the end of stream? + if ch == u'\0': + return self.fetch_stream_end() + + # Is it a directive? + if ch == u'%' and self.check_directive(): + return self.fetch_directive() + + # Is it the document start? + if ch == u'-' and self.check_document_start(): + return self.fetch_document_start() + + # Is it the document end? + if ch == u'.' and self.check_document_end(): + return self.fetch_document_end() + + # TODO: support for BOM within a stream. + # if ch == u'\uFEFF': + # return self.fetch_bom() <-- issue BOMToken + + # Note: the order of the following checks is NOT significant. + + # Is it the flow sequence start indicator? 
+ if ch == u'[': + return self.fetch_flow_sequence_start() + + # Is it the flow mapping start indicator? + if ch == u'{': + return self.fetch_flow_mapping_start() + + # Is it the flow sequence end indicator? + if ch == u']': + return self.fetch_flow_sequence_end() + + # Is it the flow mapping end indicator? + if ch == u'}': + return self.fetch_flow_mapping_end() + + # Is it the flow entry indicator? + if ch == u',': + return self.fetch_flow_entry() + + # Is it the block entry indicator? + if ch == u'-' and self.check_block_entry(): + return self.fetch_block_entry() + + # Is it the key indicator? + if ch == u'?' and self.check_key(): + return self.fetch_key() + + # Is it the value indicator? + if ch == u':' and self.check_value(): + return self.fetch_value() + + # Is it an alias? + if ch == u'*': + return self.fetch_alias() + + # Is it an anchor? + if ch == u'&': + return self.fetch_anchor() + + # Is it a tag? + if ch == u'!': + return self.fetch_tag() + + # Is it a literal scalar? + if ch == u'|' and not self.flow_level: + return self.fetch_literal() + + # Is it a folded scalar? + if ch == u'>' and not self.flow_level: + return self.fetch_folded() + + # Is it a single quoted scalar? + if ch == u'\'': + return self.fetch_single() + + # Is it a double quoted scalar? + if ch == u'\"': + return self.fetch_double() + + # It must be a plain scalar then. + if self.check_plain(): + return self.fetch_plain() + + # No? It's an error. Let's produce a nice error message. + raise ScannerError("while scanning for the next token", None, + "found character %r that cannot start any token" + % utf8(ch), self.get_mark()) + + # Simple keys treatment. + + def next_possible_simple_key(self): + # Return the number of the nearest possible simple key. Actually we + # don't need to loop through the whole dictionary. We may replace it + # with the following code: + # if not self.possible_simple_keys: + # return None + # return self.possible_simple_keys[ + # min(self.possible_simple_keys.keys())].token_number + min_token_number = None + for level in self.possible_simple_keys: + key = self.possible_simple_keys[level] + if min_token_number is None or key.token_number < min_token_number: + min_token_number = key.token_number + return min_token_number + + def stale_possible_simple_keys(self): + # Remove entries that are no longer possible simple keys. According to + # the YAML specification, simple keys + # - should be limited to a single line, + # - should be no longer than 1024 characters. + # Disabling this procedure will allow simple keys of any length and + # height (may cause problems if indentation is broken though). + for level in list(self.possible_simple_keys): + key = self.possible_simple_keys[level] + if key.line != self.line \ + or self.index-key.index > 1024: + if key.required: + raise ScannerError( + "while scanning a simple key", key.mark, + "could not find expected ':'", self.get_mark()) + del self.possible_simple_keys[level] + + def save_possible_simple_key(self): + # The next token may start a simple key. We check if it's possible + # and save its position. This function is called for + # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. + + # Check if a simple key is required at the current position. + required = not self.flow_level and self.indent == self.column + + # The next token might be a simple key. Let's save it's number and + # position. 
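+        # (at most one candidate is kept per flow level; any stale one is
+        # removed first)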
+        if self.allow_simple_key:
+            self.remove_possible_simple_key()
+            token_number = self.tokens_taken+len(self.tokens)
+            key = SimpleKey(
+                token_number, required,
+                self.index, self.line, self.column, self.get_mark())
+            self.possible_simple_keys[self.flow_level] = key
+
+    def remove_possible_simple_key(self):
+        # Remove the saved possible key position at the current flow level.
+        if self.flow_level in self.possible_simple_keys:
+            key = self.possible_simple_keys[self.flow_level]
+
+            if key.required:
+                raise ScannerError(
+                    "while scanning a simple key", key.mark,
+                    "could not find expected ':'", self.get_mark())
+
+            del self.possible_simple_keys[self.flow_level]
+
+    # Indentation functions.
+
+    def unwind_indent(self, column):
+
+        # In flow context, tokens should respect indentation.
+        # Actually the condition should be `self.indent >= column` according to
+        # the spec. But this condition will prohibit intuitively correct
+        # constructions such as
+        # key : {
+        # }
+        # ####
+        # if self.flow_level and self.indent > column:
+        #     raise ScannerError(None, None,
+        #                        "invalid indentation or unclosed '[' or '{'",
+        #                        self.get_mark())
+
+        # In the flow context, indentation is ignored. We make the scanner less
+        # restrictive than the specification requires.
+        if self.flow_level:
+            return
+
+        # In block context, we may need to issue the BLOCK-END tokens.
+        while self.indent > column:
+            mark = self.get_mark()
+            self.indent = self.indents.pop()
+            self.tokens.append(BlockEndToken(mark, mark))
+
+    def add_indent(self, column):
+        # Check if we need to increase indentation.
+        if self.indent < column:
+            self.indents.append(self.indent)
+            self.indent = column
+            return True
+        return False
+
+    # Fetchers.
+
+    def fetch_stream_start(self):
+        # We always add STREAM-START as the first token and STREAM-END as the
+        # last token.
+
+        # Read the token.
+        mark = self.get_mark()
+
+        # Add STREAM-START.
+        self.tokens.append(StreamStartToken(mark, mark,
+                                            encoding=self.encoding))
+
+    def fetch_stream_end(self):
+
+        # Set the current indentation to -1.
+        self.unwind_indent(-1)
+
+        # Reset simple keys.
+        self.remove_possible_simple_key()
+        self.allow_simple_key = False
+        self.possible_simple_keys = {}
+
+        # Read the token.
+        mark = self.get_mark()
+
+        # Add STREAM-END.
+        self.tokens.append(StreamEndToken(mark, mark))
+
+        # The stream is finished.
+        self.done = True
+
+    def fetch_directive(self):
+
+        # Set the current indentation to -1.
+        self.unwind_indent(-1)
+
+        # Reset simple keys.
+        self.remove_possible_simple_key()
+        self.allow_simple_key = False
+
+        # Scan and add DIRECTIVE.
+        self.tokens.append(self.scan_directive())
+
+    def fetch_document_start(self):
+        self.fetch_document_indicator(DocumentStartToken)
+
+    def fetch_document_end(self):
+        self.fetch_document_indicator(DocumentEndToken)
+
+    def fetch_document_indicator(self, TokenClass):
+
+        # Set the current indentation to -1.
+        self.unwind_indent(-1)
+
+        # Reset simple keys. Note that there could not be a block collection
+        # after '---'.
+        self.remove_possible_simple_key()
+        self.allow_simple_key = False
+
+        # Add DOCUMENT-START or DOCUMENT-END.
+        start_mark = self.get_mark()
+        self.forward(3)
+        end_mark = self.get_mark()
+        self.tokens.append(TokenClass(start_mark, end_mark))
+
+    def fetch_flow_sequence_start(self):
+        self.fetch_flow_collection_start(FlowSequenceStartToken)
+
+    def fetch_flow_mapping_start(self):
+        self.fetch_flow_collection_start(FlowMappingStartToken)
+
+    def fetch_flow_collection_start(self, TokenClass):
+
+        # '[' and '{' may start a simple key.
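+        # (e.g. in '{a: 1}' the 'a' directly after '{' is such a key)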
+ self.save_possible_simple_key() + + # Increase the flow level. + self.flow_level += 1 + + # Simple keys are allowed after '[' and '{'. + self.allow_simple_key = True + + # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_sequence_end(self): + self.fetch_flow_collection_end(FlowSequenceEndToken) + + def fetch_flow_mapping_end(self): + self.fetch_flow_collection_end(FlowMappingEndToken) + + def fetch_flow_collection_end(self, TokenClass): + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Decrease the flow level. + self.flow_level -= 1 + + # No simple keys after ']' or '}'. + self.allow_simple_key = False + + # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) + + def fetch_flow_entry(self): + + # Simple keys are allowed after ','. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add FLOW-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) + + def fetch_block_entry(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a new entry? + if not self.allow_simple_key: + raise ScannerError(None, None, + "sequence entries are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-SEQUENCE-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) + + # It's an error for the block entry to occur in the flow context, + # but we let the parser detect this. + else: + pass + + # Simple keys are allowed after '-'. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add BLOCK-ENTRY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) + + def fetch_key(self): + + # Block context needs additional checks. + if not self.flow_level: + + # Are we allowed to start a key (not nessesary a simple)? + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping keys are not allowed here", + self.get_mark()) + + # We may need to add BLOCK-MAPPING-START. + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after '?' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add KEY. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) + + def fetch_value(self): + + # Do we determine a simple key? + if self.flow_level in self.possible_simple_keys: + # Add KEY. + key = self.possible_simple_keys[self.flow_level] + del self.possible_simple_keys[self.flow_level] + self.tokens.insert(key.token_number-self.tokens_taken, + KeyToken(key.mark, key.mark)) + + # If this key starts a new block mapping, we need to add + # BLOCK-MAPPING-START. 
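+            # (spliced in before the KEY token that was just inserted)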
+ if not self.flow_level: + if self.add_indent(key.column): + self.tokens.insert( + key.token_number-self.tokens_taken, + BlockMappingStartToken(key.mark, key.mark)) + + # There cannot be two simple keys one after another. + self.allow_simple_key = False + + # It must be a part of a complex key. + else: + + # Block context needs additional checks. + # (Do we really need them? They will be catched by the parser + # anyway.) + if not self.flow_level: + + # We are allowed to start a complex value if and only if + # we can start a simple key. + if not self.allow_simple_key: + raise ScannerError(None, None, + "mapping values are not allowed here", + self.get_mark()) + + # If this value starts a new block mapping, we need to add + # BLOCK-MAPPING-START. It will be detected as an error later by + # the parser. + if not self.flow_level: + if self.add_indent(self.column): + mark = self.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) + + # Simple keys are allowed after ':' in the block context. + self.allow_simple_key = not self.flow_level + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Add VALUE. + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) + + def fetch_alias(self): + + # ALIAS could be a simple key. + self.save_possible_simple_key() + + # No simple keys after ALIAS. + self.allow_simple_key = False + + # Scan and add ALIAS. + self.tokens.append(self.scan_anchor(AliasToken)) + + def fetch_anchor(self): + + # ANCHOR could start a simple key. + self.save_possible_simple_key() + + # No simple keys after ANCHOR. + self.allow_simple_key = False + + # Scan and add ANCHOR. + self.tokens.append(self.scan_anchor(AnchorToken)) + + def fetch_tag(self): + + # TAG could start a simple key. + self.save_possible_simple_key() + + # No simple keys after TAG. + self.allow_simple_key = False + + # Scan and add TAG. + self.tokens.append(self.scan_tag()) + + def fetch_literal(self): + self.fetch_block_scalar(style='|') + + def fetch_folded(self): + self.fetch_block_scalar(style='>') + + def fetch_block_scalar(self, style): + + # A simple key may follow a block scalar. + self.allow_simple_key = True + + # Reset possible simple key on the current level. + self.remove_possible_simple_key() + + # Scan and add SCALAR. + self.tokens.append(self.scan_block_scalar(style)) + + def fetch_single(self): + self.fetch_flow_scalar(style='\'') + + def fetch_double(self): + self.fetch_flow_scalar(style='"') + + def fetch_flow_scalar(self, style): + + # A flow scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after flow scalars. + self.allow_simple_key = False + + # Scan and add SCALAR. + self.tokens.append(self.scan_flow_scalar(style)) + + def fetch_plain(self): + + # A plain scalar could be a simple key. + self.save_possible_simple_key() + + # No simple keys after plain scalars. But note that `scan_plain` will + # change this flag if the scan is finished at the beginning of the + # line. + self.allow_simple_key = False + + # Scan and add SCALAR. May change `allow_simple_key`. + self.tokens.append(self.scan_plain()) + + # Checkers. + + def check_directive(self): + + # DIRECTIVE: ^ '%' ... + # The '%' indicator is already checked. 
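+        # (so a '%' in column 0 starts e.g. '%YAML 1.1' or a '%TAG' directive)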
+ if self.column == 0: + return True + + def check_document_start(self): + + # DOCUMENT-START: ^ '---' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'---' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_document_end(self): + + # DOCUMENT-END: ^ '...' (' '|'\n') + if self.column == 0: + if self.prefix(3) == u'...' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return True + + def check_block_entry(self): + + # BLOCK-ENTRY: '-' (' '|'\n') + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_key(self): + + # KEY(flow context): '?' + if self.flow_level: + return True + + # KEY(block context): '?' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_value(self): + + # VALUE(flow context): ':' + if self.flow_level: + return True + + # VALUE(block context): ':' (' '|'\n') + else: + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + + def check_plain(self): + # A plain scalar may start with any non-space character except: + # '-', '?', ':', ',', '[', ']', '{', '}', + # '#', '&', '*', '!', '|', '>', '\'', '\"', + # '%', '@', '`'. + # + # It may also start with + # '-', '?', ':' + # if it is followed by a non-space character. + # + # Note that we limit the last rule to the block context (except the + # '-' character) because we want the flow context to be space + # independent. + ch = self.peek() + return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ + or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' + and (ch == u'-' or (not self.flow_level and ch in u'?:'))) + + # Scanners. + + def scan_to_next_token(self): + # We ignore spaces, line breaks and comments. + # If we find a line break in the block context, we set the flag + # `allow_simple_key` on. + # The byte order mark is stripped if it's the first character in the + # stream. We do not yet support BOM inside the stream as the + # specification requires. Any such mark will be considered as a part + # of the document. + # + # TODO: We need to make tab handling rules more sane. A good rule is + # Tabs cannot precede tokens + # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, + # KEY(block), VALUE(block), BLOCK-ENTRY + # So the checking code is + # if : + # self.allow_simple_keys = False + # We also need to add the check for `allow_simple_keys == True` to + # `unwind_indent` before issuing BLOCK-END. + # Scanners for block, flow, and plain scalars need to be modified. + + if self.index == 0 and self.peek() == u'\uFEFF': + self.forward() + found = False + while not found: + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + if self.scan_line_break(): + if not self.flow_level: + self.allow_simple_key = True + else: + found = True + + def scan_directive(self): + # See the specification for details. + start_mark = self.get_mark() + self.forward() + name = self.scan_directive_name(start_mark) + value = None + if name == u'YAML': + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.get_mark() + elif name == u'TAG': + value = self.scan_tag_directive_value(start_mark) + end_mark = self.get_mark() + else: + end_mark = self.get_mark() + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) + + def scan_directive_name(self, start_mark): + # See the specification for details. 
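+        # a directive name is a run of alphanumeric characters plus '-_:.'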
+ length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_:.': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError( + "while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % utf8(ch), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError( + "while scanning a directive", start_mark, + "expected alphabetic or numeric character, but found %r" + % utf8(ch), self.get_mark()) + return value + + def scan_yaml_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + major = self.scan_yaml_directive_number(start_mark) + if self.peek() != '.': + raise ScannerError( + "while scanning a directive", start_mark, + "expected a digit or '.', but found %r" + % utf8(self.peek()), + self.get_mark()) + self.forward() + minor = self.scan_yaml_directive_number(start_mark) + if self.peek() not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError( + "while scanning a directive", start_mark, + "expected a digit or ' ', but found %r" + % utf8(self.peek()), + self.get_mark()) + return (major, minor) + + def scan_yaml_directive_number(self, start_mark): + # See the specification for details. + ch = self.peek() + if not (u'0' <= ch <= u'9'): + raise ScannerError( + "while scanning a directive", start_mark, + "expected a digit, but found %r" % utf8(ch), + self.get_mark()) + length = 0 + while u'0' <= self.peek(length) <= u'9': + length += 1 + value = int(self.prefix(length)) + self.forward(length) + return value + + def scan_tag_directive_value(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + handle = self.scan_tag_directive_handle(start_mark) + while self.peek() == u' ': + self.forward() + prefix = self.scan_tag_directive_prefix(start_mark) + return (handle, prefix) + + def scan_tag_directive_handle(self, start_mark): + # See the specification for details. + value = self.scan_tag_handle('directive', start_mark) + ch = self.peek() + if ch != u' ': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % utf8(ch), + self.get_mark()) + return value + + def scan_tag_directive_prefix(self, start_mark): + # See the specification for details. + value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % utf8(ch), + self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError( + "while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % utf8(ch), self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. 
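+        # (anchors are scanned by this same method and get the same set)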
+ start_mark = self.get_mark() + indicator = self.peek() + if indicator == u'*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError( + "while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % utf8(ch), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError( + "while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % utf8(ch), self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == u'<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != u'>': + raise ScannerError( + "while parsing a tag", start_mark, + "expected '>', but found %r" % utf8(self.peek()), + self.get_mark()) + self.forward() + elif ch in u'\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = u'!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in u'\0 \r\n\x85\u2028\u2029': + if ch == u'!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = u'!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = u'!' + self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % utf8(ch), + self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = u'' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != u'\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in u' \t' + length = 0 + while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != u'\0': + + # Unfortunately, folding rules are ambiguous. 
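+                # (two readings are shown below; the specification one is
+                # the one implemented)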
+ # + # This is the folding according to the specification: + + if folded and line_break == u'\n' \ + and leading_non_space and self.peek() not in u' \t': + if not breaks: + chunks.append(u' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + # if folded and line_break == u'\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(u' ') + # else: + # chunks.append(line_break) + # else: + # chunks.append(line_break) + else: + break + + # Process trailing line breaks. The 'chomping' setting determines + # whether they are included in the value. + comment = [] + if chomping in [None, True]: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + elif chomping in [None, False]: + comment.extend(breaks) + + # We are done. + token = ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + if len(comment) > 0: + # Keep track of the trailing whitespace as a comment token, if + # isn't all included in the actual value. + comment_end_mark = self.get_mark() + comment = CommentToken(''.join(comment), end_mark, + comment_end_mark) + token.add_post_comment(comment) + return token + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError( + "while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, " + "but found 0", self.get_mark()) + self.forward() + elif ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError( + "while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, " + "but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError( + "while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % utf8(ch), self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError( + "while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" + % utf8(ch), self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() != u' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. 
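+        # skip up to `indent` leading spaces, then collect any blank-line
+        # breaks so the caller can apply the chomping setting to them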
+        chunks = []
+        end_mark = self.get_mark()
+        while self.column < indent and self.peek() == u' ':
+            self.forward()
+        while self.peek() in u'\r\n\x85\u2028\u2029':
+            chunks.append(self.scan_line_break())
+            end_mark = self.get_mark()
+            while self.column < indent and self.peek() == u' ':
+                self.forward()
+        return chunks, end_mark
+
+    def scan_flow_scalar(self, style):
+        # See the specification for details.
+        # Note that we loosen indentation rules for quoted scalars. Quoted
+        # scalars don't need to adhere to indentation because " and ' clearly
+        # mark the beginning and the end of them. Therefore we are less
+        # restrictive than the specification requires. We only need to check
+        # that document separators are not included in scalars.
+        if style == '"':
+            double = True
+        else:
+            double = False
+        chunks = []
+        start_mark = self.get_mark()
+        quote = self.peek()
+        self.forward()
+        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
+        while self.peek() != quote:
+            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
+            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
+        self.forward()
+        end_mark = self.get_mark()
+        return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
+                           style)
+
+    ESCAPE_REPLACEMENTS = {
+        u'0': u'\0',
+        u'a': u'\x07',
+        u'b': u'\x08',
+        u't': u'\x09',
+        u'\t': u'\x09',
+        u'n': u'\x0A',
+        u'v': u'\x0B',
+        u'f': u'\x0C',
+        u'r': u'\x0D',
+        u'e': u'\x1B',
+        u' ': u'\x20',
+        u'\"': u'\"',
+        u'/': u'/',  # as per http://www.json.org/
+        u'\\': u'\\',
+        u'N': u'\x85',
+        u'_': u'\xA0',
+        u'L': u'\u2028',
+        u'P': u'\u2029',
+    }
+
+    ESCAPE_CODES = {
+        u'x': 2,
+        u'u': 4,
+        u'U': 8,
+    }
+
+    def scan_flow_scalar_non_spaces(self, double, start_mark):
+        # See the specification for details.
+        chunks = []
+        while True:
+            length = 0
+            while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
+                length += 1
+            if length:
+                chunks.append(self.prefix(length))
+                self.forward(length)
+            ch = self.peek()
+            if not double and ch == u'\'' and self.peek(1) == u'\'':
+                chunks.append(u'\'')
+                self.forward(2)
+            elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
+                chunks.append(ch)
+                self.forward()
+            elif double and ch == u'\\':
+                self.forward()
+                ch = self.peek()
+                if ch in self.ESCAPE_REPLACEMENTS:
+                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
+                    self.forward()
+                elif ch in self.ESCAPE_CODES:
+                    length = self.ESCAPE_CODES[ch]
+                    self.forward()
+                    for k in range(length):
+                        if self.peek(k) not in u'0123456789ABCDEFabcdef':
+                            raise ScannerError(
+                                "while scanning a double-quoted scalar",
+                                start_mark,
+                                "expected escape sequence of %d hexadecimal "
+                                "numbers, but found %r" %
+                                (length, utf8(self.peek(k))), self.get_mark())
+                    code = int(self.prefix(length), 16)
+                    chunks.append(unichr(code))
+                    self.forward(length)
+                elif ch in u'\r\n\x85\u2028\u2029':
+                    self.scan_line_break()
+                    chunks.extend(self.scan_flow_scalar_breaks(
+                        double, start_mark))
+                else:
+                    raise ScannerError(
+                        "while scanning a double-quoted scalar", start_mark,
+                        "found unknown escape character %r" % utf8(ch),
+                        self.get_mark())
+            else:
+                return chunks
+
+    def scan_flow_scalar_spaces(self, double, start_mark):
+        # See the specification for details.
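+        # whitespace around a line break inside a quoted scalar is folded
+        # away; with no break it is kept verbatim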
+ chunks = [] + length = 0 + while self.peek(length) in u' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == u'\0': + raise ScannerError( + "while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", + start_mark, + "found unexpected document separator", + self.get_mark()) + while self.peek() in u' \t': + self.forward() + if self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. + chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + # if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == u'#': + break + while True: + ch = self.peek(length) + if ch in u'\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == u':' and + self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in u',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == u':' and + self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError( + "while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check " + "http://pyyaml.org/wiki/YAMLColonInFlowContext " + "for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == u'#' \ + or (not self.flow_level and self.column < indent): + break + + token = ScalarToken(u''.join(chunks), True, start_mark, end_mark) + if spaces and spaces[0] == '\n': + # Create a comment token to preserve the trailing line breaks. + comment = CommentToken(''.join(spaces) + '\n', start_mark, end_mark) + token.add_post_comment(comment) + return token + + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! 
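+        # (only plain spaces are consumed below; a tab ends the run)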
+ chunks = [] + length = 0 + while self.peek(length) in u' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != u'!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % utf8(ch), + self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != u' ': + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' \ + or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if ch != u'!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % utf8(ch), + self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. + chunks = [] + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.!~*\'()[]%': + if ch == u'%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % utf8(ch), + self.get_mark()) + return u''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. 
+ code_bytes = [] + mark = self.get_mark() + while self.peek() == u'%': + self.forward() + for k in range(2): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError( + "while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers," + " but found %r" + % utf8(self.peek(k)), self.get_mark()) + if PY3: + code_bytes.append(int(self.prefix(2), 16)) + else: + code_bytes.append(chr(int(self.prefix(2), 16))) + self.forward(2) + try: + if PY3: + value = bytes(code_bytes).decode('utf-8') + else: + value = unicode(''.join(code_bytes), 'utf-8') + except UnicodeDecodeError as exc: + raise ScannerError("while scanning a %s" % name, start_mark, + str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in u'\r\n\x85': + if self.prefix(2) == u'\r\n': + self.forward(2) + else: + self.forward() + return u'\n' + elif ch in u'\u2028\u2029': + self.forward() + return ch + return u'' + + +class RoundTripScanner(Scanner): + def check_token(self, *choices): + # Check if the next token is one of the given types. + while self.need_more_tokens(): + self.fetch_more_tokens() + self._gather_comments() + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + # Return the next token, but do not delete if from the queue. + while self.need_more_tokens(): + self.fetch_more_tokens() + self._gather_comments() + if self.tokens: + return self.tokens[0] + + def _gather_comments(self): + """combine multiple comment lines""" + comments = [] + if not self.tokens: + return comments + if isinstance(self.tokens[0], CommentToken): + comment = self.tokens.pop(0) + self.tokens_taken += 1 + # print('################ dropping', comment) + comments.append(comment) + while self.need_more_tokens(): + self.fetch_more_tokens() + if not self.tokens: + return comments + if isinstance(self.tokens[0], CommentToken): + self.tokens_taken += 1 + comment = self.tokens.pop(0) + # print 'dropping2', comment + comments.append(comment) + if len(comments) >= 1: + # print(' len', len(comments), comments) + # print(' com', comments[0], comments[0].start_mark.line) + # print(' tok', self.tokens[0].end_mark.line) + self.tokens[0].add_pre_comments(comments) + # pull in post comment on e.g. ':' + if not self.done and len(self.tokens) < 2: + self.fetch_more_tokens() + + def get_token(self): + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + self._gather_comments() + if self.tokens: + # only add post comment to single line tokens: + # scalar, value token. 
FlowSequenceEndToken, FlowMappingEndToken; otherwise
+            # hidden stream tokens could get them (leave them alone and they
+            # will be pre comments for the next map/seq)
+            if len(self.tokens) > 1 and \
+               isinstance(self.tokens[0], (
+                   ScalarToken,
+                   ValueToken,
+                   FlowSequenceEndToken,
+                   FlowMappingEndToken,
+               )) and \
+               isinstance(self.tokens[1], CommentToken) and \
+               self.tokens[0].end_mark.line == self.tokens[1].start_mark.line:
+                self.tokens_taken += 1
+                self.tokens[0].add_post_comment(self.tokens.pop(1))
+            self.tokens_taken += 1
+            return self.tokens.pop(0)
+
+    def fetch_comment(self, comment):  # XXXX
+        value, start_mark, end_mark = comment
+        self.tokens.append(CommentToken(value, start_mark, end_mark))
+
+    # scanner
+
+    def scan_to_next_token(self):
+        # We ignore spaces, line breaks and comments.
+        # If we find a line break in the block context, we set the flag
+        # `allow_simple_key` on.
+        # The byte order mark is stripped if it's the first character in the
+        # stream. We do not yet support BOM inside the stream as the
+        # specification requires. Any such mark will be considered as a part
+        # of the document.
+        #
+        # TODO: We need to make tab handling rules more sane. A good rule is
+        #   Tabs cannot precede tokens
+        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
+        #   KEY(block), VALUE(block), BLOCK-ENTRY
+        # So the checking code is
+        #   if <TAB>:
+        #       self.allow_simple_keys = False
+        # We also need to add the check for `allow_simple_keys == True` to
+        # `unwind_indent` before issuing BLOCK-END.
+        # Scanners for block, flow, and plain scalars need to be modified.
+
+        if self.index == 0 and self.peek() == u'\uFEFF':
+            self.forward()
+        found = False
+        while not found:
+            while self.peek() == u' ':
+                self.forward()
+            ch = self.peek()
+            if ch == u'#':
+                start_mark = self.get_mark()
+                comment = ch
+                self.forward()
+                while ch not in u'\0\r\n\x85\u2028\u2029':
+                    ch = self.peek()
+                    if ch == u'\0':  # don't gobble the end-of-stream character
+                        break
+                    comment += ch
+                    self.forward()
+                # gather any blank lines following the comment too
+                ch = self.scan_line_break()
+                while len(ch) > 0:
+                    comment += ch
+                    ch = self.scan_line_break()
+                end_mark = self.get_mark()
+                if not self.flow_level:
+                    self.allow_simple_key = True
+                return comment, start_mark, end_mark
+            if self.scan_line_break():
+                if not self.flow_level:
+                    self.allow_simple_key = True
+            else:
+                found = True
+
+# try:
+#     import psyco
+#     psyco.bind(Scanner)
+# except ImportError:
+#     pass
diff --git a/serializer.py b/serializer.py
new file mode 100644
index 0000000..0bdc558
--- /dev/null
+++ b/serializer.py
@@ -0,0 +1,166 @@
+from __future__ import absolute_import
+
+__all__ = ['Serializer', 'SerializerError']
+
+import re
+
+from .error import YAMLError
+from .events import *
+from .nodes import *
+
+from .compat import nprint, DBG_NODE, dbg
+
+
+class SerializerError(YAMLError):
+    pass
+
+
+class Serializer(object):
+
+    # 'id' and 3+ numbers, but not 000
+    ANCHOR_TEMPLATE = u'id%03d'
+    ANCHOR_RE = re.compile(u'id(?!000$)\\d{3,}')
+
+
+    def __init__(self, encoding=None, explicit_start=None, explicit_end=None,
+                 version=None, tags=None):
+        self.use_encoding = encoding
+        self.use_explicit_start = explicit_start
+        self.use_explicit_end = explicit_end
+        self.use_version = version
+        self.use_tags = tags
+        self.serialized_nodes = {}
+        self.anchors = {}
+        self.last_anchor_id = 0
+        self.closed = None
+        self._templated_id = None
+
+    def open(self):
+        if self.closed is None:
+            self.emit(StreamStartEvent(encoding=self.use_encoding))
+            self.closed = False
+        elif self.closed:
+            raise
SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + # def __del__(self): + # self.close() + + def serialize(self, node): + if dbg(DBG_NODE): + nprint('Serializing nodes') + node.dump() + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, + tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + anchor = None + try: + if node.anchor.always_dump: + anchor = node.anchor.value + except: + pass + self.anchors[node] = anchor + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + try: + anchor = node.anchor.value + except: + anchor = None + if anchor is None: + self.last_anchor_id += 1 + return self.ANCHOR_TEMPLATE % self.last_anchor_id + return anchor + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, + (True, False)) + default_tag = self.resolve(ScalarNode, node.value, + (False, True)) + implicit = \ + (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style, comment=node.comment)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + comment = node.comment + # print('comment >>>>>>>>>>>>>.', comment, node.flow_style) + end_comment = None + seq_comment = None + if node.flow_style is True: + if comment: # eol comment on flow style sequence + seq_comment = comment[0] + # comment[0] = None + if comment and len(comment) > 2: + end_comment = comment[2] + else: + end_comment = None + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style, + comment=node.comment)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent(comment=[seq_comment, end_comment])) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + comment = node.comment + end_comment = None + map_comment = None + if node.flow_style is True: + if comment: # eol comment on flow style sequence + map_comment = comment[0] + # comment[0] = None + if comment and len(comment) > 2: + end_comment = comment[2] + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style, + comment=node.comment)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent(comment=[map_comment, 
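How the anchor template and regex shown on the Serializer interact, restated standalone (the two values are copied from the class attributes above):

    import re

    ANCHOR_TEMPLATE = u'id%03d'
    ANCHOR_RE = re.compile(u'id(?!000$)\\d{3,}')

    assert ANCHOR_TEMPLATE % 1 == u'id001'   # first auto-generated anchor
    assert ANCHOR_RE.match(u'id001')         # recognized as templated
    assert ANCHOR_RE.match(u'id1234')        # three or more digits match
    assert not ANCHOR_RE.match(u'id000')     # id000 is explicitly excluded
    assert not ANCHOR_RE.match(u'myanchor')  # user-chosen anchors are kept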
end_comment])) + self.ascend_resolver() + +def templated_id(s): + return Serializer.ANCHOR_RE.match(s) \ No newline at end of file diff --git a/setup.py b/setup.py index ff0840b..e0b37c5 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,68 @@ -#! /usr/bin/env python +# # header # coding: utf-8 from __future__ import print_function -import sys -import os -import platform -from textwrap import dedent +if __name__ != '__main__': + raise NotImplementedError('should never include setup.py') + +# # definitions -name_space = 'ruamel' -package_name = 'yaml' -full_package_name = name_space + '.' + package_name +full_package_name = None + + +def _package_data(fn): + data = {} + with open(fn) as fp: + parsing = False + for line in fp.readlines(): + if line.startswith('_package_data'): + parsing = True + continue + if not parsing: + continue + if line.startswith(')'): + break + if '# NOQA' in line: + line = line.split('# NOQA', 1)[0].rstrip() + k, v = [x.strip() for x in line.split('=', 1)] + if v[-1] == ',': + v = v[:-1] + if v[0] in '\'"' and v[0] == v[-1]: + data[k] = v[1:-1] + elif v == 'None': + data[k] = None + elif v == 'True': + data[k] = True + elif v == 'False': + data[k] = False + elif v[0] == '(' and v[-1] == ')': + data[k] = tuple([x.strip()[1:-1] if x[0] in '\'"' else int(x) + for x in v[1:-1].split(', ')]) + elif v[0] == '[' and v[-1] == ']': + data[k] = [x.strip()[1:-1] if x[0] in '\'"' else int(x) + for x in v[1:-1].split(', ')] + else: + print('Unknown: >>>>> {0!r} {1!r}'.format(k, v)) + return data + +pkg_data = _package_data('__init__.py') exclude_files = [ 'setup.py', ] +# # imports +import os +import sys -def get_version(): - v_i = 'version_info = ' - for line in open('py/__init__.py'): - if not line.startswith(v_i): - continue - s_e = line[len(v_i):].strip()[1:-1].split(', ') - els = [x.strip()[1:-1] if x[0] in '\'"' else int(x) for x in s_e] - return els +from setuptools import setup +from setuptools.command import install_lib +# # helper def _check_convert_version(tup): - """create a PEP 386 pseudo-format conformant string from tuple tup""" + """Create a PEP 386 pseudo-format conformant string from tuple tup.""" ret_val = str(tup[0]) # first is always digit next_sep = "." # separator for next extension, can be "" or "." 
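The value coercion _package_data applies line by line, restated as a sketch (the helper name _coerce is ours, for illustration only):

    def _coerce(v):
        if v[0] in '\'"' and v[0] == v[-1]:
            return v[1:-1]                   # quoted string
        if v in ('None', 'True', 'False'):
            return {'None': None, 'True': True, 'False': False}[v]
        if v[0] == '(' and v[-1] == ')':     # tuple of ints and strings
            return tuple(x.strip()[1:-1] if x.strip()[0] in '\'"' else int(x)
                         for x in v[1:-1].split(', '))
        return v

    assert _coerce("'ruamel.yaml'") == 'ruamel.yaml'
    assert _coerce('(0, 10, 7)') == (0, 10, 7)
    assert _coerce('None') is None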
nr_digits = 0 # nr of adjacent digits in rest, to verify @@ -58,29 +92,13 @@ def _check_convert_version(tup): return ret_val -version_info = get_version() +version_info = pkg_data['version_info'] version_str = _check_convert_version(version_info) -if __name__ == '__main__': - # put here so setup.py can be imported more easily - from setuptools import setup, find_packages, Extension, Distribution - from setuptools.command import install_lib - - # windows things this is pure, that way you would get pure python - # whl files that take precedence on Linux over source with compilable C - if '--universal' in sys.argv: - Distribution.is_pure = lambda *args: True - Distribution.is_pure = lambda *args: False - - class MyInstallLib(install_lib.install_lib): - "create __init__.py on the fly" - def run(self): - install_lib.install_lib.run(self) - def install(self): - fpp = full_package_name.split('.') # full package path + fpp = pkg_data['full_package_name'].split('.') # full package path full_exclude_files = [os.path.join(*(fpp + [x])) for x in exclude_files] alt_files = [] @@ -95,127 +113,270 @@ class MyInstallLib(install_lib.install_lib): return alt_files -def check_extensions(): - """check if the C module can be build by trying to compile a small - program against the libyaml development library""" - if sys.platform == "win32": - return None - import tempfile - import shutil - - import distutils.sysconfig - import distutils.ccompiler - from distutils.errors import CompileError, LinkError - - libraries = ['yaml'] - - # write a temporary .c file to compile - c_code = dedent(""" - #include - - int main(int argc, char* argv[]) - { - yaml_parser_t parser; - parser = parser; /* prevent warning */ - return 0; - } - """) - tmp_dir = tempfile.mkdtemp(prefix='tmp_ruamel_yaml_') - ret_val = None - try: - bin_file_name = os.path.join(tmp_dir, 'test_yaml') - file_name = bin_file_name + '.c' - with open(file_name, 'w') as fp: - fp.write(c_code) +class NameSpacePackager(object): + def __init__(self, pkg_data): + assert isinstance(pkg_data, dict) + self._pkg_data = pkg_data + self.full_package_name = self._pkg_data['full_package_name'] + self._split = None + self.depth = self.full_package_name.count('.') + self.command = None + if sys.argv[0] == 'setup.py' and sys.argv[1] == 'install' and \ + '--single-version-externally-managed' not in sys.argv: + print('error: have to install with "pip install ."') + sys.exit(1) + for x in sys.argv: + if x[0] == '-' or x == 'setup.py': + continue + self.command = x + break - # and try to compile it - compiler = distutils.ccompiler.new_compiler() - assert isinstance(compiler, distutils.ccompiler.CCompiler) - distutils.sysconfig.customize_compiler(compiler) + @property + def split(self): + """split the full package name in list of compontents""" + if self._split is None: + fpn = self.full_package_name.split('.') + self._split = [] + while fpn: + self._split.insert(0, '.'.join(fpn)) + fpn = fpn[:-1] + for d in os.listdir('.'): + if not os.path.isdir(d) or d == self._split[0] or d[0] == '_': + continue + x = os.path.join(d, 'setup.py') # prevent sub-packages in namespace from being included + if os.path.exists(x): + if not os.path.exists(os.path.join(d, 'tox.ini')): + print('\n>>>>> found "{0}" without tox.ini <<<<<\n' + ''.format(x)) + continue + x = os.path.join(d, '__init__.py') + if os.path.exists(x): + self._split.append(self.full_package_name + '.' 
+ d) + return self._split - try: - compiler.link_executable( - compiler.compile([file_name]), - bin_file_name, - libraries=libraries, + @property + def namespace_packages(self): + return self.split[:self.depth] + + @property + def package_dir(self): + return { + # don't specify empty dir, clashes with package_data spec + self.full_package_name: '.', + self.split[0]: self.split[0], + } + + def create_dirs(self): + """create the directories necessary for namespace packaging""" + if not os.path.exists(self.split[0]): + for d in self.split[:self.depth]: + d = os.path.join(*d.split('.')) + os.mkdir(d) + with open(os.path.join(d, '__init__.py'), 'w') as fp: + fp.write('import pkg_resources\n' + 'pkg_resources.declare_namespace(__name__)\n') + os.symlink( + # a.b gives a/b -> .. + # a.b.c gives a/b/c -> ../.. + os.path.join(*['..'] * self.depth), + os.path.join(*self.split[self.depth].split('.')) ) - except CompileError: - print('libyaml compile error') - except LinkError: - print('libyaml link error') - else: - ret_val = [ - Extension( - '_yaml', - sources=['ext/_yaml.c'], - libraries=libraries, - ), - ] - except: - pass - finally: - shutil.rmtree(tmp_dir) - return ret_val + def check(self): + try: + from pip.exceptions import InstallationError + except ImportError: + return + # arg is either develop (pip install -e) or install + if self.command not in ['install', 'develop']: + return -def main(): - install_requires = [ - "ruamel.base>=1.0.0", - "ruamel.std.argparse", - ] - # use fast ordereddict for !!omap - if sys.version_info[0] == 2 and \ - platform.python_implementation() == "CPython": - install_requires.extend(['ruamel.ordereddict']) - # if sys.version_info < (3, 4): - # install_requires.append("") - packages = [full_package_name] + [ - (full_package_name + '.' + x) - for x in find_packages('py', exclude=['test'])] - setup( - name=full_package_name, - version=version_str, - description=full_package_name + " is a YAML parser/emitter that " - "supports roundtrip preservation of comments, seq/map flow style, and " - "map key order", - install_requires=install_requires, - long_description=open('README.rst').read(), - url='https://bitbucket.org/ruamel/' + package_name, - author='Anthon van der Neut', - author_email='a.van.der.neut@ruamel.eu', - license="MIT license", - package_dir={full_package_name: 'py'}, - namespace_packages=[name_space], - packages=packages, - ext_modules=check_extensions(), - #entry_points=mk_entry_points(full_package_name), - cmdclass={'install_lib': MyInstallLib}, - classifiers=[ - 'Development Status :: 4 - Beta', + # if hgi and hgi.base are both in namespace_packages matching + # against the top (hgi.) it suffices to find minus-e and non-minus-e + # installed packages. 
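What the split property computes for this package, restated (split_name is an illustrative stand-in; the real property additionally appends qualifying sibling sub-packages found on disk):

    def split_name(full_package_name):
        parts = full_package_name.split('.')
        return ['.'.join(parts[:i]) for i in range(1, len(parts) + 1)]

    assert split_name('ruamel.yaml') == ['ruamel', 'ruamel.yaml']
    assert split_name('ruamel.std.argparse') == \
        ['ruamel', 'ruamel.std', 'ruamel.std.argparse']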
As we don't know the order in namespace_packages + # do some magic + prefix = self.split[0] + prefixes = set([prefix, prefix.replace('_', '-')]) + for p in sys.path: + if not p: + continue # directory with setup.py + if os.path.exists(os.path.join(p, 'setup.py')): + continue # some linked in stuff might not be hgi based + if not os.path.isdir(p): + continue + if p.startswith('/tmp/'): + continue + for fn in os.listdir(p): + for pre in prefixes: + if fn.startswith(pre): + break + else: + continue + full_name = os.path.join(p, fn) + # not in prefixes the toplevel is never changed from _ to - + if fn == prefix and os.path.isdir(full_name): + # directory -> other, non-minus-e, install + if self.command == 'develop': + raise InstallationError( + 'Cannot mix develop (pip install -e),\nwith ' + 'non-develop installs for package name {0}'.format( + fn)) + elif fn == prefix: + raise InstallationError( + 'non directory package {0} in {1}'.format( + fn, p)) + for pre in [x + '.' for x in prefixes]: + if fn.startswith(pre): + break + else: + continue # hgiabc instead of hgi. + if fn.endswith('-link') and self.command == 'install': + raise InstallationError( + 'Cannot mix non-develop with develop\n(pip install -e)' + ' installs for package name {0}'.format(fn)) + + def entry_points(self, script_name=None, package_name=None): + ep = self._pkg_data.get('entry_points', True) + if ep is None: + return None + if ep is not True: + return {'console_scripts': [ep]} + if package_name is None: + package_name = self.full_package_name + if not script_name: + script_name = package_name.split('.')[-1] + return {'console_scripts': [ + '{0} = {1}:main'.format(script_name, package_name), + ]} + + @property + def url(self): + return 'https://bitbucket.org/{0}/{1}'.format( + *self.full_package_name.split('.', 1)) + + @property + def author(self): + return self._pkg_data['author'] + + @property + def author_email(self): + return self._pkg_data['author_email'] + + @property + def license(self): + lic = self._pkg_data.get('license') + if lic is None: + # lic_fn = os.path.join(os.path.dirname(__file__), 'LICENSE') + # assert os.path.exists(lic_fn) + return "MIT license" + return license + + @property + def description(self): + return self._pkg_data['description'] + + @property + def status(self): + # αβ + status = self._pkg_data.get('status', 'β') + if status == 'α': + return (3, 'Alpha') + elif status == 'β': + return (4, 'Beta') + elif 'stable' in status.lower(): + return (5, 'Production/Stable') + raise NotImplementedError + + @property + def classifiers(self): + return [ + 'Development Status :: {0} - {1}'.format(*self.status), 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', + 'License :: ' + ('Other/Proprietary License' + if self._pkg_data.get('license') else + 'OSI Approved :: MIT License'), 'Operating System :: OS Independent', 'Programming Language :: Python', - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Text Processing :: Markup", ] - ) + @property + def install_requires(self): + return pkg_data.get('install_requires', []) + + @property + def data_files(self): + df = self._pkg_data.get('data_files', []) + if self._pkg_data.get('license') is None: + 
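The three cases entry_points() distinguishes, mirrored in a small sketch (console_scripts_for is ours, for illustration):

    def console_scripts_for(pkg_data, package_name):
        ep = pkg_data.get('entry_points', True)
        if ep is None:        # explicitly disabled, as this package does
            return None
        if ep is not True:    # an explicit 'name = module:func' string
            return {'console_scripts': [ep]}
        script = package_name.split('.')[-1]
        return {'console_scripts': [
            '{0} = {1}:main'.format(script, package_name)]}

    assert console_scripts_for(dict(entry_points=None), 'ruamel.yaml') is None
    assert console_scripts_for({}, 'ruamel.yaml') == \
        {'console_scripts': ['yaml = ruamel.yaml:main']}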
df.append('LICENSE') + if not df: + return None + return [('.', df), ] + + @property + def package_data(self): + df = self._pkg_data.get('data_files', []) + if self._pkg_data.get('license') is None: + # include the file + df.append('LICENSE') + # but don't install it + exclude_files.append('LICENSE') + if not df: + return {} + return {self.full_package_name: df} + + def wheel(self, kw, setup): + """temporary add setup.cfg if creating a wheel to include LICENSE file + https://bitbucket.org/pypa/wheel/issues/47 + """ + if 'bdist_wheel' not in sys.argv or not os.path.exists('LICENSE'): + return + file_name = 'setup.cfg' + if os.path.exists(file_name): # add it if not in there? + return + with open(file_name, 'w') as fp: + fp.write('[metadata]\nlicense-file = LICENSE\n') + if self._pkg_data.get('universal'): + fp.write('[bdist_wheel]\nuniversal = 1\n') + try: + setup(**kw) + except: + raise + finally: + os.remove(file_name) + return True + + +# # call setup +def main(): + nsp = NameSpacePackager(pkg_data) + nsp.check() + nsp.create_dirs() + kw = dict( + name=nsp.full_package_name, + namespace_packages=nsp.namespace_packages, + version=version_str, + packages=nsp.split, + url=nsp.url, + author=nsp.author, + author_email=nsp.author_email, + cmdclass={'install_lib': MyInstallLib}, + package_dir=nsp.package_dir, + entry_points=nsp.entry_points(), + description=nsp.description, + install_requires=nsp.install_requires, + license=nsp.license, + classifiers=nsp.classifiers, + package_data=nsp.package_data, + ) + if '--version' not in sys.argv: + for k in sorted(kw): + v = kw[k] + print(k, '->', v) + with open('README.rst') as fp: + kw['long_description'] = fp.read() + if nsp.wheel(kw, setup): + return + setup(**kw) -def mk_entry_points(full_package_name): - script_name = full_package_name.rsplit('.', 1)[-1] - return {'console_scripts': [ - '{0} = {1}:main'.format(script_name, full_package_name), - ]} -if __name__ == '__main__': - if 'yamlctest' in sys.argv: - print(check_extensions()) - sys.exit(1) - main() +main() diff --git a/tokens.py b/tokens.py new file mode 100644 index 0000000..452803f --- /dev/null +++ b/tokens.py @@ -0,0 +1,188 @@ + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + + def add_post_comment(self, comment): + if not hasattr(self, '_comment'): + self._comment = [None, None] + self._comment[0] = comment + + def add_pre_comments(self, comments): + if not hasattr(self, '_comment'): + self._comment = [None, None] + assert self._comment[1] is None + self._comment[1] = comments + + def get_comment(self): + return getattr(self, '_comment', None) + + @property + def comment(self): + return getattr(self, '_comment', None) + + def move_comment(self, target): + """move a comment from this token to target (normally next token) + used to combine e.g. 
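For reference, the temporary setup.cfg that wheel() writes while bdist_wheel runs, and removes again in the finally clause, looks like this; the bdist_wheel section only appears when pkg_data contains a true 'universal' value:

    [metadata]
    license-file = LICENSE
    [bdist_wheel]
    universal = 1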
comments before a BlockEntryToken to the
+        ScalarToken that follows it
+        """
+        c = self.comment
+        if c is None:
+            return
+        # don't push beyond last element
+        if isinstance(target, StreamEndToken):
+            return
+        delattr(self, '_comment')
+        tc = target.comment
+        if not tc:  # target comment, just insert
+            target._comment = c
+            return self
+        if c[0] and tc[0] or c[1] and tc[1]:
+            raise NotImplementedError('overlap in comment %r %r' % (c, tc))
+        if c[0]:
+            tc[0] = c[0]
+        if c[1]:
+            tc[1] = c[1]
+        return self
+
+    def split_comment(self):
+        """ split the post part of a comment, and return it
+        as comment to be added. Delete second part if [None, None]
+         abc:  # this goes to sequence
+           # this goes to first element
+           - first element
+        """
+        comment = self.comment
+        if comment is None or comment[0] is None:
+            return None  # nothing to do
+        ret_val = [comment[0], None]
+        if comment[1] is None:
+            delattr(self, '_comment')
+        return ret_val
+
+
+# class BOMToken(Token):
+#     id = '<byte order mark>'
+
+class DirectiveToken(Token):
+    id = '<directive>'
+
+    def __init__(self, name, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)
+        self.name = name
+        self.value = value
+
+
+class DocumentStartToken(Token):
+    id = '<document start>'
+
+
+class DocumentEndToken(Token):
+    id = '<document end>'
+
+
+class StreamStartToken(Token):
+    id = '<stream start>'
+
+    def __init__(self, start_mark=None, end_mark=None, encoding=None):
+        Token.__init__(self, start_mark, end_mark)
+        self.encoding = encoding
+
+
+class StreamEndToken(Token):
+    id = '<stream end>'
+
+
+class BlockSequenceStartToken(Token):
+    id = '<block sequence start>'
+
+
+class BlockMappingStartToken(Token):
+    id = '<block mapping start>'
+
+
+class BlockEndToken(Token):
+    id = '<block end>'
+
+
+class FlowSequenceStartToken(Token):
+    id = '['
+
+
+class FlowMappingStartToken(Token):
+    id = '{'
+
+
+class FlowSequenceEndToken(Token):
+    id = ']'
+
+
+class FlowMappingEndToken(Token):
+    id = '}'
+
+
+class KeyToken(Token):
+    id = '?'
+
+
+class ValueToken(Token):
+    id = ':'
+
+
+class BlockEntryToken(Token):
+    id = '-'
+
+
+class FlowEntryToken(Token):
+    id = ','
+
+
+class AliasToken(Token):
+    id = '<alias>'
+
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)
+        self.value = value
+
+
+class AnchorToken(Token):
+    id = '<anchor>'
+
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)
+        self.value = value
+
+
+class TagToken(Token):
+    id = '<tag>'
+
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)
+        self.value = value
+
+
+class ScalarToken(Token):
+    id = '<scalar>'
+
+    def __init__(self, value, plain, start_mark, end_mark, style=None):
+        Token.__init__(self, start_mark, end_mark)
+        self.value = value
+        self.plain = plain
+        self.style = style
+
+
+class CommentToken(Token):
+    id = '<comment>'
+
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)
+        self.value = value
--
cgit v1.2.1
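Finally, a short usage sketch of the comment slots Token provides; _comment is a two-slot list [post, pre], and the None marks below stand in for real stream marks purely for illustration:

    from ruamel.yaml.tokens import ScalarToken, CommentToken

    key = ScalarToken(u'a', True, None, None)  # value, plain, two marks
    key.add_post_comment(CommentToken(u'# eol\n', None, None))
    key.add_pre_comments([CommentToken(u'# above\n', None, None)])
    post, pre = key.comment
    assert post.value == u'# eol\n'
    assert pre[0].value == u'# above\n'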