diff options
-rw-r--r-- | README.rst | 11 | ||||
-rw-r--r-- | __init__.py | 2 | ||||
-rw-r--r-- | comments.py | 29 | ||||
-rw-r--r-- | compat.py | 14 | ||||
-rw-r--r-- | error.py | 34 | ||||
-rw-r--r-- | events.py | 33 | ||||
-rw-r--r-- | nodes.py | 7 | ||||
-rw-r--r-- | reader.py | 9 | ||||
-rw-r--r-- | scalarstring.py | 8 | ||||
-rw-r--r-- | tokens.py | 27 | ||||
-rw-r--r-- | util.py | 3 |
11 files changed, 136 insertions, 41 deletions
@@ -16,11 +16,14 @@ ruamel.yaml ChangeLog ========= -.. should insert 0.13.0 (2016-11-20): for next key +.. should insert NEXT: at the beginning of line for next key +0.13.1 (2016-11-22): + - optimisations on memory usage when loading YAML from large files (py3: -50%, py2: -85%) -NEXT: - - if load() or load_all() is called with only a single argument (stream or string) +0.13.0 (2016-11-20): + - if ``load()`` or ``load_all()`` is called with only a single argument + (stream or string) a UnsafeLoaderWarning will be issued once. If appropriate you can surpress this warning by filtering it. Explicitly supplying the ``Loader=ruamel.yaml.Loader`` argument, will also prevent it from being issued. You should however consider @@ -30,7 +33,7 @@ NEXT: `msinn <http://stackoverflow.com/users/7185467/msinn>`_) 0.12.18 (2016-11-16): - - another fix for numpy (re-reported by Nathanial Burdic) + - another fix for numpy (re-reported independently by PaulG & Nathanial Burdic) 0.12.17 (2016-11-15): - only the RoundTripLoader included the Resolver that supports YAML 1.2 diff --git a/__init__.py b/__init__.py index adfd7f8..006f948 100644 --- a/__init__.py +++ b/__init__.py @@ -9,7 +9,7 @@ from __future__ import absolute_import _package_data = dict( full_package_name="ruamel.yaml", - version_info=(0, 13, 0), + version_info=(0, 13, 1), author="Anthon van der Neut", author_email="a.van.der.neut@ruamel.eu", description="ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order", # NOQA diff --git a/comments.py b/comments.py index 25f0f8a..2fa66e6 100644 --- a/comments.py +++ b/comments.py @@ -27,6 +27,7 @@ tag_attrib = '_yaml_tag' class Comment(object): # sys.getsize tested the Comment objects, __slots__ makes them bigger # and adding self.end did not matter + __slots__ = 'comment', '_items', '_end', '_start', attrib = comment_attrib def __init__(self): @@ -73,6 +74,7 @@ def NoComment(): class Format(object): + __slots__ = '_flow_style', attrib = format_attrib def __init__(self): @@ -131,6 +133,7 @@ class LineCol(object): class Anchor(object): + __slots__ = 'value', 'always_dump', attrib = anchor_attrib def __init__(self): @@ -140,6 +143,7 @@ class Anchor(object): class Tag(object): """store tag information for roundtripping""" + __slots__ = 'value', attrib = tag_attrib def __init__(self): @@ -182,12 +186,12 @@ class CommentedBase(object): """overwrites any preceding comment lines on an object expects comment to be without `#` and possible have multiple lines """ - from .error import Mark + from .error import CommentMark from .tokens import CommentToken pre_comments = self._yaml_get_pre_comment() if comment[-1] == '\n': comment = comment[:-1] # strip final newline if there - start_mark = Mark(None, None, None, indent, None, None) + start_mark = CommentMark(indent) for com in comment.split('\n'): pre_comments.append(CommentToken('# ' + com + '\n', start_mark, None)) @@ -196,7 +200,7 @@ class CommentedBase(object): """ expects comment (before/after) to be without `#` and possible have multiple lines """ - from ruamel.yaml.error import Mark + from ruamel.yaml.error import CommentMark from ruamel.yaml.tokens import CommentToken def comment_token(s, mark): @@ -209,13 +213,13 @@ class CommentedBase(object): before = before[:-1] # strip final newline if there if after and after[-1] == '\n': after = after[:-1] # strip final newline if there - start_mark = Mark(None, None, None, indent, None, None) + start_mark = CommentMark(indent) c = self.ca.items.setdefault(key, [None, [], None, None]) if before: for com in before.split('\n'): c[1].append(comment_token(com, start_mark)) if after: - start_mark = Mark(None, None, None, after_indent, None, None) + start_mark = CommentMark(after_indent) if c[3] is None: c[3] = [] for com in after.split('\n'): @@ -237,7 +241,7 @@ class CommentedBase(object): the #. The column index is for the # mark """ from .tokens import CommentToken - from .error import Mark + from .error import CommentMark if column is None: column = self._yaml_get_column(key) if comment[0] != '#': @@ -246,7 +250,7 @@ class CommentedBase(object): if comment[0] == '#': comment = ' ' + comment column = 0 - start_mark = Mark(None, None, None, column, None, None) + start_mark = CommentMark(column) ct = [CommentToken(comment, start_mark, None), None] self._yaml_add_eol_comment(ct, key=key) @@ -292,7 +296,7 @@ class CommentedBase(object): class CommentedSeq(list, CommentedBase): - __slots__ = [Comment.attrib, ] + __slots__ = Comment.attrib, def _yaml_add_comment(self, comment, key=NoComment): if key is not NoComment: @@ -421,7 +425,6 @@ class CommentedMapView(Sized): class CommentedMapKeysView(CommentedMapView, Set): - __slots__ = () @classmethod @@ -438,7 +441,6 @@ class CommentedMapKeysView(CommentedMapView, Set): class CommentedMapItemsView(CommentedMapView, Set): - __slots__ = () @classmethod @@ -460,7 +462,6 @@ class CommentedMapItemsView(CommentedMapView, Set): class CommentedMapValuesView(CommentedMapView): - __slots__ = () def __contains__(self, value): @@ -475,7 +476,7 @@ class CommentedMapValuesView(CommentedMapView): class CommentedMap(ordereddict, CommentedBase): - __slots__ = [Comment.attrib, ] + __slots__ = Comment.attrib, def _yaml_add_comment(self, comment, key=NoComment, value=NoComment): """values is set to key to indicate a value attachment of comment""" @@ -730,11 +731,11 @@ class CommentedMap(ordereddict, CommentedBase): class CommentedOrderedMap(CommentedMap): - __slots__ = [Comment.attrib, ] + __slots__ = Comment.attrib, class CommentedSet(MutableSet, CommentedMap): - __slots__ = [Comment.attrib, 'odict'] + __slots__ = Comment.attrib, 'odict', def __init__(self, values=None): self.odict = ordereddict() @@ -101,6 +101,20 @@ DBG_NODE = 4 _debug = None +if _debug: + class ObjectCounter(object): + def __init__(self): + self.map = {} + + def __call__(self, k): + self.map[k] = self.map.get(k, 0) + 1 + + def dump(self): + for k in sorted(self.map): + print(k, '->', self.map[k]) + + object_counter = ObjectCounter() + # used from yaml util when testing def dbg(val=None): @@ -6,21 +6,40 @@ import warnings from ruamel.yaml.compat import utf8 -__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError', 'ReusedAnchorWarning', +__all__ = ['FileMark', 'StringMark', 'CommentMark', + 'YAMLError', 'MarkedYAMLError', 'ReusedAnchorWarning', 'UnsafeLoaderWarning'] -class Mark(object): - def __init__(self, name, index, line, column, buffer, pointer): +class StreamMark(object): + __slots__ = 'name', 'index', 'line', 'column', + + def __init__(self, name, index, line, column): self.name = name self.index = index self.line = line self.column = column + + def __str__(self): + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + return where + + +class FileMark(StreamMark): + __slots__ = () + + +class StringMark(StreamMark): + __slots__ = 'name', 'index', 'line', 'column', 'buffer', 'pointer', + + def __init__(self, name, index, line, column, buffer, pointer): + StreamMark.__init__(self, name, index, line, column) self.buffer = buffer self.pointer = pointer def get_snippet(self, indent=4, max_length=75): - if self.buffer is None: + if self.buffer is None: # always False return None head = '' start = self.pointer @@ -53,6 +72,13 @@ class Mark(object): return where +class CommentMark(object): + __slots__ = 'column', + + def __init__(self, column): + self.column = column + + class YAMLError(Exception): pass @@ -8,6 +8,8 @@ def CommentCheck(): class Event(object): + __slots__ = 'start_mark', 'end_mark', 'comment', + def __init__(self, start_mark=None, end_mark=None, comment=CommentCheck): self.start_mark = start_mark self.end_mark = end_mark @@ -28,28 +30,33 @@ class Event(object): class NodeEvent(Event): + __slots__ = 'anchor', + def __init__(self, anchor, start_mark=None, end_mark=None, comment=None): Event.__init__(self, start_mark, end_mark, comment) self.anchor = anchor class CollectionStartEvent(NodeEvent): + __slots__ = 'tag', 'implicit', 'flow_style', + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, flow_style=None, comment=None): - Event.__init__(self, start_mark, end_mark, comment) - self.anchor = anchor + NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) self.tag = tag self.implicit = implicit self.flow_style = flow_style class CollectionEndEvent(Event): - pass + __slots__ = () -# Implementations. +# Implementations. class StreamStartEvent(Event): + __slots__ = 'encoding', + def __init__(self, start_mark=None, end_mark=None, encoding=None, comment=None): Event.__init__(self, start_mark, end_mark, comment) @@ -57,10 +64,12 @@ class StreamStartEvent(Event): class StreamEndEvent(Event): - pass + __slots__ = () class DocumentStartEvent(Event): + __slots__ = 'explicit', 'version', 'tags', + def __init__(self, start_mark=None, end_mark=None, explicit=None, version=None, tags=None, comment=None): Event.__init__(self, start_mark, end_mark, comment) @@ -70,6 +79,8 @@ class DocumentStartEvent(Event): class DocumentEndEvent(Event): + __slots__ = 'explicit', + def __init__(self, start_mark=None, end_mark=None, explicit=None, comment=None): Event.__init__(self, start_mark, end_mark, comment) @@ -77,10 +88,12 @@ class DocumentEndEvent(Event): class AliasEvent(NodeEvent): - pass + __slots__ = () class ScalarEvent(NodeEvent): + __slots__ = 'tag', 'implicit', 'value', 'style', + def __init__(self, anchor, tag, implicit, value, start_mark=None, end_mark=None, style=None, comment=None): NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) @@ -91,16 +104,16 @@ class ScalarEvent(NodeEvent): class SequenceStartEvent(CollectionStartEvent): - pass + __slots__ = () class SequenceEndEvent(CollectionEndEvent): - pass + __slots__ = () class MappingStartEvent(CollectionStartEvent): - pass + __slots__ = () class MappingEndEvent(CollectionEndEvent): - pass + __slots__ = () @@ -4,6 +4,8 @@ from __future__ import print_function class Node(object): + __slots__ = 'tag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor', + def __init__(self, tag, value, start_mark, end_mark, comment=None): self.tag = tag self.value = value @@ -62,6 +64,7 @@ class ScalarNode(Node): | -> literal style > -> folding style """ + __slots__ = 'style', id = 'scalar' def __init__(self, tag, value, start_mark=None, end_mark=None, style=None, @@ -71,6 +74,8 @@ class ScalarNode(Node): class CollectionNode(Node): + __slots__ = 'flow_style', 'anchor', + def __init__(self, tag, value, start_mark=None, end_mark=None, flow_style=None, comment=None, anchor=None): Node.__init__(self, tag, value, start_mark, end_mark, comment=comment) @@ -79,8 +84,10 @@ class CollectionNode(Node): class SequenceNode(CollectionNode): + __slots__ = () id = 'sequence' class MappingNode(CollectionNode): + __slots__ = () id = 'mapping' @@ -23,7 +23,7 @@ from __future__ import absolute_import import codecs import re -from ruamel.yaml.error import YAMLError, Mark +from ruamel.yaml.error import YAMLError, FileMark, StringMark from ruamel.yaml.compat import text_type, binary_type, PY3 __all__ = ['Reader', 'ReaderError'] @@ -122,11 +122,10 @@ class Reader(object): def get_mark(self): if self.stream is None: - return Mark(self.name, self.index, self.line, self.column, - self.buffer, self.pointer) + return StringMark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) else: - return Mark(self.name, self.index, self.line, self.column, - None, None) + return FileMark(self.name, self.index, self.line, self.column) def determine_encoding(self): while not self.eof and (self.raw_buffer is None or diff --git a/scalarstring.py b/scalarstring.py index eb10910..c6e5734 100644 --- a/scalarstring.py +++ b/scalarstring.py @@ -10,21 +10,29 @@ __all__ = ["ScalarString", "PreservedScalarString", "SingleQuotedScalarString", class ScalarString(text_type): + __slots__ = () + def __new__(cls, *args, **kw): return text_type.__new__(cls, *args, **kw) class PreservedScalarString(ScalarString): + __slots__ = () + def __new__(cls, value): return ScalarString.__new__(cls, value) class SingleQuotedScalarString(ScalarString): + __slots__ = () + def __new__(cls, value): return ScalarString.__new__(cls, value) class DoubleQuotedScalarString(ScalarString): + __slots__ = () + def __new__(cls, value): return ScalarString.__new__(cls, value) @@ -3,13 +3,15 @@ class Token(object): + __slots__ = 'start_mark', 'end_mark', '_comment', + def __init__(self, start_mark, end_mark): self.start_mark = start_mark self.end_mark = end_mark def __repr__(self): - attributes = [key for key in self.__dict__ - if not key.endswith('_mark')] + attributes = [key for key in self.__slots__ if not key.endswith('_mark') and + hasattr('self', key)] attributes.sort() arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) for key in attributes]) @@ -77,6 +79,7 @@ class Token(object): # id = '<byte order mark>' class DirectiveToken(Token): + __slots__ = 'name', 'value', id = '<directive>' def __init__(self, name, value, start_mark, end_mark): @@ -86,14 +89,17 @@ class DirectiveToken(Token): class DocumentStartToken(Token): + __slots__ = () id = '<document start>' class DocumentEndToken(Token): + __slots__ = () id = '<document end>' class StreamStartToken(Token): + __slots__ = 'encoding', id = '<stream start>' def __init__(self, start_mark=None, end_mark=None, encoding=None): @@ -102,54 +108,67 @@ class StreamStartToken(Token): class StreamEndToken(Token): + __slots__ = () id = '<stream end>' class BlockSequenceStartToken(Token): + __slots__ = () id = '<block sequence start>' class BlockMappingStartToken(Token): + __slots__ = () id = '<block mapping start>' class BlockEndToken(Token): + __slots__ = () id = '<block end>' class FlowSequenceStartToken(Token): + __slots__ = () id = '[' class FlowMappingStartToken(Token): + __slots__ = () id = '{' class FlowSequenceEndToken(Token): + __slots__ = () id = ']' class FlowMappingEndToken(Token): + __slots__ = () id = '}' class KeyToken(Token): + __slots__ = () id = '?' class ValueToken(Token): + __slots__ = () id = ':' class BlockEntryToken(Token): + __slots__ = () id = '-' class FlowEntryToken(Token): + __slots__ = () id = ',' class AliasToken(Token): + __slots__ = 'value', id = '<alias>' def __init__(self, value, start_mark, end_mark): @@ -158,6 +177,7 @@ class AliasToken(Token): class AnchorToken(Token): + __slots__ = 'value', id = '<anchor>' def __init__(self, value, start_mark, end_mark): @@ -166,6 +186,7 @@ class AnchorToken(Token): class TagToken(Token): + __slots__ = 'value', id = '<tag>' def __init__(self, value, start_mark, end_mark): @@ -174,6 +195,7 @@ class TagToken(Token): class ScalarToken(Token): + __slots__ = 'value', 'plain', 'style', id = '<scalar>' def __init__(self, value, plain, start_mark, end_mark, style=None): @@ -184,6 +206,7 @@ class ScalarToken(Token): class CommentToken(Token): + __slots__ = 'value', 'pre_done', id = '<comment>' def __init__(self, value, start_mark, end_mark): @@ -8,7 +8,6 @@ from __future__ import print_function from __future__ import absolute_import from .compat import text_type, binary_type -from .main import round_trip_load # originally as comment @@ -24,6 +23,8 @@ def load_yaml_guess_indent(stream, **kw): - if there are no block sequences, indent is taken from nested mappings, block sequence indent is unset (None) in that case """ + from .main import round_trip_load + # load a yaml file guess the indentation, if you use TABs ... def leading_spaces(l): idx = 0 |