summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anthon van der Neut <anthon@mnt.org>, 2016-11-22 09:24:43 +0100
committer: Anthon van der Neut <anthon@mnt.org>, 2016-11-22 09:24:43 +0100
commit: 77559065be3f3e8a76d76e733c26bb9a149f7b42 (patch)
tree: 37bb94598d77b0eb879bb958c7dad76f7f51fc8f
parent: a72e8cf2f64855d29106f4f5b1687e09bcc9284e (diff)
download: ruamel.yaml-77559065be3f3e8a76d76e733c26bb9a149f7b42.tar.gz
memory usage optimisations (0.13.1)
-rw-r--r-- README.rst 11
-rw-r--r-- __init__.py 2
-rw-r--r-- comments.py 29
-rw-r--r-- compat.py 14
-rw-r--r-- error.py 34
-rw-r--r-- events.py 33
-rw-r--r-- nodes.py 7
-rw-r--r-- reader.py 9
-rw-r--r-- scalarstring.py 8
-rw-r--r-- tokens.py 27
-rw-r--r-- util.py 3
11 files changed, 136 insertions, 41 deletions
diff --git a/README.rst b/README.rst
index a60018b..8b6ce97 100644
--- a/README.rst
+++ b/README.rst
@@ -16,11 +16,14 @@ ruamel.yaml
ChangeLog
=========
-.. should insert 0.13.0 (2016-11-20): for next key
+.. should insert NEXT: at the beginning of line for next key
+0.13.1 (2016-11-22):
+ - optimisations on memory usage when loading YAML from large files (py3: -50%, py2: -85%)
-NEXT:
- - if load() or load_all() is called with only a single argument (stream or string)
+0.13.0 (2016-11-20):
+ - if ``load()`` or ``load_all()`` is called with only a single argument
+ (stream or string)
an UnsafeLoaderWarning will be issued once. If appropriate you can suppress this
warning by filtering it. Explicitly supplying the ``Loader=ruamel.yaml.Loader``
argument, will also prevent it from being issued. You should however consider
@@ -30,7 +33,7 @@ NEXT:
`msinn <http://stackoverflow.com/users/7185467/msinn>`_)
0.12.18 (2016-11-16):
- - another fix for numpy (re-reported by Nathanial Burdic)
+ - another fix for numpy (re-reported independently by PaulG & Nathanial Burdic)
0.12.17 (2016-11-15):
- only the RoundTripLoader included the Resolver that supports YAML 1.2
diff --git a/__init__.py b/__init__.py
index adfd7f8..006f948 100644
--- a/__init__.py
+++ b/__init__.py
@@ -9,7 +9,7 @@ from __future__ import absolute_import
_package_data = dict(
full_package_name="ruamel.yaml",
- version_info=(0, 13, 0),
+ version_info=(0, 13, 1),
author="Anthon van der Neut",
author_email="a.van.der.neut@ruamel.eu",
description="ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order", # NOQA
diff --git a/comments.py b/comments.py
index 25f0f8a..2fa66e6 100644
--- a/comments.py
+++ b/comments.py
@@ -27,6 +27,7 @@ tag_attrib = '_yaml_tag'
class Comment(object):
# sys.getsize tested the Comment objects, __slots__ makes them bigger
# and adding self.end did not matter
+ __slots__ = 'comment', '_items', '_end', '_start',
attrib = comment_attrib
def __init__(self):
@@ -73,6 +74,7 @@ def NoComment():
class Format(object):
+ __slots__ = '_flow_style',
attrib = format_attrib
def __init__(self):
@@ -131,6 +133,7 @@ class LineCol(object):
class Anchor(object):
+ __slots__ = 'value', 'always_dump',
attrib = anchor_attrib
def __init__(self):
@@ -140,6 +143,7 @@ class Anchor(object):
class Tag(object):
"""store tag information for roundtripping"""
+ __slots__ = 'value',
attrib = tag_attrib
def __init__(self):
@@ -182,12 +186,12 @@ class CommentedBase(object):
"""overwrites any preceding comment lines on an object
expects comment to be without `#` and possible have multiple lines
"""
- from .error import Mark
+ from .error import CommentMark
from .tokens import CommentToken
pre_comments = self._yaml_get_pre_comment()
if comment[-1] == '\n':
comment = comment[:-1] # strip final newline if there
- start_mark = Mark(None, None, None, indent, None, None)
+ start_mark = CommentMark(indent)
for com in comment.split('\n'):
pre_comments.append(CommentToken('# ' + com + '\n', start_mark, None))
@@ -196,7 +200,7 @@ class CommentedBase(object):
"""
expects comment (before/after) to be without `#` and possible have multiple lines
"""
- from ruamel.yaml.error import Mark
+ from ruamel.yaml.error import CommentMark
from ruamel.yaml.tokens import CommentToken
def comment_token(s, mark):
@@ -209,13 +213,13 @@ class CommentedBase(object):
before = before[:-1] # strip final newline if there
if after and after[-1] == '\n':
after = after[:-1] # strip final newline if there
- start_mark = Mark(None, None, None, indent, None, None)
+ start_mark = CommentMark(indent)
c = self.ca.items.setdefault(key, [None, [], None, None])
if before:
for com in before.split('\n'):
c[1].append(comment_token(com, start_mark))
if after:
- start_mark = Mark(None, None, None, after_indent, None, None)
+ start_mark = CommentMark(after_indent)
if c[3] is None:
c[3] = []
for com in after.split('\n'):
@@ -237,7 +241,7 @@ class CommentedBase(object):
the #. The column index is for the # mark
"""
from .tokens import CommentToken
- from .error import Mark
+ from .error import CommentMark
if column is None:
column = self._yaml_get_column(key)
if comment[0] != '#':
@@ -246,7 +250,7 @@ class CommentedBase(object):
if comment[0] == '#':
comment = ' ' + comment
column = 0
- start_mark = Mark(None, None, None, column, None, None)
+ start_mark = CommentMark(column)
ct = [CommentToken(comment, start_mark, None), None]
self._yaml_add_eol_comment(ct, key=key)
@@ -292,7 +296,7 @@ class CommentedBase(object):
class CommentedSeq(list, CommentedBase):
- __slots__ = [Comment.attrib, ]
+ __slots__ = Comment.attrib,
def _yaml_add_comment(self, comment, key=NoComment):
if key is not NoComment:
@@ -421,7 +425,6 @@ class CommentedMapView(Sized):
class CommentedMapKeysView(CommentedMapView, Set):
-
__slots__ = ()
@classmethod
@@ -438,7 +441,6 @@ class CommentedMapKeysView(CommentedMapView, Set):
class CommentedMapItemsView(CommentedMapView, Set):
-
__slots__ = ()
@classmethod
@@ -460,7 +462,6 @@ class CommentedMapItemsView(CommentedMapView, Set):
class CommentedMapValuesView(CommentedMapView):
-
__slots__ = ()
def __contains__(self, value):
@@ -475,7 +476,7 @@ class CommentedMapValuesView(CommentedMapView):
class CommentedMap(ordereddict, CommentedBase):
- __slots__ = [Comment.attrib, ]
+ __slots__ = Comment.attrib,
def _yaml_add_comment(self, comment, key=NoComment, value=NoComment):
"""values is set to key to indicate a value attachment of comment"""
@@ -730,11 +731,11 @@ class CommentedMap(ordereddict, CommentedBase):
class CommentedOrderedMap(CommentedMap):
- __slots__ = [Comment.attrib, ]
+ __slots__ = Comment.attrib,
class CommentedSet(MutableSet, CommentedMap):
- __slots__ = [Comment.attrib, 'odict']
+ __slots__ = Comment.attrib, 'odict',
def __init__(self, values=None):
self.odict = ordereddict()
diff --git a/compat.py b/compat.py
index 1cbe923..7a5ba06 100644
--- a/compat.py
+++ b/compat.py
@@ -101,6 +101,20 @@ DBG_NODE = 4
_debug = None
+if _debug:
+ class ObjectCounter(object):
+ def __init__(self):
+ self.map = {}
+
+ def __call__(self, k):
+ self.map[k] = self.map.get(k, 0) + 1
+
+ def dump(self):
+ for k in sorted(self.map):
+ print(k, '->', self.map[k])
+
+ object_counter = ObjectCounter()
+
# used from yaml util when testing
def dbg(val=None):
diff --git a/error.py b/error.py
index 20c7979..5015828 100644
--- a/error.py
+++ b/error.py
@@ -6,21 +6,40 @@ import warnings
from ruamel.yaml.compat import utf8
-__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError', 'ReusedAnchorWarning',
+__all__ = ['FileMark', 'StringMark', 'CommentMark',
+ 'YAMLError', 'MarkedYAMLError', 'ReusedAnchorWarning',
'UnsafeLoaderWarning']
-class Mark(object):
- def __init__(self, name, index, line, column, buffer, pointer):
+class StreamMark(object):
+ __slots__ = 'name', 'index', 'line', 'column',
+
+ def __init__(self, name, index, line, column):
self.name = name
self.index = index
self.line = line
self.column = column
+
+ def __str__(self):
+ where = " in \"%s\", line %d, column %d" \
+ % (self.name, self.line+1, self.column+1)
+ return where
+
+
+class FileMark(StreamMark):
+ __slots__ = ()
+
+
+class StringMark(StreamMark):
+ __slots__ = 'name', 'index', 'line', 'column', 'buffer', 'pointer',
+
+ def __init__(self, name, index, line, column, buffer, pointer):
+ StreamMark.__init__(self, name, index, line, column)
self.buffer = buffer
self.pointer = pointer
def get_snippet(self, indent=4, max_length=75):
- if self.buffer is None:
+ if self.buffer is None: # always False
return None
head = ''
start = self.pointer
@@ -53,6 +72,13 @@ class Mark(object):
return where
+class CommentMark(object):
+ __slots__ = 'column',
+
+ def __init__(self, column):
+ self.column = column
+
+
class YAMLError(Exception):
pass
diff --git a/events.py b/events.py
index 7667c01..a92be74 100644
--- a/events.py
+++ b/events.py
@@ -8,6 +8,8 @@ def CommentCheck():
class Event(object):
+ __slots__ = 'start_mark', 'end_mark', 'comment',
+
def __init__(self, start_mark=None, end_mark=None, comment=CommentCheck):
self.start_mark = start_mark
self.end_mark = end_mark
@@ -28,28 +30,33 @@ class Event(object):
class NodeEvent(Event):
+ __slots__ = 'anchor',
+
def __init__(self, anchor, start_mark=None, end_mark=None, comment=None):
Event.__init__(self, start_mark, end_mark, comment)
self.anchor = anchor
class CollectionStartEvent(NodeEvent):
+ __slots__ = 'tag', 'implicit', 'flow_style',
+
def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
flow_style=None, comment=None):
- Event.__init__(self, start_mark, end_mark, comment)
- self.anchor = anchor
+ NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
self.tag = tag
self.implicit = implicit
self.flow_style = flow_style
class CollectionEndEvent(Event):
- pass
+ __slots__ = ()
-# Implementations.
+# Implementations.
class StreamStartEvent(Event):
+ __slots__ = 'encoding',
+
def __init__(self, start_mark=None, end_mark=None, encoding=None,
comment=None):
Event.__init__(self, start_mark, end_mark, comment)
@@ -57,10 +64,12 @@ class StreamStartEvent(Event):
class StreamEndEvent(Event):
- pass
+ __slots__ = ()
class DocumentStartEvent(Event):
+ __slots__ = 'explicit', 'version', 'tags',
+
def __init__(self, start_mark=None, end_mark=None,
explicit=None, version=None, tags=None, comment=None):
Event.__init__(self, start_mark, end_mark, comment)
@@ -70,6 +79,8 @@ class DocumentStartEvent(Event):
class DocumentEndEvent(Event):
+ __slots__ = 'explicit',
+
def __init__(self, start_mark=None, end_mark=None,
explicit=None, comment=None):
Event.__init__(self, start_mark, end_mark, comment)
@@ -77,10 +88,12 @@ class DocumentEndEvent(Event):
class AliasEvent(NodeEvent):
- pass
+ __slots__ = ()
class ScalarEvent(NodeEvent):
+ __slots__ = 'tag', 'implicit', 'value', 'style',
+
def __init__(self, anchor, tag, implicit, value,
start_mark=None, end_mark=None, style=None, comment=None):
NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
@@ -91,16 +104,16 @@ class ScalarEvent(NodeEvent):
class SequenceStartEvent(CollectionStartEvent):
- pass
+ __slots__ = ()
class SequenceEndEvent(CollectionEndEvent):
- pass
+ __slots__ = ()
class MappingStartEvent(CollectionStartEvent):
- pass
+ __slots__ = ()
class MappingEndEvent(CollectionEndEvent):
- pass
+ __slots__ = ()
diff --git a/nodes.py b/nodes.py
index 214284a..b518513 100644
--- a/nodes.py
+++ b/nodes.py
@@ -4,6 +4,8 @@ from __future__ import print_function
class Node(object):
+ __slots__ = 'tag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor',
+
def __init__(self, tag, value, start_mark, end_mark, comment=None):
self.tag = tag
self.value = value
@@ -62,6 +64,7 @@ class ScalarNode(Node):
| -> literal style
> -> folding style
"""
+ __slots__ = 'style',
id = 'scalar'
def __init__(self, tag, value, start_mark=None, end_mark=None, style=None,
@@ -71,6 +74,8 @@ class ScalarNode(Node):
class CollectionNode(Node):
+ __slots__ = 'flow_style', 'anchor',
+
def __init__(self, tag, value, start_mark=None, end_mark=None,
flow_style=None, comment=None, anchor=None):
Node.__init__(self, tag, value, start_mark, end_mark, comment=comment)
@@ -79,8 +84,10 @@ class CollectionNode(Node):
class SequenceNode(CollectionNode):
+ __slots__ = ()
id = 'sequence'
class MappingNode(CollectionNode):
+ __slots__ = ()
id = 'mapping'
diff --git a/reader.py b/reader.py
index e234b71..63307b7 100644
--- a/reader.py
+++ b/reader.py
@@ -23,7 +23,7 @@ from __future__ import absolute_import
import codecs
import re
-from ruamel.yaml.error import YAMLError, Mark
+from ruamel.yaml.error import YAMLError, FileMark, StringMark
from ruamel.yaml.compat import text_type, binary_type, PY3
__all__ = ['Reader', 'ReaderError']
@@ -122,11 +122,10 @@ class Reader(object):
def get_mark(self):
if self.stream is None:
- return Mark(self.name, self.index, self.line, self.column,
- self.buffer, self.pointer)
+ return StringMark(self.name, self.index, self.line, self.column,
+ self.buffer, self.pointer)
else:
- return Mark(self.name, self.index, self.line, self.column,
- None, None)
+ return FileMark(self.name, self.index, self.line, self.column)
def determine_encoding(self):
while not self.eof and (self.raw_buffer is None or
diff --git a/scalarstring.py b/scalarstring.py
index eb10910..c6e5734 100644
--- a/scalarstring.py
+++ b/scalarstring.py
@@ -10,21 +10,29 @@ __all__ = ["ScalarString", "PreservedScalarString", "SingleQuotedScalarString",
class ScalarString(text_type):
+ __slots__ = ()
+
def __new__(cls, *args, **kw):
return text_type.__new__(cls, *args, **kw)
class PreservedScalarString(ScalarString):
+ __slots__ = ()
+
def __new__(cls, value):
return ScalarString.__new__(cls, value)
class SingleQuotedScalarString(ScalarString):
+ __slots__ = ()
+
def __new__(cls, value):
return ScalarString.__new__(cls, value)
class DoubleQuotedScalarString(ScalarString):
+ __slots__ = ()
+
def __new__(cls, value):
return ScalarString.__new__(cls, value)
diff --git a/tokens.py b/tokens.py
index bd97785..56adbe6 100644
--- a/tokens.py
+++ b/tokens.py
@@ -3,13 +3,15 @@
class Token(object):
+ __slots__ = 'start_mark', 'end_mark', '_comment',
+
def __init__(self, start_mark, end_mark):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
- attributes = [key for key in self.__dict__
- if not key.endswith('_mark')]
+ attributes = [key for key in self.__slots__ if not key.endswith('_mark') and
+ hasattr('self', key)]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
@@ -77,6 +79,7 @@ class Token(object):
# id = '<byte order mark>'
class DirectiveToken(Token):
+ __slots__ = 'name', 'value',
id = '<directive>'
def __init__(self, name, value, start_mark, end_mark):
@@ -86,14 +89,17 @@ class DirectiveToken(Token):
class DocumentStartToken(Token):
+ __slots__ = ()
id = '<document start>'
class DocumentEndToken(Token):
+ __slots__ = ()
id = '<document end>'
class StreamStartToken(Token):
+ __slots__ = 'encoding',
id = '<stream start>'
def __init__(self, start_mark=None, end_mark=None, encoding=None):
@@ -102,54 +108,67 @@ class StreamStartToken(Token):
class StreamEndToken(Token):
+ __slots__ = ()
id = '<stream end>'
class BlockSequenceStartToken(Token):
+ __slots__ = ()
id = '<block sequence start>'
class BlockMappingStartToken(Token):
+ __slots__ = ()
id = '<block mapping start>'
class BlockEndToken(Token):
+ __slots__ = ()
id = '<block end>'
class FlowSequenceStartToken(Token):
+ __slots__ = ()
id = '['
class FlowMappingStartToken(Token):
+ __slots__ = ()
id = '{'
class FlowSequenceEndToken(Token):
+ __slots__ = ()
id = ']'
class FlowMappingEndToken(Token):
+ __slots__ = ()
id = '}'
class KeyToken(Token):
+ __slots__ = ()
id = '?'
class ValueToken(Token):
+ __slots__ = ()
id = ':'
class BlockEntryToken(Token):
+ __slots__ = ()
id = '-'
class FlowEntryToken(Token):
+ __slots__ = ()
id = ','
class AliasToken(Token):
+ __slots__ = 'value',
id = '<alias>'
def __init__(self, value, start_mark, end_mark):
@@ -158,6 +177,7 @@ class AliasToken(Token):
class AnchorToken(Token):
+ __slots__ = 'value',
id = '<anchor>'
def __init__(self, value, start_mark, end_mark):
@@ -166,6 +186,7 @@ class AnchorToken(Token):
class TagToken(Token):
+ __slots__ = 'value',
id = '<tag>'
def __init__(self, value, start_mark, end_mark):
@@ -174,6 +195,7 @@ class TagToken(Token):
class ScalarToken(Token):
+ __slots__ = 'value', 'plain', 'style',
id = '<scalar>'
def __init__(self, value, plain, start_mark, end_mark, style=None):
@@ -184,6 +206,7 @@ class ScalarToken(Token):
class CommentToken(Token):
+ __slots__ = 'value', 'pre_done',
id = '<comment>'
def __init__(self, value, start_mark, end_mark):
diff --git a/util.py b/util.py
index afc46fb..25c64e4 100644
--- a/util.py
+++ b/util.py
@@ -8,7 +8,6 @@ from __future__ import print_function
from __future__ import absolute_import
from .compat import text_type, binary_type
-from .main import round_trip_load
# originally as comment
@@ -24,6 +23,8 @@ def load_yaml_guess_indent(stream, **kw):
- if there are no block sequences, indent is taken from nested mappings, block sequence
indent is unset (None) in that case
"""
+ from .main import round_trip_load
+
# load a yaml file guess the indentation, if you use TABs ...
def leading_spaces(l):
idx = 0