11 files changed, 136 insertions, 41 deletions
diff --git a/README.rst b/README.rst
index a60018b..8b6ce97 100644
--- a/README.rst
+++ b/README.rst
@@ -16,11 +16,14 @@ ruamel.yaml
 ChangeLog
 =========
 
-.. should insert 0.13.0 (2016-11-20): for next key
+.. should insert NEXT: at the beginning of line for next key
 
+0.13.1 (2016-11-22):
+  - optimisations on memory usage when loading YAML from large files (py3: -50%, py2: -85%)
 
-NEXT:
-  - if load() or load_all() is called with only a single argument (stream or string)
+0.13.0 (2016-11-20):
+  - if ``load()`` or ``load_all()`` is called with only a single argument
+    (stream or string)
     a UnsafeLoaderWarning will be issued once. If appropriate you can surpress this
     warning by filtering it. Explicitly supplying the ``Loader=ruamel.yaml.Loader``
     argument, will also prevent it from being issued. You should however consider
@@ -30,7 +33,7 @@ NEXT:
     `msinn <http://stackoverflow.com/users/7185467/msinn>`_)
 
 0.12.18 (2016-11-16):
-  - another fix for numpy (re-reported by Nathanial Burdic)
+  - another fix for numpy (re-reported independently by PaulG & Nathanial Burdic)
 
 0.12.17 (2016-11-15):
   - only the RoundTripLoader included the Resolver that supports YAML 1.2
diff --git a/__init__.py b/__init__.py
index adfd7f8..006f948 100644
--- a/__init__.py
+++ b/__init__.py
@@ -9,7 +9,7 @@ from __future__ import absolute_import
 
 _package_data = dict(
     full_package_name="ruamel.yaml",
-    version_info=(0, 13, 0),
+    version_info=(0, 13, 1),
     author="Anthon van der Neut",
     author_email="a.van.der.neut@ruamel.eu",
     description="ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order",  # NOQA
diff --git a/comments.py b/comments.py
index 25f0f8a..2fa66e6 100644
--- a/comments.py
+++ b/comments.py
@@ -27,6 +27,7 @@ tag_attrib = '_yaml_tag'
 class Comment(object):
     # sys.getsize tested the Comment objects, __slots__ makes them bigger
     # and adding self.end did not matter
+    __slots__ = 'comment', '_items', '_end', '_start',
     attrib = comment_attrib
 
     def __init__(self):
@@ -73,6 +74,7 @@ def NoComment():
 
 
 class Format(object):
+    __slots__ = '_flow_style',
     attrib = format_attrib
 
     def __init__(self):
@@ -131,6 +133,7 @@ class LineCol(object):
 
 
 class Anchor(object):
+    __slots__ = 'value', 'always_dump',
     attrib = anchor_attrib
 
     def __init__(self):
@@ -140,6 +143,7 @@ class Anchor(object):
 
 class Tag(object):
     """store tag information for roundtripping"""
+    __slots__ = 'value',
     attrib = tag_attrib
 
     def __init__(self):
@@ -182,12 +186,12 @@ class CommentedBase(object):
         """overwrites any preceding comment lines on an object
         expects comment to be without `#` and possible have multiple lines
         """
-        from .error import Mark
+        from .error import CommentMark
         from .tokens import CommentToken
         pre_comments = self._yaml_get_pre_comment()
         if comment[-1] == '\n':
             comment = comment[:-1]  # strip final newline if there
-        start_mark = Mark(None, None, None, indent, None, None)
+        start_mark = CommentMark(indent)
         for com in comment.split('\n'):
             pre_comments.append(CommentToken('# ' + com + '\n', start_mark, None))
 
@@ -196,7 +200,7 @@ class CommentedBase(object):
         """
         expects comment (before/after) to be without `#` and possible have multiple lines
         """
-        from ruamel.yaml.error import Mark
+        from ruamel.yaml.error import CommentMark
         from ruamel.yaml.tokens import CommentToken
 
         def comment_token(s, mark):
@@ -209,13 +213,13 @@ class CommentedBase(object):
             before = before[:-1]  # strip final newline if there
         if after and after[-1] == '\n':
             after = after[:-1]  # strip final newline if there
-        start_mark = Mark(None, None, None, indent, None, None)
+        start_mark = CommentMark(indent)
         c = self.ca.items.setdefault(key, [None, [], None, None])
         if before:
             for com in before.split('\n'):
                 c[1].append(comment_token(com, start_mark))
         if after:
-            start_mark = Mark(None, None, None, after_indent, None, None)
+            start_mark = CommentMark(after_indent)
             if c[3] is None:
                 c[3] = []
             for com in after.split('\n'):
@@ -237,7 +241,7 @@ class CommentedBase(object):
         the #. The column index is for the # mark
         """
         from .tokens import CommentToken
-        from .error import Mark
+        from .error import CommentMark
         if column is None:
             column = self._yaml_get_column(key)
         if comment[0] != '#':
@@ -246,7 +250,7 @@ class CommentedBase(object):
             if comment[0] == '#':
                 comment = ' ' + comment
                 column = 0
-        start_mark = Mark(None, None, None, column, None, None)
+        start_mark = CommentMark(column)
         ct = [CommentToken(comment, start_mark, None), None]
         self._yaml_add_eol_comment(ct, key=key)
 
@@ -292,7 +296,7 @@ class CommentedBase(object):
 
 
 class CommentedSeq(list, CommentedBase):
-    __slots__ = [Comment.attrib, ]
+    __slots__ = Comment.attrib,
 
     def _yaml_add_comment(self, comment, key=NoComment):
         if key is not NoComment:
@@ -421,7 +425,6 @@ class CommentedMapView(Sized):
 
 
 class CommentedMapKeysView(CommentedMapView, Set):
-
     __slots__ = ()
 
     @classmethod
@@ -438,7 +441,6 @@ class CommentedMapKeysView(CommentedMapView, Set):
 
 
 class CommentedMapItemsView(CommentedMapView, Set):
-
     __slots__ = ()
 
     @classmethod
@@ -460,7 +462,6 @@ class CommentedMapItemsView(CommentedMapView, Set):
 
 
 class CommentedMapValuesView(CommentedMapView):
-
     __slots__ = ()
 
     def __contains__(self, value):
@@ -475,7 +476,7 @@ class CommentedMapValuesView(CommentedMapView):
 
 
 class CommentedMap(ordereddict, CommentedBase):
-    __slots__ = [Comment.attrib, ]
+    __slots__ = Comment.attrib,
 
     def _yaml_add_comment(self, comment, key=NoComment, value=NoComment):
         """values is set to key to indicate a value attachment of comment"""
@@ -730,11 +731,11 @@ class CommentedMap(ordereddict, CommentedBase):
 
 
 class CommentedOrderedMap(CommentedMap):
-    __slots__ = [Comment.attrib, ]
+    __slots__ = Comment.attrib,
 
 
 class CommentedSet(MutableSet, CommentedMap):
-    __slots__ = [Comment.attrib, 'odict']
+    __slots__ = Comment.attrib, 'odict',
 
     def __init__(self, values=None):
         self.odict = ordereddict()
diff --git a/compat.py b/compat.py
index 1cbe923..7a5ba06 100644
--- a/compat.py
+++ b/compat.py
@@ -101,6 +101,20 @@ DBG_NODE = 4
 
 _debug = None
 
+if _debug:
+    class ObjectCounter(object):
+        def __init__(self):
+            self.map = {}
+
+        def __call__(self, k):
+            self.map[k] = self.map.get(k, 0) + 1
+
+        def dump(self):
+            for k in sorted(self.map):
+                print(k, '->', self.map[k])
+
+    object_counter = ObjectCounter()
+
 
 # used from yaml util when testing
 def dbg(val=None):
diff --git a/error.py b/error.py
index 20c7979..5015828 100644
--- a/error.py
+++ b/error.py
@@ -6,21 +6,40 @@ import warnings
 
 from ruamel.yaml.compat import utf8
 
-__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError', 'ReusedAnchorWarning',
+__all__ = ['FileMark', 'StringMark', 'CommentMark',
+           'YAMLError', 'MarkedYAMLError', 'ReusedAnchorWarning',
            'UnsafeLoaderWarning']
 
 
-class Mark(object):
-    def __init__(self, name, index, line, column, buffer, pointer):
+class StreamMark(object):
+    __slots__ = 'name', 'index', 'line', 'column',
+
+    def __init__(self, name, index, line, column):
         self.name = name
         self.index = index
         self.line = line
         self.column = column
+
+    def __str__(self):
+        where = "  in \"%s\", line %d, column %d"   \
+                % (self.name, self.line+1, self.column+1)
+        return where
+
+
+class FileMark(StreamMark):
+    __slots__ = ()
+
+
+class StringMark(StreamMark):
+    __slots__ = 'buffer', 'pointer',  # other slots are inherited from StreamMark
+
+    def __init__(self, name, index, line, column, buffer, pointer):
+        StreamMark.__init__(self, name, index, line, column)
         self.buffer = buffer
         self.pointer = pointer
 
     def get_snippet(self, indent=4, max_length=75):
-        if self.buffer is None:
+        if self.buffer is None:  # always False
             return None
         head = ''
         start = self.pointer
@@ -53,6 +72,13 @@ class Mark(object):
         return where
 
 
+class CommentMark(object):
+    __slots__ = 'column',
+
+    def __init__(self, column):
+        self.column = column
+
+
 class YAMLError(Exception):
     pass
 
diff --git a/events.py b/events.py
index 7667c01..a92be74 100644
--- a/events.py
+++ b/events.py
@@ -8,6 +8,8 @@ def CommentCheck():
 
 
 class Event(object):
+    __slots__ = 'start_mark', 'end_mark', 'comment',
+
     def __init__(self, start_mark=None, end_mark=None, comment=CommentCheck):
         self.start_mark = start_mark
         self.end_mark = end_mark
@@ -28,28 +30,33 @@ class Event(object):
 
 
 class NodeEvent(Event):
+    __slots__ = 'anchor',
+
     def __init__(self, anchor, start_mark=None, end_mark=None, comment=None):
         Event.__init__(self, start_mark, end_mark, comment)
         self.anchor = anchor
 
 
 class CollectionStartEvent(NodeEvent):
+    __slots__ = 'tag', 'implicit', 'flow_style',
+
     def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
                  flow_style=None, comment=None):
-        Event.__init__(self, start_mark, end_mark, comment)
-        self.anchor = anchor
+        NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
         self.tag = tag
         self.implicit = implicit
         self.flow_style = flow_style
 
 
 class CollectionEndEvent(Event):
-    pass
+    __slots__ = ()
 
-# Implementations.
 
+# Implementations.
 
 class StreamStartEvent(Event):
+    __slots__ = 'encoding',
+
     def __init__(self, start_mark=None, end_mark=None, encoding=None,
                  comment=None):
         Event.__init__(self, start_mark, end_mark, comment)
@@ -57,10 +64,12 @@ class StreamStartEvent(Event):
 
 
 class StreamEndEvent(Event):
-    pass
+    __slots__ = ()
 
 
 class DocumentStartEvent(Event):
+    __slots__ = 'explicit', 'version', 'tags',
+
     def __init__(self, start_mark=None, end_mark=None,
                  explicit=None, version=None, tags=None, comment=None):
         Event.__init__(self, start_mark, end_mark, comment)
@@ -70,6 +79,8 @@ class DocumentStartEvent(Event):
 
 
 class DocumentEndEvent(Event):
+    __slots__ = 'explicit',
+
     def __init__(self, start_mark=None, end_mark=None,
                  explicit=None, comment=None):
         Event.__init__(self, start_mark, end_mark, comment)
@@ -77,10 +88,12 @@ class DocumentEndEvent(Event):
 
 
 class AliasEvent(NodeEvent):
-    pass
+    __slots__ = ()
 
 
 class ScalarEvent(NodeEvent):
+    __slots__ = 'tag', 'implicit', 'value', 'style',
+
     def __init__(self, anchor, tag, implicit, value,
                  start_mark=None, end_mark=None, style=None, comment=None):
         NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
@@ -91,16 +104,16 @@ class ScalarEvent(NodeEvent):
 
 
 class SequenceStartEvent(CollectionStartEvent):
-    pass
+    __slots__ = ()
 
 
 class SequenceEndEvent(CollectionEndEvent):
-    pass
+    __slots__ = ()
 
 
 class MappingStartEvent(CollectionStartEvent):
-    pass
+    __slots__ = ()
 
 
 class MappingEndEvent(CollectionEndEvent):
-    pass
+    __slots__ = ()
diff --git a/nodes.py b/nodes.py
index 214284a..b518513 100644
--- a/nodes.py
+++ b/nodes.py
@@ -4,6 +4,8 @@ from __future__ import print_function
 
 
 class Node(object):
+    __slots__ = 'tag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor',
+
     def __init__(self, tag, value, start_mark, end_mark, comment=None):
         self.tag = tag
         self.value = value
@@ -62,6 +64,7 @@ class ScalarNode(Node):
       | -> literal style
       > -> folding style
     """
+    __slots__ = 'style',
     id = 'scalar'
 
     def __init__(self, tag, value, start_mark=None, end_mark=None, style=None,
@@ -71,6 +74,8 @@ class ScalarNode(Node):
 
 
 class CollectionNode(Node):
+    __slots__ = 'flow_style',  # 'anchor' slot is inherited from Node
+
     def __init__(self, tag, value, start_mark=None, end_mark=None,
                  flow_style=None, comment=None, anchor=None):
         Node.__init__(self, tag, value, start_mark, end_mark, comment=comment)
@@ -79,8 +84,10 @@ class CollectionNode(Node):
 
 
 class SequenceNode(CollectionNode):
+    __slots__ = ()
     id = 'sequence'
 
 
 class MappingNode(CollectionNode):
+    __slots__ = ()
     id = 'mapping'
diff --git a/reader.py b/reader.py
index e234b71..63307b7 100644
--- a/reader.py
+++ b/reader.py
@@ -23,7 +23,7 @@ from __future__ import absolute_import
 import codecs
 import re
 
-from ruamel.yaml.error import YAMLError, Mark
+from ruamel.yaml.error import YAMLError, FileMark, StringMark
 from ruamel.yaml.compat import text_type, binary_type, PY3
 
 __all__ = ['Reader', 'ReaderError']
@@ -122,11 +122,10 @@ class Reader(object):
 
     def get_mark(self):
         if self.stream is None:
-            return Mark(self.name, self.index, self.line, self.column,
-                        self.buffer, self.pointer)
+            return StringMark(self.name, self.index, self.line, self.column,
+                              self.buffer, self.pointer)
         else:
-            return Mark(self.name, self.index, self.line, self.column,
-                        None, None)
+            return FileMark(self.name, self.index, self.line, self.column)
 
     def determine_encoding(self):
         while not self.eof and (self.raw_buffer is None or
diff --git a/scalarstring.py b/scalarstring.py
index eb10910..c6e5734 100644
--- a/scalarstring.py
+++ b/scalarstring.py
@@ -10,21 +10,29 @@ __all__ = ["ScalarString", "PreservedScalarString", "SingleQuotedScalarString",
 
 
 class ScalarString(text_type):
+    __slots__ = ()
+
     def __new__(cls, *args, **kw):
         return text_type.__new__(cls, *args, **kw)
 
 
 class PreservedScalarString(ScalarString):
+    __slots__ = ()
+
     def __new__(cls, value):
         return ScalarString.__new__(cls, value)
 
 
 class SingleQuotedScalarString(ScalarString):
+    __slots__ = ()
+
     def __new__(cls, value):
         return ScalarString.__new__(cls, value)
 
 
 class DoubleQuotedScalarString(ScalarString):
+    __slots__ = ()
+
     def __new__(cls, value):
         return ScalarString.__new__(cls, value)
 
diff --git a/tokens.py b/tokens.py
index bd97785..56adbe6 100644
--- a/tokens.py
+++ b/tokens.py
@@ -3,13 +3,15 @@
 
 
 class Token(object):
+    __slots__ = 'start_mark', 'end_mark', '_comment',
+
     def __init__(self, start_mark, end_mark):
         self.start_mark = start_mark
         self.end_mark = end_mark
 
     def __repr__(self):
-        attributes = [key for key in self.__dict__
-                      if not key.endswith('_mark')]
+        attributes = [key for key in self.__slots__ if not key.endswith('_mark') and
+                      hasattr(self, key)]  # skip slots that were never assigned
         attributes.sort()
         arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
                                for key in attributes])
@@ -77,6 +79,7 @@ class Token(object):
 #     id = '<byte order mark>'
 
 class DirectiveToken(Token):
+    __slots__ = 'name', 'value',
     id = '<directive>'
 
     def __init__(self, name, value, start_mark, end_mark):
@@ -86,14 +89,17 @@ class DirectiveToken(Token):
 
 
 class DocumentStartToken(Token):
+    __slots__ = ()
     id = '<document start>'
 
 
 class DocumentEndToken(Token):
+    __slots__ = ()
     id = '<document end>'
 
 
 class StreamStartToken(Token):
+    __slots__ = 'encoding',
     id = '<stream start>'
 
     def __init__(self, start_mark=None, end_mark=None, encoding=None):
@@ -102,54 +108,67 @@ class StreamStartToken(Token):
 
 
 class StreamEndToken(Token):
+    __slots__ = ()
     id = '<stream end>'
 
 
 class BlockSequenceStartToken(Token):
+    __slots__ = ()
     id = '<block sequence start>'
 
 
 class BlockMappingStartToken(Token):
+    __slots__ = ()
     id = '<block mapping start>'
 
 
 class BlockEndToken(Token):
+    __slots__ = ()
     id = '<block end>'
 
 
 class FlowSequenceStartToken(Token):
+    __slots__ = ()
     id = '['
 
 
 class FlowMappingStartToken(Token):
+    __slots__ = ()
     id = '{'
 
 
 class FlowSequenceEndToken(Token):
+    __slots__ = ()
     id = ']'
 
 
 class FlowMappingEndToken(Token):
+    __slots__ = ()
     id = '}'
 
 
 class KeyToken(Token):
+    __slots__ = ()
     id = '?'
 
 
 class ValueToken(Token):
+    __slots__ = ()
     id = ':'
 
 
 class BlockEntryToken(Token):
+    __slots__ = ()
     id = '-'
 
 
 class FlowEntryToken(Token):
+    __slots__ = ()
     id = ','
 
 
 class AliasToken(Token):
+    __slots__ = 'value',
     id = '<alias>'
 
     def __init__(self, value, start_mark, end_mark):
@@ -158,6 +177,7 @@ class AliasToken(Token):
 
 
 class AnchorToken(Token):
+    __slots__ = 'value',
     id = '<anchor>'
 
     def __init__(self, value, start_mark, end_mark):
@@ -166,6 +186,7 @@ class AnchorToken(Token):
 
 
 class TagToken(Token):
+    __slots__ = 'value',
     id = '<tag>'
 
     def __init__(self, value, start_mark, end_mark):
@@ -174,6 +195,7 @@ class TagToken(Token):
 
 
 class ScalarToken(Token):
+    __slots__ = 'value', 'plain', 'style',
     id = '<scalar>'
 
     def __init__(self, value, plain, start_mark, end_mark, style=None):
@@ -184,6 +206,7 @@ class ScalarToken(Token):
 
 
 class CommentToken(Token):
+    __slots__ = 'value', 'pre_done',
     id = '<comment>'
 
     def __init__(self, value, start_mark, end_mark):
diff --git a/util.py b/util.py
index afc46fb..25c64e4 100644
--- a/util.py
+++ b/util.py
@@ -8,7 +8,6 @@ from __future__ import print_function
 from __future__ import absolute_import
 
 from .compat import text_type, binary_type
-from .main import round_trip_load
 
 
 # originally as comment
@@ -24,6 +23,8 @@ def load_yaml_guess_indent(stream, **kw):
     - if there are no block sequences, indent is taken from nested mappings, block sequence
       indent is unset (None) in that case
     """
+    from .main import round_trip_load
+
     # load a yaml file guess the indentation, if you use TABs ...
     def leading_spaces(l):
         idx = 0