From 17b35c376fd0fc9a94ba0adfdbf5bf63a6177dc9 Mon Sep 17 00:00:00 2001
From: Anthon van der Neut <anthon@mnt.org>
Date: Thu, 6 May 2021 08:36:49 +0200
Subject: * extend EOL token handling * extending comment

---
 comments.py    | 101 ++++++++++---
 constructor.py | 187 ++++++++++++++++++------
 events.py      |   5 +
 main.py        |  19 ++-
 parser.py      |  92 ++++++++++--
 scanner.py     | 445 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 tokens.py      |  14 +-
 7 files changed, 756 insertions(+), 107 deletions(-)

diff --git a/comments.py b/comments.py
index d133299..f49c183 100644
--- a/comments.py
+++ b/comments.py
@@ -11,7 +11,7 @@ import copy
 
 
 from ruamel.yaml.compat import ordereddict  # type: ignore
-from ruamel.yaml.compat import MutableSliceableSequence, _F
+from ruamel.yaml.compat import MutableSliceableSequence, _F, nprintf
 from ruamel.yaml.scalarstring import ScalarString
 from ruamel.yaml.anchor import Anchor
 
@@ -35,13 +35,44 @@ __all__ = ['CommentedSeq', 'CommentedKeySeq',
 # bits 0 and 1 are combined, you can choose only one
 C_POST = 0b00
 C_PRE =  0b01
-C_SPLIT_ON_FIRST_BLANK = 0b10   # as C_POST, but if blank line then C_PRE everything before first
-                                # blank goes to POST even if no following real FLC
+C_SPLIT_ON_FIRST_BLANK = 0b10   # as C_POST, but if blank line then C_PRE all lines before first
+                                # blank goes to POST even if no following real FLC (first blank -> first of post)
 # 0b11 -> reserved for future use
 C_BLANK_LINE_PRESERVE_SPACE = 0b100 
 # C_EOL_PRESERVE_SPACE2 = 0b1000 
 
 
+class IDX:
+    # temporary auto-increment counter, so rearranging the C_* indices below is easier
+    def __init__(self):
+        self._idx = 0  # value the next call will hand out
+
+    def __call__(self):
+        x = self._idx  # remember the current value before advancing
+        self._idx += 1
+        return x
+
+    def __str__(self):
+        return str(self._idx)  # the next value to be handed out, not the last one returned
+
+cidx = IDX()
+
+# more or less in order of subjective expected likelihood
+# the _POST and _PRE ones are lists themselves
+C_VALUE_EOL = C_ELEM_EOL = cidx()
+C_KEY_EOL = cidx()
+C_KEY_PRE = C_ELEM_PRE = cidx()   # note: this is not value (C_ELEM_PRE aliases the key slot)
+C_VALUE_POST = C_ELEM_POST = cidx()   # NOTE(review): here C_ELEM_POST aliases the value slot
+C_VALUE_PRE = cidx()
+C_KEY_POST = cidx()
+C_TAG_EOL = cidx()
+C_TAG_POST = cidx()
+C_TAG_PRE = cidx()
+C_ANCHOR_EOL = cidx()
+C_ANCHOR_POST = cidx()
+C_ANCHOR_PRE = cidx()
+
+
 comment_attrib = '_yaml_comment'
 format_attrib = '_yaml_format'
 line_col_attrib = '_yaml_line_col'
@@ -52,31 +83,32 @@ tag_attrib = '_yaml_tag'
 class Comment:
     # using sys.getsize tested the Comment objects, __slots__ makes them bigger
     # and adding self.end did not matter
-    __slots__ = 'comment', '_items', '_end', '_start'
+    __slots__ = 'comment', '_items', '_post', '_pre'
     attrib = comment_attrib
 
-    def __init__(self):
+    def __init__(self, old=True):
         # type: () -> None
+        self._pre = None if old else []
         self.comment = None  # [post, [pre]]
         # map key (mapping/omap/dict) or index (sequence/list) to a  list of
         # dict: post_key, pre_key, post_value, pre_value
         # list: pre item, post item
         self._items = {}  # type: Dict[Any, Any]
         # self._start = [] # should not put these on first item
-        self._end = []  # type: List[Any] # end of document comments
+        self._post = []  # type: List[Any] # end of document comments
 
     def __str__(self):
         # type: () -> str
-        if bool(self._end):
+        if bool(self._post):
-            end = ',\n  end=' + str(self._end)
+            end = ',\n  end=' + str(self._post)  # _end was renamed to _post in __slots__
         else:
             end = ""
         return 'Comment(comment={0},\n  items={1}{2})'.format(self.comment, self._items, end)
 
-    def __repr__(self):
+    def _old__repr__(self):
         # type: () -> str
-        if bool(self._end):
-            end = ',\n  end=' + str(self._end)
+        if bool(self._post):
+            end = ',\n  end=' + str(self._post)
         else:
             end = ""
         try:
@@ -90,6 +122,25 @@ class Comment:
             it = '\n    ' + it + '  '
         return 'Comment(\n  start={},\n  items={{{}}}{})'.format(self.comment, it, end)
 
+    def __repr__(self):
+        if self._pre is None:  # no pre list (as set by __init__ with old=True): old format
+            return self._old__repr__()
+        if bool(self._post):
+            end = ',\n  end=' + repr(self._post)
+        else:
+            end = ""
+        try:
+            ln = max([len(str(k)) for k in self._items]) + 1  # width for the 'key:' column
+        except ValueError:  # raised by max() on an empty list, i.e. no items
+            ln = ''
+        it = '    '.join(
+            ['{:{}} {}\n'.format(str(k) + ':', ln, v) for k, v in self._items.items()]
+        )
+        if it:
+            it = '\n    ' + it + '  '
+        return 'Comment(\n  pre={},\n  items={{{}}}{})'.format(self.pre, it, end)
+
+
     @property
     def items(self):
         # type: () -> Any
@@ -98,22 +149,38 @@ class Comment:
     @property
     def end(self):
         # type: () -> Any
-        return self._end
+        return self._post
 
     @end.setter
     def end(self, value):
         # type: (Any) -> None
-        self._end = value
+        self._post = value
 
     @property
-    def start(self):
+    def pre(self):
         # type: () -> Any
-        return self._start
+        return self._pre
 
-    @start.setter
-    def start(self, value):
+    @pre.setter
+    def pre(self, value):
         # type: (Any) -> None
-        self._start = value
+        self._pre = value
+
+    def get(self, item, pos):  # comment stored in slot `pos` for `item`, or None
+        x = self._items.get(item)
+        if x is None or len(x) <= pos:  # pos == len(x) is already out of range
+            return None
+        return x[pos]  # can be None
+
+    def set(self, item, pos, value):  # store `value` in slot `pos` of the list kept for `item`
+        x = self._items.get(item)
+        if x is None:
+            self._items[item] = x = [None] * (pos + 1)  # new slot list, None up to pos
+        else:
+            while len(x) <= pos:  # grow an existing list until index pos exists
+                x.append(None)
+        assert x[pos] is None  # a slot is expected to be filled only once
+        x[pos] = value
 
     def __contains__(self, x):
         # test if a substring is in any of the attached comments
diff --git a/constructor.py b/constructor.py
index 7b7426f..199129e 100644
--- a/constructor.py
+++ b/constructor.py
@@ -21,7 +21,10 @@ from ruamel.yaml.compat import ordereddict  # type: ignore
 from ruamel.yaml.comments import *                               # NOQA
 from ruamel.yaml.comments import (CommentedMap, CommentedOrderedMap, CommentedSet,
                                   CommentedKeySeq, CommentedSeq, TaggedScalar,
-                                  CommentedKeyMap)
+                                  CommentedKeyMap,
+                                  C_KEY_PRE, C_KEY_EOL, C_KEY_POST,
+                                  C_VALUE_PRE, C_VALUE_EOL, C_VALUE_POST,
+                                  )
 from ruamel.yaml.scalarstring import (SingleQuotedScalarString, DoubleQuotedScalarString,
                                       LiteralScalarString, FoldedScalarString,
                                       PlainScalarString, ScalarString,)
@@ -92,6 +95,14 @@ class BaseConstructor:
             return self.loader.resolver
         return self.loader._resolver
 
+    @property
+    def scanner(self):
+        # type: () -> Any
+        # needed to get to the expanded comments
+        if hasattr(self.loader, 'typ'):  # NOTE(review): presumably a YAML() instance; confirm
+            return self.loader.scanner
+        return self.loader._scanner  # loader without 'typ': scanner is kept in _scanner
+
     def check_data(self):
         # type: () -> Any
         # If there are more documents available?
@@ -1056,6 +1067,23 @@ class RoundTripConstructor(SafeConstructor):
     as well as on the items
     """
 
+    def comment(self, idx):
+        assert self.loader.comment_handling is not None  # not for comment_handling None loading
+        x = self.scanner.comments[idx]  # look the comment up in the scanner's collection
+        x.set_assigned()  # NOTE(review): presumably marks the comment as attached; confirm
+        return x
+
+    def comments(self, list_of_comments, idx=None):
+        # hand in the comment and optional pre, eol, post segment
+        if list_of_comments is None:
+            return []  # generator: this just ends iteration, callers see no items
+        if idx is not None:
+            if list_of_comments[idx] is None:
+                return []  # same: empty iteration when the selected segment is missing
+            list_of_comments = list_of_comments[idx]  # narrow to the selected segment
+        for x in list_of_comments:
+            yield self.comment(x)  # resolve each entry via self.comment()
+
     def construct_scalar(self, node):
         # type: (Any) -> Any
         if not isinstance(node, ScalarNode):
@@ -1068,8 +1096,14 @@ class RoundTripConstructor(SafeConstructor):
 
         if node.style == '|' and isinstance(node.value, str):
             lss = LiteralScalarString(node.value, anchor=node.anchor)
-            if node.comment and node.comment[1]:
-                lss.comment = node.comment[1][0]  # type: ignore
+            if self.loader and self.loader.comment_handling is None:
+                if node.comment and node.comment[1]:
+                    lss.comment = node.comment[1][0]  # type: ignore
+            else:
+                # NEWCMNT
+                if node.comment is not None and node.comment[1]:
+                    # nprintf('>>>>nc1', node.comment)
+                    lss.comment = self.comment(node.comment[1][0])  # EOL comment after |
             return lss
         if node.style == '>' and isinstance(node.value, str):
             fold_positions = []  # type: List[int]
@@ -1080,8 +1114,14 @@ class RoundTripConstructor(SafeConstructor):
                     break
                 fold_positions.append(idx - len(fold_positions))
             fss = FoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor)
-            if node.comment and node.comment[1]:
-                fss.comment = node.comment[1][0]  # type: ignore
+            if self.loader and self.loader.comment_handling is None:
+                if node.comment and node.comment[1]:
+                    fss.comment = node.comment[1][0]  # type: ignore
+            else:
+                # NEWCMNT
+                if node.comment is not None and node.comment[1]:
+                    # nprintf('>>>>nc2', node.comment)
+                    fss.comment = self.comment(node.comment[1][0])  # EOL comment after >
             if fold_positions:
                 fss.fold_pos = fold_positions  # type: ignore
             return fss
@@ -1279,12 +1319,17 @@ class RoundTripConstructor(SafeConstructor):
                 node.start_mark,
             )
         ret_val = []
-        if node.comment:
-            seqtyp._yaml_add_comment(node.comment[:2])
-            if len(node.comment) > 2:
-                # this happens e.g. if you have a sequence element that is a flow-style mapping
-                # and that has no EOL comment but a following commentline or empty line
-                seqtyp.yaml_end_comment_extend(node.comment[2], clear=True)
+        if self.loader and self.loader.comment_handling is None:
+            if node.comment:
+                seqtyp._yaml_add_comment(node.comment[:2])
+                if len(node.comment) > 2:
+                    # this happens e.g. if you have a sequence element that is a flow-style mapping
+                    # and that has no EOL comment but a following commentline or empty line
+                    seqtyp.yaml_end_comment_extend(node.comment[2], clear=True)
+        else:
+            # NEWCMNT
+            if node.comment:
+                nprintf('nc3', node.comment)
         if node.anchor:
             from ruamel.yaml.serializer import templated_id
 
@@ -1408,10 +1453,19 @@ class RoundTripConstructor(SafeConstructor):
             )
         merge_map = self.flatten_mapping(node)
         # mapping = {}
-        if node.comment:
-            maptyp._yaml_add_comment(node.comment[:2])
-            if len(node.comment) > 2:
-                maptyp.yaml_end_comment_extend(node.comment[2], clear=True)
+        if self.loader and self.loader.comment_handling is None:
+            if node.comment:
+                maptyp._yaml_add_comment(node.comment[:2])
+                if len(node.comment) > 2:
+                    maptyp.yaml_end_comment_extend(node.comment[2], clear=True)
+        else:
+            # NEWCMNT
+            if node.comment:
+                # nprintf('nc4', node.comment, node.start_mark)
+                if maptyp.ca.pre is None:
+                    maptyp.ca.pre = []
+                for cmnt in self.comments(node.comment, 0):
+                    maptyp.ca.pre.append(cmnt)
         if node.anchor:
             from ruamel.yaml.serializer import templated_id
 
@@ -1446,18 +1500,37 @@ class RoundTripConstructor(SafeConstructor):
                 )
             value = self.construct_object(value_node, deep=deep)
             if self.check_mapping_key(node, key_node, maptyp, key, value):
-                if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]:
-                    if last_value is None:
-                        key_node.comment[0] = key_node.comment.pop(4)
-                        maptyp._yaml_add_comment(key_node.comment, value=last_key)
-                    else:
-                        key_node.comment[2] = key_node.comment.pop(4)
+                if self.loader and self.loader.comment_handling is None:
+                    if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]:
+                        if last_value is None:
+                            key_node.comment[0] = key_node.comment.pop(4)
+                            maptyp._yaml_add_comment(key_node.comment, value=last_key)
+                        else:
+                            key_node.comment[2] = key_node.comment.pop(4)
+                            maptyp._yaml_add_comment(key_node.comment, key=key)
+                        key_node.comment = None
+                    if key_node.comment:
                         maptyp._yaml_add_comment(key_node.comment, key=key)
-                    key_node.comment = None
-                if key_node.comment:
-                    maptyp._yaml_add_comment(key_node.comment, key=key)
-                if value_node.comment:
-                    maptyp._yaml_add_comment(value_node.comment, value=key)
+                    if value_node.comment:
+                        maptyp._yaml_add_comment(value_node.comment, value=key)
+                else:
+                    # NEWCMNT
+                    if key_node.comment:
+                        nprintf('nc5a', key, key_node.comment)
+                        if key_node.comment[0]:
+                            maptyp.ca.set(key, C_KEY_PRE, key_node.comment[0])
+                        if key_node.comment[1]:
+                            maptyp.ca.set(key, C_KEY_EOL, key_node.comment[1])
+                        if key_node.comment[2]:
+                            maptyp.ca.set(key, C_KEY_POST, key_node.comment[2])
+                    if value_node.comment:
+                        nprintf('nc5b', key, value_node.comment)
+                        if value_node.comment[0]:
+                            maptyp.ca.set(key, C_VALUE_PRE, value_node.comment[0])
+                        if value_node.comment[1]:
+                            maptyp.ca.set(key, C_VALUE_EOL, value_node.comment[1])
+                        if value_node.comment[2]:
+                            maptyp.ca.set(key, C_VALUE_POST, value_node.comment[2])
                 maptyp._yaml_set_kv_line_col(
                     key,
                     [
@@ -1483,10 +1556,15 @@ class RoundTripConstructor(SafeConstructor):
                 _F('expected a mapping node, but found {node_id!s}', node_id=node.id),
                 node.start_mark,
             )
-        if node.comment:
-            typ._yaml_add_comment(node.comment[:2])
-            if len(node.comment) > 2:
-                typ.yaml_end_comment_extend(node.comment[2], clear=True)
+        if self.loader and self.loader.comment_handling is None:
+            if node.comment:
+                typ._yaml_add_comment(node.comment[:2])
+                if len(node.comment) > 2:
+                    typ.yaml_end_comment_extend(node.comment[2], clear=True)
+        else:
+            # NEWCMNT
+            if node.comment:
+                nprintf('nc6', node.comment)
         if node.anchor:
             from ruamel.yaml.serializer import templated_id
 
@@ -1509,10 +1587,17 @@ class RoundTripConstructor(SafeConstructor):
             # construct but should be null
             value = self.construct_object(value_node, deep=deep)  # NOQA
             self.check_set_key(node, key_node, typ, key)
-            if key_node.comment:
-                typ._yaml_add_comment(key_node.comment, key=key)
-            if value_node.comment:
-                typ._yaml_add_comment(value_node.comment, value=key)
+            if self.loader and self.loader.comment_handling is None:
+                if key_node.comment:
+                    typ._yaml_add_comment(key_node.comment, key=key)
+                if value_node.comment:
+                    typ._yaml_add_comment(value_node.comment, value=key)
+            else:
+                # NEWCMNT
+                if key_node.comment:
+                    nprintf('nc7a', key_node.comment)
+                if value_node.comment:
+                    nprintf('nc7b', value_node.comment)
             typ.add(key)
 
     def construct_yaml_seq(self, node):
@@ -1563,10 +1648,15 @@ class RoundTripConstructor(SafeConstructor):
         elif node.flow_style is False:
             omap.fa.set_block_style()
         yield omap
-        if node.comment:
-            omap._yaml_add_comment(node.comment[:2])
-            if len(node.comment) > 2:
-                omap.yaml_end_comment_extend(node.comment[2], clear=True)
+        if self.loader and self.loader.comment_handling is None:
+            if node.comment:
+                omap._yaml_add_comment(node.comment[:2])
+                if len(node.comment) > 2:
+                    omap.yaml_end_comment_extend(node.comment[2], clear=True)
+        else:
+            # NEWCMNT
+            if node.comment:
+                nprintf('nc8', node.comment)
         if not isinstance(node, SequenceNode):
             raise ConstructorError(
                 'while constructing an ordered map',
@@ -1599,12 +1689,21 @@ class RoundTripConstructor(SafeConstructor):
             key = self.construct_object(key_node)
             assert key not in omap
             value = self.construct_object(value_node)
-            if key_node.comment:
-                omap._yaml_add_comment(key_node.comment, key=key)
-            if subnode.comment:
-                omap._yaml_add_comment(subnode.comment, key=key)
-            if value_node.comment:
-                omap._yaml_add_comment(value_node.comment, value=key)
+            if self.loader and self.loader.comment_handling is None:
+                if key_node.comment:
+                    omap._yaml_add_comment(key_node.comment, key=key)
+                if subnode.comment:
+                    omap._yaml_add_comment(subnode.comment, key=key)
+                if value_node.comment:
+                    omap._yaml_add_comment(value_node.comment, value=key)
+            else:
+                # NEWCMNT
+                if key_node.comment:
+                    nprintf('nc9a', key_node.comment)
+                if subnode.comment:
+                    nprintf('nc9b', subnode.comment)
+                if value_node.comment:
+                    nprintf('nc9c', value_node.comment)
             omap[key] = value
 
     def construct_yaml_set(self, node):
diff --git a/events.py b/events.py
index ef63dad..e0c7f68 100644
--- a/events.py
+++ b/events.py
@@ -7,6 +7,8 @@ from ruamel.yaml.compat import _F
 if False:  # MYPY
     from typing import Any, Dict, Optional, List  # NOQA
 
+SHOW_LINES = False
+
 
 def CommentCheck():
     # type: () -> None
@@ -37,6 +39,9 @@ class Event:
                     arguments.append(_F('{key!s}={v!r}', key=key, v=v))
             if self.comment not in [None, CommentCheck]:
                 arguments.append('comment={!r}'.format(self.comment))
+            if SHOW_LINES:
+                arguments.append('({}:{}/{}:{})'.format(self.start_mark.line, self.start_mark.column,
+                                                    self.end_mark.line, self.end_mark.column))
             arguments = ', '.join(arguments)
         else:
             attributes = [
diff --git a/main.py b/main.py
index 7d2f177..e19f28c 100644
--- a/main.py
+++ b/main.py
@@ -31,6 +31,7 @@ from ruamel.yaml.constructor import (
     RoundTripConstructor,
 )
 from ruamel.yaml.loader import Loader as UnsafeLoader
+from ruamel.yaml.comments import CommentedMap, CommentedSeq, C_PRE
 
 if False:  # MYPY
     from typing import List, Set, Dict, Union, Any, Callable, Optional, Text  # NOQA
@@ -81,6 +82,7 @@ class YAML:
         self.Scanner = None  # type: Any
         self.Serializer = None  # type: Any
         self.default_flow_style = None  # type: Any
+        self.comment_handling = None
         typ_found = 1
         setup_rt = False
         if 'rt' in self.typ:
@@ -107,6 +109,18 @@ class YAML:
             self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser
             self.Composer = ruamel.yaml.composer.Composer
             self.Constructor = ruamel.yaml.constructor.Constructor
+        elif 'rtsc' in self.typ:
+            self.default_flow_style = False
+            # no optimized rt-dumper yet
+            self.Emitter = ruamel.yaml.emitter.Emitter
+            self.Serializer = ruamel.yaml.serializer.Serializer
+            self.Representer = ruamel.yaml.representer.RoundTripRepresenter
+            self.Scanner = ruamel.yaml.scanner.RoundTripScannerSC
+            # no optimized rt-parser yet
+            self.Parser = ruamel.yaml.parser.RoundTripParserSC
+            self.Composer = ruamel.yaml.composer.Composer
+            self.Constructor = ruamel.yaml.constructor.RoundTripConstructor
+            self.comment_handling = C_PRE
         else:
             setup_rt = True
             typ_found = 0
@@ -150,7 +164,6 @@ class YAML:
         self.scalar_after_indicator = None
         # [a, b: 1, c: {d: 2}]  vs. [a, {b: 1}, {c: {d: 2}}]
         self.brace_single_entry_mapping_in_flow_sequence = False
-        self.comment_handling = None
         for module in self.plug_ins:
             if getattr(module, 'typ', None) in self.typ:
                 typ_found += 1
@@ -711,8 +724,6 @@ class YAML:
     def map(self, **kw):
         # type: (Any) -> Any
         if 'rt' in self.typ:
-            from ruamel.yaml.comments import CommentedMap
-
             return CommentedMap(**kw)
         else:
             return dict(**kw)
@@ -720,8 +731,6 @@ class YAML:
     def seq(self, *args):
         # type: (Any) -> Any
         if 'rt' in self.typ:
-            from ruamel.yaml.comments import CommentedSeq
-
             return CommentedSeq(*args)
         else:
             return list(*args)
diff --git a/parser.py b/parser.py
index 279fc20..8e2f54e 100644
--- a/parser.py
+++ b/parser.py
@@ -44,7 +44,7 @@
 #
 # FIRST sets:
 #
-# stream: { STREAM-START }
+# stream: { STREAM-START }
 # explicit_document: { DIRECTIVE DOCUMENT-START }
 # implicit_document: FIRST(block_node)
 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START
@@ -78,6 +78,8 @@ from ruamel.yaml.error import MarkedYAMLError
 from ruamel.yaml.tokens import *  # NOQA
 from ruamel.yaml.events import *  # NOQA
 from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError  # NOQA
+from ruamel.yaml.scanner import BlankLineComment
+from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK
 from ruamel.yaml.compat import _F, nprint, nprintf  # NOQA
 
 if False:  # MYPY
@@ -86,6 +88,10 @@ if False:  # MYPY
 __all__ = ['Parser', 'RoundTripParser', 'ParserError']
 
 
+def xprintf(*args, **kw):
+    return nprintf(*args, **kw)  # debug tracing: forwards everything to nprintf
+    pass  # unreachable while the return above is present
+
 class ParserError(MarkedYAMLError):
     pass
 
@@ -106,7 +112,7 @@ class Parser:
     def reset_parser(self):
         # type: () -> None
         # Reset the state attributes (to clear self-references)
-        self.current_event = None
+        self.current_event = self.last_event = None
         self.tag_handles = {}  # type: Dict[Any, Any]
         self.states = []  # type: List[Any]
         self.marks = []  # type: List[Any]
@@ -158,7 +164,10 @@ class Parser:
         if self.current_event is None:
             if self.state:
                 self.current_event = self.state()
-        value = self.current_event
+        # assert self.current_event is not None
+        # if self.current_event.end_mark.line != self.peek_event().start_mark.line:
+        xprintf('get_event', repr(self.current_event), self.peek_event().start_mark.line)
+        self.last_event = value = self.current_event
         self.current_event = None
         return value
 
@@ -204,8 +213,6 @@ class Parser:
             self.scanner.get_token()
         # Parse an explicit document.
         if not self.scanner.check_token(StreamEndToken):
-            token = self.scanner.peek_token()
-            start_mark = token.start_mark
             version, tags = self.process_directives()
             if not self.scanner.check_token(DocumentStartToken):
                 raise ParserError(
@@ -218,6 +225,7 @@ class Parser:
                     self.scanner.peek_token().start_mark,
                 )
             token = self.scanner.get_token()
+            start_mark = token.start_mark
             end_mark = token.end_mark
             # if self.loader is not None and \
             #    end_mark.line != self.scanner.peek_token().start_mark.line:
@@ -401,9 +409,13 @@ class Parser:
         if indentless_sequence and self.scanner.check_token(BlockEntryToken):
             comment = None
             pt = self.scanner.peek_token()
-            if pt.comment and pt.comment[0]:
-                comment = [pt.comment[0], []]
-                pt.comment[0] = None
+            if self.loader and self.loader.comment_handling is None:
+                if pt.comment and pt.comment[0]:
+                    comment = [pt.comment[0], []]
+                    pt.comment[0] = None
+            elif self.loader:
+                if pt.comment:
+                    comment = pt.comment
             end_mark = self.scanner.peek_token().end_mark
             event = SequenceStartEvent(
                 anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment
@@ -556,7 +568,14 @@ class Parser:
                 self.state = self.parse_indentless_sequence_entry
                 return self.process_empty_scalar(token.end_mark)
         token = self.scanner.peek_token()
-        event = SequenceEndEvent(token.start_mark, token.start_mark, comment=token.comment)
+        c = None
+        if self.loader and self.loader.comment_handling is None:
+            c = token.comment
+            start_mark = token.start_mark
+        else:
+            start_mark = self.last_event.end_mark
+            c = self.distribute_comment(token.comment, start_mark.line)
+        event = SequenceEndEvent(start_mark, start_mark, comment=c)
         self.state = self.states.pop()
         return event
 
@@ -783,10 +802,8 @@ class Parser:
         return ScalarEvent(None, None, (True, False), "", mark, mark, comment=comment)
 
     def move_token_comment(self, token, nt=None, empty=False):
-        if getattr(self.loader, 'comment_handling', None) is None:  # pre 0.18
-            token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
-        else:
-            token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
+        pass
+
 
 class RoundTripParser(Parser):
     """roundtrip is a safe loader, that wants to see the unmangled tag"""
@@ -810,3 +827,52 @@ class RoundTripParser(Parser):
         ):
             return Parser.transform_tag(self, handle, suffix)
         return handle + suffix
+
+    def move_token_comment(self, token, nt=None, empty=False):  # old-style comment move
+        token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
+
+
+class RoundTripParserSC(RoundTripParser):
+    """roundtrip is a safe loader, that wants to see the unmangled tag"""
+
+    # some of the differences are based on the superclass testing if self.loader.comment_handling is not None
+
+    def move_token_comment(self, token, nt=None, empty=False):
+        token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
+
+    def distribute_comment(self, comment, line):
+        """Move (part of) comment[0] into slot 2 of a new 3-item list; None if nothing moves."""
+        # ToDo, look at indentation of the comment to determine attachment
+        if comment is None:
+            return None
+        if not comment[0]:
+            return None
+        # typ must be bound before the check below: the debug print reports it
+        typ = self.loader.comment_handling & 0b11
+        if comment[0][0] != line + 1:
+            nprintf('>>>dcxxx', comment, line, typ)
+        assert comment[0][0] == line + 1
+        # nprintf('>>>dca', comment, line, typ)
+        if typ == C_POST:
+            return None
+        if typ == C_PRE:
+            c = [None, None, comment[0]]
+            comment[0] = None
+            return c
+        # nprintf('>>>dcb', comment[0])
+        for idx, cmntidx in enumerate(comment[0]):
+            # nprintf('>>>dcb', cmntidx)
+            if isinstance(self.scanner.comments[cmntidx], BlankLineComment):
+                break
+        else:
+            return None  # no space found
+        if idx == 0:
+            return None  # first line was blank
+        # nprintf('>>>dcc', idx)
+        if typ == C_SPLIT_ON_FIRST_BLANK:
+            c = [None, None, comment[0][:idx]]
+            comment[0] = comment[0][idx:]
+            return c
+        raise NotImplementedError  # reserved
+
+
diff --git a/scanner.py b/scanner.py
index f98da00..f9e6052 100644
--- a/scanner.py
+++ b/scanner.py
@@ -44,6 +44,10 @@ _THE_END = '\n\0\r\x85\u2028\u2029'
 _THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029'
 _SPACE_TAB = ' \t'
 
+def xprintf(*args, **kw):
+    return nprintf(*args, **kw)  # debug tracing: forwards everything to nprintf
+    pass  # unreachable while the return above is present
+
 
 class ScannerError(MarkedYAMLError):
     pass
@@ -167,7 +171,7 @@ class Scanner:
         # Check if the next token is one of the given types.
         while self.need_more_tokens():
             self.fetch_more_tokens()
-        if bool(self.tokens):
+        if len(self.tokens) > 0:
             if not choices:
                 return True
             for choice in choices:
@@ -180,7 +184,7 @@ class Scanner:
         # Return the next token, but do not delete if from the queue.
         while self.need_more_tokens():
             self.fetch_more_tokens()
-        if bool(self.tokens):
+        if len(self.tokens) > 0:
             return self.tokens[0]
 
     def get_token(self):
@@ -188,7 +192,7 @@ class Scanner:
         # Return the next token.
         while self.need_more_tokens():
             self.fetch_more_tokens()
-        if bool(self.tokens):
+        if len(self.tokens) > 0:
             self.tokens_taken += 1
             return self.tokens.pop(0)
 
@@ -198,7 +202,7 @@ class Scanner:
         # type: () -> bool
         if self.done:
             return False
-        if not self.tokens:
+        if len(self.tokens) == 0:
             return True
         # The current token may be a potential simple key, so we
         # need to look further.
@@ -1231,21 +1235,33 @@ class Scanner:
 
         # We are done.
         token = ScalarToken("".join(chunks), False, start_mark, end_mark, style)
-        if block_scalar_comment is not None:
-            token.add_pre_comments([block_scalar_comment])
+        if self.loader is not None:
+            comment_handler = getattr(self.loader, 'comment_handling', False)
+            if comment_handler is None:
+                if block_scalar_comment is not None:
+                    token.add_pre_comments([block_scalar_comment])
         if len(trailing) > 0:
-            # nprint('trailing 1', trailing)  # XXXXX
             # Eat whitespaces and comments until we reach the next token.
+            if self.loader is not None:
+                comment_handler = getattr(self.loader, 'comment_handling', None)
+                if comment_handler is not None:
+                    line = end_mark.line - len(trailing)
+                    for x in trailing:
+                        assert x[-1] == '\n'
+                        self.comments.add_blank_line(x, 0, line)
+                        line += 1
             comment = self.scan_to_next_token()
             while comment:
                 trailing.append(' ' * comment[1].column + comment[0])
                 comment = self.scan_to_next_token()
-
-            # Keep track of the trailing whitespace and following comments
-            # as a comment token, if isn't all included in the actual value.
-            comment_end_mark = self.reader.get_mark()
-            comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
-            token.add_post_comment(comment)
+            if self.loader is not None:
+                comment_handler = getattr(self.loader, 'comment_handling', False)
+                if comment_handler is None:
+                    # Keep track of the trailing whitespace and following comments
+                    # as a comment token, if isn't all included in the actual value.
+                    comment_end_mark = self.reader.get_mark()
+                    comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
+                    token.add_post_comment(comment)
         return token
 
     def scan_block_scalar_indicators(self, start_mark):
@@ -1590,10 +1606,21 @@ class Scanner:
                 break
 
         token = ScalarToken("".join(chunks), True, start_mark, end_mark)
-        if spaces and spaces[0] == '\n':
-            # Create a comment token to preserve the trailing line breaks.
-            comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
-            token.add_post_comment(comment)
+        # getattr defaults to False so the C type loader, which cannot handle comments, will not make a CommentToken
+        if self.loader is not None:
+            comment_handler = getattr(self.loader, 'comment_handling', False)
+            if comment_handler is None:
+                if spaces and spaces[0] == '\n':
+                    # Create a comment token to preserve the trailing line breaks.
+                    comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
+                    token.add_post_comment(comment)
+            elif comment_handler is not False:
+                line = start_mark.line + 1
+                for ch in spaces:
+                    if ch == '\n':
+                        self.comments.add_blank_line('\n', 0, line)
+                        line += 1
+
         return token
 
     def scan_plain_spaces(self, indent, start_mark):
@@ -1764,7 +1791,7 @@ class RoundTripScanner(Scanner):
         while self.need_more_tokens():
             self.fetch_more_tokens()
         self._gather_comments()
-        if bool(self.tokens):
+        if len(self.tokens) > 0:
             if not choices:
                 return True
             for choice in choices:
@@ -1778,13 +1805,13 @@ class RoundTripScanner(Scanner):
         while self.need_more_tokens():
             self.fetch_more_tokens()
         self._gather_comments()
-        if bool(self.tokens):
+        if len(self.tokens) > 0:
             return self.tokens[0]
         return None
 
     def _gather_comments(self):
         # type: () -> Any
-        """combine multiple comment lines"""
+        """combine multiple comment lines and assign to next non-comment-token"""
         comments = []  # type: List[Any]
         if not self.tokens:
             return comments
@@ -1813,7 +1840,7 @@ class RoundTripScanner(Scanner):
         while self.need_more_tokens():
             self.fetch_more_tokens()
         self._gather_comments()
-        if bool(self.tokens):
+        if len(self.tokens) > 0:
             # nprint('tk', self.tokens)
             # only add post comment to single line tokens:
             # scalar, value token. FlowXEndToken, otherwise
@@ -1925,7 +1952,7 @@ class RoundTripScanner(Scanner):
                 if not self.flow_level:
                     self.allow_simple_key = True
                 return comment, start_mark, end_mark
-            if bool(self.scan_line_break()):
+            if self.scan_line_break() != '':
                 start_mark = self.reader.get_mark()
                 if not self.flow_level:
                     self.allow_simple_key = True
@@ -1973,3 +2000,377 @@ class RoundTripScanner(Scanner):
     def scan_block_scalar(self, style, rt=True):
         # type: (Any, Optional[bool]) -> Any
         return Scanner.scan_block_scalar(self, style, rt=rt)
+
+
+# comment handling 2021, differentiation not needed
+
+VALUECMNT = 0
+KEYCMNT = 0  # 1
+#TAGCMNT = 2
+#ANCHORCMNT = 3
+
+
+class CommentBase:  # base class for scanned comments; subclasses supply the `name` used by info()
+    __slots__ = ('value', 'line', 'column', 'used', 'function', 'fline', 'ufun', 'uline')
+    def __init__(self, value, line, column):
+        self.value = value
+        self.line = line
+        self.column = column
+        self.used = ' '  # ' ' until set_used() (default '+') or set_assigned() ('|') is called
+        info = inspect.getframeinfo(inspect.stack()[3][0])  # the frame three levels above this one
+        self.function = info.function  # function name and line number of that frame, shown by info()
+        self.fline = info.lineno
+        self.ufun = None  # function name / line number of the set_used() caller, filled in there
+        self.uline = None
+
+    def set_used(self, v='+'):
+        self.used = v
+        info = inspect.getframeinfo(inspect.stack()[1][0])  # the direct caller of set_used()
+        self.ufun = info.function
+        self.uline = info.lineno
+
+    def set_assigned(self):
+        self.used = '|'
+
+    def __str__(self):
+        return _F('{value}', value=self.value)
+
+    def __repr__(self):
+        return _F('{value!r}', value=self.value)
+
+    def info(self):  # one-line debug description: name, used marker, position, value, origin frames
+        return _F('{name}{used} {line:2}:{column:<2} "{value:40s} {function}:{fline} {ufun}:{uline}',
+                  name=self.name, line=self.line, column=self.column, value=self.value + '"', used=self.used,
+                  function=self.function, fline=self.fline, ufun=self.ufun, uline=self.uline)
+
+
+class EOLComment(CommentBase):
+    name = 'EOLC'  # tag that CommentBase.info() prints for this kind of comment
+
+    def __init__(self, value, line, column):
+        super().__init__(value, line, column)  # extra frame is counted by inspect.stack()[3] in the base
+
+
+class FullLineComment(CommentBase):
+    name = 'FULL'  # tag that CommentBase.info() prints for this kind of comment
+
+    def __init__(self, value, line, column):
+        super().__init__(value, line, column)  # extra frame is counted by inspect.stack()[3] in the base
+
+
+class BlankLineComment(CommentBase):
+    name = 'BLNK'  # tag that CommentBase.info() prints for this kind of comment
+
+    def __init__(self, value, line, column):
+        super().__init__(value, line, column)  # extra frame is counted by inspect.stack()[3] in the base
+
+
+class ScannedComments:
+    def __init__(self):
+        self.comments = {}  # line number -> comment object, filled by the add_* methods
+        self.unused = []  # line numbers not yet popped by assign_*/unprocessed, in insertion order
+
+    def add_eol_comment(self, comment, column, line):
+        """Store comment (minus its last character) as the EOLComment for line, mark it unused."""
+        if comment.count('\n') == 1:
+            assert comment[-1] == '\n'
+        else:
+            assert '\n' not in comment
+        self.comments[line] = retval = EOLComment(comment[:-1], line, column)
+        self.unused.append(line)
+        return retval
+
+    def add_blank_line(self, comment, column, line):
+        """Store comment (minus its final newline) as the BlankLineComment for line, mark it unused."""
+        assert comment.count('\n') == 1 and comment[-1] == '\n'
+        assert line not in self.comments
+        self.comments[line] = retval = BlankLineComment(comment[:-1], line, column)
+        self.unused.append(line)
+        return retval
+
+    def add_full_line_comment(self, comment, column, line):
+        """Store comment (minus its final newline) as the FullLineComment for line, mark it unused."""
+        assert comment.count('\n') == 1 and comment[-1] == '\n'
+        # unlike add_blank_line() there is no check that line is still free: an
+        # earlier entry for the same line is replaced (and line is appended to
+        # self.unused a second time)
+        self.comments[line] = retval = FullLineComment(comment[:-1], line, column)
+        self.unused.append(line)
+        return retval
+
+    def __getitem__(self, idx):
+        return self.comments[idx]
+
+    def __str__(self):
+        return 'ParsedComments:\n  '  + \
+        '\n  '.join((_F('{lineno:2} {x}', lineno=lineno, x=x.info()) for lineno, x in self.comments.items())) + '\n'
+
+    def last(self):
+        lineno, x = list(self.comments.items())[-1]
+        return _F('{lineno:2} {x}\n', lineno=lineno, x=x.info()) 
+
+    def any_unprocessed(self):
+        # ToDo: might want to differentiate based on lineno
+        return len(self.unused) > 0
+        #for lno, comment in reversed(self.comments.items()):
+        #    if comment.used == ' ':
+        #        return True
+        #return False
+
+    def unprocessed(self, use=False):
+        while len(self.unused) > 0:
+            first = self.unused.pop(0) if use else self.unused[0]
+            info = inspect.getframeinfo(inspect.stack()[1][0])
+            xprintf('using', first, self.comments[first].value, info.function, info.lineno)
+            yield first, self.comments[first]
+            if use:
+                self.comments[first].set_used()
+
+    def assign_pre(self, token):  # give unused comments from lines before token's line to token (pre)
+        token_line = token.start_mark.line
+        info = inspect.getframeinfo(inspect.stack()[1][0])  # caller, for the debug output only
+        xprintf('assign_pre', token_line, self.unused, info.function, info.lineno)
+        gobbled = False
+        while self.unused and self.unused[0] < token_line:
+            gobbled = True  # at least one comment was handed to the token
+            first = self.unused.pop(0)
+            xprintf('assign_pre < ', first)
+            self.comments[first].set_used()
+            token.add_comment_pre(first)  # the token stores the line number, not the comment object
+        return gobbled  # True when any comment was assigned
+
+    def assign_eol(self, tokens):
+        """Attach the first unused comment, if an EOLComment, to the last scalar starting on or before its line.
+        Raises NotImplementedError when the tokens found there are not a handled combination."""
+        try:
+            comment_line = self.unused[0]
+        except IndexError:
+            return
+        if not isinstance(self.comments[comment_line], EOLComment):
+            return
+        idx = 1
+        while tokens[-idx].start_mark.line > comment_line or isinstance(tokens[-idx], ValueToken):
+            idx += 1
+        xprintf('idx1', idx)
+        if len(tokens) > idx and isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], ScalarToken):
+            return
+        try:
+            if isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], KeyToken):
+                try:
+                    eol_idx = self.unused.pop(0)
+                    self.comments[eol_idx].set_used()
+                    xprintf('>>>>>a', idx, eol_idx, KEYCMNT)
+                    tokens[-idx].add_comment_eol(eol_idx, KEYCMNT)
+                except IndexError:
+                    raise NotImplementedError
+                return
+        except IndexError:
+            xprintf('IndexError1')  # no token before tokens[-idx]: fall through to the next check
+        try:
+            if isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], (ValueToken, BlockEntryToken)):
+                try:
+                    eol_idx = self.unused.pop(0)
+                    self.comments[eol_idx].set_used()
+                    tokens[-idx].add_comment_eol(eol_idx, VALUECMNT)
+                except IndexError:
+                    raise NotImplementedError
+                return
+        except IndexError:
+            xprintf('IndexError2')  # no token before tokens[-idx]: report as not implemented below
+        for t in tokens:
+            xprintf('tt-', t)
+        xprintf('not implemented EOL', type(tokens[-idx]))
+        raise NotImplementedError(_F('EOL comment after {t!r}', t=type(tokens[-idx])))
+
+    def assign_post(self, token):  # give unused comments from lines before token's line to token (post)
+        token_line = token.start_mark.line
+        info = inspect.getframeinfo(inspect.stack()[1][0])  # caller, for the debug output only
+        xprintf('assign_post', token_line, self.unused, info.function, info.lineno)
+        gobbled = False
+        while self.unused and self.unused[0] < token_line:
+            gobbled = True  # at least one comment was handed to the token
+            first = self.unused.pop(0)
+            xprintf('assign_post < ', first)
+            self.comments[first].set_used()
+            token.add_comment_post(first)  # the token stores the line number, not the comment object
+        return gobbled  # True when any comment was assigned
+
+    def str_unprocessed(self):
+        return ''.join((_F('  {ind:2} {x}\n', ind=ind, x=x.info()) for ind, x in self.comments.items() if x.used == ' '))
+
+
+class RoundTripScannerSC(Scanner):  # RoundTripScanner Split Comments
+    def __init__(self, *arg, **kw):
+        super().__init__(*arg, **kw)
+        assert self.loader is not None
+        # comments is initialised in .need_more_tokens and persists on self.loader.parsed_comments
+        # (need_more_tokens creates the ScannedComments instance while this is still None)
+        self.comments = None 
+
+    def get_token(self):
+        # type: () -> Any
+        # Return the next token (None when the queue is empty) after giving it the unused earlier comments.
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if len(self.tokens) > 0:
+            if isinstance(self.tokens[0], BlockEndToken):
+                self.comments.assign_post(self.tokens[0])  # block end: recorded via add_comment_post
+            else:
+                self.comments.assign_pre(self.tokens[0])  # any other token: recorded via add_comment_pre
+            self.tokens_taken += 1
+            return self.tokens.pop(0)
+
+    def need_more_tokens(self):
+        if self.comments is None:  # first call: create the comment store and expose it on the loader
+            self.loader.parsed_comments = self.comments = ScannedComments()
+        if self.done:
+            return False
+        if len(self.tokens) == 0:
+            return True
+        # The current token may be a potential simple key, so we
+        # need to look further.
+        self.stale_possible_simple_keys()
+        if self.next_possible_simple_key() == self.tokens_taken:
+            return True
+        if len(self.tokens) < 2:  # fewer than two tokens queued: fetch more
+            return True
+        if self.tokens[0].start_mark.line == self.tokens[-1].start_mark.line:
+            return True  # first and last queued token start on the same line: fetch more
+        if True:  # debug output, unconditionally enabled
+            xprintf('-x--', len(self.tokens))
+            for t in self.tokens:
+                xprintf(t)
+            #xprintf(self.comments.last())
+            xprintf(self.comments.str_unprocessed())
+        self.comments.assign_pre(self.tokens[0])  # enough tokens queued: hand out the scanned comments
+        self.comments.assign_eol(self.tokens)
+        return False
+
+    def scan_to_next_token(self):
+        """Skip to the next token, storing a comment or blank lines found on the way in self.comments.
+
+        Spaces are skipped (and a byte order mark at the very start of the stream).
+        Then at most one of these is consumed:
+
+        - a comment, stored as a full line comment when scanning started in
+          column 0 and as an end-of-line comment otherwise;
+        - a line break.
+
+        In both cases the blank lines and full line comments that follow are
+        gathered by scan_empty_or_full_line_comments(), and allow_simple_key is
+        set when flow_level is zero.
+
+        Always returns None: comments are stored on self.comments and not handed
+        back to the caller.
+        """
+        srp = self.reader.peek
+        srf = self.reader.forward
+        # skip a byte order mark at the very start of the stream
+        if self.reader.index == 0 and srp() == '\uFEFF':
+            srf()
+        # taken before the spaces are skipped: column 0 means a comment found
+        # below has the line to itself
+        start_mark = self.reader.get_mark()
+        # xprintf('current_mark', start_mark.line, start_mark.column)
+        while srp() == ' ':
+            srf()
+        ch = srp()
+        if ch == '#':
+            comment_start_mark = self.reader.get_mark()
+            comment = ch
+            srf()  # skip the '#'
+            while ch not in _THE_END:
+                ch = srp()
+                if ch == '\0':  # don't gobble the end-of-stream character
+                    # but add an explicit newline as "YAML processors should terminate
+                    # the stream with an explicit line break"
+                    # https://yaml.org/spec/1.2/spec.html#id2780069
+                    comment += '\n'
+                    break
+                comment += ch
+                srf()
+            # we have a comment
+            if start_mark.column == 0:
+                self.comments.add_full_line_comment(comment, comment_start_mark.column, comment_start_mark.line)
+            else:
+                self.comments.add_eol_comment(comment, comment_start_mark.column, comment_start_mark.line)
+            # gather any blank lines or full line comments following the comment as well
+            self.scan_empty_or_full_line_comments()
+            if not self.flow_level:
+                self.allow_simple_key = True
+            return None
+        # no comment: consume a line break, if there is one
+        if self.scan_line_break() != '':
+            if not self.flow_level:
+                self.allow_simple_key = True
+            self.scan_empty_or_full_line_comments()
+        return None
+
+    def scan_empty_or_full_line_comments(self):  # store following blank lines / full line comments
+        blmark = self.reader.get_mark()
+        assert blmark.column == 0  # must be called at the start of a line
+        blanks = ""  # spaces and tabs seen on the current line so far
+        comment = None  # text of the comment on the current line, None while none was seen
+        mark = None  # position of the '#' that started `comment`
+        ch = self.reader.peek()
+        while True:
+            # nprint('ch', repr(ch), self.reader.get_mark().column)
+            if ch in '\r\n\x85\u2028\u2029':  # end of the current line
+                if self.reader.prefix(2) == '\r\n':
+                    self.reader.forward(2)
+                else:
+                    self.reader.forward()
+                if comment is not None:
+                    comment += '\n'
+                    self.comments.add_full_line_comment(comment, mark.column, mark.line)
+                    comment = None
+                else:
+                    blanks += '\n'
+                    self.comments.add_blank_line(blanks, blmark.column, blmark.line)
+                blanks = ""  # start collecting for the next line
+                blmark = self.reader.get_mark()
+                ch = self.reader.peek()
+                continue
+            if comment is None:
+                if ch in ' \t':
+                    blanks += ch
+                elif ch == '#':
+                    mark = self.reader.get_mark()
+                    comment = '#'
+                else:  # anything else starts real content: stop, leaving the reader on that character
+                    # print('breaking on', repr(ch))
+                    break
+            else:
+                comment += ch  # NOTE(review): also reached for '\0' — confirm a final comment without line break
+            self.reader.forward()
+            ch = self.reader.peek()
+
+    def scan_block_scalar_ignored_line(self, start_mark):
+        # type: (Any) -> Any
+        """Skip spaces, an optional comment and the line break; always returns None.
+        A comment is stored via self.comments.add_eol_comment(); other content raises ScannerError."""
+        # See the specification for details.
+        srp = self.reader.peek
+        srf = self.reader.forward
+        comment = None
+        while srp() == ' ':
+            srf()
+        if srp() == '#':
+            comment = ''
+            mark = self.reader.get_mark()
+            while srp() not in _THE_END:
+                comment += srp()
+                srf()
+            comment += '\n'
+        ch = srp()
+        if ch not in _THE_END:
+            raise ScannerError(
+                'while scanning a block scalar',
+                start_mark,
+                _F('expected a comment or a line break, but found {ch!r}', ch=ch),
+                self.reader.get_mark(),
+            )
+        if comment is not None:
+            self.comments.add_eol_comment(comment, mark.column, mark.line)
+        self.scan_line_break()
+        return None
diff --git a/tokens.py b/tokens.py
index 490866b..01cae1f 100644
--- a/tokens.py
+++ b/tokens.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 
-from ruamel.yaml.compat import _F
+from ruamel.yaml.compat import _F, nprintf
 
 if False:  # MYPY
     from typing import Text, Any, Dict, Optional, List  # NOQA
@@ -89,13 +89,17 @@ class Token:
                 self._comment[0] = []
         self._comment[0].append(comment)
 
-    def add_comment_eol(self, comment):
+    def add_comment_eol(self, comment, comment_type):
         if not hasattr(self, '_comment'):
             self._comment = [None, None, None]
         else:
             assert len(self._comment) == 3
             assert self._comment[1] is None
-        self._comment[1] = comment
+        if self.comment[1] is None:
+            self._comment[1] = []
+        self._comment[1].extend([None] * (comment_type + 1 - len(self.comment[1])))
+        # nprintf('commy', self.comment, comment_type)
+        self._comment[1][comment_type] = comment
 
     def add_comment_post(self, comment):
         if not hasattr(self, '_comment'):
@@ -184,11 +188,9 @@ class Token:
             target._comment = c
             # nprint('mco2:', self, target, target.comment, empty)
             return self
-        return
-        raise NotImplemtedError
         # if self and target have both pre, eol or post comments, something seems wrong
         for idx in range(3):
-            if c[idx] and tc[idx]:
+            if c[idx] is not None and tc[idx] is not None:
                 raise NotImplementedError(_F('overlap in comment {c!r} {tc!r}', c=c, tc=tc))
         # move the comment parts
         for idx in range(3):
-- 
cgit v1.2.1