summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthon van der Neut <anthon@mnt.org>2021-05-06 08:36:49 +0200
committerAnthon van der Neut <anthon@mnt.org>2021-05-06 08:36:49 +0200
commit17b35c376fd0fc9a94ba0adfdbf5bf63a6177dc9 (patch)
tree49a76a7328cbc20efde9603d5373ecf003adbbc6
parent3d77f16e00124b74e150625396617b41e41da014 (diff)
downloadruamel.yaml-17b35c376fd0fc9a94ba0adfdbf5bf63a6177dc9.tar.gz
* extend EOL token handling
* extending comment
-rw-r--r--comments.py101
-rw-r--r--constructor.py187
-rw-r--r--events.py5
-rw-r--r--main.py19
-rw-r--r--parser.py92
-rw-r--r--scanner.py445
-rw-r--r--tokens.py14
7 files changed, 756 insertions, 107 deletions
diff --git a/comments.py b/comments.py
index d133299..f49c183 100644
--- a/comments.py
+++ b/comments.py
@@ -11,7 +11,7 @@ import copy
from ruamel.yaml.compat import ordereddict # type: ignore
-from ruamel.yaml.compat import MutableSliceableSequence, _F
+from ruamel.yaml.compat import MutableSliceableSequence, _F, nprintf
from ruamel.yaml.scalarstring import ScalarString
from ruamel.yaml.anchor import Anchor
@@ -35,13 +35,44 @@ __all__ = ['CommentedSeq', 'CommentedKeySeq',
# bits 0 and 1 are combined, you can choose only one
C_POST = 0b00
C_PRE = 0b01
-C_SPLIT_ON_FIRST_BLANK = 0b10 # as C_POST, but if blank line then C_PRE everything before first
- # blank goes to POST even if no following real FLC
+C_SPLIT_ON_FIRST_BLANK = 0b10 # as C_POST, but if blank line then C_PRE all lines before first
+ # blank goes to POST even if no following real FLC (first blank -> first of post)
# 0b11 -> reserved for future use
C_BLANK_LINE_PRESERVE_SPACE = 0b100
# C_EOL_PRESERVE_SPACE2 = 0b1000
+class IDX:
+ # temporary auto increment, so rearranging is easier
+ def __init__(self):
+ self._idx = 0
+
+ def __call__(self):
+ x = self._idx
+ self._idx += 1
+ return x
+
+ def __str__(self):
+ return str(self._idx)
+
+cidx = IDX()
+
+# more or less in order of subjective expected likelihood
+# the _POST and _PRE ones are lists themselves
+C_VALUE_EOL = C_ELEM_EOL = cidx()
+C_KEY_EOL = cidx()
+C_KEY_PRE = C_ELEM_PRE = cidx() # note: this is not value
+C_VALUE_POST = C_ELEM_POST = cidx() # note: this is not value
+C_VALUE_PRE = cidx()
+C_KEY_POST = cidx()
+C_TAG_EOL = cidx()
+C_TAG_POST = cidx()
+C_TAG_PRE = cidx()
+C_ANCHOR_EOL = cidx()
+C_ANCHOR_POST = cidx()
+C_ANCHOR_PRE = cidx()
+
+
comment_attrib = '_yaml_comment'
format_attrib = '_yaml_format'
line_col_attrib = '_yaml_line_col'
@@ -52,31 +83,32 @@ tag_attrib = '_yaml_tag'
class Comment:
# using sys.getsize tested the Comment objects, __slots__ makes them bigger
# and adding self.end did not matter
- __slots__ = 'comment', '_items', '_end', '_start'
+ __slots__ = 'comment', '_items', '_post', '_pre'
attrib = comment_attrib
- def __init__(self):
+ def __init__(self, old=True):
# type: () -> None
+ self._pre = None if old else []
self.comment = None # [post, [pre]]
# map key (mapping/omap/dict) or index (sequence/list) to a list of
# dict: post_key, pre_key, post_value, pre_value
# list: pre item, post item
self._items = {} # type: Dict[Any, Any]
# self._start = [] # should not put these on first item
- self._end = [] # type: List[Any] # end of document comments
+ self._post = [] # type: List[Any] # end of document comments
def __str__(self):
# type: () -> str
- if bool(self._end):
+ if bool(self._post):
end = ',\n end=' + str(self._post)
else:
end = ""
return 'Comment(comment={0},\n items={1}{2})'.format(self.comment, self._items, end)
- def __repr__(self):
+ def _old__repr__(self):
# type: () -> str
- if bool(self._end):
- end = ',\n end=' + str(self._end)
+ if bool(self._post):
+ end = ',\n end=' + str(self._post)
else:
end = ""
try:
@@ -90,6 +122,25 @@ class Comment:
it = '\n ' + it + ' '
return 'Comment(\n start={},\n items={{{}}}{})'.format(self.comment, it, end)
+ def __repr__(self):
+ if self._pre is None:
+ return self._old__repr__()
+ if bool(self._post):
+ end = ',\n end=' + repr(self._post)
+ else:
+ end = ""
+ try:
+ ln = max([len(str(k)) for k in self._items]) + 1
+ except ValueError:
+ ln = ''
+ it = ' '.join(
+ ['{:{}} {}\n'.format(str(k) + ':', ln, v) for k, v in self._items.items()]
+ )
+ if it:
+ it = '\n ' + it + ' '
+ return 'Comment(\n pre={},\n items={{{}}}{})'.format(self.pre, it, end)
+
+
@property
def items(self):
# type: () -> Any
@@ -98,22 +149,38 @@ class Comment:
@property
def end(self):
# type: () -> Any
- return self._end
+ return self._post
@end.setter
def end(self, value):
# type: (Any) -> None
- self._end = value
+ self._post = value
@property
- def start(self):
+ def pre(self):
# type: () -> Any
- return self._start
+ return self._pre
- @start.setter
- def start(self, value):
+ @pre.setter
+ def pre(self, value):
# type: (Any) -> None
- self._start = value
+ self._pre = value
+
+ def get(self, item, pos):
+ x = self._items.get(item)
+ if x is None or len(x) < pos:
+ return None
+ return x[pos] # can be None
+
+ def set(self, item, pos, value):
+ x = self._items.get(item)
+ if x is None:
+ self._items[item] = x = [None] * (pos + 1)
+ else:
+ while len(x) <= pos:
+ x.append(None)
+ assert x[pos] is None
+ x[pos] = value
def __contains__(self, x):
# test if a substring is in any of the attached comments
diff --git a/constructor.py b/constructor.py
index 7b7426f..199129e 100644
--- a/constructor.py
+++ b/constructor.py
@@ -21,7 +21,10 @@ from ruamel.yaml.compat import ordereddict # type: ignore
from ruamel.yaml.comments import * # NOQA
from ruamel.yaml.comments import (CommentedMap, CommentedOrderedMap, CommentedSet,
CommentedKeySeq, CommentedSeq, TaggedScalar,
- CommentedKeyMap)
+ CommentedKeyMap,
+ C_KEY_PRE, C_KEY_EOL, C_KEY_POST,
+ C_VALUE_PRE, C_VALUE_EOL, C_VALUE_POST,
+ )
from ruamel.yaml.scalarstring import (SingleQuotedScalarString, DoubleQuotedScalarString,
LiteralScalarString, FoldedScalarString,
PlainScalarString, ScalarString,)
@@ -92,6 +95,14 @@ class BaseConstructor:
return self.loader.resolver
return self.loader._resolver
+ @property
+ def scanner(self):
+ # type: () -> Any
+ # needed to get to the expanded comments
+ if hasattr(self.loader, 'typ'):
+ return self.loader.scanner
+ return self.loader._scanner
+
def check_data(self):
# type: () -> Any
# If there are more documents available?
@@ -1056,6 +1067,23 @@ class RoundTripConstructor(SafeConstructor):
as well as on the items
"""
+ def comment(self, idx):
+ assert self.loader.comment_handling is not None
+ x = self.scanner.comments[idx]
+ x.set_assigned()
+ return x
+
+ def comments(self, list_of_comments, idx=None):
+ # hand in the comment and optional pre, eol, post segment
+ if list_of_comments is None:
+ return []
+ if idx is not None:
+ if list_of_comments[idx] is None:
+ return []
+ list_of_comments = list_of_comments[idx]
+ for x in list_of_comments:
+ yield self.comment(x)
+
def construct_scalar(self, node):
# type: (Any) -> Any
if not isinstance(node, ScalarNode):
@@ -1068,8 +1096,14 @@ class RoundTripConstructor(SafeConstructor):
if node.style == '|' and isinstance(node.value, str):
lss = LiteralScalarString(node.value, anchor=node.anchor)
- if node.comment and node.comment[1]:
- lss.comment = node.comment[1][0] # type: ignore
+ if self.loader and self.loader.comment_handling is None:
+ if node.comment and node.comment[1]:
+ lss.comment = node.comment[1][0] # type: ignore
+ else:
+ # NEWCMNT
+ if node.comment is not None and node.comment[1]:
+ # nprintf('>>>>nc1', node.comment)
+ lss.comment = self.comment(node.comment[1][0]) # EOL comment after |
return lss
if node.style == '>' and isinstance(node.value, str):
fold_positions = [] # type: List[int]
@@ -1080,8 +1114,14 @@ class RoundTripConstructor(SafeConstructor):
break
fold_positions.append(idx - len(fold_positions))
fss = FoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor)
- if node.comment and node.comment[1]:
- fss.comment = node.comment[1][0] # type: ignore
+ if self.loader and self.loader.comment_handling is None:
+ if node.comment and node.comment[1]:
+ fss.comment = node.comment[1][0] # type: ignore
+ else:
+ # NEWCMNT
+ if node.comment is not None and node.comment[1]:
+ # nprintf('>>>>nc2', node.comment)
+ fss.comment = self.comment(node.comment[1][0]) # EOL comment after >
if fold_positions:
fss.fold_pos = fold_positions # type: ignore
return fss
@@ -1279,12 +1319,17 @@ class RoundTripConstructor(SafeConstructor):
node.start_mark,
)
ret_val = []
- if node.comment:
- seqtyp._yaml_add_comment(node.comment[:2])
- if len(node.comment) > 2:
- # this happens e.g. if you have a sequence element that is a flow-style mapping
- # and that has no EOL comment but a following commentline or empty line
- seqtyp.yaml_end_comment_extend(node.comment[2], clear=True)
+ if self.loader and self.loader.comment_handling is None:
+ if node.comment:
+ seqtyp._yaml_add_comment(node.comment[:2])
+ if len(node.comment) > 2:
+ # this happens e.g. if you have a sequence element that is a flow-style mapping
+ # and that has no EOL comment but a following commentline or empty line
+ seqtyp.yaml_end_comment_extend(node.comment[2], clear=True)
+ else:
+ # NEWCMNT
+ if node.comment:
+ nprintf('nc3', node.comment)
if node.anchor:
from ruamel.yaml.serializer import templated_id
@@ -1408,10 +1453,19 @@ class RoundTripConstructor(SafeConstructor):
)
merge_map = self.flatten_mapping(node)
# mapping = {}
- if node.comment:
- maptyp._yaml_add_comment(node.comment[:2])
- if len(node.comment) > 2:
- maptyp.yaml_end_comment_extend(node.comment[2], clear=True)
+ if self.loader and self.loader.comment_handling is None:
+ if node.comment:
+ maptyp._yaml_add_comment(node.comment[:2])
+ if len(node.comment) > 2:
+ maptyp.yaml_end_comment_extend(node.comment[2], clear=True)
+ else:
+ # NEWCMNT
+ if node.comment:
+ # nprintf('nc4', node.comment, node.start_mark)
+ if maptyp.ca.pre is None:
+ maptyp.ca.pre = []
+ for cmnt in self.comments(node.comment, 0):
+ maptyp.ca.pre.append(cmnt)
if node.anchor:
from ruamel.yaml.serializer import templated_id
@@ -1446,18 +1500,37 @@ class RoundTripConstructor(SafeConstructor):
)
value = self.construct_object(value_node, deep=deep)
if self.check_mapping_key(node, key_node, maptyp, key, value):
- if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]:
- if last_value is None:
- key_node.comment[0] = key_node.comment.pop(4)
- maptyp._yaml_add_comment(key_node.comment, value=last_key)
- else:
- key_node.comment[2] = key_node.comment.pop(4)
+ if self.loader and self.loader.comment_handling is None:
+ if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]:
+ if last_value is None:
+ key_node.comment[0] = key_node.comment.pop(4)
+ maptyp._yaml_add_comment(key_node.comment, value=last_key)
+ else:
+ key_node.comment[2] = key_node.comment.pop(4)
+ maptyp._yaml_add_comment(key_node.comment, key=key)
+ key_node.comment = None
+ if key_node.comment:
maptyp._yaml_add_comment(key_node.comment, key=key)
- key_node.comment = None
- if key_node.comment:
- maptyp._yaml_add_comment(key_node.comment, key=key)
- if value_node.comment:
- maptyp._yaml_add_comment(value_node.comment, value=key)
+ if value_node.comment:
+ maptyp._yaml_add_comment(value_node.comment, value=key)
+ else:
+ # NEWCMNT
+ if key_node.comment:
+ nprintf('nc5a', key, key_node.comment)
+ if key_node.comment[0]:
+ maptyp.ca.set(key, C_KEY_PRE, key_node.comment[0])
+ if key_node.comment[1]:
+ maptyp.ca.set(key, C_KEY_EOL, key_node.comment[1])
+ if key_node.comment[2]:
+ maptyp.ca.set(key, C_KEY_POST, key_node.comment[2])
+ if value_node.comment:
+ nprintf('nc5b', key, value_node.comment)
+ if value_node.comment[0]:
+ maptyp.ca.set(key, C_VALUE_PRE, value_node.comment[0])
+ if value_node.comment[1]:
+ maptyp.ca.set(key, C_VALUE_EOL, value_node.comment[1])
+ if value_node.comment[2]:
+ maptyp.ca.set(key, C_VALUE_POST, value_node.comment[2])
maptyp._yaml_set_kv_line_col(
key,
[
@@ -1483,10 +1556,15 @@ class RoundTripConstructor(SafeConstructor):
_F('expected a mapping node, but found {node_id!s}', node_id=node.id),
node.start_mark,
)
- if node.comment:
- typ._yaml_add_comment(node.comment[:2])
- if len(node.comment) > 2:
- typ.yaml_end_comment_extend(node.comment[2], clear=True)
+ if self.loader and self.loader.comment_handling is None:
+ if node.comment:
+ typ._yaml_add_comment(node.comment[:2])
+ if len(node.comment) > 2:
+ typ.yaml_end_comment_extend(node.comment[2], clear=True)
+ else:
+ # NEWCMNT
+ if node.comment:
+ nprintf('nc6', node.comment)
if node.anchor:
from ruamel.yaml.serializer import templated_id
@@ -1509,10 +1587,17 @@ class RoundTripConstructor(SafeConstructor):
# construct but should be null
value = self.construct_object(value_node, deep=deep) # NOQA
self.check_set_key(node, key_node, typ, key)
- if key_node.comment:
- typ._yaml_add_comment(key_node.comment, key=key)
- if value_node.comment:
- typ._yaml_add_comment(value_node.comment, value=key)
+ if self.loader and self.loader.comment_handling is None:
+ if key_node.comment:
+ typ._yaml_add_comment(key_node.comment, key=key)
+ if value_node.comment:
+ typ._yaml_add_comment(value_node.comment, value=key)
+ else:
+ # NEWCMNT
+ if key_node.comment:
+ nprintf('nc7a', key_node.comment)
+ if value_node.comment:
+ nprintf('nc7b', value_node.comment)
typ.add(key)
def construct_yaml_seq(self, node):
@@ -1563,10 +1648,15 @@ class RoundTripConstructor(SafeConstructor):
elif node.flow_style is False:
omap.fa.set_block_style()
yield omap
- if node.comment:
- omap._yaml_add_comment(node.comment[:2])
- if len(node.comment) > 2:
- omap.yaml_end_comment_extend(node.comment[2], clear=True)
+ if self.loader and self.loader.comment_handling is None:
+ if node.comment:
+ omap._yaml_add_comment(node.comment[:2])
+ if len(node.comment) > 2:
+ omap.yaml_end_comment_extend(node.comment[2], clear=True)
+ else:
+ # NEWCMNT
+ if node.comment:
+ nprintf('nc8', node.comment)
if not isinstance(node, SequenceNode):
raise ConstructorError(
'while constructing an ordered map',
@@ -1599,12 +1689,21 @@ class RoundTripConstructor(SafeConstructor):
key = self.construct_object(key_node)
assert key not in omap
value = self.construct_object(value_node)
- if key_node.comment:
- omap._yaml_add_comment(key_node.comment, key=key)
- if subnode.comment:
- omap._yaml_add_comment(subnode.comment, key=key)
- if value_node.comment:
- omap._yaml_add_comment(value_node.comment, value=key)
+ if self.loader and self.loader.comment_handling is None:
+ if key_node.comment:
+ omap._yaml_add_comment(key_node.comment, key=key)
+ if subnode.comment:
+ omap._yaml_add_comment(subnode.comment, key=key)
+ if value_node.comment:
+ omap._yaml_add_comment(value_node.comment, value=key)
+ else:
+ # NEWCMNT
+ if key_node.comment:
+ nprintf('nc9a', key_node.comment)
+ if subnode.comment:
+ nprintf('nc9b', subnode.comment)
+ if value_node.comment:
+ nprintf('nc9c', value_node.comment)
omap[key] = value
def construct_yaml_set(self, node):
diff --git a/events.py b/events.py
index ef63dad..e0c7f68 100644
--- a/events.py
+++ b/events.py
@@ -7,6 +7,8 @@ from ruamel.yaml.compat import _F
if False: # MYPY
from typing import Any, Dict, Optional, List # NOQA
+SHOW_LINES = False
+
def CommentCheck():
# type: () -> None
@@ -37,6 +39,9 @@ class Event:
arguments.append(_F('{key!s}={v!r}', key=key, v=v))
if self.comment not in [None, CommentCheck]:
arguments.append('comment={!r}'.format(self.comment))
+ if SHOW_LINES:
+ arguments.append('({}:{}/{}:{})'.format(self.start_mark.line, self.start_mark.column,
+ self.end_mark.line, self.end_mark.column))
arguments = ', '.join(arguments)
else:
attributes = [
diff --git a/main.py b/main.py
index 7d2f177..e19f28c 100644
--- a/main.py
+++ b/main.py
@@ -31,6 +31,7 @@ from ruamel.yaml.constructor import (
RoundTripConstructor,
)
from ruamel.yaml.loader import Loader as UnsafeLoader
+from ruamel.yaml.comments import CommentedMap, CommentedSeq, C_PRE
if False: # MYPY
from typing import List, Set, Dict, Union, Any, Callable, Optional, Text # NOQA
@@ -81,6 +82,7 @@ class YAML:
self.Scanner = None # type: Any
self.Serializer = None # type: Any
self.default_flow_style = None # type: Any
+ self.comment_handling = None
typ_found = 1
setup_rt = False
if 'rt' in self.typ:
@@ -107,6 +109,18 @@ class YAML:
self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser
self.Composer = ruamel.yaml.composer.Composer
self.Constructor = ruamel.yaml.constructor.Constructor
+ elif 'rtsc' in self.typ:
+ self.default_flow_style = False
+ # no optimized rt-dumper yet
+ self.Emitter = ruamel.yaml.emitter.Emitter
+ self.Serializer = ruamel.yaml.serializer.Serializer
+ self.Representer = ruamel.yaml.representer.RoundTripRepresenter
+ self.Scanner = ruamel.yaml.scanner.RoundTripScannerSC
+ # no optimized rt-parser yet
+ self.Parser = ruamel.yaml.parser.RoundTripParserSC
+ self.Composer = ruamel.yaml.composer.Composer
+ self.Constructor = ruamel.yaml.constructor.RoundTripConstructor
+ self.comment_handling = C_PRE
else:
setup_rt = True
typ_found = 0
@@ -150,7 +164,6 @@ class YAML:
self.scalar_after_indicator = None
# [a, b: 1, c: {d: 2}] vs. [a, {b: 1}, {c: {d: 2}}]
self.brace_single_entry_mapping_in_flow_sequence = False
- self.comment_handling = None
for module in self.plug_ins:
if getattr(module, 'typ', None) in self.typ:
typ_found += 1
@@ -711,8 +724,6 @@ class YAML:
def map(self, **kw):
# type: (Any) -> Any
if 'rt' in self.typ:
- from ruamel.yaml.comments import CommentedMap
-
return CommentedMap(**kw)
else:
return dict(**kw)
@@ -720,8 +731,6 @@ class YAML:
def seq(self, *args):
# type: (Any) -> Any
if 'rt' in self.typ:
- from ruamel.yaml.comments import CommentedSeq
-
return CommentedSeq(*args)
else:
return list(*args)
diff --git a/parser.py b/parser.py
index 279fc20..8e2f54e 100644
--- a/parser.py
+++ b/parser.py
@@ -44,7 +44,7 @@
#
# FIRST sets:
#
-# stream: { STREAM-START }
+# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START
@@ -78,6 +78,8 @@ from ruamel.yaml.error import MarkedYAMLError
from ruamel.yaml.tokens import * # NOQA
from ruamel.yaml.events import * # NOQA
from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError # NOQA
+from ruamel.yaml.scanner import BlankLineComment
+from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK
from ruamel.yaml.compat import _F, nprint, nprintf # NOQA
if False: # MYPY
@@ -86,6 +88,10 @@ if False: # MYPY
__all__ = ['Parser', 'RoundTripParser', 'ParserError']
+def xprintf(*args, **kw):
+ return nprintf(*args, **kw)
+ pass
+
class ParserError(MarkedYAMLError):
pass
@@ -106,7 +112,7 @@ class Parser:
def reset_parser(self):
# type: () -> None
# Reset the state attributes (to clear self-references)
- self.current_event = None
+ self.current_event = self.last_event = None
self.tag_handles = {} # type: Dict[Any, Any]
self.states = [] # type: List[Any]
self.marks = [] # type: List[Any]
@@ -158,7 +164,10 @@ class Parser:
if self.current_event is None:
if self.state:
self.current_event = self.state()
- value = self.current_event
+ # assert self.current_event is not None
+ # if self.current_event.end_mark.line != self.peek_event().start_mark.line:
+ xprintf('get_event', repr(self.current_event), self.peek_event().start_mark.line)
+ self.last_event = value = self.current_event
self.current_event = None
return value
@@ -204,8 +213,6 @@ class Parser:
self.scanner.get_token()
# Parse an explicit document.
if not self.scanner.check_token(StreamEndToken):
- token = self.scanner.peek_token()
- start_mark = token.start_mark
version, tags = self.process_directives()
if not self.scanner.check_token(DocumentStartToken):
raise ParserError(
@@ -218,6 +225,7 @@ class Parser:
self.scanner.peek_token().start_mark,
)
token = self.scanner.get_token()
+ start_mark = token.start_mark
end_mark = token.end_mark
# if self.loader is not None and \
# end_mark.line != self.scanner.peek_token().start_mark.line:
@@ -401,9 +409,13 @@ class Parser:
if indentless_sequence and self.scanner.check_token(BlockEntryToken):
comment = None
pt = self.scanner.peek_token()
- if pt.comment and pt.comment[0]:
- comment = [pt.comment[0], []]
- pt.comment[0] = None
+ if self.loader and self.loader.comment_handling is None:
+ if pt.comment and pt.comment[0]:
+ comment = [pt.comment[0], []]
+ pt.comment[0] = None
+ elif self.loader:
+ if pt.comment:
+ comment = pt.comment
end_mark = self.scanner.peek_token().end_mark
event = SequenceStartEvent(
anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment
@@ -556,7 +568,14 @@ class Parser:
self.state = self.parse_indentless_sequence_entry
return self.process_empty_scalar(token.end_mark)
token = self.scanner.peek_token()
- event = SequenceEndEvent(token.start_mark, token.start_mark, comment=token.comment)
+ c = None
+ if self.loader and self.loader.comment_handling is None:
+ c = token.comment
+ start_mark = token.start_mark
+ else:
+ start_mark = self.last_event.end_mark
+ c = self.distribute_comment(token.comment, start_mark.line)
+ event = SequenceEndEvent(start_mark, start_mark, comment=c)
self.state = self.states.pop()
return event
@@ -783,10 +802,8 @@ class Parser:
return ScalarEvent(None, None, (True, False), "", mark, mark, comment=comment)
def move_token_comment(self, token, nt=None, empty=False):
- if getattr(self.loader, 'comment_handling', None) is None: # pre 0.18
- token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
- else:
- token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
+ pass
+
class RoundTripParser(Parser):
"""roundtrip is a safe loader, that wants to see the unmangled tag"""
@@ -810,3 +827,52 @@ class RoundTripParser(Parser):
):
return Parser.transform_tag(self, handle, suffix)
return handle + suffix
+
+ def move_token_comment(self, token, nt=None, empty=False):
+ token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
+
+
+class RoundTripParserSC(RoundTripParser):
+ """roundtrip is a safe loader, that wants to see the unmangled tag"""
+
+ # some of the differences are based on the superclass testing if self.loader.comment_handling is not None
+
+ def move_token_comment(self, token, nt=None, empty=False):
+ token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
+
+ def distribute_comment(self, comment, line):
+ # ToDo, look at indentation of the comment to determine attachment
+ if comment is None:
+ return None
+ if not comment[0]:
+ return None
+ if comment[0][0] != line + 1:
+ nprintf('>>>dcxxx', comment, line)
+ assert comment[0][0] == line + 1
+ #if comment[0] - line > 1:
+ # return
+ typ = self.loader.comment_handling & 0b11
+ # nprintf('>>>dca', comment, line, typ)
+ if typ == C_POST:
+ return None
+ if typ == C_PRE:
+ c = [None, None, comment[0]]
+ comment[0] = None
+ return c
+ # nprintf('>>>dcb', comment[0])
+ for idx, cmntidx in enumerate(comment[0]):
+ # nprintf('>>>dcb', cmntidx)
+ if isinstance(self.scanner.comments[cmntidx], BlankLineComment):
+ break
+ else:
+ return None # no space found
+ if idx == 0:
+ return None # first line was blank
+ # nprintf('>>>dcc', idx)
+ if typ == C_SPLIT_ON_FIRST_BLANK:
+ c = [None, None, comment[0][:idx]]
+ comment[0] = comment[0][idx:]
+ return c
+ raise NotImplementedError # reserved
+
+
diff --git a/scanner.py b/scanner.py
index f98da00..f9e6052 100644
--- a/scanner.py
+++ b/scanner.py
@@ -44,6 +44,10 @@ _THE_END = '\n\0\r\x85\u2028\u2029'
_THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029'
_SPACE_TAB = ' \t'
+def xprintf(*args, **kw):
+ return nprintf(*args, **kw)
+ pass
+
class ScannerError(MarkedYAMLError):
pass
@@ -167,7 +171,7 @@ class Scanner:
# Check if the next token is one of the given types.
while self.need_more_tokens():
self.fetch_more_tokens()
- if bool(self.tokens):
+ if len(self.tokens) > 0:
if not choices:
return True
for choice in choices:
@@ -180,7 +184,7 @@ class Scanner:
# Return the next token, but do not delete if from the queue.
while self.need_more_tokens():
self.fetch_more_tokens()
- if bool(self.tokens):
+ if len(self.tokens) > 0:
return self.tokens[0]
def get_token(self):
@@ -188,7 +192,7 @@ class Scanner:
# Return the next token.
while self.need_more_tokens():
self.fetch_more_tokens()
- if bool(self.tokens):
+ if len(self.tokens) > 0:
self.tokens_taken += 1
return self.tokens.pop(0)
@@ -198,7 +202,7 @@ class Scanner:
# type: () -> bool
if self.done:
return False
- if not self.tokens:
+ if len(self.tokens) == 0:
return True
# The current token may be a potential simple key, so we
# need to look further.
@@ -1231,21 +1235,33 @@ class Scanner:
# We are done.
token = ScalarToken("".join(chunks), False, start_mark, end_mark, style)
- if block_scalar_comment is not None:
- token.add_pre_comments([block_scalar_comment])
+ if self.loader is not None:
+ comment_handler = getattr(self.loader, 'comment_handling', False)
+ if comment_handler is None:
+ if block_scalar_comment is not None:
+ token.add_pre_comments([block_scalar_comment])
if len(trailing) > 0:
- # nprint('trailing 1', trailing) # XXXXX
# Eat whitespaces and comments until we reach the next token.
+ if self.loader is not None:
+ comment_handler = getattr(self.loader, 'comment_handling', None)
+ if comment_handler is not None:
+ line = end_mark.line - len(trailing)
+ for x in trailing:
+ assert x[-1] == '\n'
+ self.comments.add_blank_line(x, 0, line)
+ line += 1
comment = self.scan_to_next_token()
while comment:
trailing.append(' ' * comment[1].column + comment[0])
comment = self.scan_to_next_token()
-
- # Keep track of the trailing whitespace and following comments
- # as a comment token, if isn't all included in the actual value.
- comment_end_mark = self.reader.get_mark()
- comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
- token.add_post_comment(comment)
+ if self.loader is not None:
+ comment_handler = getattr(self.loader, 'comment_handling', False)
+ if comment_handler is None:
+ # Keep track of the trailing whitespace and following comments
+ # as a comment token, if isn't all included in the actual value.
+ comment_end_mark = self.reader.get_mark()
+ comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
+ token.add_post_comment(comment)
return token
def scan_block_scalar_indicators(self, start_mark):
@@ -1590,10 +1606,21 @@ class Scanner:
break
token = ScalarToken("".join(chunks), True, start_mark, end_mark)
- if spaces and spaces[0] == '\n':
- # Create a comment token to preserve the trailing line breaks.
- comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
- token.add_post_comment(comment)
+ # getattr provides True so C type loader, which cannot handle comment, will not make CommentToken
+ if self.loader is not None:
+ comment_handler = getattr(self.loader, 'comment_handling', False)
+ if comment_handler is None:
+ if spaces and spaces[0] == '\n':
+ # Create a comment token to preserve the trailing line breaks.
+ comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
+ token.add_post_comment(comment)
+ elif comment_handler is not False:
+ line = start_mark.line + 1
+ for ch in spaces:
+ if ch == '\n':
+ self.comments.add_blank_line('\n', 0, line)
+ line += 1
+
return token
def scan_plain_spaces(self, indent, start_mark):
@@ -1764,7 +1791,7 @@ class RoundTripScanner(Scanner):
while self.need_more_tokens():
self.fetch_more_tokens()
self._gather_comments()
- if bool(self.tokens):
+ if len(self.tokens) > 0:
if not choices:
return True
for choice in choices:
@@ -1778,13 +1805,13 @@ class RoundTripScanner(Scanner):
while self.need_more_tokens():
self.fetch_more_tokens()
self._gather_comments()
- if bool(self.tokens):
+ if len(self.tokens) > 0:
return self.tokens[0]
return None
def _gather_comments(self):
# type: () -> Any
- """combine multiple comment lines"""
+ """combine multiple comment lines and assign to next non-comment-token"""
comments = [] # type: List[Any]
if not self.tokens:
return comments
@@ -1813,7 +1840,7 @@ class RoundTripScanner(Scanner):
while self.need_more_tokens():
self.fetch_more_tokens()
self._gather_comments()
- if bool(self.tokens):
+ if len(self.tokens) > 0:
# nprint('tk', self.tokens)
# only add post comment to single line tokens:
# scalar, value token. FlowXEndToken, otherwise
@@ -1925,7 +1952,7 @@ class RoundTripScanner(Scanner):
if not self.flow_level:
self.allow_simple_key = True
return comment, start_mark, end_mark
- if bool(self.scan_line_break()):
+ if self.scan_line_break() != '':
start_mark = self.reader.get_mark()
if not self.flow_level:
self.allow_simple_key = True
@@ -1973,3 +2000,377 @@ class RoundTripScanner(Scanner):
def scan_block_scalar(self, style, rt=True):
# type: (Any, Optional[bool]) -> Any
return Scanner.scan_block_scalar(self, style, rt=rt)
+
+
+# commenthandling 2021, differentiation not needed
+
+VALUECMNT = 0
+KEYCMNT = 0 # 1
+#TAGCMNT = 2
+#ANCHORCMNT = 3
+
+
+class CommentBase:
+ __slots__ = ('value', 'line', 'column', 'used', 'function', 'fline', 'ufun', 'uline')
+ def __init__(self, value, line, column):
+ self.value = value
+ self.line = line
+ self.column = column
+ self.used = ' '
+ info = inspect.getframeinfo(inspect.stack()[3][0])
+ self.function = info.function
+ self.fline = info.lineno
+ self.ufun = None
+ self.uline = None
+
+ def set_used(self, v='+'):
+ self.used = v
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ self.ufun = info.function
+ self.uline = info.lineno
+
+ def set_assigned(self):
+ self.used = '|'
+
+ def __str__(self):
+ return _F('{value}', value=self.value)
+
+ def __repr__(self):
+ return _F('{value!r}', value=self.value)
+
+ def info(self):
+ return _F('{name}{used} {line:2}:{column:<2} "{value:40s} {function}:{fline} {ufun}:{uline}',
+ name=self.name, line=self.line, column=self.column, value=self.value + '"', used=self.used,
+ function=self.function, fline=self.fline, ufun=self.ufun, uline=self.uline)
+
+
+class EOLComment(CommentBase):
+ name = 'EOLC'
+
+ def __init__(self, value, line, column):
+ super().__init__(value, line, column)
+
+
+class FullLineComment(CommentBase):
+ name = 'FULL'
+
+ def __init__(self, value, line, column):
+ super().__init__(value, line, column)
+
+
+class BlankLineComment(CommentBase):
+ name = 'BLNK'
+
+ def __init__(self, value, line, column):
+ super().__init__(value, line, column)
+
+
+class ScannedComments:
+ def __init__(self):
+ self.comments = {}
+ self.unused = []
+
+ def add_eol_comment(self, comment, column, line):
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ if comment.count('\n') == 1:
+ assert comment[-1] == '\n'
+ else:
+ assert '\n' not in comment
+ self.comments[line] = retval = EOLComment(comment[:-1], line, column)
+ self.unused.append(line)
+ return retval
+
+ def add_blank_line(self, comment, column, line):
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ assert comment.count('\n') == 1 and comment[-1] == '\n'
+ assert line not in self.comments
+ self.comments[line] = retval = BlankLineComment(comment[:-1], line, column)
+ self.unused.append(line)
+ return retval
+
+ def add_full_line_comment(self, comment, column, line):
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ assert comment.count('\n') == 1 and comment[-1] == '\n'
+ #if comment.startswith('# C12'):
+ # raise
+        # this raises in line 2127 for 330
+ self.comments[line] = retval = FullLineComment(comment[:-1], line, column)
+ self.unused.append(line)
+ return retval
+
+ def __getitem__(self, idx):
+ return self.comments[idx]
+
+ def __str__(self):
+ return 'ParsedComments:\n ' + \
+ '\n '.join((_F('{lineno:2} {x}', lineno=lineno, x=x.info()) for lineno, x in self.comments.items())) + '\n'
+
+ def last(self):
+ lineno, x = list(self.comments.items())[-1]
+ return _F('{lineno:2} {x}\n', lineno=lineno, x=x.info())
+
+ def any_unprocessed(self):
+ # ToDo: might want to differentiate based on lineno
+ return len(self.unused) > 0
+ #for lno, comment in reversed(self.comments.items()):
+ # if comment.used == ' ':
+ # return True
+ #return False
+
+ def unprocessed(self, use=False):
+ while len(self.unused) > 0:
+ first = self.unused.pop(0) if use else self.unused[0]
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ xprintf('using', first, self.comments[first].value, info.function, info.lineno)
+ yield first, self.comments[first]
+ if use:
+ self.comments[first].set_used()
+
+ def assign_pre(self, token):
+ token_line = token.start_mark.line
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ xprintf('assign_pre', token_line, self.unused, info.function, info.lineno)
+ gobbled = False
+ while self.unused and self.unused[0] < token_line:
+            gobbled = True
+ first = self.unused.pop(0)
+ xprintf('assign_pre < ', first)
+ self.comments[first].set_used()
+ token.add_comment_pre(first)
+ return gobbled
+
+ def assign_eol(self, tokens):
+ try:
+ comment_line = self.unused[0]
+ except IndexError:
+ return
+ if not isinstance(self.comments[comment_line], EOLComment):
+ return
+ idx = 1
+ while tokens[-idx].start_mark.line > comment_line or isinstance(tokens[-idx], ValueToken):
+ idx += 1
+ xprintf('idx1', idx)
+ if len(tokens) > idx and isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], ScalarToken):
+ return
+ try:
+ if isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], KeyToken):
+ try:
+ eol_idx = self.unused.pop(0)
+ self.comments[eol_idx].set_used()
+ xprintf('>>>>>a', idx, eol_idx, KEYCMNT)
+ tokens[-idx].add_comment_eol(eol_idx, KEYCMNT)
+ except IndexError:
+ raise NotImplementedError
+ return
+ except IndexError:
+ xprintf('IndexError1')
+ pass
+ try:
+ if isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], (ValueToken, BlockEntryToken)):
+ try:
+ eol_idx = self.unused.pop(0)
+ self.comments[eol_idx].set_used()
+ tokens[-idx].add_comment_eol(eol_idx, VALUECMNT)
+ except IndexError:
+ raise NotImplementedError
+ return
+ except IndexError:
+ xprintf('IndexError2')
+ pass
+ for t in tokens:
+ xprintf('tt-', t)
+ xprintf('not implemented EOL', type(tokens[-idx]))
+ import sys; sys.exit(0)
+
+ def assign_post(self, token):
+ token_line = token.start_mark.line
+ info = inspect.getframeinfo(inspect.stack()[1][0])
+ xprintf('assign_post', token_line, self.unused, info.function, info.lineno)
+ gobbled = False
+ while self.unused and self.unused[0] < token_line:
+            gobbled = True
+ first = self.unused.pop(0)
+ xprintf('assign_post < ', first)
+ self.comments[first].set_used()
+ token.add_comment_post(first)
+ return gobbled
+
+ def str_unprocessed(self):
+ return ''.join((_F(' {ind:2} {x}\n', ind=ind, x=x.info()) for ind, x in self.comments.items() if x.used == ' '))
+
+
+class RoundTripScannerSC(Scanner): # RoundTripScanner Split Comments
+ def __init__(self, *arg, **kw):
+ super().__init__(*arg, **kw)
+ assert self.loader is not None
+        # comments is initialised on .need_more_tokens and persists on self.loader.parsed_comments
+ #
+ self.comments = None
+
+ def get_token(self):
+ # type: () -> Any
+ # Return the next token.
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+ if len(self.tokens) > 0:
+ if isinstance(self.tokens[0], BlockEndToken):
+ self.comments.assign_post(self.tokens[0])
+ else:
+ self.comments.assign_pre(self.tokens[0])
+ self.tokens_taken += 1
+ return self.tokens.pop(0)
+
+ def need_more_tokens(self):
+ if self.comments is None:
+ self.loader.parsed_comments = self.comments = ScannedComments()
+ if self.done:
+ return False
+ if len(self.tokens) == 0:
+ return True
+ # The current token may be a potential simple key, so we
+ # need to look further.
+ self.stale_possible_simple_keys()
+ if self.next_possible_simple_key() == self.tokens_taken:
+ return True
+ if len(self.tokens) < 2:
+ return True
+ if self.tokens[0].start_mark.line == self.tokens[-1].start_mark.line:
+ return True
+ if True:
+ xprintf('-x--', len(self.tokens))
+ for t in self.tokens:
+ xprintf(t)
+ #xprintf(self.comments.last())
+ xprintf(self.comments.str_unprocessed())
+ self.comments.assign_pre(self.tokens[0])
+ self.comments.assign_eol(self.tokens)
+ return False
+
+ def scan_to_next_token(self):
+ srp = self.reader.peek
+ srf = self.reader.forward
+ if self.reader.index == 0 and srp() == '\uFEFF':
+ srf()
+ start_mark = self.reader.get_mark()
+ # xprintf('current_mark', start_mark.line, start_mark.column)
+ found = False
+ idx = 0
+ while not found:
+ while srp() == ' ':
+ srf()
+ ch = srp()
+ if ch == '#':
+ comment_start_mark = self.reader.get_mark()
+ comment = ch
+                srf()  # skip the '#'
+ while ch not in _THE_END:
+ ch = srp()
+ if ch == '\0': # don't gobble the end-of-stream character
+ # but add an explicit newline as "YAML processors should terminate
+ # the stream with an explicit line break
+ # https://yaml.org/spec/1.2/spec.html#id2780069
+ comment += '\n'
+ break
+ comment += ch
+ srf()
+ # we have a comment
+ if start_mark.column == 0:
+ self.comments.add_full_line_comment(comment, comment_start_mark.column, comment_start_mark.line)
+ else:
+ self.comments.add_eol_comment(comment, comment_start_mark.column, comment_start_mark.line)
+ comment = ""
+ # gather any blank lines or full line comments following the comment as well
+ self.scan_empty_or_full_line_comments()
+ if not self.flow_level:
+ self.allow_simple_key = True
+ return
+ if bool(self.scan_line_break()):
+ # start_mark = self.reader.get_mark()
+ if not self.flow_level:
+ self.allow_simple_key = True
+ self.scan_empty_or_full_line_comments()
+ return None
+ ch = srp()
+ if ch == '\n': # empty toplevel lines
+ start_mark = self.reader.get_mark()
+ comment = ""
+ while ch:
+ ch = self.scan_line_break(empty_line=True)
+ comment += ch
+ if srp() == '#':
+ # empty line followed by indented real comment
+ comment = comment.rsplit('\n', 1)[0] + '\n'
+ end_mark = self.reader.get_mark()
+ return None
+ else:
+ found = True
+ return None
+
+ def scan_empty_or_full_line_comments(self):
+ blmark = self.reader.get_mark()
+ assert blmark.column == 0
+ blanks = ""
+ comment = None
+ mark = None
+ ch = self.reader.peek()
+ while True:
+ # nprint('ch', repr(ch), self.reader.get_mark().column)
+ if ch in '\r\n\x85\u2028\u2029':
+ if self.reader.prefix(2) == '\r\n':
+ self.reader.forward(2)
+ else:
+ self.reader.forward()
+ if comment is not None:
+ comment += '\n'
+ self.comments.add_full_line_comment(comment, mark.column, mark.line)
+ comment = None
+ else:
+ blanks += '\n'
+ self.comments.add_blank_line(blanks, blmark.column, blmark.line)
+ blanks = ""
+ blmark = self.reader.get_mark()
+ ch = self.reader.peek()
+ continue
+ if comment is None:
+ if ch in ' \t':
+ blanks += ch
+ elif ch == '#':
+ mark = self.reader.get_mark()
+ comment = '#'
+ else:
+ # print('breaking on', repr(ch))
+ break
+ else:
+ comment += ch
+ self.reader.forward()
+ ch = self.reader.peek()
+
+ def scan_block_scalar_ignored_line(self, start_mark):
+ # type: (Any) -> Any
+ # See the specification for details.
+ srp = self.reader.peek
+ srf = self.reader.forward
+ prefix = ''
+ comment = None
+ while srp() == ' ':
+ prefix += srp()
+ srf()
+ if srp() == '#':
+ comment = ''
+ mark = self.reader.get_mark()
+ while srp() not in _THE_END:
+ comment += srp()
+ srf()
+ comment += '\n'
+ ch = srp()
+ if ch not in _THE_END:
+ raise ScannerError(
+ 'while scanning a block scalar',
+ start_mark,
+ _F('expected a comment or a line break, but found {ch!r}', ch=ch),
+ self.reader.get_mark(),
+ )
+ if comment is not None:
+ self.comments.add_eol_comment(comment, mark.column, mark.line)
+ self.scan_line_break()
+ return None
diff --git a/tokens.py b/tokens.py
index 490866b..01cae1f 100644
--- a/tokens.py
+++ b/tokens.py
@@ -1,6 +1,6 @@
# coding: utf-8
-from ruamel.yaml.compat import _F
+from ruamel.yaml.compat import _F, nprintf
if False: # MYPY
from typing import Text, Any, Dict, Optional, List # NOQA
@@ -89,13 +89,17 @@ class Token:
self._comment[0] = []
self._comment[0].append(comment)
- def add_comment_eol(self, comment):
+ def add_comment_eol(self, comment, comment_type):
if not hasattr(self, '_comment'):
self._comment = [None, None, None]
else:
assert len(self._comment) == 3
assert self._comment[1] is None
- self._comment[1] = comment
+ if self.comment[1] is None:
+ self._comment[1] = []
+ self._comment[1].extend([None] * (comment_type + 1 - len(self.comment[1])))
+ # nprintf('commy', self.comment, comment_type)
+ self._comment[1][comment_type] = comment
def add_comment_post(self, comment):
if not hasattr(self, '_comment'):
@@ -184,11 +188,9 @@ class Token:
target._comment = c
# nprint('mco2:', self, target, target.comment, empty)
return self
- return
- raise NotImplemtedError
# if self and target have both pre, eol or post comments, something seems wrong
for idx in range(3):
- if c[idx] and tc[idx]:
+ if c[idx] is not None and tc[idx] is not None:
raise NotImplementedError(_F('overlap in comment {c!r} {tc!r}', c=c, tc=tc))
# move the comment parts
for idx in range(3):