From 17b35c376fd0fc9a94ba0adfdbf5bf63a6177dc9 Mon Sep 17 00:00:00 2001 From: Anthon van der Neut Date: Thu, 6 May 2021 08:36:49 +0200 Subject: * extend EOL token handling * extending comment --- comments.py | 101 ++++++++++--- constructor.py | 187 ++++++++++++++++++------ events.py | 5 + main.py | 19 ++- parser.py | 92 ++++++++++-- scanner.py | 445 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- tokens.py | 14 +- 7 files changed, 756 insertions(+), 107 deletions(-) diff --git a/comments.py b/comments.py index d133299..f49c183 100644 --- a/comments.py +++ b/comments.py @@ -11,7 +11,7 @@ import copy from ruamel.yaml.compat import ordereddict # type: ignore -from ruamel.yaml.compat import MutableSliceableSequence, _F +from ruamel.yaml.compat import MutableSliceableSequence, _F, nprintf from ruamel.yaml.scalarstring import ScalarString from ruamel.yaml.anchor import Anchor @@ -35,13 +35,44 @@ __all__ = ['CommentedSeq', 'CommentedKeySeq', # bits 0 and 1 are combined, you can choose only one C_POST = 0b00 C_PRE = 0b01 -C_SPLIT_ON_FIRST_BLANK = 0b10 # as C_POST, but if blank line then C_PRE everything before first - # blank goes to POST even if no following real FLC +C_SPLIT_ON_FIRST_BLANK = 0b10 # as C_POST, but if blank line then C_PRE all lines before first + # blank goes to POST even if no following real FLC (first blank -> first of post) # 0b11 -> reserved for future use C_BLANK_LINE_PRESERVE_SPACE = 0b100 # C_EOL_PRESERVE_SPACE2 = 0b1000 +class IDX: + # temporary auto increment, so rearranging is easier + def __init__(self): + self._idx = 0 + + def __call__(self): + x = self._idx + self._idx += 1 + return x + + def __str__(self): + return str(self._idx) + +cidx = IDX() + +# more or less in order of subjective expected likelyhood +# the _POST and _PRE ones are lists themselves +C_VALUE_EOL = C_ELEM_EOL = cidx() +C_KEY_EOL = cidx() +C_KEY_PRE = C_ELEM_PRE = cidx() # not this is not value +C_VALUE_POST = C_ELEM_POST = cidx() # not this is not value +C_VALUE_PRE = cidx() +C_KEY_POST = cidx() +C_TAG_EOL = cidx() +C_TAG_POST = cidx() +C_TAG_PRE = cidx() +C_ANCHOR_EOL = cidx() +C_ANCHOR_POST = cidx() +C_ANCHOR_PRE = cidx() + + comment_attrib = '_yaml_comment' format_attrib = '_yaml_format' line_col_attrib = '_yaml_line_col' @@ -52,31 +83,32 @@ tag_attrib = '_yaml_tag' class Comment: # using sys.getsize tested the Comment objects, __slots__ makes them bigger # and adding self.end did not matter - __slots__ = 'comment', '_items', '_end', '_start' + __slots__ = 'comment', '_items', '_post', '_pre' attrib = comment_attrib - def __init__(self): + def __init__(self, old=True): # type: () -> None + self._pre = None if old else [] self.comment = None # [post, [pre]] # map key (mapping/omap/dict) or index (sequence/list) to a list of # dict: post_key, pre_key, post_value, pre_value # list: pre item, post item self._items = {} # type: Dict[Any, Any] # self._start = [] # should not put these on first item - self._end = [] # type: List[Any] # end of document comments + self._post = [] # type: List[Any] # end of document comments def __str__(self): # type: () -> str - if bool(self._end): + if bool(self._post): end = ',\n end=' + str(self._end) else: end = "" return 'Comment(comment={0},\n items={1}{2})'.format(self.comment, self._items, end) - def __repr__(self): + def _old__repr__(self): # type: () -> str - if bool(self._end): - end = ',\n end=' + str(self._end) + if bool(self._post): + end = ',\n end=' + str(self._post) else: end = "" try: @@ -90,6 +122,25 @@ class Comment: it = '\n ' + it + ' ' return 'Comment(\n start={},\n items={{{}}}{})'.format(self.comment, it, end) + def __repr__(self): + if self._pre is None: + return self._old__repr__() + if bool(self._post): + end = ',\n end=' + repr(self._post) + else: + end = "" + try: + ln = max([len(str(k)) for k in self._items]) + 1 + except ValueError: + ln = '' + it = ' '.join( + ['{:{}} {}\n'.format(str(k) + ':', ln, v) for k, v in self._items.items()] + ) + if it: + it = '\n ' + it + ' ' + return 'Comment(\n pre={},\n items={{{}}}{})'.format(self.pre, it, end) + + @property def items(self): # type: () -> Any @@ -98,22 +149,38 @@ class Comment: @property def end(self): # type: () -> Any - return self._end + return self._post @end.setter def end(self, value): # type: (Any) -> None - self._end = value + self._post = value @property - def start(self): + def pre(self): # type: () -> Any - return self._start + return self._pre - @start.setter - def start(self, value): + @pre.setter + def pre(self, value): # type: (Any) -> None - self._start = value + self._pre = value + + def get(self, item, pos): + x = self._items.get(item) + if x is None or len(x) < pos: + return None + return x[pos] # can be None + + def set(self, item, pos, value): + x = self._items.get(item) + if x is None: + self._items[item] = x = [None] * (pos + 1) + else: + while len(x) <= pos: + x.append(None) + assert x[pos] is None + x[pos] = value def __contains__(self, x): # test if a substring is in any of the attached comments diff --git a/constructor.py b/constructor.py index 7b7426f..199129e 100644 --- a/constructor.py +++ b/constructor.py @@ -21,7 +21,10 @@ from ruamel.yaml.compat import ordereddict # type: ignore from ruamel.yaml.comments import * # NOQA from ruamel.yaml.comments import (CommentedMap, CommentedOrderedMap, CommentedSet, CommentedKeySeq, CommentedSeq, TaggedScalar, - CommentedKeyMap) + CommentedKeyMap, + C_KEY_PRE, C_KEY_EOL, C_KEY_POST, + C_VALUE_PRE, C_VALUE_EOL, C_VALUE_POST, + ) from ruamel.yaml.scalarstring import (SingleQuotedScalarString, DoubleQuotedScalarString, LiteralScalarString, FoldedScalarString, PlainScalarString, ScalarString,) @@ -92,6 +95,14 @@ class BaseConstructor: return self.loader.resolver return self.loader._resolver + @property + def scanner(self): + # type: () -> Any + # needed to get to the expanded comments + if hasattr(self.loader, 'typ'): + return self.loader.scanner + return self.loader._scanner + def check_data(self): # type: () -> Any # If there are more documents available? @@ -1056,6 +1067,23 @@ class RoundTripConstructor(SafeConstructor): as well as on the items """ + def comment(self, idx): + assert self.loader.comment_handling is not None + x = self.scanner.comments[idx] + x.set_assigned() + return x + + def comments(self, list_of_comments, idx=None): + # hand in the comment and optional pre, eol, post segment + if list_of_comments is None: + return [] + if idx is not None: + if list_of_comments[idx] is None: + return [] + list_of_comments = list_of_comments[idx] + for x in list_of_comments: + yield self.comment(x) + def construct_scalar(self, node): # type: (Any) -> Any if not isinstance(node, ScalarNode): @@ -1068,8 +1096,14 @@ class RoundTripConstructor(SafeConstructor): if node.style == '|' and isinstance(node.value, str): lss = LiteralScalarString(node.value, anchor=node.anchor) - if node.comment and node.comment[1]: - lss.comment = node.comment[1][0] # type: ignore + if self.loader and self.loader.comment_handling is None: + if node.comment and node.comment[1]: + lss.comment = node.comment[1][0] # type: ignore + else: + # NEWCMNT + if node.comment is not None and node.comment[1]: + # nprintf('>>>>nc1', node.comment) + lss.comment = self.comment(node.comment[1][0]) # EOL comment after | return lss if node.style == '>' and isinstance(node.value, str): fold_positions = [] # type: List[int] @@ -1080,8 +1114,14 @@ class RoundTripConstructor(SafeConstructor): break fold_positions.append(idx - len(fold_positions)) fss = FoldedScalarString(node.value.replace('\a', ''), anchor=node.anchor) - if node.comment and node.comment[1]: - fss.comment = node.comment[1][0] # type: ignore + if self.loader and self.loader.comment_handling is None: + if node.comment and node.comment[1]: + fss.comment = node.comment[1][0] # type: ignore + else: + # NEWCMNT + if node.comment is not None and node.comment[1]: + # nprintf('>>>>nc2', node.comment) + lss.comment = self.comment(node.comment[1][0]) # EOL comment after > if fold_positions: fss.fold_pos = fold_positions # type: ignore return fss @@ -1279,12 +1319,17 @@ class RoundTripConstructor(SafeConstructor): node.start_mark, ) ret_val = [] - if node.comment: - seqtyp._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - # this happens e.g. if you have a sequence element that is a flow-style mapping - # and that has no EOL comment but a following commentline or empty line - seqtyp.yaml_end_comment_extend(node.comment[2], clear=True) + if self.loader and self.loader.comment_handling is None: + if node.comment: + seqtyp._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + # this happens e.g. if you have a sequence element that is a flow-style mapping + # and that has no EOL comment but a following commentline or empty line + seqtyp.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + nprintf('nc3', node.comment) if node.anchor: from ruamel.yaml.serializer import templated_id @@ -1408,10 +1453,19 @@ class RoundTripConstructor(SafeConstructor): ) merge_map = self.flatten_mapping(node) # mapping = {} - if node.comment: - maptyp._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - maptyp.yaml_end_comment_extend(node.comment[2], clear=True) + if self.loader and self.loader.comment_handling is None: + if node.comment: + maptyp._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + maptyp.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + # nprintf('nc4', node.comment, node.start_mark) + if maptyp.ca.pre is None: + maptyp.ca.pre = [] + for cmnt in self.comments(node.comment, 0): + maptyp.ca.pre.append(cmnt) if node.anchor: from ruamel.yaml.serializer import templated_id @@ -1446,18 +1500,37 @@ class RoundTripConstructor(SafeConstructor): ) value = self.construct_object(value_node, deep=deep) if self.check_mapping_key(node, key_node, maptyp, key, value): - if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]: - if last_value is None: - key_node.comment[0] = key_node.comment.pop(4) - maptyp._yaml_add_comment(key_node.comment, value=last_key) - else: - key_node.comment[2] = key_node.comment.pop(4) + if self.loader and self.loader.comment_handling is None: + if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]: + if last_value is None: + key_node.comment[0] = key_node.comment.pop(4) + maptyp._yaml_add_comment(key_node.comment, value=last_key) + else: + key_node.comment[2] = key_node.comment.pop(4) + maptyp._yaml_add_comment(key_node.comment, key=key) + key_node.comment = None + if key_node.comment: maptyp._yaml_add_comment(key_node.comment, key=key) - key_node.comment = None - if key_node.comment: - maptyp._yaml_add_comment(key_node.comment, key=key) - if value_node.comment: - maptyp._yaml_add_comment(value_node.comment, value=key) + if value_node.comment: + maptyp._yaml_add_comment(value_node.comment, value=key) + else: + # NEWCMNT + if key_node.comment: + nprintf('nc5a', key, key_node.comment) + if key_node.comment[0]: + maptyp.ca.set(key, C_KEY_PRE, key_node.comment[0]) + if key_node.comment[1]: + maptyp.ca.set(key, C_KEY_EOL, key_node.comment[1]) + if key_node.comment[2]: + maptyp.ca.set(key, C_KEY_POST, key_node.comment[2]) + if value_node.comment: + nprintf('nc5b', key, value_node.comment) + if value_node.comment[0]: + maptyp.ca.set(key, C_VALUE_PRE, value_node.comment[0]) + if value_node.comment[1]: + maptyp.ca.set(key, C_VALUE_EOL, value_node.comment[1]) + if value_node.comment[2]: + maptyp.ca.set(key, C_VALUE_POST, value_node.comment[2]) maptyp._yaml_set_kv_line_col( key, [ @@ -1483,10 +1556,15 @@ class RoundTripConstructor(SafeConstructor): _F('expected a mapping node, but found {node_id!s}', node_id=node.id), node.start_mark, ) - if node.comment: - typ._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - typ.yaml_end_comment_extend(node.comment[2], clear=True) + if self.loader and self.loader.comment_handling is None: + if node.comment: + typ._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + typ.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + nprintf('nc6', node.comment) if node.anchor: from ruamel.yaml.serializer import templated_id @@ -1509,10 +1587,17 @@ class RoundTripConstructor(SafeConstructor): # construct but should be null value = self.construct_object(value_node, deep=deep) # NOQA self.check_set_key(node, key_node, typ, key) - if key_node.comment: - typ._yaml_add_comment(key_node.comment, key=key) - if value_node.comment: - typ._yaml_add_comment(value_node.comment, value=key) + if self.loader and self.loader.comment_handling is None: + if key_node.comment: + typ._yaml_add_comment(key_node.comment, key=key) + if value_node.comment: + typ._yaml_add_comment(value_node.comment, value=key) + else: + # NEWCMNT + if key_node.comment: + nprintf('nc7a', key_node.comment) + if value_node.comment: + nprintf('nc7b', value_node.comment) typ.add(key) def construct_yaml_seq(self, node): @@ -1563,10 +1648,15 @@ class RoundTripConstructor(SafeConstructor): elif node.flow_style is False: omap.fa.set_block_style() yield omap - if node.comment: - omap._yaml_add_comment(node.comment[:2]) - if len(node.comment) > 2: - omap.yaml_end_comment_extend(node.comment[2], clear=True) + if self.loader and self.loader.comment_handling is None: + if node.comment: + omap._yaml_add_comment(node.comment[:2]) + if len(node.comment) > 2: + omap.yaml_end_comment_extend(node.comment[2], clear=True) + else: + # NEWCMNT + if node.comment: + nprintf('nc8', node.comment) if not isinstance(node, SequenceNode): raise ConstructorError( 'while constructing an ordered map', @@ -1599,12 +1689,21 @@ class RoundTripConstructor(SafeConstructor): key = self.construct_object(key_node) assert key not in omap value = self.construct_object(value_node) - if key_node.comment: - omap._yaml_add_comment(key_node.comment, key=key) - if subnode.comment: - omap._yaml_add_comment(subnode.comment, key=key) - if value_node.comment: - omap._yaml_add_comment(value_node.comment, value=key) + if self.loader and self.loader.comment_handling is None: + if key_node.comment: + omap._yaml_add_comment(key_node.comment, key=key) + if subnode.comment: + omap._yaml_add_comment(subnode.comment, key=key) + if value_node.comment: + omap._yaml_add_comment(value_node.comment, value=key) + else: + # NEWCMNT + if key_node.comment: + nprintf('nc9a', key_node.comment) + if subnode.comment: + nprintf('nc9b', subnode.comment) + if value_node.comment: + nprintf('nc9c', value_node.comment) omap[key] = value def construct_yaml_set(self, node): diff --git a/events.py b/events.py index ef63dad..e0c7f68 100644 --- a/events.py +++ b/events.py @@ -7,6 +7,8 @@ from ruamel.yaml.compat import _F if False: # MYPY from typing import Any, Dict, Optional, List # NOQA +SHOW_LINES = False + def CommentCheck(): # type: () -> None @@ -37,6 +39,9 @@ class Event: arguments.append(_F('{key!s}={v!r}', key=key, v=v)) if self.comment not in [None, CommentCheck]: arguments.append('comment={!r}'.format(self.comment)) + if SHOW_LINES: + arguments.append('({}:{}/{}:{})'.format(self.start_mark.line, self.start_mark.column, + self.end_mark.line, self.end_mark.column)) arguments = ', '.join(arguments) else: attributes = [ diff --git a/main.py b/main.py index 7d2f177..e19f28c 100644 --- a/main.py +++ b/main.py @@ -31,6 +31,7 @@ from ruamel.yaml.constructor import ( RoundTripConstructor, ) from ruamel.yaml.loader import Loader as UnsafeLoader +from ruamel.yaml.comments import CommentedMap, CommentedSeq, C_PRE if False: # MYPY from typing import List, Set, Dict, Union, Any, Callable, Optional, Text # NOQA @@ -81,6 +82,7 @@ class YAML: self.Scanner = None # type: Any self.Serializer = None # type: Any self.default_flow_style = None # type: Any + self.comment_handling = None typ_found = 1 setup_rt = False if 'rt' in self.typ: @@ -107,6 +109,18 @@ class YAML: self.Parser = ruamel.yaml.parser.Parser if pure or CParser is None else CParser self.Composer = ruamel.yaml.composer.Composer self.Constructor = ruamel.yaml.constructor.Constructor + elif 'rtsc' in self.typ: + self.default_flow_style = False + # no optimized rt-dumper yet + self.Emitter = ruamel.yaml.emitter.Emitter + self.Serializer = ruamel.yaml.serializer.Serializer + self.Representer = ruamel.yaml.representer.RoundTripRepresenter + self.Scanner = ruamel.yaml.scanner.RoundTripScannerSC + # no optimized rt-parser yet + self.Parser = ruamel.yaml.parser.RoundTripParserSC + self.Composer = ruamel.yaml.composer.Composer + self.Constructor = ruamel.yaml.constructor.RoundTripConstructor + self.comment_handling = C_PRE else: setup_rt = True typ_found = 0 @@ -150,7 +164,6 @@ class YAML: self.scalar_after_indicator = None # [a, b: 1, c: {d: 2}] vs. [a, {b: 1}, {c: {d: 2}}] self.brace_single_entry_mapping_in_flow_sequence = False - self.comment_handling = None for module in self.plug_ins: if getattr(module, 'typ', None) in self.typ: typ_found += 1 @@ -711,8 +724,6 @@ class YAML: def map(self, **kw): # type: (Any) -> Any if 'rt' in self.typ: - from ruamel.yaml.comments import CommentedMap - return CommentedMap(**kw) else: return dict(**kw) @@ -720,8 +731,6 @@ class YAML: def seq(self, *args): # type: (Any) -> Any if 'rt' in self.typ: - from ruamel.yaml.comments import CommentedSeq - return CommentedSeq(*args) else: return list(*args) diff --git a/parser.py b/parser.py index 279fc20..8e2f54e 100644 --- a/parser.py +++ b/parser.py @@ -44,7 +44,7 @@ # # FIRST sets: # -# stream: { STREAM-START } +# stream: { STREAM-START <} # explicit_document: { DIRECTIVE DOCUMENT-START } # implicit_document: FIRST(block_node) # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START @@ -78,6 +78,8 @@ from ruamel.yaml.error import MarkedYAMLError from ruamel.yaml.tokens import * # NOQA from ruamel.yaml.events import * # NOQA from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError # NOQA +from ruamel.yaml.scanner import BlankLineComment +from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK from ruamel.yaml.compat import _F, nprint, nprintf # NOQA if False: # MYPY @@ -86,6 +88,10 @@ if False: # MYPY __all__ = ['Parser', 'RoundTripParser', 'ParserError'] +def xprintf(*args, **kw): + return nprintf(*args, **kw) + pass + class ParserError(MarkedYAMLError): pass @@ -106,7 +112,7 @@ class Parser: def reset_parser(self): # type: () -> None # Reset the state attributes (to clear self-references) - self.current_event = None + self.current_event = self.last_event = None self.tag_handles = {} # type: Dict[Any, Any] self.states = [] # type: List[Any] self.marks = [] # type: List[Any] @@ -158,7 +164,10 @@ class Parser: if self.current_event is None: if self.state: self.current_event = self.state() - value = self.current_event + # assert self.current_event is not None + # if self.current_event.end_mark.line != self.peek_event().start_mark.line: + xprintf('get_event', repr(self.current_event), self.peek_event().start_mark.line) + self.last_event = value = self.current_event self.current_event = None return value @@ -204,8 +213,6 @@ class Parser: self.scanner.get_token() # Parse an explicit document. if not self.scanner.check_token(StreamEndToken): - token = self.scanner.peek_token() - start_mark = token.start_mark version, tags = self.process_directives() if not self.scanner.check_token(DocumentStartToken): raise ParserError( @@ -218,6 +225,7 @@ class Parser: self.scanner.peek_token().start_mark, ) token = self.scanner.get_token() + start_mark = token.start_mark end_mark = token.end_mark # if self.loader is not None and \ # end_mark.line != self.scanner.peek_token().start_mark.line: @@ -401,9 +409,13 @@ class Parser: if indentless_sequence and self.scanner.check_token(BlockEntryToken): comment = None pt = self.scanner.peek_token() - if pt.comment and pt.comment[0]: - comment = [pt.comment[0], []] - pt.comment[0] = None + if self.loader and self.loader.comment_handling is None: + if pt.comment and pt.comment[0]: + comment = [pt.comment[0], []] + pt.comment[0] = None + elif self.loader: + if pt.comment: + comment = pt.comment end_mark = self.scanner.peek_token().end_mark event = SequenceStartEvent( anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment @@ -556,7 +568,14 @@ class Parser: self.state = self.parse_indentless_sequence_entry return self.process_empty_scalar(token.end_mark) token = self.scanner.peek_token() - event = SequenceEndEvent(token.start_mark, token.start_mark, comment=token.comment) + c = None + if self.loader and self.loader.comment_handling is None: + c = token.comment + start_mark = token.start_mark + else: + start_mark = self.last_event.end_mark + c = self.distribute_comment(token.comment, start_mark.line) + event = SequenceEndEvent(start_mark, start_mark, comment=c) self.state = self.states.pop() return event @@ -783,10 +802,8 @@ class Parser: return ScalarEvent(None, None, (True, False), "", mark, mark, comment=comment) def move_token_comment(self, token, nt=None, empty=False): - if getattr(self.loader, 'comment_handling', None) is None: # pre 0.18 - token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty) - else: - token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty) + pass + class RoundTripParser(Parser): """roundtrip is a safe loader, that wants to see the unmangled tag""" @@ -810,3 +827,52 @@ class RoundTripParser(Parser): ): return Parser.transform_tag(self, handle, suffix) return handle + suffix + + def move_token_comment(self, token, nt=None, empty=False): + token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty) + + +class RoundTripParserSC(RoundTripParser): + """roundtrip is a safe loader, that wants to see the unmangled tag""" + + # some of the differences are based on the superclass testing if self.loader.comment_handling is not None + + def move_token_comment(self, token, nt=None, empty=False): + token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty) + + def distribute_comment(self, comment, line): + # ToDo, look at indentation of the comment to determine attachment + if comment is None: + return None + if not comment[0]: + return None + if comment[0][0] != line + 1: + nprintf('>>>dcxxx', comment, line, typ) + assert comment[0][0] == line + 1 + #if comment[0] - line > 1: + # return + typ = self.loader.comment_handling & 0b11 + # nprintf('>>>dca', comment, line, typ) + if typ == C_POST: + return None + if typ == C_PRE: + c = [None, None, comment[0]] + comment[0] = None + return c + # nprintf('>>>dcb', comment[0]) + for idx, cmntidx in enumerate(comment[0]): + # nprintf('>>>dcb', cmntidx) + if isinstance(self.scanner.comments[cmntidx], BlankLineComment): + break + else: + return None # no space found + if idx == 0: + return None # first line was blank + # nprintf('>>>dcc', idx) + if typ == C_SPLIT_ON_FIRST_BLANK: + c = [None, None, comment[0][:idx]] + comment[0] = comment[0][idx:] + return c + raise NotImplementedError # reserved + + diff --git a/scanner.py b/scanner.py index f98da00..f9e6052 100644 --- a/scanner.py +++ b/scanner.py @@ -44,6 +44,10 @@ _THE_END = '\n\0\r\x85\u2028\u2029' _THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029' _SPACE_TAB = ' \t' +def xprintf(*args, **kw): + return nprintf(*args, **kw) + pass + class ScannerError(MarkedYAMLError): pass @@ -167,7 +171,7 @@ class Scanner: # Check if the next token is one of the given types. while self.need_more_tokens(): self.fetch_more_tokens() - if bool(self.tokens): + if len(self.tokens) > 0: if not choices: return True for choice in choices: @@ -180,7 +184,7 @@ class Scanner: # Return the next token, but do not delete if from the queue. while self.need_more_tokens(): self.fetch_more_tokens() - if bool(self.tokens): + if len(self.tokens) > 0: return self.tokens[0] def get_token(self): @@ -188,7 +192,7 @@ class Scanner: # Return the next token. while self.need_more_tokens(): self.fetch_more_tokens() - if bool(self.tokens): + if len(self.tokens) > 0: self.tokens_taken += 1 return self.tokens.pop(0) @@ -198,7 +202,7 @@ class Scanner: # type: () -> bool if self.done: return False - if not self.tokens: + if len(self.tokens) == 0: return True # The current token may be a potential simple key, so we # need to look further. @@ -1231,21 +1235,33 @@ class Scanner: # We are done. token = ScalarToken("".join(chunks), False, start_mark, end_mark, style) - if block_scalar_comment is not None: - token.add_pre_comments([block_scalar_comment]) + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', False) + if comment_handler is None: + if block_scalar_comment is not None: + token.add_pre_comments([block_scalar_comment]) if len(trailing) > 0: - # nprint('trailing 1', trailing) # XXXXX # Eat whitespaces and comments until we reach the next token. + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', None) + if comment_handler is not None: + line = end_mark.line - len(trailing) + for x in trailing: + assert x[-1] == '\n' + self.comments.add_blank_line(x, 0, line) + line += 1 comment = self.scan_to_next_token() while comment: trailing.append(' ' * comment[1].column + comment[0]) comment = self.scan_to_next_token() - - # Keep track of the trailing whitespace and following comments - # as a comment token, if isn't all included in the actual value. - comment_end_mark = self.reader.get_mark() - comment = CommentToken("".join(trailing), end_mark, comment_end_mark) - token.add_post_comment(comment) + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', False) + if comment_handler is None: + # Keep track of the trailing whitespace and following comments + # as a comment token, if isn't all included in the actual value. + comment_end_mark = self.reader.get_mark() + comment = CommentToken("".join(trailing), end_mark, comment_end_mark) + token.add_post_comment(comment) return token def scan_block_scalar_indicators(self, start_mark): @@ -1590,10 +1606,21 @@ class Scanner: break token = ScalarToken("".join(chunks), True, start_mark, end_mark) - if spaces and spaces[0] == '\n': - # Create a comment token to preserve the trailing line breaks. - comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark) - token.add_post_comment(comment) + # getattr provides True so C type loader, which cannot handle comment, will not make CommentToken + if self.loader is not None: + comment_handler = getattr(self.loader, 'comment_handling', False) + if comment_handler is None: + if spaces and spaces[0] == '\n': + # Create a comment token to preserve the trailing line breaks. + comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark) + token.add_post_comment(comment) + elif comment_handler is not False: + line = start_mark.line + 1 + for ch in spaces: + if ch == '\n': + self.comments.add_blank_line('\n', 0, line) + line += 1 + return token def scan_plain_spaces(self, indent, start_mark): @@ -1764,7 +1791,7 @@ class RoundTripScanner(Scanner): while self.need_more_tokens(): self.fetch_more_tokens() self._gather_comments() - if bool(self.tokens): + if len(self.tokens) > 0: if not choices: return True for choice in choices: @@ -1778,13 +1805,13 @@ class RoundTripScanner(Scanner): while self.need_more_tokens(): self.fetch_more_tokens() self._gather_comments() - if bool(self.tokens): + if len(self.tokens) > 0: return self.tokens[0] return None def _gather_comments(self): # type: () -> Any - """combine multiple comment lines""" + """combine multiple comment lines and assign to next non-comment-token""" comments = [] # type: List[Any] if not self.tokens: return comments @@ -1813,7 +1840,7 @@ class RoundTripScanner(Scanner): while self.need_more_tokens(): self.fetch_more_tokens() self._gather_comments() - if bool(self.tokens): + if len(self.tokens) > 0: # nprint('tk', self.tokens) # only add post comment to single line tokens: # scalar, value token. FlowXEndToken, otherwise @@ -1925,7 +1952,7 @@ class RoundTripScanner(Scanner): if not self.flow_level: self.allow_simple_key = True return comment, start_mark, end_mark - if bool(self.scan_line_break()): + if self.scan_line_break() != '': start_mark = self.reader.get_mark() if not self.flow_level: self.allow_simple_key = True @@ -1973,3 +2000,377 @@ class RoundTripScanner(Scanner): def scan_block_scalar(self, style, rt=True): # type: (Any, Optional[bool]) -> Any return Scanner.scan_block_scalar(self, style, rt=rt) + + +# commenthandling 2021, differentiatiation not needed + +VALUECMNT = 0 +KEYCMNT = 0 # 1 +#TAGCMNT = 2 +#ANCHORCMNT = 3 + + +class CommentBase: + __slots__ = ('value', 'line', 'column', 'used', 'function', 'fline', 'ufun', 'uline') + def __init__(self, value, line, column): + self.value = value + self.line = line + self.column = column + self.used = ' ' + info = inspect.getframeinfo(inspect.stack()[3][0]) + self.function = info.function + self.fline = info.lineno + self.ufun = None + self.uline = None + + def set_used(self, v='+'): + self.used = v + info = inspect.getframeinfo(inspect.stack()[1][0]) + self.ufun = info.function + self.uline = info.lineno + + def set_assigned(self): + self.used = '|' + + def __str__(self): + return _F('{value}', value=self.value) + + def __repr__(self): + return _F('{value!r}', value=self.value) + + def info(self): + return _F('{name}{used} {line:2}:{column:<2} "{value:40s} {function}:{fline} {ufun}:{uline}', + name=self.name, line=self.line, column=self.column, value=self.value + '"', used=self.used, + function=self.function, fline=self.fline, ufun=self.ufun, uline=self.uline) + + +class EOLComment(CommentBase): + name = 'EOLC' + + def __init__(self, value, line, column): + super().__init__(value, line, column) + + +class FullLineComment(CommentBase): + name = 'FULL' + + def __init__(self, value, line, column): + super().__init__(value, line, column) + + +class BlankLineComment(CommentBase): + name = 'BLNK' + + def __init__(self, value, line, column): + super().__init__(value, line, column) + + +class ScannedComments: + def __init__(self): + self.comments = {} + self.unused = [] + + def add_eol_comment(self, comment, column, line): + info = inspect.getframeinfo(inspect.stack()[1][0]) + if comment.count('\n') == 1: + assert comment[-1] == '\n' + else: + assert '\n' not in comment + self.comments[line] = retval = EOLComment(comment[:-1], line, column) + self.unused.append(line) + return retval + + def add_blank_line(self, comment, column, line): + info = inspect.getframeinfo(inspect.stack()[1][0]) + assert comment.count('\n') == 1 and comment[-1] == '\n' + assert line not in self.comments + self.comments[line] = retval = BlankLineComment(comment[:-1], line, column) + self.unused.append(line) + return retval + + def add_full_line_comment(self, comment, column, line): + info = inspect.getframeinfo(inspect.stack()[1][0]) + assert comment.count('\n') == 1 and comment[-1] == '\n' + #if comment.startswith('# C12'): + # raise + # this raises in line 2127 fro 330 + self.comments[line] = retval = FullLineComment(comment[:-1], line, column) + self.unused.append(line) + return retval + + def __getitem__(self, idx): + return self.comments[idx] + + def __str__(self): + return 'ParsedComments:\n ' + \ + '\n '.join((_F('{lineno:2} {x}', lineno=lineno, x=x.info()) for lineno, x in self.comments.items())) + '\n' + + def last(self): + lineno, x = list(self.comments.items())[-1] + return _F('{lineno:2} {x}\n', lineno=lineno, x=x.info()) + + def any_unprocessed(self): + # ToDo: might want to differentiate based on lineno + return len(self.unused) > 0 + #for lno, comment in reversed(self.comments.items()): + # if comment.used == ' ': + # return True + #return False + + def unprocessed(self, use=False): + while len(self.unused) > 0: + first = self.unused.pop(0) if use else self.unused[0] + info = inspect.getframeinfo(inspect.stack()[1][0]) + xprintf('using', first, self.comments[first].value, info.function, info.lineno) + yield first, self.comments[first] + if use: + self.comments[first].set_used() + + def assign_pre(self, token): + token_line = token.start_mark.line + info = inspect.getframeinfo(inspect.stack()[1][0]) + xprintf('assign_pre', token_line, self.unused, info.function, info.lineno) + gobbled = False + while self.unused and self.unused[0] < token_line: + gobled = True + first = self.unused.pop(0) + xprintf('assign_pre < ', first) + self.comments[first].set_used() + token.add_comment_pre(first) + return gobbled + + def assign_eol(self, tokens): + try: + comment_line = self.unused[0] + except IndexError: + return + if not isinstance(self.comments[comment_line], EOLComment): + return + idx = 1 + while tokens[-idx].start_mark.line > comment_line or isinstance(tokens[-idx], ValueToken): + idx += 1 + xprintf('idx1', idx) + if len(tokens) > idx and isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], ScalarToken): + return + try: + if isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], KeyToken): + try: + eol_idx = self.unused.pop(0) + self.comments[eol_idx].set_used() + xprintf('>>>>>a', idx, eol_idx, KEYCMNT) + tokens[-idx].add_comment_eol(eol_idx, KEYCMNT) + except IndexError: + raise NotImplementedError + return + except IndexError: + xprintf('IndexError1') + pass + try: + if isinstance(tokens[-idx], ScalarToken) and isinstance(tokens[-(idx+1)], (ValueToken, BlockEntryToken)): + try: + eol_idx = self.unused.pop(0) + self.comments[eol_idx].set_used() + tokens[-idx].add_comment_eol(eol_idx, VALUECMNT) + except IndexError: + raise NotImplementedError + return + except IndexError: + xprintf('IndexError2') + pass + for t in tokens: + xprintf('tt-', t) + xprintf('not implemented EOL', type(tokens[-idx])) + import sys; sys.exit(0) + + def assign_post(self, token): + token_line = token.start_mark.line + info = inspect.getframeinfo(inspect.stack()[1][0]) + xprintf('assign_post', token_line, self.unused, info.function, info.lineno) + gobbled = False + while self.unused and self.unused[0] < token_line: + gobled = True + first = self.unused.pop(0) + xprintf('assign_post < ', first) + self.comments[first].set_used() + token.add_comment_post(first) + return gobbled + + def str_unprocessed(self): + return ''.join((_F(' {ind:2} {x}\n', ind=ind, x=x.info()) for ind, x in self.comments.items() if x.used == ' ')) + + +class RoundTripScannerSC(Scanner): # RoundTripScanner Split Comments + def __init__(self, *arg, **kw): + super().__init__(*arg, **kw) + assert self.loader is not None + # comments isinitialised on .need_more_tokens and persist on self.loader.parsed_comments + # + self.comments = None + + def get_token(self): + # type: () -> Any + # Return the next token. + while self.need_more_tokens(): + self.fetch_more_tokens() + if len(self.tokens) > 0: + if isinstance(self.tokens[0], BlockEndToken): + self.comments.assign_post(self.tokens[0]) + else: + self.comments.assign_pre(self.tokens[0]) + self.tokens_taken += 1 + return self.tokens.pop(0) + + def need_more_tokens(self): + if self.comments is None: + self.loader.parsed_comments = self.comments = ScannedComments() + if self.done: + return False + if len(self.tokens) == 0: + return True + # The current token may be a potential simple key, so we + # need to look further. + self.stale_possible_simple_keys() + if self.next_possible_simple_key() == self.tokens_taken: + return True + if len(self.tokens) < 2: + return True + if self.tokens[0].start_mark.line == self.tokens[-1].start_mark.line: + return True + if True: + xprintf('-x--', len(self.tokens)) + for t in self.tokens: + xprintf(t) + #xprintf(self.comments.last()) + xprintf(self.comments.str_unprocessed()) + self.comments.assign_pre(self.tokens[0]) + self.comments.assign_eol(self.tokens) + return False + + def scan_to_next_token(self): + srp = self.reader.peek + srf = self.reader.forward + if self.reader.index == 0 and srp() == '\uFEFF': + srf() + start_mark = self.reader.get_mark() + # xprintf('current_mark', start_mark.line, start_mark.column) + found = False + idx = 0 + while not found: + while srp() == ' ': + srf() + ch = srp() + if ch == '#': + comment_start_mark = self.reader.get_mark() + comment = ch + srf() # skipt the '#' + while ch not in _THE_END: + ch = srp() + if ch == '\0': # don't gobble the end-of-stream character + # but add an explicit newline as "YAML processors should terminate + # the stream with an explicit line break + # https://yaml.org/spec/1.2/spec.html#id2780069 + comment += '\n' + break + comment += ch + srf() + # we have a comment + if start_mark.column == 0: + self.comments.add_full_line_comment(comment, comment_start_mark.column, comment_start_mark.line) + else: + self.comments.add_eol_comment(comment, comment_start_mark.column, comment_start_mark.line) + comment = "" + # gather any blank lines or full line comments following the comment as well + self.scan_empty_or_full_line_comments() + if not self.flow_level: + self.allow_simple_key = True + return + if bool(self.scan_line_break()): + # start_mark = self.reader.get_mark() + if not self.flow_level: + self.allow_simple_key = True + self.scan_empty_or_full_line_comments() + return None + ch = srp() + if ch == '\n': # empty toplevel lines + start_mark = self.reader.get_mark() + comment = "" + while ch: + ch = self.scan_line_break(empty_line=True) + comment += ch + if srp() == '#': + # empty line followed by indented real comment + comment = comment.rsplit('\n', 1)[0] + '\n' + end_mark = self.reader.get_mark() + return None + else: + found = True + return None + + def scan_empty_or_full_line_comments(self): + blmark = self.reader.get_mark() + assert blmark.column == 0 + blanks = "" + comment = None + mark = None + ch = self.reader.peek() + while True: + # nprint('ch', repr(ch), self.reader.get_mark().column) + if ch in '\r\n\x85\u2028\u2029': + if self.reader.prefix(2) == '\r\n': + self.reader.forward(2) + else: + self.reader.forward() + if comment is not None: + comment += '\n' + self.comments.add_full_line_comment(comment, mark.column, mark.line) + comment = None + else: + blanks += '\n' + self.comments.add_blank_line(blanks, blmark.column, blmark.line) + blanks = "" + blmark = self.reader.get_mark() + ch = self.reader.peek() + continue + if comment is None: + if ch in ' \t': + blanks += ch + elif ch == '#': + mark = self.reader.get_mark() + comment = '#' + else: + # print('breaking on', repr(ch)) + break + else: + comment += ch + self.reader.forward() + ch = self.reader.peek() + + def scan_block_scalar_ignored_line(self, start_mark): + # type: (Any) -> Any + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + prefix = '' + comment = None + while srp() == ' ': + prefix += srp() + srf() + if srp() == '#': + comment = '' + mark = self.reader.get_mark() + while srp() not in _THE_END: + comment += srp() + srf() + comment += '\n' + ch = srp() + if ch not in _THE_END: + raise ScannerError( + 'while scanning a block scalar', + start_mark, + _F('expected a comment or a line break, but found {ch!r}', ch=ch), + self.reader.get_mark(), + ) + if comment is not None: + self.comments.add_eol_comment(comment, mark.column, mark.line) + self.scan_line_break() + return None diff --git a/tokens.py b/tokens.py index 490866b..01cae1f 100644 --- a/tokens.py +++ b/tokens.py @@ -1,6 +1,6 @@ # coding: utf-8 -from ruamel.yaml.compat import _F +from ruamel.yaml.compat import _F, nprintf if False: # MYPY from typing import Text, Any, Dict, Optional, List # NOQA @@ -89,13 +89,17 @@ class Token: self._comment[0] = [] self._comment[0].append(comment) - def add_comment_eol(self, comment): + def add_comment_eol(self, comment, comment_type): if not hasattr(self, '_comment'): self._comment = [None, None, None] else: assert len(self._comment) == 3 assert self._comment[1] is None - self._comment[1] = comment + if self.comment[1] is None: + self._comment[1] = [] + self._comment[1].extend([None] * (comment_type + 1 - len(self.comment[1]))) + # nprintf('commy', self.comment, comment_type) + self._comment[1][comment_type] = comment def add_comment_post(self, comment): if not hasattr(self, '_comment'): @@ -184,11 +188,9 @@ class Token: target._comment = c # nprint('mco2:', self, target, target.comment, empty) return self - return - raise NotImplemtedError # if self and target have both pre, eol or post comments, something seems wrong for idx in range(3): - if c[idx] and tc[idx]: + if c[idx] is not None and tc[idx] is not None: raise NotImplementedError(_F('overlap in comment {c!r} {tc!r}', c=c, tc=tc)) # move the comment parts for idx in range(3): -- cgit v1.2.1