diff options
author | Anthon van der Neut <anthon@mnt.org> | 2018-08-03 22:14:57 +0200 |
---|---|---|
committer | Anthon van der Neut <anthon@mnt.org> | 2018-08-03 22:14:57 +0200 |
commit | dce10fcff1de54121fb8b440b883ef5d3fe2f96a (patch) | |
tree | 072b4bd247e6f1cd95c08c7b67fea0fc96f0578e /scanner.py | |
parent | 2966a4f215861fa05e0dc7e0cd53350766e794c6 (diff) | |
download | ruamel.yaml-dce10fcff1de54121fb8b440b883ef5d3fe2f96a.tar.gz |
Apply oitnb and mypy 0.620, then make everything work again (0.15.48)
Diffstat (limited to 'scanner.py')
-rw-r--r-- | scanner.py | 692 |
1 files changed, 390 insertions, 302 deletions
@@ -31,7 +31,7 @@ from __future__ import print_function, absolute_import, division, unicode_litera # from ruamel.yaml.error import MarkedYAMLError -from ruamel.yaml.tokens import * # NOQA +from ruamel.yaml.tokens import * # NOQA from ruamel.yaml.compat import utf8, unichr, PY3, check_anchorname_char, nprint # NOQA if False: # MYPY @@ -41,9 +41,9 @@ if False: # MYPY __all__ = ['Scanner', 'RoundTripScanner', 'ScannerError'] -_THE_END = u'\0\r\n\x85\u2028\u2029' -_THE_END_SPACE_TAB = u'\0 \t\r\n\x85\u2028\u2029' -_SPACE_TAB = u' \t' +_THE_END = '\0\r\n\x85\u2028\u2029' +_THE_END_SPACE_TAB = '\0 \t\r\n\x85\u2028\u2029' +_SPACE_TAB = ' \t' class ScannerError(MarkedYAMLError): @@ -64,7 +64,6 @@ class SimpleKey(object): class Scanner(object): - def __init__(self, loader=None): # type: (Any) -> None """Initialize the scanner.""" @@ -141,8 +140,8 @@ class Scanner(object): def reader(self): # type: () -> Any if hasattr(self.loader, 'typ'): - self.loader.reader # type: ignore - return self.loader._reader # type: ignore + self.loader.reader + return self.loader._reader @property def scanner_processing_version(self): # prefix until un-composited @@ -219,19 +218,19 @@ class Scanner(object): ch = self.reader.peek() # Is it the end of stream? - if ch == u'\0': + if ch == '\0': return self.fetch_stream_end() # Is it a directive? - if ch == u'%' and self.check_directive(): + if ch == '%' and self.check_directive(): return self.fetch_directive() # Is it the document start? - if ch == u'-' and self.check_document_start(): + if ch == '-' and self.check_document_start(): return self.fetch_document_start() # Is it the document end? - if ch == u'.' and self.check_document_end(): + if ch == '.' and self.check_document_end(): return self.fetch_document_end() # TODO: support for BOM within a stream. @@ -241,63 +240,63 @@ class Scanner(object): # Note: the order of the following checks is NOT significant. # Is it the flow sequence start indicator? 
- if ch == u'[': + if ch == '[': return self.fetch_flow_sequence_start() # Is it the flow mapping start indicator? - if ch == u'{': + if ch == '{': return self.fetch_flow_mapping_start() # Is it the flow sequence end indicator? - if ch == u']': + if ch == ']': return self.fetch_flow_sequence_end() # Is it the flow mapping end indicator? - if ch == u'}': + if ch == '}': return self.fetch_flow_mapping_end() # Is it the flow entry indicator? - if ch == u',': + if ch == ',': return self.fetch_flow_entry() # Is it the block entry indicator? - if ch == u'-' and self.check_block_entry(): + if ch == '-' and self.check_block_entry(): return self.fetch_block_entry() # Is it the key indicator? - if ch == u'?' and self.check_key(): + if ch == '?' and self.check_key(): return self.fetch_key() # Is it the value indicator? - if ch == u':' and self.check_value(): + if ch == ':' and self.check_value(): return self.fetch_value() # Is it an alias? - if ch == u'*': + if ch == '*': return self.fetch_alias() # Is it an anchor? - if ch == u'&': + if ch == '&': return self.fetch_anchor() # Is it a tag? - if ch == u'!': + if ch == '!': return self.fetch_tag() # Is it a literal scalar? - if ch == u'|' and not self.flow_level: + if ch == '|' and not self.flow_level: return self.fetch_literal() # Is it a folded scalar? - if ch == u'>' and not self.flow_level: + if ch == '>' and not self.flow_level: return self.fetch_folded() # Is it a single quoted scalar? - if ch == u'\'': + if ch == "'": return self.fetch_single() # Is it a double quoted scalar? - if ch == u'\"': + if ch == '"': return self.fetch_double() # It must be a plain scalar then. @@ -305,9 +304,12 @@ class Scanner(object): return self.fetch_plain() # No? It's an error. Let's produce a nice error message. 
- raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" - % utf8(ch), self.reader.get_mark()) + raise ScannerError( + 'while scanning for the next token', + None, + 'found character %r that cannot start any token' % utf8(ch), + self.reader.get_mark(), + ) # Simple keys treatment. @@ -337,12 +339,14 @@ class Scanner(object): # height (may cause problems if indentation is broken though). for level in list(self.possible_simple_keys): key = self.possible_simple_keys[level] - if key.line != self.reader.line \ - or self.reader.index - key.index > 1024: + if key.line != self.reader.line or self.reader.index - key.index > 1024: if key.required: raise ScannerError( - "while scanning a simple key", key.mark, - "could not find expected ':'", self.reader.get_mark()) + 'while scanning a simple key', + key.mark, + "could not find expected ':'", + self.reader.get_mark(), + ) del self.possible_simple_keys[level] def save_possible_simple_key(self): @@ -360,9 +364,13 @@ class Scanner(object): self.remove_possible_simple_key() token_number = self.tokens_taken + len(self.tokens) key = SimpleKey( - token_number, required, - self.reader.index, self.reader.line, self.reader.column, - self.reader.get_mark()) + token_number, + required, + self.reader.index, + self.reader.line, + self.reader.column, + self.reader.get_mark(), + ) self.possible_simple_keys[self.flow_level] = key def remove_possible_simple_key(self): @@ -373,8 +381,11 @@ class Scanner(object): if key.required: raise ScannerError( - "while scanning a simple key", key.mark, - "could not find expected ':'", self.reader.get_mark()) + 'while scanning a simple key', + key.mark, + "could not find expected ':'", + self.reader.get_mark(), + ) del self.possible_simple_keys[self.flow_level] @@ -423,8 +434,7 @@ class Scanner(object): # Read the token. mark = self.reader.get_mark() # Add STREAM-START. 
- self.tokens.append(StreamStartToken(mark, mark, - encoding=self.reader.encoding)) + self.tokens.append(StreamStartToken(mark, mark, encoding=self.reader.encoding)) def fetch_stream_end(self): # type: () -> None @@ -539,9 +549,9 @@ class Scanner(object): if not self.flow_level: # Are we allowed to start a new entry? if not self.allow_simple_key: - raise ScannerError(None, None, - "sequence entries are not allowed here", - self.reader.get_mark()) + raise ScannerError( + None, None, 'sequence entries are not allowed here', self.reader.get_mark() + ) # We may need to add BLOCK-SEQUENCE-START. if self.add_indent(self.reader.column): mark = self.reader.get_mark() @@ -568,9 +578,9 @@ class Scanner(object): # Are we allowed to start a key (not nessesary a simple)? if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping keys are not allowed here", - self.reader.get_mark()) + raise ScannerError( + None, None, 'mapping keys are not allowed here', self.reader.get_mark() + ) # We may need to add BLOCK-MAPPING-START. if self.add_indent(self.reader.column): @@ -596,8 +606,9 @@ class Scanner(object): # Add KEY. key = self.possible_simple_keys[self.flow_level] del self.possible_simple_keys[self.flow_level] - self.tokens.insert(key.token_number - self.tokens_taken, - KeyToken(key.mark, key.mark)) + self.tokens.insert( + key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark) + ) # If this key starts a new block mapping, we need to add # BLOCK-MAPPING-START. @@ -605,7 +616,8 @@ class Scanner(object): if self.add_indent(key.column): self.tokens.insert( key.token_number - self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) + BlockMappingStartToken(key.mark, key.mark), + ) # There cannot be two simple keys one after another. self.allow_simple_key = False @@ -621,9 +633,12 @@ class Scanner(object): # We are allowed to start a complex value if and only if # we can start a simple key. 
if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping values are not allowed here", - self.reader.get_mark()) + raise ScannerError( + None, + None, + 'mapping values are not allowed here', + self.reader.get_mark(), + ) # If this value starts a new block mapping, we need to add # BLOCK-MAPPING-START. It will be detected as an error later by @@ -691,7 +706,7 @@ class Scanner(object): def fetch_single(self): # type: () -> None - self.fetch_flow_scalar(style='\'') + self.fetch_flow_scalar(style="'") def fetch_double(self): # type: () -> None @@ -731,8 +746,7 @@ class Scanner(object): # type: () -> Any # DOCUMENT-START: ^ '---' (' '|'\n') if self.reader.column == 0: - if self.reader.prefix(3) == u'---' \ - and self.reader.peek(3) in _THE_END_SPACE_TAB: + if self.reader.prefix(3) == '---' and self.reader.peek(3) in _THE_END_SPACE_TAB: return True return None @@ -740,8 +754,7 @@ class Scanner(object): # type: () -> Any # DOCUMENT-END: ^ '...' (' '|'\n') if self.reader.column == 0: - if self.reader.prefix(3) == u'...' \ - and self.reader.peek(3) in _THE_END_SPACE_TAB: + if self.reader.prefix(3) == '...' and self.reader.peek(3) in _THE_END_SPACE_TAB: return True return None @@ -786,11 +799,12 @@ class Scanner(object): # independent. ch = self.reader.peek() if self.scanner_processing_version == (1, 1): - return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' or \ - (self.reader.peek(1) not in _THE_END_SPACE_TAB and - (ch == u'-' or (not self.flow_level and ch in u'?:'))) + return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`' or ( + self.reader.peek(1) not in _THE_END_SPACE_TAB + and (ch == '-' or (not self.flow_level and ch in '?:')) + ) # YAML 1.2 - if ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`': + if ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`': # ################### ^ ??? 
return True ch1 = self.reader.peek(1) @@ -799,8 +813,9 @@ class Scanner(object): if ch == ':' and bool(self.flow_level) and ch1 not in _SPACE_TAB: return True - return (self.reader.peek(1) not in _THE_END_SPACE_TAB and - (ch == u'-' or (not self.flow_level and ch in u'?:'))) + return self.reader.peek(1) not in _THE_END_SPACE_TAB and ( + ch == '-' or (not self.flow_level and ch in '?:') + ) # Scanners. @@ -825,13 +840,13 @@ class Scanner(object): # `unwind_indent` before issuing BLOCK-END. # Scanners for block, flow, and plain scalars need to be modified. - if self.reader.index == 0 and self.reader.peek() == u'\uFEFF': + if self.reader.index == 0 and self.reader.peek() == '\uFEFF': self.reader.forward() found = False while not found: - while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() - if self.reader.peek() == u'#': + if self.reader.peek() == '#': while self.reader.peek() not in _THE_END: self.reader.forward() if self.scan_line_break(): @@ -848,10 +863,10 @@ class Scanner(object): self.reader.forward() name = self.scan_directive_name(start_mark) value = None - if name == u'YAML': + if name == 'YAML': value = self.scan_yaml_directive_value(start_mark) end_mark = self.reader.get_mark() - elif name == u'TAG': + elif name == 'TAG': value = self.scan_tag_directive_value(start_mark) end_mark = self.reader.get_mark() else: @@ -866,58 +881,65 @@ class Scanner(object): # See the specification for details. 
length = 0 ch = self.reader.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_:.': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_:.': length += 1 ch = self.reader.peek(length) if not length: raise ScannerError( - "while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning a directive', + start_mark, + 'expected alphabetic or numeric character, but found %r' % utf8(ch), + self.reader.get_mark(), + ) value = self.reader.prefix(length) self.reader.forward(length) ch = self.reader.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError( - "while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning a directive', + start_mark, + 'expected alphabetic or numeric character, but found %r' % utf8(ch), + self.reader.get_mark(), + ) return value def scan_yaml_directive_value(self, start_mark): # type: (Any) -> Any # See the specification for details. 
- while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() major = self.scan_yaml_directive_number(start_mark) if self.reader.peek() != '.': raise ScannerError( - "while scanning a directive", start_mark, - "expected a digit or '.', but found %r" - % utf8(self.reader.peek()), - self.reader.get_mark()) + 'while scanning a directive', + start_mark, + "expected a digit or '.', but found %r" % utf8(self.reader.peek()), + self.reader.get_mark(), + ) self.reader.forward() minor = self.scan_yaml_directive_number(start_mark) - if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029': + if self.reader.peek() not in '\0 \r\n\x85\u2028\u2029': raise ScannerError( - "while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" - % utf8(self.reader.peek()), - self.reader.get_mark()) + 'while scanning a directive', + start_mark, + "expected a digit or ' ', but found %r" % utf8(self.reader.peek()), + self.reader.get_mark(), + ) return (major, minor) def scan_yaml_directive_number(self, start_mark): # type: (Any) -> Any # See the specification for details. ch = self.reader.peek() - if not (u'0' <= ch <= u'9'): + if not ('0' <= ch <= '9'): raise ScannerError( - "while scanning a directive", start_mark, - "expected a digit, but found %r" % utf8(ch), - self.reader.get_mark()) + 'while scanning a directive', + start_mark, + 'expected a digit, but found %r' % utf8(ch), + self.reader.get_mark(), + ) length = 0 - while u'0' <= self.reader.peek(length) <= u'9': + while '0' <= self.reader.peek(length) <= '9': length += 1 value = int(self.reader.prefix(length)) self.reader.forward(length) @@ -926,10 +948,10 @@ class Scanner(object): def scan_tag_directive_value(self, start_mark): # type: (Any) -> Any # See the specification for details. 
- while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() handle = self.scan_tag_directive_handle(start_mark) - while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() prefix = self.scan_tag_directive_prefix(start_mark) return (handle, prefix) @@ -939,10 +961,13 @@ class Scanner(object): # See the specification for details. value = self.scan_tag_handle('directive', start_mark) ch = self.reader.peek() - if ch != u' ': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % utf8(ch), - self.reader.get_mark()) + if ch != ' ': + raise ScannerError( + 'while scanning a directive', + start_mark, + "expected ' ', but found %r" % utf8(ch), + self.reader.get_mark(), + ) return value def scan_tag_directive_prefix(self, start_mark): @@ -950,26 +975,31 @@ class Scanner(object): # See the specification for details. value = self.scan_tag_uri('directive', start_mark) ch = self.reader.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % utf8(ch), - self.reader.get_mark()) + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a directive', + start_mark, + "expected ' ', but found %r" % utf8(ch), + self.reader.get_mark(), + ) return value def scan_directive_ignored_line(self, start_mark): # type: (Any) -> None # See the specification for details. 
- while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() - if self.reader.peek() == u'#': + if self.reader.peek() == '#': while self.reader.peek() not in _THE_END: self.reader.forward() ch = self.reader.peek() if ch not in _THE_END: raise ScannerError( - "while scanning a directive", start_mark, - "expected a comment or a line break, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning a directive', + start_mark, + 'expected a comment or a line break, but found %r' % utf8(ch), + self.reader.get_mark(), + ) self.scan_line_break() def scan_anchor(self, TokenClass): @@ -984,7 +1014,7 @@ class Scanner(object): # Therefore we restrict aliases to numbers and ASCII letters. start_mark = self.reader.get_mark() indicator = self.reader.peek() - if indicator == u'*': + if indicator == '*': name = 'alias' else: name = 'anchor' @@ -998,19 +1028,23 @@ class Scanner(object): ch = self.reader.peek(length) if not length: raise ScannerError( - "while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning an %s' % name, + start_mark, + 'expected alphabetic or numeric character, but found %r' % utf8(ch), + self.reader.get_mark(), + ) value = self.reader.prefix(length) self.reader.forward(length) # ch1 = ch # ch = self.reader.peek() # no need to peek, ch is already set # assert ch1 == ch - if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`': + if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`': raise ScannerError( - "while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning an %s' % name, + start_mark, + 'expected alphabetic or numeric character, but found %r' % utf8(ch), + self.reader.get_mark(), + ) end_mark = self.reader.get_mark() return TokenClass(value, start_mark, end_mark) @@ -1019,41 +1053,46 @@ class Scanner(object): # See the 
specification for details. start_mark = self.reader.get_mark() ch = self.reader.peek(1) - if ch == u'<': + if ch == '<': handle = None self.reader.forward(2) suffix = self.scan_tag_uri('tag', start_mark) - if self.reader.peek() != u'>': + if self.reader.peek() != '>': raise ScannerError( - "while parsing a tag", start_mark, + 'while parsing a tag', + start_mark, "expected '>', but found %r" % utf8(self.reader.peek()), - self.reader.get_mark()) + self.reader.get_mark(), + ) self.reader.forward() elif ch in _THE_END_SPACE_TAB: handle = None - suffix = u'!' + suffix = '!' self.reader.forward() else: length = 1 use_handle = False - while ch not in u'\0 \r\n\x85\u2028\u2029': - if ch == u'!': + while ch not in '\0 \r\n\x85\u2028\u2029': + if ch == '!': use_handle = True break length += 1 ch = self.reader.peek(length) - handle = u'!' + handle = '!' if use_handle: handle = self.scan_tag_handle('tag', start_mark) else: - handle = u'!' + handle = '!' self.reader.forward() suffix = self.scan_tag_uri('tag', start_mark) ch = self.reader.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % utf8(ch), - self.reader.get_mark()) + if ch not in '\0 \r\n\x85\u2028\u2029': + raise ScannerError( + 'while scanning a tag', + start_mark, + "expected ' ', but found %r" % utf8(ch), + self.reader.get_mark(), + ) value = (handle, suffix) end_mark = self.reader.get_mark() return TagToken(value, start_mark, end_mark) @@ -1078,11 +1117,13 @@ class Scanner(object): min_indent = self.indent + 1 if increment is None: # no increment and top level, min_indent could be 0 - if min_indent < 1 and \ - (style not in '|>' or ( - self.scanner_processing_version == (1, 1)) and - getattr(self.loader, - 'top_level_block_style_scalar_no_indent_error_1_1', False)): + if min_indent < 1 and ( + style not in '|>' + or (self.scanner_processing_version == (1, 1)) + and getattr( + self.loader, 
'top_level_block_style_scalar_no_indent_error_1_1', False + ) + ): min_indent = 1 breaks, max_indent, end_mark = self.scan_block_scalar_indentation() indent = max(min_indent, max_indent) @@ -1091,12 +1132,12 @@ class Scanner(object): min_indent = 1 indent = min_indent + increment - 1 breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = u'' + line_break = "" # Scan the inner part of the block scalar. - while self.reader.column == indent and self.reader.peek() != u'\0': + while self.reader.column == indent and self.reader.peek() != '\0': chunks.extend(breaks) - leading_non_space = self.reader.peek() not in u' \t' + leading_non_space = self.reader.peek() not in ' \t' length = 0 while self.reader.peek(length) not in _THE_END: length += 1 @@ -1109,16 +1150,20 @@ class Scanner(object): # end of document/start_new_document if self.check_document_start() or self.check_document_end(): break - if self.reader.column == indent and self.reader.peek() != u'\0': + if self.reader.column == indent and self.reader.peek() != '\0': # Unfortunately, folding rules are ambiguous. # # This is the folding according to the specification: - if folded and line_break == u'\n' \ - and leading_non_space and self.reader.peek() not in u' \t': + if ( + folded + and line_break == '\n' + and leading_non_space + and self.reader.peek() not in ' \t' + ): if not breaks: - chunks.append(u' ') + chunks.append(' ') else: chunks.append(line_break) @@ -1147,7 +1192,7 @@ class Scanner(object): trailing.extend(breaks) # We are done. - token = ScalarToken(u''.join(chunks), False, start_mark, end_mark, style) + token = ScalarToken("".join(chunks), False, start_mark, end_mark, style) if len(trailing) > 0: # print('trailing 1', trailing) # XXXXX # Eat whitespaces and comments until we reach the next token. @@ -1159,8 +1204,7 @@ class Scanner(object): # Keep track of the trailing whitespace and following comments # as a comment token, if isn't all included in the actual value. 
comment_end_mark = self.reader.get_mark() - comment = CommentToken(''.join(trailing), end_mark, - comment_end_mark) + comment = CommentToken("".join(trailing), end_mark, comment_end_mark) token.add_post_comment(comment) return token @@ -1170,59 +1214,66 @@ class Scanner(object): chomping = None increment = None ch = self.reader.peek() - if ch in u'+-': + if ch in '+-': if ch == '+': chomping = True else: chomping = False self.reader.forward() ch = self.reader.peek() - if ch in u'0123456789': + if ch in '0123456789': increment = int(ch) if increment == 0: raise ScannerError( - "while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, " - "but found 0", self.reader.get_mark()) + 'while scanning a block scalar', + start_mark, + 'expected indentation indicator in the range 1-9, ' 'but found 0', + self.reader.get_mark(), + ) self.reader.forward() - elif ch in u'0123456789': + elif ch in '0123456789': increment = int(ch) if increment == 0: raise ScannerError( - "while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, " - "but found 0", - self.reader.get_mark()) + 'while scanning a block scalar', + start_mark, + 'expected indentation indicator in the range 1-9, ' 'but found 0', + self.reader.get_mark(), + ) self.reader.forward() ch = self.reader.peek() - if ch in u'+-': + if ch in '+-': if ch == '+': chomping = True else: chomping = False self.reader.forward() ch = self.reader.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': + if ch not in '\0 \r\n\x85\u2028\u2029': raise ScannerError( - "while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning a block scalar', + start_mark, + 'expected chomping or indentation indicators, but found %r' % utf8(ch), + self.reader.get_mark(), + ) return chomping, increment def scan_block_scalar_ignored_line(self, start_mark): # type: (Any) -> Any # See the 
specification for details. - while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() - if self.reader.peek() == u'#': + if self.reader.peek() == '#': while self.reader.peek() not in _THE_END: self.reader.forward() ch = self.reader.peek() if ch not in _THE_END: raise ScannerError( - "while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" - % utf8(ch), self.reader.get_mark()) + 'while scanning a block scalar', + start_mark, + 'expected a comment or a line break, but found %r' % utf8(ch), + self.reader.get_mark(), + ) self.scan_line_break() def scan_block_scalar_indentation(self): @@ -1231,8 +1282,8 @@ class Scanner(object): chunks = [] max_indent = 0 end_mark = self.reader.get_mark() - while self.reader.peek() in u' \r\n\x85\u2028\u2029': - if self.reader.peek() != u' ': + while self.reader.peek() in ' \r\n\x85\u2028\u2029': + if self.reader.peek() != ' ': chunks.append(self.scan_line_break()) end_mark = self.reader.get_mark() else: @@ -1246,12 +1297,12 @@ class Scanner(object): # See the specification for details. 
chunks = [] end_mark = self.reader.get_mark() - while self.reader.column < indent and self.reader.peek() == u' ': + while self.reader.column < indent and self.reader.peek() == ' ': self.reader.forward() - while self.reader.peek() in u'\r\n\x85\u2028\u2029': + while self.reader.peek() in '\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) end_mark = self.reader.get_mark() - while self.reader.column < indent and self.reader.peek() == u' ': + while self.reader.column < indent and self.reader.peek() == ' ': self.reader.forward() return chunks, end_mark @@ -1277,35 +1328,30 @@ class Scanner(object): chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) self.reader.forward() end_mark = self.reader.get_mark() - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, - style) + return ScalarToken("".join(chunks), False, start_mark, end_mark, style) ESCAPE_REPLACEMENTS = { - u'0': u'\0', - u'a': u'\x07', - u'b': u'\x08', - u't': u'\x09', - u'\t': u'\x09', - u'n': u'\x0A', - u'v': u'\x0B', - u'f': u'\x0C', - u'r': u'\x0D', - u'e': u'\x1B', - u' ': u'\x20', - u'\"': u'\"', - u'/': u'/', # as per http://www.json.org/ - u'\\': u'\\', - u'N': u'\x85', - u'_': u'\xA0', - u'L': u'\u2028', - u'P': u'\u2029', + '0': '\0', + 'a': '\x07', + 'b': '\x08', + 't': '\x09', + '\t': '\x09', + 'n': '\x0A', + 'v': '\x0B', + 'f': '\x0C', + 'r': '\x0D', + 'e': '\x1B', + ' ': '\x20', + '"': '"', + '/': '/', # as per http://www.json.org/ + '\\': '\\', + 'N': '\x85', + '_': '\xA0', + 'L': '\u2028', + 'P': '\u2029', } - ESCAPE_CODES = { - u'x': 2, - u'u': 4, - u'U': 8, - } + ESCAPE_CODES = {'x': 2, 'u': 4, 'U': 8} def scan_flow_scalar_non_spaces(self, double, start_mark): # type: (Any, Any) -> Any @@ -1313,19 +1359,19 @@ class Scanner(object): chunks = [] # type: List[Any] while True: length = 0 - while self.reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + while self.reader.peek(length) not in '\'"\\\0 \t\r\n\x85\u2028\u2029': length += 1 if length 
!= 0: chunks.append(self.reader.prefix(length)) self.reader.forward(length) ch = self.reader.peek() - if not double and ch == u'\'' and self.reader.peek(1) == u'\'': - chunks.append(u'\'') + if not double and ch == "'" and self.reader.peek(1) == "'": + chunks.append("'") self.reader.forward(2) - elif (double and ch == u'\'') or (not double and ch in u'\"\\'): + elif (double and ch == "'") or (not double and ch in '"\\'): chunks.append(ch) self.reader.forward() - elif double and ch == u'\\': + elif double and ch == '\\': self.reader.forward() ch = self.reader.peek() if ch in self.ESCAPE_REPLACEMENTS: @@ -1335,25 +1381,27 @@ class Scanner(object): length = self.ESCAPE_CODES[ch] self.reader.forward() for k in range(length): - if self.reader.peek(k) not in u'0123456789ABCDEFabcdef': + if self.reader.peek(k) not in '0123456789ABCDEFabcdef': raise ScannerError( - "while scanning a double-quoted scalar", + 'while scanning a double-quoted scalar', start_mark, - "expected escape sequence of %d hexdecimal " - "numbers, but found %r" % - (length, utf8(self.reader.peek(k))), self.reader.get_mark()) + 'expected escape sequence of %d hexdecimal ' + 'numbers, but found %r' % (length, utf8(self.reader.peek(k))), + self.reader.get_mark(), + ) code = int(self.reader.prefix(length), 16) chunks.append(unichr(code)) self.reader.forward(length) - elif ch in u'\r\n\x85\u2028\u2029': + elif ch in '\r\n\x85\u2028\u2029': self.scan_line_break() - chunks.extend(self.scan_flow_scalar_breaks( - double, start_mark)) + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) else: raise ScannerError( - "while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % utf8(ch), - self.reader.get_mark()) + 'while scanning a double-quoted scalar', + start_mark, + 'found unknown escape character %r' % utf8(ch), + self.reader.get_mark(), + ) else: return chunks @@ -1362,22 +1410,25 @@ class Scanner(object): # See the specification for details. 
chunks = [] length = 0 - while self.reader.peek(length) in u' \t': + while self.reader.peek(length) in ' \t': length += 1 whitespaces = self.reader.prefix(length) self.reader.forward(length) ch = self.reader.peek() - if ch == u'\0': + if ch == '\0': raise ScannerError( - "while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.reader.get_mark()) - elif ch in u'\r\n\x85\u2028\u2029': + 'while scanning a quoted scalar', + start_mark, + 'found unexpected end of stream', + self.reader.get_mark(), + ) + elif ch in '\r\n\x85\u2028\u2029': line_break = self.scan_line_break() breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != u'\n': + if line_break != '\n': chunks.append(line_break) elif not breaks: - chunks.append(u' ') + chunks.append(' ') chunks.extend(breaks) else: chunks.append(whitespaces) @@ -1391,15 +1442,18 @@ class Scanner(object): # Instead of checking indentation, we check for document # separators. prefix = self.reader.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.reader.peek(3) in _THE_END_SPACE_TAB: - raise ScannerError("while scanning a quoted scalar", - start_mark, - "found unexpected document separator", - self.reader.get_mark()) - while self.reader.peek() in u' \t': + if (prefix == '---' or prefix == '...') and self.reader.peek( + 3 + ) in _THE_END_SPACE_TAB: + raise ScannerError( + 'while scanning a quoted scalar', + start_mark, + 'found unexpected document separator', + self.reader.get_mark(), + ) + while self.reader.peek() in ' \t': self.reader.forward() - if self.reader.peek() in u'\r\n\x85\u2028\u2029': + if self.reader.peek() in '\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) else: return chunks @@ -1422,31 +1476,41 @@ class Scanner(object): spaces = [] # type: List[Any] while True: length = 0 - if self.reader.peek() == u'#': + if self.reader.peek() == '#': break while True: ch = self.reader.peek(length) - if (ch == u':' and - self.reader.peek(length + 1) not 
in _THE_END_SPACE_TAB): + if ch == ':' and self.reader.peek(length + 1) not in _THE_END_SPACE_TAB: pass - elif (ch == u'?' and self.scanner_processing_version != (1, 1)): + elif ch == '?' and self.scanner_processing_version != (1, 1): pass - elif (ch in _THE_END_SPACE_TAB or - (not self.flow_level and ch == u':' and - self.reader.peek(length + 1) in _THE_END_SPACE_TAB) or - (self.flow_level and ch in u',:?[]{}')): + elif ( + ch in _THE_END_SPACE_TAB + or ( + not self.flow_level + and ch == ':' + and self.reader.peek(length + 1) in _THE_END_SPACE_TAB + ) + or (self.flow_level and ch in ',:?[]{}') + ): break length += 1 # It's not clear what we should do with ':' in the flow context. - if (self.flow_level and ch == u':' and - self.reader.peek(length + 1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + if ( + self.flow_level + and ch == ':' + and self.reader.peek(length + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}' + ): self.reader.forward(length) raise ScannerError( - "while scanning a plain scalar", start_mark, - "found unexpected ':'", self.reader.get_mark(), - "Please check " - "http://pyyaml.org/wiki/YAMLColonInFlowContext " - "for details.") + 'while scanning a plain scalar', + start_mark, + "found unexpected ':'", + self.reader.get_mark(), + 'Please check ' + 'http://pyyaml.org/wiki/YAMLColonInFlowContext ' + 'for details.', + ) if length == 0: break self.allow_simple_key = False @@ -1455,14 +1519,17 @@ class Scanner(object): self.reader.forward(length) end_mark = self.reader.get_mark() spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.reader.peek() == u'#' \ - or (not self.flow_level and self.reader.column < indent): + if ( + not spaces + or self.reader.peek() == '#' + or (not self.flow_level and self.reader.column < indent) + ): break - token = ScalarToken(u''.join(chunks), True, start_mark, end_mark) + token = ScalarToken("".join(chunks), True, start_mark, end_mark) if spaces and spaces[0] == '\n': # Create a comment token to preserve 
the trailing line breaks. - comment = CommentToken(''.join(spaces) + '\n', start_mark, end_mark) + comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark) token.add_post_comment(comment) return token @@ -1473,32 +1540,34 @@ class Scanner(object): # We just forbid them completely. Do not use tabs in YAML! chunks = [] length = 0 - while self.reader.peek(length) in u' ': + while self.reader.peek(length) in ' ': length += 1 whitespaces = self.reader.prefix(length) self.reader.forward(length) ch = self.reader.peek() - if ch in u'\r\n\x85\u2028\u2029': + if ch in '\r\n\x85\u2028\u2029': line_break = self.scan_line_break() self.allow_simple_key = True prefix = self.reader.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.reader.peek(3) in _THE_END_SPACE_TAB: + if (prefix == '---' or prefix == '...') and self.reader.peek( + 3 + ) in _THE_END_SPACE_TAB: return breaks = [] - while self.reader.peek() in u' \r\n\x85\u2028\u2029': + while self.reader.peek() in ' \r\n\x85\u2028\u2029': if self.reader.peek() == ' ': self.reader.forward() else: breaks.append(self.scan_line_break()) prefix = self.reader.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.reader.peek(3) in _THE_END_SPACE_TAB: + if (prefix == '---' or prefix == '...') and self.reader.peek( + 3 + ) in _THE_END_SPACE_TAB: return - if line_break != u'\n': + if line_break != '\n': chunks.append(line_break) elif not breaks: - chunks.append(u' ') + chunks.append(' ') chunks.extend(breaks) elif whitespaces: chunks.append(whitespaces) @@ -1510,23 +1579,27 @@ class Scanner(object): # For some strange reasons, the specification does not allow '_' in # tag handles. I have allowed it anyway. 
ch = self.reader.peek() - if ch != u'!': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % utf8(ch), - self.reader.get_mark()) + if ch != '!': + raise ScannerError( + 'while scanning a %s' % name, + start_mark, + "expected '!', but found %r" % utf8(ch), + self.reader.get_mark(), + ) length = 1 ch = self.reader.peek(length) - if ch != u' ': - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' \ - or u'a' <= ch <= u'z' \ - or ch in u'-_': + if ch != ' ': + while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_': length += 1 ch = self.reader.peek(length) - if ch != u'!': + if ch != '!': self.reader.forward(length) - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % utf8(ch), - self.reader.get_mark()) + raise ScannerError( + 'while scanning a %s' % name, + start_mark, + "expected '!', but found %r" % utf8(ch), + self.reader.get_mark(), + ) length += 1 value = self.reader.prefix(length) self.reader.forward(length) @@ -1539,9 +1612,13 @@ class Scanner(object): chunks = [] length = 0 ch = self.reader.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.!~*\'()[]%': - if ch == u'%': + while ( + '0' <= ch <= '9' + or 'A' <= ch <= 'Z' + or 'a' <= ch <= 'z' + or ch in "-;/?:@&=+$,_.!~*'()[]%" + ): + if ch == '%': chunks.append(self.reader.prefix(length)) self.reader.forward(length) length = 0 @@ -1554,25 +1631,30 @@ class Scanner(object): self.reader.forward(length) length = 0 if not chunks: - raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % utf8(ch), - self.reader.get_mark()) - return u''.join(chunks) + raise ScannerError( + 'while parsing a %s' % name, + start_mark, + 'expected URI, but found %r' % utf8(ch), + self.reader.get_mark(), + ) + return "".join(chunks) def scan_uri_escapes(self, name, start_mark): # type: (Any, Any) -> Any # See the specification for 
details. code_bytes = [] # type: List[Any] mark = self.reader.get_mark() - while self.reader.peek() == u'%': + while self.reader.peek() == '%': self.reader.forward() for k in range(2): - if self.reader.peek(k) not in u'0123456789ABCDEFabcdef': + if self.reader.peek(k) not in '0123456789ABCDEFabcdef': raise ScannerError( - "while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers," - " but found %r" - % utf8(self.reader.peek(k)), self.reader.get_mark()) + 'while scanning a %s' % name, + start_mark, + 'expected URI escape sequence of 2 hexdecimal numbers,' + ' but found %r' % utf8(self.reader.peek(k)), + self.reader.get_mark(), + ) if PY3: code_bytes.append(int(self.reader.prefix(2), 16)) else: @@ -1582,10 +1664,11 @@ class Scanner(object): if PY3: value = bytes(code_bytes).decode('utf-8') else: - value = unicode(b''.join(code_bytes), 'utf-8') # type: ignore + # fmt: off + value = unicode(b"".join(code_bytes), 'utf-8') # type: ignore + # fmt: on except UnicodeDecodeError as exc: - raise ScannerError("while scanning a %s" % name, start_mark, - str(exc), mark) + raise ScannerError('while scanning a %s' % name, start_mark, str(exc), mark) return value def scan_line_break(self): @@ -1599,16 +1682,16 @@ class Scanner(object): # '\u2029 : '\u2029' # default : '' ch = self.reader.peek() - if ch in u'\r\n\x85': - if self.reader.prefix(2) == u'\r\n': + if ch in '\r\n\x85': + if self.reader.prefix(2) == '\r\n': self.reader.forward(2) else: self.reader.forward() - return u'\n' - elif ch in u'\u2028\u2029': + return '\n' + elif ch in '\u2028\u2029': self.reader.forward() return ch - return u'' + return "" class RoundTripScanner(Scanner): @@ -1673,11 +1756,15 @@ class RoundTripScanner(Scanner): # scalar, value token. 
FlowXEndToken, otherwise # hidden streamtokens could get them (leave them and they will be # pre comments for the next map/seq - if len(self.tokens) > 1 and \ - isinstance(self.tokens[0], (ScalarToken, ValueToken, - FlowSequenceEndToken, FlowMappingEndToken, )) and \ - isinstance(self.tokens[1], CommentToken) and \ - self.tokens[0].end_mark.line == self.tokens[1].start_mark.line: + if ( + len(self.tokens) > 1 + and isinstance( + self.tokens[0], + (ScalarToken, ValueToken, FlowSequenceEndToken, FlowMappingEndToken), + ) + and isinstance(self.tokens[1], CommentToken) + and self.tokens[0].end_mark.line == self.tokens[1].start_mark.line + ): self.tokens_taken += 1 self.tokens[0].add_post_comment(self.tokens.pop(1)) self.tokens_taken += 1 @@ -1687,7 +1774,7 @@ class RoundTripScanner(Scanner): def fetch_comment(self, comment): # type: (Any) -> None value, start_mark, end_mark = comment - while value and value[-1] == u' ': + while value and value[-1] == ' ': # empty line within indented key context # no need to update end-mark, that is not used value = value[:-1] @@ -1716,20 +1803,20 @@ class RoundTripScanner(Scanner): # `unwind_indent` before issuing BLOCK-END. # Scanners for block, flow, and plain scalars need to be modified. 
- if self.reader.index == 0 and self.reader.peek() == u'\uFEFF': + if self.reader.index == 0 and self.reader.peek() == '\uFEFF': self.reader.forward() found = False while not found: - while self.reader.peek() == u' ': + while self.reader.peek() == ' ': self.reader.forward() ch = self.reader.peek() - if ch == u'#': + if ch == '#': start_mark = self.reader.get_mark() comment = ch self.reader.forward() while ch not in _THE_END: ch = self.reader.peek() - if ch == u'\0': # don't gobble the end-of-stream character + if ch == '\0': # don't gobble the end-of-stream character break comment += ch self.reader.forward() @@ -1747,9 +1834,9 @@ class RoundTripScanner(Scanner): if not self.flow_level: self.allow_simple_key = True ch = self.reader.peek() - if ch == '\n': # empty toplevel lines + if ch == '\n': # empty toplevel lines start_mark = self.reader.get_mark() - comment = '' + comment = "" while ch: ch = self.scan_line_break(empty_line=True) comment += ch @@ -1773,19 +1860,20 @@ class RoundTripScanner(Scanner): # '\u2029 : '\u2029' # default : '' ch = self.reader.peek() # type: Text - if ch in u'\r\n\x85': - if self.reader.prefix(2) == u'\r\n': + if ch in '\r\n\x85': + if self.reader.prefix(2) == '\r\n': self.reader.forward(2) else: self.reader.forward() - return u'\n' - elif ch in u'\u2028\u2029': + return '\n' + elif ch in '\u2028\u2029': self.reader.forward() return ch elif empty_line and ch in '\t ': self.reader.forward() return ch - return u'' + return "" + # try: # import psyco |