summary | refs | log | tree | commit | diff
path: root/scanner.py
diff options
context:
space:
mode:
author Anthon van der Neut <anthon@mnt.org> 2018-08-03 22:14:57 +0200
committer Anthon van der Neut <anthon@mnt.org> 2018-08-03 22:14:57 +0200
commit dce10fcff1de54121fb8b440b883ef5d3fe2f96a (patch)
tree 072b4bd247e6f1cd95c08c7b67fea0fc96f0578e /scanner.py
parent 2966a4f215861fa05e0dc7e0cd53350766e794c6 (diff)
download ruamel.yaml-dce10fcff1de54121fb8b440b883ef5d3fe2f96a.tar.gz
Apply oitnb and mypy 0.620, then make everything work again [0.15.48]
Diffstat (limited to 'scanner.py')
-rw-r--r-- scanner.py 692
1 files changed, 390 insertions, 302 deletions
diff --git a/scanner.py b/scanner.py
index 0aab137..92fbf52 100644
--- a/scanner.py
+++ b/scanner.py
@@ -31,7 +31,7 @@ from __future__ import print_function, absolute_import, division, unicode_litera
#
from ruamel.yaml.error import MarkedYAMLError
-from ruamel.yaml.tokens import * # NOQA
+from ruamel.yaml.tokens import * # NOQA
from ruamel.yaml.compat import utf8, unichr, PY3, check_anchorname_char, nprint # NOQA
if False: # MYPY
@@ -41,9 +41,9 @@ if False: # MYPY
__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError']
-_THE_END = u'\0\r\n\x85\u2028\u2029'
-_THE_END_SPACE_TAB = u'\0 \t\r\n\x85\u2028\u2029'
-_SPACE_TAB = u' \t'
+_THE_END = '\0\r\n\x85\u2028\u2029'
+_THE_END_SPACE_TAB = '\0 \t\r\n\x85\u2028\u2029'
+_SPACE_TAB = ' \t'
class ScannerError(MarkedYAMLError):
@@ -64,7 +64,6 @@ class SimpleKey(object):
class Scanner(object):
-
def __init__(self, loader=None):
# type: (Any) -> None
"""Initialize the scanner."""
@@ -141,8 +140,8 @@ class Scanner(object):
def reader(self):
# type: () -> Any
if hasattr(self.loader, 'typ'):
- self.loader.reader # type: ignore
- return self.loader._reader # type: ignore
+ self.loader.reader
+ return self.loader._reader
@property
def scanner_processing_version(self): # prefix until un-composited
@@ -219,19 +218,19 @@ class Scanner(object):
ch = self.reader.peek()
# Is it the end of stream?
- if ch == u'\0':
+ if ch == '\0':
return self.fetch_stream_end()
# Is it a directive?
- if ch == u'%' and self.check_directive():
+ if ch == '%' and self.check_directive():
return self.fetch_directive()
# Is it the document start?
- if ch == u'-' and self.check_document_start():
+ if ch == '-' and self.check_document_start():
return self.fetch_document_start()
# Is it the document end?
- if ch == u'.' and self.check_document_end():
+ if ch == '.' and self.check_document_end():
return self.fetch_document_end()
# TODO: support for BOM within a stream.
@@ -241,63 +240,63 @@ class Scanner(object):
# Note: the order of the following checks is NOT significant.
# Is it the flow sequence start indicator?
- if ch == u'[':
+ if ch == '[':
return self.fetch_flow_sequence_start()
# Is it the flow mapping start indicator?
- if ch == u'{':
+ if ch == '{':
return self.fetch_flow_mapping_start()
# Is it the flow sequence end indicator?
- if ch == u']':
+ if ch == ']':
return self.fetch_flow_sequence_end()
# Is it the flow mapping end indicator?
- if ch == u'}':
+ if ch == '}':
return self.fetch_flow_mapping_end()
# Is it the flow entry indicator?
- if ch == u',':
+ if ch == ',':
return self.fetch_flow_entry()
# Is it the block entry indicator?
- if ch == u'-' and self.check_block_entry():
+ if ch == '-' and self.check_block_entry():
return self.fetch_block_entry()
# Is it the key indicator?
- if ch == u'?' and self.check_key():
+ if ch == '?' and self.check_key():
return self.fetch_key()
# Is it the value indicator?
- if ch == u':' and self.check_value():
+ if ch == ':' and self.check_value():
return self.fetch_value()
# Is it an alias?
- if ch == u'*':
+ if ch == '*':
return self.fetch_alias()
# Is it an anchor?
- if ch == u'&':
+ if ch == '&':
return self.fetch_anchor()
# Is it a tag?
- if ch == u'!':
+ if ch == '!':
return self.fetch_tag()
# Is it a literal scalar?
- if ch == u'|' and not self.flow_level:
+ if ch == '|' and not self.flow_level:
return self.fetch_literal()
# Is it a folded scalar?
- if ch == u'>' and not self.flow_level:
+ if ch == '>' and not self.flow_level:
return self.fetch_folded()
# Is it a single quoted scalar?
- if ch == u'\'':
+ if ch == "'":
return self.fetch_single()
# Is it a double quoted scalar?
- if ch == u'\"':
+ if ch == '"':
return self.fetch_double()
# It must be a plain scalar then.
@@ -305,9 +304,12 @@ class Scanner(object):
return self.fetch_plain()
# No? It's an error. Let's produce a nice error message.
- raise ScannerError("while scanning for the next token", None,
- "found character %r that cannot start any token"
- % utf8(ch), self.reader.get_mark())
+ raise ScannerError(
+ 'while scanning for the next token',
+ None,
+ 'found character %r that cannot start any token' % utf8(ch),
+ self.reader.get_mark(),
+ )
# Simple keys treatment.
@@ -337,12 +339,14 @@ class Scanner(object):
# height (may cause problems if indentation is broken though).
for level in list(self.possible_simple_keys):
key = self.possible_simple_keys[level]
- if key.line != self.reader.line \
- or self.reader.index - key.index > 1024:
+ if key.line != self.reader.line or self.reader.index - key.index > 1024:
if key.required:
raise ScannerError(
- "while scanning a simple key", key.mark,
- "could not find expected ':'", self.reader.get_mark())
+ 'while scanning a simple key',
+ key.mark,
+ "could not find expected ':'",
+ self.reader.get_mark(),
+ )
del self.possible_simple_keys[level]
def save_possible_simple_key(self):
@@ -360,9 +364,13 @@ class Scanner(object):
self.remove_possible_simple_key()
token_number = self.tokens_taken + len(self.tokens)
key = SimpleKey(
- token_number, required,
- self.reader.index, self.reader.line, self.reader.column,
- self.reader.get_mark())
+ token_number,
+ required,
+ self.reader.index,
+ self.reader.line,
+ self.reader.column,
+ self.reader.get_mark(),
+ )
self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self):
@@ -373,8 +381,11 @@ class Scanner(object):
if key.required:
raise ScannerError(
- "while scanning a simple key", key.mark,
- "could not find expected ':'", self.reader.get_mark())
+ 'while scanning a simple key',
+ key.mark,
+ "could not find expected ':'",
+ self.reader.get_mark(),
+ )
del self.possible_simple_keys[self.flow_level]
@@ -423,8 +434,7 @@ class Scanner(object):
# Read the token.
mark = self.reader.get_mark()
# Add STREAM-START.
- self.tokens.append(StreamStartToken(mark, mark,
- encoding=self.reader.encoding))
+ self.tokens.append(StreamStartToken(mark, mark, encoding=self.reader.encoding))
def fetch_stream_end(self):
# type: () -> None
@@ -539,9 +549,9 @@ class Scanner(object):
if not self.flow_level:
# Are we allowed to start a new entry?
if not self.allow_simple_key:
- raise ScannerError(None, None,
- "sequence entries are not allowed here",
- self.reader.get_mark())
+ raise ScannerError(
+ None, None, 'sequence entries are not allowed here', self.reader.get_mark()
+ )
# We may need to add BLOCK-SEQUENCE-START.
if self.add_indent(self.reader.column):
mark = self.reader.get_mark()
@@ -568,9 +578,9 @@ class Scanner(object):
# Are we allowed to start a key (not nessesary a simple)?
if not self.allow_simple_key:
- raise ScannerError(None, None,
- "mapping keys are not allowed here",
- self.reader.get_mark())
+ raise ScannerError(
+ None, None, 'mapping keys are not allowed here', self.reader.get_mark()
+ )
# We may need to add BLOCK-MAPPING-START.
if self.add_indent(self.reader.column):
@@ -596,8 +606,9 @@ class Scanner(object):
# Add KEY.
key = self.possible_simple_keys[self.flow_level]
del self.possible_simple_keys[self.flow_level]
- self.tokens.insert(key.token_number - self.tokens_taken,
- KeyToken(key.mark, key.mark))
+ self.tokens.insert(
+ key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark)
+ )
# If this key starts a new block mapping, we need to add
# BLOCK-MAPPING-START.
@@ -605,7 +616,8 @@ class Scanner(object):
if self.add_indent(key.column):
self.tokens.insert(
key.token_number - self.tokens_taken,
- BlockMappingStartToken(key.mark, key.mark))
+ BlockMappingStartToken(key.mark, key.mark),
+ )
# There cannot be two simple keys one after another.
self.allow_simple_key = False
@@ -621,9 +633,12 @@ class Scanner(object):
# We are allowed to start a complex value if and only if
# we can start a simple key.
if not self.allow_simple_key:
- raise ScannerError(None, None,
- "mapping values are not allowed here",
- self.reader.get_mark())
+ raise ScannerError(
+ None,
+ None,
+ 'mapping values are not allowed here',
+ self.reader.get_mark(),
+ )
# If this value starts a new block mapping, we need to add
# BLOCK-MAPPING-START. It will be detected as an error later by
@@ -691,7 +706,7 @@ class Scanner(object):
def fetch_single(self):
# type: () -> None
- self.fetch_flow_scalar(style='\'')
+ self.fetch_flow_scalar(style="'")
def fetch_double(self):
# type: () -> None
@@ -731,8 +746,7 @@ class Scanner(object):
# type: () -> Any
# DOCUMENT-START: ^ '---' (' '|'\n')
if self.reader.column == 0:
- if self.reader.prefix(3) == u'---' \
- and self.reader.peek(3) in _THE_END_SPACE_TAB:
+ if self.reader.prefix(3) == '---' and self.reader.peek(3) in _THE_END_SPACE_TAB:
return True
return None
@@ -740,8 +754,7 @@ class Scanner(object):
# type: () -> Any
# DOCUMENT-END: ^ '...' (' '|'\n')
if self.reader.column == 0:
- if self.reader.prefix(3) == u'...' \
- and self.reader.peek(3) in _THE_END_SPACE_TAB:
+ if self.reader.prefix(3) == '...' and self.reader.peek(3) in _THE_END_SPACE_TAB:
return True
return None
@@ -786,11 +799,12 @@ class Scanner(object):
# independent.
ch = self.reader.peek()
if self.scanner_processing_version == (1, 1):
- return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' or \
- (self.reader.peek(1) not in _THE_END_SPACE_TAB and
- (ch == u'-' or (not self.flow_level and ch in u'?:')))
+ return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`' or (
+ self.reader.peek(1) not in _THE_END_SPACE_TAB
+ and (ch == '-' or (not self.flow_level and ch in '?:'))
+ )
# YAML 1.2
- if ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`':
+ if ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`':
# ################### ^ ???
return True
ch1 = self.reader.peek(1)
@@ -799,8 +813,9 @@ class Scanner(object):
if ch == ':' and bool(self.flow_level) and ch1 not in _SPACE_TAB:
return True
- return (self.reader.peek(1) not in _THE_END_SPACE_TAB and
- (ch == u'-' or (not self.flow_level and ch in u'?:')))
+ return self.reader.peek(1) not in _THE_END_SPACE_TAB and (
+ ch == '-' or (not self.flow_level and ch in '?:')
+ )
# Scanners.
@@ -825,13 +840,13 @@ class Scanner(object):
# `unwind_indent` before issuing BLOCK-END.
# Scanners for block, flow, and plain scalars need to be modified.
- if self.reader.index == 0 and self.reader.peek() == u'\uFEFF':
+ if self.reader.index == 0 and self.reader.peek() == '\uFEFF':
self.reader.forward()
found = False
while not found:
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
- if self.reader.peek() == u'#':
+ if self.reader.peek() == '#':
while self.reader.peek() not in _THE_END:
self.reader.forward()
if self.scan_line_break():
@@ -848,10 +863,10 @@ class Scanner(object):
self.reader.forward()
name = self.scan_directive_name(start_mark)
value = None
- if name == u'YAML':
+ if name == 'YAML':
value = self.scan_yaml_directive_value(start_mark)
end_mark = self.reader.get_mark()
- elif name == u'TAG':
+ elif name == 'TAG':
value = self.scan_tag_directive_value(start_mark)
end_mark = self.reader.get_mark()
else:
@@ -866,58 +881,65 @@ class Scanner(object):
# See the specification for details.
length = 0
ch = self.reader.peek(length)
- while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
- or ch in u'-_:.':
+ while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_:.':
length += 1
ch = self.reader.peek(length)
if not length:
raise ScannerError(
- "while scanning a directive", start_mark,
- "expected alphabetic or numeric character, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning a directive',
+ start_mark,
+ 'expected alphabetic or numeric character, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
value = self.reader.prefix(length)
self.reader.forward(length)
ch = self.reader.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
+ if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError(
- "while scanning a directive", start_mark,
- "expected alphabetic or numeric character, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning a directive',
+ start_mark,
+ 'expected alphabetic or numeric character, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
return value
def scan_yaml_directive_value(self, start_mark):
# type: (Any) -> Any
# See the specification for details.
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
major = self.scan_yaml_directive_number(start_mark)
if self.reader.peek() != '.':
raise ScannerError(
- "while scanning a directive", start_mark,
- "expected a digit or '.', but found %r"
- % utf8(self.reader.peek()),
- self.reader.get_mark())
+ 'while scanning a directive',
+ start_mark,
+ "expected a digit or '.', but found %r" % utf8(self.reader.peek()),
+ self.reader.get_mark(),
+ )
self.reader.forward()
minor = self.scan_yaml_directive_number(start_mark)
- if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029':
+ if self.reader.peek() not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError(
- "while scanning a directive", start_mark,
- "expected a digit or ' ', but found %r"
- % utf8(self.reader.peek()),
- self.reader.get_mark())
+ 'while scanning a directive',
+ start_mark,
+ "expected a digit or ' ', but found %r" % utf8(self.reader.peek()),
+ self.reader.get_mark(),
+ )
return (major, minor)
def scan_yaml_directive_number(self, start_mark):
# type: (Any) -> Any
# See the specification for details.
ch = self.reader.peek()
- if not (u'0' <= ch <= u'9'):
+ if not ('0' <= ch <= '9'):
raise ScannerError(
- "while scanning a directive", start_mark,
- "expected a digit, but found %r" % utf8(ch),
- self.reader.get_mark())
+ 'while scanning a directive',
+ start_mark,
+ 'expected a digit, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
length = 0
- while u'0' <= self.reader.peek(length) <= u'9':
+ while '0' <= self.reader.peek(length) <= '9':
length += 1
value = int(self.reader.prefix(length))
self.reader.forward(length)
@@ -926,10 +948,10 @@ class Scanner(object):
def scan_tag_directive_value(self, start_mark):
# type: (Any) -> Any
# See the specification for details.
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
handle = self.scan_tag_directive_handle(start_mark)
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
prefix = self.scan_tag_directive_prefix(start_mark)
return (handle, prefix)
@@ -939,10 +961,13 @@ class Scanner(object):
# See the specification for details.
value = self.scan_tag_handle('directive', start_mark)
ch = self.reader.peek()
- if ch != u' ':
- raise ScannerError("while scanning a directive", start_mark,
- "expected ' ', but found %r" % utf8(ch),
- self.reader.get_mark())
+ if ch != ' ':
+ raise ScannerError(
+ 'while scanning a directive',
+ start_mark,
+ "expected ' ', but found %r" % utf8(ch),
+ self.reader.get_mark(),
+ )
return value
def scan_tag_directive_prefix(self, start_mark):
@@ -950,26 +975,31 @@ class Scanner(object):
# See the specification for details.
value = self.scan_tag_uri('directive', start_mark)
ch = self.reader.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_mark,
- "expected ' ', but found %r" % utf8(ch),
- self.reader.get_mark())
+ if ch not in '\0 \r\n\x85\u2028\u2029':
+ raise ScannerError(
+ 'while scanning a directive',
+ start_mark,
+ "expected ' ', but found %r" % utf8(ch),
+ self.reader.get_mark(),
+ )
return value
def scan_directive_ignored_line(self, start_mark):
# type: (Any) -> None
# See the specification for details.
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
- if self.reader.peek() == u'#':
+ if self.reader.peek() == '#':
while self.reader.peek() not in _THE_END:
self.reader.forward()
ch = self.reader.peek()
if ch not in _THE_END:
raise ScannerError(
- "while scanning a directive", start_mark,
- "expected a comment or a line break, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning a directive',
+ start_mark,
+ 'expected a comment or a line break, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
self.scan_line_break()
def scan_anchor(self, TokenClass):
@@ -984,7 +1014,7 @@ class Scanner(object):
# Therefore we restrict aliases to numbers and ASCII letters.
start_mark = self.reader.get_mark()
indicator = self.reader.peek()
- if indicator == u'*':
+ if indicator == '*':
name = 'alias'
else:
name = 'anchor'
@@ -998,19 +1028,23 @@ class Scanner(object):
ch = self.reader.peek(length)
if not length:
raise ScannerError(
- "while scanning an %s" % name, start_mark,
- "expected alphabetic or numeric character, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning an %s' % name,
+ start_mark,
+ 'expected alphabetic or numeric character, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
value = self.reader.prefix(length)
self.reader.forward(length)
# ch1 = ch
# ch = self.reader.peek() # no need to peek, ch is already set
# assert ch1 == ch
- if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`':
+ if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`':
raise ScannerError(
- "while scanning an %s" % name, start_mark,
- "expected alphabetic or numeric character, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning an %s' % name,
+ start_mark,
+ 'expected alphabetic or numeric character, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
end_mark = self.reader.get_mark()
return TokenClass(value, start_mark, end_mark)
@@ -1019,41 +1053,46 @@ class Scanner(object):
# See the specification for details.
start_mark = self.reader.get_mark()
ch = self.reader.peek(1)
- if ch == u'<':
+ if ch == '<':
handle = None
self.reader.forward(2)
suffix = self.scan_tag_uri('tag', start_mark)
- if self.reader.peek() != u'>':
+ if self.reader.peek() != '>':
raise ScannerError(
- "while parsing a tag", start_mark,
+ 'while parsing a tag',
+ start_mark,
"expected '>', but found %r" % utf8(self.reader.peek()),
- self.reader.get_mark())
+ self.reader.get_mark(),
+ )
self.reader.forward()
elif ch in _THE_END_SPACE_TAB:
handle = None
- suffix = u'!'
+ suffix = '!'
self.reader.forward()
else:
length = 1
use_handle = False
- while ch not in u'\0 \r\n\x85\u2028\u2029':
- if ch == u'!':
+ while ch not in '\0 \r\n\x85\u2028\u2029':
+ if ch == '!':
use_handle = True
break
length += 1
ch = self.reader.peek(length)
- handle = u'!'
+ handle = '!'
if use_handle:
handle = self.scan_tag_handle('tag', start_mark)
else:
- handle = u'!'
+ handle = '!'
self.reader.forward()
suffix = self.scan_tag_uri('tag', start_mark)
ch = self.reader.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a tag", start_mark,
- "expected ' ', but found %r" % utf8(ch),
- self.reader.get_mark())
+ if ch not in '\0 \r\n\x85\u2028\u2029':
+ raise ScannerError(
+ 'while scanning a tag',
+ start_mark,
+ "expected ' ', but found %r" % utf8(ch),
+ self.reader.get_mark(),
+ )
value = (handle, suffix)
end_mark = self.reader.get_mark()
return TagToken(value, start_mark, end_mark)
@@ -1078,11 +1117,13 @@ class Scanner(object):
min_indent = self.indent + 1
if increment is None:
# no increment and top level, min_indent could be 0
- if min_indent < 1 and \
- (style not in '|>' or (
- self.scanner_processing_version == (1, 1)) and
- getattr(self.loader,
- 'top_level_block_style_scalar_no_indent_error_1_1', False)):
+ if min_indent < 1 and (
+ style not in '|>'
+ or (self.scanner_processing_version == (1, 1))
+ and getattr(
+ self.loader, 'top_level_block_style_scalar_no_indent_error_1_1', False
+ )
+ ):
min_indent = 1
breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
indent = max(min_indent, max_indent)
@@ -1091,12 +1132,12 @@ class Scanner(object):
min_indent = 1
indent = min_indent + increment - 1
breaks, end_mark = self.scan_block_scalar_breaks(indent)
- line_break = u''
+ line_break = ""
# Scan the inner part of the block scalar.
- while self.reader.column == indent and self.reader.peek() != u'\0':
+ while self.reader.column == indent and self.reader.peek() != '\0':
chunks.extend(breaks)
- leading_non_space = self.reader.peek() not in u' \t'
+ leading_non_space = self.reader.peek() not in ' \t'
length = 0
while self.reader.peek(length) not in _THE_END:
length += 1
@@ -1109,16 +1150,20 @@ class Scanner(object):
# end of document/start_new_document
if self.check_document_start() or self.check_document_end():
break
- if self.reader.column == indent and self.reader.peek() != u'\0':
+ if self.reader.column == indent and self.reader.peek() != '\0':
# Unfortunately, folding rules are ambiguous.
#
# This is the folding according to the specification:
- if folded and line_break == u'\n' \
- and leading_non_space and self.reader.peek() not in u' \t':
+ if (
+ folded
+ and line_break == '\n'
+ and leading_non_space
+ and self.reader.peek() not in ' \t'
+ ):
if not breaks:
- chunks.append(u' ')
+ chunks.append(' ')
else:
chunks.append(line_break)
@@ -1147,7 +1192,7 @@ class Scanner(object):
trailing.extend(breaks)
# We are done.
- token = ScalarToken(u''.join(chunks), False, start_mark, end_mark, style)
+ token = ScalarToken("".join(chunks), False, start_mark, end_mark, style)
if len(trailing) > 0:
# print('trailing 1', trailing) # XXXXX
# Eat whitespaces and comments until we reach the next token.
@@ -1159,8 +1204,7 @@ class Scanner(object):
# Keep track of the trailing whitespace and following comments
# as a comment token, if isn't all included in the actual value.
comment_end_mark = self.reader.get_mark()
- comment = CommentToken(''.join(trailing), end_mark,
- comment_end_mark)
+ comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
token.add_post_comment(comment)
return token
@@ -1170,59 +1214,66 @@ class Scanner(object):
chomping = None
increment = None
ch = self.reader.peek()
- if ch in u'+-':
+ if ch in '+-':
if ch == '+':
chomping = True
else:
chomping = False
self.reader.forward()
ch = self.reader.peek()
- if ch in u'0123456789':
+ if ch in '0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError(
- "while scanning a block scalar", start_mark,
- "expected indentation indicator in the range 1-9, "
- "but found 0", self.reader.get_mark())
+ 'while scanning a block scalar',
+ start_mark,
+ 'expected indentation indicator in the range 1-9, ' 'but found 0',
+ self.reader.get_mark(),
+ )
self.reader.forward()
- elif ch in u'0123456789':
+ elif ch in '0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError(
- "while scanning a block scalar", start_mark,
- "expected indentation indicator in the range 1-9, "
- "but found 0",
- self.reader.get_mark())
+ 'while scanning a block scalar',
+ start_mark,
+ 'expected indentation indicator in the range 1-9, ' 'but found 0',
+ self.reader.get_mark(),
+ )
self.reader.forward()
ch = self.reader.peek()
- if ch in u'+-':
+ if ch in '+-':
if ch == '+':
chomping = True
else:
chomping = False
self.reader.forward()
ch = self.reader.peek()
- if ch not in u'\0 \r\n\x85\u2028\u2029':
+ if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError(
- "while scanning a block scalar", start_mark,
- "expected chomping or indentation indicators, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning a block scalar',
+ start_mark,
+ 'expected chomping or indentation indicators, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
return chomping, increment
def scan_block_scalar_ignored_line(self, start_mark):
# type: (Any) -> Any
# See the specification for details.
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
- if self.reader.peek() == u'#':
+ if self.reader.peek() == '#':
while self.reader.peek() not in _THE_END:
self.reader.forward()
ch = self.reader.peek()
if ch not in _THE_END:
raise ScannerError(
- "while scanning a block scalar", start_mark,
- "expected a comment or a line break, but found %r"
- % utf8(ch), self.reader.get_mark())
+ 'while scanning a block scalar',
+ start_mark,
+ 'expected a comment or a line break, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
self.scan_line_break()
def scan_block_scalar_indentation(self):
@@ -1231,8 +1282,8 @@ class Scanner(object):
chunks = []
max_indent = 0
end_mark = self.reader.get_mark()
- while self.reader.peek() in u' \r\n\x85\u2028\u2029':
- if self.reader.peek() != u' ':
+ while self.reader.peek() in ' \r\n\x85\u2028\u2029':
+ if self.reader.peek() != ' ':
chunks.append(self.scan_line_break())
end_mark = self.reader.get_mark()
else:
@@ -1246,12 +1297,12 @@ class Scanner(object):
# See the specification for details.
chunks = []
end_mark = self.reader.get_mark()
- while self.reader.column < indent and self.reader.peek() == u' ':
+ while self.reader.column < indent and self.reader.peek() == ' ':
self.reader.forward()
- while self.reader.peek() in u'\r\n\x85\u2028\u2029':
+ while self.reader.peek() in '\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
end_mark = self.reader.get_mark()
- while self.reader.column < indent and self.reader.peek() == u' ':
+ while self.reader.column < indent and self.reader.peek() == ' ':
self.reader.forward()
return chunks, end_mark
@@ -1277,35 +1328,30 @@ class Scanner(object):
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
self.reader.forward()
end_mark = self.reader.get_mark()
- return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
- style)
+ return ScalarToken("".join(chunks), False, start_mark, end_mark, style)
ESCAPE_REPLACEMENTS = {
- u'0': u'\0',
- u'a': u'\x07',
- u'b': u'\x08',
- u't': u'\x09',
- u'\t': u'\x09',
- u'n': u'\x0A',
- u'v': u'\x0B',
- u'f': u'\x0C',
- u'r': u'\x0D',
- u'e': u'\x1B',
- u' ': u'\x20',
- u'\"': u'\"',
- u'/': u'/', # as per http://www.json.org/
- u'\\': u'\\',
- u'N': u'\x85',
- u'_': u'\xA0',
- u'L': u'\u2028',
- u'P': u'\u2029',
+ '0': '\0',
+ 'a': '\x07',
+ 'b': '\x08',
+ 't': '\x09',
+ '\t': '\x09',
+ 'n': '\x0A',
+ 'v': '\x0B',
+ 'f': '\x0C',
+ 'r': '\x0D',
+ 'e': '\x1B',
+ ' ': '\x20',
+ '"': '"',
+ '/': '/', # as per http://www.json.org/
+ '\\': '\\',
+ 'N': '\x85',
+ '_': '\xA0',
+ 'L': '\u2028',
+ 'P': '\u2029',
}
- ESCAPE_CODES = {
- u'x': 2,
- u'u': 4,
- u'U': 8,
- }
+ ESCAPE_CODES = {'x': 2, 'u': 4, 'U': 8}
def scan_flow_scalar_non_spaces(self, double, start_mark):
# type: (Any, Any) -> Any
@@ -1313,19 +1359,19 @@ class Scanner(object):
chunks = [] # type: List[Any]
while True:
length = 0
- while self.reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
+ while self.reader.peek(length) not in '\'"\\\0 \t\r\n\x85\u2028\u2029':
length += 1
if length != 0:
chunks.append(self.reader.prefix(length))
self.reader.forward(length)
ch = self.reader.peek()
- if not double and ch == u'\'' and self.reader.peek(1) == u'\'':
- chunks.append(u'\'')
+ if not double and ch == "'" and self.reader.peek(1) == "'":
+ chunks.append("'")
self.reader.forward(2)
- elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
+ elif (double and ch == "'") or (not double and ch in '"\\'):
chunks.append(ch)
self.reader.forward()
- elif double and ch == u'\\':
+ elif double and ch == '\\':
self.reader.forward()
ch = self.reader.peek()
if ch in self.ESCAPE_REPLACEMENTS:
@@ -1335,25 +1381,27 @@ class Scanner(object):
length = self.ESCAPE_CODES[ch]
self.reader.forward()
for k in range(length):
- if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
+ if self.reader.peek(k) not in '0123456789ABCDEFabcdef':
raise ScannerError(
- "while scanning a double-quoted scalar",
+ 'while scanning a double-quoted scalar',
start_mark,
- "expected escape sequence of %d hexdecimal "
- "numbers, but found %r" %
- (length, utf8(self.reader.peek(k))), self.reader.get_mark())
+ 'expected escape sequence of %d hexdecimal '
+ 'numbers, but found %r' % (length, utf8(self.reader.peek(k))),
+ self.reader.get_mark(),
+ )
code = int(self.reader.prefix(length), 16)
chunks.append(unichr(code))
self.reader.forward(length)
- elif ch in u'\r\n\x85\u2028\u2029':
+ elif ch in '\r\n\x85\u2028\u2029':
self.scan_line_break()
- chunks.extend(self.scan_flow_scalar_breaks(
- double, start_mark))
+ chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
else:
raise ScannerError(
- "while scanning a double-quoted scalar", start_mark,
- "found unknown escape character %r" % utf8(ch),
- self.reader.get_mark())
+ 'while scanning a double-quoted scalar',
+ start_mark,
+ 'found unknown escape character %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
else:
return chunks
@@ -1362,22 +1410,25 @@ class Scanner(object):
# See the specification for details.
chunks = []
length = 0
- while self.reader.peek(length) in u' \t':
+ while self.reader.peek(length) in ' \t':
length += 1
whitespaces = self.reader.prefix(length)
self.reader.forward(length)
ch = self.reader.peek()
- if ch == u'\0':
+ if ch == '\0':
raise ScannerError(
- "while scanning a quoted scalar", start_mark,
- "found unexpected end of stream", self.reader.get_mark())
- elif ch in u'\r\n\x85\u2028\u2029':
+ 'while scanning a quoted scalar',
+ start_mark,
+ 'found unexpected end of stream',
+ self.reader.get_mark(),
+ )
+ elif ch in '\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
breaks = self.scan_flow_scalar_breaks(double, start_mark)
- if line_break != u'\n':
+ if line_break != '\n':
chunks.append(line_break)
elif not breaks:
- chunks.append(u' ')
+ chunks.append(' ')
chunks.extend(breaks)
else:
chunks.append(whitespaces)
@@ -1391,15 +1442,18 @@ class Scanner(object):
# Instead of checking indentation, we check for document
# separators.
prefix = self.reader.prefix(3)
- if (prefix == u'---' or prefix == u'...') \
- and self.reader.peek(3) in _THE_END_SPACE_TAB:
- raise ScannerError("while scanning a quoted scalar",
- start_mark,
- "found unexpected document separator",
- self.reader.get_mark())
- while self.reader.peek() in u' \t':
+ if (prefix == '---' or prefix == '...') and self.reader.peek(
+ 3
+ ) in _THE_END_SPACE_TAB:
+ raise ScannerError(
+ 'while scanning a quoted scalar',
+ start_mark,
+ 'found unexpected document separator',
+ self.reader.get_mark(),
+ )
+ while self.reader.peek() in ' \t':
self.reader.forward()
- if self.reader.peek() in u'\r\n\x85\u2028\u2029':
+ if self.reader.peek() in '\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
else:
return chunks
@@ -1422,31 +1476,41 @@ class Scanner(object):
spaces = [] # type: List[Any]
while True:
length = 0
- if self.reader.peek() == u'#':
+ if self.reader.peek() == '#':
break
while True:
ch = self.reader.peek(length)
- if (ch == u':' and
- self.reader.peek(length + 1) not in _THE_END_SPACE_TAB):
+ if ch == ':' and self.reader.peek(length + 1) not in _THE_END_SPACE_TAB:
pass
- elif (ch == u'?' and self.scanner_processing_version != (1, 1)):
+ elif ch == '?' and self.scanner_processing_version != (1, 1):
pass
- elif (ch in _THE_END_SPACE_TAB or
- (not self.flow_level and ch == u':' and
- self.reader.peek(length + 1) in _THE_END_SPACE_TAB) or
- (self.flow_level and ch in u',:?[]{}')):
+ elif (
+ ch in _THE_END_SPACE_TAB
+ or (
+ not self.flow_level
+ and ch == ':'
+ and self.reader.peek(length + 1) in _THE_END_SPACE_TAB
+ )
+ or (self.flow_level and ch in ',:?[]{}')
+ ):
break
length += 1
# It's not clear what we should do with ':' in the flow context.
- if (self.flow_level and ch == u':' and
- self.reader.peek(length + 1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):
+ if (
+ self.flow_level
+ and ch == ':'
+ and self.reader.peek(length + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'
+ ):
self.reader.forward(length)
raise ScannerError(
- "while scanning a plain scalar", start_mark,
- "found unexpected ':'", self.reader.get_mark(),
- "Please check "
- "http://pyyaml.org/wiki/YAMLColonInFlowContext "
- "for details.")
+ 'while scanning a plain scalar',
+ start_mark,
+ "found unexpected ':'",
+ self.reader.get_mark(),
+ 'Please check '
+ 'http://pyyaml.org/wiki/YAMLColonInFlowContext '
+ 'for details.',
+ )
if length == 0:
break
self.allow_simple_key = False
@@ -1455,14 +1519,17 @@ class Scanner(object):
self.reader.forward(length)
end_mark = self.reader.get_mark()
spaces = self.scan_plain_spaces(indent, start_mark)
- if not spaces or self.reader.peek() == u'#' \
- or (not self.flow_level and self.reader.column < indent):
+ if (
+ not spaces
+ or self.reader.peek() == '#'
+ or (not self.flow_level and self.reader.column < indent)
+ ):
break
- token = ScalarToken(u''.join(chunks), True, start_mark, end_mark)
+ token = ScalarToken("".join(chunks), True, start_mark, end_mark)
if spaces and spaces[0] == '\n':
# Create a comment token to preserve the trailing line breaks.
- comment = CommentToken(''.join(spaces) + '\n', start_mark, end_mark)
+ comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
token.add_post_comment(comment)
return token
@@ -1473,32 +1540,34 @@ class Scanner(object):
# We just forbid them completely. Do not use tabs in YAML!
chunks = []
length = 0
- while self.reader.peek(length) in u' ':
+ while self.reader.peek(length) in ' ':
length += 1
whitespaces = self.reader.prefix(length)
self.reader.forward(length)
ch = self.reader.peek()
- if ch in u'\r\n\x85\u2028\u2029':
+ if ch in '\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
self.allow_simple_key = True
prefix = self.reader.prefix(3)
- if (prefix == u'---' or prefix == u'...') \
- and self.reader.peek(3) in _THE_END_SPACE_TAB:
+ if (prefix == '---' or prefix == '...') and self.reader.peek(
+ 3
+ ) in _THE_END_SPACE_TAB:
return
breaks = []
- while self.reader.peek() in u' \r\n\x85\u2028\u2029':
+ while self.reader.peek() in ' \r\n\x85\u2028\u2029':
if self.reader.peek() == ' ':
self.reader.forward()
else:
breaks.append(self.scan_line_break())
prefix = self.reader.prefix(3)
- if (prefix == u'---' or prefix == u'...') \
- and self.reader.peek(3) in _THE_END_SPACE_TAB:
+ if (prefix == '---' or prefix == '...') and self.reader.peek(
+ 3
+ ) in _THE_END_SPACE_TAB:
return
- if line_break != u'\n':
+ if line_break != '\n':
chunks.append(line_break)
elif not breaks:
- chunks.append(u' ')
+ chunks.append(' ')
chunks.extend(breaks)
elif whitespaces:
chunks.append(whitespaces)
@@ -1510,23 +1579,27 @@ class Scanner(object):
# For some strange reasons, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
ch = self.reader.peek()
- if ch != u'!':
- raise ScannerError("while scanning a %s" % name, start_mark,
- "expected '!', but found %r" % utf8(ch),
- self.reader.get_mark())
+ if ch != '!':
+ raise ScannerError(
+ 'while scanning a %s' % name,
+ start_mark,
+ "expected '!', but found %r" % utf8(ch),
+ self.reader.get_mark(),
+ )
length = 1
ch = self.reader.peek(length)
- if ch != u' ':
- while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' \
- or u'a' <= ch <= u'z' \
- or ch in u'-_':
+ if ch != ' ':
+ while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_':
length += 1
ch = self.reader.peek(length)
- if ch != u'!':
+ if ch != '!':
self.reader.forward(length)
- raise ScannerError("while scanning a %s" % name, start_mark,
- "expected '!', but found %r" % utf8(ch),
- self.reader.get_mark())
+ raise ScannerError(
+ 'while scanning a %s' % name,
+ start_mark,
+ "expected '!', but found %r" % utf8(ch),
+ self.reader.get_mark(),
+ )
length += 1
value = self.reader.prefix(length)
self.reader.forward(length)
@@ -1539,9 +1612,13 @@ class Scanner(object):
chunks = []
length = 0
ch = self.reader.peek(length)
- while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
- or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
- if ch == u'%':
+ while (
+ '0' <= ch <= '9'
+ or 'A' <= ch <= 'Z'
+ or 'a' <= ch <= 'z'
+ or ch in "-;/?:@&=+$,_.!~*'()[]%"
+ ):
+ if ch == '%':
chunks.append(self.reader.prefix(length))
self.reader.forward(length)
length = 0
@@ -1554,25 +1631,30 @@ class Scanner(object):
self.reader.forward(length)
length = 0
if not chunks:
- raise ScannerError("while parsing a %s" % name, start_mark,
- "expected URI, but found %r" % utf8(ch),
- self.reader.get_mark())
- return u''.join(chunks)
+ raise ScannerError(
+ 'while parsing a %s' % name,
+ start_mark,
+ 'expected URI, but found %r' % utf8(ch),
+ self.reader.get_mark(),
+ )
+ return "".join(chunks)
def scan_uri_escapes(self, name, start_mark):
# type: (Any, Any) -> Any
# See the specification for details.
code_bytes = [] # type: List[Any]
mark = self.reader.get_mark()
- while self.reader.peek() == u'%':
+ while self.reader.peek() == '%':
self.reader.forward()
for k in range(2):
- if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
+ if self.reader.peek(k) not in '0123456789ABCDEFabcdef':
raise ScannerError(
- "while scanning a %s" % name, start_mark,
- "expected URI escape sequence of 2 hexdecimal numbers,"
- " but found %r"
- % utf8(self.reader.peek(k)), self.reader.get_mark())
+ 'while scanning a %s' % name,
+ start_mark,
+ 'expected URI escape sequence of 2 hexdecimal numbers,'
+ ' but found %r' % utf8(self.reader.peek(k)),
+ self.reader.get_mark(),
+ )
if PY3:
code_bytes.append(int(self.reader.prefix(2), 16))
else:
@@ -1582,10 +1664,11 @@ class Scanner(object):
if PY3:
value = bytes(code_bytes).decode('utf-8')
else:
- value = unicode(b''.join(code_bytes), 'utf-8') # type: ignore
+ # fmt: off
+ value = unicode(b"".join(code_bytes), 'utf-8') # type: ignore
+ # fmt: on
except UnicodeDecodeError as exc:
- raise ScannerError("while scanning a %s" % name, start_mark,
- str(exc), mark)
+ raise ScannerError('while scanning a %s' % name, start_mark, str(exc), mark)
return value
def scan_line_break(self):
@@ -1599,16 +1682,16 @@ class Scanner(object):
# '\u2029 : '\u2029'
# default : ''
ch = self.reader.peek()
- if ch in u'\r\n\x85':
- if self.reader.prefix(2) == u'\r\n':
+ if ch in '\r\n\x85':
+ if self.reader.prefix(2) == '\r\n':
self.reader.forward(2)
else:
self.reader.forward()
- return u'\n'
- elif ch in u'\u2028\u2029':
+ return '\n'
+ elif ch in '\u2028\u2029':
self.reader.forward()
return ch
- return u''
+ return ""
class RoundTripScanner(Scanner):
@@ -1673,11 +1756,15 @@ class RoundTripScanner(Scanner):
# scalar, value token. FlowXEndToken, otherwise
# hidden streamtokens could get them (leave them and they will be
# pre comments for the next map/seq
- if len(self.tokens) > 1 and \
- isinstance(self.tokens[0], (ScalarToken, ValueToken,
- FlowSequenceEndToken, FlowMappingEndToken, )) and \
- isinstance(self.tokens[1], CommentToken) and \
- self.tokens[0].end_mark.line == self.tokens[1].start_mark.line:
+ if (
+ len(self.tokens) > 1
+ and isinstance(
+ self.tokens[0],
+ (ScalarToken, ValueToken, FlowSequenceEndToken, FlowMappingEndToken),
+ )
+ and isinstance(self.tokens[1], CommentToken)
+ and self.tokens[0].end_mark.line == self.tokens[1].start_mark.line
+ ):
self.tokens_taken += 1
self.tokens[0].add_post_comment(self.tokens.pop(1))
self.tokens_taken += 1
@@ -1687,7 +1774,7 @@ class RoundTripScanner(Scanner):
def fetch_comment(self, comment):
# type: (Any) -> None
value, start_mark, end_mark = comment
- while value and value[-1] == u' ':
+ while value and value[-1] == ' ':
# empty line within indented key context
# no need to update end-mark, that is not used
value = value[:-1]
@@ -1716,20 +1803,20 @@ class RoundTripScanner(Scanner):
# `unwind_indent` before issuing BLOCK-END.
# Scanners for block, flow, and plain scalars need to be modified.
- if self.reader.index == 0 and self.reader.peek() == u'\uFEFF':
+ if self.reader.index == 0 and self.reader.peek() == '\uFEFF':
self.reader.forward()
found = False
while not found:
- while self.reader.peek() == u' ':
+ while self.reader.peek() == ' ':
self.reader.forward()
ch = self.reader.peek()
- if ch == u'#':
+ if ch == '#':
start_mark = self.reader.get_mark()
comment = ch
self.reader.forward()
while ch not in _THE_END:
ch = self.reader.peek()
- if ch == u'\0': # don't gobble the end-of-stream character
+ if ch == '\0': # don't gobble the end-of-stream character
break
comment += ch
self.reader.forward()
@@ -1747,9 +1834,9 @@ class RoundTripScanner(Scanner):
if not self.flow_level:
self.allow_simple_key = True
ch = self.reader.peek()
- if ch == '\n': # empty toplevel lines
+ if ch == '\n': # empty toplevel lines
start_mark = self.reader.get_mark()
- comment = ''
+ comment = ""
while ch:
ch = self.scan_line_break(empty_line=True)
comment += ch
@@ -1773,19 +1860,20 @@ class RoundTripScanner(Scanner):
# '\u2029 : '\u2029'
# default : ''
ch = self.reader.peek() # type: Text
- if ch in u'\r\n\x85':
- if self.reader.prefix(2) == u'\r\n':
+ if ch in '\r\n\x85':
+ if self.reader.prefix(2) == '\r\n':
self.reader.forward(2)
else:
self.reader.forward()
- return u'\n'
- elif ch in u'\u2028\u2029':
+ return '\n'
+ elif ch in '\u2028\u2029':
self.reader.forward()
return ch
elif empty_line and ch in '\t ':
self.reader.forward()
return ch
- return u''
+ return ""
+
# try:
# import psyco