diff options
Diffstat (limited to 'scanner.py')
-rw-r--r-- | scanner.py | 95 |
1 files changed, 48 insertions, 47 deletions
@@ -1,7 +1,5 @@ # coding: utf-8 -from __future__ import print_function, absolute_import, division, unicode_literals - # Scanner produces tokens of the following types: # STREAM-START # STREAM-END @@ -32,7 +30,7 @@ from __future__ import print_function, absolute_import, division, unicode_litera from ruamel.yaml.error import MarkedYAMLError from ruamel.yaml.tokens import * # NOQA -from ruamel.yaml.compat import utf8, unichr, PY3, check_anchorname_char, nprint # NOQA +from ruamel.yaml.compat import _F, check_anchorname_char, nprint # NOQA if False: # MYPY from typing import Any, Dict, Optional, List, Union, Text # NOQA @@ -245,7 +243,7 @@ class Scanner(object): return self.fetch_document_end() # TODO: support for BOM within a stream. - # if ch == u'\uFEFF': + # if ch == '\uFEFF': # return self.fetch_bom() <-- issue BOMToken # Note: the order of the following checks is NOT significant. @@ -318,7 +316,7 @@ class Scanner(object): raise ScannerError( 'while scanning for the next token', None, - 'found character %r that cannot start any token' % utf8(ch), + _F('found character {ch!r} that cannot start any token', ch=ch), self.reader.get_mark(), ) @@ -917,7 +915,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - 'expected alphabetic or numeric character, but found %r' % utf8(ch), + _F('expected alphabetic or numeric character, but found {ch!r}', ch=ch), self.reader.get_mark(), ) value = self.reader.prefix(length) @@ -927,7 +925,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - 'expected alphabetic or numeric character, but found %r' % utf8(ch), + _F('expected alphabetic or numeric character, but found {ch!r}', ch=ch), self.reader.get_mark(), ) return value @@ -944,7 +942,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - "expected a digit or '.', but found %r" % utf8(srp()), + _F("expected a digit or '.', but found {srp_call!r}", srp_call=srp()), self.reader.get_mark(), ) srf() @@ -953,7 +951,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - "expected a digit or ' ', but found %r" % utf8(srp()), + _F("expected a digit or '.', but found {srp_call!r}", srp_call=srp()), self.reader.get_mark(), ) self.yaml_version = (major, minor) @@ -969,7 +967,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - 'expected a digit, but found %r' % utf8(ch), + _F('expected a digit, but found {ch!r}', ch=ch), self.reader.get_mark(), ) length = 0 @@ -1001,7 +999,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - "expected ' ', but found %r" % utf8(ch), + _F("expected ' ', but found {ch!r}", ch=ch), self.reader.get_mark(), ) return value @@ -1015,7 +1013,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - "expected ' ', but found %r" % utf8(ch), + _F("expected ' ', but found {ch!r}", ch=ch), self.reader.get_mark(), ) return value @@ -1035,7 +1033,7 @@ class Scanner(object): raise ScannerError( 'while scanning a directive', start_mark, - 'expected a comment or a line break, but found %r' % utf8(ch), + _F('expected a comment or a line break, but found {ch!r}', ch=ch), self.reader.get_mark(), ) self.scan_line_break() @@ -1060,16 +1058,16 @@ class Scanner(object): self.reader.forward() length = 0 ch = srp(length) - # while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - # or ch in u'-_': + # while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ + # or ch in '-_': while check_anchorname_char(ch): length += 1 ch = srp(length) if not length: raise ScannerError( - 'while scanning an %s' % (name,), + _F('while scanning an {name!s}', name=name), start_mark, - 'expected alphabetic or numeric character, but found %r' % utf8(ch), + _F('expected alphabetic or numeric character, but found {ch!r}', ch=ch), self.reader.get_mark(), ) value = self.reader.prefix(length) @@ -1079,9 +1077,9 @@ class Scanner(object): # assert ch1 == ch if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`': raise ScannerError( - 'while scanning an %s' % (name,), + _F('while scanning an {name!s}', name=name), start_mark, - 'expected alphabetic or numeric character, but found %r' % utf8(ch), + _F('expected alphabetic or numeric character, but found {ch!r}', ch=ch), self.reader.get_mark(), ) end_mark = self.reader.get_mark() @@ -1101,7 +1099,7 @@ class Scanner(object): raise ScannerError( 'while parsing a tag', start_mark, - "expected '>', but found %r" % utf8(srp()), + _F("expected '>', but found {srp_call!r}", srp_call=srp()), self.reader.get_mark(), ) self.reader.forward() @@ -1130,7 +1128,7 @@ class Scanner(object): raise ScannerError( 'while scanning a tag', start_mark, - "expected ' ', but found %r" % utf8(ch), + _F("expected ' ', but found {ch!r}", ch=ch), self.reader.get_mark(), ) value = (handle, suffix) @@ -1209,10 +1207,10 @@ class Scanner(object): # This is Clark Evans's interpretation (also in the spec # examples): # - # if folded and line_break == u'\n': + # if folded and line_break == '\n': # if not breaks: # if srp() not in ' \t': - # chunks.append(u' ') + # chunks.append(' ') # else: # chunks.append(line_break) # else: @@ -1295,7 +1293,7 @@ class Scanner(object): raise ScannerError( 'while scanning a block scalar', start_mark, - 'expected chomping or indentation indicators, but found %r' % utf8(ch), + _F('expected chomping or indentation indicators, but found {ch!r}', ch=ch), self.reader.get_mark(), ) return chomping, increment @@ -1320,7 +1318,7 @@ class Scanner(object): raise ScannerError( 'while scanning a block scalar', start_mark, - 'expected a comment or a line break, but found %r' % utf8(ch), + _F('expected a comment or a line break, but found {ch!r}', ch=ch), self.reader.get_mark(), ) self.scan_line_break() @@ -1442,12 +1440,16 @@ class Scanner(object): raise ScannerError( 'while scanning a double-quoted scalar', start_mark, - 'expected escape sequence of %d hexdecimal ' - 'numbers, but found %r' % (length, utf8(srp(k))), + _F( + 'expected escape sequence of {length:d} hexdecimal ' + 'numbers, but found {srp_call!r}', + length=length, + srp_call=srp(k), + ), self.reader.get_mark(), ) code = int(self.reader.prefix(length), 16) - chunks.append(unichr(code)) + chunks.append(chr(code)) srf(length) elif ch in '\n\r\x85\u2028\u2029': self.scan_line_break() @@ -1456,7 +1458,7 @@ class Scanner(object): raise ScannerError( 'while scanning a double-quoted scalar', start_mark, - 'found unknown escape character %r' % utf8(ch), + _F('found unknown escape character {ch!r}', ch=ch), self.reader.get_mark(), ) else: @@ -1640,9 +1642,9 @@ class Scanner(object): ch = srp() if ch != '!': raise ScannerError( - 'while scanning a %s' % (name,), + _F('while scanning an {name!s}', name=name), start_mark, - "expected '!', but found %r" % utf8(ch), + _F("expected '!', but found {ch!r}", ch=ch), self.reader.get_mark(), ) length = 1 @@ -1654,9 +1656,9 @@ class Scanner(object): if ch != '!': self.reader.forward(length) raise ScannerError( - 'while scanning a %s' % (name,), + _F('while scanning an {name!s}', name=name), start_mark, - "expected '!', but found %r" % utf8(ch), + _F("expected '!', but found {ch!r}", ch=ch), self.reader.get_mark(), ) length += 1 @@ -1693,9 +1695,9 @@ class Scanner(object): length = 0 if not chunks: raise ScannerError( - 'while parsing a %s' % (name,), + _F('while parsing an {name!s}', name=name), start_mark, - 'expected URI, but found %r' % utf8(ch), + _F('expected URI, but found {ch!r}', ch=ch), self.reader.get_mark(), ) return "".join(chunks) @@ -1712,24 +1714,23 @@ class Scanner(object): for k in range(2): if srp(k) not in '0123456789ABCDEFabcdef': raise ScannerError( - 'while scanning a %s' % (name,), + _F('while scanning an {name!s}', name=name), start_mark, - 'expected URI escape sequence of 2 hexdecimal numbers,' - ' but found %r' % utf8(srp(k)), + _F( + 'expected URI escape sequence of 2 hexdecimal numbers,' + ' but found {srp_call!r}', + srp_call=srp(k), + ), self.reader.get_mark(), ) - if PY3: - code_bytes.append(int(self.reader.prefix(2), 16)) - else: - code_bytes.append(chr(int(self.reader.prefix(2), 16))) + code_bytes.append(int(self.reader.prefix(2), 16)) srf(2) try: - if PY3: - value = bytes(code_bytes).decode('utf-8') - else: - value = unicode(b"".join(code_bytes), 'utf-8') + value = bytes(code_bytes).decode('utf-8') except UnicodeDecodeError as exc: - raise ScannerError('while scanning a %s' % (name,), start_mark, str(exc), mark) + raise ScannerError( + _F('while scanning an {name!s}', name=name), + start_mark, str(exc), mark) return value def scan_line_break(self): |