summary refs log tree commit diff
diff options
context:
space:
mode:
author Marcel Bargull <marcel.bargull@udo.edu> 2018-03-16 19:52:06 +0100
committer Marcel Bargull <marcel.bargull@udo.edu> 2018-03-16 19:52:06 +0100
commit 541b5e265d0dfa41176e60483f453ae3013b24e5 (patch)
tree f376be9198f4055c69fc23e2e1d574067ec50485
parent 9ece6742b14cc7773c048bef35711b5e060d80f6 (diff)
download ruamel.yaml-541b5e265d0dfa41176e60483f453ae3013b24e5.tar.gz
use faster str.translate in Reader.check_printable
-rw-r--r-- reader.py 40
1 files changed, 23 insertions, 17 deletions
diff --git a/reader.py b/reader.py
index 705d6d7..a045514 100644
--- a/reader.py
+++ b/reader.py
@@ -184,29 +184,35 @@ class Reader(object):
try:
re.compile(u'[^\U00010000]')
except:
- NON_PRINTABLE = RegExp(
- u'[^\x09\x0A\x0D\x20-\x7E\x85'
- u'\xA0-\uD7FF'
- u'\uE000-\uFFFD'
- u']'
- )
UNICODE_SIZE = 2
else:
- NON_PRINTABLE = RegExp(
- u'[^\x09\x0A\x0D\x20-\x7E\x85'
- u'\xA0-\uD7FF'
- u'\uE000-\uFFFD'
- u'\U00010000-\U0010FFFF'
- u']'
- )
UNICODE_SIZE = 4
+ class _NonPrintable:
+ def __getitem__(self, i):
+ if (
+ 0x20 <= i <= 0x7E or
+ i in {0x09, 0x0A, 0x0D, 0x85} or
+ 0xA0 <= i <= 0xD7FF or
+ 0xE000 <= i <= 0xFFFD or
+ 0x00010000 <= i <= 0x0010FFFF):
+ return None
+ return i
+
+ @classmethod
+ def _get_non_printable(cls, data):
+ non_printables = data.translate(cls._NonPrintable())
+ if not non_printables:
+ return None
+ non_printable = non_printables[:1]
+ return data.index(non_printable), non_printable
+
def check_printable(self, data):
# type: (Any) -> None
- match = self.NON_PRINTABLE.search(data)
- if bool(match):
- character = match.group()
- position = self.index + (len(self.buffer) - self.pointer) + match.start()
+ non_printable_match = self._get_non_printable(data)
+ if non_printable_match is not None:
+ start, character = non_printable_match
+ position = self.index + (len(self.buffer) - self.pointer) + start
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")