From 0db66d2aa3c5abe79e55017435894898a2ebe898 Mon Sep 17 00:00:00 2001 From: John Stark Date: Thu, 5 Jan 2023 20:40:13 +0100 Subject: Fix secondary tag encoding. See #449 The second exclamation mark was getting URL-encoded when it should be preserved. Added a regression test. --- scanner.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'scanner.py') diff --git a/scanner.py b/scanner.py index 61cae63..86f7caf 100644 --- a/scanner.py +++ b/scanner.py @@ -1098,6 +1098,13 @@ class Scanner: srp = self.reader.peek start_mark = self.reader.get_mark() ch = srp(1) + short_handle = '!' + if ch == '!': + short_handle = '!!' + self.reader.forward() + srp = self.reader.peek + ch = srp(1) + if ch == '<': handle = None self.reader.forward(2) @@ -1112,7 +1119,7 @@ class Scanner: self.reader.forward() elif ch in _THE_END_SPACE_TAB: handle = None - suffix = '!' + suffix = short_handle self.reader.forward() else: length = 1 @@ -1123,11 +1130,11 @@ class Scanner: break length += 1 ch = srp(length) - handle = '!' + handle = short_handle if use_handle: handle = self.scan_tag_handle('tag', start_mark) else: - handle = '!' + handle = short_handle self.reader.forward() suffix = self.scan_tag_uri('tag', start_mark) ch = srp() -- cgit v1.2.1