summaryrefslogtreecommitdiff
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r--Lib/test/test_re.py239
1 files changed, 223 insertions, 16 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index f093812442..a4e11c6746 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -3,10 +3,12 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
import io
import re
from re import Scanner
+import sre_compile
import sre_constants
import sys
import string
import traceback
+import unittest
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@@ -15,10 +17,26 @@ from weakref import proxy
# what you're doing. Some of these tests were carefully modeled to
# cover most of the code.
-import unittest
+class S(str):
+ def __getitem__(self, index):
+ return S(super().__getitem__(index))
+
+class B(bytes):
+ def __getitem__(self, index):
+ return B(super().__getitem__(index))
class ReTests(unittest.TestCase):
+ def assertTypedEqual(self, actual, expect, msg=None):
+ self.assertEqual(actual, expect, msg)
+ def recurse(actual, expect):
+ if isinstance(expect, (tuple, list)):
+ for x, y in zip(actual, expect):
+ recurse(x, y)
+ else:
+ self.assertIs(type(actual), type(expect), msg)
+ recurse(actual, expect)
+
def test_keep_buffer(self):
# See bug 14212
b = bytearray(b'x')
@@ -53,6 +71,15 @@ class ReTests(unittest.TestCase):
return str(int_value + 1)
def test_basic_re_sub(self):
+ self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
+ self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
+ self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
+ self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
+ self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
+ self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
+ for y in ("\xe0", "\u0430", "\U0001d49c"):
+ self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
+
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
@@ -210,10 +237,29 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
def test_re_split(self):
- self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
- self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
- self.assertEqual(re.split("(:*)", ":a:b::c"),
- ['', ':', 'a', ':', 'b', '::', 'c'])
+ for string in ":a:b::c", S(":a:b::c"):
+ self.assertTypedEqual(re.split(":", string),
+ ['', 'a', 'b', '', 'c'])
+ self.assertTypedEqual(re.split(":*", string),
+ ['', 'a', 'b', 'c'])
+ self.assertTypedEqual(re.split("(:*)", string),
+ ['', ':', 'a', ':', 'b', '::', 'c'])
+ for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
+ memoryview(b":a:b::c")):
+ self.assertTypedEqual(re.split(b":", string),
+ [b'', b'a', b'b', b'', b'c'])
+ self.assertTypedEqual(re.split(b":*", string),
+ [b'', b'a', b'b', b'c'])
+ self.assertTypedEqual(re.split(b"(:*)", string),
+ [b'', b':', b'a', b':', b'b', b'::', b'c'])
+ for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
+ "\U0001d49c\U0001d49e\U0001d4b5"):
+ string = ":%s:%s::%s" % (a, b, c)
+ self.assertEqual(re.split(":", string), ['', a, b, '', c])
+ self.assertEqual(re.split(":*", string), ['', a, b, c])
+ self.assertEqual(re.split("(:*)", string),
+ ['', ':', a, ':', b, '::', c])
+
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:)*", ":a:b::c"),
['', ':', 'a', ':', 'b', ':', 'c'])
@@ -235,22 +281,53 @@ class ReTests(unittest.TestCase):
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
- self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
- self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
- self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
- (":", ":"),
- (":", "::")])
+ for string in "a:b::c:::d", S("a:b::c:::d"):
+ self.assertTypedEqual(re.findall(":+", string),
+ [":", "::", ":::"])
+ self.assertTypedEqual(re.findall("(:+)", string),
+ [":", "::", ":::"])
+ self.assertTypedEqual(re.findall("(:)(:*)", string),
+ [(":", ""), (":", ":"), (":", "::")])
+ for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
+ memoryview(b"a:b::c:::d")):
+ self.assertTypedEqual(re.findall(b":+", string),
+ [b":", b"::", b":::"])
+ self.assertTypedEqual(re.findall(b"(:+)", string),
+ [b":", b"::", b":::"])
+ self.assertTypedEqual(re.findall(b"(:)(:*)", string),
+ [(b":", b""), (b":", b":"), (b":", b"::")])
+ for x in ("\xe0", "\u0430", "\U0001d49c"):
+ xx = x * 2
+ xxx = x * 3
+ string = "a%sb%sc%sd" % (x, xx, xxx)
+ self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
+ self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
+ self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
+ [(x, ""), (x, x), (x, xx)])
def test_bug_117612(self):
self.assertEqual(re.findall(r"(a|(b))", "aba"),
[("a", ""),("b", "b"),("a", "")])
def test_re_match(self):
- self.assertEqual(re.match('a', 'a').groups(), ())
- self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
- self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
- self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
- self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
+ for string in 'a', S('a'):
+ self.assertEqual(re.match('a', string).groups(), ())
+ self.assertEqual(re.match('(a)', string).groups(), ('a',))
+ self.assertEqual(re.match('(a)', string).group(0), 'a')
+ self.assertEqual(re.match('(a)', string).group(1), 'a')
+ self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
+ for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
+ self.assertEqual(re.match(b'a', string).groups(), ())
+ self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
+ self.assertEqual(re.match(b'(a)', string).group(0), b'a')
+ self.assertEqual(re.match(b'(a)', string).group(1), b'a')
+ self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
+ for a in ("\xe0", "\u0430", "\U0001d49c"):
+ self.assertEqual(re.match(a, a).groups(), ())
+ self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
+ self.assertEqual(re.match('(%s)' % a, a).group(0), a)
+ self.assertEqual(re.match('(%s)' % a, a).group(1), a)
+ self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
pat = re.compile('((a)|(b))(c)?')
self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
@@ -272,6 +349,36 @@ class ReTests(unittest.TestCase):
(None, 'b', None))
self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
+ def test_re_fullmatch(self):
+ # Issue 16203: Proposal: add re.fullmatch() method.
+ self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
+ for string in "ab", S("ab"):
+ self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
+ for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
+ self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
+ for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
+ r = r"%s|%s" % (a, a + b)
+ self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
+ self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
+ self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
+ self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
+ self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
+ self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
+ self.assertIsNone(re.fullmatch(r"a+", "ab"))
+ self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
+ self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
+ self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
+ self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
+ self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
+ self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))
+
+ self.assertEqual(
+ re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
+ self.assertEqual(
+ re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
+ self.assertEqual(
+ re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
+
def test_re_groupref_exists(self):
self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
('(', 'a'))
@@ -1053,6 +1160,28 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
[b'xyz'], msg=pattern)
+ def test_match_repr(self):
+ for string in '[abracadabra]', S('[abracadabra]'):
+ m = re.search(r'(.+)(.*?)\1', string)
+ self.assertEqual(repr(m), "<%s.%s object; "
+ "span=(1, 12), match='abracadabra'>" %
+ (type(m).__module__, type(m).__qualname__))
+ for string in (b'[abracadabra]', B(b'[abracadabra]'),
+ bytearray(b'[abracadabra]'),
+ memoryview(b'[abracadabra]')):
+ m = re.search(rb'(.+)(.*?)\1', string)
+ self.assertEqual(repr(m), "<%s.%s object; "
+ "span=(1, 12), match=b'abracadabra'>" %
+ (type(m).__module__, type(m).__qualname__))
+
+ first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
+ self.assertEqual(repr(first), "<%s.%s object; "
+ "span=(0, 2), match='aa'>" %
+ (type(second).__module__, type(first).__qualname__))
+ self.assertEqual(repr(second), "<%s.%s object; "
+ "span=(3, 5), match='bb'>" %
+ (type(second).__module__, type(second).__qualname__))
+
def test_bug_2537(self):
# issue 2537: empty submatches
@@ -1064,6 +1193,84 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(1), "")
self.assertEqual(m.group(2), "y")
+
+class PatternReprTests(unittest.TestCase):
+ def check(self, pattern, expected):
+ self.assertEqual(repr(re.compile(pattern)), expected)
+
+ def check_flags(self, pattern, flags, expected):
+ self.assertEqual(repr(re.compile(pattern, flags)), expected)
+
+ def test_without_flags(self):
+ self.check('random pattern',
+ "re.compile('random pattern')")
+
+ def test_single_flag(self):
+ self.check_flags('random pattern', re.IGNORECASE,
+ "re.compile('random pattern', re.IGNORECASE)")
+
+ def test_multiple_flags(self):
+ self.check_flags('random pattern', re.I|re.S|re.X,
+ "re.compile('random pattern', "
+ "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
+
+ def test_unicode_flag(self):
+ self.check_flags('random pattern', re.U,
+ "re.compile('random pattern')")
+ self.check_flags('random pattern', re.I|re.S|re.U,
+ "re.compile('random pattern', "
+ "re.IGNORECASE|re.DOTALL)")
+
+ def test_inline_flags(self):
+ self.check('(?i)pattern',
+ "re.compile('(?i)pattern', re.IGNORECASE)")
+
+ def test_unknown_flags(self):
+ self.check_flags('random pattern', 0x123000,
+ "re.compile('random pattern', 0x123000)")
+ self.check_flags('random pattern', 0x123000|re.I,
+ "re.compile('random pattern', re.IGNORECASE|0x123000)")
+
+ def test_bytes(self):
+ self.check(b'bytes pattern',
+ "re.compile(b'bytes pattern')")
+ self.check_flags(b'bytes pattern', re.A,
+ "re.compile(b'bytes pattern', re.ASCII)")
+
+ def test_quotes(self):
+ self.check('random "double quoted" pattern',
+ '''re.compile('random "double quoted" pattern')''')
+ self.check("random 'single quoted' pattern",
+ '''re.compile("random 'single quoted' pattern")''')
+ self.check('''both 'single' and "double" quotes''',
+ '''re.compile('both \\'single\\' and "double" quotes')''')
+
+ def test_long_pattern(self):
+ pattern = 'Very %spattern' % ('long ' * 1000)
+ r = repr(re.compile(pattern))
+ self.assertLess(len(r), 300)
+ self.assertEqual(r[:30], "re.compile('Very long long lon")
+ r = repr(re.compile(pattern, re.I))
+ self.assertLess(len(r), 300)
+ self.assertEqual(r[:30], "re.compile('Very long long lon")
+ self.assertEqual(r[-16:], ", re.IGNORECASE)")
+
+
+class ImplementationTest(unittest.TestCase):
+ """
+ Test implementation details of the re module.
+ """
+
+ def test_overlap_table(self):
+ f = sre_compile._generate_overlap_table
+ self.assertEqual(f(""), [])
+ self.assertEqual(f("a"), [0])
+ self.assertEqual(f("abcd"), [0, 0, 0, 0])
+ self.assertEqual(f("aaaa"), [0, 1, 2, 3])
+ self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1])
+ self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0])
+
+
def run_re_tests():
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
if verbose:
@@ -1193,7 +1400,7 @@ def run_re_tests():
def test_main():
- run_unittest(ReTests)
+ run_unittest(__name__)
run_re_tests()
if __name__ == "__main__":