summary refs log tree commit diff
path: root/Lib/test/test_re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_re.py')
-rw-r--r-- Lib/test/test_re.py 465
1 files changed, 305 insertions, 160 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 0834fe0f40..4bdaa4b6c6 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -3,13 +3,13 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
import io
import locale
import re
-from re import Scanner
import sre_compile
-import sre_constants
-import sys
import string
+import sys
import traceback
import unittest
+import warnings
+from re import Scanner
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@@ -115,10 +115,10 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
- self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
- self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
- self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
- self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
+ self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<a>', 'xx'), 'xxxx')
+ self.assertEqual(re.sub('(?P<a>x)', r'\g<a>\g<1>', 'xx'), 'xxxx')
+ self.assertEqual(re.sub('(?P<unk>x)', r'\g<unk>\g<unk>', 'xx'), 'xxxx')
+ self.assertEqual(re.sub('(?P<unk>x)', r'\g<1>\g<1>', 'xx'), 'xxxx')
self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
@@ -129,11 +129,11 @@ class ReTests(unittest.TestCase):
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
- self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
+ self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
def test_bug_449964(self):
# fails for group followed by other escape
- self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
+ self.assertEqual(re.sub(r'(?P<unk>x)', r'\g<1>\g<1>\b', 'xx'),
'xx\bxx\b')
def test_bug_449000(self):
@@ -186,18 +186,19 @@ class ReTests(unittest.TestCase):
r'octal escape value \777 outside of '
r'range 0-0o377', 0)
- self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
- self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
- self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
+ self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
+ self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
+ self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
+ self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
+ self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
+ self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
+ self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
+ self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
+ self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
+ self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
+ self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
+ self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
+ self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)
# in python2.3 (etc), these loop endlessly in sre_parser.py
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
@@ -221,26 +222,26 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
def test_symbolic_groups(self):
- re.compile('(?P<a>x)(?P=a)(?(a)y)')
- re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
- re.compile('(?P<a1>x)\1(?(1)y)')
- self.checkPatternError('(?P<a>)(?P<a>)',
+ re.compile(r'(?P<a>x)(?P=a)(?(a)y)')
+ re.compile(r'(?P<a1>x)(?P=a1)(?(a1)y)')
+ re.compile(r'(?P<a1>x)\1(?(1)y)')
+ self.checkPatternError(r'(?P<a>)(?P<a>)',
"redefinition of group name 'a' as group 2; "
"was group 1")
- self.checkPatternError('(?P<a>(?P=a))',
+ self.checkPatternError(r'(?P<a>(?P=a))',
"cannot refer to an open group", 10)
- self.checkPatternError('(?Pxy)', 'unknown extension ?Px')
- self.checkPatternError('(?P<a>)(?P=a', 'missing ), unterminated name', 11)
- self.checkPatternError('(?P=', 'missing group name', 4)
- self.checkPatternError('(?P=)', 'missing group name', 4)
- self.checkPatternError('(?P=1)', "bad character in group name '1'", 4)
- self.checkPatternError('(?P=a)', "unknown group name 'a'")
- self.checkPatternError('(?P=a1)', "unknown group name 'a1'")
- self.checkPatternError('(?P=a.)', "bad character in group name 'a.'", 4)
- self.checkPatternError('(?P<)', 'missing >, unterminated name', 4)
- self.checkPatternError('(?P<a', 'missing >, unterminated name', 4)
- self.checkPatternError('(?P<', 'missing group name', 4)
- self.checkPatternError('(?P<>)', 'missing group name', 4)
+ self.checkPatternError(r'(?Pxy)', 'unknown extension ?Px')
+ self.checkPatternError(r'(?P<a>)(?P=a', 'missing ), unterminated name', 11)
+ self.checkPatternError(r'(?P=', 'missing group name', 4)
+ self.checkPatternError(r'(?P=)', 'missing group name', 4)
+ self.checkPatternError(r'(?P=1)', "bad character in group name '1'", 4)
+ self.checkPatternError(r'(?P=a)', "unknown group name 'a'")
+ self.checkPatternError(r'(?P=a1)', "unknown group name 'a1'")
+ self.checkPatternError(r'(?P=a.)', "bad character in group name 'a.'", 4)
+ self.checkPatternError(r'(?P<)', 'missing >, unterminated name', 4)
+ self.checkPatternError(r'(?P<a', 'missing >, unterminated name', 4)
+ self.checkPatternError(r'(?P<', 'missing group name', 4)
+ self.checkPatternError(r'(?P<>)', 'missing group name', 4)
self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
self.checkPatternError(r'(?(', 'missing group name', 3)
@@ -259,35 +260,35 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
def test_symbolic_refs(self):
- self.checkTemplateError('(?P<a>x)', '\g<a', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g<a', 'xx',
'missing >, unterminated name', 3)
- self.checkTemplateError('(?P<a>x)', '\g<', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g<', 'xx',
'missing group name', 3)
- self.checkTemplateError('(?P<a>x)', '\g', 'xx', 'missing <', 2)
- self.checkTemplateError('(?P<a>x)', '\g<a a>', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g', 'xx', 'missing <', 2)
+ self.checkTemplateError('(?P<a>x)', r'\g<a a>', 'xx',
"bad character in group name 'a a'", 3)
- self.checkTemplateError('(?P<a>x)', '\g<>', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g<>', 'xx',
'missing group name', 3)
- self.checkTemplateError('(?P<a>x)', '\g<1a1>', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
"bad character in group name '1a1'", 3)
self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
- 'invalid group reference')
+ 'invalid group reference 2', 3)
self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
- 'invalid group reference')
+ 'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
- re.sub('(?P<a>x)', '\g<ab>', 'xx')
+ re.sub('(?P<a>x)', r'\g<ab>', 'xx')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
- self.checkTemplateError('(?P<a>x)', '\g<-1>', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g<-1>', 'xx',
"bad character in group name '-1'", 3)
# New valid/invalid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
- self.checkTemplateError('(?P<a>x)', '\g<©>', 'xx',
+ self.checkTemplateError('(?P<a>x)', r'\g<©>', 'xx',
"bad character in group name '©'", 3)
# Support > 100 groups.
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
- self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
+ self.assertEqual(re.sub(pat, r'\g<200>', 'xc8yzxc8y'), 'c8zc8')
def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
@@ -419,19 +420,77 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
- # A single group
- m = re.match('(a)', 'a')
- self.assertEqual(m.group(0), 'a')
- self.assertEqual(m.group(0), 'a')
- self.assertEqual(m.group(1), 'a')
- self.assertEqual(m.group(1, 1), ('a', 'a'))
-
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
(None, 'b', None))
self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
+ def test_group(self):
+ class Index:
+ def __init__(self, value):
+ self.value = value
+ def __index__(self):
+ return self.value
+ # A single group
+ m = re.match('(a)(b)', 'ab')
+ self.assertEqual(m.group(), 'ab')
+ self.assertEqual(m.group(0), 'ab')
+ self.assertEqual(m.group(1), 'a')
+ self.assertEqual(m.group(Index(1)), 'a')
+ self.assertRaises(IndexError, m.group, -1)
+ self.assertRaises(IndexError, m.group, 3)
+ self.assertRaises(IndexError, m.group, 1<<1000)
+ self.assertRaises(IndexError, m.group, Index(1<<1000))
+ self.assertRaises(IndexError, m.group, 'x')
+ # Multiple groups
+ self.assertEqual(m.group(2, 1), ('b', 'a'))
+ self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a'))
+
+ def test_match_getitem(self):
+ pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
+
+ m = pat.match('a')
+ self.assertEqual(m['a1'], 'a')
+ self.assertEqual(m['b2'], None)
+ self.assertEqual(m['c3'], None)
+ self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=None')
+ self.assertEqual(m[0], 'a')
+ self.assertEqual(m[1], 'a')
+ self.assertEqual(m[2], None)
+ self.assertEqual(m[3], None)
+ with self.assertRaisesRegex(IndexError, 'no such group'):
+ m['X']
+ with self.assertRaisesRegex(IndexError, 'no such group'):
+ m[-1]
+ with self.assertRaisesRegex(IndexError, 'no such group'):
+ m[4]
+ with self.assertRaisesRegex(IndexError, 'no such group'):
+ m[0, 1]
+ with self.assertRaisesRegex(IndexError, 'no such group'):
+ m[(0,)]
+ with self.assertRaisesRegex(IndexError, 'no such group'):
+ m[(0, 1)]
+ with self.assertRaisesRegex(KeyError, 'a2'):
+ 'a1={a2}'.format_map(m)
+
+ m = pat.match('ac')
+ self.assertEqual(m['a1'], 'a')
+ self.assertEqual(m['b2'], None)
+ self.assertEqual(m['c3'], 'c')
+ self.assertEqual('a1={a1} b2={b2} c3={c3}'.format_map(m), 'a1=a b2=None c3=c')
+ self.assertEqual(m[0], 'ac')
+ self.assertEqual(m[1], 'a')
+ self.assertEqual(m[2], None)
+ self.assertEqual(m[3], 'c')
+
+ # Cannot assign.
+ with self.assertRaises(TypeError):
+ m[0] = 1
+
+ # No len().
+ self.assertRaises(TypeError, len, m)
+
def test_re_fullmatch(self):
# Issue 16203: Proposal: add re.fullmatch() method.
self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
@@ -463,19 +522,19 @@ class ReTests(unittest.TestCase):
re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
def test_re_groupref_exists(self):
- self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
+ self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
('(', 'a'))
- self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
+ self.assertEqual(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a').groups(),
(None, 'a'))
- self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
- self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
+ self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'))
+ self.assertIsNone(re.match(r'^(\()?([^()]+)(?(1)\))$', '(a'))
self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
('a', 'b'))
- self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
+ self.assertEqual(re.match(r'^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
(None, 'd'))
- self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
+ self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
(None, 'd'))
- self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
+ self.assertEqual(re.match(r'^(?:(a)|c)((?(1)|d))$', 'a').groups(),
('a', ''))
# Tests for bug #1177831: exercise groups other than the first group
@@ -500,10 +559,11 @@ class ReTests(unittest.TestCase):
'two branches', 10)
def test_re_groupref_overflow(self):
- self.checkTemplateError('()', '\g<%s>' % sre_constants.MAXGROUPS, 'xx',
- 'invalid group reference', 3)
- self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
- 'invalid group reference', 10)
+ from sre_constants import MAXGROUPS
+ self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
+ 'invalid group reference %d' % MAXGROUPS, 3)
+ self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS,
+ 'invalid group reference %d' % MAXGROUPS, 10)
def test_re_groupref(self):
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
@@ -535,37 +595,37 @@ class ReTests(unittest.TestCase):
" ")
def test_repeat_minmax(self):
- self.assertIsNone(re.match("^(\w){1}$", "abc"))
- self.assertIsNone(re.match("^(\w){1}?$", "abc"))
- self.assertIsNone(re.match("^(\w){1,2}$", "abc"))
- self.assertIsNone(re.match("^(\w){1,2}?$", "abc"))
-
- self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
- self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
-
- self.assertIsNone(re.match("^x{1}$", "xxx"))
- self.assertIsNone(re.match("^x{1}?$", "xxx"))
- self.assertIsNone(re.match("^x{1,2}$", "xxx"))
- self.assertIsNone(re.match("^x{1,2}?$", "xxx"))
-
- self.assertTrue(re.match("^x{3}$", "xxx"))
- self.assertTrue(re.match("^x{1,3}$", "xxx"))
- self.assertTrue(re.match("^x{3,3}$", "xxx"))
- self.assertTrue(re.match("^x{1,4}$", "xxx"))
- self.assertTrue(re.match("^x{3,4}?$", "xxx"))
- self.assertTrue(re.match("^x{3}?$", "xxx"))
- self.assertTrue(re.match("^x{1,3}?$", "xxx"))
- self.assertTrue(re.match("^x{1,4}?$", "xxx"))
- self.assertTrue(re.match("^x{3,4}?$", "xxx"))
-
- self.assertIsNone(re.match("^x{}$", "xxx"))
- self.assertTrue(re.match("^x{}$", "x{}"))
+ self.assertIsNone(re.match(r"^(\w){1}$", "abc"))
+ self.assertIsNone(re.match(r"^(\w){1}?$", "abc"))
+ self.assertIsNone(re.match(r"^(\w){1,2}$", "abc"))
+ self.assertIsNone(re.match(r"^(\w){1,2}?$", "abc"))
+
+ self.assertEqual(re.match(r"^(\w){3}$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){1,3}$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){1,4}$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){3}?$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){1,3}?$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){1,4}?$", "abc").group(1), "c")
+ self.assertEqual(re.match(r"^(\w){3,4}?$", "abc").group(1), "c")
+
+ self.assertIsNone(re.match(r"^x{1}$", "xxx"))
+ self.assertIsNone(re.match(r"^x{1}?$", "xxx"))
+ self.assertIsNone(re.match(r"^x{1,2}$", "xxx"))
+ self.assertIsNone(re.match(r"^x{1,2}?$", "xxx"))
+
+ self.assertTrue(re.match(r"^x{3}$", "xxx"))
+ self.assertTrue(re.match(r"^x{1,3}$", "xxx"))
+ self.assertTrue(re.match(r"^x{3,3}$", "xxx"))
+ self.assertTrue(re.match(r"^x{1,4}$", "xxx"))
+ self.assertTrue(re.match(r"^x{3,4}?$", "xxx"))
+ self.assertTrue(re.match(r"^x{3}?$", "xxx"))
+ self.assertTrue(re.match(r"^x{1,3}?$", "xxx"))
+ self.assertTrue(re.match(r"^x{1,4}?$", "xxx"))
+ self.assertTrue(re.match(r"^x{3,4}?$", "xxx"))
+
+ self.assertIsNone(re.match(r"^x{}$", "xxx"))
+ self.assertTrue(re.match(r"^x{}$", "x{}"))
self.checkPatternError(r'x{2,1}',
'min repeat greater than max repeat', 2)
@@ -638,14 +698,10 @@ class ReTests(unittest.TestCase):
re.purge() # for warnings
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
with self.subTest(c):
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
- self.assertIsNone(re.match('\\%c' % c, 'a'))
+ self.assertRaises(re.error, re.compile, '\\%c' % c)
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
with self.subTest(c):
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
- self.assertIsNone(re.match('[\\%c]' % c, 'a'))
+ self.assertRaises(re.error, re.compile, '[\\%c]' % c)
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
@@ -692,10 +748,10 @@ class ReTests(unittest.TestCase):
"a\n\nb")
def test_lookahead(self):
- self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
- self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
- self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
- self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
+ self.assertEqual(re.match(r"(a(?=\s[^a]))", "a b").group(1), "a")
+ self.assertEqual(re.match(r"(a(?=\s[^a]*))", "a b").group(1), "a")
+ self.assertEqual(re.match(r"(a(?=\s[abc]))", "a b").group(1), "a")
+ self.assertEqual(re.match(r"(a(?=\s[abc]*))", "a bc").group(1), "a")
self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
@@ -843,12 +899,12 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
def test_not_literal(self):
- self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
- self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
+ self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
+ self.assertEqual(re.search(r"\s([^a]*)", " bb").group(1), "bb")
def test_search_coverage(self):
- self.assertEqual(re.search("\s(b)", " b").group(1), "b")
- self.assertEqual(re.search("a\s", "a ").group(0), "a ")
+ self.assertEqual(re.search(r"\s(b)", " b").group(1), "b")
+ self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
def assertMatch(self, pattern, text, match=None, span=None,
matcher=re.match):
@@ -953,7 +1009,7 @@ class ReTests(unittest.TestCase):
self.checkPatternError(r"\567",
r'octal escape value \567 outside of '
r'range 0-0o377', 0)
- self.checkPatternError(r"\911", 'invalid group reference', 0)
+ self.checkPatternError(r"\911", 'invalid group reference 91', 1)
self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
@@ -998,10 +1054,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"\u1234", b'u1234'))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"\U00012345", b'U00012345'))
+ self.assertRaises(re.error, re.compile, br"\u1234")
+ self.assertRaises(re.error, re.compile, br"\U00012345")
self.assertTrue(re.match(br"\0", b"\000"))
self.assertTrue(re.match(br"\08", b"\0008"))
self.assertTrue(re.match(br"\01", b"\001"))
@@ -1009,7 +1063,7 @@ class ReTests(unittest.TestCase):
self.checkPatternError(br"\567",
r'octal escape value \567 outside of '
r'range 0-0o377', 0)
- self.checkPatternError(br"\911", 'invalid group reference', 0)
+ self.checkPatternError(br"\911", 'invalid group reference 91', 1)
self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
@@ -1023,10 +1077,8 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"[\u1234]", b'u'))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"[\U00012345]", b'U'))
+ self.assertRaises(re.error, re.compile, br"[\u1234]")
+ self.assertRaises(re.error, re.compile, br"[\U00012345]")
self.checkPatternError(br"[\567]",
r'octal escape value \567 outside of '
r'range 0-0o377', 1)
@@ -1054,8 +1106,8 @@ class ReTests(unittest.TestCase):
self.assertIsNone(re.match(r'(a)?a','a').lastindex)
self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
- self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
- self.assertEqual(re.match("((a))", "a").lastindex, 1)
+ self.assertEqual(re.match(r"(?P<a>a(b))", "ab").lastgroup, 'a')
+ self.assertEqual(re.match(r"((a))", "a").lastindex, 1)
def test_bug_418626(self):
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
@@ -1227,7 +1279,7 @@ class ReTests(unittest.TestCase):
'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
]
for x in decimal_digits:
- self.assertEqual(re.match('^\d$', x).group(0), x)
+ self.assertEqual(re.match(r'^\d$', x).group(0), x)
not_decimal_digits = [
'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
@@ -1236,7 +1288,7 @@ class ReTests(unittest.TestCase):
'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
]
for x in not_decimal_digits:
- self.assertIsNone(re.match('^\d$', x))
+ self.assertIsNone(re.match(r'^\d$', x))
def test_empty_array(self):
# SF bug 1647541
@@ -1278,6 +1330,22 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
+ p = upper_char + '(?i)'
+ with self.assertWarns(DeprecationWarning) as warns:
+ self.assertTrue(re.match(p, lower_char))
+ self.assertEqual(
+ str(warns.warnings[0].message),
+ 'Flags not at the start of the expression %s' % p
+ )
+
+ p = upper_char + '(?i)%s' % ('.?' * 100)
+ with self.assertWarns(DeprecationWarning) as warns:
+ self.assertTrue(re.match(p, lower_char))
+ self.assertEqual(
+ str(warns.warnings[0].message),
+ 'Flags not at the start of the expression %s (truncated)' % p[:20]
+ )
+
def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')
@@ -1308,29 +1376,29 @@ class ReTests(unittest.TestCase):
for flags in (0, re.UNICODE):
pat = re.compile('\xc0', flags | re.IGNORECASE)
self.assertTrue(pat.match('\xe0'))
- pat = re.compile('\w', flags)
+ pat = re.compile(r'\w', flags)
self.assertTrue(pat.match('\xe0'))
pat = re.compile('\xc0', re.ASCII | re.IGNORECASE)
self.assertIsNone(pat.match('\xe0'))
pat = re.compile('(?a)\xc0', re.IGNORECASE)
self.assertIsNone(pat.match('\xe0'))
- pat = re.compile('\w', re.ASCII)
+ pat = re.compile(r'\w', re.ASCII)
self.assertIsNone(pat.match('\xe0'))
- pat = re.compile('(?a)\w')
+ pat = re.compile(r'(?a)\w')
self.assertIsNone(pat.match('\xe0'))
# Bytes patterns
for flags in (0, re.ASCII):
pat = re.compile(b'\xc0', flags | re.IGNORECASE)
self.assertIsNone(pat.match(b'\xe0'))
- pat = re.compile(b'\w', flags)
+ pat = re.compile(br'\w', flags)
self.assertIsNone(pat.match(b'\xe0'))
# Incompatibilities
- self.assertRaises(ValueError, re.compile, b'\w', re.UNICODE)
- self.assertRaises(ValueError, re.compile, b'(?u)\w')
- self.assertRaises(ValueError, re.compile, '\w', re.UNICODE | re.ASCII)
- self.assertRaises(ValueError, re.compile, '(?u)\w', re.ASCII)
- self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
- self.assertRaises(ValueError, re.compile, '(?au)\w')
+ self.assertRaises(ValueError, re.compile, br'\w', re.UNICODE)
+ self.assertRaises(ValueError, re.compile, br'(?u)\w')
+ self.assertRaises(ValueError, re.compile, r'\w', re.UNICODE | re.ASCII)
+ self.assertRaises(ValueError, re.compile, r'(?u)\w', re.ASCII)
+ self.assertRaises(ValueError, re.compile, r'(?a)\w', re.UNICODE)
+ self.assertRaises(ValueError, re.compile, r'(?au)\w')
def test_locale_flag(self):
import locale
@@ -1361,27 +1429,59 @@ class ReTests(unittest.TestCase):
pat = re.compile(bpat, re.IGNORECASE)
if bletter:
self.assertIsNone(pat.match(bletter))
- pat = re.compile(b'\w', re.LOCALE)
+ pat = re.compile(br'\w', re.LOCALE)
if bletter:
self.assertTrue(pat.match(bletter))
- pat = re.compile(b'(?L)\w')
+ pat = re.compile(br'(?L)\w')
if bletter:
self.assertTrue(pat.match(bletter))
- pat = re.compile(b'\w')
+ pat = re.compile(br'\w')
if bletter:
self.assertIsNone(pat.match(bletter))
# Incompatibilities
- self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
- self.assertWarns(DeprecationWarning, re.compile, '(?L)')
- self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
- self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
- self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
- self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
+ self.assertRaises(ValueError, re.compile, '', re.LOCALE)
+ self.assertRaises(ValueError, re.compile, '(?L)')
+ self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
+ self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
+ self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
+ self.assertRaises(ValueError, re.compile, b'(?aL)')
+
+ def test_scoped_flags(self):
+ self.assertTrue(re.match(r'(?i:a)b', 'Ab'))
+ self.assertIsNone(re.match(r'(?i:a)b', 'aB'))
+ self.assertIsNone(re.match(r'(?-i:a)b', 'Ab', re.IGNORECASE))
+ self.assertTrue(re.match(r'(?-i:a)b', 'aB', re.IGNORECASE))
+ self.assertIsNone(re.match(r'(?i:(?-i:a)b)', 'Ab'))
+ self.assertTrue(re.match(r'(?i:(?-i:a)b)', 'aB'))
+
+ self.assertTrue(re.match(r'(?x: a) b', 'a b'))
+ self.assertIsNone(re.match(r'(?x: a) b', ' a b'))
+ self.assertTrue(re.match(r'(?-x: a) b', ' ab', re.VERBOSE))
+ self.assertIsNone(re.match(r'(?-x: a) b', 'ab', re.VERBOSE))
+
+ self.checkPatternError(r'(?a:\w)',
+ 'bad inline flags: cannot turn on global flag', 3)
+ self.checkPatternError(r'(?a)(?-a:\w)',
+ 'bad inline flags: cannot turn off global flag', 8)
+ self.checkPatternError(r'(?i-i:a)',
+ 'bad inline flags: flag turned on and off', 5)
+
+ self.checkPatternError(r'(?-', 'missing flag', 3)
+ self.checkPatternError(r'(?-+', 'missing flag', 3)
+ self.checkPatternError(r'(?-z', 'unknown flag', 3)
+ self.checkPatternError(r'(?-i', 'missing :', 4)
+ self.checkPatternError(r'(?-i)', 'missing :', 4)
+ self.checkPatternError(r'(?-i+', 'missing :', 4)
+ self.checkPatternError(r'(?-iz', 'unknown flag', 4)
+ self.checkPatternError(r'(?i:', 'missing ), unterminated subpattern', 0)
+ self.checkPatternError(r'(?i', 'missing -, : or )', 3)
+ self.checkPatternError(r'(?i+', 'missing -, : or )', 3)
+ self.checkPatternError(r'(?iz', 'unknown flag', 3)
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
# all strings
- pat = re.compile('a(\w)')
+ pat = re.compile(r'a(\w)')
self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
pat = re.compile('a(.)')
self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
@@ -1389,7 +1489,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
# all bytes
- pat = re.compile(b'a(\w)')
+ pat = re.compile(br'a(\w)')
self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
pat = re.compile(b'a(.)')
self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
@@ -1427,13 +1527,6 @@ class ReTests(unittest.TestCase):
# Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0)
- def test_bug_13899(self):
- # Issue #13899: re pattern r"[\A]" should work like "A" but matches
- # nothing. Ditto B and Z.
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
- ['A', 'B', '\b', 'C', 'Z'])
-
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
# Issue #10182: indices were 32-bit-truncated.
@@ -1518,7 +1611,7 @@ class ReTests(unittest.TestCase):
for string in (b'[abracadabra]', B(b'[abracadabra]'),
bytearray(b'[abracadabra]'),
memoryview(b'[abracadabra]')):
- m = re.search(rb'(.+)(.*?)\1', string)
+ m = re.search(br'(.+)(.*?)\1', string)
self.assertEqual(repr(m), "<%s.%s object; "
"span=(1, 12), match=b'abracadabra'>" %
(type(m).__module__, type(m).__qualname__))
@@ -1547,9 +1640,9 @@ class ReTests(unittest.TestCase):
with captured_stdout() as out:
re.compile(pat, re.DEBUG)
dump = '''\
-SUBPATTERN 1
+SUBPATTERN 1 0 0
LITERAL 46
-SUBPATTERN None
+SUBPATTERN None 0 0
BRANCH
IN
LITERAL 99
@@ -1557,7 +1650,7 @@ SUBPATTERN None
OR
LITERAL 112
LITERAL 121
-SUBPATTERN None
+SUBPATTERN None 0 0
GROUPREF_EXISTS 1
AT AT_END
ELSE
@@ -1673,12 +1766,64 @@ SUBPATTERN None
self.checkPatternError(r'(?P', 'unexpected end of pattern', 3)
self.checkPatternError(r'(?z)', 'unknown extension ?z', 1)
self.checkPatternError(r'(?iz)', 'unknown flag', 3)
- self.checkPatternError(r'(?i', 'missing )', 3)
+ self.checkPatternError(r'(?i', 'missing -, : or )', 3)
self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0)
self.checkPatternError(r'(?<', 'unexpected end of pattern', 3)
self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1)
self.checkPatternError(r'(?', 'unexpected end of pattern', 2)
+ def test_enum(self):
+ # Issue #28082: Check that str(flag) returns a human readable string
+ # instead of an integer
+ self.assertIn('ASCII', str(re.A))
+ self.assertIn('DOTALL', str(re.S))
+
+ def test_pattern_compare(self):
+ pattern1 = re.compile('abc', re.IGNORECASE)
+
+ # equal to itself
+ self.assertEqual(pattern1, pattern1)
+ self.assertFalse(pattern1 != pattern1)
+
+ # equal
+ re.purge()
+ pattern2 = re.compile('abc', re.IGNORECASE)
+ self.assertEqual(hash(pattern2), hash(pattern1))
+ self.assertEqual(pattern2, pattern1)
+
+ # not equal: different pattern
+ re.purge()
+ pattern3 = re.compile('XYZ', re.IGNORECASE)
+ # Don't test hash(pattern3) != hash(pattern1) because there is no
+ # guarantee that hash values are different
+ self.assertNotEqual(pattern3, pattern1)
+
+ # not equal: different flag (flags=0)
+ re.purge()
+ pattern4 = re.compile('abc')
+ self.assertNotEqual(pattern4, pattern1)
+
+ # only == and != comparison operators are supported
+ with self.assertRaises(TypeError):
+ pattern1 < pattern2
+
+ def test_pattern_compare_bytes(self):
+ pattern1 = re.compile(b'abc')
+
+ # equal: test bytes patterns
+ re.purge()
+ pattern2 = re.compile(b'abc')
+ self.assertEqual(hash(pattern2), hash(pattern1))
+ self.assertEqual(pattern2, pattern1)
+
+ # not equal: patterns of different types (str vs bytes),
+ # comparison must not raise a BytesWarning
+ re.purge()
+ pattern3 = re.compile('abc')
+ with warnings.catch_warnings():
+ warnings.simplefilter('error', BytesWarning)
+ self.assertNotEqual(pattern3, pattern1)
+
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):