summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2016-09-11 12:50:02 +0300
committerSerhiy Storchaka <storchaka@gmail.com>2016-09-11 12:50:02 +0300
commit03d61e6976c4e3899fc84576c91427b60e78cf7a (patch)
treee0bbd516cc0d25d3ab27964d898a7d8d754b7fff
parent1383ce2312c21c2a66f47fd5acc96bd58162710a (diff)
downloadcpython-03d61e6976c4e3899fc84576c91427b60e78cf7a.tar.gz
Issue #22493: Inline flags should now be used only at the start of the
regular expression. A deprecation warning is emitted if they are used in the middle of the regular expression.
-rw-r--r--Doc/library/re.rst8
-rw-r--r--Doc/whatsnew/3.6.rst9
-rw-r--r--Lib/distutils/filelist.py15
-rw-r--r--Lib/distutils/tests/test_filelist.py14
-rw-r--r--Lib/fnmatch.py2
-rw-r--r--Lib/http/cookies.py3
-rw-r--r--Lib/sre_parse.py8
-rwxr-xr-xLib/test/re_tests.py8
-rw-r--r--Lib/test/test_fnmatch.py16
-rw-r--r--Lib/test/test_pyclbr.py2
-rw-r--r--Lib/test/test_re.py3
-rw-r--r--Misc/NEWS4
12 files changed, 58 insertions, 34 deletions
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 5297f0b52d..87cd553601 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -224,12 +224,8 @@ The special characters are:
flags are described in :ref:`contents-of-module-re`.) This
is useful if you wish to include the flags as part of the regular
expression, instead of passing a *flag* argument to the
- :func:`re.compile` function.
-
- Note that the ``(?x)`` flag changes how the expression is parsed. It should be
- used first in the expression string, or after one or more whitespace characters.
- If there are non-whitespace characters before the flag, the results are
- undefined.
+ :func:`re.compile` function. Flags should be used first in the
+ expression string.
``(?:...)``
A non-capturing version of regular parentheses. Matches whatever regular
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst
index 6bb34690e8..8752b83e63 100644
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -1124,6 +1124,15 @@ Deprecated features
that will not be for several Python releases. (Contributed by Emanuel Barry
in :issue:`27364`.)
+* Inline flags ``(?letters)`` should now be used only at the start of the
+ regular expression. Inline flags in the middle of the regular expression
+ affect global flags in Python's :mod:`re` module. This differs from
+ other regular expression engines, which either apply flags to only part of
+ the regular expression or treat them as an error. To avoid confusion,
+ inline flags in the middle of the regular expression now emit a deprecation
+ warning. This will be an error in future Python releases.
+ (Contributed by Serhiy Storchaka in :issue:`22493`.)
+
Deprecated Python behavior
--------------------------
diff --git a/Lib/distutils/filelist.py b/Lib/distutils/filelist.py
index 6522e69f06..c92d5fdba3 100644
--- a/Lib/distutils/filelist.py
+++ b/Lib/distutils/filelist.py
@@ -302,21 +302,26 @@ def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
else:
return pattern
+ # ditch start and end characters
+ start, _, end = glob_to_re('_').partition('_')
+
if pattern:
pattern_re = glob_to_re(pattern)
+ assert pattern_re.startswith(start) and pattern_re.endswith(end)
else:
pattern_re = ''
if prefix is not None:
- # ditch end of pattern character
- empty_pattern = glob_to_re('')
- prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
+ prefix_re = glob_to_re(prefix)
+ assert prefix_re.startswith(start) and prefix_re.endswith(end)
+ prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
sep = os.sep
if os.sep == '\\':
sep = r'\\'
- pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
+ pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
+ pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
else: # no prefix -- respect anchor flag
if anchor:
- pattern_re = "^" + pattern_re
+ pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])
return re.compile(pattern_re)
diff --git a/Lib/distutils/tests/test_filelist.py b/Lib/distutils/tests/test_filelist.py
index 391af3cba2..c71342d0dc 100644
--- a/Lib/distutils/tests/test_filelist.py
+++ b/Lib/distutils/tests/test_filelist.py
@@ -51,14 +51,14 @@ class FileListTestCase(support.LoggingSilencer,
for glob, regex in (
# simple cases
- ('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
- ('foo?', r'foo[^%(sep)s]\Z(?ms)'),
- ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
+ ('foo*', r'(?s:foo[^%(sep)s]*)\Z'),
+ ('foo?', r'(?s:foo[^%(sep)s])\Z'),
+ ('foo??', r'(?s:foo[^%(sep)s][^%(sep)s])\Z'),
# special cases
- (r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
- (r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
- ('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
- (r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
+ (r'foo\\*', r'(?s:foo\\\\[^%(sep)s]*)\Z'),
+ (r'foo\\\*', r'(?s:foo\\\\\\[^%(sep)s]*)\Z'),
+ ('foo????', r'(?s:foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s])\Z'),
+ (r'foo\\??', r'(?s:foo\\\\[^%(sep)s][^%(sep)s])\Z')):
regex = regex % {'sep': sep}
self.assertEqual(glob_to_re(glob), regex)
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 07b12295df..fd3b5142e3 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -106,4 +106,4 @@ def translate(pat):
res = '%s[%s]' % (res, stuff)
else:
res = res + re.escape(c)
- return res + r'\Z(?ms)'
+ return r'(?s:%s)\Z' % res
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
index f078da5469..be3b080aa3 100644
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ -458,7 +458,6 @@ class Morsel(dict):
_LegalKeyChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
_LegalValueChars = _LegalKeyChars + r'\[\]'
_CookiePattern = re.compile(r"""
- (?x) # This is a verbose pattern
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LegalKeyChars + r"""]+? # Any word of at least one letter
@@ -475,7 +474,7 @@ _CookiePattern = re.compile(r"""
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
- """, re.ASCII) # May be removed if safe.
+ """, re.ASCII | re.VERBOSE) # re.ASCII may be removed if safe.
# At long last, here is the cookie class. Using this class is almost just like
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index d74e93ff5c..4a77f0c9a7 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -279,6 +279,9 @@ class Tokenizer:
break
result += c
return result
+ @property
+ def pos(self):
+ return self.index - len(self.next or '')
def tell(self):
return self.index - len(self.next or '')
def seek(self, index):
@@ -727,8 +730,13 @@ def _parse(source, state, verbose):
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
+ pos = source.pos
flags = _parse_flags(source, state, char)
if flags is None: # global flags
+ if pos != 3: # "(?x"
+ import warnings
+ warnings.warn('Flags not at the start of the expression',
+ DeprecationWarning, stacklevel=7)
continue
add_flags, del_flags = flags
group = None
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
index d3692f859a..a379d33aec 100755
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -106,8 +106,8 @@ tests = [
('a.*b', 'acc\nccb', FAIL),
('a.{4,5}b', 'acc\nccb', FAIL),
('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
- ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
- ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
+ ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
+ ('(?s)a.*b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
@@ -563,7 +563,7 @@ tests = [
# Check odd placement of embedded pattern modifiers
# not an error under PCRE/PRE:
- ('w(?i)', 'W', SUCCEED, 'found', 'W'),
+ ('(?i)w', 'W', SUCCEED, 'found', 'W'),
# ('w(?i)', 'W', SYNTAX_ERROR),
# Comments using the x embedded pattern modifier
@@ -627,7 +627,7 @@ xyzabc
# bug 114033: nothing to repeat
(r'(x?)?', 'x', SUCCEED, 'found', 'x'),
# bug 115040: rescan if flags are modified inside pattern
- (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
+ (r'(?x) foo ', 'foo', SUCCEED, 'found', 'foo'),
# bug 115618: negative lookahead
(r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
# bug 116251: character class bug
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index a5f5832544..fb7424624b 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -62,14 +62,14 @@ class FnmatchTestCase(unittest.TestCase):
class TranslateTestCase(unittest.TestCase):
def test_translate(self):
- self.assertEqual(translate('*'), r'.*\Z(?ms)')
- self.assertEqual(translate('?'), r'.\Z(?ms)')
- self.assertEqual(translate('a?b*'), r'a.b.*\Z(?ms)')
- self.assertEqual(translate('[abc]'), r'[abc]\Z(?ms)')
- self.assertEqual(translate('[]]'), r'[]]\Z(?ms)')
- self.assertEqual(translate('[!x]'), r'[^x]\Z(?ms)')
- self.assertEqual(translate('[^x]'), r'[\^x]\Z(?ms)')
- self.assertEqual(translate('[x'), r'\[x\Z(?ms)')
+ self.assertEqual(translate('*'), r'(?s:.*)\Z')
+ self.assertEqual(translate('?'), r'(?s:.)\Z')
+ self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
+ self.assertEqual(translate('[abc]'), r'(?s:[abc])\Z')
+ self.assertEqual(translate('[]]'), r'(?s:[]])\Z')
+ self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
+ self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
+ self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
class FilterTestCase(unittest.TestCase):
diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py
index 06c10c17af..2cff1c526e 100644
--- a/Lib/test/test_pyclbr.py
+++ b/Lib/test/test_pyclbr.py
@@ -158,7 +158,7 @@ class PyclbrTest(TestCase):
cm('cgi', ignore=('log',)) # set with = in module
cm('pickle', ignore=('partial',))
cm('aifc', ignore=('openfp', '_aifc_params')) # set with = in module
- cm('sre_parse', ignore=('dump', 'groups')) # from sre_constants import *; property
+ cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
cm('pdb')
cm('pydoc')
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index afe8738e83..79a7a057a0 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1279,6 +1279,9 @@ class ReTests(unittest.TestCase):
self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
+ with self.assertWarns(DeprecationWarning):
+ self.assertTrue(re.match(upper_char + '(?i)', lower_char))
+
def test_dollar_matches_twice(self):
"$ matches the end of string, and just before the terminating \n"
pattern = re.compile('$')
diff --git a/Misc/NEWS b/Misc/NEWS
index 6b30a10397..fe5fab147d 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -143,6 +143,10 @@ Core and Builtins
Library
-------
+- Issue #22493: Inline flags should now be used only at the start of the
+ regular expression. A deprecation warning is emitted if they are used in the
+ middle of the regular expression.
+
- Issue #26885: xmlrpc now supports unmarshalling additional data types used
by Apache XML-RPC implementation for numerics and None.