From e6fd29f3b49be58c94d8627e3aaa5b97b009c053 Mon Sep 17 00:00:00 2001 From: facelessuser Date: Wed, 20 Dec 2017 17:54:23 -0700 Subject: Make sure regex patterns are raw strings Python 3.6 is starting to reject invalid escapes. Regular expression patterns should be raw strings to avoid having regex escapes being mistaken for invalid string escapes. Ref #611. --- markdown/extensions/smarty.py | 20 ++++++++++---------- markdown/extensions/toc.py | 4 ++-- markdown/inlinepatterns.py | 2 +- markdown/util.py | 12 ++++++------ 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 5031bc4..c2af7cd 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -91,16 +91,16 @@ from ..treeprocessors import InlineProcessor # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" endOfWordClass = r"[\s.,;:!?)]" -closeClass = "[^\ \t\r\n\[\{\(\-\u0002\u0003]" +closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" openingQuotesBase = ( - '(\s' # a whitespace char - '|&nbsp;' # or a non-breaking space entity - '|--' # or dashes - '|–|—' # or unicode - '|&[mn]dash;' # or named dash entities - '|&#8211;|&#8212;' # or decimal entities - ')' + r'(\s' # a whitespace char + r'|&nbsp;' # or a non-breaking space entity + r'|--' # or dashes + r'|–|—' # or unicode + r'|&[mn]dash;' # or named dash entities + r'|&#8211;|&#8212;' # or decimal entities + r')' ) substitutions = { @@ -144,8 +144,8 @@ closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass # All remaining quotes should be opening ones -remainingSingleQuotesRegex = "'" -remainingDoubleQuotesRegex = '"' +remainingSingleQuotesRegex = r"'" +remainingDoubleQuotesRegex = r'"' HTML_STRICT_RE = HTML_RE + r'(?!\>)' diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 2c4a4b5..b222cb4 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py 
@@ -25,8 +25,8 @@ import unicodedata def slugify(value, separator): """ Slugify a string, to make it URL friendly. """ value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') - value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower() - return re.sub('[%s\s]+' % separator, separator, value) + value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip().lower() + return re.sub(r'[%s\s]+' % separator, separator, value) IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 2f00b3d..bfdffb3 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -201,7 +201,7 @@ class Pattern(object): """ self.pattern = pattern - self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern, + self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern, re.DOTALL | re.UNICODE) # Api for Markdown to pass safe_mode into instance diff --git a/markdown/util.py b/markdown/util.py index b37e5ae..9e87019 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -27,12 +27,12 @@ Constants you might want to modify BLOCK_LEVEL_ELEMENTS = re.compile( - "^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" - "|script|noscript|form|fieldset|iframe|math" - "|hr|hr/|style|li|dt|dd|thead|tbody" - "|tr|th|td|section|footer|header|group|figure" - "|figcaption|aside|article|canvas|output" - "|progress|video|nav|main)$", + r"^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" + r"|script|noscript|form|fieldset|iframe|math" + r"|hr|hr/|style|li|dt|dd|thead|tbody" + r"|tr|th|td|section|footer|header|group|figure" + r"|figcaption|aside|article|canvas|output" + r"|progress|video|nav|main)$", re.IGNORECASE ) # Placeholders -- cgit v1.2.1