From cb47805e10333dfbebc094d9f07fdf18e200aa89 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg
Date: Wed, 6 Mar 2019 12:36:27 -0800
Subject: Optimize several regexes from quadratic time to linear time

Part of the discussion in #798.

Signed-off-by: Anders Kaseorg
---
 markdown/inlinepatterns.py | 10 +++++-----
 tests/misc/html.html       |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index 88e8ead..74cda18 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -147,10 +147,10 @@ IMAGE_REFERENCE_RE = IMAGE_LINK_RE
 NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'
 
 #
-AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
+AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'
 
 #
-AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
+AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'
 
 # <...>
 HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
@@ -433,7 +433,7 @@ class HtmlInlineProcessor(InlineProcessor):
 
 class LinkInlineProcessor(InlineProcessor):
     """ Return a link element from the given match. """
-    RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
+    RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
     RE_TITLE_CLEAN = re.compile(r'\s')
 
     def handleMatch(self, m, data):
@@ -467,8 +467,8 @@ class LinkInlineProcessor(InlineProcessor):
         if m and m.group(1):
             # Matches [Text]( "title")
             href = m.group(1)[1:-1].strip()
-            if m.group(3):
-                title = m.group(3)
+            if m.group(2):
+                title = m.group(2)[1:-1]
             index = m.end(0)
             handled = True
         elif m:
diff --git a/tests/misc/html.html b/tests/misc/html.html
index 5380bbd..293e6cc 100644
--- a/tests/misc/html.html
+++ b/tests/misc/html.html
@@ -20,7 +20,7 @@ Html with various attributes.

And of course .

-

this