summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anders Kaseorg <andersk@mit.edu> 2019-03-06 12:36:27 -0800
committer: Waylan Limberg <waylan.limberg@icloud.com> 2019-03-06 20:47:05 -0500
commit: cb47805e10333dfbebc094d9f07fdf18e200aa89 (patch)
tree: 4df80a2245df40a27e1b151d49f7279855b70e39
parent: 4b11593b681966e5c9a7a70c84a19546fc2e185c (diff)
download: python-markdown-cb47805e10333dfbebc094d9f07fdf18e200aa89.tar.gz
Optimize several regexes from quadratic time to linear time
Part of the discussion in #798. Signed-off-by: Anders Kaseorg <andersk@mit.edu>
-rw-r--r-- markdown/inlinepatterns.py 10
-rw-r--r-- tests/misc/html.html 2
2 files changed, 6 insertions, 6 deletions
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index 88e8ead..74cda18 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -147,10 +147,10 @@ IMAGE_REFERENCE_RE = IMAGE_LINK_RE
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'
# <http://www.123.com>
-AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
+AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'
# <me@example.com>
-AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
+AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'
# <...>
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
@@ -433,7 +433,7 @@ class HtmlInlineProcessor(InlineProcessor):
class LinkInlineProcessor(InlineProcessor):
""" Return a link element from the given match. """
- RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
+ RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
RE_TITLE_CLEAN = re.compile(r'\s')
def handleMatch(self, m, data):
@@ -467,8 +467,8 @@ class LinkInlineProcessor(InlineProcessor):
if m and m.group(1):
# Matches [Text](<link> "title")
href = m.group(1)[1:-1].strip()
- if m.group(3):
- title = m.group(3)
+ if m.group(2):
+ title = m.group(2)[1:-1]
index = m.end(0)
handled = True
elif m:
diff --git a/tests/misc/html.html b/tests/misc/html.html
index 5380bbd..293e6cc 100644
--- a/tests/misc/html.html
+++ b/tests/misc/html.html
@@ -20,7 +20,7 @@ Html with various attributes.
</div>
<p>And of course <script>blah</script>.</p>
-<p><a href="script&gt;stuff&lt;/script">this <script>link</a></p>
+<p><a href="&lt;script&gt;stuff&lt;/script&gt;">this <script>link</a></p>
<p>Some funky <x\]> inline stuff with markdown escaping syntax.</p>
<p><img scr="foo.png" title="Only one inline element on a line." /></p>
<p>And now a line with only an opening bracket:</p>