diff options
author | Waylan Limberg <waylan.limberg@icloud.com> | 2022-11-14 14:26:27 -0500 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2022-11-15 11:55:37 -0500 |
commit | 939a2fe70580c8e6b7e10af82ebdd6c8c72e019a (patch) | |
tree | ce92d886eac0f519ba8418d93071167f44a5605b | |
parent | e97ffebc9d22f6dc84087caaf11978d4548c617c (diff) | |
download | python-markdown-939a2fe70580c8e6b7e10af82ebdd6c8c72e019a.tar.gz |
Improve standalone * and _ parsing.
The `NOT_STRONG_RE` regex matches 1, 2, or 3 * or _ which are surrounded by
white space to prevent them from being parsed as tokens. However, the
surrounding white space should not be consumed by the regex, which is why
lookahead and lookbehind assertions are used. As `^` cannot be matched in a
lookbehind assertion, it is left outside the assertion, but as it is zero
length, that should not matter.
Tests added and/or updated to cover various edge cases. Fixes #1300.
-rw-r--r-- | docs/change_log/index.md | 4 | ||||
-rw-r--r-- | markdown/inlinepatterns.py | 2 | ||||
-rw-r--r-- | tests/test_syntax/inline/test_emphasis.py | 48 |
3 files changed, 50 insertions, 4 deletions
diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 9626900..6dd57c4 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -3,6 +3,10 @@ title: Change Log Python-Markdown Change Log ========================= +*under development*: version 3.4.1 (a bug-fix release). + +* Improve standalone * and _ parsing (#1300). + July 15, 2022: version 3.4.1 (a bug-fix release). * Fix an import issue with `importlib.util` (#1274). diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index eb313bd..0bd129c 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -154,7 +154,7 @@ REFERENCE_RE = LINK_RE IMAGE_REFERENCE_RE = IMAGE_LINK_RE # stand-alone * or _ -NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))' +NOT_STRONG_RE = r'((^|(?<=\s))(\*{1,3}|_{1,3})(?=\s|$))' # <http://www.123.com> AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>' diff --git a/tests/test_syntax/inline/test_emphasis.py b/tests/test_syntax/inline/test_emphasis.py index 1e7fafa..29107c7 100644 --- a/tests/test_syntax/inline/test_emphasis.py +++ b/tests/test_syntax/inline/test_emphasis.py @@ -36,6 +36,42 @@ class TestNotEmphasis(TestCase): '<p>_</p>' ) + def test_standalone_asterisks_consecutive(self): + self.assertMarkdownRenders( + 'Foo * * * *', + '<p>Foo * * * *</p>' + ) + + def test_standalone_understore_consecutive(self): + self.assertMarkdownRenders( + 'Foo _ _ _ _', + '<p>Foo _ _ _ _</p>' + ) + + def test_standalone_asterisks_pairs(self): + self.assertMarkdownRenders( + 'Foo ** ** ** **', + '<p>Foo ** ** ** **</p>' + ) + + def test_standalone_understore_pairs(self): + self.assertMarkdownRenders( + 'Foo __ __ __ __', + '<p>Foo __ __ __ __</p>' + ) + + def test_standalone_asterisks_triples(self): + self.assertMarkdownRenders( + 'Foo *** *** *** ***', + '<p>Foo *** *** *** ***</p>' + ) + + def test_standalone_understore_triples(self): + self.assertMarkdownRenders( + 'Foo ___ ___ ___ ___', + '<p>Foo ___ ___ ___ ___</p>' + ) + def 
test_standalone_asterisk_in_text(self): self.assertMarkdownRenders( 'foo * bar', @@ -72,10 +108,16 @@ class TestNotEmphasis(TestCase): '<p>foo\n_ bar _\nbaz</p>' ) - def test_standalone_asterisks_at_end(self): + def test_standalone_underscore_at_begin(self): + self.assertMarkdownRenders( + '_ foo_ bar', + '<p>_ foo_ bar</p>' + ) + + def test_standalone_asterisk_at_end(self): self.assertMarkdownRenders( - 'foo * bar *', - '<p>foo * bar *</p>' + 'foo *bar *', + '<p>foo *bar *</p>' ) def test_standalone_understores_at_begin_end(self): |