diff options
author | Jim Porter <826865+jimporter@users.noreply.github.com> | 2020-01-31 17:08:49 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-31 11:08:49 -0500 |
commit | ccf56edd1404c828c8c2f4e5492ae854e3b06dd7 (patch) | |
tree | e4c3fa1828ed07534872ee0b89f65a35aa2a108a | |
parent | 66517465786b90ccb7fcffeef7996b593374d889 (diff) | |
download | python-markdown-ccf56edd1404c828c8c2f4e5492ae854e3b06dd7.tar.gz |
Don't emit stashed HTML tag placeholders in `.toc_tokens` (#901)
Note: this slightly changes existing behavior in that raw HTML tags are no
longer included in the HTML `.toc`. However, the fact that this worked before
was an oversight. The intention was always to strip all markup. Resolves #899.
-rw-r--r-- | markdown/extensions/toc.py | 13 | ||||
-rw-r--r-- | tests/test_extensions.py | 102 |
2 files changed, 87 insertions, 28 deletions
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index d490a06..e42836a 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -43,7 +43,7 @@ def unique(id, ids): return id -def stashedHTML2text(text, md): +def stashedHTML2text(text, md, strip_entities=True): """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ def _html_sub(m): """ Substitute raw html with plain text. """ @@ -51,8 +51,11 @@ def stashedHTML2text(text, md): raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] except (IndexError, TypeError): # pragma: no cover return m.group(0) - # Strip out tags and entities - leaveing text - return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw) + # Strip out tags and/or entities - leaving text + res = re.sub(r'(<[^>]+>)', '', raw) + if strip_entities: + res = re.sub(r'(&[\#a-zA-Z0-9]+;)', '', res) + return res return HTML_PLACEHOLDER_RE.sub(_html_sub, text) @@ -259,7 +262,9 @@ class TocTreeprocessor(Treeprocessor): toc_tokens.append({ 'level': int(el.tag[-1]), 'id': el.attrib["id"], - 'name': el.attrib.get('data-toc-label', text) + 'name': unescape(stashedHTML2text( + el.attrib.get('data-toc-label', text), self.md, strip_entities=False + )) }) # Remove the data-toc-label attribute as it is no longer needed diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 3ff4f7f..c787886 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -780,6 +780,7 @@ class TestTOC(TestCaseWithAssertStartsWith): self.assertStartsWith('<div class="toc">', self.md.toc) self.md.reset() self.assertEqual(self.md.toc, '') + self.assertEqual(self.md.toc_tokens, []) def testUniqueIds(self): """ Test Unique IDs. 
""" @@ -791,6 +792,21 @@ class TestTOC(TestCaseWithAssertStartsWith): '<h1 id="header_1">Header</h1>\n' '<h1 id="header_2">Header</h1>' ) + self.assertEqual( + self.md.toc, + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#header">Header</a></li>\n' # noqa + '<li><a href="#header_1">Header</a></li>\n' # noqa + '<li><a href="#header_2">Header</a></li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + ) + self.assertEqual(self.md.toc_tokens, [ + {'level': 1, 'id': 'header', 'name': 'Header', 'children': []}, + {'level': 1, 'id': 'header_1', 'name': 'Header', 'children': []}, + {'level': 1, 'id': 'header_2', 'name': 'Header', 'children': []}, + ]) def testHtmlEntities(self): """ Test Headers with HTML Entities. """ @@ -799,6 +815,17 @@ class TestTOC(TestCaseWithAssertStartsWith): self.md.convert(text), '<h1 id="foo-bar">Foo &amp; bar</h1>' ) + self.assertEqual( + self.md.toc, + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#foo-bar">Foo &amp; bar</a></li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + ) + self.assertEqual(self.md.toc_tokens, [ + {'level': 1, 'id': 'foo-bar', 'name': 'Foo & bar', 'children': []}, + ]) def testRawHtml(self): """ Test Headers with raw HTML. """ @@ -807,6 +834,17 @@ class TestTOC(TestCaseWithAssertStartsWith): self.md.convert(text), '<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>' ) + self.assertEqual( + self.md.toc, + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#foo-bar-baz">Foo Bar Baz.</a></li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + ) + self.assertEqual(self.md.toc_tokens, [ + {'level': 1, 'id': 'foo-bar-baz', 'name': 'Foo Bar Baz.', 'children': []}, + ]) def testBaseLevel(self): """ Test Header Base Level. 
""" @@ -833,6 +871,12 @@ class TestTOC(TestCaseWithAssertStartsWith): '</ul>\n' # noqa '</div>\n' ) + self.assertEqual(md.toc_tokens, [ + {'level': 5, 'id': 'some-header', 'name': 'Some Header', 'children': [ + {'level': 6, 'id': 'next-level', 'name': 'Next Level', 'children': []}, + {'level': 6, 'id': 'too-high', 'name': 'Too High', 'children': []}, + ]}, + ]) def testHeaderInlineMarkup(self): """ Test Headers with inline markup. """ @@ -843,6 +887,18 @@ class TestTOC(TestCaseWithAssertStartsWith): '<h1 id="some-header-with-markup">Some <em>Header</em> with ' '<a href="http://example.com">markup</a>.</h1>' ) + self.assertEqual( + self.md.toc, + '<div class="toc">\n' + '<ul>\n' # noqa + '<li><a href="#some-header-with-markup">' # noqa + 'Some Header with markup.</a></li>\n' # noqa + '</ul>\n' # noqa + '</div>\n' + ) + self.assertEqual(self.md.toc_tokens, [ + {'level': 1, 'id': 'some-header-with-markup', 'name': 'Some Header with markup.', 'children': []}, + ]) def testAnchorLink(self): """ Test TOC Anchorlink. """ @@ -942,40 +998,38 @@ class TestTOC(TestCaseWithAssertStartsWith): def testWithAttrList(self): """ Test TOC with attr_list Extension. 
""" md = markdown.Markdown(extensions=['toc', 'attr_list']) - text = '# Header 1\n\n## Header 2 { #foo }\n\n## Header 3 { data-toc-label="Foo Bar"}' + text = ('# Header 1\n\n' + '## Header 2 { #foo }\n\n' + '## Header 3 { data-toc-label="Foo Bar"}\n\n' + '# Header 4 { data-toc-label="Foo <b>Baz</b>" }') self.assertEqual( md.convert(text), '<h1 id="header-1">Header 1</h1>\n' '<h2 id="foo">Header 2</h2>\n' - '<h2 id="header-3">Header 3</h2>' + '<h2 id="header-3">Header 3</h2>\n' + '<h1 id="header-4">Header 4</h1>' ) self.assertEqual( md.toc, '<div class="toc">\n' - '<ul>\n' # noqa - '<li><a href="#header-1">Header 1</a>' # noqa - '<ul>\n' # noqa - '<li><a href="#foo">Header 2</a></li>\n' # noqa - '<li><a href="#header-3">Foo Bar</a></li>\n' # noqa - '</ul>\n' # noqa - '</li>\n' # noqa - '</ul>\n' # noqa + '<ul>\n' # noqa + '<li><a href="#header-1">Header 1</a>' # noqa + '<ul>\n' # noqa + '<li><a href="#foo">Header 2</a></li>\n' # noqa + '<li><a href="#header-3">Foo Bar</a></li>\n' # noqa + '</ul>\n' # noqa + '</li>\n' # noqa + '<li><a href="#header-4">Foo Baz</a></li>\n' # noqa + '</ul>\n' # noqa '</div>\n' ) - self.assertEqual( - md.toc_tokens, - [ - { - 'level': 1, - 'id': 'header-1', - 'name': 'Header 1', - 'children': [ - {'level': 2, 'id': 'foo', 'name': 'Header 2', 'children': []}, - {'level': 2, 'id': 'header-3', 'name': 'Foo Bar', 'children': []} - ] - } - ] - ) + self.assertEqual(md.toc_tokens, [ + {'level': 1, 'id': 'header-1', 'name': 'Header 1', 'children': [ + {'level': 2, 'id': 'foo', 'name': 'Header 2', 'children': []}, + {'level': 2, 'id': 'header-3', 'name': 'Foo Bar', 'children': []} + ]}, + {'level': 1, 'id': 'header-4', 'name': 'Foo Baz', 'children': []}, + ]) def testUniqueFunc(self): """ Test 'unique' function. """ |