diff options
author | Waylan Limberg <waylan.limberg@icloud.com> | 2020-10-14 13:48:47 -0400 |
---|---|---|
committer | Waylan Limberg <waylan.limberg@icloud.com> | 2020-10-14 15:21:44 -0400 |
commit | 607a091918004c4ca10fb621e53a09a00b8d299b (patch) | |
tree | d6adc30ad8159e69060518d462f37dbaa017a33f | |
parent | b4a399ca1a39244a84a2dda0ee3fa2d8f17916fa (diff) | |
download | python-markdown-607a091918004c4ca10fb621e53a09a00b8d299b.tar.gz |
Account for Etree Elements in HTML Stash
By calling str on all stash elements we ensure they don't raise an error.
Worst case, something like `<Element 'div' at 0x000001B2DAE94900>` gets
inserted into the output. However, with the override in the md_in_html
extension, we actually serialize and reinsert the original HTML. Worst case,
an HTML block which should be parsed as Markdown gets skipped by the
extension (`<div markdown="block"></div>` gets inserted into the output).
The tricky part is testing as there should be no known cases where this
ever occurs. Therefore, we forcefully pass an etree Element directly to
the method in the test. That said, as #1040 is unresolved at this point,
I have tested locally with a real existing case and it works well.
Related to #1040.
-rw-r--r-- | markdown/extensions/md_in_html.py | 12 | ||||
-rw-r--r-- | markdown/postprocessors.py | 6 | ||||
-rw-r--r-- | tests/test_syntax/extensions/test_md_in_html.py | 17 |
3 files changed, 33 insertions, 2 deletions
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 3518d05..174224a 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -17,6 +17,7 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php) from . import Extension from ..blockprocessors import BlockProcessor from ..preprocessors import Preprocessor +from ..postprocessors import RawHtmlPostprocessor from .. import util from ..htmlparser import HTMLExtractor import xml.etree.ElementTree as etree @@ -263,6 +264,15 @@ class MarkdownInHtmlProcessor(BlockProcessor): return False +class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor): + def stash_to_string(self, text): + """ Override default to handle any etree elements still in the stash. """ + if isinstance(text, etree.Element): + return self.md.serializer(text) + else: + return str(text) + + class MarkdownInHtmlExtension(Extension): """Add Markdown parsing in HTML to Markdown class.""" @@ -275,6 +285,8 @@ class MarkdownInHtmlExtension(Extension): md.parser.blockprocessors.register( MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105 ) + # Replace raw HTML postprocessor + md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30) def makeExtension(**kwargs): # pragma: no cover diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index cd32687..2e68cd9 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -69,7 +69,7 @@ class RawHtmlPostprocessor(Postprocessor): """ Iterate over html stash and restore html. 
""" replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - html = self.md.htmlStash.rawHtmlBlocks[i] + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) if self.isblocklevel(html): replacements["<p>{}</p>".format( self.md.htmlStash.get_placeholder(i))] = html @@ -95,6 +95,10 @@ class RawHtmlPostprocessor(Postprocessor): return self.md.is_block_level(m.group(1)) return False + def stash_to_string(self, text): + """ Convert a stashed object to a string. """ + return str(text) + class AndSubstitutePostprocessor(Postprocessor): """ Restore valid entities """ diff --git a/tests/test_syntax/extensions/test_md_in_html.py b/tests/test_syntax/extensions/test_md_in_html.py index b68412c..433cdd5 100644 --- a/tests/test_syntax/extensions/test_md_in_html.py +++ b/tests/test_syntax/extensions/test_md_in_html.py @@ -23,6 +23,21 @@ License: BSD (see LICENSE.md for details). from unittest import TestSuite from markdown.test_tools import TestCase from ..blocks.test_html_blocks import TestHTMLBlocks +from markdown import Markdown +from xml.etree.ElementTree import Element + + +class TestMarkdownInHTMLPostProcessor(TestCase): + """ Ensure any remaining elements in HTML stash are properly serialized. """ + + def test_stash_to_string(self): + # There should be no known cases where this actually happens so we need to + # forcefully pass an etree Element to the method to ensure proper behavior. + element = Element('div') + element.text = 'Foo bar.' + md = Markdown(extensions=['md_in_html']) + result = md.postprocessors['raw_html'].stash_to_string(element) + self.assertEqual(result, '<div>Foo bar.</div>') class TestDefaultwMdInHTML(TestHTMLBlocks): @@ -758,7 +773,7 @@ class TestMdInHTML(TestCase): def load_tests(loader, tests, pattern): ''' Ensure TestHTMLBlocks doesn't get run twice by excluding it here. 
''' suite = TestSuite() - for test_class in [TestDefaultwMdInHTML, TestMdInHTML]: + for test_class in [TestDefaultwMdInHTML, TestMdInHTML, TestMarkdownInHTMLPostProcessor]: tests = loader.loadTestsFromTestCase(test_class) suite.addTests(tests) return suite |