diff options
Diffstat (limited to 'creole/emitter/html2markdown_emitter.py')
-rw-r--r-- | creole/emitter/html2markdown_emitter.py | 28 |
1 files changed, 23 insertions, 5 deletions
diff --git a/creole/emitter/html2markdown_emitter.py b/creole/emitter/html2markdown_emitter.py
index 848add0..b9bf8a0 100644
--- a/creole/emitter/html2markdown_emitter.py
+++ b/creole/emitter/html2markdown_emitter.py
@@ -8,7 +8,7 @@
     :license: GNU GPL v3 or above, see LICENSE for more details.
 """
 
-
+from urllib.parse import ParseResult, quote, urlparse, urlunparse
 
 from creole.parser.html_parser import HtmlParser
 from creole.shared.base_emitter import BaseEmitter
@@ -16,6 +16,19 @@
 from creole.shared.document_tree import DocNode
 from creole.shared.markup_table import MarkupTable
 
+def quote_link(uri):
+    """
+    >>> quote_link('http://foo.tld/a image with spaces.png')
+    'http://foo.tld/a%20image%20with%20spaces.png'
+
+    >>> quote_link('https://foo.tld/a image.png?bar=1#anchor')
+    'https://foo.tld/a%20image.png?bar=1#anchor'
+    """
+    scheme, netloc, url, params, query, fragment = urlparse(uri)
+    url = quote(url)
+    return urlunparse(ParseResult(scheme, netloc, url, params, query, fragment))
+
+
 class MarkdownEmitter(BaseEmitter):
     """
     Build from a document_tree (html2creole.parser.HtmlParser instance) a
@@ -109,11 +122,14 @@ class MarkdownEmitter(BaseEmitter):
     def tt_emit(self, node: DocNode):
         return self._typeface(node, key='##')
 
+    def _typeface_html(self, node, tag):
+        return f'<{tag}>{self.emit_children(node)}</{tag}>'
+
     def sup_emit(self, node: DocNode):
-        return self._typeface(node, key='^^')
+        return self._typeface_html(node, tag='sup')
 
     def sub_emit(self, node: DocNode):
-        return self._typeface(node, key=',,')
+        return self._typeface_html(node, tag='sub')
 
     def u_emit(self, node: DocNode):
         return self._typeface(node, key='__')
@@ -133,7 +149,9 @@ class MarkdownEmitter(BaseEmitter):
 
     def a_emit(self, node: DocNode):
         link_text = self.emit_children(node)
-        url = node.attrs['href']
+
+        url = quote_link(node.attrs['href'])
+
         title = node.attrs.get('title')
         if title:
             return f'[{link_text}]({url} "{title}")'
@@ -141,7 +159,7 @@ class MarkdownEmitter(BaseEmitter):
             return f'[{link_text}]({url})'
 
     def img_emit(self, node: DocNode):
-        src = node.attrs['src']
+        src = quote_link(node.attrs['src'])
         title = node.attrs.get('title')
         alt = node.attrs.get('alt', '')
 