summary | refs | log | tree | commit | diff
path: root/creole/emitter/html2markdown_emitter.py
diff options
context:
space:
mode:
Diffstat (limited to 'creole/emitter/html2markdown_emitter.py')
-rw-r--r-- creole/emitter/html2markdown_emitter.py | 28
1 files changed, 23 insertions, 5 deletions
diff --git a/creole/emitter/html2markdown_emitter.py b/creole/emitter/html2markdown_emitter.py
index 848add0..b9bf8a0 100644
--- a/creole/emitter/html2markdown_emitter.py
+++ b/creole/emitter/html2markdown_emitter.py
@@ -8,7 +8,7 @@
:license: GNU GPL v3 or above, see LICENSE for more details.
"""
-
+from urllib.parse import ParseResult, quote, urlparse, urlunparse
from creole.parser.html_parser import HtmlParser
from creole.shared.base_emitter import BaseEmitter
@@ -16,6 +16,19 @@ from creole.shared.document_tree import DocNode
from creole.shared.markup_table import MarkupTable
+def quote_link(uri):
+    """
+    Percent-encode the path component of a link so it is usable in Markdown.
+
+    Only the path is quoted; scheme, netloc, params, query and fragment are
+    passed through unchanged. Existing ``%XX`` escapes in the path are kept
+    as they are, so an already quoted link is not encoded a second time
+    (``%20`` must not become ``%2520``).
+
+    >>> quote_link('http://foo.tld/a image with spaces.png')
+    'http://foo.tld/a%20image%20with%20spaces.png'
+
+    >>> quote_link('https://foo.tld/a image.png?bar=1#anchor')
+    'https://foo.tld/a%20image.png?bar=1#anchor'
+
+    >>> quote_link('http://foo.tld/a%20image.png')
+    'http://foo.tld/a%20image.png'
+    """
+    scheme, netloc, path, params, query, fragment = urlparse(uri)
+    # '%' is marked safe so escapes already present are not quoted again.
+    path = quote(path, safe='/%')
+    return urlunparse(ParseResult(scheme, netloc, path, params, query, fragment))
+
+
class MarkdownEmitter(BaseEmitter):
"""
Build from a document_tree (html2creole.parser.HtmlParser instance) a
@@ -109,11 +122,14 @@ class MarkdownEmitter(BaseEmitter):
def tt_emit(self, node: DocNode):
return self._typeface(node, key='##')
+ def _typeface_html(self, node, tag):
+ return f'<{tag}>{self.emit_children(node)}</{tag}>'
+
def sup_emit(self, node: DocNode):
- return self._typeface(node, key='^^')
+ return self._typeface_html(node, tag='sup')
def sub_emit(self, node: DocNode):
- return self._typeface(node, key=',,')
+ return self._typeface_html(node, tag='sub')
def u_emit(self, node: DocNode):
return self._typeface(node, key='__')
@@ -133,7 +149,9 @@ class MarkdownEmitter(BaseEmitter):
def a_emit(self, node: DocNode):
link_text = self.emit_children(node)
- url = node.attrs['href']
+
+ url = quote_link(node.attrs['href'])
+
title = node.attrs.get('title')
if title:
return f'[{link_text}]({url} "{title}")'
@@ -141,7 +159,7 @@ class MarkdownEmitter(BaseEmitter):
return f'[{link_text}]({url})'
def img_emit(self, node: DocNode):
- src = node.attrs['src']
+ src = quote_link(node.attrs['src'])
title = node.attrs.get('title')
alt = node.attrs.get('alt', '')