blob: 677f2bab12897a7e5d0374029ddf29016498ad59 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# -*- coding: utf-8 -*-
"""
Python Markdown
A Python implementation of John Gruber's Markdown.
Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/
Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
Copyright 2007-2019 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)
License: BSD (see LICENSE.md for details).
"""
try:
from HTMLParser import HTMLParser
PY2 = True
except ImportError:
from html.parser import HTMLParser
PY2 = False
class HTMLExtractor(HTMLParser):
    """
    Extract raw HTML from text.

    The raw HTML is stored in the `htmlStash` of the Markdown instance passed
    to `md` and the remaining text is stored in `cleandoc` as a list of strings.

    A raw block starts at a start tag for which `md.is_block_level(tag)` is
    true and whose column offset (as reported by `getpos()`) is less than 4.
    It ends when the tag that opened it is closed. Everything seen in between
    is collected and stored in the stash as a single string; the placeholder
    returned by `md.htmlStash.store` is appended to `cleandoc` in its place.
    """

    def __init__(self, md):
        """
        Set up an extractor bound to `md`.

        `md` must provide `is_block_level(tag)` and `htmlStash.store(html)`;
        both are called while parsing.
        """
        if PY2:
            # In PY2 HTMLParser is an old style class :(
            HTMLParser.__init__(self)
        else:
            super(HTMLExtractor, self).__init__()
        self.md = md
        self.inraw = False   # True while inside a raw HTML block
        self.stack = []      # Open tags of the current raw block; empty otherwise
        self._cache = []     # Pieces of the raw block currently being collected
        self.cleandoc = []   # Output: plain text, inline tags and stash placeholders

    def _at_block_start(self, tag):
        """Return True if `tag` is block level and begins within the first 4 columns."""
        _, col = self.getpos()
        return col < 4 and self.md.is_block_level(tag)

    def handle_starttag(self, tag, attrs):
        """Record a start tag, opening a new raw block when appropriate."""
        if not self.inraw and self._at_block_start(tag):
            # Started a new raw block
            self.inraw = True
        text = self.get_starttag_text()
        if self.inraw:
            # Only tags inside a raw block are tracked. Tracking tags seen
            # outside of one would leave unclosed inline tags (e.g. `<br>`)
            # on the stack and prevent a later raw block from ever ending.
            self.stack.append(tag)
            self._cache.append(text)
        else:
            self.cleandoc.append(text)

    def handle_startendtag(self, tag, attrs):
        """
        Record a self-closing tag (e.g. `<hr />`) exactly once.

        Without this override the base class would call `handle_starttag`
        followed by `handle_endtag`, which adds a spurious closing tag.
        """
        text = self.get_starttag_text()
        if self.inraw:
            # Part of the raw block already being collected.
            self._cache.append(text)
        elif self._at_block_start(tag):
            # A complete raw block on its own.
            self.cleandoc.append(self.md.htmlStash.store(text))
        else:
            self.cleandoc.append(text)

    def handle_endtag(self, tag):
        """Record an end tag, closing the current raw block when it is complete."""
        # NOTE: the parser only passes the (lower-cased) tag name, so the end
        # tag is rebuilt rather than copied verbatim from the input.
        text = '</{0}>'.format(tag)
        if not self.inraw:
            self.cleandoc.append(text)
            return
        self._cache.append(text)
        if tag in self.stack:
            # Discard everything opened after (and including) the matching
            # start tag; this implicitly closes any unclosed children.
            while self.stack:
                if self.stack.pop() == tag:
                    break
        if not self.stack:
            # End of raw block
            self.inraw = False
            self.cleandoc.append(self.md.htmlStash.store(''.join(self._cache)))
            self._cache = []

    def handle_data(self, data):
        """Record text, as part of the current raw block if there is one."""
        if self.inraw:
            self._cache.append(data)
        else:
            self.cleandoc.append(data)
|