summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-20 23:55:23 +0000
committer milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2018-11-20 23:55:23 +0000
commit 68406f5b1612ed128449330a7ed0418bb2ae6787 (patch)
tree 4ed7108b501c006df57b640a3ebe9129b2432491
parent e3464eabc52829aeecad3db352a02e8f0e29a4a8 (diff)
download docutils-68406f5b1612ed128449330a7ed0418bb2ae6787.tar.gz
Do not unescape text when generating Text nodes.
Store the null-escaped text in the doctree and unescape on export. Enables transforms to account for escaped markup.

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8233 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- docutils/docutils/parsers/rst/roles.py 12
-rw-r--r-- docutils/docutils/parsers/rst/states.py 68
2 files changed, 42 insertions, 38 deletions
diff --git a/docutils/docutils/parsers/rst/roles.py b/docutils/docutils/parsers/rst/roles.py
index 35227e6d1..ef690c5fa 100644
--- a/docutils/docutils/parsers/rst/roles.py
+++ b/docutils/docutils/parsers/rst/roles.py
@@ -195,7 +195,7 @@ class GenericRole:
def __call__(self, role, rawtext, text, lineno, inliner,
options={}, content=[]):
set_classes(options)
- return [self.node_class(rawtext, utils.unescape(text), **options)], []
+ return [self.node_class(rawtext, text, **options)], []
class CustomRole:
@@ -234,7 +234,7 @@ def generic_custom_role(role, rawtext, text, lineno, inliner,
# Once nested inline markup is implemented, this and other methods should
# recursively call inliner.nested_parse().
set_classes(options)
- return [nodes.inline(rawtext, utils.unescape(text), **options)], []
+ return [nodes.inline(rawtext, text, **options)], []
generic_custom_role.options = {'class': directives.class_option}
@@ -255,7 +255,7 @@ register_generic_role('title-reference', nodes.title_reference)
def pep_reference_role(role, rawtext, text, lineno, inliner,
options={}, content=[]):
try:
- pepnum = int(text)
+ pepnum = int(utils.unescape(text))
if pepnum < 0 or pepnum > 9999:
raise ValueError
except ValueError:
@@ -268,7 +268,7 @@ def pep_reference_role(role, rawtext, text, lineno, inliner,
ref = (inliner.document.settings.pep_base_url
+ inliner.document.settings.pep_file_url_template % pepnum)
set_classes(options)
- return [nodes.reference(rawtext, 'PEP ' + utils.unescape(text), refuri=ref,
+ return [nodes.reference(rawtext, 'PEP ' + text, refuri=ref,
**options)], []
register_canonical_role('pep-reference', pep_reference_role)
@@ -276,7 +276,7 @@ register_canonical_role('pep-reference', pep_reference_role)
def rfc_reference_role(role, rawtext, text, lineno, inliner,
options={}, content=[]):
try:
- rfcnum = int(text)
+ rfcnum = int(utils.unescape(text))
if rfcnum <= 0:
raise ValueError
except ValueError:
@@ -288,7 +288,7 @@ def rfc_reference_role(role, rawtext, text, lineno, inliner,
# Base URL mainly used by inliner.rfc_reference, so this is correct:
ref = inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum
set_classes(options)
- node = nodes.reference(rawtext, 'RFC ' + utils.unescape(text), refuri=ref,
+ node = nodes.reference(rawtext, 'RFC ' + text, refuri=ref,
**options)
return [node], []
diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py
index c7ee06f66..bfa07314c 100644
--- a/docutils/docutils/parsers/rst/states.py
+++ b/docutils/docutils/parsers/rst/states.py
@@ -713,20 +713,20 @@ class Inliner:
return (string[:matchend], [], string[matchend:], [], '')
endmatch = end_pattern.search(string[matchend:])
if endmatch and endmatch.start(1): # 1 or more chars
- _text = endmatch.string[:endmatch.start(1)]
- text = unescape(_text, restore_backslashes)
+ text = endmatch.string[:endmatch.start(1)]
+ if restore_backslashes:
+ text = unescape(text, True)
textend = matchend + endmatch.end(1)
rawsource = unescape(string[matchstart:textend], True)
node = nodeclass(rawsource, text)
- node[0].rawsource = unescape(_text, True)
+ node[0].rawsource = unescape(text, True)
return (string[:matchstart], [node],
string[textend:], [], endmatch.group(1))
msg = self.reporter.warning(
'Inline %s start-string without end-string.'
% nodeclass.__name__, line=lineno)
text = unescape(string[matchstart:matchend], True)
- rawsource = unescape(string[matchstart:matchend], True)
- prb = self.problematic(text, rawsource, msg)
+ prb = self.problematic(text, text, msg)
return string[:matchstart], [prb], string[matchend:], [msg], ''
def problematic(self, text, rawsource, message):
@@ -784,7 +784,7 @@ class Inliner:
prb = self.problematic(text, text, msg)
return string[:rolestart], [prb], string[textend:], [msg]
return self.phrase_ref(string[:matchstart], string[textend:],
- rawsource, escaped, unescape(escaped))
+ rawsource, escaped)
else:
rawsource = unescape(string[rolestart:textend], True)
nodelist, messages = self.interpreted(rawsource, escaped, role,
@@ -798,26 +798,30 @@ class Inliner:
prb = self.problematic(text, text, msg)
return string[:matchstart], [prb], string[matchend:], [msg]
- def phrase_ref(self, before, after, rawsource, escaped, text):
+ def phrase_ref(self, before, after, rawsource, escaped, text=None):
+ # `text` is ignored (since 0.15dev)
match = self.patterns.embedded_link.search(escaped)
if match: # embedded <URI> or <alias_>
- text = unescape(escaped[:match.start(0)])
- rawtext = unescape(escaped[:match.start(0)], True)
- aliastext = unescape(match.group(2))
- rawaliastext = unescape(match.group(2), True)
+ text = escaped[:match.start(0)]
+ unescaped = unescape(text)
+ rawtext = unescape(text, True)
+ aliastext = match.group(2)
+ rawaliastext = unescape(aliastext, True)
underscore_escaped = rawaliastext.endswith(r'\_')
if aliastext.endswith('_') and not (underscore_escaped
or self.patterns.uri.match(aliastext)):
aliastype = 'name'
- alias = normalize_name(aliastext[:-1])
+ alias = normalize_name(unescape(aliastext[:-1]))
target = nodes.target(match.group(1), refname=alias)
- target.indirect_reference_name = aliastext[:-1]
+ target.indirect_reference_name = whitespace_normalize_name(
+ unescape(aliastext[:-1]))
else:
aliastype = 'uri'
+ # remove unescaped whitespace
alias_parts = split_escaped_whitespace(match.group(2))
- alias = ' '.join(''.join(unescape(part).split())
+ alias = ' '.join(''.join(part.split())
for part in alias_parts)
- alias = self.adjust_uri(alias)
+ alias = self.adjust_uri(unescape(alias))
if alias.endswith(r'\_'):
alias = alias[:-2] + '_'
target = nodes.target(match.group(1), refuri=alias)
@@ -827,14 +831,17 @@ class Inliner:
% aliastext)
if not text:
text = alias
+ unescaped = unescape(text)
rawtext = rawaliastext
else:
+ text = escaped
+ unescaped = unescape(text)
target = None
rawtext = unescape(escaped, True)
- refname = normalize_name(text)
+ refname = normalize_name(unescaped)
reference = nodes.reference(rawsource, text,
- name=whitespace_normalize_name(text))
+ name=whitespace_normalize_name(unescaped))
reference[0].rawsource = rawtext
node_list = [reference]
@@ -991,11 +998,9 @@ class Inliner:
else:
addscheme = ''
text = match.group('whole')
- unescaped = unescape(text)
- rawsource = unescape(text, True)
- reference = nodes.reference(rawsource, unescaped,
- refuri=addscheme + unescaped)
- reference[0].rawsource = rawsource
+ refuri = addscheme + unescape(text)
+ reference = nodes.reference(unescape(text, True), text,
+ refuri=refuri)
return [reference]
else: # not a valid scheme
raise MarkupMismatch
@@ -1003,27 +1008,25 @@ class Inliner:
def pep_reference(self, match, lineno):
text = match.group(0)
if text.startswith('pep-'):
- pepnum = int(match.group('pepnum1'))
+ pepnum = int(unescape(match.group('pepnum1')))
elif text.startswith('PEP'):
- pepnum = int(match.group('pepnum2'))
+ pepnum = int(unescape(match.group('pepnum2')))
else:
raise MarkupMismatch
ref = (self.document.settings.pep_base_url
+ self.document.settings.pep_file_url_template % pepnum)
- unescaped = unescape(text)
- return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
+ return [nodes.reference(unescape(text, True), text, refuri=ref)]
rfc_url = 'rfc%d.html'
def rfc_reference(self, match, lineno):
text = match.group(0)
if text.startswith('RFC'):
- rfcnum = int(match.group('rfcnum'))
+ rfcnum = int(unescape(match.group('rfcnum')))
ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
else:
raise MarkupMismatch
- unescaped = unescape(text)
- return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
+ return [nodes.reference(unescape(text, True), text, refuri=ref)]
def implicit_inline(self, text, lineno):
"""
@@ -1045,7 +1048,7 @@ class Inliner:
self.implicit_inline(text[match.end():], lineno))
except MarkupMismatch:
pass
- return [nodes.Text(unescape(text), rawsource=unescape(text, True))]
+ return [nodes.Text(text, unescape(text, True))]
dispatch = {'*': emphasis,
'**': strong,
@@ -2842,6 +2845,7 @@ class Text(RSTState):
self.nested_parse(indented, input_offset=line_offset, node=definition)
return itemnode, blank_finish
+ #@ TODO ignore null-escaped delimiter
classifier_delimiter = re.compile(' +: +')
def term(self, lines, lineno):
@@ -2855,12 +2859,12 @@ class Text(RSTState):
for i in range(len(text_nodes)):
node = text_nodes[i]
if isinstance(node, nodes.Text):
- parts = self.classifier_delimiter.split(node.rawsource)
+ parts = self.classifier_delimiter.split(node)
if len(parts) == 1:
node_list[-1] += node
else:
text = parts[0].rstrip()
- textnode = nodes.Text(utils.unescape(text, True))
+ textnode = nodes.Text(text, unescape(text, True))
node_list[-1] += textnode
for part in parts[1:]:
classifier_node = nodes.classifier(