diff options
author | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2018-11-20 23:55:23 +0000 |
---|---|---|
committer | milde <milde@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2018-11-20 23:55:23 +0000 |
commit | 68406f5b1612ed128449330a7ed0418bb2ae6787 (patch) | |
tree | 4ed7108b501c006df57b640a3ebe9129b2432491 | |
parent | e3464eabc52829aeecad3db352a02e8f0e29a4a8 (diff) | |
download | docutils-68406f5b1612ed128449330a7ed0418bb2ae6787.tar.gz |
Do not unescape text when generating Text nodes.
Store the null-escaped text in the doctree and unescape on export.
Enables transforms to account for escaped markup.
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@8233 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | docutils/docutils/parsers/rst/roles.py | 12 |
-rw-r--r-- | docutils/docutils/parsers/rst/states.py | 68 |
2 files changed, 42 insertions, 38 deletions
diff --git a/docutils/docutils/parsers/rst/roles.py b/docutils/docutils/parsers/rst/roles.py index 35227e6d1..ef690c5fa 100644 --- a/docutils/docutils/parsers/rst/roles.py +++ b/docutils/docutils/parsers/rst/roles.py @@ -195,7 +195,7 @@ class GenericRole: def __call__(self, role, rawtext, text, lineno, inliner, options={}, content=[]): set_classes(options) - return [self.node_class(rawtext, utils.unescape(text), **options)], [] + return [self.node_class(rawtext, text, **options)], [] class CustomRole: @@ -234,7 +234,7 @@ def generic_custom_role(role, rawtext, text, lineno, inliner, # Once nested inline markup is implemented, this and other methods should # recursively call inliner.nested_parse(). set_classes(options) - return [nodes.inline(rawtext, utils.unescape(text), **options)], [] + return [nodes.inline(rawtext, text, **options)], [] generic_custom_role.options = {'class': directives.class_option} @@ -255,7 +255,7 @@ register_generic_role('title-reference', nodes.title_reference) def pep_reference_role(role, rawtext, text, lineno, inliner, options={}, content=[]): try: - pepnum = int(text) + pepnum = int(utils.unescape(text)) if pepnum < 0 or pepnum > 9999: raise ValueError except ValueError: @@ -268,7 +268,7 @@ def pep_reference_role(role, rawtext, text, lineno, inliner, ref = (inliner.document.settings.pep_base_url + inliner.document.settings.pep_file_url_template % pepnum) set_classes(options) - return [nodes.reference(rawtext, 'PEP ' + utils.unescape(text), refuri=ref, + return [nodes.reference(rawtext, 'PEP ' + text, refuri=ref, **options)], [] register_canonical_role('pep-reference', pep_reference_role) @@ -276,7 +276,7 @@ register_canonical_role('pep-reference', pep_reference_role) def rfc_reference_role(role, rawtext, text, lineno, inliner, options={}, content=[]): try: - rfcnum = int(text) + rfcnum = int(utils.unescape(text)) if rfcnum <= 0: raise ValueError except ValueError: @@ -288,7 +288,7 @@ def rfc_reference_role(role, rawtext, text, lineno, 
inliner, # Base URL mainly used by inliner.rfc_reference, so this is correct: ref = inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum set_classes(options) - node = nodes.reference(rawtext, 'RFC ' + utils.unescape(text), refuri=ref, + node = nodes.reference(rawtext, 'RFC ' + text, refuri=ref, **options) return [node], [] diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py index c7ee06f66..bfa07314c 100644 --- a/docutils/docutils/parsers/rst/states.py +++ b/docutils/docutils/parsers/rst/states.py @@ -713,20 +713,20 @@ class Inliner: return (string[:matchend], [], string[matchend:], [], '') endmatch = end_pattern.search(string[matchend:]) if endmatch and endmatch.start(1): # 1 or more chars - _text = endmatch.string[:endmatch.start(1)] - text = unescape(_text, restore_backslashes) + text = endmatch.string[:endmatch.start(1)] + if restore_backslashes: + text = unescape(text, True) textend = matchend + endmatch.end(1) rawsource = unescape(string[matchstart:textend], True) node = nodeclass(rawsource, text) - node[0].rawsource = unescape(_text, True) + node[0].rawsource = unescape(text, True) return (string[:matchstart], [node], string[textend:], [], endmatch.group(1)) msg = self.reporter.warning( 'Inline %s start-string without end-string.' 
% nodeclass.__name__, line=lineno) text = unescape(string[matchstart:matchend], True) - rawsource = unescape(string[matchstart:matchend], True) - prb = self.problematic(text, rawsource, msg) + prb = self.problematic(text, text, msg) return string[:matchstart], [prb], string[matchend:], [msg], '' def problematic(self, text, rawsource, message): @@ -784,7 +784,7 @@ class Inliner: prb = self.problematic(text, text, msg) return string[:rolestart], [prb], string[textend:], [msg] return self.phrase_ref(string[:matchstart], string[textend:], - rawsource, escaped, unescape(escaped)) + rawsource, escaped) else: rawsource = unescape(string[rolestart:textend], True) nodelist, messages = self.interpreted(rawsource, escaped, role, @@ -798,26 +798,30 @@ class Inliner: prb = self.problematic(text, text, msg) return string[:matchstart], [prb], string[matchend:], [msg] - def phrase_ref(self, before, after, rawsource, escaped, text): + def phrase_ref(self, before, after, rawsource, escaped, text=None): + # `text` is ignored (since 0.15dev) match = self.patterns.embedded_link.search(escaped) if match: # embedded <URI> or <alias_> - text = unescape(escaped[:match.start(0)]) - rawtext = unescape(escaped[:match.start(0)], True) - aliastext = unescape(match.group(2)) - rawaliastext = unescape(match.group(2), True) + text = escaped[:match.start(0)] + unescaped = unescape(text) + rawtext = unescape(text, True) + aliastext = match.group(2) + rawaliastext = unescape(aliastext, True) underscore_escaped = rawaliastext.endswith(r'\_') if aliastext.endswith('_') and not (underscore_escaped or self.patterns.uri.match(aliastext)): aliastype = 'name' - alias = normalize_name(aliastext[:-1]) + alias = normalize_name(unescape(aliastext[:-1])) target = nodes.target(match.group(1), refname=alias) - target.indirect_reference_name = aliastext[:-1] + target.indirect_reference_name = whitespace_normalize_name( + unescape(aliastext[:-1])) else: aliastype = 'uri' + # remove unescaped whitespace alias_parts = 
split_escaped_whitespace(match.group(2)) - alias = ' '.join(''.join(unescape(part).split()) + alias = ' '.join(''.join(part.split()) for part in alias_parts) - alias = self.adjust_uri(alias) + alias = self.adjust_uri(unescape(alias)) if alias.endswith(r'\_'): alias = alias[:-2] + '_' target = nodes.target(match.group(1), refuri=alias) @@ -827,14 +831,17 @@ class Inliner: % aliastext) if not text: text = alias + unescaped = unescape(text) rawtext = rawaliastext else: + text = escaped + unescaped = unescape(text) target = None rawtext = unescape(escaped, True) - refname = normalize_name(text) + refname = normalize_name(unescaped) reference = nodes.reference(rawsource, text, - name=whitespace_normalize_name(text)) + name=whitespace_normalize_name(unescaped)) reference[0].rawsource = rawtext node_list = [reference] @@ -991,11 +998,9 @@ class Inliner: else: addscheme = '' text = match.group('whole') - unescaped = unescape(text) - rawsource = unescape(text, True) - reference = nodes.reference(rawsource, unescaped, - refuri=addscheme + unescaped) - reference[0].rawsource = rawsource + refuri = addscheme + unescape(text) + reference = nodes.reference(unescape(text, True), text, + refuri=refuri) return [reference] else: # not a valid scheme raise MarkupMismatch @@ -1003,27 +1008,25 @@ class Inliner: def pep_reference(self, match, lineno): text = match.group(0) if text.startswith('pep-'): - pepnum = int(match.group('pepnum1')) + pepnum = int(unescape(match.group('pepnum1'))) elif text.startswith('PEP'): - pepnum = int(match.group('pepnum2')) + pepnum = int(unescape(match.group('pepnum2'))) else: raise MarkupMismatch ref = (self.document.settings.pep_base_url + self.document.settings.pep_file_url_template % pepnum) - unescaped = unescape(text) - return [nodes.reference(unescape(text, True), unescaped, refuri=ref)] + return [nodes.reference(unescape(text, True), text, refuri=ref)] rfc_url = 'rfc%d.html' def rfc_reference(self, match, lineno): text = match.group(0) if 
text.startswith('RFC'): - rfcnum = int(match.group('rfcnum')) + rfcnum = int(unescape(match.group('rfcnum'))) ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum else: raise MarkupMismatch - unescaped = unescape(text) - return [nodes.reference(unescape(text, True), unescaped, refuri=ref)] + return [nodes.reference(unescape(text, True), text, refuri=ref)] def implicit_inline(self, text, lineno): """ @@ -1045,7 +1048,7 @@ class Inliner: self.implicit_inline(text[match.end():], lineno)) except MarkupMismatch: pass - return [nodes.Text(unescape(text), rawsource=unescape(text, True))] + return [nodes.Text(text, unescape(text, True))] dispatch = {'*': emphasis, '**': strong, @@ -2842,6 +2845,7 @@ class Text(RSTState): self.nested_parse(indented, input_offset=line_offset, node=definition) return itemnode, blank_finish + #@ TODO ignore null-escaped delimiter classifier_delimiter = re.compile(' +: +') def term(self, lines, lineno): @@ -2855,12 +2859,12 @@ class Text(RSTState): for i in range(len(text_nodes)): node = text_nodes[i] if isinstance(node, nodes.Text): - parts = self.classifier_delimiter.split(node.rawsource) + parts = self.classifier_delimiter.split(node) if len(parts) == 1: node_list[-1] += node else: text = parts[0].rstrip() - textnode = nodes.Text(utils.unescape(text, True)) + textnode = nodes.Text(text, unescape(text, True)) node_list[-1] += textnode for part in parts[1:]: classifier_node = nodes.classifier( |