diff options
author | david_abrahams <david_abrahams@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2004-01-30 03:39:41 +0000 |
---|---|---|
committer | david_abrahams <david_abrahams@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2004-01-30 03:39:41 +0000 |
commit | da3677954688a874ab00524586b7896675f03f7a (patch) | |
tree | a70ec0fdf9e71179d7f2c0912c8ba95bf380963d | |
parent | ebaa35c82398d98beb3916d921b1d7161839752d (diff) | |
download | docutils-da3677954688a874ab00524586b7896675f03f7a.tar.gz |
Pass all known tests
git-svn-id: http://svn.code.sf.net/p/docutils/code/branches/nesting@1822 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
-rw-r--r-- | docutils/docutils/parsers/rst/states.py | 115 | ||||
-rwxr-xr-x | docutils/test/test_parsers/test_rst/test_inline_markup.py | 34 | ||||
-rwxr-xr-x | docutils/test/test_parsers/test_rst/test_nested_inline_markup.py | 166 |
3 files changed, 259 insertions, 56 deletions
diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py index 734b3fccf..eb95da7b0 100644 --- a/docutils/docutils/parsers/rst/states.py +++ b/docutils/docutils/parsers/rst/states.py @@ -591,6 +591,25 @@ class Inliner: remaining, lineno, self.patterns.initial.search) return nodes, messages + def debug_match(self, msg, match, group): + """ + Show where the given group of the given sre match object + falls in the text that was searched, and where the match + overall falls + """ + if match and self._debug: + searched = match.string + self %= msg + ' ' + repr(searched) + x0 = len(repr(searched[:match.start(0)])) + x1 = len(repr(searched[:match.start(group)])) + x2 = len(repr(searched[:match.end(group)])) + x3 = len(repr(searched[:match.end(0)])) + self %= ( + (x0 + len(msg)) * ' ' + + (x1-x0) * '-' + + (x2-x1)*'^' + + (x3-x2) * '-') + def inner_parse(self, remaining, lineno, token_search): """The guts of the parse method""" nodes = [] # Buffer for result nodes @@ -598,27 +617,36 @@ class Inliner: prefixes = [] # Buffer for un-marked text preceding first # recognized explicit inline markup self.indent += ' ' - match = None + token_match = None while remaining: # !!! all "self %=" lines for debugging; 2Bcut self %= 'inner_parse of: %r' % remaining - match = token_search(remaining) - if not match: + token_match = token_search(remaining) + if not token_match: break - groups = match.groupdict() - # self %= 'with: %r' % match.re.pattern - self %= 'inner_parse, groupdict', groups - self %= 'inner_parse, groups:', match.groups() + groups = token_match.groupdict() + # self %= 'with: %r' % token_match.re.pattern + # self %= 'inner_parse, groupdict', groups + # self %= 'inner_parse, groups:', token_match.groups() + opener = groups['start'] or groups['backquote'] \ or groups['refend'] or groups['fnend'] + if not opener: # either the end-string was found or we're done - self %= 'inner_parse, dropping: %r' % remaining[match.start(1):] - remaining = remaining[:match.start(1)] + self %= 'inner_parse, dropping: %r' % remaining[token_match.start(1):] + remaining = remaining[:token_match.start(1)] break - self %= 'inner_parse, opener: %r' % opener + + # self %= 'inner_parse, opener: %r' % opener + if self._debug: + opengrp = [ + x for x in ('start', 'backquote', 'refend', 'fnend') if groups[x]][0] + self.debug_match('inner_parse:', token_match, opengrp) + method = self.dispatch[opener] before, inlines, remaining, sysmessages = method( - self, match, lineno) + self, token_match, lineno) + prefixes.append(before) messages += sysmessages if inlines: @@ -629,7 +657,7 @@ class Inliner: if remaining: nodes += self.implicit_inline(remaining, lineno) self.indent = self.indent[:-2] - return nodes, messages, match + return nodes, messages, token_match openers = '\'"([{<' closers = '\'")]}>' @@ -687,9 +715,9 @@ class Inliner: '', non_whitespace_after, [ ('backquote', - '(?P<role>(:%s:)?)' % simplename, # optional role + '(?P<role>(:%s:)?)' % simplename, # optional role '', - ['`(?!`)'] # backquote but not literal + ['`(?!`)'] # backquote but not literal ) ] ) @@ -707,12 +735,20 @@ class Inliner: ) interpreted_or_phrase_ref_end = ( r'(?:' + embedded_uri + ')?' + + # The assumption here is that the only excuse for escaped + # whitespace immediately following an inline markup start + # string is to prevent the backquote from being interpreted as + # part of the literal token (``). This negative lookbehind is + # required for cases such as: + # + # `\ `\ suffix roles`:emphasis: aren't worth the trouble`:strong: + # + # (hint, hint) + + r'(?<!^\x00[ \n])' + '`(?P<rolesuffix>:' + simplename + ':)?(?:__?)?' ) - interpreted_or_phrase_ref_end2 = ( - r'(?:' + embedded_uri + ')?' - + '`(?:(?P<rolesuffix>:' + simplename + ':)(?:__?)?|(?:__?))' - ) + _end_pattern = non_whitespace_escape_before + '(%s)' + end_string_suffix def _compile_end_pattern(endpattern, _end_pattern = _end_pattern): @@ -720,6 +756,7 @@ class Inliner: _re_map = {} + # Matches named groups in regexps _groupname = re.compile(r'\(\?P\<[^>]+\>', re.UNICODE) def _push_end_string(self, startmatch, endpattern): @@ -729,13 +766,14 @@ class Inliner: end-of-string in nested markup """ oldpattern = startmatch.re.pattern - self %= '_push_end_string, endpattern=', endpattern + # self %= '_push_end_string, endpattern=', endpattern # self %= '_push_end_string, oldpattern=', oldpattern X = '(?##)' parts = oldpattern.split(X) if len(parts) == 2: + # no end strings being sought yet part1 = ( '(?:' + self.non_whitespace_escape_before + '(?:' @@ -744,11 +782,13 @@ class Inliner: allends = '(' + endpattern + ')' part2 = ')' + self.end_string_suffix + ')|' + parts[0] part3 = parts[1] + else: part1,oldends,part2,part3 = parts - groupnames = self._groupname.findall(endpattern) - for n in groupnames: - oldends = oldends.replace(n, '(?:') + + # No group name can appear twice in a regexp. + oldends = self._groupname.sub('(?:',oldends) + allends = '(%s)(?:%s)?|%s' % (endpattern,oldends,oldends) newpattern = ''.join(( @@ -759,7 +799,7 @@ class Inliner: part3 )) - self %= "_push_end_string -> r'" + newpattern + "'" + # self %= "_push_end_string -> r'" + newpattern + "'" return self._compile_re(newpattern) def _compile_re(self, pattern): @@ -779,8 +819,6 @@ class Inliner: end-of-string in nested markup """ oldpattern = startmatch.re.pattern - self %= '_push_end_string, endpattern=', endpattern - # self %= '_push_end_string, oldpattern=', oldpattern X = '(?##)' parts = oldpattern.split(X) @@ -921,7 +959,7 @@ class Inliner: % nodeclass.__name__, line=lineno) rawsource = null2escape(start) prb = self.problematic(rawsource, rawsource, msg) - return before, [prb], remaining, [msg], '' + return (before, [prb], remaining, [msg], '') def problematic(self, text, rawsource, message): msgid = self.document.set_id(message, self.parent) @@ -944,8 +982,9 @@ class Inliner: return self.nested_inline_obj(match, lineno, r'\*\*', nodes.strong) def interpreted_or_phrase_ref(self, startmatch, lineno): + # self %= 'interpreted_or_phrase_ref:' before,start,remaining = _split_match(startmatch,'interpreted_or_phrase_ref') - + role = startmatch.group('role') roleposition = '' @@ -955,31 +994,23 @@ class Inliner: role = role[1:-1] # drop the leading and trailing colons roleposition = 'prefix' elif self.quoted_start(startmatch): - return (remaining, [], remaining, []) + return (before+start, [], remaining, []) # Look for the end - if role: - endpattern = self.interpreted_or_phrase_ref_end - else: - endpattern = self.interpreted_or_phrase_ref_end2 - - pattern = self._push_end_string(startmatch, endpattern) + newpattern = self._push_end_string(startmatch, self.interpreted_or_phrase_ref_end) - children,msgs,endmatch = self.inner_parse(remaining,lineno,pattern.search) - self %= 'interpreted_or_phrase_ref:' + children,msgs,endmatch = self.inner_parse(remaining,lineno,newpattern.search) self %= ' searched = %r' % remaining self %= ' children =', children if endmatch and endmatch.group(1): - # self %= ' groups = ', endmatch.groups() - # self %= ' named groups = ', endmatch.groupdict() mid, end, after = _split_match(endmatch, 1) # length absorbed by inner_parse innerlen = len(remaining)-len(endmatch.string) between = remaining[:innerlen]+mid - # self %= ' split=', (before, start, between, end, after) + rawsource = null2escape(start+between+end) if endmatch.group('rolesuffix'): @@ -994,8 +1025,6 @@ class Inliner: role = endmatch.group('rolesuffix')[1:-1] # drop the colons roleposition = 'suffix' - rawsource = null2escape(start+between+end) - if rawsource[-1:] == '_': if not role: @@ -1338,7 +1367,7 @@ class Inliner: prb = self.problematic(between, between, msg) return [prb], [msg] ref = self.pep_url % pepnum - return [nodes.reference(rawsource, 'PEP ' + between, refuri=ref, *children)], [] + return [nodes.reference(rawsource, 'PEP ' + between, refuri=ref)], [] def rfc_reference_role(self, role, rawsource, between, children, lineno): try: @@ -1352,7 +1381,7 @@ class Inliner: prb = self.problematic(between, between, msg) return [prb], [msg] ref = self.rfc_url % rfcnum - return [nodes.reference(rawsource, 'RFC ' + between, refuri=ref, *children)], [] + return [nodes.reference(rawsource, 'RFC ' + between, refuri=ref)], [] class Body(RSTState): diff --git a/docutils/test/test_parsers/test_rst/test_inline_markup.py b/docutils/test/test_parsers/test_rst/test_inline_markup.py index 10893f069..c90b34db2 100755 --- a/docutils/test/test_parsers/test_rst/test_inline_markup.py +++ b/docutils/test/test_parsers/test_rst/test_inline_markup.py @@ -56,8 +56,7 @@ across lines* """], ["""\ '*emphasis*' and 1/*emphasis*/2 and 3-*emphasis*-4 and 5:*emphasis*:6 -but not '*' or '"*"' or x*2* or 2*x* or \\*args or * -or *the\\* *stars\\\\\\* *inside* +but not '*' or '"*"' or x*2* or 2*x* or \\*args or * . (however, '*args' will trigger a warning and may be problematic) @@ -79,16 +78,13 @@ what about *this**? <emphasis> emphasis :6 - but not '*' or '"*"' or x*2* or 2*x* or *args or * - or \n\ - <emphasis> - the* *stars\* *inside + but not '*' or '"*"' or x*2* or 2*x* or *args or * . <paragraph> (however, ' <problematic id="id2" refid="id1"> * args' will trigger a warning and may be problematic) - <system_message backrefs="id2" id="id1" level="2" line="5" source="test data" type="WARNING"> + <system_message backrefs="id2" id="id1" level="2" line="4" source="test data" type="WARNING"> <paragraph> Inline emphasis start-string without end-string. <paragraph> @@ -147,9 +143,9 @@ totest['strong'] = [ Inline strong start-string without end-string. """], ["""\ -Strong asterisk: ***** +Strong asterisk: **\*** -Strong double asterisk: ****** +Strong double asterisk: **\*\*** """, """\ <document source="test data"> @@ -505,18 +501,30 @@ See `HTML Anchors: \\<a>`_. <document source="test data"> <paragraph> <reference anonymous="1"> - embedded URI with too much whitespace < http://example.com/ + embedded URI with too much whitespace < + <reference refuri="http://example.com/"> + http://example.com/ + \n\ long/path /and /whitespace > <paragraph> <reference anonymous="1"> - embedded URI with too much whitespace at end <http://example.com/ + embedded URI with too much whitespace at end < + <reference refuri="http://example.com/"> + http://example.com/ + \n\ long/path /and /whitespace > <paragraph> <reference anonymous="1"> - embedded URI with no preceding whitespace<http://example.com> + embedded URI with no preceding whitespace< + <reference refuri="http://example.com"> + http://example.com + > <paragraph> <reference anonymous="1"> - escaped URI <http://example.com> + escaped URI < + <reference refuri="http://example.com"> + http://example.com + > <paragraph> See \n\ <reference refname="html anchors: <a>"> diff --git a/docutils/test/test_parsers/test_rst/test_nested_inline_markup.py b/docutils/test/test_parsers/test_rst/test_nested_inline_markup.py new file mode 100755 index 000000000..3e6a3f052 --- /dev/null +++ b/docutils/test/test_parsers/test_rst/test_nested_inline_markup.py @@ -0,0 +1,166 @@ +#! /usr/bin/env python + +# Copy this file to docutils/test/test_parsers/test_rst/ and do +# ``chmod +x test_inline_markup.py``, then execute this file to test. + +# To be added (later) to +# docutils/test/test_parsers/test_rst/test_inline_markup.py? + +from __init__ import DocutilsTestSupport + +def suite(): + s = DocutilsTestSupport.ParserTestSuite() + s.generateTests(totest) + return s + +totest = {} + + +totest['nested'] = [ +["""\ +*emphasis **strong*** +""", +"""\ +<document source="test data"> + <paragraph> + <emphasis> + emphasis \n\ + <strong> + strong +"""], +["""\ +**strong *emphasis*** +""", +"""\ +<document source="test data"> + <paragraph> + <strong> + strong \n\ + <emphasis> + emphasis +"""], +["""\ +*emphasis ``literal``* +""", +"""\ +<document source="test data"> + <paragraph> + <emphasis> + emphasis \n\ + <literal> + literal +"""], +["""\ +*emphasis **strong* +""", +"""\ +<document source="test data"> + <paragraph> + <emphasis> + emphasis \n\ + <problematic id="id2" refid="id1"> + ** + strong + <system_message backrefs="id2" id="id1" level="2" line="1" source="test data" type="WARNING"> + <paragraph> + Inline strong start-string without end-string. +"""], +["""\ +:emphasis:`interpreted :strong:`text`` +""", +"""\ +<document source="test data"> + <paragraph> + <emphasis> + interpreted + <strong> + text +""" +], +["""\ +*emphasis **strong** +""", +"""\ +<document source="test data"> + <paragraph> + <problematic id="id2" refid="id1"> + * + emphasis \n\ + <strong> + strong + <system_message backrefs="id2" id="id1" level="2" line="1" source="test data" type="WARNING"> + <paragraph> + Inline emphasis start-string without end-string. +"""], +["""\ +*emphasized ``literal`` and |substitution ref| and ref_* +""", +"""\ +<document source="test data"> + <paragraph> + <emphasis> + emphasized \n\ + <literal> + literal + and \n\ + <substitution_reference refname="substitution ref"> + substitution ref + and \n\ + <reference refname="ref"> + ref +"""], +["""\ +Explicit roles for standard inline markup: + +:emphasis:`emphasis including :strong:`strong +including :literal:`inline literal text```. +""", +"""\ +<document source="test data"> + <paragraph> + Explicit roles for standard inline markup: + <paragraph> + <emphasis> + emphasis including \n\ + <strong> + strong + including \n\ + <literal> + inline literal text + . +"""], +["""\ +Suffix-based nested explicit roles: + +`\ `\ `inline literal text`:literal: inside +strong`:strong: within emphasis`:emphasis:. +""", +"""\ +<document source="test data"> + <paragraph> + Suffix-based nested explicit roles: + <paragraph> + <emphasis> + <strong> + <literal> + inline literal text + inside + strong + within emphasis + . +"""], +["""\ +``literal *doesn't* **get** `parsed``` +""", +"""\ +<document source="test data"> + <paragraph> + <literal> + literal *doesn't* **get** `parsed` +"""], +] + + +if __name__ == '__main__': + import unittest + unittest.main(defaultTest='suite') |