summaryrefslogtreecommitdiff
path: root/sandbox/cben/rolehack/rolehack.py
blob: 13895caa5ecf3ee2cdf3c96c473f3858b3e38693 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""Preprocess reStructuredText roles to directives.

This is a stop-gap hack for prototyping new syntaxes for reST, mainly useful
when you find yourself using directives every 3 lines.  In such cases an
extension to docutils allowing some inline syntax is desired thing and such
extensions most probably will take the form of new interpretted text roles.

This module allows to easily prototype them by converting given interpretted
text roles to directives.  To make them inline, the uses of the roles are
replaced with substitutions and all substitution definitions are appeneded at
the end of the document (hint: use ``replace::`` if you don't want a
directive).

Since what's useful for inline syntaxes might also be useful outside of
paragraphs, preprocessing simple directives (only an argument, no options or
content) into other directives is also supported.

I was too lazy to implement an elaborate command-line interface, so this is
only a module.  You should import it from a python script and call this module
with specific role->template mappings to do the work.

BUGS
====

There are too many.  Most can't be fixed here, the right thing is to extend
the docutils parser...

- Backslashes are not interpretted in any way (except that backticks preceded
  by backslashes are won't be treated as start/end of interpretted text).
  This means backslashes are passed to the directive which probably won't stay
  this way if the role is accepted into docutils (only the double-backtick
  literal text syntax behaves this way).

  This bug is semi-intentional because it makes LaTeX generation easier...

- Any number of lines is consumed in search for the closing backtick,
  disregarding indentation.  The content is pasted into the directive as one
  line with normalized whitespace.

- The width of the substitution references is not equal to the original, so
  you can't use it in tables.

- Long parts of the document without empty lines might cause ``recursion limit
  exceeded`` errors.

- Directives not recognized if preceded by non-whitespace (e.g. in a table).

"""

import re

# Named groups are used to allow simultaneous replacement of all roles.

_re_options = re.IGNORECASE | re.MULTILINE | re.DOTALL | re.VERBOSE

def _role_re(group_name):
    return r'''
    # Start-string:
    (?:^|(?<=\s|[\'"([{<\-/:]))
    `
    (?=\S)
    # Content:
    (?P<%(group_name)s>
        (?:[^`]|\\.)+                           # skip escaped chars
    )
    # End-string:
    (?<=\S)
    `
    (?=$|\s|[#\'")\]}>\-/:.,;!?\\])
    ''' % locals()

_default_re = _role_re('_DEFAULT')

def _role2regexp(role):
    """Return regexp for approximate recognition of `role`."""
    prefix_re = _role_re('prefix_' + role)
    postfix_re = _role_re('postfix_' + role)
    return r'''
    :%(role)s:
    %(prefix_re)s
    |
    %(postfix_re)s
    :%(role)s:
    ''' % locals()

def _dir2regexp(dir):
    """Return regexp for approximate recognition of directive `dir`."""
    return r'''
    ^(?P<indent_%(dir)s> [ \t]* )       # record indentation
    \.\. \s+
    (?P<subst_%(dir)s>
    ##    (?:|[^|]*|)?                    # optional substitution
    )
    \s*
    %(dir)s \s* ::
    (?P<argument_%(dir)s>
        [^\n]*
        (?:
            \n
            (?P=indent_%(dir)s) [ \t]   # bigger indentation
            [^\n]+
        )*
    )
    ''' % locals()

def process(doc, roles={}, default_role=None, directives={}):
    """Process `doc` replacing given `roles`.

    `doc` should be a single string containing the whole document.  The
    `roles` dictionary maps from role names to replacement functions that
    should accept the role content and return the directive text, starting
    from the directive name, e.g.::

        def repl(text):
            return 'raw:: latex\n\n    %s\n' % (text,)

    See `template()` for an easy way to create such trivial functions.  The
    optional `default_role` argument specifies a replacement for the default
    role.

    The `directives` dictionary like `roles` but specifies directive names to
    handle.  The directive can have only an argument; substitution definitions
    with these directives are also recognized.  Indentation is adjusted
    properly for directives.

    """
    re_parts = []
    repls = {}
    if default_role:
        re_parts.append(_default_re)
        repls['_DEFAULT'] = default_role
    for role, repl in roles.items():
        re_parts.append(_role2regexp(role))
        repls['prefix_' + role] = repls['postfix_' + role] = repl
    for dir, repl in directives.items():
        re_parts.append(_dir2regexp(dir))
        repls['argument_' + dir] = repl
    full_re = '\n|'.join(re_parts)
    full_re = re.compile(full_re, _re_options)

    after_output = []
    def count(n=0):
        while True:
            yield n
            n += 1
    ids = count()
    def dispatch(match):
        groupname = match.lastgroup
        content = match.group(groupname)
        kind, name = groupname.split('_', 1)
        if kind == 'argument':          # substitution
            indent = match.group('indent_' + name)
            subst = match.group('subst_' + name)
            repl = '\n.. %s %s' % (subst, repls[groupname](content))
            return indent + repl.replace('\n', '\n' + indent)
        else:                           # role
            id = ids.next()
            subst = '|rolehack_%d|' % (id,)
            repl = '.. %s %s' % (subst, repls[groupname](content))
            after_output.append(repl)
            return subst

    # Hack: process by chunks separated by blank lines, trying to avoid
    # "recursion limit exceeded" errors.
    empty_line_re = re.compile(r'\n[ \t]*\n')
    output = [full_re.sub(dispatch, chunk)
              for chunk in empty_line_re.split(doc)]
    return '\n\n'.join(output + after_output)

def template(pre, post):
    """Make replacement function for wrapping content with two strings."""
    def repl(text):
        return ''.join((pre, text, post))
    return repl

def main(*args, **kw_args):
    """Simple command-line interface."""
    import sys
    def parse_args(input='-', output='-'):
        if input == '-':
            input = sys.stdin
        else:
            input = file(input)
        if output == '-':
            output = sys.stdout
        else:
            output = file(output, 'w')
        output.write(process(input.read(), *args, **kw_args))
    parse_args(*sys.argv[1:])

##main({'foo': template('foo::\n\n    ', '\n')},
##     template('default::\n\n   ', '\n'))