1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
|
# -*- coding: iso-8859-1 -*-
"""
MoinMoin - ReStructured Text Parser
@copyright: 2004 by Matthew Gilbert <gilbert AT voxmea DOT net>
and by Alexander Schremmer <alex AT alexanderweb DOT de>
@license: GNU GPL, see COPYING for details.
REQUIRES docutils 0.3.3 or later
"""
#############################################################################
### ReStructured Text Parser
#############################################################################
import re
import new
import StringIO
import __builtin__
import sys
import copy
# docutils imports are below
import MoinMoin.parser.wiki
from MoinMoin.Page import Page
Dependencies = [] # this parser just depends on the raw text
# --- make docutils safe by overriding all module-scoped names related to IO ---
# TODO: Add an error message to dummyOpen so that the user knows that they
# requested a docutils feature that is unsupported in MoinMoin.
def dummyOpen(x, y=None, z=None):
    """
    Inert stand-in for the builtin ``open``.

    Takes up to three arguments, ignores all of them, opens nothing and
    always returns None.
    """
    return None
class dummyIO(StringIO.StringIO):
def __init__(self, destination=None, destination_path=None,
encoding=None, error_handler='', autoclose=1,
handle_io_errors=1, source_path=None):
StringIO.StringIO.__init__(self)
pass
class dummyUrllib2:
def urlopen(a):
return StringIO.StringIO()
urlopen = staticmethod(urlopen)
# # # All docutils imports must be contained below here
import docutils
from docutils.core import publish_parts
from docutils.writers import html4css1
from docutils.nodes import fully_normalize_name, reference
from docutils.parsers import rst
from docutils.parsers.rst import directives, roles
# # # All docutils imports must be contained above here
def safe_import(name, globals=None, locals=None, fromlist=None):
    """
    Import a module through the builtin ``__import__`` and neutralise it.

    The imported module gets its ``open`` attribute set to dummyOpen and
    its ``urllib2`` attribute set to dummyUrllib2 before it is returned.
    A falsy import result is returned untouched.
    """
    module = __builtin__.__import__(name, globals, locals, fromlist)
    if not module:
        return module
    module.open = dummyOpen
    module.urllib2 = dummyUrllib2
    return module
# Patch every docutils module that is loaded at this point: each one gets the
# dummy open() and urllib2 replacements, plus an __import__ attribute set to
# safe_import, which applies the same replacements to the modules it imports.
for i in list(sys.modules):
    if not i.startswith('docutils'):
        continue
    # Entries whose value is falsy are skipped.
    if not sys.modules[i]:
        continue
    setattr(sys.modules[i], 'open', dummyOpen)
    setattr(sys.modules[i], 'urllib2', dummyUrllib2)
    setattr(sys.modules[i], '__import__', safe_import)

# The file based IO classes of docutils are replaced by the in-memory dummy.
docutils.io.FileInput = docutils.io.FileOutput = dummyIO
# --- End of dummy-code --------------------------------------------------------
def html_escape_unicode(node):
    """
    Replace every character above code point 127 with a numeric HTML
    character reference (``&#NNN;``).

    node -- the string to escape

    Returns the escaped string. When nothing has to be escaped the argument
    itself is returned unchanged.

    The string is processed in a single pass. The previous implementation
    called ``replace`` on the whole string once for every non-ASCII
    character it met, which is quadratic for text with many such
    characters.

    NOTE: string.encode('ascii', 'xmlcharrefreplace') does the same job but
    is only available in Python 2.3 and above.
    """
    pieces = []
    escaped_any = False
    for char in node:
        code_point = ord(char)
        if code_point > 127:
            pieces.append('&#%d;' % code_point)
            escaped_any = True
        else:
            pieces.append(char)
    if not escaped_any:
        return node
    return ''.join(pieces)
class MoinWriter(html4css1.Writer):
    """
    html4css1 writer that translates the document with MoinTranslator and
    turns unknown references into wiki links instead of errors.
    """

    config_section = 'MoinMoin writer'
    config_section_dependencies = ('writers',)

    # Final translated form of `document`.
    output = None

    def wiki_resolver(self, node):
        """
        Normally an unknown reference would be an error in an reST document.
        However, this is how new documents are created in the wiki. This
        passes on unknown references to eventually be handled by the
        MoinMoin formatter.

        Returns 1 when the reference was resolved here and 0 when it is left
        unresolved.
        """
        # TODO: Need to better document the attributes here.
        if getattr(node, 'indirect_reference_name', None):
            node['refuri'] = node.indirect_reference_name
            return 1
        elif 'id' in node.attributes:
            # I'm pretty sure the first test should catch any targets or
            # references with the "id" attribute. Therefore, if we get to here
            # it's probably an internal link that didn't work so we let it go
            # through as an error.
            return 0
        # Turn the unresolved name into the link target and remember the node.
        node['refuri'] = node['refname']
        del node['refname']
        self.nodes.append(node)
        return 1
    # Written as a plain decimal: the former ``001`` spelling is an octal
    # literal with the same value, but it is a syntax error on Python 3.
    wiki_resolver.priority = 1

    def __init__(self, formatter, request):
        """
        formatter -- MoinMoin formatter used for wiki generated markup
        request   -- MoinMoin request object
        """
        html4css1.Writer.__init__(self)
        self.formatter = formatter
        self.request = request
        # Add our wiki unknown_reference_resolver to our list of functions to
        # run when a target isn't found
        self.unknown_reference_resolvers = [self.wiki_resolver]
        # We create a new parser to process MoinMoin wiki style links in the
        # reST.
        self.wikiparser = MoinMoin.parser.wiki.Parser('', self.request)
        self.wikiparser.formatter = self.formatter
        self.wikiparser.hilite_re = None
        # Nodes that wiki_resolver turned into wiki links.
        self.nodes = []

    def translate(self):
        """
        Walk the document with a MoinTranslator and store the escaped result
        in self.output.
        """
        visitor = MoinTranslator(self.document,
                                 self.formatter,
                                 self.request,
                                 self.wikiparser,
                                 self)
        try:
            self.document.walkabout(visitor)
            self.visitor = visitor
            self.output = html_escape_unicode(visitor.astext())
        finally:
            # MoinTranslator replaces request.write while it is at work and
            # only puts it back in astext(). Restore it here as well so that
            # an exception during the walk does not leave the request with
            # the capturing writer installed.
            self.request.write = visitor.original_write
class Parser:
    """
    MoinMoin parser entry point for reStructuredText pages.

    The raw page text is published through docutils with MoinWriter and the
    resulting HTML is written to the request.
    """

    # allow caching - This should be turned off when testing.
    caching = 1

    def __init__(self, raw, request, **kw):
        """
        raw     -- reST source text of the page
        request -- MoinMoin request; its ``form`` attribute is kept as well
        kw      -- further keyword arguments are accepted and ignored
        """
        self.raw = raw
        self.request = request
        self.form = request.form

    def format(self, formatter):
        """
        Render self.raw and write the HTML to self.request.

        formatter -- MoinMoin formatter handed on to MoinWriter
        """
        # Creating the instance registers the MoinMoin specific directives
        # with docutils; the object itself is not used any further here.
        directive_handlers = MoinDirectives(self.request)
        moin_writer = MoinWriter(formatter, self.request)
        parts = publish_parts(source=self.raw, writer=moin_writer)

        pieces = []
        title = parts['title']
        if title:
            pieces.append('<h2>' + title + '</h2>')
        # If there is only one subtitle then it is held in parts['subtitle'].
        # However, if there is more than one subtitle then this is empty and
        # fragment contains all of the subtitles.
        subtitle = parts['subtitle']
        if subtitle:
            pieces.append('<h3>' + subtitle + '</h3>')
        docinfo = parts['docinfo']
        if docinfo:
            pieces.append(docinfo)
        pieces.append(parts['fragment'])

        self.request.write(html_escape_unicode(''.join(pieces)))
class MoinTranslator(html4css1.HTMLTranslator):
    """
    HTML translator that lets MoinMoin render the wiki specific parts of a
    reST document (links, macros, inline images and some basic markup).

    From construction until astext() is called, request.write is replaced by
    capture_wiki_formatting, so everything the MoinMoin parser or formatter
    writes is collected in self.wiki_text instead of going to the original
    writer.
    """

    def __init__(self, document, formatter, request, parser, writer):
        """
        document  -- docutils document, handed to html4css1.HTMLTranslator
        formatter -- MoinMoin formatter used to generate wiki markup
        request   -- MoinMoin request; its write attribute is hooked here
        parser    -- MoinMoin wiki parser used for links, macros and images
        writer    -- the calling writer (accepted, not used in this method)
        """
        html4css1.HTMLTranslator.__init__(self, document)
        self.formatter = formatter
        self.request = request
        # MMG: Using our own writer when needed. Save the old one to restore
        # after the page has been processed by the html4css1 parser.
        self.original_write, self.request.write = self.request.write, self.capture_wiki_formatting
        self.wikiparser = parser
        self.wikiparser.request = request
        # MoinMoin likes to start the initial headers at level 3 and the title
        # gets level 2, so to comply with their styles, we do here also.
        # TODO: Could this be fixed by passing this value in settings_overrides?
        self.initial_header_level = 3
        # Temporary place for wiki returned markup. This will be filled when
        # replacing the default writer with the capture_wiki_formatting
        # function (see visit_image for an example).
        self.wiki_text = ''
        self.setup_wiki_handlers()

    def capture_wiki_formatting(self, text):
        """
        Captures MoinMoin generated markup to the instance variable
        wiki_text.
        """
        # For some reason getting empty strings here which of course overwrites
        # what we really want (this is called multiple times per MoinMoin
        # format call, which I don't understand). Hence the text is appended.
        self.wiki_text += text

    def process_wiki_text(self, text):
        """
        This sequence is repeated numerous times, so it is captured as a
        single call here. It is important that wiki_text is blanked before we
        make the format call. format will call request.write which we've
        hooked to capture_wiki_formatting. If wiki_text is not blanked
        before a call to request.write we will get the old markup as well as
        the newly generated markup.

        TODO: Could implement this as a list so that it acts as a stack. I
        don't like having to remember to blank wiki_text.
        """
        self.wiki_text = ''
        self.wikiparser.raw = text
        self.wikiparser.format(self.formatter)

    def add_wiki_markup(self):
        """
        Place holder in case this becomes more elaborate someday. For now it
        only appends the MoinMoin generated markup to the html body and
        raises SkipNode.
        """
        self.body.append(self.wiki_text)
        self.wiki_text = ''
        raise docutils.nodes.SkipNode

    def astext(self):
        """
        Restore the original request.write and return the translated text.
        """
        self.request.write = self.original_write
        return html4css1.HTMLTranslator.astext(self)

    def process_inline(self, node, uri_string):
        """
        Process the "inline:" link scheme. This can either come from
        visit_reference or from visit_image. The uri_string changes
        depending on the caller. The uri is passed to MoinMoin to handle the
        inline link. If it is an image, the src line is extracted and passed
        to the html4css1 writer to allow the reST image attributes.
        Otherwise, the html from MoinMoin is inserted into the reST document
        and SkipNode is raised.
        """
        self.process_wiki_text(node[uri_string])
        # Only pass the src and alt parts to the writer. The reST writer
        # inserts its own tags so we don't need the MoinMoin html markup.
        src = re.search('src="([^"]+)"', self.wiki_text)
        if src:
            node['uri'] = src.groups()[0]
            if 'alt' not in node.attributes:
                alt = re.search('alt="([^"]*)"', self.wiki_text)
                if alt:
                    node['alt'] = alt.groups()[0]
        else:
            # Image doesn't exist yet for the page so just use what's
            # returned from MoinMoin verbatim
            self.add_wiki_markup()

    def process_wiki_target(self, target):
        """
        Let MoinMoin render target and insert the result into the body
        (raises SkipNode through add_wiki_markup).
        """
        self.process_wiki_text(target)
        # MMG: May need a call to fixup_wiki_formatting here but I
        # don't think so.
        self.add_wiki_markup()

    def fixup_wiki_formatting(self, text):
        """
        Strip paragraph tags, newlines, the space after a closing '>' and a
        single trailing space from MoinMoin generated markup.
        """
        replacement = {'<p>': '', '</p>': '', '\n': '', '> ': '>'}
        for src, dst in replacement.items():
            text = text.replace(src, dst)
        # Everything seems to have a space ending the text block. We want to
        # get rid of this
        if text and text[-1] == ' ':
            text = text[:-1]
        return text

    def visit_reference(self, node):
        """
        Pass links to MoinMoin to get the correct wiki space url. Extract
        the url and pass it on to the html4css1 writer to handle. Inline
        images are also handled by visit_image. Not sure what the "drawing:"
        link scheme is used for, so for now it is handled here.

        Also included here is a hack to allow MoinMoin macros. This routine
        checks for a link which starts with "[[". This link is passed to the
        MoinMoin formatter and the resulting markup is inserted into the
        document in the place of the original link reference.
        """
        moin_link_schemes = ['wiki:', 'attachment:', 'drawing:', '[[',
                             'inline:']

        if 'refuri' in node.attributes:
            target = None
            refuri = node['refuri']

            # MMG: Fix this line
            if [scheme for scheme in moin_link_schemes if
                    refuri.lstrip().startswith(scheme)]:
                # For a macro, we want the actual text from the user in
                # target, not the fully normalized version that is contained
                # in refuri.
                if refuri.startswith('[['):
                    target = node['name']
                else:
                    target = refuri
            # TODO: Figure out the following two elif's and comment
            # appropriately.
            # The node should have a whitespace normalized name if the
            # docutils reStructuredText parser would normally fully normalize
            # the name.
            elif ('name' in node.attributes and
                  fully_normalize_name(node['name']) == refuri):
                target = ':%s:' % (node['name'])
            # If it's not a uri containing a ':' then it's probably destined
            # for wiki space.
            elif ':' not in refuri:
                target = ':%s:' % (refuri)

            if target:
                if target.startswith('inline:'):
                    self.process_inline(node, 'refuri')
                elif target.startswith('[[') and target.endswith(']]'):
                    self.process_wiki_target(target)
                else:
                    # Not a macro or inline so hopefully it's a link. Put the
                    # target in brackets so that MoinMoin knows it's a link.
                    # Extract the href, if it exists, and let docutils handle
                    # it from there. If there is no href just add whatever
                    # MoinMoin returned.
                    node_text = node.astext().replace('\n', ' ')
                    self.process_wiki_text('[%s %s]' % (target, node_text))
                    href = re.search('href="([^"]+)"', self.wiki_text)
                    if href:
                        # dirty hack in order to undo the HTML entity quoting:
                        # the href taken from the generated markup has its
                        # ampersands written as "&amp;", and the value is
                        # escaped again when the reference is written out.
                        node['refuri'] = href.groups()[0].replace("&amp;", "&")
                    else:
                        self.wiki_text = self.fixup_wiki_formatting(self.wiki_text)
                        self.add_wiki_markup()
        html4css1.HTMLTranslator.visit_reference(self, node)

    def visit_image(self, node):
        """
        Need to intervene in the case of inline images. We need MoinMoin to
        give us the actual src line to the image and then we can feed this
        to the default html4css1 writer. NOTE: Since the writer can't "open"
        this image the scale attribute doesn't work without directly
        specifying the height or width (or both).

        TODO: Need to handle figures similarly.
        """
        uri = node['uri'].lstrip()
        prefix = ''  # assume no prefix
        if ':' in uri:
            prefix = uri.split(':', 1)[0]
        # if prefix isn't URL, try to display in page
        if not prefix.lower() in ('file', 'http', 'https', 'ftp'):
            # no prefix given, so fake "inline:"
            if not prefix:
                node['uri'] = 'inline:' + uri
            self.process_inline(node, 'uri')
        html4css1.HTMLTranslator.visit_image(self, node)

    def create_wiki_functor(self, moin_func):
        """
        Build a (visit, depart) pair of functions for the formatter method
        named moin_func.

        The visit function calls the formatter method with 1, the depart
        function calls it with 0; both append the markup captured through
        request.write to the html body.
        """
        moin_callable = getattr(self.formatter, moin_func)

        def visit_func(self, node):
            self.wiki_text = ''
            self.request.write(moin_callable(1))
            self.body.append(self.wiki_text)

        def depart_func(self, node):
            self.wiki_text = ''
            self.request.write(moin_callable(0))
            self.body.append(self.wiki_text)

        return visit_func, depart_func

    def setup_wiki_handlers(self):
        """
        Have the MoinMoin formatter handle markup when it makes sense. These
        are portions of the document that do not contain reST specific
        markup. This allows these portions of the document to look
        consistent with other wiki pages.

        Setup dispatch routines to handle basic document markup. The
        handlers dict is the html4css1 handler name followed by the wiki
        handler name.
        """
        handlers = {
            # Text Markup
            'emphasis': 'emphasis',
            'strong': 'strong',
            'literal': 'code',
            # Blocks
            'literal_block': 'preformatted',
            # Simple Lists
            'bullet_list': 'bullet_list',
            'list_item': 'listitem',
            # Definition List
            'definition_list': 'definition_list',
            # Admonitions
            'warning': 'highlight'}
        for rest_func, moin_func in handlers.items():
            visit_func, depart_func = self.create_wiki_functor(moin_func)
            # Bind the plain functions to this instance so that they are
            # found as visit_<name> / depart_<name> methods.
            visit_func = new.instancemethod(visit_func, self, MoinTranslator)
            depart_func = new.instancemethod(depart_func, self, MoinTranslator)
            setattr(self, 'visit_%s' % (rest_func), visit_func)
            setattr(self, 'depart_%s' % (rest_func), depart_func)

    # Enumerated list takes an extra parameter so we handle this differently
    def visit_enumerated_list(self, node):
        """Open a numbered list, passing on the node's 'start' value (or None)."""
        self.wiki_text = ''
        self.request.write(self.formatter.number_list(1, start=node.get('start', None)))
        self.body.append(self.wiki_text)

    def depart_enumerated_list(self, node):
        """Close the numbered list opened by visit_enumerated_list."""
        self.wiki_text = ''
        self.request.write(self.formatter.number_list(0))
        self.body.append(self.wiki_text)
class MoinDirectives:
    """
    Class to handle all custom directive handling. This code is called as
    part of the parsing stage.
    """

    def __init__(self, request):
        """
        Register the MoinMoin directives with docutils.

        request -- MoinMoin request, used to look up included pages and to
                   translate messages
        """
        self.request = request

        # include MoinMoin pages
        directives.register_directive('include', self.include)

        # used for MoinMoin macros
        directives.register_directive('macro', self.macro)

        # disallow a few directives in order to prevent XSS
        # disallowed include because it suffers from these bugs:
        #  * recursive includes are possible
        #
        # 'include' is not in this list any more because it is registered
        # above with the MoinMoin specific implementation.
        for blocked_directive in ('meta', 'raw'):
            directives.register_directive(blocked_directive, None)

        # disable the raw role
        roles._roles['raw'] = None

        # As a quick fix to handle recursive includes we limit the times a
        # document can be included to one.
        self.included_documents = []

    # Handle the include directive rather than letting the default docutils
    # parser handle it. This allows the inclusion of MoinMoin pages instead of
    # something from the filesystem.
    def include(self, name, arguments, options, content, lineno,
                content_offset, block_text, state, state_machine):
        """
        Insert the raw body of the MoinMoin page named in content[0] into
        the input of state_machine.

        A page that was already included by this instance, or that does not
        exist, produces a one line message instead. Always returns None.
        """
        _ = self.request.getText

        # content contains the included file name; nothing to do without it
        if not len(content):
            return

        page_name = content[0]
        if page_name in self.included_documents:
            duplicate_notice = [_("**Duplicate included files are not permitted**")]
            state_machine.insert_input(duplicate_notice, 'MoinDirectives')
            return
        self.included_documents.append(page_name)

        page = Page(page_name=page_name, request=self.request)
        if not page.exists():
            lines = [_("**Could not find the referenced page: %s**") % (page_name,)]
        else:
            lines = page.get_raw_body().split('\n')
            # Remove the "#format rst" line
            if lines[0].startswith("#format"):
                del lines[0]

        # Insert the text from the included document and then continue
        # parsing
        state_machine.insert_input(lines, 'MoinDirectives')
        return
    include.content = True

    # Add additional macro directive.
    # This allows MoinMoin macros to be used either by using the directive
    # directly or by using the substitution syntax. Much cleaner than using the
    # reference hack (`[[SomeMacro]]`_). This however simply adds a node to the
    # document tree which is a reference, but through a much better user
    # interface.
    def macro(self, name, arguments, options, content, lineno,
              content_offset, block_text, state, state_machine):
        """
        Turn the macro named in content[0] into a reference node.

        Returns a list holding that one node, or None when content is empty.
        """
        # content contains macro to be called
        if not len(content):
            return

        macro_text = content[0]
        # Allow either with or without brackets
        if not macro_text.startswith('[['):
            macro_text = '[[%s]]' % macro_text

        ref = reference(macro_text, refuri=macro_text)
        ref['name'] = macro_text
        return [ref]
    macro.content = True
|