# -*- coding: utf-8 -*-
"""
Python documentation LaTeX file tokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For more documentation, look into the ``restwriter.py`` file.
:copyright: 2007 by Georg Brandl.
:license: Python license.
"""
import re
from .scanner import Scanner
class Tokenizer(Scanner):
    """Lex a Python doc LaTeX document into ``(lineno, type, value, raw)`` tokens."""

    # Single-character tokens mapped to their token type names.
    specials = {
        '{': 'bgroup',
        '}': 'egroup',
        '[': 'boptional',
        ']': 'eoptional',
        '~': 'tilde',
        '$': 'mathmode',
    }

    @property
    def mtext(self):
        """Full text of the most recent regex match."""
        return self.match.group()

    def tokenize(self):
        """Return the token generator wrapped in a peekable TokenStream."""
        return TokenStream(self._tokenize())

    def _tokenize(self):
        """Yield ``(lineno, type, value, raw)`` tuples until input is exhausted."""
        lineno = 1
        while not self.eos:
            if self.scan(r'\\verb([^a-zA-Z])(.*?)(\1)'):
                # specialcase \verb here: it uses an arbitrary delimiter
                # character instead of braces, so the generic command rule
                # below cannot lex it.
                yield lineno, 'command', 'verb', '\\verb'
                yield lineno, 'text', self.match.group(1), self.match.group(1)
                yield lineno, 'text', self.match.group(2), self.match.group(2)
                yield lineno, 'text', self.match.group(3), self.match.group(3)
            elif self.scan(r'\\([a-zA-Z]+\*?)[ \t]*'):
                # Named command such as \section or \section*; trailing
                # blanks are consumed as part of the raw command text.
                yield lineno, 'command', self.match.group(1), self.mtext
            elif self.scan(r'\\.'):
                # Single-character escape command such as \% or \{ .
                # ('.' does not match a newline, so \<newline> falls through.)
                yield lineno, 'command', self.mtext[1], self.mtext
            elif self.scan(r'\\\n'):
                # Backslash at end of line is emitted as literal text.
                yield lineno, 'text', self.mtext, self.mtext
                lineno += 1
            elif self.scan(r'%(.*)\n[ \t]*'):
                # Comment runs to end of line; the next line's leading blanks
                # are swallowed.  NOTE(review): a comment on the last line
                # with no trailing newline will not match and hits the
                # RuntimeError below -- confirm inputs always end in \n.
                yield lineno, 'comment', self.match.group(1), self.mtext
                lineno += 1
            elif self.scan(r'[{}\[\]~$]'):
                yield lineno, self.specials[self.mtext], self.mtext, self.mtext
            elif self.scan(r'(\n[ \t]*){2,}'):
                # Two or more newlines form a paragraph separator.
                lines = self.mtext.count('\n')
                yield lineno, 'parasep', '\n' * lines, self.mtext
                lineno += lines
            elif self.scan(r'\n[ \t]*'):
                # A single newline is folded into one space of text.
                yield lineno, 'text', ' ', self.mtext
                lineno += 1
            elif self.scan(r'[^\\%}{\[\]~$\n]+'):
                # Plain text.  BUGFIX: '$' must be excluded from this class,
                # otherwise a math delimiter inside running text is consumed
                # as plain text and never yields a 'mathmode' token.
                yield lineno, 'text', self.mtext, self.mtext
            else:
                raise RuntimeError('unexpected text on line %d: %r' %
                                   (lineno, self.data[self.pos:self.pos+100]))
class TokenStream(object):
    """
    A token stream works like a normal generator just that
    it supports peeking and pushing tokens back to the stream.
    """

    def __init__(self, generator):
        # Underlying token generator.
        self._generator = generator
        # LIFO stack of tokens pushed back onto the stream.
        self._pushed = []
        # Last token returned by pop().
        # NOTE(review): real tokens are 4-tuples while this sentinel has
        # three fields -- confirm no caller unpacks ``last`` before pop().
        self.last = (1, 'initial', '')

    def __iter__(self):
        return self

    def __nonzero__(self):
        """ Are we at the end of the tokenstream? """
        if self._pushed:
            return True
        try:
            # Pull one token ahead and push it back; EAFP avoids a
            # separate "has next" protocol on the generator.
            self.push(self.next())
        except StopIteration:
            return False
        return True

    # BUGFIX/compat: Python 3 consults __bool__, not __nonzero__; without
    # this alias every TokenStream is truthy on Python 3.
    __bool__ = __nonzero__

    def pop(self):
        """ Return the next token from the stream. """
        if self._pushed:
            rv = self._pushed.pop()
        else:
            # next() builtin works on Python 2.6+ and Python 3
            # (generator.next() was removed in Python 3).
            rv = next(self._generator)
        self.last = rv
        return rv

    next = pop
    # Python 3 iterator protocol name.
    __next__ = pop

    def popmany(self, num=1):
        """ Pop a list of tokens. """
        return [self.next() for i in range(num)]

    def peek(self):
        """ Pop and push a token, return it. """
        token = self.next()
        self.push(token)
        return token

    def peekmany(self, num=1):
        """ Pop and push a list of tokens. """
        tokens = self.popmany(num)
        # BUGFIX: push back in reverse order -- _pushed is a LIFO stack,
        # so pushing in forward order made subsequent pops return the
        # peeked tokens reversed.
        for tok in reversed(tokens):
            self.push(tok)
        return tokens

    def push(self, item):
        """ Push a token back to the stream. """
        self._pushed.append(item)