1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
|
"""SourceReader class for Cheetah's Parser and CodeGenerator
"""
import re
import sys
EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)')
EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)')
ENCODINGsearch = re.compile("coding[=:]\s*([-\w.]+)").search
class Error(Exception):
pass
class SourceReader(object):
def __init__(self, src, filename=None, breakPoint=None, encoding=None):
## @@TR 2005-01-17: the following comes from a patch Terrel Shumway
## contributed to add unicode support to the reading of Cheetah source
## files with dynamically compiled templates. All the existing unit
## tests pass but, it needs more testing and some test cases of its
## own. My instinct is to move this up into the code that passes in the
## src string rather than leaving it here. As implemented here it
## forces all src strings to unicode, which IMO is not what we want.
# if encoding is None:
# # peek at the encoding in the first two lines
# m = EOLZre.search(src)
# pos = m.end()
# if pos<len(src):
# m = EOLZre.search(src,pos)
# pos = m.end()
# m = ENCODINGsearch(src,0,pos)
# if m:
# encoding = m.group(1)
# else:
# encoding = sys.getfilesystemencoding()
# self._encoding = encoding
# if type(src) is not unicode:
# src = src.decode(encoding)
## end of Terrel's patch
self._src = src
self._filename = filename
self._srcLen = len(src)
if breakPoint == None:
self._breakPoint = self._srcLen
else:
self.setBreakPoint(breakPoint)
self._pos = 0
self._bookmarks = {}
self._posTobookmarkMap = {}
## collect some meta-information
self._EOLs = []
pos = 0
while pos < len(self):
EOLmatch = EOLZre.search(src, pos)
self._EOLs.append(EOLmatch.start())
pos = EOLmatch.end()
self._BOLs = []
for pos in self._EOLs:
BOLpos = self.findBOL(pos)
self._BOLs.append(BOLpos)
def src(self):
return self._src
def filename(self):
return self._filename
def __len__(self):
return self._breakPoint
def __getitem__(self, i):
self.checkPos(i)
return self._src[i]
def __getslice__(self, i, j):
i = max(i, 0); j = max(j, 0)
return self._src[i:j]
def splitlines(self):
if not hasattr(self, '_srcLines'):
self._srcLines = self._src.splitlines()
return self._srcLines
def lineNum(self, pos=None):
if pos == None:
pos = self._pos
for i in range(len(self._BOLs)):
if pos >= self._BOLs[i] and pos <= self._EOLs[i]:
return i
def getRowCol(self, pos=None):
if pos == None:
pos = self._pos
lineNum = self.lineNum(pos)
BOL, EOL = self._BOLs[lineNum], self._EOLs[lineNum]
return lineNum+1, pos-BOL+1
def getRowColLine(self, pos=None):
if pos == None:
pos = self._pos
row, col = self.getRowCol(pos)
return row, col, self.splitlines()[row-1]
def getLine(self, pos):
if pos == None:
pos = self._pos
lineNum = self.lineNum(pos)
return self.splitlines()[lineNum]
def pos(self):
return self._pos
def setPos(self, pos):
self.checkPos(pos)
self._pos = pos
def validPos(self, pos):
return pos <= self._breakPoint and pos >=0
def checkPos(self, pos):
if not pos <= self._breakPoint:
raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
str(self._breakPoint-1) + ")" )
elif not pos >=0:
raise Error("pos (" + str(pos) + ") is invalid: less than 0" )
def breakPoint(self):
return self._breakPoint
def setBreakPoint(self, pos):
if pos > self._srcLen:
raise Error("New breakpoint (" + str(pos) +
") is invalid: beyond the end of stream's source string (" +
str(self._srcLen) + ")" )
elif not pos >= 0:
raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )
self._breakPoint = pos
def setBookmark(self, name):
self._bookmarks[name] = self._pos
self._posTobookmarkMap[self._pos] = name
def hasBookmark(self, name):
return name in self._bookmarks
def gotoBookmark(self, name):
if not self.hasBookmark(name):
raise Error("Invalid bookmark (" + name + ") is invalid: does not exist")
pos = self._bookmarks[name]
if not self.validPos(pos):
raise Error("Invalid bookmark (" + name + ', '+
str(pos) + ") is invalid: pos is out of range" )
self._pos = pos
def atEnd(self):
return self._pos >= self._breakPoint
def atStart(self):
return self._pos == 0
def peek(self, offset=0):
self.checkPos(self._pos+offset)
pos = self._pos + offset
return self._src[pos]
def getc(self):
pos = self._pos
if self.validPos(pos+1):
self._pos += 1
return self._src[pos]
def ungetc(self, c=None):
if not self.atStart():
raise Error('Already at beginning of stream')
self._pos -= 1
if not c==None:
self._src[self._pos] = c
def advance(self, offset=1):
self.checkPos(self._pos + offset)
self._pos += offset
def rev(self, offset=1):
self.checkPos(self._pos - offset)
self._pos -= offset
def read(self, offset):
self.checkPos(self._pos + offset)
start = self._pos
self._pos += offset
return self._src[start:self._pos]
def readTo(self, to, start=None):
self.checkPos(to)
if start == None:
start = self._pos
self._pos = to
return self._src[start:to]
def readToEOL(self, start=None, gobble=True):
EOLmatch = EOLZre.search(self.src(), self.pos())
if gobble:
pos = EOLmatch.end()
else:
pos = EOLmatch.start()
return self.readTo(to=pos, start=start)
def find(self, it, pos=None):
if pos == None:
pos = self._pos
return self._src.find(it, pos )
def startswith(self, it, pos=None):
if self.find(it, pos) == self.pos():
return True
else:
return False
def rfind(self, it, pos):
if pos == None:
pos = self._pos
return self._src.rfind(it, pos)
def findBOL(self, pos=None):
if pos == None:
pos = self._pos
src = self.src()
return max(src.rfind('\n', 0, pos)+1, src.rfind('\r', 0, pos)+1, 0)
def findEOL(self, pos=None, gobble=False):
if pos == None:
pos = self._pos
match = EOLZre.search(self.src(), pos)
if gobble:
return match.end()
else:
return match.start()
def isLineClearToPos(self, pos=None):
if pos == None:
pos = self.pos()
self.checkPos(pos)
src = self.src()
BOL = self.findBOL()
return BOL == pos or src[BOL:pos].isspace()
def matches(self, strOrRE):
if isinstance(strOrRE, (str, unicode)):
return self.startswith(strOrRE, pos=self.pos())
else: # assume an re object
return strOrRE.match(self.src(), self.pos())
def matchWhiteSpace(self, WSchars=' \f\t'):
return (not self.atEnd()) and self.peek() in WSchars
def getWhiteSpace(self, max=None, WSchars=' \f\t'):
if not self.matchWhiteSpace(WSchars):
return ''
start = self.pos()
breakPoint = self.breakPoint()
if max is not None:
breakPoint = min(breakPoint, self.pos()+max)
while self.pos() < breakPoint:
self.advance()
if not self.matchWhiteSpace(WSchars):
break
return self.src()[start:self.pos()]
def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
return self.atEnd() or not self.peek() in WSchars
def getNonWhiteSpace(self, WSchars=' \f\t\n\r'):
if not self.matchNonWhiteSpace(WSchars):
return ''
start = self.pos()
while self.pos() < self.breakPoint():
self.advance()
if not self.matchNonWhiteSpace(WSchars):
break
return self.src()[start:self.pos()]
|