diff options
author | R. Tyler Ballance <tyler@slide.com> | 2009-07-16 15:25:04 -0700 |
---|---|---|
committer | R. Tyler Ballance <tyler@slide.com> | 2009-07-16 15:25:04 -0700 |
commit | 832a7c766de46cff23d6716ece9efd79db78cf5d (patch) | |
tree | 33e02b22a69491ea12241461ffdc9caa1f65d15b /cheetah/SourceReader.py | |
parent | dc896aa348b7d5e4dbeed440c6ae8cf8ebdf2fdd (diff) | |
download | python-cheetah-832a7c766de46cff23d6716ece9efd79db78cf5d.tar.gz |
Rename the root package to "cheetah" instead of "src" to follow more conventional python package naming
Diffstat (limited to 'cheetah/SourceReader.py')
-rw-r--r-- | cheetah/SourceReader.py | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/cheetah/SourceReader.py b/cheetah/SourceReader.py new file mode 100644 index 0000000..0dc0e60 --- /dev/null +++ b/cheetah/SourceReader.py @@ -0,0 +1,303 @@ +# $Id: SourceReader.py,v 1.15 2007/04/03 01:57:42 tavis_rudd Exp $ +"""SourceReader class for Cheetah's Parser and CodeGenerator + +Meta-Data +================================================================================ +Author: Tavis Rudd <tavis@damnsimple.com> +License: This software is released for unlimited distribution under the + terms of the MIT license. See the LICENSE file. +Version: $Revision: 1.15 $ +Start Date: 2001/09/19 +Last Revision Date: $Date: 2007/04/03 01:57:42 $ +""" +__author__ = "Tavis Rudd <tavis@damnsimple.com>" +__revision__ = "$Revision: 1.15 $"[11:-2] + +import re +import sys + +EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)') +EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)') +ENCODINGsearch = re.compile("coding[=:]\s*([-\w.]+)").search + +class Error(Exception): + pass + +class SourceReader: + def __init__(self, src, filename=None, breakPoint=None, encoding=None): + + ## @@TR 2005-01-17: the following comes from a patch Terrel Shumway + ## contributed to add unicode support to the reading of Cheetah source + ## files with dynamically compiled templates. All the existing unit + ## tests pass but, it needs more testing and some test cases of its + ## own. My instinct is to move this up into the code that passes in the + ## src string rather than leaving it here. As implemented here it + ## forces all src strings to unicode, which IMO is not what we want. + # if encoding is None: + # # peek at the encoding in the first two lines + # m = EOLZre.search(src) + # pos = m.end() + # if pos<len(src): + # m = EOLZre.search(src,pos) + # pos = m.end() + # m = ENCODINGsearch(src,0,pos) + # if m: + # encoding = m.group(1) + # else: + # encoding = sys.getfilesystemencoding() + # self._encoding = encoding + # if type(src) is not unicode: + # src = src.decode(encoding) + ## end of Terrel's patch + + self._src = src + self._filename = filename + + self._srcLen = len(src) + if breakPoint == None: + self._breakPoint = self._srcLen + else: + self.setBreakPoint(breakPoint) + self._pos = 0 + self._bookmarks = {} + self._posTobookmarkMap = {} + + ## collect some meta-information + self._EOLs = [] + pos = 0 + while pos < len(self): + EOLmatch = EOLZre.search(src, pos) + self._EOLs.append(EOLmatch.start()) + pos = EOLmatch.end() + + self._BOLs = [] + for pos in self._EOLs: + BOLpos = self.findBOL(pos) + self._BOLs.append(BOLpos) + + def src(self): + return self._src + + def filename(self): + return self._filename + + def __len__(self): + return self._breakPoint + + def __getitem__(self, i): + self.checkPos(i) + return self._src[i] + + def __getslice__(self, i, j): + i = max(i, 0); j = max(j, 0) + return self._src[i:j] + + def splitlines(self): + if not hasattr(self, '_srcLines'): + self._srcLines = self._src.splitlines() + return self._srcLines + + def lineNum(self, pos=None): + if pos == None: + pos = self._pos + + for i in range(len(self._BOLs)): + if pos >= self._BOLs[i] and pos <= self._EOLs[i]: + return i + + def getRowCol(self, pos=None): + if pos == None: + pos = self._pos + lineNum = self.lineNum(pos) + BOL, EOL = self._BOLs[lineNum], self._EOLs[lineNum] + return lineNum+1, pos-BOL+1 + + def getRowColLine(self, pos=None): + if pos == None: + pos = self._pos + row, col = self.getRowCol(pos) + return row, col, self.splitlines()[row-1] + + def getLine(self, pos): + if pos == None: + pos = self._pos + lineNum = self.lineNum(pos) + return self.splitlines()[lineNum] + + def pos(self): + return self._pos + + def setPos(self, pos): + self.checkPos(pos) + self._pos = pos + + + def validPos(self, pos): + return pos <= self._breakPoint and pos >=0 + + def checkPos(self, pos): + if not pos <= self._breakPoint: + raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" + + str(self._breakPoint-1) + ")" ) + elif not pos >=0: + raise Error("pos (" + str(pos) + ") is invalid: less than 0" ) + + def breakPoint(self): + return self._breakPoint + + def setBreakPoint(self, pos): + if pos > self._srcLen: + raise Error("New breakpoint (" + str(pos) + + ") is invalid: beyond the end of stream's source string (" + + str(self._srcLen) + ")" ) + elif not pos >= 0: + raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" ) + + self._breakPoint = pos + + def setBookmark(self, name): + self._bookmarks[name] = self._pos + self._posTobookmarkMap[self._pos] = name + + def hasBookmark(self, name): + return self._bookmarks.has_key(name) + + def gotoBookmark(self, name): + if not self.hasBookmark(name): + raise Error("Invalid bookmark (" + name + ") is invalid: does not exist") + pos = self._bookmarks[name] + if not self.validPos(pos): + raise Error("Invalid bookmark (" + name + ', '+ + str(pos) + ") is invalid: pos is out of range" ) + self._pos = pos + + def atEnd(self): + return self._pos >= self._breakPoint + + def atStart(self): + return self._pos == 0 + + def peek(self, offset=0): + self.checkPos(self._pos+offset) + pos = self._pos + offset + return self._src[pos] + + def getc(self): + pos = self._pos + if self.validPos(pos+1): + self._pos += 1 + return self._src[pos] + + def ungetc(self, c=None): + if not self.atStart(): + raise Error('Already at beginning of stream') + + self._pos -= 1 + if not c==None: + self._src[self._pos] = c + + def advance(self, offset=1): + self.checkPos(self._pos + offset) + self._pos += offset + + def rev(self, offset=1): + self.checkPos(self._pos - offset) + self._pos -= offset + + def read(self, offset): + self.checkPos(self._pos + offset) + start = self._pos + self._pos += offset + return self._src[start:self._pos] + + def readTo(self, to, start=None): + self.checkPos(to) + if start == None: + start = self._pos + self._pos = to + return self._src[start:to] + + + def readToEOL(self, start=None, gobble=True): + EOLmatch = EOLZre.search(self.src(), self.pos()) + if gobble: + pos = EOLmatch.end() + else: + pos = EOLmatch.start() + return self.readTo(to=pos, start=start) + + + def find(self, it, pos=None): + if pos == None: + pos = self._pos + return self._src.find(it, pos ) + + def startswith(self, it, pos=None): + if self.find(it, pos) == self.pos(): + return True + else: + return False + + def rfind(self, it, pos): + if pos == None: + pos = self._pos + return self._src.rfind(it, pos) + + def findBOL(self, pos=None): + if pos == None: + pos = self._pos + src = self.src() + return max(src.rfind('\n',0,pos)+1, src.rfind('\r',0,pos)+1, 0) + + def findEOL(self, pos=None, gobble=False): + if pos == None: + pos = self._pos + + match = EOLZre.search(self.src(), pos) + if gobble: + return match.end() + else: + return match.start() + + def isLineClearToPos(self, pos=None): + if pos == None: + pos = self.pos() + self.checkPos(pos) + src = self.src() + BOL = self.findBOL() + return BOL == pos or src[BOL:pos].isspace() + + def matches(self, strOrRE): + if isinstance(strOrRE, (str, unicode)): + return self.startswith(strOrRE, pos=self.pos()) + else: # assume an re object + return strOrRE.match(self.src(), self.pos()) + + def matchWhiteSpace(self, WSchars=' \f\t'): + return (not self.atEnd()) and self.peek() in WSchars + + def getWhiteSpace(self, max=None, WSchars=' \f\t'): + if not self.matchWhiteSpace(WSchars): + return '' + start = self.pos() + breakPoint = self.breakPoint() + if max is not None: + breakPoint = min(breakPoint, self.pos()+max) + while self.pos() < breakPoint: + self.advance() + if not self.matchWhiteSpace(WSchars): + break + return self.src()[start:self.pos()] + + def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'): + return self.atEnd() or not self.peek() in WSchars + + def getNonWhiteSpace(self, WSchars=' \f\t\n\r'): + if not self.matchNonWhiteSpace(WSchars): + return '' + start = self.pos() + while self.pos() < self.breakPoint(): + self.advance() + if not self.matchNonWhiteSpace(WSchars): + break + return self.src()[start:self.pos()] |