diff options
author | R. Tyler Ballance <tyler@slide.com> | 2009-07-16 15:25:04 -0700 |
---|---|---|
committer | R. Tyler Ballance <tyler@slide.com> | 2009-07-16 15:25:04 -0700 |
commit | 832a7c766de46cff23d6716ece9efd79db78cf5d (patch) | |
tree | 33e02b22a69491ea12241461ffdc9caa1f65d15b /cheetah/FileUtils.py | |
parent | dc896aa348b7d5e4dbeed440c6ae8cf8ebdf2fdd (diff) | |
download | python-cheetah-832a7c766de46cff23d6716ece9efd79db78cf5d.tar.gz |
Rename the root package to "cheetah" instead of "src" to follow more conventional python package naming
Diffstat (limited to 'cheetah/FileUtils.py')
-rw-r--r-- | cheetah/FileUtils.py | 373 |
1 files changed, 373 insertions, 0 deletions
# diff --git a/cheetah/FileUtils.py b/cheetah/FileUtils.py
# (new file mode 100644, index 0000000..97e77f8; the flattened "+" diff
# markers have been removed and the module is laid out as plain Python.)
# $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $
"""File utilities for Python:

Meta-Data
================================================================================
Author: Tavis Rudd <tavis@damnsimple.com>
License: This software is released for unlimited distribution under the
         terms of the MIT license.  See the LICENSE file.
Version: $Revision: 1.12 $
Start Date: 2001/09/26
Last Revision Date: $Date: 2005/11/02 22:26:07 $
"""
__author__ = "Tavis Rudd <tavis@damnsimple.com>"
__revision__ = "$Revision: 1.12 $"[11:-2]


from glob import glob
import os
from os import listdir
import os.path
import re
import subprocess
from tempfile import mkstemp
from tempfile import mktemp

try:
    from types import StringType
except ImportError:
    # Python 3 has no types.StringType; keep the name importable from here.
    StringType = str

try:
    # Python 2: accept both str and unicode wherever "a string" is expected.
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)


def _escapeRegexChars(txt,
                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return 'txt' with every regex metacharacter backslash-escaped.

    'escapeRE' is the compiled pattern that selects the characters to escape;
    it is a default argument so that it is compiled only once.
    """
    return escapeRE.sub(r'\\\1', txt)


def _readFile(path):
    """Return the whole contents of 'path', always closing the file."""
    fp = open(path)
    try:
        return fp.read()
    finally:
        fp.close()


def _writeFile(path, contents):
    """Overwrite 'path' with 'contents', always closing the file."""
    fp = open(path, 'w')
    try:
        fp.write(contents)
    finally:
        fp.close()


def _runCommand(argv):
    """Run 'argv' without a shell and return its raw (stdout, stderr) pair.

    Returns None when the program cannot be started (e.g. not installed).
    Passing an argument list instead of a shell string means patterns and
    file names containing quotes, spaces or shell metacharacters are safe.
    """
    try:
        proc = subprocess.Popen(argv,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        return None
    # communicate() closes stdin, so a tool that reads stdin cannot hang us.
    return proc.communicate()


def findFiles(*args, **kw):
    """Recursively find all the files matching a glob pattern.

    This function is a wrapper around the FileFinder class.  See its docstring
    for details about the accepted arguments, etc."""

    return FileFinder(*args, **kw).files()


def replaceStrInFiles(files, theStr, repl):
    """Replace all instances of 'theStr' with 'repl' for each file in the 'files'
    list. Returns a dictionary with data about the matches found.

    This is like string.replace() on a multi-file basis.

    'theStr' is matched literally: it is regex-escaped before being handed to
    the FindAndReplace class.  See that class's docstring for more details."""

    pattern = _escapeRegexChars(theStr)
    return FindAndReplace(files, pattern, repl).results()


def replaceRegexInFiles(files, pattern, repl):
    """Replace all instances of regex 'pattern' with 'repl' for each file in the
    'files' list. Returns a dictionary with data about the matches found.

    This is like re.sub on a multi-file basis.

    This function is a wrapper around the FindAndReplace class. See its
    docstring for more details."""

    return FindAndReplace(files, pattern, repl).results()


##################################################
## CLASSES

class FileFinder:

    """Traverses a directory tree and finds all files in it that match one of
    the specified glob patterns.

    Constructor arguments:
      rootPath        -- directory at which the traversal starts
      globPatterns    -- glob patterns applied inside every visited directory
      ignoreBasenames -- directory base names that are never descended into
      ignoreDirs      -- directory paths that are never descended into; they
                         are compared as plain strings against
                         os.path.join(parentDir, baseName), so they must be
                         spelled the same way as paths built from 'rootPath'

    The traversal happens in the constructor; call files() for the result.
    Anything the glob patterns match is collected, which includes directories
    when a pattern such as '*' matches them.
    """

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS', '.svn'),
                 ignoreDirs=(),
                 ):

        self._rootPath = rootPath
        self._globPatterns = globPatterns
        self._ignoreBasenames = ignoreBasenames
        self._ignoreDirs = ignoreDirs
        self._files = []

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):

        """Recursively walk through a directory tree and find matching files.

        Directories are visited from an explicit stack (last added, first
        visited) rather than by recursion.  The 'listdir', 'isdir' and 'join'
        arguments default to the os/os.path functions of the same name.
        """
        # NOTE(review): os.path.isdir follows symlinks, so a symlink cycle
        # below 'dir' would keep this loop running -- confirm whether callers
        # can hit that.
        pendingDirs = [dir]

        while pendingDirs:
            currentDir = pendingDirs.pop()
            ## process this dir
            self.processDir(currentDir)

            ## and add sub-dirs
            for baseName in listdir(currentDir):
                fullPath = join(currentDir, baseName)
                if isdir(fullPath) and self.filterDir(baseName, fullPath):
                    pendingDirs.append(fullPath)

    def filterDir(self, baseName, fullPath):

        """A hook for filtering out certain dirs.

        Returns True when the directory should be descended into."""

        return not (baseName in self._ignoreBasenames or
                    fullPath in self._ignoreDirs)

    def processDir(self, dir, glob=glob):
        """Collect everything in 'dir' that matches one of the glob patterns."""
        for pattern in self._globPatterns:
            self._files.extend(glob(os.path.join(dir, pattern)))

    def files(self):
        """Return the list of paths collected by the traversal."""
        return self._files


class _GenSubberFunc:

    """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
    groups, etc.) into a function that can be used to do the substitutions in
    the FindAndReplace class.

    Recognised references are numeric backrefs (\\1), named groups
    (\\g<name>) and numbered groups (\\g<2>); everything else is copied to the
    output literally.
    """

    backrefRE = re.compile(r'\\([1-9][0-9]*)')
    # Group names are identifiers, so digits are allowed after the first
    # character; an all-digit reference (\g<2>) is accepted as well.
    groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_0-9]*|[0-9]+)>')

    def __init__(self, replaceStr):
        self._src = replaceStr
        self._pos = 0
        self._codeChunks = []
        self.parse()

    def src(self):
        """Return the replacement string being parsed."""
        return self._src

    def pos(self):
        """Return the current parse position."""
        return self._pos

    def setPos(self, pos):
        """Move the parse position to 'pos'."""
        self._pos = pos

    def atEnd(self):
        """Return True once the parse position has reached the end."""
        return self._pos >= len(self._src)

    def advance(self, offset=1):
        """Move the parse position forward by 'offset' characters."""
        self._pos += offset

    def readTo(self, to, start=None):
        """Return the text from 'start' (default: current position) up to
        'to', and leave the parse position at 'to'."""
        if start is None:
            start = self._pos
        self._pos = to
        if self.atEnd():
            return self._src[start:]
        else:
            return self._src[start:to]

    ## match and get methods

    def matchBackref(self):
        """Return a match object if a numeric backref starts here."""
        return self.backrefRE.match(self.src(), self.pos())

    def getBackref(self):
        """Consume a numeric backref and return its digits."""
        m = self.matchBackref()
        self.setPos(m.end())
        return m.group(1)

    def matchGroup(self):
        """Return a match object if a \\g<...> reference starts here."""
        return self.groupRE.match(self.src(), self.pos())

    def getGroup(self):
        """Consume a \\g<...> reference and return the text inside <>."""
        m = self.matchGroup()
        self.setPos(m.end())
        return m.group(1)

    ## main parse loop and the eat methods

    def parse(self):
        """Split the replacement string into code chunks."""
        while not self.atEnd():
            if self.matchBackref():
                self.eatBackref()
            elif self.matchGroup():
                self.eatGroup()
            else:
                self.eatStrConst()

    def eatStrConst(self):
        """Consume literal text up to the next reference (or the end)."""
        startPos = self.pos()
        while not self.atEnd():
            if self.matchBackref() or self.matchGroup():
                break
            else:
                self.advance()
        strConst = self.readTo(self.pos(), start=startPos)
        self.addChunk(repr(strConst))

    def eatBackref(self):
        """Consume a numeric backref and emit the matching m.group() call."""
        self.addChunk('m.group(' + self.getBackref() + ')')

    def eatGroup(self):
        """Consume a \\g<...> reference and emit the matching m.group() call."""
        ref = self.getGroup()
        if ref.isdigit():
            # int() normalises leading zeros, which are not valid in a
            # Python 3 integer literal.
            self.addChunk('m.group(' + str(int(ref)) + ')')
        else:
            self.addChunk('m.group("' + ref + '")')

    def addChunk(self, chunk):
        """Append one Python expression (as source text) to the chunk list."""
        self._codeChunks.append(chunk)

    ## code wrapping methods

    def codeBody(self):
        """Return the chunk expressions joined into a list body."""
        return ', '.join(self._codeChunks)

    def code(self):
        """Return the source of a 'subber(m)' function for the chunks."""
        return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())

    def subberFunc(self):
        """Compile code() and return the resulting 'subber' function.

        The generated source only ever contains repr()'d string constants and
        m.group(<digits>) / m.group("<identifier>") calls, so exec'ing it does
        not run arbitrary text from the replacement string.  An explicit
        namespace is used so this works on both Python 2 and Python 3.
        """
        namespace = {}
        exec(self.code(), namespace)
        return namespace['subber']


class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list. This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or
    a regex object as generated by the re module. 'replacement' can be any
    string that would work with patternOrRE.sub(replacement, fileContents),
    or a callable taking a match object.

    The work is done in the constructor.  When 'recordResults' is true,
    results() returns a dict mapping each modified file name to
    {'count': <number of matches>,
     'matches': [{'contents': ..., 'start': ..., 'end': ...}, ...]}.
    """

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):

        if isinstance(patternOrRE, _STRING_TYPES):
            self._regex = re.compile(patternOrRE)
        else:
            self._regex = patternOrRE
        if isinstance(replacement, _STRING_TYPES):
            self._subber = _GenSubberFunc(replacement).subberFunc()
        else:
            self._subber = replacement

        self._pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults

        ## see if we should use pgrep to do the file matching
        self._usePgrep = self._pgrepUnderstands(self._pattern)

        self._run()

    def _pgrepUnderstands(self, pattern):
        """Return True if an external 'pgrep' can prefilter files for 'pattern'.

        'pgrep' is only trusted when it prints a usage message starting with
        'Usage:' when run without arguments and then accepts 'pattern' against
        a scratch file without writing anything to stderr.
        """
        # Flags given to re.compile() (IGNORECASE etc.) cannot be passed on to
        # pgrep, so prefiltering with it could wrongly skip matching files.
        if self._regex.flags & ~re.UNICODE:
            return False

        usage = _runCommand(['pgrep'])
        if usage is None:
            return False
        if not usage[1].decode('ascii', 'replace').startswith('Usage:'):
            return False

        ## now check to make sure pgrep understands the pattern
        fd, tmpFile = mkstemp()
        try:
            try:
                os.write(fd, '#'.encode('ascii'))
            finally:
                os.close(fd)
            probe = _runCommand(['pgrep', pattern, tmpFile])
        finally:
            os.remove(tmpFile)
        # it didn't print an error msg so we're ok
        return probe is not None and not probe[1]

    def results(self):
        """Return the per-file match data gathered while replacing."""
        return self._results

    def _run(self):
        """Apply the substitution to every regular file in the file list."""
        regex = self._regex
        subber = self._subDispatcher
        pattern = self._pattern
        for fileName in self._files:
            if not os.path.isfile(fileName):
                continue # skip dirs etc.

            # _subDispatcher files its results under this name.
            self._currFile = fileName
            orig = None
            found = None
            if self._usePgrep:
                output = _runCommand(['pgrep', pattern, fileName])
                if output is not None:
                    found = bool(output[0])
            if found is None:
                # No pgrep (or it could not be started): search in Python.
                orig = _readFile(fileName)
                found = regex.search(orig) is not None
            if found:
                if orig is None:
                    orig = _readFile(fileName)
                _writeFile(fileName, regex.sub(subber, orig))

    def _subDispatcher(self, match):
        """Record 'match' (when enabled) and return its replacement text."""
        if self._recordResults:
            res = self._results.setdefault(self._currFile,
                                           {'count': 0, 'matches': []})
            res['count'] += 1
            res['matches'].append({'contents': match.group(),
                                   'start': match.start(),
                                   'end': match.end(),
                                   }
                                  )
        return self._subber(match)


class SourceFileStats:

    """Counts code, blank and comment lines for a list of source files.

    A comment line is one whose first non-whitespace character is '#'; a
    blank line is empty or whitespace-only; every other line counts as code.
    """

    _fileStats = None

    def __init__(self, files):
        self._fileStats = stats = {}
        for fileName in files:
            stats[fileName] = self.getFileStats(fileName)

    def rawStats(self):
        """Return the dict mapping each file name to its stats dict."""
        return self._fileStats

    def summary(self):
        """Return the line counts summed over all files."""
        totals = {'codeLines': 0,
                  'blankLines': 0,
                  'commentLines': 0,
                  'totalLines': 0,
                  }
        for fileStats in self.rawStats().values():
            for key in totals:
                totals[key] += fileStats[key]
        return totals

    def printStats(self):
        pass

    def getFileStats(self, fileName):
        """Return the code/blank/comment/total line counts for 'fileName'."""
        codeLines = 0
        blankLines = 0
        commentLines = 0
        # '\s*' rather than '\s': a '#' in column 0 and a completely empty
        # line must be recognised too (splitlines() strips the newline).
        commentLineRe = re.compile(r'\s*#.*$')
        blankLineRe = re.compile(r'\s*$')
        lines = _readFile(fileName).splitlines()
        totalLines = len(lines)

        for line in lines:
            if commentLineRe.match(line):
                commentLines += 1
            elif blankLineRe.match(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines': codeLines,
                 'blankLines': blankLines,
                 'commentLines': commentLines,
                 'totalLines': totalLines,
                 }

        return stats