from glob import glob import os from os import listdir import os.path import re from tempfile import mktemp def _escapeRegexChars(txt, escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')): return escapeRE.sub(r'\\\1', txt) def findFiles(*args, **kw): """Recursively find all the files matching a glob pattern. This function is a wrapper around the FileFinder class. See its docstring for details about the accepted arguments, etc.""" return FileFinder(*args, **kw).files() def replaceStrInFiles(files, theStr, repl): """Replace all instances of 'theStr' with 'repl' for each file in the 'files' list. Returns a dictionary with data about the matches found. This is like string.replace() on a multi-file basis. This function is a wrapper around the FindAndReplace class. See its docstring for more details.""" pattern = _escapeRegexChars(theStr) return FindAndReplace(files, pattern, repl).results() def replaceRegexInFiles(files, pattern, repl): """Replace all instances of regex 'pattern' with 'repl' for each file in the 'files' list. Returns a dictionary with data about the matches found. This is like re.sub on a multi-file basis. This function is a wrapper around the FindAndReplace class. See its docstring for more details.""" return FindAndReplace(files, pattern, repl).results() ################################################## ## CLASSES class FileFinder: """Traverses a directory tree and finds all files in it that match one of the specified glob patterns.""" def __init__(self, rootPath, globPatterns=('*',), ignoreBasenames=('CVS', '.svn'), ignoreDirs=(), ): self._rootPath = rootPath self._globPatterns = globPatterns self._ignoreBasenames = ignoreBasenames self._ignoreDirs = ignoreDirs self._files = [] self.walkDirTree(rootPath) def walkDirTree(self, dir='.', listdir=os.listdir, isdir=os.path.isdir, join=os.path.join, ): """Recursively walk through a directory tree and find matching files.""" processDir = self.processDir filterDir = self.filterDir pendingDirs = [dir] addDir = pendingDirs.append getDir = pendingDirs.pop while pendingDirs: dir = getDir() ## process this dir processDir(dir) ## and add sub-dirs for baseName in listdir(dir): fullPath = join(dir, baseName) if isdir(fullPath): if filterDir(baseName, fullPath): addDir( fullPath ) def filterDir(self, baseName, fullPath): """A hook for filtering out certain dirs. """ return not (baseName in self._ignoreBasenames or fullPath in self._ignoreDirs) def processDir(self, dir, glob=glob): extend = self._files.extend for pattern in self._globPatterns: extend( glob(os.path.join(dir, pattern)) ) def files(self): return self._files class _GenSubberFunc: """Converts a 'sub' string in the form that one feeds to re.sub (backrefs, groups, etc.) into a function that can be used to do the substitutions in the FindAndReplace class.""" backrefRE = re.compile(r'\\([1-9][0-9]*)') groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_]*)>') def __init__(self, replaceStr): self._src = replaceStr self._pos = 0 self._codeChunks = [] self.parse() def src(self): return self._src def pos(self): return self._pos def setPos(self, pos): self._pos = pos def atEnd(self): return self._pos >= len(self._src) def advance(self, offset=1): self._pos += offset def readTo(self, to, start=None): if start == None: start = self._pos self._pos = to if self.atEnd(): return self._src[start:] else: return self._src[start:to] ## match and get methods def matchBackref(self): return self.backrefRE.match(self.src(), self.pos()) def getBackref(self): m = self.matchBackref() self.setPos(m.end()) return m.group(1) def matchGroup(self): return self.groupRE.match(self.src(), self.pos()) def getGroup(self): m = self.matchGroup() self.setPos(m.end()) return m.group(1) ## main parse loop and the eat methods def parse(self): while not self.atEnd(): if self.matchBackref(): self.eatBackref() elif self.matchGroup(): self.eatGroup() else: self.eatStrConst() def eatStrConst(self): startPos = self.pos() while not self.atEnd(): if self.matchBackref() or self.matchGroup(): break else: self.advance() strConst = self.readTo(self.pos(), start=startPos) self.addChunk(repr(strConst)) def eatBackref(self): self.addChunk( 'm.group(' + self.getBackref() + ')' ) def eatGroup(self): self.addChunk( 'm.group("' + self.getGroup() + '")' ) def addChunk(self, chunk): self._codeChunks.append(chunk) ## code wrapping methods def codeBody(self): return ', '.join(self._codeChunks) def code(self): return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody()) def subberFunc(self): exec(self.code()) return subber class FindAndReplace: """Find and replace all instances of 'patternOrRE' with 'replacement' for each file in the 'files' list. This is a multi-file version of re.sub(). 'patternOrRE' can be a raw regex pattern or a regex object as generated by the re module. 'replacement' can be any string that would work with patternOrRE.sub(replacement, fileContents). """ def __init__(self, files, patternOrRE, replacement, recordResults=True): if isinstance(patternOrRE, basestring): self._regex = re.compile(patternOrRE) else: self._regex = patternOrRE if isinstance(replacement, basestring): self._subber = _GenSubberFunc(replacement).subberFunc() else: self._subber = replacement self._pattern = pattern = self._regex.pattern self._files = files self._results = {} self._recordResults = recordResults ## see if we should use pgrep to do the file matching self._usePgrep = False if (os.popen3('pgrep')[2].read()).startswith('Usage:'): ## now check to make sure pgrep understands the pattern tmpFile = mktemp() open(tmpFile, 'w').write('#') if not (os.popen3('pgrep "' + pattern + '" ' + tmpFile)[2].read()): # it didn't print an error msg so we're ok self._usePgrep = True os.remove(tmpFile) self._run() def results(self): return self._results def _run(self): regex = self._regex subber = self._subDispatcher usePgrep = self._usePgrep pattern = self._pattern for file in self._files: if not os.path.isfile(file): continue # skip dirs etc. self._currFile = file found = False if 'orig' in locals(): del orig if self._usePgrep: if os.popen('pgrep "' + pattern + '" ' + file ).read(): found = True else: orig = open(file).read() if regex.search(orig): found = True if found: if 'orig' not in locals(): orig = open(file).read() new = regex.sub(subber, orig) open(file, 'w').write(new) def _subDispatcher(self, match): if self._recordResults: if self._currFile not in self._results: res = self._results[self._currFile] = {} res['count'] = 0 res['matches'] = [] else: res = self._results[self._currFile] res['count'] += 1 res['matches'].append({'contents': match.group(), 'start': match.start(), 'end': match.end(), } ) return self._subber(match) class SourceFileStats: """ """ _fileStats = None def __init__(self, files): self._fileStats = stats = {} for file in files: stats[file] = self.getFileStats(file) def rawStats(self): return self._fileStats def summary(self): codeLines = 0 blankLines = 0 commentLines = 0 totalLines = 0 for fileStats in self.rawStats().values(): codeLines += fileStats['codeLines'] blankLines += fileStats['blankLines'] commentLines += fileStats['commentLines'] totalLines += fileStats['totalLines'] stats = {'codeLines': codeLines, 'blankLines': blankLines, 'commentLines': commentLines, 'totalLines': totalLines, } return stats def printStats(self): pass def getFileStats(self, fileName): codeLines = 0 blankLines = 0 commentLines = 0 commentLineRe = re.compile(r'\s#.*$') blankLineRe = re.compile('\s$') lines = open(fileName).read().splitlines() totalLines = len(lines) for line in lines: if commentLineRe.match(line): commentLines += 1 elif blankLineRe.match(line): blankLines += 1 else: codeLines += 1 stats = {'codeLines': codeLines, 'blankLines': blankLines, 'commentLines': commentLines, 'totalLines': totalLines, } return stats