diff options
author | Matt Kangas <matt.kangas@10gen.com> | 2013-07-26 16:09:07 -0400 |
---|---|---|
committer | Matt Kangas <matt.kangas@mongodb.com> | 2014-06-18 23:56:23 -0400 |
commit | 153f2302cac190167dd1c039ad2c68c4aa520fcc (patch) | |
tree | 06a4ff2a3f5d6b39c4894c0568566ed3c1b9ed4b /buildscripts/errorcodes.py | |
parent | 23b998ebaf42560281ca93a67d5cb90f53b533a1 (diff) | |
download | mongo-153f2302cac190167dd1c039ad2c68c4aa520fcc.tar.gz |
SERVER-10069 Rewrite errorcodes.py so it captures multiline messages
- captures multiline messages, when possible
- no longer silently modifies source code. (for hygenic build)
- prompts user to reinvoke with "--fix" when conflict or zero code detected
- faster
Diffstat (limited to 'buildscripts/errorcodes.py')
-rwxr-xr-x | buildscripts/errorcodes.py | 398 |
1 files changed, 268 insertions, 130 deletions
diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py index e49073cadce..826515e4478 100755 --- a/buildscripts/errorcodes.py +++ b/buildscripts/errorcodes.py @@ -1,16 +1,30 @@ #!/usr/bin/env python +"""Produces a report of all assertions in the MongoDB server codebase. + +Parses .cpp files for assertions and verifies assertion codes are distinct. +Optionally replaces zero codes in source code with new distinct values. +""" + import os -import sys import re import utils +from collections import defaultdict, namedtuple +from optparse import OptionParser + +ASSERT_NAMES = [ "uassert" , "massert", "fassert", "fassertFailed" ] +MINIMUM_CODE = 10000 + +codes = [] +# Each AssertLocation identifies the C++ source location of an assertion +AssertLocation = namedtuple( "AssertLocation", ['sourceFile', 'lineNum', 'lines', 'code'] ) -assertNames = [ "uassert" , "massert", "fassert", "fassertFailed" ] +# Of historical interest only def assignErrorCodes(): - cur = 10000 - for root in assertNames: + cur = MINIMUM_CODE + for root in ASSERT_NAMES: for x in utils.getAllSourceFiles(): print( x ) didAnything = False @@ -29,151 +43,275 @@ def assignErrorCodes(): out.close() -codes = [] +def parseSourceFiles( callback ): + """Walks MongoDB sourcefiles and invokes callback for each AssertLocation found.""" -def readErrorCodes( callback, replaceZero = False ): - quick = [ "assert" , "Exception"] - ps = [ re.compile( "(([umsgf]asser(t|ted))) *\(( *)(\d+)" ) , - re.compile( "((User|Msg|MsgAssertion)Exceptio(n))\(( *)(\d+)" ), - re.compile( "((fassertFailed)()) *\(( *)(\d+)" ) - ] - - bad = [ re.compile( "^\s*assert *\(" ) ] - - for x in utils.getAllSourceFiles(): - - needReplace = [False] - lines = [] - lastCodes = [0] - lineNum = 1 - - for line in open( x ): - - found = False - for zz in quick: - if line.find( zz ) >= 0: - found = True - break - - if found: - - if x.find( "src/mongo/" ) >= 0: - for b in bad: - if len(b.findall( line )) > 0: - print( x ) - print( line ) - raise Exception( "you can't use a bare assert" ) - - for p in ps: - - def repl( m ): - m = m.groups() - - start = m[0] - spaces = m[3] - code = m[4] - if code == '0' and replaceZero : - code = getNextCode( lastCodes ) - lastCodes.append( code ) - code = str( code ) - needReplace[0] = True - - print( "Adding code " + code + " to line " + x + ":" + str( lineNum ) ) - - else : - codes.append( ( x , lineNum , line , code ) ) - callback( x , lineNum , line , code ) - - return start + "(" + spaces + code - - line = re.sub( p, repl, line ) - # end if ps loop - - if replaceZero : lines.append( line ) - lineNum = lineNum + 1 - - if replaceZero and needReplace[0] : - print( "Replacing file " + x ) - of = open( x + ".tmp", 'w' ) - of.write( "".join( lines ) ) - of.close() - os.remove(x) - os.rename( x + ".tmp", x ) - - -def getNextCode( lastCodes = [0] ): - highest = [max(lastCodes)] - def check( fileName , lineNum , line , code ): - code = int( code ) - if code > highest[0]: - highest[0] = code - readErrorCodes( check ) - return highest[0] + 1 + patterns = [ + re.compile( r"[umsgf]asser(?:t|ted) *\( *(\d+)" ) , + re.compile( r"(?:User|Msg|MsgAssertion)Exception *\( *(\d+)" ), + re.compile( r"fassertFailed(?:NoTrace)? *\( *(\d+)" ) + ] + + bad = [ re.compile( r"^\s*assert *\(" ) ] + + for sourceFile in utils.getAllSourceFiles(): + if not sourceFile.find( "src/mongo/" ) >= 0: + # Skip generated sources + continue + + with open(sourceFile) as f: + line_iterator = enumerate(f, 1) + for (lineNum, line) in line_iterator: + + # See if we can skip regexes + if not any([zz in line for zz in quick]): + continue + + for b in bad: + if b.search(line): + print( "%s\n%d" % (sourceFile, line) ) + msg = "Bare assert prohibited. Replace with [umwdf]assert" + raise Exception(msg) + + # no more than one pattern should ever match + matches = [x for x in [p.search(line) for p in patterns] + if x] + assert len(matches) <= 1, matches + if matches: + match = matches[0] + code = match.group(1) + span = match.span() + + # Advance to statement terminator iff not on this line + lines = [line.strip()] + + if not isTerminated(lines): + for (_lineNum, line) in line_iterator: + lines.append(line.strip()) + if isTerminated(lines): + break + + thisLoc = AssertLocation(sourceFile, lineNum, lines, code) + callback( thisLoc ) + + # end for sourceFile loop + + +def isTerminated( lines ): + """Given .cpp/.h source lines as text, determine if assert is terminated.""" + x = " ".join(lines) + return ';' in x \ + or x.count('(') - x.count(')') <= 0 + + +def getNextCode(): + """Finds next unused assertion code. + + Called by: SConstruct and main() + Since SConstruct calls us, codes[] must be global OR WE REPARSE EVERYTHING + """ + if not len(codes) > 0: + readErrorCodes() + + highest = reduce( lambda x, y: max(int(x), int(y)), + (loc.code for loc in codes) ) + return highest + 1 + def checkErrorCodes(): + """SConstruct expects a boolean response from this function. + """ + (codes, errors) = readErrorCodes() + return len( errors ) == 0 + + +def readErrorCodes(): + """Defines callback, calls parseSourceFiles() with callback, + and saves matches to global codes list. + """ seen = {} errors = [] - def checkDups( fileName , lineNum , line , code ): - if code in seen: - print( "DUPLICATE IDS" ) - print( "%s:%d:%s %s" % ( fileName , lineNum , line.strip() , code ) ) - print( "%s:%d:%s %s" % seen[code] ) - errors.append( seen[code] ) - seen[code] = ( fileName , lineNum , line , code ) - readErrorCodes( checkDups, True ) - return len( errors ) == 0 - -def getBestMessage( err , start ): - err = err.partition( start )[2] - if not err: - return "" - err = err.partition( "\"" )[2] + dups = defaultdict(list) + + # define callback + def checkDups( assertLoc ): + codes.append( assertLoc ) + code = assertLoc.code + + if not code in seen: + seen[code] = assertLoc + else: + if not code in dups: + # on first duplicate, add original to dups, errors + dups[code].append( seen[code] ) + errors.append( seen[code] ) + + dups[code].append( assertLoc ) + errors.append( assertLoc ) + + parseSourceFiles( checkDups ) + + if seen.has_key("0"): + code = "0" + bad = seen[code] + errors.append( bad ) + print( "ZERO_CODE:" ) + print( " %s:%d:%s" % (bad.sourceFile, bad.lineNum, bad.lines[0]) ) + + for code, locations in dups.items(): + print( "DUPLICATE IDS: %s" % code ) + for loc in locations: + print( " %s:%d:%s" % (loc.sourceFile, loc.lineNum, loc.lines[0]) ) + + return (codes, errors) + + +def replaceBadCodes( errors, nextCode ): + """Modifies C++ source files to replace invalid assertion codes. + For now, we only modify zero codes. + + Args: + errors: list of AssertLocation + nextCode: int, next non-conflicting assertion code + """ + zero_errors = [e for e in errors if int(e.code) == 0] + skip_errors = [e for e in errors if int(e.code) != 0] + + for loc in skip_errors: + print "SKIPPING NONZERO code=%s: %s:%s" % (loc.code, loc.sourceFile, loc.lineNum) + + for assertLoc in zero_errors: + (sourceFile, lineNum, lines, code) = assertLoc + print "UPDATING_FILE: %s:%s" % (sourceFile, lineNum) + + ln = lineNum - 1 + + with open(sourceFile, 'r+') as f: + fileLines = f.readlines() + line = fileLines[ln] + print "LINE_%d_BEFORE:%s" % (lineNum, line.rstrip()) + line = re.sub(r'(\( *)(\d+)', + r'\g<1>' + str(nextCode), + line) + print "LINE_%d_AFTER :%s" % (lineNum, line.rstrip()) + fileLines[ln] = line + f.seek(0) + f.writelines(fileLines) + nextCode += 1 + + +def getBestMessage( lines , codeStr ): + """Extracts message from one AssertionLocation.lines entry + + Args: + lines: list of contiguous C++ source lines + codeStr: assertion code found in first line + """ + line = lines if isinstance(lines, str) else " ".join(lines) + + err = line.partition( codeStr )[2] if not err: return "" - err = err.rpartition( "\"" )[0] - if not err: + + # Trim to outer quotes + m = re.search(r'"(.*)"', err) + if not m: return "" - return err - -def genErrorOutput(): - - if os.path.exists( "docs/errors.md" ): - i = open( "docs/errors.md" , "r" ) - - - out = open( "docs/errors.md" , 'wb' ) - out.write( "MongoDB Error Codes\n==========\n\n\n" ) + err = m.group(1) + + # Trim inner quote pairs + err = re.sub(r'" +"', '', err) + err = re.sub(r'" *<< *"', '', err) + err = re.sub(r'" *<<[^<]+<< *"', '<X>', err) + err = re.sub(r'" *\+[^+]+\+ *"', '<X>', err) + + # Trim escaped quotes + err = re.sub(r'\\"', '', err) + + # Iff doublequote still present, trim that and any trailing text + err = re.sub(r'".*$', '', err) + + return err.strip() + + +def writeMarkdownReport( codes, outfile ): + """Write errors.md report to filesystem in Markdown format + + Args: + codes: list of AssertLocation + outfile: string path to a file to overwrite + """ + + baseurl = "http://github.com/mongodb/mongo/blob/master" + + if os.path.exists(outfile): + i = open(outfile, "r" ) + i.close() + + out = open(outfile, 'wb') + out.write("MongoDB Error Codes\n") + out.write("===================\n\n") + out.write("This file is generated by errorcodes.py. Do not edit.\n\n") prev = "" seen = {} - - codes.sort( key=lambda x: x[0]+"-"+x[3] ) - for f,l,line,num in codes: - if num in seen: + + # Sort by sourceFile, then code + codes.sort( key=lambda loc: loc.sourceFile+"-"+loc.code ) + + for assertLoc in codes: + if assertLoc.code in seen: continue - seen[num] = True + seen[assertLoc.code] = True + + (sourceFile, lineNum, lines, code) = assertLoc + + if sourceFile.startswith("./"): + sourceFile = sourceFile[2:] - if f.startswith( "./" ): - f = f[2:] + if sourceFile != prev: + out.write("\n\n%s\n----\n" % sourceFile) + prev = sourceFile - if f != prev: - out.write( "\n\n" ) - out.write( f + "\n----\n" ) - prev = f + url = "%s/%s#L%s" % (baseurl, sourceFile, lineNum) + message = getBestMessage(lines, str(code)) + out.write("* %s [code](%s) %s\n" % (code, url, message)) - url = "http://github.com/mongodb/mongo/blob/master/" + f + "#L" + str(l) - - out.write( "* " + str(num) + " [code](" + url + ") " + getBestMessage( line , str(num) ) + "\n" ) - out.write( "\n" ) out.close() -if __name__ == "__main__": - ok = checkErrorCodes() - print( "ok:" + str( ok ) ) - print( "next: " + str( getNextCode() ) ) + +def main(): + parser = OptionParser(description=__doc__.strip()) + parser.add_option("--fix", dest="replace", + action="store_true", default=False, + help="Fix zero codes in source files [default: %default]") + parser.add_option("-o", dest="outfile", + default="docs/errors.md", + help="Report file [default: %default]") + (options, args) = parser.parse_args() + + (codes, errors) = readErrorCodes() + ok = len(errors) == 0 + next = getNextCode() + + print("ok: %s" % ok) + print("next: %s" % next) + if ok: - genErrorOutput() + writeMarkdownReport(codes, options.outfile) + elif options.replace: + replaceBadCodes(errors, next) + else: + print ERROR_HELP + +ERROR_HELP = """ +ERRORS DETECTED. To correct, run "buildscripts/errorcodes.py --fix" to replace zero codes. +Other errors require manual correction. +""" + +if __name__ == "__main__": + main() |