diff options
author | dalyd <david.daly@mongodb.com> | 2015-08-14 16:44:15 -0400 |
---|---|---|
committer | dalyd <david.daly@mongodb.com> | 2015-09-01 11:36:52 -0400 |
commit | a28f451c46007019e2418825813ac63042dccfa8 (patch) | |
tree | 7a58cb0a5b11464e7707377de867eceb882b4724 /buildscripts | |
parent | 86c25cde6bd26162d93afc2c9a1a28410f58e90f (diff) | |
download | mongo-a28f451c46007019e2418825813ac63042dccfa8.tar.gz |
SERVER-19902: Mongo-perf analysis script -- Use noise data for regression comparison instead of fixed percentage
Signed-off-by: Ramon Fernandez <ramon.fernandez@mongodb.com>
(cherry picked from commit cb91350bf017337a734dcd0321bf4e6c34990b6a)
Diffstat (limited to 'buildscripts')
-rw-r--r-- | buildscripts/perf_regression_check.py | 184 |
1 files changed, 149 insertions, 35 deletions
diff --git a/buildscripts/perf_regression_check.py b/buildscripts/perf_regression_check.py index cfa10eff9d6..7041d7dce79 100644 --- a/buildscripts/perf_regression_check.py +++ b/buildscripts/perf_regression_check.py @@ -10,44 +10,96 @@ from datetime import timedelta, datetime # Loads the history json file, and looks for regressions at the revision 18808cd... # Will exit with status code 1 if any regression is found, 0 otherwise. -def compareResults(this_one, reference, threshold, label, threadThreshold=None) : +def compareOneResultNoise(this_one, reference, label, threadlevel="max", noiseLevel=0, + noiseMultiple=1, minThreshold=0.05): ''' - Take two result series and compare them to see if they are acceptable. + Take two result series and compare them to see if they are acceptable. Return true if failed, and false if pass + Uses historical noise data for the comparison. + ''' - failed = False; - if not reference : + if not reference: + return failed + + ref = "" + current = "" + noise = 0 + + if threadlevel == "max": + ref = reference["max"] + current = this_one["max"] + else: + # Don't do a comparison if the thread data is missing + if not threadlevel in reference["results"].keys(): + return failed + ref = reference["results"][threadlevel]['ops_per_sec'] + current = this_one["results"][threadlevel]['ops_per_sec'] + + noise = noiseLevel * noiseMultiple + delta = minThreshold * ref + if (delta < noise): + delta = noise + # Do the check + if ref - current >= delta: + print ("\tregression found on %s: drop from %s (commit %s) to %s for comparison %s. Diff is" + " %.2f (%.2f%%), noise level is %.2f and multiple is %.2f" % + (threadlevel, ref, reference["revision"][:5], current, label, ref - current, + 100*(ref-current)/ref, noiseLevel, noiseMultiple)) + failed = True + return failed + + +def compareResults(this_one, reference, threshold, label, noiseLevels={}, threadThreshold=None): + ''' + Take two result series and compare them to see if they are acceptable. + Return true if failed, and false if pass + ''' + + failed = False; + if not reference: return failed # Default threadThreshold to the same as the max threshold - if not threadThreshold : + if not threadThreshold: threadThreshold = threshold - + # Check max throughput first - if reference["max"] - this_one["max"] >= (threshold * reference["max"]): - print "\tregression found on max: drop from %s (commit %s) to %s for comparison %s" % (reference["max"], reference["revision"][:5], this_one["max"], label) - failed = True + noise = 0 + # For the max throughput, use the max noise across the thread levels as the noise parameter + if len(noiseLevels.values()) > 0: + noise = max(noiseLevels.values()) + if compareOneResultNoise(this_one, reference, label, "max", noiseLevel=noise, + minThreshold=threshold): + failed = True; # Check for regression on threading levels - for (level, ops_per_sec) in ([(r, this_one["results"][r]['ops_per_sec']) for r in this_one["results"] if type(this_one["results"][r]) == type({})]) : - # Need to get the reference data to compare against - refvalue = reference["results"][level]['ops_per_sec'] - if refvalue - ops_per_sec >= (threadThreshold * refvalue): - print "\tregression found on thread level %s: drop from %s (commit %s) to %s for comparison %s" % (level, refvalue, reference["revision"][:7], ops_per_sec, label) + for (level, ops_per_sec) in (((r, this_one["results"][r]['ops_per_sec']) for r in + this_one["results"] if type(this_one["results"][r]) == type({}))): + noise = 0 + if level in noiseLevels: + noise = noiseLevels[level] + if compareOneResultNoise(this_one, reference, label, level, noiseLevel=noise, + minThreshold=threadThreshold): failed = True - if not failed : - print "\tno regresion against %s" %(label) + if not failed: + print "\tno regression against %s and githash %s" %(label, reference["revision"][:5]) return failed - + def main(args): parser = argparse.ArgumentParser() - parser.add_argument("-f", "--file", dest="file", help="path to json file containing history data") + parser.add_argument("-f", "--file", dest="file", help="path to json file containing" + "history data") parser.add_argument("--rev", dest="rev", help="revision to examine for regressions") - parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against commit form n days ago.") - parser.add_argument("--threshold", default=0.1, type=float, dest="threshold", help="Flag an error if throughput is more than 'threshold'x100 percent off") - parser.add_argument("--threadThreshold", type=float, dest="threadThreshold", help="Flag an error if thread level throughput is more than 'threadThreshold'x100 percent off") - parser.add_argument("--reference", dest="reference", help="Reference commit to compare against. Should be a githash") + parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against" + "commit from n days ago.") + parser.add_argument("--threshold", default=0.05, type=float, dest="threshold", help= + "Don't flag an error if throughput is less than 'threshold'x100 percent off") + parser.add_argument("--threadThreshold", type=float, dest="threadThreshold", help= + "Don't flag an error if thread level throughput is more than" + "'threadThreshold'x100 percent off") + parser.add_argument("--reference", dest="reference", help= + "Reference commit to compare against. Should be a githash") args = parser.parse_args() j = get_json(args.file) h = History(j) @@ -61,18 +113,23 @@ def main(args): print "\tno data at this revision, skipping" continue - #If the new build is 10% lower than the target (3.0 will be used as the baseline for 3.2 for instance), consider it regressed. + #If the new build is 10% lower than the target (3.0 will be + #used as the baseline for 3.2 for instance), consider it + #regressed. previous = h.seriesItemsNBefore(test, args.rev, 1) if not previous: print "\tno previous data, skipping" continue - if compareResults(this_one, previous[0], args.threshold, "Previous", args.threadThreshold) : + if compareResults(this_one, previous[0], args.threshold, "Previous", h.noiseLevels(test), + args.threadThreshold): failed = True daysprevious = h.seriesItemsNDaysBefore(test, args.rev,args.ndays) reference = h.seriesAtRevision(test, args.reference) - if compareResults(this_one, daysprevious, args.threshold, "NDays", args.threadThreshold) : + if compareResults(this_one, daysprevious, args.threshold, "NDays", h.noiseLevels(test), + threadThreshold=args.threadThreshold): failed = True - if compareResults(this_one, reference, args.threshold, "Reference", args.threadThreshold) : + if compareResults(this_one, reference, args.threshold, "Reference", h.noiseLevels(test), + threadThreshold=args.threadThreshold): failed = True if failed: @@ -80,6 +137,19 @@ def main(args): else: sys.exit(0) +# We wouldn't need this function if we had numpy installed on the system +def computeRange(result_list): + ''' + Compute the max, min, and range (max - min) for the result list + ''' + min = max = result_list[0] + for result in result_list: + if result < min: + min = result + if result > max: + max = result + return (max,min,max-min) + def get_json(filename): jf = open(filename, 'r') json_obj = json.load(jf) @@ -88,9 +158,11 @@ def get_json(filename): class History(object): def __init__(self, jsonobj): self._raw = sorted(jsonobj, key=lambda d: d["order"]) + self._noise = None def testnames(self): - return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]] for c in self._raw]))) + return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]] + for c in self._raw]))) def seriesAtRevision(self, testname, revision): s = self.series(testname) @@ -101,7 +173,7 @@ class History(object): def seriesItemsNBefore(self, testname, revision, n): """ - Returns the 'n' items in the series under the given test name that + Returns the 'n' items in the series under the given test name that appear prior to the specified revision. """ results = [] @@ -117,27 +189,66 @@ class History(object): return results[-1*n:] return [] + def computeNoiseLevels(self): + """ + For each test, go through all results, and compute the average + noise (max - min) for the series + + """ + self._noise = {} + testnames = self.testnames() + for test in testnames: + self._noise[test] = {} + s = self.series(test) + threads = [] + for result in s: + threads = result["threads"] + break + + # Determine levels from last commit? Probably a better way to do this. + for thread in threads: + s = self.series(test) + self._noise[test][thread] = sum((computeRange(x["results"][thread]["ops_per_sec_values"])[2] + for x in s)) + s = self.series(test) + self._noise[test][thread] /= sum(1 for x in s) + + + def noiseLevels(self, testname): + """ + Returns the average noise level of the given test. Noise levels + are thread specific. Returns an array + + """ + # check if noise has been computed. Compute if it hasn't + if not self._noise: + print "Computing noise levels" + self.computeNoiseLevels() + # Look up noise value for test + if not testname in self._noise: + print "Test %s not in self._noise" % (testname) + return self._noise[testname] + - # I tried to do this in the form of this file. I feel like it's unneccessarily complicated right now. def seriesItemsNDaysBefore(self, testname, revision, n): """ - Returns the items in the series under the given test name that + Returns the items in the series under the given test name that appear 'n' days prior to the specified revision. """ results = {} # Date for this revision s = self.seriesAtRevision(testname, revision) - if s==[] : + if s==[]: return [] refdate = parser.parse(s["end"]) - timedelta(days=n) - + s = self.series(testname) for result in s: if parser.parse(result["end"]) < refdate: results = result return results - + def series(self, testname): for commit in self._raw: @@ -148,7 +259,10 @@ class History(object): result["revision"] = commit["revision"] result["end"] = commit["data"]["end"] result["order"] = commit["order"] - result["max"] = max(f["ops_per_sec"] for f in result["results"].values() if type(f) == type({})) + result["max"] = max(f["ops_per_sec"] for f in result["results"].values() + if type(f) == type({})) + result["threads"] = [f for f in result["results"] if type(result["results"][f]) + == type({})] yield result @@ -158,5 +272,5 @@ class TestResult: #def max(self): -if __name__ == '__main__': +if __name__ == '__main__': main(sys.argv[1:]) |