summary | refs | log | tree | commit | diff
path: root/buildscripts
diff options
context:
space:
mode:
author: dalyd <david.daly@mongodb.com> 2015-08-14 16:44:15 -0400
committer: dalyd <david.daly@mongodb.com> 2015-09-01 11:36:52 -0400
commit: a28f451c46007019e2418825813ac63042dccfa8 (patch)
tree: 7a58cb0a5b11464e7707377de867eceb882b4724 /buildscripts
parent: 86c25cde6bd26162d93afc2c9a1a28410f58e90f (diff)
download: mongo-a28f451c46007019e2418825813ac63042dccfa8.tar.gz
SERVER-19902: Mongo-perf analysis script -- Use noise data for regression comparison instead of fixed percentage
Signed-off-by: Ramon Fernandez <ramon.fernandez@mongodb.com> (cherry picked from commit cb91350bf017337a734dcd0321bf4e6c34990b6a)
Diffstat (limited to 'buildscripts')
-rw-r--r-- buildscripts/perf_regression_check.py 184
1 files changed, 149 insertions, 35 deletions
diff --git a/buildscripts/perf_regression_check.py b/buildscripts/perf_regression_check.py
index cfa10eff9d6..7041d7dce79 100644
--- a/buildscripts/perf_regression_check.py
+++ b/buildscripts/perf_regression_check.py
@@ -10,44 +10,96 @@ from datetime import timedelta, datetime
# Loads the history json file, and looks for regressions at the revision 18808cd...
# Will exit with status code 1 if any regression is found, 0 otherwise.
-def compareResults(this_one, reference, threshold, label, threadThreshold=None) :
+def compareOneResultNoise(this_one, reference, label, threadlevel="max", noiseLevel=0,
+ noiseMultiple=1, minThreshold=0.05):
'''
- Take two result series and compare them to see if they are acceptable.
+ Take two result series and compare them to see if they are acceptable.
Return true if failed, and false if pass
+ Uses historical noise data for the comparison.
+
'''
-
failed = False;
- if not reference :
+ if not reference:
+ return failed
+
+ ref = ""
+ current = ""
+ noise = 0
+
+ if threadlevel == "max":
+ ref = reference["max"]
+ current = this_one["max"]
+ else:
+ # Don't do a comparison if the thread data is missing
+ if not threadlevel in reference["results"].keys():
+ return failed
+ ref = reference["results"][threadlevel]['ops_per_sec']
+ current = this_one["results"][threadlevel]['ops_per_sec']
+
+ noise = noiseLevel * noiseMultiple
+ delta = minThreshold * ref
+ if (delta < noise):
+ delta = noise
+ # Do the check
+ if ref - current >= delta:
+ print ("\tregression found on %s: drop from %s (commit %s) to %s for comparison %s. Diff is"
+ " %.2f (%.2f%%), noise level is %.2f and multiple is %.2f" %
+ (threadlevel, ref, reference["revision"][:5], current, label, ref - current,
+ 100*(ref-current)/ref, noiseLevel, noiseMultiple))
+ failed = True
+ return failed
+
+
+def compareResults(this_one, reference, threshold, label, noiseLevels={}, threadThreshold=None):
+ '''
+ Take two result series and compare them to see if they are acceptable.
+ Return true if failed, and false if pass
+ '''
+
+ failed = False;
+ if not reference:
return failed
# Default threadThreshold to the same as the max threshold
- if not threadThreshold :
+ if not threadThreshold:
threadThreshold = threshold
-
+
# Check max throughput first
- if reference["max"] - this_one["max"] >= (threshold * reference["max"]):
- print "\tregression found on max: drop from %s (commit %s) to %s for comparison %s" % (reference["max"], reference["revision"][:5], this_one["max"], label)
- failed = True
+ noise = 0
+ # For the max throughput, use the max noise across the thread levels as the noise parameter
+ if len(noiseLevels.values()) > 0:
+ noise = max(noiseLevels.values())
+ if compareOneResultNoise(this_one, reference, label, "max", noiseLevel=noise,
+ minThreshold=threshold):
+ failed = True;
# Check for regression on threading levels
- for (level, ops_per_sec) in ([(r, this_one["results"][r]['ops_per_sec']) for r in this_one["results"] if type(this_one["results"][r]) == type({})]) :
- # Need to get the reference data to compare against
- refvalue = reference["results"][level]['ops_per_sec']
- if refvalue - ops_per_sec >= (threadThreshold * refvalue):
- print "\tregression found on thread level %s: drop from %s (commit %s) to %s for comparison %s" % (level, refvalue, reference["revision"][:7], ops_per_sec, label)
+ for (level, ops_per_sec) in (((r, this_one["results"][r]['ops_per_sec']) for r in
+ this_one["results"] if type(this_one["results"][r]) == type({}))):
+ noise = 0
+ if level in noiseLevels:
+ noise = noiseLevels[level]
+ if compareOneResultNoise(this_one, reference, label, level, noiseLevel=noise,
+ minThreshold=threadThreshold):
failed = True
- if not failed :
- print "\tno regresion against %s" %(label)
+ if not failed:
+ print "\tno regression against %s and githash %s" %(label, reference["revision"][:5])
return failed
-
+
def main(args):
parser = argparse.ArgumentParser()
- parser.add_argument("-f", "--file", dest="file", help="path to json file containing history data")
+ parser.add_argument("-f", "--file", dest="file", help="path to json file containing"
+ "history data")
parser.add_argument("--rev", dest="rev", help="revision to examine for regressions")
- parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against commit form n days ago.")
- parser.add_argument("--threshold", default=0.1, type=float, dest="threshold", help="Flag an error if throughput is more than 'threshold'x100 percent off")
- parser.add_argument("--threadThreshold", type=float, dest="threadThreshold", help="Flag an error if thread level throughput is more than 'threadThreshold'x100 percent off")
- parser.add_argument("--reference", dest="reference", help="Reference commit to compare against. Should be a githash")
+ parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against"
+ "commit from n days ago.")
+ parser.add_argument("--threshold", default=0.05, type=float, dest="threshold", help=
+ "Don't flag an error if throughput is less than 'threshold'x100 percent off")
+ parser.add_argument("--threadThreshold", type=float, dest="threadThreshold", help=
+ "Don't flag an error if thread level throughput is more than"
+ "'threadThreshold'x100 percent off")
+ parser.add_argument("--reference", dest="reference", help=
+ "Reference commit to compare against. Should be a githash")
args = parser.parse_args()
j = get_json(args.file)
h = History(j)
@@ -61,18 +113,23 @@ def main(args):
print "\tno data at this revision, skipping"
continue
- #If the new build is 10% lower than the target (3.0 will be used as the baseline for 3.2 for instance), consider it regressed.
+ #If the new build is 10% lower than the target (3.0 will be
+ #used as the baseline for 3.2 for instance), consider it
+ #regressed.
previous = h.seriesItemsNBefore(test, args.rev, 1)
if not previous:
print "\tno previous data, skipping"
continue
- if compareResults(this_one, previous[0], args.threshold, "Previous", args.threadThreshold) :
+ if compareResults(this_one, previous[0], args.threshold, "Previous", h.noiseLevels(test),
+ args.threadThreshold):
failed = True
daysprevious = h.seriesItemsNDaysBefore(test, args.rev,args.ndays)
reference = h.seriesAtRevision(test, args.reference)
- if compareResults(this_one, daysprevious, args.threshold, "NDays", args.threadThreshold) :
+ if compareResults(this_one, daysprevious, args.threshold, "NDays", h.noiseLevels(test),
+ threadThreshold=args.threadThreshold):
failed = True
- if compareResults(this_one, reference, args.threshold, "Reference", args.threadThreshold) :
+ if compareResults(this_one, reference, args.threshold, "Reference", h.noiseLevels(test),
+ threadThreshold=args.threadThreshold):
failed = True
if failed:
@@ -80,6 +137,19 @@ def main(args):
else:
sys.exit(0)
+# We wouldn't need this function if we had numpy installed on the system
+def computeRange(result_list):
+ '''
+ Compute the max, min, and range (max - min) for the result list
+ '''
+ min = max = result_list[0]
+ for result in result_list:
+ if result < min:
+ min = result
+ if result > max:
+ max = result
+ return (max,min,max-min)
+
def get_json(filename):
jf = open(filename, 'r')
json_obj = json.load(jf)
@@ -88,9 +158,11 @@ def get_json(filename):
class History(object):
def __init__(self, jsonobj):
self._raw = sorted(jsonobj, key=lambda d: d["order"])
+ self._noise = None
def testnames(self):
- return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]] for c in self._raw])))
+ return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]]
+ for c in self._raw])))
def seriesAtRevision(self, testname, revision):
s = self.series(testname)
@@ -101,7 +173,7 @@ class History(object):
def seriesItemsNBefore(self, testname, revision, n):
"""
- Returns the 'n' items in the series under the given test name that
+ Returns the 'n' items in the series under the given test name that
appear prior to the specified revision.
"""
results = []
@@ -117,27 +189,66 @@ class History(object):
return results[-1*n:]
return []
+ def computeNoiseLevels(self):
+ """
+ For each test, go through all results, and compute the average
+ noise (max - min) for the series
+
+ """
+ self._noise = {}
+ testnames = self.testnames()
+ for test in testnames:
+ self._noise[test] = {}
+ s = self.series(test)
+ threads = []
+ for result in s:
+ threads = result["threads"]
+ break
+
+ # Determine levels from last commit? Probably a better way to do this.
+ for thread in threads:
+ s = self.series(test)
+ self._noise[test][thread] = sum((computeRange(x["results"][thread]["ops_per_sec_values"])[2]
+ for x in s))
+ s = self.series(test)
+ self._noise[test][thread] /= sum(1 for x in s)
+
+
+ def noiseLevels(self, testname):
+ """
+ Returns the average noise level of the given test. Noise levels
+ are thread specific. Returns an array
+
+ """
+ # check if noise has been computed. Compute if it hasn't
+ if not self._noise:
+ print "Computing noise levels"
+ self.computeNoiseLevels()
+ # Look up noise value for test
+ if not testname in self._noise:
+ print "Test %s not in self._noise" % (testname)
+ return self._noise[testname]
+
- # I tried to do this in the form of this file. I feel like it's unneccessarily complicated right now.
def seriesItemsNDaysBefore(self, testname, revision, n):
"""
- Returns the items in the series under the given test name that
+ Returns the items in the series under the given test name that
appear 'n' days prior to the specified revision.
"""
results = {}
# Date for this revision
s = self.seriesAtRevision(testname, revision)
- if s==[] :
+ if s==[]:
return []
refdate = parser.parse(s["end"]) - timedelta(days=n)
-
+
s = self.series(testname)
for result in s:
if parser.parse(result["end"]) < refdate:
results = result
return results
-
+
def series(self, testname):
for commit in self._raw:
@@ -148,7 +259,10 @@ class History(object):
result["revision"] = commit["revision"]
result["end"] = commit["data"]["end"]
result["order"] = commit["order"]
- result["max"] = max(f["ops_per_sec"] for f in result["results"].values() if type(f) == type({}))
+ result["max"] = max(f["ops_per_sec"] for f in result["results"].values()
+ if type(f) == type({}))
+ result["threads"] = [f for f in result["results"] if type(result["results"][f])
+ == type({})]
yield result
@@ -158,5 +272,5 @@ class TestResult:
#def max(self):
-if __name__ == '__main__':
+if __name__ == '__main__':
main(sys.argv[1:])