SERVER-19902: Mongo-perf analysis script -- Use noise data for regression comparison instead of fixed percentage

Signed-off-by: Ramon Fernandez <ramon.fernandez@mongodb.com> (cherry picked from commit cb91350bf017337a734dcd0321bf4e6c34990b6a)
author: dalyd <david.daly@mongodb.com> 2015-08-14 16:44:15 -0400
committer: dalyd <david.daly@mongodb.com> 2015-09-01 11:36:52 -0400
commit: a28f451c46007019e2418825813ac63042dccfa8 (patch)
tree: 7a58cb0a5b11464e7707377de867eceb882b4724 /buildscripts
parent: 86c25cde6bd26162d93afc2c9a1a28410f58e90f (diff)
download: mongo-a28f451c46007019e2418825813ac63042dccfa8.tar.gz
1 file changed, 149 insertions, 35 deletions
diff --git a/buildscripts/perf_regression_check.py b/buildscripts/perf_regression_check.py
index cfa10eff9d6..7041d7dce79 100644
--- a/buildscripts/perf_regression_check.py
+++ b/buildscripts/perf_regression_check.py
@@ -10,44 +10,96 @@ from datetime import timedelta, datetime
 # Loads the history json file, and looks for regressions at the revision 18808cd...
 # Will exit with status code 1 if any regression is found, 0 otherwise.
 
-def compareResults(this_one, reference, threshold, label, threadThreshold=None) : 
+def compareOneResultNoise(this_one, reference, label, threadlevel="max", noiseLevel=0,
+                          noiseMultiple=1, minThreshold=0.05):
     '''
-    Take two result series and compare them to see if they are acceptable. 
+    Take two result series and compare them to see if they are acceptable.
     Return true if failed, and false if pass
+    Uses historical noise data for the comparison.
+
     '''
-    
     failed = False;
-    if not reference : 
+    if not reference:
+        return failed
+
+    ref = ""
+    current = ""
+    noise = 0
+
+    if threadlevel == "max":
+        ref = reference["max"]
+        current = this_one["max"]
+    else:
+        # Don't do a comparison if the thread data is missing
+        if not threadlevel in reference["results"].keys():
+            return failed
+        ref = reference["results"][threadlevel]['ops_per_sec']
+        current = this_one["results"][threadlevel]['ops_per_sec']
+
+    noise = noiseLevel * noiseMultiple
+    delta = minThreshold * ref
+    if (delta < noise):
+        delta = noise
+    # Do the check
+    if ref - current >= delta:
+        print ("\tregression found on %s: drop from %s (commit %s) to %s for comparison %s. Diff is"
+               " %.2f (%.2f%%), noise level is %.2f and multiple is %.2f" %
+               (threadlevel, ref, reference["revision"][:5], current, label, ref - current,
+                100*(ref-current)/ref, noiseLevel, noiseMultiple))
+        failed = True
+    return failed
+
+
+def compareResults(this_one, reference, threshold, label, noiseLevels={}, threadThreshold=None):
+    '''
+    Take two result series and compare them to see if they are acceptable.
+    Return true if failed, and false if pass
+    '''
+
+    failed = False;
+    if not reference:
         return failed
     # Default threadThreshold to the same as the max threshold
-    if  not threadThreshold : 
+    if  not threadThreshold:
         threadThreshold = threshold
-    
+
     # Check max throughput first
-    if reference["max"] - this_one["max"] >= (threshold * reference["max"]):
-        print "\tregression found on max: drop from %s (commit %s) to %s for comparison %s" % (reference["max"], reference["revision"][:5], this_one["max"], label)
-        failed = True
+    noise = 0
+    # For the max throughput, use the max noise across the thread levels as the noise parameter
+    if len(noiseLevels.values()) > 0:
+        noise = max(noiseLevels.values())
+    if compareOneResultNoise(this_one, reference, label, "max", noiseLevel=noise,
+                             minThreshold=threshold):
+        failed = True;
     # Check for regression on threading levels
-    for (level, ops_per_sec) in ([(r, this_one["results"][r]['ops_per_sec']) for r in this_one["results"] if type(this_one["results"][r]) == type({})]) :
-        # Need to get the reference data to compare against
-        refvalue = reference["results"][level]['ops_per_sec']
-        if refvalue - ops_per_sec >= (threadThreshold * refvalue):
-            print "\tregression found on thread level %s: drop from %s (commit %s) to %s for comparison %s" % (level, refvalue, reference["revision"][:7], ops_per_sec, label)
+    for (level, ops_per_sec) in (((r, this_one["results"][r]['ops_per_sec']) for r in
+                                  this_one["results"] if type(this_one["results"][r]) == type({}))):
+        noise = 0
+        if level in noiseLevels:
+            noise = noiseLevels[level]
+        if compareOneResultNoise(this_one, reference, label, level, noiseLevel=noise,
+                                 minThreshold=threadThreshold):
             failed = True
-    if not failed : 
-        print "\tno regresion against %s" %(label)
+    if not failed:
+        print "\tno regression against %s and githash %s" %(label, reference["revision"][:5])
     return failed
-            
+
 
 
 def main(args):
     parser = argparse.ArgumentParser()
-    parser.add_argument("-f", "--file", dest="file", help="path to json file containing history data")
+    parser.add_argument("-f", "--file", dest="file", help="path to json file containing"
+                        "history data")
     parser.add_argument("--rev", dest="rev", help="revision to examine for regressions")
-    parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against commit form n days ago.")
-    parser.add_argument("--threshold", default=0.1, type=float, dest="threshold", help="Flag an error if throughput is more than 'threshold'x100 percent off")
-    parser.add_argument("--threadThreshold", type=float, dest="threadThreshold", help="Flag an error if thread level throughput is more than 'threadThreshold'x100 percent off")
-    parser.add_argument("--reference", dest="reference", help="Reference commit to compare against. Should be a githash")
+    parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against"
+                        "commit from n days ago.")
+    parser.add_argument("--threshold", default=0.05, type=float, dest="threshold", help=
+                        "Don't flag an error if throughput is less than 'threshold'x100 percent off")
+    parser.add_argument("--threadThreshold", type=float, dest="threadThreshold", help=
+                        "Don't flag an error if thread level throughput is more than"
+                        "'threadThreshold'x100 percent off")
+    parser.add_argument("--reference", dest="reference", help=
+                        "Reference commit to compare against. Should be a githash")
     args = parser.parse_args()
     j = get_json(args.file)
     h = History(j)
@@ -61,18 +113,23 @@ def main(args):
             print "\tno data at this revision, skipping"
             continue
 
-        #If the new build is 10% lower than the target (3.0 will be used as the baseline for 3.2 for instance), consider it regressed.
+        # If the new build is lower than the target (3.0 will be used as the
+        # baseline for 3.2 for instance) by at least the larger of threshold x
+        # target and the historical noise level, consider it regressed.
         previous = h.seriesItemsNBefore(test, args.rev, 1)
         if not previous:
             print "\tno previous data, skipping"
             continue
-        if compareResults(this_one, previous[0], args.threshold, "Previous", args.threadThreshold) : 
+        if compareResults(this_one, previous[0], args.threshold, "Previous", h.noiseLevels(test),
+                          args.threadThreshold):
             failed = True
         daysprevious = h.seriesItemsNDaysBefore(test, args.rev,args.ndays)
         reference = h.seriesAtRevision(test, args.reference)
-        if compareResults(this_one, daysprevious, args.threshold, "NDays", args.threadThreshold) : 
+        if compareResults(this_one, daysprevious, args.threshold, "NDays", h.noiseLevels(test),
+                          threadThreshold=args.threadThreshold):
             failed = True
-        if compareResults(this_one, reference, args.threshold, "Reference", args.threadThreshold) : 
+        if compareResults(this_one, reference, args.threshold, "Reference", h.noiseLevels(test),
+                          threadThreshold=args.threadThreshold):
             failed = True
 
     if failed:
@@ -80,6 +137,19 @@ def main(args):
     else:
         sys.exit(0)
 
+# We wouldn't need this function if we had numpy installed on the system
+def computeRange(result_list):
+    '''
+       Compute the max, min, and range (max - min) for the result list
+    '''
+    min = max = result_list[0]
+    for result in result_list:
+        if result < min:
+            min = result
+        if result > max:
+            max = result
+    return (max,min,max-min)
+
 def get_json(filename):
     jf = open(filename, 'r')
     json_obj = json.load(jf)
@@ -88,9 +158,11 @@ def get_json(filename):
 class History(object):
     def __init__(self, jsonobj):
         self._raw = sorted(jsonobj, key=lambda d: d["order"])
+        self._noise = None
 
     def testnames(self):
-        return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]] for c in self._raw])))
+        return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]]
+                                                       for c in self._raw])))
 
     def seriesAtRevision(self, testname, revision):
         s = self.series(testname)
@@ -101,7 +173,7 @@ class History(object):
 
     def seriesItemsNBefore(self, testname, revision, n):
         """
-            Returns the 'n' items in the series under the given test name that 
+            Returns the 'n' items in the series under the given test name that
             appear prior to the specified revision.
         """
         results = []
@@ -117,27 +189,66 @@ class History(object):
             return results[-1*n:]
         return []
 
+    def computeNoiseLevels(self):
+        """
+        For each test, go through all results, and compute the average
+        noise (max - min) for the series
+
+        """
+        self._noise = {}
+        testnames = self.testnames()
+        for test in testnames:
+            self._noise[test] = {}
+            s = self.series(test)
+            threads = []
+            for result in s:
+                threads = result["threads"]
+                break
+
+            # Thread levels come from the first result in the series; probably a better way to do this.
+            for thread in threads:
+                s = self.series(test)
+                self._noise[test][thread] = sum((computeRange(x["results"][thread]["ops_per_sec_values"])[2]
+                                                 for x in s))
+                s = self.series(test)
+                self._noise[test][thread] /= sum(1 for x in s)
+
+
+    def noiseLevels(self, testname):
+        """
+        Returns the average noise level of the given test. Noise levels
+        are thread specific. Returns a dict mapping thread level to average noise.
+
+        """
+        # check if noise has been computed. Compute if it hasn't
+        if not self._noise:
+            print "Computing noise levels"
+            self.computeNoiseLevels()
+        # Look up noise value for test
+        if not testname in self._noise:
+            print "Test %s not in self._noise" % (testname)
+        return self._noise[testname]
+
 
-    # I tried to do this in the form of this file. I feel like it's unneccessarily complicated right now. 
     def seriesItemsNDaysBefore(self, testname, revision, n):
         """
-            Returns the items in the series under the given test name that 
+            Returns the items in the series under the given test name that
             appear 'n' days prior to the specified revision.
         """
         results = {}
         # Date for this revision
         s = self.seriesAtRevision(testname, revision)
-        if s==[] : 
+        if s==[]:
             return []
         refdate = parser.parse(s["end"]) - timedelta(days=n)
-        
+
         s = self.series(testname)
         for result in s:
             if parser.parse(result["end"]) < refdate:
                 results = result
         return results
 
-        
+
 
     def series(self, testname):
         for commit in self._raw:
@@ -148,7 +259,10 @@ class History(object):
                 result["revision"] = commit["revision"]
                 result["end"] = commit["data"]["end"]
                 result["order"] = commit["order"]
-                result["max"] = max(f["ops_per_sec"] for f in result["results"].values() if type(f) == type({}))
+                result["max"] = max(f["ops_per_sec"] for f in result["results"].values()
+                                    if type(f) == type({}))
+                result["threads"] = [f for f in result["results"] if type(result["results"][f])
+                                     == type({})]
                 yield result
 
 
@@ -158,5 +272,5 @@ class TestResult:
 
     #def max(self):
 
-if __name__ == '__main__': 
+if __name__ == '__main__':
     main(sys.argv[1:])
author	dalyd <david.daly@mongodb.com>	2015-08-14 16:44:15 -0400
committer	dalyd <david.daly@mongodb.com>	2015-09-01 11:36:52 -0400
commit	a28f451c46007019e2418825813ac63042dccfa8 (patch)
tree	7a58cb0a5b11464e7707377de867eceb882b4724 /buildscripts
parent	86c25cde6bd26162d93afc2c9a1a28410f58e90f (diff)
download	mongo-a28f451c46007019e2418825813ac63042dccfa8.tar.gz