1 files changed, 34 insertions, 44 deletions
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index 4a87f0a117..b637b1992d 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -65,7 +65,7 @@ def isCompilerStatsTest():
 
 def isStatsTest():
     opts = getTestOpts()
-    return bool(opts.stats_range_fields)
+    return opts.is_stats_test
 
 
 # This can be called at the top of a file of tests, to set default test options
@@ -348,29 +348,18 @@ def testing_metrics():
 # measures the performance numbers of the compiler.
 # As this is a fairly rare case in the testsuite, it defaults to false to
 # indicate that it is a 'normal' performance test.
-def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
+def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False):
     if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
         failBecause('This test has an invalid name.')
 
-    tests = Perf.get_perf_stats('HEAD^')
-
-    # Might have multiple metrics being measured for a single test.
-    test = [t for t in tests if t.test == name]
-
-    if tests == [] or test == []:
-        # There are no prior metrics for this test.
-        if isinstance(metric, str):
-            if metric == 'all':
-                for field in testing_metrics():
-                    opts.stats_range_fields[field] = None
-            else:
-                opts.stats_range_fields[metric] = None
-        if isinstance(metric, list):
-            for field in metric:
-                opts.stats_range_fields[field] = None
-
-        return
+    # Normalize metrics to a list of strings.
+    if isinstance(metrics, str):
+        if metrics == 'all':
+            metrics = testing_metrics()
+        else:
+            metrics = [metrics]
 
+    opts.is_stats_test = True
     if is_compiler_stats_test:
         opts.is_compiler_stats_test = True
 
@@ -379,24 +368,11 @@ def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
     if config.compiler_debugged and is_compiler_stats_test:
         opts.skip = 1
 
-    # get the average value of the given metric from test
-    def get_avg_val(metric_2):
-        metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2]
-        return sum(metric_2_metrics) / len(metric_2_metrics)
-
-    # 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used.
-    if isinstance(metric, str):
-        if metric == 'all':
-            for field in testing_metrics():
-                opts.stats_range_fields[field] = (get_avg_val(field), deviation)
-                return
-        else:
-            opts.stats_range_fields[metric] = (get_avg_val(metric), deviation)
-            return
+    for metric in metrics:
+        baselineByWay = lambda way, target_commit: Perf.baseline_metric( \
+                              target_commit, name, config.test_env, metric, way)
 
-    if isinstance(metric, list):
-        for field in metric:
-            opts.stats_range_fields[field] = (get_avg_val(field), deviation)
+        opts.stats_range_fields[metric] = (baselineByWay, deviation)
 
 # -----
 
@@ -896,7 +872,7 @@ def do_test(name, way, func, args, files):
                 framework_fail(name, way,
                     'extra_file does not exist: ' + extra_file)
 
-    if func.__name__ == 'run_command' or opts.pre_cmd:
+    if func.__name__ == 'run_command' or func.__name__ == 'makefile_test' or opts.pre_cmd:
         # When running 'MAKE' make sure 'TOP' still points to the
         # root of the testsuite.
         src_makefile = in_srcdir('Makefile')
@@ -1002,6 +978,13 @@ def badResult(result):
 def run_command( name, way, cmd ):
     return simple_run( name, '', override_options(cmd), '' )
 
+def makefile_test( name, way, target=None ):
+    if target is None:
+        target = name
+
+    cmd = '$MAKE -s --no-print-directory {target}'.format(target=target)
+    return run_command(name, way, cmd)
+
 # -----------------------------------------------------------------------------
 # GHCi tests
 
@@ -1157,10 +1140,11 @@ def metric_dict(name, way, metric, value):
 # name: name of the test.
 # way: the way.
 # stats_file: the path of the stats_file containing the stats for the test.
-# range_fields
+# range_fields: see TestOptions.stats_range_fields
 # Returns a pass/fail object. Passes if the stats are withing the expected value ranges.
 # This prints the results for the user.
 def check_stats(name, way, stats_file, range_fields):
+    head_commit = Perf.commit_hash('HEAD')
     result = passed()
     if range_fields:
         try:
@@ -1170,7 +1154,7 @@ def check_stats(name, way, stats_file, range_fields):
         stats_file_contents = f.read()
         f.close()
 
-        for (metric, range_val_dev) in range_fields.items():
+        for (metric, baseline_and_dev) in range_fields.items():
             field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents)
             if field_match == None:
                 print('Failed to find metric: ', metric)
@@ -1183,14 +1167,15 @@ def check_stats(name, way, stats_file, range_fields):
                 change = None
 
                 # If this is the first time running the benchmark, then pass.
-                if range_val_dev == None:
+                baseline = baseline_and_dev[0](way, head_commit)
+                if baseline == None:
                     metric_result = passed()
                     change = MetricChange.NewMetric
                 else:
-                    (expected_val, tolerance_dev) = range_val_dev
+                    tolerance_dev = baseline_and_dev[1]
                     (change, metric_result) = Perf.check_stats_change(
                         perf_stat,
-                        expected_val,
+                        baseline,
                         tolerance_dev,
                         config.allowed_perf_changes,
                         config.verbose >= 4)
@@ -1323,8 +1308,13 @@ def simple_run(name, way, prog, extra_run_opts):
 
     my_rts_flags = rts_flags(way)
 
+    # Collect stats if necessary:
+    # isStatsTest and not isCompilerStatsTest():
+    #   assume we are running a ghc compiled program. Collect stats.
+    # isStatsTest and way == 'ghci':
+    #   assume we are running a program via ghci. Collect stats
     stats_file = name + '.stats'
-    if isStatsTest() and not isCompilerStatsTest():
+    if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci'):
         stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
     else:
         stats_args = ''