summaryrefslogtreecommitdiff
path: root/testsuite/driver
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2019-01-31 19:47:53 -0500
committerBen Gamari <ben@smart-cactus.org>2019-01-31 19:47:53 -0500
commitebe2d344899e2165182d3c41b0796ec022561790 (patch)
tree0771fe65262a7bbb7a465f1b9e9bb50b91649f29 /testsuite/driver
parent21462a3a4edede710582b8038695667d2b9d0159 (diff)
downloadhaskell-ebe2d344899e2165182d3c41b0796ec022561790.tar.gz
Revert "Performance tests: recover a baseline from ancestor commits and CI results."
Unfortunately this has broken all future commits due to spurious(?) performance changes which I have been unable to work around. This reverts commit cc2261d42f6a954d88e355aaad41f001f65c95da.
Diffstat (limited to 'testsuite/driver')
-rw-r--r--testsuite/driver/perf_notes.py202
-rw-r--r--testsuite/driver/runtests.py36
-rw-r--r--testsuite/driver/testglobals.py12
-rw-r--r--testsuite/driver/testlib.py69
-rw-r--r--testsuite/driver/testutil.py2
5 files changed, 62 insertions, 259 deletions
diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py
index c5d09e04db..6d80e07e12 100644
--- a/testsuite/driver/perf_notes.py
+++ b/testsuite/driver/perf_notes.py
@@ -13,7 +13,6 @@ import argparse
import re
import subprocess
import time
-import sys
from collections import namedtuple
from math import ceil, trunc
@@ -42,7 +41,7 @@ def is_worktree_dirty():
# The metrics (a.k.a stats) are named tuples, PerfStat, in this form:
#
# ( test_env : 'val', # Test environment.
-# test : 'val', # Name of the test
+# test : 'val', # Name of the test
# way : 'val',
# metric : 'val', # Metric being recorded
# value : 'val', # The statistic result e.g. runtime
@@ -74,21 +73,6 @@ def get_perf_stats(commit='HEAD', namespace='perf'):
log = [parse_perf_stat(stat_str) for stat_str in log]
return log
-# Check if a str is in a 40 character git commit hash.
-# str -> bool
-_commit_hash_re = re.compile('[0-9a-f]' * 40)
-def is_commit_hash(hash):
- return _commit_hash_re.fullmatch(hash) != None
-
-# Convert a <ref> to a commit hash code.
-# str -> str
-def commit_hash(commit):
- if is_commit_hash(commit):
- return commit
- return subprocess.check_output(['git', 'rev-parse', commit], \
- stderr=subprocess.STDOUT) \
- .decode() \
- .strip()
# Get allowed changes to performance. This is extracted from the commit message of
# the given commit in this form:
@@ -99,20 +83,13 @@ def commit_hash(commit):
# 'metrics': ['metricA', 'metricB', ...],
# 'opts': {
# 'optionA': 'string value',
-# 'optionB': 'string value', # e.g. test_env: "x86_64-linux"
+# 'optionB': 'string value',
# ...
# }
# }
-_get_allowed_perf_changes_cache = {}
def get_allowed_perf_changes(commit='HEAD'):
- global _get_allowed_perf_changes_cache
- commit = commit_hash(commit)
- if not commit in _get_allowed_perf_changes_cache:
- commitByteStr = subprocess.check_output(\
- ['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
- _get_allowed_perf_changes_cache[commit] \
- = parse_allowed_perf_changes(commitByteStr.decode())
- return _get_allowed_perf_changes_cache[commit]
+ commitByteStr = subprocess.check_output(['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
+ return parse_allowed_perf_changes(commitByteStr.decode())
def parse_allowed_perf_changes(commitMsg):
# Helper regex. Non-capturing unless postfixed with Cap.
@@ -125,7 +102,7 @@ def parse_allowed_perf_changes(commitMsg):
exp = (r"^Metric"
+s+r"(Increase|Decrease)"
+s+r"?("+qstr+r"|"+qstrList+r")?" # Metric or list of metrics.s..
- +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?" # Options surrounded in parenthesis. (allow parenthases in quoted strings)
+ +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?" # Options surounded in parenthesis. (allow parenthases in quoted strings))
+s+r"?:?" # Optional ":"
+s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)" # Test names. Stop parsing on empty or non-indented new line.
)
@@ -236,176 +213,11 @@ def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5):
tries += 1
time.sleep(1)
- print("\nAn error occurred while writing the performance metrics to git notes.\n \
- This is usually due to a lock-file existing somewhere in the git repo.")
+ print("\nAn error occured while writing the performance metrics to git notes.\n \
+ ​ This is usually due to a lock-file existing somewhere in the git repo.")
return False
-#
-# Baseline calculation
-#
-
-# Max number of ancestor commits to search when compiling a baseline performance metric.
-BaselineSearchDepth = 75
-
-# The git notes name space for local results.
-LocalNamespace = "perf"
-
-# The git notes name space for ci results.
-CiNamespace = "ci/" + LocalNamespace
-
-# (isCalculated, best fit ci test_env or None)
-BestFitCiTestEnv = (False, None)
-
-# test_env string or None
-def best_fit_ci_test_env():
- global BestFitCiTestEnv
- if not BestFitCiTestEnv[0]:
- platform = sys.platform
- isArch64 = sys.maxsize > 2**32
- arch = "x86_64" if isArch64 else "i386"
-
- if platform.startswith("linux"):
- test_env = arch + "-linux-deb9"
- elif platform.startswith("win32"):
- # There are no windows CI test results.
- test_env = None
- elif isArch64 and platform.startswith("darwin"):
- test_env = arch + "-darwin"
- elif isArch64 and platform.startswith("freebsd"):
- test_env = arch + "-freebsd"
- else:
- test_env = None
-
- BestFitCiTestEnv = (True, test_env)
-
- return BestFitCiTestEnv[1]
-
-_baseline_depth_commit_log = {}
-
-# Get the commit hashes for the last BaselineSearchDepth commits from and
-# including the input commit. The output commits are all commit hashes.
-# str -> [str]
-def baseline_commit_log(commit):
- global _baseline_depth_commit_log
- commit = commit_hash(commit)
- if not commit in _baseline_depth_commit_log:
- _baseline_depth_commit_log[commit] = \
- subprocess.check_output(['git', 'log', '--format=%H', \
- '-n' + str(BaselineSearchDepth)]) \
- .decode().split('\n')
- return _baseline_depth_commit_log[commit]
-
-# Cache of baseline values. This is a dict of dicts indexed on:
-# (useCiNamespace, commit) -> (test_env, test, metric, way) -> baseline
-# (bool , str ) -> (str , str , str , str) -> float
-_commit_metric_cache = {}
-
-# Get the baseline (expected value) of a test at a given commit. This searches
-# git notes from older commits for recorded metrics (locally and from ci). More
-# recent commits are favoured, then local results over ci results are favoured.
-#
-# commit: str - must be a commit hash (see commit_has())
-# name: str - test name
-# test_env: str - test environment (note a best fit test_env will be used
-# instead when looking for ci results)
-# metric: str - test metric
-# way: str - test way
-# returns: the baseline float or None if no metric was found within
-# BaselineSearchDepth commits and since the last expected change.
-def baseline_metric(commit, name, test_env, metric, way):
- # For performance reasons (in order to avoid calling commit_hash), we assert
- # commit is already a commit hash.
- assert is_commit_hash(commit)
-
- # Get all recent commit hashes.
- commit_hashes = baseline_commit_log(commit)
-
- # TODO PERF use git log to get hashes of all BaselineSearchDepth commits
- def depth_to_commit(depth):
- return commit_hashes[depth]
-
- def has_expected_change(commit):
- return get_allowed_perf_changes(commit).get(name) \
- != None
-
- # Bool -> String
- def namespace(useCiNamespace):
- return CiNamespace if useCiNamespace else LocalNamespace
-
- ci_test_env = best_fit_ci_test_env()
-
- # gets the metric of a given commit
- # (Bool, Int) -> (float | None)
- def commit_metric(useCiNamespace, currentCommit):
- global _commit_metric_cache
-
- # Get test environment.
- effective_test_env = ci_test_env if useCiNamespace else test_env
- if effective_test_env == None:
- # This can happen when no best fit ci test is found.
- return None
-
- # Check for cached value.
- cacheKeyA = (useCiNamespace, currentCommit)
- cacheKeyB = (effective_test_env, name, metric, way)
- if cacheKeyA in _commit_metric_cache:
- return _commit_metric_cache[cacheKeyA].get(cacheKeyB)
-
- # Cache miss.
- # Calculate baselines from the current commit's git note.
- # Note that the git note may contain data for other tests. All tests'
- # baselines will be collected and cached for future use.
- allCommitMetrics = get_perf_stats(
- currentCommit,
- namespace(useCiNamespace))
-
- # Collect recorded values by cacheKeyB.
- values_by_cache_key_b = {}
- for perfStat in allCommitMetrics:
- currentCacheKey = (perfStat.test_env, perfStat.test, \
- perfStat.metric, perfStat.way)
- currentValues = values_by_cache_key_b.setdefault(currentCacheKey, [])
- currentValues.append(float(perfStat.value))
-
- # Calculate and baseline (average of values) by cacheKeyB.
- baseline_by_cache_key_b = {}
- for currentCacheKey, currentValues in values_by_cache_key_b.items():
- baseline_by_cache_key_b[currentCacheKey] = \
- sum(currentValues) / len(currentValues)
-
- # Save baselines to the cache.
- _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
- return baseline_by_cache_key_b.get(cacheKeyB)
-
- # Searches through previous commits trying local then ci for each commit in.
- def search(useCiNamespace, depth):
- # Stop if reached the max search depth, or if
- # there is an expected change at the child commit (depth-1). This is a
- # subtlety: Metrics recorded on commit x incorporate the expected
- # changes for commit x. Hence metrics from x are still a valid baseline,
- # while older commits are not. This is why we check for expected changes
- # on depth-1 rather than depth.
- if depth >= BaselineSearchDepth or has_expected_change( \
- depth_to_commit(depth - 1)):
- return None
-
- # Check for a metric on this commit.
- current_metric = commit_metric(useCiNamespace, depth_to_commit(depth))
- if current_metric != None:
- return current_metric
-
- # Metric is not available.
- # If tried local, now try CI. Else move to the parent commit.
- if not useCiNamespace:
- return search(True, depth)
- else:
- return search(False, depth + 1)
-
- # Start search from parent commit using local name space.
- return search(False, 1)
-
-
# Check test stats. This prints the results for the user.
# actual: the PerfStat with actual value.
# expected_val: the expected value (this should generally be derived from get_perf_stats())
diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py
index 73297dae46..247a5cc330 100644
--- a/testsuite/driver/runtests.py
+++ b/testsuite/driver/runtests.py
@@ -379,37 +379,18 @@ else:
new_metrics = [metric for (change, metric) in t.metrics if change == MetricChange.NewMetric]
if any(new_metrics):
if canGitStatus:
- reason = 'a baseline (expected value) cannot be recovered from' + \
- ' previous git commits. This may be due to HEAD having' + \
- ' new tests or having expected changes, the presence of' + \
- ' expected changes since the last run of the tests, and/or' + \
- ' the latest test run being too old.'
- fix = 'If the tests exist on the previous' + \
- ' commit (And are configured to run with the same ways),' + \
- ' then check out that commit and run the tests to generate' + \
- ' the missing metrics. Alternatively, a baseline may be' + \
- ' recovered from ci results once fetched (where origin' + \
- ' is the official ghc git repo):\n\n' + \
- spacing + 'git fetch ' + \
- 'https://gitlab.haskell.org/ghc/ghc-performance-notes.git' + \
- ' refs/notes/perf:refs/notes/' + Perf.CiNamespace
+ reason = 'the previous git commit doesn\'t have recorded metrics for the following tests.' + \
+ ' If the tests exist on the previous commit, then check it out and run the tests to generate the missing metrics.'
else:
- reason = "this is not a git repo so the previous git commit's" + \
- " metrics cannot be loaded from git notes:"
- fix = ""
+ reason = 'this is not a git repo so the previous git commit\'s metrics cannot be loaded from git notes:'
print()
- print(str_warn('Missing Baseline Metrics') + \
- ' these metrics trivially pass because ' + reason)
- print(spacing + (' ').join(set([metric.test for metric in new_metrics])))
- if fix != "":
- print()
- print(fix)
+ print(str_warn('New Metrics') + ' these metrics trivially pass because ' + reason)
+ print(spacing + ('\n' + spacing).join(set([metric.test for metric in new_metrics])))
# Inform of how to accept metric changes.
if (len(t.unexpected_stat_failures) > 0):
print()
- print(str_info("Some stats have changed") + " If this is expected, " + \
- "allow changes by appending the git commit message with this:")
+ print(str_info("Some stats have changed") + " If this is expected, allow changes by appending the git commit message with this:")
print('-' * 25)
print(Perf.allow_changes_string(t.metrics))
print('-' * 25)
@@ -425,9 +406,8 @@ else:
elif canGitStatus and any(stats):
if is_worktree_dirty():
print()
- print(str_warn('Performance Metrics NOT Saved') + \
- ' working tree is dirty. Commit changes or use ' + \
- '--metrics-file to save metrics to a file.')
+ print(str_warn('Working Tree is Dirty') + ' performance metrics will not be saved.' + \
+ ' Commit changes or use --metrics-file to save metrics to a file.')
else:
Perf.append_perf_stat(stats)
diff --git a/testsuite/driver/testglobals.py b/testsuite/driver/testglobals.py
index 423925e87e..0e0240db8e 100644
--- a/testsuite/driver/testglobals.py
+++ b/testsuite/driver/testglobals.py
@@ -235,17 +235,11 @@ class TestOptions:
# extra files to copy to the testdir
self.extra_files = []
- # Map from metric to (fuction from way to baseline value, allowed percentage deviation) e.g.
- # { 'bytes allocated': (
- # lambda way: if way1: return None ... elif way2:return 9300000000 ...,
- # 10) }
- # This means no baseline is available for way1. For way 2, allow a 10%
- # deviation from 9300000000.
+ # Map from metric to expectected value and allowed percentage deviation. e.g.
+ # { 'bytes allocated': (9300000000, 10) }
+ # To allow a 10% deviation from 9300000000 for the 'bytes allocated' metric.
self.stats_range_fields = {}
- # Is the test testing performance?
- self.is_stats_test = False
-
# Does this test the compiler's performance as opposed to the generated code.
self.is_compiler_stats_test = False
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index b637b1992d..040e674312 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -65,7 +65,7 @@ def isCompilerStatsTest():
def isStatsTest():
opts = getTestOpts()
- return opts.is_stats_test
+ return bool(opts.stats_range_fields)
# This can be called at the top of a file of tests, to set default test options
@@ -348,18 +348,29 @@ def testing_metrics():
# measures the performance numbers of the compiler.
# As this is a fairly rare case in the testsuite, it defaults to false to
# indicate that it is a 'normal' performance test.
-def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False):
+def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
failBecause('This test has an invalid name.')
- # Normalize metrics to a list of strings.
- if isinstance(metrics, str):
- if metrics == 'all':
- metrics = testing_metrics()
- else:
- metrics = [metrics]
+ tests = Perf.get_perf_stats('HEAD^')
+
+ # Might have multiple metrics being measured for a single test.
+ test = [t for t in tests if t.test == name]
+
+ if tests == [] or test == []:
+ # There are no prior metrics for this test.
+ if isinstance(metric, str):
+ if metric == 'all':
+ for field in testing_metrics():
+ opts.stats_range_fields[field] = None
+ else:
+ opts.stats_range_fields[metric] = None
+ if isinstance(metric, list):
+ for field in metric:
+ opts.stats_range_fields[field] = None
+
+ return
- opts.is_stats_test = True
if is_compiler_stats_test:
opts.is_compiler_stats_test = True
@@ -368,11 +379,24 @@ def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False)
if config.compiler_debugged and is_compiler_stats_test:
opts.skip = 1
- for metric in metrics:
- baselineByWay = lambda way, target_commit: Perf.baseline_metric( \
- target_commit, name, config.test_env, metric, way)
+ # get the average value of the given metric from test
+ def get_avg_val(metric_2):
+ metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2]
+ return sum(metric_2_metrics) / len(metric_2_metrics)
+
+ # 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used.
+ if isinstance(metric, str):
+ if metric == 'all':
+ for field in testing_metrics():
+ opts.stats_range_fields[field] = (get_avg_val(field), deviation)
+ return
+ else:
+ opts.stats_range_fields[metric] = (get_avg_val(metric), deviation)
+ return
- opts.stats_range_fields[metric] = (baselineByWay, deviation)
+ if isinstance(metric, list):
+ for field in metric:
+ opts.stats_range_fields[field] = (get_avg_val(field), deviation)
# -----
@@ -1140,11 +1164,10 @@ def metric_dict(name, way, metric, value):
# name: name of the test.
# way: the way.
# stats_file: the path of the stats_file containing the stats for the test.
-# range_fields: see TestOptions.stats_range_fields
+# range_fields
# Returns a pass/fail object. Passes if the stats are withing the expected value ranges.
# This prints the results for the user.
def check_stats(name, way, stats_file, range_fields):
- head_commit = Perf.commit_hash('HEAD')
result = passed()
if range_fields:
try:
@@ -1154,7 +1177,7 @@ def check_stats(name, way, stats_file, range_fields):
stats_file_contents = f.read()
f.close()
- for (metric, baseline_and_dev) in range_fields.items():
+ for (metric, range_val_dev) in range_fields.items():
field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents)
if field_match == None:
print('Failed to find metric: ', metric)
@@ -1167,15 +1190,14 @@ def check_stats(name, way, stats_file, range_fields):
change = None
# If this is the first time running the benchmark, then pass.
- baseline = baseline_and_dev[0](way, head_commit)
- if baseline == None:
+ if range_val_dev == None:
metric_result = passed()
change = MetricChange.NewMetric
else:
- tolerance_dev = baseline_and_dev[1]
+ (expected_val, tolerance_dev) = range_val_dev
(change, metric_result) = Perf.check_stats_change(
perf_stat,
- baseline,
+ expected_val,
tolerance_dev,
config.allowed_perf_changes,
config.verbose >= 4)
@@ -1308,13 +1330,8 @@ def simple_run(name, way, prog, extra_run_opts):
my_rts_flags = rts_flags(way)
- # Collect stats if necessary:
- # isStatsTest and not isCompilerStatsTest():
- # assume we are running a ghc compiled program. Collect stats.
- # isStatsTest and way == 'ghci':
- # assume we are running a program via ghci. Collect stats
stats_file = name + '.stats'
- if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci'):
+ if isStatsTest() and not isCompilerStatsTest():
stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
else:
stats_args = ''
diff --git a/testsuite/driver/testutil.py b/testsuite/driver/testutil.py
index cc4a4eea0d..6e0c2684d7 100644
--- a/testsuite/driver/testutil.py
+++ b/testsuite/driver/testutil.py
@@ -16,7 +16,7 @@ def strip_quotes(s):
return s.strip('\'"')
def str_fail(s):
- return '\033[1m\033[31m' + s + '\033[0m'
+ return '\033[1m\033[43m\033[31m' + s + '\033[0m'
def str_pass(s):
return '\033[1m\033[32m' + s + '\033[0m'