author     David Eichmann <EichmannD@gmail.com>          2019-02-04 16:51:58 +0000
committer  Marge Bot <ben+marge-bot@smart-cactus.org>    2019-02-16 01:07:53 -0500
commit     0b92bdc79ddd38ceb69820dbc27ed36d7e5d7a57
tree       e41e614c3bab769fb631a4dbe1a101cc92de4117
parent     bcaba30a9602d7c5899c9754096a4460191dc667
Fix and Reapply "Performance tests: recover a baseline from ancestor commits and CI results."
 .gitlab-ci.yml                   |  31
 .gitlab/push-test-metrics.sh     |  67 (new, mode 100755)
 testsuite/driver/perf_notes.py   | 219
 testsuite/driver/runtests.py     |  35
 testsuite/driver/testglobals.py  |  16
 testsuite/driver/testlib.py      |  70
 testsuite/driver/testutil.py     |   2
 7 files changed, 374 insertions(+), 66 deletions(-)
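The substance of the change: the test driver now recovers baselines by reading
performance metrics back out of git notes, both locally recorded ones and ones
fetched from CI. A minimal sketch of that read path, mirroring
perf_notes.get_perf_stats below (the one-record-per-line, tab-separated layout
is an assumption inferred from the driver's PerfStat fields):

    import subprocess
    from collections import namedtuple

    PerfStat = namedtuple('PerfStat',
                          ['test_env', 'test', 'way', 'metric', 'value'])

    def read_perf_note(commit='HEAD', namespace='perf'):
        # 'git notes show' exits nonzero when the commit has no note;
        # treat that as "no recorded stats".
        try:
            out = subprocess.check_output(
                ['git', 'notes', '--ref=' + namespace, 'show', commit],
                stderr=subprocess.DEVNULL).decode()
        except subprocess.CalledProcessError:
            return []
        return [PerfStat(*line.strip('\t').split('\t'))
                for line in out.splitlines() if line.strip()]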
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 068b3e871d..b630d9f489 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,6 +6,7 @@ before_script:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
+ - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true"
stages:
- lint
@@ -76,6 +77,7 @@ validate-x86_64-linux-deb8-hadrian:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
+ - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true"
tags:
- x86_64-linux
@@ -98,8 +100,16 @@ validate-x86_64-linux-deb8-hadrian:
make binary-dist TAR_COMP_OPTS="-1"
mv ghc-*.tar.xz ghc.tar.xz
- |
+ # Prepare to push git notes.
+ METRICS_FILE=$(mktemp)
+ git config user.email "ben+ghc-ci@smart-cactus.org"
+ git config user.name "GHC GitLab CI"
+ - |
THREADS=`mk/detect-cpu-count.sh`
- make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml
+ make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml METRICS_FILE=$METRICS_FILE
+ - |
+ # Push git notes.
+ METRICS_FILE=$METRICS_FILE .gitlab/push-test-metrics.sh
dependencies: []
artifacts:
reports:
@@ -121,12 +131,14 @@ validate-x86_64-darwin:
ac_cv_func_clock_gettime: "no"
LANG: "en_US.UTF-8"
CONFIGURE_ARGS: --with-intree-gmp
+ TEST_ENV: "x86_64-darwin"
before_script:
- git clean -xdf && git submodule foreach git clean -xdf
- python3 .gitlab/fix-submodules.py
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
+ - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true"
- bash .gitlab/darwin-init.sh
- PATH="`pwd`/toolchain/bin:$PATH"
@@ -151,6 +163,7 @@ validate-x86_64-darwin:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
+ - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true"
- bash .circleci/prepare-system.sh
# workaround for docker permissions
@@ -168,6 +181,8 @@ validate-aarch64-linux-deb9:
stage: full-build
image: ghcci/aarch64-linux-deb9:0.1
allow_failure: true
+ variables:
+ TEST_ENV: "aarch64-linux-deb9"
artifacts:
when: always
expire_in: 2 week
@@ -192,6 +207,8 @@ validate-i386-linux-deb9:
stage: full-build
image: ghcci/i386-linux-deb9:0.1
allow_failure: true
+ variables:
+ TEST_ENV: "i386-linux-deb9"
artifacts:
when: always
expire_in: 2 week
@@ -205,6 +222,7 @@ nightly-i386-linux-deb9:
allow_failure: true
variables:
TEST_TYPE: slowtest
+ TEST_ENV: "i386-linux-deb9"
artifacts:
when: always
expire_in: 2 week
@@ -221,6 +239,7 @@ validate-x86_64-linux-deb9-debug:
image: ghcci/x86_64-linux-deb9:0.2
variables:
BUILD_FLAVOUR: devel2
+ TEST_ENV: "x86_64-linux-deb9-debug"
cache:
key: linux-x86_64-deb9
@@ -228,6 +247,8 @@ validate-x86_64-linux-deb9:
extends: .validate-linux
stage: build
image: ghcci/x86_64-linux-deb9:0.2
+ variables:
+ TEST_ENV: "x86_64-linux-deb9"
artifacts:
when: always
expire_in: 2 week
@@ -251,6 +272,7 @@ validate-x86_64-linux-deb9-llvm:
image: ghcci/x86_64-linux-deb9:0.2
variables:
BUILD_FLAVOUR: perf-llvm
+ TEST_ENV: "x86_64-linux-deb9-llvm"
cache:
key: linux-x86_64-deb9
@@ -258,6 +280,8 @@ validate-x86_64-linux-deb8:
extends: .validate-linux
stage: full-build
image: ghcci/x86_64-linux-deb8:0.1
+ variables:
+ TEST_ENV: "x86_64-linux-deb8"
cache:
key: linux-x86_64-deb8
artifacts:
@@ -268,6 +292,8 @@ validate-x86_64-linux-fedora27:
extends: .validate-linux
stage: full-build
image: ghcci/x86_64-linux-fedora27:0.1
+ variables:
+ TEST_ENV: "x86_64-linux-fedora27"
cache:
key: linux-x86_64-fedora27
artifacts:
@@ -279,6 +305,7 @@ validate-x86_64-linux-deb9-integer-simple:
stage: full-build
variables:
INTEGER_LIBRARY: integer-simple
+ TEST_ENV: "x86_64-linux-deb9-integer-simple"
image: ghcci/x86_64-linux-deb9:0.2
cache:
key: linux-x86_64-deb9
@@ -299,6 +326,7 @@ validate-x86_64-linux-deb9-unreg:
stage: full-build
variables:
CONFIGURE_ARGS: --enable-unregisterised
+ TEST_ENV: "x86_64-linux-deb9-unreg"
image: ghcci/x86_64-linux-deb9:0.2
cache:
key: linux-x86_64-deb9
@@ -324,6 +352,7 @@ validate-x86_64-linux-deb9-unreg:
- git submodule sync --recursive
- git submodule update --init --recursive
- git checkout .gitmodules
+ - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true"
- bash .gitlab/win32-init.sh
after_script:
- rd /s /q tmp
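Each TEST_ENV value set above is the key under which a job's metrics are
recorded in the notes. When a local run is compared against CI results, the
driver guesses the nearest CI environment with best_fit_ci_test_env() from
perf_notes.py (shown further down). A hypothetical session, assuming
perf_notes is imported as Perf as elsewhere in the driver:

    import perf_notes as Perf
    # On a 64-bit Linux host this matches the deb9 jobs above.
    print(Perf.best_fit_ci_test_env())   # 'x86_64-linux-deb9'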
diff --git a/.gitlab/push-test-metrics.sh b/.gitlab/push-test-metrics.sh
new file mode 100755
index 0000000000..302587721f
--- /dev/null
+++ b/.gitlab/push-test-metrics.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# vim: sw=2 et
+set -euo pipefail
+
+NOTES_ORIGIN="git@gitlab.haskell.org:ghc/ghc-performance-notes.git"
+REF="perf"
+
+fail() {
+ echo "ERROR: $*" >&2
+ exit 1
+}
+
+# Check that private key is available (Set on all GitLab protected branches).
+if [ -z ${PERF_NOTE_KEY+"$PERF_NOTE_KEY"} ]
+then
+ echo "Not pushing performance git notes: PERF_NOTE_KEY is not set."
+ exit 0
+fi
+
+# TEST_ENV must be set.
+if [ -z ${TEST_ENV+"$TEST_ENV"} ]
+then
+ fail "Not pushing performance git notes: TEST_ENV must be set."
+fi
+
+# Assert that the METRICS_FILE exists and can be read.
+if [ -z ${METRICS_FILE+"$METRICS_FILE"} ]
+then
+ fail "\$METRICS_FILE not set."
+fi
+if ! [ -r $METRICS_FILE ]
+then
+ fail "Metrics file not found: $METRICS_FILE"
+fi
+
+# Add gitlab as a known host.
+mkdir -p ~/.ssh
+echo "|1|+AUrMGS1elvPeLNt+NHGa5+c6pU=|4XvfRsQftO1OgZD4c0JJ7oNaii8= ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDXilA5l4kOZPx0nM6xDATF+t4fS6te0eYPDwBI/jLWD9cJVtCnsrwMl5ar+/NfmcD0jnCYztUiVHuXyTaWPJYSQpwltfpTeqpo9/z/0MxkPtSl1uMP2cLbDiqA01OWveChktOXwU6hRQ+7MmO+dNRS/iXrRmYrGv/p1W811QgLBLS9fefEdF25n+0dP71L7Ov7riOawlDmd0C11FraE/R8HX6gs6lbXta1kisdxGyKojYSiCtobUaJxRoatMfUP0a9rwTAyl8tf56LgB+igjMky879VAbL7eQ/AmfHYPrSGJ/YlWP6Jj23Dnos5nOVlWL/rVTs9Y/NakLpPwMs75KTC0Pd74hdf2e3folDdAi2kLrQgO2SI6so7rOYZ+mFkCM751QdDVy4DzjmDvSgSIVf9SV7RQf7e7unE7pSZ/ILupZqz9KhR1MOwVO+ePa5qJMNSdC204PIsRWkIO5KP0QLl507NI9Ri84+aODoHD7gDIWNhU08J2P8/E6r0wcC8uWaxh+HaOjI9BkHjqRYsrgfn54BAuO9kw1cDvyi3c8n7VFlNtvQP15lANwim3gr9upV+r95KEPJCgZMYWJBDPIVtp4GdYxCfXxWj5oMXbA5pf0tNixwNJjAsY7I6RN2htHbuySH36JybOZk+gCj6mQkxpCT/tKaUn14hBJWLq7Q+Q==" >> ~/.ssh/known_hosts
+echo "|1|JZkdAPJmpX6SzGeqhmQLfMWLGQA=|4vTELroOlbFxbCr0WX+PK9EcpD0= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJknufU+I6A5Nm58lmse4/o11Ai2UzYbYe7782J1+kRk" >> ~/.ssh/known_hosts
+
+# Setup ssh keys.
+eval `ssh-agent`
+echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDJPR1vrZgeGTXmgJw2PsJfMjf22LcDnVVwt3l0rwTZ+8Q2J0bHaYxMRKBco1sON6LGcZepw0Hy76RQ87v057pTz18SXvnfE7U/B6v9qBk0ILJz+4BOX9sEhxu2XmScp/wMxkG9IoyruMlsxXzd1sz09o+rzzx24U2Rp27PRm08vG0oipve6BWLbYEqYrE4/nCufqOJmGd56fju7OTU0lTpEkGDEDWGMxutaX2CbTbDju7qy07Ld8BjSc9aHfvuQaslUbj3ex3EF8EXahURzGpHQn/UFFzVGMokFumiJCAagHQb7cj6jOkKseZLaysbA/mTBQsOzjWiRmkN23bQf1wF ben+ghc-ci@smart-cactus.org" > ~/.ssh/perf_rsa.pub
+touch ~/.ssh/perf_rsa
+chmod 0600 ~/.ssh/perf_rsa
+echo "$PERF_NOTE_KEY" >> ~/.ssh/perf_rsa
+ssh-add ~/.ssh/perf_rsa
+
+# Reset the git notes and append the metrics file to the notes, then push and return the result.
+# This is favoured over a git notes merge as it avoids potential data loss/duplication from the merge strategy.
+function reset_append_note_push {
+ git fetch -f $NOTES_ORIGIN refs/notes/$REF:refs/notes/$REF || true
+ echo "git notes --ref=$REF append -F $METRICS_FILE HEAD"
+ git notes --ref=$REF append -F $METRICS_FILE HEAD
+ echo "git push $NOTES_ORIGIN refs/notes/$REF"
+ git push $NOTES_ORIGIN refs/notes/$REF
+}
+
+# Push the metrics file as a git note. This may fail if another task pushes a note first. In that case
+# the latest note is fetched and appended.
+MAX_RETRY=20
+until reset_append_note_push || [ $MAX_RETRY -le 0 ]
+do
+ ((MAX_RETRY--))
+ echo ""
+ echo "Failed to push git notes. Fetching, appending, and retrying... $MAX_RETRY retries left."
+done
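Once CI has published notes this way, a developer can fetch and inspect them
with plain git. A sketch in the driver's subprocess idiom; the ci/perf
destination ref matches the fetch hint runtests.py prints below, and the
'show' call exits nonzero if HEAD has no note yet:

    import subprocess

    NOTES_URL = 'https://gitlab.haskell.org/ghc/ghc-performance-notes.git'
    # Fetch the CI-published notes into the local ci/perf notes ref.
    subprocess.check_call(['git', 'fetch', NOTES_URL,
                           'refs/notes/perf:refs/notes/ci/perf'])
    # Print the metrics recorded for the current commit, if any.
    print(subprocess.check_output(
        ['git', 'notes', '--ref=ci/perf', 'show', 'HEAD']).decode())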
diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py
index 6d80e07e12..365732ca69 100644
--- a/testsuite/driver/perf_notes.py
+++ b/testsuite/driver/perf_notes.py
@@ -13,6 +13,7 @@ import argparse
import re
import subprocess
import time
+import sys
from collections import namedtuple
from math import ceil, trunc
@@ -41,7 +42,7 @@ def is_worktree_dirty():
# The metrics (a.k.a stats) are named tuples, PerfStat, in this form:
#
# ( test_env : 'val', # Test environment.
-# test : 'val', # Name of the test
+# test : 'val', # Name of the test
# way : 'val',
# metric : 'val', # Metric being recorded
# value : 'val', # The statistic result e.g. runtime
@@ -50,6 +51,9 @@ def is_worktree_dirty():
# All the fields of a metric (excluding commit field).
PerfStat = namedtuple('PerfStat', ['test_env','test','way','metric','value'])
+# A baseline recovered from stored metrics.
+Baseline = namedtuple('Baseline', ['perfStat','commit','commitDepth'])
+
class MetricChange:
NewMetric = 'NewMetric'
NoChange = 'NoChange'
@@ -73,6 +77,21 @@ def get_perf_stats(commit='HEAD', namespace='perf'):
log = [parse_perf_stat(stat_str) for stat_str in log]
return log
+# Check if a str is a 40 character git commit hash.
+# str -> bool
+_commit_hash_re = re.compile('[0-9a-f]' * 40)
+def is_commit_hash(hash):
+ return _commit_hash_re.fullmatch(hash) != None
+
+# Convert a <ref> to a commit hash.
+# str -> str
+def commit_hash(commit):
+ if is_commit_hash(commit):
+ return commit
+ return subprocess.check_output(['git', 'rev-parse', commit], \
+ stderr=subprocess.STDOUT) \
+ .decode() \
+ .strip()
# Get allowed changes to performance. This is extracted from the commit message of
# the given commit in this form:
@@ -83,13 +102,20 @@ def get_perf_stats(commit='HEAD', namespace='perf'):
# 'metrics': ['metricA', 'metricB', ...],
# 'opts': {
# 'optionA': 'string value',
-# 'optionB': 'string value',
+# 'optionB': 'string value', # e.g. test_env: "x86_64-linux"
# ...
# }
# }
+_get_allowed_perf_changes_cache = {}
def get_allowed_perf_changes(commit='HEAD'):
- commitByteStr = subprocess.check_output(['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
- return parse_allowed_perf_changes(commitByteStr.decode())
+ global _get_allowed_perf_changes_cache
+ commit = commit_hash(commit)
+ if not commit in _get_allowed_perf_changes_cache:
+ commitByteStr = subprocess.check_output(\
+ ['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
+ _get_allowed_perf_changes_cache[commit] \
+ = parse_allowed_perf_changes(commitByteStr.decode())
+ return _get_allowed_perf_changes_cache[commit]
def parse_allowed_perf_changes(commitMsg):
# Helper regex. Non-capturing unless postfixed with Cap.
@@ -102,7 +128,7 @@ def parse_allowed_perf_changes(commitMsg):
exp = (r"^Metric"
+s+r"(Increase|Decrease)"
+s+r"?("+qstr+r"|"+qstrList+r")?" # Metric or list of metrics.s..
- +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?" # Options surounded in parenthesis. (allow parenthases in quoted strings))
+ +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?" # Options surrounded in parentheses. (allow parentheses in quoted strings)
+s+r"?:?" # Optional ":"
+s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)" # Test names. Stop parsing on empty or non-indented new line.
)
@@ -213,19 +239,191 @@ def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5):
tries += 1
time.sleep(1)
- print("\nAn error occured while writing the performance metrics to git notes.\n \
- ​ This is usually due to a lock-file existing somewhere in the git repo.")
+ print("\nAn error occurred while writing the performance metrics to git notes.\n \
+ This is usually due to a lock-file existing somewhere in the git repo.")
return False
+#
+# Baseline calculation
+#
+
+# Max number of ancestor commits to search when compiling a baseline performance metric.
+BaselineSearchDepth = 75
+
+# The git notes name space for local results.
+LocalNamespace = "perf"
+
+# The git notes name space for ci results.
+CiNamespace = "ci/" + LocalNamespace
+
+# (isCalculated, best fit ci test_env or None)
+BestFitCiTestEnv = (False, None)
+
+# test_env string or None
+def best_fit_ci_test_env():
+ global BestFitCiTestEnv
+ if not BestFitCiTestEnv[0]:
+ platform = sys.platform
+ isArch64 = sys.maxsize > 2**32
+ arch = "x86_64" if isArch64 else "i386"
+
+ if platform.startswith("linux"):
+ test_env = arch + "-linux-deb9"
+ elif platform.startswith("win32"):
+ # There are no windows CI test results.
+ test_env = None
+ elif isArch64 and platform.startswith("darwin"):
+ test_env = arch + "-darwin"
+ elif isArch64 and platform.startswith("freebsd"):
+ test_env = arch + "-freebsd"
+ else:
+ test_env = None
+
+ BestFitCiTestEnv = (True, test_env)
+
+ return BestFitCiTestEnv[1]
+
+_baseline_depth_commit_log = {}
+
+# Get the commit hashes for the last BaselineSearchDepth commits from and
+# including the input commit. The output commits are all commit hashes.
+# str -> [str]
+def baseline_commit_log(commit):
+ global _baseline_depth_commit_log
+ commit = commit_hash(commit)
+ if not commit in _baseline_depth_commit_log:
+ _baseline_depth_commit_log[commit] = \
+ subprocess.check_output(['git', 'log', '--format=%H', \
+ '-n' + str(BaselineSearchDepth), commit]) \
+ .decode().split('\n')
+ return _baseline_depth_commit_log[commit]
+
+# Cache of baseline values. This is a dict of dicts indexed on:
+# (useCiNamespace, commit) -> (test_env, test, metric, way) -> baseline
+# (bool , str ) -> (str , str , str , str) -> Baseline
+_commit_metric_cache = {}
+
+# Get the baseline (expected value) of a test at a given commit. This searches
+# git notes from older commits for recorded metrics (locally and from ci). More
+# recent commits are favoured, and local results are favoured over ci results.
+#
+# commit: str - must be a commit hash (see commit_hash())
+# name: str - test name
+# test_env: str - test environment (note a best fit test_env will be used
+# instead when looking for ci results)
+# metric: str - test metric
+# way: str - test way
+# returns: the Baseline named tuple or None if no metric was found within
+# BaselineSearchDepth commits and since the last expected change.
+def baseline_metric(commit, name, test_env, metric, way):
+ # For performance reasons (in order to avoid calling commit_hash), we assert
+ # commit is already a commit hash.
+ assert is_commit_hash(commit)
+
+ # Get all recent commit hashes.
+ commit_hashes = baseline_commit_log(commit)
+ def depth_to_commit(depth):
+ return commit_hashes[depth]
+
+ def has_expected_change(commit):
+ return get_allowed_perf_changes(commit).get(name) \
+ != None
+
+ # Bool -> String
+ def namespace(useCiNamespace):
+ return CiNamespace if useCiNamespace else LocalNamespace
+
+ ci_test_env = best_fit_ci_test_env()
+
+ # Gets the Baseline of a given commit at the given depth.
+ # (Bool, Int) -> (Baseline | None)
+ def commit_metric(useCiNamespace, depth):
+ global _commit_metric_cache
+ currentCommit = depth_to_commit(depth)
+
+ # Get test environment.
+ effective_test_env = ci_test_env if useCiNamespace else test_env
+ if effective_test_env == None:
+ # This can happen when no best fit ci test is found.
+ return None
+
+ # Check for cached value.
+ cacheKeyA = (useCiNamespace, currentCommit)
+ cacheKeyB = (effective_test_env, name, metric, way)
+ if cacheKeyA in _commit_metric_cache:
+ return _commit_metric_cache[cacheKeyA].get(cacheKeyB)
+
+ # Cache miss.
+ # Calculate baselines from the current commit's git note.
+ # Note that the git note may contain data for other tests. All tests'
+ # baselines will be collected and cached for future use.
+ allCommitMetrics = get_perf_stats(
+ currentCommit,
+ namespace(useCiNamespace))
+
+ # Collect recorded values by cacheKeyB.
+ values_by_cache_key_b = {}
+ for perfStat in allCommitMetrics:
+ currentCacheKey = (perfStat.test_env, perfStat.test, \
+ perfStat.metric, perfStat.way)
+ currentValues = values_by_cache_key_b.setdefault(currentCacheKey, [])
+ currentValues.append(float(perfStat.value))
+
+ # Calculate the baseline (average of values) by cacheKeyB.
+ baseline_by_cache_key_b = {}
+ for currentCacheKey, currentValues in values_by_cache_key_b.items():
+ baseline_by_cache_key_b[currentCacheKey] = Baseline( \
+ PerfStat( \
+ currentCacheKey[0],
+ currentCacheKey[1],
+ currentCacheKey[3],
+ currentCacheKey[2],
+ sum(currentValues) / len(currentValues)),
+ currentCommit,
+ depth)
+
+ # Save baselines to the cache.
+ _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b
+ return baseline_by_cache_key_b.get(cacheKeyB)
+
+ # Searches through previous commits, trying local then ci results for each commit.
+ def search(useCiNamespace, depth):
+ # Stop if reached the max search depth, or if
+ # there is an expected change at the child commit (depth-1). This is a
+ # subtlety: Metrics recorded on commit x incorporate the expected
+ # changes for commit x. Hence metrics from x are still a valid baseline,
+ # while older commits are not. This is why we check for expected changes
+ # on depth-1 rather than depth.
+ if depth >= BaselineSearchDepth or has_expected_change( \
+ depth_to_commit(depth - 1)):
+ return None
+
+ # Check for a metric on this commit.
+ current_metric = commit_metric(useCiNamespace, depth)
+ if current_metric != None:
+ return current_metric
+
+ # Metric is not available.
+ # If tried local, now try CI. Else move to the parent commit.
+ if not useCiNamespace:
+ return search(True, depth)
+ else:
+ return search(False, depth + 1)
+
+ # Start search from parent commit using local name space.
+ return search(False, 1)
+
+
# Check test stats. This prints the results for the user.
# actual: the PerfStat with actual value.
-# expected_val: the expected value (this should generally be derived from get_perf_stats())
+# baseline: the expected Baseline value (this should generally be derived from baseline_metric())
# tolerance_dev: allowed deviation of the actual value from the expected value.
# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
# force_print: Print stats even if the test stat was in the tolerance range.
# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
-def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes = {}, force_print = False):
+def check_stats_change(actual, baseline, tolerance_dev, allowed_perf_changes = {}, force_print = False):
+ expected_val = baseline.perfStat.value
full_name = actual.test + ' (' + actual.way + ')'
lowerBound = trunc( int(expected_val) * ((100 - float(tolerance_dev))/100))
@@ -256,7 +454,8 @@ def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes
# Print errors and create pass/fail object.
result = passed()
if not change_allowed:
- error = change + ' not allowed'
+ error = change + ' from ' + baseline.perfStat.test_env + \
+ ' baseline @ HEAD~' + str(baseline.commitDepth)
print(actual.metric, error + ':')
result = failBecause('stat ' + error, tag='stat')
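Putting the new perf_notes API together, a baseline lookup from driver code
reads as follows (the test name 'T1234' and the test_env are illustrative;
the functions and the Baseline fields are the ones defined above):

    import perf_notes as Perf

    head = Perf.commit_hash('HEAD')  # baseline_metric asserts a full hash
    baseline = Perf.baseline_metric(
        head, 'T1234', 'x86_64-linux-deb9', 'bytes allocated', 'normal')
    if baseline is None:
        print('no baseline within', Perf.BaselineSearchDepth, 'commits')
    else:
        print('expected', baseline.perfStat.value,
              'from commit', baseline.commit,
              'found at depth', baseline.commitDepth)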
diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py
index 247a5cc330..f61ebbc6b1 100644
--- a/testsuite/driver/runtests.py
+++ b/testsuite/driver/runtests.py
@@ -379,18 +379,36 @@ else:
new_metrics = [metric for (change, metric) in t.metrics if change == MetricChange.NewMetric]
if any(new_metrics):
if canGitStatus:
- reason = 'the previous git commit doesn\'t have recorded metrics for the following tests.' + \
- ' If the tests exist on the previous commit, then check it out and run the tests to generate the missing metrics.'
+ reason = 'a baseline (expected value) cannot be recovered from' + \
+ ' previous git commits. This may be because HEAD has new' + \
+ ' tests, because expected changes were declared since the' + \
+ ' tests were last run, and/or because the latest test run' + \
+ ' is too old.'
+ fix = 'If the tests exist on the previous' + \
+ ' commit (and are configured to run with the same ways),' + \
+ ' then check out that commit and run the tests to generate' + \
+ ' the missing metrics. Alternatively, a baseline may be' + \
+ ' recovered from ci results once fetched:\n\n' + \
+ spacing + 'git fetch ' + \
+ 'https://gitlab.haskell.org/ghc/ghc-performance-notes.git' + \
+ ' refs/notes/perf:refs/notes/' + Perf.CiNamespace
else:
- reason = 'this is not a git repo so the previous git commit\'s metrics cannot be loaded from git notes:'
+ reason = "this is not a git repo so the previous git commit's" + \
+ " metrics cannot be loaded from git notes:"
+ fix = ""
print()
- print(str_warn('New Metrics') + ' these metrics trivially pass because ' + reason)
- print(spacing + ('\n' + spacing).join(set([metric.test for metric in new_metrics])))
+ print(str_warn('Missing Baseline Metrics') + \
+ ' these metrics trivially pass because ' + reason)
+ print(spacing + (' ').join(set([metric.test for metric in new_metrics])))
+ if fix != "":
+ print()
+ print(fix)
# Inform of how to accept metric changes.
if (len(t.unexpected_stat_failures) > 0):
print()
- print(str_info("Some stats have changed") + " If this is expected, allow changes by appending the git commit message with this:")
+ print(str_info("Some stats have changed") + " If this is expected, " + \
+ "allow changes by appending the git commit message with this:")
print('-' * 25)
print(Perf.allow_changes_string(t.metrics))
print('-' * 25)
@@ -406,8 +424,9 @@ else:
elif canGitStatus and any(stats):
if is_worktree_dirty():
print()
- print(str_warn('Working Tree is Dirty') + ' performance metrics will not be saved.' + \
- ' Commit changes or use --metrics-file to save metrics to a file.')
+ print(str_warn('Performance Metrics NOT Saved') + \
+ ' working tree is dirty. Commit changes or use ' + \
+ '--metrics-file to save metrics to a file.')
else:
Perf.append_perf_stat(stats)
diff --git a/testsuite/driver/testglobals.py b/testsuite/driver/testglobals.py
index de1d1e660a..b3c9a55131 100644
--- a/testsuite/driver/testglobals.py
+++ b/testsuite/driver/testglobals.py
@@ -250,11 +250,21 @@ class TestOptions:
# extra files to copy to the testdir
self.extra_files = []
- # Map from metric to expectected value and allowed percentage deviation. e.g.
- # { 'bytes allocated': (9300000000, 10) }
- # To allow a 10% deviation from 9300000000 for the 'bytes allocated' metric.
+ # Map from metric to (function from way and commit to baseline value, allowed percentage deviation) e.g.
+ # { 'bytes allocated': (
+ # lambda way, commit:
+ # ...
+ # if way1: return None ...
+ # elif way2: return 9300000000 ...
+ # ...
+ # , 10) }
+ # This means no baseline is available for way1. For way2, allow a 10%
+ # deviation from 9300000000.
self.stats_range_fields = {}
+ # Is the test testing performance?
+ self.is_stats_test = False
+
# Does this test the compiler's performance as opposed to the generated code.
self.is_compiler_stats_test = False
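Concretely, a stats_range_fields entry after this change pairs a
baseline-lookup function with the tolerance instead of a precomputed expected
value. A sketch of the new shape (test name and environment are illustrative;
baseline_metric is the perf_notes.py function added above):

    import perf_notes as Perf

    test_name, test_env = 'T1234', 'x86_64-linux-deb9'  # illustrative

    def baseline_by_way(way, target_commit, metric='bytes allocated'):
        # Returns a Baseline namedtuple, or None if no baseline is
        # recoverable for this way/metric.
        return Perf.baseline_metric(target_commit, test_name, test_env,
                                    metric, way)

    stats_range_fields = {'bytes allocated': (baseline_by_way, 10)}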
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index 710800b9f0..dd66340e5f 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -65,7 +65,7 @@ def isCompilerStatsTest():
def isStatsTest():
opts = getTestOpts()
- return bool(opts.stats_range_fields)
+ return opts.is_stats_test
# This can be called at the top of a file of tests, to set default test options
@@ -348,29 +348,18 @@ def testing_metrics():
# measures the performance numbers of the compiler.
# As this is a fairly rare case in the testsuite, it defaults to false to
# indicate that it is a 'normal' performance test.
-def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
+def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False):
if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name):
failBecause('This test has an invalid name.')
- tests = Perf.get_perf_stats('HEAD^')
-
- # Might have multiple metrics being measured for a single test.
- test = [t for t in tests if t.test == name]
-
- if tests == [] or test == []:
- # There are no prior metrics for this test.
- if isinstance(metric, str):
- if metric == 'all':
- for field in testing_metrics():
- opts.stats_range_fields[field] = None
- else:
- opts.stats_range_fields[metric] = None
- if isinstance(metric, list):
- for field in metric:
- opts.stats_range_fields[field] = None
-
- return
+ # Normalize metrics to a list of strings.
+ if isinstance(metrics, str):
+ if metrics == 'all':
+ metrics = testing_metrics()
+ else:
+ metrics = [metrics]
+ opts.is_stats_test = True
if is_compiler_stats_test:
opts.is_compiler_stats_test = True
@@ -379,24 +368,12 @@ def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False):
if config.compiler_debugged and is_compiler_stats_test:
opts.skip = 1
- # get the average value of the given metric from test
- def get_avg_val(metric_2):
- metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2]
- return sum(metric_2_metrics) / len(metric_2_metrics)
-
- # 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used.
- if isinstance(metric, str):
- if metric == 'all':
- for field in testing_metrics():
- opts.stats_range_fields[field] = (get_avg_val(field), deviation)
- return
- else:
- opts.stats_range_fields[metric] = (get_avg_val(metric), deviation)
- return
+ for metric in metrics:
+ def baselineByWay(way, target_commit, metric=metric):
+ return Perf.baseline_metric( \
+ target_commit, name, config.test_env, metric, way)
- if isinstance(metric, list):
- for field in metric:
- opts.stats_range_fields[field] = (get_avg_val(field), deviation)
+ opts.stats_range_fields[metric] = (baselineByWay, deviation)
# -----
@@ -1179,10 +1156,11 @@ def metric_dict(name, way, metric, value):
# name: name of the test.
# way: the way.
# stats_file: the path of the stats_file containing the stats for the test.
-# range_fields
+# range_fields: see TestOptions.stats_range_fields
# Returns a pass/fail object. Passes if the stats are within the expected value ranges.
# This prints the results for the user.
def check_stats(name, way, stats_file, range_fields):
+ head_commit = Perf.commit_hash('HEAD')
result = passed()
if range_fields:
try:
@@ -1192,7 +1170,7 @@ def check_stats(name, way, stats_file, range_fields):
stats_file_contents = f.read()
f.close()
- for (metric, range_val_dev) in range_fields.items():
+ for (metric, baseline_and_dev) in range_fields.items():
field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents)
if field_match == None:
print('Failed to find metric: ', metric)
@@ -1205,14 +1183,15 @@ def check_stats(name, way, stats_file, range_fields):
change = None
# If this is the first time running the benchmark, then pass.
- if range_val_dev == None:
+ baseline = baseline_and_dev[0](way, head_commit)
+ if baseline == None:
metric_result = passed()
change = MetricChange.NewMetric
else:
- (expected_val, tolerance_dev) = range_val_dev
+ tolerance_dev = baseline_and_dev[1]
(change, metric_result) = Perf.check_stats_change(
perf_stat,
- expected_val,
+ baseline,
tolerance_dev,
config.allowed_perf_changes,
config.verbose >= 4)
@@ -1348,8 +1327,13 @@ def simple_run(name, way, prog, extra_run_opts):
my_rts_flags = rts_flags(way)
+ # Collect stats if necessary:
+ # isStatsTest and not isCompilerStatsTest():
+ # assume we are running a ghc compiled program. Collect stats.
+ # isStatsTest and way == 'ghci':
+ # assume we are running a program via ghci. Collect stats.
stats_file = name + '.stats'
- if isStatsTest() and not isCompilerStatsTest():
+ if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci'):
stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS'
else:
stats_args = ''
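For reference, the stats file written by '+RTS -t<file> --machine-readable'
contains key/value pairs of exactly the shape the regex in check_stats above
matches. A sketch with illustrative file contents:

    import re

    # Two sample lines in the RTS machine-readable format.
    stats_file_contents = ' [("bytes allocated", "99480")\n' \
                          ' ,("num_GCs", "1")]'
    m = re.search(r'\("bytes allocated", "([0-9]+)"\)',
                  stats_file_contents)
    if m is None:
        print('Failed to find metric')
    else:
        print(int(m.group(1)))   # 99480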
diff --git a/testsuite/driver/testutil.py b/testsuite/driver/testutil.py
index d5bd2f33c3..b73204fe43 100644
--- a/testsuite/driver/testutil.py
+++ b/testsuite/driver/testutil.py
@@ -19,7 +19,7 @@ def strip_quotes(s):
return s.strip('\'"')
def str_fail(s):
- return '\033[1m\033[43m\033[31m' + s + '\033[0m'
+ return '\033[1m\033[31m' + s + '\033[0m'
def str_pass(s):
return '\033[1m\033[32m' + s + '\033[0m'
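The testutil.py tweak simply drops the yellow background (SGR code 43) from
the failure highlight, leaving bold red. A quick way to eyeball the
difference:

    # Old style (bold red on yellow) next to new style (bold red).
    old_fail = '\033[1m\033[43m\033[31m' + 'FAIL' + '\033[0m'
    new_fail = '\033[1m\033[31m' + 'FAIL' + '\033[0m'
    print(old_fail, new_fail)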