diff options
author | David Eichmann <EichmannD@gmail.com> | 2019-02-04 16:51:58 +0000 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-02-16 01:07:53 -0500 |
commit | 0b92bdc79ddd38ceb69820dbc27ed36d7e5d7a57 (patch) | |
tree | e41e614c3bab769fb631a4dbe1a101cc92de4117 | |
parent | bcaba30a9602d7c5899c9754096a4460191dc667 (diff) | |
download | haskell-0b92bdc79ddd38ceb69820dbc27ed36d7e5d7a57.tar.gz |
Fix and Reapply "Performance tests: recover a baseline from ancestor commits and CI results."
-rw-r--r-- | .gitlab-ci.yml | 31 | ||||
-rwxr-xr-x | .gitlab/push-test-metrics.sh | 67 | ||||
-rw-r--r-- | testsuite/driver/perf_notes.py | 219 | ||||
-rw-r--r-- | testsuite/driver/runtests.py | 35 | ||||
-rw-r--r-- | testsuite/driver/testglobals.py | 16 | ||||
-rw-r--r-- | testsuite/driver/testlib.py | 70 | ||||
-rw-r--r-- | testsuite/driver/testutil.py | 2 |
7 files changed, 374 insertions, 66 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 068b3e871d..b630d9f489 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,6 +6,7 @@ before_script: - git submodule sync --recursive - git submodule update --init --recursive - git checkout .gitmodules + - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true" stages: - lint @@ -76,6 +77,7 @@ validate-x86_64-linux-deb8-hadrian: - git submodule sync --recursive - git submodule update --init --recursive - git checkout .gitmodules + - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true" tags: - x86_64-linux @@ -98,8 +100,16 @@ validate-x86_64-linux-deb8-hadrian: make binary-dist TAR_COMP_OPTS="-1" mv ghc-*.tar.xz ghc.tar.xz - | + # Prepare to push git notes. + METRICS_FILE=$(mktemp) + git config user.email "ben+ghc-ci@smart-cactus.org" + git config user.name "GHC GitLab CI" + - | THREADS=`mk/detect-cpu-count.sh` - make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml + make $TEST_TYPE THREADS=$THREADS JUNIT_FILE=../../junit.xml METRICS_FILE=$METRICS_FILE + - | + # Push git notes. + METRICS_FILE=$METRICS_FILE .gitlab/push-test-metrics.sh dependencies: [] artifacts: reports: @@ -121,12 +131,14 @@ validate-x86_64-darwin: ac_cv_func_clock_gettime: "no" LANG: "en_US.UTF-8" CONFIGURE_ARGS: --with-intree-gmp + TEST_ENV: "x86_64-darwin" before_script: - git clean -xdf && git submodule foreach git clean -xdf - python3 .gitlab/fix-submodules.py - git submodule sync --recursive - git submodule update --init --recursive - git checkout .gitmodules + - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true" - bash .gitlab/darwin-init.sh - PATH="`pwd`/toolchain/bin:$PATH" @@ -151,6 +163,7 @@ validate-x86_64-darwin: - git submodule sync --recursive - git submodule update --init --recursive - git checkout .gitmodules + - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true" - bash .circleci/prepare-system.sh # workaround for docker permissions @@ -168,6 +181,8 @@ validate-aarch64-linux-deb9: stage: full-build image: ghcci/aarch64-linux-deb9:0.1 allow_failure: true + variables: + TEST_ENV: "aarch64-linux-deb9" artifacts: when: always expire_in: 2 week @@ -192,6 +207,8 @@ validate-i386-linux-deb9: stage: full-build image: ghcci/i386-linux-deb9:0.1 allow_failure: true + variables: + TEST_ENV: "i386-linux-deb9" artifacts: when: always expire_in: 2 week @@ -205,6 +222,7 @@ nightly-i386-linux-deb9: allow_failure: true variables: TEST_TYPE: slowtest + TEST_ENV: "i386-linux-deb9" artifacts: when: always expire_in: 2 week @@ -221,6 +239,7 @@ validate-x86_64-linux-deb9-debug: image: ghcci/x86_64-linux-deb9:0.2 variables: BUILD_FLAVOUR: devel2 + TEST_ENV: "x86_64-linux-deb9-debug" cache: key: linux-x86_64-deb9 @@ -228,6 +247,8 @@ validate-x86_64-linux-deb9: extends: .validate-linux stage: build image: ghcci/x86_64-linux-deb9:0.2 + variables: + TEST_ENV: "x86_64-linux-deb9" artifacts: when: always expire_in: 2 week @@ -251,6 +272,7 @@ validate-x86_64-linux-deb9-llvm: image: ghcci/x86_64-linux-deb9:0.2 variables: BUILD_FLAVOUR: perf-llvm + TEST_ENV: "x86_64-linux-deb9-llvm" cache: key: linux-x86_64-deb9 @@ -258,6 +280,8 @@ validate-x86_64-linux-deb8: extends: .validate-linux stage: full-build image: ghcci/x86_64-linux-deb8:0.1 + variables: + TEST_ENV: "x86_64-linux-deb8" cache: key: linux-x86_64-deb8 artifacts: @@ -268,6 +292,8 @@ validate-x86_64-linux-fedora27: extends: .validate-linux stage: full-build image: ghcci/x86_64-linux-fedora27:0.1 + variables: + TEST_ENV: "x86_64-linux-fedora27" cache: key: linux-x86_64-fedora27 artifacts: @@ -279,6 +305,7 @@ validate-x86_64-linux-deb9-integer-simple: stage: full-build variables: INTEGER_LIBRARY: integer-simple + TEST_ENV: "x86_64-linux-deb9-integer-simple" image: ghcci/x86_64-linux-deb9:0.2 cache: key: linux-x86_64-deb9 @@ -299,6 +326,7 @@ validate-x86_64-linux-deb9-unreg: stage: full-build variables: CONFIGURE_ARGS: --enable-unregisterised + TEST_ENV: "x86_64-linux-deb9-unreg" image: ghcci/x86_64-linux-deb9:0.2 cache: key: linux-x86_64-deb9 @@ -324,6 +352,7 @@ validate-x86_64-linux-deb9-unreg: - git submodule sync --recursive - git submodule update --init --recursive - git checkout .gitmodules + - "git fetch https://gitlab.haskell.org/ghc/ghc-performance-notes.git refs/notes/perf:refs/notes/perf || true" - bash .gitlab/win32-init.sh after_script: - rd /s /q tmp diff --git a/.gitlab/push-test-metrics.sh b/.gitlab/push-test-metrics.sh new file mode 100755 index 0000000000..302587721f --- /dev/null +++ b/.gitlab/push-test-metrics.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# vim: sw=2 et +set -euo pipefail + +NOTES_ORIGIN="git@gitlab.haskell.org:ghc/ghc-performance-notes.git" +REF="perf" + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +# Check that private key is available (Set on all GitLab protected branches). +if [ -z ${PERF_NOTE_KEY+"$PERF_NOTE_KEY"} ] +then + echo "Not pushing performance git notes: PERF_NOTE_KEY is not set." + exit 0 +fi + +# TEST_ENV must be set. +if [ -z ${TEST_ENV+"$TEST_ENV"} ] +then + fail "Not pushing performance git notes: TEST_ENV must be set." +fi + +# Assert that the METRICS_FILE exists and can be read. +if [ -z ${METRICS_FILE+"$METRICS_FILE"} ] +then + fail "\$METRICS_FILE not set." +fi +if ! [ -r $METRICS_FILE ] +then + fail "Metrics file not found: $METRICS_FILE" +fi + +# Add gitlab as a known host. +mkdir -p ~/.ssh +echo "|1|+AUrMGS1elvPeLNt+NHGa5+c6pU=|4XvfRsQftO1OgZD4c0JJ7oNaii8= ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDXilA5l4kOZPx0nM6xDATF+t4fS6te0eYPDwBI/jLWD9cJVtCnsrwMl5ar+/NfmcD0jnCYztUiVHuXyTaWPJYSQpwltfpTeqpo9/z/0MxkPtSl1uMP2cLbDiqA01OWveChktOXwU6hRQ+7MmO+dNRS/iXrRmYrGv/p1W811QgLBLS9fefEdF25n+0dP71L7Ov7riOawlDmd0C11FraE/R8HX6gs6lbXta1kisdxGyKojYSiCtobUaJxRoatMfUP0a9rwTAyl8tf56LgB+igjMky879VAbL7eQ/AmfHYPrSGJ/YlWP6Jj23Dnos5nOVlWL/rVTs9Y/NakLpPwMs75KTC0Pd74hdf2e3folDdAi2kLrQgO2SI6so7rOYZ+mFkCM751QdDVy4DzjmDvSgSIVf9SV7RQf7e7unE7pSZ/ILupZqz9KhR1MOwVO+ePa5qJMNSdC204PIsRWkIO5KP0QLl507NI9Ri84+aODoHD7gDIWNhU08J2P8/E6r0wcC8uWaxh+HaOjI9BkHjqRYsrgfn54BAuO9kw1cDvyi3c8n7VFlNtvQP15lANwim3gr9upV+r95KEPJCgZMYWJBDPIVtp4GdYxCfXxWj5oMXbA5pf0tNixwNJjAsY7I6RN2htHbuySH36JybOZk+gCj6mQkxpCT/tKaUn14hBJWLq7Q+Q==" >> ~/.ssh/known_hosts +echo "|1|JZkdAPJmpX6SzGeqhmQLfMWLGQA=|4vTELroOlbFxbCr0WX+PK9EcpD0= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJknufU+I6A5Nm58lmse4/o11Ai2UzYbYe7782J1+kRk" >> ~/.ssh/known_hosts + +# Setup ssh keys. +eval `ssh-agent` +echo "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDJPR1vrZgeGTXmgJw2PsJfMjf22LcDnVVwt3l0rwTZ+8Q2J0bHaYxMRKBco1sON6LGcZepw0Hy76RQ87v057pTz18SXvnfE7U/B6v9qBk0ILJz+4BOX9sEhxu2XmScp/wMxkG9IoyruMlsxXzd1sz09o+rzzx24U2Rp27PRm08vG0oipve6BWLbYEqYrE4/nCufqOJmGd56fju7OTU0lTpEkGDEDWGMxutaX2CbTbDju7qy07Ld8BjSc9aHfvuQaslUbj3ex3EF8EXahURzGpHQn/UFFzVGMokFumiJCAagHQb7cj6jOkKseZLaysbA/mTBQsOzjWiRmkN23bQf1wF ben+ghc-ci@smart-cactus.org" > ~/.ssh/perf_rsa.pub +touch ~/.ssh/perf_rsa +chmod 0600 ~/.ssh/perf_rsa +echo "$PERF_NOTE_KEY" >> ~/.ssh/perf_rsa +ssh-add ~/.ssh/perf_rsa + +# Reset the git notes and append the metrics file to the notes, then push and return the result. +# This is favoured over a git notes merge as it avoids potential data loss/duplication from the merge strategy. +function reset_append_note_push { + git fetch -f $NOTES_ORIGIN refs/notes/$REF:refs/notes/$REF || true + echo "git notes --ref=$REF append -F $METRICS_FILE HEAD" + git notes --ref=$REF append -F $METRICS_FILE HEAD + echo "git push $NOTES_ORIGIN refs/notes/$REF" + git push $NOTES_ORIGIN refs/notes/$REF +} + +# Push the metrics file as a git note. This may fail if another task pushes a note first. In that case +# the latest note is fetched and appended. +MAX_RETRY=20 +until reset_append_note_push || [ $MAX_RETRY -le 0 ] +do + ((MAX_RETRY--)) + echo "" + echo "Failed to push git notes. Fetching, appending, and retrying... $MAX_RETRY retries left." +done diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py index 6d80e07e12..365732ca69 100644 --- a/testsuite/driver/perf_notes.py +++ b/testsuite/driver/perf_notes.py @@ -13,6 +13,7 @@ import argparse import re import subprocess import time +import sys from collections import namedtuple from math import ceil, trunc @@ -41,7 +42,7 @@ def is_worktree_dirty(): # The metrics (a.k.a stats) are named tuples, PerfStat, in this form: # # ( test_env : 'val', # Test environment. -# test : 'val', # Name of the test +# test : 'val', # Name of the test # way : 'val', # metric : 'val', # Metric being recorded # value : 'val', # The statistic result e.g. runtime @@ -50,6 +51,9 @@ def is_worktree_dirty(): # All the fields of a metric (excluding commit field). PerfStat = namedtuple('PerfStat', ['test_env','test','way','metric','value']) +# A baseline recovered form stored metrics. +Baseline = namedtuple('Baseline', ['perfStat','commit','commitDepth']) + class MetricChange: NewMetric = 'NewMetric' NoChange = 'NoChange' @@ -73,6 +77,21 @@ def get_perf_stats(commit='HEAD', namespace='perf'): log = [parse_perf_stat(stat_str) for stat_str in log] return log +# Check if a str is in a 40 character git commit hash. +# str -> bool +_commit_hash_re = re.compile('[0-9a-f]' * 40) +def is_commit_hash(hash): + return _commit_hash_re.fullmatch(hash) != None + +# Convert a <ref> to a commit hash code. +# str -> str +def commit_hash(commit): + if is_commit_hash(commit): + return commit + return subprocess.check_output(['git', 'rev-parse', commit], \ + stderr=subprocess.STDOUT) \ + .decode() \ + .strip() # Get allowed changes to performance. This is extracted from the commit message of # the given commit in this form: @@ -83,13 +102,20 @@ def get_perf_stats(commit='HEAD', namespace='perf'): # 'metrics': ['metricA', 'metricB', ...], # 'opts': { # 'optionA': 'string value', -# 'optionB': 'string value', +# 'optionB': 'string value', # e.g. test_env: "x86_64-linux" # ... # } # } +_get_allowed_perf_changes_cache = {} def get_allowed_perf_changes(commit='HEAD'): - commitByteStr = subprocess.check_output(['git', '--no-pager', 'log', '-n1', '--format=%B', commit]) - return parse_allowed_perf_changes(commitByteStr.decode()) + global _get_allowed_perf_changes_cache + commit = commit_hash(commit) + if not commit in _get_allowed_perf_changes_cache: + commitByteStr = subprocess.check_output(\ + ['git', '--no-pager', 'log', '-n1', '--format=%B', commit]) + _get_allowed_perf_changes_cache[commit] \ + = parse_allowed_perf_changes(commitByteStr.decode()) + return _get_allowed_perf_changes_cache[commit] def parse_allowed_perf_changes(commitMsg): # Helper regex. Non-capturing unless postfixed with Cap. @@ -102,7 +128,7 @@ def parse_allowed_perf_changes(commitMsg): exp = (r"^Metric" +s+r"(Increase|Decrease)" +s+r"?("+qstr+r"|"+qstrList+r")?" # Metric or list of metrics.s.. - +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?" # Options surounded in parenthesis. (allow parenthases in quoted strings)) + +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?" # Options surrounded in parenthesis. (allow parenthases in quoted strings) +s+r"?:?" # Optional ":" +s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)" # Test names. Stop parsing on empty or non-indented new line. ) @@ -213,19 +239,191 @@ def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5): tries += 1 time.sleep(1) - print("\nAn error occured while writing the performance metrics to git notes.\n \ - This is usually due to a lock-file existing somewhere in the git repo.") + print("\nAn error occurred while writing the performance metrics to git notes.\n \ + This is usually due to a lock-file existing somewhere in the git repo.") return False +# +# Baseline calculation +# + +# Max number of ancestor commits to search when compiling a baseline performance metric. +BaselineSearchDepth = 75 + +# The git notes name space for local results. +LocalNamespace = "perf" + +# The git notes name space for ci results. +CiNamespace = "ci/" + LocalNamespace + +# (isCalculated, best fit ci test_env or None) +BestFitCiTestEnv = (False, None) + +# test_env string or None +def best_fit_ci_test_env(): + global BestFitCiTestEnv + if not BestFitCiTestEnv[0]: + platform = sys.platform + isArch64 = sys.maxsize > 2**32 + arch = "x86_64" if isArch64 else "i386" + + if platform.startswith("linux"): + test_env = arch + "-linux-deb9" + elif platform.startswith("win32"): + # There are no windows CI test results. + test_env = None + elif isArch64 and platform.startswith("darwin"): + test_env = arch + "-darwin" + elif isArch64 and platform.startswith("freebsd"): + test_env = arch + "-freebsd" + else: + test_env = None + + BestFitCiTestEnv = (True, test_env) + + return BestFitCiTestEnv[1] + +_baseline_depth_commit_log = {} + +# Get the commit hashes for the last BaselineSearchDepth commits from and +# including the input commit. The output commits are all commit hashes. +# str -> [str] +def baseline_commit_log(commit): + global _baseline_depth_commit_log + commit = commit_hash(commit) + if not commit in _baseline_depth_commit_log: + _baseline_depth_commit_log[commit] = \ + subprocess.check_output(['git', 'log', '--format=%H', \ + '-n' + str(BaselineSearchDepth)]) \ + .decode().split('\n') + return _baseline_depth_commit_log[commit] + +# Cache of baseline values. This is a dict of dicts indexed on: +# (useCiNamespace, commit) -> (test_env, test, metric, way) -> baseline +# (bool , str ) -> (str , str , str , str) -> float +_commit_metric_cache = {} + +# Get the baseline (expected value) of a test at a given commit. This searches +# git notes from older commits for recorded metrics (locally and from ci). More +# recent commits are favoured, then local results over ci results are favoured. +# +# commit: str - must be a commit hash (see commit_has()) +# name: str - test name +# test_env: str - test environment (note a best fit test_env will be used +# instead when looking for ci results) +# metric: str - test metric +# way: str - test way +# returns: the Baseline named tuple or None if no metric was found within +# BaselineSearchDepth commits and since the last expected change. +def baseline_metric(commit, name, test_env, metric, way): + # For performance reasons (in order to avoid calling commit_hash), we assert + # commit is already a commit hash. + assert is_commit_hash(commit) + + # Get all recent commit hashes. + commit_hashes = baseline_commit_log(commit) + def depth_to_commit(depth): + return commit_hashes[depth] + + def has_expected_change(commit): + return get_allowed_perf_changes(commit).get(name) \ + != None + + # Bool -> String + def namespace(useCiNamespace): + return CiNamespace if useCiNamespace else LocalNamespace + + ci_test_env = best_fit_ci_test_env() + + # gets the metric of a given commit + # (Bool, Int) -> (float | None) + def commit_metric(useCiNamespace, depth): + global _commit_metric_cache + currentCommit = depth_to_commit(depth) + + # Get test environment. + effective_test_env = ci_test_env if useCiNamespace else test_env + if effective_test_env == None: + # This can happen when no best fit ci test is found. + return None + + # Check for cached value. + cacheKeyA = (useCiNamespace, currentCommit) + cacheKeyB = (effective_test_env, name, metric, way) + if cacheKeyA in _commit_metric_cache: + return _commit_metric_cache[cacheKeyA].get(cacheKeyB) + + # Cache miss. + # Calculate baselines from the current commit's git note. + # Note that the git note may contain data for other tests. All tests' + # baselines will be collected and cached for future use. + allCommitMetrics = get_perf_stats( + currentCommit, + namespace(useCiNamespace)) + + # Collect recorded values by cacheKeyB. + values_by_cache_key_b = {} + for perfStat in allCommitMetrics: + currentCacheKey = (perfStat.test_env, perfStat.test, \ + perfStat.metric, perfStat.way) + currentValues = values_by_cache_key_b.setdefault(currentCacheKey, []) + currentValues.append(float(perfStat.value)) + + # Calculate and baseline (average of values) by cacheKeyB. + baseline_by_cache_key_b = {} + for currentCacheKey, currentValues in values_by_cache_key_b.items(): + baseline_by_cache_key_b[currentCacheKey] = Baseline( \ + PerfStat( \ + currentCacheKey[0], + currentCacheKey[1], + currentCacheKey[3], + currentCacheKey[2], + sum(currentValues) / len(currentValues)), + currentCommit, + depth) + + # Save baselines to the cache. + _commit_metric_cache[cacheKeyA] = baseline_by_cache_key_b + return baseline_by_cache_key_b.get(cacheKeyB) + + # Searches through previous commits trying local then ci for each commit in. + def search(useCiNamespace, depth): + # Stop if reached the max search depth, or if + # there is an expected change at the child commit (depth-1). This is a + # subtlety: Metrics recorded on commit x incorporate the expected + # changes for commit x. Hence metrics from x are still a valid baseline, + # while older commits are not. This is why we check for expected changes + # on depth-1 rather than depth. + if depth >= BaselineSearchDepth or has_expected_change( \ + depth_to_commit(depth - 1)): + return None + + # Check for a metric on this commit. + current_metric = commit_metric(useCiNamespace, depth) + if current_metric != None: + return current_metric + + # Metric is not available. + # If tried local, now try CI. Else move to the parent commit. + if not useCiNamespace: + return search(True, depth) + else: + return search(False, depth + 1) + + # Start search from parent commit using local name space. + return search(False, 1) + + # Check test stats. This prints the results for the user. # actual: the PerfStat with actual value. -# expected_val: the expected value (this should generally be derived from get_perf_stats()) +# baseline: the expected Baseline value (this should generally be derived from baseline_metric()) # tolerance_dev: allowed deviation of the actual value from the expected value. # allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes(). # force_print: Print stats even if the test stat was in the tolerance range. # Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are withing the expected value ranges. -def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes = {}, force_print = False): +def check_stats_change(actual, baseline, tolerance_dev, allowed_perf_changes = {}, force_print = False): + expected_val = baseline.perfStat.value full_name = actual.test + ' (' + actual.way + ')' lowerBound = trunc( int(expected_val) * ((100 - float(tolerance_dev))/100)) @@ -256,7 +454,8 @@ def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes # Print errors and create pass/fail object. result = passed() if not change_allowed: - error = change + ' not allowed' + error = change + ' from ' + baseline.perfStat.test_env + \ + ' baseline @ HEAD~' + str(baseline.commitDepth) print(actual.metric, error + ':') result = failBecause('stat ' + error, tag='stat') diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py index 247a5cc330..f61ebbc6b1 100644 --- a/testsuite/driver/runtests.py +++ b/testsuite/driver/runtests.py @@ -379,18 +379,36 @@ else: new_metrics = [metric for (change, metric) in t.metrics if change == MetricChange.NewMetric] if any(new_metrics): if canGitStatus: - reason = 'the previous git commit doesn\'t have recorded metrics for the following tests.' + \ - ' If the tests exist on the previous commit, then check it out and run the tests to generate the missing metrics.' + reason = 'a baseline (expected value) cannot be recovered from' + \ + ' previous git commits. This may be due to HEAD having' + \ + ' new tests or having expected changes, the presence of' + \ + ' expected changes since the last run of the tests, and/or' + \ + ' the latest test run being too old.' + fix = 'If the tests exist on the previous' + \ + ' commit (And are configured to run with the same ways),' + \ + ' then check out that commit and run the tests to generate' + \ + ' the missing metrics. Alternatively, a baseline may be' + \ + ' recovered from ci results once fetched:\n\n' + \ + spacing + 'git fetch ' + \ + 'https://gitlab.haskell.org/ghc/ghc-performance-notes.git' + \ + ' refs/notes/perf:refs/notes/' + Perf.CiNamespace else: - reason = 'this is not a git repo so the previous git commit\'s metrics cannot be loaded from git notes:' + reason = "this is not a git repo so the previous git commit's" + \ + " metrics cannot be loaded from git notes:" + fix = "" print() - print(str_warn('New Metrics') + ' these metrics trivially pass because ' + reason) - print(spacing + ('\n' + spacing).join(set([metric.test for metric in new_metrics]))) + print(str_warn('Missing Baseline Metrics') + \ + ' these metrics trivially pass because ' + reason) + print(spacing + (' ').join(set([metric.test for metric in new_metrics]))) + if fix != "": + print() + print(fix) # Inform of how to accept metric changes. if (len(t.unexpected_stat_failures) > 0): print() - print(str_info("Some stats have changed") + " If this is expected, allow changes by appending the git commit message with this:") + print(str_info("Some stats have changed") + " If this is expected, " + \ + "allow changes by appending the git commit message with this:") print('-' * 25) print(Perf.allow_changes_string(t.metrics)) print('-' * 25) @@ -406,8 +424,9 @@ else: elif canGitStatus and any(stats): if is_worktree_dirty(): print() - print(str_warn('Working Tree is Dirty') + ' performance metrics will not be saved.' + \ - ' Commit changes or use --metrics-file to save metrics to a file.') + print(str_warn('Performance Metrics NOT Saved') + \ + ' working tree is dirty. Commit changes or use ' + \ + '--metrics-file to save metrics to a file.') else: Perf.append_perf_stat(stats) diff --git a/testsuite/driver/testglobals.py b/testsuite/driver/testglobals.py index de1d1e660a..b3c9a55131 100644 --- a/testsuite/driver/testglobals.py +++ b/testsuite/driver/testglobals.py @@ -250,11 +250,21 @@ class TestOptions: # extra files to copy to the testdir self.extra_files = [] - # Map from metric to expectected value and allowed percentage deviation. e.g. - # { 'bytes allocated': (9300000000, 10) } - # To allow a 10% deviation from 9300000000 for the 'bytes allocated' metric. + # Map from metric to (function from way and commit to baseline value, allowed percentage deviation) e.g. + # { 'bytes allocated': ( + # lambda way commit: + # ... + # if way1: return None ... + # elif way2:return 9300000000 ... + # ... + # , 10) } + # This means no baseline is available for way1. For way 2, allow a 10% + # deviation from 9300000000. self.stats_range_fields = {} + # Is the test testing performance? + self.is_stats_test = False + # Does this test the compiler's performance as opposed to the generated code. self.is_compiler_stats_test = False diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py index 710800b9f0..dd66340e5f 100644 --- a/testsuite/driver/testlib.py +++ b/testsuite/driver/testlib.py @@ -65,7 +65,7 @@ def isCompilerStatsTest(): def isStatsTest(): opts = getTestOpts() - return bool(opts.stats_range_fields) + return opts.is_stats_test # This can be called at the top of a file of tests, to set default test options @@ -348,29 +348,18 @@ def testing_metrics(): # measures the performance numbers of the compiler. # As this is a fairly rare case in the testsuite, it defaults to false to # indicate that it is a 'normal' performance test. -def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False): +def _collect_stats(name, opts, metrics, deviation, is_compiler_stats_test=False): if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name): failBecause('This test has an invalid name.') - tests = Perf.get_perf_stats('HEAD^') - - # Might have multiple metrics being measured for a single test. - test = [t for t in tests if t.test == name] - - if tests == [] or test == []: - # There are no prior metrics for this test. - if isinstance(metric, str): - if metric == 'all': - for field in testing_metrics(): - opts.stats_range_fields[field] = None - else: - opts.stats_range_fields[metric] = None - if isinstance(metric, list): - for field in metric: - opts.stats_range_fields[field] = None - - return + # Normalize metrics to a list of strings. + if isinstance(metrics, str): + if metrics == 'all': + metrics = testing_metrics() + else: + metrics = [metrics] + opts.is_stats_test = True if is_compiler_stats_test: opts.is_compiler_stats_test = True @@ -379,24 +368,12 @@ def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False): if config.compiler_debugged and is_compiler_stats_test: opts.skip = 1 - # get the average value of the given metric from test - def get_avg_val(metric_2): - metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2] - return sum(metric_2_metrics) / len(metric_2_metrics) - - # 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used. - if isinstance(metric, str): - if metric == 'all': - for field in testing_metrics(): - opts.stats_range_fields[field] = (get_avg_val(field), deviation) - return - else: - opts.stats_range_fields[metric] = (get_avg_val(metric), deviation) - return + for metric in metrics: + def baselineByWay(way, target_commit, metric=metric): + return Perf.baseline_metric( \ + target_commit, name, config.test_env, metric, way) - if isinstance(metric, list): - for field in metric: - opts.stats_range_fields[field] = (get_avg_val(field), deviation) + opts.stats_range_fields[metric] = (baselineByWay, deviation) # ----- @@ -1179,10 +1156,11 @@ def metric_dict(name, way, metric, value): # name: name of the test. # way: the way. # stats_file: the path of the stats_file containing the stats for the test. -# range_fields +# range_fields: see TestOptions.stats_range_fields # Returns a pass/fail object. Passes if the stats are withing the expected value ranges. # This prints the results for the user. def check_stats(name, way, stats_file, range_fields): + head_commit = Perf.commit_hash('HEAD') result = passed() if range_fields: try: @@ -1192,7 +1170,7 @@ def check_stats(name, way, stats_file, range_fields): stats_file_contents = f.read() f.close() - for (metric, range_val_dev) in range_fields.items(): + for (metric, baseline_and_dev) in range_fields.items(): field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents) if field_match == None: print('Failed to find metric: ', metric) @@ -1205,14 +1183,15 @@ def check_stats(name, way, stats_file, range_fields): change = None # If this is the first time running the benchmark, then pass. - if range_val_dev == None: + baseline = baseline_and_dev[0](way, head_commit) + if baseline == None: metric_result = passed() change = MetricChange.NewMetric else: - (expected_val, tolerance_dev) = range_val_dev + tolerance_dev = baseline_and_dev[1] (change, metric_result) = Perf.check_stats_change( perf_stat, - expected_val, + baseline, tolerance_dev, config.allowed_perf_changes, config.verbose >= 4) @@ -1348,8 +1327,13 @@ def simple_run(name, way, prog, extra_run_opts): my_rts_flags = rts_flags(way) + # Collect stats if necessary: + # isStatsTest and not isCompilerStatsTest(): + # assume we are running a ghc compiled program. Collect stats. + # isStatsTest and way == 'ghci': + # assume we are running a program via ghci. Collect stats stats_file = name + '.stats' - if isStatsTest() and not isCompilerStatsTest(): + if isStatsTest() and (not isCompilerStatsTest() or way == 'ghci'): stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS' else: stats_args = '' diff --git a/testsuite/driver/testutil.py b/testsuite/driver/testutil.py index d5bd2f33c3..b73204fe43 100644 --- a/testsuite/driver/testutil.py +++ b/testsuite/driver/testutil.py @@ -19,7 +19,7 @@ def strip_quotes(s): return s.strip('\'"') def str_fail(s): - return '\033[1m\033[43m\033[31m' + s + '\033[0m' + return '\033[1m\033[31m' + s + '\033[0m' def str_pass(s): return '\033[1m\033[32m' + s + '\033[0m' |