diff options
-rw-r--r-- | .circleci/config.yml | 43 | ||||
-rwxr-xr-x | .circleci/push-test-metrics.sh | 46 | ||||
-rw-r--r-- | libraries/base/tests/all.T | 18 | ||||
-rw-r--r-- | testsuite/driver/README.md | 133 | ||||
-rw-r--r-- | testsuite/driver/perf_notes.py | 382 | ||||
-rw-r--r-- | testsuite/driver/runtests.py | 71 | ||||
-rw-r--r-- | testsuite/driver/testglobals.py | 33 | ||||
-rw-r--r-- | testsuite/driver/testlib.py | 232 | ||||
-rw-r--r-- | testsuite/driver/testutil.py | 18 | ||||
-rw-r--r-- | testsuite/mk/test.mk | 12 | ||||
-rw-r--r-- | testsuite/tests/callarity/perf/all.T | 12 | ||||
-rw-r--r-- | testsuite/tests/deriving/perf/all.T | 11 | ||||
-rw-r--r-- | testsuite/tests/perf/compiler/all.T | 1056 | ||||
-rw-r--r-- | testsuite/tests/perf/haddock/all.T | 184 | ||||
-rw-r--r-- | testsuite/tests/perf/join_points/all.T | 16 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/all.T | 374 | ||||
-rw-r--r-- | testsuite/tests/perf/space_leaks/all.T | 75 | ||||
-rw-r--r-- | testsuite/tests/pmcheck/should_compile/all.T | 27 | ||||
-rw-r--r-- | testsuite/tests/primops/should_run/all.T | 6 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 3 | ||||
-rw-r--r-- | testsuite/tests/simplStg/should_run/all.T | 5 |
21 files changed, 1020 insertions, 1737 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml index f35690124b..f80b2b321b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,7 +18,7 @@ aliases: # ideally we would simply set THREADS here instead of re-detecting it every # time we need it below. Unfortunately, there is no way to set an environment # variable with the result of a shell script. - SKIP_PERF_TESTS: YES + SKIP_PERF_TESTS: NO VERBOSE: 2 - &boot run: @@ -32,6 +32,12 @@ aliases: include mk/flavours/\$(BuildFlavour).mk endif EOF + - &set_git_identity + run: + name: Set Git Identity + command: | + git config user.email "ghc-circleci@haskell.org" + git config user.name "GHC CircleCI" - &configure_unix run: name: Configure @@ -64,10 +70,16 @@ aliases: name: Test command: | mkdir -p test-results - make test THREADS=`mk/detect-cpu-count.sh` SKIP_PERF_TESTS=YES JUNIT_FILE=../../test-results/junit.xml + METRICS_FILE=$(mktemp) + echo "export METRICS_FILE=$METRICS_FILE" >> $BASH_ENV + make test THREADS=`mk/detect-cpu-count.sh` SKIP_PERF_TESTS=$SKIP_PERF_TESTS TEST_ENV=$TEST_ENV JUNIT_FILE=../../test-results/junit.xml METRICS_FILE=$METRICS_FILE - &store_test_results store_test_results: path: test-results + - &push_perf_note + run: + name: Push Performance Git Notes + command: .circleci/push-test-metrics.sh - &slowtest run: name: Full Test @@ -102,8 +114,10 @@ jobs: environment: <<: *buildenv GHC_COLLECTOR_FLAVOR: x86_64-linux + TEST_ENV: x86_64-linux steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -113,6 +127,7 @@ jobs: - *storeartifacts - *test - *store_test_results + - *push_perf_note "validate-x86_64-freebsd": resource_class: xlarge @@ -122,8 +137,10 @@ jobs: TARGET: FreeBSD <<: *buildenv GHC_COLLECTOR_FLAVOR: x86_64-freebsd + TEST_ENV: x86_64-freebsd steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -133,6 +150,7 @@ jobs: - *storeartifacts - *test - *store_test_results + - *push_perf_note "validate-x86_64-darwin": macos: @@ -147,8 
+165,10 @@ jobs: # Build with in-tree GMP since this isn't available on OS X by default. CONFIGURE_OPTS: --with-intree-gmp <<: *buildenv + TEST_ENV: x86_64-darwin steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -158,6 +178,7 @@ jobs: - *storeartifacts - *test - *store_test_results + - *push_perf_note "validate-hadrian-x86_64-linux": resource_class: xlarge @@ -167,6 +188,7 @@ jobs: <<: *buildenv steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -179,8 +201,10 @@ jobs: - image: ghcci/x86_64-linux:0.0.4 environment: <<: *buildenv + TEST_ENV: x86_64-linux-unreg steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -188,6 +212,7 @@ jobs: - *make - *test - *store_test_results + - *push_perf_note "validate-x86_64-linux-llvm": resource_class: xlarge @@ -196,6 +221,7 @@ jobs: environment: <<: *buildenv BUILD_FLAVOUR: perf-llvm + TEST_ENV: x86_64-linux-llvm steps: - run: name: Install LLVM @@ -206,12 +232,14 @@ jobs: name: Verify that llc works command: llc - checkout + - *set_git_identity - *prepare - *submodules - *boot - *configure_unix - *make - *test + - *push_perf_note # Nightly build with -DDEBUG using devel2 flavour "validate-x86_64-linux-debug": @@ -221,8 +249,11 @@ jobs: environment: BUILD_FLAVOUR: devel2 <<: *buildenv + TEST_ENV: x86_64-linux-debug + SKIP_PERF_TESTS: YES steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -230,6 +261,7 @@ jobs: - *make - *test - *store_test_results + - *push_perf_note "validate-i386-linux": resource_class: xlarge @@ -238,8 +270,10 @@ jobs: environment: <<: *buildenv GHC_COLLECTOR_FLAVOR: i386-linux + TEST_ENV: i386-linux steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -249,6 +283,7 @@ jobs: - *storeartifacts - *test - *store_test_results + - *push_perf_note "validate-x86_64-fedora": resource_class: xlarge @@ -257,8 +292,10 @@ jobs: environment: <<: *buildenv GHC_COLLECTOR_FLAVOR: x86_64-fedora + TEST_ENV: 
x86_64-fedora steps: - checkout + - *set_git_identity - *prepare - *submodules - *boot @@ -268,6 +305,7 @@ jobs: - *storeartifacts - *test - *store_test_results + - *push_perf_note "slow-validate-x86_64-linux": resource_class: xlarge @@ -285,6 +323,7 @@ jobs: - *make - *slowtest - *store_test_results + - *push_perf_note workflows: version: 2 diff --git a/.circleci/push-test-metrics.sh b/.circleci/push-test-metrics.sh new file mode 100755 index 0000000000..4ea6958d99 --- /dev/null +++ b/.circleci/push-test-metrics.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# vim: sw=2 et +set -euo pipefail + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +GHC_ORIGIN=git@git.haskell.org:ghc + +# Add git.haskell.org as a known host. +echo "|1|F3mPVCE55+KfApNIMYQ3Dv39sGE=|1bRkvJEJhAN2R0LE/lAjFCEJGl0= ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBBUZS9jGBkE5UzpSo6irnIgcQcfzvbuIOsFc8+N61FwtZncRntbaKPuUimOFPgeaUZLl6Iajz6IIs7aduU0/v+I=" >> ~/.ssh/known_hosts +echo "|1|2VUMjYSRVpT2qJPA0rA9ap9xILY=|5OThkI4ED9V0J+Es7D5FOD55Klk= ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC+3TLluLAO4lkW60W+N2DFkS+WoRFGqLwHzgd1ifxG9TIm31wChPY3E/hgMnJmgGqWCF4UDUemmyCycEaL7FtKfzjTAclg9EfpQnozyE3T5hIo2WL7SN5O8ttG/bYGuDnn14jLnWwJyN4oz/znWFiDG9e2Oc9YFNlQ+PK8ae5xR4gqBB7EOoj9J1EiPqG2OXRr5Mei3TLsRDU6fnz/e4oFJpKWWeN6M63oePv0qoaGjxcrATZUWsuWrxVMmYo9kP1xRuFJbAUw2m4uVP+793SW1zxySi1HBMtJG+gCDdZZSwYbkV1hassLWBHv1qPttncfX8Zek3Z3VolaTmfWJTo9" >> ~/.ssh/known_hosts + +# Check that a git notes dont already exist. +# This is a percausion as we reset refs/notes/perf and we want to avoid data loss. +if [ $(git notes --ref=perf list | wc -l) -ne 0 ] +then + fail "Found an existing git note on HEAD. Expected no git note." +fi + +# Assert that the METRICS_FILE exists and can be read. +if [ "$METRICS_FILE" = "" ] || ! [ -r $METRICS_FILE ] +then + fail "Metrics file not found: $METRICS_FILE" +fi + +# Reset the git notes and append the metrics file to the notes, then push and return the result. 
+# This is favoured over a git notes merge as it avoids potential data loss/duplication from the merge strategy. +function reset_append_note_push { + git fetch -f $GHC_ORIGIN refs/notes/perf:refs/notes/perf || true + echo "git notes --ref=perf append -F $METRICS_FILE HEAD" + git notes --ref=perf append -F $METRICS_FILE HEAD + git push $GHC_ORIGIN refs/notes/perf +} + +# Push the metrics file as a git note. This may fail if another task pushes a note first. In that case +# the latest note is fetched and appended. +MAX_RETRY=20 +until reset_append_note_push || [ MAX_RETRY = 0 ] +do + ((MAX_RETRY--)) + echo "" + echo "Failed to push git notes. Fetching, appending, and retrying..." +done diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T index 90af9020d6..aaf4aa2789 100644 --- a/libraries/base/tests/all.T +++ b/libraries/base/tests/all.T @@ -176,12 +176,7 @@ test('topHandler04', test('T8766', - [ stats_num_field('bytes allocated', - [ (wordsize(64), 16828144, 5) - # with GHC-7.6.3: 83937384 (but faster execution than the next line) - # before: 58771216 (without call-arity-analysis) - # expected value: 16828144 (2014-01-14) - , (wordsize(32), 8433644, 5) ]) + [ collect_stats('bytes allocated',5) , only_ways(['normal'])], compile_and_run, ['-O']) @@ -208,9 +203,7 @@ test('T8089', test('T8684', expect_broken(8684), compile_and_run, ['']) test('T9826',normal, compile_and_run,['']) test('T9848', - [ stats_num_field('bytes allocated', - [ (wordsize(64), 51840, 20) - , (wordsize(32), 47348, 20) ]) + [ collect_stats('bytes allocated') , only_ways(['normal'])], compile_and_run, ['-O']) @@ -223,10 +216,7 @@ test('lazySTexamples', normal, compile_and_run, ['']) test('T11760', req_smp, compile_and_run, ['-threaded -with-rtsopts=-N2']) test('T12874', normal, compile_and_run, ['']) test('T13191', - [ stats_num_field('bytes allocated', - [ (wordsize(64), 185943272, 5) ]) - # with GHC-8.1 before liftA2 change: 325065128 - # GHC-8.1 with custom liftA2: 185943272 + [ 
collect_stats('bytes allocated', 5) , only_ways(['normal'])], compile_and_run, ['-O']) @@ -234,7 +224,7 @@ test('T13525', when(opsys('mingw32'), skip), compile_and_run, ['']) test('T13097', normal, compile_and_run, ['']) test('functorOperators', normal, compile_and_run, ['']) test('T3474', - [stats_num_field('max_bytes_used', [ (wordsize(64), 44504, 5) ]), + [collect_stats('max_bytes_used',5), only_ways(['normal'])], compile_and_run, ['-O']) test('T14425', normal, compile_and_run, ['']) diff --git a/testsuite/driver/README.md b/testsuite/driver/README.md new file mode 100644 index 0000000000..9324fd3df6 --- /dev/null +++ b/testsuite/driver/README.md @@ -0,0 +1,133 @@ +GHC Driver Readme +================= + +Greetings and well met. If you are reading this, I can only assume that you +are likely interested in working on the testsuite in some capacity. For more +detailed documentation, please see [here][1]. + +## ToC + +1. Entry points of the testsuite performance tests +2. Quick overview of program parts +3. How to use the comparison tool +4. Important Types +5. Quick answers for "how do I do X"? + + +## Entry Points of the testsuite performance tests + +The testsuite has two main entry points depending on which perspective you +approach it. From the perspective of the test writer, the entry point is the +collect_stats function called in *.T files. This function is declared in +perf_notes.py along with its associated infrastructure. The purpose of this +function is to tell the test driver what metrics to compare when processing +the test. From the perspective of running the test-suite e.g. via make, its +entry point is the runtests.py file. That file contains the main logic for +running the individual tests, collecting information, handling failure, and +outputting the final results. + +## Overview of how the performance test bits work. 
+During a Haskell Summer of Code project, an intern went through and revamped +most of the performance test code, as such there have been a few changes to it +that might be unusual to anyone previously familiar with the testsuite. One of +the biggest immediate benefits is that all platform differences, compiler +differences, and things such as that are not necessary to be considered by the +test writer anymore. This is due to the fact that the test comparison relies +entirely on locally collected metrics on the testing machine. + +As such, it is perfectly sufficient to write `collect_stats('all',20)` in the +".T" files to measure the 3 potential stats that can be collected for that test +and automatically test them for regressions, failing if there is more than a 20% +change in any direction. In fact, even that is not necessary as +`collect_stats()` defaults to 'all', and 20% deviation allowed. + +The function `collect_compiler_stats()` is completely equivalent in every way to +`collect_stats` except that it measures the performance of the compiler itself +rather than the performance of the code generated by the compiler. See the +implementation of collect_stats in /driver/testlib.py for more information. + +If the performance of a test is improved so much that the test fails, the value +will still be recorded. The warning that will be emitted is merely a precaution +so that the programmer can double-check that they didn't introduce a bug; +something that might be suspicious if the test suddenly improves by 70%, +for example. + +Performance metrics for performance tests are now stored in git notes under the +namespace 'perf'. The format of the git note file is that each line represents +a single metric for a particular test: `$test_env $test_name $test_way +$metric_measured $value_collected` (delimited by tabs). 
+ +One can view the maximum deviation a test allows by looking inside its +respective all.T file; additionally, if one sets the verbosity level of the +test-suite to a value >= 4, they will see a good amount of output per test +detailing all the information about values. This information will also print +if the test falls outside of the allowed bounds. (See the check_stats_change +function in /driver/perf_notes.py for exact formatting of the message.) + +The git notes are only appended to by the testsuite in a single atomic python +subprocess at the end of the test run; if the run is canceled at any time, the +notes will not be written. The note appending command will be retried up to 4 +times in the event of a failure (such as one happening due to a lock on the +repo) although this is never anticipated to happen. If, for some reason, the 5 +attempts were not enough, an error message will be printed out. Further, there +is no current process or method for stripping duplicates, updating values, etc., +so if the testsuite is run multiple times per commit there will be multiple +values in the git notes corresponding to the tests that were run. In this case +the average value is used. + +## Quick overview of program parts + +The relevant bits of the directory tree are as such: + +``` +├── driver -- Testsuite driver directory + ├── junit.py -- Contains code implementing JUnit features. + ├── kill_extra_files.py -- Some of the uglier implementation details. + ├── perf_notes.py -- Comparison tool and performance tests. + ├── runtests.py -- Main entrypoint for program; runs tests. + ├── testglobals.py -- Global data structures and objects. + ├── testlib.py -- Bulk of implementation is in here. + └── testutil.py -- Misc helper functions. +├── mk + └── test.mk -- Master makefile for running tests. +├── tests -- Main tests directory. +``` + +## How to Use the Comparison Tool + +The comparison tool exists in `/driver/perf_notes.py`. 
+ +When the testsuite is run, the performance metrics of the performance tests are +saved automatically in a local git note that will be attached to the commit. +The comparison tool is designed to help analyze performance metrics across +commits using this performance information. + +Currently, it can only be run by executing the file directly, like so: +``` +$ python3 perf_notes.py (arguments go here) +``` + +If you run `perf_notes.py -h` you will see a description of all of the +arguments and how to use them. The optional arguments exist to filter the +output to include only the test environment and tests that you're interested +in. The most typical usage of this tool will likely be running +`perf_notes.py HEAD 'HEAD~1' '(commit hash)' ...` + +The way the performance metrics are stored in git notes remains strictly local +to the machine; as such, performance metrics will not exist for a commit until +you check out that commit and run the testsuite (or test). + +## Quick Answers for "How do I do X?" + +* Q: How do I add a flag to "make test" to extend the testsuite functionality? + 1. Add the flag in the appropriate global object in testglobals.py + 2. Add an argument to the parser in runtests.py that sets the flag + 3. Go to the `testsuite/mk/test.mk` file and add a new ifeq (or ifneq) + block. I suggest adding the block around line 200. +* Q: How do I modify how performance tests work? + * That functionality resides in perf_notes.py which has pretty good + in-code documentation. 
+ * Additionally, one will want to look at `compile_and_run`, `simple_run`, + and `simple_build` in testutil.py + + [1]: http://ghc.haskell.org/trac/ghc/wiki/Building/RunningTests diff --git a/testsuite/driver/perf_notes.py b/testsuite/driver/perf_notes.py new file mode 100644 index 0000000000..f162164e3e --- /dev/null +++ b/testsuite/driver/perf_notes.py @@ -0,0 +1,382 @@ +#!/usr/bin/env python3 + +# +# (c) Jared Weakly 2017 +# +# This file will be a utility to help facilitate the comparison of performance +# metrics across arbitrary commits. The file will produce a table comparing +# metrics between measurements taken for given commits in the environment +# (which defaults to 'local' if not given by --test-env). +# + +import argparse +import re +import subprocess +import time + +from collections import namedtuple +from math import ceil, trunc + +from testutil import passed, failBecause + + +# +# Some data access functions. A the moment this uses git notes. +# + +# The metrics (a.k.a stats) are named tuples, PerfStat, in this form: +# +# ( test_env : 'val', # Test environment. +# test : 'val', # Name of the test +# way : 'val', +# metric : 'val', # Metric being recorded +# value : 'val', # The statistic result e.g. runtime +# ) + +# All the fields of a metric (excluding commit field). +PerfStat = namedtuple('PerfStat', ['test_env','test','way','metric','value']) + +class MetricChange: + NewMetric = 'NewMetric' + NoChange = 'NoChange' + Increase = 'Increase' + Decrease = 'Decrease' + +def parse_perf_stat(stat_str): + field_vals = stat_str.strip('\t').split('\t') + return PerfStat(*field_vals) + +# Get all recorded (in a git note) metrics for a given commit. +# Returns an empty array if the note is not found. 
def get_perf_stats(commit='HEAD', namespace='perf'):
    """Return the PerfStats recorded in the git note of `commit`.

    The note is read from the notes ref `namespace`; every non-empty line is
    parsed with parse_perf_stat(). An empty list is returned when
    `git notes show` fails (e.g. the commit has no note).
    """
    try:
        note = subprocess.check_output(
            ['git', 'notes', '--ref=' + namespace, 'show', commit],
            stderr=subprocess.STDOUT).decode('utf-8')
    except subprocess.CalledProcessError:
        return []

    return [parse_perf_stat(line) for line in note.strip('\n').split('\n') if line]


# Get allowed changes to performance. This is extracted from the commit message of
# the given commit in this form:
#     Metric (Increase | Decrease) ['metric' | \['metrics',..\]] [\((test_env|way)='abc',...\)]: TestName01, TestName02, ...
# Returns a *dictionary* from test name to a *list* of items of the form:
#   {
#       'direction': either 'Increase' or 'Decrease',
#       'metrics': ['metricA', 'metricB', ...],
#       'opts': {
#               'optionA': 'string value',
#               'optionB': 'string value',
#               ...
#           }
#   }
# Raises subprocess.CalledProcessError if `git log` fails.
def get_allowed_perf_changes(commit='HEAD'):
    commit_bytes = subprocess.check_output(
        ['git', '--no-pager', 'log', '-n1', '--format=%B', commit])
    return parse_allowed_perf_changes(commit_bytes.decode())

# Parses the "Metric Increase/Decrease ..." statements out of a commit message.
# See get_allowed_perf_changes() for the accepted syntax and the result shape.
def parse_allowed_perf_changes(commitMsg):
    # Helper regex. Non-capturing unless postfixed with Cap.
    s = r"(?:\s*\n?\s+)"                        # Space, possible new line with an indent.
    qstr = r"(?:'(?:[^'\\]|\\.)*')"             # Quoted string.
    qstrCap = r"(?:'((?:[^'\\]|\\.)*)')"        # Quoted string. Captures the string without the quotes.
    innerQstrList = r"(?:"+qstr+r"(?:"+s+r"?,"+s+r"?"+qstr+r")*)?"  # Inside of a list of strings.
    qstrList = r"(?:\["+s+r"?"+innerQstrList+s+r"?\])"              # A list of strings (using box brackets).

    exp = (r"^Metric"
        +s+r"(Increase|Decrease)"
        +s+r"?("+qstr+r"|"+qstrList+r")?"                       # Metric or list of metrics.
        +s+r"?(\(" + r"(?:[^')]|"+qstr+r")*" + r"\))?"          # Options surrounded in parentheses (parentheses allowed inside quoted strings).
        +s+r"?:?"                                               # Optional ":"
        +s+r"?((?:(?!\n\n)(?!\n[^\s])(?:.|\n))*)"               # Test names. Stop parsing on empty or non-indented new line.
        )

    changes = {}
    for (direction, metrics_str, opts_str, tests_str) in re.findall(exp, commitMsg, re.M):
        for test in re.findall(r"(\w+)", tests_str):
            changes.setdefault(test, []).append({
                'direction': direction,
                'metrics': re.findall(qstrCap, metrics_str),
                'opts': dict(re.findall(r"(\w+)"+s+r"?="+s+r"?"+qstrCap, opts_str))
            })

    return changes

# Calculates a suggested string to append to the git commit in order to accept the
# given changes.
# changes: [(MetricChange, PerfStat)]
def allow_changes_string(changes):
    Dec = MetricChange.Decrease
    Inc = MetricChange.Increase

    # We only care about increase / decrease metrics.
    changes = [change for change in changes if change[0] in [Inc, Dec]]

    # Map tests to a map from change direction to metrics.
    test_to_dir_to_metrics = {}
    for (change, perf_stat) in changes:
        change_dir_to_metrics = test_to_dir_to_metrics.setdefault(perf_stat.test, { Inc: [], Dec: [] })
        change_dir_to_metrics[change].append(perf_stat.metric)

    # Split into 3 groups.
    # Tests where all changes are *increasing*.
    # Tests where all changes are *decreasing*.
    # Tests where changes are *mixed* increasing and decreasing.
    groupDec = []
    groupInc = []
    groupMix = []
    for (test, decsAndIncs) in test_to_dir_to_metrics.items():
        decs = decsAndIncs[Dec]
        incs = decsAndIncs[Inc]
        if decs and incs:
            groupMix.append(test)
        elif not decs:
            groupInc.append(test)
        else:
            groupDec.append(test)

    msgs = []
    nltab = '\n '

    # Decreasing group.
    if groupDec:
        msgs.append('Metric Decrease:' + nltab + nltab.join(groupDec))

    # Increasing group.
    if groupInc:
        msgs.append('Metric Increase:' + nltab + nltab.join(groupInc))

    # Mixed group.
    if groupMix:
        # Split mixed group tests by decrease/increase, then by metric.
        dir_to_metric_to_tests = {
            Dec: {},
            Inc: {}
        }
        for test in groupMix:
            for change_dir, metrics in test_to_dir_to_metrics[test].items():
                for metric in metrics:
                    dir_to_metric_to_tests[change_dir].setdefault(metric, []).append(test)

        for change_dir in [Dec, Inc]:
            metric_to_tests = dir_to_metric_to_tests[change_dir]
            for metric in sorted(metric_to_tests.keys()):
                tests = metric_to_tests[metric]
                msgs.append('Metric ' + change_dir + ' \'' + metric + '\':' + nltab + nltab.join(tests))

    return '\n\n'.join(msgs)

# Formats a list of metrics into a string. Used e.g. to save metrics to a file or git note.
# Each metric becomes one line with its fields separated by tabs.
def format_perf_stat(stats):
    # If a single stat, convert to a singleton list.
    if not isinstance(stats, list):
        stats = [stats]

    return "\n".join(["\t".join([str(stat_val) for stat_val in stat]) for stat in stats])

# Appends a list of metrics to the git note of the given commit.
# Tries up to max_tries times to write to git notes should it fail for some reason.
# Waits 1 second between attempts.
# Returns True if the note was successfully appended, False otherwise.
def append_perf_stat(stats, commit='HEAD', namespace='perf', max_tries=5):
    # Normalise first so that the reported count is right for a single stat.
    if not isinstance(stats, list):
        stats = [stats]

    # Append to git note
    print('Appending ' + str(len(stats)) + ' stats to git notes.')
    stats_str = format_perf_stat(stats)

    for attempt in range(max_tries):
        try:
            subprocess.check_output(['git', 'notes', '--ref=' + namespace, 'append', commit, '-m', stats_str])
            return True
        except subprocess.CalledProcessError:
            # Only wait when another attempt is going to follow.
            if attempt + 1 < max_tries:
                time.sleep(1)

    print("\nAn error occured while writing the performance metrics to git notes.\n"
          " This is usually due to a lock-file existing somewhere in the git repo.")

    return False

# Check test stats. This prints the results for the user.
# actual: the PerfStat with actual value.
# expected_val: the expected value (this should generally be derived from get_perf_stats())
# tolerance_dev: allowed deviation of the actual value from the expected value.
# allowed_perf_changes: allowed changes in stats. This is a dictionary as returned by get_allowed_perf_changes().
# force_print: Print stats even if the test stat was in the tolerance range.
# Returns a (MetricChange, pass/fail object) tuple. Passes if the stats are within the expected value ranges.
def check_stats_change(actual, expected_val, tolerance_dev, allowed_perf_changes = {}, force_print = False):
    full_name = actual.test + ' (' + actual.way + ')'

    expected = int(expected_val)
    lowerBound = trunc(           expected * ((100 - float(tolerance_dev))/100))
    upperBound = trunc(0.5 + ceil(expected * ((100 + float(tolerance_dev))/100)))

    # A relative deviation is undefined for an expected value of 0.
    if expected != 0:
        actual_dev = round(((float(actual.value) * 100) / expected) - 100, 1)
    else:
        actual_dev = None

    # Find the direction of change.
    change = MetricChange.NoChange
    if actual.value < lowerBound:
        change = MetricChange.Decrease
    elif actual.value > upperBound:
        change = MetricChange.Increase

    # Is the change allowed?
    allowed_change_directions = [MetricChange.NoChange] + [ allow_stmt['direction']
            for allow_stmt in allowed_perf_changes.get(actual.test, [])

            # List of metrics are not specified or the metric is in the list of metrics.
            if not allow_stmt['metrics'] or actual.metric in allow_stmt['metrics']

            # way/test_env are not specified, or match the actual way/test_env.
            if ((not 'way' in allow_stmt['opts'].keys()) or actual.way == allow_stmt['opts']['way'])
            if ((not 'test_env' in allow_stmt['opts'].keys()) or actual.test_env == allow_stmt['opts']['test_env'])
        ]
    change_allowed = change in allowed_change_directions

    # Print errors and create pass/fail object.
    result = passed()
    if not change_allowed:
        error = change + ' not allowed'
        print(actual.metric, error + ':')
        result = failBecause('stat ' + error, tag='stat')

    if not change_allowed or force_print:
        length = max(len(str(x)) for x in [expected_val, lowerBound, upperBound, actual.value])

        def display(descr, val, extra):
            print(descr, str(val).rjust(length), extra)

        display(' Expected ' + full_name + ' ' + actual.metric + ':', expected_val, '+/-' + str(tolerance_dev) + '%')
        display(' Lower bound ' + full_name + ' ' + actual.metric + ':', lowerBound, '')
        display(' Upper bound ' + full_name + ' ' + actual.metric + ':', upperBound, '')
        display(' Actual ' + full_name + ' ' + actual.metric + ':', actual.value, '')
        if actual.value != expected_val:
            display(' Deviation ' + full_name + ' ' + actual.metric + ':',
                    'n/a' if actual_dev is None else actual_dev, '%')

    return (change, result)

# Keeps only the (commit, stat) entries whose test name matches the regular
# expression `pattern` (searched anywhere in the name).
def filter_metrics_by_test_name(metrics, pattern):
    name_re = re.compile(pattern)
    return [entry for entry in metrics if name_re.search(entry.stat.test)]

# For every commit in `commits`, returns the average recorded value of `metric`
# for `test`, or None when that commit has no such measurement.
# Note: this combines measurements from all test environments present in `metrics`.
def average_values_per_commit(metrics, commits, test, metric):
    averages = []
    for commit in commits:
        values = [float(entry.stat.value) for entry in metrics
                  if entry.commit == commit
                  and entry.stat.test == test
                  and entry.stat.metric == metric]
        averages.append(sum(values) / len(values) if values else None)
    return averages

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--test-env",
                        help="The given test environment to be compared.")
    parser.add_argument("--test-name",
                        help="If given, filters table to include only "
                             "tests matching the given regular expression.")
    parser.add_argument("--add-note", nargs=3,
                        help="Development only. --add-note N commit seed "
                             "Adds N fake metrics to the given commit using the random seed.")
    parser.add_argument("commits", nargs=argparse.REMAINDER,
                        help="The rest of the arguments will be the commits that will be used.")
    args = parser.parse_args()

    # metrics is a list of tuples (str commit, PerfStat stat)
    CommitAndStat = namedtuple('CommitAndStat', ['commit', 'stat'])
    metrics = []
    singleton_commit = len(args.commits) == 1

    #
    # Main logic of the program when called from the command-line.
    #

    for c in args.commits:
        metrics += [CommitAndStat(c, stat) for stat in get_perf_stats(c)]

    if args.test_env:
        metrics = [entry for entry in metrics if entry.stat.test_env == args.test_env]

    if args.test_name:
        metrics = filter_metrics_by_test_name(metrics, args.test_name)

    if args.add_note:
        def note_gen(n, commit, delta=''):
            note = []
            half = int(int(n)/2)
            # Generates simple fake data. Likely not comprehensive enough to catch all edge cases.
            if not delta:
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*1000)) for i in range(1,half+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*100)) for i in range(half+1,int(n)+1)])
            if delta:
                # NOTE(review): hash() of a str is salted per process unless
                # PYTHONHASHSEED is fixed, so the same seed may give different data.
                hu = abs(hash(delta))
                hv = abs(hash(hu))
                u = int(hu % 100)
                v = int(hv % 10)
                note.extend([PerfStat('local', 'T'+ str(i*100), 'some_way', 'some_field', str(i*u)) for i in range(1,half+1)])
                note.extend([PerfStat('non-local', 'W'+ str(i*100), 'other_way', 'other_field', str(i*v)) for i in range(half+1,int(n)+1)])

            append_perf_stat(note, commit)

        note_gen(args.add_note[0],args.add_note[1],args.add_note[2])

    #
    # String utilities for pretty-printing
    #

    row_fmt = '{:18}' * len(args.commits)
    commits = row_fmt.format(*[c[:10] for c in args.commits])

    def cmtline(insert):
        return row_fmt.format(*[insert for c in args.commits]).strip()

    def header(unit):
        first_line = "{:27}{:30}".format(' ',' ') + cmtline(unit)
        second_line = ("{:27}{:30}".format('Test','Metric') + commits).strip()

        # Test   Metric   c1   c2   c3 ...
        print("-" * (len(second_line)+1))
        print(first_line)
        print(second_line)
        print("-" * (len(second_line)+1))

    def commit_string(test, metric, flag):
        # NOTE(review): this is called as delta(baseline, value), i.e. the result
        # is relative to the value rather than to the baseline - confirm intent.
        def delta(v1, v2):
            return round((100 * (v1 - v2)/v2),2)

        # Get the average value per commit (or None if that commit contains no metrics).
        # Note: if the test environment is not set, this will combine metrics from all test environments.
        averageValuesOrNones = average_values_per_commit(metrics, args.commits, test, metric)

        if flag == 'metrics':
            strings = [str(v) if v is not None else '-' for v in averageValuesOrNones]
        elif flag == 'percentages':
            # If the baseline commit has no stats, then we can not produce any percentages.
            baseline = averageValuesOrNones[0]
            if baseline is None:
                strings = ['-' for v in averageValuesOrNones]
            else:
                baseline = float(baseline)
                # A value of 0 has no meaningful percentage (division by zero).
                strings = ['-' if (val is None or float(val) == 0)
                           else str(delta(baseline,float(val))) + '%'
                           for val in averageValuesOrNones]
        else:
            raise ValueError('unknown flag: ' + str(flag))

        return row_fmt.format(*strings).strip()

    #
    # The pretty-printed output
    #

    header('commit')
    # Printing out metrics.
    all_tests = sorted(set([(entry.stat.test, entry.stat.metric) for entry in metrics]))
    for test, metric in all_tests:
        print("{:27}{:30}".format(test, metric) + commit_string(test, metric, 'metrics'))

    # Has no meaningful output if there is no commit to compare to.
    if not singleton_commit:
        header('percent')

        # Printing out percentages.
        for test, metric in all_tests:
            print("{:27}{:30}".format(test, metric) + commit_string(test, metric, 'percentages'))
\ No newline at end of file diff --git a/testsuite/driver/runtests.py b/testsuite/driver/runtests.py index b956239d2a..fb3fe6ad54 100644 --- a/testsuite/driver/runtests.py +++ b/testsuite/driver/runtests.py @@ -23,8 +23,9 @@ import traceback # So we import it here first, so that the testsuite doesn't appear to fail. import subprocess -from testutil import getStdout, Watcher +from testutil import getStdout, Watcher, str_warn, str_info from testglobals import getConfig, ghc_env, getTestRun, TestOptions, brokens +from perf_notes import MetricChange from junit import junit # Readline sometimes spews out ANSI escapes for some values of TERM, @@ -43,11 +44,13 @@ def signal_handler(signal, frame): # cmd-line options parser = argparse.ArgumentParser(description="GHC's testsuite driver") +perf_group = parser.add_mutually_exclusive_group() parser.add_argument("-e", action='append', help="A string to execute from the command line.") parser.add_argument("--config-file", action="append", help="config file") parser.add_argument("--config", action='append', help="config field") parser.add_argument("--rootdir", action='append', help="root of tree containing tests (default: .)") +parser.add_argument("--metrics-file", help="file in which to save (append) the performance test metrics. 
If omitted, git notes will be used.") parser.add_argument("--summary-file", help="file in which to save the (human-readable) summary") parser.add_argument("--no-print-summary", action="store_true", help="should we print the summary?") parser.add_argument("--only", action="append", help="just this test (can be give multiple --only= flags)") @@ -55,23 +58,32 @@ parser.add_argument("--way", action="append", help="just this way") parser.add_argument("--skipway", action="append", help="skip this way") parser.add_argument("--threads", type=int, help="threads to run simultaneously") parser.add_argument("--verbose", type=int, choices=[0,1,2,3,4,5], help="verbose (Values 0 through 5 accepted)") -parser.add_argument("--skip-perf-tests", action="store_true", help="skip performance tests") parser.add_argument("--junit", type=argparse.FileType('wb'), help="output testsuite summary in JUnit format") +parser.add_argument("--test-env", default='local', help="Override default chosen test-env.") +perf_group.add_argument("--skip-perf-tests", action="store_true", help="skip performance tests") +perf_group.add_argument("--only-perf-tests", action="store_true", help="Only do performance tests") args = parser.parse_args() -for e in args.e: - exec(e) +if args.e: + for e in args.e: + exec(e) -for arg in args.config_file: - exec(open(arg).read()) +if args.config_file: + for arg in args.config_file: + exec(open(arg).read()) -for arg in args.config: - field, value = arg.split('=', 1) - setattr(config, field, value) +if args.config: + for arg in args.config: + field, value = arg.split('=', 1) + setattr(config, field, value) all_ways = config.run_ways+config.compile_ways+config.other_ways -config.rootdirs = args.rootdir + +if args.rootdir: + config.rootdirs = args.rootdir + +config.metrics_file = args.metrics_file config.summary_file = args.summary_file config.no_print_summary = args.no_print_summary @@ -104,7 +116,12 @@ if args.threads: if args.verbose is not None: config.verbose = 
args.verbose + config.skip_perf_tests = args.skip_perf_tests +config.only_perf_tests = args.only_perf_tests + +if args.test_env: + config.test_env = args.test_env config.cygwin = False config.msys = False @@ -223,6 +240,14 @@ if config.timeout == -1: print('Timeout is ' + str(config.timeout)) +# Try get allowed performance changes from the git commit. +try: + config.allowed_perf_changes = Perf.get_allowed_perf_changes() +except subprocess.CalledProcessError: + print('Failed to get allowed metric changes from the HEAD git commit message.') + +print(len(config.allowed_perf_changes)) + # ----------------------------------------------------------------------------- # The main dude @@ -326,7 +351,31 @@ else: # flush everything before we continue sys.stdout.flush() - summary(t, sys.stdout, config.no_print_summary) + # Warn of new metrics. + new_metrics = [metric for (change, metric) in t.metrics if change == MetricChange.NewMetric] + spacing = " " + if any(new_metrics): + print() + print(str_warn('New Metrics') + ' the previous git commit doesn\'t have metrics for the following tests:') + print(spacing + ('\n' + spacing).join(set([metric.test for metric in new_metrics]))) + + # Inform of how to accept metric changes. 
+ if (len(t.unexpected_stat_failures) > 0): + print() + print(str_info("Some stats have changed") + " If this is expected, allow changes by appending the git commit message with this:") + print('-' * 25) + print(Perf.allow_changes_string(t.metrics)) + print('-' * 25) + + summary(t, sys.stdout, config.no_print_summary, True) + + stats = [stat for (_, stat) in t.metrics] + if config.metrics_file: + print('Appending ' + str(len(stats)) + ' stats to file: ' + config.metrics_file) + with open(config.metrics_file, 'a') as file: + file.write("\n" + Perf.format_perf_stat(stats)) + else: + Perf.append_perf_stat(stats) if config.summary_file: with open(config.summary_file, 'w') as file: diff --git a/testsuite/driver/testglobals.py b/testsuite/driver/testglobals.py index 311e39be7f..03a62503b4 100644 --- a/testsuite/driver/testglobals.py +++ b/testsuite/driver/testglobals.py @@ -31,6 +31,9 @@ class TestConfig: self.accept_platform = False self.accept_os = False + # File in which to save the performance metrics. + self.metrics_file = '' + # File in which to save the summary self.summary_file = '' @@ -122,6 +125,15 @@ class TestConfig: # Should we skip performance tests self.skip_perf_tests = False + # Only do performance tests + self.only_perf_tests = False + + # Allowed performance changes (see perf_notes.get_allowed_perf_changes()) + self.allowed_perf_changes = {} + + # The test environment. + self.test_env = 'local' + global config config = TestConfig() @@ -156,6 +168,12 @@ class TestRun: self.unexpected_failures = [] self.unexpected_stat_failures = [] + # List of all metrics measured in this test run. + # [(change, PerfStat)] where change is one of the MetricChange + # constants: NewMetric, NoChange, Increase, Decrease. + # NewMetric happens when the previous git commit has no metric recorded. 
+ self.metrics = [] + global t t = TestRun() @@ -215,16 +233,14 @@ class TestOptions: # extra files to copy to the testdir self.extra_files = [] - # which -t numeric fields do we want to look at, and what bounds must - # they fall within? - # Elements of these lists should be things like - # ('bytes allocated', - # 9300000000, - # 10) - # To allow a 10% deviation from 9300000000. - self.compiler_stats_range_fields = {} + # Map from metric to expectected value and allowed percentage deviation. e.g. + # { 'bytes allocated': (9300000000, 10) } + # To allow a 10% deviation from 9300000000 for the 'bytes allocated' metric. self.stats_range_fields = {} + # Does this test the compiler's performance as opposed to the generated code. + self.is_compiler_stats_test = False + # should we run this test alone, i.e. not run it in parallel with # any other threads self.alone = False @@ -292,4 +308,3 @@ default_testopts = TestOptions() # (bug, directory, name) of tests marked broken global brokens brokens = [] - diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py index ff6a8c8e74..761ba67fd2 100644 --- a/testsuite/driver/testlib.py +++ b/testsuite/driver/testlib.py @@ -19,7 +19,9 @@ import collections import subprocess from testglobals import config, ghc_env, default_testopts, brokens, t -from testutil import strip_quotes, lndir, link_or_copy_file +from testutil import strip_quotes, lndir, link_or_copy_file, passed, failBecause, str_fail, str_pass +import perf_notes as Perf +from perf_notes import MetricChange extra_src_files = {'T4198': ['exitminus1.c']} # TODO: See #12223 global pool_sema @@ -56,9 +58,13 @@ def setLocalTestOpts(opts): global testopts_local testopts_local.x=opts +def isCompilerStatsTest(): + opts = getTestOpts() + return bool(opts.is_compiler_stats_test) + def isStatsTest(): opts = getTestOpts() - return bool(opts.compiler_stats_range_fields or opts.stats_range_fields) + return bool(opts.stats_range_fields) # This can be called at the top of a 
file of tests, to set default test options @@ -254,14 +260,14 @@ def _exit_code( name, opts, v ): def signal_exit_code( val ): if opsys('solaris2'): - return exit_code( val ); + return exit_code( val ) else: # When application running on Linux receives fatal error # signal, then its exit code is encoded as 128 + signal # value. See http://www.tldp.org/LDP/abs/html/exitcodes.html # I assume that Mac OS X behaves in the same way at least Mac # OS X builder behavior suggests this. - return exit_code( val+128 ); + return exit_code( val+128 ) # ----- @@ -307,42 +313,85 @@ def _extra_files(name, opts, files): # ----- -def stats_num_field( field, expecteds ): - return lambda name, opts, f=field, e=expecteds: _stats_num_field(name, opts, f, e); +# Defaults to "test everything, and only break on extreme cases" +# +# The inputs to this function are slightly interesting: +# metric can be either: +# - 'all', in which case all 3 possible metrics are collected and compared. +# - The specific metric one wants to use in the test. +# - A list of the metrics one wants to use in the test. +# +# Deviation defaults to 20% because the goal is correctness over performance. +# The testsuite should avoid breaking when there is not an actual error. +# Instead, the testsuite should notify of regressions in a non-breaking manner. +# +# collect_compiler_stats is used when the metrics collected are about the compiler. +# collect_stats is used in the majority case when the metrics to be collected +# are about the performance of the runtime code generated by the compiler. 
+def collect_compiler_stats(metric='all',deviation=20): + return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m,d, True) + +def collect_stats(metric='all', deviation=20): + return lambda name, opts, m=metric, d=deviation: _collect_stats(name, opts, m, d) + +def testing_metrics(): + return ['bytes allocated', 'peak_megabytes_allocated', 'max_bytes_used'] + +# This is an internal function that is used only in the implementation. +# 'is_compiler_stats_test' is somewhat of an unfortunate name. +# If the boolean is set to true, it indicates that this test is one that +# measures the performance numbers of the compiler. +# As this is a fairly rare case in the testsuite, it defaults to false to +# indicate that it is a 'normal' performance test. +def _collect_stats(name, opts, metric, deviation, is_compiler_stats_test=False): + if not re.match('^[0-9]*[a-zA-Z][a-zA-Z0-9._-]*$', name): + failBecause('This test has an invalid name.') -def _stats_num_field( name, opts, field, expecteds ): - if field in opts.stats_range_fields: - framework_fail(name, 'duplicate-numfield', 'Duplicate ' + field + ' num_field check') + tests = Perf.get_perf_stats('HEAD^') - if type(expecteds) is list: - for (b, expected, dev) in expecteds: - if b: - opts.stats_range_fields[field] = (expected, dev) - return - framework_warn(name, 'numfield-no-expected', 'No expected value found for ' + field + ' in num_field check') + # Might have multiple metrics being measured for a single test. + test = [t for t in tests if t.test == name] - else: - (expected, dev) = expecteds - opts.stats_range_fields[field] = (expected, dev) + if tests == [] or test == []: + # There are no prior metrics for this test. 
+ if isinstance(metric, str): + if metric == 'all': + for field in testing_metrics(): + opts.stats_range_fields[field] = None + else: + opts.stats_range_fields[metric] = None + if isinstance(metric, list): + for field in metric: + opts.stats_range_fields[field] = None -def compiler_stats_num_field( field, expecteds ): - return lambda name, opts, f=field, e=expecteds: _compiler_stats_num_field(name, opts, f, e); + return -def _compiler_stats_num_field( name, opts, field, expecteds ): - if field in opts.compiler_stats_range_fields: - framework_fail(name, 'duplicate-numfield', 'Duplicate ' + field + ' num_field check') + if is_compiler_stats_test: + opts.is_compiler_stats_test = True # Compiler performance numbers change when debugging is on, making the results # useless and confusing. Therefore, skip if debugging is on. - if compiler_debugged(): - skip(name, opts) - - for (b, expected, dev) in expecteds: - if b: - opts.compiler_stats_range_fields[field] = (expected, dev) + if config.compiler_debugged and is_compiler_stats_test: + opts.skip = 1 + + # get the average value of the given metric from test + def get_avg_val(metric_2): + metric_2_metrics = [float(t.value) for t in test if t.metric == metric_2] + return sum(metric_2_metrics) / len(metric_2_metrics) + + # 'all' is a shorthand to test for bytes allocated, peak megabytes allocated, and max bytes used. 
+ if isinstance(metric, str): + if metric == 'all': + for field in testing_metrics(): + opts.stats_range_fields[field] = (get_avg_val(field), deviation) + return + else: + opts.stats_range_fields[metric] = (get_avg_val(metric), deviation) return - framework_warn(name, 'numfield-no-expected', 'No expected value found for ' + field + ' in num_field check') + if isinstance(metric, list): + for field in metric: + opts.stats_range_fields[field] = (get_avg_val(field), deviation) # ----- @@ -720,6 +769,7 @@ def test_common_work(watcher, name, opts, func, args): and (getTestOpts().only_ways == None or way in getTestOpts().only_ways) \ and (config.cmdline_ways == [] or way in config.cmdline_ways) \ and (not (config.skip_perf_tests and isStatsTest())) \ + and (not (config.only_perf_tests and not isStatsTest())) \ and way not in getTestOpts().omit_ways # Which ways we are asked to skip @@ -927,12 +977,6 @@ def badResult(result): except (KeyError, TypeError): return True -def passed(): - return {'passFail': 'pass'} - -def failBecause(reason, tag=None): - return {'passFail': 'fail', 'reason': reason, 'tag': tag} - # ----------------------------------------------------------------------------- # Generic command tests @@ -1087,56 +1131,65 @@ def multi_compile_and_run( name, way, top_mod, extra_mods, extra_hc_opts ): def stats( name, way, stats_file ): opts = getTestOpts() - return checkStats(name, way, stats_file, opts.stats_range_fields) + return check_stats(name, way, stats_file, opts.stats_range_fields) -# ----------------------------------------------------------------------------- -# Check -t stats info - -def checkStats(name, way, stats_file, range_fields): - full_name = name + '(' + way + ')' +def metric_dict(name, way, metric, value): + return Perf.PerfStat( + test_env = config.test_env, + test = name, + way = way, + metric = metric, + value = value) +# ----------------------------------------------------------------------------- +# Check test stats. 
This prints the results for the user. +# name: name of the test. +# way: the way. +# stats_file: the path of the stats_file containing the stats for the test. +# range_fields +# Returns a pass/fail object. Passes if the stats are withing the expected value ranges. +# This prints the results for the user. +def check_stats(name, way, stats_file, range_fields): result = passed() if range_fields: try: f = open(in_testdir(stats_file)) except IOError as e: return failBecause(str(e)) - contents = f.read() + stats_file_contents = f.read() f.close() - for (field, (expected, dev)) in range_fields.items(): - m = re.search('\("' + field + '", "([0-9]+)"\)', contents) - if m == None: - print('Failed to find field: ', field) - result = failBecause('no such stats field') - val = int(m.group(1)) - - lowerBound = trunc( expected * ((100 - float(dev))/100)) - upperBound = trunc(0.5 + ceil(expected * ((100 + float(dev))/100))) - - deviation = round(((float(val) * 100)/ expected) - 100, 1) - - if val < lowerBound: - print(field, 'value is too low:') - print('(If this is because you have improved GHC, please') - print('update the test so that GHC doesn\'t regress again)') - result = failBecause('stat too good', tag='stat') - if val > upperBound: - print(field, 'value is too high:') - result = failBecause('stat not good enough', tag='stat') - - if val < lowerBound or val > upperBound or config.verbose >= 4: - length = max(len(str(x)) for x in [expected, lowerBound, upperBound, val]) - - def display(descr, val, extra): - print(descr, str(val).rjust(length), extra) - - display(' Expected ' + full_name + ' ' + field + ':', expected, '+/-' + str(dev) + '%') - display(' Lower bound ' + full_name + ' ' + field + ':', lowerBound, '') - display(' Upper bound ' + full_name + ' ' + field + ':', upperBound, '') - display(' Actual ' + full_name + ' ' + field + ':', val, '') - if val != expected: - display(' Deviation ' + full_name + ' ' + field + ':', deviation, '%') + for (metric, range_val_dev) 
in range_fields.items(): + field_match = re.search('\("' + metric + '", "([0-9]+)"\)', stats_file_contents) + if field_match == None: + print('Failed to find metric: ', metric) + metric_result = failBecause('no such stats metric') + else: + actual_val = int(field_match.group(1)) + + # Store the metric so it can later be stored in a git note. + perf_stat = metric_dict(name, way, metric, actual_val) + change = None + + # If this is the first time running the benchmark, then pass. + if range_val_dev == None: + metric_result = passed() + change = MetricChange.NewMetric + else: + (expected_val, tolerance_dev) = range_val_dev + (change, metric_result) = Perf.check_stats_change( + perf_stat, + expected_val, + tolerance_dev, + config.allowed_perf_changes, + config.verbose >= 4) + t.metrics.append((change, perf_stat)) + + # If any metric fails then the test fails. + # Note, the remaining metrics are still run so that + # a complete list of changes can be presented to the user. + if metric_result['passFail'] == 'fail': + result = metric_result return result @@ -1186,7 +1239,7 @@ def simple_build(name, way, extra_hc_opts, should_fail, top_mod, link, addsuf, b to_do = '-c' # just compile stats_file = name + '.comp.stats' - if opts.compiler_stats_range_fields: + if isCompilerStatsTest(): extra_hc_opts += ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS' if backpack: extra_hc_opts += ' -outputdir ' + name + '.out' @@ -1219,10 +1272,10 @@ def simple_build(name, way, extra_hc_opts, should_fail, top_mod, link, addsuf, b # ToDo: if the sub-shell was killed by ^C, then exit - statsResult = checkStats(name, way, stats_file, opts.compiler_stats_range_fields) - - if badResult(statsResult): - return statsResult + if isCompilerStatsTest(): + statsResult = check_stats(name, way, stats_file, opts.stats_range_fields) + if badResult(statsResult): + return statsResult if should_fail: if exit_code == 0: @@ -1260,7 +1313,7 @@ def simple_run(name, way, prog, extra_run_opts): my_rts_flags = 
rts_flags(way) stats_file = name + '.stats' - if opts.stats_range_fields: + if isStatsTest() and not isCompilerStatsTest(): stats_args = ' +RTS -V0 -t' + stats_file + ' --machine-readable -RTS' else: stats_args = '' @@ -1298,7 +1351,7 @@ def simple_run(name, way, prog, extra_run_opts): if check_prof and not check_prof_ok(name, way): return failBecause('bad profile') - return checkStats(name, way, stats_file, opts.stats_range_fields) + return check_stats(name, way, stats_file, opts.stats_range_fields) def rts_flags(way): args = config.way_rts_flags.get(way, []) @@ -1993,7 +2046,7 @@ def findTFiles(roots): # ----------------------------------------------------------------------------- # Output a test summary to the specified file object -def summary(t, file, short=False): +def summary(t, file, short=False, color=False): file.write('\n') printUnexpectedTests(file, @@ -2004,7 +2057,16 @@ def summary(t, file, short=False): # Only print the list of unexpected tests above. return - file.write('SUMMARY for test run started at ' + colorize = lambda s: s + if color: + if len(t.unexpected_failures) > 0 or \ + len(t.unexpected_stat_failures) > 0 or \ + len(t.framework_failures) > 0: + colorize = str_fail + else: + colorize = str_pass + + file.write(colorize('SUMMARY') + ' for test run started at ' + time.strftime("%c %Z", t.start_time) + '\n' + str(datetime.timedelta(seconds= round(time.time() - time.mktime(t.start_time)))).rjust(8) diff --git a/testsuite/driver/testutil.py b/testsuite/driver/testutil.py index 15587e6960..6e0c2684d7 100644 --- a/testsuite/driver/testutil.py +++ b/testsuite/driver/testutil.py @@ -5,10 +5,28 @@ import shutil import threading +def passed(): + return {'passFail': 'pass'} + +def failBecause(reason, tag=None): + return {'passFail': 'fail', 'reason': reason, 'tag': tag} + def strip_quotes(s): # Don't wrap commands to subprocess.call/Popen in quotes. 
return s.strip('\'"') +def str_fail(s): + return '\033[1m\033[43m\033[31m' + s + '\033[0m' + +def str_pass(s): + return '\033[1m\033[32m' + s + '\033[0m' + +def str_warn(s): + return '\033[1m\033[33m' + s + '\033[0m' + +def str_info(s): + return '\033[1m\033[34m' + s + '\033[0m' + def getStdout(cmd_and_args): # Can't use subprocess.check_output, since we also verify that # no stderr was produced diff --git a/testsuite/mk/test.mk b/testsuite/mk/test.mk index f036110e07..65e897d849 100644 --- a/testsuite/mk/test.mk +++ b/testsuite/mk/test.mk @@ -215,6 +215,14 @@ ifeq "$(SKIP_PERF_TESTS)" "YES" RUNTEST_OPTS += --skip-perf-tests endif +ifeq "$(ONLY_PERF_TESTS)" "YES" +RUNTEST_OPTS += --only-perf-tests +endif + +ifneq "$(TEST_ENV)" "" +RUNTEST_OPTS += --test-env="$(TEST_ENV)" +endif + ifeq "$(CLEANUP)" "0" RUNTEST_OPTS += -e config.cleanup=False else ifeq "$(CLEANUP)" "NO" @@ -266,6 +274,10 @@ RUNTEST_OPTS += \ RUNTEST_OPTS += -e "config.stage=$(GhcStage)" +ifneq "$(METRICS_FILE)" "" +RUNTEST_OPTS += \ + --metrics-file "$(METRICS_FILE)" +endif ifneq "$(JUNIT_FILE)" "" RUNTEST_OPTS += \ --junit "$(JUNIT_FILE)" diff --git a/testsuite/tests/callarity/perf/all.T b/testsuite/tests/callarity/perf/all.T index 83083d4b4d..37e40e6f9c 100644 --- a/testsuite/tests/callarity/perf/all.T +++ b/testsuite/tests/callarity/perf/all.T @@ -1,13 +1,7 @@ test('T3924', - [stats_num_field('bytes allocated', - [ (wordsize(64), 50760, 8), - # previously, without call-arity: 22326544 - # 2014-01-18: 51480 (amd64/Linux) - # 2014-07-17: 50760 (amd64/Linux) (Roundabout adjustment) - # 2015-04-03: Widen 5->8% (amd64/Windows was doing better) - (wordsize(32), 44988, 5) ]), - # 2014-04-04: 44988 (Windows, 64-bit machine) - only_ways(['normal']) + [collect_stats('bytes allocated',8) + , only_ways(['normal']) ], compile_and_run, ['-O']) + diff --git a/testsuite/tests/deriving/perf/all.T b/testsuite/tests/deriving/perf/all.T index 240571b4a2..1402a38b5d 100644 --- a/testsuite/tests/deriving/perf/all.T +++ 
b/testsuite/tests/deriving/perf/all.T @@ -1,13 +1,6 @@ test('T10858', - [compiler_stats_num_field('bytes allocated', - [(wordsize(64), 221895064, 8) ]), - # Initial: 222312440 - # 2016-12-19 247768192 Join points (#19288) - # 2017-02-12 304094944 Type-indexed Typeable - # 2017-02-25 275357824 Early inline patch - # 2017-03-28 241242968 Run Core Lint less - # 2017-06-07 221895064 Apparently been reducing for some time - # Today it crossed the boundary; good + [ collect_compiler_stats('bytes allocated',8), only_ways(['normal'])], compile, ['-O']) + diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T index d1d5a1ce70..b2ca109000 100644 --- a/testsuite/tests/perf/compiler/all.T +++ b/testsuite/tests/perf/compiler/all.T @@ -1,4 +1,4 @@ -# Tests that call 'compiler_stats_num_field' are skipped when debugging is on. +# Tests that call 'collect_compiler_stats' are skipped when debugging is on. # See testsuite/driver/testlib.py. def no_lint(name, opts): @@ -29,112 +29,8 @@ setTestOpts(no_lint) test('T1969', [# expect_broken(12437), - compiler_stats_num_field('peak_megabytes_allocated', # Note [residency] - [(wordsize(32), 30, 15), - # 2010-05-17 14 (x86/Windows) - # 15 (x86/OS X) - # 19 (x86/OS X) - # 2013-02-10 13 (x86/Windows) - # 2013-02-10 14 (x86/OSX) - # 2013-11-13 17 (x86/Windows, 64bit machine) - # 2015-07-11 21 (x86/Linux, 64bit machine) use +RTS -G1 - # 2016-04-06 30 (x86/Linux, 64bit machine) - (wordsize(64), 73, 20)]), - # 28 (amd64/Linux) - # 34 (amd64/Linux) - # 2012-09-20 23 (amd64/Linux) - # 2012-10-03 25 (amd64/Linux if .hi exists) - # 2013-02-13 23, but unstable so increased to 10% range - # 2013-02-13 27, very unstable! 
- # 2014-09-10 29 (amd64/Linux) post-AMP-cleanup - # 2013-09-11 30, 10 (amd64/Linux) - # 2013-09-11 30, 15 (adapt to Phab CI) - # 2015-06-03 41, (amd64/Linux) use +RTS -G1 - # 2015-10-28 55, (amd64/Linux) emit Typeable at definition site - # 2016-10-20 68, (amd64/Linux) allow top-level string literals - # See the comment 16 on #8472. - # 2017-02-17 83 (amd64/Linux) Type-indexed Typeable - # 2017-03-31 61 (amd64/Linux) Fix memory leak in simplifier - # 2018-01-25 78 (amd64/Linux) Use CoreExpr for EvTerm - # 2018-07-10 73 (amd64/Linux) Fix space leaks - compiler_stats_num_field('max_bytes_used', - [(platform('i386-unknown-mingw32'), 5719436, 20), - # 2010-05-17 5717704 (x86/Windows) - # 2013-02-10 5159748 (x86/Windows) - # 2013-02-10 5030080 (x86/Windows) - # 2013-11-13 7295012 (x86/Windows, 64bit machine) - # 2014-04-24 5719436 (x86/Windows, 64bit machine) - (wordsize(32), 9418680, 1), - # 6707308 (x86/OS X) - # 2009-12-31 6149572 (x86/Linux) - # 2014-01-22 6429864 (x86/Linux) - # 2014-06-29 5949188 (x86/Linux) - # 2015-07-11 6241108 (x86/Linux, 64-bit machine) use +RTS -G1 - # 2016-04-06 9093608 (x86/Linux, 64-bit machine) - # 2017-03-24 9261052 (x86/Linux, 64-bit machine) - # 2017-04-06 9418680 (x86/Linux, 64-bit machine) - - (wordsize(64), 19738608, 15)]), - # 2014-09-10 10463640, 10 # post-AMP-update (somewhat stabelish) - # looks like the peak is around ~10M, but we're - # unlikely to GC exactly on the peak. - # varies quite a lot with CLEANUP and BINDIST, - # hence 10% range. - # See Note [residency] to get an accurate view. 
- # 2014-09-14 9684256, 10 # try to lower it a bit more to match Phab's CI - # 2014-11-03 10584344, # ghcspeed reports higher numbers consistently - # 2015-07-11 11670120 (amd64/Linux) - # 2015-10-28 15017528 (amd64/Linux) emit typeable at definition site - # 2016-10-12 17285216 (amd64/Linux) it's not entirely clear why - # 2017-02-01 19924328 (amd64/Linux) Join points (#12988) - # 2017-02-14 16393848 Early inline patch - # 2017-03-31 16679176 Fix memory leak in simplifier - # 2017-08-25 19199872 Refactor the Mighty Simplifier - # 2018-02-19 22311600 (amd64/Linux) Unknown - # 2018-07-10 19738608 (amd64/Linux) Fix space leaks - - compiler_stats_num_field('bytes allocated', - [(platform('i386-unknown-mingw32'), 301784492, 5), - # 215582916 (x86/Windows) - # 2012-10-29 298921816 (x86/Windows) - # 2013-02-10 310633884 (x86/Windows) - # 2013-11-13 317975916 (x86/Windows, 64bit machine) - # 2014-04-04 301784492 (x86/Windows, 64bit machine) - (wordsize(32), 324586096, 1), - # 221667908 (x86/OS X) - # 274932264 (x86/Linux) - # 2012-10-08 303930948 (x86/Linux, new codegen) - # 2013-02-10 322937684 (x86/OSX) - # 2014-01-22 316103268 (x86/Linux) - # 2014-06-29 303300692 (x86/Linux) - # 2015-07-11 288699104 (x86/Linux, 64-bit machine) use +RTS -G1 - # 2016-04-06 344730660 (x86/Linux, 64-bit machine) - # 2017-03-24 324586096 (x86/Linux, 64-bit machine) - (wordsize(64), 670839456, 5)]), - # 2009-11-17 434845560 (amd64/Linux) - # 2009-12-08 459776680 (amd64/Linux) - # 2010-05-17 519377728 (amd64/Linux) - # 2011-08-05 561382568 (amd64/OS X) - # 2012-07-16 589168872 (amd64/Linux) - # 2012-07-20 595936240 (amd64/Linux) - # 2012-08-23 606230880 (amd64/Linux) - # 2012-08-29 633334184 (amd64/Linux) new codegen - # 2012-09-18 641959976 (amd64/Linux) - # 2012-10-19 661832592 (amd64/Linux) -fPIC turned on - # 2012-10-23 642594312 (amd64/Linux) -fPIC turned off again - # 2012-11-12 658786936 (amd64/Linux) UNKNOWN REASON - # 2013-91-17 667160192 (x86_64/Linux) new demand analyser - # 
2013-10-18 698612512 (x86_64/Linux) fix for #8456 - # 2014-02-10 660922376 (x86_64/Linux) call arity analysis - # 2014-07-17 651626680 (x86_64/Linux) roundabout update - # 2014-09-10 630299456 (x86_64/Linux) post-AMP-cleanup - # 2015-06-03 581460896 (x86_64/Linux) use +RTS -G1 - # 2015-10-28 695430728 (x86_64/Linux) emit Typeable at definition site - # 2015-10-28 756138176 (x86_64/Linux) inst-decl defaults go via typechecker (#12220) - # 2017-02-17 831733376 (x86_64/Linux) Type-indexed Typeable - # 2017-02-25 695354904 (x86_64/Linux) Early inlining patch - # 2017-04-21 659863176 (x86_64/Linux) Unknown - # 2018-07-10 670839456 (x86_64/Linux) Unknown (just updating) + collect_compiler_stats(['peak_megabytes_allocated','max_bytes_used'],15), + collect_compiler_stats('bytes allocated',5), only_ways(['normal']), extra_hc_opts('-dcore-lint -static'), @@ -161,63 +57,8 @@ else: test('T3294', [ - compiler_stats_num_field('max_bytes_used', # Note [residency] - [(wordsize(32), 28686588, 15), - # 17725476 (x86/OS X) - # 14593500 (Windows) - # 2013-02-10 20651576 (x86/Windows) - # 2013-02-10 20772984 (x86/OSX) - # 2013-11-13 24009436 (x86/Windows, 64bit machine) - # 2014-04-24 19882188 (x86/Windows, 64bit machine) - # 2014-12-22 26525384 (x86/Windows) Increase due to silent superclasses? - # 2015-07-11 43196344 (x86/Linux, 64-bit machine) use +RTS -G1 - # 2016-04-06 28686588 (x86/Linux, 64-bit machine) - - (wordsize(64), 34050960, 20)]), - # prev: 25753192 (amd64/Linux) - # 29/08/2012: 37724352 (amd64/Linux) - # (increase due to new codegen, see #7198) - # 13/13/2012: 44894544 (amd64/Linux) - # (reason for increase unknown) - # 15/5/2013: 36904752 (amd64/Linux) - # (reason for decrease unknown) - # 29/5/2013: 43224080 (amd64/Linux) - # (reason for increase back to earlier value unknown) - # 2014-07-14: 36670800 (amd64/Linux) - # (reason unknown, setting expected value somewhere in between) - # 2015-01-22: 45000000 (amd64/Linux) - # varies between 40959592 and 52914488... 
increasing to +-20% - # 2015-10-28: 50367248 (amd64/Linux) - # D757: emit Typeable instances at site of type definition - # 2016-07-11: 54609256 (Windows) before fix for #12227 - # 2016-07-11: 52992688 (Windows) after fix for #12227 - # 2017-02-17: 63131248 (amd64/Linux) Type indexed Typeable - # 2017-05-14: 34050960 (amd64/Linux) Two-pass CmmLayoutStack - - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 1377050640, 5), - # previous: 815479800 (x86/Linux) - # (^ increase due to new codegen, see #7198) - # 2012-10-08: 1373514844 (x86/Linux) - # 2013-11-13: 1478325844 (x86/Windows, 64bit machine) - # 2014-01-12: 1565185140 (x86/Linux) - # 2013-04-04: 1377050640 (x86/Windows, 64bit machine) - (wordsize(64), 1858491504, 5)]), - # old: 1357587088 (amd64/Linux) - # 29/08/2012: 2961778696 (amd64/Linux) - # (^ increase due to new codegen, see #7198) - # 18/09/2012: 2717327208 (amd64/Linux) - # 08/06/2013: 2901451552 (amd64/Linux) (reason unknown) - # 12/12/2013: 3083825616 (amd64/Linux) (reason unknown) - # 18/02/2014: 2897630040 (amd64/Linux) (call arity improvements) - # 12/03/2014: 2705289664 (amd64/Linux) (more call arity improvements) - # 2014-17-07: 2671595512 (amd64/Linux) (round-about update) - # 2014-09-10: 2709595808 (amd64/Linux) post-AMP cleanup - # 2016-07-11: 2664479936 (Windows) before fix for #12227 - # 2016-07-11: 2739731144 (Windows) after fix for #12227 (ignoring) - # 2017-02-17: 2758641264 (amd64/Linux) (Type indexed Typeable) - # 2017-05-14: 2253557280 (amd64/Linux) Two-pass CmmLayoutStack - # 2017-10-24: 1858491504 (amd64/Linux) Improved linear regAlloc + collect_compiler_stats('max_bytes_used',15), + collect_compiler_stats('bytes allocated',5), conf_3294, # Use `+RTS -G1` for more stable residency measurements. Note [residency]. 
@@ -227,173 +68,27 @@ test('T3294', ['']) test('T4801', - [ # expect_broken(5224), - # temporarily unbroken (#5227) -################################### -# deactivated for now, as this metric became too volatile recently -# compiler_stats_num_field('peak_megabytes_allocated',# Note [residency] -# [(platform('x86_64-apple-darwin'), 70, 1), -# # expected value: 58 (amd64/OS X) -# # 13/01/2014 - 70 -# (wordsize(32), 30, 20), -# (wordsize(64), 48, 20)]), -# # prev: 50 (amd64/Linux) -# # 19/10/2012: 64 (amd64/Linux) -# # (^ REASON UNKNOWN!) -# # 12/11/2012: 49 (amd64/Linux) -# # (^ REASON UNKNOWN!) -# # 28/8/13: 60 (amd64/Linux) -# # (^ REASON UNKNOWN!) -# # 2014-09-10: 55 post-AMP-cleanup -# # 2014-10-08: 62 (jumps between 55 and 71 observed -- GC tipping point?) -# # 2014-10-13: 48 stricter seqDmdType - - compiler_stats_num_field('bytes allocated', - [(platform('x86_64-apple-darwin'), 417302064, 10), - # prev: 510938976 (amd64/OS X): - # 2015-12-11: 465653312 (amd64/OS X) Update, bump tolerance to +/-10% - # 2017-03-24: 417302064 (amd64/OS X) Correlated with Linux improvement - - (wordsize(32), 199856388, 10), - # prev: 185669232 (x86/OSX) - # 2014-01-22: 211198056 (x86/Linux) - # 2014-09-03: 185242032 (Windows laptop) - # 2014-12-01: 203962148 (Windows laptop) - # 2016-04-06: 239556572 (x86/Linux) - # 2017-03-24: 199856388 (x86/Linux) - (wordsize(64), 388898280, 10)]), - # prev: 360243576 (amd64/Linux) - # 19/10/2012: 447190832 (amd64/Linux) (-fPIC turned on) - # 19/10/2012: 392409984 (amd64/Linux) (-fPIC turned off) - # 2014-04-08: 362939272 (amd64/Linux) cumulation of various smaller improvements over recent commits - # 2014-10-08: 382056344 (amd64/Linux) stricter foldr2 488e95b - # 2015-10-28: 434278248 (amd64/Linux) emit Typeable at definition site - # 2016-10-19: 388898280 (amd64/Linux) Refactor traceRn interface (#12617) - -################################### -# deactivated for now, as this metric became too volatile recently -# -# 
compiler_stats_num_field('max_bytes_used', -# [(platform('x86_64-apple-darwin'), 25145320, 5), -# (wordsize(32), 11829000, 15), -# # 9651948 (x86/OSX) -# # 10290952 (windows) -# # 2013-02-10 11071060 (x86/Windows) -# # 2013-02-10: 11207828 (x86/OSX) -# # (some date): 11139444 -# # 2013-11-13: 11829000 (x86/Windows, 64bit machine) -# (wordsize(64), 19296544, 15)]), -# # prev: 20486256 (amd64/OS X) -# # 30/08/2012: 17305600--20391920 (varies a lot) -# # 19/10/2012: 26882576 (-fPIC turned on) -# # 19/10/2012: 18619912 (-fPIC turned off) -# # 24/12/2012: 21657520 (perhaps gc sampling time wibbles?) -# # 10/01/2014: 25166280 -# # 13/01/2014: 22646000 (mostly due to #8647) -# # 18/02/2014: 25002136 (call arity analysis changes) -# # 12/05/2014: 25002136 (specialisation and inlining changes) -# # 10/09/2014: 19296544, 10 (post-AMP-cleanup) -# # 14/09/2014: 19585456, 15 (adapt to Phab CI env) - only_ways(['normal']), - extra_hc_opts('-static'), + [# collect_compiler_stats('peak_megabytes_allocated',1), + # expect_broken(5224), + # temporarily unbroken (#5227) + # deactivated for now, as this metric became too volatile recently + collect_compiler_stats('bytes allocated',10), + # collect_compiler_stats('max_bytes_used',5), + only_ways(['normal']), + extra_hc_opts('-static'), - # Use `+RTS -G1` for more stable residency measurements. Note [residency]. - extra_hc_opts('+RTS -G1 -RTS') - ], + # Use `+RTS -G1` for more stable residency measurements. Note [residency]. 
+ extra_hc_opts('+RTS -G1 -RTS') + ], compile, ['']) test('T3064', - [compiler_stats_num_field('peak_megabytes_allocated',# Note [residency] - [(wordsize(32), 36, 20), - # expected value: 14 (x86/Linux 28-06-2012): - # 2013-11-13: 18 (x86/Windows, 64bit machine) - # 2014-01-22: 23 (x86/Linux) - # 2014-12-22: 23 (x86/Linux) death to silent superclasses - # 2015-07-11: 28 (x86/Linux, 64-bit machine) use +RTS -G1 - # 2017-04-06: 36 (x86/Linux, 64-bit machine) it's unclear + [collect_compiler_stats('peak_megabytes_allocated',20), + collect_compiler_stats('bytes allocated',10), - (wordsize(64), 66, 20)]), - # (amd64/Linux): 18 - # (amd64/Linux) 2012-02-07: 26 - # (amd64/Linux) 2013-02-12: 23; increased range to 10% - # (amd64/Linux) 2013-04-03: 26 - # (amd64/Linux) 2013-09-11: 30; result of AMP patch - # Increased range to 20%. peak-usage varies from 22 to 26, - # depending on whether the old .hi file exists - # (amd64/Linux) 2013-09-11: 37; better arity analysis (weird) - # (amd64/Linux) (09/09/2014): 42, AMP changes (larger interfaces, more loading) - # (amd64/Linux) 2014-10-13: 38: Stricter seqDmdType - # (amd64/Linux) 2014-12-22: 27: death to silent superclasses - # (amd64/Linux) 2015-01-22: 32: Varies from 30 to 34, at least here. 
- # (amd64/Linux) 2015-06-03: 54: use +RTS -G1 - # (amd64/Linux) 2016-10-25: 66: Presumably creep - - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 134044092, 10), - # 2011-06-28: 56380288 (x86/Linux) - # 2012-10-30: 111189536 (x86/Windows) - # 2013-11-13: 146626504 (x86/Windows, 64bit machine) - # 2014-01-22: 162457940 (x86/Linux) - # 2014-12-01: 162457940 (Windows) - # 2014-12-22: 122836340 (Windows) Death to silent superclasses - # 2016-04-06: 153261024 (x86/Linux) probably wildcard refactor - # 2017-03-24: 134044092 (x86/Linux, 64-bit machine) Update - - (wordsize(64), 272759920, 5)]), - # (amd64/Linux) (2011-06-28): 73259544 - # (amd64/Linux) (2013-02-07): 224798696 - # (amd64/Linux) (2013-08-02): 236404384, increase from roles - # (amd64/Linux) (2013-09-11): 290165632, increase from AMP warnings - # (amd64/Linux) (2013-11-22): 308300448, GND via Coercible and counters for constraints solving - # (amd64/Linux) (2013-12-02): 329795912, Coercible refactor - # (amd64/Linux) (2014-02-11): 308422280, optimize Coercions in simpleOptExpr - # (amd64/Linux) (2014-05-23): 324022680, unknown cause - # (amd64/Linux) (2014-07-17): 332702112, general round of updates - # (amd64/Linux) (2014-08-29): 313638592, w/w for INLINABLE things - # (amd64/Linux) (2014-09-09): 407416464, AMP changes (larger interfaces, more loading) - # (amd64/Linux) (2014-09-14): 385145080, BPP changes (more NoImplicitPrelude in base) - # (amd64/Linux) (2014-12-10): 363103840, improvements in constraint solver - # (Mac) (2014-12-18): 350418600, improvements to flattener - # (amd64/Linux) (2014-12-22): 243670824, Ha! Death to superclass constraints, makes - # much less code for Monad instances - # (amd64/Linux) (2015-12-01): 264952256, Regression due to Simon's wildcard refactor - # Tracked as #11151. 
- # (amd64/Linux) (2015-12-11): 304344936, Regression due to TypeInType - # Tracked as #11196 - # (amd64/Linux) (2016-04-15): 287460128 Improvement due to using coercionKind instead - # of zonkTcType (Trac #11882) - # (amd64/Darwin) (2017-01-23): 306222424 Presumably creep from recent changes (Typeable?) - # (amd64/Linux) (2017-02-14): 259815560 Early inline patch: 9% improvement - # (amd64/Linux) (2017-03-31): 265950920 Fix memory leak in simplifier - # (amd64/Linux) (2017-05-01): 281509496 Avoid excessive space usage from unfoldings in CoreTidy - # (amd64/Linux) (2017-05-01): 258505536 I think this is improvement in coercionKind e4ab65bd - # (amd64/Linux) (2018-08-04): 272759920 It's unclear - -################################### -# deactivated for now, as this metric became too volatile recently -# -# compiler_stats_num_field('max_bytes_used', -# [(wordsize(32), 11202304, 20), -# # 2011-06-28: 2247016 (x86/Linux) (28/6/2011): -# #(some date): 5511604 -# # 2013-11-13: 7218200 (x86/Windows, 64bit machine) -# # 2014-04-04: 11202304 (x86/Windows, 64bit machine) -# (wordsize(64), 13251728, 20)]), -# # (amd64/Linux, intree) (28/06/2011): 4032024 -# # (amd64/Linux, intree) (07/02/2013): 9819288 -# # (amd64/Linux) (14/02/2013): 8687360 -# # (amd64/Linux) (18/02/2013): 9397488 -# # (amd64/Linux) (02/08/2013): 10742536, increase from roles -# # (amd64/Linux) (19/08/2013): 9211816, decrease apparently from better eta reduction -# # (amd64/Linux) (11/09/2013): 12000480, increase from AMP warnings -# # 933cdf15a2d85229d3df04b437da31fdfbf4961f -# # (amd64/Linux) (22/11/2013): 16266992, GND via Coercible and counters for constraints solving -# # (amd64/Linux) (12/12/2013): 19821544, better One shot analysis -# # (amd64/Linux) (09/09/2014): 24357392, AMP changes (larger interfaces, more loading) -# # (amd64/Linux) (14/09/2014): 16053888, BPP changes (more NoImplicitPrelude in base) -# # (amd64/Linux) (19/09/2014): 18744992, unknown -# # (amd64/Linux) 2014-10-13: 13251728, 
Stricter seqDmdType + # deactivated for now, as this metric became too volatile recently + # collect_compiler_stats('max_bytes_used',20) only_ways(['normal']), @@ -409,37 +104,7 @@ test('T4007', ['$MAKE -s --no-print-directory T4007']) test('T5030', - [compiler_stats_num_field('bytes allocated', - [(wordsize(32), 345668088, 10), - # previous: 196457520 - # 2012-10-08: 259547660 (x86/Linux, new codegen) - # 2013-11-21: 198573456 (x86 Windows, 64 bit machine) - # 2014-12-10: 227205560 constraint solver got worse again; more aggressive solving - # of family-applications leads to less sharing, I think - # 2015-07-11: 201882912 reason unknown - # 2016-04-06: 345668088 likely TypeInType - - (wordsize(64), 794426536, 10)]), - # Previously 530000000 (+/- 10%) - # 17/1/13: 602993184 (x86_64/Linux) - # (new demand analyser) - # 2013-06-08 538467496 (x86_64/Linux) - # ^ reason unknown - # 2013-08-02 454498592 (amd64/Linux) - # decrease from more aggressive coercion optimisations from roles - # 2013-11-12 397672152 (amd64/Linux) - # big decrease following better CSE and arity - # 2014-07-17 409314320 (amd64/Linux) - # general round of updates - # 2014-09-10 385152728 post-AMP-cleanup - # 2014-12-08 340969128 constraint solver perf improvements (esp kick-out) - # 2014-12-10 449042120 constraint solver got worse again; more aggressive solving - # of family-applications leads to less sharing, I think - # 2015-03-17 403932600 tweak to solver algorithm - # 2015-12-11 653710960 TypeInType (see #11196) - # 2016-10-17 794426536 20% big increase following - # 31621b12 * A collection of type-inference refactorings. 
- # See ticket for more info + [collect_compiler_stats('bytes allocated', 10), only_ways(['normal']) ], @@ -447,47 +112,14 @@ test('T5030', ['-freduction-depth=300']) test('T5631', - [compiler_stats_num_field('bytes allocated', - [(wordsize(32), 570137436, 10), - # expected value: 392904228 (x86/Linux) - # 2014-04-04: 346389856 (x86 Windows, 64 bit machine) - # 2014-12-01: 390199244 (Windows laptop) - # 2016-04-06: 570137436 (amd64/Linux) many reasons - (wordsize(64), 1161885448, 5)]), - # expected value: 774595008 (amd64/Linux): - # expected value: 735486328 (amd64/Linux) 2012/12/12: - # expected value: 690742040 (amd64/Linux) Call Arity improvements - # 2014-09-09: 739704712 (amd64/Linux) AMP changes - # 2014-11-04: 776121120 (amd64/Linux) new-flatten-skolems - # 2015-06-01: 812288344 (amd64/Linux) unknown cause - # 2015-12-11: 1128828928 (amd64/Linux) TypeInType (see #11196) - # 2015-12-21: 1198327544 (Mac) TypeApplications (will fix with #11196) - # 2015-03-18: 1124068664 (Mac) optimize Unify & zonking - # 2016-10-19: 1024926024 (amd64/Linux) Refactor traceRn interface (#12617) - # 2016-11-10: 1077429456 (amd64/Linux) Stop -dno-debug-output suppressing -ddump-tc-trace - # 2017-02-17: 1517484488 (amd64/Linux) Type-indexed Typeable - # 2017-03-03: 1065147968 (amd64/Linux) Share Typeable KindReps - # 2017-03-31: 1037482512 (amd64/Linux) Fix memory leak in simplifier - # 2017-07-27: 1106015512 (Mac) Regresssion from tracking visibility in TypeEqOrigin - # should be fixed by #14037 - # 2018-06-18: 1161885448 (Mac) Not entirely clear - only_ways(['normal']) + [collect_compiler_stats('bytes allocated',10), + only_ways(['normal']) ], compile, ['']) test('parsing001', - [compiler_stats_num_field('bytes allocated', - [(wordsize(32), 232777056, 10), - # Initial: 274000576 - # 2017-03-24: 232777056 - (wordsize(64), 519401296, 5)]), - # expected value: 587079016 (amd64/Linux) - # 2016-09-01: 581551384 (amd64/Linux) Restore w/w limit (#11565) - # 2016-12-19: 493730288 
(amd64/Linux) Join points (#12988) - # 2017-02-14: 463931280 Early inlining patch; acutal improvement 7% - # 2017-12-11: 490228304 BlockArguments - # 2018-04-09: 519401296 Inexplicable, collateral of #14737 + [collect_compiler_stats('bytes allocated',10), only_ways(['normal']), ], compile_fail, ['']) @@ -495,333 +127,53 @@ test('parsing001', test('T783', [ only_ways(['normal']), # no optimisation for this one - # expected value: 175,569,928 (x86/Linux) - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 225911912, 5), - # 2012-10-08: 226907420 (x86/Linux) - # 2013-02-10: 329202116 (x86/Windows) - # 2013-02-10: 338465200 (x86/OSX) - # 2014-04-04: 319179104 (x86 Windows, 64 bit machine) - # 2014-09-03: 223377364 (Windows) better specialisation, raft of core-to-core optimisations - # 2014-12-22: 235002220 (Windows) not sure why - # 2016-04-06: 249332816 (x86/Linux, 64-bit machine) - # 2017-03-24: 225911912 (x86/Linux, 64-bit machine) - - (wordsize(64), 481875416, 10)]), - # prev: 349263216 (amd64/Linux) - # 07/08/2012: 384479856 (amd64/Linux) - # 29/08/2012: 436927840 (amd64/Linux) - # 12/11/2012: 640324528 (amd64/Linux) - # (OldCmm removed: not sure why this got worse, the - # other perf tests remained about the same) - # 18/10/2013: 734038080 (amd64/Linux) - # (fix for #8456) - # 24/10/2013: 654804144 (amd64/Linux) - # (fix previous fix for #8456) - # 2014-07-17: 640031840 (amd64/Linux) - # (general round of updates) - # 2014-08-29: 441932632 (amd64/Linux) - # (better specialisation, raft of core-to-core optimisations) - # 2014-08-29: 719814352 (amd64/Linux) - # (changed order of cmm block causes analyses to allocate much more, - # but the changed order is slighly better in terms of runtime, and - # this test seems to be an extreme outlier.) 
- # 2015-05-16: 548288760 (amd64/Linux) - # (improved sequenceBlocks in nativeCodeGen, #10422) - # 2015-08-07: 470738808 (amd64/Linux) - # (simplifying the switch plan code path for simple checks, #10677) - # 2015-08-28: 526230456 (amd64/Linux) - # (D757: Emit Typeable instances at site of type definition) - # 2015-12-04: 1134085384 (amd64/Linux) - # (D1535: Major overhaul of pattern match checker, #11162) - # 2016-02-03: 488592288 (amd64/Linux) - # (D1795: Another overhaul of pattern match checker, #11374) - # 2017-02-14 436978192 Early inlining: 5% improvement - # 2017-09-08 481875416 Unknown - + collect_compiler_stats('bytes allocated',10), extra_hc_opts('-static') ], compile,['']) test('T5321Fun', [ only_ways(['normal']), # no optimisation for this one - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 244387620, 10), - # prev: 300000000 - # 2012-10-08: 344416344 x86/Linux - # (increase due to new codegen) - # 2014-09-03: 299656164 (specialisation and inlining) - # 2014-12-10: 206406188 # Improvements in constraint solver - # 2016-04-06: 279922360 x86/Linux - # 2017-03-24: 244387620 x86/Linux (64-bit machine) - - (platform('x86_64-apple-darwin'), 446893600, 5), - # 2018-03-17: 423774560 # OS X-only (reason unknown, see #11753) - - (wordsize(64), 423774560, 5)]) - # prev: 585521080 - # 2012-08-29: 713385808 # (increase due to new codegen) - # 2013-05-15: 628341952 # (reason for decrease unknown) - # 2013-06-24: 694019152 # (reason for re-increase unknown) - # 2014-05-12: 614409344 # (specialisation and inlining changes) - # 2014-09-10: 601629032 # post-AMP-cleanup - # 2014-11-06: 541287000 # Simon's flat-skol changes to the constraint solver - # 2014-12-10: 408110888 # Improvements in constraint solver - # 2014-12-16: 429921312 # Flattener parameterized over roles - # 2015-08-10: 509921312 - # (undefined now takes an implicit parameter and GHC -O0 does - # not recognize that the application is bottom) - # 2015-12-11: 565883176 # TypeInType (see 
#11196) - # 2017-01-06: 497356688 # Small coercion optimisations - # The actual decrease was only 2%; earlier - # commits had drifted down - # 2017-01-22: 525895608 # Allow top-level string literals in Core. I'm not - # convinced that this patch is - # responsible for all of this - # change, however. Namely I am - # quite skeptical of the downward - # "drift" reported above - # 2017-01-31: 498135752 # Join points (#12988) - # 2017-02-23: 524706256 # Type-indexed Typeable? (on Darwin) - # 2017-02-25: 488295304 # Early inlining patch - # 2017-05-14: 449577856 # (amd64/Linxu) Two-pass CmmLayoutStack - # 2017-12-13: 423774560 # (amd64/Linxu) Typechecker improvements + collect_compiler_stats('bytes allocated',10) ], compile,['']) test('T5321FD', [ only_ways(['normal']), # no optimisation for this one - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 250757460, 10), - # prev: 213380256 - # 2012-10-08: 240302920 (x86/Linux) - # (increase due to new codegen) - # 2014-07-31: 211699816 (Windows) (-11%) - # (due to better optCoercion, 5e7406d9, #9233) - # 2016-04-06: 250757460 (x86/Linux) - - (wordsize(64), 371826136, 10)]) - # prev: 418306336 - # 29/08/2012: 492905640 - # (increase due to new codegen) - # 15/05/2013: 406039584 - # (reason for decrease unknown) - # 08/06/2013: 476497048 - # (reason for increase unknown) - # before 2014-07-17: 441997096 - # (with -8%, still in range, hence cause not known) - # 2014-07-17: 426960992 (-11% of previous value) - # (due to better optCoercion, 5e7406d9, #9233) - # 2014-10-08 410895536 - # (various changes; biggest improvements due to 949ad67 and FastString package ids) - # 2015-08-10: 470895536 - # (undefined now takes an implicit parameter and GHC -O0 does - # not recognize that the application is bottom) - # 2015-10-28: 532365376 - # D757: emit Typeable instances at site of type definition - # 2016-07-16: 477840432 - # Optimize handling of built-in OccNames - # 2017-05-14: 415136648 (amd64/Linux) Two-pass 
CmmLayoutStack - # 2018-04-24: 371826136 (amd64/Linux) Store size in LitString + collect_compiler_stats('bytes allocated',10) ], compile,['']) test('T5642', [ only_ways(['normal']), normal, - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 413517560, 10), - # sample from x86/Linux - # prev: 650000000 - # 2014-09-03: 753045568 - # 2014-12-10: 641085256 Improvements in constraints solver - # 2016-04-06: 462677300 - # 2017-03-24: 413517560 (x86/Linux, 64-bit machine) - - (wordsize(64), 838316496, 10)]) - # prev: 1300000000 - # 2014-07-17: 1358833928 (general round of updates) - # 2014-08-07: 1402242360 (caused by 1fc60ea) -# Watch out for: - # 23/05/2014: 1452688392 (More aggressive specialisation means we get - # specialised copies of imported functions that - # are ultimately discarded by trimAutoRules - # It's a bizarre program with LOTS of data types) - # 2014-09-10: 1536924976 post-AMP-cleanup - # 2014-12-10: 1282916024 Improvements in constraints solver - # 2015-10-28: 1412808976 Emit Typeable at definition site - # 2015-11-22: 1071915072 Use TypeLits in the metadata encoding - # 2016-02-08: 950004816 Pattern match checker re-rework - # 2016-05-12: 1300685592 Make Generic1 poly-kinded - # 2016-06-05: 916484672 Refactor derived Generic instances to reduce allocations - # 2016-09-03: 838316496 Derive the Generic instance in perf/compiler/T5642 + collect_compiler_stats('bytes allocated',10) ], compile,['-O']) test('T5837', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 27028956, 10), - # 40000000 (x86/Linux) - # 2013-11-13: 45520936 (x86/Windows, 64bit machine) - # 2014-09-03: 37096484 (Windows laptop, w/w for INLINABLE things - # 2014-12-01: 135914136 (Windows laptop, regression see below) - # 2014-12-08: 115905208 Constraint solver perf improvements (esp kick-out) - # 2016-04-06: 24199320 (x86/Linux, 64-bit machine) TypeInType - # 2017-03-24: 27028956 (x86/Linux, 64-bit machine) - - 
(platform('x86_64-unknown-mingw32'), 61806136, 7), - # 2017-02-19 59161648 (x64/Windows) - Unknown - # 2017-04-21 54985248 (x64/Windows) - Unknown - # 2017-12-24 54793816 (x64/Windows) - Unknown - # 2018-09-23 61806136 (x64/Windows) - Unknown - - (wordsize(64), 55813608, 7)]) - # sample: 3926235424 (amd64/Linux, 15/2/2012) - # 2012-10-02 81879216 - # 2012-09-20 87254264 amd64/Linux - # 2013-09-18 90587232 amd64/Linux - # 2013-11-21 86795752 amd64/Linux, GND via Coercible and counters - # for constraints solving - # 2014-08-29 73639840 amd64/Linux, w/w for INLINABLE things - # 2014-10-08 73639840 amd64/Linux, Burning Bridges and other small changes - # 2014-11-06 271028976 Linux, Accept big regression; - # See Note [An alternative story for the inert substitution] in TcFlatten - # 2014-12-08 234790312 Constraint solver perf improvements (esp kick-out) - # 2014-12-16 231155640 Mac Flattener parameterized over roles; - # some optimization - # 2015-03-17 53424304 Mac Better depth checking; fails earlier - # 2015-06-09 38834096 Better "improvement"; I'm not sure whey it improves things - # 2015-12-11 43877520 amd64/Linux, TypeInType (see #11196) - # 2016-03-18 48507272 Mac, accept small regression in exchange - # for other optimisations - # 2016-09-15 42445672 Linux; fixing #12422 - # 2016-09-25 41832056 amd64/Linux, Rework handling of names (D2469) - # 2016-10-25 52597024 amd64/Linux, the test now passes (hooray), and so - # allocates more because it goes right down the - # compilation pipeline - # 2017-01-24 57861352 amd64/Linux, very likely due to the top-level strings - # in Core patch. - # 2017-02-07 50253880 Another improvement in SetLevels. I don't think - # all the gain here is from this patch, but I think it - # just pushed it over the edge, so I'm re-centreing, and - # changing to 5% tolerance - # 2017-02-07 53592736 amd64/Linux Simon's earlier decrease appears - # to be environmentally-dependent. - # Also bumped acceptance threshold to 7%. 
- # 2017-02-20 58648600 amd64/Linux Type-indexed Typeable - # 2017-02-28 54151864 amd64/Linux Likely drift due to recent simplifier improvements - # 2017-02-25 52625920 amd64/Linux Early inlining patch - # 2017-09-06 56782344 amd64/Linux Drift manifest in unrelated LLVM patch - # 2017-10-24 52089424 amd64/linux Fix space leak in BinIface.getSymbolTable - # 2018-02-19 55813608 amd64/Linux Unknown + collect_compiler_stats('bytes allocated',10) ], compile, ['-freduction-depth=50']) test('T6048', [ only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 55701280, 10), - # prev: 38000000 (x86/Linux) - # 2012-10-08: 48887164 (x86/Linux) - # 2014-04-04: 62618072 (x86 Windows, 64 bit machine) - # 2014-09-03: 56315812 (x86 Windows, w/w for INLINABLE) - # 2014-12-01: 49987836 (x86 Windows) - # 2016-04-06: 55701280 (x86/Linux, 64-bit machine) - - (wordsize(64), 100574504, 10)]) - # 2012-09-18 97247032 amd64/Linux - # 2014-01-16 108578664 amd64/Linux (unknown, likely foldl-via-foldr) - # 2014-01-18 95960720 amd64/Linux Call Arity improvements - # 2014-02-28 105556793 amd64/Linux (unknown, tweak in base/4d9e7c9e3 resulted in change) - # 2014-03-05 110646312 amd64/Linux Call Arity became more elaborate - # 2014-07-14 125431448 amd64/Linux unknown reason. Even worse in GHC-7.8.3. *shurg* - # 2014-08-29 108354472 amd64/Linux w/w for INLINABLE things - # 2014-09-14 88186056 amd64/Linux BPP part1 change (more NoImplicitPreludes in base) - # 2014-01-08 95946688 amd64/Linux Mostly 4c834fd. Occasional spikes to 103822120! - # 2016-03-11 108225624 amd64/Linux unknown reason sadly; likely gradual creep. 
- # 2016-11-25 94327392 amd64/Linux Back down again hooray; still not sure why - # 2017-02-17 115715592 amd64/Linux Type-indexed Typeable - # 2017-04-28 90996312 Join point refactoring - # 2018-06-18 100574504 Darwin Unclear + collect_compiler_stats('bytes allocated',10) ], compile,['']) test('T9020', [ only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(wordsize(32), 249904136, 10), - # Original: 381360728 - # 2014-07-31: 343005716 (Windows) (general round of updates) - # 2017-03-24: 249904136 (x86/Linux, 64-bit machine) - - (wordsize(64), 391876936, 10)]) - # prev: 795469104 - # 2014-07-17: 728263536 (general round of updates) - # 2014-09-10: 785871680 post-AMP-cleanup - # 2014-11-03: 680162056 Further Applicative and Monad adjustments - # 2015-10-21: 786189008 Make stronglyConnCompFromEdgedVertices deterministic - # 2016-01-26: 698401736 improvement from using ExpTypes instead of ReturnTvs - # 2016-04-06: 852298336 Refactoring of CSE #11781 - # 2016-04-06: 698401736 Use thenIO in Applicative IO - # 2017-02-03: 764866144 Join points - # 2017-02-14: 500707080 Early inline patch; 35% decrease! - # Program size collapses in first simplification - # 2017-03-31: 493596312 Fix memory leak in simplifier - # 2017-04-28: 423163832 Remove exponential behaviour in simplifier - # 2018-04-09: 562206104 Inexplicable, collateral of #14737 - # 2018-05-14: 391876936 Improved simplCast performance #15019 + collect_compiler_stats('bytes allocated',10) ], compile,['']) test('T9675', [ only_ways(['optasm']), - compiler_stats_num_field('max_bytes_used', # Note [residency] - [(wordsize(64), 20499224, 15), - # 2014-10-13 29596552 - # 2014-10-13 26570896 seq the DmdEnv in seqDmdType as well - # 2014-10-13 18582472 different machines giving different results.. 
- # 2014-10-13 22220552 use the mean - # 2015-06-21 28056344 switch to `+RTS -G1`, tighten bound to 15% - # 2015-10-28 23776640 emit Typeable at definition site - # 2015-12-11 30837312 TypeInType (see #11196) - # 2016-03-14 38776008 Final demand analyzer run - # 2016-04-01 29871032 Fix leaks in demand analysis - # 2016-04-30 17675240 Fix leaks in tidy unfoldings - # 2018-09-21 20499224 See #15663 - (wordsize(32), 18043224, 15) - # 2015-07-11 15341228 (x86/Linux, 64-bit machine) use +RTS -G1 - # 2016-04-06 18043224 (x86/Linux, 64-bit machine) - ]), - compiler_stats_num_field('peak_megabytes_allocated', # Note [residency] - [(wordsize(64), 75, 15), - # 2014-10-13 66 - # 2014-10-13 58 seq the DmdEnv in seqDmdType as well - # 2014-10-13 49 different machines giving different results... - # 2014-10-13 53 use the mean - # 2015-06-15 44 reduced for some reason - # 2015-06-21 105 switch to `+RTS -G1` - # 2015-12-04 88 new pattern checker (D1535) - # 2015-12-11 113 TypeInType (see #11196) - # 2016-04-14 144 Final demand analyzer run - # 2016-07-26 121 Unboxed sums? - # 2017-04-30 63 Fix leaks in tidy unfoldings - # 2018-09-21 75 See #15663 - (wordsize(32), 56, 15) - # 2015-07-11 56 (x86/Linux, 64-bit machine) use +RTS -G1 - ]), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 656137960, 10) - # 2014-10-13 544489040 - # 2015-10-28 608284152 emit Typeable at definition site - # 2017-02-17 731171072 Type-indexed Typeable - # 2017-03-13 656137960 Put join ceiling underneath lambdas? - - ,(wordsize(32), 322901484, 10) - # 2015-07-11 279480696 (x86/Linux, 64-bit machine) use +RTS -G1 - # 2017-03-24 322901484 (x86/Linux, 64-bit machine) - - ]), + # Note [residency] + collect_compiler_stats(['max_bytes_used','peak_megabytes_allocated'],15), + collect_compiler_stats('bytes allocated',10), # Use `+RTS -G1` for more stable residency measurements. Note [residency]. 
extra_hc_opts('+RTS -G1 -RTS') @@ -831,145 +183,40 @@ test('T9675', test('T9872a', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 2729927408, 5), - # 2014-12-10 5521332656 Initally created - # 2014-12-16 5848657456 Flattener parameterized over roles - # 2014-12-18 2680733672 Reduce type families even more eagerly - # 2015-12-11 3581500440 TypeInType (see #11196) - # 2016-04-07 3352882080 CSE improvements - # 2016-10-19 3134866040 Refactor traceRn interface (#12617) - # 2017-02-17 3298422648 Type-indexed Typeable - # 2017-02-25 3005891848 Early inlining patch - # 2018-03-26 2729927408 Flattener update with optimizations (#12919) - - (wordsize(32), 1493198244, 5) - # was 1325592896 - # 2016-04-06 1740903516 x86/Linux - # 2017-03-24 1493198244 x86/Linux, 64-bit machine - ]), + collect_compiler_stats('bytes allocated',5) ], compile_fail, ['']) test('T9872b', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 3730686224, 5), - # 2014-12-10 6483306280 Initally created - # 2014-12-16 6892251912 Flattener parameterized over roles - # 2014-12-18 3480212048 Reduce type families even more eagerly - # 2015-12-11 5199926080 TypeInType (see #11196) - # 2016-02-08 4918990352 Improved a bit by tyConRolesRepresentational - # 2016-04-06: 4600233488 Refactoring of CSE #11781 - # 2016-09-15: 4069522928 Fix #12422 - # 2017-02-14 3730686224 Early inlining: 5% improvement - - (wordsize(32), 1894037608, 5) - # was 1700000000 - # 2016-04-06 2422750696 x86/Linux - # 2017-03-24 1894037608 x86/Linux, 64-bit machine - ]), + collect_compiler_stats('bytes allocated',5) ], compile_fail, ['']) test('T9872c', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 3096670112, 5), - # 2014-12-10 5495850096 Initally created - # 2014-12-16 5842024784 Flattener parameterized over roles - # 2014-12-18 2963554096 Reduce type families even more eagerly - # 2015-12-11 4723613784 TypeInType (see 
#11196) - # 2016-02-08 4454071184 Improved a bit by tyConRolesRepresentational - # 2016-04-06: 4306667256 Refactoring of CSE #11781 - # 2016-09-15: 3702580928 Fixing #12422 - # 2017-02-14 3404346032 Early inlining: 5% improvement - # 2018-03-25 3096670112 Flattener patch with optimizations (#12919) - - (wordsize(32), 1727582260, 5) - # was 1500000000 - # 2016-04-06 2257242896 - # 2017-03-24 1727582260 x86/Linux, 64-bit machine - ]), + collect_compiler_stats('bytes allocated',5) ], compile_fail, ['']) test('T9872d', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 578498120, 7), - # 2014-12-18 796071864 Initally created - # 2014-12-18 739189056 Reduce type families even more eagerly - # 2015-01-07 687562440 TrieMap leaf compression - # 2015-03-17 726679784 tweak to solver; probably flattens more - # 2015-12-11 566134504 TypeInType; see #11196 - # 2016-02-08 534693648 Improved a bit by tyConRolesRepresentational - # 2016-03-18 506691240 optimize Unify & zonking - # 2016-12-05 478169352 using tyConIsTyFamFree, I think, but only - # a 1% improvement 482 -> 478 - # 2017-02-17 535565128 Type-indexed Typeable - # 2017-02-25 498855104 Early inlining - # 2017-03-03 462817352 Share Typeable KindReps - # 2018-03-25 526485920 Flattener patch does more work (#12919) - # 2018-04-11 572537984 simplCast improvement collateral (#11735) - # 2018-07-04 578498120 introduce GRefl (#15192) - - (wordsize(32), 232954000, 5) - # some date 328810212 - # 2015-07-11 350369584 - # 2016-04-06 264566040 x86/Linux - # 2017-03-24 232954000 x86/Linux, 64-bit machine - ]), + collect_compiler_stats('bytes allocated',5) ], compile, ['']) test('T9961', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 498326216, 5), - # 2015-01-12 807117816 Initally created - # 2015-spring 772510192 Got better - # 2015-05-22 663978160 Fix for #10370 improves it more - # 2015-10-28 708680480 x86_64/Linux Emit Typeable at definition site - # 
2015-12-17 745044392 x86_64/Darwin Creep upwards - # 2016-03-20 519436672 x64_64/Linux Don't use build desugaring for large lists (#11707) - # 2016-03-24 568526784 x64_64/Linux Add eqInt* variants (#11688) - # 2016-09-01 537297968 x64_64/Linux Restore w/w limit (#11565) - # 2016-12-19 571246936 x64_64/Linux Join points (#12988) - # 2017-02-14 498326216 Early inline patch; 13% improvement - - (wordsize(32), 255409052, 5) - # was 375647160 - # 2016-04-06 275264188 x86/Linux - # 2017-03-24 255409052 x86/Linux, 64-bit machine - ]), + collect_compiler_stats('bytes allocated',5) ], compile, ['-O']) test('T9233', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 973149832, 5), - # 2015-08-04 999826288 initial value - # 2016-04-14 1066246248 Final demand analyzer run - # 2016-06-18 984268712 shuffling around of Data.Functor.Identity - # 2017-01-20 920101608 Improvement to SetLevels apparently saved 4.2% in - # compiler allocation. Program size seems virtually - # unchanged; maybe the compiler itself is a little faster - # 2017-01-23 861862608 worker/wrapper evald-ness flags; another 5% improvement! 
- # 2017-02-01 894486272 Join points - # 2017-02-07 884436192 Another improvement to SetLevels - # 2017-02-17 974530192 Type-indexed Typeable - # 2017-03-21 924299320 It's unclear - # 2018-06-09 973149832 It's unclear - - (wordsize(32), 460112888, 5) - # 2016-04-06 515672240 (x86/Linux) initial value - # 2017-03-24 460112888 x86/Linux, 64-bit machine - ]), + collect_compiler_stats('bytes allocated',5), extra_clean(['T9233a.hi', 'T9233a.o']) ], multimod_compile, @@ -977,42 +224,8 @@ test('T9233', test('T10370', [ only_ways(['optasm']), - compiler_stats_num_field('max_bytes_used', # Note [residency] - [(wordsize(64), 31524048, 15), - # 2015-10-22 19548720 - # 2016-02-24 22823976 Changing Levity to RuntimeRep; not sure why this regresses though, even after some analysis - # 2016-04-14 28256896 final demand analyzer run - # 2016-08-08 33049304 - # This change happened because we changed the behavior - # of inlining across hs-boot files, so that we don't - # inline if something comes from a boot file. This - # affected stats on bootstrapped GHC. However, - # when I set -i0.01 with profiling, the heap profiles - # were identical, so I think it's just GC noise. - # 2016-10-20 38221184 Allow top-level string literals. - # See the comment 16 on #8472. 
- # 2017-02-17 51126304 Type-indexed Typeable - # 2017-02-27 43455848 Likely drift from recent simplifier improvements - # 2017-02-25 41291976 Early inline patch - # 2017-04-30 31524048 Fix leaks in tidy unfoldings + collect_compiler_stats(['max_bytes_used','peak_megabytes_allocated'], 15), - (wordsize(32), 19276304, 15), - # 2015-10-22 11371496 - # 2017-03-24 19276304 (x86/Linux, 64-bit machine) - ]), - compiler_stats_num_field('peak_megabytes_allocated', # Note [residency] - [(wordsize(64), 117, 15), - # 2015-10-22 76 - # 2016-04-14 101 final demand analyzer run - # 2016-08-08 121 see above - # 2017-01-18 146 Allow top-level string literals in Core - # 2017-02-17 187 Type-indexed Typeable - # 2017-02-25 154 Early inline patch - # 2017-04-30 117 Fix leaks in tidy unfoldings - (wordsize(32), 69, 15), - # 2015-10-22 39 - # 2017-03-24 69 - ]), # Use `+RTS -G1` for more stable residency measurements. Note [residency]. extra_hc_opts('+RTS -G1 -RTS') ], @@ -1020,32 +233,14 @@ test('T10370', ['']) test('T10547', - [ compiler_stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 37485128, 20), - # 2017-02-19 37485128 (x64/Windows) - Unknown - - (wordsize(64), 37681360, 20), - # initial: 39165544 - # 2016-11-25: 31041520 Linux Around the time of refactoring the constraint solver; - # but I think that only pushed it over the edge - # 2017-02-20: 38681216 Linux Type-indexed Typeable - ]), + [ collect_compiler_stats('bytes allocated', 20), ], compile_fail, ['-fprint-expanded-synonyms']) test('T12227', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 752214784, 5), - # 2016-07-11 5650186880 (Windows) before fix for #12227 - # 2016-07-11 1822822016 (Windows) after fix for #12227 - # 2016-12-20 1715827784 after d250d493 (INLINE in Traversable dms) - # (or thereabouts in the commit history) - # 2017-02-14 1060158624 Early inlining: 35% improvement - # 2018-01-04 812869424 Drop unused givens (#13032): 23% better - # 
2018-06-27 752214784 Trac #15421 - ]), + collect_compiler_stats('bytes allocated',5) ], compile, # Use `-M1G` to prevent memory thrashing with ghc-8.0.1. @@ -1053,52 +248,21 @@ test('T12227', test('T12425', [ only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 139100464, 5), - # initial: 125831400 - # 2017-01-18: 133380960 Allow top-level string literals in Core - # 2017-02-17: 153611448 Type-indexed Typeable - # 2017-03-03: 142256192 Share Typeable KindReps - # 2017-03-21: 134334800 Unclear - # 2017-04-28: 127500136 Remove exponential behaviour in simplifier - # 2017-05-23: 134780272 Addition of llvm-targets in dynflags (D3352) - # 2018-04-15: 141952368 Collateral of #14737 - # 2018-04-26: 150743648 Do not unpack class dictionaries with INLINABLE - # 2018-05-14: 139100464 improved simplCast performance #15019 - ]), + collect_compiler_stats('bytes allocated',5) ], compile, ['']) test('T12234', [ only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 79889200, 5), - # initial: 83032768 - # 2017-02-19 89180624 (x64/Windows) - Unknown - # 2017-02-25 79889200 (x64/Windows) - Early inline patch - # 2018-05-04 86938328 (x64/Windows) - Unknown and horrible - (wordsize(64), 85961968, 5), - # initial: 72958288 - # 2016-01-17: 76848856 (x86-64, Linux. drift?) 
- # 2017-02-01: 80882208 (Use superclass instances when solving) - # 2017-02-05: 74374440 (Probably OccAnal fixes) - # 2017-02-17: 86525344 (Type-indexed Typeable) - # 2017-02-25: 83032768 (Early inline patch) - # 2017-09-07: 81696664 (Semigroup=>Monoid patch, D3927) - # 2018-04-26: 85961968 (Do not unpack class dictionaries with INLINABLE) - ]), + collect_compiler_stats('bytes allocated',5), ], compile, ['']) test('T12545', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 3249613688, 5), - # 2017-06-08 3538652464 initial - # 2018-06-27 3249613688 Trac #15421 - ]), + collect_compiler_stats('bytes allocated',5), extra_clean(['T12545a.hi', 'T12545a.o']) ], multimod_compile, @@ -1106,88 +270,39 @@ test('T12545', test('T13035', [ only_ways(['normal']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 125020728, 5), - # 2017-01-05 90595208 initial - # 2017-01-19 95269000 Allow top-level string literals in Core - # 2017-02-05 88806416 Probably OccAnal fixes - # 2017-02-17 103890200 Type-indexed Typeable - # 2017-02-25 98390488 Early inline patch - # 2017-03-21 93249744 It's unclear - # 2017-07-19 118665640 Generate Typeable bindings for data instances - # 2018-06-10 125020728 It's unclear - ]), + collect_compiler_stats('bytes allocated',5), ], compile, [''] ) test('T13056', [ only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 440548592, 10), - # 2017-01-06 520166912 initial - # 2017-01-31 546800240 Join points (#12988) - # 2017-02-07 524611224 new SetLevels - # 2017-02-14 440548592 Early inline patch: 16% improvement - # 2017-04-21 417860736 (darwin) - # 2017-04-22 Increase to +/- 10% (Darwin and Linux differ significantly) - ]), + collect_compiler_stats('bytes allocated',10), ], compile, ['-O1']) test('T12707', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 1201750816, 5), - # initial: 1271577192 - # 2017-01-22: 1348865648 Allow top-level strings in Core - # 
2017-01-31: 1280336112 Join points (#12988) - # 2017-02-11: 1310037632 Check local family instances vs imports - # 2017-02-23: 1386110512 Type-indexed Typeable? (on Darwin) - # 2017-03-02: 1231809592 Drift from recent simplifier improvements - # 2017-05-14: 1163821528 (amd64/Linux) Two-pass CmmLayoutStack - # 2018-04-09: 1237898376 Inexplicable, collateral of #14737 - # 2018-04-30: 1141555816 improved simplCast performance #15019 - # 2018-09-21: 1201750816 (amd64/darwin) Drift - ]), + [ collect_compiler_stats('bytes allocated',5), ], compile, ['']) test('T12150', [ only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(wordsize(64), 77557800, 10) - # initial: 70773000 - # 2017-08-25: 74358208 Refactor the Mighty Simplifier - # 2017-08-25: 78300680 Drift - # 2017-10-25: 73769936 amd64/linux Fix space leak in BinIface.getSymbolTable - # 2018-04-26: 77557800 Do not unpack class dictionaries with INLINABLE - ]), + collect_compiler_stats('bytes allocated',5) ], compile, ['']) test('T13379', - [ compiler_stats_num_field('bytes allocated', - [(platform('x86_64-apple-darwin'), 453166912, 10), - # 453166912: add osx-specific after two-pass CmmLayoutStack - (wordsize(64), 411597856, 10), - # initial: 411597856 - # widen window to 10%, Darwin had 449080520, a 9.1% difference - ]), + [ collect_compiler_stats('bytes allocated',10), ], compile, ['']) test('MultiLayerModules', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 5619893176, 10), - # initial: 12139116496 - # 2017-05-12: 6956533312 Revert "Use a deterministic map for imp_dep_mods" - # 2017-05-31: 6294813000 Faster checkFamInstConsistency - # 2018-01-21: 5619893176 Allocate less in plus_mod_dep - ]), + [ collect_compiler_stats('bytes allocated',10), pre_cmd('./genMultiLayerModules'), extra_files(['genMultiLayerModules']), compile_timeout_multiplier(5) @@ -1200,11 +315,7 @@ test('MultiLayerModules', ['MultiLayerModules', '-v0']) test('ManyConstructors', - [ 
compiler_stats_num_field('bytes allocated', - [(wordsize(64), 4246959352, 10), - # initial: 8130527160 - # 2018-01-05: 4246959352 Lift constructor tag allocation out of a loop - ]), + [ collect_compiler_stats('bytes allocated',10), pre_cmd('./genManyConstructors'), extra_files(['genManyConstructors']), ], @@ -1212,11 +323,7 @@ test('ManyConstructors', ['ManyConstructors', '-v0']) test('ManyAlternatives', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 1398898072, 10), - # initial: 1756999240 - # 2018-01-20: 1398898072 Use IntSet in Dataflow - ]), + [ collect_compiler_stats('bytes allocated',10), pre_cmd('./genManyAlternatives'), extra_files(['genManyAlternatives']), ], @@ -1224,18 +331,7 @@ test('ManyAlternatives', ['ManyAlternatives', '-v0']) test('T13701', - [ compiler_stats_num_field('bytes allocated', - [(platform('x86_64-apple-darwin'), 2549206272, 10), - (platform('x86_64-unknown-linux'), 2413253392, 10), - # initial: 2511285600 - # 2017-06-23: 2188045288 treat banged variable bindings as FunBinds - # 2017-07-11: 2187920960 - # 2017-07-12: 2412223768 inconsistency between Ben's machine and Harbormaster? - # 2017-07-17: 2133380768 Resolved the issue causing the inconsistencies in this test - # 2018-05-09: 2413253392 D4659 (Fix GHCi space leaks) added - # some strictness which causes some extra - # work to be done in this test. 
- ]), + [ collect_compiler_stats('bytes allocated',10), pre_cmd('./genT13701'), extra_files(['genT13701']), compile_timeout_multiplier(4) @@ -1248,11 +344,7 @@ test('T13701', ['T13701', '-v0']) test('T13719', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 5187889872, 10), - # initial: 49907410784 - # 2017-05-31: 5187889872 Faster checkFamInstConsistency - ]), + [ collect_compiler_stats('bytes allocated',10), pre_cmd('./genT13719'), extra_files(['genT13719']), compile_timeout_multiplier(2) @@ -1265,12 +357,7 @@ test('T13719', ['T13719', '-v0']) test('T14697', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 371030912, 10), - # initial: 635573784 - # 2018-02-23: 337290376 Cache the fingerprint of sOpt_P - # 2018-05-24: 371030912 Unknown - ]), + [ collect_compiler_stats('bytes allocated',10), # This generates too large of a command-line for poor Windows when(opsys('mingw32'), expect_broken(15072)), pre_cmd('./genT14697'), @@ -1281,12 +368,7 @@ test('T14697', ['T14697', '-v0']) test('T14683', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 10521594688, 10), - # initial: 25189145632 - # 2018-04-19: 14675353056 Cache NthCo role (#14683) - # 2018-04-20: 10521594688 Remove unnecessary check in simplCast - ]), + [ collect_compiler_stats('bytes allocated',10), ], multimod_compile, ['T14683', '-v0']) @@ -1294,36 +376,20 @@ test('T14683', test('Naperian', [ reqlib('vector'), only_ways(['optasm']), - compiler_stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 54116696, 10), - # 2017-12-24 54116696 (x64/Windows) - Unknown - (wordsize(64), 53576760, 10)]) - # 2018-01-25 53576760 (x64/Linux) - The previous value looked very wrong + collect_compiler_stats('bytes allocated',10), ], compile, ['']) test ('T9630', - [ compiler_stats_num_field('max_bytes_used', # Note [residency] - [(platform('x86_64-unknown-mingw32'), 39867088, 15), - # 2017-12-24: 34171816 (x64/Windows) - (wordsize(64), 41365088, 15) - # 
initial: 56955240 - # 2017-06-07: 41568168 Stop the specialiser generating loopy code - # 2018-02-25: 35324712 It's not entirely clear - # 2018-09-22: 41365088 It's not entirely clear (x86_64/darwin) - ]), + [ collect_compiler_stats('max_bytes_used',15), # Note [residency] extra_clean(['T9630a.hi', 'T9630a.o']) ], multimod_compile, ['T9630', '-v0 -O']) test ('T15164', - [ compiler_stats_num_field('bytes allocated', - [(wordsize(64), 1945564312, 10) - # initial: 3423873408 - # 2018-05-22: 1945564312 Fix bottleneck in CallArity - ]) + [ collect_compiler_stats('bytes allocated',10) ], compile, ['-v0 -O']) diff --git a/testsuite/tests/perf/haddock/all.T b/testsuite/tests/perf/haddock/all.T index 4161c2e6d3..fca30366f9 100644 --- a/testsuite/tests/perf/haddock/all.T +++ b/testsuite/tests/perf/haddock/all.T @@ -5,72 +5,7 @@ test('haddock.base', [extra_files(['../../../../libraries/base/dist-install/haddock.t']), unless(in_tree_compiler(), skip), req_haddock - ,stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 18733710728, 5) - # 2017-02-19 24286343184 (x64/Windows) - Generalize kind of (->) - # 2017-12-24 18733710728 (x64/Windows) - Unknown - - ,(wordsize(64), 25913205656, 5) - # 2012-08-14: 5920822352 (amd64/Linux) - # 2012-09-20: 5829972376 (amd64/Linux) - # 2012-10-08: 5902601224 (amd64/Linux) - # 2013-01-17: 6064874536 (x86_64/Linux) - # 2013-02-10: 6282746976 (x86_64/Linux) - # 2013-09-17: 6634886456 (x86_64/Linux) - # 2013-09-18: 6294339840 (x86_64/Linux) - # 2013-11-21: 6756213256 (x86_64/Linux) - # 2014-01-12: 7128342344 (x86_64/Linux) - # 2014-06-12: 7498123680 (x86_64/Linux) - # 2014-08-05: 7992757384 (x86_64/Linux - bugfix for #314, Haddock now parses more URLs) - # 2014-08-08: 7946284944 (x86_64/Linux - Haddock updates to attoparsec-0.12.1.0) - # 2014-09-09: 8354439016 (x86_64/Linux - Applicative/Monad changes, according to Austin) - # 2014-09-10: 7901230808 (x86_64/Linux - Applicative/Monad changes, according to Joachim) - # 
2014-10-07: 8322584616 (x86_64/Linux) - # 2014-12-14: 9502647104 (x86_64/Linux) - Update to Haddock 2.16 - # 2014-01-08: 9014511528 (x86_64/Linux) - Eliminate so-called "silent superclass parameters" (and others) - # 2015-07-22: 9418857192 (x86_64/Linux) - Just slowly creeping up. - # 2015-10-03: 9894189856 (x86_64/Linux) - Still creeping - # 2015-12-11: 11119767632 (amd64/Linux) - TypeInType (see #11196) - # 2015-12-17: 26282821104 (x86_64/Linux) - Update Haddock to master - # 2015-12-17: 27812188000 (x86_64/Linux) - Move Data.Functor.* into base - # 2016-02-25: 30987348040 (x86_64/Linux) - RuntimeRep - # 2016-05-12: 32855223200 (x86_64/Linux) - Make Generic1 poly-kinded - # 2017-01-11: 31115778088 (x86_64/Linux) - Join points (#12988) - # 2017-02-11: 34819979936 (x86_64/Linux) - OccurAnal / One-Shot (#13227) - # 2017-02-16: 32695562088 Better Lint for join points - # 2017-02-17: 38425793776 (x86_64/Linux) - Generalize kind of (->) - # 2017-02-12: 25592972912 (x86_64/Linux) - Type-indexed Typeable - # 2017-06-05: 27868466432 (x86_64/Linux) - Desugar modules compiled with -fno-code - # 2017-06-06: 25173968808 (x86_64/Linux) - Don't pass on -dcore-lint in Haddock.mk - # 2017-07-12: 23677299848 (x86_64/Linux) - Use getNameToInstancesIndex - # 2017-08-22: 19694554424 (x86_64/Linux) - Various Haddock optimizations - # 2018-03-31: 20980255200 (x86_64/Linux) - Track type variable scope more carefully - # previous to this last commit, the allocations were right below the top - # of the range. This commit adds only ~1.5% allocations. 
- # 2018-04-10: 18511324808 (x86_64/Linux) - TTG HsBinds and Data instances - # 2018-04-11: 20727464616 (x86_64/Linux) - Collateral of simplCast improvement (#14737) - # 2018-04-20: 18971030224 (x86_64/Linux) - Cache coercion roles - # 2018-05-14: 21123660336 (amd64/Linux) - D4659: strictness to fix space leaks - # 2018-06-14: 24662232152 (amd64/Linux) - Bump haddock - # 2018-10-08: 25913205656 (amd64/Linux&OSX) - D5167: Improve GHC.Prim docs - - ,(platform('i386-unknown-mingw32'), 2885173512, 5) - # 2013-02-10: 3358693084 (x86/Windows) - # 2013-11-13: 3097751052 (x86/Windows, 64bit machine) - # 2014-04-04: 3548581572 (x86/Windows, 64bit machine) - # 2014-12-01: 4202377432 (x86/Windows, 64bit machine) - # 2015-01-20: 4434804940 (x86/Windows, 64bit machine) - # 2017-04-02: 2885173512 update - - ,(wordsize(32), 3445319728, 5)]) - # 2012-08-14: 3046487920 (x86/OSX) - # 2012-10-30: 2955470952 (x86/Windows) - # 2013-02-10: 3146596848 (x86/OSX) - # 2014-02-22: 3554624600 (x86/Linux - new haddock) - # 2014-06-29: 3799130400 (x86/Linux) - # 2016-04-06: 5509757068 (x86/Linux) - # 2017-03-24: 3819657568 (x86/Linux) - # 2017-04-06: 3445319728 (x86/Linux) + ,collect_stats('bytes allocated',5) ], stats, ['haddock.t']) @@ -78,78 +13,7 @@ test('haddock.base', test('haddock.Cabal', [extra_files(['../../../../libraries/Cabal/Cabal/dist-install/haddock.t']), unless(in_tree_compiler(), skip), req_haddock - ,stats_num_field('bytes allocated', - [(wordsize(64), 27520214496, 5) - # 2012-08-14: 3255435248 (amd64/Linux) - # 2012-08-29: 3324606664 (amd64/Linux, new codegen) - # 2012-10-08: 3373401360 (amd64/Linux) - # 2013-03-13: 3626604824 (amd64/Linux) Cabal updated - # 2013-03-28: 3517301864 (amd64/Linux) fixed #7796 - # 2013-04-26: 3658801800 (amd64/Linux) Cabal updated - # 2013-08-26: 3808466816 (amd64/Linux) Cabal updated - # 2013-11-21: 3908586784 (amd64/Linux) Cabal updated - # 2013-12-12: 3828567272 (amd64/Linux) - # 2014-01-12: 3979151552 (amd64/Linux) new parser - # 2014-06-29: 
4200993768 (amd64/Linux) - # 2014-08-05: 4493770224 (x86_64/Linux - bugfix for #314, Haddock now parses more URLs) - # 2014-08-29: 4267311856 (x86_64/Linux - w/w for INLINABLE things) - # 2014-09-09: 4660249216 (x86_64/Linux - Applicative/Monad changes according to Austin) - # 2014-09-10: 4500376192 (x86_64/Linux - Applicative/Monad changes according to Joachim) - # 2014-09-24: 5840893376 (x86_64/Linux - Cabal update) - # 2014-10-04: 6019839624 (x86_64/Linux - Burning Bridges, Cabal update) - # 2014-12-14: 6387320816 (x86_64/Linux) - Update to Haddock 2.16 - # 2015-01-22: 6710234312 (x86_64/Linux) - Cabal updated - # 2015-06-29: 7413958344 (x86_64/Linux) - due to #10482, not yet investigated - # 2015-12-11: 8114833312 (amd64/Linux) - TypeInType (See #11196) - # 2015-12-17: 9982130512 (amd64/Linux) - Update Haddock to master - # 2015-12-22: 10519532424 (amd64/Linux) - Lots of new Semigroup instances in Cabal - # 2016-03-29: 11517963232 (amd64/Linux) - not yet investigated - # 2016-03-30: 10941742184 (amd64/Linux) - defer inlining of Int* Ord methods - # 2016-04-06: 11542374816 (amd64/Linux) - CSE improvements and others - # 2016-04-07: 10963514352 (amd64/Linux) - Revert to what phabricator claims - # 2016-05-22: 11805238152 (amd64/Linux) - Make Generic1 poly-kinded - # 2016-06-05: 10997887320 (amd64/Linux) - Refactor derived Generic instances to reduce allocations - # 2016-06-21: 10070330520 (amd64/Linux) - D2350: Make checkFamInstConsistency less expensive - # 2016-08-07: 16001233464 (amd64/Linux) - Cabal update - # It's worth noting that allocations scale up with the number - # of modules in Cabal. This Cabal update added a large number - # of new modules; if you exclude them from the haddock run - # the stats are comparable. 
- # 2016-10-01: 20619433656 (amd64/Linux) - Cabal update - # 2016-10-03: 21554874976 (amd64/Linux) - Cabal update - # 2016-10-06: 23706190072 (amd64/Linux) - Cabal update - # 2016-12-20: 25478853176 (amd64/Linux) - Cabal update - # 2017-01-14: 23272708864 (amd64/Linux) - Join points (#12988) - # 2017-02-11: 25533642168 (amd64/Linux) - OccurAnal / One-Shot (#13227) - # 2017-02-16: 23867276992 Better Lint for join points - # 2017-02-17: 27784875792 (amd64/Linux) - Generalize kind of (->) - # 2017-02-12: 18865432648 (amd64/Linux) - Type-indexed Typeable - # 2017-05-31: 18269309128 (amd64/Linux) - Faster checkFamInstConsistency - # 2017-06-05: 22294859000 (amd64/Linux) - Desugar modules compiled with -fno-code - # 2017-06-05: 18753963960 (amd64/Linux) - Don't pass on -dcore-lint in Haddock.mk - # 2017-08-22: 15857428040 (amd64/Linux) - Various Haddock optimizations - # 2017-11-02: 17133915848 (amd64/Linux) - Phabricator D4144 - # 2017-11-06: 18936339648 (amd64/Linux) - Unknown - # 2017-11-09: 20104611952 (amd64/Linux) - Bump Cabal - # 2018-01-22: 25261834904 (amd64/Linux) - Bump Cabal - # 2018-04-10: 23525241536 (amd64/Linux) - TTG HsBinds and Data instances - # 2018-05-14: 24519860272 (amd64/Linux) - D4659: strictness to fix space leaks - # 2018-06-14: 27520214496 (amd64/Linux) - Bump haddock - - ,(platform('i386-unknown-mingw32'), 3293415576, 5) - # 2012-10-30: 1733638168 (x86/Windows) - # 2013-02-10: 1906532680 (x86/Windows) - # 2014-01-28: 1966911336 (x86/Windows) - # 2014-04-24: 2052220292 (x86/Windows) - # 2014-12-01: 3088635556 (x86/Windows) - # 2015-01-20: 3293415576 - - ,(wordsize(32), 3511151136, 5)]) - # 2012-08-14: 1648610180 (x86/OSX) - # 2014-01-22: 1986290624 (x86/Linux) - # 2014-06-29: 2127198484 (x86/Linux) - # 2016-04-06: 6268156056 (x86/Linux) - # 2017-03-24: 3511151136 (x86/Linux) + ,collect_stats('bytes allocated',5) ], stats, ['haddock.t']) @@ -157,49 +21,7 @@ test('haddock.Cabal', test('haddock.compiler', 
[extra_files(['../../../../compiler/stage2/haddock.t']), unless(in_tree_compiler(), skip), req_haddock - ,stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 56775301896, 10), - # 2017-12-24: 56775301896 (x64/Windows) - (wordsize(64), 63038317672, 10) - # 2012-08-14: 26070600504 (amd64/Linux) - # 2012-08-29: 26353100288 (amd64/Linux, new CG) - # 2012-09-18: 26882813032 (amd64/Linux) - # 2012-11-12: 25990254632 (amd64/Linux) - # 2014-07-17: 29809571376 (amd64/Linux) general round of updates - # 2012-11-27: 28708374824 (amd64/Linux) - # 2014-09-10: 30353349160 (amd64/Linux) post-AMP cleanup - # 2014-11-22: 33562468736 (amd64/Linux) - # 2015-06-02: 36740649320 (amd64/Linux) unknown cause - # 2015-06-29: 40624322224 (amd64/Linux) due to #10482, not yet investigated - # 2015-12-03: 44721228752 (amd64/Linux) slow creep upwards - # 2015-12-15: 49395782136 (amd64/Linux) more creep, following kind-equalities - # 2015-12-17: 58017214568 (amd64/Linux) update Haddock to master - # 2016-06-21: 55314944264 (amd64/Linux) D2350: Make checkFamInstConsistency less expensive - # 2016-11-29: 60911147344 (amd64/Linux) unknown cause - # 2017-02-11: 62070477608 (amd64/Linux) OccurAnal / One-Shot (#13227) (and others) - # 2017-02-25: 55777283352 (amd64/Linux) Early inline patch - # 2017-05-31: 52762752968 (amd64/Linux) Faster checkFamInstConsistency - # 2017-06-05: 65378619232 (amd64/Linux) Desugar modules compiled with -fno-code - # 2017-06-06: 55990521024 (amd64/Linux) Don't pass on -dcore-lint in Haddock.mk - # 2017-07-12: 51592019560 (amd64/Linux) Use getNameToInstancesIndex - # 2018-04-08: 91115212032 (amd64/Linux) Trees that grow - # 2018-04-10: 58410358720 (amd64/Linux) Trees that grow (HsBinds, Data instances) - # 2018-05-14: 63038317672 (amd64/Linux) D4659: strictness to fix space leaks - - ,(platform('i386-unknown-mingw32'), 367546388, 10) - # 2012-10-30: 13773051312 (x86/Windows) - # 2013-02-10: 14925262356 (x86/Windows) - # 2013-11-13: 14328363592 
(x86/Windows, 64bit machine) - # 2014-12-01: 104140852 (x86/Windows, sudden shrinkage!) - # 2014-12-10: 217933548 increased again - # 2017-04-02: 367546388 update - - ,(wordsize(32), 3775852520, 5)]) - # 2012-08-14: 13471797488 (x86/OSX) - # 2014-01-22: 14581475024 (x86/Linux - new haddock) - # 2014-06-29: 15110426000 (x86/Linux) - # 2016-04-06: 16222702892 (x86/Linux) - # 2017-03-24: 3775852520 (x86/Linux) + ,collect_stats('bytes allocated',10) ], stats, ['haddock.t']) diff --git a/testsuite/tests/perf/join_points/all.T b/testsuite/tests/perf/join_points/all.T index fe202b6487..eedf0c0bff 100644 --- a/testsuite/tests/perf/join_points/all.T +++ b/testsuite/tests/perf/join_points/all.T @@ -7,17 +7,15 @@ setTestOpts(f) test('join001', normal, compile, ['']) test('join002', - [stats_num_field('bytes allocated', [(wordsize(64), 2000290792, 5)])], + [collect_stats('bytes allocated',5),], compile_and_run, ['']) test('join003', - [stats_num_field('bytes allocated', [(wordsize(64), 2000290792, 5)])], + [collect_stats('bytes allocated',5),], compile_and_run, ['']) test('join004', - [stats_num_field('bytes allocated', [(wordsize(64), 16130592, 5)])], - # 2017-01-24 48146720 Join point rework - # 2017-02-05 16130592 Do Worker/Wrapper for NOINLINE things + [collect_stats('bytes allocated',5),], compile_and_run, ['']) @@ -25,11 +23,7 @@ test('join005', normal, compile, ['']) test('join006', normal, compile, ['']) test('join007', - [stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 47368, 5), - # 2017-02-19 47368 (x64/Windows) - Unknown - - (wordsize(64), 50944, 5)])], - # Initial 50944 + [collect_stats('bytes allocated',5),], compile_and_run, ['']) + diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T index e3fb136d9f..0b70398e46 100644 --- a/testsuite/tests/perf/should_run/all.T +++ b/testsuite/tests/perf/should_run/all.T @@ -3,16 +3,14 @@ # See Note [Solving from instances when interacting Dicts] test('T5835', - 
[stats_num_field('max_bytes_used', - [(wordsize(64), 44312, 10)]), + [collect_stats('max_bytes_used',10), only_ways(['normal']) ], compile_and_run, ['-O']) test('T12791', - [stats_num_field('max_bytes_used', - [(wordsize(64), 44312, 10)]), + [collect_stats('max_bytes_used',10), only_ways(['normal']) ], compile_and_run, @@ -21,20 +19,14 @@ test('T12791', # Tests that newArray/newArray_ is being optimised correctly test('T10359', - [stats_num_field('bytes allocated', - [(wordsize(64), 450920, 5), - # previously 499512 (amd64/Linux) - # 2017-03-10 450920 (amd64/Linux) Don't generate wrapper for !Int# - (wordsize(32), 351508, 5)]), + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O']) test('T14955', - [stats_num_field('bytes allocated', - [(wordsize(64), 48050760, 5), - (wordsize(32), 351508, 5)]), + [collect_stats('bytes allocated',5), only_ways(['normal']) ], multimod_compile_and_run, @@ -43,11 +35,8 @@ test('T14955', # fortunately the values here are mostly independent of the wordsize, # because the test allocates an unboxed array of doubles. test('T3586', - [stats_num_field('peak_megabytes_allocated', (17, 1)), - # expected value: 17 (amd64/Linux) - stats_num_field('bytes allocated', (16102024, 5)), - # prev: 16835544 (amd64/Linux) - # 2014-07-17: 16102024 (amd64/Linux), general round of updates + [collect_stats('peak_megabytes_allocated',1), + collect_stats('bytes allocated', 5), only_ways(['normal']), # Use `+RTS -G1` for more stable residency measurements. Note [residency]. 
@@ -58,17 +47,7 @@ test('T3586', ['-O']) test('T4830', - [stats_num_field('bytes allocated', - [(wordsize(64), 98248, 4), - # 127000 (amd64/Linux) - # 2013-02-07: 99264 (amd64/Linux) - # 2014-01-13: 98248 (amd64/Linux) due to #8647 - # 2015-04-03: Widen 1->4% (amd64/Windows was doing better) - (wordsize(32), 70646, 3)]), - # 2013-02-10: 69744 (x86/Windows) - # 2013-02-10: 71548 (x86/OSX) - # 2014-01-28: Widen range 2->3 - # (x86/Windows - actual 69000, lower was 69233) + [collect_stats('bytes allocated',4), only_ways(['normal']) ], compile_and_run, @@ -82,27 +61,8 @@ test('T3245', [when(doing_ghci(), extra_hc_opts('-fobject-code'))], # test('lazy-bs-alloc', [extra_files(['../../numeric/should_run/arith011.stdout']), - stats_num_field('peak_megabytes_allocated', (2, 1)), - # expected value: 2 (amd64/Linux) - stats_num_field('bytes allocated', - [(wordsize(64), 421792, 5), - # 489776 (amd64/Linux) - # 2013-02-07: 429744 (amd64/Linux) - # 2013-12-12: 425400 (amd64/Linux) - # 2015-04-04: Widen 1->3% (amd64/Windows was failing) - # 2015-08-15: 431500 (Windows not good enough. 
avg of Windows&Linux) - # 2015-12-15: 444720 (amd64/Linux, D1616) - # 2015-12-17: 444720 (widen 3->5%, Windows is at 462688) - # 2017-01-30: 421792 (amd64/Linux, strangely Type-indexed Typeable) - (wordsize(32), 410040, 5)]), - # 2013-02-10: 421296 (x86/Windows) - # 2013-02-10: 414180 (x86/OSX) - # 2014-01-22: 411500 (x86/Linux) - # 2014-01-28: Widen 1->2% (x86/Windows was at 425212) - # 2016-04-06: 429760 (x86/Linux) no idea what happened - # 2017-02-14: 421448 Early inline patch - # 2017-03-24: 410040 It's not entirely clear, widen threshold to match 64-bit case - + collect_stats('peak_megabytes_allocated', 1), + collect_stats('bytes allocated',5), only_ways(['normal']), extra_run_opts('arith011.stdout'), ignore_stdout, @@ -116,25 +76,7 @@ test('lazy-bs-alloc', ['-O']) test('T876', - [stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 53472, 5), - # 2015-04-03: 71904 (amd64/Windows, unknown cause) - # 2016-11-27: 66928 (amd64/Windows, unknown cause) - # 2017-12-24: 53472 (amd64/Windows, unknown cause) - - (wordsize(64), 58128, 5), - # 2013-02-14: 1263712 (x86_64/Linux) - # 2014-02-10: 63216 (x86_64/Linux), call arity analysis - # 2016-11-11: 58128 (x86_64/Linux), it's not clear - - (wordsize(32), 50408, 5) ]), - # some date: 663712 (Windows, 64-bit machine) - # 2014-04-04: 56820 (Windows, 64-bit machine) - # 2014-06-29: 53024 (x86_64/Linux) - # 2014-12-01: 56796 (Windows) - # 2015-07-11: 53156 (x86_64/Linux) - # 2017-03-24: 50408 (x86/Linux, 64-bit machine) - + [collect_stats('bytes allocated',5), only_ways(['normal']), extra_run_opts('10000') ], @@ -154,34 +96,16 @@ test('T4321', test('T3736', [], run_command, ['$MAKE -s --no-print-directory T3736']) test('T3738', [extra_clean(['T3738a.hi', 'T3738a.o']), - stats_num_field('peak_megabytes_allocated', (2, 0)), - # expected value: 1 (amd64/Linux) - # 2016-08-31: 2 (allocation area size bumped to 1MB) - stats_num_field('bytes allocated', - [(wordsize(32), 45648, 5), - # expected value: 50520 
(x86/Linux) - (wordsize(64), 50592, 8)]), - # prev: 49400 (amd64/Linux) - # 2014-07-17: 50520 (amd64/Linux) general round of updates - # 2014-09-10: 50592 (amd64/Linux) post-AMP-update - # 2015-04-03: Widen 5->8% (amd64/Windows was doing better) + collect_stats('peak_megabytes_allocated', 0), + collect_stats('bytes allocated',8), only_ways(['normal']) ], compile_and_run, ['-O']) test('MethSharing', - [stats_num_field('peak_megabytes_allocated', (2, 0)), - # expected value: 1 (amd64/Linux) - # 2016-08-31: 2 (allocation area size bumped to 1MB) - stats_num_field('bytes allocated', - [(wordsize(32), 240071008, 5), - # expected value: 2685858140 (x86/OS X) - # expected: 360940756 (x86/Linux) - # 2017-03-24: 240071008 (x86/Linux, 64-bit machine) - (wordsize(64), 480098192, 5)]), - # expected: 640067672 (amd64/Linux) - # 2017-01-31: 480098192 work/wrap noinline things + [collect_stats('peak_megabytes_allocated', 0), + collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, @@ -197,9 +121,7 @@ test('T149', ['$MAKE -s --no-print-directory T149']) test('T5113', - [stats_num_field('bytes allocated', - [(wordsize(32), 4000000, 5), - (wordsize(64), 8000000, 5)]), + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, @@ -207,103 +129,54 @@ test('T5113', test('T4978', - [stats_num_field('bytes allocated', - [(wordsize(32), 10000000, 5), - (wordsize(64), 10137680, 5)]), - # expected value: 10137680 (amd64/Linux) + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O2']) test('T5205', - [stats_num_field('bytes allocated', - [(wordsize(32), 49460, 5), - # expected value: 47088 (x86/Darwin) - # 2017-03-24: 49460 (x86/Linux, 64-bit machine) - - (platform('x86_64-unknown-mingw32'), 52264, 5), - # 2016-12-14: 52264 (Separate out Windows results) - - (wordsize(64), 56208, 5)]), - # expected value: 51320 (amd64/Linux) - # 2014-07-17: 52600 (amd64/Linux) general round of updates - # 2015-04-03: Widen 5->7% 
(amd64/Windows was doing better) - # 2015-08-15: 50648 (Windows too good. avg of Windows&Linux) - # 2015-10-30: 56208 (D757: Emit Typeable at definition site) - # 2016-12-14: Narrow 7->5% (Separate out Windows results) + [collect_stats('bytes allocated',5), only_ways(['normal', 'optasm']) ], compile_and_run, ['']) test('T5549', - [stats_num_field('bytes allocated', - [(wordsize(32), 2896607976, 5), - # expected value: 3362958676 (Windows) - # 2014-12-01: 4096606332 (Windows) integer-gmp2 - # 2017-03-24: 2896607976 (x86/Linux, 64-bit machine) - - (wordsize(64), 5793140200, 5)]), - # expected value: 6725846120 (amd64/Linux) - # 8193140752 (amd64/Linux) integer-gmp2 - # 5793140200 (amd64/Linux) integer-gmp2 + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O']) test('T4474a', - [stats_num_field('bytes allocated', - [(wordsize(32), 2405242767, 5), - (wordsize(64), 4831890304, 5)]), - # expected value: 4831890304 (amd64/OSX) + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O']) test('T4474b', - [stats_num_field('bytes allocated', - [(wordsize(32), 2405242767, 5), - (wordsize(64), 4831890304, 5)]), - # expected value: 4831890304 (amd64/OSX) + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O']) test('T4474c', - [stats_num_field('bytes allocated', - [(wordsize(32), 2405242767, 5), - (wordsize(64), 4831890304, 5)]), - # expected value: 4831890304 (amd64/OSX) + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O']) test('T5237', - [stats_num_field('bytes allocated', - [(platform('i386-unknown-mingw32'), 73280, 5), - (wordsize(32), 78328, 5), - # expected value: 78328 (i386/Linux) - (wordsize(64), 104176, 5)]), - # expected value: 110888 (amd64/Linux) - # expected value: 104176 (amd64/Linux) + [collect_stats('bytes allocated',5), only_ways(['normal']) ], compile_and_run, ['-O ' + sse2_opts]) test('T5536', - [stats_num_field('bytes allocated', 
- [(wordsize(32), 446260520, 1), - # 1246287228 (i386/Linux) - # 446328556 (i386/Windows) - # 446192484 (i386/OSX) - (wordsize(64), 892399040, 5)]), - # expected value: 2492589480 (amd64/Linux) - # 17/1/13: 892399040 (x86_64/Linux) - # (new demand analyser) + [collect_stats('bytes allocated',1), extra_clean(['T5536.data']), ignore_stdout, only_ways(['normal']) @@ -312,37 +185,14 @@ test('T5536', ['-O']) test('T7257', - [stats_num_field('bytes allocated', - [(wordsize(32), 869850704, 10), - # expected value: 1246287228 (i386/Linux) - # 2016-04-06: 989850664 (i386/Linux) no idea what happened - # 2017-03-25: 869850704 (x86/Linux, 64-bit machine) probably sizeExpr fix - (wordsize(64), 1297293264, 5)]), - # 2012-09-21: 1774893760 (amd64/Linux) - # 2015-11-03: 1654893248 (amd64/Linux) - # 2016-06-22: 1414893248 (amd64/Linux, sizeExpr fix) - # 2018-06-22: 1297293264 (amd64/Linux, atomicModifyMutVar# replacement) - stats_num_field('peak_megabytes_allocated', - [(wordsize(32), 217, 5), - # 2012-10-08: 217 (x86/Linux) - (wordsize(64), 227, 5)]), - # 2012-09-21: 227 (amd64/Linux) - + [collect_stats('bytes allocated',10), + collect_stats('peak_megabytes_allocated',5), only_ways(['normal']) ], compile_and_run, ['-O']) test('Conversions', - [stats_num_field('bytes allocated', - [(wordsize(32), 76768, 3), - # 2012-12-18: 55316 Guessed 64-bit value / 2 - # 2013-02-10: 77472 (x86/OSX) - # 2013-02-10: 79276 (x86/Windows) - # 2014-01-13: 76768 (x86/Linux) due to #8647 - (wordsize(64), 107544, 5)]), - # 2012-12-18: 109608 (amd64/OS X) - # 2014-07-17: 107544 (amd64/Linux) - + [collect_stats('bytes allocated',3), only_ways(['normal']) ], compile_and_run, ['-O -msse2']) @@ -351,29 +201,14 @@ test('T7507', omit_ways(['ghci']), compile_and_run, ['-O']) # For 7507, stack overflow is the bad case test('T7436', - [stats_num_field('max_bytes_used', - [(wordsize(64), 60360, 4), - # 127000 (amd64/Linux) - # 2013-02-07: 60360 (amd64/Linux) - # 2015-04-03: Widen 1->4% (amd64/Windows was doing 
better) - (wordsize(32), 42772, 4)]), - # 2013-02-10: 58032 (x86/Windows) - # 2013-02-10: 58836 (x86/OSX) - # 2017-03-24: 42772 (x86/Linux, 64-bit machine) no idea why - # 2017-04-02: Widen 1->4% (i386/Windows was doing better) + [collect_stats('max_bytes_used',4), only_ways(['normal']) ], compile_and_run, ['-O']) test('T7797', - [stats_num_field('bytes allocated', - [(wordsize(32), 240044984, 5), - # expected value: 2685858140 (x86/OS X) - # expected: 360940756 (x86/Linux) - # expected: 240044984 (x86/Windows, 64bit machine) - (wordsize(64), 480050944, 5)]), - # expected: 480050944 (amd64/Linux) + [collect_stats('bytes allocated',5), extra_clean(['T7797a.hi', 'T7797a.o']), only_ways(['normal']) ], @@ -381,114 +216,62 @@ test('T7797', ['-O']) test('T7954', - [stats_num_field('bytes allocated', - [(wordsize(32), 920045264, 10), - # some date: 1380051408 (64-bit Windows machine) - # 2014-04-04: 920045264 (64-bit Windows machine) - (wordsize(64), 1280051632, 10)]), - # 2014-02-10: 1680051336 (x86_64/Linux), call arity analysis - # 2018-05-03: 1280051632 (x86_64/Linux), refactor numericEnumFrom + [collect_stats('bytes allocated',10), only_ways(['normal']) ], compile_and_run, ['-O']) test('T7850', - [stats_num_field('peak_megabytes_allocated', - [(wordsize(32), 2, 10), - (wordsize(64), 4, 10)]), + [collect_stats('peak_megabytes_allocated',10), only_ways(['normal'])], compile_and_run, ['-O']) test('T5949', - [stats_num_field('bytes allocated', - [ (wordsize(32), 116020, 10), - (wordsize(64), 201008, 10)]), - # previously, it was >400000 bytes + [collect_stats('bytes allocated',10), only_ways(['normal'])], compile_and_run, ['-O']) test('T4267', - [stats_num_field('bytes allocated', - [ (wordsize(32), 36012, 10) - # 32-bit value close to 64 bit; c.f. 
T7619 - , (wordsize(64), 40992, 10) ]), - # previously, it was >170000 bytes - # 2014-01-17: 130000 - # 2014-02-10: 40992 (x86_64/Linux), call arity analysis + [collect_stats('bytes allocated',10), only_ways(['normal'])], - compile_and_run, - ['-O']) + compile_and_run, + ['-O']) test('T7619', - [stats_num_field('bytes allocated', - [ (wordsize(32), 36012, 10) - # 32-bit close to 64-bit value; most of this very - # small number is standard start-up boilerplate I think - , (wordsize(64), 40992, 10) ]), - # previously, it was >400000 bytes + [collect_stats('bytes allocated',10), only_ways(['normal'])], compile_and_run, ['-O']) test('InlineArrayAlloc', - [stats_num_field('bytes allocated', - [ (wordsize(32), 800040960, 5) - , (wordsize(64), 1600040960, 5) ]), + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2']) test('InlineByteArrayAlloc', - [stats_num_field('bytes allocated', - [ (wordsize(32), 1360036012, 5) - , (wordsize(64), 1440040960, 5) ]), - # 32 and 64 bit not so different, because - # we are allocating *byte* arrays + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2']) test('InlineCloneArrayAlloc', - [stats_num_field('bytes allocated', - [ (wordsize(32), 800041120, 5) - , (wordsize(64), 1600041120, 5) ]), + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2']) test('T9203', - [stats_num_field('bytes allocated', - [ (wordsize(32), 77969268, 5) - # was - # 2016-04-06 84345136 (i386/Debian) not sure - # 2017-03-24 77969268 (x86/Linux, 64-bit machine) probably join points - - , (wordsize(64), 98360576, 5) ]), - # was 95747304 - # 2019-09-10 94547280 post-AMP cleanup - # 2015-10-28 95451192 emit Typeable at definition site - # 2016-12-19 84620888 Join points - # 2018-07-30 98360576 it's unclear + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2']) test('T9339', - [stats_num_field('bytes allocated', - [ (wordsize(32), 46904, 5) 
- # is this number correct? Seems very high. - # 2017-03-24: 46904 (x86/Linux, 64-bit machine) who knows - - , (platform('x86_64-unknown-mingw32'), 47088, 7) - # 2017-02-19 47088 (x64/Windows) - Unknown - - , (wordsize(64), 50728, 5) ]), - # w/o fusing last: 320005080 - # 2014-07-22: 80050760 - # 2016-08-17: 50728 Join points (#12988) + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2 -fspec-constr-keen']) @@ -496,64 +279,44 @@ test('T9339', test('T8472', - [stats_num_field('bytes allocated', - [ (wordsize(32), 50000, 80) - , (wordsize(64), 51424, 80) ]), + [collect_stats('bytes allocated',80), only_ways(['normal'])], compile_and_run, ['-O2']) test('T12996', - [stats_num_field('bytes allocated', - [ (wordsize(64), 76776, 5) ]), + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2']) test('T13001', - [stats_num_field('bytes allocated', - [ (wordsize(32), 46728, 20) - , (wordsize(64), 50600, 20) ]), + [collect_stats('bytes allocated',20), only_ways(['normal'])], compile_and_run, ['-O2']) test('T8763', - [stats_num_field('bytes allocated', - [ (wordsize(64), 41056, 20) ]), + [collect_stats('bytes allocated', 20), only_ways(['normal'])], compile_and_run, ['-O2']) test('T12990', - [stats_num_field('bytes allocated', - [ (wordsize(64), 20040936, 5) ]), - # 2017-01-03 34440936 w/o inlining unsaturated - # constructor wrappers - # 2017-01-03 21640904 inline wrappers - # 2017-01-31 20040936 work/wrap noinline things + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O2']) test('T13218', - [stats_num_field('bytes allocated', - [ (wordsize(64), 82040056, 5) ]), - # 8.1 with default <$ 163644216 - # 8.1 with derived <$ 82040056 - stats_num_field('max_bytes_used', - [ (wordsize(64), 359128, 10) ]), - # 8.1 with default <$ 64408248 - # 8.1 with derived <$ 359128 + [collect_stats('bytes allocated',5), + collect_stats('max_bytes_used',10), only_ways(['normal'])], compile_and_run, 
['-O']) test('DeriveNull', - [stats_num_field('bytes allocated', - [ (wordsize(64), 112050856, 5) ]), - # 2017-04-01 152083704 w/o derived null - # 2017-04-02 112050856 derive null + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O']) @@ -561,66 +324,41 @@ test('DeriveNull', test('DeriveNullTermination', normal, compile_and_run, ['']) test('T13623', - [stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 47232, 10), - # 2017-12-24 47232 unknown - (wordsize(64), 50936, 5)]), - # 2017-05-02 50936 initial + [collect_stats('bytes allocated',10), only_ways(['normal'])], compile_and_run, ['-O2']) test('T14052', - [compiler_stats_num_field('bytes allocated', - [ (wordsize(64), 2346183840, 15) ])], + [collect_compiler_stats('bytes allocated',15)], ghci_script, ['T14052.script']) test('T14936', - [stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 47536, 10), - # 2018-05-04 47536 unknown - (wordsize(64), 51792, 5) ])], + [collect_stats('bytes allocated',10)], compile_and_run, ['-O2']) test('T15226', - [stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 37488, 4), - # 2018-09-23 37488 Linker changes - (wordsize(64), 41040, 5) ]), - # 2018-06-06 41040 Let the simplifier know the result - # of seq# is in WHNF - # initial 400041040 + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O']) test('T15226a', - [stats_num_field('bytes allocated', - [(platform('x86_64-unknown-mingw32'), 37488, 4), - # 2018-09-23 37488 Linker changes - (wordsize(64), 41040, 5) ]), - # 2018-06-06 41040 Look through casts for seq# - # initial 400041040 + [collect_stats('bytes allocated',5), only_ways(['normal'])], compile_and_run, ['-O']) test('T15426', - [stats_num_field('bytes allocated', - [ (wordsize(64), 41272, 20) ]), - # 2018-08-10 41272 Change findIndices from INLINE to INLINABLE - # initial 160041176 - only_ways(['normal'])], + [collect_stats('bytes allocated', 
20), + only_ways(['normal'])], compile_and_run, ['-O2']) test('T15578', - [stats_num_field('bytes allocated', - [ (wordsize(64), 800041456, 5) ]), - # 2018-09-07 800041456 Improvements from #15578 - # initial 42400041456 + [collect_stats('bytes allocated', 5), only_ways(['normal'])], compile_and_run, ['-O2']) diff --git a/testsuite/tests/perf/space_leaks/all.T b/testsuite/tests/perf/space_leaks/all.T index a23796d532..1f69d12112 100644 --- a/testsuite/tests/perf/space_leaks/all.T +++ b/testsuite/tests/perf/space_leaks/all.T @@ -1,36 +1,10 @@ test('space_leak_001', - # Before trac #2747 was fixed this was 565. - # Now it's: 3 (amd64/Linux) - # 4 (x86/OS X) - # 5 (x86/Linux) - [stats_num_field('peak_megabytes_allocated', (3, 1)), - # 3 (amd64/Linux, integer-gmp2) - stats_num_field('max_bytes_used', - [(wordsize(64), 440000, 15), - # 440224 (amd64/Linux) - # 417016 (x86/OS X) - # 415672 (x86/Windows) - # 481456 (unreg amd64/Linux) - (wordsize(32), 428220, 10)]), - # 2013-02-10 372072 (x86/OSX) - # 2013-02-10 439228 (x86/OSX) - # 2016-04-06 361400 (x86/Linux) - # 2017-03-24 428220 (x86/Linux, 64-bit machine) - - stats_num_field('bytes allocated', - [ (wordsize(64), 11315747416, 5), - # expected value: 9079316016 (amd64/Linux) - # 9331570416 (x86/Linux) - # 9329073952 (x86/OS X) - # 9327959840 (x86/Windows) - # 11315747416 (amd64/Lnx, integer-gmp2) - - (wordsize(32), 661907800, 5), - # 2014-12-01 13550759068 (Windows) - # 2017-03-24 661907800 (x86/Linux, 64-bit machine) No idea - - ]), + # This could potentially be replaced with + # collect_stats('all',5) to test all 3 with + # 5% possible deviation. 
+ [collect_stats(['peak_megabytes_allocated','bytes allocated'],5), + collect_stats('max_bytes_used',15), omit_ways(['profasm','profthreaded','threaded1','threaded2']) ], compile_and_run, @@ -39,18 +13,14 @@ test('space_leak_001', test('T4334', # Test for a space leak in Data.List.lines (fixed with #4334) [extra_run_opts('1000000 2 t'), - stats_num_field('peak_megabytes_allocated', (2, 1)), + collect_stats('peak_megabytes_allocated',2), # prof ways don't work well with +RTS -V0 omit_ways(['profasm','profthreaded']) ], compile_and_run, ['']) test('T2762', - [stats_num_field('peak_megabytes_allocated', (2, 0)), - # peak_megabytes_allocated is 2 with 7.0.2. - # Was 57 with 6.12.3. - # 2016-08-31: 3 (allocation area size bumped to 1MB) - # 2017-02-22: 2 (refactor fiBind) + [collect_stats('peak_megabytes_allocated',2), only_ways(['normal']), extra_run_opts('+RTS -G1 -RTS' ), extra_clean(['T2762A.hi', 'T2762A.o'])], @@ -61,36 +31,9 @@ test('T4018', compile_and_run, ['-fno-state-hack']) test('T4029', - [stats_num_field('peak_megabytes_allocated', - [(wordsize(64), 65, 10)]), - # 2016-02-26: 66 (amd64/Linux) INITIAL - # 2016-05-23: 82 (amd64/Linux) Use -G1 - # 2016-07-13: 92 (amd64/Linux) Changes to tidyType - # 2016-09-01: 71 (amd64/Linux) Restore w/w limit (#11565) - # 2017-02-12: 80 (amd64/Linux) Type-indexed Typeable - # 2017-02-20: 76 (amd64/Linux) Better reading of iface files - # 2017-03-03: 65 (amd64/Linux) Share Typeable KindReps or more - # lazy interface file reading - stats_num_field('max_bytes_used', - [(wordsize(64), 18208944, 15)]), - # 2016-02-26: 24071720 (amd64/Linux) INITIAL - # 2016-04-21: 25542832 (amd64/Linux) - # 2016-05-23: 25247216 (amd64/Linux) Use -G1 - # 2016-07-13: 27575416 (amd64/Linux) Changes to tidyType - # 2016-07-20: 22920616 (amd64/Linux) Fix laziness of instance matching - # 2016-09-01: 21648488 (amd64/Linux) Restore w/w limit (#11565) - # 2016-10-13: 20325248 (amd64/Linux) Creep (downwards, yay!) 
- # 2016-11-14: 21387048 (amd64/Linux) Creep back upwards :( - # 2017-01-18: 21670448 (amd64/Linux) Float string literals to toplevel - # 2017-02-07: 22770352 (amd64/Linux) It is unclear - # 2017-02-12: 24151096 (amd64/Linux) Type-indexed Typeable - # 2017-02-20: 22016200 (amd64/Linux) Better reading of iface files - # 2017-03-03: 19172360 (amd64/Linux) Share Typeable KindReps or more - # lazy interface file reading - # 2017-03-07: 20476360 (amd64/Linux) It's not entirely clear - # 2017-03-14: 18208944 (amd64/Darwin) Again, not clear - # 2017-03-15: bumped margin to 15% due to instability + [collect_stats(['peak_megabytes_allocated','max_bytes_used'],10), extra_hc_opts('+RTS -G1 -RTS' ), ], ghci_script, ['T4029.script']) + diff --git a/testsuite/tests/pmcheck/should_compile/all.T b/testsuite/tests/pmcheck/should_compile/all.T index 079978b5f5..393ce92463 100644 --- a/testsuite/tests/pmcheck/should_compile/all.T +++ b/testsuite/tests/pmcheck/should_compile/all.T @@ -36,26 +36,19 @@ test('T9951b', [], compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns']) test('T9951', [], compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns']) -test('T11303', normal, compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) -test('T11276', compiler_stats_num_field('bytes allocated', - [(wordsize(64), 165890392, 10)] - # 2018-07-14: 165890392 INITIAL - ), compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) +test('T11303', normal, compile, + ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) +test('T11276', collect_compiler_stats('bytes allocated',10), compile, + ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) -test('T11303b', compiler_stats_num_field('bytes allocated', - [(wordsize(64), 54373936, 10)] - # 2018-07-14: 54373936 INITIAL - ), compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) +test('T11303b', 
collect_compiler_stats('bytes allocated',10), compile, + ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) -test('T11374', compiler_stats_num_field('bytes allocated', - [(wordsize(64), 280144864, 10)] - # 2018-07-14: 280144864 INITIAL - ), compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) +test('T11374', collect_compiler_stats('bytes allocated',10), compile, + ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M1G -RTS']) -test('T11195', compiler_stats_num_field('bytes allocated', - [(wordsize(64), 7852567480, 10)] - # 2018-07-14: 7852567480 INITIAL - ), compile, ['-package ghc -fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M2G -RTS']) +test('T11195', collect_compiler_stats('bytes allocated',10), compile, + ['-package ghc -fwarn-incomplete-patterns -fwarn-overlapping-patterns +RTS -M2G -RTS']) test('T11984', normal, compile, ['-fwarn-incomplete-patterns -fwarn-overlapping-patterns']) diff --git a/testsuite/tests/primops/should_run/all.T b/testsuite/tests/primops/should_run/all.T index ecf995bea8..c7cdd348bf 100644 --- a/testsuite/tests/primops/should_run/all.T +++ b/testsuite/tests/primops/should_run/all.T @@ -7,11 +7,7 @@ test('T4442', compile_and_run, ['']) test('T10481', exit_code(1), compile_and_run, ['']) test('T10678', - [stats_num_field('bytes allocated', - [(wordsize(64), 64004171, 5) - # 2015-11-04: 88041768 +/- 5% (before runRW#) - # 2015-11-04: 64004171 (after runRW#) - ]), + [ collect_stats('bytes allocated',5), only_ways('normal') ], compile_and_run, ['-O']) diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index d6b9aa0cf9..1f6ef0059f 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -152,8 +152,7 @@ test('T7702', # we say 18mb peak allocated +/- 70% because other compiler flags have # a large effect on allocation which is hard to separate from 
the # allocation done by the plugin... but a regression allocates > 90mb - compiler_stats_num_field('peak_megabytes_allocated', - [(wordsize(32), 18, 70), (wordsize(64), 18, 70)]) + collect_compiler_stats('peak_megabytes_allocated',70), ], compile, ['-v0 -package-db T7702plugin/pkg.T7702/local.package.conf -fplugin T7702Plugin -package T7702plugin ' + config.plugin_way_flags]) diff --git a/testsuite/tests/simplStg/should_run/all.T b/testsuite/tests/simplStg/should_run/all.T index d3aa9376ee..2f7c69f5db 100644 --- a/testsuite/tests/simplStg/should_run/all.T +++ b/testsuite/tests/simplStg/should_run/all.T @@ -13,10 +13,9 @@ test('T9291', normal, compile_and_run, ['']) test('T13536', normal, compile_and_run, ['']) test('T13536a', - [stats_num_field('bytes allocated', - [ (wordsize(64), 86664, 5) ]), - # 2017-04-10 86664 -- 25769889696 if broken + [ collect_stats('bytes allocated',5), only_ways(['optasm'])], compile_and_run, ['']) + |