testsuite/driver/runtests.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576

#!/usr/bin/env python3

#
# (c) Simon Marlow 2002
#

import argparse
import signal
import sys
import os
import io
import shutil
import tempfile
import time
import re
import traceback

# We don't actually need subprocess in runtests.py, but:
# * We do need it in testlibs.py
# * We can't import testlibs.py until after we have imported ctypes
# * If we import ctypes before subprocess on cygwin, then sys.exit(0)
#   says "Aborted" and we fail with exit code 134.
# So we import it here first, so that the testsuite doesn't appear to fail.
import subprocess

from testutil import getStdout, Watcher, str_warn, str_info, print_table, shorten_metric_name
from testglobals import getConfig, ghc_env, getTestRun, TestConfig, \
                        TestOptions, brokens, PerfMetric
from my_typing import TestName
from perf_notes import MetricChange, GitRef, inside_git_repo, is_worktree_dirty, format_perf_stat, get_abbrev_hash_length, is_commit_hash
from junit import junit
import term_color
from term_color import Color, colored
import cpu_features

# Readline sometimes spews out ANSI escapes for some values of TERM,
# which result in test failures. Thus set TERM to a nice, simple, safe
# value.
os.environ['TERM'] = 'vt100'
ghc_env['TERM'] = 'vt100'

global config
config = getConfig() # get it from testglobals

def signal_handler(signal, frame):
    stopNow()

def get_compiler_info() -> TestConfig:
    """ Overriddden by configuration file. """
    raise NotImplementedError

# -----------------------------------------------------------------------------
# cmd-line options

parser = argparse.ArgumentParser(description="GHC's testsuite driver")
perf_group = parser.add_mutually_exclusive_group()

parser.add_argument("-e", action='append', help="A string to execute from the command line.")
parser.add_argument("--config-file", action="append", help="config file")
parser.add_argument("--config", action='append', help="config field")
parser.add_argument("--rootdir", action='append', help="root of tree containing tests (default: .)")
parser.add_argument("--metrics-file", help="file in which to save (append) the performance test metrics. If omitted, git notes will be used.")
parser.add_argument("--summary-file", help="file in which to save the (human-readable) summary")
parser.add_argument("--no-print-summary", action="store_true", help="should we print the summary?")
parser.add_argument("--only", action="append", help="just this test (can be give multiple --only= flags)")
parser.add_argument("--way", action="append", help="just this way")
parser.add_argument("--skipway", action="append", help="skip this way")
parser.add_argument("--threads", type=int, help="threads to run simultaneously")
parser.add_argument("--verbose", type=int, choices=[0,1,2,3,4,5], help="verbose (Values 0 through 5 accepted)")
parser.add_argument("--junit", type=argparse.FileType('wb'), help="output testsuite summary in JUnit format")
parser.add_argument("--broken-test", action="append", default=[], help="a test name to mark as broken for this run")
parser.add_argument("--test-env", default='local', help="Override default chosen test-env.")
parser.add_argument("--perf-baseline", type=GitRef, metavar='COMMIT', help="Baseline commit for performance comparsons.")
perf_group.add_argument("--skip-perf-tests", action="store_true", help="skip performance tests")
perf_group.add_argument("--only-perf-tests", action="store_true", help="Only do performance tests")

args = parser.parse_args()

# Initialize variables that are set by the build system with -e
windows = False
darwin = False

if args.e:
    for e in args.e:
        exec(e)

if args.config_file:
    for arg in args.config_file:
        exec(open(arg).read())

if args.config:
    for arg in args.config:
        field, value = arg.split('=', 1)
        setattr(config, field, value)

all_ways = config.run_ways+config.compile_ways+config.other_ways

if args.rootdir:
    config.rootdirs = args.rootdir

config.metrics_file = args.metrics_file
hasMetricsFile = config.metrics_file is not None
config.summary_file = args.summary_file
config.no_print_summary = args.no_print_summary
config.baseline_commit = args.perf_baseline

if args.only:
    config.only = args.only
    config.run_only_some_tests = True

if args.way:
    for way in args.way:
        if way not in all_ways:
            print('WARNING: Unknown WAY %s in --way' % way)
        else:
            config.cmdline_ways += [way]
            if way in config.other_ways:
                config.run_ways += [way]
                config.compile_ways += [way]

if args.skipway:
    for way in args.skipway:
        if way not in all_ways:
            print('WARNING: Unknown WAY %s in --skipway' % way)

    config.other_ways = [w for w in config.other_ways if w not in args.skipway]
    config.run_ways = [w for w in config.run_ways if w not in args.skipway]
    config.compile_ways = [w for w in config.compile_ways if w not in args.skipway]

config.broken_tests |= {TestName(t) for t in args.broken_test}

if args.threads:
    config.threads = args.threads
    config.use_threads = True

if args.verbose is not None:
    config.verbose = args.verbose

# Note force skip perf tests: skip if this is not a git repo (estimated with inside_git_repo)
# and no metrics file is given. In this case there is no way to read the previous commit's
# perf test results, nor a way to store new perf test results.
forceSkipPerfTests = not hasMetricsFile and not inside_git_repo()
config.skip_perf_tests = args.skip_perf_tests or forceSkipPerfTests
config.only_perf_tests = args.only_perf_tests

if args.test_env:
    config.test_env = args.test_env

config.cygwin = False
config.msys = False

if windows:
    h = os.popen('uname -s', 'r')
    v = h.read()
    h.close()
    if v.startswith("CYGWIN"):
        config.cygwin = True
    elif v.startswith("MINGW") or v.startswith("MSYS"):
# msys gives "MINGW32"
# msys2 gives "MINGW_NT-6.2" or "MSYS_NT-6.3"
        config.msys = True
    else:
        raise Exception("Can't detect Windows terminal type")

# Try to use UTF8
if windows:
    import ctypes
    # Windows and mingw* Python provide windll, msys2 python provides cdll.
    if hasattr(ctypes, 'WinDLL'):
        mydll = ctypes.WinDLL    # type: ignore
    else:
        mydll = ctypes.CDLL

    # This actually leaves the terminal in codepage 65001 (UTF8) even
    # after python terminates. We ought really remember the old codepage
    # and set it back.
    kernel32 = mydll('kernel32.dll')
    if kernel32.SetConsoleCP(65001) == 0:
        raise Exception("Failure calling SetConsoleCP(65001)")
    if kernel32.SetConsoleOutputCP(65001) == 0:
        raise Exception("Failure calling SetConsoleOutputCP(65001)")

    # register the interrupt handler
    signal.signal(signal.SIGINT, signal_handler)
else:
    # Try and find a utf8 locale to use
    # First see if we already have a UTF8 locale
    h = os.popen('locale | grep LC_CTYPE | grep -i utf', 'r')
    v = h.read()
    h.close()
    if v == '':
        # We don't, so now see if 'locale -a' works
        h = os.popen('locale -a | grep -F .', 'r')
        v = h.read()
        h.close()
        if v != '':
            # If it does then use the first utf8 locale that is available
            h = os.popen('locale -a | grep -i "utf8\|utf-8" 2>/dev/null', 'r')
            v = h.readline().strip()
            h.close()
            if v != '':
                os.environ['LC_ALL'] = v
                ghc_env['LC_ALL'] = v
                print("setting LC_ALL to", v)
            else:
                print('WARNING: No UTF8 locale found.')
                print('You may get some spurious test failures.')

# https://stackoverflow.com/a/22254892/1308058
def supports_colors():
    """
    Returns True if the running system's terminal supports color, and False
    otherwise.
    """
    plat = sys.platform
    supported_platform = plat != 'Pocket PC' and (plat != 'win32' or
                                                  'ANSICON' in os.environ)
    # isatty is not always implemented, #6223.
    is_a_tty = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
    if not supported_platform or not is_a_tty:
        return False
    return True

config.supports_colors = supports_colors()
term_color.enable_color = config.supports_colors

# This has to come after arg parsing as the args can change the compiler
get_compiler_info()

# Can't import this earlier as we need to know if threading will be
# enabled or not
from testlib import *

def format_path(path):
    if windows:
        if os.pathsep == ':':
            # If using msys2 python instead of mingw we have to change the drive
            # letter representation. Otherwise it thinks we're adding two env
            # variables E and /Foo when we add E:/Foo.
            path = re.sub('([a-zA-Z]):', '/\\1', path)
        if config.cygwin:
            # On cygwin we can't put "c:\foo" in $PATH, as : is a
            # field separator. So convert to /cygdrive/c/foo instead.
            # Other pythons use ; as the separator, so no problem.
            path = re.sub('([a-zA-Z]):', '/cygdrive/\\1', path)
            path = re.sub('\\\\', '/', path)
    return path

# On Windows we need to set $PATH to include the paths to all the DLLs
# in order for the dynamic library tests to work.
if windows or darwin:
    pkginfo = getStdout([config.ghc_pkg, 'dump'])
    topdir = config.libdir
    if windows:
        mingw = os.path.abspath(os.path.join(topdir, '../mingw/bin'))
        mingw = format_path(mingw)
        ghc_env['PATH'] = os.pathsep.join([ghc_env.get("PATH", ""), mingw])
    for line in pkginfo.split('\n'):
        if line.startswith('library-dirs:'):
            path = line.rstrip()
            path = re.sub('^library-dirs: ', '', path)
            # Use string.replace instead of re.sub, because re.sub
            # interprets backslashes in the replacement string as
            # escape sequences.
            path = path.replace('$topdir', topdir)
            if path.startswith('"'):
                path = re.sub('^"(.*)"$', '\\1', path)
                path = re.sub('\\\\(.)', '\\1', path)
            if windows:
                path = format_path(path)
                ghc_env['PATH'] = os.pathsep.join([path, ghc_env.get("PATH", "")])
            else:
                # darwin
                ghc_env['DYLD_LIBRARY_PATH'] = os.pathsep.join([path, ghc_env.get("DYLD_LIBRARY_PATH", "")])

testopts_local.x = TestOptions()

# if timeout == -1 then we try to calculate a sensible value
if config.timeout == -1:
    config.timeout = int(read_no_crs(config.top + '/timeout/calibrate.out'))

print('Timeout is ' + str(config.timeout))
print('Known ways: ' + ', '.join(config.other_ways))
print('Run ways: ' + ', '.join(config.run_ways))
print('Compile ways: ' + ', '.join(config.compile_ways))

# Try get allowed performance changes from the git commit.
try:
    config.allowed_perf_changes = Perf.get_allowed_perf_changes()
except subprocess.CalledProcessError:
    print('Failed to get allowed metric changes from the HEAD git commit message.')

print('Allowing performance changes in: ' + ', '.join(config.allowed_perf_changes.keys()))

# -----------------------------------------------------------------------------
# The main dude

if config.rootdirs == []:
    config.rootdirs = ['.']

t_files = list(findTFiles(config.rootdirs))

print('Found', len(t_files), '.T files...')

t = getTestRun() # type: TestRun

# Avoid cmd.exe built-in 'date' command on Windows
t.start_time = datetime.datetime.now()

print('Beginning test run at', t.start_time.strftime("%c %Z"))

# For reference
try:
    print('Detected CPU features: ', cpu_features.get_cpu_features())
except Exception as e:
    print('Failed to detect CPU features: ', e)

sys.stdout.flush()
# we output text, which cannot be unbuffered
sys.stdout = os.fdopen(sys.__stdout__.fileno(), "w")

if config.local:
    tempdir = ''
else:
    # See note [Running tests in /tmp]
    tempdir = tempfile.mkdtemp('', 'ghctest-')

    # opts.testdir should be quoted when used, to make sure the testsuite
    # keeps working when it contains backward slashes, for example from
    # using os.path.join. Windows native and mingw* python
    # (/mingw64/bin/python) set `os.path.sep = '\\'`, while msys2 python
    # (/bin/python, /usr/bin/python or /usr/local/bin/python) sets
    # `os.path.sep = '/'`.
    # To catch usage of unquoted opts.testdir early, insert some spaces into
    # tempdir.
    tempdir = os.path.join(tempdir, 'test   spaces')

def cleanup_and_exit(exitcode):
    if config.cleanup and tempdir:
        shutil.rmtree(tempdir, ignore_errors=True)
    exit(exitcode)

def tabulate_metrics(metrics: List[PerfMetric]) -> None:
    abbrevLen = get_abbrev_hash_length()
    hasBaseline = any([x.baseline is not None for x in metrics])
    baselineCommitSet = set([x.baseline.commit for x in metrics if x.baseline is not None])
    hideBaselineCommit = not hasBaseline or len(baselineCommitSet) == 1
    hideBaselineEnv = not hasBaseline or all(
        [x.stat.test_env == x.baseline.perfStat.test_env
         for x in metrics if x.baseline is not None])
    def row(cells: Tuple[str, str, str, str, str, str, str, str]) -> List[str]:
        return [x for (idx, x) in enumerate(list(cells)) if
                (idx != 2 or not hideBaselineCommit) and
                (idx != 3 or not hideBaselineEnv )]

    headerRows = [
        row(("", "", "Baseline", "Baseline", "Baseline", "", "", "")),
        row(("Test", "Metric", "commit", "environment", "value", "New value", "Change", ""))
    ]
    def strDiff(x: PerfMetric) -> str:
        if x.baseline is None:
            return ""
        val0 = x.baseline.perfStat.value
        val1 = x.stat.value
        return "{:+2.1f}%".format(100 * (val1 - val0) / val0)
    dataRows = [row((
        "{}({})".format(x.stat.test, x.stat.way),
        shorten_metric_name(x.stat.metric),
          "{}".format(x.baseline.commit[:abbrevLen]
                      if is_commit_hash(x.baseline.commit) else x.baseline.commit)
          if x.baseline is not None else "",
        "{}".format(x.baseline.perfStat.test_env)
          if x.baseline is not None else "",
        "{:13.1f}".format(x.baseline.perfStat.value)
          if x.baseline is not None else "",
        "{:13.1f}".format(x.stat.value),
        strDiff(x),
        "{}".format(x.change.hint())
    )) for x in sorted(metrics, key =
                      lambda m: (m.stat.test, m.stat.way, m.stat.metric))]
    print_table(headerRows, dataRows, 1)
    print("")
    if hasBaseline:
        if hideBaselineEnv:
            print("* All baselines were measured in the same environment as this test run")
        if hideBaselineCommit:
            commit = next(iter(baselineCommitSet))
            print("* All baseline commits are {}".format(
                commit[:abbrevLen]
                if is_commit_hash(commit) else commit
            ))

# First collect all the tests to be run
t_files_ok = True
for file in t_files:
    if_verbose(2, '====> Scanning %s' % file)
    newTestDir(tempdir, os.path.dirname(file))
    try:
        with io.open(file, encoding='utf8') as f:
            src = f.read()

        exec(src)
    except Exception as e:
        traceback.print_exc()
        framework_fail(None, None, 'exception: %s' % e)
        t_files_ok = False

for name in config.only:
    if t_files_ok:
        # See Note [Mutating config.only]
        framework_fail(name, None, 'test not found')
    else:
        # Let user fix .T file errors before reporting on unfound tests.
        # The reason the test can not be found is likely because of those
        # .T file errors.
        pass

if config.list_broken:
    print('')
    print('Broken tests:')
    print('\n  '.join('#{ticket}({a}/{b})'.format(ticket=ticket, a=a, b=b)
                      for ticket, a, b in brokens))
    print('')

    if t.framework_failures:
        print('WARNING:', len(t.framework_failures), 'framework failures!')
        print('')
else:
    # completion watcher
    watcher = Watcher(len(parallelTests))

    # Now run all the tests
    try:
        for oneTest in parallelTests:
            if stopping():
                break
            oneTest(watcher)

        # wait for parallel tests to finish
        if not stopping():
            watcher.wait()

        # Run the following tests purely sequential
        config.use_threads = False
        for oneTest in aloneTests:
            if stopping():
                break
            oneTest(watcher)
    except KeyboardInterrupt:
        pass

    # flush everything before we continue
    sys.stdout.flush()

    # Dump metrics data.
    print("\nPerformance Metrics (test environment: {}):\n".format(config.test_env))
    if config.baseline_commit:
        print('Performance baseline: %s\n' % config.baseline_commit)
    if any(t.metrics):
        tabulate_metrics(t.metrics)
    else:
        print("\nNone collected.")
    print("")

    # Warn if had to force skip perf tests (see Note force skip perf tests).
    spacing = "       "
    if forceSkipPerfTests and not args.skip_perf_tests:
        print()
        print(str_warn('Skipping All Performance Tests') + ' `git` exited with non-zero exit code.')
        print(spacing + 'Git is required because performance test results are compared with ancestor git commits\' results (stored with git notes).')
        print(spacing + 'You can still run the tests without git by specifying an output file with --metrics-file FILE.')

    # Warn of new metrics.
    new_metrics = [metric for (change, metric, baseline) in t.metrics if change == MetricChange.NewMetric]
    if any(new_metrics):
        if inside_git_repo():
            reason = 'a baseline (expected value) cannot be recovered from' + \
                ' previous git commits. This may be due to HEAD having' + \
                ' new tests or having expected changes, the presence of' + \
                ' expected changes since the last run of the tests, and/or' + \
                ' the latest test run being too old.'
            fix = 'If the tests exist on the previous' + \
                ' commit (And are configured to run with the same ways),' + \
                ' then check out that commit and run the tests to generate' + \
                ' the missing metrics. Alternatively, a baseline may be' + \
                ' recovered from ci results once fetched:\n\n' + \
                spacing + 'git fetch ' + \
                  'https://gitlab.haskell.org/ghc/ghc-performance-notes.git' + \
                  ' refs/notes/perf:refs/notes/' + Perf.CiNamespace
        else:
            reason = "this is not a git repo so the previous git commit's" + \
                     " metrics cannot be loaded from git notes:"
            fix = ""
        print()
        print(str_warn('Missing Baseline Metrics') + \
                ' these metrics trivially pass because ' + reason)
        print(spacing + (' ').join(set([metric.test for metric in new_metrics])))
        if fix != "":
            print()
            print(fix)

    # Inform of how to accept metric changes.
    if (len(t.unexpected_stat_failures) > 0):
        print()
        print(str_info("Some stats have changed") + " If this is expected, " + \
            "allow changes by appending the git commit message with this:")
        print('-' * 25)
        print(Perf.allow_changes_string([(m.change, m.stat) for m in t.metrics]))
        print('-' * 25)

    summary(t, sys.stdout, config.no_print_summary, config.supports_colors)

    # Write perf stats if any exist or if a metrics file is specified.
    stats_metrics = [stat for (_, stat, __) in t.metrics] # type: List[PerfStat]
    if hasMetricsFile:
        print('Appending ' + str(len(stats_metrics)) + ' stats to file: ' + config.metrics_file)
        with open(config.metrics_file, 'a') as f:
            f.write("\n" + Perf.format_perf_stat(stats_metrics))
    elif inside_git_repo() and any(stats_metrics):
        if is_worktree_dirty():
            print()
            print(str_warn('Performance Metrics NOT Saved') + \
                ' working tree is dirty. Commit changes or use ' + \
                '--metrics-file to save metrics to a file.')
        else:
            Perf.append_perf_stat(stats_metrics)

    # Write summary
    if config.summary_file:
        with open(config.summary_file, 'w') as f:
            summary(t, f)

    if args.junit:
        junit(t).write(args.junit)

if len(t.unexpected_failures) > 0 or \
   len(t.unexpected_stat_failures) > 0 or \
   len(t.unexpected_passes) > 0 or \
   len(t.framework_failures) > 0:
    exitcode = 1
else:
    exitcode = 0

cleanup_and_exit(exitcode)

# Note [Running tests in /tmp]
#
# Use LOCAL=0 to run tests in /tmp, to catch tests that use files from
# the source directory without copying them to the test directory first.
#
# As an example, take a run_command test with a Makefile containing
# `$(TEST_HC) ../Foo.hs`. GHC will now create the output files Foo.o and
# Foo.hi in the source directory. There are 2 problems with this:
# * Output files in the source directory won't get cleaned up automatically.
# * Two tests might (over)write the same output file.
#
# Tests that only fail when run concurrently with other tests are the
# worst, so we try to catch them early by enabling LOCAL=0 in validate.
#
# Adding -outputdir='.' to TEST_HC_OPTS would help a bit, but it requires
# making changes to quite a few tests. The problem is that
# `$(TEST_HC) ../Foo.hs -outputdir=.` with Foo.hs containing
# `module Main where` does not produce Foo.o, as it would without
# -outputdir, but Main.o. See [1].
#
# Using -outputdir='.' is not foolproof anyway, since it does not change
# the destination of the final executable (Foo.exe).
#
# Another hardening method that could be tried is to `chmod -w` the
# source directory.
#
# By default we set LOCAL=1, because it makes it easier to inspect the
# test directory while working on a new test.
#
# [1]
# https://downloads.haskell.org/~ghc/8.0.1/docs/html/users_guide/separate_compilation.html#output-files