summary | refs | log | tree | commit | diff
path: root/testsuite/driver
diff options
context:
space:
mode:
author: Gabor Greif <ggreif@gmail.com> 2017-10-20 15:45:37 +0200
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2019-12-06 21:20:38 -0500
commit: 9897e8c8ef0b19a9571ef97a1d9bb050c1ee9121 (patch)
tree: 0d63829cdfd660376cd600a26e8ed22d3478f669 /testsuite/driver
parent: f171b3582d44746bf8b08897a3b23bc97e5dbdda (diff)
download: haskell-9897e8c8ef0b19a9571ef97a1d9bb050c1ee9121.tar.gz
Implement pointer tagging for big families (#14373)
Formerly we punted on these and evaluated constructors always got a tag of 1. We now cascade switches because we have to check the tag first and when it is MAX_PTR_TAG then get the precise tag from the info table and switch on that. The only technically tricky part is that the default case needs (logical) duplication. To do this we emit an extra label for it and branch to that from the second switch. This avoids duplicated codegen. Here's a simple example of the new code gen: data D = D1 | D2 | D3 | D4 | D5 | D6 | D7 | D8 On a 64-bit system previously all constructors would be tagged 1. With the new code gen D7 and D8 are tagged 7: [Lib.D7_con_entry() { ... {offset c1eu: // global R1 = R1 + 7; call (P64[Sp])(R1) args: 8, res: 0, upd: 8; } }] [Lib.D8_con_entry() { ... {offset c1ez: // global R1 = R1 + 7; call (P64[Sp])(R1) args: 8, res: 0, upd: 8; } }] When switching we now look at the info table only when the tag is 7. For example, if we derive Enum for the type above, the Cmm looks like this: c2Le: _s2Js::P64 = R1; _c2Lq::P64 = _s2Js::P64 & 7; switch [1 .. 7] _c2Lq::P64 { case 1 : goto c2Lk; case 2 : goto c2Ll; case 3 : goto c2Lm; case 4 : goto c2Ln; case 5 : goto c2Lo; case 6 : goto c2Lp; case 7 : goto c2Lj; } // Read info table for tag c2Lj: _c2Lv::I64 = %MO_UU_Conv_W32_W64(I32[I64[_s2Js::P64 & (-8)] - 4]); if (_c2Lv::I64 != 6) goto c2Lu; else goto c2Lt; Generated Cmm sizes do not change too much, but binaries are very slightly larger, due to the fact that the new instructions are longer in encoded form. E.g. previously entry code for D8 above would be 00000000000001c0 <Lib_D8_con_info>: 1c0: 48 ff c3 inc %rbx 1c3: ff 65 00 jmpq *0x0(%rbp) With this patch 00000000000001d0 <Lib_D8_con_info>: 1d0: 48 83 c3 07 add $0x7,%rbx 1d4: ff 65 00 jmpq *0x0(%rbp) This is one byte longer. 
Secondly, reading info table directly and then switching is shorter _c1co: movq -1(%rbx),%rax movl -4(%rax),%eax // Switch on info table tag jmp *_n1d5(,%rax,8) than doing the same switch, and then for the tag 7 doing another switch: // When tag is 7 _c1ct: andq $-8,%rbx movq (%rbx),%rax movl -4(%rax),%eax // Switch on info table tag ... Some changes of binary sizes in actual programs: - In NoFib the worst case is 0.1% increase in benchmark "parser" (see NoFib results below). All programs get slightly larger. - Stage 2 compiler size does not change. - In "containers" (the library) size of all object files increases 0.0005%. Size of the test program "bitqueue-properties" increases 0.03%. nofib benchmarks kindly provided by Ömer (@osa1): NoFib Results ============= -------------------------------------------------------------------------------- Program Size Allocs Instrs Reads Writes -------------------------------------------------------------------------------- CS +0.0% 0.0% -0.0% -0.0% -0.0% CSD +0.0% 0.0% 0.0% +0.0% +0.0% FS +0.0% 0.0% 0.0% +0.0% 0.0% S +0.0% 0.0% -0.0% 0.0% 0.0% VS +0.0% 0.0% -0.0% +0.0% +0.0% VSD +0.0% 0.0% -0.0% +0.0% -0.0% VSM +0.0% 0.0% 0.0% 0.0% 0.0% anna +0.0% 0.0% +0.1% -0.9% -0.0% ansi +0.0% 0.0% -0.0% +0.0% +0.0% atom +0.0% 0.0% 0.0% 0.0% 0.0% awards +0.0% 0.0% -0.0% +0.0% 0.0% banner +0.0% 0.0% -0.0% +0.0% 0.0% bernouilli +0.0% 0.0% +0.0% +0.0% +0.0% binary-trees +0.0% 0.0% -0.0% -0.0% -0.0% boyer +0.0% 0.0% +0.0% 0.0% -0.0% boyer2 +0.0% 0.0% +0.0% 0.0% -0.0% bspt +0.0% 0.0% +0.0% +0.0% 0.0% cacheprof +0.0% 0.0% +0.1% -0.8% 0.0% calendar +0.0% 0.0% -0.0% +0.0% -0.0% cichelli +0.0% 0.0% +0.0% 0.0% 0.0% circsim +0.0% 0.0% -0.0% -0.1% -0.0% clausify +0.0% 0.0% +0.0% +0.0% 0.0% comp_lab_zift +0.0% 0.0% +0.0% 0.0% -0.0% compress +0.0% 0.0% +0.0% +0.0% 0.0% compress2 +0.0% 0.0% 0.0% 0.0% 0.0% constraints +0.0% 0.0% -0.0% -0.0% -0.0% cryptarithm1 +0.0% 0.0% +0.0% 0.0% 0.0% cryptarithm2 +0.0% 0.0% +0.0% -0.0% 0.0% cse +0.0% 0.0% +0.0% +0.0% 
0.0% digits-of-e1 +0.0% 0.0% -0.0% -0.0% -0.0% digits-of-e2 +0.0% 0.0% +0.0% -0.0% -0.0% dom-lt +0.0% 0.0% +0.0% +0.0% 0.0% eliza +0.0% 0.0% -0.0% +0.0% 0.0% event +0.0% 0.0% -0.0% -0.0% -0.0% exact-reals +0.0% 0.0% +0.0% +0.0% +0.0% exp3_8 +0.0% 0.0% -0.0% -0.0% -0.0% expert +0.0% 0.0% +0.0% +0.0% +0.0% fannkuch-redux +0.0% 0.0% +0.0% 0.0% 0.0% fasta +0.0% 0.0% -0.0% -0.0% -0.0% fem +0.0% 0.0% +0.0% +0.0% +0.0% fft +0.0% 0.0% +0.0% -0.0% -0.0% fft2 +0.0% 0.0% +0.0% +0.0% +0.0% fibheaps +0.0% 0.0% +0.0% +0.0% 0.0% fish +0.0% 0.0% +0.0% +0.0% 0.0% fluid +0.0% 0.0% +0.0% +0.0% +0.0% fulsom +0.0% 0.0% +0.0% -0.0% +0.0% gamteb +0.0% 0.0% +0.0% -0.0% -0.0% gcd +0.0% 0.0% +0.0% +0.0% 0.0% gen_regexps +0.0% 0.0% +0.0% -0.0% -0.0% genfft +0.0% 0.0% -0.0% -0.0% -0.0% gg +0.0% 0.0% 0.0% -0.0% 0.0% grep +0.0% 0.0% +0.0% +0.0% +0.0% hidden +0.0% 0.0% +0.0% -0.0% -0.0% hpg +0.0% 0.0% +0.0% -0.1% -0.0% ida +0.0% 0.0% +0.0% -0.0% -0.0% infer +0.0% 0.0% -0.0% -0.0% -0.0% integer +0.0% 0.0% -0.0% -0.0% -0.0% integrate +0.0% 0.0% 0.0% +0.0% 0.0% k-nucleotide +0.0% 0.0% -0.0% -0.0% -0.0% kahan +0.0% 0.0% -0.0% -0.0% -0.0% knights +0.0% 0.0% +0.0% -0.0% -0.0% lambda +0.0% 0.0% +1.2% -6.1% -0.0% last-piece +0.0% 0.0% +0.0% -0.0% -0.0% lcss +0.0% 0.0% +0.0% -0.0% -0.0% life +0.0% 0.0% +0.0% -0.0% -0.0% lift +0.0% 0.0% +0.0% +0.0% 0.0% linear +0.0% 0.0% +0.0% +0.0% +0.0% listcompr +0.0% 0.0% -0.0% -0.0% -0.0% listcopy +0.0% 0.0% -0.0% -0.0% -0.0% maillist +0.0% 0.0% +0.0% -0.0% -0.0% mandel +0.0% 0.0% +0.0% +0.0% +0.0% mandel2 +0.0% 0.0% +0.0% +0.0% -0.0% mate +0.0% 0.0% +0.0% +0.0% +0.0% minimax +0.0% 0.0% -0.0% +0.0% -0.0% mkhprog +0.0% 0.0% +0.0% +0.0% +0.0% multiplier +0.0% 0.0% 0.0% +0.0% -0.0% n-body +0.0% 0.0% +0.0% -0.0% -0.0% nucleic2 +0.0% 0.0% +0.0% +0.0% -0.0% para +0.0% 0.0% +0.0% +0.0% +0.0% paraffins +0.0% 0.0% +0.0% +0.0% +0.0% parser +0.1% 0.0% +0.4% -1.7% -0.0% parstof +0.0% 0.0% -0.0% -0.0% -0.0% pic +0.0% 0.0% +0.0% 0.0% -0.0% pidigits +0.0% 0.0% -0.0% -0.0% -0.0% 
power +0.0% 0.0% +0.0% -0.0% -0.0% pretty +0.0% 0.0% +0.0% +0.0% +0.0% primes +0.0% 0.0% +0.0% 0.0% 0.0% primetest +0.0% 0.0% +0.0% +0.0% +0.0% prolog +0.0% 0.0% +0.0% +0.0% +0.0% puzzle +0.0% 0.0% +0.0% +0.0% +0.0% queens +0.0% 0.0% 0.0% +0.0% +0.0% reptile +0.0% 0.0% +0.0% +0.0% 0.0% reverse-complem +0.0% 0.0% -0.0% -0.0% -0.0% rewrite +0.0% 0.0% +0.0% 0.0% -0.0% rfib +0.0% 0.0% +0.0% +0.0% +0.0% rsa +0.0% 0.0% +0.0% +0.0% +0.0% scc +0.0% 0.0% +0.0% +0.0% +0.0% sched +0.0% 0.0% +0.0% +0.0% +0.0% scs +0.0% 0.0% +0.0% +0.0% 0.0% simple +0.0% 0.0% +0.0% +0.0% +0.0% solid +0.0% 0.0% +0.0% +0.0% 0.0% sorting +0.0% 0.0% +0.0% -0.0% 0.0% spectral-norm +0.0% 0.0% -0.0% -0.0% -0.0% sphere +0.0% 0.0% +0.0% -1.0% 0.0% symalg +0.0% 0.0% +0.0% +0.0% +0.0% tak +0.0% 0.0% +0.0% +0.0% +0.0% transform +0.0% 0.0% +0.4% -1.3% +0.0% treejoin +0.0% 0.0% +0.0% -0.0% 0.0% typecheck +0.0% 0.0% -0.0% +0.0% 0.0% veritas +0.0% 0.0% +0.0% -0.1% +0.0% wang +0.0% 0.0% +0.0% +0.0% +0.0% wave4main +0.0% 0.0% +0.0% 0.0% -0.0% wheel-sieve1 +0.0% 0.0% +0.0% +0.0% +0.0% wheel-sieve2 +0.0% 0.0% +0.0% +0.0% 0.0% x2n1 +0.0% 0.0% +0.0% +0.0% 0.0% -------------------------------------------------------------------------------- Min +0.0% 0.0% -0.0% -6.1% -0.0% Max +0.1% 0.0% +1.2% +0.0% +0.0% Geometric Mean +0.0% -0.0% +0.0% -0.1% -0.0% NoFib GC Results ================ -------------------------------------------------------------------------------- Program Size Allocs Instrs Reads Writes -------------------------------------------------------------------------------- circsim +0.0% 0.0% -0.0% -0.0% -0.0% constraints +0.0% 0.0% -0.0% 0.0% -0.0% fibheaps +0.0% 0.0% 0.0% -0.0% -0.0% fulsom +0.0% 0.0% 0.0% -0.6% -0.0% gc_bench +0.0% 0.0% 0.0% 0.0% -0.0% hash +0.0% 0.0% -0.0% -0.0% -0.0% lcss +0.0% 0.0% 0.0% -0.0% 0.0% mutstore1 +0.0% 0.0% 0.0% -0.0% -0.0% mutstore2 +0.0% 0.0% +0.0% -0.0% -0.0% power +0.0% 0.0% -0.0% 0.0% -0.0% spellcheck +0.0% 0.0% -0.0% -0.0% -0.0% 
-------------------------------------------------------------------------------- Min +0.0% 0.0% -0.0% -0.6% -0.0% Max +0.0% 0.0% +0.0% 0.0% 0.0% Geometric Mean +0.0% +0.0% +0.0% -0.1% +0.0% Fixes #14373 These performance regressions appear to be a fluke in CI. See the discussion in !1742 for details. Metric Increase: T6048 T12234 T12425 Naperian T12150 T5837 T13035
Diffstat (limited to 'testsuite/driver')
-rw-r--r-- testsuite/driver/testlib.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index b72fd4fac5..6929f7144c 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -1220,6 +1220,9 @@ def multimod_compile( name, way, top_mod, extra_hc_opts ):
def multimod_compile_fail( name, way, top_mod, extra_hc_opts ):
return do_compile( name, way, True, top_mod, [], extra_hc_opts )
+def multimod_compile_filter( name, way, top_mod, extra_hc_opts, filter_with, suppress_stdout=True ):
+ return do_compile( name, way, False, top_mod, [], extra_hc_opts, filter_with=filter_with, suppress_stdout=suppress_stdout )
+
def multi_compile( name, way, top_mod, extra_mods, extra_hc_opts ):
return do_compile( name, way, False, top_mod, extra_mods, extra_hc_opts)
@@ -1459,12 +1462,14 @@ def simple_build(name: Union[TestName, str],
top_mod: Optional[Path],
link: bool,
addsuf: bool,
- backpack: bool = False) -> Any:
+ backpack: bool = False,
+ suppress_stdout: bool = False,
+ filter_with: str = '') -> Any:
opts = getTestOpts()
# Redirect stdout and stderr to the same file
stdout = in_testdir(name, 'comp.stderr')
- stderr = subprocess.STDOUT
+ stderr = subprocess.STDOUT if not suppress_stdout else None
if top_mod is not None:
srcname = top_mod
@@ -1515,6 +1520,9 @@ def simple_build(name: Union[TestName, str],
'{{compiler}} {to_do} {srcname} {flags} {extra_hc_opts}'
).format(**locals())
+ if filter_with != '':
+ cmd = cmd + ' | ' + filter_with
+
exit_code = runCmd(cmd, None, stdout, stderr, opts.compile_timeout_multiplier)
actual_stderr_path = in_testdir(name, 'comp.stderr')