summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
Diffstat (limited to 'rts')
-rw-r--r--rts/Adjustor.c1110
-rw-r--r--rts/AdjustorAsm.S189
-rw-r--r--rts/Apply.cmm268
-rw-r--r--rts/Apply.h29
-rw-r--r--rts/Arena.c120
-rw-r--r--rts/Arena.h25
-rw-r--r--rts/AutoApply.h80
-rw-r--r--rts/AwaitEvent.h24
-rw-r--r--rts/BlockAlloc.c391
-rw-r--r--rts/BlockAlloc.h19
-rw-r--r--rts/Capability.c668
-rw-r--r--rts/Capability.h250
-rw-r--r--rts/ClosureFlags.c107
-rw-r--r--rts/Disassembler.c281
-rw-r--r--rts/Disassembler.h19
-rw-r--r--rts/Exception.cmm446
-rw-r--r--rts/Exception.h40
-rw-r--r--rts/FrontPanel.c802
-rw-r--r--rts/FrontPanel.h35
-rw-r--r--rts/GC.c4719
-rw-r--r--rts/GCCompact.c949
-rw-r--r--rts/GCCompact.h44
-rw-r--r--rts/GetTime.h26
-rw-r--r--rts/HSprel.def28
-rw-r--r--rts/Hash.c376
-rw-r--r--rts/Hash.h40
-rw-r--r--rts/HeapStackCheck.cmm964
-rw-r--r--rts/HsFFI.c40
-rw-r--r--rts/Interpreter.c1261
-rw-r--r--rts/Interpreter.h14
-rw-r--r--rts/LdvProfile.c342
-rw-r--r--rts/LdvProfile.h42
-rw-r--r--rts/Linker.c4315
-rw-r--r--rts/LinkerInternals.h110
-rw-r--r--rts/MBlock.c453
-rw-r--r--rts/MBlock.h90
-rw-r--r--rts/Main.c138
-rw-r--r--rts/Makefile370
-rw-r--r--rts/PosixSource.h18
-rw-r--r--rts/Prelude.h129
-rw-r--r--rts/PrimOps.cmm2106
-rw-r--r--rts/Printer.c1127
-rw-r--r--rts/Printer.h31
-rw-r--r--rts/ProfHeap.c1156
-rw-r--r--rts/ProfHeap.h19
-rw-r--r--rts/Profiling.c941
-rw-r--r--rts/Profiling.h39
-rw-r--r--rts/Proftimer.c85
-rw-r--r--rts/Proftimer.h22
-rw-r--r--rts/RetainerProfile.c2338
-rw-r--r--rts/RetainerProfile.h47
-rw-r--r--rts/RetainerSet.c498
-rw-r--r--rts/RetainerSet.h201
-rw-r--r--rts/RtsAPI.c597
-rw-r--r--rts/RtsDllMain.c39
-rw-r--r--rts/RtsFlags.c2281
-rw-r--r--rts/RtsMessages.c201
-rw-r--r--rts/RtsSignals.h78
-rw-r--r--rts/RtsStartup.c457
-rw-r--r--rts/RtsUtils.c367
-rw-r--r--rts/RtsUtils.h54
-rw-r--r--rts/STM.c1261
-rw-r--r--rts/Sanity.c948
-rw-r--r--rts/Sanity.h56
-rw-r--r--rts/Schedule.c4589
-rw-r--r--rts/Schedule.h332
-rw-r--r--rts/Sparks.c881
-rw-r--r--rts/Sparks.h104
-rw-r--r--rts/Stable.c460
-rw-r--r--rts/Stats.c632
-rw-r--r--rts/Stats.h56
-rw-r--r--rts/StgCRun.c897
-rw-r--r--rts/StgMiscClosures.cmm953
-rw-r--r--rts/StgPrimFloat.c491
-rw-r--r--rts/StgRun.h16
-rw-r--r--rts/StgStartup.cmm218
-rw-r--r--rts/StgStdThunks.cmm274
-rw-r--r--rts/Storage.c1137
-rw-r--r--rts/Task.c315
-rw-r--r--rts/Task.h271
-rw-r--r--rts/ThreadLabels.c50
-rw-r--r--rts/ThreadLabels.h27
-rw-r--r--rts/Ticker.h15
-rw-r--r--rts/Ticky.c628
-rw-r--r--rts/Ticky.h9
-rw-r--r--rts/Timer.c102
-rw-r--r--rts/Timer.h24
-rw-r--r--rts/Updates.cmm153
-rw-r--r--rts/Updates.h361
-rw-r--r--rts/VisCallbacks.c75
-rw-r--r--rts/VisCallbacks.h30
-rw-r--r--rts/VisSupport.c144
-rw-r--r--rts/VisSupport.h44
-rw-r--r--rts/VisWindow.c747
-rw-r--r--rts/VisWindow.h5
-rw-r--r--rts/Weak.c97
-rw-r--r--rts/Weak.h17
-rw-r--r--rts/dotnet/Invoke.c1081
-rw-r--r--rts/dotnet/Invoker.cpp338
-rw-r--r--rts/dotnet/Invoker.h197
-rw-r--r--rts/dotnet/InvokerClient.h180
-rw-r--r--rts/dotnet/Makefile53
-rw-r--r--rts/dotnet/invoker.snkbin0 -> 596 bytes
-rw-r--r--rts/ghc-frontpanel.glade1622
-rw-r--r--rts/gmp/.gdbinit34
-rw-r--r--rts/gmp/AUTHORS12
-rw-r--r--rts/gmp/COPYING336
-rw-r--r--rts/gmp/COPYING.LIB515
-rw-r--r--rts/gmp/INSTALL146
-rw-r--r--rts/gmp/Makefile.am197
-rw-r--r--rts/gmp/Makefile.in932
-rw-r--r--rts/gmp/NEWS136
-rw-r--r--rts/gmp/README84
-rw-r--r--rts/gmp/acconfig.h92
-rw-r--r--rts/gmp/acinclude.m4835
-rw-r--r--rts/gmp/aclocal.m41963
-rw-r--r--rts/gmp/ansi2knr.136
-rw-r--r--rts/gmp/ansi2knr.c677
-rw-r--r--rts/gmp/assert.c52
-rw-r--r--rts/gmp/compat.c46
-rw-r--r--rts/gmp/config.guess1373
-rw-r--r--rts/gmp/config.in162
-rw-r--r--rts/gmp/config.sub1273
-rw-r--r--rts/gmp/configure5216
-rw-r--r--rts/gmp/configure.in950
-rw-r--r--rts/gmp/depcomp269
-rw-r--r--rts/gmp/errno.c26
-rw-r--r--rts/gmp/extract-dbl.c187
-rw-r--r--rts/gmp/gmp-impl.h1072
-rw-r--r--rts/gmp/gmp.h1083
-rw-r--r--rts/gmp/insert-dbl.c98
-rw-r--r--rts/gmp/install-sh251
-rw-r--r--rts/gmp/longlong.h1347
-rw-r--r--rts/gmp/ltconfig3109
-rw-r--r--rts/gmp/ltmain.sh4692
-rw-r--r--rts/gmp/mdate-sh92
-rw-r--r--rts/gmp/memory.c160
-rw-r--r--rts/gmp/missing244
-rw-r--r--rts/gmp/mkinstalldirs38
-rw-r--r--rts/gmp/mp.h124
-rw-r--r--rts/gmp/mp_bpl.c27
-rw-r--r--rts/gmp/mp_clz_tab.c36
-rw-r--r--rts/gmp/mp_minv_tab.c50
-rw-r--r--rts/gmp/mp_set_fns.c48
-rw-r--r--rts/gmp/mpn/Makefile.am94
-rw-r--r--rts/gmp/mpn/Makefile.in472
-rw-r--r--rts/gmp/mpn/README13
-rw-r--r--rts/gmp/mpn/a29k/add_n.s120
-rw-r--r--rts/gmp/mpn/a29k/addmul_1.s113
-rw-r--r--rts/gmp/mpn/a29k/lshift.s93
-rw-r--r--rts/gmp/mpn/a29k/mul_1.s97
-rw-r--r--rts/gmp/mpn/a29k/rshift.s89
-rw-r--r--rts/gmp/mpn/a29k/sub_n.s120
-rw-r--r--rts/gmp/mpn/a29k/submul_1.s116
-rw-r--r--rts/gmp/mpn/a29k/udiv.s30
-rw-r--r--rts/gmp/mpn/a29k/umul.s29
-rw-r--r--rts/gmp/mpn/alpha/README224
-rw-r--r--rts/gmp/mpn/alpha/add_n.asm114
-rw-r--r--rts/gmp/mpn/alpha/addmul_1.asm87
-rw-r--r--rts/gmp/mpn/alpha/cntlz.asm68
-rw-r--r--rts/gmp/mpn/alpha/default.m477
-rw-r--r--rts/gmp/mpn/alpha/ev5/add_n.asm143
-rw-r--r--rts/gmp/mpn/alpha/ev5/lshift.asm169
-rw-r--r--rts/gmp/mpn/alpha/ev5/rshift.asm167
-rw-r--r--rts/gmp/mpn/alpha/ev5/sub_n.asm143
-rw-r--r--rts/gmp/mpn/alpha/ev6/addmul_1.asm474
-rw-r--r--rts/gmp/mpn/alpha/ev6/gmp-mparam.h62
-rw-r--r--rts/gmp/mpn/alpha/gmp-mparam.h64
-rw-r--r--rts/gmp/mpn/alpha/invert_limb.asm345
-rw-r--r--rts/gmp/mpn/alpha/lshift.asm104
-rw-r--r--rts/gmp/mpn/alpha/mul_1.asm71
-rw-r--r--rts/gmp/mpn/alpha/rshift.asm102
-rw-r--r--rts/gmp/mpn/alpha/sub_n.asm114
-rw-r--r--rts/gmp/mpn/alpha/submul_1.asm87
-rw-r--r--rts/gmp/mpn/alpha/udiv_qrnnd.S151
-rw-r--r--rts/gmp/mpn/alpha/umul.asm39
-rw-r--r--rts/gmp/mpn/alpha/unicos.m463
-rw-r--r--rts/gmp/mpn/arm/add_n.S77
-rw-r--r--rts/gmp/mpn/arm/addmul_1.S89
-rw-r--r--rts/gmp/mpn/arm/gmp-mparam.h34
-rw-r--r--rts/gmp/mpn/arm/mul_1.S81
-rw-r--r--rts/gmp/mpn/arm/sub_n.S79
-rw-r--r--rts/gmp/mpn/asm-defs.m41182
-rw-r--r--rts/gmp/mpn/clipper/add_n.s48
-rw-r--r--rts/gmp/mpn/clipper/mul_1.s47
-rw-r--r--rts/gmp/mpn/clipper/sub_n.s48
-rw-r--r--rts/gmp/mpn/cray/README14
-rw-r--r--rts/gmp/mpn/cray/add_n.c96
-rw-r--r--rts/gmp/mpn/cray/addmul_1.c46
-rw-r--r--rts/gmp/mpn/cray/gmp-mparam.h27
-rw-r--r--rts/gmp/mpn/cray/mul_1.c44
-rw-r--r--rts/gmp/mpn/cray/mulww.f54
-rw-r--r--rts/gmp/mpn/cray/mulww.s245
-rw-r--r--rts/gmp/mpn/cray/sub_n.c97
-rw-r--r--rts/gmp/mpn/cray/submul_1.c46
-rw-r--r--rts/gmp/mpn/generic/add_n.c62
-rw-r--r--rts/gmp/mpn/generic/addmul_1.c65
-rw-r--r--rts/gmp/mpn/generic/addsub_n.c167
-rw-r--r--rts/gmp/mpn/generic/bdivmod.c120
-rw-r--r--rts/gmp/mpn/generic/bz_divrem_n.c153
-rw-r--r--rts/gmp/mpn/generic/cmp.c56
-rw-r--r--rts/gmp/mpn/generic/diveby3.c77
-rw-r--r--rts/gmp/mpn/generic/divrem.c101
-rw-r--r--rts/gmp/mpn/generic/divrem_1.c248
-rw-r--r--rts/gmp/mpn/generic/divrem_2.c151
-rw-r--r--rts/gmp/mpn/generic/dump.c76
-rw-r--r--rts/gmp/mpn/generic/gcd.c414
-rw-r--r--rts/gmp/mpn/generic/gcd_1.c77
-rw-r--r--rts/gmp/mpn/generic/gcdext.c700
-rw-r--r--rts/gmp/mpn/generic/get_str.c216
-rw-r--r--rts/gmp/mpn/generic/gmp-mparam.h27
-rw-r--r--rts/gmp/mpn/generic/hamdist.c94
-rw-r--r--rts/gmp/mpn/generic/inlines.c24
-rw-r--r--rts/gmp/mpn/generic/jacbase.c136
-rw-r--r--rts/gmp/mpn/generic/lshift.c87
-rw-r--r--rts/gmp/mpn/generic/mod_1.c175
-rw-r--r--rts/gmp/mpn/generic/mod_1_rs.c111
-rw-r--r--rts/gmp/mpn/generic/mul.c190
-rw-r--r--rts/gmp/mpn/generic/mul_1.c59
-rw-r--r--rts/gmp/mpn/generic/mul_basecase.c87
-rw-r--r--rts/gmp/mpn/generic/mul_fft.c772
-rw-r--r--rts/gmp/mpn/generic/mul_n.c1343
-rw-r--r--rts/gmp/mpn/generic/perfsqr.c123
-rw-r--r--rts/gmp/mpn/generic/popcount.c93
-rw-r--r--rts/gmp/mpn/generic/pre_mod_1.c69
-rw-r--r--rts/gmp/mpn/generic/random.c43
-rw-r--r--rts/gmp/mpn/generic/random2.c105
-rw-r--r--rts/gmp/mpn/generic/rshift.c88
-rw-r--r--rts/gmp/mpn/generic/sb_divrem_mn.c201
-rw-r--r--rts/gmp/mpn/generic/scan0.c62
-rw-r--r--rts/gmp/mpn/generic/scan1.c62
-rw-r--r--rts/gmp/mpn/generic/set_str.c159
-rw-r--r--rts/gmp/mpn/generic/sqr_basecase.c83
-rw-r--r--rts/gmp/mpn/generic/sqrtrem.c509
-rw-r--r--rts/gmp/mpn/generic/sub_n.c62
-rw-r--r--rts/gmp/mpn/generic/submul_1.c65
-rw-r--r--rts/gmp/mpn/generic/tdiv_qr.c401
-rw-r--r--rts/gmp/mpn/generic/udiv_w_sdiv.c131
-rw-r--r--rts/gmp/mpn/hppa/README91
-rw-r--r--rts/gmp/mpn/hppa/add_n.s58
-rw-r--r--rts/gmp/mpn/hppa/gmp-mparam.h63
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/addmul_1.s102
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/mul_1.s98
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s75
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S189
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s83
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s80
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s76
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S195
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/submul_1.s111
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S80
-rw-r--r--rts/gmp/mpn/hppa/hppa1_1/umul.s42
-rw-r--r--rts/gmp/mpn/hppa/hppa2_0/add_n.s88
-rw-r--r--rts/gmp/mpn/hppa/hppa2_0/sub_n.s88
-rw-r--r--rts/gmp/mpn/hppa/lshift.s66
-rw-r--r--rts/gmp/mpn/hppa/rshift.s63
-rw-r--r--rts/gmp/mpn/hppa/sub_n.s59
-rw-r--r--rts/gmp/mpn/hppa/udiv_qrnnd.s286
-rw-r--r--rts/gmp/mpn/i960/README9
-rw-r--r--rts/gmp/mpn/i960/add_n.s43
-rw-r--r--rts/gmp/mpn/i960/addmul_1.s48
-rw-r--r--rts/gmp/mpn/i960/mul_1.s45
-rw-r--r--rts/gmp/mpn/i960/sub_n.s43
-rw-r--r--rts/gmp/mpn/lisp/gmpasm-mode.el351
-rw-r--r--rts/gmp/mpn/m68k/add_n.S79
-rw-r--r--rts/gmp/mpn/m68k/lshift.S150
-rw-r--r--rts/gmp/mpn/m68k/mc68020/addmul_1.S83
-rw-r--r--rts/gmp/mpn/m68k/mc68020/mul_1.S90
-rw-r--r--rts/gmp/mpn/m68k/mc68020/submul_1.S83
-rw-r--r--rts/gmp/mpn/m68k/mc68020/udiv.S31
-rw-r--r--rts/gmp/mpn/m68k/mc68020/umul.S31
-rw-r--r--rts/gmp/mpn/m68k/rshift.S149
-rw-r--r--rts/gmp/mpn/m68k/sub_n.S79
-rw-r--r--rts/gmp/mpn/m68k/syntax.h177
-rw-r--r--rts/gmp/mpn/m88k/add_n.s104
-rw-r--r--rts/gmp/mpn/m88k/mc88110/add_n.S200
-rw-r--r--rts/gmp/mpn/m88k/mc88110/addmul_1.s61
-rw-r--r--rts/gmp/mpn/m88k/mc88110/mul_1.s59
-rw-r--r--rts/gmp/mpn/m88k/mc88110/sub_n.S276
-rw-r--r--rts/gmp/mpn/m88k/mul_1.s127
-rw-r--r--rts/gmp/mpn/m88k/sub_n.s106
-rw-r--r--rts/gmp/mpn/mips2/add_n.s120
-rw-r--r--rts/gmp/mpn/mips2/addmul_1.s97
-rw-r--r--rts/gmp/mpn/mips2/lshift.s95
-rw-r--r--rts/gmp/mpn/mips2/mul_1.s85
-rw-r--r--rts/gmp/mpn/mips2/rshift.s92
-rw-r--r--rts/gmp/mpn/mips2/sub_n.s120
-rw-r--r--rts/gmp/mpn/mips2/submul_1.s97
-rw-r--r--rts/gmp/mpn/mips2/umul.s30
-rw-r--r--rts/gmp/mpn/mips3/README23
-rw-r--r--rts/gmp/mpn/mips3/add_n.s120
-rw-r--r--rts/gmp/mpn/mips3/addmul_1.s97
-rw-r--r--rts/gmp/mpn/mips3/gmp-mparam.h58
-rw-r--r--rts/gmp/mpn/mips3/lshift.s95
-rw-r--r--rts/gmp/mpn/mips3/mul_1.s85
-rw-r--r--rts/gmp/mpn/mips3/rshift.s92
-rw-r--r--rts/gmp/mpn/mips3/sub_n.s120
-rw-r--r--rts/gmp/mpn/mips3/submul_1.s97
-rw-r--r--rts/gmp/mpn/mp_bases.c550
-rw-r--r--rts/gmp/mpn/ns32k/add_n.s46
-rw-r--r--rts/gmp/mpn/ns32k/addmul_1.s48
-rw-r--r--rts/gmp/mpn/ns32k/mul_1.s47
-rw-r--r--rts/gmp/mpn/ns32k/sub_n.s46
-rw-r--r--rts/gmp/mpn/ns32k/submul_1.s48
-rw-r--r--rts/gmp/mpn/pa64/README38
-rw-r--r--rts/gmp/mpn/pa64/add_n.s90
-rw-r--r--rts/gmp/mpn/pa64/addmul_1.S167
-rw-r--r--rts/gmp/mpn/pa64/gmp-mparam.h65
-rw-r--r--rts/gmp/mpn/pa64/lshift.s103
-rw-r--r--rts/gmp/mpn/pa64/mul_1.S158
-rw-r--r--rts/gmp/mpn/pa64/rshift.s100
-rw-r--r--rts/gmp/mpn/pa64/sub_n.s90
-rw-r--r--rts/gmp/mpn/pa64/submul_1.S170
-rw-r--r--rts/gmp/mpn/pa64/udiv_qrnnd.c111
-rw-r--r--rts/gmp/mpn/pa64/umul_ppmm.S74
-rw-r--r--rts/gmp/mpn/pa64w/README2
-rw-r--r--rts/gmp/mpn/pa64w/add_n.s90
-rw-r--r--rts/gmp/mpn/pa64w/addmul_1.S168
-rw-r--r--rts/gmp/mpn/pa64w/gmp-mparam.h65
-rw-r--r--rts/gmp/mpn/pa64w/lshift.s103
-rw-r--r--rts/gmp/mpn/pa64w/mul_1.S159
-rw-r--r--rts/gmp/mpn/pa64w/rshift.s100
-rw-r--r--rts/gmp/mpn/pa64w/sub_n.s90
-rw-r--r--rts/gmp/mpn/pa64w/submul_1.S171
-rw-r--r--rts/gmp/mpn/pa64w/udiv_qrnnd.c117
-rw-r--r--rts/gmp/mpn/pa64w/umul_ppmm.S72
-rw-r--r--rts/gmp/mpn/power/add_n.s79
-rw-r--r--rts/gmp/mpn/power/addmul_1.s122
-rw-r--r--rts/gmp/mpn/power/lshift.s56
-rw-r--r--rts/gmp/mpn/power/mul_1.s109
-rw-r--r--rts/gmp/mpn/power/rshift.s54
-rw-r--r--rts/gmp/mpn/power/sdiv.s34
-rw-r--r--rts/gmp/mpn/power/sub_n.s80
-rw-r--r--rts/gmp/mpn/power/submul_1.s127
-rw-r--r--rts/gmp/mpn/power/umul.s38
-rw-r--r--rts/gmp/mpn/powerpc32/add_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc32/addmul_1.asm124
-rw-r--r--rts/gmp/mpn/powerpc32/aix.m439
-rw-r--r--rts/gmp/mpn/powerpc32/gmp-mparam.h66
-rw-r--r--rts/gmp/mpn/powerpc32/lshift.asm145
-rw-r--r--rts/gmp/mpn/powerpc32/mul_1.asm86
-rw-r--r--rts/gmp/mpn/powerpc32/regmap.m434
-rw-r--r--rts/gmp/mpn/powerpc32/rshift.asm60
-rw-r--r--rts/gmp/mpn/powerpc32/sub_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc32/submul_1.asm130
-rw-r--r--rts/gmp/mpn/powerpc32/umul.asm32
-rw-r--r--rts/gmp/mpn/powerpc64/README36
-rw-r--r--rts/gmp/mpn/powerpc64/add_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc64/addmul_1.asm52
-rw-r--r--rts/gmp/mpn/powerpc64/addsub_n.asm107
-rw-r--r--rts/gmp/mpn/powerpc64/aix.m440
-rw-r--r--rts/gmp/mpn/powerpc64/copyd.asm45
-rw-r--r--rts/gmp/mpn/powerpc64/copyi.asm44
-rw-r--r--rts/gmp/mpn/powerpc64/gmp-mparam.h62
-rw-r--r--rts/gmp/mpn/powerpc64/lshift.asm159
-rw-r--r--rts/gmp/mpn/powerpc64/mul_1.asm49
-rw-r--r--rts/gmp/mpn/powerpc64/rshift.asm60
-rw-r--r--rts/gmp/mpn/powerpc64/sub_n.asm61
-rw-r--r--rts/gmp/mpn/powerpc64/submul_1.asm54
-rw-r--r--rts/gmp/mpn/pyr/add_n.s76
-rw-r--r--rts/gmp/mpn/pyr/addmul_1.s45
-rw-r--r--rts/gmp/mpn/pyr/mul_1.s42
-rw-r--r--rts/gmp/mpn/pyr/sub_n.s76
-rw-r--r--rts/gmp/mpn/sh/add_n.s47
-rw-r--r--rts/gmp/mpn/sh/sh2/addmul_1.s53
-rw-r--r--rts/gmp/mpn/sh/sh2/mul_1.s50
-rw-r--r--rts/gmp/mpn/sh/sh2/submul_1.s53
-rw-r--r--rts/gmp/mpn/sh/sub_n.s47
-rw-r--r--rts/gmp/mpn/sparc32/README36
-rw-r--r--rts/gmp/mpn/sparc32/add_n.asm236
-rw-r--r--rts/gmp/mpn/sparc32/addmul_1.asm146
-rw-r--r--rts/gmp/mpn/sparc32/lshift.asm97
-rw-r--r--rts/gmp/mpn/sparc32/mul_1.asm137
-rw-r--r--rts/gmp/mpn/sparc32/rshift.asm93
-rw-r--r--rts/gmp/mpn/sparc32/sub_n.asm326
-rw-r--r--rts/gmp/mpn/sparc32/submul_1.asm146
-rw-r--r--rts/gmp/mpn/sparc32/udiv_fp.asm158
-rw-r--r--rts/gmp/mpn/sparc32/udiv_nfp.asm193
-rw-r--r--rts/gmp/mpn/sparc32/umul.asm68
-rw-r--r--rts/gmp/mpn/sparc32/v8/addmul_1.asm122
-rw-r--r--rts/gmp/mpn/sparc32/v8/mul_1.asm103
-rw-r--r--rts/gmp/mpn/sparc32/v8/submul_1.asm58
-rw-r--r--rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm122
-rw-r--r--rts/gmp/mpn/sparc32/v8/umul.asm31
-rw-r--r--rts/gmp/mpn/sparc32/v9/README4
-rw-r--r--rts/gmp/mpn/sparc32/v9/addmul_1.asm288
-rw-r--r--rts/gmp/mpn/sparc32/v9/gmp-mparam.h69
-rw-r--r--rts/gmp/mpn/sparc32/v9/mul_1.asm267
-rw-r--r--rts/gmp/mpn/sparc32/v9/submul_1.asm291
-rw-r--r--rts/gmp/mpn/sparc64/README48
-rw-r--r--rts/gmp/mpn/sparc64/add_n.asm172
-rw-r--r--rts/gmp/mpn/sparc64/addmul1h.asm203
-rw-r--r--rts/gmp/mpn/sparc64/addmul_1.asm114
-rw-r--r--rts/gmp/mpn/sparc64/copyi.asm79
-rw-r--r--rts/gmp/mpn/sparc64/gmp-mparam.h88
-rw-r--r--rts/gmp/mpn/sparc64/lshift.asm97
-rw-r--r--rts/gmp/mpn/sparc64/mul_1.asm113
-rw-r--r--rts/gmp/mpn/sparc64/mul_1h.asm183
-rw-r--r--rts/gmp/mpn/sparc64/rshift.asm94
-rw-r--r--rts/gmp/mpn/sparc64/sub_n.asm172
-rw-r--r--rts/gmp/mpn/sparc64/submul1h.asm204
-rw-r--r--rts/gmp/mpn/sparc64/submul_1.asm114
-rw-r--r--rts/gmp/mpn/thumb/add_n.s50
-rw-r--r--rts/gmp/mpn/thumb/sub_n.s50
-rw-r--r--rts/gmp/mpn/underscore.h26
-rw-r--r--rts/gmp/mpn/vax/add_n.s61
-rw-r--r--rts/gmp/mpn/vax/addmul_1.s126
-rw-r--r--rts/gmp/mpn/vax/lshift.s58
-rw-r--r--rts/gmp/mpn/vax/mul_1.s123
-rw-r--r--rts/gmp/mpn/vax/rshift.s56
-rw-r--r--rts/gmp/mpn/vax/sub_n.s61
-rw-r--r--rts/gmp/mpn/vax/submul_1.s126
-rw-r--r--rts/gmp/mpn/x86/README40
-rw-r--r--rts/gmp/mpn/x86/README.family333
-rw-r--r--rts/gmp/mpn/x86/addsub_n.S174
-rw-r--r--rts/gmp/mpn/x86/aors_n.asm187
-rw-r--r--rts/gmp/mpn/x86/aorsmul_1.asm134
-rw-r--r--rts/gmp/mpn/x86/copyd.asm80
-rw-r--r--rts/gmp/mpn/x86/copyi.asm79
-rw-r--r--rts/gmp/mpn/x86/diveby3.asm115
-rw-r--r--rts/gmp/mpn/x86/divrem_1.asm232
-rw-r--r--rts/gmp/mpn/x86/k6/README237
-rw-r--r--rts/gmp/mpn/x86/k6/aors_n.asm329
-rw-r--r--rts/gmp/mpn/x86/k6/aorsmul_1.asm372
-rw-r--r--rts/gmp/mpn/x86/k6/cross.pl141
-rw-r--r--rts/gmp/mpn/x86/k6/diveby3.asm110
-rw-r--r--rts/gmp/mpn/x86/k6/gmp-mparam.h97
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/copyd.asm179
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/copyi.asm196
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/lshift.asm286
-rw-r--r--rts/gmp/mpn/x86/k6/k62mmx/rshift.asm285
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/com_n.asm91
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/logops_n.asm212
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/lshift.asm122
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/popham.asm238
-rw-r--r--rts/gmp/mpn/x86/k6/mmx/rshift.asm122
-rw-r--r--rts/gmp/mpn/x86/k6/mul_1.asm272
-rw-r--r--rts/gmp/mpn/x86/k6/mul_basecase.asm600
-rw-r--r--rts/gmp/mpn/x86/k6/sqr_basecase.asm672
-rw-r--r--rts/gmp/mpn/x86/k7/README145
-rw-r--r--rts/gmp/mpn/x86/k7/aors_n.asm250
-rw-r--r--rts/gmp/mpn/x86/k7/aorsmul_1.asm364
-rw-r--r--rts/gmp/mpn/x86/k7/diveby3.asm131
-rw-r--r--rts/gmp/mpn/x86/k7/gmp-mparam.h100
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/copyd.asm136
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/copyi.asm147
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/divrem_1.asm718
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/lshift.asm472
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/mod_1.asm457
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/popham.asm239
-rw-r--r--rts/gmp/mpn/x86/k7/mmx/rshift.asm471
-rw-r--r--rts/gmp/mpn/x86/k7/mul_1.asm265
-rw-r--r--rts/gmp/mpn/x86/k7/mul_basecase.asm593
-rw-r--r--rts/gmp/mpn/x86/k7/sqr_basecase.asm627
-rw-r--r--rts/gmp/mpn/x86/lshift.asm90
-rw-r--r--rts/gmp/mpn/x86/mod_1.asm141
-rw-r--r--rts/gmp/mpn/x86/mul_1.asm130
-rw-r--r--rts/gmp/mpn/x86/mul_basecase.asm209
-rw-r--r--rts/gmp/mpn/x86/p6/README95
-rw-r--r--rts/gmp/mpn/x86/p6/aorsmul_1.asm300
-rw-r--r--rts/gmp/mpn/x86/p6/diveby3.asm37
-rw-r--r--rts/gmp/mpn/x86/p6/gmp-mparam.h96
-rw-r--r--rts/gmp/mpn/x86/p6/mmx/divrem_1.asm677
-rw-r--r--rts/gmp/mpn/x86/p6/mmx/mod_1.asm444
-rw-r--r--rts/gmp/mpn/x86/p6/mmx/popham.asm31
-rw-r--r--rts/gmp/mpn/x86/p6/p3mmx/popham.asm30
-rw-r--r--rts/gmp/mpn/x86/p6/sqr_basecase.asm641
-rw-r--r--rts/gmp/mpn/x86/pentium/README77
-rw-r--r--rts/gmp/mpn/x86/pentium/aors_n.asm196
-rw-r--r--rts/gmp/mpn/x86/pentium/aorsmul_1.asm99
-rw-r--r--rts/gmp/mpn/x86/pentium/diveby3.asm183
-rw-r--r--rts/gmp/mpn/x86/pentium/gmp-mparam.h97
-rw-r--r--rts/gmp/mpn/x86/pentium/lshift.asm236
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h97
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/lshift.asm455
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/popham.asm30
-rw-r--r--rts/gmp/mpn/x86/pentium/mmx/rshift.asm460
-rw-r--r--rts/gmp/mpn/x86/pentium/mul_1.asm79
-rw-r--r--rts/gmp/mpn/x86/pentium/mul_basecase.asm135
-rw-r--r--rts/gmp/mpn/x86/pentium/rshift.asm236
-rw-r--r--rts/gmp/mpn/x86/pentium/sqr_basecase.asm520
-rw-r--r--rts/gmp/mpn/x86/rshift.asm92
-rw-r--r--rts/gmp/mpn/x86/udiv.asm44
-rw-r--r--rts/gmp/mpn/x86/umul.asm43
-rw-r--r--rts/gmp/mpn/x86/x86-defs.m4713
-rw-r--r--rts/gmp/mpn/z8000/add_n.s53
-rw-r--r--rts/gmp/mpn/z8000/gmp-mparam.h27
-rw-r--r--rts/gmp/mpn/z8000/mul_1.s68
-rw-r--r--rts/gmp/mpn/z8000/sub_n.s54
-rw-r--r--rts/gmp/mpn/z8000x/add_n.s56
-rw-r--r--rts/gmp/mpn/z8000x/sub_n.s56
-rw-r--r--rts/gmp/mpz/Makefile.am58
-rw-r--r--rts/gmp/mpz/Makefile.in457
-rw-r--r--rts/gmp/mpz/README23
-rw-r--r--rts/gmp/mpz/abs.c51
-rw-r--r--rts/gmp/mpz/add.c123
-rw-r--r--rts/gmp/mpz/add_ui.c84
-rw-r--r--rts/gmp/mpz/addmul_ui.c214
-rw-r--r--rts/gmp/mpz/and.c278
-rw-r--r--rts/gmp/mpz/array_init.c48
-rw-r--r--rts/gmp/mpz/bin_ui.c141
-rw-r--r--rts/gmp/mpz/bin_uiui.c120
-rw-r--r--rts/gmp/mpz/cdiv_q.c51
-rw-r--r--rts/gmp/mpz/cdiv_q_ui.c67
-rw-r--r--rts/gmp/mpz/cdiv_qr.c64
-rw-r--r--rts/gmp/mpz/cdiv_qr_ui.c71
-rw-r--r--rts/gmp/mpz/cdiv_r.c59
-rw-r--r--rts/gmp/mpz/cdiv_r_ui.c57
-rw-r--r--rts/gmp/mpz/cdiv_ui.c50
-rw-r--r--rts/gmp/mpz/clear.c35
-rw-r--r--rts/gmp/mpz/clrbit.c114
-rw-r--r--rts/gmp/mpz/cmp.c75
-rw-r--r--rts/gmp/mpz/cmp_si.c64
-rw-r--r--rts/gmp/mpz/cmp_ui.c53
-rw-r--r--rts/gmp/mpz/cmpabs.c57
-rw-r--r--rts/gmp/mpz/cmpabs_ui.c56
-rw-r--r--rts/gmp/mpz/com.c93
-rw-r--r--rts/gmp/mpz/divexact.c125
-rw-r--r--rts/gmp/mpz/dump.c44
-rw-r--r--rts/gmp/mpz/fac_ui.c157
-rw-r--r--rts/gmp/mpz/fdiv_q.c51
-rw-r--r--rts/gmp/mpz/fdiv_q_2exp.c104
-rw-r--r--rts/gmp/mpz/fdiv_q_ui.c65
-rw-r--r--rts/gmp/mpz/fdiv_qr.c64
-rw-r--r--rts/gmp/mpz/fdiv_qr_ui.c69
-rw-r--r--rts/gmp/mpz/fdiv_r.c58
-rw-r--r--rts/gmp/mpz/fdiv_r_2exp.c156
-rw-r--r--rts/gmp/mpz/fdiv_r_ui.c55
-rw-r--r--rts/gmp/mpz/fdiv_ui.c48
-rw-r--r--rts/gmp/mpz/fib_ui.c165
-rw-r--r--rts/gmp/mpz/fits_sint_p.c50
-rw-r--r--rts/gmp/mpz/fits_slong_p.c50
-rw-r--r--rts/gmp/mpz/fits_sshort_p.c50
-rw-r--r--rts/gmp/mpz/fits_uint_p.c41
-rw-r--r--rts/gmp/mpz/fits_ulong_p.c41
-rw-r--r--rts/gmp/mpz/fits_ushort_p.c41
-rw-r--r--rts/gmp/mpz/gcd.c180
-rw-r--r--rts/gmp/mpz/gcd_ui.c65
-rw-r--r--rts/gmp/mpz/gcdext.c137
-rw-r--r--rts/gmp/mpz/get_d.c128
-rw-r--r--rts/gmp/mpz/get_si.c43
-rw-r--r--rts/gmp/mpz/get_str.c118
-rw-r--r--rts/gmp/mpz/get_ui.c37
-rw-r--r--rts/gmp/mpz/getlimbn.c38
-rw-r--r--rts/gmp/mpz/hamdist.c62
-rw-r--r--rts/gmp/mpz/init.c36
-rw-r--r--rts/gmp/mpz/inp_raw.c101
-rw-r--r--rts/gmp/mpz/inp_str.c167
-rw-r--r--rts/gmp/mpz/invert.c77
-rw-r--r--rts/gmp/mpz/ior.c244
-rw-r--r--rts/gmp/mpz/iset.c49
-rw-r--r--rts/gmp/mpz/iset_d.c39
-rw-r--r--rts/gmp/mpz/iset_si.c49
-rw-r--r--rts/gmp/mpz/iset_str.c47
-rw-r--r--rts/gmp/mpz/iset_ui.c39
-rw-r--r--rts/gmp/mpz/jacobi.c53
-rw-r--r--rts/gmp/mpz/kronsz.c126
-rw-r--r--rts/gmp/mpz/kronuz.c115
-rw-r--r--rts/gmp/mpz/kronzs.c74
-rw-r--r--rts/gmp/mpz/kronzu.c66
-rw-r--r--rts/gmp/mpz/lcm.c61
-rw-r--r--rts/gmp/mpz/legendre.c184
-rw-r--r--rts/gmp/mpz/mod.c63
-rw-r--r--rts/gmp/mpz/mul.c131
-rw-r--r--rts/gmp/mpz/mul_2exp.c76
-rw-r--r--rts/gmp/mpz/mul_siui.c81
-rw-r--r--rts/gmp/mpz/neg.c53
-rw-r--r--rts/gmp/mpz/nextprime.c120
-rw-r--r--rts/gmp/mpz/out_raw.c89
-rw-r--r--rts/gmp/mpz/out_str.c108
-rw-r--r--rts/gmp/mpz/perfpow.c272
-rw-r--r--rts/gmp/mpz/perfsqr.c45
-rw-r--r--rts/gmp/mpz/popcount.c42
-rw-r--r--rts/gmp/mpz/pow_ui.c129
-rw-r--r--rts/gmp/mpz/powm.c364
-rw-r--r--rts/gmp/mpz/powm_ui.c248
-rw-r--r--rts/gmp/mpz/pprime_p.c242
-rw-r--r--rts/gmp/mpz/random.c56
-rw-r--r--rts/gmp/mpz/random2.c48
-rw-r--r--rts/gmp/mpz/realloc.c52
-rw-r--r--rts/gmp/mpz/remove.c93
-rw-r--r--rts/gmp/mpz/root.c183
-rw-r--r--rts/gmp/mpz/rrandomb.c117
-rw-r--r--rts/gmp/mpz/scan0.c35
-rw-r--r--rts/gmp/mpz/scan1.c35
-rw-r--r--rts/gmp/mpz/set.c48
-rw-r--r--rts/gmp/mpz/set_d.c96
-rw-r--r--rts/gmp/mpz/set_f.c64
-rw-r--r--rts/gmp/mpz/set_q.c36
-rw-r--r--rts/gmp/mpz/set_si.c48
-rw-r--r--rts/gmp/mpz/set_str.c157
-rw-r--r--rts/gmp/mpz/set_ui.c43
-rw-r--r--rts/gmp/mpz/setbit.c119
-rw-r--r--rts/gmp/mpz/size.c35
-rw-r--r--rts/gmp/mpz/sizeinbase.c60
-rw-r--r--rts/gmp/mpz/sqrt.c86
-rw-r--r--rts/gmp/mpz/sqrtrem.c111
-rw-r--r--rts/gmp/mpz/sub.c123
-rw-r--r--rts/gmp/mpz/sub_ui.c84
-rw-r--r--rts/gmp/mpz/swap.c52
-rw-r--r--rts/gmp/mpz/tdiv_q.c91
-rw-r--r--rts/gmp/mpz/tdiv_q_2exp.c68
-rw-r--r--rts/gmp/mpz/tdiv_q_ui.c64
-rw-r--r--rts/gmp/mpz/tdiv_qr.c130
-rw-r--r--rts/gmp/mpz/tdiv_qr_ui.c76
-rw-r--r--rts/gmp/mpz/tdiv_r.c98
-rw-r--r--rts/gmp/mpz/tdiv_r_2exp.c79
-rw-r--r--rts/gmp/mpz/tdiv_r_ui.c63
-rw-r--r--rts/gmp/mpz/tdiv_ui.c53
-rw-r--r--rts/gmp/mpz/tstbit.c70
-rw-r--r--rts/gmp/mpz/ui_pow_ui.c139
-rw-r--r--rts/gmp/mpz/urandomb.c49
-rw-r--r--rts/gmp/mpz/urandomm.c78
-rw-r--r--rts/gmp/mpz/xor.c217
-rw-r--r--rts/gmp/rand.c171
-rw-r--r--rts/gmp/randclr.c54
-rw-r--r--rts/gmp/randlc.c56
-rw-r--r--rts/gmp/randlc2x.c59
-rw-r--r--rts/gmp/randraw.c360
-rw-r--r--rts/gmp/randsd.c37
-rw-r--r--rts/gmp/randsdui.c37
-rw-r--r--rts/gmp/stack-alloc.c136
-rw-r--r--rts/gmp/stack-alloc.h64
-rw-r--r--rts/gmp/stamp-h.in1
-rw-r--r--rts/gmp/stamp-vti3
-rw-r--r--rts/gmp/urandom.h86
-rw-r--r--rts/gmp/version.c26
-rw-r--r--rts/gmp/version.texi3
-rw-r--r--rts/hooks/FlagDefaults.c20
-rw-r--r--rts/hooks/InitEachPE.c23
-rw-r--r--rts/hooks/MallocFail.c16
-rw-r--r--rts/hooks/OnExit.c19
-rw-r--r--rts/hooks/OutOfHeap.c19
-rw-r--r--rts/hooks/RtsOpts.c13
-rw-r--r--rts/hooks/ShutdownEachPEHook.c19
-rw-r--r--rts/hooks/StackOverflow.c16
-rw-r--r--rts/package.conf.in152
-rw-r--r--rts/parallel/0Hash.c320
-rw-r--r--rts/parallel/0Parallel.h414
-rw-r--r--rts/parallel/0Unpack.c440
-rw-r--r--rts/parallel/Dist.c117
-rw-r--r--rts/parallel/Dist.h20
-rw-r--r--rts/parallel/FetchMe.h24
-rw-r--r--rts/parallel/FetchMe.hc180
-rw-r--r--rts/parallel/Global.c1090
-rw-r--r--rts/parallel/GranSim.c3015
-rw-r--r--rts/parallel/GranSimRts.h268
-rw-r--r--rts/parallel/HLC.h63
-rw-r--r--rts/parallel/HLComms.c1810
-rw-r--r--rts/parallel/LLC.h130
-rw-r--r--rts/parallel/LLComms.c489
-rw-r--r--rts/parallel/PEOpCodes.h58
-rw-r--r--rts/parallel/Pack.c4293
-rw-r--r--rts/parallel/ParInit.c322
-rw-r--r--rts/parallel/ParInit.h19
-rw-r--r--rts/parallel/ParTicky.c450
-rw-r--r--rts/parallel/ParTicky.h60
-rw-r--r--rts/parallel/ParTypes.h38
-rw-r--r--rts/parallel/Parallel.c1140
-rw-r--r--rts/parallel/ParallelDebug.c1955
-rw-r--r--rts/parallel/ParallelDebug.h79
-rw-r--r--rts/parallel/ParallelRts.h253
-rw-r--r--rts/parallel/RBH.c337
-rw-r--r--rts/parallel/SysMan.c650
-rw-r--r--rts/posix/GetTime.c141
-rw-r--r--rts/posix/Itimer.c226
-rw-r--r--rts/posix/Itimer.h19
-rw-r--r--rts/posix/OSThreads.c166
-rw-r--r--rts/posix/Select.c279
-rw-r--r--rts/posix/Select.h26
-rw-r--r--rts/posix/Signals.c510
-rw-r--r--rts/posix/Signals.h26
-rw-r--r--rts/win32/AsyncIO.c345
-rw-r--r--rts/win32/AsyncIO.h25
-rw-r--r--rts/win32/AwaitEvent.c51
-rw-r--r--rts/win32/ConsoleHandler.c313
-rw-r--r--rts/win32/ConsoleHandler.h63
-rw-r--r--rts/win32/GetTime.c101
-rw-r--r--rts/win32/IOManager.c510
-rw-r--r--rts/win32/IOManager.h110
-rw-r--r--rts/win32/OSThreads.c199
-rw-r--r--rts/win32/Ticker.c124
-rw-r--r--rts/win32/WorkQueue.c215
-rw-r--r--rts/win32/WorkQueue.h37
684 files changed, 164348 insertions, 0 deletions
diff --git a/rts/Adjustor.c b/rts/Adjustor.c
new file mode 100644
index 0000000000..f3e5bfe6aa
--- /dev/null
+++ b/rts/Adjustor.c
@@ -0,0 +1,1110 @@
+/* -----------------------------------------------------------------------------
+ * Foreign export adjustor thunks
+ *
+ * Copyright (c) 1998.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* A little bit of background...
+
+An adjustor thunk is a dynamically allocated code snippet that allows
+Haskell closures to be viewed as C function pointers.
+
+Stable pointers provide a way for the outside world to get access to,
+and evaluate, Haskell heap objects, with the RTS providing a small
+range of ops for doing so. So, assuming we've got a stable pointer in
+our hand in C, we can jump into the Haskell world and evaluate a callback
+procedure, say. This works OK in some cases where callbacks are used, but
+does require the external code to know about stable pointers and how to deal
+with them. We'd like to hide the Haskell-nature of a callback and have it
+be invoked just like any other C function pointer.
+
+Enter adjustor thunks. An adjustor thunk is a little piece of code
+that's generated on-the-fly (one per Haskell closure being exported)
+that, when entered using some 'universal' calling convention (e.g., the
+C calling convention on platform X), pushes an implicit stable pointer
+(to the Haskell callback) before calling another (static) C function stub
+which takes care of entering the Haskell code via its stable pointer.
+
+An adjustor thunk is allocated on the C heap, and is called from within
+Haskell just before handing out the function pointer to the Haskell (IO)
+action. User code should never have to invoke it explicitly.
+
+An adjustor thunk differs from a C function pointer in one respect: when
+the code is through with it, it has to be freed in order to release Haskell
+and C resources. Failure to do so results in memory leaks on both the C and
+Haskell side.
+*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsExternal.h"
+#include "RtsUtils.h"
+#include <stdlib.h>
+
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
+#include <string.h>
+#endif
+
+#ifdef LEADING_UNDERSCORE
+#define UNDERSCORE "_"
+#else
+#define UNDERSCORE ""
+#endif
+#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
+/*
+ Now here's something obscure for you:
+
+ When generating an adjustor thunk that uses the C calling
+ convention, we have to make sure that the thunk kicks off
+ the process of jumping into Haskell with a tail jump. Why?
+ Because as a result of jumping in into Haskell we may end
+ up freeing the very adjustor thunk we came from using
+ freeHaskellFunctionPtr(). Hence, we better not return to
+ the adjustor code on our way out, since it could by then
+ point to junk.
+
+ The fix is readily at hand, just include the opcodes
+ for the C stack fixup code that we need to perform when
+ returning in some static piece of memory and arrange
+ to return to it before tail jumping from the adjustor thunk.
+*/
+static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
+{
+ __asm__ (
+ ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
+ UNDERSCORE "obscure_ccall_ret_code:\n\t"
+ "addl $0x4, %esp\n\t"
+ "ret"
+ );
+}
+extern void obscure_ccall_ret_code(void);
+
+#if defined(openbsd_HOST_OS)
+static unsigned char *obscure_ccall_ret_code_dyn;
+#endif
+
+#endif
+
+#if defined(x86_64_HOST_ARCH)
+static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
+{
+ __asm__ (
+ ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
+ UNDERSCORE "obscure_ccall_ret_code:\n\t"
+ "addq $0x8, %rsp\n\t"
+ "ret"
+ );
+}
+extern void obscure_ccall_ret_code(void);
+#endif
+
+#if defined(alpha_HOST_ARCH)
+/* To get the definition of PAL_imb: */
+# if defined(linux_HOST_OS)
+# include <asm/pal.h>
+# else
+# include <machine/pal.h>
+# endif
+#endif
+
+#if defined(ia64_HOST_ARCH)
+#include "Storage.h"
+
+/* Layout of a function descriptor */
+typedef struct _IA64FunDesc {
+ StgWord64 ip;
+ StgWord64 gp;
+} IA64FunDesc;
+
+static void *
+stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
+{
+ StgArrWords* arr;
+ nat data_size_in_words, total_size_in_words;
+
+ /* round up to a whole number of words */
+ data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
+ total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
+
+ /* allocate and fill it in */
+ arr = (StgArrWords *)allocate(total_size_in_words);
+ SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
+
+ /* obtain a stable ptr */
+ *stable = getStablePtr((StgPtr)arr);
+
+ /* and return a ptr to the goods inside the array */
+ return(&(arr->payload));
+}
+#endif
+
+#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
+__asm__("obscure_ccall_ret_code:\n\t"
+ "lwz 1,0(1)\n\t"
+ "lwz 0,4(1)\n\t"
+ "mtlr 0\n\t"
+ "blr");
+extern void obscure_ccall_ret_code(void);
+#endif
+
+#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
+#if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
+
+/* !!! !!! WARNING: !!! !!!
+ * This structure is accessed from AdjustorAsm.s
+ * Any changes here have to be mirrored in the offsets there.
+ */
+
+typedef struct AdjustorStub {
+#if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
+ unsigned lis;
+ unsigned ori;
+ unsigned lwz;
+ unsigned mtctr;
+ unsigned bctr;
+ StgFunPtr code;
+#elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
+ /* powerpc64-darwin: just guessing that it won't use fundescs. */
+ unsigned lis;
+ unsigned ori;
+ unsigned rldimi;
+ unsigned oris;
+ unsigned ori2;
+ unsigned lwz;
+ unsigned mtctr;
+ unsigned bctr;
+ StgFunPtr code;
+#else
+ /* fundesc-based ABIs */
+#define FUNDESCS
+ StgFunPtr code;
+ struct AdjustorStub
+ *toc;
+ void *env;
+#endif
+ StgStablePtr hptr;
+ StgFunPtr wptr;
+ StgInt negative_framesize;
+ StgInt extrawords_plus_one;
+} AdjustorStub;
+
+#endif
+#endif
+
+#if defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
+
+/* !!! !!! WARNING: !!! !!!
+ * This structure is accessed from AdjustorAsm.s
+ * Any changes here have to be mirrored in the offsets there.
+ */
+
+typedef struct AdjustorStub {
+ unsigned char call[8];
+ StgStablePtr hptr;
+ StgFunPtr wptr;
+ StgInt frame_size;
+ StgInt argument_size;
+} AdjustorStub;
+#endif
+
+#if defined(darwin_HOST_OS) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
+static int totalArgumentSize(char *typeString)
+{
+ int sz = 0;
+ while(*typeString)
+ {
+ char t = *typeString++;
+
+ switch(t)
+ {
+ // on 32-bit platforms, Double and Int64 occupy two words.
+ case 'd':
+ case 'l':
+ if(sizeof(void*) == 4)
+ {
+ sz += 2;
+ break;
+ }
+ // everything else is one word.
+ default:
+ sz += 1;
+ }
+ }
+ return sz;
+}
+#endif
+
+void*
+createAdjustor(int cconv, StgStablePtr hptr,
+ StgFunPtr wptr,
+ char *typeString
+#if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
+ STG_UNUSED
+#endif
+ )
+{
+ void *adjustor = NULL;
+
+ switch (cconv)
+ {
+ case 0: /* _stdcall */
+#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
+ /* Magic constant computed by inspecting the code length of
+ the following assembly language snippet
+ (offset and machine code prefixed):
+
+ <0>: 58 popl %eax # temp. remove ret addr..
+ <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
+ # hold a StgStablePtr
+ <6>: 50 pushl %eax # put back ret. addr
+ <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
+ <c>: ff e0 jmp %eax # and jump to it.
+ # the callee cleans up the stack
+ */
+ adjustor = stgMallocBytesRWX(14);
+ {
+ unsigned char *const adj_code = (unsigned char *)adjustor;
+ adj_code[0x00] = (unsigned char)0x58; /* popl %eax */
+
+ adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
+ *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
+
+ adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
+
+ adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */
+ *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
+
+ adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
+ adj_code[0x0d] = (unsigned char)0xe0;
+ }
+#endif
+ break;
+
+ case 1: /* _ccall */
+#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
+ /* Magic constant computed by inspecting the code length of
+ the following assembly language snippet
+ (offset and machine code prefixed):
+
+ <00>: 68 ef be ad de pushl $0xdeadbeef # constant is large enough to
+ # hold a StgStablePtr
+ <05>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
+ <0a>: 68 ef be ad de pushl $obscure_ccall_ret_code # push the return address
+ <0f>: ff e0 jmp *%eax # jump to wptr
+
+ The ccall'ing version is a tad different, passing in the return
+ address of the caller to the auto-generated C stub (which enters
+ via the stable pointer.) (The auto-generated C stub is in on this
+ game, don't worry :-)
+
+ See the comment next to obscure_ccall_ret_code why we need to
+ perform a tail jump instead of a call, followed by some C stack
+ fixup.
+
+ Note: The adjustor makes the assumption that any return value
+ coming back from the C stub is not stored on the stack.
+ That's (thankfully) the case here with the restricted set of
+ return types that we support.
+ */
+ adjustor = stgMallocBytesRWX(17);
+ {
+ unsigned char *const adj_code = (unsigned char *)adjustor;
+
+ adj_code[0x00] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */
+ *((StgStablePtr*)(adj_code+0x01)) = (StgStablePtr)hptr;
+
+ adj_code[0x05] = (unsigned char)0xb8; /* movl $wptr, %eax */
+ *((StgFunPtr*)(adj_code + 0x06)) = (StgFunPtr)wptr;
+
+ adj_code[0x0a] = (unsigned char)0x68; /* pushl obscure_ccall_ret_code */
+ *((StgFunPtr*)(adj_code + 0x0b)) =
+#if !defined(openbsd_HOST_OS)
+ (StgFunPtr)obscure_ccall_ret_code;
+#else
+ (StgFunPtr)obscure_ccall_ret_code_dyn;
+#endif
+
+ adj_code[0x0f] = (unsigned char)0xff; /* jmp *%eax */
+ adj_code[0x10] = (unsigned char)0xe0;
+ }
+#elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
+ {
+ /*
+ What's special about Darwin/Mac OS X on i386?
+ It wants the stack to stay 16-byte aligned.
+
+ We offload most of the work to AdjustorAsm.S.
+ */
+ AdjustorStub *adjustorStub = stgMallocBytesRWX(sizeof(AdjustorStub));
+ adjustor = adjustorStub;
+
+ extern void adjustorCode(void);
+ int sz = totalArgumentSize(typeString);
+
+ adjustorStub->call[0] = 0xe8;
+ *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
+ adjustorStub->hptr = hptr;
+ adjustorStub->wptr = wptr;
+
+ // The adjustor puts the following things on the stack:
+ // 1.) %ebp link
+ // 2.) padding and (a copy of) the arguments
+ // 3.) a dummy argument
+ // 4.) hptr
+ // 5.) return address (for returning to the adjustor)
+ // All these have to add up to a multiple of 16.
+
+ // first, include everything in frame_size
+ adjustorStub->frame_size = sz * 4 + 16;
+ // align to 16 bytes
+ adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
+ // only count 2.) and 3.) as part of frame_size
+ adjustorStub->frame_size -= 12;
+ adjustorStub->argument_size = sz;
+ }
+
+#elif defined(x86_64_HOST_ARCH)
+ /*
+ stack at call:
+ argn
+ ...
+ arg7
+ return address
+ %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg0..arg6
+
+ if there are <6 integer args, then we can just push the
+ StablePtr into %edi and shuffle the other args up.
+
+ If there are >=6 integer args, then we have to flush one arg
+ to the stack, and arrange to adjust the stack ptr on return.
+ The stack will be rearranged to this:
+
+ argn
+ ...
+ arg7
+ return address *** <-- dummy arg in stub fn.
+ arg6
+ obscure_ccall_ret_code
+
+ This unfortunately means that the type of the stub function
+ must have a dummy argument for the original return address
+ pointer inserted just after the 6th integer argument.
+
+ Code for the simple case:
+
+ 0: 4d 89 c1 mov %r8,%r9
+ 3: 49 89 c8 mov %rcx,%r8
+ 6: 48 89 d1 mov %rdx,%rcx
+ 9: 48 89 f2 mov %rsi,%rdx
+ c: 48 89 fe mov %rdi,%rsi
+ f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
+ 16: ff 25 0c 00 00 00 jmpq *12(%rip)
+ ...
+ 20: .quad 0 # aligned on 8-byte boundary
+ 28: .quad 0 # aligned on 8-byte boundary
+
+
+ And the version for >=6 integer arguments:
+
+ 0: 41 51 push %r9
+ 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
+ 8: 4d 89 c1 mov %r8,%r9
+ b: 49 89 c8 mov %rcx,%r8
+ e: 48 89 d1 mov %rdx,%rcx
+ 11: 48 89 f2 mov %rsi,%rdx
+ 14: 48 89 fe mov %rdi,%rsi
+ 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
+ 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
+ ...
+ 28: .quad 0 # aligned on 8-byte boundary
+ 30: .quad 0 # aligned on 8-byte boundary
+ 38: .quad 0 # aligned on 8-byte boundary
+ */
+
+    /* we assume the small code model (gcc -mcmodel=small) where
+     * all symbols are <2^32, so wptr should fit into 32 bits.
+ */
+ ASSERT(((long)wptr >> 32) == 0);
+
+ {
+ int i = 0;
+ char *c;
+
+ // determine whether we have 6 or more integer arguments,
+ // and therefore need to flush one to the stack.
+ for (c = typeString; *c != '\0'; c++) {
+ if (*c == 'i' || *c == 'l') i++;
+ if (i == 6) break;
+ }
+
+ if (i < 6) {
+ adjustor = stgMallocBytesRWX(0x30);
+
+ *(StgInt32 *)adjustor = 0x49c1894d;
+ *(StgInt32 *)(adjustor+0x4) = 0x8948c889;
+ *(StgInt32 *)(adjustor+0x8) = 0xf28948d1;
+ *(StgInt32 *)(adjustor+0xc) = 0x48fe8948;
+ *(StgInt32 *)(adjustor+0x10) = 0x000a3d8b;
+ *(StgInt32 *)(adjustor+0x14) = 0x25ff0000;
+ *(StgInt32 *)(adjustor+0x18) = 0x0000000c;
+ *(StgInt64 *)(adjustor+0x20) = (StgInt64)hptr;
+ *(StgInt64 *)(adjustor+0x28) = (StgInt64)wptr;
+ }
+ else
+ {
+ adjustor = stgMallocBytesRWX(0x40);
+
+ *(StgInt32 *)adjustor = 0x35ff5141;
+ *(StgInt32 *)(adjustor+0x4) = 0x00000020;
+ *(StgInt32 *)(adjustor+0x8) = 0x49c1894d;
+ *(StgInt32 *)(adjustor+0xc) = 0x8948c889;
+ *(StgInt32 *)(adjustor+0x10) = 0xf28948d1;
+ *(StgInt32 *)(adjustor+0x14) = 0x48fe8948;
+ *(StgInt32 *)(adjustor+0x18) = 0x00123d8b;
+ *(StgInt32 *)(adjustor+0x1c) = 0x25ff0000;
+ *(StgInt32 *)(adjustor+0x20) = 0x00000014;
+
+ *(StgInt64 *)(adjustor+0x28) = (StgInt64)obscure_ccall_ret_code;
+ *(StgInt64 *)(adjustor+0x30) = (StgInt64)hptr;
+ *(StgInt64 *)(adjustor+0x38) = (StgInt64)wptr;
+ }
+ }
+#elif defined(sparc_HOST_ARCH)
+ /* Magic constant computed by inspecting the code length of the following
+ assembly language snippet (offset and machine code prefixed):
+
+ <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame
+ <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions
+ <08>: D823A05C st %o4, [%sp + 92]
+ <0C>: 9A10000B mov %o3, %o5
+ <10>: 9810000A mov %o2, %o4
+ <14>: 96100009 mov %o1, %o3
+ <18>: 94100008 mov %o0, %o2
+ <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2)
+ <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2)
+ <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2)
+ <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
+ <2C> 00000000 ! place for getting hptr back easily
+
+ ccall'ing on SPARC is easy, because we are quite lucky to push a
+ multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
+ existing arguments (note that %sp must stay double-word aligned at
+ all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
+ To do this, we extend the *caller's* stack frame by 2 words and shift
+ the output registers used for argument passing (%o0 - %o5, we are a *leaf*
+ procedure because of the tail-jump) by 2 positions. This makes room in
+     %o0 and %o1 for the additional arguments, namely hptr and a dummy (used
+ for destination addr of jump on SPARC, return address on x86, ...). This
+ shouldn't cause any problems for a C-like caller: alloca is implemented
+ similarly, and local variables should be accessed via %fp, not %sp. In a
+ nutshell: This should work! (Famous last words! :-)
+ */
+ adjustor = stgMallocBytesRWX(4*(11+1));
+ {
+ unsigned long *const adj_code = (unsigned long *)adjustor;
+
+ adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */
+ adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */
+ adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */
+ adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */
+ adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */
+ adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */
+ adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */
+ adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */
+ adj_code[ 7] |= ((unsigned long)wptr) >> 10;
+ adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */
+ adj_code[ 8] |= ((unsigned long)hptr) >> 10;
+ adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */
+ adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
+ adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */
+ adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
+
+ adj_code[11] = (unsigned long)hptr;
+
+ /* flush cache */
+ asm("flush %0" : : "r" (adj_code ));
+ asm("flush %0" : : "r" (adj_code + 2));
+ asm("flush %0" : : "r" (adj_code + 4));
+ asm("flush %0" : : "r" (adj_code + 6));
+ asm("flush %0" : : "r" (adj_code + 10));
+
+    /* max. 5 instructions latency, and we need >= 1 for returning */
+ asm("nop");
+ asm("nop");
+ asm("nop");
+ asm("nop");
+ }
+#elif defined(alpha_HOST_ARCH)
+ /* Magic constant computed by inspecting the code length of
+ the following assembly language snippet
+ (offset and machine code prefixed; note that the machine code
+ shown is longwords stored in little-endian order):
+
+ <00>: 46520414 mov a2, a4
+ <04>: 46100412 mov a0, a2
+ <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr
+ <0c>: 46730415 mov a3, a5
+ <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr
+ <14>: 46310413 mov a1, a3
+ <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint)
+ <1c>: 00000000 # padding for alignment
+ <20>: [8 bytes for hptr quadword]
+ <28>: [8 bytes for wptr quadword]
+
+ The "computed" jump at <08> above is really a jump to a fixed
+ location. Accordingly, we place an always-correct hint in the
+ jump instruction, namely the address offset from <0c> to wptr,
+ divided by 4, taking the lowest 14 bits.
+
+ We only support passing 4 or fewer argument words, for the same
+ reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
+ On the Alpha the first 6 integer arguments are in a0 through a5,
+ and the rest on the stack. Hence we want to shuffle the original
+ caller's arguments by two.
+
+ On the Alpha the calling convention is so complex and dependent
+ on the callee's signature -- for example, the stack pointer has
+ to be a multiple of 16 -- that it seems impossible to me [ccshan]
+ to handle the general case correctly without changing how the
+ adjustor is called from C. For now, our solution of shuffling
+ registers only and ignoring the stack only works if the original
+ caller passed 4 or fewer argument words.
+
+TODO: Depending on how much allocation overhead stgMallocBytes uses for
+ header information (more precisely, if the overhead is no more than
+ 4 bytes), we should move the first three instructions above down by
+ 4 bytes (getting rid of the nop), hence saving memory. [ccshan]
+ */
+ ASSERT(((StgWord64)wptr & 3) == 0);
+ adjustor = stgMallocBytesRWX(48);
+ {
+ StgWord64 *const code = (StgWord64 *)adjustor;
+
+ code[0] = 0x4610041246520414L;
+ code[1] = 0x46730415a61b0020L;
+ code[2] = 0x46310413a77b0028L;
+ code[3] = 0x000000006bfb0000L
+ | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
+
+ code[4] = (StgWord64)hptr;
+ code[5] = (StgWord64)wptr;
+
+ /* Ensure that instruction cache is consistent with our new code */
+ __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
+ }
+#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
+
+#define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
+#define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
+ {
+ /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
+ We need to calculate all the details of the stack frame layout,
+ taking into account the types of all the arguments, and then
+ generate code on the fly. */
+
+ int src_gpr = 3, dst_gpr = 5;
+ int fpr = 3;
+ int src_offset = 0, dst_offset = 0;
+ int n = strlen(typeString),i;
+ int src_locs[n], dst_locs[n];
+ int frameSize;
+ unsigned *code;
+
+ /* Step 1:
+ Calculate where the arguments should go.
+ src_locs[] will contain the locations of the arguments in the
+ original stack frame passed to the adjustor.
+ dst_locs[] will contain the locations of the arguments after the
+ adjustor runs, on entry to the wrapper proc pointed to by wptr.
+
+ This algorithm is based on the one described on page 3-19 of the
+ System V ABI PowerPC Processor Supplement.
+ */
+ for(i=0;typeString[i];i++)
+ {
+ char t = typeString[i];
+ if((t == 'f' || t == 'd') && fpr <= 8)
+ src_locs[i] = dst_locs[i] = -32-(fpr++);
+ else
+ {
+ if(t == 'l' && src_gpr <= 9)
+ {
+ if((src_gpr & 1) == 0)
+ src_gpr++;
+ src_locs[i] = -src_gpr;
+ src_gpr += 2;
+ }
+ else if(t == 'i' && src_gpr <= 10)
+ {
+ src_locs[i] = -(src_gpr++);
+ }
+ else
+ {
+ if(t == 'l' || t == 'd')
+ {
+ if(src_offset % 8)
+ src_offset += 4;
+ }
+ src_locs[i] = src_offset;
+ src_offset += (t == 'l' || t == 'd') ? 8 : 4;
+ }
+
+ if(t == 'l' && dst_gpr <= 9)
+ {
+ if((dst_gpr & 1) == 0)
+ dst_gpr++;
+ dst_locs[i] = -dst_gpr;
+ dst_gpr += 2;
+ }
+ else if(t == 'i' && dst_gpr <= 10)
+ {
+ dst_locs[i] = -(dst_gpr++);
+ }
+ else
+ {
+ if(t == 'l' || t == 'd')
+ {
+ if(dst_offset % 8)
+ dst_offset += 4;
+ }
+ dst_locs[i] = dst_offset;
+ dst_offset += (t == 'l' || t == 'd') ? 8 : 4;
+ }
+ }
+ }
+
+ frameSize = dst_offset + 8;
+ frameSize = (frameSize+15) & ~0xF;
+
+ /* Step 2:
+ Build the adjustor.
+ */
+ // allocate space for at most 4 insns per parameter
+ // plus 14 more instructions.
+ adjustor = stgMallocBytesRWX(4 * (4*n + 14));
+ code = (unsigned*)adjustor;
+
+ *code++ = 0x48000008; // b *+8
+ // * Put the hptr in a place where freeHaskellFunctionPtr
+ // can get at it.
+ *code++ = (unsigned) hptr;
+
+ // * save the link register
+ *code++ = 0x7c0802a6; // mflr r0;
+ *code++ = 0x90010004; // stw r0, 4(r1);
+ // * and build a new stack frame
+ *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
+
+ // * now generate instructions to copy arguments
+ // from the old stack frame into the new stack frame.
+ for(i=n-1;i>=0;i--)
+ {
+ if(src_locs[i] < -32)
+ ASSERT(dst_locs[i] == src_locs[i]);
+ else if(src_locs[i] < 0)
+ {
+ // source in GPR.
+ ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
+ if(dst_locs[i] < 0)
+ {
+ ASSERT(dst_locs[i] > -32);
+ // dst is in GPR, too.
+
+ if(typeString[i] == 'l')
+ {
+ // mr dst+1, src+1
+ *code++ = 0x7c000378
+ | ((-dst_locs[i]+1) << 16)
+ | ((-src_locs[i]+1) << 11)
+ | ((-src_locs[i]+1) << 21);
+ }
+ // mr dst, src
+ *code++ = 0x7c000378
+ | ((-dst_locs[i]) << 16)
+ | ((-src_locs[i]) << 11)
+ | ((-src_locs[i]) << 21);
+ }
+ else
+ {
+ if(typeString[i] == 'l')
+ {
+ // stw src+1, dst_offset+4(r1)
+ *code++ = 0x90010000
+ | ((-src_locs[i]+1) << 21)
+ | (dst_locs[i] + 4);
+ }
+
+ // stw src, dst_offset(r1)
+ *code++ = 0x90010000
+ | ((-src_locs[i]) << 21)
+ | (dst_locs[i] + 8);
+ }
+ }
+ else
+ {
+ ASSERT(dst_locs[i] >= 0);
+ ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
+
+ if(typeString[i] == 'l')
+ {
+ // lwz r0, src_offset(r1)
+ *code++ = 0x80010000
+ | (src_locs[i] + frameSize + 8 + 4);
+ // stw r0, dst_offset(r1)
+ *code++ = 0x90010000
+ | (dst_locs[i] + 8 + 4);
+ }
+ // lwz r0, src_offset(r1)
+ *code++ = 0x80010000
+ | (src_locs[i] + frameSize + 8);
+ // stw r0, dst_offset(r1)
+ *code++ = 0x90010000
+ | (dst_locs[i] + 8);
+ }
+ }
+
+ // * hptr will be the new first argument.
+ // lis r3, hi(hptr)
+ *code++ = OP_HI(0x3c60, hptr);
+ // ori r3,r3,lo(hptr)
+ *code++ = OP_LO(0x6063, hptr);
+
+ // * we need to return to a piece of code
+ // which will tear down the stack frame.
+ // lis r11,hi(obscure_ccall_ret_code)
+ *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
+ // ori r11,r11,lo(obscure_ccall_ret_code)
+ *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
+ // mtlr r11
+ *code++ = 0x7d6803a6;
+
+ // * jump to wptr
+ // lis r11,hi(wptr)
+ *code++ = OP_HI(0x3d60, wptr);
+ // ori r11,r11,lo(wptr)
+ *code++ = OP_LO(0x616b, wptr);
+ // mtctr r11
+ *code++ = 0x7d6903a6;
+ // bctr
+ *code++ = 0x4e800420;
+
+ // Flush the Instruction cache:
+ {
+ unsigned *p = adjustor;
+ while(p < code)
+ {
+ __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
+ : : "r" (p));
+ p++;
+ }
+ __asm__ volatile ("sync\n\tisync");
+ }
+ }
+
+#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
+
+#define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
+#define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
+ {
+ /* The following code applies to all PowerPC and PowerPC64 platforms
+ whose stack layout is based on the AIX ABI.
+
+ Besides (obviously) AIX, this includes
+ Mac OS 9 and BeOS/PPC (may they rest in peace),
+ which use the 32-bit AIX ABI
+ powerpc64-linux,
+ which uses the 64-bit AIX ABI
+ and Darwin (Mac OS X),
+ which uses the same stack layout as AIX,
+ but no function descriptors.
+
+ The actual stack-frame shuffling is implemented out-of-line
+ in the function adjustorCode, in AdjustorAsm.S.
+ Here, we set up an AdjustorStub structure, which
+ is a function descriptor (on platforms that have function
+ descriptors) or a short piece of stub code (on Darwin) to call
+ adjustorCode with a pointer to the AdjustorStub struct loaded
+ into register r2.
+
+ One nice thing about this is that there is _no_ code generated at
+ runtime on the platforms that have function descriptors.
+ */
+ AdjustorStub *adjustorStub;
+ int sz = 0, extra_sz, total_sz;
+
+ // from AdjustorAsm.s
+ // not declared as a function so that AIX-style
+ // fundescs can never get in the way.
+ extern void *adjustorCode;
+
+#ifdef FUNDESCS
+ adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
+#else
+ adjustorStub = stgMallocBytesRWX(sizeof(AdjustorStub));
+#endif
+ adjustor = adjustorStub;
+
+ adjustorStub->code = (void*) &adjustorCode;
+
+#ifdef FUNDESCS
+ // function descriptors are a cool idea.
+ // We don't need to generate any code at runtime.
+ adjustorStub->toc = adjustorStub;
+#else
+
+ // no function descriptors :-(
+ // We need to do things "by hand".
+#if defined(powerpc_HOST_ARCH)
+ // lis r2, hi(adjustorStub)
+ adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
+ // ori r2, r2, lo(adjustorStub)
+ adjustorStub->ori = OP_LO(0x6042, adjustorStub);
+ // lwz r0, code(r2)
+ adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
+ - (char*)adjustorStub);
+ // mtctr r0
+ adjustorStub->mtctr = 0x7c0903a6;
+ // bctr
+ adjustorStub->bctr = 0x4e800420;
+#else
+ barf("adjustor creation not supported on this platform");
+#endif
+
+ // Flush the Instruction cache:
+ {
+ int n = sizeof(AdjustorStub)/sizeof(unsigned);
+ unsigned *p = (unsigned*)adjustor;
+ while(n--)
+ {
+ __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
+ : : "r" (p));
+ p++;
+ }
+ __asm__ volatile ("sync\n\tisync");
+ }
+#endif
+
+ // Calculate the size of the stack frame, in words.
+ sz = totalArgumentSize(typeString);
+
+ // The first eight words of the parameter area
+ // are just "backing store" for the parameters passed in
+ // the GPRs. extra_sz is the number of words beyond those first
+ // 8 words.
+ extra_sz = sz - 8;
+ if(extra_sz < 0)
+ extra_sz = 0;
+
+ // Calculate the total size of the stack frame.
+ total_sz = (6 /* linkage area */
+ + 8 /* minimum parameter area */
+ + 2 /* two extra arguments */
+ + extra_sz)*sizeof(StgWord);
+
+ // align to 16 bytes.
+ // AIX only requires 8 bytes, but who cares?
+ total_sz = (total_sz+15) & ~0xF;
+
+ // Fill in the information that adjustorCode in AdjustorAsm.S
+ // will use to create a new stack frame with the additional args.
+ adjustorStub->hptr = hptr;
+ adjustorStub->wptr = wptr;
+ adjustorStub->negative_framesize = -total_sz;
+ adjustorStub->extrawords_plus_one = extra_sz + 1;
+ }
+
+#elif defined(ia64_HOST_ARCH)
+/*
+ Up to 8 inputs are passed in registers. We flush the last two inputs to
+ the stack, initially into the 16-byte scratch region left by the caller.
+ We then shuffle the others along by 4 (taking 2 registers for ourselves
+ to save return address and previous function state - we need to come back
+ here on the way out to restore the stack, so this is a real function
+ rather than just a trampoline).
+
+ The function descriptor we create contains the gp of the target function
+ so gp is already loaded correctly.
+
+ [MLX] alloc r16=ar.pfs,10,2,0
+ movl r17=wptr
+ [MII] st8.spill [r12]=r38,8 // spill in6 (out4)
+ mov r41=r37 // out7 = in5 (out3)
+ mov r40=r36;; // out6 = in4 (out2)
+ [MII] st8.spill [r12]=r39 // spill in7 (out5)
+ mov.sptk b6=r17,50
+ mov r38=r34;; // out4 = in2 (out0)
+ [MII] mov r39=r35 // out5 = in3 (out1)
+ mov r37=r33 // out3 = in1 (loc1)
+ mov r36=r32 // out2 = in0 (loc0)
+ [MLX] adds r12=-24,r12 // update sp
+ movl r34=hptr;; // out0 = hptr
+ [MIB] mov r33=r16 // loc1 = ar.pfs
+ mov r32=b0 // loc0 = retaddr
+ br.call.sptk.many b0=b6;;
+
+ [MII] adds r12=-16,r12
+ mov b0=r32
+ mov.i ar.pfs=r33
+ [MFB] nop.m 0x0
+ nop.f 0x0
+ br.ret.sptk.many b0;;
+*/
+
+/* These macros distribute a long constant into the two words of an MLX bundle */
+#define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1))
+#define MOVL_LOWORD(val) (BITS(val,22,18) << 46)
+#define MOVL_HIWORD(val) (BITS(val,40,23) | (BITS(val,0,7) << 36) | (BITS(val,7,9) << 50) \
+ | (BITS(val,16,5) << 55) | (BITS(val,21,1) << 44) | BITS(val,63,1) << 59)
+
+ {
+ StgStablePtr stable;
+ IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
+ StgWord64 wcode = wdesc->ip;
+ IA64FunDesc *fdesc;
+ StgWord64 *code;
+
+ /* we allocate on the Haskell heap since malloc'd memory isn't executable - argh */
+ adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8, &stable);
+
+ fdesc = (IA64FunDesc *)adjustor;
+ code = (StgWord64 *)(fdesc + 1);
+ fdesc->ip = (StgWord64)code;
+ fdesc->gp = wdesc->gp;
+
+ code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode);
+ code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode);
+ code[2] = 0x029015d818984001;
+ code[3] = 0x8401200500420094;
+ code[4] = 0x886011d8189c0001;
+ code[5] = 0x84011004c00380c0;
+ code[6] = 0x0250210046013800;
+ code[7] = 0x8401000480420084;
+ code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
+ code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
+ code[10] = 0x0200210020010811;
+ code[11] = 0x1080006800006200;
+ code[12] = 0x0000210018406000;
+ code[13] = 0x00aa021000038005;
+ code[14] = 0x000000010000001d;
+ code[15] = 0x0084000880000200;
+
+ /* save stable pointers in convenient form */
+ code[16] = (StgWord64)hptr;
+ code[17] = (StgWord64)stable;
+ }
+#else
+ barf("adjustor creation not supported on this platform");
+#endif
+ break;
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ /* Have fun! */
+ return adjustor;
+}
+
+
+void
+freeHaskellFunctionPtr(void* ptr)
+{
+#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
+ if ( *(unsigned char*)ptr != 0x68 &&
+ *(unsigned char*)ptr != 0x58 ) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+
+ /* Free the stable pointer first..*/
+ if (*(unsigned char*)ptr == 0x68) { /* Aha, a ccall adjustor! */
+ freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x01)));
+ } else {
+ freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
+ }
+#elif defined(x86_TARGET_ARCH) && defined(darwin_HOST_OS)
+if ( *(unsigned char*)ptr != 0xe8 ) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+ freeStablePtr(((AdjustorStub*)ptr)->hptr);
+#elif defined(x86_64_HOST_ARCH)
+ if ( *(StgWord16 *)ptr == 0x894d ) {
+ freeStablePtr(*(StgStablePtr*)(ptr+0x20));
+ } else if ( *(StgWord16 *)ptr == 0x5141 ) {
+ freeStablePtr(*(StgStablePtr*)(ptr+0x30));
+ } else {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+#elif defined(sparc_HOST_ARCH)
+ if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+
+ /* Free the stable pointer first..*/
+ freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
+#elif defined(alpha_HOST_ARCH)
+ if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+
+ /* Free the stable pointer first..*/
+ freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
+#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
+ if ( *(StgWord*)ptr != 0x48000008 ) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+ freeStablePtr(((StgStablePtr*)ptr)[1]);
+#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
+ extern void* adjustorCode;
+ if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+ freeStablePtr(((AdjustorStub*)ptr)->hptr);
+#elif defined(ia64_HOST_ARCH)
+ IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
+ StgWord64 *code = (StgWord64 *)(fdesc+1);
+
+ if (fdesc->ip != (StgWord64)code) {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+ freeStablePtr((StgStablePtr)code[16]);
+ freeStablePtr((StgStablePtr)code[17]);
+ return;
+#else
+ ASSERT(0);
+#endif
+ *((unsigned char*)ptr) = '\0';
+
+ stgFree(ptr);
+}
+
+
+/*
+ * Function: initAdjustor()
+ *
+ * Perform initialisation of adjustor thunk layer (if needed.)
+ */
+void
+initAdjustor(void)
+{
+#if defined(i386_HOST_ARCH) && defined(openbsd_HOST_OS)
+ obscure_ccall_ret_code_dyn = stgMallocBytesRWX(4);
+ obscure_ccall_ret_code_dyn[0] = ((unsigned char *)obscure_ccall_ret_code)[0];
+ obscure_ccall_ret_code_dyn[1] = ((unsigned char *)obscure_ccall_ret_code)[1];
+ obscure_ccall_ret_code_dyn[2] = ((unsigned char *)obscure_ccall_ret_code)[2];
+ obscure_ccall_ret_code_dyn[3] = ((unsigned char *)obscure_ccall_ret_code)[3];
+#endif
+}
diff --git a/rts/AdjustorAsm.S b/rts/AdjustorAsm.S
new file mode 100644
index 0000000000..cfdef68349
--- /dev/null
+++ b/rts/AdjustorAsm.S
@@ -0,0 +1,189 @@
+#include "../includes/ghcconfig.h"
+
+/* ******************************** PowerPC ******************************** */
+
+#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
+#if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
+ /* The following code applies, with some differences,
+ to all powerpc platforms except for powerpc32-linux,
+ whose calling convention is annoyingly complex.
+ */
+
+
+ /* The code is "almost" the same for
+ 32-bit and for 64-bit
+ */
+#if defined(powerpc64_HOST_ARCH)
+#define WS 8
+#define LOAD ld
+#define STORE std
+#else
+#define WS 4
+#define LOAD lwz
+#define STORE stw
+#endif
+
+ /* Some info about stack frame layout */
+#define LINK_SLOT (2*WS)
+#define LINKAGE_AREA_SIZE (6*WS)
+
+ /* The following defines mirror struct AdjustorStub
+ from Adjustor.c. Make sure to keep these in sync.
+ */
+#if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
+#define HEADER_WORDS 6
+#elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
+#else
+#define HEADER_WORDS 3
+#endif
+
+#define HPTR_OFF ((HEADER_WORDS )*WS)
+#define WPTR_OFF ((HEADER_WORDS + 1)*WS)
+#define FRAMESIZE_OFF ((HEADER_WORDS + 2)*WS)
+#define EXTRA_WORDS_OFF ((HEADER_WORDS + 3)*WS)
+
+ /* Darwin insists on register names, everyone else prefers
+ to use numbers. */
+#if !defined(darwin_HOST_OS)
+#define r0 0
+#define r1 1
+#define r2 2
+#define r3 3
+#define r4 4
+#define r5 5
+#define r6 6
+#define r7 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+
+#define r30 30
+#define r31 31
+#endif
+
+
+.text
+#if LEADING_UNDERSCORE
+ .globl _adjustorCode
+_adjustorCode:
+#else
+ .globl adjustorCode
+ /* Note that we don't build a function descriptor
+ for AIX-derived ABIs here. This will happen at runtime
+ in createAdjustor().
+ */
+adjustorCode:
+#endif
+ /* On entry, r2 will point to the AdjustorStub data structure. */
+
+ /* save the link */
+ mflr r0
+ STORE r0, LINK_SLOT(r1)
+
+ /* set up stack frame */
+ LOAD r12, FRAMESIZE_OFF(r2)
+#ifdef powerpc64_HOST_ARCH
+ stdux r1, r1, r12
+#else
+ stwux r1, r1, r12
+#endif
+
+ /* Save some regs so that we can use them.
+ Note that we use the "Red Zone" below the stack pointer.
+ */
+ STORE r31, -WS(r1)
+ STORE r30, -2*WS(r1)
+
+ mr r31, r1
+ subf r30, r12, r31
+
+ LOAD r12, EXTRA_WORDS_OFF(r2)
+ mtctr r12
+ b 2f
+1:
+ LOAD r0, LINKAGE_AREA_SIZE + 8*WS(r30)
+ STORE r0, LINKAGE_AREA_SIZE + 10*WS(r31)
+ addi r30, r30, WS
+ addi r31, r31, WS
+2:
+ bdnz 1b
+
+ /* Restore r30 and r31 now.
+ */
+ LOAD r31, -WS(r1)
+ LOAD r30, -2*WS(r1)
+
+ STORE r10, LINKAGE_AREA_SIZE + 9*WS(r1)
+ STORE r9, LINKAGE_AREA_SIZE + 8*WS(r1)
+ mr r10, r8
+ mr r9, r7
+ mr r8, r6
+ mr r7, r5
+ mr r6, r4
+ mr r5, r3
+
+ LOAD r3, HPTR_OFF(r2)
+
+ LOAD r12, WPTR_OFF(r2)
+#if defined(darwin_HOST_OS)
+ mtctr r12
+#else
+ LOAD r0, 0(r12)
+ /* The function we're calling will never be a nested function,
+ so we don't load r11.
+ */
+ mtctr r0
+ LOAD r2, WS(r12)
+#endif
+ bctrl
+
+ LOAD r1, 0(r1)
+ LOAD r0, LINK_SLOT(r1)
+ mtlr r0
+ blr
+#endif
+
+/* ********************************* i386 ********************************** */
+
+#elif defined(i386_HOST_ARCH) && defined(darwin_HOST_OS)
+
+#define WS 4
+#define RETVAL_OFF 5
+#define HEADER_BYTES 8
+
+#define HPTR_OFF HEADER_BYTES
+#define WPTR_OFF (HEADER_BYTES + 1*WS)
+#define FRAMESIZE_OFF (HEADER_BYTES + 2*WS)
+#define ARGWORDS_OFF (HEADER_BYTES + 3*WS)
+
+ .globl _adjustorCode
+_adjustorCode:
+ popl %eax
+ subl $RETVAL_OFF, %eax
+
+ pushl %ebp
+ movl %esp, %ebp
+
+ subl FRAMESIZE_OFF(%eax), %esp
+
+ pushl %esi
+ pushl %edi
+
+ leal 8(%ebp), %esi
+ leal 12(%esp), %edi
+ movl ARGWORDS_OFF(%eax), %ecx
+ rep
+ movsl
+
+ popl %edi
+ popl %esi
+
+ pushl HPTR_OFF(%eax)
+ call *WPTR_OFF(%eax)
+
+ leave
+ ret
+#endif
+
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
new file mode 100644
index 0000000000..e0ca03944c
--- /dev/null
+++ b/rts/Apply.cmm
@@ -0,0 +1,268 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2004
+ *
+ * Application-related bits.
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * -------------------------------------------------------------------------- */
+
+#include "Cmm.h"
+
+/* ----------------------------------------------------------------------------
+ * Evaluate a closure and return it.
+ *
+ * There isn't an info table / return address version of stg_ap_0, because
+ * everything being returned is guaranteed evaluated, so it would be a no-op.
+ */
+
+STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ")
+
+stg_ap_0_fast
+{
+ // fn is in R1, no args on the stack
+
+ IF_DEBUG(apply,
+ foreign "C" debugBelch(stg_ap_0_ret_str) [R1];
+ foreign "C" printClosure(R1 "ptr") [R1]);
+
+ IF_DEBUG(sanity,
+ foreign "C" checkStackChunk(Sp "ptr",
+ CurrentTSO + TSO_OFFSET_StgTSO_stack +
+ WDS(TO_W_(StgTSO_stack_size(CurrentTSO))) "ptr") [R1]);
+
+ ENTER();
+}
+
+/* -----------------------------------------------------------------------------
+ Entry Code for a PAP.
+
+ This entry code is *only* called by one of the stg_ap functions.
+ On entry: Sp points to the remaining arguments on the stack. If
+ the stack check fails, we can just push the PAP on the stack and
+ return to the scheduler.
+
+ On entry: R1 points to the PAP. The rest of the function's
+ arguments (apart from those that are already in the PAP) are on the
+ stack, starting at Sp(0). R2 contains an info table which
+ describes these arguments, which is used in the event that the
+ stack check in the entry code below fails. The info table is
+ currently one of the stg_ap_*_ret family, as this code is always
+ entered from those functions.
+
+ The idea is to copy the chunk of stack from the PAP object onto the
+ stack / into registers, and enter the function.
+ -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_PAP,/*special layout*/0,0,PAP,"PAP","PAP")
+{ foreign "C" barf("PAP object entered!"); }
+
+stg_PAP_apply
+{
+ W_ Words;
+ W_ pap;
+
+ pap = R1;
+
+ Words = TO_W_(StgPAP_n_args(pap));
+
+ //
+ // Check for stack overflow and bump the stack pointer.
+ // We have a hand-rolled stack check fragment here, because none of
+ // the canned ones suit this situation.
+ //
+ if ((Sp - WDS(Words)) < SpLim) {
+ // there is a return address in R2 in the event of a
+ // stack check failure. The various stg_apply functions arrange
+ // this before calling stg_PAP_entry.
+ Sp_adj(-1);
+ Sp(0) = R2;
+ jump stg_gc_unpt_r1;
+ }
+ Sp_adj(-Words);
+
+ // profiling
+ TICK_ENT_PAP();
+ LDV_ENTER(pap);
+ // Enter PAP cost centre
+ ENTER_CCS_PAP_CL(pap);
+
+ R1 = StgPAP_fun(pap);
+
+ // Reload the stack
+ W_ i;
+ W_ p;
+ p = pap + SIZEOF_StgHeader + OFFSET_StgPAP_payload;
+ i = 0;
+for:
+ if (i < Words) {
+ Sp(i) = W_[p];
+ p = p + WDS(1);
+ i = i + 1;
+ goto for;
+ }
+
+ // Off we go!
+ TICK_ENT_VIA_NODE();
+
+#ifdef NO_ARG_REGS
+ jump %GET_ENTRY(R1);
+#else
+ W_ info;
+ info = %GET_FUN_INFO(R1);
+ W_ type;
+ type = TO_W_(StgFunInfoExtra_fun_type(info));
+ if (type == ARG_GEN) {
+ jump StgFunInfoExtra_slow_apply(info);
+ }
+ if (type == ARG_GEN_BIG) {
+ jump StgFunInfoExtra_slow_apply(info);
+ }
+ if (type == ARG_BCO) {
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_apply_interp_info;
+ jump stg_yield_to_interpreter;
+ }
+ jump W_[stg_ap_stack_entries +
+ WDS(TO_W_(StgFunInfoExtra_fun_type(info)))];
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Entry Code for an AP (a PAP with arity zero).
+
+ The entry code is very similar to a PAP, except there are no
+ further arguments on the stack to worry about, so the stack check
+ is simpler. We must also push an update frame on the stack before
+ applying the function.
+ -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP")
+{
+ W_ Words;
+ W_ ap;
+
+ ap = R1;
+
+ Words = TO_W_(StgAP_n_args(ap));
+
+ /*
+ * Check for stack overflow. IMPORTANT: use a _NP check here,
+ * because if the check fails, we might end up blackholing this very
+ * closure, in which case we must enter the blackhole on return rather
+ * than continuing to evaluate the now-defunct closure.
+ */
+ STK_CHK_NP(WDS(Words) + SIZEOF_StgUpdateFrame);
+
+ PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words);
+
+ TICK_ENT_AP();
+ LDV_ENTER(ap);
+
+ // Enter PAP cost centre
+ ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
+
+ R1 = StgAP_fun(ap);
+
+ // Reload the stack
+ W_ i;
+ W_ p;
+ p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
+ i = 0;
+for:
+ if (i < Words) {
+ Sp(i) = W_[p];
+ p = p + WDS(1);
+ i = i + 1;
+ goto for;
+ }
+
+ // Off we go!
+ TICK_ENT_VIA_NODE();
+
+#ifdef NO_ARG_REGS
+ jump %GET_ENTRY(R1);
+#else
+ W_ info;
+ info = %GET_FUN_INFO(R1);
+ W_ type;
+ type = TO_W_(StgFunInfoExtra_fun_type(info));
+ if (type == ARG_GEN) {
+ jump StgFunInfoExtra_slow_apply(info);
+ }
+ if (type == ARG_GEN_BIG) {
+ jump StgFunInfoExtra_slow_apply(info);
+ }
+ if (type == ARG_BCO) {
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_apply_interp_info;
+ jump stg_yield_to_interpreter;
+ }
+ jump W_[stg_ap_stack_entries +
+ WDS(TO_W_(StgFunInfoExtra_fun_type(info)))];
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Entry Code for an AP_STACK.
+
+ Very similar to a PAP and AP. The layout is the same as PAP
+ and AP, except that the payload is a chunk of stack instead of
+ being described by the function's info table. Like an AP,
+ there are no further arguments on the stack to worry about.
+ However, the function closure (ap->fun) does not necessarily point
+ directly to a function, so we have to enter it using stg_ap_0.
+ -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
+{
+ W_ Words;
+ W_ ap;
+
+ ap = R1;
+
+ Words = StgAP_STACK_size(ap);
+
+ /*
+ * Check for stack overflow. IMPORTANT: use a _NP check here,
+ * because if the check fails, we might end up blackholing this very
+ * closure, in which case we must enter the blackhole on return rather
+ * than continuing to evaluate the now-defunct closure.
+ */
+ STK_CHK_NP(WDS(Words) + SIZEOF_StgUpdateFrame);
+
+ PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words);
+
+ TICK_ENT_AP();
+ LDV_ENTER(ap);
+
+ // Enter PAP cost centre
+ ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
+
+ R1 = StgAP_STACK_fun(ap);
+
+ // Reload the stack
+ W_ i;
+ W_ p;
+ p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload;
+ i = 0;
+for:
+ if (i < Words) {
+ Sp(i) = W_[p];
+ p = p + WDS(1);
+ i = i + 1;
+ goto for;
+ }
+
+ // Off we go!
+ TICK_ENT_VIA_NODE();
+
+ ENTER();
+}
diff --git a/rts/Apply.h b/rts/Apply.h
new file mode 100644
index 0000000000..76e36cb9fb
--- /dev/null
+++ b/rts/Apply.h
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2002-2004
+ *
+ * Declarations for things defined in AutoApply.cmm
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef APPLY_H
+#define APPLY_H
+
+// canned slow entry points, indexed by arg type (ARG_P, ARG_PP, etc.)
+#ifdef IN_STG_CODE
+extern StgWord stg_ap_stack_entries[];
+#else
+extern StgFun *stg_ap_stack_entries[];
+#endif
+
+// canned register save code for heap check failure in a function
+#ifdef IN_STG_CODE
+extern StgWord stg_stack_save_entries[];
+#else
+extern StgFun *stg_stack_save_entries[];
+#endif
+
+// canned bitmap for each arg type
+extern StgWord stg_arg_bitmaps[];
+
+#endif /* APPLY_H */
diff --git a/rts/Arena.c b/rts/Arena.c
new file mode 100644
index 0000000000..76ac23cf88
--- /dev/null
+++ b/rts/Arena.c
@@ -0,0 +1,120 @@
+/* -----------------------------------------------------------------------------
+ (c) The University of Glasgow 2001
+
+ Arena allocation. Arenas provide fast memory allocation at the
+ expense of fine-grained recycling of storage: memory may be
+ only be returned to the system by freeing the entire arena, it
+ isn't possible to return individual objects within an arena.
+
+ Do not assume that sequentially allocated objects will be adjacent
+ in memory.
+
+ Quirks: this allocator makes use of the RTS block allocator. If
+ the current block doesn't have enough room for the requested
+ object, then a new block is allocated. This means that allocating
+ large objects will tend to result in wasted space at the end of
+ each block. In the worst case, half of the allocated space is
+ wasted. This allocator is therefore best suited to situations in
+ which most allocations are small.
+ -------------------------------------------------------------------------- */
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "BlockAlloc.h"
+#include "Arena.h"
+
+#include <stdlib.h>
+
+// Each arena struct is allocated using malloc().
+struct _Arena {
+ bdescr *current;
+ StgWord *free; // ptr to next free byte in current block
+ StgWord *lim; // limit (== last free byte + 1)
+};
+
+// We like to keep track of how many blocks we've allocated for
+// Storage.c:memInventory().
+static long arena_blocks = 0;
+
+// Begin a new arena
+Arena *
+newArena( void )
+{
+ Arena *arena;
+
+ arena = stgMallocBytes(sizeof(Arena), "newArena");
+ arena->current = allocBlock();
+ arena->current->link = NULL;
+ arena->free = arena->current->start;
+ arena->lim = arena->current->start + BLOCK_SIZE_W;
+ arena_blocks++;
+
+ return arena;
+}
+
+// The minimum alignment of an allocated block.
+#define MIN_ALIGN 8
+
+/* 'n' is assumed to be a power of 2 */
+#define ROUNDUP(x,n) (((x)+((n)-1))&(~((n)-1)))
+#define B_TO_W(x) ((x) / sizeof(W_))
+
+// Allocate some memory in an arena
+void *
+arenaAlloc( Arena *arena, size_t size )
+{
+ void *p;
+ nat size_w;
+ nat req_blocks;
+ bdescr *bd;
+
+ // round up to nearest alignment chunk.
+ size = ROUNDUP(size,MIN_ALIGN);
+
+ // size of allocated block in words.
+ size_w = B_TO_W(size);
+
+ if ( arena->free + size_w < arena->lim ) {
+ // enough room in the current block...
+ p = arena->free;
+ arena->free += size_w;
+ return p;
+ } else {
+ // allocate a fresh block...
+ req_blocks = (lnat)BLOCK_ROUND_UP(size) / BLOCK_SIZE;
+ bd = allocGroup(req_blocks);
+ arena_blocks += req_blocks;
+
+ bd->gen_no = 0;
+ bd->step = NULL;
+ bd->flags = 0;
+ bd->free = bd->start;
+ bd->link = arena->current;
+ arena->current = bd;
+ arena->free = bd->free + size_w;
+ arena->lim = bd->free + bd->blocks * BLOCK_SIZE_W;
+ return bd->start;
+ }
+}
+
+// Free an entire arena
+void
+arenaFree( Arena *arena )
+{
+ bdescr *bd, *next;
+
+ for (bd = arena->current; bd != NULL; bd = next) {
+ next = bd->link;
+ arena_blocks -= bd->blocks;
+ ASSERT(arena_blocks >= 0);
+ freeGroup(bd);
+ }
+ stgFree(arena);
+}
+
+unsigned long
+arenaBlocks( void )
+{
+ return arena_blocks;
+}
+
diff --git a/rts/Arena.h b/rts/Arena.h
new file mode 100644
index 0000000000..7a2989e543
--- /dev/null
+++ b/rts/Arena.h
@@ -0,0 +1,25 @@
+/* -----------------------------------------------------------------------------
+ (c) The University of Glasgow 2001
+
+ Arena allocation interface.
+ -------------------------------------------------------------------------- */
+
+#ifndef ARENA_H
+#define ARENA_H
+
+// Abstract type of arenas
+typedef struct _Arena Arena;
+
+// Start a new arena
+extern Arena * newArena ( void );
+
+// Allocate memory in an arena
+extern void * arenaAlloc ( Arena *, size_t );
+
+// Free an entire arena
+extern void arenaFree ( Arena * );
+
+// For internal use only:
+extern unsigned long arenaBlocks( void );
+
+#endif /* ARENA_H */
diff --git a/rts/AutoApply.h b/rts/AutoApply.h
new file mode 100644
index 0000000000..bbec1224ff
--- /dev/null
+++ b/rts/AutoApply.h
@@ -0,0 +1,80 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2002-2004
+ *
+ * Helper bits for the generic apply code (AutoApply.hc)
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef AUTOAPPLY_H
+#define AUTOAPPLY_H
+
+// Build a new PAP: function is in R1
+// ret addr and m arguments taking up n words are on the stack.
+// NB. x is a dummy argument attached to the 'for' label so that
+// BUILD_PAP can be used multiple times in the same function.
+#define BUILD_PAP(m,n,f,x) \
+ W_ pap; \
+ W_ size; \
+ W_ i; \
+ size = SIZEOF_StgPAP + WDS(n); \
+ HP_CHK_NP_ASSIGN_SP0(size,f); \
+ TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \
+ TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \
+ pap = Hp + WDS(1) - size; \
+ SET_HDR(pap, stg_PAP_info, W_[CCCS]); \
+ StgPAP_arity(pap) = HALF_W_(arity - m); \
+ StgPAP_fun(pap) = R1; \
+ StgPAP_n_args(pap) = HALF_W_(n); \
+ i = 0; \
+ for##x: \
+ if (i < n) { \
+ StgPAP_payload(pap,i) = Sp(1+i); \
+ i = i + 1; \
+ goto for##x; \
+ } \
+ R1 = pap; \
+ Sp_adj(1 + n); \
+ jump %ENTRY_CODE(Sp(0));
+
+// Copy the old PAP, build a new one with the extra arg(s)
+// ret addr and m arguments taking up n words are on the stack.
+// NB. x is a dummy argument attached to the 'for' label so that
+// BUILD_PAP can be used multiple times in the same function.
+#define NEW_PAP(m,n,f,x) \
+ W_ pap; \
+ W_ new_pap; \
+ W_ size; \
+ W_ i; \
+ pap = R1; \
+ size = SIZEOF_StgPAP + WDS(TO_W_(StgPAP_n_args(pap))) + WDS(n); \
+ HP_CHK_NP_ASSIGN_SP0(size,f); \
+ TICK_ALLOC_HEAP_NOCTR(BYTES_TO_WDS(size)); \
+ TICK_ALLOC_PAP(n+1 /* +1 for the FUN */, 0); \
+ new_pap = Hp + WDS(1) - size; \
+ SET_HDR(new_pap, stg_PAP_info, W_[CCCS]); \
+ StgPAP_arity(new_pap) = HALF_W_(arity - m); \
+ W_ n_args; \
+ n_args = TO_W_(StgPAP_n_args(pap)); \
+ StgPAP_n_args(new_pap) = HALF_W_(n_args + n); \
+ StgPAP_fun(new_pap) = StgPAP_fun(pap); \
+ i = 0; \
+ for1##x: \
+ if (i < n_args) { \
+ StgPAP_payload(new_pap,i) = StgPAP_payload(pap,i); \
+ i = i + 1; \
+ goto for1##x; \
+ } \
+ i = 0; \
+ for2##x: \
+ if (i < n) { \
+ StgPAP_payload(new_pap,n_args+i) = Sp(1+i); \
+ i = i + 1; \
+ goto for2##x; \
+ } \
+ R1 = new_pap; \
+ Sp_adj(n+1); \
+ jump %ENTRY_CODE(Sp(0));
+
+#endif /* APPLY_H */
+
diff --git a/rts/AwaitEvent.h b/rts/AwaitEvent.h
new file mode 100644
index 0000000000..e03cb4444e
--- /dev/null
+++ b/rts/AwaitEvent.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2005
+ *
+ * The awaitEvent() interface, for the non-threaded RTS
+ *
+ * -------------------------------------------------------------------------*/
+
+#ifndef AWAITEVENT_H
+#define AWAITEVENT_H
+
+#if !defined(THREADED_RTS)
+/* awaitEvent(rtsBool wait)
+ *
+ * Checks for blocked threads that need to be woken.
+ *
+ * Called from STG : NO
+ * Locks assumed : sched_mutex
+ */
+void awaitEvent(rtsBool wait); /* In posix/Select.c or
+ * win32/AwaitEvent.c */
+#endif
+
+#endif /* SELECT_H */
diff --git a/rts/BlockAlloc.c b/rts/BlockAlloc.c
new file mode 100644
index 0000000000..5e0e321947
--- /dev/null
+++ b/rts/BlockAlloc.c
@@ -0,0 +1,391 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2006
+ *
+ * The block allocator and free list manager.
+ *
+ * This is the architecture independent part of the block allocator.
+ * It requires only the following support from the operating system:
+ *
+ * void *getMBlock();
+ *
+ * returns the address of an MBLOCK_SIZE region of memory, aligned on
+ * an MBLOCK_SIZE boundary. There is no requirement for successive
+ * calls to getMBlock to return strictly increasing addresses.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "BlockAlloc.h"
+#include "MBlock.h"
+#include "Storage.h"
+
+#include <string.h>
+
+static void initMBlock(void *mblock);
+static bdescr *allocMegaGroup(nat mblocks);
+static void freeMegaGroup(bdescr *bd);
+
+// In THREADED_RTS mode, the free list is protected by sm_mutex.
+static bdescr *free_list = NULL;
+
+/* -----------------------------------------------------------------------------
+ Initialisation
+ -------------------------------------------------------------------------- */
+
+void initBlockAllocator(void)
+{
+ // The free list starts off NULL
+}
+
+/* -----------------------------------------------------------------------------
+ Allocation
+ -------------------------------------------------------------------------- */
+
+STATIC_INLINE void
+initGroup(nat n, bdescr *head)
+{
+ bdescr *bd;
+ nat i;
+
+ if (n != 0) {
+ head->blocks = n;
+ head->free = head->start;
+ head->link = NULL;
+ for (i=1, bd = head+1; i < n; i++, bd++) {
+ bd->free = 0;
+ bd->blocks = 0;
+ bd->link = head;
+ }
+ }
+}
+
+bdescr *
+allocGroup(nat n)
+{
+ void *mblock;
+ bdescr *bd, **last;
+
+ ASSERT_SM_LOCK();
+ ASSERT(n != 0);
+
+ if (n > BLOCKS_PER_MBLOCK) {
+ return allocMegaGroup(BLOCKS_TO_MBLOCKS(n));
+ }
+
+ last = &free_list;
+ for (bd = free_list; bd != NULL; bd = bd->link) {
+ if (bd->blocks == n) { /* exactly the right size! */
+ *last = bd->link;
+ /* no initialisation necessary - this is already a
+ * self-contained block group. */
+ bd->free = bd->start; /* block isn't free now */
+ bd->link = NULL;
+ return bd;
+ }
+ if (bd->blocks > n) { /* block too big... */
+ bd->blocks -= n; /* take a chunk off the *end* */
+ bd += bd->blocks;
+ initGroup(n, bd); /* initialise it */
+ return bd;
+ }
+ last = &bd->link;
+ }
+
+ mblock = getMBlock(); /* get a new megablock */
+ initMBlock(mblock); /* initialise the start fields */
+ bd = FIRST_BDESCR(mblock);
+ initGroup(n,bd); /* we know the group will fit */
+ if (n < BLOCKS_PER_MBLOCK) {
+ initGroup(BLOCKS_PER_MBLOCK-n, bd+n);
+ freeGroup(bd+n); /* add the rest on to the free list */
+ }
+ return bd;
+}
+
+bdescr *
+allocGroup_lock(nat n)
+{
+ bdescr *bd;
+ ACQUIRE_SM_LOCK;
+ bd = allocGroup(n);
+ RELEASE_SM_LOCK;
+ return bd;
+}
+
+bdescr *
+allocBlock(void)
+{
+ return allocGroup(1);
+}
+
+bdescr *
+allocBlock_lock(void)
+{
+ bdescr *bd;
+ ACQUIRE_SM_LOCK;
+ bd = allocBlock();
+ RELEASE_SM_LOCK;
+ return bd;
+}
+
+/* -----------------------------------------------------------------------------
+ Any request larger than BLOCKS_PER_MBLOCK needs a megablock group.
+ First, search the free list for enough contiguous megablocks to
+ fulfill the request - if we don't have enough, we need to
+ allocate some new ones.
+
+ A megablock group looks just like a normal block group, except that
+ the blocks field in the head will be larger than BLOCKS_PER_MBLOCK.
+
+ Note that any objects placed in this group must start in the first
+ megablock, since the other blocks don't have block descriptors.
+ -------------------------------------------------------------------------- */
+
+static bdescr *
+allocMegaGroup(nat n)
+{
+ nat mbs_found;
+ bdescr *bd, *last, *grp_start, *grp_prev;
+
+ mbs_found = 0;
+ grp_start = NULL;
+ grp_prev = NULL;
+ last = NULL;
+ for (bd = free_list; bd != NULL; bd = bd->link) {
+
+ if (bd->blocks == BLOCKS_PER_MBLOCK) { /* whole megablock found */
+
+ /* is it the first one we've found or a non-contiguous megablock? */
+ if (grp_start == NULL ||
+ bd->start != last->start + MBLOCK_SIZE/sizeof(W_)) {
+ grp_start = bd;
+ grp_prev = last;
+ mbs_found = 1;
+ } else {
+ mbs_found++;
+ }
+
+ if (mbs_found == n) { /* found enough contig megablocks? */
+ break;
+ }
+ }
+
+ else { /* only a partial megablock, start again */
+ grp_start = NULL;
+ }
+
+ last = bd;
+ }
+
+ /* found all the megablocks we need on the free list
+ */
+ if (mbs_found == n) {
+ /* remove the megablocks from the free list */
+ if (grp_prev == NULL) { /* bd now points to the last mblock */
+ free_list = bd->link;
+ } else {
+ grp_prev->link = bd->link;
+ }
+ }
+
+ /* the free list wasn't sufficient, allocate all new mblocks.
+ */
+ else {
+ void *mblock = getMBlocks(n);
+ initMBlock(mblock); /* only need to init the 1st one */
+ grp_start = FIRST_BDESCR(mblock);
+ }
+
+ /* set up the megablock group */
+ initGroup(BLOCKS_PER_MBLOCK, grp_start);
+ grp_start->blocks = MBLOCK_GROUP_BLOCKS(n);
+ return grp_start;
+}
+
+/* -----------------------------------------------------------------------------
+ De-Allocation
+ -------------------------------------------------------------------------- */
+
+/* coalesce the group p with p->link if possible.
+ *
+ * Returns p->link if no coalescing was done, otherwise returns a
+ * pointer to the newly enlarged group p.
+ */
+
+STATIC_INLINE bdescr *
+coalesce(bdescr *p)
+{
+ bdescr *bd, *q;
+ nat i, blocks;
+
+ q = p->link;
+ if (q != NULL && p->start + p->blocks * BLOCK_SIZE_W == q->start) {
+ /* can coalesce */
+ p->blocks += q->blocks;
+ p->link = q->link;
+ blocks = q->blocks;
+ for (i = 0, bd = q; i < blocks; bd++, i++) {
+ bd->free = 0;
+ bd->blocks = 0;
+ bd->link = p;
+ }
+ return p;
+ }
+ return q;
+}
+
+void
+freeGroup(bdescr *p)
+{
+ bdescr *bd, *last;
+
+ ASSERT_SM_LOCK();
+
+ /* are we dealing with a megablock group? */
+ if (p->blocks > BLOCKS_PER_MBLOCK) {
+ freeMegaGroup(p);
+ return;
+ }
+
+
+ p->free = (void *)-1; /* indicates that this block is free */
+ p->step = NULL;
+ p->gen_no = 0;
+ /* fill the block group with garbage if sanity checking is on */
+ IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE));
+
+ /* find correct place in free list to place new group */
+ last = NULL;
+ for (bd = free_list; bd != NULL && bd->start < p->start;
+ bd = bd->link) {
+ last = bd;
+ }
+
+ /* now, last = previous group (or NULL) */
+ if (last == NULL) {
+ p->link = free_list;
+ free_list = p;
+ } else {
+ /* coalesce with previous group if possible */
+ p->link = last->link;
+ last->link = p;
+ p = coalesce(last);
+ }
+
+ /* coalesce with next group if possible */
+ coalesce(p);
+ IF_DEBUG(sanity, checkFreeListSanity());
+}
+
+void
+freeGroup_lock(bdescr *p)
+{
+ ACQUIRE_SM_LOCK;
+ freeGroup(p);
+ RELEASE_SM_LOCK;
+}
+
+static void
+freeMegaGroup(bdescr *p)
+{
+ nat n;
+ void *q = p;
+
+ n = ((bdescr *)q)->blocks * BLOCK_SIZE / MBLOCK_SIZE + 1;
+ for (; n > 0; q += MBLOCK_SIZE, n--) {
+ initMBlock(MBLOCK_ROUND_DOWN(q));
+ initGroup(BLOCKS_PER_MBLOCK, (bdescr *)q);
+ freeGroup((bdescr *)q);
+ }
+}
+
+void
+freeChain(bdescr *bd)
+{
+ bdescr *next_bd;
+ while (bd != NULL) {
+ next_bd = bd->link;
+ freeGroup(bd);
+ bd = next_bd;
+ }
+}
+
+void
+freeChain_lock(bdescr *bd)
+{
+ ACQUIRE_SM_LOCK;
+ freeChain(bd);
+ RELEASE_SM_LOCK;
+}
+
+static void
+initMBlock(void *mblock)
+{
+ bdescr *bd;
+ void *block;
+
+ /* the first few Bdescr's in a block are unused, so we don't want to
+ * put them all on the free list.
+ */
+ block = FIRST_BLOCK(mblock);
+ bd = FIRST_BDESCR(mblock);
+
+ /* Initialise the start field of each block descriptor
+ */
+ for (; block <= LAST_BLOCK(mblock); bd += 1, block += BLOCK_SIZE) {
+ bd->start = block;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Debugging
+ -------------------------------------------------------------------------- */
+
+#ifdef DEBUG
+static void
+checkWellFormedGroup( bdescr *bd )
+{
+ nat i;
+
+ for (i = 1; i < bd->blocks; i++) {
+ ASSERT(bd[i].blocks == 0);
+ ASSERT(bd[i].free == 0);
+ ASSERT(bd[i].link == bd);
+ }
+}
+
+void
+checkFreeListSanity(void)
+{
+ bdescr *bd;
+
+ for (bd = free_list; bd != NULL; bd = bd->link) {
+ IF_DEBUG(block_alloc,
+ debugBelch("group at 0x%p, length %d blocks\n",
+ bd->start, bd->blocks));
+ ASSERT(bd->blocks > 0);
+ checkWellFormedGroup(bd);
+ if (bd->link != NULL) {
+ /* make sure we're fully coalesced */
+ ASSERT(bd->start + bd->blocks * BLOCK_SIZE_W != bd->link->start);
+ ASSERT(bd->start < bd->link->start);
+ }
+ }
+}
+
+nat /* BLOCKS */
+countFreeList(void)
+{
+ bdescr *bd;
+ lnat total_blocks = 0;
+
+ for (bd = free_list; bd != NULL; bd = bd->link) {
+ total_blocks += bd->blocks;
+ }
+ return total_blocks;
+}
+#endif
diff --git a/rts/BlockAlloc.h b/rts/BlockAlloc.h
new file mode 100644
index 0000000000..1472ac6f76
--- /dev/null
+++ b/rts/BlockAlloc.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Block Allocator Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef BLOCK_ALLOC_H
+#define BLOCK_ALLOC_H
+
+/* Debugging -------------------------------------------------------------- */
+
+#ifdef DEBUG
+extern void checkFreeListSanity(void);
+nat countFreeList(void);
+#endif
+
+#endif /* BLOCK_ALLOC_H */
diff --git a/rts/Capability.c b/rts/Capability.c
new file mode 100644
index 0000000000..51a42ef468
--- /dev/null
+++ b/rts/Capability.c
@@ -0,0 +1,668 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2003-2006
+ *
+ * Capabilities
+ *
+ * A Capability represents the token required to execute STG code,
+ * and all the state an OS thread/task needs to run Haskell code:
+ * its STG registers, a pointer to its TSO, a nursery etc. During
+ * STG execution, a pointer to the capability is kept in a
+ * register (BaseReg; actually it is a pointer to cap->r).
+ *
+ * Only in a THREADED_RTS build will there be multiple capabilities,
+ * for non-threaded builds there is only one global capability, namely
+ * MainCapability.
+ *
+ * --------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "STM.h"
+#include "OSThreads.h"
+#include "Capability.h"
+#include "Schedule.h"
+#include "Sparks.h"
+
+// one global capability, this is the Capability for non-threaded
+// builds, and for +RTS -N1
+Capability MainCapability;
+
+nat n_capabilities;
+Capability *capabilities = NULL;
+
+// Holds the Capability which last became free. This is used so that
+// an in-call has a chance of quickly finding a free Capability.
+// Maintaining a global free list of Capabilities would require global
+// locking, so we don't do that.
+Capability *last_free_capability;
+
+#if defined(THREADED_RTS)
+STATIC_INLINE rtsBool
+globalWorkToDo (void)
+{
+ return blackholes_need_checking
+ || sched_state >= SCHED_INTERRUPTING
+ ;
+}
+#endif
+
+#if defined(THREADED_RTS)
+STATIC_INLINE rtsBool
+anyWorkForMe( Capability *cap, Task *task )
+{
+ if (task->tso != NULL) {
+ // A bound task only runs if its thread is on the run queue of
+ // the capability on which it was woken up. Otherwise, we
+ // can't be sure that we have the right capability: the thread
+ // might be woken up on some other capability, and task->cap
+ // could change under our feet.
+ return !emptyRunQueue(cap) && cap->run_queue_hd->bound == task;
+ } else {
+ // A vanilla worker task runs if either there is a lightweight
+ // thread at the head of the run queue, or the run queue is
+ // empty and (there are sparks to execute, or there is some
+ // other global condition to check, such as threads blocked on
+ // blackholes).
+ if (emptyRunQueue(cap)) {
+ return !emptySparkPoolCap(cap)
+ || !emptyWakeupQueue(cap)
+ || globalWorkToDo();
+ } else
+ return cap->run_queue_hd->bound == NULL;
+ }
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Manage the returning_tasks lists.
+ *
+ * These functions require cap->lock
+ * -------------------------------------------------------------------------- */
+
+#if defined(THREADED_RTS)
+STATIC_INLINE void
+newReturningTask (Capability *cap, Task *task)
+{
+ ASSERT_LOCK_HELD(&cap->lock);
+ ASSERT(task->return_link == NULL);
+ if (cap->returning_tasks_hd) {
+ ASSERT(cap->returning_tasks_tl->return_link == NULL);
+ cap->returning_tasks_tl->return_link = task;
+ } else {
+ cap->returning_tasks_hd = task;
+ }
+ cap->returning_tasks_tl = task;
+}
+
+STATIC_INLINE Task *
+popReturningTask (Capability *cap)
+{
+ ASSERT_LOCK_HELD(&cap->lock);
+ Task *task;
+ task = cap->returning_tasks_hd;
+ ASSERT(task);
+ cap->returning_tasks_hd = task->return_link;
+ if (!cap->returning_tasks_hd) {
+ cap->returning_tasks_tl = NULL;
+ }
+ task->return_link = NULL;
+ return task;
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Initialisation
+ *
+ * The Capability is initially marked not free.
+ * ------------------------------------------------------------------------- */
+
+static void
+initCapability( Capability *cap, nat i )
+{
+ nat g;
+
+ cap->no = i;
+ cap->in_haskell = rtsFalse;
+
+ cap->run_queue_hd = END_TSO_QUEUE;
+ cap->run_queue_tl = END_TSO_QUEUE;
+
+#if defined(THREADED_RTS)
+ initMutex(&cap->lock);
+ cap->running_task = NULL; // indicates cap is free
+ cap->spare_workers = NULL;
+ cap->suspended_ccalling_tasks = NULL;
+ cap->returning_tasks_hd = NULL;
+ cap->returning_tasks_tl = NULL;
+ cap->wakeup_queue_hd = END_TSO_QUEUE;
+ cap->wakeup_queue_tl = END_TSO_QUEUE;
+#endif
+
+ cap->f.stgGCEnter1 = (F_)__stg_gc_enter_1;
+ cap->f.stgGCFun = (F_)__stg_gc_fun;
+
+ cap->mut_lists = stgMallocBytes(sizeof(bdescr *) *
+ RtsFlags.GcFlags.generations,
+ "initCapability");
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ cap->mut_lists[g] = NULL;
+ }
+
+ cap->free_tvar_wait_queues = END_STM_WAIT_QUEUE;
+ cap->free_trec_chunks = END_STM_CHUNK_LIST;
+ cap->free_trec_headers = NO_TREC;
+ cap->transaction_tokens = 0;
+}
+
+/* ---------------------------------------------------------------------------
+ * Function: initCapabilities()
+ *
+ * Purpose: set up the Capability handling. For the THREADED_RTS build,
+ * we keep a table of them, the size of which is
+ * controlled by the user via the RTS flag -N.
+ *
+ * ------------------------------------------------------------------------- */
+void
+initCapabilities( void )
+{
+#if defined(THREADED_RTS)
+ nat i;
+
+#ifndef REG_Base
+ // We can't support multiple CPUs if BaseReg is not a register
+ if (RtsFlags.ParFlags.nNodes > 1) {
+ errorBelch("warning: multiple CPUs not supported in this build, reverting to 1");
+ RtsFlags.ParFlags.nNodes = 1;
+ }
+#endif
+
+ n_capabilities = RtsFlags.ParFlags.nNodes;
+
+ if (n_capabilities == 1) {
+ capabilities = &MainCapability;
+ // THREADED_RTS must work on builds that don't have a mutable
+ // BaseReg (eg. unregisterised), so in this case
+ // capabilities[0] must coincide with &MainCapability.
+ } else {
+ capabilities = stgMallocBytes(n_capabilities * sizeof(Capability),
+ "initCapabilities");
+ }
+
+ for (i = 0; i < n_capabilities; i++) {
+ initCapability(&capabilities[i], i);
+ }
+
+ IF_DEBUG(scheduler, sched_belch("allocated %d capabilities",
+ n_capabilities));
+
+#else /* !THREADED_RTS */
+
+ n_capabilities = 1;
+ capabilities = &MainCapability;
+ initCapability(&MainCapability, 0);
+
+#endif
+
+ // There are no free capabilities to begin with. We will start
+ // a worker Task to each Capability, which will quickly put the
+ // Capability on the free list when it finds nothing to do.
+ last_free_capability = &capabilities[0];
+}
+
+/* ----------------------------------------------------------------------------
+ * Give a Capability to a Task. The task must currently be sleeping
+ * on its condition variable.
+ *
+ * Requires cap->lock (modifies cap->running_task).
+ *
+ * When migrating a Task, the migrator must take task->lock before
+ * modifying task->cap, to synchronise with the waking up Task.
+ * Additionally, the migrator should own the Capability (when
+ * migrating the run queue), or cap->lock (when migrating
+ * returning_workers).
+ *
+ * ------------------------------------------------------------------------- */
+
+#if defined(THREADED_RTS)
+STATIC_INLINE void
+giveCapabilityToTask (Capability *cap USED_IF_DEBUG, Task *task)
+{
+ ASSERT_LOCK_HELD(&cap->lock);
+ ASSERT(task->cap == cap);
+ IF_DEBUG(scheduler,
+ sched_belch("passing capability %d to %s %p",
+ cap->no, task->tso ? "bound task" : "worker",
+ (void *)task->id));
+ ACQUIRE_LOCK(&task->lock);
+ task->wakeup = rtsTrue;
+ // the wakeup flag is needed because signalCondition() doesn't
+ // flag the condition if the thread is already runniing, but we want
+ // it to be sticky.
+ signalCondition(&task->cond);
+ RELEASE_LOCK(&task->lock);
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Function: releaseCapability(Capability*)
+ *
+ * Purpose: Letting go of a capability. Causes a
+ * 'returning worker' thread or a 'waiting worker'
+ * to wake up, in that order.
+ * ------------------------------------------------------------------------- */
+
+#if defined(THREADED_RTS)
+void
+releaseCapability_ (Capability* cap)
+{
+ Task *task;
+
+ task = cap->running_task;
+
+ ASSERT_PARTIAL_CAPABILITY_INVARIANTS(cap,task);
+
+ cap->running_task = NULL;
+
+ // Check to see whether a worker thread can be given
+ // the go-ahead to return the result of an external call..
+ if (cap->returning_tasks_hd != NULL) {
+ giveCapabilityToTask(cap,cap->returning_tasks_hd);
+ // The Task pops itself from the queue (see waitForReturnCapability())
+ return;
+ }
+
+ // If the next thread on the run queue is a bound thread,
+ // give this Capability to the appropriate Task.
+ if (!emptyRunQueue(cap) && cap->run_queue_hd->bound) {
+ // Make sure we're not about to try to wake ourselves up
+ ASSERT(task != cap->run_queue_hd->bound);
+ task = cap->run_queue_hd->bound;
+ giveCapabilityToTask(cap,task);
+ return;
+ }
+
+ if (!cap->spare_workers) {
+ // Create a worker thread if we don't have one. If the system
+ // is interrupted, we only create a worker task if there
+ // are threads that need to be completed. If the system is
+ // shutting down, we never create a new worker.
+ if (sched_state < SCHED_SHUTTING_DOWN || !emptyRunQueue(cap)) {
+ IF_DEBUG(scheduler,
+ sched_belch("starting new worker on capability %d", cap->no));
+ startWorkerTask(cap, workerStart);
+ return;
+ }
+ }
+
+ // If we have an unbound thread on the run queue, or if there's
+ // anything else to do, give the Capability to a worker thread.
+ if (!emptyRunQueue(cap) || !emptyWakeupQueue(cap)
+ || !emptySparkPoolCap(cap) || globalWorkToDo()) {
+ if (cap->spare_workers) {
+ giveCapabilityToTask(cap,cap->spare_workers);
+ // The worker Task pops itself from the queue;
+ return;
+ }
+ }
+
+ last_free_capability = cap;
+ IF_DEBUG(scheduler, sched_belch("freeing capability %d", cap->no));
+}
+
+void
+releaseCapability (Capability* cap USED_IF_THREADS)
+{
+ ACQUIRE_LOCK(&cap->lock);
+ releaseCapability_(cap);
+ RELEASE_LOCK(&cap->lock);
+}
+
+static void
+releaseCapabilityAndQueueWorker (Capability* cap USED_IF_THREADS)
+{
+ Task *task;
+
+ ACQUIRE_LOCK(&cap->lock);
+
+ task = cap->running_task;
+
+ // If the current task is a worker, save it on the spare_workers
+ // list of this Capability. A worker can mark itself as stopped,
+ // in which case it is not replaced on the spare_worker queue.
+ // This happens when the system is shutting down (see
+ // Schedule.c:workerStart()).
+ // Also, be careful to check that this task hasn't just exited
+ // Haskell to do a foreign call (task->suspended_tso).
+ if (!isBoundTask(task) && !task->stopped && !task->suspended_tso) {
+ task->next = cap->spare_workers;
+ cap->spare_workers = task;
+ }
+ // Bound tasks just float around attached to their TSOs.
+
+ releaseCapability_(cap);
+
+ RELEASE_LOCK(&cap->lock);
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * waitForReturnCapability( Task *task )
+ *
+ * Purpose: when an OS thread returns from an external call,
+ * it calls waitForReturnCapability() (via Schedule.resumeThread())
+ * to wait for permission to enter the RTS & communicate the
+ * result of the external call back to the Haskell thread that
+ * made it.
+ *
+ * ------------------------------------------------------------------------- */
+void
+waitForReturnCapability (Capability **pCap, Task *task)
+{
+#if !defined(THREADED_RTS)
+
+ MainCapability.running_task = task;
+ task->cap = &MainCapability;
+ *pCap = &MainCapability;
+
+#else
+ Capability *cap = *pCap;
+
+ if (cap == NULL) {
+ // Try last_free_capability first
+ cap = last_free_capability;
+ if (!cap->running_task) {
+ nat i;
+ // otherwise, search for a free capability
+ for (i = 0; i < n_capabilities; i++) {
+ cap = &capabilities[i];
+ if (!cap->running_task) {
+ break;
+ }
+ }
+ // Fall back to last_free_capability. NOTE(review): this also overwrites a free cap the loop above may have found — verify this is intended.
+ cap = last_free_capability;
+ }
+
+ // record the Capability as the one this Task is now associated with.
+ task->cap = cap;
+
+ } else {
+ ASSERT(task->cap == cap);
+ }
+
+ ACQUIRE_LOCK(&cap->lock);
+
+ IF_DEBUG(scheduler,
+ sched_belch("returning; I want capability %d", cap->no));
+
+ if (!cap->running_task) {
+ // It's free; just grab it
+ cap->running_task = task;
+ RELEASE_LOCK(&cap->lock);
+ } else {
+ newReturningTask(cap,task);
+ RELEASE_LOCK(&cap->lock);
+
+ for (;;) {
+ ACQUIRE_LOCK(&task->lock);
+ // task->lock held, cap->lock not held
+ if (!task->wakeup) waitCondition(&task->cond, &task->lock);
+ cap = task->cap;
+ task->wakeup = rtsFalse;
+ RELEASE_LOCK(&task->lock);
+
+ // now check whether we should wake up...
+ ACQUIRE_LOCK(&cap->lock);
+ if (cap->running_task == NULL) {
+ if (cap->returning_tasks_hd != task) {
+ giveCapabilityToTask(cap,cap->returning_tasks_hd);
+ RELEASE_LOCK(&cap->lock);
+ continue;
+ }
+ cap->running_task = task;
+ popReturningTask(cap);
+ RELEASE_LOCK(&cap->lock);
+ break;
+ }
+ RELEASE_LOCK(&cap->lock);
+ }
+
+ }
+
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+
+ IF_DEBUG(scheduler,
+ sched_belch("returning; got capability %d", cap->no));
+
+ *pCap = cap;
+#endif
+}
+
+#if defined(THREADED_RTS)
+/* ----------------------------------------------------------------------------
+ * yieldCapability
+ * ------------------------------------------------------------------------- */
+
+void
+yieldCapability (Capability** pCap, Task *task)
+{
+ Capability *cap = *pCap;
+
+ // The fast path has no locking, if we don't enter this while loop
+
+ while ( cap->returning_tasks_hd != NULL || !anyWorkForMe(cap,task) ) {
+ IF_DEBUG(scheduler, sched_belch("giving up capability %d", cap->no));
+
+ // We must now release the capability and wait to be woken up
+ // again.
+ task->wakeup = rtsFalse;
+ releaseCapabilityAndQueueWorker(cap);
+
+ for (;;) {
+ ACQUIRE_LOCK(&task->lock);
+ // task->lock held, cap->lock not held
+ if (!task->wakeup) waitCondition(&task->cond, &task->lock);
+ cap = task->cap;
+ task->wakeup = rtsFalse;
+ RELEASE_LOCK(&task->lock);
+
+ IF_DEBUG(scheduler, sched_belch("woken up on capability %d", cap->no));
+ ACQUIRE_LOCK(&cap->lock);
+ if (cap->running_task != NULL) {
+ IF_DEBUG(scheduler, sched_belch("capability %d is owned by another task", cap->no));
+ RELEASE_LOCK(&cap->lock);
+ continue;
+ }
+
+ if (task->tso == NULL) {
+ ASSERT(cap->spare_workers != NULL);
+ // if we're not at the front of the queue, release it
+ // again. This is unlikely to happen.
+ if (cap->spare_workers != task) {
+ giveCapabilityToTask(cap,cap->spare_workers);
+ RELEASE_LOCK(&cap->lock);
+ continue;
+ }
+ cap->spare_workers = task->next;
+ task->next = NULL;
+ }
+ cap->running_task = task;
+ RELEASE_LOCK(&cap->lock);
+ break;
+ }
+
+ IF_DEBUG(scheduler, sched_belch("got capability %d", cap->no));
+ ASSERT(cap->running_task == task);
+ }
+
+ *pCap = cap;
+
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+
+ return;
+}
+
+/* ----------------------------------------------------------------------------
+ * Wake up a thread on a Capability.
+ *
+ * This is used when the current Task is running on a Capability and
+ * wishes to wake up a thread on a different Capability.
+ * ------------------------------------------------------------------------- */
+
+void
+wakeupThreadOnCapability (Capability *cap, StgTSO *tso)
+{
+ ASSERT(tso->cap == cap);
+ ASSERT(tso->bound ? tso->bound->cap == cap : 1);
+
+ ACQUIRE_LOCK(&cap->lock);
+ if (cap->running_task == NULL) {
+ // nobody is running this Capability, we can add our thread
+ // directly onto the run queue and start up a Task to run it.
+ appendToRunQueue(cap,tso);
+
+ // start it up
+ cap->running_task = myTask(); // precond for releaseCapability_()
+ releaseCapability_(cap);
+ } else {
+ appendToWakeupQueue(cap,tso);
+ // someone is running on this Capability, so it cannot be
+ // freed without first checking the wakeup queue (see
+ // releaseCapability_).
+ }
+ RELEASE_LOCK(&cap->lock);
+}
+
+/* ----------------------------------------------------------------------------
+ * prodCapabilities
+ *
+ * Used to indicate that the interrupted flag is now set, or some
+ * other global condition that might require waking up a Task on each
+ * Capability.
+ * ------------------------------------------------------------------------- */
+
+static void
+prodCapabilities(rtsBool all)
+{
+ nat i;
+ Capability *cap;
+ Task *task;
+
+ for (i=0; i < n_capabilities; i++) {
+ cap = &capabilities[i];
+ ACQUIRE_LOCK(&cap->lock);
+ if (!cap->running_task) {
+ if (cap->spare_workers) {
+ task = cap->spare_workers;
+ ASSERT(!task->stopped);
+ giveCapabilityToTask(cap,task);
+ if (!all) {
+ RELEASE_LOCK(&cap->lock);
+ return;
+ }
+ }
+ }
+ RELEASE_LOCK(&cap->lock);
+ }
+ return;
+}
+
+void
+prodAllCapabilities (void)
+{
+ prodCapabilities(rtsTrue);
+}
+
+/* ----------------------------------------------------------------------------
+ * prodOneCapability
+ *
+ * Like prodAllCapabilities, but we only require a single Task to wake
+ * up in order to service some global event, such as checking for
+ * deadlock after some idle time has passed.
+ * ------------------------------------------------------------------------- */
+
+void
+prodOneCapability (void)
+{
+ prodCapabilities(rtsFalse);
+}
+
+/* ----------------------------------------------------------------------------
+ * shutdownCapability
+ *
+ * At shutdown time, we want to let everything exit as cleanly as
+ * possible. For each capability, we let its run queue drain, and
+ * allow the workers to stop.
+ *
+ * This function should be called when interrupted and
+ * shutting_down_scheduler = rtsTrue, thus any worker that wakes up
+ * will exit the scheduler and call taskStop(), and any bound thread
+ * that wakes up will return to its caller. Runnable threads are
+ * killed.
+ *
+ * ------------------------------------------------------------------------- */
+
+void
+shutdownCapability (Capability *cap, Task *task)
+{
+ nat i;
+
+ ASSERT(sched_state == SCHED_SHUTTING_DOWN);
+
+ task->cap = cap;
+
+ for (i = 0; i < 50; i++) {
+ IF_DEBUG(scheduler, sched_belch("shutting down capability %d, attempt %d", cap->no, i));
+ ACQUIRE_LOCK(&cap->lock);
+ if (cap->running_task) {
+ RELEASE_LOCK(&cap->lock);
+ IF_DEBUG(scheduler, sched_belch("not owner, yielding"));
+ yieldThread();
+ continue;
+ }
+ cap->running_task = task;
+ if (!emptyRunQueue(cap) || cap->spare_workers) {
+ IF_DEBUG(scheduler, sched_belch("runnable threads or workers still alive, yielding"));
+ releaseCapability_(cap); // this will wake up a worker
+ RELEASE_LOCK(&cap->lock);
+ yieldThread();
+ continue;
+ }
+ IF_DEBUG(scheduler, sched_belch("capability %d is stopped.", cap->no));
+ RELEASE_LOCK(&cap->lock);
+ break;
+ }
+ // we now have the Capability, its run queue and spare workers
+ // list are both empty.
+}
+
+/* ----------------------------------------------------------------------------
+ * tryGrabCapability
+ *
+ * Attempt to gain control of a Capability if it is free.
+ *
+ * ------------------------------------------------------------------------- */
+
+rtsBool
+tryGrabCapability (Capability *cap, Task *task)
+{
+ if (cap->running_task != NULL) return rtsFalse;
+ ACQUIRE_LOCK(&cap->lock);
+ if (cap->running_task != NULL) {
+ RELEASE_LOCK(&cap->lock);
+ return rtsFalse;
+ }
+ task->cap = cap;
+ cap->running_task = task;
+ RELEASE_LOCK(&cap->lock);
+ return rtsTrue;
+}
+
+
+#endif /* THREADED_RTS */
+
+
diff --git a/rts/Capability.h b/rts/Capability.h
new file mode 100644
index 0000000000..a2551d0cc5
--- /dev/null
+++ b/rts/Capability.h
@@ -0,0 +1,250 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001-2006
+ *
+ * Capabilities
+ *
+ * The notion of a capability is used when operating in multi-threaded
+ * environments (which the THREADED_RTS build of the RTS does), to
+ * hold all the state an OS thread/task needs to run Haskell code:
+ * its STG registers, a pointer to its TSO, a nursery etc. During
+ * STG execution, a pointer to the capability is kept in a
+ * register (BaseReg).
+ *
+ * Only in a THREADED_RTS build will there be multiple capabilities,
+ * in the non-threaded builds there is one global capability, namely
+ * MainCapability.
+ *
+ * This header file contains the functions for working with capabilities.
+ * (the main, and only, consumer of this interface is the scheduler).
+ *
+ * --------------------------------------------------------------------------*/
+
+#ifndef CAPABILITY_H
+#define CAPABILITY_H
+
+#include "RtsFlags.h"
+#include "Task.h"
+
+struct Capability_ {
+ // State required by the STG virtual machine when running Haskell
+ // code. During STG execution, the BaseReg register always points
+ // to the StgRegTable of the current Capability (&cap->r).
+ StgFunTable f;
+ StgRegTable r;
+
+ nat no; // capability number.
+
+ // The Task currently holding this Capability. This task has
+ // exclusive access to the contents of this Capability (apart from
+ // returning_tasks_hd/returning_tasks_tl).
+ // Locks required: cap->lock.
+ Task *running_task;
+
+ // true if this Capability is running Haskell code, used for
+ // catching unsafe call-ins.
+ rtsBool in_haskell;
+
+ // The run queue. The Task owning this Capability has exclusive
+ // access to its run queue, so can wake up threads without
+ // taking a lock, and the common path through the scheduler is
+ // also lock-free.
+ StgTSO *run_queue_hd;
+ StgTSO *run_queue_tl;
+
+ // Tasks currently making safe foreign calls. Doubly-linked.
+ // When returning, a task first acquires the Capability before
+ // removing itself from this list, so that the GC can find all
+ // the suspended TSOs easily. Hence, when migrating a Task from
+ // the returning_tasks list, we must also migrate its entry from
+ // this list.
+ Task *suspended_ccalling_tasks;
+
+ // One mutable list per generation, so we don't need to take any
+ // locks when updating an old-generation thunk. These
+ // mini-mut-lists are moved onto the respective gen->mut_list at
+ // each GC.
+ bdescr **mut_lists;
+
+#if defined(THREADED_RTS)
+ // Worker Tasks waiting in the wings. Singly-linked.
+ Task *spare_workers;
+
+ // This lock protects running_task, returning_tasks_{hd,tl}, wakeup_queue.
+ Mutex lock;
+
+ // Tasks waiting to return from a foreign call, or waiting to make
+ // a new call-in using this Capability (NULL if empty).
+ // NB. this field needs to be modified by tasks other than the
+ // running_task, so it requires cap->lock to modify. A task can
+ // check whether it is NULL without taking the lock, however.
+ Task *returning_tasks_hd; // Singly-linked, with head/tail
+ Task *returning_tasks_tl;
+
+ // A list of threads to append to this Capability's run queue at
+ // the earliest opportunity. These are threads that have been
+ // woken up by another Capability.
+ StgTSO *wakeup_queue_hd;
+ StgTSO *wakeup_queue_tl;
+#endif
+
+ // Per-capability STM-related data
+ StgTVarWaitQueue *free_tvar_wait_queues;
+ StgTRecChunk *free_trec_chunks;
+ StgTRecHeader *free_trec_headers;
+ nat transaction_tokens;
+}; // typedef Capability, defined in RtsAPI.h
+
+
+#if defined(THREADED_RTS)
+#define ASSERT_TASK_ID(task) ASSERT(task->id == osThreadId())
+#else
+#define ASSERT_TASK_ID(task) /*empty*/
+#endif
+
+// These properties should be true when a Task is holding a Capability
+#define ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task) \
+ ASSERT(cap->running_task != NULL && cap->running_task == task); \
+ ASSERT(task->cap == cap); \
+ ASSERT_PARTIAL_CAPABILITY_INVARIANTS(cap,task)
+
+// Sometimes a Task holds a Capability, but the Task is not associated
+// with that Capability (ie. task->cap != cap). This happens when
+// (a) a Task holds multiple Capabilities, and (b) when the current
+// Task is bound, its thread has just blocked, and it may have been
+// moved to another Capability.
+#define ASSERT_PARTIAL_CAPABILITY_INVARIANTS(cap,task) \
+ ASSERT(cap->run_queue_hd == END_TSO_QUEUE ? \
+ cap->run_queue_tl == END_TSO_QUEUE : 1); \
+ ASSERT(myTask() == task); \
+ ASSERT_TASK_ID(task);
+
+// Converts a *StgRegTable into a *Capability.
+//
+INLINE_HEADER Capability *
+regTableToCapability (StgRegTable *reg)
+{
+ return (Capability *)((void *)((unsigned char*)reg - sizeof(StgFunTable)));
+}
+
+// Initialise the available capabilities.
+//
+void initCapabilities (void);
+
+// Release a capability. This is called by a Task that is exiting
+// Haskell to make a foreign call, or in various other cases when we
+// want to relinquish a Capability that we currently hold.
+//
+// ASSUMES: cap->running_task is the current Task.
+//
+#if defined(THREADED_RTS)
+void releaseCapability (Capability* cap);
+void releaseCapability_ (Capability* cap); // assumes cap->lock is held
+#else
+// releaseCapability() is empty in non-threaded RTS
+INLINE_HEADER void releaseCapability (Capability* cap STG_UNUSED) {};
+INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED) {};
+#endif
+
+#if !IN_STG_CODE
+// one global capability
+extern Capability MainCapability;
+#endif
+
+// Array of all the capabilities
+//
+extern nat n_capabilities;
+extern Capability *capabilities;
+
+// The Capability that was last free. Used as a good guess for where
+// to assign new threads.
+//
+extern Capability *last_free_capability;
+
+// Acquires a capability at a return point. If *cap is non-NULL, then
+// this is taken as a preference for the Capability we wish to
+// acquire.
+//
+// OS threads waiting in this function get priority over those waiting
+// in waitForCapability().
+//
+// On return, *cap is non-NULL, and points to the Capability acquired.
+//
+void waitForReturnCapability (Capability **cap/*in/out*/, Task *task);
+
+INLINE_HEADER void recordMutableCap (StgClosure *p, Capability *cap, nat gen);
+
+#if defined(THREADED_RTS)
+
+// Gives up the current capability IFF there is a higher-priority
+// thread waiting for it. This happens in one of two ways:
+//
+// (a) we are passing the capability to another OS thread, so
+// that it can run a bound Haskell thread, or
+//
+// (b) there is an OS thread waiting to return from a foreign call
+//
+// NOTE(review): in this implementation *pCap is never NULL on return —
+// yieldCapability() loops until the Task re-acquires a Capability, so the
+// "NULL if released / re-acquire via waitForCapability()" wording is stale.
+void yieldCapability (Capability** pCap, Task *task);
+
+// Acquires a capability for doing some work.
+//
+// On return: pCap points to the capability.
+//
+void waitForCapability (Task *task, Mutex *mutex, Capability **pCap);
+
+// Wakes up a thread on a Capability (probably a different Capability
+// from the one held by the current Task).
+//
+void wakeupThreadOnCapability (Capability *cap, StgTSO *tso);
+
+// Wakes up a worker thread on just one Capability, used when we
+// need to service some global event.
+//
+void prodOneCapability (void);
+
+// Similar to prodOneCapability(), but prods all of them.
+//
+void prodAllCapabilities (void);
+
+// Waits for a capability to drain of runnable threads and workers,
+// and then acquires it. Used at shutdown time.
+//
+void shutdownCapability (Capability *cap, Task *task);
+
+// Attempt to gain control of a Capability if it is free.
+//
+rtsBool tryGrabCapability (Capability *cap, Task *task);
+
+#else // !THREADED_RTS
+
+// Grab a capability. (Only in the non-threaded RTS; in the threaded
+// RTS one of the waitFor*Capability() functions must be used).
+//
+extern void grabCapability (Capability **pCap);
+
+#endif /* !THREADED_RTS */
+
+/* -----------------------------------------------------------------------------
+ * INLINE functions... private below here
+ * -------------------------------------------------------------------------- */
+
+INLINE_HEADER void
+recordMutableCap (StgClosure *p, Capability *cap, nat gen)
+{
+ bdescr *bd;
+
+ bd = cap->mut_lists[gen];
+ if (bd->free >= bd->start + BLOCK_SIZE_W) {
+ bdescr *new_bd;
+ new_bd = allocBlock_lock();
+ new_bd->link = bd;
+ bd = new_bd;
+ cap->mut_lists[gen] = bd;
+ }
+ *bd->free++ = (StgWord)p;
+}
+
+#endif /* CAPABILITY_H */
diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c
new file mode 100644
index 0000000000..5545693362
--- /dev/null
+++ b/rts/ClosureFlags.c
@@ -0,0 +1,107 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-1999
+ *
+ * Closure type flags
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+StgWord16 closure_flags[] = {
+
+/*
+ * These *must* be in the same order as the closure types in
+ * ClosureTypes.h.
+ */
+
+/* ToDo: some of these flags seem to be duplicated.
+ * - NS is the same as HNF, and the negation of THU
+ * (however, we set NS for indirections, which is probably the
+ * right thing to do, since we never get indirections pointing
+ * to thunks.)
+ */
+
+/* 0 1 2 3 4 5 6 7 */
+/* HNF BTM NS STA THU MUT UPT SRT */
+
+/* INVALID_OBJECT = */ ( 0 ),
+/* CONSTR = */ (_HNF| _NS ),
+/* CONSTR_1_0 = */ (_HNF| _NS ),
+/* CONSTR_0_1 = */ (_HNF| _NS ),
+/* CONSTR_2_0 = */ (_HNF| _NS ),
+/* CONSTR_1_1 = */ (_HNF| _NS ),
+/* CONSTR_0_2 = */ (_HNF| _NS ),
+/* CONSTR_INTLIKE = */ (_HNF| _NS|_STA ),
+/* CONSTR_CHARLIKE = */ (_HNF| _NS|_STA ),
+/* CONSTR_STATIC = */ (_HNF| _NS|_STA ),
+/* CONSTR_NOCAF_STATIC = */ (_HNF| _NS|_STA ),
+/* FUN = */ (_HNF| _NS| _SRT ),
+/* FUN_1_0 = */ (_HNF| _NS| _SRT ),
+/* FUN_0_1 = */ (_HNF| _NS| _SRT ),
+/* FUN_2_0 = */ (_HNF| _NS| _SRT ),
+/* FUN_1_1 = */ (_HNF| _NS| _SRT ),
+/* FUN_0_2 = */ (_HNF| _NS| _SRT ),
+/* FUN_STATIC = */ (_HNF| _NS|_STA| _SRT ),
+/* THUNK = */ ( _BTM| _THU| _SRT ),
+/* THUNK_1_0 = */ ( _BTM| _THU| _SRT ),
+/* THUNK_0_1 = */ ( _BTM| _THU| _SRT ),
+/* THUNK_2_0 = */ ( _BTM| _THU| _SRT ),
+/* THUNK_1_1 = */ ( _BTM| _THU| _SRT ),
+/* THUNK_0_2 = */ ( _BTM| _THU| _SRT ),
+/* THUNK_STATIC = */ ( _BTM| _STA|_THU| _SRT ),
+/* THUNK_SELECTOR = */ ( _BTM| _THU| _SRT ),
+/* BCO = */ (_HNF| _NS ),
+/* AP = */ ( _THU ),
+/* PAP = */ (_HNF| _NS ),
+/* AP_STACK = */ ( _THU ),
+/* IND = */ ( _NS| _IND ),
+/* IND_OLDGEN = */ ( _NS| _IND ),
+/* IND_PERM = */ ( _NS| _IND ),
+/* IND_OLDGEN_PERM = */ ( _NS| _IND ),
+/* IND_STATIC = */ ( _NS|_STA| _IND ),
+/* RET_BCO = */ ( _BTM ),
+/* RET_SMALL = */ ( _BTM| _SRT ),
+/* RET_VEC_SMALL = */ ( _BTM| _SRT ),
+/* RET_BIG = */ ( _SRT ),
+/* RET_VEC_BIG = */ ( _SRT ),
+/* RET_DYN = */ ( _SRT ),
+/* RET_FUN = */ ( 0 ),
+/* UPDATE_FRAME = */ ( _BTM ),
+/* CATCH_FRAME = */ ( _BTM ),
+/* STOP_FRAME = */ ( _BTM ),
+/* CAF_BLACKHOLE = */ ( _BTM|_NS| _UPT ),
+/* BLACKHOLE = */ ( _NS| _UPT ),
+/* SE_BLACKHOLE = */ ( _NS| _UPT ),
+/* SE_CAF_BLACKHOLE = */ ( _NS| _UPT ),
+/* MVAR = */ (_HNF| _NS| _MUT|_UPT ),
+/* ARR_WORDS = */ (_HNF| _NS| _UPT ),
+/* MUT_ARR_PTRS_CLEAN = */ (_HNF| _NS| _MUT|_UPT ),
+/* MUT_ARR_PTRS_DIRTY = */ (_HNF| _NS| _MUT|_UPT ),
+/* MUT_ARR_PTRS_FROZEN0 = */ (_HNF| _NS| _MUT|_UPT ),
+/* MUT_ARR_PTRS_FROZEN = */ (_HNF| _NS| _UPT ),
+/* MUT_VAR_CLEAN = */ (_HNF| _NS| _MUT|_UPT ),
+/* MUT_VAR_DIRTY = */ (_HNF| _NS| _MUT|_UPT ),
+/* WEAK = */ (_HNF| _NS| _UPT ),
+/* STABLE_NAME = */ (_HNF| _NS| _UPT ),
+/* TSO = */ (_HNF| _NS| _MUT|_UPT ),
+/* BLOCKED_FETCH = */ (_HNF| _NS| _MUT|_UPT ),
+/* FETCH_ME = */ (_HNF| _NS| _MUT|_UPT ),
+/* FETCH_ME_BQ = */ ( _NS| _MUT|_UPT ),
+/* RBH = */ ( _NS| _MUT|_UPT ),
+/* EVACUATED = */ ( 0 ),
+/* REMOTE_REF = */ (_HNF| _NS| _UPT ),
+/* TVAR_WAIT_QUEUE = */ ( _NS| _MUT|_UPT ),
+/* TVAR = */ (_HNF| _NS| _MUT|_UPT ),
+/* TREC_CHUNK = */ ( _NS| _MUT|_UPT ),
+/* TREC_HEADER = */ ( _NS| _MUT|_UPT ),
+/* ATOMICALLY_FRAME = */ ( _BTM ),
+/* CATCH_RETRY_FRAME = */ ( _BTM ),
+/* CATCH_STM_FRAME = */ ( _BTM )
+};
+
+#if N_CLOSURE_TYPES != 73
+#error Closure types changed: update ClosureFlags.c!
+#endif
+
diff --git a/rts/Disassembler.c b/rts/Disassembler.c
new file mode 100644
index 0000000000..b084a29b89
--- /dev/null
+++ b/rts/Disassembler.c
@@ -0,0 +1,281 @@
+/* -----------------------------------------------------------------------------
+ * Bytecode disassembler
+ *
+ * Copyright (c) 1994-2002.
+ *
+ * $RCSfile: Disassembler.c,v $
+ * $Revision: 1.29 $
+ * $Date: 2004/09/03 15:28:19 $
+ * ---------------------------------------------------------------------------*/
+
+#ifdef DEBUG
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsAPI.h"
+#include "RtsUtils.h"
+#include "Closures.h"
+#include "TSO.h"
+#include "Schedule.h"
+
+#include "Bytecodes.h"
+#include "Printer.h"
+#include "Disassembler.h"
+#include "Interpreter.h"
+
+/* --------------------------------------------------------------------------
+ * Disassembler
+ * ------------------------------------------------------------------------*/
+
+int
+disInstr ( StgBCO *bco, int pc )
+{
+ int i;
+
+ StgWord16* instrs = (StgWord16*)(bco->instrs->payload);
+
+ StgArrWords* literal_arr = bco->literals;
+ StgWord* literals = (StgWord*)(&literal_arr->payload[0]);
+
+ StgMutArrPtrs* ptrs_arr = bco->ptrs;
+ StgPtr* ptrs = (StgPtr*)(&ptrs_arr->payload[0]);
+
+ StgArrWords* itbls_arr = bco->itbls;
+ StgInfoTable** itbls = (StgInfoTable**)(&itbls_arr->payload[0]);
+
+ switch (instrs[pc++]) {
+ case bci_SWIZZLE:
+ debugBelch("SWIZZLE stkoff %d by %d\n",
+ instrs[pc], (signed int)instrs[pc+1]);
+ pc += 2; break;
+ case bci_CCALL:
+ debugBelch("CCALL marshaller at 0x%x\n",
+ literals[instrs[pc]] );
+ pc += 1; break;
+ case bci_STKCHECK:
+ debugBelch("STKCHECK %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH_L:
+ debugBelch("PUSH_L %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PUSH_LL:
+ debugBelch("PUSH_LL %d %d\n", instrs[pc], instrs[pc+1] );
+ pc += 2; break;
+ case bci_PUSH_LLL:
+ debugBelch("PUSH_LLL %d %d %d\n", instrs[pc], instrs[pc+1],
+ instrs[pc+2] );
+ pc += 3; break;
+ case bci_PUSH_G:
+ debugBelch("PUSH_G " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n" );
+ pc += 1; break;
+
+ case bci_PUSH_ALTS:
+ debugBelch("PUSH_ALTS " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_ALTS_P:
+ debugBelch("PUSH_ALTS_P " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_ALTS_N:
+ debugBelch("PUSH_ALTS_N " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_ALTS_F:
+ debugBelch("PUSH_ALTS_F " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_ALTS_D:
+ debugBelch("PUSH_ALTS_D " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_ALTS_L:
+ debugBelch("PUSH_ALTS_L " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+ case bci_PUSH_ALTS_V:
+ debugBelch("PUSH_ALTS_V " ); printPtr( ptrs[instrs[pc]] );
+ debugBelch("\n");
+ pc += 1; break;
+
+ case bci_PUSH_UBX:
+ debugBelch("PUSH_UBX ");
+ for (i = 0; i < instrs[pc+1]; i++)
+ debugBelch("0x%x ", literals[i + instrs[pc]] );
+ debugBelch("\n");
+ pc += 2; break;
+ case bci_PUSH_APPLY_N:
+ debugBelch("PUSH_APPLY_N\n");
+ break;
+ case bci_PUSH_APPLY_V:
+ debugBelch("PUSH_APPLY_V\n");
+ break;
+ case bci_PUSH_APPLY_F:
+ debugBelch("PUSH_APPLY_F\n");
+ break;
+ case bci_PUSH_APPLY_D:
+ debugBelch("PUSH_APPLY_D\n");
+ break;
+ case bci_PUSH_APPLY_L:
+ debugBelch("PUSH_APPLY_L\n");
+ break;
+ case bci_PUSH_APPLY_P:
+ debugBelch("PUSH_APPLY_P\n");
+ break;
+ case bci_PUSH_APPLY_PP:
+ debugBelch("PUSH_APPLY_PP\n");
+ break;
+ case bci_PUSH_APPLY_PPP:
+ debugBelch("PUSH_APPLY_PPP\n");
+ break;
+ case bci_PUSH_APPLY_PPPP:
+ debugBelch("PUSH_APPLY_PPPP\n");
+ break;
+ case bci_PUSH_APPLY_PPPPP:
+ debugBelch("PUSH_APPLY_PPPPP\n");
+ break;
+ case bci_PUSH_APPLY_PPPPPP:
+ debugBelch("PUSH_APPLY_PPPPPP\n");
+ break;
+ case bci_SLIDE:
+ debugBelch("SLIDE %d down by %d\n", instrs[pc], instrs[pc+1] );
+ pc += 2; break;
+ case bci_ALLOC_AP:
+ debugBelch("ALLOC_AP %d words\n", instrs[pc] );
+ pc += 1; break;
+ case bci_ALLOC_PAP:
+ debugBelch("ALLOC_PAP %d words, %d arity\n",
+ instrs[pc], instrs[pc+1] );
+ pc += 2; break;
+ case bci_MKAP:
+ debugBelch("MKAP %d words, %d stkoff\n", instrs[pc+1],
+ instrs[pc] );
+ pc += 2; break;
+ case bci_UNPACK:
+ debugBelch("UNPACK %d\n", instrs[pc] );
+ pc += 1; break;
+ case bci_PACK:
+ debugBelch("PACK %d words with itbl ", instrs[pc+1] );
+ printPtr( (StgPtr)itbls[instrs[pc]] );
+ debugBelch("\n");
+ pc += 2; break;
+
+ case bci_TESTLT_I:
+ debugBelch("TESTLT_I %d, fail to %d\n", literals[instrs[pc]],
+ instrs[pc+1]);
+ pc += 2; break;
+ case bci_TESTEQ_I:
+ debugBelch("TESTEQ_I %d, fail to %d\n", literals[instrs[pc]],
+ instrs[pc+1]);
+ pc += 2; break;
+
+ case bci_TESTLT_F:
+ debugBelch("TESTLT_F %d, fail to %d\n", literals[instrs[pc]],
+ instrs[pc+1]);
+ pc += 2; break;
+ case bci_TESTEQ_F:
+ debugBelch("TESTEQ_F %d, fail to %d\n", literals[instrs[pc]],
+ instrs[pc+1]);
+ pc += 2; break;
+
+ case bci_TESTLT_D:
+ debugBelch("TESTLT_D %d, fail to %d\n", literals[instrs[pc]],
+ instrs[pc+1]);
+ pc += 2; break;
+ case bci_TESTEQ_D:
+ debugBelch("TESTEQ_D %d, fail to %d\n", literals[instrs[pc]],
+ instrs[pc+1]);
+ pc += 2; break;
+
+ case bci_TESTLT_P:
+ debugBelch("TESTLT_P %d, fail to %d\n", instrs[pc],
+ instrs[pc+1]);
+ pc += 2; break;
+ case bci_TESTEQ_P:
+ debugBelch("TESTEQ_P %d, fail to %d\n", instrs[pc],
+ instrs[pc+1]);
+ pc += 2; break;
+ case bci_CASEFAIL:
+ debugBelch("CASEFAIL\n" );
+ break;
+ case bci_JMP:
+ debugBelch("JMP to %d\n", instrs[pc]);
+ pc += 1; break;
+
+ case bci_ENTER:
+ debugBelch("ENTER\n");
+ break;
+
+ case bci_RETURN:
+ debugBelch("RETURN\n" );
+ break;
+ case bci_RETURN_P:
+ debugBelch("RETURN_P\n" );
+ break;
+ case bci_RETURN_N:
+ debugBelch("RETURN_N\n" );
+ break;
+ case bci_RETURN_F:
+ debugBelch("RETURN_F\n" );
+ break;
+ case bci_RETURN_D:
+ debugBelch("RETURN_D\n" );
+ break;
+ case bci_RETURN_L:
+ debugBelch("RETURN_L\n" );
+ break;
+ case bci_RETURN_V:
+ debugBelch("RETURN_V\n" );
+ break;
+
+ default:
+ barf("disInstr: unknown opcode");
+ }
+ return pc;
+}
+
+
+/* Something of a kludge .. how do we know where the end of the insn
+ array is, since it isn't recorded anywhere? Answer: the first
+ short is the number of bytecodes which follow it.
+ See ByteCodeGen.linkBCO.insns_arr for construction ...
+*/
+void disassemble( StgBCO *bco )
+{
+ nat i, j;
+ StgWord16* instrs = (StgWord16*)(bco->instrs->payload);
+ StgMutArrPtrs* ptrs = bco->ptrs;
+ nat nbcs = (int)instrs[0];
+ nat pc = 1;
+
+ debugBelch("BCO\n" );
+ pc = 1;
+ while (pc <= nbcs) {
+ debugBelch("\t%2d: ", pc );
+ pc = disInstr ( bco, pc );
+ }
+
+ debugBelch("INSTRS:\n " );
+ j = 16;
+ for (i = 0; i < nbcs; i++) {
+ debugBelch("%3d ", (int)instrs[i] );
+ j--;
+ if (j == 0) { j = 16; debugBelch("\n "); };
+ }
+ debugBelch("\n");
+
+ debugBelch("PTRS:\n " );
+ j = 8;
+ for (i = 0; i < ptrs->ptrs; i++) {
+ debugBelch("%8p ", ptrs->payload[i] );
+ j--;
+ if (j == 0) { j = 8; debugBelch("\n "); };
+ }
+ debugBelch("\n");
+
+ debugBelch("\n");
+ ASSERT(pc == nbcs+1);
+}
+
+#endif /* DEBUG */
diff --git a/rts/Disassembler.h b/rts/Disassembler.h
new file mode 100644
index 0000000000..2851097117
--- /dev/null
+++ b/rts/Disassembler.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Prototypes for functions in Disassembler.c
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef DISASSEMBLER_H
+#define DISASSEMBLER_H
+
+#ifdef DEBUG
+
+extern int disInstr ( StgBCO *bco, int pc );
+extern void disassemble( StgBCO *bco );
+
+#endif
+
+#endif /* DISASSEMBLER_H */
diff --git a/rts/Exception.cmm b/rts/Exception.cmm
new file mode 100644
index 0000000000..b5c29626b2
--- /dev/null
+++ b/rts/Exception.cmm
@@ -0,0 +1,446 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Exception support
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+/* -----------------------------------------------------------------------------
+ Exception Primitives
+
+ A thread can request that asynchronous exceptions not be delivered
+ ("blocked") for the duration of an I/O computation. The primitive
+
+ blockAsyncExceptions# :: IO a -> IO a
+
+ is used for this purpose. During a blocked section, asynchronous
+ exceptions may be unblocked again temporarily:
+
+ unblockAsyncExceptions# :: IO a -> IO a
+
+ Furthermore, asynchronous exceptions are blocked automatically during
+ the execution of an exception handler. Both of these primitives
+ leave a continuation on the stack which reverts to the previous
+ state (blocked or unblocked) on exit.
+
+ A thread which wants to raise an exception in another thread (using
+ killThread#) must block until the target thread is ready to receive
+ it. The action of unblocking exceptions in a thread will release all
+ the threads waiting to deliver exceptions to that thread.
+
+ NB. there's a bug in here. If a thread is inside an
+ unsafePerformIO, and inside blockAsyncExceptions# (there is an
+ unblockAsyncExceptions_ret on the stack), and it is blocked in an
+ interruptible operation, and it receives an exception, then the
+ unsafePerformIO thunk will be updated with a stack object
+ containing the unblockAsyncExceptions_ret frame. Later, when
+ someone else evaluates this thunk, the blocked exception state is
+ not restored, and the result is that unblockAsyncExceptions_ret
+ will attempt to unblock exceptions in the current thread, but it'll
+ find that the CurrentTSO->blocked_exceptions is NULL. Hence, we
+ work around this by checking for NULL in awakenBlockedQueue().
+
+ -------------------------------------------------------------------------- */
+
+INFO_TABLE_RET( stg_unblockAsyncExceptionszh_ret,
+ 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
+{
+ // Not true: see comments above
+ // ASSERT(StgTSO_blocked_exceptions(CurrentTSO) != NULL);
+#if defined(GRAN) || defined(PAR)
+ foreign "C" awakenBlockedQueue(MyCapability() "ptr", StgTSO_blocked_exceptions(CurrentTSO) "ptr",
+ NULL "ptr");
+#else
+ foreign "C" awakenBlockedQueue(MyCapability() "ptr", StgTSO_blocked_exceptions(CurrentTSO) "ptr");
+#endif
+ StgTSO_blocked_exceptions(CurrentTSO) = NULL;
+#ifdef REG_R1
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(0));
+#else
+ Sp(1) = Sp(0);
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(1));
+#endif
+}
+
+INFO_TABLE_RET( stg_blockAsyncExceptionszh_ret,
+ 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
+{
+ // Not true: see comments above
+ // ASSERT(StgTSO_blocked_exceptions(CurrentTSO) == NULL);
+ StgTSO_blocked_exceptions(CurrentTSO) = END_TSO_QUEUE;
+#ifdef REG_R1
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(0));
+#else
+ Sp(1) = Sp(0);
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(1));
+#endif
+}
+
+blockAsyncExceptionszh_fast
+{
+ /* Args: R1 :: IO a */
+ STK_CHK_GEN( WDS(2)/* worst case */, R1_PTR, blockAsyncExceptionszh_fast);
+
+ if (StgTSO_blocked_exceptions(CurrentTSO) == NULL) {
+ StgTSO_blocked_exceptions(CurrentTSO) = END_TSO_QUEUE;
+ /* avoid growing the stack unnecessarily */
+ if (Sp(0) == stg_blockAsyncExceptionszh_ret_info) {
+ Sp_adj(1);
+ } else {
+ Sp_adj(-1);
+ Sp(0) = stg_unblockAsyncExceptionszh_ret_info;
+ }
+ }
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_v();
+ jump stg_ap_v_fast;
+}
+
+unblockAsyncExceptionszh_fast
+{
+ /* Args: R1 :: IO a */
+ STK_CHK_GEN( WDS(2), R1_PTR, unblockAsyncExceptionszh_fast);
+
+ if (StgTSO_blocked_exceptions(CurrentTSO) != NULL) {
+#if defined(GRAN) || defined(PAR)
+ foreign "C" awakenBlockedQueue(MyCapability() "ptr", StgTSO_blocked_exceptions(CurrentTSO) "ptr",
+ StgTSO_block_info(CurrentTSO) "ptr");
+#else
+ foreign "C" awakenBlockedQueue(MyCapability() "ptr", StgTSO_blocked_exceptions(CurrentTSO) "ptr");
+#endif
+ StgTSO_blocked_exceptions(CurrentTSO) = NULL;
+
+ /* avoid growing the stack unnecessarily */
+ if (Sp(0) == stg_unblockAsyncExceptionszh_ret_info) {
+ Sp_adj(1);
+ } else {
+ Sp_adj(-1);
+ Sp(0) = stg_blockAsyncExceptionszh_ret_info;
+ }
+ }
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_v();
+ jump stg_ap_v_fast;
+}
+
+
+#define interruptible(what_next) \
+ ( what_next == BlockedOnMVar \
+ || what_next == BlockedOnException \
+ || what_next == BlockedOnRead \
+ || what_next == BlockedOnWrite \
+ || what_next == BlockedOnDelay \
+ || what_next == BlockedOnDoProc)
+
+killThreadzh_fast
+{
+ /* args: R1 = TSO to kill, R2 = Exception */
+
+ W_ why_blocked;
+
+ /* This thread may have been relocated.
+ * (see Schedule.c:threadStackOverflow)
+ */
+ while:
+ if (StgTSO_what_next(R1) == ThreadRelocated::I16) {
+ R1 = StgTSO_link(R1);
+ goto while;
+ }
+
+ /* Determine whether this thread is interruptible or not */
+
+ /* If the target thread is currently blocking async exceptions,
+ * we'll have to block until it's ready to accept them. The
+ * exception is interruptible threads - ie. those that are blocked
+ * on some resource.
+ */
+ why_blocked = TO_W_(StgTSO_why_blocked(R1));
+ if (StgTSO_blocked_exceptions(R1) != NULL && !interruptible(why_blocked))
+ {
+ StgTSO_link(CurrentTSO) = StgTSO_blocked_exceptions(R1);
+ StgTSO_blocked_exceptions(R1) = CurrentTSO;
+
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnException::I16;
+ StgTSO_block_info(CurrentTSO) = R1;
+
+ BLOCK( R1_PTR & R2_PTR, killThreadzh_fast );
+ }
+
+ /* Killed threads turn into zombies, which might be garbage
+ * collected at a later date. That's why we don't have to
+ * explicitly remove them from any queues they might be on.
+ */
+
+ /* We might have killed ourselves. In which case, better be *very*
+ * careful. If the exception killed us, then return to the scheduler.
+ * If the exception went to a catch frame, we'll just continue from
+ * the handler.
+ */
+ if (R1 == CurrentTSO) {
+ SAVE_THREAD_STATE();
+ foreign "C" raiseAsync(MyCapability() "ptr", R1 "ptr", R2 "ptr");
+ if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) {
+ R1 = ThreadFinished;
+ jump StgReturn;
+ } else {
+ LOAD_THREAD_STATE();
+ ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16);
+ jump %ENTRY_CODE(Sp(0));
+ }
+ } else {
+ foreign "C" raiseAsync(MyCapability() "ptr", R1 "ptr", R2 "ptr");
+ }
+
+ jump %ENTRY_CODE(Sp(0));
+}
+
+/* -----------------------------------------------------------------------------
+ Catch frames
+ -------------------------------------------------------------------------- */
+
+#ifdef REG_R1
+#define CATCH_FRAME_ENTRY_TEMPLATE(label,ret) \
+ label \
+ { \
+ Sp = Sp + SIZEOF_StgCatchFrame; \
+ jump ret; \
+ }
+#else
+#define CATCH_FRAME_ENTRY_TEMPLATE(label,ret) \
+ label \
+ { \
+ W_ rval; \
+ rval = Sp(0); \
+ Sp = Sp + SIZEOF_StgCatchFrame; \
+ Sp(0) = rval; \
+ jump ret; \
+ }
+#endif
+
+#ifdef REG_R1
+#define SP_OFF 0
+#else
+#define SP_OFF 1
+#endif
+
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
+CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
+
+#if MAX_VECTORED_RTN > 8
+#error MAX_VECTORED_RTN has changed: please modify stg_catch_frame too.
+#endif
+
+#if defined(PROFILING)
+#define CATCH_FRAME_BITMAP 7
+#define CATCH_FRAME_WORDS 4
+#else
+#define CATCH_FRAME_BITMAP 1
+#define CATCH_FRAME_WORDS 2
+#endif
+
+/* Catch frames are very similar to update frames, but when entering
+ * one we just pop the frame off the stack and perform the correct
+ * kind of return to the activation record underneath us on the stack.
+ */
+
+INFO_TABLE_RET(stg_catch_frame,
+ CATCH_FRAME_WORDS, CATCH_FRAME_BITMAP,
+ CATCH_FRAME,
+ stg_catch_frame_0_ret,
+ stg_catch_frame_1_ret,
+ stg_catch_frame_2_ret,
+ stg_catch_frame_3_ret,
+ stg_catch_frame_4_ret,
+ stg_catch_frame_5_ret,
+ stg_catch_frame_6_ret,
+ stg_catch_frame_7_ret)
+CATCH_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
+
+/* -----------------------------------------------------------------------------
+ * The catch infotable
+ *
+ * This should be exactly the same as would be generated by this STG code
+ *
+ * catch = {x,h} \n {} -> catch#{x,h}
+ *
+ * It is used in deleteThread when reverting blackholes.
+ * -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_catch,2,0,FUN,"catch","catch")
+{
+ R2 = StgClosure_payload(R1,1); /* h */
+ R1 = StgClosure_payload(R1,0); /* x */
+ jump catchzh_fast;
+}
+
+catchzh_fast
+{
+ /* args: R1 = m :: IO a, R2 = handler :: Exception -> IO a */
+ STK_CHK_GEN(SIZEOF_StgCatchFrame + WDS(1), R1_PTR & R2_PTR, catchzh_fast);
+
+ /* Set up the catch frame */
+ Sp = Sp - SIZEOF_StgCatchFrame;
+ SET_HDR(Sp,stg_catch_frame_info,W_[CCCS]);
+
+ StgCatchFrame_handler(Sp) = R2;
+ StgCatchFrame_exceptions_blocked(Sp) =
+ (StgTSO_blocked_exceptions(CurrentTSO) != NULL);
+ TICK_CATCHF_PUSHED();
+
+ /* Apply R1 to the realworld token */
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_v();
+ jump stg_ap_v_fast;
+}
+
+/* -----------------------------------------------------------------------------
+ * The raise infotable
+ *
+ * This should be exactly the same as would be generated by this STG code
+ *
+ * raise = {err} \n {} -> raise#{err}
+ *
+ * It is used in raisezh_fast to update thunks on the update list
+ * -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_raise,1,0,THUNK_1_0,"raise","raise")
+{
+ R1 = StgThunk_payload(R1,0);
+ jump raisezh_fast;
+}
+
+raisezh_fast
+{
+ W_ handler;
+ W_ raise_closure;
+ W_ frame_type;
+ /* args : R1 :: Exception */
+
+
+#if defined(PROFILING)
+ /* Debugging tool: on raising an exception, show where we are. */
+
+ /* ToDo: currently this is a hack. Would be much better if
+ * the info was only displayed for an *uncaught* exception.
+ */
+ if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags)) {
+ foreign "C" fprintCCS_stderr(W_[CCCS] "ptr");
+ }
+#endif
+
+retry_pop_stack:
+ StgTSO_sp(CurrentTSO) = Sp;
+ frame_type = foreign "C" raiseExceptionHelper(BaseReg "ptr", CurrentTSO "ptr", R1 "ptr");
+ Sp = StgTSO_sp(CurrentTSO);
+ if (frame_type == ATOMICALLY_FRAME) {
+ /* The exception has reached the edge of a memory transaction. Check that
+ * the transaction is valid. If not then perhaps the exception should
+ * not have been thrown: re-run the transaction */
+ W_ trec;
+ W_ r;
+ trec = StgTSO_trec(CurrentTSO);
+ r = foreign "C" stmValidateNestOfTransactions(trec "ptr");
+ foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr");
+ StgTSO_trec(CurrentTSO) = NO_TREC;
+ if (r) {
+ // Transaction was valid: continue searching for a catch frame
+ Sp = Sp + SIZEOF_StgAtomicallyFrame;
+ goto retry_pop_stack;
+ } else {
+ // Transaction was not valid: we retry the exception (otherwise continue
+ // with a further call to raiseExceptionHelper)
+ "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr");
+ StgTSO_trec(CurrentTSO) = trec;
+ R1 = StgAtomicallyFrame_code(Sp);
+ jump stg_ap_v_fast;
+ }
+ }
+
+ if (frame_type == STOP_FRAME) {
+ /*
+ * We've stripped the entire stack, the thread is now dead.
+ * We will leave the stack in a GC'able state, see the stg_stop_thread
+ * entry code in StgStartup.cmm.
+ */
+ Sp = CurrentTSO + TSO_OFFSET_StgTSO_stack
+ + WDS(TO_W_(StgTSO_stack_size(CurrentTSO))) - WDS(2);
+ Sp(1) = R1; /* save the exception */
+ Sp(0) = stg_enter_info; /* so that GC can traverse this stack */
+ StgTSO_what_next(CurrentTSO) = ThreadKilled::I16;
+ SAVE_THREAD_STATE(); /* inline! */
+
+ /* The return code goes in BaseReg->rRet, and BaseReg is returned in R1 */
+ StgRegTable_rRet(BaseReg) = ThreadFinished;
+ R1 = BaseReg;
+
+ jump StgReturn;
+ }
+
+ /* Ok, Sp points to the enclosing CATCH_FRAME or CATCH_STM_FRAME. Pop everything
+ * down to and including this frame, update Su, push R1, and enter the handler.
+ */
+ if (frame_type == CATCH_FRAME) {
+ handler = StgCatchFrame_handler(Sp);
+ } else {
+ handler = StgCatchSTMFrame_handler(Sp);
+ }
+
+ /* Restore the blocked/unblocked state for asynchronous exceptions
+ * at the CATCH_FRAME.
+ *
+ * If exceptions were unblocked, arrange that they are unblocked
+ * again after executing the handler by pushing an
+ * unblockAsyncExceptions_ret stack frame.
+ */
+ W_ frame;
+ frame = Sp;
+ if (frame_type == CATCH_FRAME) {
+ Sp = Sp + SIZEOF_StgCatchFrame;
+ if (StgCatchFrame_exceptions_blocked(frame) == 0) {
+ Sp_adj(-1);
+ Sp(0) = stg_unblockAsyncExceptionszh_ret_info;
+ }
+ } else {
+ Sp = Sp + SIZEOF_StgCatchSTMFrame;
+ }
+
+ /* Ensure that async exceptions are blocked when running the handler.
+ */
+ if (StgTSO_blocked_exceptions(CurrentTSO) == NULL) {
+ StgTSO_blocked_exceptions(CurrentTSO) = END_TSO_QUEUE;
+ }
+
+ /* Call the handler, passing the exception value and a realworld
+ * token as arguments.
+ */
+ Sp_adj(-1);
+ Sp(0) = R1;
+ R1 = handler;
+ Sp_adj(-1);
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_pv();
+ jump RET_LBL(stg_ap_pv);
+}
+
+raiseIOzh_fast
+{
+ /* Args :: R1 :: Exception */
+ jump raisezh_fast;
+}
diff --git a/rts/Exception.h b/rts/Exception.h
new file mode 100644
index 0000000000..f7832f4045
--- /dev/null
+++ b/rts/Exception.h
@@ -0,0 +1,40 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Exception support
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef EXCEPTION_H
+#define EXCEPTION_H
+
+extern const StgRetInfoTable stg_blockAsyncExceptionszh_ret_info;
+extern const StgRetInfoTable stg_unblockAsyncExceptionszh_ret_info;
+
+/* Determine whether a thread is interruptible (ie. blocked
+ * indefinitely). Interruptible threads can be sent an exception with
+ * killThread# even if they have async exceptions blocked.
+ */
+STATIC_INLINE int
+interruptible(StgTSO *t)
+{
+ switch (t->why_blocked) {
+ case BlockedOnMVar:
+ case BlockedOnException:
+ case BlockedOnRead:
+ case BlockedOnWrite:
+#if defined(mingw32_HOST_OS)
+ case BlockedOnDoProc:
+#endif
+ case BlockedOnDelay:
+ return 1;
+ // NB. Threads blocked on foreign calls (BlockedOnCCall) are
+ // *not* interruptible. We can't send these threads an exception.
+ default:
+ return 0;
+ }
+}
+
+#endif /* EXCEPTION_H */
+
diff --git a/rts/FrontPanel.c b/rts/FrontPanel.c
new file mode 100644
index 0000000000..579b75bab3
--- /dev/null
+++ b/rts/FrontPanel.c
@@ -0,0 +1,802 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2000
+ *
+ * RTS GTK Front Panel
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef RTS_GTK_FRONTPANEL
+
+/* Alas, not Posix. */
+/* #include "PosixSource.h" */
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "MBlock.h"
+#include "FrontPanel.h"
+#include "Storage.h"
+#include "Stats.h"
+#include "RtsFlags.h"
+#include "Schedule.h"
+
+#include <gtk/gtk.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "VisSupport.h"
+#include "VisWindow.h"
+
+static GtkWidget *window, *map_drawing_area, *gen_drawing_area;
+static GtkWidget *res_drawing_area;
+static GtkWidget *continue_but, *stop_but, *quit_but;
+static GtkWidget *statusbar;
+static GtkWidget *live_label, *allocated_label;
+static GtkWidget *footprint_label, *alloc_rate_label;
+static GtkWidget *map_ruler, *gen_ruler;
+static GtkWidget *res_vruler, *res_hruler;
+static GtkWidget *running_label, *b_read_label, *b_write_label, *total_label;
+static GtkWidget *b_mvar_label, *b_bh_label, *b_throwto_label, *sleeping_label;
+
+static guint status_context_id;
+
+gboolean continue_now = FALSE, stop_now = FALSE, quit = FALSE;
+UpdateMode update_mode = Continuous;
+
+static GdkPixmap *map_pixmap = NULL;
+static GdkPixmap *gen_pixmap = NULL;
+static GdkPixmap *res_pixmap = NULL;
+
+#define N_GENS 10
+
+static GdkColor
+ bdescr_color = { 0, 0xffff, 0, 0 }, /* red */
+ free_color = { 0, 0, 0, 0xffff }, /* blue */
+ gen_colors[N_GENS] = {
+ { 0, 0, 0xffff, 0 },
+ { 0, 0, 0xf000, 0 },
+ { 0, 0, 0xe000, 0 },
+ { 0, 0, 0xd000, 0 },
+ { 0, 0, 0xc000, 0 },
+ { 0, 0, 0xb000, 0 },
+ { 0, 0, 0xa000, 0 },
+ { 0, 0, 0x9000, 0 },
+ { 0, 0, 0x8000, 0 },
+ { 0, 0, 0x7000, 0 }
+ };
+
+GdkGC *my_gc = NULL;
+
+static void *mem_start = (void *) 0x50000000;
+
+static void colorBlock( void *addr, GdkColor *color,
+ nat block_width, nat block_height,
+ nat blocks_per_line );
+
+static void residencyCensus( void );
+static void updateResidencyGraph( void );
+static void updateThreadsPanel( void );
+
+/* Some code pinched from examples/scribble-simple in the GTK+
+ * distribution.
+ */
+
+/* Create a new backing pixmap of the appropriate size */
+static gint
+configure_event( GtkWidget *widget, GdkEventConfigure *event STG_UNUSED,
+ GdkPixmap **pixmap )
+{
+ if (*pixmap)
+ gdk_pixmap_unref(*pixmap);
+
+ *pixmap = gdk_pixmap_new(widget->window,
+ widget->allocation.width,
+ widget->allocation.height,
+ -1);
+
+ gdk_draw_rectangle (*pixmap,
+ widget->style->white_gc,
+ TRUE,
+ 0, 0,
+ widget->allocation.width,
+ widget->allocation.height);
+
+ debugBelch("configure!\n");
+ updateFrontPanel();
+ return TRUE;
+}
+
+/* Redraw the screen from the backing pixmap */
+static gint
+expose_event( GtkWidget *widget, GdkEventExpose *event, GdkPixmap **pixmap )
+{
+ gdk_draw_pixmap(widget->window,
+ widget->style->fg_gc[GTK_WIDGET_STATE (widget)],
+ *pixmap,
+ event->area.x, event->area.y,
+ event->area.x, event->area.y,
+ event->area.width, event->area.height);
+
+ return FALSE;
+}
+
+void
+initFrontPanel( void )
+{
+ GdkColormap *colormap;
+ GtkWidget *gen_hbox;
+
+ gtk_init( &prog_argc, &prog_argv );
+
+ window = create_GHC_Front_Panel();
+ map_drawing_area = lookup_widget(window, "memmap");
+ gen_drawing_area = lookup_widget(window, "generations");
+ res_drawing_area = lookup_widget(window, "res_drawingarea");
+ stop_but = lookup_widget(window, "stop_but");
+ continue_but = lookup_widget(window, "continue_but");
+ quit_but = lookup_widget(window, "quit_but");
+ statusbar = lookup_widget(window, "statusbar");
+ live_label = lookup_widget(window, "live_label");
+ footprint_label = lookup_widget(window, "footprint_label");
+ allocated_label = lookup_widget(window, "allocated_label");
+ alloc_rate_label = lookup_widget(window, "alloc_rate_label");
+ gen_hbox = lookup_widget(window, "gen_hbox");
+ gen_ruler = lookup_widget(window, "gen_ruler");
+ map_ruler = lookup_widget(window, "map_ruler");
+ res_vruler = lookup_widget(window, "res_vruler");
+ res_hruler = lookup_widget(window, "res_hruler");
+ running_label = lookup_widget(window, "running_label");
+ b_read_label = lookup_widget(window, "blockread_label");
+ b_write_label = lookup_widget(window, "blockwrite_label");
+ b_mvar_label = lookup_widget(window, "blockmvar_label");
+ b_bh_label = lookup_widget(window, "blockbh_label");
+ b_throwto_label = lookup_widget(window, "blockthrowto_label");
+ sleeping_label = lookup_widget(window, "sleeping_label");
+ total_label = lookup_widget(window, "total_label");
+
+ status_context_id =
+ gtk_statusbar_get_context_id( GTK_STATUSBAR(statusbar), "context" );
+
+ /* hook up some signals for the mem map drawing area */
+ gtk_signal_connect (GTK_OBJECT(map_drawing_area), "expose_event",
+ (GtkSignalFunc)expose_event, &map_pixmap);
+ gtk_signal_connect (GTK_OBJECT(map_drawing_area), "configure_event",
+ (GtkSignalFunc)configure_event, &map_pixmap);
+
+ gtk_widget_set_events(map_drawing_area, GDK_EXPOSURE_MASK);
+
+ /* hook up some signals for the gen drawing area */
+ gtk_signal_connect (GTK_OBJECT(gen_drawing_area), "expose_event",
+ (GtkSignalFunc)expose_event, &gen_pixmap);
+ gtk_signal_connect (GTK_OBJECT(gen_drawing_area), "configure_event",
+ (GtkSignalFunc)configure_event, &gen_pixmap);
+
+ gtk_widget_set_events(gen_drawing_area, GDK_EXPOSURE_MASK);
+
+ /* hook up some signals for the res drawing area */
+ gtk_signal_connect (GTK_OBJECT(res_drawing_area), "expose_event",
+ (GtkSignalFunc)expose_event, &res_pixmap);
+ gtk_signal_connect (GTK_OBJECT(res_drawing_area), "configure_event",
+ (GtkSignalFunc)configure_event, &res_pixmap);
+
+ gtk_widget_set_events(res_drawing_area, GDK_EXPOSURE_MASK);
+
+ /* allocate our colors */
+ colormap = gdk_colormap_get_system();
+ gdk_colormap_alloc_color(colormap, &bdescr_color, TRUE, TRUE);
+ gdk_colormap_alloc_color(colormap, &free_color, TRUE, TRUE);
+
+ {
+ gboolean success[N_GENS];
+ gdk_colormap_alloc_colors(colormap, gen_colors, N_GENS, TRUE,
+ TRUE, success);
+ if (!success) { barf("can't allocate colors"); }
+ }
+
+ /* set the labels on the generation histogram */
+ {
+ char buf[64];
+ nat g, s;
+ GtkWidget *label;
+
+ for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for(s = 0; s < generations[g].n_steps; s++) {
+ g_snprintf( buf, 64, "%d.%d", g, s );
+ label = gtk_label_new( buf );
+ gtk_box_pack_start( GTK_BOX(gen_hbox), label,
+ TRUE, TRUE, 5 );
+ gtk_widget_show(label);
+ }
+ }
+ }
+
+ gtk_widget_show(window);
+
+ /* wait for the user to press "Continue" before getting going... */
+ gtk_statusbar_push( GTK_STATUSBAR(statusbar), status_context_id,
+ "Program start");
+ gtk_widget_set_sensitive( stop_but, FALSE );
+ continue_now = FALSE;
+ while (continue_now == FALSE) {
+ gtk_main_iteration();
+ }
+ gtk_statusbar_pop( GTK_STATUSBAR(statusbar), status_context_id );
+ gtk_statusbar_push( GTK_STATUSBAR(statusbar), status_context_id,
+ "Running");
+
+ gtk_widget_set_sensitive( continue_but, FALSE );
+ gtk_widget_set_sensitive( stop_but, TRUE );
+ gtk_widget_set_sensitive( quit_but, FALSE );
+
+ while (gtk_events_pending()) {
+ gtk_main_iteration();
+ }
+}
+
+void
+stopFrontPanel( void )
+{
+ gtk_widget_set_sensitive( quit_but, TRUE );
+ gtk_widget_set_sensitive( continue_but, FALSE );
+ gtk_widget_set_sensitive( stop_but, FALSE );
+
+ updateFrontPanel();
+
+ gtk_statusbar_push( GTK_STATUSBAR(statusbar), status_context_id,
+ "Program finished");
+
+ quit = FALSE;
+ while (quit == FALSE) {
+ gtk_main_iteration();
+ }
+}
+
+static void
+waitForContinue( void )
+{
+ gtk_widget_set_sensitive( continue_but, TRUE );
+ gtk_widget_set_sensitive( stop_but, FALSE );
+ stop_now = FALSE;
+ continue_now = FALSE;
+ while (continue_now == FALSE) {
+ gtk_main_iteration();
+ }
+ gtk_widget_set_sensitive( continue_but, FALSE );
+ gtk_widget_set_sensitive( stop_but, TRUE );
+}
+
+void
+updateFrontPanelBeforeGC( nat N )
+{
+ char buf[1000];
+
+ updateFrontPanel();
+
+ if (update_mode == BeforeGC
+ || update_mode == BeforeAfterGC
+ || stop_now == TRUE) {
+ g_snprintf( buf, 1000, "Stopped (before GC, generation %d)", N );
+ gtk_statusbar_push( GTK_STATUSBAR(statusbar), status_context_id, buf );
+ waitForContinue();
+ gtk_statusbar_pop( GTK_STATUSBAR(statusbar), status_context_id );
+ }
+
+ g_snprintf( buf, 1000, "Garbage collecting (generation %d)", N );
+ gtk_statusbar_push( GTK_STATUSBAR(statusbar), status_context_id, buf);
+
+ while (gtk_events_pending()) {
+ gtk_main_iteration();
+ }
+}
+
+static void
+numLabel( GtkWidget *lbl, nat n )
+{
+ char buf[64];
+ g_snprintf(buf, 64, "%d", n);
+ gtk_label_set_text( GTK_LABEL(lbl), buf );
+}
+
+void
+updateFrontPanelAfterGC( nat N, lnat live )
+{
+ char buf[1000];
+
+ gtk_statusbar_pop( GTK_STATUSBAR(statusbar), status_context_id );
+
+ /* is this a major GC? */
+ if (N == RtsFlags.GcFlags.generations-1) {
+ residencyCensus();
+ }
+
+ updateFrontPanel();
+
+ if (update_mode == AfterGC
+ || update_mode == BeforeAfterGC
+ || stop_now == TRUE) {
+ snprintf( buf, 1000, "Stopped (after GC, generation %d)", N );
+ gtk_statusbar_push( GTK_STATUSBAR(statusbar), status_context_id, buf );
+ waitForContinue();
+ gtk_statusbar_pop( GTK_STATUSBAR(statusbar), status_context_id );
+ }
+
+ {
+ double words_to_megs = (1024 * 1024) / sizeof(W_);
+ double time = mut_user_time();
+
+ snprintf( buf, 1000, "%.2f", (double)live / words_to_megs );
+ gtk_label_set_text( GTK_LABEL(live_label), buf );
+
+ snprintf( buf, 1000, "%.2f", (double)total_allocated / words_to_megs );
+ gtk_label_set_text( GTK_LABEL(allocated_label), buf );
+
+ snprintf( buf, 1000, "%.2f",
+ (double)(mblocks_allocated * MBLOCK_SIZE_W) / words_to_megs );
+ gtk_label_set_text( GTK_LABEL(footprint_label), buf );
+
+ if ( time == 0.0 )
+ snprintf( buf, 1000, "%.2f", time );
+ else
+ snprintf( buf, 1000, "%.2f",
+ (double)(total_allocated / words_to_megs) / time );
+ gtk_label_set_text( GTK_LABEL(alloc_rate_label), buf );
+ }
+
+ while (gtk_events_pending()) {
+ gtk_main_iteration();
+ }
+}
+
+void
+updateFrontPanel( void )
+{
+ void *m, *a;
+ bdescr *bd;
+
+ updateThreadsPanel();
+
+ if (my_gc == NULL) {
+ my_gc = gdk_gc_new( window->window );
+ }
+
+ if (map_pixmap != NULL) {
+ nat height, width, blocks_per_line,
+ block_height, block_width, mblock_height;
+
+ height = map_drawing_area->allocation.height;
+ width = map_drawing_area->allocation.width;
+
+ mblock_height = height / mblocks_allocated;
+ blocks_per_line = 16;
+ block_height = mblock_height /
+ ((MBLOCK_SIZE/BLOCK_SIZE) / blocks_per_line);
+ while (block_height == 0) {
+ blocks_per_line *= 2;
+ block_height = mblock_height /
+ ((MBLOCK_SIZE/BLOCK_SIZE) / blocks_per_line);
+ }
+ block_width = width / blocks_per_line;
+
+ gdk_draw_rectangle (map_pixmap,
+ map_drawing_area->style->bg_gc[GTK_STATE_NORMAL],
+ TRUE,
+ 0, 0,
+ map_drawing_area->allocation.width,
+ map_drawing_area->allocation.height);
+
+ for ( m = mem_start;
+ (char *)m < (char *)mem_start +
+ (mblocks_allocated * MBLOCK_SIZE);
+ (char *)m += MBLOCK_SIZE ) {
+
+ /* color the bdescr area first */
+ for (a = m; a < FIRST_BLOCK(m); (char *)a += BLOCK_SIZE) {
+ colorBlock( a, &bdescr_color,
+ block_width, block_height, blocks_per_line );
+ }
+
+#if 0 /* Segfaults because bd appears to be bogus but != NULL. stolz, 2003-06-24 */
+ /* color each block */
+ for (; a <= LAST_BLOCK(m); (char *)a += BLOCK_SIZE) {
+ bd = Bdescr((P_)a);
+ ASSERT(bd->start == a);
+ if (bd->flags & BF_FREE) {
+ colorBlock( a, &free_color,
+ block_width, block_height, blocks_per_line );
+ } else {
+ colorBlock( a, &gen_colors[bd->gen_no],
+ block_width, block_height, blocks_per_line );
+ }
+ }
+#endif
+ }
+
+
+ {
+ nat height = map_drawing_area->allocation.height,
+ block_height, mblock_height;
+
+ block_height = (height / mblocks_allocated) /
+ ((MBLOCK_SIZE/BLOCK_SIZE) / blocks_per_line);
+ if (block_height < 1) block_height = 1;
+ mblock_height = block_height *
+ ((MBLOCK_SIZE/BLOCK_SIZE) / blocks_per_line);
+
+ gtk_ruler_set_range( GTK_RULER(map_ruler), 0,
+ (double)(height * mblocks_allocated) /
+ (double)((mblock_height * mblocks_allocated)),
+ 0,
+ (double)(height * mblocks_allocated) /
+ (double)((mblock_height * mblocks_allocated))
+ );
+ }
+
+ gtk_widget_draw( map_drawing_area, NULL );
+ }
+
+ if (gen_pixmap != NULL) {
+
+ GdkRectangle rect;
+ nat g, s, columns, column, max_blocks, height_blocks,
+ width, height;
+
+ gdk_draw_rectangle (gen_pixmap,
+ gen_drawing_area->style->white_gc,
+ TRUE,
+ 0, 0,
+ gen_drawing_area->allocation.width,
+ gen_drawing_area->allocation.height);
+
+ height = gen_drawing_area->allocation.height;
+ width = gen_drawing_area->allocation.width;
+
+ columns = 0; max_blocks = 0;
+ for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ columns += generations[g].n_steps;
+ for(s = 0; s < generations[g].n_steps; s++) {
+ if (generations[g].steps[s].n_blocks > max_blocks) {
+ max_blocks = generations[g].steps[s].n_blocks;
+ }
+ }
+ }
+
+ /* find a reasonable height value larger than max_blocks */
+ {
+ nat n = 0;
+ while (max_blocks != 0) {
+ max_blocks >>= 1; n++;
+ }
+ height_blocks = 1 << n;
+ }
+
+ column = 0;
+ for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for(s = 0; s < generations[g].n_steps; s++, column++) {
+ gdk_gc_set_foreground(my_gc, &gen_colors[g]);
+
+ rect.x = column * (width / columns);
+
+ if (generations[g].steps[s].n_blocks == 0)
+ rect.y = height;
+ else
+ rect.y = height -
+ (height * generations[g].steps[s].n_blocks
+ / height_blocks);
+
+ rect.width = (width / columns);
+ rect.height = height - rect.y;
+
+ gdk_draw_rectangle( gen_pixmap, my_gc, TRUE/*filled*/,
+ rect.x, rect.y, rect.width,
+ rect.height );
+ }
+ }
+
+ gtk_ruler_set_range( GTK_RULER(gen_ruler),
+ height_blocks * BLOCK_SIZE / (1024 * 1024),
+ 0, 0,
+ height_blocks * BLOCK_SIZE / (1024 * 1024)
+ );
+
+ gtk_widget_draw( gen_drawing_area, NULL );
+ }
+
+ if (res_pixmap != NULL) {
+ updateResidencyGraph();
+ }
+
+ while (gtk_events_pending()) {
+ gtk_main_iteration_do(FALSE/*don't block*/);
+ }
+}
+
+static void
+colorBlock( void *addr, GdkColor *color,
+ nat block_width, nat block_height, nat blocks_per_line )
+{
+ GdkRectangle rect;
+ nat block_no;
+
+ gdk_gc_set_foreground(my_gc, color);
+
+ block_no = ((char *)addr - (char *)mem_start) / BLOCK_SIZE;
+
+ rect.x = (block_no % blocks_per_line) * block_width;
+ rect.y = block_no / blocks_per_line * block_height;
+ rect.width = block_width;
+ rect.height = block_height;
+ gdk_draw_rectangle( map_pixmap, my_gc, TRUE/*filled*/,
+ rect.x, rect.y, rect.width, rect.height );
+}
+
+static void
+updateThreadsPanel( void )
+{
+ nat running = 0,
+ b_read = 0,
+ b_write = 0,
+ b_mvar = 0,
+ b_throwto = 0,
+ b_bh = 0,
+ sleeping = 0,
+ total = 0;
+
+ StgTSO *t;
+
+ for (t = all_threads; t != END_TSO_QUEUE; t = t->global_link) {
+ switch (t->what_next) {
+ case ThreadKilled: break;
+ case ThreadComplete: break;
+ default:
+ switch (t->why_blocked) {
+ case BlockedOnRead: b_read++; break;
+ case BlockedOnWrite: b_write++; break;
+ case BlockedOnDelay: sleeping++; break;
+ case BlockedOnMVar: b_mvar++; break;
+ case BlockedOnException: b_throwto++; break;
+ case BlockedOnBlackHole: b_bh++; break;
+ case NotBlocked: running++; break;
+ }
+ }
+ }
+ total = running + b_read + b_write + b_mvar + b_throwto + b_bh + sleeping;
+ numLabel(running_label, running);
+ numLabel(b_read_label, b_read);
+ numLabel(b_write_label, b_write);
+ numLabel(b_mvar_label, b_mvar);
+ numLabel(b_bh_label, b_bh);
+ numLabel(b_throwto_label, b_throwto);
+ numLabel(sleeping_label, sleeping);
+ numLabel(total_label, total);
+}
+
+typedef enum { Thunk, Fun, Constr, BlackHole,
+ Array, Thread, Other, N_Cats } ClosureCategory;
+
+#define N_SLICES 100
+
+static nat *res_prof[N_SLICES];
+static double res_time[N_SLICES];
+static nat next_slice = 0;
+
+static void
+residencyCensus( void )
+{
+ nat slice = next_slice++, *prof;
+ bdescr *bd;
+ nat g, s, size, type;
+ StgPtr p;
+ StgInfoTable *info;
+
+ if (slice >= N_SLICES) {
+ barf("too many slices");
+ }
+ res_prof[slice] = stgMallocBytes(N_Cats * sizeof(nat), "residencyCensus");
+ prof = res_prof[slice];
+ memset(prof, 0, N_Cats * sizeof(nat));
+
+ res_time[slice] = mut_user_time();
+
+ for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for(s = 0; s < generations[g].n_steps; s++) {
+
+ /* skip over g0s0 if multi-generational */
+ if (RtsFlags.GcFlags.generations > 1 &&
+ g == 0 && s == 0) continue;
+
+ if (RtsFlags.GcFlags.generations == 1) {
+/* bd = generations[g].steps[s].to_blocks; FIXME to_blocks does not exist */
+ } else {
+ bd = generations[g].steps[s].blocks;
+ }
+
+ for (; bd != NULL; bd = bd->link) {
+
+ p = bd->start;
+
+ while (p < bd->free) {
+ info = get_itbl((StgClosure *)p);
+ type = Other;
+
+ switch (info->type) {
+
+ case CONSTR:
+ case BCO:
+ if (((StgClosure *)p)->header.info == &stg_DEAD_WEAK_info) {
+ size = sizeofW(StgWeak);
+ type = Other;
+ break;
+ }
+ /* else, fall through... */
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_2_0:
+ size = sizeW_fromITBL(info);
+ type = Constr;
+ break;
+
+ case FUN_1_0:
+ case FUN_0_1:
+ size = sizeofW(StgHeader) + 1;
+ goto fun;
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_2_0:
+ case FUN:
+ size = sizeW_fromITBL(info);
+ fun:
+ type = Fun;
+ break;
+
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_SELECTOR:
+ size = sizeofW(StgHeader) + 2;
+ goto thunk;
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ case THUNK:
+ size = sizeW_fromITBL(info);
+ thunk:
+ type = Thunk;
+ break;
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+/* case BLACKHOLE_BQ: FIXME: case does not exist */
+ size = sizeW_fromITBL(info);
+ type = BlackHole;
+ break;
+
+ case AP:
+ size = pap_sizeW((StgPAP *)p);
+ type = Thunk;
+ break;
+
+ case PAP:
+ size = pap_sizeW((StgPAP *)p);
+ type = Fun;
+ break;
+
+ case ARR_WORDS:
+ size = arr_words_sizeW(stgCast(StgArrWords*,p));
+ type = Array;
+ break;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
+ type = Array;
+ break;
+
+ case TSO:
+ size = tso_sizeW((StgTSO *)p);
+ type = Thread;
+ break;
+
+ case WEAK:
+ case STABLE_NAME:
+ case MVAR:
+ case MUT_VAR:
+/* case MUT_CONS: FIXME: case does not exist */
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ size = sizeW_fromITBL(info);
+ type = Other;
+ break;
+
+ default:
+ barf("updateResidencyGraph: strange closure "
+ "%d", info->type );
+ }
+
+ prof[type] += size;
+ p += size;
+ }
+ }
+ }
+ }
+
+}
+
+static void
+updateResidencyGraph( void )
+{
+ nat total, prev_total, i, max_res;
+ double time;
+ double time_scale = 1;
+ nat last_slice = next_slice-1;
+ double res_scale = 1; /* in megabytes, doubles */
+ nat *prof;
+ nat width, height;
+ GdkPoint points[4];
+
+ gdk_draw_rectangle (res_pixmap,
+ res_drawing_area->style->bg_gc[GTK_STATE_NORMAL],
+ TRUE,
+ 0, 0,
+ res_drawing_area->allocation.width,
+ res_drawing_area->allocation.height);
+
+ if (next_slice == 0) return;
+
+ time = res_time[last_slice];
+ while (time > time_scale) {
+ time_scale *= 2;
+ }
+
+ max_res = 0;
+ for (i = 0; i < next_slice; i++) {
+ prof = res_prof[i];
+ total = prof[Thunk] + prof[Fun] + prof[Constr] +
+ prof[BlackHole] + prof[Array] + prof[Other];
+ if (total > max_res) {
+ max_res = total;
+ }
+ }
+ while (max_res > res_scale) {
+ res_scale *= 2;
+ }
+
+ height = res_drawing_area->allocation.height;
+ width = res_drawing_area->allocation.width;
+
+ points[0].x = 0;
+ points[0].y = height;
+ points[1].y = height;
+ points[3].x = 0;
+ points[3].y = height;
+
+ gdk_gc_set_foreground(my_gc, &free_color);
+
+ prev_total = 0;
+ for (i = 0; i < next_slice; i++) {
+ prof = res_prof[i];
+ total = prof[Thunk] + prof[Fun] + prof[Constr] +
+ prof[BlackHole] + prof[Array] + prof[Other];
+ points[1].x = width * res_time[i] / time_scale;
+ points[2].x = points[1].x;
+ points[2].y = height - ((height * total) / res_scale);
+ gdk_draw_polygon(res_pixmap, my_gc, TRUE/*filled*/, points, 4);
+ points[3] = points[2];
+ points[0] = points[1];
+ }
+
+ gtk_ruler_set_range( GTK_RULER(res_vruler),
+ res_scale / ((1024*1024)/sizeof(W_)),
+ 0, 0,
+ res_scale / ((1024*1024)/sizeof(W_)) );
+
+ gtk_ruler_set_range( GTK_RULER(res_hruler),
+ 0, time_scale, 0, time_scale );
+
+
+ gtk_widget_draw( res_drawing_area, NULL );
+}
+
+#endif /* RTS_GTK_FRONTPANEL */
diff --git a/rts/FrontPanel.h b/rts/FrontPanel.h
new file mode 100644
index 0000000000..de3b741657
--- /dev/null
+++ b/rts/FrontPanel.h
@@ -0,0 +1,35 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2000-2005
+ *
+ * RTS GTK Front Panel
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef FRONTPANEL_H
+#define FRONTPANEL_H
+
+#ifdef RTS_GTK_FRONTPANEL
+
+#include "Rts.h" /* needed because this file gets included by
+ * auto-generated code */
+
+void initFrontPanel( void );
+void stopFrontPanel( void );
+void updateFrontPanelBeforeGC( nat N );
+void updateFrontPanelAfterGC( nat N, lnat live );
+void updateFrontPanel( void );
+
+
+/* --------- PRIVATE ----------------------------------------- */
+
+#include <gdk/gdktypes.h>
+
+typedef enum { BeforeGC, AfterGC, BeforeAfterGC, Continuous } UpdateMode;
+extern UpdateMode update_mode;
+extern gboolean continue_now, stop_now, quit;
+
+#endif /* RTS_GTK_FRONTPANEL */
+
+#endif /* FRONTPANEL_H */
+
diff --git a/rts/GC.c b/rts/GC.c
new file mode 100644
index 0000000000..a13cd33afa
--- /dev/null
+++ b/rts/GC.c
@@ -0,0 +1,4719 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2003
+ *
+ * Generational garbage collector
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Apply.h"
+#include "OSThreads.h"
+#include "Storage.h"
+#include "LdvProfile.h"
+#include "Updates.h"
+#include "Stats.h"
+#include "Schedule.h"
+#include "Sanity.h"
+#include "BlockAlloc.h"
+#include "MBlock.h"
+#include "ProfHeap.h"
+#include "SchedAPI.h"
+#include "Weak.h"
+#include "Prelude.h"
+#include "ParTicky.h" // ToDo: move into Rts.h
+#include "GCCompact.h"
+#include "RtsSignals.h"
+#include "STM.h"
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h"
+# include "ParallelRts.h"
+# include "FetchMe.h"
+# if defined(DEBUG)
+# include "Printer.h"
+# include "ParallelDebug.h"
+# endif
+#endif
+#include "HsFFI.h"
+#include "Linker.h"
+#if defined(RTS_GTK_FRONTPANEL)
+#include "FrontPanel.h"
+#endif
+
+#include "RetainerProfile.h"
+
+#include <string.h>
+
+// Turn off inlining when debugging - it obfuscates things
+#ifdef DEBUG
+# undef STATIC_INLINE
+# define STATIC_INLINE static
+#endif
+
+/* STATIC OBJECT LIST.
+ *
+ * During GC:
+ * We maintain a linked list of static objects that are still live.
+ * The requirements for this list are:
+ *
+ * - we need to scan the list while adding to it, in order to
+ * scavenge all the static objects (in the same way that
+ * breadth-first scavenging works for dynamic objects).
+ *
+ * - we need to be able to tell whether an object is already on
+ * the list, to break loops.
+ *
+ * Each static object has a "static link field", which we use for
+ * linking objects on to the list. We use a stack-type list, consing
+ * objects on the front as they are added (this means that the
+ * scavenge phase is depth-first, not breadth-first, but that
+ * shouldn't matter).
+ *
+ * A separate list is kept for objects that have been scavenged
+ * already - this is so that we can zero all the marks afterwards.
+ *
+ * An object is on the list if its static link field is non-zero; this
+ * means that we have to mark the end of the list with '1', not NULL.
+ *
+ * Extra notes for generational GC:
+ *
+ * Each generation has a static object list associated with it. When
+ * collecting generations up to N, we treat the static object lists
+ * from generations > N as roots.
+ *
+ * We build up a static object list while collecting generations 0..N,
+ * which is then appended to the static object list of generation N+1.
+ */
+static StgClosure* static_objects; // live static objects
+StgClosure* scavenged_static_objects; // static objects scavenged so far
+
+/* N is the oldest generation being collected, where the generations
+ * are numbered starting at 0. A major GC (indicated by the major_gc
+ * flag) is when we're collecting all generations. We only attempt to
+ * deal with static objects and GC CAFs when doing a major GC.
+ */
+static nat N;
+static rtsBool major_gc;
+
+/* Youngest generation that objects should be evacuated to in
+ * evacuate(). (Logically an argument to evacuate, but it's static
+ * a lot of the time so we optimise it into a global variable).
+ */
+static nat evac_gen;
+
+/* Whether to do eager promotion or not.
+ */
+static rtsBool eager_promotion;
+
+/* Weak pointers
+ */
+StgWeak *old_weak_ptr_list; // also pending finaliser list
+
+/* Which stage of processing various kinds of weak pointer are we at?
+ * (see traverse_weak_ptr_list() below for discussion).
+ */
+typedef enum { WeakPtrs, WeakThreads, WeakDone } WeakStage;
+static WeakStage weak_stage;
+
+/* List of all threads during GC
+ */
+static StgTSO *old_all_threads;
+StgTSO *resurrected_threads;
+
+/* Flag indicating failure to evacuate an object to the desired
+ * generation.
+ */
+static rtsBool failed_to_evac;
+
+/* Saved nursery (used for 2-space collector only)
+ */
+static bdescr *saved_nursery;
+static nat saved_n_blocks;
+
+/* Data used for allocation area sizing.
+ */
+static lnat new_blocks; // blocks allocated during this GC
+static lnat new_scavd_blocks; // ditto, but depth-first blocks
+static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
+
+/* Used to avoid long recursion due to selector thunks
+ */
+static lnat thunk_selector_depth = 0;
+#define MAX_THUNK_SELECTOR_DEPTH 8
+
+/* Mut-list stats */
+#ifdef DEBUG
+static nat
+ mutlist_MUTVARS,
+ mutlist_MUTARRS,
+ mutlist_OTHERS;
+#endif
+
+/* -----------------------------------------------------------------------------
+ Static function declarations
+ -------------------------------------------------------------------------- */
+
+static bdescr * gc_alloc_block ( step *stp );
+static void mark_root ( StgClosure **root );
+
+// Use a register argument for evacuate, if available.
+#if __GNUC__ >= 2
+#define REGPARM1 __attribute__((regparm(1)))
+#else
+#define REGPARM1
+#endif
+
+REGPARM1 static StgClosure * evacuate (StgClosure *q);
+
+static void zero_static_object_list ( StgClosure* first_static );
+
+static rtsBool traverse_weak_ptr_list ( void );
+static void mark_weak_ptr_list ( StgWeak **list );
+
+static StgClosure * eval_thunk_selector ( nat field, StgSelector * p );
+
+
+static void scavenge ( step * );
+static void scavenge_mark_stack ( void );
+static void scavenge_stack ( StgPtr p, StgPtr stack_end );
+static rtsBool scavenge_one ( StgPtr p );
+static void scavenge_large ( step * );
+static void scavenge_static ( void );
+static void scavenge_mutable_list ( generation *g );
+
+static void scavenge_large_bitmap ( StgPtr p,
+ StgLargeBitmap *large_bitmap,
+ nat size );
+
+#if 0 && defined(DEBUG)
+static void gcCAFs ( void );
+#endif
+
+/* -----------------------------------------------------------------------------
+ inline functions etc. for dealing with the mark bitmap & stack.
+ -------------------------------------------------------------------------- */
+
+#define MARK_STACK_BLOCKS 4
+
+static bdescr *mark_stack_bdescr;
+static StgPtr *mark_stack;
+static StgPtr *mark_sp;
+static StgPtr *mark_splim;
+
+// Flag and pointers used for falling back to a linear scan when the
+// mark stack overflows.
+static rtsBool mark_stack_overflowed;
+static bdescr *oldgen_scan_bd;
+static StgPtr oldgen_scan;
+
+STATIC_INLINE rtsBool
+mark_stack_empty(void)
+{
+ return mark_sp == mark_stack;
+}
+
+STATIC_INLINE rtsBool
+mark_stack_full(void)
+{
+ return mark_sp >= mark_splim;
+}
+
+STATIC_INLINE void
+reset_mark_stack(void)
+{
+ mark_sp = mark_stack;
+}
+
+STATIC_INLINE void
+push_mark_stack(StgPtr p)
+{
+ *mark_sp++ = p;
+}
+
+STATIC_INLINE StgPtr
+pop_mark_stack(void)
+{
+ return *--mark_sp;
+}
+
+/* -----------------------------------------------------------------------------
+ Allocate a new to-space block in the given step.
+ -------------------------------------------------------------------------- */
+
+static bdescr *
+gc_alloc_block(step *stp)
+{
+ bdescr *bd = allocBlock();
+ bd->gen_no = stp->gen_no;
+ bd->step = stp;
+ bd->link = NULL;
+
+ // blocks in to-space in generations up to and including N
+ // get the BF_EVACUATED flag.
+ if (stp->gen_no <= N) {
+ bd->flags = BF_EVACUATED;
+ } else {
+ bd->flags = 0;
+ }
+
+ // Start a new to-space block, chain it on after the previous one.
+ if (stp->hp_bd != NULL) {
+ stp->hp_bd->free = stp->hp;
+ stp->hp_bd->link = bd;
+ }
+
+ stp->hp_bd = bd;
+ stp->hp = bd->start;
+ stp->hpLim = stp->hp + BLOCK_SIZE_W;
+
+ stp->n_blocks++;
+ new_blocks++;
+
+ return bd;
+}
+
+static bdescr *
+gc_alloc_scavd_block(step *stp)
+{
+ bdescr *bd = allocBlock();
+ bd->gen_no = stp->gen_no;
+ bd->step = stp;
+
+ // blocks in to-space in generations up to and including N
+ // get the BF_EVACUATED flag.
+ if (stp->gen_no <= N) {
+ bd->flags = BF_EVACUATED;
+ } else {
+ bd->flags = 0;
+ }
+
+ bd->link = stp->blocks;
+ stp->blocks = bd;
+
+ if (stp->scavd_hp != NULL) {
+ Bdescr(stp->scavd_hp)->free = stp->scavd_hp;
+ }
+ stp->scavd_hp = bd->start;
+ stp->scavd_hpLim = stp->scavd_hp + BLOCK_SIZE_W;
+
+ stp->n_blocks++;
+ new_scavd_blocks++;
+
+ return bd;
+}
+
+/* -----------------------------------------------------------------------------
+ GarbageCollect
+
+ Rough outline of the algorithm: for garbage collecting generation N
+ (and all younger generations):
+
+ - follow all pointers in the root set. the root set includes all
+ mutable objects in all generations (mutable_list).
+
+ - for each pointer, evacuate the object it points to into either
+
+ + to-space of the step given by step->to, which is the next
+ highest step in this generation or the first step in the next
+ generation if this is the last step.
+
+ + to-space of generations[evac_gen]->steps[0], if evac_gen != 0.
+ When we evacuate an object we attempt to evacuate
+ everything it points to into the same generation - this is
+ achieved by setting evac_gen to the desired generation. If
+ we can't do this, then an entry in the mut list has to
+ be made for the cross-generation pointer.
+
+ + if the object is already in a generation > N, then leave
+ it alone.
+
+ - repeatedly scavenge to-space from each step in each generation
+ being collected until no more objects can be evacuated.
+
+ - free from-space in each step, and set from-space = to-space.
+
+ Locks held: all capabilities are held throughout GarbageCollect().
+
+ -------------------------------------------------------------------------- */
+
+void
+GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
+{
+ bdescr *bd;
+ step *stp;
+ lnat live, allocated, copied = 0, scavd_copied = 0;
+ lnat oldgen_saved_blocks = 0;
+ nat g, s, i;
+
+ ACQUIRE_SM_LOCK;
+
+#ifdef PROFILING
+ CostCentreStack *prev_CCS;
+#endif
+
+#if defined(DEBUG) && defined(GRAN)
+ IF_DEBUG(gc, debugBelch("@@ Starting garbage collection at %ld (%lx)\n",
+ Now, Now));
+#endif
+
+#if defined(RTS_USER_SIGNALS)
+ // block signals
+ blockUserSignals();
+#endif
+
+ // tell the STM to discard any cached closures its hoping to re-use
+ stmPreGCHook();
+
+ // tell the stats department that we've started a GC
+ stat_startGC();
+
+#ifdef DEBUG
+ // check for memory leaks if DEBUG is on
+ memInventory();
+#endif
+
+#ifdef DEBUG
+ mutlist_MUTVARS = 0;
+ mutlist_MUTARRS = 0;
+ mutlist_OTHERS = 0;
+#endif
+
+ // Init stats and print par specific (timing) info
+ PAR_TICKY_PAR_START();
+
+ // attribute any costs to CCS_GC
+#ifdef PROFILING
+ prev_CCS = CCCS;
+ CCCS = CCS_GC;
+#endif
+
+ /* Approximate how much we allocated.
+ * Todo: only when generating stats?
+ */
+ allocated = calcAllocated();
+
+ /* Figure out which generation to collect
+ */
+ if (force_major_gc) {
+ N = RtsFlags.GcFlags.generations - 1;
+ major_gc = rtsTrue;
+ } else {
+ N = 0;
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ if (generations[g].steps[0].n_blocks +
+ generations[g].steps[0].n_large_blocks
+ >= generations[g].max_blocks) {
+ N = g;
+ }
+ }
+ major_gc = (N == RtsFlags.GcFlags.generations-1);
+ }
+
+#ifdef RTS_GTK_FRONTPANEL
+ if (RtsFlags.GcFlags.frontpanel) {
+ updateFrontPanelBeforeGC(N);
+ }
+#endif
+
+ // check stack sanity *before* GC (ToDo: check all threads)
+#if defined(GRAN)
+ // ToDo!: check sanity IF_DEBUG(sanity, checkTSOsSanity());
+#endif
+ IF_DEBUG(sanity, checkFreeListSanity());
+
+ /* Initialise the static object lists
+ */
+ static_objects = END_OF_STATIC_LIST;
+ scavenged_static_objects = END_OF_STATIC_LIST;
+
+ /* Save the nursery if we're doing a two-space collection.
+ * g0s0->blocks will be used for to-space, so we need to get the
+ * nursery out of the way.
+ */
+ if (RtsFlags.GcFlags.generations == 1) {
+ saved_nursery = g0s0->blocks;
+ saved_n_blocks = g0s0->n_blocks;
+ g0s0->blocks = NULL;
+ g0s0->n_blocks = 0;
+ }
+
+ /* Keep a count of how many new blocks we allocated during this GC
+ * (used for resizing the allocation area, later).
+ */
+ new_blocks = 0;
+ new_scavd_blocks = 0;
+
+ // Initialise to-space in all the generations/steps that we're
+ // collecting.
+ //
+ for (g = 0; g <= N; g++) {
+
+ // throw away the mutable list. Invariant: the mutable list
+ // always has at least one block; this means we can avoid a check for
+ // NULL in recordMutable().
+ if (g != 0) {
+ freeChain(generations[g].mut_list);
+ generations[g].mut_list = allocBlock();
+ for (i = 0; i < n_capabilities; i++) {
+ freeChain(capabilities[i].mut_lists[g]);
+ capabilities[i].mut_lists[g] = allocBlock();
+ }
+ }
+
+ for (s = 0; s < generations[g].n_steps; s++) {
+
+ // generation 0, step 0 doesn't need to-space
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+
+ stp = &generations[g].steps[s];
+ ASSERT(stp->gen_no == g);
+
+ // start a new to-space for this step.
+ stp->old_blocks = stp->blocks;
+ stp->n_old_blocks = stp->n_blocks;
+
+ // allocate the first to-space block; extra blocks will be
+ // chained on as necessary.
+ stp->hp_bd = NULL;
+ bd = gc_alloc_block(stp);
+ stp->blocks = bd;
+ stp->n_blocks = 1;
+ stp->scan = bd->start;
+ stp->scan_bd = bd;
+
+ // allocate a block for "already scavenged" objects. This goes
+ // on the front of the stp->blocks list, so it won't be
+ // traversed by the scavenging sweep.
+ gc_alloc_scavd_block(stp);
+
+ // initialise the large object queues.
+ stp->new_large_objects = NULL;
+ stp->scavenged_large_objects = NULL;
+ stp->n_scavenged_large_blocks = 0;
+
+ // mark the large objects as not evacuated yet
+ for (bd = stp->large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // for a compacted step, we need to allocate the bitmap
+ if (stp->is_compacted) {
+ nat bitmap_size; // in bytes
+ bdescr *bitmap_bdescr;
+ StgWord *bitmap;
+
+ bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
+
+ if (bitmap_size > 0) {
+ bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
+ / BLOCK_SIZE);
+ stp->bitmap = bitmap_bdescr;
+ bitmap = bitmap_bdescr->start;
+
+ IF_DEBUG(gc, debugBelch("bitmap_size: %d, bitmap: %p",
+ bitmap_size, bitmap););
+
+ // don't forget to fill it with zeros!
+ memset(bitmap, 0, bitmap_size);
+
+ // For each block in this step, point to its bitmap from the
+ // block descriptor.
+ for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
+ bd->u.bitmap = bitmap;
+ bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
+
+ // Also at this point we set the BF_COMPACTED flag
+ // for this block. The invariant is that
+ // BF_COMPACTED is always unset, except during GC
+ // when it is set on those blocks which will be
+ // compacted.
+ bd->flags |= BF_COMPACTED;
+ }
+ }
+ }
+ }
+ }
+
+ /* make sure the older generations have at least one block to
+ * allocate into (this makes things easier for copy(), see below).
+ */
+ for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ stp = &generations[g].steps[s];
+ if (stp->hp_bd == NULL) {
+ ASSERT(stp->blocks == NULL);
+ bd = gc_alloc_block(stp);
+ stp->blocks = bd;
+ stp->n_blocks = 1;
+ }
+ if (stp->scavd_hp == NULL) {
+ gc_alloc_scavd_block(stp);
+ stp->n_blocks++;
+ }
+ /* Set the scan pointer for older generations: remember we
+ * still have to scavenge objects that have been promoted. */
+ stp->scan = stp->hp;
+ stp->scan_bd = stp->hp_bd;
+ stp->new_large_objects = NULL;
+ stp->scavenged_large_objects = NULL;
+ stp->n_scavenged_large_blocks = 0;
+ }
+
+ /* Move the private mutable lists from each capability onto the
+ * main mutable list for the generation.
+ */
+ for (i = 0; i < n_capabilities; i++) {
+ for (bd = capabilities[i].mut_lists[g];
+ bd->link != NULL; bd = bd->link) {
+ /* nothing */
+ }
+ bd->link = generations[g].mut_list;
+ generations[g].mut_list = capabilities[i].mut_lists[g];
+ capabilities[i].mut_lists[g] = allocBlock();
+ }
+ }
+
+ /* Allocate a mark stack if we're doing a major collection.
+ */
+ if (major_gc) {
+ mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
+ mark_stack = (StgPtr *)mark_stack_bdescr->start;
+ mark_sp = mark_stack;
+ mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
+ } else {
+ mark_stack_bdescr = NULL;
+ }
+
+ eager_promotion = rtsTrue; // for now
+
+ /* -----------------------------------------------------------------------
+ * follow all the roots that we know about:
+ * - mutable lists from each generation > N
+ * we want to *scavenge* these roots, not evacuate them: they're not
+ * going to move in this GC.
+ * Also: do them in reverse generation order. This is because we
+ * often want to promote objects that are pointed to by older
+ * generations early, so we don't have to repeatedly copy them.
+ * Doing the generations in reverse order ensures that we don't end
+ * up in the situation where we want to evac an object to gen 3 and
+ * it has already been evaced to gen 2.
+ */
+ {
+ int st;
+ for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+ generations[g].saved_mut_list = generations[g].mut_list;
+ generations[g].mut_list = allocBlock();
+ // mut_list always has at least one block.
+ }
+
+ for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+ IF_PAR_DEBUG(verbose, printMutableList(&generations[g]));
+ scavenge_mutable_list(&generations[g]);
+ evac_gen = g;
+ for (st = generations[g].n_steps-1; st >= 0; st--) {
+ scavenge(&generations[g].steps[st]);
+ }
+ }
+ }
+
+ /* follow roots from the CAF list (used by GHCi)
+ */
+ evac_gen = 0;
+ markCAFs(mark_root);
+
+ /* follow all the roots that the application knows about.
+ */
+ evac_gen = 0;
+ get_roots(mark_root);
+
+#if defined(PAR)
+ /* And don't forget to mark the TSO if we got here direct from
+ * Haskell! */
+ /* Not needed in a seq version?
+ if (CurrentTSO) {
+ CurrentTSO = (StgTSO *)MarkRoot((StgClosure *)CurrentTSO);
+ }
+ */
+
+ // Mark the entries in the GALA table of the parallel system
+ markLocalGAs(major_gc);
+ // Mark all entries on the list of pending fetches
+ markPendingFetches(major_gc);
+#endif
+
+ /* Mark the weak pointer list, and prepare to detect dead weak
+ * pointers.
+ */
+ mark_weak_ptr_list(&weak_ptr_list);
+ old_weak_ptr_list = weak_ptr_list;
+ weak_ptr_list = NULL;
+ weak_stage = WeakPtrs;
+
+ /* The all_threads list is like the weak_ptr_list.
+ * See traverse_weak_ptr_list() for the details.
+ */
+ old_all_threads = all_threads;
+ all_threads = END_TSO_QUEUE;
+ resurrected_threads = END_TSO_QUEUE;
+
+ /* Mark the stable pointer table.
+ */
+ markStablePtrTable(mark_root);
+
+ /* -------------------------------------------------------------------------
+ * Repeatedly scavenge all the areas we know about until there's no
+ * more scavenging to be done.
+ */
+ {
+ rtsBool flag;
+ loop:
+ flag = rtsFalse;
+
+ // scavenge static objects
+ if (major_gc && static_objects != END_OF_STATIC_LIST) {
+ IF_DEBUG(sanity, checkStaticObjects(static_objects));
+ scavenge_static();
+ }
+
+ /* When scavenging the older generations: Objects may have been
+ * evacuated from generations <= N into older generations, and we
+ * need to scavenge these objects. We're going to try to ensure that
+ * any evacuations that occur move the objects into at least the
+ * same generation as the object being scavenged, otherwise we
+ * have to create new entries on the mutable list for the older
+ * generation.
+ */
+
+ // scavenge each step in generations 0..maxgen
+ {
+ long gen;
+ int st;
+
+ loop2:
+ // scavenge objects in compacted generation
+ if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
+ (mark_stack_bdescr != NULL && !mark_stack_empty())) {
+ scavenge_mark_stack();
+ flag = rtsTrue;
+ }
+
+ for (gen = RtsFlags.GcFlags.generations; --gen >= 0; ) {
+ for (st = generations[gen].n_steps; --st >= 0; ) {
+ if (gen == 0 && st == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ stp = &generations[gen].steps[st];
+ evac_gen = gen;
+ if (stp->hp_bd != stp->scan_bd || stp->scan < stp->hp) {
+ scavenge(stp);
+ flag = rtsTrue;
+ goto loop2;
+ }
+ if (stp->new_large_objects != NULL) {
+ scavenge_large(stp);
+ flag = rtsTrue;
+ goto loop2;
+ }
+ }
+ }
+ }
+
+ if (flag) { goto loop; }
+
+ // must be last... invariant is that everything is fully
+ // scavenged at this point.
+ if (traverse_weak_ptr_list()) { // returns rtsTrue if evaced something
+ goto loop;
+ }
+ }
+
+ /* Update the pointers from the task list - these are
+ * treated as weak pointers because we want to allow a main thread
+ * to get a BlockedOnDeadMVar exception in the same way as any other
+ * thread. Note that the threads should all have been retained by
+ * GC by virtue of being on the all_threads list, we're just
+ * updating pointers here.
+ */
+ {
+ Task *task;
+ StgTSO *tso;
+ for (task = all_tasks; task != NULL; task = task->all_link) {
+ if (!task->stopped && task->tso) {
+ ASSERT(task->tso->bound == task);
+ tso = (StgTSO *) isAlive((StgClosure *)task->tso);
+ if (tso == NULL) {
+ barf("task %p: main thread %d has been GC'd",
+#ifdef THREADED_RTS
+ (void *)task->id,
+#else
+ (void *)task,
+#endif
+ task->tso->id);
+ }
+ task->tso = tso;
+ }
+ }
+ }
+
+#if defined(PAR)
+ // Reconstruct the Global Address tables used in GUM
+ rebuildGAtables(major_gc);
+ IF_DEBUG(sanity, checkLAGAtable(rtsTrue/*check closures, too*/));
+#endif
+
+ // Now see which stable names are still alive.
+ gcStablePtrTable();
+
+ // Tidy the end of the to-space chains
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ stp = &generations[g].steps[s];
+ if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
+ ASSERT(Bdescr(stp->hp) == stp->hp_bd);
+ stp->hp_bd->free = stp->hp;
+ Bdescr(stp->scavd_hp)->free = stp->scavd_hp;
+ }
+ }
+ }
+
+#ifdef PROFILING
+ // We call processHeapClosureForDead() on every closure destroyed during
+ // the current garbage collection, so we invoke LdvCensusForDead().
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
+ || RtsFlags.ProfFlags.bioSelector != NULL)
+ LdvCensusForDead(N);
+#endif
+
+ // NO MORE EVACUATION AFTER THIS POINT!
+ // Finally: compaction of the oldest generation.
+ if (major_gc && oldest_gen->steps[0].is_compacted) {
+ // save number of blocks for stats
+ oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
+ compact(get_roots);
+ }
+
+ IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
+
+ /* run through all the generations/steps and tidy up
+ */
+ copied = new_blocks * BLOCK_SIZE_W;
+ scavd_copied = new_scavd_blocks * BLOCK_SIZE_W;
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+
+ if (g <= N) {
+ generations[g].collections++; // for stats
+ }
+
+ // Count the mutable list as bytes "copied" for the purposes of
+ // stats. Every mutable list is copied during every GC.
+ if (g > 0) {
+ nat mut_list_size = 0;
+ for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+ mut_list_size += bd->free - bd->start;
+ }
+ copied += mut_list_size;
+
+ IF_DEBUG(gc, debugBelch("mut_list_size: %ld (%d vars, %d arrays, %d others)\n", mut_list_size * sizeof(W_), mutlist_MUTVARS, mutlist_MUTARRS, mutlist_OTHERS));
+ }
+
+ for (s = 0; s < generations[g].n_steps; s++) {
+ bdescr *next;
+ stp = &generations[g].steps[s];
+
+ if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
+ // stats information: how much we copied
+ if (g <= N) {
+ copied -= stp->hp_bd->start + BLOCK_SIZE_W -
+ stp->hp_bd->free;
+ scavd_copied -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
+ }
+ }
+
+ // for generations we collected...
+ if (g <= N) {
+
+ /* free old memory and shift to-space into from-space for all
+ * the collected steps (except the allocation area). These
+ * freed blocks will probaby be quickly recycled.
+ */
+ if (!(g == 0 && s == 0)) {
+ if (stp->is_compacted) {
+ // for a compacted step, just shift the new to-space
+ // onto the front of the now-compacted existing blocks.
+ for (bd = stp->blocks; bd != NULL; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED; // now from-space
+ }
+ // tack the new blocks on the end of the existing blocks
+ if (stp->old_blocks != NULL) {
+ for (bd = stp->old_blocks; bd != NULL; bd = next) {
+ // NB. this step might not be compacted next
+ // time, so reset the BF_COMPACTED flags.
+ // They are set before GC if we're going to
+ // compact. (search for BF_COMPACTED above).
+ bd->flags &= ~BF_COMPACTED;
+ next = bd->link;
+ if (next == NULL) {
+ bd->link = stp->blocks;
+ }
+ }
+ stp->blocks = stp->old_blocks;
+ }
+ // add the new blocks to the block tally
+ stp->n_blocks += stp->n_old_blocks;
+ ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
+ } else {
+ freeChain(stp->old_blocks);
+ for (bd = stp->blocks; bd != NULL; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED; // now from-space
+ }
+ }
+ stp->old_blocks = NULL;
+ stp->n_old_blocks = 0;
+ }
+
+ /* LARGE OBJECTS. The current live large objects are chained on
+ * scavenged_large, having been moved during garbage
+ * collection from large_objects. Any objects left on
+ * large_objects list are therefore dead, so we free them here.
+ */
+ for (bd = stp->large_objects; bd != NULL; bd = next) {
+ next = bd->link;
+ freeGroup(bd);
+ bd = next;
+ }
+
+ // update the count of blocks used by large objects
+ for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+ stp->large_objects = stp->scavenged_large_objects;
+ stp->n_large_blocks = stp->n_scavenged_large_blocks;
+
+ } else {
+ // for older generations...
+
+ /* For older generations, we need to append the
+ * scavenged_large_object list (i.e. large objects that have been
+ * promoted during this GC) to the large_object list for that step.
+ */
+ for (bd = stp->scavenged_large_objects; bd; bd = next) {
+ next = bd->link;
+ bd->flags &= ~BF_EVACUATED;
+ dbl_link_onto(bd, &stp->large_objects);
+ }
+
+ // add the new blocks we promoted during this GC
+ stp->n_large_blocks += stp->n_scavenged_large_blocks;
+ }
+ }
+ }
+
+ /* Reset the sizes of the older generations when we do a major
+ * collection.
+ *
+ * CURRENT STRATEGY: make all generations except zero the same size.
+ * We have to stay within the maximum heap size, and leave a certain
+ * percentage of the maximum heap size available to allocate into.
+ */
+ if (major_gc && RtsFlags.GcFlags.generations > 1) {
+ nat live, size, min_alloc;
+ nat max = RtsFlags.GcFlags.maxHeapSize;
+ nat gens = RtsFlags.GcFlags.generations;
+
+ // live in the oldest generations
+ live = oldest_gen->steps[0].n_blocks +
+ oldest_gen->steps[0].n_large_blocks;
+
+ // default max size for all generations except zero
+ size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
+ RtsFlags.GcFlags.minOldGenSize);
+
+ // minimum size for generation zero
+ min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
+ RtsFlags.GcFlags.minAllocAreaSize);
+
+ // Auto-enable compaction when the residency reaches a
+ // certain percentage of the maximum heap size (default: 30%).
+ if (RtsFlags.GcFlags.generations > 1 &&
+ (RtsFlags.GcFlags.compact ||
+ (max > 0 &&
+ oldest_gen->steps[0].n_blocks >
+ (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+ oldest_gen->steps[0].is_compacted = 1;
+// debugBelch("compaction: on\n", live);
+ } else {
+ oldest_gen->steps[0].is_compacted = 0;
+// debugBelch("compaction: off\n", live);
+ }
+
+ // if we're going to go over the maximum heap size, reduce the
+ // size of the generations accordingly. The calculation is
+ // different if compaction is turned on, because we don't need
+ // to double the space required to collect the old generation.
+ if (max != 0) {
+
+ // this test is necessary to ensure that the calculations
+ // below don't have any negative results - we're working
+ // with unsigned values here.
+ if (max < min_alloc) {
+ heapOverflow();
+ }
+
+ if (oldest_gen->steps[0].is_compacted) {
+ if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
+ size = (max - min_alloc) / ((gens - 1) * 2 - 1);
+ }
+ } else {
+ if ( (size * (gens - 1) * 2) + min_alloc > max ) {
+ size = (max - min_alloc) / ((gens - 1) * 2);
+ }
+ }
+
+ if (size < live) {
+ heapOverflow();
+ }
+ }
+
+#if 0
+ debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
+ min_alloc, size, max);
+#endif
+
+ for (g = 0; g < gens; g++) {
+ generations[g].max_blocks = size;
+ }
+ }
+
+ // Guess the amount of live data for stats.
+ live = calcLive();
+
+ /* Free the small objects allocated via allocate(), since this will
+ * all have been copied into G0S1 now.
+ */
+ if (small_alloc_list != NULL) {
+ freeChain(small_alloc_list);
+ }
+ small_alloc_list = NULL;
+ alloc_blocks = 0;
+ alloc_Hp = NULL;
+ alloc_HpLim = NULL;
+ alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
+
+ // Start a new pinned_object_block
+ pinned_object_block = NULL;
+
+ /* Free the mark stack.
+ */
+ if (mark_stack_bdescr != NULL) {
+ freeGroup(mark_stack_bdescr);
+ }
+
+ /* Free any bitmaps.
+ */
+ for (g = 0; g <= N; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ stp = &generations[g].steps[s];
+ if (stp->bitmap != NULL) {
+ freeGroup(stp->bitmap);
+ stp->bitmap = NULL;
+ }
+ }
+ }
+
+ /* Two-space collector:
+ * Free the old to-space, and estimate the amount of live data.
+ */
+ if (RtsFlags.GcFlags.generations == 1) {
+ nat blocks;
+
+ if (g0s0->old_blocks != NULL) {
+ freeChain(g0s0->old_blocks);
+ }
+ for (bd = g0s0->blocks; bd != NULL; bd = bd->link) {
+ bd->flags = 0; // now from-space
+ }
+ g0s0->old_blocks = g0s0->blocks;
+ g0s0->n_old_blocks = g0s0->n_blocks;
+ g0s0->blocks = saved_nursery;
+ g0s0->n_blocks = saved_n_blocks;
+
+ /* For a two-space collector, we need to resize the nursery. */
+
+ /* set up a new nursery. Allocate a nursery size based on a
+ * function of the amount of live data (by default a factor of 2)
+ * Use the blocks from the old nursery if possible, freeing up any
+ * left over blocks.
+ *
+ * If we get near the maximum heap size, then adjust our nursery
+ * size accordingly. If the nursery is the same size as the live
+ * data (L), then we need 3L bytes. We can reduce the size of the
+ * nursery to bring the required memory down near 2L bytes.
+ *
+ * A normal 2-space collector would need 4L bytes to give the same
+ * performance we get from 3L bytes, reducing to the same
+ * performance at 2L bytes.
+ */
+ blocks = g0s0->n_old_blocks;
+
+ if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
+ blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
+ RtsFlags.GcFlags.maxHeapSize ) {
+ long adjusted_blocks; // signed on purpose
+ int pc_free;
+
+ adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
+ IF_DEBUG(gc, debugBelch("@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks));
+ pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
+ if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ {
+ heapOverflow();
+ }
+ blocks = adjusted_blocks;
+
+ } else {
+ blocks *= RtsFlags.GcFlags.oldGenFactor;
+ if (blocks < RtsFlags.GcFlags.minAllocAreaSize) {
+ blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ }
+ }
+ resizeNurseries(blocks);
+
+ } else {
+ /* Generational collector:
+ * If the user has given us a suggested heap size, adjust our
+ * allocation area to make best use of the memory available.
+ */
+
+ if (RtsFlags.GcFlags.heapSizeSuggestion) {
+ long blocks;
+ nat needed = calcNeeded(); // approx blocks needed at next GC
+
+ /* Guess how much will be live in generation 0 step 0 next time.
+ * A good approximation is obtained by finding the
+ * percentage of g0s0 that was live at the last minor GC.
+ */
+ if (N == 0) {
+ g0s0_pcnt_kept = (new_blocks * 100) / countNurseryBlocks();
+ }
+
+ /* Estimate a size for the allocation area based on the
+ * information available. We might end up going slightly under
+ * or over the suggested heap size, but we should be pretty
+ * close on average.
+ *
+ * Formula: suggested - needed
+ * ----------------------------
+ * 1 + g0s0_pcnt_kept/100
+ *
+ * where 'needed' is the amount of memory needed at the next
+ * collection for collecting all steps except g0s0.
+ */
+ blocks =
+ (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
+ (100 + (long)g0s0_pcnt_kept);
+
+ if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
+ blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ }
+
+ resizeNurseries((nat)blocks);
+
+ } else {
+ // we might have added extra large blocks to the nursery, so
+ // resize back to minAllocAreaSize again.
+ resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
+ }
+ }
+
+ // mark the garbage collected CAFs as dead
+#if 0 && defined(DEBUG) // doesn't work at the moment
+ if (major_gc) { gcCAFs(); }
+#endif
+
+#ifdef PROFILING
+ // resetStaticObjectForRetainerProfiling() must be called before
+ // zeroing below.
+ resetStaticObjectForRetainerProfiling();
+#endif
+
+ // zero the scavenged static object list
+ if (major_gc) {
+ zero_static_object_list(scavenged_static_objects);
+ }
+
+ // Reset the nursery
+ resetNurseries();
+
+ // start any pending finalizers
+ RELEASE_SM_LOCK;
+ scheduleFinalizers(last_free_capability, old_weak_ptr_list);
+ ACQUIRE_SM_LOCK;
+
+ // send exceptions to any threads which were about to die
+ RELEASE_SM_LOCK;
+ resurrectThreads(resurrected_threads);
+ ACQUIRE_SM_LOCK;
+
+ // Update the stable pointer hash table.
+ updateStablePtrTable(major_gc);
+
+ // check sanity after GC
+ IF_DEBUG(sanity, checkSanity());
+
+ // extra GC trace info
+ IF_DEBUG(gc, statDescribeGens());
+
+#ifdef DEBUG
+ // symbol-table based profiling
+ /* heapCensus(to_blocks); */ /* ToDo */
+#endif
+
+ // restore enclosing cost centre
+#ifdef PROFILING
+ CCCS = prev_CCS;
+#endif
+
+#ifdef DEBUG
+ // check for memory leaks if DEBUG is on
+ memInventory();
+#endif
+
+#ifdef RTS_GTK_FRONTPANEL
+ if (RtsFlags.GcFlags.frontpanel) {
+ updateFrontPanelAfterGC( N, live );
+ }
+#endif
+
+ // ok, GC over: tell the stats department what happened.
+ stat_endGC(allocated, live, copied, scavd_copied, N);
+
+#if defined(RTS_USER_SIGNALS)
+ // unblock signals again
+ unblockUserSignals();
+#endif
+
+ RELEASE_SM_LOCK;
+
+ //PAR_TICKY_TP();
+}
+
+
+/* -----------------------------------------------------------------------------
+ Weak Pointers
+
+ traverse_weak_ptr_list is called possibly many times during garbage
+ collection. It returns a flag indicating whether it did any work
+ (i.e. called evacuate on any live pointers).
+
+ Invariant: traverse_weak_ptr_list is called when the heap is in an
+ idempotent state. That means that there are no pending
+ evacuate/scavenge operations. This invariant helps the weak
+ pointer code decide which weak pointers are dead - if there are no
+ new live weak pointers, then all the currently unreachable ones are
+ dead.
+
+ For generational GC: we just don't try to finalize weak pointers in
+ older generations than the one we're collecting. This could
+ probably be optimised by keeping per-generation lists of weak
+ pointers, but for a few weak pointers this scheme will work.
+
+ There are three distinct stages to processing weak pointers:
+
+ - weak_stage == WeakPtrs
+
+ We process all the weak pointers whose keys are alive (evacuate
+ their values and finalizers), and repeat until we can find no new
+ live keys. If no live keys are found in this pass, then we
+ evacuate the finalizers of all the dead weak pointers in order to
+ run them.
+
+ - weak_stage == WeakThreads
+
+ Now, we discover which *threads* are still alive. Pointers to
+ threads from the all_threads and main thread lists are the
+ weakest of all: a pointer from the finalizer of a dead weak
+ pointer can keep a thread alive. Any threads found to be unreachable
+ are evacuated and placed on the resurrected_threads list so we
+ can send them a signal later.
+
+ - weak_stage == WeakDone
+
+ No more evacuation is done.
+
+ -------------------------------------------------------------------------- */
+
+static rtsBool
+traverse_weak_ptr_list(void)
+{
+ StgWeak *w, **last_w, *next_w;
+ StgClosure *new;
+ rtsBool flag = rtsFalse;
+
+ switch (weak_stage) {
+
+ case WeakDone:
+ return rtsFalse;
+
+ case WeakPtrs:
+ /* doesn't matter where we evacuate values/finalizers to, since
+ * these pointers are treated as roots (iff the keys are alive).
+ */
+ evac_gen = 0;
+
+ last_w = &old_weak_ptr_list;
+ for (w = old_weak_ptr_list; w != NULL; w = next_w) {
+
+ /* There might be a DEAD_WEAK on the list if finalizeWeak# was
+ * called on a live weak pointer object. Just remove it.
+ */
+ if (w->header.info == &stg_DEAD_WEAK_info) {
+ next_w = ((StgDeadWeak *)w)->link;
+ *last_w = next_w;
+ continue;
+ }
+
+ switch (get_itbl(w)->type) {
+
+ case EVACUATED:
+ next_w = (StgWeak *)((StgEvacuated *)w)->evacuee;
+ *last_w = next_w;
+ continue;
+
+ case WEAK:
+ /* Now, check whether the key is reachable.
+ */
+ new = isAlive(w->key);
+ if (new != NULL) {
+ w->key = new;
+ // evacuate the value and finalizer
+ w->value = evacuate(w->value);
+ w->finalizer = evacuate(w->finalizer);
+ // remove this weak ptr from the old_weak_ptr list
+ *last_w = w->link;
+ // and put it on the new weak ptr list
+ next_w = w->link;
+ w->link = weak_ptr_list;
+ weak_ptr_list = w;
+ flag = rtsTrue;
+ IF_DEBUG(weak, debugBelch("Weak pointer still alive at %p -> %p",
+ w, w->key));
+ continue;
+ }
+ else {
+ last_w = &(w->link);
+ next_w = w->link;
+ continue;
+ }
+
+ default:
+ barf("traverse_weak_ptr_list: not WEAK");
+ }
+ }
+
+ /* If we didn't make any changes, then we can go round and kill all
+ * the dead weak pointers. The old_weak_ptr list is used as a list
+ * of pending finalizers later on.
+ */
+ if (flag == rtsFalse) {
+ for (w = old_weak_ptr_list; w; w = w->link) {
+ w->finalizer = evacuate(w->finalizer);
+ }
+
+ // Next, move to the WeakThreads stage after fully
+ // scavenging the finalizers we've just evacuated.
+ weak_stage = WeakThreads;
+ }
+
+ return rtsTrue;
+
+ case WeakThreads:
+ /* Now deal with the all_threads list, which behaves somewhat like
+ * the weak ptr list. If we discover any threads that are about to
+ * become garbage, we wake them up and administer an exception.
+ */
+ {
+ StgTSO *t, *tmp, *next, **prev;
+
+ prev = &old_all_threads;
+ for (t = old_all_threads; t != END_TSO_QUEUE; t = next) {
+
+ tmp = (StgTSO *)isAlive((StgClosure *)t);
+
+ if (tmp != NULL) {
+ t = tmp;
+ }
+
+ ASSERT(get_itbl(t)->type == TSO);
+ switch (t->what_next) {
+ case ThreadRelocated:
+ next = t->link;
+ *prev = next;
+ continue;
+ case ThreadKilled:
+ case ThreadComplete:
+ // finshed or died. The thread might still be alive, but we
+ // don't keep it on the all_threads list. Don't forget to
+ // stub out its global_link field.
+ next = t->global_link;
+ t->global_link = END_TSO_QUEUE;
+ *prev = next;
+ continue;
+ default:
+ ;
+ }
+
+ // Threads blocked on black holes: if the black hole
+ // is alive, then the thread is alive too.
+ if (tmp == NULL && t->why_blocked == BlockedOnBlackHole) {
+ if (isAlive(t->block_info.closure)) {
+ t = (StgTSO *)evacuate((StgClosure *)t);
+ tmp = t;
+ flag = rtsTrue;
+ }
+ }
+
+ if (tmp == NULL) {
+ // not alive (yet): leave this thread on the
+ // old_all_threads list.
+ prev = &(t->global_link);
+ next = t->global_link;
+ }
+ else {
+ // alive: move this thread onto the all_threads list.
+ next = t->global_link;
+ t->global_link = all_threads;
+ all_threads = t;
+ *prev = next;
+ }
+ }
+ }
+
+ /* If we evacuated any threads, we need to go back to the scavenger.
+ */
+ if (flag) return rtsTrue;
+
+ /* And resurrect any threads which were about to become garbage.
+ */
+ {
+ StgTSO *t, *tmp, *next;
+ for (t = old_all_threads; t != END_TSO_QUEUE; t = next) {
+ next = t->global_link;
+ tmp = (StgTSO *)evacuate((StgClosure *)t);
+ tmp->global_link = resurrected_threads;
+ resurrected_threads = tmp;
+ }
+ }
+
+ /* Finally, we can update the blackhole_queue. This queue
+ * simply strings together TSOs blocked on black holes, it is
+ * not intended to keep anything alive. Hence, we do not follow
+ * pointers on the blackhole_queue until now, when we have
+ * determined which TSOs are otherwise reachable. We know at
+ * this point that all TSOs have been evacuated, however.
+ */
+ {
+ StgTSO **pt;
+ for (pt = &blackhole_queue; *pt != END_TSO_QUEUE; pt = &((*pt)->link)) {
+ *pt = (StgTSO *)isAlive((StgClosure *)*pt);
+ ASSERT(*pt != NULL);
+ }
+ }
+
+ weak_stage = WeakDone; // *now* we're done,
+ return rtsTrue; // but one more round of scavenging, please
+
+ default:
+ barf("traverse_weak_ptr_list");
+ return rtsTrue;
+ }
+
+}
+
+/* -----------------------------------------------------------------------------
+ After GC, the live weak pointer list may have forwarding pointers
+ on it, because a weak pointer object was evacuated after being
+ moved to the live weak pointer list. We remove those forwarding
+ pointers here.
+
+ Also, we don't consider weak pointer objects to be reachable, but
+ we must nevertheless consider them to be "live" and retain them.
+ Therefore any weak pointer objects which haven't as yet been
+ evacuated need to be evacuated now.
+ -------------------------------------------------------------------------- */
+
+
+static void
+mark_weak_ptr_list ( StgWeak **list )
+{
+ StgWeak *w, **last_w;
+
+ last_w = list;
+ for (w = *list; w; w = w->link) {
+ // w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here
+ ASSERT(w->header.info == &stg_DEAD_WEAK_info
+ || get_itbl(w)->type == WEAK || get_itbl(w)->type == EVACUATED);
+ w = (StgWeak *)evacuate((StgClosure *)w);
+ *last_w = w;
+ last_w = &(w->link);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ isAlive determines whether the given closure is still alive (after
+ a garbage collection) or not. It returns the new address of the
+ closure if it is alive, or NULL otherwise.
+
+ NOTE: Use it before compaction only!
+ -------------------------------------------------------------------------- */
+
+
+StgClosure *
+isAlive(StgClosure *p)
+{
+ const StgInfoTable *info;
+ bdescr *bd;
+
+ while (1) {
+
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+ info = get_itbl(p);
+
+ // ignore static closures
+ //
+ // ToDo: for static closures, check the static link field.
+ // Problem here is that we sometimes don't set the link field, eg.
+ // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
+ //
+ if (!HEAP_ALLOCED(p)) {
+ return p;
+ }
+
+ // ignore closures in generations that we're not collecting.
+ bd = Bdescr((P_)p);
+ if (bd->gen_no > N) {
+ return p;
+ }
+
+ // if it's a pointer into to-space, then we're done
+ if (bd->flags & BF_EVACUATED) {
+ return p;
+ }
+
+ // large objects use the evacuated flag
+ if (bd->flags & BF_LARGE) {
+ return NULL;
+ }
+
+ // check the mark bit for compacted steps
+ if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) {
+ return p;
+ }
+
+ switch (info->type) {
+
+ case IND:
+ case IND_STATIC:
+ case IND_PERM:
+ case IND_OLDGEN: // rely on compatible layout with StgInd
+ case IND_OLDGEN_PERM:
+ // follow indirections
+ p = ((StgInd *)p)->indirectee;
+ continue;
+
+ case EVACUATED:
+ // alive!
+ return ((StgEvacuated *)p)->evacuee;
+
+ case TSO:
+ if (((StgTSO *)p)->what_next == ThreadRelocated) {
+ p = (StgClosure *)((StgTSO *)p)->link;
+ continue;
+ }
+ return NULL;
+
+ default:
+ // dead.
+ return NULL;
+ }
+ }
+}
+
+static void
+mark_root(StgClosure **root)
+{
+ *root = evacuate(*root);
+}
+
+STATIC_INLINE void
+upd_evacuee(StgClosure *p, StgClosure *dest)
+{
+ // not true: (ToDo: perhaps it should be)
+ // ASSERT(Bdescr((P_)dest)->flags & BF_EVACUATED);
+ SET_INFO(p, &stg_EVACUATED_info);
+ ((StgEvacuated *)p)->evacuee = dest;
+}
+
+
+STATIC_INLINE StgClosure *
+copy(StgClosure *src, nat size, step *stp)
+{
+ StgPtr to, from;
+ nat i;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_org = size;
+#endif
+
+ TICK_GC_WORDS_COPIED(size);
+ /* Find out where we're going, using the handy "to" pointer in
+ * the step of the source object. If it turns out we need to
+ * evacuate to an older generation, adjust it here (see comment
+ * by evacuate()).
+ */
+ if (stp->gen_no < evac_gen) {
+ if (eager_promotion) {
+ stp = &generations[evac_gen].steps[0];
+ } else {
+ failed_to_evac = rtsTrue;
+ }
+ }
+
+ /* chain a new block onto the to-space for the destination step if
+ * necessary.
+ */
+ if (stp->hp + size >= stp->hpLim) {
+ gc_alloc_block(stp);
+ }
+
+ to = stp->hp;
+ from = (StgPtr)src;
+ stp->hp = to + size;
+ for (i = 0; i < size; i++) { // unroll for small i
+ to[i] = from[i];
+ }
+ upd_evacuee((StgClosure *)from,(StgClosure *)to);
+
+#ifdef PROFILING
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(from, size_org);
+#endif
+ return (StgClosure *)to;
+}
+
+// Same as copy() above, except the object will be allocated in memory
+// that will not be scavenged. Used for object that have no pointer
+// fields.
+STATIC_INLINE StgClosure *
+copy_noscav(StgClosure *src, nat size, step *stp)
+{
+ StgPtr to, from;
+ nat i;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_org = size;
+#endif
+
+ TICK_GC_WORDS_COPIED(size);
+ /* Find out where we're going, using the handy "to" pointer in
+ * the step of the source object. If it turns out we need to
+ * evacuate to an older generation, adjust it here (see comment
+ * by evacuate()).
+ */
+ if (stp->gen_no < evac_gen) {
+ if (eager_promotion) {
+ stp = &generations[evac_gen].steps[0];
+ } else {
+ failed_to_evac = rtsTrue;
+ }
+ }
+
+ /* chain a new block onto the to-space for the destination step if
+ * necessary.
+ */
+ if (stp->scavd_hp + size >= stp->scavd_hpLim) {
+ gc_alloc_scavd_block(stp);
+ }
+
+ to = stp->scavd_hp;
+ from = (StgPtr)src;
+ stp->scavd_hp = to + size;
+ for (i = 0; i < size; i++) { // unroll for small i
+ to[i] = from[i];
+ }
+ upd_evacuee((StgClosure *)from,(StgClosure *)to);
+
+#ifdef PROFILING
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ SET_EVACUAEE_FOR_LDV(from, size_org);
+#endif
+ return (StgClosure *)to;
+}
+
+/* Special version of copy() for when we only want to copy the info
+ * pointer of an object, but reserve some padding after it. This is
+ * used to optimise evacuation of BLACKHOLEs.
+ */
+
+
+static StgClosure *
+copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
+{
+ P_ dest, to, from;
+#ifdef PROFILING
+ // @LDV profiling
+ nat size_to_copy_org = size_to_copy;
+#endif
+
+ TICK_GC_WORDS_COPIED(size_to_copy);
+ if (stp->gen_no < evac_gen) {
+ if (eager_promotion) {
+ stp = &generations[evac_gen].steps[0];
+ } else {
+ failed_to_evac = rtsTrue;
+ }
+ }
+
+ if (stp->hp + size_to_reserve >= stp->hpLim) {
+ gc_alloc_block(stp);
+ }
+
+ for(to = stp->hp, from = (P_)src; size_to_copy>0; --size_to_copy) {
+ *to++ = *from++;
+ }
+
+ dest = stp->hp;
+ stp->hp += size_to_reserve;
+ upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+ // We store the size of the just evacuated object in the LDV word so that
+ // the profiler can guess the position of the next object later.
+ // size_to_copy_org is wrong because the closure already occupies size_to_reserve
+ // words.
+ SET_EVACUAEE_FOR_LDV(src, size_to_reserve);
+ // fill the slop
+ if (size_to_reserve - size_to_copy_org > 0)
+ LDV_FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org));
+#endif
+ return (StgClosure *)dest;
+}
+
+
+/* -----------------------------------------------------------------------------
+ Evacuate a large object
+
+ This just consists of removing the object from the (doubly-linked)
+ step->large_objects list, and linking it on to the (singly-linked)
+ step->new_large_objects list, from where it will be scavenged later.
+
+ Convention: bd->flags has BF_EVACUATED set for a large object
+ that has been evacuated, or unset otherwise.
+ -------------------------------------------------------------------------- */
+
+
+STATIC_INLINE void
+evacuate_large(StgPtr p)
+{
+ bdescr *bd = Bdescr(p);
+ step *stp;
+
+ // object must be at the beginning of the block (or be a ByteArray)
+ ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS ||
+ (((W_)p & BLOCK_MASK) == 0));
+
+ // already evacuated?
+ if (bd->flags & BF_EVACUATED) {
+ /* Don't forget to set the failed_to_evac flag if we didn't get
+ * the desired destination (see comments in evacuate()).
+ */
+ if (bd->gen_no < evac_gen) {
+ failed_to_evac = rtsTrue;
+ TICK_GC_FAILED_PROMOTION();
+ }
+ return;
+ }
+
+ stp = bd->step;
+ // remove from large_object list
+ if (bd->u.back) {
+ bd->u.back->link = bd->link;
+ } else { // first object in the list
+ stp->large_objects = bd->link;
+ }
+ if (bd->link) {
+ bd->link->u.back = bd->u.back;
+ }
+
+ /* link it on to the evacuated large object list of the destination step
+ */
+ stp = bd->step->to;
+ if (stp->gen_no < evac_gen) {
+ if (eager_promotion) {
+ stp = &generations[evac_gen].steps[0];
+ } else {
+ failed_to_evac = rtsTrue;
+ }
+ }
+
+ bd->step = stp;
+ bd->gen_no = stp->gen_no;
+ bd->link = stp->new_large_objects;
+ stp->new_large_objects = bd;
+ bd->flags |= BF_EVACUATED;
+}
+
+/* -----------------------------------------------------------------------------
+ Evacuate
+
+ This is called (eventually) for every live object in the system.
+
+ The caller to evacuate specifies a desired generation in the
+ evac_gen global variable. The following conditions apply to
+ evacuating an object which resides in generation M when we're
+ collecting up to generation N
+
+ if M >= evac_gen
+ if M > N do nothing
+ else evac to step->to
+
+ if M < evac_gen evac to evac_gen, step 0
+
+ if the object is already evacuated, then we check which generation
+ it now resides in.
+
+ if M >= evac_gen do nothing
+ if M < evac_gen set failed_to_evac flag to indicate that we
+ didn't manage to evacuate this object into evac_gen.
+
+
+ OPTIMISATION NOTES:
+
+ evacuate() is the single most important function performance-wise
+ in the GC. Various things have been tried to speed it up, but as
+ far as I can tell the code generated by gcc 3.2 with -O2 is about
+ as good as it's going to get. We pass the argument to evacuate()
+ in a register using the 'regparm' attribute (see the prototype for
+ evacuate() near the top of this file).
+
+ Changing evacuate() to take an (StgClosure **) rather than
+ returning the new pointer seems attractive, because we can avoid
+ writing back the pointer when it hasn't changed (eg. for a static
+ object, or an object in a generation > N). However, I tried it and
+ it doesn't help. One reason is that the (StgClosure **) pointer
+ gets spilled to the stack inside evacuate(), resulting in far more
+ extra reads/writes than we save.
+ -------------------------------------------------------------------------- */
+
+/* Evacuate the closure q: ensure it survives this GC (by copying it
+ * to to-space, marking it, or re-linking it, as appropriate for its
+ * kind) and return its new address.  See the block comment above for
+ * the full contract: evac_gen and failed_to_evac record whether the
+ * object could be promoted to the requested generation.
+ */
+REGPARM1 static StgClosure *
+evacuate(StgClosure *q)
+{
+#if defined(PAR)
+  StgClosure *to;
+#endif
+  bdescr *bd = NULL;
+  step *stp;
+  const StgInfoTable *info;
+
+loop:
+  ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
+
+  if (!HEAP_ALLOCED(q)) {
+
+      if (!major_gc) return q;
+
+      info = get_itbl(q);
+      switch (info->type) {
+
+      case THUNK_STATIC:
+          if (info->srt_bitmap != 0 &&
+              *THUNK_STATIC_LINK((StgClosure *)q) == NULL) {
+              *THUNK_STATIC_LINK((StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          return q;
+
+      case FUN_STATIC:
+          if (info->srt_bitmap != 0 &&
+              *FUN_STATIC_LINK((StgClosure *)q) == NULL) {
+              *FUN_STATIC_LINK((StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          return q;
+
+      case IND_STATIC:
+          /* If q->saved_info != NULL, then it's a revertible CAF - it'll be
+           * on the CAF list, so don't do anything with it here (we'll
+           * scavenge it later).
+           */
+          if (((StgIndStatic *)q)->saved_info == NULL
+              && *IND_STATIC_LINK((StgClosure *)q) == NULL) {
+              *IND_STATIC_LINK((StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          return q;
+
+      case CONSTR_STATIC:
+          if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
+              *STATIC_LINK(info,(StgClosure *)q) = static_objects;
+              static_objects = (StgClosure *)q;
+          }
+          return q;
+
+      case CONSTR_INTLIKE:
+      case CONSTR_CHARLIKE:
+      case CONSTR_NOCAF_STATIC:
+          /* no need to put these on the static linked list, they don't need
+           * to be scavenged.
+           */
+          return q;
+
+      default:
+          barf("evacuate(static): strange closure type %d", (int)(info->type));
+      }
+  }
+
+  bd = Bdescr((P_)q);
+
+  if (bd->gen_no > N) {
+      /* Can't evacuate this object, because it's in a generation
+       * older than the ones we're collecting.  Let's hope that it's
+       * in evac_gen or older, or we will have to arrange to track
+       * this pointer using the mutable list.
+       */
+      if (bd->gen_no < evac_gen) {
+          // nope
+          failed_to_evac = rtsTrue;
+          TICK_GC_FAILED_PROMOTION();
+      }
+      return q;
+  }
+
+  if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) {
+
+      /* pointer into to-space: just return it.  This normally
+       * shouldn't happen, but allowing it makes certain things
+       * slightly easier (eg. the mutable list can contain the same
+       * object twice, for example).
+       */
+      if (bd->flags & BF_EVACUATED) {
+          if (bd->gen_no < evac_gen) {
+              failed_to_evac = rtsTrue;
+              TICK_GC_FAILED_PROMOTION();
+          }
+          return q;
+      }
+
+      /* evacuate large objects by re-linking them onto a different list.
+       */
+      if (bd->flags & BF_LARGE) {
+          info = get_itbl(q);
+          if (info->type == TSO &&
+              ((StgTSO *)q)->what_next == ThreadRelocated) {
+              q = (StgClosure *)((StgTSO *)q)->link;
+              goto loop;
+          }
+          evacuate_large((P_)q);
+          return q;
+      }
+
+      /* If the object is in a step that we're compacting, then we
+       * need to use an alternative evacuate procedure.
+       */
+      if (bd->flags & BF_COMPACTED) {
+          if (!is_marked((P_)q,bd)) {
+              mark((P_)q,bd);
+              if (mark_stack_full()) {
+                  mark_stack_overflowed = rtsTrue;
+                  reset_mark_stack();
+              }
+              push_mark_stack((P_)q);
+          }
+          return q;
+      }
+  }
+
+  stp = bd->step->to;
+
+  info = get_itbl(q);
+
+  switch (info->type) {
+
+  case MUT_VAR_CLEAN:
+  case MUT_VAR_DIRTY:
+  case MVAR:
+      return copy(q,sizeW_fromITBL(info),stp);
+
+  case CONSTR_0_1:
+  {
+      StgWord w = (StgWord)q->payload[0];
+      if (q->header.info == Czh_con_info &&
+          // unsigned, so always true:  (StgChar)w >= MIN_CHARLIKE &&
+          (StgChar)w <= MAX_CHARLIKE) {
+          return (StgClosure *)CHARLIKE_CLOSURE((StgChar)w);
+      }
+      if (q->header.info == Izh_con_info &&
+          (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) {
+          return (StgClosure *)INTLIKE_CLOSURE((StgInt)w);
+      }
+      // else
+      return copy_noscav(q,sizeofW(StgHeader)+1,stp);
+  }
+
+  case FUN_0_1:
+  case FUN_1_0:
+  case CONSTR_1_0:
+      return copy(q,sizeofW(StgHeader)+1,stp);
+
+  case THUNK_1_0:
+  case THUNK_0_1:
+      return copy(q,sizeofW(StgThunk)+1,stp);
+
+  case THUNK_1_1:
+  case THUNK_2_0:
+  case THUNK_0_2:
+#ifdef NO_PROMOTE_THUNKS
+      if (bd->gen_no == 0 &&
+          bd->step->no != 0 &&
+          bd->step->no == generations[bd->gen_no].n_steps-1) {
+          stp = bd->step;
+      }
+#endif
+      return copy(q,sizeofW(StgThunk)+2,stp);
+
+  case FUN_1_1:
+  case FUN_2_0:
+  case CONSTR_1_1:
+  case CONSTR_2_0:
+  case FUN_0_2:
+      return copy(q,sizeofW(StgHeader)+2,stp);
+
+  case CONSTR_0_2:
+      return copy_noscav(q,sizeofW(StgHeader)+2,stp);
+
+  case THUNK:
+      return copy(q,thunk_sizeW_fromITBL(info),stp);
+
+  case FUN:
+  case CONSTR:
+  case IND_PERM:
+  case IND_OLDGEN_PERM:
+  case WEAK:
+  case STABLE_NAME:
+      return copy(q,sizeW_fromITBL(info),stp);
+
+  case BCO:
+      return copy(q,bco_sizeW((StgBCO *)q),stp);
+
+  case CAF_BLACKHOLE:
+  case SE_CAF_BLACKHOLE:
+  case SE_BLACKHOLE:
+  case BLACKHOLE:
+      return copyPart(q,BLACKHOLE_sizeW(),sizeofW(StgHeader),stp);
+
+  case THUNK_SELECTOR:
+  {
+      StgClosure *p;
+      const StgInfoTable *info_ptr;
+
+      if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) {
+          return copy(q,THUNK_SELECTOR_sizeW(),stp);
+      }
+
+      // stashed away for LDV profiling, see below
+      info_ptr = q->header.info;
+
+      p = eval_thunk_selector(info->layout.selector_offset,
+                              (StgSelector *)q);
+
+      if (p == NULL) {
+          return copy(q,THUNK_SELECTOR_sizeW(),stp);
+      } else {
+          StgClosure *val;
+          // q is still BLACKHOLE'd.
+          thunk_selector_depth++;
+          val = evacuate(p);
+          thunk_selector_depth--;
+
+#ifdef PROFILING
+          // For the purposes of LDV profiling, we have destroyed
+          // the original selector thunk.
+          SET_INFO(q, info_ptr);
+          LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(q);
+#endif
+
+          // Update the THUNK_SELECTOR with an indirection to the
+          // EVACUATED closure now at p.  Why do this rather than
+          // upd_evacuee(q,p)?  Because we have an invariant that an
+          // EVACUATED closure always points to an object in the
+          // same or an older generation (required by the short-cut
+          // test in the EVACUATED case, below).
+          SET_INFO(q, &stg_IND_info);
+          ((StgInd *)q)->indirectee = p;
+
+          // For the purposes of LDV profiling, we have created an
+          // indirection.
+          LDV_RECORD_CREATE(q);
+
+          return val;
+      }
+  }
+
+  case IND:
+  case IND_OLDGEN:
+      // follow chains of indirections, don't evacuate them
+      q = ((StgInd*)q)->indirectee;
+      goto loop;
+
+  case RET_BCO:
+  case RET_SMALL:
+  case RET_VEC_SMALL:
+  case RET_BIG:
+  case RET_VEC_BIG:
+  case RET_DYN:
+  case UPDATE_FRAME:
+  case STOP_FRAME:
+  case CATCH_FRAME:
+  case CATCH_STM_FRAME:
+  case CATCH_RETRY_FRAME:
+  case ATOMICALLY_FRAME:
+      // shouldn't see these
+      barf("evacuate: stack frame at %p\n", q);
+
+  case PAP:
+      return copy(q,pap_sizeW((StgPAP*)q),stp);
+
+  case AP:
+      return copy(q,ap_sizeW((StgAP*)q),stp);
+
+  case AP_STACK:
+      return copy(q,ap_stack_sizeW((StgAP_STACK*)q),stp);
+
+  case EVACUATED:
+      /* Already evacuated, just return the forwarding address.
+       * HOWEVER: if the requested destination generation (evac_gen) is
+       * older than the actual generation (because the object was
+       * already evacuated to a younger generation) then we have to
+       * set the failed_to_evac flag to indicate that we couldn't
+       * manage to promote the object to the desired generation.
+       */
+      /*
+       * Optimisation: the check is fairly expensive, but we can often
+       * shortcut it if either the required generation is 0, or the
+       * current object (the EVACUATED) is in a high enough generation.
+       * We know that an EVACUATED always points to an object in the
+       * same or an older generation.  stp is the lowest step that the
+       * current object would be evacuated to, so we only do the full
+       * check if stp is too low.
+       */
+      if (evac_gen > 0 && stp->gen_no < evac_gen) {  // optimisation
+          StgClosure *p = ((StgEvacuated*)q)->evacuee;
+          if (HEAP_ALLOCED(p) && Bdescr((P_)p)->gen_no < evac_gen) {
+              failed_to_evac = rtsTrue;
+              TICK_GC_FAILED_PROMOTION();
+          }
+      }
+      return ((StgEvacuated*)q)->evacuee;
+
+  case ARR_WORDS:
+      // just copy the block
+      return copy_noscav(q,arr_words_sizeW((StgArrWords *)q),stp);
+
+  case MUT_ARR_PTRS_CLEAN:
+  case MUT_ARR_PTRS_DIRTY:
+  case MUT_ARR_PTRS_FROZEN:
+  case MUT_ARR_PTRS_FROZEN0:
+      // just copy the block
+      return copy(q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),stp);
+
+  case TSO:
+  {
+      StgTSO *tso = (StgTSO *)q;
+
+      /* Deal with redirected TSOs (a TSO that's had its stack enlarged).
+       */
+      if (tso->what_next == ThreadRelocated) {
+          q = (StgClosure *)tso->link;
+          goto loop;
+      }
+
+      /* To evacuate a small TSO, we need to relocate the update frame
+       * list it contains.
+       */
+      {
+          StgTSO *new_tso;
+          // renamed from p/q so they don't shadow the function argument q
+          StgPtr sp_src, sp_dst;
+
+          new_tso = (StgTSO *)copyPart((StgClosure *)tso,
+                                       tso_sizeW(tso),
+                                       sizeofW(StgTSO), stp);
+          move_TSO(tso, new_tso);
+          for (sp_src = tso->sp, sp_dst = new_tso->sp;
+               sp_src < tso->stack+tso->stack_size;) {
+              *sp_dst++ = *sp_src++;
+          }
+
+          return (StgClosure *)new_tso;
+      }
+  }
+
+#if defined(PAR)
+  case RBH:
+  {
+      //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str);
+      to = copy(q,BLACKHOLE_sizeW(),stp);
+      //ToDo: derive size etc from reverted IP
+      //to = copy(q,size,stp);
+      IF_DEBUG(gc,
+               debugBelch("@@ evacuate: RBH %p (%s) to %p (%s)",
+                          q, info_type(q), to, info_type(to)));
+      return to;
+  }
+
+  case BLOCKED_FETCH:
+      // fixed typo: was MIN_PAYLOD_SIZE, which doesn't compile under PAR
+      ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE);
+      to = copy(q,sizeofW(StgBlockedFetch),stp);
+      IF_DEBUG(gc,
+               debugBelch("@@ evacuate: %p (%s) to %p (%s)",
+                          q, info_type(q), to, info_type(to)));
+      return to;
+
+# ifdef DIST
+  case REMOTE_REF:
+# endif
+  case FETCH_ME:
+      // assert about the closure we actually copy (the old assertion
+      // checked StgBlockedFetch, a copy-and-paste from the case above)
+      ASSERT(sizeofW(StgFetchMe) >= MIN_PAYLOAD_SIZE);
+      to = copy(q,sizeofW(StgFetchMe),stp);
+      IF_DEBUG(gc,
+               debugBelch("@@ evacuate: %p (%s) to %p (%s)",
+                          q, info_type(q), to, info_type(to)));
+      return to;
+
+  case FETCH_ME_BQ:
+      // likewise: check the size of StgFetchMeBlockingQueue, the
+      // closure being copied here
+      ASSERT(sizeofW(StgFetchMeBlockingQueue) >= MIN_PAYLOAD_SIZE);
+      to = copy(q,sizeofW(StgFetchMeBlockingQueue),stp);
+      IF_DEBUG(gc,
+               debugBelch("@@ evacuate: %p (%s) to %p (%s)",
+                          q, info_type(q), to, info_type(to)));
+      return to;
+#endif
+
+  case TREC_HEADER:
+      return copy(q,sizeofW(StgTRecHeader),stp);
+
+  case TVAR_WAIT_QUEUE:
+      return copy(q,sizeofW(StgTVarWaitQueue),stp);
+
+  case TVAR:
+      return copy(q,sizeofW(StgTVar),stp);
+
+  case TREC_CHUNK:
+      return copy(q,sizeofW(StgTRecChunk),stp);
+
+  default:
+      barf("evacuate: strange closure type %d", (int)(info->type));
+  }
+
+  barf("evacuate");
+}
+
+/* -----------------------------------------------------------------------------
+ Evaluate a THUNK_SELECTOR if possible.
+
+ returns: NULL if we couldn't evaluate this THUNK_SELECTOR, or
+ a closure pointer if we evaluated it and this is the result. Note
+ that "evaluating" the THUNK_SELECTOR doesn't necessarily mean
+ reducing it to HNF, just that we have eliminated the selection.
+ The result might be another thunk, or even another THUNK_SELECTOR.
+
+ If the return value is non-NULL, the original selector thunk has
+ been BLACKHOLE'd, and should be updated with an indirection or a
+ forwarding pointer. If the return value is NULL, then the selector
+ thunk is unchanged.
+
+ ***
+ ToDo: the treatment of THUNK_SELECTORS could be improved in the
+ following way (from a suggestion by Ian Lynagh):
+
+ We can have a chain like this:
+
+ sel_0 --> (a,b)
+ |
+ |-----> sel_0 --> (a,b)
+ |
+ |-----> sel_0 --> ...
+
+ and the depth limit means we don't go all the way to the end of the
+ chain, which results in a space leak. This affects the recursive
+ call to evacuate() in the THUNK_SELECTOR case in evacuate(): *not*
+ the recursive call to eval_thunk_selector() in
+ eval_thunk_selector().
+
+ We could eliminate the depth bound in this case, in the following
+ way:
+
+ - traverse the chain once to discover the *value* of the
+ THUNK_SELECTOR. Mark all THUNK_SELECTORS that we
+ visit on the way as having been visited already (somehow).
+
+ - in a second pass, traverse the chain again updating all
+ THUNK_SELECTORS that we find on the way with indirections to
+ the value.
+
+ - if we encounter a "marked" THUNK_SELECTOR in a normal
+ evacuate(), we know it can't be updated so just evac it.
+
+ Program that illustrates the problem:
+
+ foo [] = ([], [])
+ foo (x:xs) = let (ys, zs) = foo xs
+ in if x >= 0 then (x:ys, zs) else (ys, x:zs)
+
+ main = bar [1..(100000000::Int)]
+ bar xs = (\(ys, zs) -> print ys >> print zs) (foo xs)
+
+ -------------------------------------------------------------------------- */
+
+/* Does p point into to-space (or, in a step being compacted, at an
+ * already-marked object)?  Used by eval_thunk_selector() to avoid
+ * returning to-space pointers.
+ */
+static inline rtsBool
+is_to_space ( StgClosure *p )
+{
+    bdescr *bd;
+
+    // Static (non-heap) objects are never in to-space.  Test this
+    // before deriving the block descriptor: Bdescr() is only
+    // meaningful for heap-allocated addresses.  (The old code
+    // computed Bdescr(p) unconditionally; harmless thanks to the
+    // short-circuit &&, but clearer this way.)
+    if (!HEAP_ALLOCED(p)) {
+        return rtsFalse;
+    }
+
+    bd = Bdescr((StgPtr)p);
+    if (bd->flags & BF_EVACUATED) {
+        return rtsTrue;
+    }
+    // In a compacted step, a marked object may already have been
+    // scavenged in place and so may point into to-space.
+    if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) {
+        return rtsTrue;
+    }
+    return rtsFalse;
+}
+
+/* Try to perform the selection described by the THUNK_SELECTOR p
+ * (select field 'field' of p's selectee).  Returns the selected
+ * closure on success -- in which case p has been BLACKHOLE'd and the
+ * caller must update it with an indirection or forwarding pointer --
+ * or NULL on failure, in which case p's original info pointer has
+ * been restored.  See the large comment block above for the full
+ * contract and the to-space subtleties.
+ */
+static StgClosure *
+eval_thunk_selector( nat field, StgSelector * p )
+{
+ StgInfoTable *info;
+ const StgInfoTable *info_ptr;
+ StgClosure *selectee;
+
+ selectee = p->selectee;
+
+ // Save the real info pointer (NOTE: not the same as get_itbl()).
+ info_ptr = p->header.info;
+
+ // If the THUNK_SELECTOR is in a generation that we are not
+ // collecting, then bail out early. We won't be able to save any
+ // space in any case, and updating with an indirection is trickier
+ // in an old gen.
+ if (Bdescr((StgPtr)p)->gen_no > N) {
+ return NULL;
+ }
+
+ // BLACKHOLE the selector thunk, since it is now under evaluation.
+ // This is important to stop us going into an infinite loop if
+ // this selector thunk eventually refers to itself.
+ SET_INFO(p,&stg_BLACKHOLE_info);
+
+selector_loop:
+
+ // We don't want to end up in to-space, because this causes
+ // problems when the GC later tries to evacuate the result of
+ // eval_thunk_selector(). There are various ways this could
+ // happen:
+ //
+ // 1. following an IND_STATIC
+ //
+ // 2. when the old generation is compacted, the mark phase updates
+ // from-space pointers to be to-space pointers, and we can't
+ // reliably tell which we're following (eg. from an IND_STATIC).
+ //
+ // 3. compacting GC again: if we're looking at a constructor in
+ // the compacted generation, it might point directly to objects
+ // in to-space. We must bale out here, otherwise doing the selection
+ // will result in a to-space pointer being returned.
+ //
+ // (1) is dealt with using a BF_EVACUATED test on the
+ // selectee. (2) and (3): we can tell if we're looking at an
+ // object in the compacted generation that might point to
+ // to-space objects by testing that (a) it is BF_COMPACTED, (b)
+ // the compacted generation is being collected, and (c) the
+ // object is marked. Only a marked object may have pointers that
+ // point to to-space objects, because that happens when
+ // scavenging.
+ //
+ // The to-space test is now embodied in the in_to_space() inline
+ // function, as it is re-used below.
+ //
+ if (is_to_space(selectee)) {
+ goto bale_out;
+ }
+
+ info = get_itbl(selectee);
+ switch (info->type) {
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ // check that the size is in range
+ ASSERT(field < (StgWord32)(info->layout.payload.ptrs +
+ info->layout.payload.nptrs));
+
+ // Select the right field from the constructor, and check
+ // that the result isn't in to-space. It might be in
+ // to-space if, for example, this constructor contains
+ // pointers to younger-gen objects (and is on the mut-once
+ // list).
+ //
+ {
+ StgClosure *q;
+ q = selectee->payload[field];
+ if (is_to_space(q)) {
+ goto bale_out;
+ } else {
+ return q;
+ }
+ }
+
+ case IND:
+ case IND_PERM:
+ case IND_OLDGEN:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ // follow the indirection and retry the selection on its target
+ selectee = ((StgInd *)selectee)->indirectee;
+ goto selector_loop;
+
+ case EVACUATED:
+ // We don't follow pointers into to-space; the constructor
+ // has already been evacuated, so we won't save any space
+ // leaks by evaluating this selector thunk anyhow.
+ break;
+
+ case THUNK_SELECTOR:
+ {
+ StgClosure *val;
+
+ // check that we don't recurse too much, re-using the
+ // depth bound also used in evacuate().
+ if (thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) {
+ break;
+ }
+ thunk_selector_depth++;
+
+ val = eval_thunk_selector(info->layout.selector_offset,
+ (StgSelector *)selectee);
+
+ thunk_selector_depth--;
+
+ if (val == NULL) {
+ break;
+ } else {
+ // We evaluated this selector thunk, so update it with
+ // an indirection. NOTE: we don't use UPD_IND here,
+ // because we are guaranteed that p is in a generation
+ // that we are collecting, and we never want to put the
+ // indirection on a mutable list.
+#ifdef PROFILING
+ // For the purposes of LDV profiling, we have destroyed
+ // the original selector thunk.
+ // NOTE(review): this restores p's info pointer while
+ // recording the death of 'selectee' -- presumably
+ // intentional for LDV accounting, but worth confirming.
+ SET_INFO(p, info_ptr);
+ LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(selectee);
+#endif
+ ((StgInd *)selectee)->indirectee = val;
+ SET_INFO(selectee,&stg_IND_info);
+
+ // For the purposes of LDV profiling, we have created an
+ // indirection.
+ LDV_RECORD_CREATE(selectee);
+
+ selectee = val;
+ goto selector_loop;
+ }
+ }
+
+ case AP:
+ case AP_STACK:
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_STATIC:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+#if defined(PAR)
+ case RBH:
+ case BLOCKED_FETCH:
+# ifdef DIST
+ case REMOTE_REF:
+# endif
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+#endif
+ // not evaluated yet
+ break;
+
+ default:
+ barf("eval_thunk_selector: strange selectee %d",
+ (int)(info->type));
+ }
+
+bale_out:
+ // We didn't manage to evaluate this thunk; restore the old info pointer
+ SET_INFO(p, info_ptr);
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ move_TSO is called to update the TSO structure after it has been
+ moved from one place to another.
+ -------------------------------------------------------------------------- */
+
+void
+move_TSO (StgTSO *src, StgTSO *dest)
+{
+ // Word offset between the TSO's old and new locations.
+ ptrdiff_t offset;
+
+ offset = (StgPtr)dest - (StgPtr)src;
+
+ // The stack pointer points within the TSO's own stack, so it must
+ // be shifted by the same offset the TSO itself moved.
+ dest->sp = dest->sp + offset;
+}
+
+/* Similar to scavenge_large_bitmap(), but we don't write back the
+ * pointers we get back from evacuate().
+ */
+static void
+scavenge_large_srt_bitmap( StgLargeSRT *large_srt )
+{
+    nat i, b, size;
+    StgWord bitmap;
+    StgClosure **p;
+
+    b = 0;
+    bitmap = large_srt->l.bitmap[b];
+    size   = (nat)large_srt->l.size;
+    p      = (StgClosure **)large_srt->srt;
+    for (i = 0; i < size; ) {
+        if ((bitmap & 1) != 0) {
+            evacuate(*p);
+        }
+        i++;
+        p++;
+        if (i % BITS_IN(W_) == 0) {
+            b++;
+            // Only refetch when entries remain: the old code read
+            // bitmap[b] here unconditionally, which indexed one word
+            // past the end of the bitmap array whenever size is an
+            // exact multiple of BITS_IN(W_).
+            if (i < size) {
+                bitmap = large_srt->l.bitmap[b];
+            }
+        } else {
+            bitmap = bitmap >> 1;
+        }
+    }
+}
+
+/* evacuate the SRT.  If srt_bitmap is zero, then there isn't an
+ * srt field in the info table.  That's ok, because we'll
+ * never dereference it (the loop below does nothing).
+ */
+STATIC_INLINE void
+scavenge_srt (StgClosure **srt, nat srt_bitmap)
+{
+  nat bitmap;
+  StgClosure **p;
+
+  bitmap = srt_bitmap;
+  p = srt;
+
+  // An all-ones half-word bitmap marks an SRT too large for a small
+  // bitmap; it is a StgLargeSRT instead.
+  if (bitmap == (StgHalfWord)(-1)) {
+      scavenge_large_srt_bitmap( (StgLargeSRT *)srt );
+      return;
+  }
+
+  while (bitmap != 0) {
+      if ((bitmap & 1) != 0) {
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+          // Special-case to handle references to closures hiding out in DLLs, since
+          // double indirections required to get at those. The code generator knows
+          // which is which when generating the SRT, so it stores the (indirect)
+          // reference to the DLL closure in the table by first adding one to it.
+          // We check for this here, and undo the addition before evacuating it.
+          //
+          // If the SRT entry hasn't got bit 0 set, the SRT entry points to a
+          // closure that's fixed at link-time, and no extra magic is required.
+          //
+          // Bug fix: test and dereference the CURRENT entry (*p), not
+          // the first entry (*srt) -- 'srt' is never advanced in this
+          // loop, so the old code inspected entry 0 on every iteration.
+          if ( (unsigned long)(*p) & 0x1 ) {
+              evacuate(*stgCast(StgClosure**,(stgCast(unsigned long, *p) & ~0x1)));
+          } else {
+              evacuate(*p);
+          }
+#else
+          evacuate(*p);
+#endif
+      }
+      p++;
+      bitmap = bitmap >> 1;
+  }
+}
+
+
+STATIC_INLINE void
+scavenge_thunk_srt(const StgInfoTable *info)
+{
+    StgThunkInfoTable *tinfo;
+
+    /* SRTs reference static objects, which are only traced during a
+     * major collection; skip the work otherwise. */
+    if (!major_gc) {
+        return;
+    }
+
+    tinfo = itbl_to_thunk_itbl(info);
+    scavenge_srt((StgClosure **)GET_SRT(tinfo), tinfo->i.srt_bitmap);
+}
+
+STATIC_INLINE void
+scavenge_fun_srt(const StgInfoTable *info)
+{
+    StgFunInfoTable *finfo;
+
+    /* SRTs reference static objects, which are only traced during a
+     * major collection; skip the work otherwise. */
+    if (!major_gc) {
+        return;
+    }
+
+    finfo = itbl_to_fun_itbl(info);
+    scavenge_srt((StgClosure **)GET_FUN_SRT(finfo), finfo->i.srt_bitmap);
+}
+
+/* -----------------------------------------------------------------------------
+ Scavenge a TSO.
+ -------------------------------------------------------------------------- */
+
+/* Evacuate every closure reachable from the fields of a TSO, then
+ * scavenge its stack.  Note that the link field is deliberately NOT
+ * followed when the TSO is blocked on a black hole (see below).
+ */
+static void
+scavengeTSO (StgTSO *tso)
+{
+ // The block_info closure is only a GC root for these blocking
+ // reasons; for others it isn't a closure pointer at all.
+ if ( tso->why_blocked == BlockedOnMVar
+ || tso->why_blocked == BlockedOnBlackHole
+ || tso->why_blocked == BlockedOnException
+#if defined(PAR)
+ || tso->why_blocked == BlockedOnGA
+ || tso->why_blocked == BlockedOnGA_NoSend
+#endif
+ ) {
+ tso->block_info.closure = evacuate(tso->block_info.closure);
+ }
+ if ( tso->blocked_exceptions != NULL ) {
+ tso->blocked_exceptions =
+ (StgTSO *)evacuate((StgClosure *)tso->blocked_exceptions);
+ }
+
+ // We don't always chase the link field: TSOs on the blackhole
+ // queue are not automatically alive, so the link field is a
+ // "weak" pointer in that case.
+ if (tso->why_blocked != BlockedOnBlackHole) {
+ tso->link = (StgTSO *)evacuate((StgClosure *)tso->link);
+ }
+
+ // scavenge current transaction record
+ tso->trec = (StgTRecHeader *)evacuate((StgClosure *)tso->trec);
+
+ // scavenge this thread's stack
+ scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
+}
+
+/* -----------------------------------------------------------------------------
+ Blocks of function args occur on the stack (at the top) and
+ in PAPs.
+ -------------------------------------------------------------------------- */
+
+/* Scavenge a block of function arguments laid out according to
+ * fun_info's argument bitmap: evacuate every argument word whose
+ * bitmap bit is clear (clear = pointer) and write the new address
+ * back.  Returns a pointer to the word just past the arguments.
+ */
+STATIC_INLINE StgPtr
+scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args)
+{
+ StgPtr p;
+ StgWord bitmap;
+ nat size;
+
+ p = (StgPtr)args;
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ // general case: bitmap stored inline in the fun info table
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ // too big for an inline bitmap: use the out-of-line large bitmap
+ size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p += size;
+ break;
+ default:
+ // one of the standard argument patterns: look up its bitmap
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ // shared small-bitmap walk (also reached by goto from ARG_GEN)
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+ break;
+ }
+ return p;
+}
+
+/* Scavenge the payload of a PAP or AP: 'size' words of arguments laid
+ * out according to the bitmap of 'fun' (or, for a BCO, its own
+ * bitmap).  A clear bitmap bit marks a pointer word, which is
+ * evacuated and written back.  Returns the address just past the
+ * payload.
+ */
+STATIC_INLINE StgPtr
+scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
+{
+ StgPtr p;
+ StgWord bitmap;
+ StgFunInfoTable *fun_info;
+
+ fun_info = get_fun_itbl(fun);
+ ASSERT(fun_info->i.type != PAP);
+ p = (StgPtr)payload;
+
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p += size;
+ break;
+ case ARG_BCO:
+ // byte-code object: the layout comes from the BCO's own bitmap
+ scavenge_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size);
+ p += size;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ // shared small-bitmap walk (also reached by goto from ARG_GEN);
+ // note 'size' is the caller-supplied payload size here
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+ break;
+ }
+ return p;
+}
+
+STATIC_INLINE StgPtr
+scavenge_PAP (StgPAP *pap)
+{
+    StgClosure *fun;
+
+    /* Evacuate the function closure first; its info table is needed
+     * to decode the argument payload. */
+    fun = evacuate(pap->fun);
+    pap->fun = fun;
+    return scavenge_PAP_payload(fun, pap->payload, pap->n_args);
+}
+
+STATIC_INLINE StgPtr
+scavenge_AP (StgAP *ap)
+{
+    StgClosure *fun;
+
+    /* Evacuate the function closure first; its info table is needed
+     * to decode the argument payload. */
+    fun = evacuate(ap->fun);
+    ap->fun = fun;
+    return scavenge_PAP_payload(fun, ap->payload, ap->n_args);
+}
+
+/* -----------------------------------------------------------------------------
+ Scavenge a given step until there are no more objects in this step
+ to scavenge.
+
+ evac_gen is set by the caller to be either zero (for a step in a
+ generation < N) or G where G is the generation of the step being
+ scavenged.
+
+ We sometimes temporarily change evac_gen back to zero if we're
+ scavenging a mutable object where early promotion isn't such a good
+ idea.
+ -------------------------------------------------------------------------- */
+
+static void
+scavenge(step *stp)
+{
+ StgPtr p, q;
+ StgInfoTable *info;
+ bdescr *bd;
+ nat saved_evac_gen = evac_gen;
+
+ p = stp->scan;
+ bd = stp->scan_bd;
+
+ failed_to_evac = rtsFalse;
+
+ /* scavenge phase - standard breadth-first scavenging of the
+ * evacuated objects
+ */
+
+ while (bd != stp->hp_bd || p < stp->hp) {
+
+ // If we're at the end of this block, move on to the next block
+ if (bd != stp->hp_bd && p == bd->free) {
+ bd = bd->link;
+ p = bd->start;
+ continue;
+ }
+
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+ info = get_itbl((StgClosure *)p);
+
+ ASSERT(thunk_selector_depth == 0);
+
+ q = p;
+ switch (info->type) {
+
+ case MVAR:
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ evac_gen = 0;
+ mvar->head = (StgTSO *)evacuate((StgClosure *)mvar->head);
+ mvar->tail = (StgTSO *)evacuate((StgClosure *)mvar->tail);
+ mvar->value = evacuate((StgClosure *)mvar->value);
+ evac_gen = saved_evac_gen;
+ failed_to_evac = rtsTrue; // mutable.
+ p += sizeofW(StgMVar);
+ break;
+ }
+
+ case FUN_2_0:
+ scavenge_fun_srt(info);
+ ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]);
+ ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+ p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_2_0:
+ scavenge_thunk_srt(info);
+ ((StgThunk *)p)->payload[1] = evacuate(((StgThunk *)p)->payload[1]);
+ ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]);
+ p += sizeofW(StgThunk) + 2;
+ break;
+
+ case CONSTR_2_0:
+ ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]);
+ ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+ p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_0:
+ scavenge_thunk_srt(info);
+ ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]);
+ p += sizeofW(StgThunk) + 1;
+ break;
+
+ case FUN_1_0:
+ scavenge_fun_srt(info);
+ case CONSTR_1_0:
+ ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+ p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_1:
+ scavenge_thunk_srt(info);
+ p += sizeofW(StgThunk) + 1;
+ break;
+
+ case FUN_0_1:
+ scavenge_fun_srt(info);
+ case CONSTR_0_1:
+ p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_2:
+ scavenge_thunk_srt(info);
+ p += sizeofW(StgThunk) + 2;
+ break;
+
+ case FUN_0_2:
+ scavenge_fun_srt(info);
+ case CONSTR_0_2:
+ p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_1:
+ scavenge_thunk_srt(info);
+ ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]);
+ p += sizeofW(StgThunk) + 2;
+ break;
+
+ case FUN_1_1:
+ scavenge_fun_srt(info);
+ case CONSTR_1_1:
+ ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+ p += sizeofW(StgHeader) + 2;
+ break;
+
+ case FUN:
+ scavenge_fun_srt(info);
+ goto gen_obj;
+
+ case THUNK:
+ {
+ StgPtr end;
+
+ scavenge_thunk_srt(info);
+ end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+ for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
+ *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+ }
+ p += info->layout.payload.nptrs;
+ break;
+ }
+
+ gen_obj:
+ case CONSTR:
+ case WEAK:
+ case STABLE_NAME:
+ {
+ StgPtr end;
+
+ end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
+ *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+ }
+ p += info->layout.payload.nptrs;
+ break;
+ }
+
+ case BCO: {
+ StgBCO *bco = (StgBCO *)p;
+ bco->instrs = (StgArrWords *)evacuate((StgClosure *)bco->instrs);
+ bco->literals = (StgArrWords *)evacuate((StgClosure *)bco->literals);
+ bco->ptrs = (StgMutArrPtrs *)evacuate((StgClosure *)bco->ptrs);
+ bco->itbls = (StgArrWords *)evacuate((StgClosure *)bco->itbls);
+ p += bco_sizeW(bco);
+ break;
+ }
+
+ case IND_PERM:
+ if (stp->gen->no != 0) {
+#ifdef PROFILING
+ // @LDV profiling
+ // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an
+ // IND_OLDGEN_PERM closure is larger than an IND_PERM closure.
+ LDV_recordDead((StgClosure *)p, sizeofW(StgInd));
+#endif
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()?
+ //
+ SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
+
+ // We pretend that p has just been created.
+ LDV_RECORD_CREATE((StgClosure *)p);
+ }
+ // fall through
+ case IND_OLDGEN_PERM:
+ ((StgInd *)p)->indirectee = evacuate(((StgInd *)p)->indirectee);
+ p += sizeofW(StgInd);
+ break;
+
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY: {
+ rtsBool saved_eager_promotion = eager_promotion;
+
+ eager_promotion = rtsFalse;
+ ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
+ eager_promotion = saved_eager_promotion;
+
+ if (failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+ }
+ p += sizeofW(StgMutVar);
+ break;
+ }
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ p += BLACKHOLE_sizeW();
+ break;
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ s->selectee = evacuate(s->selectee);
+ p += THUNK_SELECTOR_sizeW();
+ break;
+ }
+
+ // A chunk of stack saved in a heap object
+ case AP_STACK:
+ {
+ StgAP_STACK *ap = (StgAP_STACK *)p;
+
+ ap->fun = evacuate(ap->fun);
+ scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size);
+ p = (StgPtr)ap->payload + ap->size;
+ break;
+ }
+
+ case PAP:
+ p = scavenge_PAP((StgPAP *)p);
+ break;
+
+ case AP:
+ p = scavenge_AP((StgAP *)p);
+ break;
+
+ case ARR_WORDS:
+ // nothing to follow
+ p += arr_words_sizeW((StgArrWords *)p);
+ break;
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ // follow everything
+ {
+ StgPtr next;
+ rtsBool saved_eager;
+
+ // We don't eagerly promote objects pointed to by a mutable
+ // array, but if we find the array only points to objects in
+ // the same or an older generation, we mark it "clean" and
+ // avoid traversing it during minor GCs.
+ saved_eager = eager_promotion;
+ eager_promotion = rtsFalse;
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+ }
+ eager_promotion = saved_eager;
+
+ if (failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+ }
+
+ failed_to_evac = rtsTrue; // always put it on the mutable list.
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+ // follow everything
+ {
+ StgPtr next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+ }
+
+ // If we're going to put this object on the mutable list, then
+ // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+ if (failed_to_evac) {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+ } else {
+ ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+ }
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso = (StgTSO *)p;
+ rtsBool saved_eager = eager_promotion;
+
+ eager_promotion = rtsFalse;
+ scavengeTSO(tso);
+ eager_promotion = saved_eager;
+
+ if (failed_to_evac) {
+ tso->flags |= TSO_DIRTY;
+ } else {
+ tso->flags &= ~TSO_DIRTY;
+ }
+
+ failed_to_evac = rtsTrue; // always on the mutable list
+ p += tso_sizeW(tso);
+ break;
+ }
+
+#if defined(PAR)
+ case RBH:
+ {
+#if 0
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+ StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str);
+#endif
+ StgRBH *rbh = (StgRBH *)p;
+ (StgClosure *)rbh->blocking_queue =
+ evacuate((StgClosure *)rbh->blocking_queue);
+ failed_to_evac = rtsTrue; // mutable anyhow.
+ IF_DEBUG(gc,
+ debugBelch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)",
+ p, info_type(p), (StgClosure *)rbh->blocking_queue));
+ // ToDo: use size of reverted closure here!
+ p += BLACKHOLE_sizeW();
+ break;
+ }
+
+ case BLOCKED_FETCH:
+ {
+ StgBlockedFetch *bf = (StgBlockedFetch *)p;
+ // follow the pointer to the node which is being demanded
+ (StgClosure *)bf->node =
+ evacuate((StgClosure *)bf->node);
+ // follow the link to the rest of the blocking queue
+ (StgClosure *)bf->link =
+ evacuate((StgClosure *)bf->link);
+ IF_DEBUG(gc,
+ debugBelch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it",
+ bf, info_type((StgClosure *)bf),
+ bf->node, info_type(bf->node)));
+ p += sizeofW(StgBlockedFetch);
+ break;
+ }
+
+#ifdef DIST
+ case REMOTE_REF:
+#endif
+ case FETCH_ME:
+ p += sizeofW(StgFetchMe);
+ break; // nothing to do in this case
+
+ case FETCH_ME_BQ:
+ {
+ StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p;
+ (StgClosure *)fmbq->blocking_queue =
+ evacuate((StgClosure *)fmbq->blocking_queue);
+ IF_DEBUG(gc,
+ debugBelch("@@ scavenge: %p (%s) exciting, isn't it",
+ p, info_type((StgClosure *)p)));
+ p += sizeofW(StgFetchMeBlockingQueue);
+ break;
+ }
+#endif
+
+ case TVAR_WAIT_QUEUE:
+ {
+ StgTVarWaitQueue *wq = ((StgTVarWaitQueue *) p);
+ evac_gen = 0;
+ wq->waiting_tso = (StgTSO *)evacuate((StgClosure*)wq->waiting_tso);
+ wq->next_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->next_queue_entry);
+ wq->prev_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->prev_queue_entry);
+ evac_gen = saved_evac_gen;
+ failed_to_evac = rtsTrue; // mutable
+ p += sizeofW(StgTVarWaitQueue);
+ break;
+ }
+
+ case TVAR:
+ {
+ StgTVar *tvar = ((StgTVar *) p);
+ evac_gen = 0;
+ tvar->current_value = evacuate((StgClosure*)tvar->current_value);
+ tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry);
+ evac_gen = saved_evac_gen;
+ failed_to_evac = rtsTrue; // mutable
+ p += sizeofW(StgTVar);
+ break;
+ }
+
+ case TREC_HEADER:
+ {
+ StgTRecHeader *trec = ((StgTRecHeader *) p);
+ evac_gen = 0;
+ trec->enclosing_trec = (StgTRecHeader *)evacuate((StgClosure*)trec->enclosing_trec);
+ trec->current_chunk = (StgTRecChunk *)evacuate((StgClosure*)trec->current_chunk);
+ evac_gen = saved_evac_gen;
+ failed_to_evac = rtsTrue; // mutable
+ p += sizeofW(StgTRecHeader);
+ break;
+ }
+
+ case TREC_CHUNK:
+ {
+ StgWord i;
+ StgTRecChunk *tc = ((StgTRecChunk *) p);
+ TRecEntry *e = &(tc -> entries[0]);
+ evac_gen = 0;
+ tc->prev_chunk = (StgTRecChunk *)evacuate((StgClosure*)tc->prev_chunk);
+ for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+ e->tvar = (StgTVar *)evacuate((StgClosure*)e->tvar);
+ e->expected_value = evacuate((StgClosure*)e->expected_value);
+ e->new_value = evacuate((StgClosure*)e->new_value);
+ }
+ evac_gen = saved_evac_gen;
+ failed_to_evac = rtsTrue; // mutable
+ p += sizeofW(StgTRecChunk);
+ break;
+ }
+
+ default:
+ barf("scavenge: unimplemented/strange closure type %d @ %p",
+ info->type, p);
+ }
+
+ /*
+ * We need to record the current object on the mutable list if
+ * (a) It is actually mutable, or
+ * (b) It contains pointers to a younger generation.
+ * Case (b) arises if we didn't manage to promote everything that
+ * the current object points to into the current generation.
+ */
+ if (failed_to_evac) {
+ failed_to_evac = rtsFalse;
+ if (stp->gen_no > 0) {
+ recordMutableGen((StgClosure *)q, stp->gen);
+ }
+ }
+ }
+
+ stp->scan_bd = bd;
+ stp->scan = p;
+}
+
+/* -----------------------------------------------------------------------------
+ Scavenge everything on the mark stack.
+
+ This is slightly different from scavenge():
+ - we don't walk linearly through the objects, so the scavenger
+ doesn't need to advance the pointer on to the next object.
+ -------------------------------------------------------------------------- */
+
+/* Scavenge everything reachable from the mark stack (used by the
+ * compacting collector).  Unlike scavenge(), objects are popped one at
+ * a time, so we never need to advance a pointer past the object just
+ * scavenged.  If the mark stack overflowed earlier, we fall back to a
+ * linear scan of the oldest generation's old_blocks looking for
+ * closures that are marked but not yet scavenged.
+ *
+ * Fixes over the previous version:
+ *  - the PAR-only RBH case referred to an undeclared variable "bh";
+ *    the closure in hand is "rbh" (would not compile under PAR).
+ *  - the BLOCKED_FETCH debug format string had 3 conversions for 4
+ *    arguments; added the missing "(%s)" for info_type(bf->node).
+ */
+static void
+scavenge_mark_stack(void)
+{
+    StgPtr p, q;
+    StgInfoTable *info;
+    nat saved_evac_gen;
+
+    // everything popped from the mark stack lives in the oldest
+    // generation, so evacuate into it by default.
+    evac_gen = oldest_gen->no;
+    saved_evac_gen = evac_gen;
+
+linear_scan:
+    while (!mark_stack_empty()) {
+        p = pop_mark_stack();
+
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+        info = get_itbl((StgClosure *)p);
+
+        q = p;   // q remembers the start of the object for the
+                 // mutable-list / mark bookkeeping below
+        switch (info->type) {
+
+        case MVAR:
+        {
+            // always mutable: scavenge contents in generation 0
+            StgMVar *mvar = ((StgMVar *)p);
+            evac_gen = 0;
+            mvar->head = (StgTSO *)evacuate((StgClosure *)mvar->head);
+            mvar->tail = (StgTSO *)evacuate((StgClosure *)mvar->tail);
+            mvar->value = evacuate((StgClosure *)mvar->value);
+            evac_gen = saved_evac_gen;
+            failed_to_evac = rtsTrue; // mutable.
+            break;
+        }
+
+        case FUN_2_0:
+            scavenge_fun_srt(info);
+            ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]);
+            ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+            break;
+
+        case THUNK_2_0:
+            scavenge_thunk_srt(info);
+            ((StgThunk *)p)->payload[1] = evacuate(((StgThunk *)p)->payload[1]);
+            ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]);
+            break;
+
+        case CONSTR_2_0:
+            ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]);
+            ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+            break;
+
+        case FUN_1_0:
+        case FUN_1_1:
+            scavenge_fun_srt(info);
+            ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+            break;
+
+        case THUNK_1_0:
+        case THUNK_1_1:
+            scavenge_thunk_srt(info);
+            ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]);
+            break;
+
+        case CONSTR_1_0:
+        case CONSTR_1_1:
+            ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]);
+            break;
+
+        case FUN_0_1:
+        case FUN_0_2:
+            scavenge_fun_srt(info);
+            break;
+
+        case THUNK_0_1:
+        case THUNK_0_2:
+            scavenge_thunk_srt(info);
+            break;
+
+        case CONSTR_0_1:
+        case CONSTR_0_2:
+            break;
+
+        case FUN:
+            scavenge_fun_srt(info);
+            goto gen_obj;
+
+        case THUNK:
+        {
+            StgPtr end;
+
+            scavenge_thunk_srt(info);
+            end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+            for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
+                *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+            }
+            break;
+        }
+
+        gen_obj:
+        case CONSTR:
+        case WEAK:
+        case STABLE_NAME:
+        {
+            // generic case: evacuate every pointer in the payload
+            StgPtr end;
+
+            end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+            for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
+                *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+            }
+            break;
+        }
+
+        case BCO: {
+            StgBCO *bco = (StgBCO *)p;
+            bco->instrs = (StgArrWords *)evacuate((StgClosure *)bco->instrs);
+            bco->literals = (StgArrWords *)evacuate((StgClosure *)bco->literals);
+            bco->ptrs = (StgMutArrPtrs *)evacuate((StgClosure *)bco->ptrs);
+            bco->itbls = (StgArrWords *)evacuate((StgClosure *)bco->itbls);
+            break;
+        }
+
+        case IND_PERM:
+            // don't need to do anything here: the only possible case
+            // is that we're in a 1-space compacting collector, with
+            // no "old" generation.
+            break;
+
+        case IND_OLDGEN:
+        case IND_OLDGEN_PERM:
+            ((StgInd *)p)->indirectee =
+                evacuate(((StgInd *)p)->indirectee);
+            break;
+
+        case MUT_VAR_CLEAN:
+        case MUT_VAR_DIRTY: {
+            rtsBool saved_eager_promotion = eager_promotion;
+
+            eager_promotion = rtsFalse;
+            ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
+            eager_promotion = saved_eager_promotion;
+
+            // mark the var clean/dirty depending on whether it still
+            // points into a younger generation
+            if (failed_to_evac) {
+                ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+            } else {
+                ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+            }
+            break;
+        }
+
+        case CAF_BLACKHOLE:
+        case SE_CAF_BLACKHOLE:
+        case SE_BLACKHOLE:
+        case BLACKHOLE:
+        case ARR_WORDS:
+            // no pointers to follow
+            break;
+
+        case THUNK_SELECTOR:
+        {
+            StgSelector *s = (StgSelector *)p;
+            s->selectee = evacuate(s->selectee);
+            break;
+        }
+
+        // A chunk of stack saved in a heap object
+        case AP_STACK:
+        {
+            StgAP_STACK *ap = (StgAP_STACK *)p;
+
+            ap->fun = evacuate(ap->fun);
+            scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size);
+            break;
+        }
+
+        case PAP:
+            scavenge_PAP((StgPAP *)p);
+            break;
+
+        case AP:
+            scavenge_AP((StgAP *)p);
+            break;
+
+        case MUT_ARR_PTRS_CLEAN:
+        case MUT_ARR_PTRS_DIRTY:
+            // follow everything
+        {
+            StgPtr next;
+            rtsBool saved_eager;
+
+            // We don't eagerly promote objects pointed to by a mutable
+            // array, but if we find the array only points to objects in
+            // the same or an older generation, we mark it "clean" and
+            // avoid traversing it during minor GCs.
+            saved_eager = eager_promotion;
+            eager_promotion = rtsFalse;
+            next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+            for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+                *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+            }
+            eager_promotion = saved_eager;
+
+            if (failed_to_evac) {
+                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+            } else {
+                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+            }
+
+            failed_to_evac = rtsTrue; // mutable anyhow.
+            break;
+        }
+
+        case MUT_ARR_PTRS_FROZEN:
+        case MUT_ARR_PTRS_FROZEN0:
+            // follow everything
+        {
+            StgPtr next, q = p;
+
+            next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+            for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+                *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+            }
+
+            // If we're going to put this object on the mutable list, then
+            // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+            if (failed_to_evac) {
+                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+            } else {
+                ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+            }
+            break;
+        }
+
+        case TSO:
+        {
+            StgTSO *tso = (StgTSO *)p;
+            rtsBool saved_eager = eager_promotion;
+
+            eager_promotion = rtsFalse;
+            scavengeTSO(tso);
+            eager_promotion = saved_eager;
+
+            if (failed_to_evac) {
+                tso->flags |= TSO_DIRTY;
+            } else {
+                tso->flags &= ~TSO_DIRTY;
+            }
+
+            failed_to_evac = rtsTrue; // always on the mutable list
+            break;
+        }
+
+#if defined(PAR)
+        case RBH:
+        {
+#if 0
+            nat size, ptrs, nonptrs, vhs;
+            char str[80];
+            StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str);
+#endif
+            StgRBH *rbh = (StgRBH *)p;
+            // fix: this previously used an undeclared identifier "bh";
+            // the closure in hand is "rbh".
+            rbh->blocking_queue =
+                (StgTSO *)evacuate((StgClosure *)rbh->blocking_queue);
+            failed_to_evac = rtsTrue;  // mutable anyhow.
+            IF_DEBUG(gc,
+                     debugBelch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)",
+                                p, info_type(p), (StgClosure *)rbh->blocking_queue));
+            break;
+        }
+
+        case BLOCKED_FETCH:
+        {
+            StgBlockedFetch *bf = (StgBlockedFetch *)p;
+            // follow the pointer to the node which is being demanded
+            (StgClosure *)bf->node =
+                evacuate((StgClosure *)bf->node);
+            // follow the link to the rest of the blocking queue
+            (StgClosure *)bf->link =
+                evacuate((StgClosure *)bf->link);
+            // fix: format string was missing the conversion for
+            // info_type(bf->node)
+            IF_DEBUG(gc,
+                     debugBelch("@@ scavenge: %p (%s); node is now %p (%s); exciting, isn't it",
+                                bf, info_type((StgClosure *)bf),
+                                bf->node, info_type(bf->node)));
+            break;
+        }
+
+#ifdef DIST
+        case REMOTE_REF:
+#endif
+        case FETCH_ME:
+            break; // nothing to do in this case
+
+        case FETCH_ME_BQ:
+        {
+            StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p;
+            (StgClosure *)fmbq->blocking_queue =
+                evacuate((StgClosure *)fmbq->blocking_queue);
+            IF_DEBUG(gc,
+                     debugBelch("@@ scavenge: %p (%s) exciting, isn't it",
+                                p, info_type((StgClosure *)p)));
+            break;
+        }
+#endif /* PAR */
+
+        case TVAR_WAIT_QUEUE:
+        {
+            StgTVarWaitQueue *wq = ((StgTVarWaitQueue *) p);
+            evac_gen = 0;
+            wq->waiting_tso = (StgTSO *)evacuate((StgClosure*)wq->waiting_tso);
+            wq->next_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->next_queue_entry);
+            wq->prev_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->prev_queue_entry);
+            evac_gen = saved_evac_gen;
+            failed_to_evac = rtsTrue; // mutable
+            break;
+        }
+
+        case TVAR:
+        {
+            StgTVar *tvar = ((StgTVar *) p);
+            evac_gen = 0;
+            tvar->current_value = evacuate((StgClosure*)tvar->current_value);
+            tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry);
+            evac_gen = saved_evac_gen;
+            failed_to_evac = rtsTrue; // mutable
+            break;
+        }
+
+        case TREC_CHUNK:
+        {
+            StgWord i;
+            StgTRecChunk *tc = ((StgTRecChunk *) p);
+            TRecEntry *e = &(tc -> entries[0]);
+            evac_gen = 0;
+            tc->prev_chunk = (StgTRecChunk *)evacuate((StgClosure*)tc->prev_chunk);
+            for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+                e->tvar = (StgTVar *)evacuate((StgClosure*)e->tvar);
+                e->expected_value = evacuate((StgClosure*)e->expected_value);
+                e->new_value = evacuate((StgClosure*)e->new_value);
+            }
+            evac_gen = saved_evac_gen;
+            failed_to_evac = rtsTrue; // mutable
+            break;
+        }
+
+        case TREC_HEADER:
+        {
+            StgTRecHeader *trec = ((StgTRecHeader *) p);
+            evac_gen = 0;
+            trec->enclosing_trec = (StgTRecHeader *)evacuate((StgClosure*)trec->enclosing_trec);
+            trec->current_chunk = (StgTRecChunk *)evacuate((StgClosure*)trec->current_chunk);
+            evac_gen = saved_evac_gen;
+            failed_to_evac = rtsTrue; // mutable
+            break;
+        }
+
+        default:
+            barf("scavenge_mark_stack: unimplemented/strange closure type %d @ %p",
+                 info->type, p);
+        }
+
+        // if the object still points into a younger generation, it
+        // has to go (back) on the mutable list.
+        if (failed_to_evac) {
+            failed_to_evac = rtsFalse;
+            if (evac_gen > 0) {
+                recordMutableGen((StgClosure *)q, &generations[evac_gen]);
+            }
+        }
+
+        // mark the next bit to indicate "scavenged"
+        mark(q+1, Bdescr(q));
+
+    } // while (!mark_stack_empty())
+
+    // start a new linear scan if the mark stack overflowed at some point
+    if (mark_stack_overflowed && oldgen_scan_bd == NULL) {
+        IF_DEBUG(gc, debugBelch("scavenge_mark_stack: starting linear scan"));
+        mark_stack_overflowed = rtsFalse;
+        oldgen_scan_bd = oldest_gen->steps[0].old_blocks;
+        oldgen_scan = oldgen_scan_bd->start;
+    }
+
+    if (oldgen_scan_bd) {
+        // push a new thing on the mark stack
+    loop:
+        // find a closure that is marked but not scavenged, and start
+        // from there.
+        while (oldgen_scan < oldgen_scan_bd->free
+               && !is_marked(oldgen_scan,oldgen_scan_bd)) {
+            oldgen_scan++;
+        }
+
+        if (oldgen_scan < oldgen_scan_bd->free) {
+
+            // already scavenged?  (the "scavenged" mark is on q+1)
+            if (is_marked(oldgen_scan+1,oldgen_scan_bd)) {
+                oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE;
+                goto loop;
+            }
+            push_mark_stack(oldgen_scan);
+            // ToDo: bump the linear scan by the actual size of the object
+            oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE;
+            goto linear_scan;
+        }
+
+        oldgen_scan_bd = oldgen_scan_bd->link;
+        if (oldgen_scan_bd != NULL) {
+            oldgen_scan = oldgen_scan_bd->start;
+            goto loop;
+        }
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ Scavenge one object.
+
+ This is used for objects that are temporarily marked as mutable
+ because they contain old-to-new generation pointers. Only certain
+ objects can have this property.
+ -------------------------------------------------------------------------- */
+
+/* Scavenge the single object at P: evacuate everything it points to.
+ *
+ * Returns rtsTrue ("no luck") if the object must stay on a mutable
+ * list, i.e. it is inherently mutable or still points into a younger
+ * generation after scavenging; rtsFalse otherwise.  Resets the global
+ * failed_to_evac flag before returning.
+ */
+static rtsBool
+scavenge_one(StgPtr p)
+{
+    const StgInfoTable *info;
+    nat saved_evac_gen = evac_gen;   // restored after mutable cases that force evac_gen = 0
+    rtsBool no_luck;
+
+    ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+    info = get_itbl((StgClosure *)p);
+
+    switch (info->type) {
+
+    case MVAR:
+    {
+        // always mutable: scavenge contents into generation 0
+        StgMVar *mvar = ((StgMVar *)p);
+        evac_gen = 0;
+        mvar->head = (StgTSO *)evacuate((StgClosure *)mvar->head);
+        mvar->tail = (StgTSO *)evacuate((StgClosure *)mvar->tail);
+        mvar->value = evacuate((StgClosure *)mvar->value);
+        evac_gen = saved_evac_gen;
+        failed_to_evac = rtsTrue; // mutable.
+        break;
+    }
+
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_2_0:
+    {
+        // evacuate every pointer in the thunk's payload
+        StgPtr q, end;
+
+        end = (StgPtr)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+        for (q = (StgPtr)((StgThunk *)p)->payload; q < end; q++) {
+            *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q);
+        }
+        break;
+    }
+
+    case FUN:
+    case FUN_1_0:          // hardly worth specialising these guys
+    case FUN_0_1:
+    case FUN_1_1:
+    case FUN_0_2:
+    case FUN_2_0:
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case CONSTR_2_0:
+    case WEAK:
+    case IND_PERM:
+    {
+        // generic pointers-first layout: evacuate the payload
+        StgPtr q, end;
+
+        end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+        for (q = (StgPtr)((StgClosure *)p)->payload; q < end; q++) {
+            *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q);
+        }
+        break;
+    }
+
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY: {
+        StgPtr q = p;
+        rtsBool saved_eager_promotion = eager_promotion;
+
+        eager_promotion = rtsFalse;
+        ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
+        eager_promotion = saved_eager_promotion;
+
+        // flip clean/dirty depending on whether the var still points
+        // into a younger generation
+        if (failed_to_evac) {
+            ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+        } else {
+            ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+        }
+        break;
+    }
+
+    case CAF_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case SE_BLACKHOLE:
+    case BLACKHOLE:
+        // no pointers to follow
+        break;
+
+    case THUNK_SELECTOR:
+    {
+        StgSelector *s = (StgSelector *)p;
+        s->selectee = evacuate(s->selectee);
+        break;
+    }
+
+    case AP_STACK:
+    {
+        // a chunk of stack saved in a heap object
+        StgAP_STACK *ap = (StgAP_STACK *)p;
+
+        ap->fun = evacuate(ap->fun);
+        scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size);
+        p = (StgPtr)ap->payload + ap->size;
+        break;
+    }
+
+    case PAP:
+        p = scavenge_PAP((StgPAP *)p);
+        break;
+
+    case AP:
+        p = scavenge_AP((StgAP *)p);
+        break;
+
+    case ARR_WORDS:
+        // nothing to follow
+        break;
+
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    {
+        StgPtr next, q;
+        rtsBool saved_eager;
+
+        // We don't eagerly promote objects pointed to by a mutable
+        // array, but if we find the array only points to objects in
+        // the same or an older generation, we mark it "clean" and
+        // avoid traversing it during minor GCs.
+        saved_eager = eager_promotion;
+        eager_promotion = rtsFalse;
+        q = p;
+        next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+        for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+            *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+        }
+        eager_promotion = saved_eager;
+
+        if (failed_to_evac) {
+            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+        } else {
+            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+        }
+
+        failed_to_evac = rtsTrue;   // always stays on the mutable list
+        break;
+    }
+
+    case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
+    {
+        // follow everything
+        StgPtr next, q=p;
+
+        next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+        for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+            *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+        }
+
+        // If we're going to put this object on the mutable list, then
+        // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+        if (failed_to_evac) {
+            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+        } else {
+            ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+        }
+        break;
+    }
+
+    case TSO:
+    {
+        StgTSO *tso = (StgTSO *)p;
+        rtsBool saved_eager = eager_promotion;
+
+        eager_promotion = rtsFalse;
+        scavengeTSO(tso);
+        eager_promotion = saved_eager;
+
+        if (failed_to_evac) {
+            tso->flags |= TSO_DIRTY;
+        } else {
+            tso->flags &= ~TSO_DIRTY;
+        }
+
+        failed_to_evac = rtsTrue; // always on the mutable list
+        break;
+    }
+
+#if defined(PAR)
+    case RBH:
+    {
+#if 0
+        nat size, ptrs, nonptrs, vhs;
+        char str[80];
+        StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str);
+#endif
+        StgRBH *rbh = (StgRBH *)p;
+        (StgClosure *)rbh->blocking_queue =
+            evacuate((StgClosure *)rbh->blocking_queue);
+        failed_to_evac = rtsTrue;  // mutable anyhow.
+        IF_DEBUG(gc,
+                 debugBelch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)",
+                            p, info_type(p), (StgClosure *)rbh->blocking_queue));
+        // ToDo: use size of reverted closure here!
+        break;
+    }
+
+    case BLOCKED_FETCH:
+    {
+        StgBlockedFetch *bf = (StgBlockedFetch *)p;
+        // follow the pointer to the node which is being demanded
+        (StgClosure *)bf->node =
+            evacuate((StgClosure *)bf->node);
+        // follow the link to the rest of the blocking queue
+        (StgClosure *)bf->link =
+            evacuate((StgClosure *)bf->link);
+        IF_DEBUG(gc,
+                 debugBelch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it",
+                            bf, info_type((StgClosure *)bf),
+                            bf->node, info_type(bf->node)));
+        break;
+    }
+
+#ifdef DIST
+    case REMOTE_REF:
+#endif
+    case FETCH_ME:
+        break; // nothing to do in this case
+
+    case FETCH_ME_BQ:
+    {
+        StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p;
+        (StgClosure *)fmbq->blocking_queue =
+            evacuate((StgClosure *)fmbq->blocking_queue);
+        IF_DEBUG(gc,
+                 debugBelch("@@ scavenge: %p (%s) exciting, isn't it",
+                            p, info_type((StgClosure *)p)));
+        break;
+    }
+#endif
+
+    case TVAR_WAIT_QUEUE:
+    {
+        StgTVarWaitQueue *wq = ((StgTVarWaitQueue *) p);
+        evac_gen = 0;
+        wq->waiting_tso = (StgTSO *)evacuate((StgClosure*)wq->waiting_tso);
+        wq->next_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->next_queue_entry);
+        wq->prev_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->prev_queue_entry);
+        evac_gen = saved_evac_gen;
+        failed_to_evac = rtsTrue; // mutable
+        break;
+    }
+
+    case TVAR:
+    {
+        StgTVar *tvar = ((StgTVar *) p);
+        evac_gen = 0;
+        tvar->current_value = evacuate((StgClosure*)tvar->current_value);
+        tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry);
+        evac_gen = saved_evac_gen;
+        failed_to_evac = rtsTrue; // mutable
+        break;
+    }
+
+    case TREC_HEADER:
+    {
+        StgTRecHeader *trec = ((StgTRecHeader *) p);
+        evac_gen = 0;
+        trec->enclosing_trec = (StgTRecHeader *)evacuate((StgClosure*)trec->enclosing_trec);
+        trec->current_chunk = (StgTRecChunk *)evacuate((StgClosure*)trec->current_chunk);
+        evac_gen = saved_evac_gen;
+        failed_to_evac = rtsTrue; // mutable
+        break;
+    }
+
+    case TREC_CHUNK:
+    {
+        StgWord i;
+        StgTRecChunk *tc = ((StgTRecChunk *) p);
+        TRecEntry *e = &(tc -> entries[0]);
+        evac_gen = 0;
+        tc->prev_chunk = (StgTRecChunk *)evacuate((StgClosure*)tc->prev_chunk);
+        for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+            e->tvar = (StgTVar *)evacuate((StgClosure*)e->tvar);
+            e->expected_value = evacuate((StgClosure*)e->expected_value);
+            e->new_value = evacuate((StgClosure*)e->new_value);
+        }
+        evac_gen = saved_evac_gen;
+        failed_to_evac = rtsTrue; // mutable
+        break;
+    }
+
+    case IND_OLDGEN:
+    case IND_OLDGEN_PERM:
+    case IND_STATIC:
+    {
+        /* Careful here: a THUNK can be on the mutable list because
+         * it contains pointers to young gen objects.  If such a thunk
+         * is updated, the IND_OLDGEN will be added to the mutable
+         * list again, and we'll scavenge it twice.  evacuate()
+         * doesn't check whether the object has already been
+         * evacuated, so we perform that check here.
+         */
+        StgClosure *q = ((StgInd *)p)->indirectee;
+        if (HEAP_ALLOCED(q) && Bdescr((StgPtr)q)->flags & BF_EVACUATED) {
+            break;
+        }
+        ((StgInd *)p)->indirectee = evacuate(q);
+    }
+
+    // note: the block above intentionally falls into this (disabled)
+    // debug snippet and then hits the break below.
+#if 0 && defined(DEBUG)
+    if (RtsFlags.DebugFlags.gc)
+        /* Debugging code to print out the size of the thing we just
+         * promoted
+         */
+    {
+        StgPtr start = gen->steps[0].scan;
+        bdescr *start_bd = gen->steps[0].scan_bd;
+        nat size = 0;
+        scavenge(&gen->steps[0]);
+        if (start_bd != gen->steps[0].scan_bd) {
+            size += (P_)BLOCK_ROUND_UP(start) - start;
+            start_bd = start_bd->link;
+            while (start_bd != gen->steps[0].scan_bd) {
+                size += BLOCK_SIZE_W;
+                start_bd = start_bd->link;
+            }
+            size += gen->steps[0].scan -
+                (P_)BLOCK_ROUND_DOWN(gen->steps[0].scan);
+        } else {
+            size = gen->steps[0].scan - start;
+        }
+        debugBelch("evac IND_OLDGEN: %ld bytes", size * sizeof(W_));
+    }
+#endif
+    break;
+
+    default:
+        barf("scavenge_one: strange object %d", (int)(info->type));
+    }
+
+    no_luck = failed_to_evac;
+    failed_to_evac = rtsFalse;
+    return (no_luck);
+}
+
+/* -----------------------------------------------------------------------------
+ Scavenging mutable lists.
+
+ We treat the mutable list of each generation > N (i.e. all the
+ generations older than the one being collected) as roots. We also
+ remove non-mutable objects from the mutable list at this point.
+ -------------------------------------------------------------------------- */
+
+/* Scavenge the saved mutable list of generation GEN.  Objects that
+ * still need to be on the mutable list afterwards (because they are
+ * mutable, or still point into a younger generation) are re-recorded;
+ * the old list is freed at the end.
+ */
+static void
+scavenge_mutable_list(generation *gen)
+{
+    bdescr *bd;
+    StgPtr p, q;
+
+    bd = gen->saved_mut_list;
+
+    evac_gen = gen->no;
+    for (; bd != NULL; bd = bd->link) {
+        // each word on the mutable-list block is a closure pointer
+        for (q = bd->start; q < bd->free; q++) {
+            p = (StgPtr)*q;
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+
+#ifdef DEBUG
+            // statistics only: count what kinds of object are here
+            switch (get_itbl((StgClosure *)p)->type) {
+            case MUT_VAR_CLEAN:
+                barf("MUT_VAR_CLEAN on mutable list");
+            case MUT_VAR_DIRTY:
+                mutlist_MUTVARS++; break;
+            case MUT_ARR_PTRS_CLEAN:
+            case MUT_ARR_PTRS_DIRTY:
+            case MUT_ARR_PTRS_FROZEN:
+            case MUT_ARR_PTRS_FROZEN0:
+                mutlist_MUTARRS++; break;
+            default:
+                mutlist_OTHERS++; break;
+            }
+#endif
+
+            // Check whether this object is "clean", that is it
+            // definitely doesn't point into a young generation.
+            // Clean objects don't need to be scavenged.  Some clean
+            // objects (MUT_VAR_CLEAN) are not kept on the mutable
+            // list at all; others, such as MUT_ARR_PTRS_CLEAN and
+            // TSO, are always on the mutable list.
+            //
+            switch (get_itbl((StgClosure *)p)->type) {
+            case MUT_ARR_PTRS_CLEAN:
+                recordMutableGen((StgClosure *)p,gen);
+                continue;
+            case TSO: {
+                StgTSO *tso = (StgTSO *)p;
+                if ((tso->flags & TSO_DIRTY) == 0) {
+                    // A clean TSO: we don't have to traverse its
+                    // stack.  However, we *do* follow the link field:
+                    // we don't want to have to mark a TSO dirty just
+                    // because we put it on a different queue.
+                    if (tso->why_blocked != BlockedOnBlackHole) {
+                        tso->link = (StgTSO *)evacuate((StgClosure *)tso->link);
+                    }
+                    recordMutableGen((StgClosure *)p,gen);
+                    continue;
+                }
+                // note: a *dirty* TSO deliberately falls through to
+                // the default case and gets fully scavenged below.
+            }
+            default:
+                ;
+            }
+
+            if (scavenge_one(p)) {
+                // didn't manage to promote everything, so put the
+                // object back on the list.
+                recordMutableGen((StgClosure *)p,gen);
+            }
+        }
+    }
+
+    // free the old mut_list
+    freeChain(gen->saved_mut_list);
+    gen->saved_mut_list = NULL;
+}
+
+
+/* Scavenge the static-object list.  Each object is moved from
+ * static_objects onto scavenged_static_objects before being
+ * scavenged; evacuation may push new entries onto static_objects,
+ * which is why the list head is re-read each iteration.
+ */
+static void
+scavenge_static(void)
+{
+    StgClosure* p = static_objects;
+    const StgInfoTable *info;
+
+    /* Always evacuate straight to the oldest generation for static
+     * objects */
+    evac_gen = oldest_gen->no;
+
+    /* keep going until we've scavenged all the objects on the linked
+       list... */
+    while (p != END_OF_STATIC_LIST) {
+
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+        info = get_itbl(p);
+        /*
+          if (info->type==RBH)
+          info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure
+        */
+        // make sure the info pointer is into text space
+
+        /* Take this object *off* the static_objects list,
+         * and put it on the scavenged_static_objects list.
+         */
+        static_objects = *STATIC_LINK(info,p);
+        *STATIC_LINK(info,p) = scavenged_static_objects;
+        scavenged_static_objects = p;
+
+        switch (info -> type) {
+
+        case IND_STATIC:
+        {
+            StgInd *ind = (StgInd *)p;
+            ind->indirectee = evacuate(ind->indirectee);
+
+            /* might fail to evacuate it, in which case we have to pop it
+             * back on the mutable list of the oldest generation.  We
+             * leave it *on* the scavenged_static_objects list, though,
+             * in case we visit this object again.
+             */
+            if (failed_to_evac) {
+                failed_to_evac = rtsFalse;
+                recordMutableGen((StgClosure *)p,oldest_gen);
+            }
+            break;
+        }
+
+        case THUNK_STATIC:
+            scavenge_thunk_srt(info);
+            break;
+
+        case FUN_STATIC:
+            scavenge_fun_srt(info);
+            break;
+
+        case CONSTR_STATIC:
+        {
+            StgPtr q, next;
+
+            next = (P_)p->payload + info->layout.payload.ptrs;
+            // evacuate the pointers
+            for (q = (P_)p->payload; q < next; q++) {
+                *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q);
+            }
+            break;
+        }
+
+        default:
+            barf("scavenge_static: strange closure %d", (int)(info->type));
+        }
+
+        // only IND_STATIC can set failed_to_evac, and it clears it again
+        ASSERT(failed_to_evac == rtsFalse);
+
+        /* get the next static object from the list.  Remember, there might
+         * be more stuff on this list now that we've done some evacuating!
+         * (static_objects is a global)
+         */
+        p = static_objects;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ scavenge a chunk of memory described by a bitmap
+ -------------------------------------------------------------------------- */
+
+/* Scavenge SIZE stack words starting at P, using LARGE_BITMAP to
+ * distinguish pointers from non-pointers.  A *clear* bit marks a
+ * pointer, which is evacuated in place.
+ *
+ * Fix: the previous version pre-loaded bitmap words and fetched the
+ * next word immediately after consuming the last bit of the current
+ * one, which read one word past the end of the bitmap array whenever
+ * SIZE was an exact multiple of BITS_IN(W_).  Each word is now loaded
+ * only when bits from it are actually needed.
+ */
+static void
+scavenge_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size )
+{
+    nat i, j, b;
+    StgWord bitmap;
+
+    b = 0;
+    for (i = 0; i < size; b++) {
+        bitmap = large_bitmap->bitmap[b];
+        // consume up to one word's worth of bits
+        for (j = 0; j < BITS_IN(W_) && i < size; j++, i++, p++) {
+            if ((bitmap & 1) == 0) {
+                *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+            }
+            bitmap = bitmap >> 1;
+        }
+    }
+}
+
+/* Scavenge SIZE stack words at P using the small bitmap BITMAP: a
+ * clear bit marks a pointer, which is evacuated in place.  Returns
+ * the first word past the scavenged area.
+ */
+STATIC_INLINE StgPtr
+scavenge_small_bitmap (StgPtr p, nat size, StgWord bitmap)
+{
+    nat i;
+
+    for (i = 0; i < size; i++, p++, bitmap >>= 1) {
+        if ((bitmap & 1) == 0) {
+            *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+        }
+    }
+    return p;
+}
+
+/* -----------------------------------------------------------------------------
+ scavenge_stack walks over a section of stack and evacuates all the
+ objects pointed to by it. We can use the same code for walking
+ AP_STACK_UPDs, since these are just sections of copied stack.
+ -------------------------------------------------------------------------- */
+
+
+/* Walk the stack section [P, STACK_END), evacuating every pointer
+ * found in the activation records.  Each iteration of the main loop
+ * handles one complete frame, identified by its return info table.
+ */
+static void
+scavenge_stack(StgPtr p, StgPtr stack_end)
+{
+    const StgRetInfoTable* info;
+    StgWord bitmap;
+    nat size;
+
+    //IF_DEBUG(sanity, debugBelch("  scavenging stack between %p and %p", p, stack_end));
+
+    /*
+     * Each time around this loop, we are looking at a chunk of stack
+     * that starts with an activation record.
+     */
+
+    while (p < stack_end) {
+        info  = get_ret_itbl((StgClosure *)p);
+
+        switch (info->i.type) {
+
+        case UPDATE_FRAME:
+            // In SMP, we can get update frames that point to indirections
+            // when two threads evaluate the same thunk.  We do attempt to
+            // discover this situation in threadPaused(), but it's
+            // possible that the following sequence occurs:
+            //
+            //        A             B
+            //                  enter T
+            //     enter T
+            //     blackhole T
+            //                  update T
+            //     GC
+            //
+            // Now T is an indirection, and the update frame is already
+            // marked on A's stack, so we won't traverse it again in
+            // threadPaused().  We could traverse the whole stack again
+            // before GC, but that seems like overkill.
+            //
+            // Scavenging this update frame as normal would be disastrous;
+            // the updatee would end up pointing to the value.  So we turn
+            // the indirection into an IND_PERM, so that evacuate will
+            // copy the indirection into the old generation instead of
+            // discarding it.
+            if (get_itbl(((StgUpdateFrame *)p)->updatee)->type == IND) {
+                ((StgUpdateFrame *)p)->updatee->header.info =
+                    (StgInfoTable *)&stg_IND_PERM_info;
+            }
+            ((StgUpdateFrame *)p)->updatee
+                = evacuate(((StgUpdateFrame *)p)->updatee);
+            p += sizeofW(StgUpdateFrame);
+            continue;
+
+            // small bitmap (< 32 entries, or 64 on a 64-bit machine)
+        case CATCH_STM_FRAME:
+        case CATCH_RETRY_FRAME:
+        case ATOMICALLY_FRAME:
+        case STOP_FRAME:
+        case CATCH_FRAME:
+        case RET_SMALL:
+        case RET_VEC_SMALL:
+            bitmap = BITMAP_BITS(info->i.layout.bitmap);
+            size   = BITMAP_SIZE(info->i.layout.bitmap);
+            // NOTE: the payload starts immediately after the info-ptr, we
+            // don't have an StgHeader in the same sense as a heap closure.
+            p++;
+            p = scavenge_small_bitmap(p, size, bitmap);
+
+        follow_srt:
+            // SRTs only matter in a major GC (static objects)
+            if (major_gc)
+                scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap);
+            continue;
+
+        case RET_BCO: {
+            StgBCO *bco;
+            nat size;
+
+            p++;
+            // the word after the frame header is the BCO itself
+            *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+            bco = (StgBCO *)*p;
+            p++;
+            size = BCO_BITMAP_SIZE(bco);
+            scavenge_large_bitmap(p, BCO_BITMAP(bco), size);
+            p += size;
+            continue;
+        }
+
+            // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
+        case RET_BIG:
+        case RET_VEC_BIG:
+        {
+            nat size;
+
+            size = GET_LARGE_BITMAP(&info->i)->size;
+            p++;
+            scavenge_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size);
+            p += size;
+            // and don't forget to follow the SRT
+            goto follow_srt;
+        }
+
+            // Dynamic bitmap: the mask is stored on the stack, and
+            // there are a number of non-pointers followed by a number
+            // of pointers above the bitmapped area.  (see StgMacros.h,
+            // HEAP_CHK_GEN).
+        case RET_DYN:
+        {
+            StgWord dyn;
+            dyn = ((StgRetDyn *)p)->liveness;
+
+            // traverse the bitmap first
+            bitmap = RET_DYN_LIVENESS(dyn);
+            p      = (P_)&((StgRetDyn *)p)->payload[0];
+            size   = RET_DYN_BITMAP_SIZE;
+            p = scavenge_small_bitmap(p, size, bitmap);
+
+            // skip over the non-ptr words
+            p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
+
+            // follow the ptr words
+            for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
+                *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
+                p++;
+            }
+            continue;
+        }
+
+        case RET_FUN:
+        {
+            StgRetFun *ret_fun = (StgRetFun *)p;
+            StgFunInfoTable *fun_info;
+
+            ret_fun->fun = evacuate(ret_fun->fun);
+            // the argument layout is determined by the function's
+            // info table, which we must look up *after* evacuation
+            fun_info = get_fun_itbl(ret_fun->fun);
+            p = scavenge_arg_block(fun_info, ret_fun->payload);
+            goto follow_srt;
+        }
+
+        default:
+            barf("scavenge_stack: weird activation record found on stack: %d", (int)(info->i.type));
+        }
+    }
+}
+
+/*-----------------------------------------------------------------------------
+ scavenge the large object list.
+
+ evac_gen set by caller; similar games played with evac_gen as with
+ scavenge() - see comment at the top of scavenge(). Most large
+ objects are (repeatedly) mutable, so most of the time evac_gen will
+ be zero.
+ --------------------------------------------------------------------------- */
+
+/* Scavenge the large-object list of step STP.  Objects are moved from
+ * new_large_objects onto scavenged_large_objects as they are
+ * processed; evacuation may push fresh objects onto the front of
+ * new_large_objects, which is why the list head is re-read each time.
+ */
+static void
+scavenge_large(step *stp)
+{
+    bdescr *bd;
+    StgPtr p;
+
+    bd = stp->new_large_objects;
+
+    for (; bd != NULL; bd = stp->new_large_objects) {
+
+        /* take this object *off* the large objects list and put it on
+         * the scavenged large objects list.  This is so that we can
+         * treat new_large_objects as a stack and push new objects on
+         * the front when evacuating.
+         */
+        stp->new_large_objects = bd->link;
+        dbl_link_onto(bd, &stp->scavenged_large_objects);
+
+        // update the block count in this step.
+        stp->n_scavenged_large_blocks += bd->blocks;
+
+        p = bd->start;
+        if (scavenge_one(p)) {
+            // still points into a younger generation: keep it on the
+            // mutable list of this step's generation
+            if (stp->gen_no > 0) {
+                recordMutableGen((StgClosure *)p, stp->gen);
+            }
+        }
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ Initialising the static object & mutable lists
+ -------------------------------------------------------------------------- */
+
+// Walk the static-object list rooted at first_static and reset every
+// object's static-link field to NULL, unchaining the whole list so the
+// next GC starts from a clean slate.
+static void
+zero_static_object_list(StgClosure* first_static)
+{
+ StgClosure* p;
+ StgClosure* link;
+ const StgInfoTable *info;
+
+ for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
+ info = get_itbl(p);
+ // read the link *before* clearing it, or we lose the rest of the list
+ link = *STATIC_LINK(info, p);
+ *STATIC_LINK(info,p) = NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Reverting CAFs
+ -------------------------------------------------------------------------- */
+
+// Restore every CAF on revertible_caf_list to its pre-evaluation state by
+// reinstating the saved info table, then empty the list. (Used by GHCi to
+// revert CAFs when code is reloaded.)
+void
+revertCAFs( void )
+{
+ StgIndStatic *c;
+
+ for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
+ c = (StgIndStatic *)c->static_link)
+ {
+ SET_INFO(c, c->saved_info);
+ c->saved_info = NULL;
+ // could, but not necessary: c->static_link = NULL;
+ }
+ revertible_caf_list = NULL;
+}
+
+// Evacuate the indirectee of every CAF on both caf_list and
+// revertible_caf_list, keeping their current values alive across GC.
+void
+markCAFs( evac_fn evac )
+{
+ StgIndStatic *c;
+
+ for (c = (StgIndStatic *)caf_list; c != NULL;
+ c = (StgIndStatic *)c->static_link)
+ {
+ evac(&c->indirectee);
+ }
+ for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
+ c = (StgIndStatic *)c->static_link)
+ {
+ evac(&c->indirectee);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Sanity code for CAF garbage collection.
+
+ With DEBUG turned on, we manage a CAF list in addition to the SRT
+ mechanism. After GC, we run down the CAF list and blackhole any
+ CAFs which have been garbage collected. This means we get an error
+ whenever the program tries to enter a garbage collected CAF.
+
+ Any garbage collected CAFs are taken off the CAF list at the same
+ time.
+ -------------------------------------------------------------------------- */
+
+#if 0 && defined(DEBUG)
+
+// Debug-only (currently disabled): walk the CAF list after GC, black-holing
+// and unlinking any CAF whose static link is NULL -- i.e. one that was not
+// re-linked during this GC and is therefore presumed garbage. Entering such
+// a CAF afterwards then fails loudly instead of silently using dead data.
+static void
+gcCAFs(void)
+{
+ StgClosure* p;
+ StgClosure** pp;
+ const StgInfoTable *info;
+ nat i;
+
+ i = 0;
+ p = caf_list;
+ pp = &caf_list;
+
+ while (p != NULL) {
+
+ info = get_itbl(p);
+
+ ASSERT(info->type == IND_STATIC);
+
+ if (STATIC_LINK(info,p) == NULL) {
+ IF_DEBUG(gccafs, debugBelch("CAF gc'd at 0x%04lx", (long)p));
+ // black hole it
+ SET_INFO(p,&stg_BLACKHOLE_info);
+ // splice the dead CAF out of the list
+ p = STATIC_LINK2(info,p);
+ *pp = p;
+ }
+ else {
+ pp = &STATIC_LINK2(info,p);
+ p = *pp;
+ i++;
+ }
+
+ }
+
+ // debugBelch("%d CAFs live", i);
+}
+#endif
+
+
+/* -----------------------------------------------------------------------------
+ * Stack squeezing
+ *
+ * Code largely pinched from old RTS, then hacked to bits. We also do
+ * lazy black holing here.
+ *
+ * -------------------------------------------------------------------------- */
+
+struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };
+
+// Squeeze chains of adjacent update frames on tso's stack (between tso->sp
+// and bottom) down to a single update frame each, then slide the live stack
+// data down over the resulting gaps.
+static void
+stackSqueeze(StgTSO *tso, StgPtr bottom)
+{
+ StgPtr frame;
+ rtsBool prev_was_update_frame;
+ StgClosure *updatee = NULL;
+ StgRetInfoTable *info;
+ StgWord current_gap_size;
+ struct stack_gap *gap;
+
+ // Stage 1:
+ // Traverse the stack upwards, replacing adjacent update frames
+ // with a single update frame and a "stack gap". A stack gap
+ // contains two values: the size of the gap, and the distance
+ // to the next gap (or the stack top).
+
+ frame = tso->sp;
+
+ ASSERT(frame < bottom);
+
+ prev_was_update_frame = rtsFalse;
+ current_gap_size = 0;
+ gap = (struct stack_gap *) (tso->sp - sizeofW(StgUpdateFrame));
+
+ while (frame < bottom) {
+
+ info = get_ret_itbl((StgClosure *)frame);
+ switch (info->i.type) {
+
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *upd = (StgUpdateFrame *)frame;
+
+ if (prev_was_update_frame) {
+
+ TICK_UPD_SQUEEZED();
+ /* wasn't there something about update squeezing and ticky to be
+ * sorted out? oh yes: we aren't counting each enter properly
+ * in this case. See the log somewhere. KSW 1999-04-21
+ *
+ * Check two things: that the two update frames don't point to
+ * the same object, and that the updatee_bypass isn't already an
+ * indirection. Both of these cases only happen when we're in a
+ * black hole-style loop (and there are multiple update frames
+ * on the stack pointing to the same closure), but they can both
+ * screw us up if we don't check.
+ */
+ if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
+ UPD_IND_NOLOCK(upd->updatee, updatee);
+ }
+
+ // now mark this update frame as a stack gap. The gap
+ // marker resides in the bottom-most update frame of
+ // the series of adjacent frames, and covers all the
+ // frames in this series.
+ current_gap_size += sizeofW(StgUpdateFrame);
+ ((struct stack_gap *)frame)->gap_size = current_gap_size;
+ ((struct stack_gap *)frame)->next_gap = gap;
+
+ frame += sizeofW(StgUpdateFrame);
+ continue;
+ }
+
+ // single update frame, or the topmost update frame in a series
+ else {
+ prev_was_update_frame = rtsTrue;
+ updatee = upd->updatee;
+ frame += sizeofW(StgUpdateFrame);
+ continue;
+ }
+ }
+
+ default:
+ prev_was_update_frame = rtsFalse;
+
+ // we're not in a gap... check whether this is the end of a gap
+ // (an update frame can't be the end of a gap).
+ if (current_gap_size != 0) {
+ gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
+ }
+ current_gap_size = 0;
+
+ frame += stack_frame_sizeW((StgClosure *)frame);
+ continue;
+ }
+ }
+
+ // close off a gap that runs right up to 'bottom'
+ if (current_gap_size != 0) {
+ gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
+ }
+
+ // Now we have a stack with gaps in it, and we have to walk down
+ // shoving the stack up to fill in the gaps. A diagram might
+ // help:
+ //
+ // +| ********* |
+ // | ********* | <- sp
+ // | |
+ // | | <- gap_start
+ // | ......... | |
+ // | stack_gap | <- gap | chunk_size
+ // | ......... | |
+ // | ......... | <- gap_end v
+ // | ********* |
+ // | ********* |
+ // | ********* |
+ // -| ********* |
+ //
+ // 'sp' points to the current top-of-stack
+ // 'gap' points to the stack_gap structure inside the gap
+ // ***** indicates real stack data
+ // ..... indicates gap
+ // <empty> indicates unused
+ //
+ {
+ void *sp;
+ void *gap_start, *next_gap_start, *gap_end;
+ nat chunk_size;
+
+ next_gap_start = (void *)((unsigned char*)gap + sizeof(StgUpdateFrame));
+ sp = next_gap_start;
+
+ while ((StgPtr)gap > tso->sp) {
+
+ // we're working in *bytes* now...
+ gap_start = next_gap_start;
+ gap_end = (void*) ((unsigned char*)gap_start - gap->gap_size * sizeof(W_));
+
+ gap = gap->next_gap;
+ next_gap_start = (void *)((unsigned char*)gap + sizeof(StgUpdateFrame));
+
+ chunk_size = (unsigned char*)gap_end - (unsigned char*)next_gap_start;
+ sp -= chunk_size;
+ memmove(sp, next_gap_start, chunk_size);
+ }
+
+ tso->sp = (StgPtr)sp;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Pausing a thread
+ *
+ * We have to prepare for GC - this means doing lazy black holing
+ * here. We also take the opportunity to do stack squeezing if it's
+ * turned on.
+ * -------------------------------------------------------------------------- */
+// Prepare a paused thread for GC: lazily black-hole thunks under its update
+// frames, suspend any duplicated computation, and optionally squeeze chains
+// of adjacent update frames out of the stack (heuristic below).
+void
+threadPaused(Capability *cap, StgTSO *tso)
+{
+ StgClosure *frame;
+ StgRetInfoTable *info;
+ StgClosure *bh;
+ nat words_to_squeeze = 0;
+ nat weight = 0;
+ nat weight_pending = 0;
+ // FIX: this was previously declared without an initializer but is read
+ // below on the first UPDATE_FRAME encountered -- a read of an
+ // indeterminate value (undefined behaviour). Start with "no previous
+ // update frame".
+ rtsBool prev_was_update_frame = rtsFalse;
+
+ frame = (StgClosure *)tso->sp;
+
+ while (1) {
+ // If we've already marked this frame, then stop here.
+ if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+ goto end;
+ }
+
+ info = get_ret_itbl(frame);
+
+ switch (info->i.type) {
+
+ case UPDATE_FRAME:
+
+ // mark the frame so a later threadPaused stops here
+ SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);
+
+ bh = ((StgUpdateFrame *)frame)->updatee;
+
+ if (closure_IND(bh) || bh->header.info == &stg_BLACKHOLE_info) {
+ IF_DEBUG(squeeze, debugBelch("suspending duplicate work: %ld words of stack\n", (StgPtr)frame - tso->sp));
+
+ // If this closure is already an indirection, then
+ // suspend the computation up to this point:
+ suspendComputation(cap,tso,(StgPtr)frame);
+
+ // Now drop the update frame, and arrange to return
+ // the value to the frame underneath:
+ tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
+ tso->sp[1] = (StgWord)bh;
+ tso->sp[0] = (W_)&stg_enter_info;
+
+ // And continue with threadPaused; there might be
+ // yet more computation to suspend.
+ threadPaused(cap,tso);
+ return;
+ }
+
+ if (bh->header.info != &stg_CAF_BLACKHOLE_info) {
+#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
+ debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh);
+#endif
+ // zero out the slop so that the sanity checker can tell
+ // where the next closure is.
+ DEBUG_FILL_SLOP(bh);
+#ifdef PROFILING
+ // @LDV profiling
+ // We pretend that bh is now dead.
+ LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
+ SET_INFO(bh,&stg_BLACKHOLE_info);
+
+ // We pretend that bh has just been created.
+ LDV_RECORD_CREATE(bh);
+ }
+
+ frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
+ // two update frames in a row: a candidate for squeezing
+ if (prev_was_update_frame) {
+ words_to_squeeze += sizeofW(StgUpdateFrame);
+ weight += weight_pending;
+ weight_pending = 0;
+ }
+ prev_was_update_frame = rtsTrue;
+ break;
+
+ case STOP_FRAME:
+ goto end;
+
+ // normal stack frames; do nothing except advance the pointer
+ default:
+ {
+ nat frame_size = stack_frame_sizeW(frame);
+ weight_pending += frame_size;
+ frame = (StgClosure *)((StgPtr)frame + frame_size);
+ prev_was_update_frame = rtsFalse;
+ }
+ }
+ }
+
+end:
+ IF_DEBUG(squeeze,
+ debugBelch("words_to_squeeze: %d, weight: %d, squeeze: %s\n",
+ words_to_squeeze, weight,
+ weight < words_to_squeeze ? "YES" : "NO"));
+
+ // Should we squeeze or not? Arbitrary heuristic: we squeeze if
+ // the number of words we have to shift down is less than the
+ // number of stack words we squeeze away by doing so.
+ if (RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue &&
+ weight < words_to_squeeze) {
+ stackSqueeze(tso, (StgPtr)frame);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Debugging
+ * -------------------------------------------------------------------------- */
+
+#if DEBUG
+// Debug helper: dump every entry on a generation's mutable list, printing
+// each closure's address and info-table type.
+void
+printMutableList(generation *gen)
+{
+ bdescr *bd;
+ StgPtr p;
+
+ debugBelch("@@ Mutable list %p: ", gen->mut_list);
+
+ for (bd = gen->mut_list; bd != NULL; bd = bd->link) {
+ for (p = bd->start; p < bd->free; p++) {
+ debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p));
+ }
+ }
+ debugBelch("\n");
+}
+#endif /* DEBUG */
diff --git a/rts/GCCompact.c b/rts/GCCompact.c
new file mode 100644
index 0000000000..4dfe84bbe0
--- /dev/null
+++ b/rts/GCCompact.c
@@ -0,0 +1,949 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2001
+ *
+ * Compacting garbage collector
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "OSThreads.h"
+#include "Storage.h"
+#include "BlockAlloc.h"
+#include "MBlock.h"
+#include "GCCompact.h"
+#include "Schedule.h"
+#include "Apply.h"
+
+// Turn off inlining when debugging - it obfuscates things
+#ifdef DEBUG
+# undef STATIC_INLINE
+# define STATIC_INLINE static
+#endif
+
+/* -----------------------------------------------------------------------------
+ Threading / unthreading pointers.
+
+ The basic idea here is to chain together all the fields pointing at
+ a particular object, with the root of the chain in the object's
+ info table field. The original contents of the info pointer goes
+ at the end of the chain.
+
+ Adding a new field to the chain is a matter of swapping the
+ contents of the field with the contents of the object's info table
+ field.
+
+ To unthread the chain, we walk down it updating all the fields on
+ the chain with the new location of the object. We stop when we
+ reach the info pointer at the end.
+
+ We use a trick to identify the info pointer: when swapping pointers
+ for threading, we set the low bit of the original pointer, with the
+ result that all the pointers in the chain have their low bits set
+ except for the info pointer.
+ -------------------------------------------------------------------------- */
+
+// Thread the pointer field at p: if *p points into the compacted area,
+// splice p onto the chain of fields referring to that object (rooted in the
+// object's info-table word), tagging p with a low bit set.
+STATIC_INLINE void
+thread( StgPtr p )
+{
+ StgPtr q = (StgPtr)*p;
+ bdescr *bd;
+
+ // It doesn't look like a closure at the moment, because the info
+ // ptr is possibly threaded:
+ // ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
+
+ if (HEAP_ALLOCED(q)) {
+ bd = Bdescr(q);
+ // a handy way to discover whether the ptr is into the
+ // compacted area of the old gen, is that the EVACUATED flag
+ // is zero (it's non-zero for all the other areas of live
+ // memory).
+ if ((bd->flags & BF_EVACUATED) == 0) {
+ // swap *p with the object's info word, chaining this field in
+ *p = (StgWord)*q;
+ *q = (StgWord)p + 1; // set the low bit
+ }
+ }
+}
+
+// Unthread the chain rooted at p: write the object's new address 'free'
+// into every field on the chain (each tagged with a low bit), and finally
+// restore the untagged info pointer found at the end of the chain into *p.
+STATIC_INLINE void
+unthread( StgPtr p, StgPtr free )
+{
+ StgWord q = *p, r;
+
+ while ((q & 1) != 0) {
+ q -= 1; // unset the low bit again
+ r = *((StgPtr)q);
+ *((StgPtr)q) = (StgWord)free;
+ q = r;
+ }
+ *p = q;
+}
+
+// Fetch the real info table of a possibly-threaded closure: follow the
+// chain of tagged (low-bit-set) pointers until the untagged info pointer
+// at the end is found.
+STATIC_INLINE StgInfoTable *
+get_threaded_info( StgPtr p )
+{
+ StgPtr q = (P_)GET_INFO((StgClosure *)p);
+
+ while (((StgWord)q & 1) != 0) {
+ q = (P_)*((StgPtr)((StgWord)q-1));
+ }
+
+ ASSERT(LOOKS_LIKE_INFO_PTR(q));
+ return INFO_PTR_TO_STRUCT((StgInfoTable *)q);
+}
+
+// A word-aligned memmove will be faster for small objects than libc's or gcc's.
+// Remember, the two regions *might* overlap, but: to <= from.
+// 'size' is the number of words to copy.
+STATIC_INLINE void
+move(StgPtr to, StgPtr from, nat size)
+{
+ for(; size > 0; --size) {
+ *to++ = *from++;
+ }
+}
+
+// Thread the pointer fields of every object on the given static-object
+// list (only IND_STATIC has a pointer field; the others just link on).
+static void
+thread_static( StgClosure* p )
+{
+ const StgInfoTable *info;
+
+ // keep going until we've threaded all the objects on the linked
+ // list...
+ while (p != END_OF_STATIC_LIST) {
+
+ info = get_itbl(p);
+ switch (info->type) {
+
+ case IND_STATIC:
+ thread((StgPtr)&((StgInd *)p)->indirectee;
+ p = *IND_STATIC_LINK(p);
+ continue;
+
+ case THUNK_STATIC:
+ p = *THUNK_STATIC_LINK(p);
+ continue;
+ case FUN_STATIC:
+ p = *FUN_STATIC_LINK(p);
+ continue;
+ case CONSTR_STATIC:
+ p = *STATIC_LINK(info,p);
+ continue;
+
+ default:
+ barf("thread_static: strange closure %d", (int)(info->type));
+ }
+
+ }
+}
+
+// Thread 'size' words starting at p, using a large bitmap to decide which
+// words are pointers. NB: a *clear* bit marks a pointer word.
+STATIC_INLINE void
+thread_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size )
+{
+ nat i, b;
+ StgWord bitmap;
+
+ b = 0;
+ bitmap = large_bitmap->bitmap[b];
+ for (i = 0; i < size; ) {
+ if ((bitmap & 1) == 0) {
+ thread(p);
+ }
+ i++;
+ p++;
+ // move on to the next bitmap word when this one is exhausted
+ if (i % BITS_IN(W_) == 0) {
+ b++;
+ bitmap = large_bitmap->bitmap[b];
+ } else {
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+// Thread the argument block of a function call, using the function's
+// argument bitmap (small, large, or one of the standard stg_arg_bitmaps).
+// Returns a pointer to the first word past the arguments.
+STATIC_INLINE StgPtr
+thread_arg_block (StgFunInfoTable *fun_info, StgClosure **args)
+{
+ StgPtr p;
+ StgWord bitmap;
+ nat size;
+
+ p = (StgPtr)args;
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ thread_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p += size;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ // a clear bit marks a pointer word
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ thread(p);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+ break;
+ }
+ return p;
+}
+
+// Thread all the pointer words in a stack chunk [p, stack_end), frame by
+// frame, using each frame's layout info.
+static void
+thread_stack(StgPtr p, StgPtr stack_end)
+{
+ const StgRetInfoTable* info;
+ StgWord bitmap;
+ nat size;
+
+ // highly similar to scavenge_stack, but we do pointer threading here.
+
+ while (p < stack_end) {
+
+ // *p must be the info pointer of an activation
+ // record. All activation records have 'bitmap' style layout
+ // info.
+ //
+ info = get_ret_itbl((StgClosure *)p);
+
+ switch (info->i.type) {
+
+ // Dynamic bitmap: the mask is stored on the stack
+ case RET_DYN:
+ {
+ StgWord dyn;
+ dyn = ((StgRetDyn *)p)->liveness;
+
+ // traverse the bitmap first
+ bitmap = RET_DYN_LIVENESS(dyn);
+ p = (P_)&((StgRetDyn *)p)->payload[0];
+ size = RET_DYN_BITMAP_SIZE;
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ thread(p);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+
+ // skip over the non-ptr words
+ p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
+
+ // follow the ptr words
+ for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
+ thread(p);
+ p++;
+ }
+ continue;
+ }
+
+ // small bitmap (<= 32 entries, or 64 on a 64-bit machine)
+ case CATCH_RETRY_FRAME:
+ case CATCH_STM_FRAME:
+ case ATOMICALLY_FRAME:
+ case UPDATE_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ bitmap = BITMAP_BITS(info->i.layout.bitmap);
+ size = BITMAP_SIZE(info->i.layout.bitmap);
+ p++;
+ // NOTE: the payload starts immediately after the info-ptr, we
+ // don't have an StgHeader in the same sense as a heap closure.
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ thread(p);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+ continue;
+
+ case RET_BCO: {
+ StgBCO *bco;
+ nat size;
+
+ p++;
+ // grab the BCO pointer *before* threading the field
+ bco = (StgBCO *)*p;
+ thread(p);
+ p++;
+ size = BCO_BITMAP_SIZE(bco);
+ thread_large_bitmap(p, BCO_BITMAP(bco), size);
+ p += size;
+ continue;
+ }
+
+ // large bitmap (> 32 entries, or 64 on a 64-bit machine)
+ case RET_BIG:
+ case RET_VEC_BIG:
+ p++;
+ size = GET_LARGE_BITMAP(&info->i)->size;
+ thread_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size);
+ p += size;
+ continue;
+
+ case RET_FUN:
+ {
+ StgRetFun *ret_fun = (StgRetFun *)p;
+ StgFunInfoTable *fun_info;
+
+ // need the fun's info table to find the argument layout,
+ fun_info = itbl_to_fun_itbl(
+ get_threaded_info((StgPtr)ret_fun->fun));
+ // *before* threading it!
+ thread((StgPtr)&ret_fun->fun);
+ p = thread_arg_block(fun_info, ret_fun->payload);
+ continue;
+ }
+
+ default:
+ barf("thread_stack: weird activation record found on stack: %d",
+ (int)(info->i.type));
+ }
+ }
+}
+
+// Thread the payload of a PAP/AP, using the function's argument bitmap to
+// find the pointer words. 'size' is the payload size in words. Returns a
+// pointer to the first word past the payload.
+STATIC_INLINE StgPtr
+thread_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
+{
+ StgPtr p;
+ StgWord bitmap;
+ StgFunInfoTable *fun_info;
+
+ // fun's info word may itself be threaded, so follow the chain
+ fun_info = itbl_to_fun_itbl(get_threaded_info((StgPtr)fun));
+ ASSERT(fun_info->i.type != PAP);
+
+ p = (StgPtr)payload;
+
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ goto small_bitmap;
+ case ARG_GEN_BIG:
+ thread_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
+ p += size;
+ break;
+ case ARG_BCO:
+ thread_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size);
+ p += size;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ small_bitmap:
+ // a clear bit marks a pointer word
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ thread(p);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+ break;
+ }
+
+ return p;
+}
+
+// Thread a PAP: its payload (via the fun's bitmap) and then its fun field.
+// Returns a pointer to the first word past the PAP.
+STATIC_INLINE StgPtr
+thread_PAP (StgPAP *pap)
+{
+ StgPtr p;
+ p = thread_PAP_payload(pap->fun, pap->payload, pap->n_args);
+ thread((StgPtr)&pap->fun);
+ return p;
+}
+
+// Thread an AP: same layout handling as a PAP.
+// Returns a pointer to the first word past the AP.
+STATIC_INLINE StgPtr
+thread_AP (StgAP *ap)
+{
+ StgPtr p;
+ p = thread_PAP_payload(ap->fun, ap->payload, ap->n_args);
+ thread((StgPtr)&ap->fun);
+ return p;
+}
+
+// Thread an AP_STACK: its fun field plus the stack chunk in its payload.
+// Returns a pointer to the first word past the AP_STACK.
+STATIC_INLINE StgPtr
+thread_AP_STACK (StgAP_STACK *ap)
+{
+ thread((StgPtr)&ap->fun);
+ thread_stack((P_)ap->payload, (P_)ap->payload + ap->size);
+ return (P_)ap + sizeofW(StgAP_STACK) + ap->size;
+}
+
+// Thread all the pointer fields of a TSO (links, blocking info, blocked
+// exceptions, STM transaction record) and then its stack.
+// Returns a pointer to the first word past the TSO.
+static StgPtr
+thread_TSO (StgTSO *tso)
+{
+ thread((StgPtr)&tso->link);
+ thread((StgPtr)&tso->global_link);
+
+ // block_info.closure is only a closure pointer for these blocking reasons
+ if ( tso->why_blocked == BlockedOnMVar
+ || tso->why_blocked == BlockedOnBlackHole
+ || tso->why_blocked == BlockedOnException
+#if defined(PAR)
+ || tso->why_blocked == BlockedOnGA
+ || tso->why_blocked == BlockedOnGA_NoSend
+#endif
+ ) {
+ thread((StgPtr)&tso->block_info.closure);
+ }
+ if ( tso->blocked_exceptions != NULL ) {
+ thread((StgPtr)&tso->blocked_exceptions);
+ }
+
+ thread((StgPtr)&tso->trec);
+
+ thread_stack(tso->sp, &(tso->stack[tso->stack_size]));
+ return (StgPtr)tso + tso_sizeW(tso);
+}
+
+
+// Thread the pointer fields of every large object on the given block list
+// (one object per block chain entry, at bd->start).
+static void
+update_fwd_large( bdescr *bd )
+{
+ StgPtr p;
+ const StgInfoTable* info;
+
+ for (; bd != NULL; bd = bd->link) {
+
+ p = bd->start;
+ info = get_itbl((StgClosure *)p);
+
+ switch (info->type) {
+
+ case ARR_WORDS:
+ // nothing to follow
+ continue;
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+ // follow everything
+ {
+ StgPtr next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ thread(p);
+ }
+ continue;
+ }
+
+ case TSO:
+ thread_TSO((StgTSO *)p);
+ continue;
+
+ case AP_STACK:
+ thread_AP_STACK((StgAP_STACK *)p);
+ continue;
+
+ case PAP:
+ thread_PAP((StgPAP *)p);
+ continue;
+
+ case TREC_CHUNK:
+ {
+ StgWord i;
+ StgTRecChunk *tc = (StgTRecChunk *)p;
+ TRecEntry *e = &(tc -> entries[0]);
+ thread((StgPtr)&tc->prev_chunk);
+ for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+ thread((StgPtr)&e->tvar);
+ thread((StgPtr)&e->expected_value);
+ thread((StgPtr)&e->new_value);
+ }
+ continue;
+ }
+
+ default:
+ barf("update_fwd_large: unknown/strange object %d", (int)(info->type));
+ }
+ }
+}
+
+// Thread all the pointer fields of the single closure at p, given its
+// (already unthreaded) info table. Returns a pointer to the first word
+// past the closure -- i.e. its size in words added to p.
+STATIC_INLINE StgPtr
+thread_obj (StgInfoTable *info, StgPtr p)
+{
+ switch (info->type) {
+ case THUNK_0_1:
+ return p + sizeofW(StgThunk) + 1;
+
+ case FUN_0_1:
+ case CONSTR_0_1:
+ return p + sizeofW(StgHeader) + 1;
+
+ case FUN_1_0:
+ case CONSTR_1_0:
+ thread((StgPtr)&((StgClosure *)p)->payload[0]);
+ return p + sizeofW(StgHeader) + 1;
+
+ case THUNK_1_0:
+ thread((StgPtr)&((StgThunk *)p)->payload[0]);
+ return p + sizeofW(StgThunk) + 1;
+
+ case THUNK_0_2:
+ return p + sizeofW(StgThunk) + 2;
+
+ case FUN_0_2:
+ case CONSTR_0_2:
+ return p + sizeofW(StgHeader) + 2;
+
+ case THUNK_1_1:
+ thread((StgPtr)&((StgThunk *)p)->payload[0]);
+ return p + sizeofW(StgThunk) + 2;
+
+ case FUN_1_1:
+ case CONSTR_1_1:
+ thread((StgPtr)&((StgClosure *)p)->payload[0]);
+ return p + sizeofW(StgHeader) + 2;
+
+ case THUNK_2_0:
+ thread((StgPtr)&((StgThunk *)p)->payload[0]);
+ thread((StgPtr)&((StgThunk *)p)->payload[1]);
+ return p + sizeofW(StgThunk) + 2;
+
+ case FUN_2_0:
+ case CONSTR_2_0:
+ thread((StgPtr)&((StgClosure *)p)->payload[0]);
+ thread((StgPtr)&((StgClosure *)p)->payload[1]);
+ return p + sizeofW(StgHeader) + 2;
+
+ case BCO: {
+ StgBCO *bco = (StgBCO *)p;
+ thread((StgPtr)&bco->instrs);
+ thread((StgPtr)&bco->literals);
+ thread((StgPtr)&bco->ptrs);
+ thread((StgPtr)&bco->itbls);
+ return p + bco_sizeW(bco);
+ }
+
+ case THUNK:
+ {
+ StgPtr end;
+
+ // pointers first, then nptrs words of non-pointers
+ end = (P_)((StgThunk *)p)->payload +
+ info->layout.payload.ptrs;
+ for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
+ thread(p);
+ }
+ return p + info->layout.payload.nptrs;
+ }
+
+ case FUN:
+ case CONSTR:
+ case STABLE_NAME:
+ case IND_PERM:
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ {
+ StgPtr end;
+
+ end = (P_)((StgClosure *)p)->payload +
+ info->layout.payload.ptrs;
+ for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
+ thread(p);
+ }
+ return p + info->layout.payload.nptrs;
+ }
+
+ case WEAK:
+ {
+ StgWeak *w = (StgWeak *)p;
+ thread((StgPtr)&w->key);
+ thread((StgPtr)&w->value);
+ thread((StgPtr)&w->finalizer);
+ if (w->link != NULL) {
+ thread((StgPtr)&w->link);
+ }
+ return p + sizeofW(StgWeak);
+ }
+
+ case MVAR:
+ {
+ StgMVar *mvar = (StgMVar *)p;
+ thread((StgPtr)&mvar->head);
+ thread((StgPtr)&mvar->tail);
+ thread((StgPtr)&mvar->value);
+ return p + sizeofW(StgMVar);
+ }
+
+ case IND_OLDGEN:
+ case IND_OLDGEN_PERM:
+ thread((StgPtr)&((StgInd *)p)->indirectee);
+ return p + sizeofW(StgInd);
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ thread((StgPtr)&s->selectee);
+ return p + THUNK_SELECTOR_sizeW();
+ }
+
+ case AP_STACK:
+ return thread_AP_STACK((StgAP_STACK *)p);
+
+ case PAP:
+ return thread_PAP((StgPAP *)p);
+
+ case AP:
+ return thread_AP((StgAP *)p);
+
+ case ARR_WORDS:
+ return p + arr_words_sizeW((StgArrWords *)p);
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+ // follow everything
+ {
+ StgPtr next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ thread(p);
+ }
+ return p;
+ }
+
+ case TSO:
+ return thread_TSO((StgTSO *)p);
+
+ case TVAR_WAIT_QUEUE:
+ {
+ StgTVarWaitQueue *wq = (StgTVarWaitQueue *)p;
+ thread((StgPtr)&wq->waiting_tso);
+ thread((StgPtr)&wq->next_queue_entry);
+ thread((StgPtr)&wq->prev_queue_entry);
+ return p + sizeofW(StgTVarWaitQueue);
+ }
+
+ case TVAR:
+ {
+ StgTVar *tvar = (StgTVar *)p;
+ thread((StgPtr)&tvar->current_value);
+ thread((StgPtr)&tvar->first_wait_queue_entry);
+ return p + sizeofW(StgTVar);
+ }
+
+ case TREC_HEADER:
+ {
+ StgTRecHeader *trec = (StgTRecHeader *)p;
+ thread((StgPtr)&trec->enclosing_trec);
+ thread((StgPtr)&trec->current_chunk);
+ return p + sizeofW(StgTRecHeader);
+ }
+
+ case TREC_CHUNK:
+ {
+ StgWord i;
+ StgTRecChunk *tc = (StgTRecChunk *)p;
+ TRecEntry *e = &(tc -> entries[0]);
+ thread((StgPtr)&tc->prev_chunk);
+ for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+ thread((StgPtr)&e->tvar);
+ thread((StgPtr)&e->expected_value);
+ thread((StgPtr)&e->new_value);
+ }
+ return p + sizeofW(StgTRecChunk);
+ }
+
+ default:
+ barf("update_fwd: unknown/strange object %d", (int)(info->type));
+ return NULL;
+ }
+}
+
+// Thread every closure in a (non-compacted) block chain by a linear scan;
+// the info pointers here are not threaded, so get_itbl is safe.
+static void
+update_fwd( bdescr *blocks )
+{
+ StgPtr p;
+ bdescr *bd;
+ StgInfoTable *info;
+
+ bd = blocks;
+
+#if defined(PAR)
+ barf("update_fwd: ToDo");
+#endif
+
+ // cycle through all the blocks in the step
+ for (; bd != NULL; bd = bd->link) {
+ p = bd->start;
+
+ // linearly scan the objects in this block
+ while (p < bd->free) {
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+ info = get_itbl((StgClosure *)p);
+ p = thread_obj(info, p);
+ }
+ }
+}
+
+// Forward pass over the to-be-compacted blocks: for each marked (live)
+// object, compute its destination address and write it into every field
+// that points at it (by unthreading its chain), while threading the
+// object's own pointer fields for the next pass.
+static void
+update_fwd_compact( bdescr *blocks )
+{
+ StgPtr p, q, free;
+#if 0
+ StgWord m;
+#endif
+ bdescr *bd, *free_bd;
+ StgInfoTable *info;
+ nat size;
+
+ bd = blocks;
+ free_bd = blocks;
+ free = free_bd->start;
+
+#if defined(PAR)
+ barf("update_fwd: ToDo");
+#endif
+
+ // cycle through all the blocks in the step
+ for (; bd != NULL; bd = bd->link) {
+ p = bd->start;
+
+ while (p < bd->free ) {
+
+ // skip to the next marked (live) object
+ while ( p < bd->free && !is_marked(p,bd) ) {
+ p++;
+ }
+ if (p >= bd->free) {
+ break;
+ }
+
+#if 0
+ next:
+ m = * ((StgPtr)bd->u.bitmap + ((p - bd->start) / (BITS_IN(StgWord))));
+ m >>= ((p - bd->start) & (BITS_IN(StgWord) - 1));
+
+ while ( p < bd->free ) {
+
+ if ((m & 1) == 0) {
+ m >>= 1;
+ p++;
+ if (((StgWord)p & (sizeof(W_) * BITS_IN(StgWord))) == 0) {
+ goto next;
+ } else {
+ continue;
+ }
+ }
+#endif
+
+ // Problem: we need to know the destination for this cell
+ // in order to unthread its info pointer. But we can't
+ // know the destination without the size, because we may
+ // spill into the next block. So we have to run down the
+ // threaded list and get the info ptr first.
+ info = get_threaded_info(p);
+
+ q = p;
+
+ p = thread_obj(info, p);
+
+ size = p - q;
+ if (free + size > free_bd->start + BLOCK_SIZE_W) {
+ // unset the next bit in the bitmap to indicate that
+ // this object needs to be pushed into the next
+ // block. This saves us having to run down the
+ // threaded info pointer list twice during the next pass.
+ unmark(q+1,bd);
+ free_bd = free_bd->link;
+ free = free_bd->start;
+ } else {
+ ASSERT(is_marked(q+1,bd));
+ }
+
+ unthread(q,free);
+ free += size;
+#if 0
+ goto next;
+#endif
+ }
+ }
+}
+
+// Backward pass: slide each marked object down to its destination (already
+// recorded in the forward pass), unthreading its remaining chain and fixing
+// up the block descriptors. Returns the number of blocks now in use.
+static nat
+update_bkwd_compact( step *stp )
+{
+ StgPtr p, free;
+#if 0
+ StgWord m;
+#endif
+ bdescr *bd, *free_bd;
+ StgInfoTable *info;
+ nat size, free_blocks;
+
+ bd = free_bd = stp->old_blocks;
+ free = free_bd->start;
+ free_blocks = 1;
+
+#if defined(PAR)
+ barf("update_bkwd: ToDo");
+#endif
+
+ // cycle through all the blocks in the step
+ for (; bd != NULL; bd = bd->link) {
+ p = bd->start;
+
+ while (p < bd->free ) {
+
+ // skip to the next marked (live) object
+ while ( p < bd->free && !is_marked(p,bd) ) {
+ p++;
+ }
+ if (p >= bd->free) {
+ break;
+ }
+
+#if 0
+ next:
+ m = * ((StgPtr)bd->u.bitmap + ((p - bd->start) / (BITS_IN(StgWord))));
+ m >>= ((p - bd->start) & (BITS_IN(StgWord) - 1));
+
+ while ( p < bd->free ) {
+
+ if ((m & 1) == 0) {
+ m >>= 1;
+ p++;
+ if (((StgWord)p & (sizeof(W_) * BITS_IN(StgWord))) == 0) {
+ goto next;
+ } else {
+ continue;
+ }
+ }
+#endif
+
+ // forward pass cleared this bit to say "spills into next block"
+ if (!is_marked(p+1,bd)) {
+ // don't forget to update the free ptr in the block desc.
+ free_bd->free = free;
+ free_bd = free_bd->link;
+ free = free_bd->start;
+ free_blocks++;
+ }
+
+ unthread(p,free);
+ ASSERT(LOOKS_LIKE_INFO_PTR(((StgClosure *)p)->header.info));
+ info = get_itbl((StgClosure *)p);
+ size = closure_sizeW_((StgClosure *)p,info);
+
+ if (free != p) {
+ move(free,p,size);
+ }
+
+ // relocate TSOs
+ if (info->type == TSO) {
+ move_TSO((StgTSO *)p, (StgTSO *)free);
+ }
+
+ free += size;
+ p += size;
+#if 0
+ goto next;
+#endif
+ }
+ }
+
+ // free the remaining blocks and count what's left.
+ free_bd->free = free;
+ if (free_bd->link != NULL) {
+ freeChain(free_bd->link);
+ free_bd->link = NULL;
+ }
+
+ return free_blocks;
+}
+
+// Top-level compacting collection, in three phases:
+// 1. thread all roots into the field chains,
+// 2. forward pass: compute destinations and fix forward pointers,
+// 3. backward pass: slide objects down and fix the remaining pointers.
+void
+compact( void (*get_roots)(evac_fn) )
+{
+ nat g, s, blocks;
+ step *stp;
+
+ // 1. thread the roots
+ get_roots((evac_fn)thread);
+
+ // the weak pointer lists...
+ if (weak_ptr_list != NULL) {
+ thread((StgPtr)(void *)&weak_ptr_list);
+ }
+ if (old_weak_ptr_list != NULL) {
+ thread((StgPtr)(void *)&old_weak_ptr_list); // tmp
+ }
+
+ // mutable lists (every word on a mut_list is a closure pointer)
+ for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
+ bdescr *bd;
+ StgPtr p;
+ for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+ for (p = bd->start; p < bd->free; p++) {
+ thread(p);
+ }
+ }
+ }
+
+ // the global thread list
+ thread((StgPtr)(void *)&all_threads);
+
+ // any threads resurrected during this GC
+ thread((StgPtr)(void *)&resurrected_threads);
+
+ // the task list
+ {
+ Task *task;
+ for (task = all_tasks; task != NULL; task = task->all_link) {
+ if (task->tso) {
+ thread((StgPtr)&task->tso);
+ }
+ }
+ }
+
+ // the static objects
+ thread_static(scavenged_static_objects);
+
+ // the stable pointer table
+ threadStablePtrTable((evac_fn)thread);
+
+ // the CAF list (used by GHCi)
+ markCAFs((evac_fn)thread);
+
+ // 2. update forward ptrs
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ // gen 0 step 0 is the nursery; it is never compacted
+ if (g==0 && s ==0) continue;
+ stp = &generations[g].steps[s];
+ IF_DEBUG(gc, debugBelch("update_fwd: %d.%d\n", stp->gen->no, stp->no););
+
+ update_fwd(stp->blocks);
+ update_fwd_large(stp->scavenged_large_objects);
+ if (g == RtsFlags.GcFlags.generations-1 && stp->old_blocks != NULL) {
+ IF_DEBUG(gc, debugBelch("update_fwd: %d.%d (compact)\n", stp->gen->no, stp->no););
+ update_fwd_compact(stp->old_blocks);
+ }
+ }
+ }
+
+ // 3. update backward ptrs
+ stp = &oldest_gen->steps[0];
+ if (stp->old_blocks != NULL) {
+ blocks = update_bkwd_compact(stp);
+ IF_DEBUG(gc, debugBelch("update_bkwd: %d.%d (compact, old: %d blocks, now %d blocks)\n",
+ stp->gen->no, stp->no,
+ stp->n_old_blocks, blocks););
+ stp->n_old_blocks = blocks;
+ }
+}
diff --git a/rts/GCCompact.h b/rts/GCCompact.h
new file mode 100644
index 0000000000..0fb39b3b12
--- /dev/null
+++ b/rts/GCCompact.h
@@ -0,0 +1,44 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2005
+ *
+ * Compacting garbage collector
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef GCCOMPACT_H
+#define GCCOMPACT_H
+
+/* The mark-bitmap operations below all need the same two pieces of
+ * arithmetic: the address of the bitmap word covering object p, and the
+ * bit within that word.  Previously this was triplicated in mark(),
+ * unmark() and is_marked(); factor it into two helpers so the three
+ * operations cannot drift apart. */
+STATIC_INLINE StgPtr
+mark_bitmap_word(StgPtr p, bdescr *bd)
+{
+    nat offset_within_block = p - bd->start; // in words
+    return (StgPtr)bd->u.bitmap +
+	(offset_within_block / (sizeof(W_)*BITS_PER_BYTE));
+}
+
+STATIC_INLINE StgWord
+mark_bit_mask(StgPtr p, bdescr *bd)
+{
+    nat offset_within_block = p - bd->start; // in words
+    return (StgWord)1 << (offset_within_block & (sizeof(W_)*BITS_PER_BYTE - 1));
+}
+
+/* Set the mark bit for the object at p in block bd. */
+STATIC_INLINE void
+mark(StgPtr p, bdescr *bd)
+{
+    *mark_bitmap_word(p,bd) |= mark_bit_mask(p,bd);
+}
+
+/* Clear the mark bit for the object at p in block bd. */
+STATIC_INLINE void
+unmark(StgPtr p, bdescr *bd)
+{
+    *mark_bitmap_word(p,bd) &= ~mark_bit_mask(p,bd);
+}
+
+/* Non-zero iff the mark bit for the object at p is set. */
+STATIC_INLINE StgWord
+is_marked(StgPtr p, bdescr *bd)
+{
+    return (*mark_bitmap_word(p,bd) & mark_bit_mask(p,bd));
+}
+
+void compact( void (*get_roots)(evac_fn) );
+
+#endif /* GCCOMPACT_H */
diff --git a/rts/GetTime.h b/rts/GetTime.h
new file mode 100644
index 0000000000..5f02df0625
--- /dev/null
+++ b/rts/GetTime.h
@@ -0,0 +1,26 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2005
+ *
+ * Machine-independent interface to time measurement
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef GETTIME_H
+#define GETTIME_H
+
+// We'll use a fixed resolution of usec for now. The machine
+// dependent implementation may have a different resolution, but we'll
+// normalise to this for the machine independent interface.
+#define TICKS_PER_SECOND 1000000
+typedef StgInt64 Ticks;   // a time quantity, in units of 1/TICKS_PER_SECOND
+
+Ticks getProcessCPUTime (void);      // CPU time used by the process
+Ticks getThreadCPUTime (void);       // CPU time used by the calling OS thread
+Ticks getProcessElapsedTime (void);  // elapsed (wall-clock) time
+void getProcessTimes (Ticks *user, Ticks *elapsed); // fills both out-params
+
+// Not strictly timing, but related
+// NOTE(review): presumably the process-wide fault count -- confirm
+// against the platform implementations.
+nat getPageFaults (void);
+#endif /* GETTIME_H */
diff --git a/rts/HSprel.def b/rts/HSprel.def
new file mode 100644
index 0000000000..0ffe00b48c
--- /dev/null
+++ b/rts/HSprel.def
@@ -0,0 +1,28 @@
+; list of entry points that the RTS imports from
+; the Prelude.
+EXPORTS
+PrelBase_False_closure
+PrelBase_True_closure
+PrelBase_Czh_con_info DATA
+PrelBase_Czh_static_info DATA
+PrelBase_Izh_con_info DATA
+PrelBase_Izh_static_info DATA
+PrelAddr_I64zh_con_info DATA
+PrelAddr_W64zh_con_info DATA
+PrelAddr_Azh_con_info DATA
+PrelAddr_Azh_static_info DATA
+PrelFloat_Fzh_con_info DATA
+PrelFloat_Fzh_static_info DATA
+PrelFloat_Dzh_con_info DATA
+PrelFloat_Dzh_static_info DATA
+PrelAddr_Wzh_con_info DATA
+PrelAddr_Wzh_static_info DATA
+PrelStable_StablePtr_con_info DATA
+PrelStable_StablePtr_static_info DATA
+PrelPack_unpackCString_closure
+PrelIOBase_stackOverflow_closure
+PrelIOBase_BlockedOnDeadMVar_closure
+PrelIOBase_BlockedIndefinitely_closure
+PrelIOBase_NonTermination_closure
+PrelWeak_runFinalizzerBatch_closure
+__stginit_Prelude
diff --git a/rts/Hash.c b/rts/Hash.c
new file mode 100644
index 0000000000..ada11a6a85
--- /dev/null
+++ b/rts/Hash.c
@@ -0,0 +1,376 @@
+/*-----------------------------------------------------------------------------
+ *
+ * (c) The AQUA Project, Glasgow University, 1995-1998
+ * (c) The GHC Team, 1999
+ *
+ * Dynamically expanding linear hash tables, as described in
+ * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
+ * pp. 446 -- 457.
+ * -------------------------------------------------------------------------- */
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "Hash.h"
+#include "RtsUtils.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define HSEGSIZE 1024 /* Size of a single hash table segment */
+ /* Also the minimum size of a hash table */
+#define HDIRSIZE 1024 /* Size of the segment directory */
+ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
+#define HLOAD 5 /* Maximum average load of a single hash bucket */
+
+#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
+ /* Number of HashList cells to allocate in one go */
+
+
+/* Linked list of (key, data) pairs for separate chaining */
+struct hashlist {
+ StgWord key;
+ void *data;
+ struct hashlist *next; /* Next cell in bucket chain (same hash value) */
+};
+
+typedef struct hashlist HashList;
+
+typedef int HashFunction(HashTable *table, StgWord key);
+typedef int CompareFunction(StgWord key1, StgWord key2);
+
+struct hashtable {
+ int split; /* Next bucket to split when expanding */
+ int max; /* Max bucket of smaller table */
+ int mask1; /* Mask for doing the mod of h_1 (smaller table) */
+ int mask2; /* Mask for doing the mod of h_2 (larger table) */
+ int kcount; /* Number of keys */
+ int bcount; /* Number of buckets */
+ HashList **dir[HDIRSIZE]; /* Directory of segments */
+ HashFunction *hash; /* hash function */
+ CompareFunction *compare; /* key comparison function */
+};
+
+/* -----------------------------------------------------------------------------
+ * Hash first using the smaller table. If the bucket is less than the
+ * next bucket to be split, re-hash using the larger table.
+ * -------------------------------------------------------------------------- */
+
+static int
+hashWord(HashTable *table, StgWord key)
+{
+    int slot;
+
+    /* Drop the low-order bits, which carry no information for
+     * word-aligned keys. */
+    key /= sizeof(StgWord);
+
+    /* Reduce modulo the smaller table size (a power of 2)... */
+    slot = key & table->mask1;
+
+    /* ...but buckets below the split point have already been rehashed
+     * into the larger table, so those use the larger modulus. */
+    if (slot < table->split) {
+	slot = key & table->mask2;
+    }
+    return slot;
+}
+
+/* Hash a NUL-terminated string: simple multiplicative hash reduced mod a
+ * large prime, then folded into a bucket number exactly as hashWord does.
+ *
+ * Bytes are read as unsigned char: with the old "h += *s" and a platform
+ * where plain char is signed, h could go negative, making the resulting
+ * bucket choice depend on the platform's char signedness. */
+static int
+hashStr(HashTable *table, char *key)
+{
+    int h, bucket;
+    char *s;
+
+    h = 0;
+    for (s = key; *s; s++) {
+	h *= 128;
+	h += (unsigned char)*s;
+	h = h % 1048583;	/* some random large prime */
+    }
+
+    /* Mod the size of the hash table (a power of 2) */
+    bucket = h & table->mask1;
+
+    if (bucket < table->split) {
+	/* Mod the size of the expanded hash table (also a power of 2) */
+	bucket = h & table->mask2;
+    }
+
+    return bucket;
+}
+
+/* Word keys match exactly or not at all. */
+static int
+compareWord(StgWord key1, StgWord key2)
+{
+    return key1 == key2 ? 1 : 0;
+}
+
+/* Keys are really C strings; they match when strcmp finds no difference. */
+static int
+compareStr(StgWord key1, StgWord key2)
+{
+    int diff = strcmp((char *)key1, (char *)key2);
+    return diff == 0;
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Allocate a new segment of the dynamically growing hash table.
+ * -------------------------------------------------------------------------- */
+
+/* Install a fresh segment (HSEGSIZE bucket headers) in the directory.
+ * The buckets themselves are initialised by the caller. */
+static void
+allocSegment(HashTable *table, int segment)
+{
+    table->dir[segment] =
+	stgMallocBytes(HSEGSIZE * sizeof(HashList *), "allocSegment");
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Expand the larger hash table by one bucket, and split one bucket
+ * from the smaller table into two parts. Only the bucket referenced
+ * by @table->split@ is affected by the expansion.
+ * -------------------------------------------------------------------------- */
+
+/* Grow the table by one bucket: split the bucket at table->split into
+ * itself and a brand-new bucket at (max + split), re-hashing its chain. */
+static void
+expand(HashTable *table)
+{
+    int bucket;			/* number of the newly added bucket */
+    int osegment, oindex;	/* location of the bucket being split */
+    int nsegment, nindex;	/* location of the new bucket */
+    HashList *cell, *rest;
+    HashList *keep, *moved;
+
+    /* Refuse to grow past the directory's capacity. */
+    if (table->split + table->max >= HDIRSIZE * HSEGSIZE) {
+	return;
+    }
+
+    osegment = table->split / HSEGSIZE;
+    oindex   = table->split % HSEGSIZE;
+
+    bucket   = table->max + table->split;
+    nsegment = bucket / HSEGSIZE;
+    nindex   = bucket % HSEGSIZE;
+
+    /* Crossing into a fresh segment: allocate it first. */
+    if (nindex == 0) {
+	allocSegment(table, nsegment);
+    }
+
+    /* Advance the split pointer; once every bucket of the smaller table
+     * has been split, double the nominal size and start over. */
+    table->split++;
+    if (table->split == table->max) {
+	table->split = 0;
+	table->max *= 2;
+	table->mask1 = table->mask2;
+	table->mask2 = (table->mask2 << 1) | 1;
+    }
+    table->bcount++;
+
+    /* Re-hash every cell of the old chain into either the old or the new
+     * bucket; order within a chain is not preserved. */
+    keep = moved = NULL;
+    cell = table->dir[osegment][oindex];
+    while (cell != NULL) {
+	rest = cell->next;
+	if (table->hash(table, cell->key) == bucket) {
+	    cell->next = moved;
+	    moved = cell;
+	} else {
+	    cell->next = keep;
+	    keep = cell;
+	}
+	cell = rest;
+    }
+    table->dir[osegment][oindex] = keep;
+    table->dir[nsegment][nindex] = moved;
+}
+
+/* Look up key; returns the stored data pointer, or NULL if absent. */
+void *
+lookupHashTable(HashTable *table, StgWord key)
+{
+    int bucket = table->hash(table, key);
+    HashList *hl = table->dir[bucket / HSEGSIZE][bucket % HSEGSIZE];
+
+    /* Walk the bucket's chain until a matching key turns up. */
+    while (hl != NULL) {
+	if (table->compare(hl->key, key)) {
+	    return hl->data;
+	}
+	hl = hl->next;
+    }
+    return NULL;	/* not present */
+}
+
+/* -----------------------------------------------------------------------------
+ * We allocate the hashlist cells in large chunks to cut down on malloc
+ * overhead. Although we keep a free list of hashlist cells, we make
+ * no effort to actually return the space to the malloc arena.
+ * -------------------------------------------------------------------------- */
+
+static HashList *freeList = NULL;
+
+/* Hand out one HashList cell, preferring the free list; when that is
+ * empty, malloc a whole chunk and thread the spare cells onto it. */
+static HashList *
+allocHashList(void)
+{
+    HashList *cell, *cursor;
+
+    cell = freeList;
+    if (cell != NULL) {
+	freeList = cell->next;
+    } else {
+	cell = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
+
+	/* First cell is the result; chain the remaining HCHUNK-1 cells
+	 * together as the new free list. */
+	freeList = cell + 1;
+	for (cursor = freeList; cursor < cell + HCHUNK - 1; cursor++) {
+	    cursor->next = cursor + 1;
+	}
+	cursor->next = NULL;
+    }
+    return cell;
+}
+
+/* Cells are never returned to malloc; they go back on our free list. */
+static void
+freeHashList(HashList *hl)
+{
+    hl->next = freeList;
+    freeList = hl;
+}
+
+/* Insert (key, data).  Duplicate keys are permitted: the newest entry
+ * shadows older ones because insertion is at the head of the chain. */
+void
+insertHashTable(HashTable *table, StgWord key, void *data)
+{
+    int bucket;
+    HashList *cell;
+
+    // Disable this assert; sometimes it's useful to be able to
+    // overwrite entries in the hash table.
+    // ASSERT(lookupHashTable(table, key) == NULL);
+
+    /* Grow the table whenever the average chain length would pass HLOAD. */
+    table->kcount++;
+    if (table->kcount >= HLOAD * table->bcount) {
+	expand(table);
+    }
+
+    /* Hash AFTER any expansion, so the bucket number is current. */
+    bucket = table->hash(table, key);
+
+    cell = allocHashList();
+    cell->key  = key;
+    cell->data = data;
+    cell->next = table->dir[bucket / HSEGSIZE][bucket % HSEGSIZE];
+    table->dir[bucket / HSEGSIZE][bucket % HSEGSIZE] = cell;
+}
+
+/* Remove an entry for key.  If data is non-NULL, only an entry whose data
+ * pointer equals it is removed (used to disambiguate duplicate keys).
+ * Returns the removed entry's data, or NULL if nothing matched. */
+void *
+removeHashTable(HashTable *table, StgWord key, void *data)
+{
+    int bucket;
+    int segment;
+    int index;
+    HashList *hl;
+    HashList *prev = NULL;
+    void *result;
+
+    bucket = table->hash(table, key);
+    segment = bucket / HSEGSIZE;
+    index = bucket % HSEGSIZE;
+
+    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
+	if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
+	    /* Unlink the cell from its chain... */
+	    if (prev == NULL)
+		table->dir[segment][index] = hl->next;
+	    else
+		prev->next = hl->next;
+	    /* ...and grab its payload BEFORE recycling the cell: the old
+	     * code read hl->data after freeHashList(hl) had already put
+	     * the cell back on the free list. */
+	    result = hl->data;
+	    freeHashList(hl);
+	    table->kcount--;
+	    return result;
+	}
+	prev = hl;
+    }
+
+    /* It's not there */
+    ASSERT(data == NULL);
+    return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * When we free a hash table, we are also good enough to free the
+ * data part of each (key, data) pair, as long as our caller can tell
+ * us how to do it.
+ * -------------------------------------------------------------------------- */
+
+/* Free the whole table: every chain cell, every allocated segment, and
+ * the table itself.  If freeDataFun is non-NULL it is applied to each
+ * entry's data pointer as well. */
+void
+freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
+{
+    long segment, index;
+    HashList *cell, *rest;
+
+    /* The highest bucket in use is (max + split - 1); walk backwards
+     * from there so each segment is emptied before it is freed. */
+    segment = (table->max + table->split - 1) / HSEGSIZE;
+    index   = (table->max + table->split - 1) % HSEGSIZE;
+
+    for (; segment >= 0; segment--) {
+	for (; index >= 0; index--) {
+	    cell = table->dir[segment][index];
+	    while (cell != NULL) {
+		rest = cell->next;
+		if (freeDataFun != NULL) {
+		    (*freeDataFun)(cell->data);
+		}
+		freeHashList(cell);
+		cell = rest;
+	    }
+	}
+	stgFree(table->dir[segment]);
+	index = HSEGSIZE - 1;	/* earlier segments are completely full */
+    }
+    stgFree(table);
+}
+
+/* -----------------------------------------------------------------------------
+ * When we initialize a hash table, we set up the first segment as well,
+ * initializing all of the first segment's hash buckets to NULL.
+ * -------------------------------------------------------------------------- */
+
+/* Build a table with the given hash and comparison functions, starting
+ * from a single fully-allocated segment of empty buckets. */
+static HashTable *
+allocHashTable_(HashFunction *hash, CompareFunction *compare)
+{
+    HashTable *table;
+    HashList **bucket;
+
+    table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
+
+    allocSegment(table, 0);
+    for (bucket = table->dir[0]; bucket < table->dir[0] + HSEGSIZE; bucket++) {
+	*bucket = NULL;
+    }
+
+    /* Initially nothing is split: the "smaller" table is the whole
+     * table, one segment's worth of buckets. */
+    table->split  = 0;
+    table->max    = HSEGSIZE;
+    table->mask1  = HSEGSIZE - 1;
+    table->mask2  = 2 * HSEGSIZE - 1;
+    table->kcount = 0;
+    table->bcount = HSEGSIZE;
+    table->hash    = hash;
+    table->compare = compare;
+
+    return table;
+}
+
+/* A table keyed on StgWord values, compared for exact equality. */
+HashTable *
+allocHashTable(void)
+{
+    return allocHashTable_(hashWord, compareWord);
+}
+
+/* A table keyed on C strings (cast to/from StgWord at the call sites).
+ * NOTE(review): calling hashStr/compareStr through function-pointer
+ * types whose parameters are StgWord relies on char* and StgWord having
+ * interchangeable representations -- undefined behaviour in ISO C,
+ * though it works on the platforms GHC targets; confirm before porting. */
+HashTable *
+allocStrHashTable(void)
+{
+    return allocHashTable_((HashFunction *)hashStr,
+			   (CompareFunction *)compareStr);
+}
diff --git a/rts/Hash.h b/rts/Hash.h
new file mode 100644
index 0000000000..ad55953da4
--- /dev/null
+++ b/rts/Hash.h
@@ -0,0 +1,40 @@
+/*-----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1999
+ *
+ * Prototypes for Hash.c
+ *
+ * -------------------------------------------------------------------------- */
+
+#ifndef HASH_H
+#define HASH_H
+
+typedef struct hashtable HashTable; /* abstract */
+
+/* Hash table access where the keys are StgWords */
+HashTable * allocHashTable ( void );
+void * lookupHashTable ( HashTable *table, StgWord key ); /* NULL if absent */
+void insertHashTable ( HashTable *table, StgWord key, void *data );
+void * removeHashTable ( HashTable *table, StgWord key, void *data );
+/* removeHashTable: pass data == NULL to remove any entry with the key;
+ * returns the removed entry's data, or NULL if nothing matched. */
+
+/* Hash table access where the keys are C strings (the strings are
+ * assumed to be allocated by the caller, and mustn't be deallocated
+ * until the corresponding hash table entry has been removed).
+ */
+HashTable * allocStrHashTable ( void );
+
+/* String-keyed convenience wrappers.  Each argument is parenthesised so
+ * that expression arguments are cast/passed as a whole: the old
+ * "(StgWord)key" would, for key == p + 1, expand to (StgWord)p + 1. */
+#define lookupStrHashTable(table, key) \
+   (lookupHashTable((table), (StgWord)(key)))
+
+#define insertStrHashTable(table, key, data) \
+   (insertHashTable((table), (StgWord)(key), (data)))
+
+#define removeStrHashTable(table, key, data) \
+   (removeHashTable((table), (StgWord)(key), (data)))
+
+/* Freeing hash tables
+ */
+void freeHashTable ( HashTable *table, void (*freeDataFun)(void *) );
+
+#endif /* HASH_H */
+
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
new file mode 100644
index 0000000000..4e5dd24596
--- /dev/null
+++ b/rts/HeapStackCheck.cmm
@@ -0,0 +1,964 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Canned Heap-Check and Stack-Check sequences.
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+/* Stack/Heap Check Failure
+ * ------------------------
+ *
+ * On discovering that a stack or heap check has failed, we do the following:
+ *
+ * - If the context_switch flag is set, indicating that there are more
+ * threads waiting to run, we yield to the scheduler
+ * (return ThreadYielding).
+ *
+ * - If Hp > HpLim, we've had a heap check failure. This means we've
+ * come to the end of the current heap block, so we try to chain
+ * another block on with ExtendNursery().
+ *
+ * - If this succeeds, we carry on without returning to the
+ * scheduler.
+ *
+ * - If it fails, we return to the scheduler claiming HeapOverflow
+ * so that a garbage collection can be performed.
+ *
+ * - If Hp <= HpLim, it must have been a stack check that failed. In
+ * which case, we return to the scheduler claiming StackOverflow, the
+ * scheduler will either increase the size of our stack, or raise
+ * an exception if the stack is already too big.
+ *
+ * The effect of checking for context switch only in the heap/stack check
+ * failure code is that we'll switch threads after the current thread has
+ * reached the end of its heap block. If a thread isn't allocating
+ * at all, it won't yield. Hopefully this won't be a problem in practice.
+ */
+
+/* Common return-to-scheduler epilogue: record the thread's next state in
+ * its TSO, stash the return reason in the register table, and put
+ * BaseReg in R1 for StgReturn. */
+#define PRE_RETURN(why,what_next) \
+ StgTSO_what_next(CurrentTSO) = what_next::I16; \
+ StgRegTable_rRet(BaseReg) = why; \
+ R1 = BaseReg;
+
+/* Remember that the return address is *removed* when returning to a
+ * ThreadRunGHC thread.
+ */
+
+/* The standard heap/stack check failure path described at the top of
+ * this file: retract the failed allocation, try to chain another
+ * nursery block (yielding if context_switch is set), and otherwise
+ * return to the scheduler with HeapOverflow -- or StackOverflow when
+ * Hp <= HpLim, i.e. when it was the stack check that failed. */
+#define GC_GENERIC \
+ DEBUG_ONLY(foreign "C" heapCheckFail()); \
+ if (Hp > HpLim) { \
+ Hp = Hp - HpAlloc/*in bytes*/; \
+ if (HpAlloc <= BLOCK_SIZE \
+ && bdescr_link(CurrentNursery) != NULL) { \
+ CLOSE_NURSERY(); \
+ CurrentNursery = bdescr_link(CurrentNursery); \
+ OPEN_NURSERY(); \
+ if (CInt[context_switch] != 0 :: CInt) { \
+ R1 = ThreadYielding; \
+ goto sched; \
+ } else { \
+ jump %ENTRY_CODE(Sp(0)); \
+ } \
+ } else { \
+ R1 = HeapOverflow; \
+ goto sched; \
+ } \
+ } else { \
+ R1 = StackOverflow; \
+ } \
+ sched: \
+ PRE_RETURN(R1,ThreadRunGHC); \
+ jump stg_returnToSched;
+
+/* Return to the scheduler claiming HeapOverflow unconditionally
+ * (used via stg_gc_gen_hp for MAYBE_GC -- see below). */
+#define HP_GENERIC \
+ PRE_RETURN(HeapOverflow, ThreadRunGHC) \
+ jump stg_returnToSched;
+
+/* Return to the scheduler with the thread marked blocked. */
+#define BLOCK_GENERIC \
+ PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
+ jump stg_returnToSched;
+
+/* Return to the scheduler, merely yielding the capability. */
+#define YIELD_GENERIC \
+ PRE_RETURN(ThreadYielding, ThreadRunGHC) \
+ jump stg_returnToSched;
+
+/* As BLOCK_GENERIC, but R2 carries something for the scheduler to run
+ * first (see stg_returnToSchedButFirst). */
+#define BLOCK_BUT_FIRST(c) \
+ PRE_RETURN(ThreadBlocked, ThreadRunGHC) \
+ R2 = c; \
+ jump stg_returnToSchedButFirst;
+
+/* Yield with what_next = ThreadInterpret, so the byte-code interpreter
+ * resumes this thread. */
+#define YIELD_TO_INTERPRETER \
+ PRE_RETURN(ThreadYielding, ThreadInterpret) \
+ jump stg_returnToSchedNotPaused;
+
+/* -----------------------------------------------------------------------------
+ Heap checks in thunks/functions.
+
+ In these cases, node always points to the function closure. This gives
+ us an easy way to return to the function: just leave R1 on the top of
+ the stack, and have the scheduler enter it to return.
+
+ There are canned sequences for 'n' pointer values in registers.
+ -------------------------------------------------------------------------- */
+
+/* Return frame that re-enters the closure saved on the stack; pushed by
+ * __stg_gc_enter_1 below so that R1's closure is retried after GC. */
+INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
+{
+ R1 = Sp(1);
+ Sp_adj(2);
+ ENTER();
+}
+
+/* Heap-check failure with one live pointer (the closure to re-enter)
+ * in R1: save it under an stg_enter frame, then the generic GC path. */
+__stg_gc_enter_1
+{
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_enter_info;
+ GC_GENERIC
+}
+
+/* GranSim-only canned sequences: save 0..8 pointer registers on the
+ * stack, then return to the scheduler yielding (gran_yield_N) or
+ * blocked (gran_block_N).  Compiled out in ordinary builds. */
+#if defined(GRAN)
+/*
+ ToDo: merge the block and yield macros, calling something like BLOCK(N)
+ at the end;
+*/
+
+/*
+ Should we actually ever do a yield in such a case?? -- HWL
+*/
+gran_yield_0
+{
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+gran_yield_1
+{
+ Sp_adj(-1);
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 2 Regs--------------------------------------------------------------------*/
+
+gran_yield_2
+{
+ Sp_adj(-2);
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 3 Regs -------------------------------------------------------------------*/
+
+gran_yield_3
+{
+ Sp_adj(-3);
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 4 Regs -------------------------------------------------------------------*/
+
+gran_yield_4
+{
+ Sp_adj(-4);
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 5 Regs -------------------------------------------------------------------*/
+
+gran_yield_5
+{
+ Sp_adj(-5);
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 6 Regs -------------------------------------------------------------------*/
+
+gran_yield_6
+{
+ Sp_adj(-6);
+ Sp(5) = R6;
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 7 Regs -------------------------------------------------------------------*/
+
+gran_yield_7
+{
+ Sp_adj(-7);
+ Sp(6) = R7;
+ Sp(5) = R6;
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+/*- 8 Regs -------------------------------------------------------------------*/
+
+gran_yield_8
+{
+ Sp_adj(-8);
+ Sp(7) = R8;
+ Sp(6) = R7;
+ Sp(5) = R6;
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadYielding;
+ jump StgReturn;
+}
+
+// the same routines but with a block rather than a yield
+
+gran_block_1
+{
+ Sp_adj(-1);
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 2 Regs--------------------------------------------------------------------*/
+
+gran_block_2
+{
+ Sp_adj(-2);
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 3 Regs -------------------------------------------------------------------*/
+
+gran_block_3
+{
+ Sp_adj(-3);
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 4 Regs -------------------------------------------------------------------*/
+
+gran_block_4
+{
+ Sp_adj(-4);
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 5 Regs -------------------------------------------------------------------*/
+
+gran_block_5
+{
+ Sp_adj(-5);
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 6 Regs -------------------------------------------------------------------*/
+
+gran_block_6
+{
+ Sp_adj(-6);
+ Sp(5) = R6;
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 7 Regs -------------------------------------------------------------------*/
+
+gran_block_7
+{
+ Sp_adj(-7);
+ Sp(6) = R7;
+ Sp(5) = R6;
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+/*- 8 Regs -------------------------------------------------------------------*/
+
+gran_block_8
+{
+ Sp_adj(-8);
+ Sp(7) = R8;
+ Sp(6) = R7;
+ Sp(5) = R6;
+ Sp(4) = R5;
+ Sp(3) = R4;
+ Sp(2) = R3;
+ Sp(1) = R2;
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+#endif
+
+/* Parallel-only variants; note the "#if 0 &&": currently dead code. */
+#if 0 && defined(PAR)
+
+/*
+ Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
+ saving of the thread state from the actual jump via an StgReturn.
+ We need this separation because we call RTS routines in blocking entry codes
+ before jumping back into the RTS (see parallel/FetchMe.hc).
+*/
+
+par_block_1_no_jump
+{
+ Sp_adj(-1);
+ Sp(0) = R1;
+ SAVE_THREAD_STATE();
+}
+
+par_jump
+{
+ TSO_what_next(CurrentTSO) = ThreadRunGHC;
+ R1 = ThreadBlocked;
+ jump StgReturn;
+}
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ Heap checks in Primitive case alternatives
+
+ A primitive case alternative is entered with a value either in
+ R1, FloatReg1 or D1 depending on the return convention. All the
+ cases are covered below.
+ -------------------------------------------------------------------------- */
+
+/*-- No Registers live ------------------------------------------------------ */
+
+/* Heap-check failure with no live registers at all. */
+stg_gc_noregs
+{
+ GC_GENERIC
+}
+
+/*-- void return ------------------------------------------------------------ */
+
+INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL)
+{
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(0));
+}
+
+/*-- R1 is boxed/unpointed -------------------------------------------------- */
+
+/* Each of the pairs below follows the same shape: the plain entry saves
+ * the live value under a matching return frame and runs GC_GENERIC; the
+ * INFO_TABLE_RET frame restores the value and returns to the real
+ * continuation when the thread is resumed. */
+INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL)
+{
+ R1 = Sp(1);
+ Sp_adj(2);
+ jump %ENTRY_CODE(Sp(0));
+}
+
+stg_gc_unpt_r1
+{
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_gc_unpt_r1_info;
+ GC_GENERIC
+}
+
+/*-- R1 is unboxed -------------------------------------------------- */
+
+/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
+INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
+{
+ R1 = Sp(1);
+ Sp_adj(2);
+ jump %ENTRY_CODE(Sp(0));
+}
+
+stg_gc_unbx_r1
+{
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_gc_unbx_r1_info;
+ GC_GENERIC
+}
+
+/*-- F1 contains a float ------------------------------------------------- */
+
+INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL )
+{
+ F1 = F_[Sp+WDS(1)];
+ Sp_adj(2);
+ jump %ENTRY_CODE(Sp(0));
+}
+
+stg_gc_f1
+{
+ Sp_adj(-2);
+ F_[Sp + WDS(1)] = F1;
+ Sp(0) = stg_gc_f1_info;
+ GC_GENERIC
+}
+
+/*-- D1 contains a double ------------------------------------------------- */
+
+/* we support doubles of either 1 or 2 words in size */
+
+#if SIZEOF_DOUBLE == SIZEOF_VOID_P
+# define DBL_BITMAP 1
+# define DBL_WORDS 1
+#else
+# define DBL_BITMAP 3
+# define DBL_WORDS 2
+#endif
+
+INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL )
+{
+ D1 = D_[Sp + WDS(1)];
+ Sp = Sp + WDS(1) + SIZEOF_StgDouble;
+ jump %ENTRY_CODE(Sp(0));
+}
+
+stg_gc_d1
+{
+ Sp = Sp - WDS(1) - SIZEOF_StgDouble;
+ D_[Sp + WDS(1)] = D1;
+ Sp(0) = stg_gc_d1_info;
+ GC_GENERIC
+}
+
+
+/*-- L1 contains an int64 ------------------------------------------------- */
+
+/* we support int64s of either 1 or 2 words in size */
+
+#if SIZEOF_VOID_P == 8
+# define LLI_BITMAP 1
+# define LLI_WORDS 1
+#else
+# define LLI_BITMAP 3
+# define LLI_WORDS 2
+#endif
+
+INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL )
+{
+ L1 = L_[Sp + WDS(1)];
+ Sp_adj(1) + SIZEOF_StgWord64;
+ jump %ENTRY_CODE(Sp(0));
+}
+
+stg_gc_l1
+{
+ Sp_adj(-1) - SIZEOF_StgWord64;
+ L_[Sp + WDS(1)] = L1;
+ Sp(0) = stg_gc_l1_info;
+ GC_GENERIC
+}
+
+/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */
+
+INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL )
+{
+ Sp_adj(1);
+ // one ptr is on the stack (Sp(0))
+ jump %ENTRY_CODE(Sp(1));
+}
+
+/* -----------------------------------------------------------------------------
+ Generic function entry heap check code.
+
+ At a function entry point, the arguments are as per the calling convention,
+ i.e. some in regs and some on the stack. There may or may not be
+ a pointer to the function closure in R1 - if there isn't, then the heap
+ check failure code in the function will arrange to load it.
+
+ The function's argument types are described in its info table, so we
+ can just jump to this bit of generic code to save away all the
+ registers and return to the scheduler.
+
+ This code arranges the stack like this:
+
+ | .... |
+ | args |
+ +---------------------+
+ | f_closure |
+ +---------------------+
+ | size |
+ +---------------------+
+ | stg_gc_fun_info |
+ +---------------------+
+
+ The size is the number of words of arguments on the stack, and is cached
+ in the frame in order to simplify stack walking: otherwise the size of
+ this stack frame would have to be calculated by looking at f's info table.
+
+ -------------------------------------------------------------------------- */
+
+/* Generic heap-check failure at a function entry point: compute the
+ * number of argument words on the stack from the function's info table,
+ * push the (f_closure, size, stg_gc_fun_info) frame described in the
+ * comment above, and return to the scheduler. */
+__stg_gc_fun
+{
+ W_ size;
+ W_ info;
+ W_ type;
+
+ info = %GET_FUN_INFO(R1);
+
+ // cache the size
+ type = TO_W_(StgFunInfoExtra_fun_type(info));
+ if (type == ARG_GEN) {
+ size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
+ } else {
+ if (type == ARG_GEN_BIG) {
+#ifdef TABLES_NEXT_TO_CODE
+ // bitmap field holds an offset
+ size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
+ + %GET_ENTRY(R1) /* ### */ );
+#else
+ size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
+#endif
+ } else {
+ size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
+ }
+ }
+
+#ifdef NO_ARG_REGS
+ // we don't have to save any registers away
+ Sp_adj(-3);
+ Sp(2) = R1;
+ Sp(1) = size;
+ Sp(0) = stg_gc_fun_info;
+ GC_GENERIC
+#else
+ // NOTE(review): 'type' is re-declared and re-computed here although it
+ // already holds exactly this value from above -- confirm whether the
+ // duplication is intentional or left over from an edit.
+ W_ type;
+ type = TO_W_(StgFunInfoExtra_fun_type(info));
+ // cache the size
+ if (type == ARG_GEN || type == ARG_GEN_BIG) {
+ // regs already saved by the heap check code
+ Sp_adj(-3);
+ Sp(2) = R1;
+ Sp(1) = size;
+ Sp(0) = stg_gc_fun_info;
+ // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
+ GC_GENERIC
+ } else {
+ jump W_[stg_stack_save_entries + WDS(type)];
+ // jumps to stg_gc_noregs after saving stuff
+ }
+#endif /* !NO_ARG_REGS */
+}
+
+/* -----------------------------------------------------------------------------
+ Generic Apply (return point)
+
+ The dual to stg_fun_gc_gen (above): this fragment returns to the
+ function, passing arguments in the stack and in registers
+ appropriately. The stack layout is given above.
+ -------------------------------------------------------------------------- */
+
+/* Return point for the frame pushed by __stg_gc_fun: pop the frame,
+ * reload the function closure into R1, and re-enter it via whichever
+ * calling convention its info table prescribes. */
+INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN )
+{
+ R1 = Sp(2);
+ Sp_adj(3);
+#ifdef NO_ARG_REGS
+ // Minor optimisation: there are no argument registers to load up,
+ // so we can just jump straight to the function's entry point.
+ jump %GET_ENTRY(R1);
+#else
+ W_ info;
+ W_ type;
+
+ info = %GET_FUN_INFO(R1);
+ type = TO_W_(StgFunInfoExtra_fun_type(info));
+ if (type == ARG_GEN || type == ARG_GEN_BIG) {
+ jump StgFunInfoExtra_slow_apply(info);
+ } else {
+ if (type == ARG_BCO) {
+ // cover this case just to be on the safe side
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_apply_interp_info;
+ jump stg_yield_to_interpreter;
+ } else {
+ jump W_[stg_ap_stack_entries + WDS(type)];
+ }
+ }
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Generic Heap Check Code.
+
+ Called with Liveness mask in R9, Return address in R10.
+ Stack must be consistent (containing all necessary info pointers
+ to relevant SRTs).
+
+ See StgMacros.h for a description of the RET_DYN stack frame.
+
+ We also define an stg_gen_yield here, because it's very similar.
+ -------------------------------------------------------------------------- */
+
+// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
+// on a 64-bit machine, we'll end up wasting a couple of words, but
+// it's not a big deal.
+
+/* Pop the 21-word RET_DYN frame built by SAVE_EVERYTHING below,
+ * reloading every saved register (slot offsets must mirror
+ * SAVE_EVERYTHING exactly). */
+#define RESTORE_EVERYTHING \
+ L1 = L_[Sp + WDS(19)]; \
+ D2 = D_[Sp + WDS(17)]; \
+ D1 = D_[Sp + WDS(15)]; \
+ F4 = F_[Sp + WDS(14)]; \
+ F3 = F_[Sp + WDS(13)]; \
+ F2 = F_[Sp + WDS(12)]; \
+ F1 = F_[Sp + WDS(11)]; \
+ R8 = Sp(10); \
+ R7 = Sp(9); \
+ R6 = Sp(8); \
+ R5 = Sp(7); \
+ R4 = Sp(6); \
+ R3 = Sp(5); \
+ R2 = Sp(4); \
+ R1 = Sp(3); \
+ Sp_adj(21);
+
+/* After RESTORE_EVERYTHING's Sp_adj(21), Sp(-19) addresses the saved
+ * return address: slot 2 of the just-popped frame (21 - 19 = 2). */
+#define RET_OFFSET (-19)
+
+/* Push a 21-word frame saving all general/float/double/long registers,
+ * plus the liveness mask (R9) and return address (R10), headed by
+ * stg_gc_gen_info. */
+#define SAVE_EVERYTHING \
+ Sp_adj(-21); \
+ L_[Sp + WDS(19)] = L1; \
+ D_[Sp + WDS(17)] = D2; \
+ D_[Sp + WDS(15)] = D1; \
+ F_[Sp + WDS(14)] = F4; \
+ F_[Sp + WDS(13)] = F3; \
+ F_[Sp + WDS(12)] = F2; \
+ F_[Sp + WDS(11)] = F1; \
+ Sp(10) = R8; \
+ Sp(9) = R7; \
+ Sp(8) = R6; \
+ Sp(7) = R5; \
+ Sp(6) = R4; \
+ Sp(5) = R3; \
+ Sp(4) = R2; \
+ Sp(3) = R1; \
+ Sp(2) = R10; /* return address */ \
+ Sp(1) = R9; /* liveness mask */ \
+ Sp(0) = stg_gc_gen_info;
+
+// Return frame that undoes SAVE_EVERYTHING: restore all the saved
+// registers and jump to the return address stored in the frame.
+INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN )
+/* bitmap in the above info table is unused, the real one is on the stack. */
+{
+    RESTORE_EVERYTHING;
+    jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
+}
+
+// Generic heap-check failure entry: called with the liveness mask in
+// R9 and the return address in R10 (see the section comment above);
+// saves all registers and invokes the GC.
+stg_gc_gen
+{
+    SAVE_EVERYTHING;
+    GC_GENERIC
+}
+
+// A heap check at an unboxed tuple return point.  The return address
+// is on the stack, and we can find it by using the offsets given
+// to us in the liveness mask.
+stg_gc_ut
+{
+    // R9 holds the liveness mask; RET_DYN_NONPTRS/RET_DYN_PTRS extract
+    // the counts of saved non-pointer and pointer words, whose sum is
+    // the stack offset of the real return address.
+    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
+    SAVE_EVERYTHING;
+    GC_GENERIC
+}
+
+/*
+ * stg_gc_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
+ * because we've just failed doYouWantToGC(), not a standard heap
+ * check.  GC_GENERIC would end up returning StackOverflow.
+ */
+stg_gc_gen_hp
+{
+    SAVE_EVERYTHING;
+    HP_GENERIC
+}
+
+/* -----------------------------------------------------------------------------
+ Yields
+ -------------------------------------------------------------------------- */
+
+// Yield to the scheduler with all argument registers live: save
+// everything; the stg_gc_gen frame restores the registers when the
+// thread is resumed.
+stg_gen_yield
+{
+    SAVE_EVERYTHING;
+    YIELD_GENERIC
+}
+
+// Yield when no registers hold live values; nothing needs saving.
+stg_yield_noregs
+{
+    YIELD_GENERIC;
+}
+
+/* -----------------------------------------------------------------------------
+ Yielding to the interpreter... top of stack says what to do next.
+ -------------------------------------------------------------------------- */
+
+// Hand control to the byte-code interpreter; the top of the stack
+// says what to do next (see the section comment above and the frame
+// dispatch at the top of interpretBCO in Interpreter.c).
+stg_yield_to_interpreter
+{
+    YIELD_TO_INTERPRETER;
+}
+
+/* -----------------------------------------------------------------------------
+ Blocks
+ -------------------------------------------------------------------------- */
+
+// Block the current thread, with all argument registers live.
+stg_gen_block
+{
+    SAVE_EVERYTHING;
+    BLOCK_GENERIC;
+}
+
+// Block the current thread; no registers hold live values.
+stg_block_noregs
+{
+    BLOCK_GENERIC;
+}
+
+// Block the current thread with a single live pointer in R1: push an
+// stg_enter frame so the closure is re-entered when the thread wakes.
+stg_block_1
+{
+    Sp_adj(-2);
+    Sp(1) = R1;
+    Sp(0) = stg_enter_info;
+    BLOCK_GENERIC;
+}
+
+/* -----------------------------------------------------------------------------
+ * takeMVar/putMVar-specific blocks
+ *
+ * Stack layout for a thread blocked in takeMVar:
+ *
+ * ret. addr
+ * ptr to MVar (R1)
+ * stg_block_takemvar_info
+ *
+ * Stack layout for a thread blocked in putMVar:
+ *
+ * ret. addr
+ * ptr to Value (R2)
+ * ptr to MVar (R1)
+ * stg_block_putmvar_info
+ *
+ * See PrimOps.cmm for a description of the workings of take/putMVar.
+ *
+ * -------------------------------------------------------------------------- */
+
+// Return frame for a thread that blocked in takeMVar: reload the MVar
+// pointer into R1 and retry the operation.
+INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL )
+{
+    R1 = Sp(1);
+    Sp_adj(2);
+    jump takeMVarzh_fast;
+}
+
+// code fragment executed just before we return to the scheduler
+stg_block_takemvar_finally
+{
+#ifdef THREADED_RTS
+    // Unlock the MVar (passed in R3), restoring the EMPTY header, only
+    // once the thread is safely descheduled.
+    foreign "C" unlockClosure(R3 "ptr", stg_EMPTY_MVAR_info);
+#endif
+    jump StgReturn;
+}
+
+// Block in takeMVar (MVar in R1): push the retry frame above, then
+// block, running stg_block_takemvar_finally last to release the lock.
+stg_block_takemvar
+{
+    Sp_adj(-2);
+    Sp(1) = R1;
+    Sp(0) = stg_block_takemvar_info;
+    R3 = R1;
+    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
+}
+
+// Return frame for a thread that blocked in putMVar: reload the MVar
+// (R1) and the value being put (R2) and retry the operation.
+INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL )
+{
+    R2 = Sp(2);
+    R1 = Sp(1);
+    Sp_adj(3);
+    jump putMVarzh_fast;
+}
+
+// code fragment executed just before we return to the scheduler
+stg_block_putmvar_finally
+{
+#ifdef THREADED_RTS
+    // Unlock the MVar (passed in R3), restoring the FULL header, only
+    // once the thread is safely descheduled.
+    foreign "C" unlockClosure(R3 "ptr", stg_FULL_MVAR_info);
+#endif
+    jump StgReturn;
+}
+
+// Block in putMVar: push the retry frame above (saving R1 and R2),
+// then block, releasing the MVar lock last.
+stg_block_putmvar
+{
+    Sp_adj(-3);
+    Sp(2) = R2;
+    Sp(1) = R1;
+    Sp(0) = stg_block_putmvar_info;
+    R3 = R1;
+    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
+}
+
+// code fragment executed just before we return to the scheduler
+stg_block_blackhole_finally
+{
+#if defined(THREADED_RTS)
+    // The last thing we do is release sched_lock, which is
+    // preventing other threads from accessing blackhole_queue and
+    // picking up this thread before we are finished with it.
+    foreign "C" RELEASE_LOCK(sched_mutex "ptr");
+#endif
+    jump StgReturn;
+}
+
+// Block on a black hole (closure in R1): push an stg_enter frame so
+// the closure is re-entered on wakeup, then block, releasing
+// sched_mutex last via stg_block_blackhole_finally.
+stg_block_blackhole
+{
+    Sp_adj(-2);
+    Sp(1) = R1;
+    Sp(0) = stg_enter_info;
+    BLOCK_BUT_FIRST(stg_block_blackhole_finally);
+}
+
+#ifdef mingw32_HOST_OS
+// Return frame for a thread that blocked on an async I/O request
+// (mingw32 only): unpack the StgAsyncIOResult hung off the TSO's
+// block_info, free it, and return with the length in R1 and the error
+// code on the stack.
+INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
+{
+    W_ ares;
+    W_ len, errC;
+
+    ares = StgTSO_block_info(CurrentTSO);
+    len = StgAsyncIOResult_len(ares);
+    errC = StgAsyncIOResult_errCode(ares);
+    StgTSO_block_info(CurrentTSO) = NULL;
+    foreign "C" free(ares "ptr");
+    R1 = len;
+    // overwrite our own frame slot with the error code for the caller
+    Sp(0) = errC;
+    jump %ENTRY_CODE(Sp(1));
+}
+
+// Block until the async I/O request completes.
+stg_block_async
+{
+    Sp_adj(-1);
+    Sp(0) = stg_block_async_info;
+    BLOCK_GENERIC;
+}
+
+/* Used by threadDelay implementation; it would be desirable to get rid of
+ * this free()'ing void return continuation.
+ */
+// Return frame for an async request with no interesting result: just
+// free the StgAsyncIOResult hanging off the TSO and return.
+INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL )
+{
+    W_ ares;
+
+    ares = StgTSO_block_info(CurrentTSO);
+    StgTSO_block_info(CurrentTSO) = NULL;
+    foreign "C" free(ares "ptr");
+    Sp_adj(1);
+    jump %ENTRY_CODE(Sp(0));
+}
+
+// Block until the async request completes, discarding its result.
+stg_block_async_void
+{
+    Sp_adj(-1);
+    Sp(0) = stg_block_async_void_info;
+    BLOCK_GENERIC;
+}
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ STM-specific waiting
+ -------------------------------------------------------------------------- */
+
+// Run just before we return to the scheduler: release the STM wait
+// lock for this thread.  NOTE(review): R3 appears to carry the token
+// stmWaitUnlock() expects -- confirm against STM.c.
+stg_block_stmwait_finally
+{
+    foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
+    jump StgReturn;
+}
+
+// Block the thread after an STM retry, unlocking the STM state last.
+stg_block_stmwait
+{
+    BLOCK_BUT_FIRST(stg_block_stmwait_finally);
+}
diff --git a/rts/HsFFI.c b/rts/HsFFI.c
new file mode 100644
index 0000000000..350bcfbdec
--- /dev/null
+++ b/rts/HsFFI.c
@@ -0,0 +1,40 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2005
+ *
+ * RTS entry points as mandated by the FFI addendum to the Haskell 98 report
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "HsFFI.h"
+#include "Rts.h"
+
+// hs_init and hs_exit are defined in RtsStartup.c
+
+/* FFI entry point: record the program arguments (argc/argv) for the
+ * Haskell program, by forwarding to the RTS's setProgArgv(). */
+void
+hs_set_argv(int argc, char *argv[])
+{
+    setProgArgv(argc,argv);
+}
+
+/* FFI entry point: request a garbage collection. */
+void
+hs_perform_gc(void)
+{
+    /* Hmmm, the FFI spec is a bit vague, but it seems to imply a major GC... */
+    performMajorGC();
+}
+
+/* FFI entry point: release a stable pointer previously obtained from
+ * a Haskell StablePtr, making its referent eligible for GC again. */
+void
+hs_free_stable_ptr(HsStablePtr sp)
+{
+    /* The cast is for clarity only, both HsStablePtr and StgStablePtr are
+       typedefs for void*. */
+    freeStablePtr((StgStablePtr)sp);
+}
+
+/* FFI entry point: free the function pointer produced by a
+ * "foreign export dynamic"/wrapper, via freeHaskellFunctionPtr(). */
+void
+hs_free_fun_ptr(HsFunPtr fp)
+{
+    freeHaskellFunctionPtr(fp);
+}
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
new file mode 100644
index 0000000000..56e9bb67ce
--- /dev/null
+++ b/rts/Interpreter.c
@@ -0,0 +1,1261 @@
+/* -----------------------------------------------------------------------------
+ * Bytecode interpreter
+ *
+ * Copyright (c) The GHC Team, 1994-2002.
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsAPI.h"
+#include "RtsUtils.h"
+#include "Closures.h"
+#include "TSO.h"
+#include "Schedule.h"
+#include "RtsFlags.h"
+#include "Storage.h"
+#include "LdvProfile.h"
+#include "Updates.h"
+#include "Sanity.h"
+#include "Liveness.h"
+
+#include "Bytecodes.h"
+#include "Printer.h"
+#include "Disassembler.h"
+#include "Interpreter.h"
+
+#include <string.h> /* for memcpy */
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+
+/* --------------------------------------------------------------------------
+ * The bytecode interpreter
+ * ------------------------------------------------------------------------*/
+
+/* Gather stats about entry, opcode, opcode-pair frequencies. For
+ tuning the interpreter. */
+
+/* #define INTERP_STATS */
+
+
+/* Sp points to the lowest live word on the stack. */
+
+// Fetch the next bytecode, advancing the instruction pointer, and
+// index the BCO's pointer / literal / info-table arrays.
+#define BCO_NEXT         instrs[bciPtr++]
+#define BCO_PTR(n)       (W_)ptrs[n]
+#define BCO_LIT(n)       literals[n]
+#define BCO_ITBL(n)      itbls[n]
+
+// Cache the current TSO's stack pointer (and its fixed stack limit)
+// in interpreter locals on entry.
+#define LOAD_STACK_POINTERS                                     \
+    Sp = cap->r.rCurrentTSO->sp;                                \
+    /* We don't change this ... */                              \
+    SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS;
+
+// Write the local stack pointer back into the TSO.
+#define SAVE_STACK_POINTERS                     \
+    cap->r.rCurrentTSO->sp = Sp
+
+// Leave the interpreter: flush Sp, record what the thread should do
+// next (what_next) and why we stopped (rRet), notify the RTS that the
+// thread is pausing via threadPaused(), and return the capability.
+#define RETURN_TO_SCHEDULER(todo,retcode)       \
+   SAVE_STACK_POINTERS;                         \
+   cap->r.rCurrentTSO->what_next = (todo);      \
+   threadPaused(cap,cap->r.rCurrentTSO);        \
+   cap->r.rRet = (retcode);                     \
+   return cap;
+
+// As RETURN_TO_SCHEDULER but without the threadPaused() call.
+#define RETURN_TO_SCHEDULER_NO_PAUSE(todo,retcode)      \
+   SAVE_STACK_POINTERS;                                 \
+   cap->r.rCurrentTSO->what_next = (todo);              \
+   cap->r.rRet = (retcode);                             \
+   return cap;
+
+
+STATIC_INLINE StgPtr
+allocate_NONUPD (int n_words)
+{
+    /* A non-updatable closure must still be at least as large as the
+     * smallest updatable closure (header + MIN_PAYLOAD_SIZE payload
+     * words), so round the request up to that minimum.  The compare is
+     * done in unsigned arithmetic, matching the original stg_max(). */
+    W_ min_words = sizeofW(StgHeader) + MIN_PAYLOAD_SIZE;
+    return allocate((W_)n_words > min_words ? (W_)n_words : min_words);
+}
+
+
+#ifdef INTERP_STATS
+
+/* Hacky stats, for tuning the interpreter ... */
+
+// Per-closure-type counts of objects the interpreter could not handle
+// itself (entered via the "unknown closure" default case).
+int it_unknown_entries[N_CLOSURE_TYPES];
+int it_total_unknown_entries;
+int it_total_entries;
+
+// Counts of the kinds of return frame we returned to.
+int it_retto_BCO;
+int it_retto_UPDATE;
+int it_retto_other;
+
+// Instruction, stack-slide and BCO-entry counters.
+int it_slides;
+int it_insns;
+int it_BCO_entries;
+
+// Opcode frequency and opcode-pair (bigram) frequency tables; the
+// dimension 27 matches the opcode-range ASSERT in the dispatch loop.
+int it_ofreq[27];
+int it_oofreq[27][27];
+int it_lastopc;
+
+// Bump a statistics counter (compiled away when !INTERP_STATS).
+#define INTERP_TICK(n) (n)++
+
+/* Reset every interpreter-statistics counter and table to zero,
+ * ready for a fresh run. */
+void interp_startup ( void )
+{
+    int i, j;
+
+    it_retto_BCO             = 0;
+    it_retto_UPDATE          = 0;
+    it_retto_other           = 0;
+    it_total_entries         = 0;
+    it_total_unknown_entries = 0;
+    it_slides                = 0;
+    it_insns                 = 0;
+    it_BCO_entries           = 0;
+    it_lastopc               = 0;
+
+    for (i = 0; i < N_CLOSURE_TYPES; i++) {
+        it_unknown_entries[i] = 0;
+    }
+    for (i = 0; i < 27; i++) {
+        it_ofreq[i] = 0;
+        for (j = 0; j < 27; j++) {
+            it_oofreq[i][j] = 0;
+        }
+    }
+}
+
+/* Dump the statistics gathered under INTERP_STATS to the debug log:
+ * return-frame counts, unknown-closure entries broken down by closure
+ * type, per-opcode frequencies, and the 19 most frequent opcode pairs.
+ * NOTE: destructively zeroes it_oofreq entries as it selects maxima. */
+void interp_shutdown ( void )
+{
+    int i, j, k, o_max, i_max, j_max;
+    debugBelch("%d constrs entered -> (%d BCO, %d UPD, %d ??? )\n",
+                   it_retto_BCO + it_retto_UPDATE + it_retto_other,
+                   it_retto_BCO, it_retto_UPDATE, it_retto_other );
+    debugBelch("%d total entries, %d unknown entries \n",
+                   it_total_entries, it_total_unknown_entries);
+    for (i = 0; i < N_CLOSURE_TYPES; i++) {
+        if (it_unknown_entries[i] == 0) continue;
+        debugBelch("   type %2d: unknown entries (%4.1f%%) == %d\n",
+                i, 100.0 * ((double)it_unknown_entries[i]) /
+                           ((double)it_total_unknown_entries),
+                it_unknown_entries[i]);
+    }
+    debugBelch("%d insns, %d slides, %d BCO_entries\n",
+                   it_insns, it_slides, it_BCO_entries);
+    for (i = 0; i < 27; i++)
+        debugBelch("opcode %2d got %d\n", i, it_ofreq[i] );
+
+    // Repeatedly find and print the most frequent remaining opcode
+    // pair, zeroing it so the next iteration finds the runner-up.
+    for (k = 1; k < 20; k++) {
+        o_max = 0;
+        i_max = j_max = 0;
+        for (i = 0; i < 27; i++) {
+            for (j = 0; j < 27; j++) {
+                if (it_oofreq[i][j] > o_max) {
+                    o_max = it_oofreq[i][j];
+                    i_max = i; j_max = j;
+                }
+            }
+        }
+
+        debugBelch("%d:  count (%4.1f%%) %6d   is %d then %d\n",
+                      k, ((double)o_max) * 100.0 / ((double)it_insns), o_max,
+                         i_max, j_max );
+        it_oofreq[i_max][j_max] = 0;
+
+    }
+}
+
+#else // !INTERP_STATS
+
+#define INTERP_TICK(n) /* nothing */
+
+#endif
+
+// Info tables for the all-pointer application frames stg_ap_p ..
+// stg_ap_pppppp, indexed by (number of pointer arguments - 1).  Used
+// in do_apply when a function is applied to more arguments than its
+// arity: the surplus pointer args get one of these frames pushed.
+static StgWord app_ptrs_itbl[] = {
+    (W_)&stg_ap_p_info,
+    (W_)&stg_ap_pp_info,
+    (W_)&stg_ap_ppp_info,
+    (W_)&stg_ap_pppp_info,
+    (W_)&stg_ap_ppppp_info,
+    (W_)&stg_ap_pppppp_info,
+};
+
+Capability *
+interpretBCO (Capability* cap)
+{
+ // Use of register here is primarily to make it clear to compilers
+ // that these entities are non-aliasable.
+ register StgPtr Sp; // local state -- stack pointer
+ register StgPtr SpLim; // local state -- stack lim pointer
+ register StgClosure* obj;
+ nat n, m;
+
+ LOAD_STACK_POINTERS;
+
+ // ------------------------------------------------------------------------
+ // Case 1:
+ //
+ // We have a closure to evaluate. Stack looks like:
+ //
+ // | XXXX_info |
+ // +---------------+
+ // Sp | -------------------> closure
+ // +---------------+
+ //
+ if (Sp[0] == (W_)&stg_enter_info) {
+ Sp++;
+ goto eval;
+ }
+
+ // ------------------------------------------------------------------------
+ // Case 2:
+ //
+ // We have a BCO application to perform. Stack looks like:
+ //
+ // | .... |
+ // +---------------+
+ // | arg1 |
+ // +---------------+
+ // | BCO |
+ // +---------------+
+ // Sp | RET_BCO |
+ // +---------------+
+ //
+ else if (Sp[0] == (W_)&stg_apply_interp_info) {
+ obj = (StgClosure *)Sp[1];
+ Sp += 2;
+ goto run_BCO_fun;
+ }
+
+ // ------------------------------------------------------------------------
+ // Case 3:
+ //
+ // We have an unboxed value to return. See comment before
+ // do_return_unboxed, below.
+ //
+ else {
+ goto do_return_unboxed;
+ }
+
+ // Evaluate the object on top of the stack.
+eval:
+ obj = (StgClosure*)Sp[0]; Sp++;
+
+eval_obj:
+ INTERP_TICK(it_total_evals);
+
+ IF_DEBUG(interpreter,
+ debugBelch(
+ "\n---------------------------------------------------------------\n");
+ debugBelch("Evaluating: "); printObj(obj);
+ debugBelch("Sp = %p\n", Sp);
+ debugBelch("\n" );
+
+ printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+ debugBelch("\n\n");
+ );
+
+ IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+
+ switch ( get_itbl(obj)->type ) {
+
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ {
+ obj = ((StgInd*)obj)->indirectee;
+ goto eval_obj;
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_STATIC:
+ case PAP:
+ // already in WHNF
+ break;
+
+ case BCO:
+ ASSERT(((StgBCO *)obj)->arity > 0);
+ break;
+
+ case AP: /* Copied from stg_AP_entry. */
+ {
+ nat i, words;
+ StgAP *ap;
+
+ ap = (StgAP*)obj;
+ words = ap->n_args;
+
+ // Stack check
+ if (Sp - (words+sizeofW(StgUpdateFrame)) < SpLim) {
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+ }
+
+ /* Ok; we're safe. Party on. Push an update frame. */
+ Sp -= sizeofW(StgUpdateFrame);
+ {
+ StgUpdateFrame *__frame;
+ __frame = (StgUpdateFrame *)Sp;
+ SET_INFO(__frame, (StgInfoTable *)&stg_upd_frame_info);
+ __frame->updatee = (StgClosure *)(ap);
+ }
+
+ /* Reload the stack */
+ Sp -= words;
+ for (i=0; i < words; i++) {
+ Sp[i] = (W_)ap->payload[i];
+ }
+
+ obj = (StgClosure*)ap->fun;
+ ASSERT(get_itbl(obj)->type == BCO);
+ goto run_BCO_fun;
+ }
+
+ default:
+#ifdef INTERP_STATS
+ {
+ int j;
+
+ j = get_itbl(obj)->type;
+ ASSERT(j >= 0 && j < N_CLOSURE_TYPES);
+ it_unknown_entries[j]++;
+ it_total_unknown_entries++;
+ }
+#endif
+ {
+ // Can't handle this object; yield to scheduler
+ IF_DEBUG(interpreter,
+ debugBelch("evaluating unknown closure -- yielding to sched\n");
+ printObj(obj);
+ );
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ // We now have an evaluated object (obj). The next thing to
+ // do is return it to the stack frame on top of the stack.
+do_return:
+ ASSERT(closure_HNF(obj));
+
+ IF_DEBUG(interpreter,
+ debugBelch(
+ "\n---------------------------------------------------------------\n");
+ debugBelch("Returning: "); printObj(obj);
+ debugBelch("Sp = %p\n", Sp);
+ debugBelch("\n" );
+ printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+ debugBelch("\n\n");
+ );
+
+ IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size));
+
+ switch (get_itbl((StgClosure *)Sp)->type) {
+
+ case RET_SMALL: {
+ const StgInfoTable *info;
+
+ // NOTE: not using get_itbl().
+ info = ((StgClosure *)Sp)->header.info;
+ if (info == (StgInfoTable *)&stg_ap_v_info) {
+ n = 1; m = 0; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_f_info) {
+ n = 1; m = 1; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_d_info) {
+ n = 1; m = sizeofW(StgDouble); goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_l_info) {
+ n = 1; m = sizeofW(StgInt64); goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_n_info) {
+ n = 1; m = 1; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_p_info) {
+ n = 1; m = 1; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_pp_info) {
+ n = 2; m = 2; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_ppp_info) {
+ n = 3; m = 3; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_pppp_info) {
+ n = 4; m = 4; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_ppppp_info) {
+ n = 5; m = 5; goto do_apply;
+ }
+ if (info == (StgInfoTable *)&stg_ap_pppppp_info) {
+ n = 6; m = 6; goto do_apply;
+ }
+ goto do_return_unrecognised;
+ }
+
+ case UPDATE_FRAME:
+ // Returning to an update frame: do the update, pop the update
+ // frame, and continue with the next stack frame.
+ INTERP_TICK(it_retto_UPDATE);
+ UPD_IND(((StgUpdateFrame *)Sp)->updatee, obj);
+ Sp += sizeofW(StgUpdateFrame);
+ goto do_return;
+
+ case RET_BCO:
+ // Returning to an interpreted continuation: put the object on
+ // the stack, and start executing the BCO.
+ INTERP_TICK(it_retto_BCO);
+ Sp--;
+ Sp[0] = (W_)obj;
+ obj = (StgClosure*)Sp[2];
+ ASSERT(get_itbl(obj)->type == BCO);
+ goto run_BCO_return;
+
+ default:
+ do_return_unrecognised:
+ {
+ // Can't handle this return address; yield to scheduler
+ INTERP_TICK(it_retto_other);
+ IF_DEBUG(interpreter,
+ debugBelch("returning to unknown frame -- yielding to sched\n");
+ printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+ );
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
+ }
+ }
+
+ // -------------------------------------------------------------------------
+ // Returning an unboxed value. The stack looks like this:
+ //
+ // | .... |
+ // +---------------+
+ // | fv2 |
+ // +---------------+
+ // | fv1 |
+ // +---------------+
+ // | BCO |
+ // +---------------+
+ // | stg_ctoi_ret_ |
+ // +---------------+
+ // | retval |
+ // +---------------+
+ // | XXXX_info |
+ // +---------------+
+ //
+ // where XXXX_info is one of the stg_gc_unbx_r1_info family.
+ //
+ // We're only interested in the case when the real return address
+ // is a BCO; otherwise we'll return to the scheduler.
+
+do_return_unboxed:
+ {
+ int offset;
+
+ ASSERT( Sp[0] == (W_)&stg_gc_unbx_r1_info
+ || Sp[0] == (W_)&stg_gc_unpt_r1_info
+ || Sp[0] == (W_)&stg_gc_f1_info
+ || Sp[0] == (W_)&stg_gc_d1_info
+ || Sp[0] == (W_)&stg_gc_l1_info
+ || Sp[0] == (W_)&stg_gc_void_info // VoidRep
+ );
+
+ // get the offset of the stg_ctoi_ret_XXX itbl
+ offset = stack_frame_sizeW((StgClosure *)Sp);
+
+ switch (get_itbl((StgClosure *)Sp+offset)->type) {
+
+ case RET_BCO:
+ // Returning to an interpreted continuation: put the object on
+ // the stack, and start executing the BCO.
+ INTERP_TICK(it_retto_BCO);
+ obj = (StgClosure*)Sp[offset+1];
+ ASSERT(get_itbl(obj)->type == BCO);
+ goto run_BCO_return_unboxed;
+
+ default:
+ {
+ // Can't handle this return address; yield to scheduler
+ INTERP_TICK(it_retto_other);
+ IF_DEBUG(interpreter,
+ debugBelch("returning to unknown frame -- yielding to sched\n");
+ printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size);
+ );
+ RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
+ }
+ }
+ }
+ // not reached.
+
+
+ // -------------------------------------------------------------------------
+ // Application...
+
+do_apply:
+ // we have a function to apply (obj), and n arguments taking up m
+ // words on the stack. The info table (stg_ap_pp_info or whatever)
+ // is on top of the arguments on the stack.
+ {
+ switch (get_itbl(obj)->type) {
+
+ case PAP: {
+ StgPAP *pap;
+ nat i, arity;
+
+ pap = (StgPAP *)obj;
+
+ // we only cope with PAPs whose function is a BCO
+ if (get_itbl(pap->fun)->type != BCO) {
+ goto defer_apply_to_sched;
+ }
+
+ Sp++;
+ arity = pap->arity;
+ ASSERT(arity > 0);
+ if (arity < n) {
+ // n must be greater than 1, and the only kinds of
+ // application we support with more than one argument
+ // are all pointers...
+ //
+ // Shuffle the args for this function down, and put
+ // the appropriate info table in the gap.
+ for (i = 0; i < arity; i++) {
+ Sp[(int)i-1] = Sp[i];
+ // ^^^^^ careful, i-1 might be negative, but i in unsigned
+ }
+ Sp[arity-1] = app_ptrs_itbl[n-arity-1];
+ Sp--;
+ // unpack the PAP's arguments onto the stack
+ Sp -= pap->n_args;
+ for (i = 0; i < pap->n_args; i++) {
+ Sp[i] = (W_)pap->payload[i];
+ }
+ obj = pap->fun;
+ goto run_BCO_fun;
+ }
+ else if (arity == n) {
+ Sp -= pap->n_args;
+ for (i = 0; i < pap->n_args; i++) {
+ Sp[i] = (W_)pap->payload[i];
+ }
+ obj = pap->fun;
+ goto run_BCO_fun;
+ }
+ else /* arity > n */ {
+ // build a new PAP and return it.
+ StgPAP *new_pap;
+ new_pap = (StgPAP *)allocate(PAP_sizeW(pap->n_args + m));
+ SET_HDR(new_pap,&stg_PAP_info,CCCS);
+ new_pap->arity = pap->arity - n;
+ new_pap->n_args = pap->n_args + m;
+ new_pap->fun = pap->fun;
+ for (i = 0; i < pap->n_args; i++) {
+ new_pap->payload[i] = pap->payload[i];
+ }
+ for (i = 0; i < m; i++) {
+ new_pap->payload[pap->n_args + i] = (StgClosure *)Sp[i];
+ }
+ obj = (StgClosure *)new_pap;
+ Sp += m;
+ goto do_return;
+ }
+ }
+
+ case BCO: {
+ nat arity, i;
+
+ Sp++;
+ arity = ((StgBCO *)obj)->arity;
+ ASSERT(arity > 0);
+ if (arity < n) {
+ // n must be greater than 1, and the only kinds of
+ // application we support with more than one argument
+ // are all pointers...
+ //
+ // Shuffle the args for this function down, and put
+ // the appropriate info table in the gap.
+ for (i = 0; i < arity; i++) {
+ Sp[(int)i-1] = Sp[i];
+ // ^^^^^ careful, i-1 might be negative, but i in unsigned
+ }
+ Sp[arity-1] = app_ptrs_itbl[n-arity-1];
+ Sp--;
+ goto run_BCO_fun;
+ }
+ else if (arity == n) {
+ goto run_BCO_fun;
+ }
+ else /* arity > n */ {
+ // build a PAP and return it.
+ StgPAP *pap;
+ nat i;
+ pap = (StgPAP *)allocate(PAP_sizeW(m));
+ SET_HDR(pap, &stg_PAP_info,CCCS);
+ pap->arity = arity - n;
+ pap->fun = obj;
+ pap->n_args = m;
+ for (i = 0; i < m; i++) {
+ pap->payload[i] = (StgClosure *)Sp[i];
+ }
+ obj = (StgClosure *)pap;
+ Sp += m;
+ goto do_return;
+ }
+ }
+
+ // No point in us applying machine-code functions
+ default:
+ defer_apply_to_sched:
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
+ }
+
+ // ------------------------------------------------------------------------
+ // Ok, we now have a bco (obj), and its arguments are all on the
+ // stack. We can start executing the byte codes.
+ //
+ // The stack is in one of two states. First, if this BCO is a
+ // function:
+ //
+ // | .... |
+ // +---------------+
+ // | arg2 |
+ // +---------------+
+ // | arg1 |
+ // +---------------+
+ //
+ // Second, if this BCO is a continuation:
+ //
+ // | .... |
+ // +---------------+
+ // | fv2 |
+ // +---------------+
+ // | fv1 |
+ // +---------------+
+ // | BCO |
+ // +---------------+
+ // | stg_ctoi_ret_ |
+ // +---------------+
+ // | retval |
+ // +---------------+
+ //
+ // where retval is the value being returned to this continuation.
+ // In the event of a stack check, heap check, or context switch,
+ // we need to leave the stack in a sane state so the garbage
+ // collector can find all the pointers.
+ //
+ // (1) BCO is a function: the BCO's bitmap describes the
+ // pointerhood of the arguments.
+ //
+ // (2) BCO is a continuation: BCO's bitmap describes the
+ // pointerhood of the free variables.
+ //
+ // Sadly we have three different kinds of stack/heap/cswitch check
+ // to do:
+
+run_BCO_return:
+ // Heap check
+ if (doYouWantToGC()) {
+ Sp--; Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
+ }
+ // Stack checks aren't necessary at return points, the stack use
+ // is aggregated into the enclosing function entry point.
+ goto run_BCO;
+
+run_BCO_return_unboxed:
+ // Heap check
+ if (doYouWantToGC()) {
+ RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
+ }
+ // Stack checks aren't necessary at return points, the stack use
+ // is aggregated into the enclosing function entry point.
+ goto run_BCO;
+
+run_BCO_fun:
+ IF_DEBUG(sanity,
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_apply_interp_info;
+ checkStackChunk(Sp,SpLim);
+ Sp += 2;
+ );
+
+ // Heap check
+ if (doYouWantToGC()) {
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
+ RETURN_TO_SCHEDULER(ThreadInterpret, HeapOverflow);
+ }
+
+ // Stack check
+ if (Sp - INTERP_STACK_CHECK_THRESH < SpLim) {
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_apply_interp_info; // placeholder, really
+ RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+ }
+ goto run_BCO;
+
+ // Now, actually interpret the BCO... (no returning to the
+ // scheduler again until the stack is in an orderly state).
+run_BCO:
+ INTERP_TICK(it_BCO_entries);
+ {
+ register int bciPtr = 1; /* instruction pointer */
+ register StgBCO* bco = (StgBCO*)obj;
+ register StgWord16* instrs = (StgWord16*)(bco->instrs->payload);
+ register StgWord* literals = (StgWord*)(&bco->literals->payload[0]);
+ register StgPtr* ptrs = (StgPtr*)(&bco->ptrs->payload[0]);
+ register StgInfoTable** itbls = (StgInfoTable**)
+ (&bco->itbls->payload[0]);
+
+#ifdef INTERP_STATS
+ it_lastopc = 0; /* no opcode */
+#endif
+
+ nextInsn:
+ ASSERT(bciPtr <= instrs[0]);
+ IF_DEBUG(interpreter,
+ //if (do_print_stack) {
+ //debugBelch("\n-- BEGIN stack\n");
+ //printStack(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size,iSu);
+ //debugBelch("-- END stack\n\n");
+ //}
+ debugBelch("Sp = %p pc = %d ", Sp, bciPtr);
+ disInstr(bco,bciPtr);
+ if (0) { int i;
+ debugBelch("\n");
+ for (i = 8; i >= 0; i--) {
+ debugBelch("%d %p\n", i, (StgPtr)(*(Sp+i)));
+ }
+ debugBelch("\n");
+ }
+ //if (do_print_stack) checkStack(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size,iSu);
+ );
+
+ INTERP_TICK(it_insns);
+
+#ifdef INTERP_STATS
+ ASSERT( (int)instrs[bciPtr] >= 0 && (int)instrs[bciPtr] < 27 );
+ it_ofreq[ (int)instrs[bciPtr] ] ++;
+ it_oofreq[ it_lastopc ][ (int)instrs[bciPtr] ] ++;
+ it_lastopc = (int)instrs[bciPtr];
+#endif
+
+ switch (BCO_NEXT) {
+
+ case bci_STKCHECK: {
+ // Explicit stack check at the beginning of a function
+ // *only* (stack checks in case alternatives are
+ // propagated to the enclosing function).
+ int stk_words_reqd = BCO_NEXT + 1;
+ if (Sp - stk_words_reqd < SpLim) {
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_apply_interp_info;
+ RETURN_TO_SCHEDULER(ThreadInterpret, StackOverflow);
+ } else {
+ goto nextInsn;
+ }
+ }
+
+ case bci_PUSH_L: {
+ int o1 = BCO_NEXT;
+ Sp[-1] = Sp[o1];
+ Sp--;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_LL: {
+ int o1 = BCO_NEXT;
+ int o2 = BCO_NEXT;
+ Sp[-1] = Sp[o1];
+ Sp[-2] = Sp[o2];
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_LLL: {
+ int o1 = BCO_NEXT;
+ int o2 = BCO_NEXT;
+ int o3 = BCO_NEXT;
+ Sp[-1] = Sp[o1];
+ Sp[-2] = Sp[o2];
+ Sp[-3] = Sp[o3];
+ Sp -= 3;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_G: {
+ int o1 = BCO_NEXT;
+ Sp[-1] = BCO_PTR(o1);
+ Sp -= 1;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_R1p_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS_P: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_R1unpt_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS_N: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_R1n_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS_F: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_F1_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS_D: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_D1_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS_L: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_L1_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_ALTS_V: {
+ int o_bco = BCO_NEXT;
+ Sp[-2] = (W_)&stg_ctoi_V_info;
+ Sp[-1] = BCO_PTR(o_bco);
+ Sp -= 2;
+ goto nextInsn;
+ }
+
+ case bci_PUSH_APPLY_N:
+ Sp--; Sp[0] = (W_)&stg_ap_n_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_V:
+ Sp--; Sp[0] = (W_)&stg_ap_v_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_F:
+ Sp--; Sp[0] = (W_)&stg_ap_f_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_D:
+ Sp--; Sp[0] = (W_)&stg_ap_d_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_L:
+ Sp--; Sp[0] = (W_)&stg_ap_l_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_P:
+ Sp--; Sp[0] = (W_)&stg_ap_p_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_PP:
+ Sp--; Sp[0] = (W_)&stg_ap_pp_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_PPP:
+ Sp--; Sp[0] = (W_)&stg_ap_ppp_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_PPPP:
+ Sp--; Sp[0] = (W_)&stg_ap_pppp_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_PPPPP:
+ Sp--; Sp[0] = (W_)&stg_ap_ppppp_info;
+ goto nextInsn;
+ case bci_PUSH_APPLY_PPPPPP:
+ Sp--; Sp[0] = (W_)&stg_ap_pppppp_info;
+ goto nextInsn;
+
+ case bci_PUSH_UBX: {
+ int i;
+ int o_lits = BCO_NEXT;
+ int n_words = BCO_NEXT;
+ Sp -= n_words;
+ for (i = 0; i < n_words; i++) {
+ Sp[i] = (W_)BCO_LIT(o_lits+i);
+ }
+ goto nextInsn;
+ }
+
+ case bci_SLIDE: {
+ int n = BCO_NEXT;
+ int by = BCO_NEXT;
+ /* a_1, .. a_n, b_1, .. b_by, s => a_1, .. a_n, s */
+ while(--n >= 0) {
+ Sp[n+by] = Sp[n];
+ }
+ Sp += by;
+ INTERP_TICK(it_slides);
+ goto nextInsn;
+ }
+
+ case bci_ALLOC_AP: {
+ StgAP* ap;
+ int n_payload = BCO_NEXT;
+ ap = (StgAP*)allocate(AP_sizeW(n_payload));
+ Sp[-1] = (W_)ap;
+ ap->n_args = n_payload;
+ SET_HDR(ap, &stg_AP_info, CCS_SYSTEM/*ToDo*/)
+ Sp --;
+ goto nextInsn;
+ }
+
+ case bci_ALLOC_PAP: {
+ StgPAP* pap;
+ int arity = BCO_NEXT;
+ int n_payload = BCO_NEXT;
+ pap = (StgPAP*)allocate(PAP_sizeW(n_payload));
+ Sp[-1] = (W_)pap;
+ pap->n_args = n_payload;
+ pap->arity = arity;
+ SET_HDR(pap, &stg_PAP_info, CCS_SYSTEM/*ToDo*/)
+ Sp --;
+ goto nextInsn;
+ }
+
+ case bci_MKAP: {
+ int i;
+ int stkoff = BCO_NEXT;
+ int n_payload = BCO_NEXT;
+ StgAP* ap = (StgAP*)Sp[stkoff];
+ ASSERT((int)ap->n_args == n_payload);
+ ap->fun = (StgClosure*)Sp[0];
+
+ // The function should be a BCO, and its bitmap should
+ // cover the payload of the AP correctly.
+ ASSERT(get_itbl(ap->fun)->type == BCO
+ && BCO_BITMAP_SIZE(ap->fun) == ap->n_args);
+
+ for (i = 0; i < n_payload; i++)
+ ap->payload[i] = (StgClosure*)Sp[i+1];
+ Sp += n_payload+1;
+ IF_DEBUG(interpreter,
+ debugBelch("\tBuilt ");
+ printObj((StgClosure*)ap);
+ );
+ goto nextInsn;
+ }
+
+ case bci_MKPAP: {
+ int i;
+ int stkoff = BCO_NEXT;
+ int n_payload = BCO_NEXT;
+ StgPAP* pap = (StgPAP*)Sp[stkoff];
+ ASSERT((int)pap->n_args == n_payload);
+ pap->fun = (StgClosure*)Sp[0];
+
+ // The function should be a BCO
+ ASSERT(get_itbl(pap->fun)->type == BCO);
+
+ for (i = 0; i < n_payload; i++)
+ pap->payload[i] = (StgClosure*)Sp[i+1];
+ Sp += n_payload+1;
+ IF_DEBUG(interpreter,
+ debugBelch("\tBuilt ");
+ printObj((StgClosure*)pap);
+ );
+ goto nextInsn;
+ }
+
+ case bci_UNPACK: {
+ /* Unpack N ptr words from t.o.s constructor */
+ int i;
+ int n_words = BCO_NEXT;
+ StgClosure* con = (StgClosure*)Sp[0];
+ Sp -= n_words;
+ for (i = 0; i < n_words; i++) {
+ Sp[i] = (W_)con->payload[i];
+ }
+ goto nextInsn;
+ }
+
+ case bci_PACK: {
+ int i;
+ int o_itbl = BCO_NEXT;
+ int n_words = BCO_NEXT;
+ StgInfoTable* itbl = INFO_PTR_TO_STRUCT(BCO_ITBL(o_itbl));
+ int request = CONSTR_sizeW( itbl->layout.payload.ptrs,
+ itbl->layout.payload.nptrs );
+ StgClosure* con = (StgClosure*)allocate_NONUPD(request);
+ ASSERT( itbl->layout.payload.ptrs + itbl->layout.payload.nptrs > 0);
+ SET_HDR(con, BCO_ITBL(o_itbl), CCS_SYSTEM/*ToDo*/);
+ for (i = 0; i < n_words; i++) {
+ con->payload[i] = (StgClosure*)Sp[i];
+ }
+ Sp += n_words;
+ Sp --;
+ Sp[0] = (W_)con;
+ IF_DEBUG(interpreter,
+ debugBelch("\tBuilt ");
+ printObj((StgClosure*)con);
+ );
+ goto nextInsn;
+ }
+
+ case bci_TESTLT_P: {
+ unsigned int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ StgClosure* con = (StgClosure*)Sp[0];
+ if (GET_TAG(con) >= discr) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ case bci_TESTEQ_P: {
+ unsigned int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ StgClosure* con = (StgClosure*)Sp[0];
+ if (GET_TAG(con) != discr) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ case bci_TESTLT_I: {
+ // There should be an Int at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ I_ stackInt = (I_)Sp[1];
+ if (stackInt >= (I_)BCO_LIT(discr))
+ bciPtr = failto;
+ goto nextInsn;
+ }
+
+ case bci_TESTEQ_I: {
+ // There should be an Int at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ I_ stackInt = (I_)Sp[1];
+ if (stackInt != (I_)BCO_LIT(discr)) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ case bci_TESTLT_D: {
+ // There should be a Double at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ StgDouble stackDbl, discrDbl;
+ stackDbl = PK_DBL( & Sp[1] );
+ discrDbl = PK_DBL( & BCO_LIT(discr) );
+ if (stackDbl >= discrDbl) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ case bci_TESTEQ_D: {
+ // There should be a Double at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ StgDouble stackDbl, discrDbl;
+ stackDbl = PK_DBL( & Sp[1] );
+ discrDbl = PK_DBL( & BCO_LIT(discr) );
+ if (stackDbl != discrDbl) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ case bci_TESTLT_F: {
+ // There should be a Float at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ StgFloat stackFlt, discrFlt;
+ stackFlt = PK_FLT( & Sp[1] );
+ discrFlt = PK_FLT( & BCO_LIT(discr) );
+ if (stackFlt >= discrFlt) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ case bci_TESTEQ_F: {
+ // There should be a Float at Sp[1], and an info table at Sp[0].
+ int discr = BCO_NEXT;
+ int failto = BCO_NEXT;
+ StgFloat stackFlt, discrFlt;
+ stackFlt = PK_FLT( & Sp[1] );
+ discrFlt = PK_FLT( & BCO_LIT(discr) );
+ if (stackFlt != discrFlt) {
+ bciPtr = failto;
+ }
+ goto nextInsn;
+ }
+
+ // Control-flow ish things
+ case bci_ENTER:
+ // Context-switch check. We put it here to ensure that
+ // the interpreter has done at least *some* work before
+ // context switching: sometimes the scheduler can invoke
+ // the interpreter with context_switch == 1, particularly
+ // if the -C0 flag has been given on the cmd line.
+ if (context_switch) {
+ Sp--; Sp[0] = (W_)&stg_enter_info;
+ RETURN_TO_SCHEDULER(ThreadInterpret, ThreadYielding);
+ }
+ goto eval;
+
+ case bci_RETURN:
+ obj = (StgClosure *)Sp[0];
+ Sp++;
+ goto do_return;
+
+ case bci_RETURN_P:
+ Sp--;
+ Sp[0] = (W_)&stg_gc_unpt_r1_info;
+ goto do_return_unboxed;
+ case bci_RETURN_N:
+ Sp--;
+ Sp[0] = (W_)&stg_gc_unbx_r1_info;
+ goto do_return_unboxed;
+ case bci_RETURN_F:
+ Sp--;
+ Sp[0] = (W_)&stg_gc_f1_info;
+ goto do_return_unboxed;
+ case bci_RETURN_D:
+ Sp--;
+ Sp[0] = (W_)&stg_gc_d1_info;
+ goto do_return_unboxed;
+ case bci_RETURN_L:
+ Sp--;
+ Sp[0] = (W_)&stg_gc_l1_info;
+ goto do_return_unboxed;
+ case bci_RETURN_V:
+ Sp--;
+ Sp[0] = (W_)&stg_gc_void_info;
+ goto do_return_unboxed;
+
+ case bci_SWIZZLE: {
+ int stkoff = BCO_NEXT;
+ signed short n = (signed short)(BCO_NEXT);
+ Sp[stkoff] += (W_)n;
+ goto nextInsn;
+ }
+
+ case bci_CCALL: {
+ void *tok;
+ int stk_offset = BCO_NEXT;
+ int o_itbl = BCO_NEXT;
+ void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl);
+ int ret_dyn_size =
+ RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
+ + sizeofW(StgRetDyn);
+
+#ifdef THREADED_RTS
+ // Threaded RTS:
+ // Arguments on the TSO stack are not good, because garbage
+ // collection might move the TSO as soon as we call
+ // suspendThread below.
+
+ W_ arguments[stk_offset];
+
+ memcpy(arguments, Sp, sizeof(W_) * stk_offset);
+#endif
+
+ // Restore the Haskell thread's current value of errno
+ errno = cap->r.rCurrentTSO->saved_errno;
+
+ // There are a bunch of non-ptr words on the stack (the
+ // ccall args, the ccall fun address and space for the
+ // result), which we need to cover with an info table
+ // since we might GC during this call.
+ //
+ // We know how many (non-ptr) words there are before the
+ // next valid stack frame: it is the stk_offset arg to the
+ // CCALL instruction. So we build a RET_DYN stack frame
+ // on the stack frame to describe this chunk of stack.
+ //
+ Sp -= ret_dyn_size;
+ ((StgRetDyn *)Sp)->liveness = NO_PTRS | N_NONPTRS(stk_offset);
+ ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info;
+
+ SAVE_STACK_POINTERS;
+ tok = suspendThread(&cap->r);
+
+#ifndef THREADED_RTS
+ // Careful:
+ // suspendThread might have shifted the stack
+ // around (stack squeezing), so we have to grab the real
+ // Sp out of the TSO to find the ccall args again.
+
+ marshall_fn ( (void*)(cap->r.rCurrentTSO->sp + ret_dyn_size) );
+#else
+ // Threaded RTS:
+ // We already made a copy of the arguments above.
+
+ marshall_fn ( arguments );
+#endif
+
+ // And restart the thread again, popping the RET_DYN frame.
+ cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable)));
+ LOAD_STACK_POINTERS;
+ Sp += ret_dyn_size;
+
+ // Save the Haskell thread's current value of errno
+ cap->r.rCurrentTSO->saved_errno = errno;
+
+#ifdef THREADED_RTS
+ // Threaded RTS:
+ // Copy the "arguments", which might include a return value,
+ // back to the TSO stack. It would of course be enough to
+ // just copy the return value, but we don't know the offset.
+ memcpy(Sp, arguments, sizeof(W_) * stk_offset);
+#endif
+
+ goto nextInsn;
+ }
+
+ case bci_JMP: {
+ /* BCO_NEXT modifies bciPtr, so be conservative. */
+ int nextpc = BCO_NEXT;
+ bciPtr = nextpc;
+ goto nextInsn;
+ }
+
+ case bci_CASEFAIL:
+ barf("interpretBCO: hit a CASEFAIL");
+
+ // Errors
+ default:
+ barf("interpretBCO: unknown or unimplemented opcode");
+
+ } /* switch on opcode */
+ }
+ }
+
+ barf("interpretBCO: fell off end of the interpreter");
+}
diff --git a/rts/Interpreter.h b/rts/Interpreter.h
new file mode 100644
index 0000000000..d66e636084
--- /dev/null
+++ b/rts/Interpreter.h
@@ -0,0 +1,14 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2002.
+ *
+ * Prototypes for functions in Interpreter.c
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef INTERPRETER_H
+#define INTERPRETER_H
+
+// Run the byte-code interpreter for the current thread of the given
+// capability; returns the (possibly different) capability in effect when
+// the interpreter hands control back to the scheduler.
+extern Capability *interpretBCO (Capability* cap);
+
+#endif /* INTERPRETER_H */
diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c
new file mode 100644
index 0000000000..19ebe426d3
--- /dev/null
+++ b/rts/LdvProfile.c
@@ -0,0 +1,342 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Rts.h"
+#include "LdvProfile.h"
+#include "RtsFlags.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "Storage.h"
+#include "RtsUtils.h"
+#include "Schedule.h"
+
+/* --------------------------------------------------------------------------
+ * Fills in the slop when a *dynamic* closure changes its type.
+ * First calls LDV_recordDead() to declare the closure is dead, and then
+ * fills in the slop.
+ *
+ * Invoked when:
+ * 1) blackholing, UPD_BH_UPDATABLE() and UPD_BH_SINGLE_ENTRY (in
+ * includes/StgMacros.h), threadLazyBlackHole() and
+ * threadSqueezeStack() (in GC.c).
+ * 2) updating with indirection closures, updateWithIndirection()
+ * and updateWithPermIndirection() (in Storage.h).
+ *
+ * LDV_recordDead_FILL_SLOP_DYNAMIC() is not called on 'inherently used'
+ * closures such as TSO. It is not called on PAP because PAP is not updatable.
+ * ----------------------------------------------------------------------- */
+void
+LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p )
+{
+    nat size, i;
+
+#if defined(__GNUC__) && __GNUC__ < 3 && defined(DEBUG)
+#error Please use gcc 3.0+ to compile this file with DEBUG; gcc < 3.0 miscompiles it
+#endif
+
+    // era == 0 means LDV profiling is switched off, so there is nothing
+    // to record and the slop may be left untouched.
+    if (era > 0) {
+        // very like FILL_SLOP(), except that we call LDV_recordDead().
+        size = closure_sizeW(p);
+
+        LDV_recordDead((StgClosure *)(p), size);
+
+        // Zero every payload word beyond the thunk header.  The census
+        // walkers in this file skip slop by scanning for zero words, so
+        // the slop really must be 0-filled.
+        if (size > sizeofW(StgThunkHeader)) {
+            for (i = 0; i < size - sizeofW(StgThunkHeader); i++) {
+                ((StgThunk *)(p))->payload[i] = 0;
+            }
+        }
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * This function is called eventually on every object destroyed during
+ * a garbage collection, whether it is a major garbage collection or
+ * not. If c is an 'inherently used' closure, nothing happens. If c
+ * is an ordinary closure, LDV_recordDead() is called on c with its
+ * proper size which excludes the profiling header portion in the
+ * closure. Returns the size of the closure, including the profiling
+ * header portion, so that the caller can find the next closure.
+ * ----------------------------------------------------------------------- */
+STATIC_INLINE nat
+processHeapClosureForDead( StgClosure *c )
+{
+    nat size;
+    StgInfoTable *info;
+
+    info = get_itbl(c);
+
+    // Sanity-check the LDV word of any closure that has not been
+    // evacuated: its creation era must be valid (1..era), and it must
+    // either still be in the 'create' state or carry a valid last-use era.
+    if (info->type != EVACUATED) {
+        ASSERT(((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= era &&
+               ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0);
+        ASSERT(((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+               (
+                (LDVW(c) & LDV_LAST_MASK) <= era &&
+                (LDVW(c) & LDV_LAST_MASK) > 0
+                ));
+    }
+
+    if (info->type == EVACUATED) {
+        // The size of the evacuated closure is currently stored in
+        // the LDV field.  See SET_EVACUAEE_FOR_LDV() in
+        // includes/StgLdvProf.h.
+        return LDVW(c);
+    }
+
+    size = closure_sizeW(c);
+
+    switch (info->type) {
+        /*
+          'inherently used' cases: do nothing.
+        */
+    case TSO:
+    case MVAR:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
+    case ARR_WORDS:
+    case WEAK:
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY:
+    case BCO:
+    case STABLE_NAME:
+    case TVAR_WAIT_QUEUE:
+    case TVAR:
+    case TREC_HEADER:
+    case TREC_CHUNK:
+        return size;
+
+        /*
+          ordinary cases: call LDV_recordDead().
+        */
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_SELECTOR:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case AP:
+    case PAP:
+    case AP_STACK:
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case CAF_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case IND_PERM:
+    case IND_OLDGEN_PERM:
+        /*
+          'Ignore' cases
+        */
+        // Why can we ignore IND/IND_OLDGEN closures? We assume that
+        // any census is preceded by a major garbage collection, which
+        // IND/IND_OLDGEN closures cannot survive. Therefore, it is no
+        // use considering IND/IND_OLDGEN closures in the meanwhile
+        // because they will perish before the next census at any
+        // rate.
+    case IND:
+    case IND_OLDGEN:
+        // Found a dead closure: record its size
+        LDV_recordDead(c, size);
+        return size;
+
+        /*
+          Error case
+        */
+        // static objects
+    case IND_STATIC:
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+    case THUNK_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+        // stack objects
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+        // others
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case INVALID_OBJECT:
+    default:
+        barf("Invalid object in processHeapClosureForDead(): %d", info->type);
+        return 0;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the
+ * heap blocks starting at bd.
+ * ----------------------------------------------------------------------- */
+static void
+processHeapForDead( bdescr *bd )
+{
+    StgPtr scan;
+
+    // Walk the whole block chain; within each block, step from closure to
+    // closure, letting processHeapClosureForDead() account for each one
+    // and tell us its size.
+    for (; bd != NULL; bd = bd->link) {
+        scan = bd->start;
+        while (scan < bd->free) {
+            scan += processHeapClosureForDead((StgClosure *)scan);
+            // step over any 0-filled slop words between closures
+            while (scan < bd->free && !*scan)
+                scan++;
+        }
+        ASSERT(scan == bd->free);
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the nursery.
+ * ----------------------------------------------------------------------- */
+static void
+processNurseryForDead( void )
+{
+    StgPtr p, bdLimit;
+    bdescr *bd;
+
+    bd = MainCapability.r.rNursery->blocks;
+    // Stop at the first block that contains no allocated data
+    // (bd->start == bd->free); presumably the remaining nursery blocks
+    // are empty too -- TODO confirm against the allocator.
+    while (bd->start < bd->free) {
+        p = bd->start;
+        // Scan is bounded both by bd->free and by one block's worth of
+        // words from bd->start.
+        bdLimit = bd->start + BLOCK_SIZE_W;
+        while (p < bd->free && p < bdLimit) {
+            p += processHeapClosureForDead((StgClosure *)p);
+            while (p < bd->free && p < bdLimit && !*p) // skip slop
+                p++;
+        }
+        bd = bd->link;
+        if (bd == NULL)
+            break;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the
+ * small object pool.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPoolForDead( void )
+{
+    bdescr *blk;
+    StgPtr scan;
+
+    blk = small_alloc_list;
+    if (blk == NULL)
+        return;
+
+    // The first block is the one currently being allocated into, so its
+    // live data ends at alloc_Hp rather than at blk->free.
+    scan = blk->start;
+    while (scan < alloc_Hp) {
+        scan += processHeapClosureForDead((StgClosure *)scan);
+        while (scan < alloc_Hp && !*scan)       // skip slop
+            scan++;
+    }
+    ASSERT(scan == alloc_Hp);
+
+    // Subsequent blocks are scanned up to their own free pointer.
+    for (blk = blk->link; blk != NULL; blk = blk->link) {
+        scan = blk->start;
+        while (scan < blk->free) {
+            scan += processHeapClosureForDead((StgClosure *)scan);
+            while (scan < blk->free && !*scan)  // skip slop
+                scan++;
+        }
+        ASSERT(scan == blk->free);
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closures in the closure
+ * chain.
+ * ----------------------------------------------------------------------- */
+static void
+processChainForDead( bdescr *bd )
+{
+    // Any object still in the chain is dead!  One closure sits at the
+    // start of each block, so a single call per block suffices.
+    for (; bd != NULL; bd = bd->link)
+        processHeapClosureForDead((StgClosure *)bd->start);
+}
+
+/* --------------------------------------------------------------------------
+ * Starts a census for *dead* closures, calling
+ * processHeapClosureForDead() on every closure which died in the
+ * current garbage collection.  This function is called from a garbage
+ * collector right before tidying up, when all dead closures are still
+ * stored in the heap and easy to identify.  Generations 0 through N
+ * have just been garbage collected.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusForDead( nat N )
+{
+    nat g, s;
+
+    // era == 0 means that LDV profiling is currently turned off.
+    if (era == 0)
+        return;
+
+    if (RtsFlags.GcFlags.generations == 1) {
+        //
+        // Todo: support LDV for two-space garbage collection.
+        //
+        barf("Lag/Drag/Void profiling not supported with -G1");
+    } else {
+        for (g = 0; g <= N; g++)
+            for (s = 0; s < generations[g].n_steps; s++) {
+                // Generation 0, step 0 is special: its data lives in the
+                // small object pool and the nursery rather than in
+                // old_blocks, so it gets its own walkers.
+                if (g == 0 && s == 0) {
+                    processSmallObjectPoolForDead();
+                    processNurseryForDead();
+                    processChainForDead(generations[g].steps[s].large_objects);
+                } else{
+                    processHeapForDead(generations[g].steps[s].old_blocks);
+                    processChainForDead(generations[g].steps[s].large_objects);
+                }
+            }
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Regard any closure in the current heap as dead or moribund and update
+ * LDV statistics accordingly.
+ * Called from shutdownHaskell() in RtsStartup.c.
+ * (NB: this function does not itself reset ldvTime/era to 0 -- verify caller.)
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusKillAll( void )
+{
+    // One final dead-object census over every generation, so everything
+    // still in the heap at shutdown is accounted for as dead/moribund.
+    LdvCensusForDead(RtsFlags.GcFlags.generations - 1);
+}
+
+#endif /* PROFILING */
diff --git a/rts/LdvProfile.h b/rts/LdvProfile.h
new file mode 100644
index 0000000000..d85b95cd6a
--- /dev/null
+++ b/rts/LdvProfile.h
@@ -0,0 +1,42 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef LDVPROFILE_H
+#define LDVPROFILE_H
+
+#ifdef PROFILING
+
+#include "ProfHeap.h"
+
+extern void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p );
+extern void LdvCensusForDead ( nat );
+extern void LdvCensusKillAll ( void );
+
+// Creates a 0-filled slop of size 'howManyBackwards' backwards from the
+// address 'from'.
+//
+// Invoked when:
+// 1) Hp is incremented and exceeds HpLim (in Updates.hc).
+// 2) copypart() is called (in GC.c).
+#define LDV_FILL_SLOP(from, howManyBackwards) \
+ if (era > 0) { \
+ int i; \
+ for (i = 0;i < (howManyBackwards); i++) \
+ ((StgWord *)(from))[-i] = 0; \
+ }
+
+// Informs the LDV profiler that closure c has just been evacuated.
+// Evacuated objects are no longer needed, so we just store its original size in
+// the LDV field; processHeapClosureForDead() reads it back via LDVW() for
+// EVACUATED closures.  (The spelling "EVACUAEE" is historical; callers use
+// this exact name, so it must not be "corrected".)
+#define SET_EVACUAEE_FOR_LDV(c, size) \
+    LDVW((c)) = (size)
+
+#endif /* PROFILING */
+
+#endif /* LDVPROFILE_H */
diff --git a/rts/Linker.c b/rts/Linker.c
new file mode 100644
index 0000000000..92d0106def
--- /dev/null
+++ b/rts/Linker.c
@@ -0,0 +1,4315 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2000-2004
+ *
+ * RTS Object Linker
+ *
+ * ---------------------------------------------------------------------------*/
+
+#if 0
+#include "PosixSource.h"
+#endif
+
+/* Linux needs _GNU_SOURCE to get RTLD_DEFAULT from <dlfcn.h> and
+ MREMAP_MAYMOVE from <sys/mman.h>.
+ */
+#ifdef __linux__
+#define _GNU_SOURCE
+#endif
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "HsFFI.h"
+#include "Hash.h"
+#include "Linker.h"
+#include "LinkerInternals.h"
+#include "RtsUtils.h"
+#include "Schedule.h"
+#include "Storage.h"
+#include "Sparks.h"
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+
+#if defined(HAVE_DLFCN_H)
+#include <dlfcn.h>
+#endif
+
+#if defined(cygwin32_HOST_OS)
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <regex.h>
+#include <sys/fcntl.h>
+#include <sys/termios.h>
+#include <sys/utime.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#endif
+
+#if defined(ia64_HOST_ARCH) || defined(openbsd_HOST_OS) || defined(linux_HOST_OS) || defined(freebsd_HOST_OS)
+#define USE_MMAP
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if defined(openbsd_HOST_OS) || defined(linux_HOST_OS) || defined(freebsd_HOST_OS)
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#endif
+
+#endif
+
+#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) || defined(freebsd_HOST_OS) || defined(netbsd_HOST_OS) || defined(openbsd_HOST_OS)
+# define OBJFORMAT_ELF
+#elif defined(cygwin32_HOST_OS) || defined (mingw32_HOST_OS)
+# define OBJFORMAT_PEi386
+# include <windows.h>
+# include <math.h>
+#elif defined(darwin_HOST_OS)
+# define OBJFORMAT_MACHO
+# include <mach-o/loader.h>
+# include <mach-o/nlist.h>
+# include <mach-o/reloc.h>
+# include <mach-o/dyld.h>
+#if defined(powerpc_HOST_ARCH)
+# include <mach-o/ppc/reloc.h>
+#endif
+#endif
+
+/* Hash table mapping symbol names to Symbol */
+static /*Str*/HashTable *symhash;
+
+/* List of currently loaded objects */
+ObjectCode *objects = NULL; /* initially empty */
+
+#if defined(OBJFORMAT_ELF)
+static int ocVerifyImage_ELF ( ObjectCode* oc );
+static int ocGetNames_ELF ( ObjectCode* oc );
+static int ocResolve_ELF ( ObjectCode* oc );
+#if defined(powerpc_HOST_ARCH)
+static int ocAllocateJumpIslands_ELF ( ObjectCode* oc );
+#endif
+#elif defined(OBJFORMAT_PEi386)
+static int ocVerifyImage_PEi386 ( ObjectCode* oc );
+static int ocGetNames_PEi386 ( ObjectCode* oc );
+static int ocResolve_PEi386 ( ObjectCode* oc );
+#elif defined(OBJFORMAT_MACHO)
+static int ocVerifyImage_MachO ( ObjectCode* oc );
+static int ocGetNames_MachO ( ObjectCode* oc );
+static int ocResolve_MachO ( ObjectCode* oc );
+
+static int machoGetMisalignment( FILE * );
+#ifdef powerpc_HOST_ARCH
+static int ocAllocateJumpIslands_MachO ( ObjectCode* oc );
+static void machoInitSymbolsWithoutUnderscore( void );
+#endif
+#endif
+
+#if defined(x86_64_HOST_ARCH)
+static void*x86_64_high_symbol( char *lbl, void *addr );
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Built-in symbols from the RTS
+ */
+
+// One entry in the table of built-in RTS symbols: maps a symbol name, as
+// it appears in object code, to its address inside the running RTS.
+typedef struct _RtsSymbolVal {
+    char *lbl;   /* symbol name as written in object code */
+    void *addr;  /* address the name resolves to in this process */
+} RtsSymbolVal;
+
+
+#if !defined(PAR)
+#define Maybe_Stable_Names SymX(mkWeakzh_fast) \
+ SymX(makeStableNamezh_fast) \
+ SymX(finalizzeWeakzh_fast)
+#else
+/* These are not available in GUM!!! -- HWL */
+#define Maybe_Stable_Names
+#endif
+
+#if !defined (mingw32_HOST_OS)
+#define RTS_POSIX_ONLY_SYMBOLS \
+ SymX(signal_handlers) \
+ SymX(stg_sig_install) \
+ Sym(nocldstop)
+#endif
+
+#if defined (cygwin32_HOST_OS)
+#define RTS_MINGW_ONLY_SYMBOLS /**/
+/* Don't have the ability to read import libs / archives, so
+ * we have to stupidly list a lot of what libcygwin.a
+ * exports; sigh.
+ */
+#define RTS_CYGWIN_ONLY_SYMBOLS \
+ SymX(regfree) \
+ SymX(regexec) \
+ SymX(regerror) \
+ SymX(regcomp) \
+ SymX(__errno) \
+ SymX(access) \
+ SymX(chmod) \
+ SymX(chdir) \
+ SymX(close) \
+ SymX(creat) \
+ SymX(dup) \
+ SymX(dup2) \
+ SymX(fstat) \
+ SymX(fcntl) \
+ SymX(getcwd) \
+ SymX(getenv) \
+ SymX(lseek) \
+ SymX(open) \
+ SymX(fpathconf) \
+ SymX(pathconf) \
+ SymX(stat) \
+ SymX(pow) \
+ SymX(tanh) \
+ SymX(cosh) \
+ SymX(sinh) \
+ SymX(atan) \
+ SymX(acos) \
+ SymX(asin) \
+ SymX(tan) \
+ SymX(cos) \
+ SymX(sin) \
+ SymX(exp) \
+ SymX(log) \
+ SymX(sqrt) \
+ SymX(localtime_r) \
+ SymX(gmtime_r) \
+ SymX(mktime) \
+ Sym(_imp___tzname) \
+ SymX(gettimeofday) \
+ SymX(timezone) \
+ SymX(tcgetattr) \
+ SymX(tcsetattr) \
+ SymX(memcpy) \
+ SymX(memmove) \
+ SymX(realloc) \
+ SymX(malloc) \
+ SymX(free) \
+ SymX(fork) \
+ SymX(lstat) \
+ SymX(isatty) \
+ SymX(mkdir) \
+ SymX(opendir) \
+ SymX(readdir) \
+ SymX(rewinddir) \
+ SymX(closedir) \
+ SymX(link) \
+ SymX(mkfifo) \
+ SymX(pipe) \
+ SymX(read) \
+ SymX(rename) \
+ SymX(rmdir) \
+ SymX(select) \
+ SymX(system) \
+ SymX(write) \
+ SymX(strcmp) \
+ SymX(strcpy) \
+ SymX(strncpy) \
+ SymX(strerror) \
+ SymX(sigaddset) \
+ SymX(sigemptyset) \
+ SymX(sigprocmask) \
+ SymX(umask) \
+ SymX(uname) \
+ SymX(unlink) \
+ SymX(utime) \
+ SymX(waitpid)
+
+#elif !defined(mingw32_HOST_OS)
+#define RTS_MINGW_ONLY_SYMBOLS /**/
+#define RTS_CYGWIN_ONLY_SYMBOLS /**/
+#else /* defined(mingw32_HOST_OS) */
+#define RTS_POSIX_ONLY_SYMBOLS /**/
+#define RTS_CYGWIN_ONLY_SYMBOLS /**/
+
+/* Extra syms gen'ed by mingw-2's gcc-3.2: */
+#if __GNUC__>=3
+#define RTS_MINGW_EXTRA_SYMS \
+ Sym(_imp____mb_cur_max) \
+ Sym(_imp___pctype)
+#else
+#define RTS_MINGW_EXTRA_SYMS
+#endif
+
+/* These are statically linked from the mingw libraries into the ghc
+ executable, so we have to employ this hack. */
+#define RTS_MINGW_ONLY_SYMBOLS \
+ SymX(asyncReadzh_fast) \
+ SymX(asyncWritezh_fast) \
+ SymX(asyncDoProczh_fast) \
+ SymX(memset) \
+ SymX(inet_ntoa) \
+ SymX(inet_addr) \
+ SymX(htonl) \
+ SymX(recvfrom) \
+ SymX(listen) \
+ SymX(bind) \
+ SymX(shutdown) \
+ SymX(connect) \
+ SymX(htons) \
+ SymX(ntohs) \
+ SymX(getservbyname) \
+ SymX(getservbyport) \
+ SymX(getprotobynumber) \
+ SymX(getprotobyname) \
+ SymX(gethostbyname) \
+ SymX(gethostbyaddr) \
+ SymX(gethostname) \
+ SymX(strcpy) \
+ SymX(strncpy) \
+ SymX(abort) \
+ Sym(_alloca) \
+ Sym(isxdigit) \
+ Sym(isupper) \
+ Sym(ispunct) \
+ Sym(islower) \
+ Sym(isspace) \
+ Sym(isprint) \
+ Sym(isdigit) \
+ Sym(iscntrl) \
+ Sym(isalpha) \
+ Sym(isalnum) \
+ SymX(strcmp) \
+ SymX(memmove) \
+ SymX(realloc) \
+ SymX(malloc) \
+ SymX(pow) \
+ SymX(tanh) \
+ SymX(cosh) \
+ SymX(sinh) \
+ SymX(atan) \
+ SymX(acos) \
+ SymX(asin) \
+ SymX(tan) \
+ SymX(cos) \
+ SymX(sin) \
+ SymX(exp) \
+ SymX(log) \
+ SymX(sqrt) \
+ SymX(powf) \
+ SymX(tanhf) \
+ SymX(coshf) \
+ SymX(sinhf) \
+ SymX(atanf) \
+ SymX(acosf) \
+ SymX(asinf) \
+ SymX(tanf) \
+ SymX(cosf) \
+ SymX(sinf) \
+ SymX(expf) \
+ SymX(logf) \
+ SymX(sqrtf) \
+ SymX(memcpy) \
+ SymX(rts_InstallConsoleEvent) \
+ SymX(rts_ConsoleHandlerDone) \
+ Sym(mktime) \
+ Sym(_imp___timezone) \
+ Sym(_imp___tzname) \
+ Sym(_imp___iob) \
+ Sym(_imp___osver) \
+ Sym(localtime) \
+ Sym(gmtime) \
+ Sym(opendir) \
+ Sym(readdir) \
+ Sym(rewinddir) \
+ RTS_MINGW_EXTRA_SYMS \
+ Sym(closedir)
+#endif
+
+#if defined(darwin_TARGET_OS) && HAVE_PRINTF_LDBLSTUB
+#define RTS_DARWIN_ONLY_SYMBOLS \
+ Sym(asprintf$LDBLStub) \
+ Sym(err$LDBLStub) \
+ Sym(errc$LDBLStub) \
+ Sym(errx$LDBLStub) \
+ Sym(fprintf$LDBLStub) \
+ Sym(fscanf$LDBLStub) \
+ Sym(fwprintf$LDBLStub) \
+ Sym(fwscanf$LDBLStub) \
+ Sym(printf$LDBLStub) \
+ Sym(scanf$LDBLStub) \
+ Sym(snprintf$LDBLStub) \
+ Sym(sprintf$LDBLStub) \
+ Sym(sscanf$LDBLStub) \
+ Sym(strtold$LDBLStub) \
+ Sym(swprintf$LDBLStub) \
+ Sym(swscanf$LDBLStub) \
+ Sym(syslog$LDBLStub) \
+ Sym(vasprintf$LDBLStub) \
+ Sym(verr$LDBLStub) \
+ Sym(verrc$LDBLStub) \
+ Sym(verrx$LDBLStub) \
+ Sym(vfprintf$LDBLStub) \
+ Sym(vfscanf$LDBLStub) \
+ Sym(vfwprintf$LDBLStub) \
+ Sym(vfwscanf$LDBLStub) \
+ Sym(vprintf$LDBLStub) \
+ Sym(vscanf$LDBLStub) \
+ Sym(vsnprintf$LDBLStub) \
+ Sym(vsprintf$LDBLStub) \
+ Sym(vsscanf$LDBLStub) \
+ Sym(vswprintf$LDBLStub) \
+ Sym(vswscanf$LDBLStub) \
+ Sym(vsyslog$LDBLStub) \
+ Sym(vwarn$LDBLStub) \
+ Sym(vwarnc$LDBLStub) \
+ Sym(vwarnx$LDBLStub) \
+ Sym(vwprintf$LDBLStub) \
+ Sym(vwscanf$LDBLStub) \
+ Sym(warn$LDBLStub) \
+ Sym(warnc$LDBLStub) \
+ Sym(warnx$LDBLStub) \
+ Sym(wcstold$LDBLStub) \
+ Sym(wprintf$LDBLStub) \
+ Sym(wscanf$LDBLStub)
+#else
+#define RTS_DARWIN_ONLY_SYMBOLS
+#endif
+
+#ifndef SMP
+# define MAIN_CAP_SYM SymX(MainCapability)
+#else
+# define MAIN_CAP_SYM
+#endif
+
+#if !defined(mingw32_HOST_OS)
+#define RTS_USER_SIGNALS_SYMBOLS \
+ SymX(setIOManagerPipe)
+#else
+#define RTS_USER_SIGNALS_SYMBOLS /* nothing */
+#endif
+
+#ifdef TABLES_NEXT_TO_CODE
+#define RTS_RET_SYMBOLS /* nothing */
+#else
+#define RTS_RET_SYMBOLS \
+ SymX(stg_enter_ret) \
+ SymX(stg_gc_fun_ret) \
+ SymX(stg_ap_v_ret) \
+ SymX(stg_ap_f_ret) \
+ SymX(stg_ap_d_ret) \
+ SymX(stg_ap_l_ret) \
+ SymX(stg_ap_n_ret) \
+ SymX(stg_ap_p_ret) \
+ SymX(stg_ap_pv_ret) \
+ SymX(stg_ap_pp_ret) \
+ SymX(stg_ap_ppv_ret) \
+ SymX(stg_ap_ppp_ret) \
+ SymX(stg_ap_pppv_ret) \
+ SymX(stg_ap_pppp_ret) \
+ SymX(stg_ap_ppppp_ret) \
+ SymX(stg_ap_pppppp_ret)
+#endif
+
+#define RTS_SYMBOLS \
+ Maybe_Stable_Names \
+ Sym(StgReturn) \
+ SymX(stg_enter_info) \
+ SymX(stg_gc_void_info) \
+ SymX(__stg_gc_enter_1) \
+ SymX(stg_gc_noregs) \
+ SymX(stg_gc_unpt_r1_info) \
+ SymX(stg_gc_unpt_r1) \
+ SymX(stg_gc_unbx_r1_info) \
+ SymX(stg_gc_unbx_r1) \
+ SymX(stg_gc_f1_info) \
+ SymX(stg_gc_f1) \
+ SymX(stg_gc_d1_info) \
+ SymX(stg_gc_d1) \
+ SymX(stg_gc_l1_info) \
+ SymX(stg_gc_l1) \
+ SymX(__stg_gc_fun) \
+ SymX(stg_gc_fun_info) \
+ SymX(stg_gc_gen) \
+ SymX(stg_gc_gen_info) \
+ SymX(stg_gc_gen_hp) \
+ SymX(stg_gc_ut) \
+ SymX(stg_gen_yield) \
+ SymX(stg_yield_noregs) \
+ SymX(stg_yield_to_interpreter) \
+ SymX(stg_gen_block) \
+ SymX(stg_block_noregs) \
+ SymX(stg_block_1) \
+ SymX(stg_block_takemvar) \
+ SymX(stg_block_putmvar) \
+ SymX(stg_seq_frame_info) \
+ MAIN_CAP_SYM \
+ SymX(MallocFailHook) \
+ SymX(OnExitHook) \
+ SymX(OutOfHeapHook) \
+ SymX(StackOverflowHook) \
+ SymX(__encodeDouble) \
+ SymX(__encodeFloat) \
+ SymX(addDLL) \
+ SymX(__gmpn_gcd_1) \
+ SymX(__gmpz_cmp) \
+ SymX(__gmpz_cmp_si) \
+ SymX(__gmpz_cmp_ui) \
+ SymX(__gmpz_get_si) \
+ SymX(__gmpz_get_ui) \
+ SymX(__int_encodeDouble) \
+ SymX(__int_encodeFloat) \
+ SymX(andIntegerzh_fast) \
+ SymX(atomicallyzh_fast) \
+ SymX(barf) \
+ SymX(debugBelch) \
+ SymX(errorBelch) \
+ SymX(blockAsyncExceptionszh_fast) \
+ SymX(catchzh_fast) \
+ SymX(catchRetryzh_fast) \
+ SymX(catchSTMzh_fast) \
+ SymX(closure_flags) \
+ SymX(cmp_thread) \
+ SymX(cmpIntegerzh_fast) \
+ SymX(cmpIntegerIntzh_fast) \
+ SymX(complementIntegerzh_fast) \
+ SymX(createAdjustor) \
+ SymX(decodeDoublezh_fast) \
+ SymX(decodeFloatzh_fast) \
+ SymX(defaultsHook) \
+ SymX(delayzh_fast) \
+ SymX(deRefWeakzh_fast) \
+ SymX(deRefStablePtrzh_fast) \
+ SymX(dirty_MUT_VAR) \
+ SymX(divExactIntegerzh_fast) \
+ SymX(divModIntegerzh_fast) \
+ SymX(forkzh_fast) \
+ SymX(forkOnzh_fast) \
+ SymX(forkProcess) \
+ SymX(forkOS_createThread) \
+ SymX(freeHaskellFunctionPtr) \
+ SymX(freeStablePtr) \
+ SymX(gcdIntegerzh_fast) \
+ SymX(gcdIntegerIntzh_fast) \
+ SymX(gcdIntzh_fast) \
+ SymX(genSymZh) \
+ SymX(genericRaise) \
+ SymX(getProgArgv) \
+ SymX(getStablePtr) \
+ SymX(hs_init) \
+ SymX(hs_exit) \
+ SymX(hs_set_argv) \
+ SymX(hs_add_root) \
+ SymX(hs_perform_gc) \
+ SymX(hs_free_stable_ptr) \
+ SymX(hs_free_fun_ptr) \
+ SymX(initLinker) \
+ SymX(int2Integerzh_fast) \
+ SymX(integer2Intzh_fast) \
+ SymX(integer2Wordzh_fast) \
+ SymX(isCurrentThreadBoundzh_fast) \
+ SymX(isDoubleDenormalized) \
+ SymX(isDoubleInfinite) \
+ SymX(isDoubleNaN) \
+ SymX(isDoubleNegativeZero) \
+ SymX(isEmptyMVarzh_fast) \
+ SymX(isFloatDenormalized) \
+ SymX(isFloatInfinite) \
+ SymX(isFloatNaN) \
+ SymX(isFloatNegativeZero) \
+ SymX(killThreadzh_fast) \
+ SymX(loadObj) \
+ SymX(lookupSymbol) \
+ SymX(makeStablePtrzh_fast) \
+ SymX(minusIntegerzh_fast) \
+ SymX(mkApUpd0zh_fast) \
+ SymX(myThreadIdzh_fast) \
+ SymX(labelThreadzh_fast) \
+ SymX(newArrayzh_fast) \
+ SymX(newBCOzh_fast) \
+ SymX(newByteArrayzh_fast) \
+ SymX_redirect(newCAF, newDynCAF) \
+ SymX(newMVarzh_fast) \
+ SymX(newMutVarzh_fast) \
+ SymX(newTVarzh_fast) \
+ SymX(atomicModifyMutVarzh_fast) \
+ SymX(newPinnedByteArrayzh_fast) \
+ SymX(newSpark) \
+ SymX(orIntegerzh_fast) \
+ SymX(performGC) \
+ SymX(performMajorGC) \
+ SymX(plusIntegerzh_fast) \
+ SymX(prog_argc) \
+ SymX(prog_argv) \
+ SymX(putMVarzh_fast) \
+ SymX(quotIntegerzh_fast) \
+ SymX(quotRemIntegerzh_fast) \
+ SymX(raisezh_fast) \
+ SymX(raiseIOzh_fast) \
+ SymX(readTVarzh_fast) \
+ SymX(remIntegerzh_fast) \
+ SymX(resetNonBlockingFd) \
+ SymX(resumeThread) \
+ SymX(resolveObjs) \
+ SymX(retryzh_fast) \
+ SymX(rts_apply) \
+ SymX(rts_checkSchedStatus) \
+ SymX(rts_eval) \
+ SymX(rts_evalIO) \
+ SymX(rts_evalLazyIO) \
+ SymX(rts_evalStableIO) \
+ SymX(rts_eval_) \
+ SymX(rts_getBool) \
+ SymX(rts_getChar) \
+ SymX(rts_getDouble) \
+ SymX(rts_getFloat) \
+ SymX(rts_getInt) \
+ SymX(rts_getInt32) \
+ SymX(rts_getPtr) \
+ SymX(rts_getFunPtr) \
+ SymX(rts_getStablePtr) \
+ SymX(rts_getThreadId) \
+ SymX(rts_getWord) \
+ SymX(rts_getWord32) \
+ SymX(rts_lock) \
+ SymX(rts_mkBool) \
+ SymX(rts_mkChar) \
+ SymX(rts_mkDouble) \
+ SymX(rts_mkFloat) \
+ SymX(rts_mkInt) \
+ SymX(rts_mkInt16) \
+ SymX(rts_mkInt32) \
+ SymX(rts_mkInt64) \
+ SymX(rts_mkInt8) \
+ SymX(rts_mkPtr) \
+ SymX(rts_mkFunPtr) \
+ SymX(rts_mkStablePtr) \
+ SymX(rts_mkString) \
+ SymX(rts_mkWord) \
+ SymX(rts_mkWord16) \
+ SymX(rts_mkWord32) \
+ SymX(rts_mkWord64) \
+ SymX(rts_mkWord8) \
+ SymX(rts_unlock) \
+ SymX(rtsSupportsBoundThreads) \
+ SymX(__hscore_get_saved_termios) \
+ SymX(__hscore_set_saved_termios) \
+ SymX(setProgArgv) \
+ SymX(startupHaskell) \
+ SymX(shutdownHaskell) \
+ SymX(shutdownHaskellAndExit) \
+ SymX(stable_ptr_table) \
+ SymX(stackOverflow) \
+ SymX(stg_CAF_BLACKHOLE_info) \
+ SymX(awakenBlockedQueue) \
+ SymX(stg_CHARLIKE_closure) \
+ SymX(stg_EMPTY_MVAR_info) \
+ SymX(stg_IND_STATIC_info) \
+ SymX(stg_INTLIKE_closure) \
+ SymX(stg_MUT_ARR_PTRS_DIRTY_info) \
+ SymX(stg_MUT_ARR_PTRS_FROZEN_info) \
+ SymX(stg_MUT_ARR_PTRS_FROZEN0_info) \
+ SymX(stg_WEAK_info) \
+ SymX(stg_ap_v_info) \
+ SymX(stg_ap_f_info) \
+ SymX(stg_ap_d_info) \
+ SymX(stg_ap_l_info) \
+ SymX(stg_ap_n_info) \
+ SymX(stg_ap_p_info) \
+ SymX(stg_ap_pv_info) \
+ SymX(stg_ap_pp_info) \
+ SymX(stg_ap_ppv_info) \
+ SymX(stg_ap_ppp_info) \
+ SymX(stg_ap_pppv_info) \
+ SymX(stg_ap_pppp_info) \
+ SymX(stg_ap_ppppp_info) \
+ SymX(stg_ap_pppppp_info) \
+ SymX(stg_ap_0_fast) \
+ SymX(stg_ap_v_fast) \
+ SymX(stg_ap_f_fast) \
+ SymX(stg_ap_d_fast) \
+ SymX(stg_ap_l_fast) \
+ SymX(stg_ap_n_fast) \
+ SymX(stg_ap_p_fast) \
+ SymX(stg_ap_pv_fast) \
+ SymX(stg_ap_pp_fast) \
+ SymX(stg_ap_ppv_fast) \
+ SymX(stg_ap_ppp_fast) \
+ SymX(stg_ap_pppv_fast) \
+ SymX(stg_ap_pppp_fast) \
+ SymX(stg_ap_ppppp_fast) \
+ SymX(stg_ap_pppppp_fast) \
+ SymX(stg_ap_1_upd_info) \
+ SymX(stg_ap_2_upd_info) \
+ SymX(stg_ap_3_upd_info) \
+ SymX(stg_ap_4_upd_info) \
+ SymX(stg_ap_5_upd_info) \
+ SymX(stg_ap_6_upd_info) \
+ SymX(stg_ap_7_upd_info) \
+ SymX(stg_exit) \
+ SymX(stg_sel_0_upd_info) \
+ SymX(stg_sel_10_upd_info) \
+ SymX(stg_sel_11_upd_info) \
+ SymX(stg_sel_12_upd_info) \
+ SymX(stg_sel_13_upd_info) \
+ SymX(stg_sel_14_upd_info) \
+ SymX(stg_sel_15_upd_info) \
+ SymX(stg_sel_1_upd_info) \
+ SymX(stg_sel_2_upd_info) \
+ SymX(stg_sel_3_upd_info) \
+ SymX(stg_sel_4_upd_info) \
+ SymX(stg_sel_5_upd_info) \
+ SymX(stg_sel_6_upd_info) \
+ SymX(stg_sel_7_upd_info) \
+ SymX(stg_sel_8_upd_info) \
+ SymX(stg_sel_9_upd_info) \
+ SymX(stg_upd_frame_info) \
+ SymX(suspendThread) \
+ SymX(takeMVarzh_fast) \
+ SymX(timesIntegerzh_fast) \
+ SymX(tryPutMVarzh_fast) \
+ SymX(tryTakeMVarzh_fast) \
+ SymX(unblockAsyncExceptionszh_fast) \
+ SymX(unloadObj) \
+ SymX(unsafeThawArrayzh_fast) \
+ SymX(waitReadzh_fast) \
+ SymX(waitWritezh_fast) \
+ SymX(word2Integerzh_fast) \
+ SymX(writeTVarzh_fast) \
+ SymX(xorIntegerzh_fast) \
+ SymX(yieldzh_fast) \
+ SymX(stg_interp_constr_entry) \
+ SymX(stg_interp_constr1_entry) \
+ SymX(stg_interp_constr2_entry) \
+ SymX(stg_interp_constr3_entry) \
+ SymX(stg_interp_constr4_entry) \
+ SymX(stg_interp_constr5_entry) \
+ SymX(stg_interp_constr6_entry) \
+ SymX(stg_interp_constr7_entry) \
+ SymX(stg_interp_constr8_entry) \
+ SymX(stgMallocBytesRWX) \
+ SymX(getAllocations) \
+ SymX(revertCAFs) \
+ SymX(RtsFlags) \
+ RTS_USER_SIGNALS_SYMBOLS
+
+#ifdef SUPPORT_LONG_LONGS
+#define RTS_LONG_LONG_SYMS \
+ SymX(int64ToIntegerzh_fast) \
+ SymX(word64ToIntegerzh_fast)
+#else
+#define RTS_LONG_LONG_SYMS /* nothing */
+#endif
+
+// 64-bit support functions in libgcc.a
+#if defined(__GNUC__) && SIZEOF_VOID_P <= 4
+#define RTS_LIBGCC_SYMBOLS \
+ Sym(__divdi3) \
+ Sym(__udivdi3) \
+ Sym(__moddi3) \
+ Sym(__umoddi3) \
+ Sym(__muldi3) \
+ Sym(__ashldi3) \
+ Sym(__ashrdi3) \
+ Sym(__lshrdi3) \
+ Sym(__eprintf)
+#elif defined(ia64_HOST_ARCH)
+#define RTS_LIBGCC_SYMBOLS \
+ Sym(__divdi3) \
+ Sym(__udivdi3) \
+ Sym(__moddi3) \
+ Sym(__umoddi3) \
+ Sym(__divsf3) \
+ Sym(__divdf3)
+#else
+#define RTS_LIBGCC_SYMBOLS
+#endif
+
+#if defined(darwin_HOST_OS) && defined(powerpc_HOST_ARCH)
+ // Symbols that don't have a leading underscore
+ // on Mac OS X. They have to receive special treatment,
+ // see machoInitSymbolsWithoutUnderscore()
+#define RTS_MACHO_NOUNDERLINE_SYMBOLS \
+ Sym(saveFP) \
+ Sym(restFP)
+#endif
+
+/* entirely bogus claims about types of these symbols */
+#define Sym(vvv) extern void vvv(void);
+#define SymX(vvv) /**/
+#define SymX_redirect(vvv,xxx) /**/
+RTS_SYMBOLS
+RTS_RET_SYMBOLS
+RTS_LONG_LONG_SYMS
+RTS_POSIX_ONLY_SYMBOLS
+RTS_MINGW_ONLY_SYMBOLS
+RTS_CYGWIN_ONLY_SYMBOLS
+RTS_DARWIN_ONLY_SYMBOLS
+RTS_LIBGCC_SYMBOLS
+#undef Sym
+#undef SymX
+#undef SymX_redirect
+
+#ifdef LEADING_UNDERSCORE
+#define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
+#else
+#define MAYBE_LEADING_UNDERSCORE_STR(s) (s)
+#endif
+
+#define Sym(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+ (void*)(&(vvv)) },
+#define SymX(vvv) Sym(vvv)
+
+// SymX_redirect allows us to redirect references to one symbol to
+// another symbol. See newCAF/newDynCAF for an example.
+#define SymX_redirect(vvv,xxx) \
+ { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
+ (void*)(&(xxx)) },
+
+/* Table of every symbol the RTS itself exports to dynamically loaded
+ objects, expanded from the Sym/SymX macro lists above (each entry is
+ a { name, address } pair). Terminated by a {0,0} sentinel; walked by
+ initLinker() to pre-populate symhash. */
+static RtsSymbolVal rtsSyms[] = {
+ RTS_SYMBOLS
+ RTS_RET_SYMBOLS
+ RTS_LONG_LONG_SYMS
+ RTS_POSIX_ONLY_SYMBOLS
+ RTS_MINGW_ONLY_SYMBOLS
+ RTS_CYGWIN_ONLY_SYMBOLS
+ RTS_LIBGCC_SYMBOLS
+#if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
+ // dyld stub code contains references to this,
+ // but it should never be called because we treat
+ // lazy pointers as nonlazy.
+ { "dyld_stub_binding_helper", (void*)0xDEADBEEF },
+#endif
+ { 0, 0 } /* sentinel */
+};
+
+/* -----------------------------------------------------------------------------
+ * Insert symbols into hash tables, checking for duplicates.
+ */
+/* Insert (key -> data) into 'table', treating a duplicate key as fatal.
+ 'obj_name' names the object file being processed and is used only in
+ the error message. Duplicates abort the process with exit(1): the
+ linker has no way to choose between two definitions of one symbol,
+ so continuing would corrupt the symbol table. */
+static void ghciInsertStrHashTable ( char* obj_name,
+ HashTable *table,
+ char* key,
+ void *data
+ )
+{
+ /* lookupHashTable on a string table uses the table's own string
+ hash/compare functions, so this is a genuine duplicate check. */
+ if (lookupHashTable(table, (StgWord)key) == NULL)
+ {
+ insertStrHashTable(table, (StgWord)key, data);
+ return;
+ }
+ debugBelch(
+ "\n\n"
+ "GHCi runtime linker: fatal error: I found a duplicate definition for symbol\n"
+ " %s\n"
+ "whilst processing object file\n"
+ " %s\n"
+ "This could be caused by:\n"
+ " * Loading two different object files which export the same symbol\n"
+ " * Specifying the same object file twice on the GHCi command line\n"
+ " * An incorrect `package.conf' entry, causing some object to be\n"
+ " loaded twice.\n"
+ "GHCi cannot safely continue in this situation. Exiting now. Sorry.\n"
+ "\n",
+ (char*)key,
+ obj_name
+ );
+ exit(1);
+}
+
+
+/* -----------------------------------------------------------------------------
+ * initialize the object linker
+ */
+
+
+static int linker_init_done = 0 ;
+
+#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+static void *dl_prog_handle;
+#endif
+
+/* dlopen(NULL,..) doesn't work so we grab libc explicitly */
+#if defined(openbsd_HOST_OS)
+static void *dl_libc_handle;
+#endif
+
+/* Initialise the runtime linker: allocate the global symbol hash table,
+ seed it with the RTS's own exported symbols (rtsSyms), and obtain a
+ dl handle for looking up symbols in the running program / libc.
+ Idempotent -- safe to call before every linker operation.
+ NOTE(review): not thread-safe; the check-then-set on linker_init_done
+ assumes single-threaded callers -- confirm against call sites. */
+void
+initLinker( void )
+{
+ RtsSymbolVal *sym;
+
+ /* Make initLinker idempotent, so we can call it
+ before every relevant operation; that means we
+ don't need to initialise the linker separately */
+ if (linker_init_done == 1) { return; } else {
+ linker_init_done = 1;
+ }
+
+ symhash = allocStrHashTable();
+
+ /* populate the symbol table with stuff from the RTS */
+ for (sym = rtsSyms; sym->lbl != NULL; sym++) {
+ ghciInsertStrHashTable("(GHCi built-in symbols)",
+ symhash, sym->lbl, sym->addr);
+ }
+# if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
+ machoInitSymbolsWithoutUnderscore();
+# endif
+
+# if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+# if defined(RTLD_DEFAULT)
+ /* RTLD_DEFAULT searches the whole program's global scope directly. */
+ dl_prog_handle = RTLD_DEFAULT;
+# else
+ dl_prog_handle = dlopen(NULL, RTLD_LAZY);
+# if defined(openbsd_HOST_OS)
+ /* dlopen(NULL,..) doesn't cover libc on OpenBSD; grab it explicitly. */
+ dl_libc_handle = dlopen("libc.so", RTLD_LAZY);
+# endif
+# endif /* RTLD_DEFAULT */
+# endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Loading DLL or .so dynamic libraries
+ * -----------------------------------------------------------------------------
+ *
+ * Add a DLL from which symbols may be found. In the ELF case, just
+ * do RTLD_GLOBAL-style add, so no further messing around needs to
+ * happen in order that symbols in the loaded .so are findable --
+ * lookupSymbol() will subsequently see them by dlsym on the program's
+ * dl-handle. Returns NULL if success, otherwise ptr to an err msg.
+ *
+ * In the PEi386 case, open the DLLs and put handles to them in a
+ * linked list. When looking for a symbol, try all handles in the
+ * list. This means that we need to load even DLLs that are guaranteed
+ * to be in the ghc.exe image already, just so we can get a handle
+ * to give to loadSymbol, so that we can find the symbols. For such
+ * libraries, the LoadLibrary call should be a no-op except for returning
+ * the handle.
+ *
+ */
+
+#if defined(OBJFORMAT_PEi386)
+/* A record for storing handles into DLLs. */
+
+typedef
+ struct _OpenedDLL {
+ char* name;
+ struct _OpenedDLL* next;
+ HINSTANCE instance;
+ }
+ OpenedDLL;
+
+/* A list thereof. */
+static OpenedDLL* opened_dlls = NULL;
+#endif
+
+/* Add a dynamic library from which symbols may be resolved.
+ dll_name is given WITHOUT extension (and, on ELF/Mach-O, is passed to
+ dlopen verbatim, so there it should be a full soname/path).
+ Returns NULL on success, otherwise a pointer to an error message
+ (static or dlerror-owned -- caller must not free it). */
+char *
+addDLL( char *dll_name )
+{
+# if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
+ /* ------------------- ELF DLL loader ------------------- */
+ void *hdl;
+ char *errmsg;
+
+ initLinker();
+
+ /* RTLD_GLOBAL makes the .so's symbols visible to later dlsym() on
+ dl_prog_handle, so lookupSymbol needs no per-library handle. */
+ hdl= dlopen(dll_name, RTLD_NOW | RTLD_GLOBAL);
+
+ if (hdl == NULL) {
+ /* dlopen failed; return a ptr to the error msg. */
+ errmsg = dlerror();
+ if (errmsg == NULL) errmsg = "addDLL: unknown error";
+ return errmsg;
+ } else {
+ return NULL;
+ }
+ /*NOTREACHED*/
+
+# elif defined(OBJFORMAT_PEi386)
+ /* ------------------- Win32 DLL loader ------------------- */
+
+ char* buf;
+ OpenedDLL* o_dll;
+ HINSTANCE instance;
+
+ initLinker();
+
+ /* debugBelch("\naddDLL; dll_name = `%s'\n", dll_name); */
+
+ /* See if we've already got it, and ignore if so. */
+ for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
+ if (0 == strcmp(o_dll->name, dll_name))
+ return NULL;
+ }
+
+ /* The file name has no suffix (yet) so that we can try
+ both foo.dll and foo.drv
+
+ The documentation for LoadLibrary says:
+ If no file name extension is specified in the lpFileName
+ parameter, the default library extension .dll is
+ appended. However, the file name string can include a trailing
+ point character (.) to indicate that the module name has no
+ extension. */
+
+ /* +10 leaves room for ".DLL"/".DRV" plus NUL (with slack). */
+ buf = stgMallocBytes(strlen(dll_name) + 10, "addDLL");
+ sprintf(buf, "%s.DLL", dll_name);
+ instance = LoadLibrary(buf);
+ if (instance == NULL) {
+ sprintf(buf, "%s.DRV", dll_name); // KAA: allow loading of drivers (like winspool.drv)
+ instance = LoadLibrary(buf);
+ if (instance == NULL) {
+ stgFree(buf);
+
+ /* LoadLibrary failed; return a ptr to the error msg. */
+ return "addDLL: unknown error";
+ }
+ }
+ stgFree(buf);
+
+ /* Add this DLL to the list of DLLs in which to search for symbols. */
+ o_dll = stgMallocBytes( sizeof(OpenedDLL), "addDLL" );
+ o_dll->name = stgMallocBytes(1+strlen(dll_name), "addDLL");
+ strcpy(o_dll->name, dll_name);
+ o_dll->instance = instance;
+ o_dll->next = opened_dlls;
+ opened_dlls = o_dll;
+
+ return NULL;
+# else
+ barf("addDLL: not implemented on this platform");
+# endif
+}
+
+/* -----------------------------------------------------------------------------
+ * lookup a symbol in the hash table
+ */
+/* Look up 'lbl' first in the GHCi symbol hash table, then fall back to
+ the system loader (dlsym / NSLookupAndBindSymbol / GetProcAddress on
+ every opened DLL). Returns the symbol's address, or NULL if it is
+ not found anywhere. */
+void *
+lookupSymbol( char *lbl )
+{
+ void *val;
+ initLinker() ;
+ ASSERT(symhash != NULL);
+ val = lookupStrHashTable(symhash, lbl);
+
+ if (val == NULL) {
+# if defined(OBJFORMAT_ELF)
+# if defined(openbsd_HOST_OS)
+ /* OpenBSD: also try the explicitly opened libc handle. */
+ val = dlsym(dl_prog_handle, lbl);
+ return (val != NULL) ? val : dlsym(dl_libc_handle,lbl);
+# elif defined(x86_64_HOST_ARCH)
+ /* GHC assumes the small memory model on x86_64; symbols that
+ dlsym resolves above 2GB must be redirected into low memory. */
+ val = dlsym(dl_prog_handle, lbl);
+ if (val >= (void *)0x80000000) {
+ void *new_val;
+ new_val = x86_64_high_symbol(lbl, val);
+ IF_DEBUG(linker,debugBelch("lookupSymbol: relocating out of range symbol: %s = %p, now %p\n", lbl, val, new_val));
+ return new_val;
+ } else {
+ return val;
+ }
+# else /* not openbsd */
+ return dlsym(dl_prog_handle, lbl);
+# endif
+# elif defined(OBJFORMAT_MACHO)
+ if(NSIsSymbolNameDefined(lbl)) {
+ NSSymbol symbol = NSLookupAndBindSymbol(lbl);
+ return NSAddressOfSymbol(symbol);
+ } else {
+ return NULL;
+ }
+# elif defined(OBJFORMAT_PEi386)
+ OpenedDLL* o_dll;
+ void* sym;
+ for (o_dll = opened_dlls; o_dll != NULL; o_dll = o_dll->next) {
+ /* debugBelch("look in %s for %s\n", o_dll->name, lbl); */
+ if (lbl[0] == '_') {
+ /* HACK: if the name has an initial underscore, try stripping
+ it off & look that up first. I've yet to verify whether there's
+ a Rule that governs whether an initial '_' *should always* be
+ stripped off when mapping from import lib name to the DLL name.
+ */
+ sym = GetProcAddress(o_dll->instance, (lbl+1));
+ if (sym != NULL) {
+ /*debugBelch("found %s in %s\n", lbl+1,o_dll->name);*/
+ return sym;
+ }
+ }
+ sym = GetProcAddress(o_dll->instance, lbl);
+ if (sym != NULL) {
+ /*debugBelch("found %s in %s\n", lbl,o_dll->name);*/
+ return sym;
+ }
+ }
+ return NULL;
+# else
+ /* Unsupported object format: fail the assertion in DEBUG builds,
+ quietly return NULL otherwise. */
+ ASSERT(2+2 == 5);
+ return NULL;
+# endif
+ } else {
+ return val;
+ }
+}
+
+/* Look up 'lbl' in the per-object local symbol table only (symbols not
+ exported globally). Returns the address or NULL if absent.
+ Marked unused: only some object-format backends reference it.
+ NOTE(review): `__attribute` (single trailing underscore pair omitted)
+ is a GCC-accepted spelling of `__attribute__` -- confirm intended. */
+static
+__attribute((unused))
+void *
+lookupLocalSymbol( ObjectCode* oc, char *lbl )
+{
+ void *val;
+ initLinker() ;
+ val = lookupStrHashTable(oc->lochash, lbl);
+
+ if (val == NULL) {
+ return NULL;
+ } else {
+ return val;
+ }
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Debugging aid: look in GHCi's object symbol tables for symbols
+ * within DELTA bytes of the specified address, and show their names.
+ */
+#ifdef DEBUG
+void ghci_enquire ( char* addr );
+
+/* Debugging aid (DEBUG builds only): print the name of every symbol,
+ in any loaded object, whose address lies within DELTA (64) bytes of
+ 'addr'. Checks each object's local table first, then the global one. */
+void ghci_enquire ( char* addr )
+{
+ int i;
+ char* sym;
+ char* a;
+ const int DELTA = 64;
+ ObjectCode* oc;
+
+ initLinker();
+
+ for (oc = objects; oc; oc = oc->next) {
+ for (i = 0; i < oc->n_symbols; i++) {
+ sym = oc->symbols[i];
+ if (sym == NULL) continue;
+ // debugBelch("enquire %p %p\n", sym, oc->lochash);
+ a = NULL;
+ if (oc->lochash != NULL) {
+ a = lookupStrHashTable(oc->lochash, sym);
+ }
+ if (a == NULL) {
+ a = lookupStrHashTable(symhash, sym);
+ }
+ if (a == NULL) {
+ // debugBelch("ghci_enquire: can't find %s\n", sym);
+ }
+ else if (addr-DELTA <= a && a <= addr+DELTA) {
+ debugBelch("%p + %3d == `%s'\n", addr, (int)(a - addr), sym);
+ }
+ }
+ }
+}
+#endif
+
+#ifdef ia64_HOST_ARCH
+static unsigned int PLTSize(void);
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Load an obj (populate the global symbol table, but don't resolve yet)
+ *
+ * Returns: 1 if ok, 0 on error.
+ */
+/* Load an object file: read (or mmap) its image into memory, verify it,
+ and populate the global symbol table with its exports. Relocation is
+ deferred to resolveObjs(). Duplicate loads of the same path are
+ silently ignored. Returns 1 on success, 0 on error (some hard errors
+ barf() instead).
+ FIX: the stat() failure path previously returned without freeing the
+ ObjectCode allocated just above it, leaking it on every attempt to
+ load a nonexistent file. */
+HsInt
+loadObj( char *path )
+{
+ ObjectCode* oc;
+ struct stat st;
+ int r, n;
+#ifdef USE_MMAP
+ int fd, pagesize;
+ void *map_addr = NULL;
+#else
+ FILE *f;
+ int misalignment;
+#endif
+ initLinker();
+
+ /* debugBelch("loadObj %s\n", path ); */
+
+ /* Check that we haven't already loaded this object.
+ Ignore requests to load multiple times */
+ {
+ ObjectCode *o;
+ int is_dup = 0;
+ for (o = objects; o; o = o->next) {
+ if (0 == strcmp(o->fileName, path)) {
+ is_dup = 1;
+ break; /* don't need to search further */
+ }
+ }
+ if (is_dup) {
+ IF_DEBUG(linker, debugBelch(
+ "GHCi runtime linker: warning: looks like you're trying to load the\n"
+ "same object file twice:\n"
+ " %s\n"
+ "GHCi will ignore this, but be warned.\n"
+ , path));
+ return 1; /* success */
+ }
+ }
+
+ oc = stgMallocBytes(sizeof(ObjectCode), "loadObj(oc)");
+
+# if defined(OBJFORMAT_ELF)
+ oc->formatName = "ELF";
+# elif defined(OBJFORMAT_PEi386)
+ oc->formatName = "PEi386";
+# elif defined(OBJFORMAT_MACHO)
+ oc->formatName = "Mach-O";
+# else
+ stgFree(oc);
+ barf("loadObj: not implemented on this platform");
+# endif
+
+ r = stat(path, &st);
+ if (r == -1) {
+ stgFree(oc); /* was leaked here before */
+ return 0;
+ }
+
+ /* sigh, strdup() isn't a POSIX function, so do it the long way */
+ oc->fileName = stgMallocBytes( strlen(path)+1, "loadObj" );
+ strcpy(oc->fileName, path);
+
+ oc->fileSize = st.st_size;
+ oc->symbols = NULL;
+ oc->sections = NULL;
+ oc->lochash = allocStrHashTable();
+ oc->proddables = NULL;
+
+ /* chain it onto the list of objects */
+ oc->next = objects;
+ objects = oc;
+
+#ifdef USE_MMAP
+#define ROUND_UP(x,size) ((x + size - 1) & ~(size - 1))
+
+ /* On many architectures malloc'd memory isn't executable, so we need to use mmap. */
+
+#if defined(openbsd_HOST_OS)
+ fd = open(path, O_RDONLY, S_IRUSR);
+#else
+ fd = open(path, O_RDONLY);
+#endif
+ if (fd == -1)
+ barf("loadObj: can't open `%s'", path);
+
+ pagesize = getpagesize();
+
+#ifdef ia64_HOST_ARCH
+ /* The PLT needs to be right before the object */
+ n = ROUND_UP(PLTSize(), pagesize);
+ oc->plt = mmap(NULL, n, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (oc->plt == MAP_FAILED)
+ barf("loadObj: can't allocate PLT");
+
+ oc->pltIndex = 0;
+ map_addr = oc->plt + n;
+#endif
+
+ n = ROUND_UP(oc->fileSize, pagesize);
+
+ /* Link objects into the lower 2Gb on x86_64. GHC assumes the
+ * small memory model on this architecture (see gcc docs,
+ * -mcmodel=small).
+ */
+#ifdef x86_64_HOST_ARCH
+#define EXTRA_MAP_FLAGS MAP_32BIT
+#else
+#define EXTRA_MAP_FLAGS 0
+#endif
+
+ oc->image = mmap(map_addr, n, PROT_EXEC|PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|EXTRA_MAP_FLAGS, fd, 0);
+ if (oc->image == MAP_FAILED)
+ barf("loadObj: can't map `%s'", path);
+
+ close(fd);
+
+#else /* !USE_MMAP */
+
+ /* load the image into memory */
+ f = fopen(path, "rb");
+ if (!f)
+ barf("loadObj: can't read `%s'", path);
+
+#ifdef darwin_HOST_OS
+ // In a Mach-O .o file, all sections can and will be misaligned
+ // if the total size of the headers is not a multiple of the
+ // desired alignment. This is fine for .o files that only serve
+ // as input for the static linker, but it's not fine for us,
+ // as SSE (used by gcc for floating point) and Altivec require
+ // 16-byte alignment.
+ // We calculate the correct alignment from the header before
+ // reading the file, and then we misalign oc->image on purpose so
+ // that the actual sections end up aligned again.
+ misalignment = machoGetMisalignment(f);
+ oc->misalignment = misalignment;
+#else
+ misalignment = 0;
+#endif
+
+ oc->image = stgMallocBytes(oc->fileSize + misalignment, "loadObj(image)");
+ oc->image += misalignment;
+
+ n = fread ( oc->image, 1, oc->fileSize, f );
+ if (n != oc->fileSize)
+ barf("loadObj: error whilst reading `%s'", path);
+
+ fclose(f);
+
+#endif /* USE_MMAP */
+
+# if defined(OBJFORMAT_MACHO) && defined(powerpc_HOST_ARCH)
+ r = ocAllocateJumpIslands_MachO ( oc );
+ if (!r) { return r; }
+# elif defined(OBJFORMAT_ELF) && defined(powerpc_HOST_ARCH)
+ r = ocAllocateJumpIslands_ELF ( oc );
+ if (!r) { return r; }
+#endif
+
+ /* NOTE(review): on the failure returns below, oc is already chained
+ onto 'objects' and stays there part-loaded -- confirm callers treat
+ a 0 return as fatal. */
+
+ /* verify the in-memory image */
+# if defined(OBJFORMAT_ELF)
+ r = ocVerifyImage_ELF ( oc );
+# elif defined(OBJFORMAT_PEi386)
+ r = ocVerifyImage_PEi386 ( oc );
+# elif defined(OBJFORMAT_MACHO)
+ r = ocVerifyImage_MachO ( oc );
+# else
+ barf("loadObj: no verify method");
+# endif
+ if (!r) { return r; }
+
+ /* build the symbol list for this image */
+# if defined(OBJFORMAT_ELF)
+ r = ocGetNames_ELF ( oc );
+# elif defined(OBJFORMAT_PEi386)
+ r = ocGetNames_PEi386 ( oc );
+# elif defined(OBJFORMAT_MACHO)
+ r = ocGetNames_MachO ( oc );
+# else
+ barf("loadObj: no getNames method");
+# endif
+ if (!r) { return r; }
+
+ /* loaded, but not resolved yet */
+ oc->status = OBJECT_LOADED;
+
+ return 1;
+}
+
+/* -----------------------------------------------------------------------------
+ * resolve all the currently unlinked objects in memory
+ *
+ * Returns: 1 if ok, 0 on error.
+ */
+/* Run the format-specific relocation pass over every loaded-but-not-yet
+ resolved object. Returns 1 if all objects resolved, 0 on the first
+ failure (remaining objects are left untouched). */
+HsInt
+resolveObjs( void )
+{
+ ObjectCode *oc;
+ int r;
+
+ initLinker();
+
+ for (oc = objects; oc; oc = oc->next) {
+ if (oc->status != OBJECT_RESOLVED) {
+# if defined(OBJFORMAT_ELF)
+ r = ocResolve_ELF ( oc );
+# elif defined(OBJFORMAT_PEi386)
+ r = ocResolve_PEi386 ( oc );
+# elif defined(OBJFORMAT_MACHO)
+ r = ocResolve_MachO ( oc );
+# else
+ barf("resolveObjs: not implemented on this platform");
+# endif
+ if (!r) { return r; }
+ oc->status = OBJECT_RESOLVED;
+ }
+ }
+ return 1;
+}
+
+/* -----------------------------------------------------------------------------
+ * delete an object from the pool
+ */
+/* Remove the object loaded from 'path' from the linked list of objects
+ and delete its symbols from the global symbol table. The image itself
+ is deliberately NOT freed (the heap may still point into it).
+ Returns 1 on success, 0 if no object with that path is loaded. */
+HsInt
+unloadObj( char *path )
+{
+ ObjectCode *oc, *prev;
+
+ ASSERT(symhash != NULL);
+ ASSERT(objects != NULL);
+
+ initLinker();
+
+ prev = NULL;
+ for (oc = objects; oc; prev = oc, oc = oc->next) {
+ if (!strcmp(oc->fileName,path)) {
+
+ /* Remove all the mappings for the symbols within this
+ * object..
+ */
+ {
+ int i;
+ for (i = 0; i < oc->n_symbols; i++) {
+ if (oc->symbols[i] != NULL) {
+ removeStrHashTable(symhash, oc->symbols[i], NULL);
+ }
+ }
+ }
+
+ /* unlink oc from the list (head or interior). */
+ if (prev == NULL) {
+ objects = oc->next;
+ } else {
+ prev->next = oc->next;
+ }
+
+ /* We're going to leave this in place, in case there are
+ any pointers from the heap into it: */
+ /* stgFree(oc->image); */
+ stgFree(oc->fileName);
+ stgFree(oc->symbols);
+ stgFree(oc->sections);
+ /* The local hash table should have been freed at the end
+ of the ocResolve_ call on it. */
+ ASSERT(oc->lochash == NULL);
+ stgFree(oc);
+ return 1;
+ }
+ }
+
+ errorBelch("unloadObj: can't find `%s' to unload", path);
+ return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Sanity checking. For each ObjectCode, maintain a list of address ranges
+ * which may be prodded during relocation, and abort if we try and write
+ * outside any of these.
+ */
+/* Record [start, start+size) as a range of 'oc' that relocation is
+ allowed to write to. Prepended to oc->proddables; consulted by
+ checkProddableBlock below. */
+static void addProddableBlock ( ObjectCode* oc, void* start, int size )
+{
+ ProddableBlock* pb
+ = stgMallocBytes(sizeof(ProddableBlock), "addProddableBlock");
+ /* debugBelch("aPB %p %p %d\n", oc, start, size); */
+ ASSERT(size > 0);
+ pb->start = start;
+ pb->size = size;
+ pb->next = oc->proddables;
+ oc->proddables = pb;
+}
+
+/* Sanity check: barf unless a 4-byte write at 'addr' falls entirely
+ inside one of oc's registered proddable blocks. */
+static void checkProddableBlock ( ObjectCode* oc, void* addr )
+{
+ ProddableBlock* pb;
+ for (pb = oc->proddables; pb != NULL; pb = pb->next) {
+ char* s = (char*)(pb->start);
+ char* e = s + pb->size - 1;
+ char* a = (char*)addr;
+ /* Assumes that the biggest fixup involves a 4-byte write. This
+ probably needs to be changed to 8 (ie, +7) on 64-bit
+ plats. */
+ if (a >= s && (a+3) <= e) return;
+ }
+ barf("checkProddableBlock: invalid fixup in runtime linker");
+}
+
+/* -----------------------------------------------------------------------------
+ * Section management.
+ */
+/* Prepend a section record covering [start, end] (end is the address of
+ the section's LAST byte, per the commented-out trace below) of the
+ given kind onto oc->sections. */
+static void addSection ( ObjectCode* oc, SectionKind kind,
+ void* start, void* end )
+{
+ Section* s = stgMallocBytes(sizeof(Section), "addSection");
+ s->start = start;
+ s->end = end;
+ s->kind = kind;
+ s->next = oc->sections;
+ oc->sections = s;
+ /*
+ debugBelch("addSection: %p-%p (size %d), kind %d\n",
+ start, ((char*)end)-1, end - start + 1, kind );
+ */
+}
+
+
+/* --------------------------------------------------------------------------
+ * PowerPC specifics (jump islands)
+ * ------------------------------------------------------------------------*/
+
+#if defined(powerpc_HOST_ARCH)
+
+/*
+ ocAllocateJumpIslands
+
+ Allocate additional space at the end of the object file image to make room
+ for jump islands.
+
+ PowerPC relative branch instructions have a 24 bit displacement field.
+ As PPC code is always 4-byte-aligned, this yields a +-32MB range.
+ If a particular imported symbol is outside this range, we have to redirect
+ the jump to a short piece of new code that just loads the 32bit absolute
+ address and jumps there.
+ This function just allocates space for one 16 byte ppcJumpIsland for every
+ undefined symbol in the object file. The code for the islands is filled in by
+ makeJumpIsland below.
+*/
+
+/* Grow oc's image by 'count' ppcJumpIsland slots, placed right after the
+ (4-byte-aligned) end of the file image, and zero them. 'first' is the
+ symbol number corresponding to island 0. Returns 1 on success, 0 on
+ mmap/mremap failure. See the comment block above for why jump islands
+ exist on PowerPC. */
+static int ocAllocateJumpIslands( ObjectCode* oc, int count, int first )
+{
+#ifdef USE_MMAP
+ int pagesize, n, m;
+#endif
+ int aligned;
+ int misalignment = 0;
+#if darwin_HOST_OS
+ misalignment = oc->misalignment;
+#endif
+
+ if( count > 0 )
+ {
+ // round up to the nearest 4
+ aligned = (oc->fileSize + 3) & ~3;
+
+#ifdef USE_MMAP
+ #ifndef linux_HOST_OS /* mremap is a linux extension */
+ #error ocAllocateJumpIslands doesnt want USE_MMAP to be defined
+ #endif
+
+ pagesize = getpagesize();
+ n = ROUND_UP( oc->fileSize, pagesize );
+ m = ROUND_UP( aligned + sizeof (ppcJumpIsland) * count, pagesize );
+
+ /* If we have a half-page-size file and map one page of it then
+ * the part of the page after the size of the file remains accessible.
+ * If, however, we map in 2 pages, the 2nd page is not accessible
+ * and will give a "Bus Error" on access. To get around this, we check
+ * if we need any extra pages for the jump islands and map them in
+ * anonymously. We must check that we actually require extra pages
+ * otherwise the attempt to mmap 0 pages of anonymous memory will
+ * fail -EINVAL.
+ */
+
+ if( m > n )
+ {
+ /* The effect of this mremap() call is only the ensure that we have
+ * a sufficient number of virtually contiguous pages. As returned from
+ * mremap, the pages past the end of the file are not backed. We give
+ * them a backing by using MAP_FIXED to map in anonymous pages.
+ */
+ oc->image = mremap( oc->image, n, m, MREMAP_MAYMOVE );
+
+ if( oc->image == MAP_FAILED )
+ {
+ errorBelch( "Unable to mremap for Jump Islands\n" );
+ return 0;
+ }
+
+ if( mmap( oc->image + n, m - n, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, 0, 0 ) == MAP_FAILED )
+ {
+ errorBelch( "Unable to mmap( MAP_FIXED ) for Jump Islands\n" );
+ return 0;
+ }
+ }
+
+#else
+ /* Non-mmap path: realloc the image. On Darwin the pointer was
+ deliberately offset by 'misalignment' (see loadObj), so undo the
+ offset before realloc and reapply it after. */
+ oc->image -= misalignment;
+ oc->image = stgReallocBytes( oc->image,
+ misalignment +
+ aligned + sizeof (ppcJumpIsland) * count,
+ "ocAllocateJumpIslands" );
+ oc->image += misalignment;
+#endif /* USE_MMAP */
+
+ oc->jump_islands = (ppcJumpIsland *) (oc->image + aligned);
+ memset( oc->jump_islands, 0, sizeof (ppcJumpIsland) * count );
+ }
+ else
+ oc->jump_islands = NULL;
+
+ oc->island_start_symbol = first;
+ oc->n_islands = count;
+
+ return 1;
+}
+
+/* Fill in the jump island for 'symbolNumber' so that branching to it
+ loads 'target' into r12 and jumps there (lis/ori/mtctr/bctr).
+ Returns the island's address, or 0 if symbolNumber is outside the
+ range covered by this object's islands.
+ FIX: valid island indices are 0 .. n_islands-1, so the upper-bound
+ test must reject a difference EQUAL to n_islands as well; the old
+ `>` allowed a write one ppcJumpIsland past the allocation. */
+static unsigned long makeJumpIsland( ObjectCode* oc,
+ unsigned long symbolNumber,
+ unsigned long target )
+{
+ ppcJumpIsland *island;
+
+ if( symbolNumber < oc->island_start_symbol ||
+ symbolNumber - oc->island_start_symbol >= oc->n_islands)
+ return 0;
+
+ island = &oc->jump_islands[symbolNumber - oc->island_start_symbol];
+
+ // lis r12, hi16(target)
+ island->lis_r12 = 0x3d80;
+ island->hi_addr = target >> 16;
+
+ // ori r12, r12, lo16(target)
+ island->ori_r12_r12 = 0x618c;
+ island->lo_addr = target & 0xffff;
+
+ // mtctr r12
+ island->mtctr_r12 = 0x7d8903a6;
+
+ // bctr
+ island->bctr = 0x4e800420;
+
+ return (unsigned long) island;
+}
+
+/*
+ ocFlushInstructionCache
+
+ Flush the data & instruction caches.
+ Because the PPC has split data/instruction caches, we have to
+ do that whenever we modify code at runtime.
+ */
+
+/* Flush the data cache and invalidate the instruction cache over the
+ whole image (file plus jump islands), one word at a time, then sync.
+ Required on PPC after patching code because the D- and I-caches are
+ split. The +3)/4 rounds the byte count up to whole 4-byte words. */
+static void ocFlushInstructionCache( ObjectCode *oc )
+{
+ int n = (oc->fileSize + sizeof( ppcJumpIsland ) * oc->n_islands + 3) / 4;
+ unsigned long *p = (unsigned long *) oc->image;
+
+ while( n-- )
+ {
+ /* dcbf: flush data-cache block; icbi: invalidate i-cache block. */
+ __asm__ volatile ( "dcbf 0,%0\n\t"
+ "sync\n\t"
+ "icbi 0,%0"
+ :
+ : "r" (p)
+ );
+ p++;
+ }
+ __asm__ volatile ( "sync\n\t"
+ "isync"
+ );
+}
+#endif
+
+/* --------------------------------------------------------------------------
+ * PEi386 specifics (Win32 targets)
+ * ------------------------------------------------------------------------*/
+
+/* The information for this linker comes from
+ Microsoft Portable Executable
+ and Common Object File Format Specification
+ revision 5.1 January 1998
+ which SimonM says comes from the MS Developer Network CDs.
+
+ It can be found there (on older CDs), but can also be found
+ online at:
+
+ http://www.microsoft.com/hwdev/hardware/PECOFF.asp
+
+ (this is Rev 6.0 from February 1999).
+
+ Things move, so if that fails, try searching for it via
+
+ http://www.google.com/search?q=PE+COFF+specification
+
+ The ultimate reference for the PE format is the Winnt.h
+ header file that comes with the Platform SDKs; as always,
+ implementations will drift wrt their documentation.
+
+ A good background article on the PE format is Matt Pietrek's
+ March 1994 article in Microsoft System Journal (MSJ)
+ (Vol.9, No. 3): "Peering Inside the PE: A Tour of the
+ Win32 Portable Executable File Format." The info in there
+ has recently been updated in a two part article in
+ MSDN magazine, issues Feb and March 2002,
+ "Inside Windows: An In-Depth Look into the Win32 Portable
+ Executable File Format"
+
+ John Levine's book "Linkers and Loaders" contains useful
+ info on PE too.
+*/
+
+
+#if defined(OBJFORMAT_PEi386)
+
+
+
+typedef unsigned char UChar;
+typedef unsigned short UInt16;
+typedef unsigned int UInt32;
+typedef int Int32;
+
+
+typedef
+ struct {
+ UInt16 Machine;
+ UInt16 NumberOfSections;
+ UInt32 TimeDateStamp;
+ UInt32 PointerToSymbolTable;
+ UInt32 NumberOfSymbols;
+ UInt16 SizeOfOptionalHeader;
+ UInt16 Characteristics;
+ }
+ COFF_header;
+
+#define sizeof_COFF_header 20
+
+
+typedef
+ struct {
+ UChar Name[8];
+ UInt32 VirtualSize;
+ UInt32 VirtualAddress;
+ UInt32 SizeOfRawData;
+ UInt32 PointerToRawData;
+ UInt32 PointerToRelocations;
+ UInt32 PointerToLinenumbers;
+ UInt16 NumberOfRelocations;
+ UInt16 NumberOfLineNumbers;
+ UInt32 Characteristics;
+ }
+ COFF_section;
+
+#define sizeof_COFF_section 40
+
+
+typedef
+ struct {
+ UChar Name[8];
+ UInt32 Value;
+ UInt16 SectionNumber;
+ UInt16 Type;
+ UChar StorageClass;
+ UChar NumberOfAuxSymbols;
+ }
+ COFF_symbol;
+
+#define sizeof_COFF_symbol 18
+
+
+typedef
+ struct {
+ UInt32 VirtualAddress;
+ UInt32 SymbolTableIndex;
+ UInt16 Type;
+ }
+ COFF_reloc;
+
+#define sizeof_COFF_reloc 10
+
+
+/* From PE spec doc, section 3.3.2 */
+/* Note use of MYIMAGE_* since IMAGE_* are already defined in
+ windows.h -- for the same purpose, but I want to know what I'm
+ getting, here. */
+#define MYIMAGE_FILE_RELOCS_STRIPPED 0x0001
+#define MYIMAGE_FILE_EXECUTABLE_IMAGE 0x0002
+#define MYIMAGE_FILE_DLL 0x2000
+#define MYIMAGE_FILE_SYSTEM 0x1000
+#define MYIMAGE_FILE_BYTES_REVERSED_HI 0x8000
+#define MYIMAGE_FILE_BYTES_REVERSED_LO 0x0080
+#define MYIMAGE_FILE_32BIT_MACHINE 0x0100
+
+/* From PE spec doc, section 5.4.2 and 5.4.4 */
+#define MYIMAGE_SYM_CLASS_EXTERNAL 2
+#define MYIMAGE_SYM_CLASS_STATIC 3
+#define MYIMAGE_SYM_UNDEFINED 0
+
+/* From PE spec doc, section 4.1 */
+#define MYIMAGE_SCN_CNT_CODE 0x00000020
+#define MYIMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040
+#define MYIMAGE_SCN_LNK_NRELOC_OVFL 0x01000000
+
+/* From PE spec doc, section 5.2.1 */
+#define MYIMAGE_REL_I386_DIR32 0x0006
+#define MYIMAGE_REL_I386_REL32 0x0014
+
+
+/* We use myindex to calculate array addresses, rather than
+ simply doing the normal subscript thing. That's because
+ some of the above structs have sizes which are not
+ a whole number of words. GCC rounds their sizes up to a
+ whole number of words, which means that the address calcs
+ arising from using normal C indexing or pointer arithmetic
+ are just plain wrong. Sigh.
+*/
+/* Byte-exact array indexing: base + scale*index. Used instead of C
+ subscripting because several COFF structs (see the comment above)
+ have sizes that are not a multiple of the word size, so normal
+ pointer arithmetic on them would be wrong. */
+static UChar *
+myindex ( int scale, void* base, int index )
+{
+ return
+ ((UChar*)base) + scale * index;
+}
+
+
+/* Debug-print a COFF symbol name. A name whose first 4 bytes are zero
+ is a string-table reference (offset in bytes 4..7); otherwise it is
+ an inline name of up to 8 bytes, NOT necessarily NUL-terminated. */
+static void
+printName ( UChar* name, UChar* strtab )
+{
+ if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
+ UInt32 strtab_offset = * (UInt32*)(name+4);
+ debugBelch("%s", strtab + strtab_offset );
+ } else {
+ int i;
+ for (i = 0; i < 8; i++) {
+ if (name[i] == 0) break;
+ debugBelch("%c", name[i] );
+ }
+ }
+}
+
+
+/* Copy a COFF symbol name (inline 8-byte form or string-table form --
+ see printName) into dst, always NUL-terminating. At most dstSize-1
+ characters are kept; longer string-table names are truncated. */
+static void
+copyName ( UChar* name, UChar* strtab, UChar* dst, int dstSize )
+{
+ if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
+ UInt32 strtab_offset = * (UInt32*)(name+4);
+ strncpy ( dst, strtab+strtab_offset, dstSize );
+ dst[dstSize-1] = 0; /* strncpy may not terminate; force it */
+ } else {
+ int i = 0;
+ while (1) {
+ if (i >= 8) break;
+ if (name[i] == 0) break;
+ dst[i] = name[i];
+ i++;
+ }
+ dst[i] = 0;
+ }
+}
+
+
+/* Return a NUL-terminated C string for a COFF symbol name. Three cases:
+ string-table reference (already terminated), inline name of < 8 bytes
+ (terminated by definition), or exactly 8 bytes -- the last case is
+ copied into a fresh 9-byte buffer which is intentionally never freed. */
+static UChar *
+cstring_from_COFF_symbol_name ( UChar* name, UChar* strtab )
+{
+ UChar* newstr;
+ /* If the string is longer than 8 bytes, look in the
+ string table for it -- this will be correctly zero terminated.
+ */
+ if (name[0]==0 && name[1]==0 && name[2]==0 && name[3]==0) {
+ UInt32 strtab_offset = * (UInt32*)(name+4);
+ return ((UChar*)strtab) + strtab_offset;
+ }
+ /* Otherwise, if shorter than 8 bytes, return the original,
+ which by defn is correctly terminated.
+ */
+ if (name[7]==0) return name;
+ /* The annoying case: 8 bytes. Copy into a temporary
+ (which is never freed ...)
+ */
+ newstr = stgMallocBytes(9, "cstring_from_COFF_symbol_name");
+ ASSERT(newstr);
+ strncpy(newstr,name,8);
+ newstr[8] = 0;
+ return newstr;
+}
+
+
+/* Just compares the short names (first 8 chars) */
+static COFF_section *
+findPEi386SectionCalled ( ObjectCode* oc, char* name )
+{
+ int i;
+ COFF_header* hdr
+ = (COFF_header*)(oc->image);
+ COFF_section* sectab
+ = (COFF_section*) (
+ ((UChar*)(oc->image))
+ + sizeof_COFF_header + hdr->SizeOfOptionalHeader
+ );
+ for (i = 0; i < hdr->NumberOfSections; i++) {
+ UChar* n1;
+ UChar* n2;
+ COFF_section* section_i
+ = (COFF_section*)
+ myindex ( sizeof_COFF_section, sectab, i );
+ n1 = (UChar*) &(section_i->Name);
+ n2 = name;
+ if (n1[0]==n2[0] && n1[1]==n2[1] && n1[2]==n2[2] &&
+ n1[3]==n2[3] && n1[4]==n2[4] && n1[5]==n2[5] &&
+ n1[6]==n2[6] && n1[7]==n2[7])
+ return section_i;
+ }
+
+ return NULL;
+}
+
+
+/* Strip a stdcall-style "@NN" suffix from a symbol name in place:
+ if the name ends in '@' followed by one or more digits (and the '@'
+ is neither the first nor the last character), truncate at the '@'. */
+static void
+zapTrailingAtSign ( UChar* sym )
+{
+# define my_isdigit(c) ((c) >= '0' && (c) <= '9')
+ int i, j;
+ if (sym[0] == 0) return;
+ i = 0;
+ while (sym[i] != 0) i++;
+ i--; /* i = index of last character */
+ j = i;
+ while (j > 0 && my_isdigit(sym[j])) j--;
+ /* j != i ensures at least one digit followed the '@'. */
+ if (j > 0 && sym[j] == '@' && j != i) sym[j] = 0;
+# undef my_isdigit
+}
+
+
/* Sanity-check a PEi386 (COFF) object image before loading: verify
   the machine type, optional-header size and file characteristics.
   Returns 1 if the image looks like a loadable x86 relocatable
   object, 0 otherwise.  When the 'linker' debug flag is on, also
   dumps the section table, relocations, string table and symbol
   table (debug output only; no further verification happens after
   the characteristics checks). */
static int
ocVerifyImage_PEi386 ( ObjectCode* oc )
{
   int i;
   UInt32 j, noRelocs;
   COFF_header*  hdr;
   COFF_section* sectab;
   COFF_symbol*  symtab;
   UChar*        strtab;
   /* debugBelch("\nLOADING %s\n", oc->fileName); */
   hdr = (COFF_header*)(oc->image);
   /* section table follows the (empty, see below) optional header */
   sectab = (COFF_section*) (
               ((UChar*)(oc->image))
               + sizeof_COFF_header + hdr->SizeOfOptionalHeader
            );
   symtab = (COFF_symbol*) (
               ((UChar*)(oc->image))
               + hdr->PointerToSymbolTable
            );
   /* the string table sits immediately after the symbol table */
   strtab = ((UChar*)symtab)
            + hdr->NumberOfSymbols * sizeof_COFF_symbol;

   /* 0x14c is the PE machine id for Intel 386. */
   if (hdr->Machine != 0x14c) {
      errorBelch("%s: Not x86 PEi386", oc->fileName);
      return 0;
   }
   if (hdr->SizeOfOptionalHeader != 0) {
      errorBelch("%s: PEi386 with nonempty optional header", oc->fileName);
      return 0;
   }
   /* reject executables, DLLs and system files: we only load
      relocatable objects */
   if ( /* (hdr->Characteristics & MYIMAGE_FILE_RELOCS_STRIPPED) || */
        (hdr->Characteristics & MYIMAGE_FILE_EXECUTABLE_IMAGE) ||
        (hdr->Characteristics & MYIMAGE_FILE_DLL) ||
        (hdr->Characteristics & MYIMAGE_FILE_SYSTEM) ) {
      errorBelch("%s: Not a PEi386 object file", oc->fileName);
      return 0;
   }
   if ( (hdr->Characteristics & MYIMAGE_FILE_BYTES_REVERSED_HI)
        /* || !(hdr->Characteristics & MYIMAGE_FILE_32BIT_MACHINE) */ ) {
      errorBelch("%s: Invalid PEi386 word size or endiannness: %d",
                 oc->fileName,
                 (int)(hdr->Characteristics));
      return 0;
   }
   /* If the string table size is way crazy, this might indicate that
      there are more than 64k relocations, despite claims to the
      contrary.  Hence this test. */
   /* debugBelch("strtab size %d\n", * (UInt32*)strtab); */
#if 0
   if ( (*(UInt32*)strtab) > 600000 ) {
      /* Note that 600k has no special significance other than being
         big enough to handle the almost-2MB-sized lumps that
         constitute HSwin32*.o. */
      debugBelch("PEi386 object has suspiciously large string table; > 64k relocs?");
      return 0;
   }
#endif

   /* No further verification after this point; only debug printing. */
   i = 0;
   IF_DEBUG(linker, i=1);
   if (i == 0) return 1;

   debugBelch( "sectab offset = %d\n", ((UChar*)sectab) - ((UChar*)hdr) );
   debugBelch( "symtab offset = %d\n", ((UChar*)symtab) - ((UChar*)hdr) );
   debugBelch( "strtab offset = %d\n", ((UChar*)strtab) - ((UChar*)hdr) );

   debugBelch("\n" );
   debugBelch( "Machine:           0x%x\n", (UInt32)(hdr->Machine) );
   debugBelch( "# sections:        %d\n",   (UInt32)(hdr->NumberOfSections) );
   debugBelch( "time/date:         0x%x\n", (UInt32)(hdr->TimeDateStamp) );
   debugBelch( "symtab offset:     %d\n",   (UInt32)(hdr->PointerToSymbolTable) );
   debugBelch( "# symbols:         %d\n",   (UInt32)(hdr->NumberOfSymbols) );
   debugBelch( "sz of opt hdr:     %d\n",   (UInt32)(hdr->SizeOfOptionalHeader) );
   debugBelch( "characteristics:   0x%x\n", (UInt32)(hdr->Characteristics) );

   /* Print the section table. */
   debugBelch("\n" );
   for (i = 0; i < hdr->NumberOfSections; i++) {
      COFF_reloc* reltab;
      COFF_section* sectab_i
         = (COFF_section*)
           myindex ( sizeof_COFF_section, sectab, i );
      debugBelch(
                "\n"
                "section %d\n"
                "     name `",
                i
              );
      printName ( sectab_i->Name, strtab );
      debugBelch(
                "'\n"
                "    vsize %d\n"
                "    vaddr %d\n"
                "  data sz %d\n"
                " data off %d\n"
                "  num rel %d\n"
                "  off rel %d\n"
                "  ptr raw 0x%x\n",
                sectab_i->VirtualSize,
                sectab_i->VirtualAddress,
                sectab_i->SizeOfRawData,
                sectab_i->PointerToRawData,
                sectab_i->NumberOfRelocations,
                sectab_i->PointerToRelocations,
                sectab_i->PointerToRawData
              );
      reltab = (COFF_reloc*) (
                  ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
               );

      if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
        /* If the relocation field (a short) has overflowed, the
         * real count can be found in the first reloc entry.
         *
         * See Section 4.1 (last para) of the PE spec (rev6.0).
         */
        COFF_reloc* rel = (COFF_reloc*)
                           myindex ( sizeof_COFF_reloc, reltab, 0 );
        noRelocs = rel->VirtualAddress;
        j = 1;
      } else {
        noRelocs = sectab_i->NumberOfRelocations;
        j = 0;
      }

      for (; j < noRelocs; j++) {
         COFF_symbol* sym;
         COFF_reloc* rel = (COFF_reloc*)
                           myindex ( sizeof_COFF_reloc, reltab, j );
         debugBelch(
                   "        type 0x%-4x   vaddr 0x%-8x   name `",
                   (UInt32)rel->Type,
                   rel->VirtualAddress );
         sym = (COFF_symbol*)
               myindex ( sizeof_COFF_symbol, symtab, rel->SymbolTableIndex );
         /* Hmm..mysterious looking offset - what's it for? SOF */
         printName ( sym->Name, strtab -10 );
         debugBelch("'\n" );
      }

      debugBelch("\n" );
   }
   debugBelch("\n" );
   debugBelch("string table has size 0x%x\n", * (UInt32*)strtab );
   debugBelch("---START of string table---\n");
   /* first 4 bytes of the string table hold its own size, so start at 4 */
   for (i = 4; i < *(Int32*)strtab; i++) {
      if (strtab[i] == 0)
         debugBelch("\n"); else
         debugBelch("%c", strtab[i] );
   }
   debugBelch("--- END  of string table---\n");

   debugBelch("\n" );
   i = 0;
   while (1) {
      COFF_symbol* symtab_i;
      if (i >= (Int32)(hdr->NumberOfSymbols)) break;
      symtab_i = (COFF_symbol*)
                 myindex ( sizeof_COFF_symbol, symtab, i );
      debugBelch(
                "symbol %d\n"
                "     name `",
                i
              );
      printName ( symtab_i->Name, strtab );
      debugBelch(
                "'\n"
                "    value 0x%x\n"
                "   1+sec# %d\n"
                "     type 0x%x\n"
                "   sclass 0x%x\n"
                "     nAux %d\n",
                symtab_i->Value,
                (Int32)(symtab_i->SectionNumber),
                (UInt32)symtab_i->Type,
                (UInt32)symtab_i->StorageClass,
                (UInt32)symtab_i->NumberOfAuxSymbols
              );
      /* skip over this symbol's auxiliary records */
      i += symtab_i->NumberOfAuxSymbols;
      i++;
   }

   debugBelch("\n" );
   return 1;
}
+
+
/* Walk a PEi386 object's section and symbol tables: allocate zeroed
   space for .bss-style data (patching section headers so they point
   at it), record each recognised section in the ObjectCode, and
   enter every defined external symbol into the linker's symbol hash
   table.  Returns 1 on success, 0 on failure. */
static int
ocGetNames_PEi386 ( ObjectCode* oc )
{
   COFF_header*  hdr;
   COFF_section* sectab;
   COFF_symbol*  symtab;
   UChar*        strtab;

   UChar* sname;
   void*  addr;
   int    i;

   hdr = (COFF_header*)(oc->image);
   sectab = (COFF_section*) (
               ((UChar*)(oc->image))
               + sizeof_COFF_header + hdr->SizeOfOptionalHeader
            );
   symtab = (COFF_symbol*) (
               ((UChar*)(oc->image))
               + hdr->PointerToSymbolTable
            );
   /* the string table sits immediately after the symbol table */
   strtab = ((UChar*)(oc->image))
            + hdr->PointerToSymbolTable
            + hdr->NumberOfSymbols * sizeof_COFF_symbol;

   /* Allocate space for any (local, anonymous) .bss sections. */

   for (i = 0; i < hdr->NumberOfSections; i++) {
      UInt32 bss_sz;
      UChar* zspace;
      COFF_section* sectab_i
         = (COFF_section*)
           myindex ( sizeof_COFF_section, sectab, i );
      /* NOTE(review): strcmp on the 8-byte Name field relies on names
         shorter than 8 chars being NUL-padded, as the COFF short-name
         convention used elsewhere in this file assumes. */
      if (0 != strcmp(sectab_i->Name, ".bss")) continue;
      /* sof 10/05: the PE spec text isn't too clear regarding what
       * the SizeOfRawData field is supposed to hold for object
       * file sections containing just uninitialized data -- for executables,
       * it is supposed to be zero; unclear what it's supposed to be
       * for object files. However, VirtualSize is guaranteed to be
       * zero for object files, which definitely suggests that SizeOfRawData
       * will be non-zero (where else would the size of this .bss section be
       * stored?) Looking at the COFF_section info for incoming object files,
       * this certainly appears to be the case.
       *
       * => I suspect we've been incorrectly handling .bss sections in (relocatable)
       * object files up until now. This turned out to bite us with ghc-6.4.1's use
       * of gcc-3.4.x, which has started to emit initially-zeroed-out local 'static'
       * variable decls into to the .bss section. (The specific function in Q which
       * triggered this is libraries/base/cbits/dirUtils.c:__hscore_getFolderPath())
       */
      if (sectab_i->VirtualSize == 0 && sectab_i->SizeOfRawData == 0) continue;
      /* This is a non-empty .bss section.  Allocate zeroed space for
         it, and set its PointerToRawData field such that oc->image +
         PointerToRawData == addr_of_zeroed_space.  */
      bss_sz = sectab_i->VirtualSize;
      if ( bss_sz < sectab_i->SizeOfRawData) { bss_sz = sectab_i->SizeOfRawData; }
      zspace = stgCallocBytes(1, bss_sz, "ocGetNames_PEi386(anonymous bss)");
      /* note: this offset may be "negative" as a UInt32, since zspace
         is a separate allocation; it round-trips when added back */
      sectab_i->PointerToRawData = ((UChar*)zspace) - ((UChar*)(oc->image));
      addProddableBlock(oc, zspace, bss_sz);
      /* debugBelch("BSS anon section at 0x%x\n", zspace); */
   }

   /* Copy section information into the ObjectCode. */

   for (i = 0; i < hdr->NumberOfSections; i++) {
      UChar* start;
      UChar* end;
      UInt32 sz;

      SectionKind kind
         = SECTIONKIND_OTHER;
      COFF_section* sectab_i
         = (COFF_section*)
           myindex ( sizeof_COFF_section, sectab, i );
      IF_DEBUG(linker, debugBelch("section name = %s\n", sectab_i->Name ));

#     if 0
      /* I'm sure this is the Right Way to do it.  However, the
         alternative of testing the sectab_i->Name field seems to
         work ok with Cygwin.
      */
      if (sectab_i->Characteristics & MYIMAGE_SCN_CNT_CODE ||
          sectab_i->Characteristics & MYIMAGE_SCN_CNT_INITIALIZED_DATA)
         kind = SECTIONKIND_CODE_OR_RODATA;
#     endif

      /* classify by well-known section name */
      if (0==strcmp(".text",sectab_i->Name) ||
          0==strcmp(".rdata",sectab_i->Name)||
          0==strcmp(".rodata",sectab_i->Name))
         kind = SECTIONKIND_CODE_OR_RODATA;
      if (0==strcmp(".data",sectab_i->Name) ||
          0==strcmp(".bss",sectab_i->Name))
         kind = SECTIONKIND_RWDATA;

      /* exactly one of the two size fields should be in use */
      ASSERT(sectab_i->SizeOfRawData == 0 || sectab_i->VirtualSize == 0);
      sz = sectab_i->SizeOfRawData;
      if (sz < sectab_i->VirtualSize) sz = sectab_i->VirtualSize;

      start = ((UChar*)(oc->image)) + sectab_i->PointerToRawData;
      end   = start + sz - 1;

      if (kind == SECTIONKIND_OTHER
          /* Ignore sections called which contain stabs debugging
             information. */
          && 0 != strcmp(".stab", sectab_i->Name)
          && 0 != strcmp(".stabstr", sectab_i->Name)
          /* ignore constructor section for now */
          && 0 != strcmp(".ctors", sectab_i->Name)
         ) {
         errorBelch("Unknown PEi386 section name `%s' (while processing: %s)", sectab_i->Name, oc->fileName);
         return 0;
      }

      if (kind != SECTIONKIND_OTHER && end >= start) {
         addSection(oc, kind, start, end);
         addProddableBlock(oc, start, end - start + 1);
      }
   }

   /* Copy exported symbols into the ObjectCode. */

   oc->n_symbols = hdr->NumberOfSymbols;
   oc->symbols   = stgMallocBytes(oc->n_symbols * sizeof(char*),
                                  "ocGetNames_PEi386(oc->symbols)");
   /* Call me paranoid; I don't care. */
   for (i = 0; i < oc->n_symbols; i++)
      oc->symbols[i] = NULL;

   i = 0;
   while (1) {
      COFF_symbol* symtab_i;
      if (i >= (Int32)(hdr->NumberOfSymbols)) break;
      symtab_i = (COFF_symbol*)
                 myindex ( sizeof_COFF_symbol, symtab, i );

      addr = NULL;

      if (symtab_i->StorageClass == MYIMAGE_SYM_CLASS_EXTERNAL
          && symtab_i->SectionNumber != MYIMAGE_SYM_UNDEFINED) {
         /* This symbol is global and defined, viz, exported */
         /* for MYIMAGE_SYMCLASS_EXTERNAL
               && !MYIMAGE_SYM_UNDEFINED,
            the address of the symbol is:
                address of relevant section + offset in section
         */
         COFF_section* sectabent
            = (COFF_section*) myindex ( sizeof_COFF_section,
                                        sectab,
                                        symtab_i->SectionNumber-1 );
         addr = ((UChar*)(oc->image))
                + (sectabent->PointerToRawData
                   + symtab_i->Value);
      }
      else
      if (symtab_i->SectionNumber == MYIMAGE_SYM_UNDEFINED
          && symtab_i->Value > 0) {
         /* This symbol isn't in any section at all, ie, global bss.
            Allocate zeroed space for it. */
         addr = stgCallocBytes(1, symtab_i->Value,
                               "ocGetNames_PEi386(non-anonymous bss)");
         addSection(oc, SECTIONKIND_RWDATA, addr,
                        ((UChar*)addr) + symtab_i->Value - 1);
         addProddableBlock(oc, addr, symtab_i->Value);
         /* debugBelch("BSS      section at 0x%x\n", addr); */
      }

      if (addr != NULL ) {
         sname = cstring_from_COFF_symbol_name ( symtab_i->Name, strtab );
         /* debugBelch("addSymbol %p `%s \n", addr,sname);  */
         IF_DEBUG(linker, debugBelch("addSymbol %p `%s'\n", addr,sname);)
         ASSERT(i >= 0 && i < oc->n_symbols);
         /* cstring_from_COFF_symbol_name always succeeds. */
         oc->symbols[i] = sname;
         ghciInsertStrHashTable(oc->fileName, symhash, sname, addr);
      } else {
#        if 0
         debugBelch(
                   "IGNORING symbol %d\n"
                   "     name `",
                   i
                 );
         printName ( symtab_i->Name, strtab );
         debugBelch(
                   "'\n"
                   "    value 0x%x\n"
                   "   1+sec# %d\n"
                   "     type 0x%x\n"
                   "   sclass 0x%x\n"
                   "     nAux %d\n",
                   symtab_i->Value,
                   (Int32)(symtab_i->SectionNumber),
                   (UInt32)symtab_i->Type,
                   (UInt32)symtab_i->StorageClass,
                   (UInt32)symtab_i->NumberOfAuxSymbols
                 );
#        endif
      }

      /* skip over this symbol's auxiliary records */
      i += symtab_i->NumberOfAuxSymbols;
      i++;
   }

   return 1;
}
+
+
+static int
+ocResolve_PEi386 ( ObjectCode* oc )
+{
+ COFF_header* hdr;
+ COFF_section* sectab;
+ COFF_symbol* symtab;
+ UChar* strtab;
+
+ UInt32 A;
+ UInt32 S;
+ UInt32* pP;
+
+ int i;
+ UInt32 j, noRelocs;
+
+ /* ToDo: should be variable-sized? But is at least safe in the
+ sense of buffer-overrun-proof. */
+ char symbol[1000];
+ /* debugBelch("resolving for %s\n", oc->fileName); */
+
+ hdr = (COFF_header*)(oc->image);
+ sectab = (COFF_section*) (
+ ((UChar*)(oc->image))
+ + sizeof_COFF_header + hdr->SizeOfOptionalHeader
+ );
+ symtab = (COFF_symbol*) (
+ ((UChar*)(oc->image))
+ + hdr->PointerToSymbolTable
+ );
+ strtab = ((UChar*)(oc->image))
+ + hdr->PointerToSymbolTable
+ + hdr->NumberOfSymbols * sizeof_COFF_symbol;
+
+ for (i = 0; i < hdr->NumberOfSections; i++) {
+ COFF_section* sectab_i
+ = (COFF_section*)
+ myindex ( sizeof_COFF_section, sectab, i );
+ COFF_reloc* reltab
+ = (COFF_reloc*) (
+ ((UChar*)(oc->image)) + sectab_i->PointerToRelocations
+ );
+
+ /* Ignore sections called which contain stabs debugging
+ information. */
+ if (0 == strcmp(".stab", sectab_i->Name)
+ || 0 == strcmp(".stabstr", sectab_i->Name)
+ || 0 == strcmp(".ctors", sectab_i->Name))
+ continue;
+
+ if ( sectab_i->Characteristics & MYIMAGE_SCN_LNK_NRELOC_OVFL ) {
+ /* If the relocation field (a short) has overflowed, the
+ * real count can be found in the first reloc entry.
+ *
+ * See Section 4.1 (last para) of the PE spec (rev6.0).
+ *
+ * Nov2003 update: the GNU linker still doesn't correctly
+ * handle the generation of relocatable object files with
+ * overflown relocations. Hence the output to warn of potential
+ * troubles.
+ */
+ COFF_reloc* rel = (COFF_reloc*)
+ myindex ( sizeof_COFF_reloc, reltab, 0 );
+ noRelocs = rel->VirtualAddress;
+
+ /* 10/05: we now assume (and check for) a GNU ld that is capable
+ * of handling object files with (>2^16) of relocs.
+ */
+#if 0
+ debugBelch("WARNING: Overflown relocation field (# relocs found: %u)\n",
+ noRelocs);
+#endif
+ j = 1;
+ } else {
+ noRelocs = sectab_i->NumberOfRelocations;
+ j = 0;
+ }
+
+
+ for (; j < noRelocs; j++) {
+ COFF_symbol* sym;
+ COFF_reloc* reltab_j
+ = (COFF_reloc*)
+ myindex ( sizeof_COFF_reloc, reltab, j );
+
+ /* the location to patch */
+ pP = (UInt32*)(
+ ((UChar*)(oc->image))
+ + (sectab_i->PointerToRawData
+ + reltab_j->VirtualAddress
+ - sectab_i->VirtualAddress )
+ );
+ /* the existing contents of pP */
+ A = *pP;
+ /* the symbol to connect to */
+ sym = (COFF_symbol*)
+ myindex ( sizeof_COFF_symbol,
+ symtab, reltab_j->SymbolTableIndex );
+ IF_DEBUG(linker,
+ debugBelch(
+ "reloc sec %2d num %3d: type 0x%-4x "
+ "vaddr 0x%-8x name `",
+ i, j,
+ (UInt32)reltab_j->Type,
+ reltab_j->VirtualAddress );
+ printName ( sym->Name, strtab );
+ debugBelch("'\n" ));
+
+ if (sym->StorageClass == MYIMAGE_SYM_CLASS_STATIC) {
+ COFF_section* section_sym
+ = findPEi386SectionCalled ( oc, sym->Name );
+ if (!section_sym) {
+ errorBelch("%s: can't find section `%s'", oc->fileName, sym->Name);
+ return 0;
+ }
+ S = ((UInt32)(oc->image))
+ + (section_sym->PointerToRawData
+ + sym->Value);
+ } else {
+ copyName ( sym->Name, strtab, symbol, 1000-1 );
+ (void*)S = lookupLocalSymbol( oc, symbol );
+ if ((void*)S != NULL) goto foundit;
+ (void*)S = lookupSymbol( symbol );
+ if ((void*)S != NULL) goto foundit;
+ zapTrailingAtSign ( symbol );
+ (void*)S = lookupLocalSymbol( oc, symbol );
+ if ((void*)S != NULL) goto foundit;
+ (void*)S = lookupSymbol( symbol );
+ if ((void*)S != NULL) goto foundit;
+ /* Newline first because the interactive linker has printed "linking..." */
+ errorBelch("\n%s: unknown symbol `%s'", oc->fileName, symbol);
+ return 0;
+ foundit:;
+ }
+ checkProddableBlock(oc, pP);
+ switch (reltab_j->Type) {
+ case MYIMAGE_REL_I386_DIR32:
+ *pP = A + S;
+ break;
+ case MYIMAGE_REL_I386_REL32:
+ /* Tricky. We have to insert a displacement at
+ pP which, when added to the PC for the _next_
+ insn, gives the address of the target (S).
+ Problem is to know the address of the next insn
+ when we only know pP. We assume that this
+ literal field is always the last in the insn,
+ so that the address of the next insn is pP+4
+ -- hence the constant 4.
+ Also I don't know if A should be added, but so
+ far it has always been zero.
+
+ SOF 05/2005: 'A' (old contents of *pP) have been observed
+ to contain values other than zero (the 'wx' object file
+ that came with wxhaskell-0.9.4; dunno how it was compiled..).
+ So, add displacement to old value instead of asserting
+ A to be zero. Fixes wxhaskell-related crashes, and no other
+ ill effects have been observed.
+
+ Update: the reason why we're seeing these more elaborate
+ relocations is due to a switch in how the NCG compiles SRTs
+ and offsets to them from info tables. SRTs live in .(ro)data,
+ while info tables live in .text, causing GAS to emit REL32/DISP32
+ relocations with non-zero values. Adding the displacement is
+ the right thing to do.
+ */
+ *pP = S - ((UInt32)pP) - 4 + A;
+ break;
+ default:
+ debugBelch("%s: unhandled PEi386 relocation type %d",
+ oc->fileName, reltab_j->Type);
+ return 0;
+ }
+
+ }
+ }
+
+ IF_DEBUG(linker, debugBelch("completed %s", oc->fileName));
+ return 1;
+}
+
+#endif /* defined(OBJFORMAT_PEi386) */
+
+
+/* --------------------------------------------------------------------------
+ * ELF specifics
+ * ------------------------------------------------------------------------*/
+
+#if defined(OBJFORMAT_ELF)
+
+#define FALSE 0
+#define TRUE 1
+
+#if defined(sparc_HOST_ARCH)
+# define ELF_TARGET_SPARC /* Used inside <elf.h> */
+#elif defined(i386_HOST_ARCH)
+# define ELF_TARGET_386 /* Used inside <elf.h> */
+#elif defined(x86_64_HOST_ARCH)
+# define ELF_TARGET_X64_64
+# define ELF_64BIT
+#elif defined (ia64_HOST_ARCH)
+# define ELF_TARGET_IA64 /* Used inside <elf.h> */
+# define ELF_64BIT
+# define ELF_FUNCTION_DESC /* calling convention uses function descriptors */
+# define ELF_NEED_GOT /* needs Global Offset Table */
+# define ELF_NEED_PLT /* needs Procedure Linkage Tables */
+#endif
+
+#if !defined(openbsd_HOST_OS)
+#include <elf.h>
+#else
+/* openbsd elf has things in different places, with diff names */
+#include <elf_abi.h>
+#include <machine/reloc.h>
+#define R_386_32 RELOC_32
+#define R_386_PC32 RELOC_PC32
+#endif
+
+/*
+ * Define a set of types which can be used for both ELF32 and ELF64
+ */
+
+#ifdef ELF_64BIT
+#define ELFCLASS ELFCLASS64
+#define Elf_Addr Elf64_Addr
+#define Elf_Word Elf64_Word
+#define Elf_Sword Elf64_Sword
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Phdr Elf64_Phdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Rel Elf64_Rel
+#define Elf_Rela Elf64_Rela
+#define ELF_ST_TYPE ELF64_ST_TYPE
+#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_R_TYPE ELF64_R_TYPE
+#define ELF_R_SYM ELF64_R_SYM
+#else
+#define ELFCLASS ELFCLASS32
+#define Elf_Addr Elf32_Addr
+#define Elf_Word Elf32_Word
+#define Elf_Sword Elf32_Sword
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Phdr Elf32_Phdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Rel Elf32_Rel
+#define Elf_Rela Elf32_Rela
+#ifndef ELF_ST_TYPE
+#define ELF_ST_TYPE ELF32_ST_TYPE
+#endif
+#ifndef ELF_ST_BIND
+#define ELF_ST_BIND ELF32_ST_BIND
+#endif
+#ifndef ELF_R_TYPE
+#define ELF_R_TYPE ELF32_R_TYPE
+#endif
+#ifndef ELF_R_SYM
+#define ELF_R_SYM ELF32_R_SYM
+#endif
+#endif
+
+
+/*
+ * Functions to allocate entries in dynamic sections. Currently we simply
+ * preallocate a large number, and we don't check if a entry for the given
+ * target already exists (a linear search is too slow). Ideally these
+ * entries would be associated with symbols.
+ */
+
+/* These sizes sufficient to load HSbase + HShaskell98 + a few modules */
+#define GOT_SIZE 0x20000
+#define FUNCTION_TABLE_SIZE 0x10000
+#define PLT_SIZE 0x08000
+
+#ifdef ELF_NEED_GOT
+static Elf_Addr got[GOT_SIZE];
+static unsigned int gotIndex;
+static Elf_Addr gp_val = (Elf_Addr)got;
+
+static Elf_Addr
+allocateGOTEntry(Elf_Addr target)
+{
+ Elf_Addr *entry;
+
+ if (gotIndex >= GOT_SIZE)
+ barf("Global offset table overflow");
+
+ entry = &got[gotIndex++];
+ *entry = target;
+ return (Elf_Addr)entry;
+}
+#endif
+
+#ifdef ELF_FUNCTION_DESC
+typedef struct {
+ Elf_Addr ip;
+ Elf_Addr gp;
+} FunctionDesc;
+
+static FunctionDesc functionTable[FUNCTION_TABLE_SIZE];
+static unsigned int functionTableIndex;
+
+static Elf_Addr
+allocateFunctionDesc(Elf_Addr target)
+{
+ FunctionDesc *entry;
+
+ if (functionTableIndex >= FUNCTION_TABLE_SIZE)
+ barf("Function table overflow");
+
+ entry = &functionTable[functionTableIndex++];
+ entry->ip = target;
+ entry->gp = (Elf_Addr)gp_val;
+ return (Elf_Addr)entry;
+}
+
+static Elf_Addr
+copyFunctionDesc(Elf_Addr target)
+{
+ FunctionDesc *olddesc = (FunctionDesc *)target;
+ FunctionDesc *newdesc;
+
+ newdesc = (FunctionDesc *)allocateFunctionDesc(olddesc->ip);
+ newdesc->gp = olddesc->gp;
+ return (Elf_Addr)newdesc;
+}
+#endif
+
+#ifdef ELF_NEED_PLT
+#ifdef ia64_HOST_ARCH
+static void ia64_reloc_gprel22(Elf_Addr target, Elf_Addr value);
+static void ia64_reloc_pcrel21(Elf_Addr target, Elf_Addr value, ObjectCode *oc);
+
/* Machine code for one ia64 PLT stub: loads the target's function
   descriptor through gp (the gprel offset is patched in afterwards
   by PLT_RELOC) and branches to it. */
static unsigned char plt_code[] =
{
    /* taken from binutils bfd/elfxx-ia64.c */
    0x0b, 0x78, 0x00, 0x02, 0x00, 0x24, /* [MMI] addl r15=0,r1;; */
    0x00, 0x41, 0x3c, 0x30, 0x28, 0xc0, /* ld8 r16=[r15],8 */
    0x01, 0x08, 0x00, 0x84, /* mov r14=r1;; */
    0x11, 0x08, 0x00, 0x1e, 0x18, 0x10, /* [MIB] ld8 r1=[r15] */
    0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */
    0x60, 0x00, 0x80, 0x00 /* br.few b6;; */
};

/* If we can't get to the function descriptor via gp, take a local copy of it */
/* (copyFunctionDesc puts the copy in our own table, which *is* within
   gprel22 range of our gp). */
#define PLT_RELOC(code, target) { \
   Elf64_Sxword rel_value = target - gp_val; \
   if ((rel_value > 0x1fffff) || (rel_value < -0x1fffff)) \
      ia64_reloc_gprel22((Elf_Addr)code, copyFunctionDesc(target)); \
   else \
      ia64_reloc_gprel22((Elf_Addr)code, target); \
   }
#endif

/* One PLT entry is exactly one copy of the stub code above. */
typedef struct {
   unsigned char code[sizeof(plt_code)];
} PLTEntry;
+
+static Elf_Addr
+allocatePLTEntry(Elf_Addr target, ObjectCode *oc)
+{
+ PLTEntry *plt = (PLTEntry *)oc->plt;
+ PLTEntry *entry;
+
+ if (oc->pltIndex >= PLT_SIZE)
+ barf("Procedure table overflow");
+
+ entry = &plt[oc->pltIndex++];
+ memcpy(entry->code, plt_code, sizeof(entry->code));
+ PLT_RELOC(entry->code, target);
+ return (Elf_Addr)entry;
+}
+
+static unsigned int
+PLTSize(void)
+{
+ return (PLT_SIZE * sizeof(PLTEntry));
+}
+#endif
+
+
+#if x86_64_HOST_ARCH
+// On x86_64, 32-bit relocations are often used, which requires that
+// we can resolve a symbol to a 32-bit offset. However, shared
+// libraries are placed outside the 2Gb area, which leaves us with a
+// problem when we need to give a 32-bit offset to a symbol in a
+// shared library.
+//
+// For a function symbol, we can allocate a bounce sequence inside the
+// 2Gb area and resolve the symbol to this. The bounce sequence is
+// simply a long jump instruction to the real location of the symbol.
+//
+// For data references, we're screwed.
+//
/* One bounce-buffer entry: a rip-relative indirect jump whose target
   pointer ('addr') sits immediately after it.  The instruction is 6
   bytes; jmp[] is padded to 8 so that 'addr' lies at offset 8, which
   is what the 0x00000002(%rip) displacement reaches (rip points past
   the 6-byte instruction). */
typedef struct {
    unsigned char jmp[8];  /* 6 byte instruction: jmpq *0x00000002(%rip) */
    void *addr;
} x86_64_bounce;

/* number of entries per mmap'd bounce buffer */
#define X86_64_BB_SIZE 1024

static x86_64_bounce *x86_64_bounce_buffer = NULL;
static nat x86_64_bb_next_off;   /* index of next free entry in the buffer */
+
/* Make 'addr' (which may live above the 2Gb boundary, e.g. in a
   shared library) reachable from 32-bit relocations: allocate a
   bounce entry in memory mmap'd with MAP_32BIT containing an
   indirect jump to 'addr', register the bounce under 'lbl' in the
   symbol hash, and return the bounce's (low) address.  Only works
   for code symbols -- a data reference cannot be bounced (see the
   comment block above). */
static void*
x86_64_high_symbol( char *lbl, void *addr )
{
    x86_64_bounce *bounce;

    if ( x86_64_bounce_buffer == NULL ||
         x86_64_bb_next_off >= X86_64_BB_SIZE ) {
        /* Grab a fresh buffer in the low 2Gb.  A full previous buffer
           is deliberately not unmapped: its entries have already been
           handed out and must stay executable. */
        x86_64_bounce_buffer =
            mmap(NULL, X86_64_BB_SIZE * sizeof(x86_64_bounce),
                 PROT_EXEC|PROT_READ|PROT_WRITE,
                 MAP_PRIVATE|MAP_32BIT|MAP_ANONYMOUS, -1, 0);
        if (x86_64_bounce_buffer == MAP_FAILED) {
            barf("x86_64_high_symbol: mmap failed");
        }
        x86_64_bb_next_off = 0;
    }
    bounce = &x86_64_bounce_buffer[x86_64_bb_next_off];
    /* jmpq *0x00000002(%rip): jump via the 'addr' field 8 bytes into
       this entry (rip points just past the 6-byte instruction) */
    bounce->jmp[0] = 0xff;
    bounce->jmp[1] = 0x25;
    bounce->jmp[2] = 0x02;
    bounce->jmp[3] = 0x00;
    bounce->jmp[4] = 0x00;
    bounce->jmp[5] = 0x00;
    bounce->addr = addr;
    x86_64_bb_next_off++;

    IF_DEBUG(linker, debugBelch("x86_64: allocated bounce entry for %s->%p at %p\n",
                                lbl, addr, bounce));

    insertStrHashTable(symhash, lbl, bounce);
    return bounce;
}
+#endif
+
+
+/*
+ * Generic ELF functions
+ */
+
+static char *
+findElfSection ( void* objImage, Elf_Word sh_type )
+{
+ char* ehdrC = (char*)objImage;
+ Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC;
+ Elf_Shdr* shdr = (Elf_Shdr*)(ehdrC + ehdr->e_shoff);
+ char* sh_strtab = ehdrC + shdr[ehdr->e_shstrndx].sh_offset;
+ char* ptr = NULL;
+ int i;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (shdr[i].sh_type == sh_type
+ /* Ignore the section header's string table. */
+ && i != ehdr->e_shstrndx
+ /* Ignore string tables named .stabstr, as they contain
+ debugging info. */
+ && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8)
+ ) {
+ ptr = ehdrC + shdr[i].sh_offset;
+ break;
+ }
+ }
+ return ptr;
+}
+
+#if defined(ia64_HOST_ARCH)
/* Return the base virtual address of the program segment containing
   'vaddr'.  NOTE(review): if no segment contains 'vaddr', the loop
   falls through and the function returns the *last* segment's
   p_vaddr (or 0 if there are no segments) rather than signalling
   failure -- confirm callers never pass an unmapped address. */
static Elf_Addr
findElfSegment ( void* objImage, Elf_Addr vaddr )
{
   char* ehdrC = (char*)objImage;
   Elf_Ehdr* ehdr = (Elf_Ehdr*)ehdrC;
   Elf_Phdr* phdr = (Elf_Phdr*)(ehdrC + ehdr->e_phoff);
   Elf_Addr segaddr = 0;
   int i;

   for (i = 0; i < ehdr->e_phnum; i++) {
      segaddr = phdr[i].p_vaddr;
      if ((vaddr >= segaddr) && (vaddr < segaddr + phdr[i].p_memsz))
         break;
   }
   return segaddr;
}
+#endif
+
/* Verify that an ELF image is something this linker can load: right
   magic, word size (matches ELFCLASS for this build), endianness,
   relocatable type, known machine, exactly one normal string table,
   and at least one symbol table.  Returns 1 if acceptable, 0
   otherwise.  With the 'linker' debug flag set, also dumps section
   headers, string tables and symbol tables along the way. */
static int
ocVerifyImage_ELF ( ObjectCode* oc )
{
   Elf_Shdr* shdr;
   Elf_Sym*  stab;
   int i, j, nent, nstrtab, nsymtabs;
   char* sh_strtab;
   char* strtab;

   char*     ehdrC = (char*)(oc->image);
   Elf_Ehdr* ehdr  = (Elf_Ehdr*)ehdrC;

   if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
       ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
       ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
       ehdr->e_ident[EI_MAG3] != ELFMAG3) {
      errorBelch("%s: not an ELF object", oc->fileName);
      return 0;
   }

   /* ELFCLASS is ELFCLASS32 or ELFCLASS64 depending on this build */
   if (ehdr->e_ident[EI_CLASS] != ELFCLASS) {
      errorBelch("%s: unsupported ELF format", oc->fileName);
      return 0;
   }

   if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) {
       IF_DEBUG(linker,debugBelch( "Is little-endian\n" ));
   } else
   if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
       IF_DEBUG(linker,debugBelch( "Is big-endian\n" ));
   } else {
       errorBelch("%s: unknown endiannness", oc->fileName);
       return 0;
   }

   if (ehdr->e_type != ET_REL) {
      errorBelch("%s: not a relocatable object (.o) file", oc->fileName);
      return 0;
   }
   IF_DEBUG(linker, debugBelch( "Is a relocatable object (.o) file\n" ));

   IF_DEBUG(linker,debugBelch( "Architecture is " ));
   switch (ehdr->e_machine) {
      case EM_386:   IF_DEBUG(linker,debugBelch( "x86" )); break;
      case EM_SPARC: IF_DEBUG(linker,debugBelch( "sparc" )); break;
#ifdef EM_IA_64
      case EM_IA_64: IF_DEBUG(linker,debugBelch( "ia64" )); break;
#endif
      case EM_PPC:   IF_DEBUG(linker,debugBelch( "powerpc32" )); break;
#ifdef EM_X86_64
      case EM_X86_64: IF_DEBUG(linker,debugBelch( "x86_64" )); break;
#endif
      default:       IF_DEBUG(linker,debugBelch( "unknown" ));
                     errorBelch("%s: unknown architecture", oc->fileName);
                     return 0;
   }

   IF_DEBUG(linker,debugBelch(
             "\nSection header table: start %ld, n_entries %d, ent_size %d\n",
             (long)ehdr->e_shoff, ehdr->e_shnum, ehdr->e_shentsize  ));

   ASSERT (ehdr->e_shentsize == sizeof(Elf_Shdr));

   shdr = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);

   if (ehdr->e_shstrndx == SHN_UNDEF) {
      errorBelch("%s: no section header string table", oc->fileName);
      return 0;
   } else {
      IF_DEBUG(linker,debugBelch( "Section header string table is section %d\n",
                          ehdr->e_shstrndx));
      sh_strtab = ehdrC + shdr[ehdr->e_shstrndx].sh_offset;
   }

   /* dump every section header */
   for (i = 0; i < ehdr->e_shnum; i++) {
      IF_DEBUG(linker,debugBelch("%2d:  ", i ));
      IF_DEBUG(linker,debugBelch("type=%2d  ", (int)shdr[i].sh_type ));
      IF_DEBUG(linker,debugBelch("size=%4d  ", (int)shdr[i].sh_size ));
      IF_DEBUG(linker,debugBelch("offs=%4d  ", (int)shdr[i].sh_offset ));
      IF_DEBUG(linker,debugBelch("  (%p .. %p)  ",
               ehdrC + shdr[i].sh_offset,
                      ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1));

      if (shdr[i].sh_type == SHT_REL) {
          IF_DEBUG(linker,debugBelch("Rel  " ));
      } else if (shdr[i].sh_type == SHT_RELA) {
          IF_DEBUG(linker,debugBelch("RelA " ));
      } else {
          IF_DEBUG(linker,debugBelch("     "));
      }
      if (sh_strtab) {
          IF_DEBUG(linker,debugBelch("sname=%s\n", sh_strtab + shdr[i].sh_name ));
      }
   }

   /* there must be exactly one "normal" string table (not the
      section-header strtab, not .stabstr) */
   IF_DEBUG(linker,debugBelch( "\nString tables" ));
   strtab = NULL;
   nstrtab = 0;
   for (i = 0; i < ehdr->e_shnum; i++) {
      if (shdr[i].sh_type == SHT_STRTAB
          /* Ignore the section header's string table. */
          && i != ehdr->e_shstrndx
          /* Ignore string tables named .stabstr, as they contain
             debugging info. */
          && 0 != memcmp(".stabstr", sh_strtab + shdr[i].sh_name, 8)
         ) {
         IF_DEBUG(linker,debugBelch("   section %d is a normal string table", i ));
         strtab = ehdrC + shdr[i].sh_offset;
         nstrtab++;
      }
   }
   if (nstrtab != 1) {
      errorBelch("%s: no string tables, or too many", oc->fileName);
      return 0;
   }

   nsymtabs = 0;
   IF_DEBUG(linker,debugBelch( "\nSymbol tables" ));
   for (i = 0; i < ehdr->e_shnum; i++) {
      if (shdr[i].sh_type != SHT_SYMTAB) continue;
      IF_DEBUG(linker,debugBelch( "section %d is a symbol table\n", i ));
      nsymtabs++;
      stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset);
      nent = shdr[i].sh_size / sizeof(Elf_Sym);
      IF_DEBUG(linker,debugBelch( "   number of entries is apparently %d (%ld rem)\n",
               nent,
               (long)shdr[i].sh_size % sizeof(Elf_Sym)
             ));
      if (0 != shdr[i].sh_size % sizeof(Elf_Sym)) {
         errorBelch("%s: non-integral number of symbol table entries", oc->fileName);
         return 0;
      }
      for (j = 0; j < nent; j++) {
         IF_DEBUG(linker,debugBelch("   %2d  ", j ));
         IF_DEBUG(linker,debugBelch("  sec=%-5d  size=%-3d  val=%5p  ",
                             (int)stab[j].st_shndx,
                             (int)stab[j].st_size,
                             (char*)stab[j].st_value ));

         IF_DEBUG(linker,debugBelch("type=" ));
         switch (ELF_ST_TYPE(stab[j].st_info)) {
            case STT_NOTYPE:  IF_DEBUG(linker,debugBelch("notype " )); break;
            case STT_OBJECT:  IF_DEBUG(linker,debugBelch("object " )); break;
            case STT_FUNC  :  IF_DEBUG(linker,debugBelch("func   " )); break;
            case STT_SECTION: IF_DEBUG(linker,debugBelch("section" )); break;
            case STT_FILE:    IF_DEBUG(linker,debugBelch("file   " )); break;
            default:          IF_DEBUG(linker,debugBelch("?      " )); break;
         }
         IF_DEBUG(linker,debugBelch("  " ));

         IF_DEBUG(linker,debugBelch("bind=" ));
         switch (ELF_ST_BIND(stab[j].st_info)) {
            case STB_LOCAL :  IF_DEBUG(linker,debugBelch("local " )); break;
            case STB_GLOBAL:  IF_DEBUG(linker,debugBelch("global" )); break;
            case STB_WEAK  :  IF_DEBUG(linker,debugBelch("weak  " )); break;
            default:          IF_DEBUG(linker,debugBelch("?     " )); break;
         }
         IF_DEBUG(linker,debugBelch("  " ));

         IF_DEBUG(linker,debugBelch("name=%s\n", strtab + stab[j].st_name ));
      }
   }

   if (nsymtabs == 0) {
      errorBelch("%s: didn't find any symbol tables", oc->fileName);
      return 0;
   }

   return 1;
}
+
+static int getSectionKind_ELF( Elf_Shdr *hdr, int *is_bss )
+{
+ *is_bss = FALSE;
+
+ if (hdr->sh_type == SHT_PROGBITS
+ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_EXECINSTR)) {
+ /* .text-style section */
+ return SECTIONKIND_CODE_OR_RODATA;
+ }
+
+ if (hdr->sh_type == SHT_PROGBITS
+ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
+ /* .data-style section */
+ return SECTIONKIND_RWDATA;
+ }
+
+ if (hdr->sh_type == SHT_PROGBITS
+ && (hdr->sh_flags & SHF_ALLOC) && !(hdr->sh_flags & SHF_WRITE)) {
+ /* .rodata-style section */
+ return SECTIONKIND_CODE_OR_RODATA;
+ }
+
+ if (hdr->sh_type == SHT_NOBITS
+ && (hdr->sh_flags & SHF_ALLOC) && (hdr->sh_flags & SHF_WRITE)) {
+ /* .bss-style section */
+ *is_bss = TRUE;
+ return SECTIONKIND_RWDATA;
+ }
+
+ return SECTIONKIND_OTHER;
+}
+
+
+/* Collect the symbols defined by this ELF object module.
+
+   Allocates zeroed memory for non-empty .bss sections (repointing
+   their sh_offset at the fresh storage) and for SHN_COMMON symbols,
+   records every loadable section via addSection/addProddableBlock,
+   fills in oc->symbols, and enters each defined global symbol into the
+   global 'symhash' table.  Local symbols are recorded but not entered
+   in the hash (relocation reads them straight from the ELF symtab).
+   Returns 1 on success, 0 on failure. */
+static int
+ocGetNames_ELF ( ObjectCode* oc )
+{
+   int i, j, k, nent;
+   Elf_Sym* stab;
+
+   char*     ehdrC  = (char*)(oc->image);
+   Elf_Ehdr* ehdr   = (Elf_Ehdr*)ehdrC;
+   char*     strtab = findElfSection ( ehdrC, SHT_STRTAB );
+   Elf_Shdr* shdr   = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
+
+   ASSERT(symhash != NULL);
+
+   if (!strtab) {
+      errorBelch("%s: no strtab", oc->fileName);
+      return 0;
+   }
+
+   k = 0;
+   for (i = 0; i < ehdr->e_shnum; i++) {
+      /* Figure out what kind of section it is.  Logic derived from
+         Figure 1.14 ("Special Sections") of the ELF document
+         ("Portable Formats Specification, Version 1.1"). */
+      int is_bss = FALSE;
+      SectionKind kind = getSectionKind_ELF(&shdr[i], &is_bss);
+
+      if (is_bss && shdr[i].sh_size > 0) {
+         /* This is a non-empty .bss section.  Allocate zeroed space for
+            it, and set its .sh_offset field such that
+            ehdrC + .sh_offset == addr_of_zeroed_space.  */
+         char* zspace = stgCallocBytes(1, shdr[i].sh_size,
+                                       "ocGetNames_ELF(BSS)");
+         shdr[i].sh_offset = ((char*)zspace) - ((char*)ehdrC);
+         /*
+         debugBelch("BSS section at 0x%x, size %d\n",
+                         zspace, shdr[i].sh_size);
+         */
+      }
+
+      /* fill in the section info */
+      if (kind != SECTIONKIND_OTHER && shdr[i].sh_size > 0) {
+         addProddableBlock(oc, ehdrC + shdr[i].sh_offset, shdr[i].sh_size);
+         addSection(oc, kind, ehdrC + shdr[i].sh_offset,
+                        ehdrC + shdr[i].sh_offset + shdr[i].sh_size - 1);
+      }
+
+      if (shdr[i].sh_type != SHT_SYMTAB) continue;
+
+      /* copy stuff into this module's object symbol table */
+      stab = (Elf_Sym*) (ehdrC + shdr[i].sh_offset);
+      nent = shdr[i].sh_size / sizeof(Elf_Sym);
+
+      /* NOTE(review): if an object carried more than one SHT_SYMTAB
+         section, each iteration would overwrite oc->n_symbols and leak
+         the previous oc->symbols array — presumably objects only ever
+         have one symtab; confirm. */
+      oc->n_symbols = nent;
+      oc->symbols = stgMallocBytes(oc->n_symbols * sizeof(char*),
+                                   "ocGetNames_ELF(oc->symbols)");
+
+      for (j = 0; j < nent; j++) {
+
+         char  isLocal = FALSE; /* avoids uninit-var warning */
+         char* ad      = NULL;
+         char* nm      = strtab + stab[j].st_name;
+         int   secno   = stab[j].st_shndx;
+
+         /* Figure out if we want to add it; if so, set ad to its
+            address.  Otherwise leave ad == NULL. */
+
+         if (secno == SHN_COMMON) {
+            /* Common symbol: allocate zeroed storage for it here and
+               now. */
+            isLocal = FALSE;
+            ad = stgCallocBytes(1, stab[j].st_size, "ocGetNames_ELF(COMMON)");
+            /*
+            debugBelch("COMMON symbol, size %d name %s\n",
+                            stab[j].st_size, nm);
+            */
+            /* Pointless to do addProddableBlock() for this area,
+               since the linker should never poke around in it. */
+         }
+         else
+         if ( ( ELF_ST_BIND(stab[j].st_info)==STB_GLOBAL
+                || ELF_ST_BIND(stab[j].st_info)==STB_LOCAL
+              )
+              /* and not an undefined symbol */
+              && stab[j].st_shndx != SHN_UNDEF
+              /* and not in a "special section" */
+              && stab[j].st_shndx < SHN_LORESERVE
+              &&
+              /* and it's a not a section or string table or anything silly */
+              ( ELF_ST_TYPE(stab[j].st_info)==STT_FUNC ||
+                ELF_ST_TYPE(stab[j].st_info)==STT_OBJECT ||
+                ELF_ST_TYPE(stab[j].st_info)==STT_NOTYPE
+              )
+            ) {
+            /* Section 0 is the undefined section, hence > and not >=. */
+            ASSERT(secno > 0 && secno < ehdr->e_shnum);
+            /*
+            if (shdr[secno].sh_type == SHT_NOBITS) {
+               debugBelch("   BSS symbol, size %d off %d name %s\n",
+                               stab[j].st_size, stab[j].st_value, nm);
+            }
+            */
+            ad = ehdrC + shdr[ secno ].sh_offset + stab[j].st_value;
+            if (ELF_ST_BIND(stab[j].st_info)==STB_LOCAL) {
+               isLocal = TRUE;
+            } else {
+#ifdef ELF_FUNCTION_DESC
+               /* dlsym() and the initialisation table both give us function
+                * descriptors, so to be consistent we store function descriptors
+                * in the symbol table */
+               if (ELF_ST_TYPE(stab[j].st_info) == STT_FUNC)
+                   ad = (char *)allocateFunctionDesc((Elf_Addr)ad);
+#endif
+               IF_DEBUG(linker,debugBelch( "addOTabName(GLOB): %10p  %s %s",
+                                      ad, oc->fileName, nm ));
+               isLocal = FALSE;
+            }
+         }
+
+         /* And the decision is ... */
+
+         if (ad != NULL) {
+            ASSERT(nm != NULL);
+            oc->symbols[j] = nm;
+            /* Acquire! */
+            if (isLocal) {
+               /* Ignore entirely.  (Locals are resolved directly from
+                  the ELF symtab during relocation, not via symhash.) */
+            } else {
+               ghciInsertStrHashTable(oc->fileName, symhash, nm, ad);
+            }
+         } else {
+            /* Skip. */
+            IF_DEBUG(linker,debugBelch( "skipping `%s'\n",
+                                   strtab + stab[j].st_name ));
+            /*
+            debugBelch(
+                    "skipping   bind = %d,  type = %d,  shndx = %d   `%s'\n",
+                    (int)ELF_ST_BIND(stab[j].st_info),
+                    (int)ELF_ST_TYPE(stab[j].st_info),
+                    (int)stab[j].st_shndx,
+                    strtab + stab[j].st_name
+                   );
+            */
+            oc->symbols[j] = NULL;
+         }
+
+      }
+   }
+
+   return 1;
+}
+
+/* Do ELF relocations which lack an explicit addend (SHT_REL sections;
+   the addend is taken from the word already stored at the relocation
+   target).  All x86-linux relocations appear to be of this form.
+   Returns 1 on success, 0 on failure. */
+static int
+do_Elf_Rel_relocations ( ObjectCode* oc, char* ehdrC,
+                         Elf_Shdr* shdr, int shnum,
+                         Elf_Sym*  stab, char* strtab )
+{
+   int j;
+   char *symbol;
+   Elf_Word* targ;
+   Elf_Rel*  rtab = (Elf_Rel*) (ehdrC + shdr[shnum].sh_offset);
+   int         nent = shdr[shnum].sh_size / sizeof(Elf_Rel);
+   int target_shndx = shdr[shnum].sh_info;
+   int symtab_shndx = shdr[shnum].sh_link;
+
+   /* NOTE(review): the incoming 'stab' argument is discarded; the
+      symbol table actually used is the one named by this section's
+      sh_link field. */
+   stab = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset);
+   targ = (Elf_Word*)(ehdrC + shdr[ target_shndx ].sh_offset);
+   IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d\n",
+                          target_shndx, symtab_shndx ));
+
+   /* Skip sections that we're not interested in. */
+   {
+       int is_bss;
+       SectionKind kind = getSectionKind_ELF(&shdr[target_shndx], &is_bss);
+       if (kind == SECTIONKIND_OTHER) {
+           IF_DEBUG(linker,debugBelch( "skipping (target section not loaded)"));
+           return 1;
+       }
+   }
+
+   for (j = 0; j < nent; j++) {
+      Elf_Addr offset = rtab[j].r_offset;
+      Elf_Addr info   = rtab[j].r_info;
+
+      Elf_Addr  P  = ((Elf_Addr)targ) + offset;
+      Elf_Word* pP = (Elf_Word*)P;
+      Elf_Addr  A  = *pP;   /* implicit addend: current word at target */
+      Elf_Addr  S;
+      void*     S_tmp;
+      Elf_Addr  value;
+
+      IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p)",
+                             j, (void*)offset, (void*)info ));
+      if (!info) {
+         IF_DEBUG(linker,debugBelch( " ZERO" ));
+         S = 0;
+      } else {
+         Elf_Sym sym = stab[ELF_R_SYM(info)];
+         /* First see if it is a local symbol. */
+         if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+            /* Yes, so we can get the address directly from the ELF symbol
+               table. */
+            symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
+            S = (Elf_Addr)
+                (ehdrC + shdr[ sym.st_shndx ].sh_offset
+                       + stab[ELF_R_SYM(info)].st_value);
+
+         } else {
+            /* No, so look up the name in our global table. */
+            symbol = strtab + sym.st_name;
+            S_tmp = lookupSymbol( symbol );
+            S = (Elf_Addr)S_tmp;
+         }
+         if (!S) {
+            errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
+            return 0;
+         }
+         IF_DEBUG(linker,debugBelch( "`%s' resolves to %p\n", symbol, (void*)S ));
+      }
+
+      IF_DEBUG(linker,debugBelch( "Reloc: P = %p   S = %p   A = %p\n",
+                             (void*)P, (void*)S, (void*)A ));
+      checkProddableBlock ( oc, pP );
+
+      value = S + A;
+
+      switch (ELF_R_TYPE(info)) {
+#        ifdef i386_HOST_ARCH
+         case R_386_32:   *pP = value;     break;   /* absolute 32-bit */
+         case R_386_PC32: *pP = value - P; break;   /* PC-relative 32-bit */
+#        endif
+         default:
+            errorBelch("%s: unhandled ELF relocation(Rel) type %lu\n",
+                  oc->fileName, (lnat)ELF_R_TYPE(info));
+            return 0;
+      }
+
+   }
+   return 1;
+}
+
+/* Do ELF relocations for which explicit addends are supplied
+   (SHT_RELA sections).  sparc-solaris relocations appear to be of this
+   form.  Returns 1 on success, 0 on failure. */
+static int
+do_Elf_Rela_relocations ( ObjectCode* oc, char* ehdrC,
+                          Elf_Shdr* shdr, int shnum,
+                          Elf_Sym*  stab, char* strtab )
+{
+   int j;
+   char *symbol = NULL;
+   Elf_Addr targ;
+   Elf_Rela* rtab = (Elf_Rela*) (ehdrC + shdr[shnum].sh_offset);
+   int         nent = shdr[shnum].sh_size / sizeof(Elf_Rela);
+   int target_shndx = shdr[shnum].sh_info;
+   int symtab_shndx = shdr[shnum].sh_link;
+
+   /* The incoming 'stab' argument is discarded; the symbol table
+      actually used is the one named by this section's sh_link field. */
+   stab = (Elf_Sym*) (ehdrC + shdr[ symtab_shndx ].sh_offset);
+   targ = (Elf_Addr) (ehdrC + shdr[ target_shndx ].sh_offset);
+   IF_DEBUG(linker,debugBelch( "relocations for section %d using symtab %d\n",
+                          target_shndx, symtab_shndx ));
+
+   for (j = 0; j < nent; j++) {
+#if defined(DEBUG) || defined(sparc_HOST_ARCH) || defined(ia64_HOST_ARCH) || defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+      /* This #ifdef only serves to avoid unused-var warnings. */
+      Elf_Addr  offset = rtab[j].r_offset;
+      Elf_Addr  P      = targ + offset;
+#endif
+      Elf_Addr  info   = rtab[j].r_info;
+      Elf_Addr  A      = rtab[j].r_addend;
+      Elf_Addr  S;
+      void*     S_tmp;
+      Elf_Addr  value;
+#     if defined(sparc_HOST_ARCH)
+      Elf_Word* pP = (Elf_Word*)P;
+      Elf_Word  w1, w2;
+#     elif defined(ia64_HOST_ARCH)
+      Elf64_Xword *pP = (Elf64_Xword *)P;
+      Elf_Addr addr;
+#     elif defined(powerpc_HOST_ARCH)
+      Elf_Sword delta;
+#     endif
+
+      IF_DEBUG(linker,debugBelch( "Rel entry %3d is raw(%6p %6p %6p)   ",
+                             j, (void*)offset, (void*)info,
+                                (void*)A ));
+      if (!info) {
+         IF_DEBUG(linker,debugBelch( " ZERO" ));
+         S = 0;
+      } else {
+         Elf_Sym sym = stab[ELF_R_SYM(info)];
+         /* First see if it is a local symbol. */
+         if (ELF_ST_BIND(sym.st_info) == STB_LOCAL) {
+            /* Yes, so we can get the address directly from the ELF symbol
+               table. */
+            symbol = sym.st_name==0 ? "(noname)" : strtab+sym.st_name;
+            S = (Elf_Addr)
+                (ehdrC + shdr[ sym.st_shndx ].sh_offset
+                       + stab[ELF_R_SYM(info)].st_value);
+#ifdef ELF_FUNCTION_DESC
+            /* Make a function descriptor for this function */
+            if (S && ELF_ST_TYPE(sym.st_info) == STT_FUNC) {
+               S = allocateFunctionDesc(S + A);
+               A = 0;
+            }
+#endif
+         } else {
+            /* No, so look up the name in our global table. */
+            symbol = strtab + sym.st_name;
+            S_tmp = lookupSymbol( symbol );
+            S = (Elf_Addr)S_tmp;
+
+#ifdef ELF_FUNCTION_DESC
+            /* If a function, already a function descriptor - we would
+               have to copy it to add an offset. */
+            if (S && (ELF_ST_TYPE(sym.st_info) == STT_FUNC) && (A != 0))
+               errorBelch("%s: function %s with addend %p", oc->fileName, symbol, (void *)A);
+#endif
+         }
+         if (!S) {
+            errorBelch("%s: unknown symbol `%s'", oc->fileName, symbol);
+            return 0;
+         }
+         IF_DEBUG(linker,debugBelch( "`%s' resolves to %p", symbol, (void*)S ));
+      }
+
+      IF_DEBUG(linker,debugBelch("Reloc: P = %p   S = %p   A = %p\n",
+                                        (void*)P, (void*)S, (void*)A ));
+      /* checkProddableBlock ( oc, (void*)P ); */
+
+      value = S + A;
+
+      switch (ELF_R_TYPE(info)) {
+#        if defined(sparc_HOST_ARCH)
+         case R_SPARC_WDISP30:
+            w1 = *pP & 0xC0000000;
+            w2 = (Elf_Word)((value - P) >> 2);
+            ASSERT((w2 & 0xC0000000) == 0);
+            w1 |= w2;
+            *pP = w1;
+            break;
+         case R_SPARC_HI22:
+            w1 = *pP & 0xFFC00000;
+            w2 = (Elf_Word)(value >> 10);
+            ASSERT((w2 & 0xFFC00000) == 0);
+            w1 |= w2;
+            *pP = w1;
+            break;
+         case R_SPARC_LO10:
+            w1 = *pP & ~0x3FF;
+            w2 = (Elf_Word)(value & 0x3FF);
+            ASSERT((w2 & ~0x3FF) == 0);
+            w1 |= w2;
+            *pP = w1;
+            break;
+         /* According to the Sun documentation:
+            R_SPARC_UA32
+            This relocation type resembles R_SPARC_32, except it refers to an
+            unaligned word. That is, the word to be relocated must be treated
+            as four separate bytes with arbitrary alignment, not as a word
+            aligned according to the architecture requirements.
+
+            (JRS: which means that freeloading on the R_SPARC_32 case
+            is probably wrong, but hey ...)
+         */
+         case R_SPARC_UA32:
+         case R_SPARC_32:
+            w2 = (Elf_Word)value;
+            *pP = w2;
+            break;
+#        elif defined(ia64_HOST_ARCH)
+         case R_IA64_DIR64LSB:
+         case R_IA64_FPTR64LSB:
+            *pP = value;
+            break;
+         case R_IA64_PCREL64LSB:
+            *pP = value - P;
+            break;
+         case R_IA64_SEGREL64LSB:
+            addr = findElfSegment(ehdrC, value);
+            *pP = value - addr;
+            break;
+         case R_IA64_GPREL22:
+            ia64_reloc_gprel22(P, value);
+            break;
+         case R_IA64_LTOFF22:
+         case R_IA64_LTOFF22X:
+         case R_IA64_LTOFF_FPTR22:
+            addr = allocateGOTEntry(value);
+            ia64_reloc_gprel22(P, addr);
+            break;
+         case R_IA64_PCREL21B:
+            ia64_reloc_pcrel21(P, S, oc);
+            break;
+         case R_IA64_LDXMOV:
+            /* This goes with R_IA64_LTOFF22X and points to the load to
+             * convert into a move.  We don't implement relaxation. */
+            break;
+#        elif defined(powerpc_HOST_ARCH)
+         case R_PPC_ADDR16_LO:
+            *(Elf32_Half*) P = value;
+            break;
+
+         case R_PPC_ADDR16_HI:
+            *(Elf32_Half*) P = value >> 16;
+            break;
+ 
+         case R_PPC_ADDR16_HA:
+            *(Elf32_Half*) P = (value + 0x8000) >> 16;
+            break;
+
+         case R_PPC_ADDR32:
+            *(Elf32_Word *) P = value;
+            break;
+
+         case R_PPC_REL32:
+            *(Elf32_Word *) P = value - P;
+            break;
+
+         case R_PPC_REL24:
+            delta = value - P;
+
+            if( delta << 6 >> 6 != delta )
+            {
+               /* Branch displacement doesn't fit in 26 bits: route the
+                  call through a jump island instead. */
+               value = makeJumpIsland( oc, ELF_R_SYM(info), value );
+               delta = value - P;
+
+               if( value == 0 || delta << 6 >> 6 != delta )
+               {
+                  barf( "Unable to make ppcJumpIsland for #%d",
+                        ELF_R_SYM(info) );
+                  return 0;
+               }
+            }
+
+            *(Elf_Word *) P = (*(Elf_Word *) P & 0xfc000003)
+                                          | (delta & 0x3fffffc);
+            break;
+#        endif
+
+#if x86_64_HOST_ARCH
+      case R_X86_64_64:
+          *(Elf64_Xword *)P = value;
+          break;
+
+      case R_X86_64_PC32:
+      {
+          /* The relocated field is a signed 32-bit quantity, so the
+             admissible range is [-0x80000000, 0x7fffffff] inclusive.
+             (The old '>=' test wrongly rejected 0x7fffffff.) */
+          StgInt64 off = value - P;
+          if (off > 0x7fffffffL || off < -0x80000000L) {
+              barf("R_X86_64_PC32 relocation out of range: %s = %p",
+                   symbol, (void*)off);
+          }
+          *(Elf64_Word *)P = (Elf64_Word)off;
+          break;
+      }
+
+      case R_X86_64_32:
+          /* Per the AMD64 psABI, R_X86_64_32 must zero-extend back to
+             the original 64-bit value, i.e. 'value' must fit in an
+             unsigned 32-bit field (the old test used the signed limit
+             and rejected valid values in [0x7fffffff, 0xffffffff]). */
+          if (value > 0xffffffffL) {
+              barf("R_X86_64_32 relocation out of range: %s = %p\n",
+                   symbol, (void*)value);
+          }
+          *(Elf64_Word *)P = (Elf64_Word)value;
+          break;
+
+      case R_X86_64_32S:
+          /* Must sign-extend: admissible signed 32-bit range. */
+          if ((StgInt64)value > 0x7fffffffL || (StgInt64)value < -0x80000000L) {
+              barf("R_X86_64_32S relocation out of range: %s = %p\n",
+                   symbol, (void*)value);
+          }
+          *(Elf64_Sword *)P = (Elf64_Sword)value;
+          break;
+#endif
+
+         default:
+            errorBelch("%s: unhandled ELF relocation(RelA) type %lu\n",
+                  oc->fileName, (lnat)ELF_R_TYPE(info));
+            return 0;
+      }
+
+   }
+   return 1;
+}
+
+/* Resolve all relocations of this ELF object, dispatching each SHT_REL
+   and SHT_RELA section to the appropriate worker.  Frees the
+   per-object local-symbol hash afterwards since it is no longer
+   needed.  Returns 1 on success, 0 on failure. */
+static int
+ocResolve_ELF ( ObjectCode* oc )
+{
+   char *strtab;
+   int   shnum, ok;
+   Elf_Sym*  stab  = NULL;
+   char*     ehdrC = (char*)(oc->image);
+   Elf_Ehdr* ehdr  = (Elf_Ehdr*) ehdrC;
+   Elf_Shdr* shdr  = (Elf_Shdr*) (ehdrC + ehdr->e_shoff);
+
+   /* first find "the" symbol table */
+   stab = (Elf_Sym*) findElfSection ( ehdrC, SHT_SYMTAB );
+
+   /* also go find the string table */
+   strtab = findElfSection ( ehdrC, SHT_STRTAB );
+
+   if (stab == NULL || strtab == NULL) {
+      errorBelch("%s: can't find string or symbol table", oc->fileName);
+      return 0;
+   }
+
+   /* Process the relocation sections. */
+   for (shnum = 0; shnum < ehdr->e_shnum; shnum++) {
+      if (shdr[shnum].sh_type == SHT_REL) {
+         ok = do_Elf_Rel_relocations ( oc, ehdrC, shdr,
+                                       shnum, stab, strtab );
+         if (!ok) return ok;
+      }
+      else
+      if (shdr[shnum].sh_type == SHT_RELA) {
+         ok = do_Elf_Rela_relocations ( oc, ehdrC, shdr,
+                                        shnum, stab, strtab );
+         if (!ok) return ok;
+      }
+   }
+
+   /* Free the local symbol table; we won't need it again. */
+   freeHashTable(oc->lochash, NULL);
+   oc->lochash = NULL;
+
+#if defined(powerpc_HOST_ARCH)
+   /* Make the freshly written code visible to the instruction fetch
+      unit. */
+   ocFlushInstructionCache( oc );
+#endif
+
+   return 1;
+}
+
+/*
+ * IA64 specifics
+ * Instructions are 41 bits long, packed into 128 bit bundles with a 5-bit template
+ * at the front. The following utility functions pack and unpack instructions, and
+ * take care of the most common relocations.
+ */
+
+#ifdef ia64_HOST_ARCH
+
+/* Extract the 41-bit instruction in the given slot of an IA64 bundle.
+   The low two bits of 'target' encode the slot number (0-2); the
+   remaining bits address the 128-bit bundle itself. */
+static Elf64_Xword
+ia64_extract_instruction(Elf64_Xword *target)
+{
+   Elf64_Xword w1, w2;
+   int slot = (Elf_Addr)target & 3;
+   /* Strip the slot number to get the bundle address.  (The original
+      code assigned the integer back to the pointer without a cast,
+      which is a constraint violation in C.) */
+   target = (Elf64_Xword *)((Elf_Addr)target & ~3);
+
+   w1 = *target;
+   w2 = *(target+1);
+
+   switch (slot)
+   {
+      case 0:  /* bits 5-45 of the first word */
+         return ((w1 >> 5) & 0x1ffffffffff);
+      case 1:  /* bits 46-63 of w1, bits 0-22 of w2 */
+         return (w1 >> 46) | ((w2 & 0x7fffff) << 18);
+      case 2:  /* bits 23-63 of the second word */
+         return (w2 >> 23);
+      default:
+         barf("ia64_extract_instruction: invalid slot %p", target);
+   }
+}
+
+/* Write a 41-bit instruction back into the given slot of an IA64
+   bundle (inverse of ia64_extract_instruction).  Bits are OR-ed in, so
+   the target field is assumed to be clear beforehand. */
+static void
+ia64_deposit_instruction(Elf64_Xword *target, Elf64_Xword value)
+{
+   int slot = (Elf_Addr)target & 3;
+   /* Strip the slot number to get the bundle address.  (The original
+      code assigned the integer back to the pointer without a cast,
+      which is a constraint violation in C.) */
+   target = (Elf64_Xword *)((Elf_Addr)target & ~3);
+
+   switch (slot)
+   {
+      case 0:
+         *target |= value << 5;
+         break;
+      case 1:
+         *target |= value << 46;
+         *(target+1) |= value >> 18;
+         break;
+      case 2:
+         *(target+1) |= value << 23;
+         break;
+   }
+}
+
+/* Patch a 22-bit GP-relative immediate at 'target' so it addresses
+   'value' relative to the global pointer gp_val.  Barfs if the
+   displacement doesn't fit in 22 bits. */
+static void
+ia64_reloc_gprel22(Elf_Addr target, Elf_Addr value)
+{
+   Elf64_Xword instruction;
+   Elf64_Sxword rel_value;
+
+   rel_value = value - gp_val;
+   if ((rel_value > 0x1fffff) || (rel_value < -0x1fffff))
+      barf("GP-relative data out of range (address = 0x%lx, gp = 0x%lx)", value, gp_val);
+
+   /* Scatter the displacement into the imm7b/imm9d/imm5c/s fields. */
+   instruction = ia64_extract_instruction((Elf64_Xword *)target);
+   instruction |= (((rel_value >> 0) & 0x07f) << 13)            /* imm7b */
+                    | (((rel_value >> 7) & 0x1ff) << 27)        /* imm9d */
+                    | (((rel_value >> 16) & 0x01f) << 22)       /* imm5c */
+                    | ((Elf64_Xword)(rel_value < 0) << 36);     /* s */
+   ia64_deposit_instruction((Elf64_Xword *)target, instruction);
+}
+
+/* Patch a 21-bit PC-relative branch at 'target' to jump to a freshly
+   allocated PLT entry for 'value'.  Barfs if even the PLT entry is out
+   of branch range. */
+static void
+ia64_reloc_pcrel21(Elf_Addr target, Elf_Addr value, ObjectCode *oc)
+{
+   Elf64_Xword instruction;
+   Elf64_Sxword rel_value;
+   Elf_Addr entry;
+
+   entry = allocatePLTEntry(value, oc);
+
+   /* Branch displacements count 16-byte bundles, hence the >> 4. */
+   rel_value = (entry >> 4) - (target >> 4);
+   if ((rel_value > 0xfffff) || (rel_value < -0xfffff))
+      barf("PLT entry too far away (entry = 0x%lx, target = 0x%lx)", entry, target);
+
+   instruction = ia64_extract_instruction((Elf64_Xword *)target);
+   instruction |= ((rel_value & 0xfffff) << 13)                 /* imm20b */
+                    | ((Elf64_Xword)(rel_value < 0) << 36);     /* s */
+   ia64_deposit_instruction((Elf64_Xword *)target, instruction);
+}
+
+#endif /* ia64 */
+
+/*
+ * PowerPC ELF specifics
+ */
+
+#ifdef powerpc_HOST_ARCH
+
+/* Find the object's (single) SHT_SYMTAB section and pre-allocate one
+   PowerPC jump island per symbol-table entry.  Returns 1 on success,
+   0 on failure. */
+static int ocAllocateJumpIslands_ELF( ObjectCode *oc )
+{
+  Elf_Ehdr *ehdr;
+  Elf_Shdr* shdr;
+  int i;
+
+  ehdr = (Elf_Ehdr *) oc->image;
+  shdr = (Elf_Shdr *) ( ((char *)oc->image) + ehdr->e_shoff );
+
+  for( i = 0; i < ehdr->e_shnum; i++ )
+    if( shdr[i].sh_type == SHT_SYMTAB )
+      break;
+
+  if( i == ehdr->e_shnum )
+  {
+    errorBelch( "This ELF file contains no symtab" );
+    return 0;
+  }
+
+  if( shdr[i].sh_entsize != sizeof( Elf_Sym ) )
+  {
+    /* Cast both operands to int: sh_entsize is an ELF word and
+       sizeof() yields size_t, neither of which is guaranteed to match
+       the %d conversions. */
+    errorBelch( "The entry size (%d) of the symtab isn't %d\n",
+      (int) shdr[i].sh_entsize, (int) sizeof( Elf_Sym ) );
+
+    return 0;
+  }
+
+  return ocAllocateJumpIslands( oc, shdr[i].sh_size / sizeof( Elf_Sym ), 0 );
+}
+
+#endif /* powerpc */
+
+#endif /* ELF */
+
+/* --------------------------------------------------------------------------
+ * Mach-O specifics
+ * ------------------------------------------------------------------------*/
+
+#if defined(OBJFORMAT_MACHO)
+
+/*
+ Support for MachO linking on Darwin/MacOS X
+ by Wolfgang Thaller (wolfgang.thaller@gmx.net)
+
+ I hereby formally apologize for the hackish nature of this code.
+ Things that need to be done:
+ *) implement ocVerifyImage_MachO
+ *) add still more sanity checks.
+*/
+
+#ifdef powerpc_HOST_ARCH
+/* Pre-allocate PowerPC jump islands for a Mach-O object.  Scans the
+   symbol table for the first and last undefined external symbol so
+   only that index range needs islands.  Returns the result of
+   ocAllocateJumpIslands (1 on success, 0 on failure). */
+static int ocAllocateJumpIslands_MachO(ObjectCode* oc)
+{
+    struct mach_header *header = (struct mach_header *) oc->image;
+    struct load_command *lc = (struct load_command *) (header + 1);
+    unsigned i;
+
+    for( i = 0; i < header->ncmds; i++ )
+    {
+        if( lc->cmd == LC_SYMTAB )
+        {
+                // Find out the first and last undefined external
+                // symbol, so we don't have to allocate too many
+                // jump islands.
+            struct symtab_command *symLC = (struct symtab_command *) lc;
+            unsigned min = symLC->nsyms, max = 0;
+            /* NOTE(review): symLC was just cast from lc and can never
+               be NULL here, so the conditional below is redundant. */
+            struct nlist *nlist =
+                symLC ? (struct nlist*) ((char*) oc->image + symLC->symoff)
+                      : NULL;
+            /* NOTE(review): this inner loop reuses the outer loop
+               index 'i'; harmless only because this branch always
+               returns or breaks out of the outer loop below. */
+            for(i=0;i<symLC->nsyms;i++)
+            {
+                if(nlist[i].n_type & N_STAB)
+                    ;
+                else if(nlist[i].n_type & N_EXT)
+                {
+                    if((nlist[i].n_type & N_TYPE) == N_UNDF
+                        && (nlist[i].n_value == 0))
+                    {
+                        if(i < min)
+                            min = i;
+                        if(i > max)
+                            max = i;
+                    }
+                }
+            }
+            if(max >= min)
+                return ocAllocateJumpIslands(oc, max - min + 1, min);
+
+            break;
+        }
+
+        lc = (struct load_command *) ( ((char *)lc) + lc->cmdsize );
+    }
+    /* No symtab, or no undefined externals: allocate nothing. */
+    return ocAllocateJumpIslands(oc,0,0);
+}
+#endif
+
+/* Sanity-check a Mach-O image before loading.  Currently a stub that
+   always reports success. */
+static int ocVerifyImage_MachO(ObjectCode* oc STG_UNUSED)
+{
+    // FIXME: do some verifying here
+    return 1;
+}
+
+/* Fill in a lazy/non-lazy symbol-pointer section (or, on i386, a
+   __jump_table section of 5-byte jmp stubs) with the resolved
+   addresses of the imported symbols it names via the indirect symbol
+   table.  Returns 1 on success, 0 if any symbol cannot be resolved. */
+static int resolveImports(
+    ObjectCode* oc,
+    char *image,
+    struct symtab_command *symLC,
+    struct section *sect,    // ptr to lazy or non-lazy symbol pointer section
+    unsigned long *indirectSyms,
+    struct nlist *nlist)
+{
+    unsigned i;
+    size_t itemSize = 4;
+
+#if i386_HOST_ARCH
+    int isJumpTable = 0;
+    if(!strcmp(sect->sectname,"__jump_table"))
+    {
+        isJumpTable = 1;
+        itemSize = 5;   /* each entry is a 5-byte "jmp rel32" */
+        ASSERT(sect->reserved2 == itemSize);
+    }
+#endif
+
+    for(i=0; i*itemSize < sect->size;i++)
+    {
+        // according to otool, reserved1 contains the first index into the indirect symbol table
+        struct nlist *symbol = &nlist[indirectSyms[sect->reserved1+i]];
+        char *nm = image + symLC->stroff + symbol->n_un.n_strx;
+        void *addr = NULL;
+
+        /* Common symbols had their n_value repointed at allocated
+           storage by ocGetNames_MachO; use that directly.  Otherwise
+           try the object-local table, then the global table. */
+        if((symbol->n_type & N_TYPE) == N_UNDF
+            && (symbol->n_type & N_EXT) && (symbol->n_value != 0))
+            addr = (void*) (symbol->n_value);
+        else if((addr = lookupLocalSymbol(oc,nm)) != NULL)
+            ;
+        else
+            addr = lookupSymbol(nm);
+        if(!addr)
+        {
+            errorBelch("\n%s: unknown symbol `%s'", oc->fileName, nm);
+            return 0;
+        }
+        ASSERT(addr);
+
+#if i386_HOST_ARCH
+        if(isJumpTable)
+        {
+            /* Write a direct jmp to the resolved address. */
+            checkProddableBlock(oc,image + sect->offset + i*itemSize);
+            *(image + sect->offset + i*itemSize) = 0xe9; // jmp
+            *(unsigned*)(image + sect->offset + i*itemSize + 1)
+                = (char*)addr - (image + sect->offset + i*itemSize + 5);
+        }
+        else
+#endif
+        {
+            /* Plain symbol-pointer entry: store the address. */
+            checkProddableBlock(oc,((void**)(image + sect->offset)) + i);
+            ((void**)(image + sect->offset))[i] = addr;
+        }
+    }
+
+    return 1;
+}
+
+/* Translate an address from the object file's (unrelocated) address
+   space into the address where the containing section actually lives
+   inside the loaded image.  Barfs if the address lies in no section. */
+static unsigned long relocateAddress(
+    ObjectCode* oc,
+    int nSections,
+    struct section* sections,
+    unsigned long address)
+{
+    int n;
+    for(n = 0; n < nSections; n++)
+    {
+        struct section *s = &sections[n];
+        if(s->addr <= address && address < s->addr + s->size)
+        {
+            /* offset of 'address' within the section, rebased onto the
+               section's location in the loaded image */
+            return (unsigned long) oc->image
+                    + s->offset + (address - s->addr);
+        }
+    }
+    barf("Invalid Mach-O file:"
+         "Address out of bounds while relocating object file");
+    return 0;
+}
+
+/* Apply every relocation recorded for one Mach-O section.
+   Symbol-pointer sections are skipped here (resolveImports already
+   filled them in).  Handles both scattered and ordinary relocation
+   entries; paired kinds (SECTDIFF, HI16/LO16/HA16) also consume the
+   following *_RELOC_PAIR entry.  Returns 1 on success, 0 on failure. */
+static int relocateSection(
+    ObjectCode* oc,
+    char *image,
+    struct symtab_command *symLC, struct nlist *nlist,
+    int nSections, struct section* sections, struct section *sect)
+{
+    struct relocation_info *relocs;
+    int i,n;
+
+    /* Symbol-pointer sections were handled by resolveImports. */
+    if(!strcmp(sect->sectname,"__la_symbol_ptr"))
+        return 1;
+    else if(!strcmp(sect->sectname,"__nl_symbol_ptr"))
+        return 1;
+    else if(!strcmp(sect->sectname,"__la_sym_ptr2"))
+        return 1;
+    else if(!strcmp(sect->sectname,"__la_sym_ptr3"))
+        return 1;
+
+    n = sect->nreloc;
+    relocs = (struct relocation_info*) (image + sect->reloff);
+
+    for(i=0;i<n;i++)
+    {
+        if(relocs[i].r_address & R_SCATTERED)
+        {
+            struct scattered_relocation_info *scat =
+                (struct scattered_relocation_info*) &relocs[i];
+
+            if(!scat->r_pcrel)
+            {
+                if(scat->r_length == 2)
+                {
+                    unsigned long word = 0;
+                    unsigned long* wordPtr = (unsigned long*) (image + sect->offset + scat->r_address);
+                    checkProddableBlock(oc,wordPtr);
+
+                    // Note on relocation types:
+                    // i386 uses the GENERIC_RELOC_* types,
+                    // while ppc uses special PPC_RELOC_* types.
+                    // *_RELOC_VANILLA and *_RELOC_PAIR have the same value
+                    // in both cases, all others are different.
+                    // Therefore, we use GENERIC_RELOC_VANILLA
+                    // and GENERIC_RELOC_PAIR instead of the PPC variants,
+                    // and use #ifdefs for the other types.
+
+                    // Step 1: Figure out what the relocated value should be
+                    if(scat->r_type == GENERIC_RELOC_VANILLA)
+                    {
+                        word = *wordPtr + (unsigned long) relocateAddress(
+                                                                oc,
+                                                                nSections,
+                                                                sections,
+                                                                scat->r_value)
+                                        - scat->r_value;
+                    }
+#ifdef powerpc_HOST_ARCH
+                    else if(scat->r_type == PPC_RELOC_SECTDIFF
+                        || scat->r_type == PPC_RELOC_LO16_SECTDIFF
+                        || scat->r_type == PPC_RELOC_HI16_SECTDIFF
+                        || scat->r_type == PPC_RELOC_HA16_SECTDIFF)
+#else
+                    else if(scat->r_type == GENERIC_RELOC_SECTDIFF)
+#endif
+                    {
+                        /* SECTDIFF: the value is the difference of two
+                           relocated addresses; the second operand comes
+                           from the mandatory PAIR entry that follows. */
+                        struct scattered_relocation_info *pair =
+                                (struct scattered_relocation_info*) &relocs[i+1];
+
+                        if(!pair->r_scattered || pair->r_type != GENERIC_RELOC_PAIR)
+                            barf("Invalid Mach-O file: "
+                                 "RELOC_*_SECTDIFF not followed by RELOC_PAIR");
+
+                        word = (unsigned long)
+                               (relocateAddress(oc, nSections, sections, scat->r_value)
+                              - relocateAddress(oc, nSections, sections, pair->r_value));
+                        i++;
+                    }
+#ifdef powerpc_HOST_ARCH
+                    else if(scat->r_type == PPC_RELOC_HI16
+                         || scat->r_type == PPC_RELOC_LO16
+                         || scat->r_type == PPC_RELOC_HA16
+                         || scat->r_type == PPC_RELOC_LO14)
+                    {   // these are generated by label+offset things
+                        struct relocation_info *pair = &relocs[i+1];
+                        if((pair->r_address & R_SCATTERED) || pair->r_type != PPC_RELOC_PAIR)
+                            barf("Invalid Mach-O file: "
+                                 "PPC_RELOC_* not followed by PPC_RELOC_PAIR");
+
+                        /* Reassemble the full 32-bit value from the
+                           half stored in the instruction plus the half
+                           carried in the PAIR entry's r_address. */
+                        if(scat->r_type == PPC_RELOC_LO16)
+                        {
+                            word = ((unsigned short*) wordPtr)[1];
+                            word |= ((unsigned long) relocs[i+1].r_address & 0xFFFF) << 16;
+                        }
+                        else if(scat->r_type == PPC_RELOC_LO14)
+                        {
+                            barf("Unsupported Relocation: PPC_RELOC_LO14");
+                            word = ((unsigned short*) wordPtr)[1] & 0xFFFC;
+                            word |= ((unsigned long) relocs[i+1].r_address & 0xFFFF) << 16;
+                        }
+                        else if(scat->r_type == PPC_RELOC_HI16)
+                        {
+                            word = ((unsigned short*) wordPtr)[1] << 16;
+                            word |= ((unsigned long) relocs[i+1].r_address & 0xFFFF);
+                        }
+                        else if(scat->r_type == PPC_RELOC_HA16)
+                        {
+                            word = ((unsigned short*) wordPtr)[1] << 16;
+                            word += ((short)relocs[i+1].r_address & (short)0xFFFF);
+                        }
+
+
+                        word += (unsigned long) relocateAddress(oc, nSections, sections, scat->r_value)
+                                                - scat->r_value;
+
+                        i++;
+                    }
+ #endif
+                    else
+                        continue;  // ignore the others
+
+                    /* Step 2: store the relocated value back, in the
+                       representation appropriate for the type. */
+#ifdef powerpc_HOST_ARCH
+                    if(scat->r_type == GENERIC_RELOC_VANILLA
+                        || scat->r_type == PPC_RELOC_SECTDIFF)
+#else
+                    if(scat->r_type == GENERIC_RELOC_VANILLA
+                        || scat->r_type == GENERIC_RELOC_SECTDIFF)
+#endif
+                    {
+                        *wordPtr = word;
+                    }
+#ifdef powerpc_HOST_ARCH
+                    else if(scat->r_type == PPC_RELOC_LO16_SECTDIFF || scat->r_type == PPC_RELOC_LO16)
+                    {
+                        ((unsigned short*) wordPtr)[1] = word & 0xFFFF;
+                    }
+                    else if(scat->r_type == PPC_RELOC_HI16_SECTDIFF || scat->r_type == PPC_RELOC_HI16)
+                    {
+                        ((unsigned short*) wordPtr)[1] = (word >> 16) & 0xFFFF;
+                    }
+                    else if(scat->r_type == PPC_RELOC_HA16_SECTDIFF || scat->r_type == PPC_RELOC_HA16)
+                    {
+                        ((unsigned short*) wordPtr)[1] = ((word >> 16) & 0xFFFF)
+                            + ((word & (1<<15)) ? 1 : 0);
+                    }
+#endif
+                }
+            }
+
+            continue; // FIXME: I hope it's OK to ignore all the others.
+        }
+        else
+        {
+            /* Ordinary (non-scattered) relocation entry. */
+            struct relocation_info *reloc = &relocs[i];
+            if(reloc->r_pcrel && !reloc->r_extern)
+                continue;
+
+            if(reloc->r_length == 2)
+            {
+                unsigned long word = 0;
+#ifdef powerpc_HOST_ARCH
+                unsigned long jumpIsland = 0;
+                long offsetToJumpIsland = 0xBADBAD42; // initialise to bad value
+                                                      // to avoid warning and to catch
+                                                      // bugs.
+#endif
+
+                unsigned long* wordPtr = (unsigned long*) (image + sect->offset + reloc->r_address);
+                checkProddableBlock(oc,wordPtr);
+
+                /* Step 1: recover the (partial) value currently stored
+                   at the relocation target. */
+                if(reloc->r_type == GENERIC_RELOC_VANILLA)
+                {
+                    word = *wordPtr;
+                }
+#ifdef powerpc_HOST_ARCH
+                else if(reloc->r_type == PPC_RELOC_LO16)
+                {
+                    word = ((unsigned short*) wordPtr)[1];
+                    word |= ((unsigned long) relocs[i+1].r_address & 0xFFFF) << 16;
+                }
+                else if(reloc->r_type == PPC_RELOC_HI16)
+                {
+                    word = ((unsigned short*) wordPtr)[1] << 16;
+                    word |= ((unsigned long) relocs[i+1].r_address & 0xFFFF);
+                }
+                else if(reloc->r_type == PPC_RELOC_HA16)
+                {
+                    word = ((unsigned short*) wordPtr)[1] << 16;
+                    word += ((short)relocs[i+1].r_address & (short)0xFFFF);
+                }
+                else if(reloc->r_type == PPC_RELOC_BR24)
+                {
+                    /* Sign-extend the 24-bit branch displacement. */
+                    word = *wordPtr;
+                    word = (word & 0x03FFFFFC) | ((word & 0x02000000) ? 0xFC000000 : 0);
+                }
+#endif
+
+                /* Step 2: add in the symbol's (or section's) address. */
+                if(!reloc->r_extern)
+                {
+                    long delta =
+                        sections[reloc->r_symbolnum-1].offset
+                        - sections[reloc->r_symbolnum-1].addr
+                        + ((long) image);
+
+                    word += delta;
+                }
+                else
+                {
+                    struct nlist *symbol = &nlist[reloc->r_symbolnum];
+                    char *nm = image + symLC->stroff + symbol->n_un.n_strx;
+                    void *symbolAddress = lookupSymbol(nm);
+                    if(!symbolAddress)
+                    {
+                        errorBelch("\nunknown symbol `%s'", nm);
+                        return 0;
+                    }
+
+                    if(reloc->r_pcrel)
+                    {
+#ifdef powerpc_HOST_ARCH
+                            // In the .o file, this should be a relative jump to NULL
+                            // and we'll change it to a relative jump to the symbol
+                        ASSERT(-word == reloc->r_address);
+                        jumpIsland = makeJumpIsland(oc,reloc->r_symbolnum,(unsigned long) symbolAddress);
+                        if(jumpIsland != 0)
+                        {
+                            offsetToJumpIsland = word + jumpIsland
+                                - (((long)image) + sect->offset - sect->addr);
+                        }
+#endif
+                        word += (unsigned long) symbolAddress
+                                - (((long)image) + sect->offset - sect->addr);
+                    }
+                    else
+                    {
+                        word += (unsigned long) symbolAddress;
+                    }
+                }
+
+                /* Step 3: store the result, again per type. */
+                if(reloc->r_type == GENERIC_RELOC_VANILLA)
+                {
+                    *wordPtr = word;
+                    continue;
+                }
+#ifdef powerpc_HOST_ARCH
+                else if(reloc->r_type == PPC_RELOC_LO16)
+                {
+                    ((unsigned short*) wordPtr)[1] = word & 0xFFFF;
+                    i++; continue;
+                }
+                else if(reloc->r_type == PPC_RELOC_HI16)
+                {
+                    ((unsigned short*) wordPtr)[1] = (word >> 16) & 0xFFFF;
+                    i++; continue;
+                }
+                else if(reloc->r_type == PPC_RELOC_HA16)
+                {
+                    ((unsigned short*) wordPtr)[1] = ((word >> 16) & 0xFFFF)
+                        + ((word & (1<<15)) ? 1 : 0);
+                    i++; continue;
+                }
+                else if(reloc->r_type == PPC_RELOC_BR24)
+                {
+                    if((long)word > (long)0x01FFFFFF || (long)word < (long)0xFFE00000)
+                    {
+                        // The branch offset is too large.
+                        // Therefore, we try to use a jump island.
+                        if(jumpIsland == 0)
+                        {
+                            barf("unconditional relative branch out of range: "
+                                 "no jump island available");
+                        }
+
+                        word = offsetToJumpIsland;
+                        if((long)word > (long)0x01FFFFFF || (long)word < (long)0xFFE00000)
+                            barf("unconditional relative branch out of range: "
+                                 "jump island out of range");
+                    }
+                    *wordPtr = (*wordPtr & 0xFC000003) | (word & 0x03FFFFFC);
+                    continue;
+                }
+#endif
+            }
+            barf("\nunknown relocation %d",reloc->r_type);
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Collect the symbols defined by this Mach-O object module.
+
+   Registers the object's sections (allocating zeroed storage for
+   zero-fill sections), counts and records external symbols in
+   oc->symbols, enters globals into 'symhash' and non-externals into
+   the per-object oc->lochash, and allocates one shared block of zeroed
+   storage for all common symbols (repointing each symbol's n_value at
+   its slice).  Returns 1 (there is no failure path). */
+static int ocGetNames_MachO(ObjectCode* oc)
+{
+    char *image = (char*) oc->image;
+    struct mach_header *header = (struct mach_header*) image;
+    struct load_command *lc = (struct load_command*) (image + sizeof(struct mach_header));
+    unsigned i,curSymbol = 0;
+    struct segment_command *segLC = NULL;
+    struct section *sections;
+    struct symtab_command *symLC = NULL;
+    struct nlist *nlist;
+    unsigned long commonSize = 0;
+    char    *commonStorage = NULL;
+    unsigned long commonCounter;
+
+    /* Locate the segment and symtab load commands. */
+    for(i=0;i<header->ncmds;i++)
+    {
+        if(lc->cmd == LC_SEGMENT)
+            segLC = (struct segment_command*) lc;
+        else if(lc->cmd == LC_SYMTAB)
+            symLC = (struct symtab_command*) lc;
+        lc = (struct load_command *) ( ((char*)lc) + lc->cmdsize );
+    }
+
+    sections = (struct section*) (segLC+1);
+    nlist = symLC ? (struct nlist*) (image + symLC->symoff)
+                  : NULL;
+
+    for(i=0;i<segLC->nsects;i++)
+    {
+        if(sections[i].size == 0)
+            continue;
+
+        if((sections[i].flags & SECTION_TYPE) == S_ZEROFILL)
+        {
+            /* Zero-fill section: allocate zeroed storage and point the
+               section's offset at it (relative to the image base). */
+            char * zeroFillArea = stgCallocBytes(1,sections[i].size,
+                                      "ocGetNames_MachO(common symbols)");
+            sections[i].offset = zeroFillArea - image;
+        }
+
+        if(!strcmp(sections[i].sectname,"__text"))
+            addSection(oc, SECTIONKIND_CODE_OR_RODATA,
+                (void*) (image + sections[i].offset),
+                (void*) (image + sections[i].offset + sections[i].size));
+        else if(!strcmp(sections[i].sectname,"__const"))
+            addSection(oc, SECTIONKIND_RWDATA,
+                (void*) (image + sections[i].offset),
+                (void*) (image + sections[i].offset + sections[i].size));
+        else if(!strcmp(sections[i].sectname,"__data"))
+            addSection(oc, SECTIONKIND_RWDATA,
+                (void*) (image + sections[i].offset),
+                (void*) (image + sections[i].offset + sections[i].size));
+        else if(!strcmp(sections[i].sectname,"__bss")
+                || !strcmp(sections[i].sectname,"__common"))
+            addSection(oc, SECTIONKIND_RWDATA,
+                (void*) (image + sections[i].offset),
+                (void*) (image + sections[i].offset + sections[i].size));
+
+        addProddableBlock(oc, (void*) (image + sections[i].offset),
+                                        sections[i].size);
+    }
+
+    // count external symbols defined here
+    oc->n_symbols = 0;
+    if(symLC)
+    {
+        for(i=0;i<symLC->nsyms;i++)
+        {
+            if(nlist[i].n_type & N_STAB)
+                ;
+            else if(nlist[i].n_type & N_EXT)
+            {
+                if((nlist[i].n_type & N_TYPE) == N_UNDF
+                    && (nlist[i].n_value != 0))
+                {
+                    /* common symbol: n_value holds its size */
+                    commonSize += nlist[i].n_value;
+                    oc->n_symbols++;
+                }
+                else if((nlist[i].n_type & N_TYPE) == N_SECT)
+                    oc->n_symbols++;
+            }
+        }
+    }
+    oc->symbols = stgMallocBytes(oc->n_symbols * sizeof(char*),
+                                   "ocGetNames_MachO(oc->symbols)");
+
+    if(symLC)
+    {
+        for(i=0;i<symLC->nsyms;i++)
+        {
+            if(nlist[i].n_type & N_STAB)
+                ;
+            else if((nlist[i].n_type & N_TYPE) == N_SECT)
+            {
+                if(nlist[i].n_type & N_EXT)
+                {
+                    /* defined external: goes in the global table */
+                    char *nm = image + symLC->stroff + nlist[i].n_un.n_strx;
+                    ghciInsertStrHashTable(oc->fileName, symhash, nm,
+                                            image
+                                            + sections[nlist[i].n_sect-1].offset
+                                            - sections[nlist[i].n_sect-1].addr
+                                            + nlist[i].n_value);
+                    oc->symbols[curSymbol++] = nm;
+                }
+                else
+                {
+                    /* non-external: object-local table only */
+                    char *nm = image + symLC->stroff + nlist[i].n_un.n_strx;
+                    ghciInsertStrHashTable(oc->fileName, oc->lochash, nm,
+                                            image
+                                            + sections[nlist[i].n_sect-1].offset
+                                            - sections[nlist[i].n_sect-1].addr
+                                            + nlist[i].n_value);
+                }
+            }
+        }
+    }
+
+    /* Carve the common-symbol block into per-symbol slices, repointing
+       each symbol's n_value at its storage. */
+    commonStorage = stgCallocBytes(1,commonSize,"ocGetNames_MachO(common symbols)");
+    commonCounter = (unsigned long)commonStorage;
+    if(symLC)
+    {
+        for(i=0;i<symLC->nsyms;i++)
+        {
+            if((nlist[i].n_type & N_TYPE) == N_UNDF
+                    && (nlist[i].n_type & N_EXT) && (nlist[i].n_value != 0))
+            {
+                char *nm = image + symLC->stroff + nlist[i].n_un.n_strx;
+                unsigned long sz = nlist[i].n_value;
+
+                nlist[i].n_value = commonCounter;
+
+                ghciInsertStrHashTable(oc->fileName, symhash, nm,
+                                       (void*)commonCounter);
+                oc->symbols[curSymbol++] = nm;
+
+                commonCounter += sz;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Resolve a Mach-O object: fill in all symbol-pointer/stub sections
+   via resolveImports, then relocate every section.  Frees the
+   per-object local-symbol hash afterwards.  Returns 1 on success,
+   0 on failure. */
+static int ocResolve_MachO(ObjectCode* oc)
+{
+    char *image = (char*) oc->image;
+    struct mach_header *header = (struct mach_header*) image;
+    struct load_command *lc = (struct load_command*) (image + sizeof(struct mach_header));
+    unsigned i;
+    struct segment_command *segLC = NULL;
+    struct section *sections;
+    struct symtab_command *symLC = NULL;
+    struct dysymtab_command *dsymLC = NULL;
+    struct nlist *nlist;
+
+    /* Locate the segment, symtab and dysymtab load commands. */
+    for(i=0;i<header->ncmds;i++)
+    {
+        if(lc->cmd == LC_SEGMENT)
+            segLC = (struct segment_command*) lc;
+        else if(lc->cmd == LC_SYMTAB)
+            symLC = (struct symtab_command*) lc;
+        else if(lc->cmd == LC_DYSYMTAB)
+            dsymLC = (struct dysymtab_command*) lc;
+        lc = (struct load_command *) ( ((char*)lc) + lc->cmdsize );
+    }
+
+    sections = (struct section*) (segLC+1);
+    nlist = symLC ? (struct nlist*) (image + symLC->symoff)
+                  : NULL;
+
+    if(dsymLC)
+    {
+        unsigned long *indirectSyms
+            = (unsigned long*) (image + dsymLC->indirectsymoff);
+
+        /* Resolve all import-style sections first so that relocation
+           can skip them. */
+        for(i=0;i<segLC->nsects;i++)
+        {
+            if(    !strcmp(sections[i].sectname,"__la_symbol_ptr")
+                || !strcmp(sections[i].sectname,"__la_sym_ptr2")
+                || !strcmp(sections[i].sectname,"__la_sym_ptr3"))
+            {
+                if(!resolveImports(oc,image,symLC,&sections[i],indirectSyms,nlist))
+                    return 0;
+            }
+            else if(!strcmp(sections[i].sectname,"__nl_symbol_ptr")
+                ||  !strcmp(sections[i].sectname,"__pointers"))
+            {
+                if(!resolveImports(oc,image,symLC,&sections[i],indirectSyms,nlist))
+                    return 0;
+            }
+            else if(!strcmp(sections[i].sectname,"__jump_table"))
+            {
+                if(!resolveImports(oc,image,symLC,&sections[i],indirectSyms,nlist))
+                    return 0;
+            }
+        }
+    }
+
+    for(i=0;i<segLC->nsects;i++)
+    {
+        if(!relocateSection(oc,image,symLC,nlist,segLC->nsects,sections,&sections[i]))
+            return 0;
+    }
+
+    /* Free the local symbol table; we won't need it again. */
+    freeHashTable(oc->lochash, NULL);
+    oc->lochash = NULL;
+
+#if defined (powerpc_HOST_ARCH)
+    /* Make the freshly written code visible to the instruction fetch
+       unit. */
+    ocFlushInstructionCache( oc );
+#endif
+
+    return 1;
+}
+
+#ifdef powerpc_HOST_ARCH
+/*
+ * The Mach-O object format uses leading underscores. But not everywhere.
+ * There is a small number of runtime support functions defined in
+ * libcc_dynamic.a whose name does not have a leading underscore.
+ * As a consequence, we can't get their address from C code.
+ * We have to use inline assembler just to take the address of a function.
+ * Yuck.
+ */
+
+/* Register the RTS_MACHO_NOUNDERLINE_SYMBOLS in 'symhash'.  The inline
+   asm builds a data table of the symbols' addresses (their names have
+   no leading underscore, so they are unreachable from C); the second
+   macro expansion then walks that table inserting each address under
+   its name. */
+static void machoInitSymbolsWithoutUnderscore()
+{
+    extern void* symbolsWithoutUnderscore[];
+    void **p = symbolsWithoutUnderscore;
+    __asm__ volatile(".globl _symbolsWithoutUnderscore\n.data\n_symbolsWithoutUnderscore:");
+
+#undef Sym
+#define Sym(x)  \
+    __asm__ volatile(".long " # x);
+
+    RTS_MACHO_NOUNDERLINE_SYMBOLS
+
+    __asm__ volatile(".text");
+    
+#undef Sym
+#define Sym(x)  \
+    ghciInsertStrHashTable("(GHCi built-in symbols)", symhash, #x, *p++);
+    
+    RTS_MACHO_NOUNDERLINE_SYMBOLS
+    
+#undef Sym
+}
+#endif
+
+/*
+ * Figure out by how much to shift the entire Mach-O file in memory
+ * when loading so that its single segment ends up 16-byte-aligned
+ */
+/*
+ * Figure out by how much to shift the entire Mach-O file in memory
+ * when loading so that its single segment ends up 16-byte-aligned.
+ * Returns 0 for non-Mach-O files or if the header cannot be read.
+ */
+static int machoGetMisalignment( FILE * f )
+{
+  struct mach_header header;
+  int misalignment;
+
+  /* Check the fread result: on a short read the header would be
+     garbage, so just report no misalignment. */
+  if( fread(&header, sizeof(header), 1, f) != 1 )
+  {
+    rewind(f);
+    return 0;
+  }
+  rewind(f);
+
+  if(header.magic != MH_MAGIC)
+    return 0;
+
+  misalignment = (header.sizeofcmds + sizeof(header))
+                  & 0xF;
+
+  return misalignment ? (16 - misalignment) : 0;
+}
+
+#endif
diff --git a/rts/LinkerInternals.h b/rts/LinkerInternals.h
new file mode 100644
index 0000000000..07d6334c7f
--- /dev/null
+++ b/rts/LinkerInternals.h
@@ -0,0 +1,110 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2000
+ *
+ * RTS Object Linker
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef LINKERINTERNALS_H
+#define LINKERINTERNALS_H
+
+typedef enum { OBJECT_LOADED, OBJECT_RESOLVED } OStatus;
+
+/* Indication of section kinds for loaded objects. Needed by
+ the GC for deciding whether or not a pointer on the stack
+ is a code pointer.
+*/
+typedef
+ enum { SECTIONKIND_CODE_OR_RODATA,
+ SECTIONKIND_RWDATA,
+ SECTIONKIND_OTHER,
+ SECTIONKIND_NOINFOAVAIL }
+ SectionKind;
+
+typedef
+ struct _Section {
+ void* start;
+ void* end;
+ SectionKind kind;
+ struct _Section* next;
+ }
+ Section;
+
+typedef
+ struct _ProddableBlock {
+ void* start;
+ int size;
+ struct _ProddableBlock* next;
+ }
+ ProddableBlock;
+
+/* Jump Islands are sniplets of machine code required for relative
+ * address relocations on the PowerPC.
+ */
+#ifdef powerpc_HOST_ARCH
+typedef struct {
+ short lis_r12, hi_addr;
+ short ori_r12_r12, lo_addr;
+ long mtctr_r12;
+ long bctr;
+} ppcJumpIsland;
+#endif
+
+/* Top-level structure for an object module. One of these is allocated
+ * for each object file in use.
+ */
+typedef struct _ObjectCode {
+ OStatus status;
+ char* fileName;
+ int fileSize;
+ char* formatName; /* eg "ELF32", "DLL", "COFF", etc. */
+
+ /* An array containing ptrs to all the symbol names copied from
+ this object into the global symbol hash table. This is so that
+ we know which parts of the latter mapping to nuke when this
+ object is removed from the system. */
+ char** symbols;
+ int n_symbols;
+
+ /* ptr to malloc'd lump of memory holding the obj file */
+ char* image;
+
+#ifdef darwin_HOST_OS
+ /* record by how much image has been deliberately misaligned
+ after allocation, so that we can use realloc */
+ int misalignment;
+#endif
+
+ /* The section-kind entries for this object module. Linked
+ list. */
+ Section* sections;
+
+ /* A private hash table for local symbols. */
+ HashTable* lochash;
+
+ /* Allow a chain of these things */
+ struct _ObjectCode * next;
+
+ /* SANITY CHECK ONLY: a list of the only memory regions which may
+ safely be prodded during relocation. Any attempt to prod
+ outside one of these is an error in the linker. */
+ ProddableBlock* proddables;
+
+#ifdef ia64_HOST_ARCH
+ /* Procedure Linkage Table for this object */
+ void *plt;
+ unsigned int pltIndex;
+#endif
+
+#ifdef powerpc_HOST_ARCH
+ ppcJumpIsland *jump_islands;
+ unsigned long island_start_symbol;
+ unsigned long n_islands;
+#endif
+
+} ObjectCode;
+
+extern ObjectCode *objects;
+
+#endif /* LINKERINTERNALS_H */
diff --git a/rts/MBlock.c b/rts/MBlock.c
new file mode 100644
index 0000000000..fa8fd49d88
--- /dev/null
+++ b/rts/MBlock.c
@@ -0,0 +1,453 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-1999
+ *
+ * MegaBlock Allocator Interface. This file contains all the dirty
+ * architecture-dependent hackery required to get a chunk of aligned
+ * memory from the operating system.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* This is non-posix compliant. */
+/* #include "PosixSource.h" */
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "MBlock.h"
+#include "BlockAlloc.h"
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifndef mingw32_HOST_OS
+# ifdef HAVE_SYS_MMAN_H
+# include <sys/mman.h>
+# endif
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#if HAVE_WINDOWS_H
+#include <windows.h>
+#endif
+#if darwin_HOST_OS
+#include <mach/vm_map.h>
+#endif
+
+#include <errno.h>
+
+lnat mblocks_allocated = 0;
+
+/* -----------------------------------------------------------------------------
+ The MBlock Map: provides our implementation of HEAP_ALLOCED()
+ -------------------------------------------------------------------------- */
+
+#if SIZEOF_VOID_P == 4
+StgWord8 mblock_map[MBLOCK_MAP_SIZE]; // initially all zeros
+#elif SIZEOF_VOID_P == 8
+static MBlockMap dummy_mblock_map;
+MBlockMap *mblock_cache = &dummy_mblock_map;
+int mblock_map_count = 0;
+MBlockMap **mblock_maps = NULL;
+
+static MBlockMap *
+findMBlockMap(void *p)
+{
+ int i;
+ StgWord32 hi = (StgWord32) (((StgWord)p) >> 32);
+ for( i = 0; i < mblock_map_count; i++ )
+ {
+ if(mblock_maps[i]->addrHigh32 == hi)
+ {
+ return mblock_maps[i];
+ }
+ }
+ return NULL;
+}
+
+StgBool
+slowIsHeapAlloced(void *p)
+{
+ MBlockMap *map = findMBlockMap(p);
+ if(map)
+ {
+ mblock_cache = map;
+ return map->mblocks[MBLOCK_MAP_ENTRY(p)];
+ }
+ else
+ return 0;
+}
+#endif
+
+static void
+markHeapAlloced(void *p)
+{
+#if SIZEOF_VOID_P == 4
+ mblock_map[MBLOCK_MAP_ENTRY(p)] = 1;
+#elif SIZEOF_VOID_P == 8
+ MBlockMap *map = findMBlockMap(p);
+ if(map == NULL)
+ {
+ mblock_map_count++;
+ mblock_maps = realloc(mblock_maps,
+ sizeof(MBlockMap*) * mblock_map_count);
+ map = mblock_maps[mblock_map_count-1] = calloc(1,sizeof(MBlockMap));
+ map->addrHigh32 = (StgWord32) (((StgWord)p) >> 32);
+ }
+ map->mblocks[MBLOCK_MAP_ENTRY(p)] = 1;
+ mblock_cache = map;
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Allocate new mblock(s)
+ -------------------------------------------------------------------------- */
+
+void *
+getMBlock(void)
+{
+ return getMBlocks(1);
+}
+
+/* -----------------------------------------------------------------------------
+ The mmap() method
+
+ On Unix-like systems, we use mmap() to allocate our memory. We
+ want memory in chunks of MBLOCK_SIZE, and aligned on an MBLOCK_SIZE
+ boundary. The mmap() interface doesn't give us this level of
+ control, so we have to use some heuristics.
+
+ In the general case, if we want a block of n megablocks, then we
+ allocate n+1 and trim off the slop from either side (using
+ munmap()) to get an aligned chunk of size n. However, the next
+ time we'll try to allocate directly after the previously allocated
+ chunk, on the grounds that this is aligned and likely to be free.
+ If it turns out that we were wrong, we have to munmap() and try
+ again using the general method.
+
+ Note on posix_memalign(): this interface is available on recent
+ systems and appears to provide exactly what we want. However, it
+ turns out not to be as good as our mmap() implementation, because
+ it wastes extra space (using double the address space, in a test on
+ x86_64/Linux). The problem seems to be that posix_memalign()
+ returns memory that can be free()'d, so the library must store
+ extra information along with the allocated block, thus messing up
+ the alignment. Hence, we don't use posix_memalign() for now.
+
+ -------------------------------------------------------------------------- */
+
+#if !defined(mingw32_HOST_OS) && !defined(cygwin32_HOST_OS)
+
+// A wrapper around mmap(), to abstract away from OS differences in
+// the mmap() interface.
+
+static void *
+my_mmap (void *addr, lnat size)
+{
+ void *ret;
+
+#if defined(solaris2_HOST_OS) || defined(irix_HOST_OS)
+ {
+ int fd = open("/dev/zero",O_RDONLY);
+ ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ close(fd);
+ }
+#elif hpux_HOST_OS
+ ret = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#elif darwin_HOST_OS
+ // Without MAP_FIXED, Apple's mmap ignores addr.
+ // With MAP_FIXED, it overwrites already mapped regions, which
+ // mmap(0, ... MAP_FIXED ...) is worst of all: It unmaps the program text
+ // and replaces it with zeroes, causing instant death.
+ // This behaviour seems to be conformant with IEEE Std 1003.1-2001.
+ // Let's just use the underlying Mach Microkernel calls directly,
+ // they're much nicer.
+
+ kern_return_t err;
+ ret = addr;
+ if(addr) // try to allocate at address
+ err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, FALSE);
+ if(!addr || err) // try to allocate anywhere
+ err = vm_allocate(mach_task_self(),(vm_address_t*) &ret, size, TRUE);
+
+ if(err) {
+ // don't know what the error codes mean exactly, assume it's
+ // not our problem though.
+ errorBelch("memory allocation failed (requested %lu bytes)", size);
+ stg_exit(EXIT_FAILURE);
+ } else {
+ vm_protect(mach_task_self(),ret,size,FALSE,VM_PROT_READ|VM_PROT_WRITE);
+ }
+#else
+ ret = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+#endif
+
+ if (ret == (void *)-1) {
+ if (errno == ENOMEM ||
+ (errno == EINVAL && sizeof(void*)==4 && size >= 0xc0000000)) {
+ // If we request more than 3Gig, then we get EINVAL
+ // instead of ENOMEM (at least on Linux).
+ errorBelch("out of memory (requested %lu bytes)", size);
+ stg_exit(EXIT_FAILURE);
+ } else {
+ barf("getMBlock: mmap: %s", strerror(errno));
+ }
+ }
+
+ return ret;
+}
+
+// Implements the general case: allocate a chunk of memory of 'size'
+// mblocks.
+
+static void *
+gen_map_mblocks (lnat size)
+{
+ int slop;
+ void *ret;
+
+ // Try to map a larger block, and take the aligned portion from
+ // it (unmap the rest).
+ size += MBLOCK_SIZE;
+ ret = my_mmap(0, size);
+
+ // unmap the slop bits around the chunk we allocated
+ slop = (W_)ret & MBLOCK_MASK;
+
+ if (munmap(ret, MBLOCK_SIZE - slop) == -1) {
+ barf("gen_map_mblocks: munmap failed");
+ }
+ if (slop > 0 && munmap(ret+size-slop, slop) == -1) {
+ barf("gen_map_mblocks: munmap failed");
+ }
+
+ // ToDo: if we happened to get an aligned block, then don't
+ // unmap the excess, just use it. For this to work, you
+ // need to keep in mind the following:
+ // * Calling my_mmap() with an 'addr' arg pointing to
+ // already my_mmap()ed space is OK and won't fail.
+ // * If my_mmap() can't satisfy the request at the
+ // given 'next_request' address in getMBlocks(), that
+ // you unmap the extra mblock mmap()ed here (or simply
+ // satisfy yourself that the slop introduced isn't worth
+ // salvaging.)
+ //
+
+ // next time, try after the block we just got.
+ ret += MBLOCK_SIZE - slop;
+ return ret;
+}
+
+
+// The external interface: allocate 'n' mblocks, and return the
+// address.
+
+void *
+getMBlocks(nat n)
+{
+ static caddr_t next_request = (caddr_t)HEAP_BASE;
+ caddr_t ret;
+ lnat size = MBLOCK_SIZE * n;
+ nat i;
+
+ if (next_request == 0) {
+ // use gen_map_mblocks the first time.
+ ret = gen_map_mblocks(size);
+ } else {
+ ret = my_mmap(next_request, size);
+
+ if (((W_)ret & MBLOCK_MASK) != 0) {
+ // misaligned block!
+#if 0 // defined(DEBUG)
+ errorBelch("warning: getMBlock: misaligned block %p returned when allocating %d megablock(s) at %p", ret, n, next_request);
+#endif
+
+ // unmap this block...
+ if (munmap(ret, size) == -1) {
+ barf("getMBlock: munmap failed");
+ }
+ // and do it the hard way
+ ret = gen_map_mblocks(size);
+ }
+ }
+
+ // Next time, we'll try to allocate right after the block we just got.
+ // ToDo: check that we haven't already grabbed the memory at next_request
+ next_request = ret + size;
+
+ IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at %p\n",n,ret));
+
+ // fill in the table
+ for (i = 0; i < n; i++) {
+ markHeapAlloced( ret + i * MBLOCK_SIZE );
+ }
+
+ mblocks_allocated += n;
+
+ return ret;
+}
+
+void
+freeAllMBlocks(void)
+{
+ /* XXX Do something here */
+}
+
+#else /* defined(mingw32_HOST_OS) || defined(cygwin32_HOST_OS) */
+
+/*
+ On Win32 platforms we make use of the two-phased virtual memory API
+ to allocate mega blocks. We proceed as follows:
+
+ Reserve a large chunk of VM (256M at the time, or what the user asked
+ for via the -M option), but don't supply a base address that's aligned on
+ a MB boundary. Instead we round up to the nearest mblock from the chunk of
+ VM we're handed back from the OS (at the moment we just leave the 'slop' at
+ the beginning of the reserved chunk unused - ToDo: reuse it .)
+
+ Reserving memory doesn't allocate physical storage (not even in the
+ page file), this is done later on by committing pages (or mega-blocks in
+ our case).
+*/
+
+static char* base_non_committed = (char*)0;
+static char* end_non_committed = (char*)0;
+
+static void *membase;
+
+/* Default is to reserve 256M of VM to minimise the slop cost. */
+#define SIZE_RESERVED_POOL ( 256 * 1024 * 1024 )
+
+/* Number of bytes reserved */
+static unsigned long size_reserved_pool = SIZE_RESERVED_POOL;
+
+void *
+getMBlocks(nat n)
+{
+ static char* base_mblocks = (char*)0;
+ static char* next_request = (char*)0;
+ void* ret = (void*)0;
+ nat i;
+
+ lnat size = MBLOCK_SIZE * n;
+
+ if ( (base_non_committed == 0) || (next_request + size > end_non_committed) ) {
+ if (base_non_committed) {
+ /* Tacky, but if no user-provided -M option is in effect,
+ * set it to the default (==256M) in time for the heap overflow PSA.
+ */
+ if (RtsFlags.GcFlags.maxHeapSize == 0) {
+ RtsFlags.GcFlags.maxHeapSize = size_reserved_pool / BLOCK_SIZE;
+ }
+ heapOverflow();
+ }
+ if (RtsFlags.GcFlags.maxHeapSize != 0) {
+ size_reserved_pool = BLOCK_SIZE * RtsFlags.GcFlags.maxHeapSize;
+ if (size_reserved_pool < MBLOCK_SIZE) {
+ size_reserved_pool = 2*MBLOCK_SIZE;
+ }
+ }
+ base_non_committed = VirtualAlloc ( NULL
+ , size_reserved_pool
+ , MEM_RESERVE
+ , PAGE_READWRITE
+ );
+ membase = base_non_committed;
+ if ( base_non_committed == 0 ) {
+ errorBelch("getMBlocks: VirtualAlloc MEM_RESERVE %lu failed with: %ld\n", size_reserved_pool, GetLastError());
+ ret=(void*)-1;
+ } else {
+ end_non_committed = (char*)base_non_committed + (unsigned long)size_reserved_pool;
+ /* The returned pointer is not aligned on a mega-block boundary. Make it. */
+ base_mblocks = (char*)((unsigned long)base_non_committed & (unsigned long)~MBLOCK_MASK) + MBLOCK_SIZE;
+# if 0
+ debugBelch("getMBlocks: Dropping %d bytes off of 256M chunk\n",
+ (unsigned)base_mblocks - (unsigned)base_non_committed);
+# endif
+
+ if ( ((char*)base_mblocks + size) > end_non_committed ) {
+ debugBelch("getMBlocks: oops, committed too small a region to start with.");
+ ret=(void*)-1;
+ } else {
+ next_request = base_mblocks;
+ }
+ }
+ }
+ /* Commit the mega block(s) to phys mem */
+ if ( ret != (void*)-1 ) {
+ ret = VirtualAlloc(next_request, size, MEM_COMMIT, PAGE_READWRITE);
+ if (ret == NULL) {
+ debugBelch("getMBlocks: VirtualAlloc MEM_COMMIT %lu failed with: %ld\n", size, GetLastError());
+ ret=(void*)-1;
+ }
+ }
+
+ if (((W_)ret & MBLOCK_MASK) != 0) {
+ barf("getMBlocks: misaligned block returned");
+ }
+
+ if (ret == (void*)-1) {
+ barf("getMBlocks: unknown memory allocation failure on Win32.");
+ }
+
+ IF_DEBUG(gc,debugBelch("Allocated %d megablock(s) at 0x%x\n",n,(nat)ret));
+ next_request = (char*)next_request + size;
+
+ mblocks_allocated += n;
+
+ // fill in the table
+ for (i = 0; i < n; i++) {
+ markHeapAlloced( ret + i * MBLOCK_SIZE );
+ }
+
+ return ret;
+}
+
+void
+freeAllMBlocks(void)
+{
+ BOOL rc;
+
+ rc = VirtualFree(membase, 0, MEM_RELEASE);
+
+ if (rc == FALSE) {
+ debugBelch("freeAllMBlocks: VirtualFree failed with: %ld\n", GetLastError());
+ }
+}
+
+/* Hand back the physical memory that is allocated to a mega-block.
+ ToDo: chain the released mega block onto some list so that
+ getMBlocks() can get at it.
+
+ Currently unused.
+*/
+#if 0
+void
+freeMBlock(void* p, nat n)
+{
+ BOOL rc;
+
+ rc = VirtualFree(p, n * MBLOCK_SIZE , MEM_DECOMMIT );
+
+ if (rc == FALSE) {
+# ifdef DEBUG
+ debugBelch("freeMBlocks: VirtualFree failed with: %d\n", GetLastError());
+# endif
+ }
+
+}
+#endif
+
+#endif
diff --git a/rts/MBlock.h b/rts/MBlock.h
new file mode 100644
index 0000000000..1cc0dc5a1f
--- /dev/null
+++ b/rts/MBlock.h
@@ -0,0 +1,90 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * MegaBlock Allocator interface.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef MBLOCK_H
+#define MBLOCK_H
+
+extern lnat RTS_VAR(mblocks_allocated);
+
+extern void * getMBlock(void);
+extern void * getMBlocks(nat n);
+extern void freeAllMBlocks(void);
+
+#if osf3_HOST_OS
+/* ToDo: Perhaps by adjusting this value we can make linking without
+ * -static work (i.e., not generate a core-dumping executable)? */
+#if SIZEOF_VOID_P == 8
+#define HEAP_BASE 0x180000000L
+#else
+#error I have no idea where to begin the heap on a non-64-bit osf3 machine.
+#endif
+
+#else
+
+// we're using the generic method
+#define HEAP_BASE 0
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ The HEAP_ALLOCED() test.
+
+ HEAP_ALLOCED is called FOR EVERY SINGLE CLOSURE during GC.
+ It needs to be FAST.
+
+ Implementation of HEAP_ALLOCED
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Since heap is allocated in chunks of megablocks (MBLOCK_SIZE), we
+ can just use a table to record which megablocks in the address
+ space belong to the heap. On a 32-bit machine, with 1Mb
+ megablocks, using 8 bits for each entry in the table, the table
+ requires 4k. Lookups during GC will be fast, because the table
+ will be quickly cached (indeed, performance measurements showed no
+ measurable difference between doing the table lookup and using a
+ constant comparison).
+
+ On 64-bit machines, we cache one 12-bit block map that describes
+ 4096 megablocks or 4GB of memory. If HEAP_ALLOCED is called for
+ an address that is not in the cache, it calls slowIsHeapAlloced
+ (see MBlock.c) which will find the block map for the 4GB block in
+ question.
+ -------------------------------------------------------------------------- */
+
+#if SIZEOF_VOID_P == 4
+extern StgWord8 mblock_map[];
+
+/* On a 32-bit machine a 4KB table is always sufficient */
+# define MBLOCK_MAP_SIZE 4096
+# define MBLOCK_MAP_ENTRY(p) ((StgWord)(p) >> MBLOCK_SHIFT)
+# define HEAP_ALLOCED(p) mblock_map[MBLOCK_MAP_ENTRY(p)]
+
+#elif SIZEOF_VOID_P == 8
+
+# define MBLOCK_MAP_SIZE 4096
+# define MBLOCK_MAP_ENTRY(p) (((StgWord)(p) & 0xffffffff) >> MBLOCK_SHIFT)
+
+typedef struct {
+ StgWord32 addrHigh32;
+ StgWord8 mblocks[MBLOCK_MAP_SIZE];
+} MBlockMap;
+
+extern MBlockMap *mblock_cache;
+
+StgBool slowIsHeapAlloced(void *p);
+
+# define HEAP_ALLOCED(p) \
+ ( ((((StgWord)(p)) >> 32) == mblock_cache->addrHigh32) \
+ ? mblock_cache->mblocks[MBLOCK_MAP_ENTRY(p)] \
+ : slowIsHeapAlloced(p) )
+
+#else
+# error HEAP_ALLOCED not defined
+#endif
+
+#endif /* MBLOCK_H */
diff --git a/rts/Main.c b/rts/Main.c
new file mode 100644
index 0000000000..6aef280e25
--- /dev/null
+++ b/rts/Main.c
@@ -0,0 +1,138 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2000
+ *
+ * Main function for a standalone Haskell program.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#define COMPILING_RTS_MAIN
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsAPI.h"
+#include "SchedAPI.h"
+#include "Schedule.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Prelude.h"
+#include "Task.h"
+#include <stdlib.h>
+
+#ifdef DEBUG
+# include "Printer.h" /* for printing */
+#endif
+
+#ifdef PAR
+# include "Parallel.h"
+# include "ParallelRts.h"
+# include "LLC.h"
+#endif
+
+#if defined(GRAN) || defined(PAR)
+# include "GranSimRts.h"
+#endif
+
+#ifdef HAVE_WINDOWS_H
+# include <windows.h>
+#endif
+
+extern void __stginit_ZCMain(void);
+
+/* Hack: we assume that we're building a batch-mode system unless
+ * INTERPRETER is set
+ */
+#ifndef INTERPRETER /* Hack */
+int main(int argc, char *argv[])
+{
+ int exit_status;
+ SchedulerStatus status;
+ /* all GranSim/GUM init is done in startupHaskell; sets IAmMainThread! */
+
+ startupHaskell(argc,argv,__stginit_ZCMain);
+
+ /* kick off the computation by creating the main thread with a pointer
+ to mainIO_closure representing the computation of the overall program;
+ then enter the scheduler with this thread and off we go;
+
+ the same for GranSim (we have only one instance of this code)
+
+ in a parallel setup, where we have many instances of this code
+ running on different PEs, we should do this only for the main PE
+ (IAmMainThread is set in startupHaskell)
+ */
+
+# if defined(PAR)
+
+# if defined(DEBUG)
+ { /* a wait loop to allow attachment of gdb to UNIX threads */
+ nat i, j, s;
+
+ for (i=0, s=0; i<(nat)RtsFlags.ParFlags.wait; i++)
+ for (j=0; j<1000000; j++)
+ s += j % 65536;
+ }
+ IF_PAR_DEBUG(verbose,
+ belch("Passed wait loop"));
+# endif
+
+ if (IAmMainThread == rtsTrue) {
+ IF_PAR_DEBUG(verbose,
+ debugBelch("==== [%x] Main Thread Started ...\n", mytid));
+
+ /* ToDo: Dump event for the main thread */
+ status = rts_mainLazyIO((HaskellObj)mainIO_closure, NULL);
+ } else {
+ /* Just to show we're alive */
+ IF_PAR_DEBUG(verbose,
+ debugBelch("== [%x] Non-Main PE enters scheduler via taskStart() without work ...\n",
+ mytid));
+
+ /* all non-main threads enter the scheduler without work */
+ taskStart();
+ status = Success; // declare victory (see shutdownParallelSystem)
+ }
+
+# elif defined(GRAN)
+
+ /* ToDo: Dump event for the main thread */
+ status = rts_mainLazyIO(mainIO_closure, NULL);
+
+# else /* !PAR && !GRAN */
+
+ /* ToDo: want to start with a larger stack size */
+ {
+ void *cap = rts_lock();
+ cap = rts_evalLazyIO(cap,(HaskellObj)(void *)mainIO_closure, NULL);
+ status = rts_getSchedStatus(cap);
+ rts_unlock(cap);
+ }
+
+# endif /* !PAR && !GRAN */
+
+ /* check the status of the entire Haskell computation */
+ switch (status) {
+ case Killed:
+ errorBelch("main thread exited (uncaught exception)");
+ exit_status = EXIT_KILLED;
+ break;
+ case Interrupted:
+ errorBelch("interrupted");
+ exit_status = EXIT_INTERRUPTED;
+ break;
+ case Success:
+ exit_status = EXIT_SUCCESS;
+ break;
+#if defined(PAR)
+ case NoStatus:
+ errorBelch("main thread PE killed; probably due to failure of another PE; check /tmp/pvml...");
+ exit_status = EXIT_KILLED;
+ break;
+#endif
+ default:
+ barf("main thread completed with invalid status");
+ }
+ shutdownHaskellAndExit(exit_status);
+ return 0; /* never reached, keep gcc -Wall happy */
+}
+# endif /* !INTERPRETER */
diff --git a/rts/Makefile b/rts/Makefile
new file mode 100644
index 0000000000..2319788d65
--- /dev/null
+++ b/rts/Makefile
@@ -0,0 +1,370 @@
+#-----------------------------------------------------------------------------
+#
+# This is the Makefile for the runtime-system stuff.
+# This stuff is written in C (and cannot be written in Haskell).
+#
+# .c files are vanilla C,
+# .hc files are "Haskellized-C", compiled using the C compiler and
+# (possibly) the assembly-mangler. The GHC driver script
+# knows how to compile this stuff.
+#
+# Other sorta independent, compile-once subdirs are:
+# gmp -- GNU multi-precision library (for Integer)
+
+#-----------------------------------------------------------------------------
+# Preamble
+
+TOP=..
+
+# Set UseGhcForCc: this causes the fptools build system to use a different
+# set of suffix rules for compiling C code, using $(HC) rather than $(CC)
+# and prepending "-optc" to $(CC_OPTS). NB. must be done before including
+# boilerplate.mk below.
+UseGhcForCc = YES
+
+include $(TOP)/mk/boilerplate.mk
+
+PACKAGE = rts
+
+HC=$(GHC_INPLACE)
+
+# -----------------------------------------------------------------------------
+# RTS ways
+
+WAYS=$(GhcLibWays) $(GhcRTSWays)
+
+ifneq "$(findstring debug, $(way))" ""
+GhcRtsHcOpts=
+GhcRtsCcOpts=-g
+endif
+
+# -----------------------------------------------------------------------------
+
+# Tells the build system not to add various Haskellish options to $(SRC_HC_OPTS)
+NON_HS_PACKAGE = YES
+
+# grab sources from these subdirectories
+ALL_DIRS = hooks parallel
+
+ifeq "$(HOSTPLATFORM)" "i386-unknown-mingw32"
+ALL_DIRS += win32
+else
+ALL_DIRS += posix
+endif
+
+ifneq "$(DLLized)" "YES"
+EXCLUDED_SRCS += RtsDllMain.c
+else
+EXCLUDED_SRCS += Main.c
+endif
+
+# This file ends up being empty unless we're building for a powerpc
+# or darwin system, and it is reported that Solaris ld chokes on it when
+# building HSrts.o.
+ifeq "$(findstring $(TargetArch_CPP), powerpc powerpc64)" ""
+ifeq "$(findstring $(TargetOS_CPP), darwin)" ""
+EXCLUDED_SRCS += AdjustorAsm.S
+endif
+endif
+
+EXCLUDED_SRCS += parallel/SysMan.c
+
+# The build system doesn't give us these
+CMM_SRCS = $(filter-out AutoApply%.cmm, $(wildcard *.cmm)) $(EXTRA_CMM_SRCS)
+CMM_OBJS = $(patsubst %.cmm,%.$(way_)o, $(CMM_SRCS))
+
+CLEAN_FILES += $(CMM_OBJS)
+
+# Override the default $(LIBOBJS) (defaults to $(HS_OBJS))
+LIBOBJS = $(C_OBJS) $(CMM_OBJS)
+
+SplitObjs=NO
+
+H_FILES = $(wildcard ../includes/*.h) $(wildcard *.h)
+
+#-----------------------------------------------------------------------------
+# Flags for compiling RTS .c and .hc files
+
+# gcc provides lots of useful warnings if you ask it.
+# This is a pretty good list to start with - use a # to comment out
+# any you don't like.
+WARNING_OPTS += -Wall
+WARNING_OPTS += -W
+WARNING_OPTS += -Wstrict-prototypes
+WARNING_OPTS += -Wmissing-prototypes
+WARNING_OPTS += -Wmissing-declarations
+WARNING_OPTS += -Winline
+WARNING_OPTS += -Waggregate-return
+#WARNING_OPTS += -Wpointer-arith
+WARNING_OPTS += -Wbad-function-cast
+#WARNING_OPTS += -Wcast-align
+#WARNING_OPTS += -Wnested-externs
+#WARNING_OPTS += -Wshadow
+#WARNING_OPTS += -Wcast-qual
+#WARNING_OPTS += -Wno-unused
+#WARNING_OPTS += -Wredundant-decls
+#WARNING_OPTS += -Wconversion
+
+STANDARD_OPTS += -I../includes -I. -Iparallel
+# COMPILING_RTS is only used when building Win32 DLL support.
+STANDARD_OPTS += -DCOMPILING_RTS
+
+# HC_OPTS is included in both .c and .cmm compilations, whereas CC_OPTS is
+# only included in .c compilations. HC_OPTS included the WAY_* opts, which
+# must be included in both types of compilations.
+
+SRC_CC_OPTS += $(WARNING_OPTS)
+SRC_CC_OPTS += $(STANDARD_OPTS)
+
+SRC_CC_OPTS += $(GhcRtsCcOpts)
+SRC_HC_OPTS += $(GhcRtsHcOpts)
+
+ifneq "$(DLLized)" "YES"
+SRC_HC_OPTS += -static
+endif
+# SRC_HC_OPTS += -fPIC
+
+RtsMessages_CC_OPTS += -DProjectVersion=\"$(ProjectVersion)\"
+
+ifeq "$(way)" "mp"
+SRC_HC_OPTS += -I$$PVM_ROOT/include
+endif
+
+# If -DDEBUG is in effect, adjust package conf accordingly..
+ifneq "$(strip $(filter -optc-DDEBUG,$(GhcRtsHcOpts)))" ""
+PACKAGE_CPP_OPTS += -DDEBUG
+endif
+
+ifeq "$(HaveLibMingwEx)" "YES"
+PACKAGE_CPP_OPTS += -DHAVE_LIBMINGWEX
+endif
+
+ifeq "$(DotnetSupport)" "YES"
+
+#
+# Would like to just use SUBDIRS here, but need to
+# descend into dotnet/ earlier than that.
+#
+all ::
+ $(MAKE) -C dotnet all
+
+# But use SUBDIRS for other recursive targets.
+SUBDIRS += dotnet
+
+LIBOBJS += dotnet/Invoke.o
+endif
+
+# Suppress uninitialized variable warnings for GC.c
+GC_CC_OPTS += -Wno-uninitialized
+
+#-----------------------------------------------------------------------------
+# Include the Front panel code?
+
+# we need GTK+ for the front panel
+ifneq "$(GTK_CONFIG)" ""
+ifeq "$(GhcRtsWithFrontPanel)" "YES"
+SRC_HC_OPTS += `$(GTK_CONFIG) --cflags` -optc-DRTS_GTK_FRONTPANEL
+VisCallbacks_CC_OPTS += -Wno-unused
+SRC_MKDEPENDC_OPTS += `$(GTK_CONFIG) --cflags`
+else # GhcRtsWithFrontPanel
+EXCLUDED_SRCS += $(wildcard Vis*.c)
+endif
+else # GTK_CONFIG
+EXCLUDED_SRCS += $(wildcard Vis*.c)
+endif
+
+#-----------------------------------------------------------------------------
+# make depend setup
+
+SRC_MKDEPENDC_OPTS += -I. -I../includes
+
+# Hack: we define every way-related option here, so that we get (hopefully)
+# a superset of the dependencies. To do this properly, we should generate
+# a different set of dependencies for each way. Further hack: PROFILING and
+# TICKY_TICKY can't be used together, so we omit TICKY_TICKY for now.
+SRC_MKDEPENDC_OPTS += -DPROFILING -DTHREADED_RTS -DDEBUG
+
+# -----------------------------------------------------------------------------
+# The auto-generated apply code
+
+# We want a slightly different version for the unregisterised way, so we make
+# AutoApply on a per-way basis (eg. AutoApply_p.cmm).
+
+AUTO_APPLY_CMM = AutoApply$(_way).cmm
+
+ifneq "$(BootingFromHc)" "YES"
+$(AUTO_APPLY_CMM): $(GHC_GENAPPLY)
+ @$(RM) $@
+ $(GENAPPLY) $(if $(filter $(way), u debug_u), -u) >$@
+endif
+
+EXTRA_CMM_SRCS += $(AUTO_APPLY_CMM)
+
+CLEAN_FILES += $(AUTO_APPLY_CMM)
+
+# -----------------------------------------------------------------------------
+#
+# Building DLLs is only supported on mingw32 at the moment.
+#
+ifeq "$(DLLized)" "YES"
+SRC_BLD_DLL_OPTS += -lHS_imp_stub -lgmp_imp
+
+# It's not included in the DLL, but we need to compile it up separately.
+all :: Main.dll_o
+
+# Need an import library containing the symbols the RTS uses from the Prelude.
+# So, to avoid bootstrapping trouble, we build one containing just the syms
+# we need. Weirdly named to avoid clashing later on when compiling the contents
+# of ghc/lib/..
+#
+# Note: if you do change the name of the Prelude DLL, the "--dllname <nm>.dll"
+# below will need to be updated as well.
+
+$(DLL_PEN)/HSrts$(_way).dll :: libHS_imp_stub.a
+
+libHS_imp_stub.a :
+ dlltool --output-lib libHS_imp_stub.a --def HSprel.def --dllname HSstd.dll
+
+endif
+
+# -----------------------------------------------------------------------------
+# Compile GMP only if we don't have it already
+#
+# We use GMP's own configuration stuff, because it's all rather hairy
+# and not worth re-implementing in our Makefile framework.
+
+ifneq "$(HaveLibGmp)" "YES"
+ifneq "$(HaveFrameworkGMP)" "YES"
+boot ::
+ if [ -f gmp/config.status ]; then \
+ cd gmp && CC=$(WhatGccIsCalled) ./config.status; \
+ else \
+ cd gmp && CC=$(WhatGccIsCalled) ./configure --enable-shared=no \
+ --host=`echo $(HOSTPLATFORM) | sed 's/i[567]86/i486/g'`; \
+ fi
+
+# Slight cheatage here to pass host as target, but x-compilation isn't supported by ghc.
+
+ifeq "$(way)" ""
+all :: gmp/libgmp.a
+
+ifeq "$(DLLized)" "YES"
+all :: $(DLL_PEN)/gmp.dll
+
+$(DLL_PEN)/gmp.dll:
+ $(MAKE) -C gmp gmp.dll
+ $(MV) gmp/gmp.dll $(DLL_PEN)
+endif
+endif
+
+install :: gmp/libgmp.a
+
+ifeq "$(way)" ""
+clean distclean maintainer-clean ::
+ -$(MAKE) -C gmp MAKEFLAGS= $@
+
+INSTALL_LIBS += gmp/libgmp.a
+endif
+endif
+
+gmp/libgmp.a ::
+ $(MAKE) -C gmp MAKEFLAGS=
+ @$(CP) gmp/.libs/libgmp.a gmp
+ @$(RANLIB) gmp/libgmp.a
+endif
+
+CLEAN_FILES += gmp/libgmp.a
+
+#-----------------------------------------------------------------------------
+#
+# Building the GUM SysMan
+#
+
+ifeq "$(way)" "mp"
+all :: parallel/SysMan
+
+ifdef solaris2_TARGET_OS
+__socket_libs = -lsocket -lnsl
+else
+__socket_libs =
+endif
+
+parallel/SysMan : parallel/SysMan.mp_o parallel/LLComms.mp_o RtsUtils.mp_o RtsFlags.mp_o
+ $(RM) $@
+ gcc -o $@ parallel/SysMan.mp_o parallel/LLComms.mp_o -L$$PVM_ROOT/lib/$$PVM_ARCH -lgpvm3 -lpvm3 $(__socket_libs)
+
+CLEAN_FILES += parallel/SysMan.mp_o parallel/SysMan
+INSTALL_LIBEXECS += parallel/SysMan
+endif
+
+#-----------------------------------------------------------------------------
+# Compiling the cmm files
+
+# ToDo: should we really include Rts.h here? Required for GNU_ATTRIBUTE().
+SRC_HC_OPTS += \
+ -I. \
+ -\#include Prelude.h \
+ -\#include Rts.h \
+ -\#include RtsFlags.h \
+ -\#include RtsUtils.h \
+ -\#include StgRun.h \
+ -\#include Schedule.h \
+ -\#include Printer.h \
+ -\#include Sanity.h \
+ -\#include STM.h \
+ -\#include Storage.h \
+ -\#include SchedAPI.h \
+ -\#include Timer.h \
+ -\#include ProfHeap.h \
+ -\#include LdvProfile.h \
+ -\#include Profiling.h \
+ -\#include OSThreads.h \
+ -\#include Apply.h \
+ -\#include SMP.h
+
+ifeq "$(Windows)" "YES"
+PrimOps_HC_OPTS += -\#include '<windows.h>' -\#include win32/AsyncIO.h
+else
+PrimOps_HC_OPTS += -\#include posix/Itimer.h
+endif
+
+# -O3 helps unroll some loops (especially in copy() with a constant argument).
+# -fno-strict-aliasing is a hack because we often mix StgPtr and StgClosure pointers
+# to the same object, and gcc will assume these don't alias. eg. it happens in
+# copy() with gcc 3.4.3, the upd_evacee() assigments get moved before the object copy.
+GC_HC_OPTS += -optc-O3 -optc-fno-strict-aliasing
+
+# Cmm must be compiled via-C for now, because the NCG can't handle loops
+SRC_HC_OPTS += -fvia-C
+
+# We *want* type-checking of hand-written cmm.
+SRC_HC_OPTS += -dcmm-lint
+
+ifneq "$(BootingFromHc)" "YES"
+# .cmm files depend on all the .h files, to a first approximation.
+%.$(way_)o : %.cmm $(H_FILES)
+ $(HC_PRE_OPTS)
+ $(HC) $(HC_OPTS) -c $< -o $@
+ $(HC_POST_OPTS)
+
+%.$(way_)hc : %.cmm $(H_FILES)
+ $(HC) $(HC_OPTS) -C $< -o $@
+
+%.$(way_)s : %.cmm $(H_FILES)
+ $(HC) $(HC_OPTS) -S $< -o $@
+endif
+
+#-----------------------------------------------------------------------------
+#
+# Files to install
+#
+# Just libHSrts is installed uniformly across ways
+#
+INSTALL_LIBS += $(LIBRARY)
+ifeq "$(DLLized)" "YES"
+INSTALL_PROGS += $(DLL_NAME) gmp/gmp.dll
+INSTALL_LIBS += $(patsubst %.a,%_imp.a,$(LIBRARY))
+INSTALL_LIBS += gmp/libgmp_imp.a Main.dll_o
+endif
+
+include $(TOP)/mk/target.mk
diff --git a/rts/PosixSource.h b/rts/PosixSource.h
new file mode 100644
index 0000000000..a938f9bc0f
--- /dev/null
+++ b/rts/PosixSource.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Include this file into sources which should not need any non-Posix services.
+ * That includes most RTS C sources.
+ * ---------------------------------------------------------------------------*/
+
+#ifndef POSIXSOURCE_H
+#define POSIXSOURCE_H
+
+#define _POSIX_SOURCE 1
+#define _POSIX_C_SOURCE 199506L
+#define _ISOC9X_SOURCE
+
+/* Let's be ISO C9X too... */
+
+#endif /* POSIXSOURCE_H */
diff --git a/rts/Prelude.h b/rts/Prelude.h
new file mode 100644
index 0000000000..c209b2b800
--- /dev/null
+++ b/rts/Prelude.h
@@ -0,0 +1,129 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Prelude identifiers that we sometimes need to refer to in the RTS.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PRELUDE_H
+#define PRELUDE_H
+
+/* These definitions are required by the RTS .cmm files too, so we
+ * need declarations that we can #include into the generated .hc files.
+ */
+#if IN_STG_CODE
+#define PRELUDE_INFO(i) extern W_(i)[]
+#define PRELUDE_CLOSURE(i) extern W_(i)[]
+#else
+#define PRELUDE_INFO(i) extern DLL_IMPORT const StgInfoTable i
+#define PRELUDE_CLOSURE(i) extern DLL_IMPORT StgClosure i
+#endif
+
+/* Define canonical names so we can abstract away from the actual
+ * modules these names are defined in.
+ */
+
+PRELUDE_CLOSURE(GHCziBase_True_closure);
+PRELUDE_CLOSURE(GHCziBase_False_closure);
+PRELUDE_CLOSURE(GHCziPack_unpackCString_closure);
+PRELUDE_CLOSURE(GHCziWeak_runFinalizzerBatch_closure);
+
+#ifdef IN_STG_CODE
+extern W_ ZCMain_main_closure[];
+#else
+extern StgClosure ZCMain_main_closure;
+#endif
+
+PRELUDE_CLOSURE(GHCziIOBase_stackOverflow_closure);
+PRELUDE_CLOSURE(GHCziIOBase_heapOverflow_closure);
+PRELUDE_CLOSURE(GHCziIOBase_BlockedOnDeadMVar_closure);
+PRELUDE_CLOSURE(GHCziIOBase_BlockedIndefinitely_closure);
+PRELUDE_CLOSURE(GHCziIOBase_NonTermination_closure);
+PRELUDE_CLOSURE(GHCziIOBase_NestedAtomically_closure);
+
+PRELUDE_INFO(GHCziBase_Czh_static_info);
+PRELUDE_INFO(GHCziBase_Izh_static_info);
+PRELUDE_INFO(GHCziFloat_Fzh_static_info);
+PRELUDE_INFO(GHCziFloat_Dzh_static_info);
+PRELUDE_INFO(Addr_Azh_static_info);
+PRELUDE_INFO(GHCziPtr_Ptr_static_info);
+PRELUDE_INFO(GHCziPtr_FunPtr_static_info);
+PRELUDE_INFO(GHCziInt_I8zh_static_info);
+PRELUDE_INFO(GHCziInt_I16zh_static_info);
+PRELUDE_INFO(GHCziInt_I32zh_static_info);
+PRELUDE_INFO(GHCziInt_I64zh_static_info);
+PRELUDE_INFO(GHCziWord_Wzh_static_info);
+PRELUDE_INFO(GHCziWord_W8zh_static_info);
+PRELUDE_INFO(GHCziWord_W16zh_static_info);
+PRELUDE_INFO(GHCziWord_W32zh_static_info);
+PRELUDE_INFO(GHCziWord_W64zh_static_info);
+PRELUDE_INFO(GHCziBase_Czh_con_info);
+PRELUDE_INFO(GHCziBase_Izh_con_info);
+PRELUDE_INFO(GHCziFloat_Fzh_con_info);
+PRELUDE_INFO(GHCziFloat_Dzh_con_info);
+PRELUDE_INFO(GHCziPtr_Ptr_con_info);
+PRELUDE_INFO(GHCziPtr_FunPtr_con_info);
+PRELUDE_INFO(Addr_Azh_con_info);
+PRELUDE_INFO(GHCziWord_Wzh_con_info);
+PRELUDE_INFO(GHCziInt_I8zh_con_info);
+PRELUDE_INFO(GHCziInt_I16zh_con_info);
+PRELUDE_INFO(GHCziInt_I32zh_con_info);
+PRELUDE_INFO(GHCziInt_I64zh_con_info);
+PRELUDE_INFO(GHCziWord_W8zh_con_info);
+PRELUDE_INFO(GHCziWord_W16zh_con_info);
+PRELUDE_INFO(GHCziWord_W32zh_con_info);
+PRELUDE_INFO(GHCziWord_W64zh_con_info);
+PRELUDE_INFO(GHCziStable_StablePtr_static_info);
+PRELUDE_INFO(GHCziStable_StablePtr_con_info);
+
+#define True_closure (&GHCziBase_True_closure)
+#define False_closure (&GHCziBase_False_closure)
+#define unpackCString_closure (&GHCziPack_unpackCString_closure)
+#define runFinalizerBatch_closure (&GHCziWeak_runFinalizzerBatch_closure)
+#define mainIO_closure (&ZCMain_main_closure)
+
+#define stackOverflow_closure (&GHCziIOBase_stackOverflow_closure)
+#define heapOverflow_closure (&GHCziIOBase_heapOverflow_closure)
+#define BlockedOnDeadMVar_closure (&GHCziIOBase_BlockedOnDeadMVar_closure)
+#define BlockedIndefinitely_closure (&GHCziIOBase_BlockedIndefinitely_closure)
+#define NonTermination_closure (&GHCziIOBase_NonTermination_closure)
+#define NestedAtomically_closure (&GHCziIOBase_NestedAtomically_closure)
+
+#define Czh_static_info (&GHCziBase_Czh_static_info)
+#define Fzh_static_info (&GHCziFloat_Fzh_static_info)
+#define Dzh_static_info (&GHCziFloat_Dzh_static_info)
+#define Azh_static_info (&Addr_Azh_static_info)
+#define Izh_static_info (&GHCziBase_Izh_static_info)
+#define I8zh_static_info (&GHCziInt_I8zh_static_info)
+#define I16zh_static_info (&GHCziInt_I16zh_static_info)
+#define I32zh_static_info (&GHCziInt_I32zh_static_info)
+#define I64zh_static_info (&GHCziInt_I64zh_static_info)
+#define Wzh_static_info (&GHCziWord_Wzh_static_info)
+#define W8zh_static_info (&GHCziWord_W8zh_static_info)
+#define W16zh_static_info (&GHCziWord_W16zh_static_info)
+#define W32zh_static_info (&GHCziWord_W32zh_static_info)
+#define W64zh_static_info (&GHCziWord_W64zh_static_info)
+#define Ptr_static_info (&GHCziPtr_Ptr_static_info)
+#define FunPtr_static_info (&GHCziPtr_FunPtr_static_info)
+#define Czh_con_info (&GHCziBase_Czh_con_info)
+#define Izh_con_info (&GHCziBase_Izh_con_info)
+#define Fzh_con_info (&GHCziFloat_Fzh_con_info)
+#define Dzh_con_info (&GHCziFloat_Dzh_con_info)
+#define Azh_con_info (&Addr_Azh_con_info)
+#define Wzh_con_info (&GHCziWord_Wzh_con_info)
+#define W8zh_con_info (&GHCziWord_W8zh_con_info)
+#define W16zh_con_info (&GHCziWord_W16zh_con_info)
+#define W32zh_con_info (&GHCziWord_W32zh_con_info)
+#define W64zh_con_info (&GHCziWord_W64zh_con_info)
+#define I8zh_con_info (&GHCziInt_I8zh_con_info)
+#define I16zh_con_info (&GHCziInt_I16zh_con_info)
+#define I32zh_con_info (&GHCziInt_I32zh_con_info)
+#define I64zh_con_info (&GHCziInt_I64zh_con_info)
+/* (duplicate #define of I64zh_con_info removed; defined on the previous line) */
+#define Ptr_con_info (&GHCziPtr_Ptr_con_info)
+#define FunPtr_con_info (&GHCziPtr_FunPtr_con_info)
+#define StablePtr_static_info (&GHCziStable_StablePtr_static_info)
+#define StablePtr_con_info (&GHCziStable_StablePtr_con_info)
+
+#endif /* PRELUDE_H */
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
new file mode 100644
index 0000000000..f1c214e304
--- /dev/null
+++ b/rts/PrimOps.cmm
@@ -0,0 +1,2106 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Out-of-line primitive operations
+ *
+ * This file contains the implementations of all the primitive
+ * operations ("primops") which are not expanded inline. See
+ * ghc/compiler/prelude/primops.txt.pp for a list of all the primops;
+ * this file contains code for most of those with the attribute
+ * out_of_line=True.
+ *
+ * Entry convention: the entry convention for a primop is that all the
+ * args are in Stg registers (R1, R2, etc.). This is to make writing
+ * the primops easier. (see compiler/codeGen/CgCallConv.hs).
+ *
+ * Return convention: results from a primop are generally returned
+ * using the ordinary unboxed tuple return convention. The C-- parser
+ * implements the RET_xxxx() macros to perform unboxed-tuple returns
+ * based on the prevailing return convention.
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+/*-----------------------------------------------------------------------------
+ Array Primitives
+
+ Basically just new*Array - the others are all inline macros.
+
+ The size arg is always passed in R1, and the result returned in R1.
+
+ The slow entry point is for returning from a heap check, the saved
+ size argument must be re-loaded from the stack.
+ -------------------------------------------------------------------------- */
+
+/* for objects that are *less* than the size of a word, make sure we
+ * round up to the nearest word for the size of the array.
+ */
+
+newByteArrayzh_fast
+{
+ W_ words, payload_words, n, p;
+ MAYBE_GC(NO_PTRS,newByteArrayzh_fast);
+ n = R1;
+ payload_words = ROUNDUP_BYTES_TO_WDS(n);
+ words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
+ "ptr" p = foreign "C" allocateLocal(MyCapability() "ptr",words) [];
+ TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = payload_words;
+ RET_P(p);
+}
+
+newPinnedByteArrayzh_fast
+{
+ W_ words, payload_words, n, p;
+
+ MAYBE_GC(NO_PTRS,newPinnedByteArrayzh_fast);
+ n = R1;
+ payload_words = ROUNDUP_BYTES_TO_WDS(n);
+
+ // We want an 8-byte aligned array. allocatePinned() gives us
+ // 8-byte aligned memory by default, but we want to align the
+ // *goods* inside the ArrWords object, so we have to check the
+ // size of the ArrWords header and adjust our size accordingly.
+ words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words;
+ if ((SIZEOF_StgArrWords & 7) != 0) {
+ words = words + 1;
+ }
+
+ "ptr" p = foreign "C" allocatePinned(words) [];
+ TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0);
+
+ // Again, if the ArrWords header isn't a multiple of 8 bytes, we
+ // have to push the object forward one word so that the goods
+ // fall on an 8-byte boundary.
+ if ((SIZEOF_StgArrWords & 7) != 0) {
+ p = p + WDS(1);
+ }
+
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = payload_words;
+ RET_P(p);
+}
+
+newArrayzh_fast
+{
+ W_ words, n, init, arr, p;
+ /* Args: R1 = words, R2 = initialisation value */
+
+ n = R1;
+ MAYBE_GC(R2_PTR,newArrayzh_fast);
+
+ words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + n;
+ "ptr" arr = foreign "C" allocateLocal(MyCapability() "ptr",words) [R2];
+ TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0);
+
+ SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]);
+ StgMutArrPtrs_ptrs(arr) = n;
+
+ // Initialise all elements of the array with the value in R2
+ init = R2;
+ p = arr + SIZEOF_StgMutArrPtrs;
+ for:
+ if (p < arr + WDS(words)) {
+ W_[p] = init;
+ p = p + WDS(1);
+ goto for;
+ }
+
+ RET_P(arr);
+}
+
+unsafeThawArrayzh_fast
+{
+ // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST
+ //
+ // A MUT_ARR_PTRS lives on the mutable list, but a MUT_ARR_PTRS_FROZEN
+ // normally doesn't. However, when we freeze a MUT_ARR_PTRS, we leave
+ // it on the mutable list for the GC to remove (removing something from
+ // the mutable list is not easy, because the mut_list is only singly-linked).
+ //
+ // So that we can tell whether a MUT_ARR_PTRS_FROZEN is on the mutable list,
+ // when we freeze it we set the info ptr to be MUT_ARR_PTRS_FROZEN0
+ // to indicate that it is still on the mutable list.
+ //
+ // So, when we thaw a MUT_ARR_PTRS_FROZEN, we must cope with two cases:
+ // either it is on a mut_list, or it isn't. We adopt the convention that
+ // the closure type is MUT_ARR_PTRS_FROZEN0 if it is on the mutable list,
+ // and MUT_ARR_PTRS_FROZEN otherwise. In fact it wouldn't matter if
+ // we put it on the mutable list more than once, but it would get scavenged
+ // multiple times during GC, which would be unnecessarily slow.
+ //
+ if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) {
+ SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
+ foreign "C" recordMutableLock(R1 "ptr") [R1];
+ // must be done after SET_INFO, because it ASSERTs closure_MUTABLE()
+ RET_P(R1);
+ } else {
+ SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info);
+ RET_P(R1);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ MutVar primitives
+ -------------------------------------------------------------------------- */
+
+newMutVarzh_fast
+{
+ W_ mv;
+ /* Args: R1 = initialisation value */
+
+ ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, newMutVarzh_fast);
+
+ mv = Hp - SIZEOF_StgMutVar + WDS(1);
+ SET_HDR(mv,stg_MUT_VAR_DIRTY_info,W_[CCCS]);
+ StgMutVar_var(mv) = R1;
+
+ RET_P(mv);
+}
+
+atomicModifyMutVarzh_fast
+{
+ W_ mv, z, x, y, r;
+ /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */
+
+ /* If x is the current contents of the MutVar#, then
+ We want to make the new contents point to
+
+ (sel_0 (f x))
+
+ and the return value is
+
+ (sel_1 (f x))
+
+ obviously we can share (f x).
+
+ z = [stg_ap_2 f x] (max (HS + 2) MIN_UPD_SIZE)
+ y = [stg_sel_0 z] (max (HS + 1) MIN_UPD_SIZE)
+ r = [stg_sel_1 z] (max (HS + 1) MIN_UPD_SIZE)
+ */
+
+#if MIN_UPD_SIZE > 1
+#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
+#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),WDS(MIN_UPD_SIZE-1))
+#else
+#define THUNK_1_SIZE (SIZEOF_StgThunkHeader + WDS(1))
+#define TICK_ALLOC_THUNK_1() TICK_ALLOC_UP_THK(WDS(1),0)
+#endif
+
+#if MIN_UPD_SIZE > 2
+#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(MIN_UPD_SIZE))
+#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),WDS(MIN_UPD_SIZE-2))
+#else
+#define THUNK_2_SIZE (SIZEOF_StgThunkHeader + WDS(2))
+#define TICK_ALLOC_THUNK_2() TICK_ALLOC_UP_THK(WDS(2),0)
+#endif
+
+#define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE)
+
+ HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, atomicModifyMutVarzh_fast);
+
+#if defined(THREADED_RTS)
+ foreign "C" ACQUIRE_LOCK(atomic_modify_mutvar_mutex "ptr") [R1,R2];
+#endif
+
+ x = StgMutVar_var(R1);
+
+ TICK_ALLOC_THUNK_2();
+ CCCS_ALLOC(THUNK_2_SIZE);
+ z = Hp - THUNK_2_SIZE + WDS(1);
+ SET_HDR(z, stg_ap_2_upd_info, W_[CCCS]);
+ LDV_RECORD_CREATE(z);
+ StgThunk_payload(z,0) = R2;
+ StgThunk_payload(z,1) = x;
+
+ TICK_ALLOC_THUNK_1();
+ CCCS_ALLOC(THUNK_1_SIZE);
+ y = z - THUNK_1_SIZE;
+ SET_HDR(y, stg_sel_0_upd_info, W_[CCCS]);
+ LDV_RECORD_CREATE(y);
+ StgThunk_payload(y,0) = z;
+
+ StgMutVar_var(R1) = y;
+ foreign "C" dirty_MUT_VAR(BaseReg "ptr", R1 "ptr") [R1];
+
+ TICK_ALLOC_THUNK_1();
+ CCCS_ALLOC(THUNK_1_SIZE);
+ r = y - THUNK_1_SIZE;
+ SET_HDR(r, stg_sel_1_upd_info, W_[CCCS]);
+ LDV_RECORD_CREATE(r);
+ StgThunk_payload(r,0) = z;
+
+#if defined(THREADED_RTS)
+ foreign "C" RELEASE_LOCK(atomic_modify_mutvar_mutex "ptr") [];
+#endif
+
+ RET_P(r);
+}
+
+/* -----------------------------------------------------------------------------
+ Weak Pointer Primitives
+ -------------------------------------------------------------------------- */
+
+STRING(stg_weak_msg,"New weak pointer at %p\n")
+
+mkWeakzh_fast
+{
+ /* R1 = key
+ R2 = value
+ R3 = finalizer (or NULL)
+ */
+ W_ w;
+
+ if (R3 == NULL) {
+ R3 = stg_NO_FINALIZER_closure;
+ }
+
+ ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, mkWeakzh_fast );
+
+ w = Hp - SIZEOF_StgWeak + WDS(1);
+ SET_HDR(w, stg_WEAK_info, W_[CCCS]);
+
+ StgWeak_key(w) = R1;
+ StgWeak_value(w) = R2;
+ StgWeak_finalizer(w) = R3;
+
+ StgWeak_link(w) = W_[weak_ptr_list];
+ W_[weak_ptr_list] = w;
+
+ IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []);
+
+ RET_P(w);
+}
+
+
+finalizzeWeakzh_fast
+{
+ /* R1 = weak ptr
+ */
+ W_ w, f;
+
+ w = R1;
+
+ // already dead?
+ if (GET_INFO(w) == stg_DEAD_WEAK_info) {
+ RET_NP(0,stg_NO_FINALIZER_closure);
+ }
+
+ // kill it
+#ifdef PROFILING
+ // @LDV profiling
+ // A weak pointer is inherently used, so we do not need to call
+ // LDV_recordDead_FILL_SLOP_DYNAMIC():
+ // LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
+ // or, LDV_recordDead():
+ // LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
+ // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as
+ // large as weak pointers, so there is no need to fill the slop, either.
+ // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
+#endif
+
+ //
+ // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+ //
+ SET_INFO(w,stg_DEAD_WEAK_info);
+ LDV_RECORD_CREATE(w);
+
+ f = StgWeak_finalizer(w);
+ StgDeadWeak_link(w) = StgWeak_link(w);
+
+ /* return the finalizer */
+ if (f == stg_NO_FINALIZER_closure) {
+ RET_NP(0,stg_NO_FINALIZER_closure);
+ } else {
+ RET_NP(1,f);
+ }
+}
+
+deRefWeakzh_fast
+{
+ /* R1 = weak ptr */
+ W_ w, code, val;
+
+ w = R1;
+ if (GET_INFO(w) == stg_WEAK_info) {
+ code = 1;
+ val = StgWeak_value(w);
+ } else {
+ code = 0;
+ val = w;
+ }
+ RET_NP(code,val);
+}
+
+/* -----------------------------------------------------------------------------
+ Arbitrary-precision Integer operations.
+
+ There are some assumptions in this code that mp_limb_t == W_. This is
+ the case for all the platforms that GHC supports, currently.
+ -------------------------------------------------------------------------- */
+
+int2Integerzh_fast
+{
+ /* arguments: R1 = Int# */
+
+ W_ val, s, p; /* to avoid aliasing */
+
+ val = R1;
+ ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, int2Integerzh_fast );
+
+ p = Hp - SIZEOF_StgArrWords;
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = 1;
+
+ /* mpz_set_si is inlined here, makes things simpler */
+ if (%lt(val,0)) {
+ s = -1;
+ Hp(0) = -val;
+ } else {
+ if (%gt(val,0)) {
+ s = 1;
+ Hp(0) = val;
+ } else {
+ s = 0;
+ }
+ }
+
+ /* returns (# size :: Int#,
+ data :: ByteArray#
+ #)
+ */
+ RET_NP(s,p);
+}
+
+word2Integerzh_fast
+{
+ /* arguments: R1 = Word# */
+
+ W_ val, s, p; /* to avoid aliasing */
+
+ val = R1;
+
+ ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, word2Integerzh_fast);
+
+ p = Hp - SIZEOF_StgArrWords;
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = 1;
+
+ if (val != 0) {
+ s = 1;
+ W_[Hp] = val;
+ } else {
+ s = 0;
+ }
+
+ /* returns (# size :: Int#,
+ data :: ByteArray# #)
+ */
+ RET_NP(s,p);
+}
+
+
+/*
+ * 'long long' primops for converting to/from Integers.
+ */
+
+#ifdef SUPPORT_LONG_LONGS
+
+int64ToIntegerzh_fast
+{
+ /* arguments: L1 = Int64# */
+
+ L_ val;
+ W_ hi, s, neg, words_needed, p;
+
+ val = L1;
+ neg = 0;
+
+ if ( %ge(val,0x100000000::L_) || %le(val,-0x100000000::L_) ) {
+ words_needed = 2;
+ } else {
+ // minimum is one word
+ words_needed = 1;
+ }
+
+ ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
+ NO_PTRS, int64ToIntegerzh_fast );
+
+ p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = words_needed;
+
+ if ( %lt(val,0::L_) ) {
+ neg = 1;
+ val = -val;
+ }
+
+ hi = TO_W_(val >> 32);
+
+ if ( words_needed == 2 ) {
+ s = 2;
+ Hp(-1) = TO_W_(val);
+ Hp(0) = hi;
+ } else {
+ if ( val != 0::L_ ) {
+ s = 1;
+ Hp(0) = TO_W_(val);
+ } else /* val==0 */ {
+ s = 0;
+ }
+ }
+ if ( neg != 0 ) {
+ s = -s;
+ }
+
+ /* returns (# size :: Int#,
+ data :: ByteArray# #)
+ */
+ RET_NP(s,p);
+}
+
+word64ToIntegerzh_fast
+{
+ /* arguments: L1 = Word64# */
+
+ L_ val;
+ W_ hi, s, words_needed, p;
+
+ val = L1;
+ if ( val >= 0x100000000::L_ ) {
+ words_needed = 2;
+ } else {
+ words_needed = 1;
+ }
+
+ ALLOC_PRIM( SIZEOF_StgArrWords + WDS(words_needed),
+ NO_PTRS, word64ToIntegerzh_fast );
+
+ p = Hp - SIZEOF_StgArrWords - WDS(words_needed) + WDS(1);
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = words_needed;
+
+ hi = TO_W_(val >> 32);
+ if ( val >= 0x100000000::L_ ) {
+ s = 2;
+ Hp(-1) = TO_W_(val);
+ Hp(0) = hi;
+ } else {
+ if ( val != 0::L_ ) {
+ s = 1;
+ Hp(0) = TO_W_(val);
+ } else /* val==0 */ {
+ s = 0;
+ }
+ }
+
+ /* returns (# size :: Int#,
+ data :: ByteArray# #)
+ */
+ RET_NP(s,p);
+}
+
+
+#endif /* SUPPORT_LONG_LONGS */
+
+/* ToDo: this is shockingly inefficient */
+
+#ifndef THREADED_RTS
+section "bss" {
+ mp_tmp1:
+ bits8 [SIZEOF_MP_INT];
+}
+
+section "bss" {
+ mp_tmp2:
+ bits8 [SIZEOF_MP_INT];
+}
+
+section "bss" {
+ mp_result1:
+ bits8 [SIZEOF_MP_INT];
+}
+
+section "bss" {
+ mp_result2:
+ bits8 [SIZEOF_MP_INT];
+}
+#endif
+
+#ifdef THREADED_RTS
+#define FETCH_MP_TEMP(X) \
+W_ X; \
+X = BaseReg + (OFFSET_StgRegTable_r ## X);
+#else
+#define FETCH_MP_TEMP(X) /* Nothing */
+#endif
+
+#define GMP_TAKE2_RET1(name,mp_fun) \
+name \
+{ \
+ CInt s1, s2; \
+ W_ d1, d2; \
+ FETCH_MP_TEMP(mp_tmp1); \
+ FETCH_MP_TEMP(mp_tmp2); \
+ FETCH_MP_TEMP(mp_result1) \
+ FETCH_MP_TEMP(mp_result2); \
+ \
+ /* call doYouWantToGC() */ \
+ MAYBE_GC(R2_PTR & R4_PTR, name); \
+ \
+ s1 = W_TO_INT(R1); \
+ d1 = R2; \
+ s2 = W_TO_INT(R3); \
+ d2 = R4; \
+ \
+ MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \
+ MP_INT__mp_size(mp_tmp1) = (s1); \
+ MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \
+ MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \
+ MP_INT__mp_size(mp_tmp2) = (s2); \
+ MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \
+ \
+ foreign "C" mpz_init(mp_result1 "ptr") []; \
+ \
+ /* Perform the operation */ \
+ foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \
+ \
+ RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \
+ MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \
+}
+
+#define GMP_TAKE1_RET1(name,mp_fun) \
+name \
+{ \
+ CInt s1; \
+ W_ d1; \
+ FETCH_MP_TEMP(mp_tmp1); \
+ FETCH_MP_TEMP(mp_result1) \
+ \
+ /* call doYouWantToGC() */ \
+ MAYBE_GC(R2_PTR, name); \
+ \
+ d1 = R2; \
+ s1 = W_TO_INT(R1); \
+ \
+ MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \
+ MP_INT__mp_size(mp_tmp1) = (s1); \
+ MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \
+ \
+ foreign "C" mpz_init(mp_result1 "ptr") []; \
+ \
+ /* Perform the operation */ \
+ foreign "C" mp_fun(mp_result1 "ptr",mp_tmp1 "ptr") []; \
+ \
+ RET_NP(TO_W_(MP_INT__mp_size(mp_result1)), \
+ MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords); \
+}
+
+#define GMP_TAKE2_RET2(name,mp_fun) \
+name \
+{ \
+ CInt s1, s2; \
+ W_ d1, d2; \
+ FETCH_MP_TEMP(mp_tmp1); \
+ FETCH_MP_TEMP(mp_tmp2); \
+ FETCH_MP_TEMP(mp_result1) \
+ FETCH_MP_TEMP(mp_result2) \
+ \
+ /* call doYouWantToGC() */ \
+ MAYBE_GC(R2_PTR & R4_PTR, name); \
+ \
+ s1 = W_TO_INT(R1); \
+ d1 = R2; \
+ s2 = W_TO_INT(R3); \
+ d2 = R4; \
+ \
+ MP_INT__mp_alloc(mp_tmp1) = W_TO_INT(StgArrWords_words(d1)); \
+ MP_INT__mp_size(mp_tmp1) = (s1); \
+ MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(d1); \
+ MP_INT__mp_alloc(mp_tmp2) = W_TO_INT(StgArrWords_words(d2)); \
+ MP_INT__mp_size(mp_tmp2) = (s2); \
+ MP_INT__mp_d(mp_tmp2) = BYTE_ARR_CTS(d2); \
+ \
+ foreign "C" mpz_init(mp_result1 "ptr") []; \
+ foreign "C" mpz_init(mp_result2 "ptr") []; \
+ \
+ /* Perform the operation */ \
+ foreign "C" mp_fun(mp_result1 "ptr",mp_result2 "ptr",mp_tmp1 "ptr",mp_tmp2 "ptr") []; \
+ \
+ RET_NPNP(TO_W_(MP_INT__mp_size(mp_result1)), \
+ MP_INT__mp_d(mp_result1) - SIZEOF_StgArrWords, \
+ TO_W_(MP_INT__mp_size(mp_result2)), \
+ MP_INT__mp_d(mp_result2) - SIZEOF_StgArrWords); \
+}
+
+GMP_TAKE2_RET1(plusIntegerzh_fast, mpz_add)
+GMP_TAKE2_RET1(minusIntegerzh_fast, mpz_sub)
+GMP_TAKE2_RET1(timesIntegerzh_fast, mpz_mul)
+GMP_TAKE2_RET1(gcdIntegerzh_fast, mpz_gcd)
+GMP_TAKE2_RET1(quotIntegerzh_fast, mpz_tdiv_q)
+GMP_TAKE2_RET1(remIntegerzh_fast, mpz_tdiv_r)
+GMP_TAKE2_RET1(divExactIntegerzh_fast, mpz_divexact)
+GMP_TAKE2_RET1(andIntegerzh_fast, mpz_and)
+GMP_TAKE2_RET1(orIntegerzh_fast, mpz_ior)
+GMP_TAKE2_RET1(xorIntegerzh_fast, mpz_xor)
+GMP_TAKE1_RET1(complementIntegerzh_fast, mpz_com)
+
+GMP_TAKE2_RET2(quotRemIntegerzh_fast, mpz_tdiv_qr)
+GMP_TAKE2_RET2(divModIntegerzh_fast, mpz_fdiv_qr)
+
+#ifndef THREADED_RTS
+section "bss" {
+ mp_tmp_w: W_; // NB. mp_tmp_w is really an mp_limb_t here
+}
+#endif
+
+gcdIntzh_fast
+{
+ /* R1 = the first Int#; R2 = the second Int# */
+ W_ r;
+ FETCH_MP_TEMP(mp_tmp_w);
+
+ W_[mp_tmp_w] = R1;
+ r = foreign "C" mpn_gcd_1(mp_tmp_w "ptr", 1, R2) [];
+
+ R1 = r;
+ /* Result parked in R1, return via info-pointer at TOS */
+ jump %ENTRY_CODE(Sp(0));
+}
+
+
+gcdIntegerIntzh_fast
+{
+ /* R1 = s1; R2 = d1; R3 = the int */
+ R1 = foreign "C" mpn_gcd_1( BYTE_ARR_CTS(R2) "ptr", R1, R3) [];
+
+ /* Result parked in R1, return via info-pointer at TOS */
+ jump %ENTRY_CODE(Sp(0));
+}
+
+
+cmpIntegerIntzh_fast
+{
+ /* R1 = s1; R2 = d1; R3 = the int */
+ W_ usize, vsize, v_digit, u_digit;
+
+ usize = R1;
+ vsize = 0;
+ v_digit = R3;
+
+ // paraphrased from mpz_cmp_si() in the GMP sources
+ if (%gt(v_digit,0)) {
+ vsize = 1;
+ } else {
+ if (%lt(v_digit,0)) {
+ vsize = -1;
+ v_digit = -v_digit;
+ }
+ }
+
+ if (usize != vsize) {
+ R1 = usize - vsize;
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ if (usize == 0) {
+ R1 = 0;
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ u_digit = W_[BYTE_ARR_CTS(R2)];
+
+ if (u_digit == v_digit) {
+ R1 = 0;
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ if (%gtu(u_digit,v_digit)) { // NB. unsigned: these are mp_limb_t's
+ R1 = usize;
+ } else {
+ R1 = -usize;
+ }
+
+ jump %ENTRY_CODE(Sp(0));
+}
+
+cmpIntegerzh_fast
+{
+ /* R1 = s1; R2 = d1; R3 = s2; R4 = d2 */
+ W_ usize, vsize, size, up, vp;
+ CInt cmp;
+
+ // paraphrased from mpz_cmp() in the GMP sources
+ usize = R1;
+ vsize = R3;
+
+ if (usize != vsize) {
+ R1 = usize - vsize;
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ if (usize == 0) {
+ R1 = 0;
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ if (%lt(usize,0)) { // NB. not <, which is unsigned
+ size = -usize;
+ } else {
+ size = usize;
+ }
+
+ up = BYTE_ARR_CTS(R2);
+ vp = BYTE_ARR_CTS(R4);
+
+ cmp = foreign "C" mpn_cmp(up "ptr", vp "ptr", size) [];
+
+ if (cmp == 0 :: CInt) {
+ R1 = 0;
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ if (%lt(cmp,0 :: CInt) == %lt(usize,0)) {
+ R1 = 1;
+ } else {
+ R1 = (-1);
+ }
+ /* Result parked in R1, return via info-pointer at TOS */
+ jump %ENTRY_CODE(Sp(0));
+}
+
+integer2Intzh_fast
+{
+ /* R1 = s; R2 = d */
+ W_ r, s;
+
+ s = R1;
+ if (s == 0) {
+ r = 0;
+ } else {
+ r = W_[R2 + SIZEOF_StgArrWords];
+ if (%lt(s,0)) {
+ r = -r;
+ }
+ }
+ /* Result parked in R1, return via info-pointer at TOS */
+ R1 = r;
+ jump %ENTRY_CODE(Sp(0));
+}
+
+integer2Wordzh_fast
+{
+ /* R1 = s; R2 = d */
+ W_ r, s;
+
+ s = R1;
+ if (s == 0) {
+ r = 0;
+ } else {
+ r = W_[R2 + SIZEOF_StgArrWords];
+ if (%lt(s,0)) {
+ r = -r;
+ }
+ }
+ /* Result parked in R1, return via info-pointer at TOS */
+ R1 = r;
+ jump %ENTRY_CODE(Sp(0));
+}
+
+decodeFloatzh_fast
+{
+ W_ p;
+ F_ arg;
+ FETCH_MP_TEMP(mp_tmp1);
+ FETCH_MP_TEMP(mp_tmp_w);
+
+ /* arguments: F1 = Float# */
+ arg = F1;
+
+ ALLOC_PRIM( SIZEOF_StgArrWords + WDS(1), NO_PTRS, decodeFloatzh_fast );
+
+ /* Be prepared to tell Lennart-coded __decodeFloat
+ where mantissa._mp_d can be put (it does not care about the rest) */
+ p = Hp - SIZEOF_StgArrWords;
+ SET_HDR(p,stg_ARR_WORDS_info,W_[CCCS]);
+ StgArrWords_words(p) = 1;
+ MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
+
+ /* Perform the operation */
+ foreign "C" __decodeFloat(mp_tmp1 "ptr",mp_tmp_w "ptr" ,arg) [];
+
+ /* returns: (Int# (expn), Int#, ByteArray#) */
+ RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
+}
+
+#define DOUBLE_MANTISSA_SIZE SIZEOF_DOUBLE
+#define ARR_SIZE (SIZEOF_StgArrWords + DOUBLE_MANTISSA_SIZE)
+
+decodeDoublezh_fast
+{
+ D_ arg;
+ W_ p;
+ FETCH_MP_TEMP(mp_tmp1);
+ FETCH_MP_TEMP(mp_tmp_w);
+
+ /* arguments: D1 = Double# */
+ arg = D1;
+
+ ALLOC_PRIM( ARR_SIZE, NO_PTRS, decodeDoublezh_fast );
+
+ /* Be prepared to tell Lennart-coded __decodeDouble
+ where mantissa.d can be put (it does not care about the rest) */
+ p = Hp - ARR_SIZE + WDS(1);
+ SET_HDR(p, stg_ARR_WORDS_info, W_[CCCS]);
+ StgArrWords_words(p) = BYTES_TO_WDS(DOUBLE_MANTISSA_SIZE);
+ MP_INT__mp_d(mp_tmp1) = BYTE_ARR_CTS(p);
+
+ /* Perform the operation */
+ foreign "C" __decodeDouble(mp_tmp1 "ptr", mp_tmp_w "ptr",arg) [];
+
+ /* returns: (Int# (expn), Int#, ByteArray#) */
+ RET_NNP(W_[mp_tmp_w], TO_W_(MP_INT__mp_size(mp_tmp1)), p);
+}
+
+/* -----------------------------------------------------------------------------
+ * Concurrency primitives
+ * -------------------------------------------------------------------------- */
+
+forkzh_fast
+{
+ /* args: R1 = closure to spark */
+
+ MAYBE_GC(R1_PTR, forkzh_fast);
+
+ W_ closure;
+ W_ threadid;
+ closure = R1;
+
+ "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr",
+ RtsFlags_GcFlags_initialStkSize(RtsFlags),
+ closure "ptr") [];
+ foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") [];
+
+ // switch at the earliest opportunity
+ CInt[context_switch] = 1 :: CInt;
+
+ RET_P(threadid);
+}
+
+forkOnzh_fast
+{
+ /* args: R1 = cpu, R2 = closure to spark */
+
+ MAYBE_GC(R2_PTR, forkOnzh_fast);
+
+ W_ cpu;
+ W_ closure;
+ W_ threadid;
+ cpu = R1;
+ closure = R2;
+
+ "ptr" threadid = foreign "C" createIOThread( MyCapability() "ptr",
+ RtsFlags_GcFlags_initialStkSize(RtsFlags),
+ closure "ptr") [];
+ foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") [];
+
+ // switch at the earliest opportunity
+ CInt[context_switch] = 1 :: CInt;
+
+ RET_P(threadid);
+}
+
+yieldzh_fast
+{
+ jump stg_yield_noregs;
+}
+
+myThreadIdzh_fast
+{
+ /* no args. */
+ RET_P(CurrentTSO);
+}
+
+labelThreadzh_fast
+{
+ /* args:
+ R1 = ThreadId#
+ R2 = Addr# */
+#ifdef DEBUG
+ foreign "C" labelThread(R1 "ptr", R2 "ptr") [];
+#endif
+ jump %ENTRY_CODE(Sp(0));
+}
+
+isCurrentThreadBoundzh_fast
+{
+ /* no args */
+ W_ r;
+ r = foreign "C" isThreadBound(CurrentTSO) [];
+ RET_N(r);
+}
+
+
+/* -----------------------------------------------------------------------------
+ * TVar primitives
+ * -------------------------------------------------------------------------- */
+
+#ifdef REG_R1
+#define SP_OFF 0
+#define IF_NOT_REG_R1(x)
+#else
+#define SP_OFF 1
+#define IF_NOT_REG_R1(x) x
+#endif
+
+// Catch retry frame ------------------------------------------------------------
+
+#define CATCH_RETRY_FRAME_ERROR(label) \
+ label { foreign "C" barf("catch_retry_frame incorrectly entered!"); }
+
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_0_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_1_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_2_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_3_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_4_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_5_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_6_ret)
+CATCH_RETRY_FRAME_ERROR(stg_catch_retry_frame_7_ret)
+
+#if MAX_VECTORED_RTN > 8
+#error MAX_VECTORED_RTN has changed: please modify stg_catch_retry_frame too.
+#endif
+
+#if defined(PROFILING)
+#define CATCH_RETRY_FRAME_BITMAP 7
+#define CATCH_RETRY_FRAME_WORDS 6
+#else
+#define CATCH_RETRY_FRAME_BITMAP 1
+#define CATCH_RETRY_FRAME_WORDS 4
+#endif
+
+INFO_TABLE_RET(stg_catch_retry_frame,
+ CATCH_RETRY_FRAME_WORDS, CATCH_RETRY_FRAME_BITMAP,
+ CATCH_RETRY_FRAME,
+ stg_catch_retry_frame_0_ret,
+ stg_catch_retry_frame_1_ret,
+ stg_catch_retry_frame_2_ret,
+ stg_catch_retry_frame_3_ret,
+ stg_catch_retry_frame_4_ret,
+ stg_catch_retry_frame_5_ret,
+ stg_catch_retry_frame_6_ret,
+ stg_catch_retry_frame_7_ret)
+{
+ W_ r, frame, trec, outer;
+ IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
+
+ frame = Sp;
+ trec = StgTSO_trec(CurrentTSO);
+ "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
+ r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
+ if (r) {
+ /* Succeeded (either first branch or second branch) */
+ StgTSO_trec(CurrentTSO) = outer;
+ Sp = Sp + SIZEOF_StgCatchRetryFrame;
+ IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
+ jump %ENTRY_CODE(Sp(SP_OFF));
+ } else {
+ /* Did not commit: retry */
+ W_ new_trec;
+ "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
+ StgTSO_trec(CurrentTSO) = new_trec;
+ if (StgCatchRetryFrame_running_alt_code(frame)) {
+ R1 = StgCatchRetryFrame_alt_code(frame);
+ } else {
+ R1 = StgCatchRetryFrame_first_code(frame);
+ StgCatchRetryFrame_first_code_trec(frame) = new_trec;
+ }
+ jump stg_ap_v_fast;
+ }
+}
+
+
+// Atomically frame -------------------------------------------------------------
+
+
+#define ATOMICALLY_FRAME_ERROR(label) \
+ label { foreign "C" barf("atomically_frame incorrectly entered!"); }
+
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_0_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_1_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_2_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_3_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_4_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_5_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_6_ret)
+ATOMICALLY_FRAME_ERROR(stg_atomically_frame_7_ret)
+
+#if MAX_VECTORED_RTN > 8
+#error MAX_VECTORED_RTN has changed: please modify stg_atomically_frame too.
+#endif
+
+#if defined(PROFILING)
+#define ATOMICALLY_FRAME_BITMAP 3
+#define ATOMICALLY_FRAME_WORDS 3
+#else
+#define ATOMICALLY_FRAME_BITMAP 0
+#define ATOMICALLY_FRAME_WORDS 1
+#endif
+
+
+/* Stack frame pushed by atomicallyzh_fast.  Control returns here when
+ * the STM code under the transaction finishes: we try to commit the
+ * current TRec, and if validation fails we restart the transaction
+ * from scratch with a fresh TRec.
+ */
+INFO_TABLE_RET(stg_atomically_frame,
+ ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
+ ATOMICALLY_FRAME,
+ stg_atomically_frame_0_ret,
+ stg_atomically_frame_1_ret,
+ stg_atomically_frame_2_ret,
+ stg_atomically_frame_3_ret,
+ stg_atomically_frame_4_ret,
+ stg_atomically_frame_5_ret,
+ stg_atomically_frame_6_ret,
+ stg_atomically_frame_7_ret)
+{
+ W_ frame, trec, valid;
+ /* Unregisterised build: the return value lives on the stack, not R1 */
+ IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
+
+ frame = Sp;
+ trec = StgTSO_trec(CurrentTSO);
+
+ /* The TSO is not currently waiting: try to commit the transaction */
+ valid = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") [];
+ if (valid) {
+ /* Transaction was valid: commit succeeded */
+ StgTSO_trec(CurrentTSO) = NO_TREC;
+ Sp = Sp + SIZEOF_StgAtomicallyFrame;
+ IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;)
+ jump %ENTRY_CODE(Sp(SP_OFF));
+ } else {
+ /* Transaction was not valid: try again */
+ "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
+ StgTSO_trec(CurrentTSO) = trec;
+ R1 = StgAtomicallyFrame_code(frame);
+ jump stg_ap_v_fast;
+ }
+}
+
+/* Variant of the atomically frame used while the TSO is blocked in
+ * retry#: stg_atomically_frame_info is swapped for this info pointer
+ * before the thread sleeps (see retryzh_fast).  On wake-up we ask the
+ * STM (stmReWait) whether the previous attempt is still valid; if so
+ * we simply block again, otherwise we restart the transaction.
+ */
+INFO_TABLE_RET(stg_atomically_waiting_frame,
+ ATOMICALLY_FRAME_WORDS, ATOMICALLY_FRAME_BITMAP,
+ ATOMICALLY_FRAME,
+ stg_atomically_frame_0_ret,
+ stg_atomically_frame_1_ret,
+ stg_atomically_frame_2_ret,
+ stg_atomically_frame_3_ret,
+ stg_atomically_frame_4_ret,
+ stg_atomically_frame_5_ret,
+ stg_atomically_frame_6_ret,
+ stg_atomically_frame_7_ret)
+{
+ W_ frame, trec, valid;
+ IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); )
+
+ frame = Sp;
+
+ /* The TSO is currently waiting: should we stop waiting? */
+ valid = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") [];
+ if (valid) {
+ /* Previous attempt is still valid: no point trying again yet */
+ IF_NOT_REG_R1(Sp_adj(-2);
+ Sp(1) = stg_NO_FINALIZER_closure;
+ Sp(0) = stg_ut_1_0_unreg_info;)
+ jump stg_block_noregs;
+ } else {
+ /* Previous attempt is no longer valid: try again */
+ "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") [];
+ StgTSO_trec(CurrentTSO) = trec;
+ /* Revert the frame to the normal (non-waiting) atomically frame */
+ StgHeader_info(frame) = stg_atomically_frame_info;
+ R1 = StgAtomicallyFrame_code(frame);
+ jump stg_ap_v_fast;
+ }
+}
+
+// STM catch frame --------------------------------------------------------------
+
+#define CATCH_STM_FRAME_ENTRY_TEMPLATE(label,ret) \
+ label \
+ { \
+ IF_NOT_REG_R1(W_ rval; rval = Sp(0); Sp_adj(1); ) \
+ Sp = Sp + SIZEOF_StgCatchSTMFrame; \
+ IF_NOT_REG_R1(Sp_adj(-1); Sp(0) = rval;) \
+ jump ret; \
+ }
+
+#ifdef REG_R1
+#define SP_OFF 0
+#else
+#define SP_OFF 1
+#endif
+
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_0_ret,%RET_VEC(Sp(SP_OFF),0))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_1_ret,%RET_VEC(Sp(SP_OFF),1))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_2_ret,%RET_VEC(Sp(SP_OFF),2))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_3_ret,%RET_VEC(Sp(SP_OFF),3))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_4_ret,%RET_VEC(Sp(SP_OFF),4))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_5_ret,%RET_VEC(Sp(SP_OFF),5))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_6_ret,%RET_VEC(Sp(SP_OFF),6))
+CATCH_STM_FRAME_ENTRY_TEMPLATE(stg_catch_stm_frame_7_ret,%RET_VEC(Sp(SP_OFF),7))
+
+#if MAX_VECTORED_RTN > 8
+#error MAX_VECTORED_RTN has changed: please modify stg_catch_stm_frame too.
+#endif
+
+#if defined(PROFILING)
+#define CATCH_STM_FRAME_BITMAP 3
+#define CATCH_STM_FRAME_WORDS 3
+#else
+#define CATCH_STM_FRAME_BITMAP 0
+#define CATCH_STM_FRAME_WORDS 1
+#endif
+
+/* Catch frames are very similar to update frames, but when entering
+ * one we just pop the frame off the stack and perform the correct
+ * kind of return to the activation record underneath us on the stack.
+ */
+
+INFO_TABLE_RET(stg_catch_stm_frame,
+ CATCH_STM_FRAME_WORDS, CATCH_STM_FRAME_BITMAP,
+ CATCH_STM_FRAME,
+ stg_catch_stm_frame_0_ret,
+ stg_catch_stm_frame_1_ret,
+ stg_catch_stm_frame_2_ret,
+ stg_catch_stm_frame_3_ret,
+ stg_catch_stm_frame_4_ret,
+ stg_catch_stm_frame_5_ret,
+ stg_catch_stm_frame_6_ret,
+ stg_catch_stm_frame_7_ret)
+/* Direct (non-vectored) entry: return via the frame underneath */
+CATCH_STM_FRAME_ENTRY_TEMPLATE(,%ENTRY_CODE(Sp(SP_OFF)))
+
+
+// Primop definition ------------------------------------------------------------
+
+/* atomically# primop: run R1 (an STM computation) inside a new memory
+ * transaction.  Pushes an stg_atomically_frame so that the commit (or
+ * restart) logic runs when the computation returns.
+ */
+atomicallyzh_fast
+{
+ W_ frame;
+ W_ old_trec;
+ W_ new_trec;
+
+ // stmStartTransaction may allocate
+ MAYBE_GC (R1_PTR, atomicallyzh_fast);
+
+ /* Args: R1 = m :: STM a */
+ STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, atomicallyzh_fast);
+
+ old_trec = StgTSO_trec(CurrentTSO);
+
+ /* Nested transactions are not allowed; raise an exception */
+ if (old_trec != NO_TREC) {
+ R1 = GHCziIOBase_NestedAtomically_closure;
+ jump raisezh_fast;
+ }
+
+ /* Set up the atomically frame */
+ Sp = Sp - SIZEOF_StgAtomicallyFrame;
+ frame = Sp;
+
+ SET_HDR(frame,stg_atomically_frame_info, W_[CCCS]);
+ StgAtomicallyFrame_code(frame) = R1;
+
+ /* Start the memory transaction */
+ "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1];
+ StgTSO_trec(CurrentTSO) = new_trec;
+
+ /* Apply R1 to the realworld token */
+ jump stg_ap_v_fast;
+}
+
+
+/* catchSTM# primop: run R1 with exception handler R2 installed, by
+ * pushing a catch-STM frame that records the handler.
+ */
+catchSTMzh_fast
+{
+ W_ frame;
+
+ /* Args: R1 :: STM a */
+ /* Args: R2 :: Exception -> STM a */
+ STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, catchSTMzh_fast);
+
+ /* Set up the catch frame */
+ Sp = Sp - SIZEOF_StgCatchSTMFrame;
+ frame = Sp;
+
+ SET_HDR(frame, stg_catch_stm_frame_info, W_[CCCS]);
+ StgCatchSTMFrame_handler(frame) = R2;
+
+ /* Apply R1 to the realworld token */
+ jump stg_ap_v_fast;
+}
+
+
+/* catchRetry# primop: run R1, and if it retries run R2 instead.  Starts
+ * a nested transaction for the first branch and pushes a catch-retry
+ * frame recording both branches; the retry/commit logic lives in
+ * retryzh_fast and the catch-retry frame's return code.
+ */
+catchRetryzh_fast
+{
+ W_ frame;
+ W_ new_trec;
+ W_ trec;
+
+ // stmStartTransaction may allocate
+ MAYBE_GC (R1_PTR & R2_PTR, catchRetryzh_fast);
+
+ /* Args: R1 :: STM a */
+ /* Args: R2 :: STM a */
+ STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, catchRetryzh_fast);
+
+ /* Start a nested transaction within which to run the first code */
+ trec = StgTSO_trec(CurrentTSO);
+ "ptr" new_trec = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2];
+ StgTSO_trec(CurrentTSO) = new_trec;
+
+ /* Set up the catch-retry frame */
+ Sp = Sp - SIZEOF_StgCatchRetryFrame;
+ frame = Sp;
+
+ SET_HDR(frame, stg_catch_retry_frame_info, W_[CCCS]);
+ StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
+ StgCatchRetryFrame_first_code(frame) = R1;
+ StgCatchRetryFrame_alt_code(frame) = R2;
+ StgCatchRetryFrame_first_code_trec(frame) = new_trec;
+
+ /* Apply R1 to the realworld token */
+ jump stg_ap_v_fast;
+}
+
+
+/* retry# primop: abandon the current attempt.  Walks the stack (via
+ * findRetryFrameHelper) to the nearest CATCH_RETRY_FRAME — where we
+ * switch to, or propagate past, the alternative branch — or to the
+ * enclosing ATOMICALLY_FRAME, where we block the thread on the TVars
+ * it read (stmWait) until one of them changes.
+ */
+retryzh_fast
+{
+ W_ frame_type;
+ W_ frame;
+ W_ trec;
+ W_ outer;
+ W_ r;
+
+ MAYBE_GC (NO_PTRS, retryzh_fast); // STM operations may allocate
+
+ // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME
+retry_pop_stack:
+ trec = StgTSO_trec(CurrentTSO);
+ "ptr" outer = foreign "C" stmGetEnclosingTRec(trec "ptr") [];
+ // findRetryFrameHelper reads/writes the TSO's saved stack pointer
+ StgTSO_sp(CurrentTSO) = Sp;
+ frame_type = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") [];
+ Sp = StgTSO_sp(CurrentTSO);
+ frame = Sp;
+
+ if (frame_type == CATCH_RETRY_FRAME) {
+ // The retry reaches a CATCH_RETRY_FRAME before the atomic frame
+ ASSERT(outer != NO_TREC);
+ if (!StgCatchRetryFrame_running_alt_code(frame)) {
+ // Retry in the first code: try the alternative
+ "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
+ StgTSO_trec(CurrentTSO) = trec;
+ StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true;
+ R1 = StgCatchRetryFrame_alt_code(frame);
+ jump stg_ap_v_fast;
+ } else {
+ // Retry in the alternative code: propagate
+ W_ other_trec;
+ other_trec = StgCatchRetryFrame_first_code_trec(frame);
+ r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", other_trec "ptr") [];
+ if (r) {
+ r = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") [];
+ } else {
+ foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") [];
+ }
+ if (r) {
+ // Merge between siblings succeeded: commit it back to enclosing transaction
+ // and then propagate the retry
+ StgTSO_trec(CurrentTSO) = outer;
+ Sp = Sp + SIZEOF_StgCatchRetryFrame;
+ goto retry_pop_stack;
+ } else {
+ // Merge failed: we mustn't propagate the retry. Try both paths again.
+ "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
+ StgCatchRetryFrame_first_code_trec(frame) = trec;
+ StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false;
+ StgTSO_trec(CurrentTSO) = trec;
+ R1 = StgCatchRetryFrame_first_code(frame);
+ jump stg_ap_v_fast;
+ }
+ }
+ }
+
+ // We've reached the ATOMICALLY_FRAME: attempt to wait
+ ASSERT(frame_type == ATOMICALLY_FRAME);
+ ASSERT(outer == NO_TREC);
+ r = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") [];
+ if (r) {
+ // Transaction was valid: stmWait put us on the TVars' queues, we now block
+ StgHeader_info(frame) = stg_atomically_waiting_frame_info;
+ Sp = frame;
+ // Fix up the stack in the unregisterised case: the return convention is different.
+ IF_NOT_REG_R1(Sp_adj(-2);
+ Sp(1) = stg_NO_FINALIZER_closure;
+ Sp(0) = stg_ut_1_0_unreg_info;)
+ R3 = trec; // passing to stmWaitUnblock()
+ jump stg_block_stmwait;
+ } else {
+ // Transaction was not valid: retry immediately
+ "ptr" trec = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") [];
+ StgTSO_trec(CurrentTSO) = trec;
+ R1 = StgAtomicallyFrame_code(frame);
+ Sp = frame;
+ jump stg_ap_v_fast;
+ }
+}
+
+
+/* newTVar# primop: allocate a fresh TVar holding the value in R1. */
+newTVarzh_fast
+{
+ W_ tv;
+ W_ new_value;
+
+ /* Args: R1 = initialisation value */
+
+ MAYBE_GC (R1_PTR, newTVarzh_fast);
+ new_value = R1;
+ "ptr" tv = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") [];
+ RET_P(tv);
+}
+
+
+/* readTVar# primop: read a TVar within the current transaction
+ * (recorded in the TSO's TRec via stmReadTVar).
+ */
+readTVarzh_fast
+{
+ W_ trec;
+ W_ tvar;
+ W_ result;
+
+ /* Args: R1 = TVar closure */
+
+ MAYBE_GC (R1_PTR, readTVarzh_fast); // Call to stmReadTVar may allocate
+ trec = StgTSO_trec(CurrentTSO);
+ tvar = R1;
+ "ptr" result = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") [];
+
+ RET_P(result);
+}
+
+
+/* writeTVar# primop: record a write to a TVar in the current
+ * transaction's TRec (the TVar itself is only updated at commit).
+ */
+writeTVarzh_fast
+{
+ W_ trec;
+ W_ tvar;
+ W_ new_value;
+
+ /* Args: R1 = TVar closure */
+ /* R2 = New value */
+
+ MAYBE_GC (R1_PTR & R2_PTR, writeTVarzh_fast); // Call to stmWriteTVar may allocate
+ trec = StgTSO_trec(CurrentTSO);
+ tvar = R1;
+ new_value = R2;
+ foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") [];
+
+ /* No result: return unit to the frame underneath */
+ jump %ENTRY_CODE(Sp(0));
+}
+
+
+/* -----------------------------------------------------------------------------
+ * MVar primitives
+ *
+ * take & putMVar work as follows. Firstly, an important invariant:
+ *
+ * If the MVar is full, then the blocking queue contains only
+ * threads blocked on putMVar, and if the MVar is empty then the
+ * blocking queue contains only threads blocked on takeMVar.
+ *
+ * takeMVar:
+ * MVar empty : then add ourselves to the blocking queue
+ * MVar full : remove the value from the MVar, and
+ * blocking queue empty : return
+ * blocking queue non-empty : perform the first blocked putMVar
+ * from the queue, and wake up the
+ * thread (MVar is now full again)
+ *
+ * putMVar is just the dual of the above algorithm.
+ *
+ * How do we "perform a putMVar"? Well, we have to fiddle around with
+ * the stack of the thread waiting to do the putMVar. See
+ * stg_block_putmvar and stg_block_takemvar in HeapStackCheck.c for
+ * the stack layout, and the PerformPut and PerformTake macros below.
+ *
+ * It is important that a blocked take or put is woken up with the
+ * take/put already performed, because otherwise there would be a
+ * small window of vulnerability where the thread could receive an
+ * exception and never perform its take or put, and we'd end up with a
+ * deadlock.
+ *
+ * -------------------------------------------------------------------------- */
+
+/* isEmptyMVar# primop: test the MVar's info pointer; returns 1 for
+ * empty, 0 for full.  Note: no locking, so the answer may be stale by
+ * the time the caller sees it.
+ */
+isEmptyMVarzh_fast
+{
+ /* args: R1 = MVar closure */
+
+ if (GET_INFO(R1) == stg_EMPTY_MVAR_info) {
+ RET_N(1);
+ } else {
+ RET_N(0);
+ }
+}
+
+/* newMVar# primop: allocate an empty MVar with empty blocking queue. */
+newMVarzh_fast
+{
+ /* args: none */
+ W_ mvar;
+
+ ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, newMVarzh_fast );
+
+ mvar = Hp - SIZEOF_StgMVar + WDS(1);
+ SET_HDR(mvar,stg_EMPTY_MVAR_info,W_[CCCS]);
+ /* head/tail of the blocked-TSO queue, and the (absent) value */
+ StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure;
+ StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
+ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
+ RET_P(mvar);
+}
+
+
+/* Complete a blocked takeMVar on behalf of another thread: write the
+ * taken value and a return frame onto that thread's stack.
+ * If R1 isn't available, pass it on the stack */
+#ifdef REG_R1
+#define PerformTake(tso, value) \
+ W_[StgTSO_sp(tso) + WDS(1)] = value; \
+ W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info;
+#else
+#define PerformTake(tso, value) \
+ W_[StgTSO_sp(tso) + WDS(1)] = value; \
+ W_[StgTSO_sp(tso) + WDS(0)] = stg_ut_1_0_unreg_info;
+#endif
+
+/* Complete a blocked putMVar: pop the 3-word putMVar continuation off
+ * the blocked thread's stack and fetch the value it was trying to put. */
+#define PerformPut(tso,lval) \
+ StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \
+ lval = W_[StgTSO_sp(tso) - WDS(1)];
+
+/* takeMVar# primop.  See the invariant in the comment block above:
+ * if the MVar is empty we block on its queue; if full we take the
+ * value and, if putMVar-ers are queued, perform the first queued put
+ * and wake that thread.  Under THREADED_RTS the MVar is locked for
+ * the duration (lockClosure/unlockClosure).
+ */
+takeMVarzh_fast
+{
+ W_ mvar, val, info, tso;
+
+ /* args: R1 = MVar closure */
+ mvar = R1;
+
+#if defined(THREADED_RTS)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
+#else
+ info = GET_INFO(mvar);
+#endif
+
+ /* If the MVar is empty, put ourselves on its blocking queue,
+ * and wait until we're woken up.
+ */
+ if (info == stg_EMPTY_MVAR_info) {
+ if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
+ StgMVar_head(mvar) = CurrentTSO;
+ } else {
+ StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
+ }
+ StgTSO_link(CurrentTSO) = stg_END_TSO_QUEUE_closure;
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
+ StgTSO_block_info(CurrentTSO) = mvar;
+ StgMVar_tail(mvar) = CurrentTSO;
+
+ /* stg_block_takemvar also releases the lock in the threaded RTS */
+ jump stg_block_takemvar;
+ }
+
+ /* we got the value... */
+ val = StgMVar_value(mvar);
+
+ if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure)
+ {
+ /* There are putMVar(s) waiting...
+ * wake up the first thread on the queue
+ */
+ ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
+
+ /* actually perform the putMVar for the thread that we just woke up */
+ tso = StgMVar_head(mvar);
+ PerformPut(tso,StgMVar_value(mvar));
+ foreign "C" dirtyTSO(tso "ptr") [];
+
+#if defined(GRAN) || defined(PAR)
+ /* ToDo: check 2nd arg (mvar) is right */
+ "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar),mvar) [];
+ StgMVar_head(mvar) = tso;
+#else
+ "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
+ StgMVar_head(mvar) "ptr") [];
+ StgMVar_head(mvar) = tso;
+#endif
+
+ if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
+ StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
+ }
+
+#if defined(THREADED_RTS)
+ /* MVar stays full: the queued put has refilled it */
+ foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
+#endif
+ RET_P(val);
+ }
+ else
+ {
+ /* No further putMVars, MVar is now empty */
+ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
+
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
+#else
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
+#endif
+
+ RET_P(val);
+ }
+}
+
+
+/* tryTakeMVar# primop: non-blocking takeMVar.  Returns (0, dummy)
+ * when the MVar is empty, otherwise (1, value), performing any queued
+ * putMVar exactly as takeMVarzh_fast does.
+ */
+tryTakeMVarzh_fast
+{
+ W_ mvar, val, info, tso;
+
+ /* args: R1 = MVar closure */
+
+ mvar = R1;
+
+#if defined(THREADED_RTS)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr") [];
+#else
+ info = GET_INFO(mvar);
+#endif
+
+ if (info == stg_EMPTY_MVAR_info) {
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
+#endif
+ /* HACK: we need a pointer to pass back,
+ * so we abuse NO_FINALIZER_closure
+ */
+ RET_NP(0, stg_NO_FINALIZER_closure);
+ }
+
+ /* we got the value... */
+ val = StgMVar_value(mvar);
+
+ if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
+
+ /* There are putMVar(s) waiting...
+ * wake up the first thread on the queue
+ */
+ ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
+
+ /* actually perform the putMVar for the thread that we just woke up */
+ tso = StgMVar_head(mvar);
+ PerformPut(tso,StgMVar_value(mvar));
+ foreign "C" dirtyTSO(tso "ptr") [];
+
+#if defined(GRAN) || defined(PAR)
+ /* ToDo: check 2nd arg (mvar) is right */
+ "ptr" tso = foreign "C" unblockOne(StgMVar_head(mvar) "ptr", mvar "ptr") [];
+ StgMVar_head(mvar) = tso;
+#else
+ "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr",
+ StgMVar_head(mvar) "ptr") [];
+ StgMVar_head(mvar) = tso;
+#endif
+
+ if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
+ StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
+ }
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
+#endif
+ }
+ else
+ {
+ /* No further putMVars, MVar is now empty */
+ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure;
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
+#else
+ SET_INFO(mvar,stg_EMPTY_MVAR_info);
+#endif
+ }
+
+ RET_NP(1, val);
+}
+
+
+/* putMVar# primop: the dual of takeMVar#.  If the MVar is full we
+ * block on its queue; otherwise, if takeMVar-ers are queued, hand the
+ * value (R2) directly to the first one and wake it, else store the
+ * value and mark the MVar full.
+ */
+putMVarzh_fast
+{
+ W_ mvar, info, tso;
+
+ /* args: R1 = MVar, R2 = value */
+ mvar = R1;
+
+#if defined(THREADED_RTS)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
+#else
+ info = GET_INFO(mvar);
+#endif
+
+ if (info == stg_FULL_MVAR_info) {
+ if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
+ StgMVar_head(mvar) = CurrentTSO;
+ } else {
+ StgTSO_link(StgMVar_tail(mvar)) = CurrentTSO;
+ }
+ StgTSO_link(CurrentTSO) = stg_END_TSO_QUEUE_closure;
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16;
+ StgTSO_block_info(CurrentTSO) = mvar;
+ StgMVar_tail(mvar) = CurrentTSO;
+
+ /* stg_block_putmvar also releases the lock in the threaded RTS */
+ jump stg_block_putmvar;
+ }
+
+ if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
+
+ /* There are takeMVar(s) waiting: wake up the first one
+ */
+ ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
+
+ /* actually perform the takeMVar */
+ tso = StgMVar_head(mvar);
+ PerformTake(tso, R2);
+ foreign "C" dirtyTSO(tso "ptr") [];
+
+#if defined(GRAN) || defined(PAR)
+ /* ToDo: check 2nd arg (mvar) is right */
+ "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
+ StgMVar_head(mvar) = tso;
+#else
+ "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
+ StgMVar_head(mvar) = tso;
+#endif
+
+ if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
+ StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
+ }
+
+#if defined(THREADED_RTS)
+ /* MVar stays empty: the value went straight to the woken taker */
+ foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
+#endif
+ jump %ENTRY_CODE(Sp(0));
+ }
+ else
+ {
+ /* No further takes, the MVar is now full. */
+ StgMVar_value(mvar) = R2;
+
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
+#else
+ SET_INFO(mvar,stg_FULL_MVAR_info);
+#endif
+ jump %ENTRY_CODE(Sp(0));
+ }
+
+ /* ToDo: yield afterward for better communication performance? */
+}
+
+
+/* tryPutMVar# primop: non-blocking putMVar.  Returns 0 when the MVar
+ * is already full, otherwise performs the put (as putMVarzh_fast) and
+ * returns 1.
+ */
+tryPutMVarzh_fast
+{
+ W_ mvar, info, tso;
+
+ /* args: R1 = MVar, R2 = value */
+ mvar = R1;
+
+#if defined(THREADED_RTS)
+ "ptr" info = foreign "C" lockClosure(mvar "ptr") [R2];
+#else
+ info = GET_INFO(mvar);
+#endif
+
+ if (info == stg_FULL_MVAR_info) {
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
+#endif
+ RET_N(0);
+ }
+
+ if (StgMVar_head(mvar) != stg_END_TSO_QUEUE_closure) {
+
+ /* There are takeMVar(s) waiting: wake up the first one
+ */
+ ASSERT(StgTSO_why_blocked(StgMVar_head(mvar)) == BlockedOnMVar::I16);
+
+ /* actually perform the takeMVar */
+ tso = StgMVar_head(mvar);
+ PerformTake(tso, R2);
+ foreign "C" dirtyTSO(tso "ptr") [];
+
+#if defined(GRAN) || defined(PAR)
+ /* ToDo: check 2nd arg (mvar) is right */
+ "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr",mvar "ptr") [];
+ StgMVar_head(mvar) = tso;
+#else
+ "ptr" tso = foreign "C" unblockOne(MyCapability() "ptr", StgMVar_head(mvar) "ptr") [];
+ StgMVar_head(mvar) = tso;
+#endif
+
+ if (StgMVar_head(mvar) == stg_END_TSO_QUEUE_closure) {
+ StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure;
+ }
+
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_EMPTY_MVAR_info) [];
+#endif
+ }
+ else
+ {
+ /* No further takes, the MVar is now full. */
+ StgMVar_value(mvar) = R2;
+
+#if defined(THREADED_RTS)
+ foreign "C" unlockClosure(mvar "ptr", stg_FULL_MVAR_info) [];
+#else
+ SET_INFO(mvar,stg_FULL_MVAR_info);
+#endif
+ }
+
+ RET_N(1);
+ /* ToDo: yield afterward for better communication performance? */
+}
+
+
+/* -----------------------------------------------------------------------------
+ Stable pointer primitives
+ ------------------------------------------------------------------------- */
+
+/* makeStableName# primop: look up (or create) the stable-name table
+ * entry for the closure in R1 and return its StableName heap object,
+ * allocating one only if the entry has no sn_obj yet.
+ */
+makeStableNamezh_fast
+{
+ W_ index, sn_obj;
+
+ /* Allocate up front so lookupStableName's result can't be invalidated
+ * by a GC between the lookup and the table update below. */
+ ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, makeStableNamezh_fast );
+
+ index = foreign "C" lookupStableName(R1 "ptr") [];
+
+ /* Is there already a StableName for this heap object?
+ * stable_ptr_table is a pointer to an array of snEntry structs.
+ */
+ if ( snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) == NULL ) {
+ sn_obj = Hp - SIZEOF_StgStableName + WDS(1);
+ SET_HDR(sn_obj, stg_STABLE_NAME_info, W_[CCCS]);
+ StgStableName_sn(sn_obj) = index;
+ snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry) = sn_obj;
+ } else {
+ sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry);
+ }
+
+ RET_P(sn_obj);
+}
+
+
+/* makeStablePtr# primop: register R1 in the stable-pointer table and
+ * return the StablePtr (a non-pointer word, hence RET_N).
+ */
+makeStablePtrzh_fast
+{
+ /* Args: R1 = a */
+ W_ sp;
+ MAYBE_GC(R1_PTR, makeStablePtrzh_fast);
+ "ptr" sp = foreign "C" getStablePtr(R1 "ptr") [];
+ RET_N(sp);
+}
+
+/* deRefStablePtr# primop: index straight into stable_ptr_table and
+ * return the referenced closure.  No validity check is performed.
+ */
+deRefStablePtrzh_fast
+{
+ /* Args: R1 = the stable ptr */
+ W_ r, sp;
+ sp = R1;
+ r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry);
+ RET_P(r);
+}
+
+/* -----------------------------------------------------------------------------
+ Bytecode object primitives
+ ------------------------------------------------------------------------- */
+
+/* newBCO# primop: allocate a bytecode object.  The BCO is sized to
+ * hold a copy of the bitmap array (R6) inline after the fixed fields.
+ */
+newBCOzh_fast
+{
+ /* R1 = instrs
+ R2 = literals
+ R3 = ptrs
+ R4 = itbls
+ R5 = arity
+ R6 = bitmap array
+ */
+ W_ bco, bitmap_arr, bytes, words;
+
+ bitmap_arr = R6;
+ words = BYTES_TO_WDS(SIZEOF_StgBCO) + StgArrWords_words(bitmap_arr);
+ bytes = WDS(words);
+
+ ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R4_PTR&R6_PTR, newBCOzh_fast );
+
+ bco = Hp - bytes + WDS(1);
+ SET_HDR(bco, stg_BCO_info, W_[CCCS]);
+
+ StgBCO_instrs(bco) = R1;
+ StgBCO_literals(bco) = R2;
+ StgBCO_ptrs(bco) = R3;
+ StgBCO_itbls(bco) = R4;
+ StgBCO_arity(bco) = HALF_W_(R5);
+ StgBCO_size(bco) = HALF_W_(words);
+
+ // Copy the arity/bitmap info into the BCO
+ // (Cmm has no for-loop; "for:" is a plain label driven by goto)
+ W_ i;
+ i = 0;
+for:
+ if (i < StgArrWords_words(bitmap_arr)) {
+ StgBCO_bitmap(bco,i) = StgArrWords_payload(bitmap_arr,i);
+ i = i + 1;
+ goto for;
+ }
+
+ RET_P(bco);
+}
+
+
+/* mkApUpd0# primop: wrap a zero-arity BCO in an updatable AP thunk. */
+mkApUpd0zh_fast
+{
+ // R1 = the BCO# for the AP
+ //
+ W_ ap;
+
+ // This function is *only* used to wrap zero-arity BCOs in an
+ // updatable wrapper (see ByteCodeLink.lhs). An AP thunk is always
+ // saturated and always points directly to a FUN or BCO.
+ ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) &&
+ StgBCO_arity(R1) == HALF_W_(0));
+
+ HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, mkApUpd0zh_fast);
+ TICK_ALLOC_UP_THK(0, 0);
+ CCCS_ALLOC(SIZEOF_StgAP);
+
+ ap = Hp - SIZEOF_StgAP + WDS(1);
+ SET_HDR(ap, stg_AP_info, W_[CCCS]);
+
+ StgAP_n_args(ap) = HALF_W_(0);
+ StgAP_fun(ap) = R1;
+
+ RET_P(ap);
+}
+
+/* -----------------------------------------------------------------------------
+ Thread I/O blocking primitives
+ -------------------------------------------------------------------------- */
+
+/* Add a thread to the end of the blocked queue. (C-- version of the C
+ * macro in Schedule.h).
+ */
+#define APPEND_TO_BLOCKED_QUEUE(tso) \
+ ASSERT(StgTSO_link(tso) == END_TSO_QUEUE); \
+ if (W_[blocked_queue_hd] == END_TSO_QUEUE) { \
+ W_[blocked_queue_hd] = tso; \
+ } else { \
+ StgTSO_link(W_[blocked_queue_tl]) = tso; \
+ } \
+ W_[blocked_queue_tl] = tso;
+
+/* waitRead# primop: block the current thread until the file descriptor
+ * in R1 is readable.  Non-threaded RTS only (the threaded RTS uses
+ * blocking foreign calls instead, hence the barf).
+ */
+waitReadzh_fast
+{
+ /* args: R1 */
+#ifdef THREADED_RTS
+ foreign "C" barf("waitRead# on threaded RTS");
+#else
+
+ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
+ StgTSO_block_info(CurrentTSO) = R1;
+ // No locking - we're not going to use this interface in the
+ // threaded RTS anyway.
+ APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+ jump stg_block_noregs;
+#endif
+}
+
+/* waitWrite# primop: block the current thread until the file
+ * descriptor in R1 is writable.  Non-threaded RTS only.
+ */
+waitWritezh_fast
+{
+ /* args: R1 */
+#ifdef THREADED_RTS
+ foreign "C" barf("waitWrite# on threaded RTS");
+#else
+
+ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
+ StgTSO_block_info(CurrentTSO) = R1;
+ // No locking - we're not going to use this interface in the
+ // threaded RTS anyway.
+ APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+ jump stg_block_noregs;
+#endif
+}
+
+
+/* delay# primop: block the current thread for R1 microseconds.
+ * Non-threaded RTS only.  On mingw32 this goes through the async I/O
+ * manager (addDelayRequest); elsewhere the thread is inserted into the
+ * sleeping_queue, kept sorted by wake-up time.
+ */
+STRING(stg_delayzh_malloc_str, "delayzh_fast")
+delayzh_fast
+{
+#ifdef mingw32_HOST_OS
+ W_ ares;
+ CInt reqID;
+#else
+ W_ t, prev, target;
+#endif
+
+#ifdef THREADED_RTS
+ foreign "C" barf("delay# on threaded RTS");
+#else
+
+ /* args: R1 (microsecond delay amount) */
+ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16;
+
+#ifdef mingw32_HOST_OS
+
+ /* could probably allocate this on the heap instead */
+ "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+ stg_delayzh_malloc_str);
+ reqID = foreign "C" addDelayRequest(R1);
+ StgAsyncIOResult_reqID(ares) = reqID;
+ StgAsyncIOResult_len(ares) = 0;
+ StgAsyncIOResult_errCode(ares) = 0;
+ StgTSO_block_info(CurrentTSO) = ares;
+
+ /* Having all async-blocked threads reside on the blocked_queue
+ * simplifies matters, so change the status to OnDoProc and put the
+ * delayed thread on the blocked_queue.
+ */
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
+ APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+ jump stg_block_async_void;
+
+#else
+
+ /* Compute the absolute wake-up time in scheduler ticks */
+ W_ time;
+ time = foreign "C" getourtimeofday();
+ target = (R1 / (TICK_MILLISECS*1000)) + time;
+ StgTSO_block_info(CurrentTSO) = target;
+
+ /* Insert the new thread in the sleeping queue. */
+ prev = NULL;
+ t = W_[sleeping_queue];
+while:
+ if (t != END_TSO_QUEUE && StgTSO_block_info(t) < target) {
+ prev = t;
+ t = StgTSO_link(t);
+ goto while;
+ }
+
+ StgTSO_link(CurrentTSO) = t;
+ if (prev == NULL) {
+ W_[sleeping_queue] = CurrentTSO;
+ } else {
+ StgTSO_link(prev) = CurrentTSO;
+ }
+ jump stg_block_noregs;
+#endif
+#endif /* !THREADED_RTS */
+}
+
+
+#ifdef mingw32_HOST_OS
+/* asyncRead# primop (mingw32 only): issue an asynchronous read via the
+ * Windows I/O manager (addIORequest) and block until it completes.
+ */
+STRING(stg_asyncReadzh_malloc_str, "asyncReadzh_fast")
+asyncReadzh_fast
+{
+ W_ ares;
+ CInt reqID;
+
+#ifdef THREADED_RTS
+ foreign "C" barf("asyncRead# on threaded RTS");
+#else
+
+ /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
+ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16;
+
+ /* could probably allocate this on the heap instead */
+ "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+ stg_asyncReadzh_malloc_str)
+ [R1,R2,R3,R4];
+ reqID = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") [];
+ StgAsyncIOResult_reqID(ares) = reqID;
+ StgAsyncIOResult_len(ares) = 0;
+ StgAsyncIOResult_errCode(ares) = 0;
+ StgTSO_block_info(CurrentTSO) = ares;
+ APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+ jump stg_block_async;
+#endif
+}
+
+/* asyncWrite# primop (mingw32 only): issue an asynchronous write via
+ * the Windows I/O manager and block until it completes.
+ */
+STRING(stg_asyncWritezh_malloc_str, "asyncWritezh_fast")
+asyncWritezh_fast
+{
+ W_ ares;
+ CInt reqID;
+
+#ifdef THREADED_RTS
+ foreign "C" barf("asyncWrite# on threaded RTS");
+#else
+
+ /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */
+ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16;
+
+ "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+ stg_asyncWritezh_malloc_str)
+ [R1,R2,R3,R4];
+ reqID = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") [];
+
+ StgAsyncIOResult_reqID(ares) = reqID;
+ StgAsyncIOResult_len(ares) = 0;
+ StgAsyncIOResult_errCode(ares) = 0;
+ StgTSO_block_info(CurrentTSO) = ares;
+ APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+ jump stg_block_async;
+#endif
+}
+
+/* asyncDoProc# primop (mingw32 only): run a procedure on a worker via
+ * the Windows I/O manager (addDoProcRequest) and block until done.
+ */
+STRING(stg_asyncDoProczh_malloc_str, "asyncDoProczh_fast")
+asyncDoProczh_fast
+{
+ W_ ares;
+ CInt reqID;
+
+#ifdef THREADED_RTS
+ foreign "C" barf("asyncDoProc# on threaded RTS");
+#else
+
+ /* args: R1 = proc, R2 = param */
+ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16);
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16;
+
+ /* could probably allocate this on the heap instead */
+ "ptr" ares = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult,
+ stg_asyncDoProczh_malloc_str)
+ [R1,R2];
+ reqID = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") [];
+ StgAsyncIOResult_reqID(ares) = reqID;
+ StgAsyncIOResult_len(ares) = 0;
+ StgAsyncIOResult_errCode(ares) = 0;
+ StgTSO_block_info(CurrentTSO) = ares;
+ APPEND_TO_BLOCKED_QUEUE(CurrentTSO);
+ jump stg_block_async;
+#endif
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ ** temporary **
+
+ classes CCallable and CReturnable don't really exist, but the
+ compiler insists on generating dictionaries containing references
+ to GHC_ZcCCallable_static_info etc., so we provide dummy symbols
+ for these. Some C compilers can't cope with zero-length static arrays,
+ so we have to make these one element long.
+ --------------------------------------------------------------------------- */
+
+section "rodata" {
+ GHC_ZCCCallable_static_info: W_ 0;
+}
+
+section "rodata" {
+ GHC_ZCCReturnable_static_info: W_ 0;
+}
diff --git a/rts/Printer.c b/rts/Printer.c
new file mode 100644
index 0000000000..8290d220a0
--- /dev/null
+++ b/rts/Printer.c
@@ -0,0 +1,1127 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1994-2000.
+ *
+ * Heap printer
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "Printer.h"
+#include "RtsUtils.h"
+
+#ifdef DEBUG
+
+#include "RtsFlags.h"
+#include "MBlock.h"
+#include "Storage.h"
+#include "Bytecodes.h" /* for InstrPtr */
+#include "Disassembler.h"
+#include "Apply.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(GRAN) || defined(PAR)
+// HWL: explicit fixed header size to make debugging easier
+int fixed_hs = sizeof(StgHeader), itbl_sz = sizeofW(StgInfoTable),
+ uf_sz=sizeofW(StgUpdateFrame);
+#endif
+
+/* --------------------------------------------------------------------------
+ * local function decls
+ * ------------------------------------------------------------------------*/
+
+static void printStdObjPayload( StgClosure *obj );
+#ifdef USING_LIBBFD
+static void reset_table ( int size );
+static void prepare_table ( void );
+static void insert ( unsigned value, const char *name );
+#endif
+#if 0 /* unused but might be useful sometime */
+static rtsBool lookup_name ( char *name, unsigned *result );
+static void enZcode ( char *in, char *out );
+#endif
+static char unZcode ( char ch );
+const char * lookupGHCName ( void *addr );
+static void printZcoded ( const char *raw );
+
+/* --------------------------------------------------------------------------
+ * Printer
+ * ------------------------------------------------------------------------*/
+
+/* Print the symbol name for 'p' (Z-decoded) if the loaded symbol table
+ * knows it, otherwise fall back to printing the raw address. */
+void printPtr( StgPtr p )
+{
+    const char *sym = lookupGHCName(p);
+    if (sym == NULL) {
+        debugBelch("%p", p);
+    } else {
+        printZcoded(sym);
+    }
+}
+
+/* Print a closure's address (symbolically if known) followed by a
+ * description of its contents. */
+void printObj( StgClosure *obj )
+{
+ debugBelch("Object "); printPtr((StgPtr)obj); debugBelch(" = ");
+ printClosure(obj);
+}
+
+/* Print the opening of a closure description: "TAG(" followed by the
+ * info pointer (plus the cost-centre label when profiling).  The caller
+ * prints the payload and the closing paren. */
+STATIC_INLINE void
+printStdObjHdr( StgClosure *obj, char* tag )
+{
+ debugBelch("%s(",tag);
+ printPtr((StgPtr)obj->header.info);
+#ifdef PROFILING
+ debugBelch(", %s", obj->header.prof.ccs->cc->label);
+#endif
+}
+
+/* Print a standard closure payload: pointer fields first (symbolically),
+ * then the non-pointer words, then ")\n".
+ * NOTE(review): "%pd#" renders the word with %p followed by the literal
+ * characters "d#" — confirm that is the intended output format. */
+static void
+printStdObjPayload( StgClosure *obj )
+{
+ StgWord i, j;
+ const StgInfoTable* info;
+
+ info = get_itbl(obj);
+ for (i = 0; i < info->layout.payload.ptrs; ++i) {
+ debugBelch(", ");
+ printPtr((StgPtr)obj->payload[i]);
+ }
+ /* non-pointer words sit after the pointers, hence payload[i+j] */
+ for (j = 0; j < info->layout.payload.nptrs; ++j) {
+ debugBelch(", %pd#",obj->payload[i+j]);
+ }
+ debugBelch(")\n");
+}
+
+/* Print a thunk's payload; same traversal as printStdObjPayload but
+ * reads through StgThunk, whose payload presumably starts after the
+ * (larger) thunk header — TODO confirm against Closures.h. */
+static void
+printThunkPayload( StgThunk *obj )
+{
+ StgWord i, j;
+ const StgInfoTable* info;
+
+ info = get_itbl(obj);
+ for (i = 0; i < info->layout.payload.ptrs; ++i) {
+ debugBelch(", ");
+ printPtr((StgPtr)obj->payload[i]);
+ }
+ /* non-pointer words follow the pointer fields */
+ for (j = 0; j < info->layout.payload.nptrs; ++j) {
+ debugBelch(", %pd#",obj->payload[i+j]);
+ }
+ debugBelch(")\n");
+}
+
+/* Print a complete thunk: standard header then thunk payload. */
+static void
+printThunkObject( StgThunk *obj, char* tag )
+{
+ printStdObjHdr( (StgClosure *)obj, tag );
+ printThunkPayload( obj );
+}
+
+/*
+ * Print a (usually one-line) description of the closure 'obj',
+ * dispatching on the closure type in its info table.  Unknown types
+ * are fatal (barf).  Debug builds only.
+ */
+void
+printClosure( StgClosure *obj )
+{
+ StgInfoTable *info;
+
+ info = get_itbl(obj);
+
+ switch ( info->type ) {
+ case INVALID_OBJECT:
+ barf("Invalid object");
+
+ /* Data constructors: print pointer fields symbolically, then the
+ * raw non-pointer words (inline rather than via printStdObjPayload
+ * because the non-profiling header also prints the tag). */
+ case CONSTR:
+ case CONSTR_1_0: case CONSTR_0_1:
+ case CONSTR_1_1: case CONSTR_0_2: case CONSTR_2_0:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ {
+ StgWord i, j;
+#ifdef PROFILING
+ debugBelch("%s(", info->prof.closure_desc);
+ debugBelch("%s", obj->header.prof.ccs->cc->label);
+#else
+ debugBelch("CONSTR(");
+ printPtr((StgPtr)obj->header.info);
+ debugBelch("(tag=%d)",info->srt_bitmap);
+#endif
+ for (i = 0; i < info->layout.payload.ptrs; ++i) {
+ debugBelch(", ");
+ printPtr((StgPtr)obj->payload[i]);
+ }
+ for (j = 0; j < info->layout.payload.nptrs; ++j) {
+ debugBelch(", %p#", obj->payload[i+j]);
+ }
+ debugBelch(")\n");
+ break;
+ }
+
+ case FUN:
+ case FUN_1_0: case FUN_0_1:
+ case FUN_1_1: case FUN_0_2: case FUN_2_0:
+ case FUN_STATIC:
+ debugBelch("FUN/%d(",itbl_to_fun_itbl(info)->f.arity);
+ printPtr((StgPtr)obj->header.info);
+#ifdef PROFILING
+ debugBelch(", %s", obj->header.prof.ccs->cc->label);
+#endif
+ printStdObjPayload(obj);
+ break;
+
+ case THUNK:
+ case THUNK_1_0: case THUNK_0_1:
+ case THUNK_1_1: case THUNK_0_2: case THUNK_2_0:
+ case THUNK_STATIC:
+ /* ToDo: will this work for THUNK_STATIC too? */
+#ifdef PROFILING
+ printThunkObject((StgThunk *)obj,info->prof.closure_desc);
+#else
+ printThunkObject((StgThunk *)obj,"THUNK");
+#endif
+ break;
+
+ case THUNK_SELECTOR:
+ printStdObjHdr(obj, "THUNK_SELECTOR");
+ debugBelch(", %p)\n", ((StgSelector *)obj)->selectee);
+ break;
+
+ case BCO:
+ /* byte-code objects get a full disassembly */
+ disassemble( (StgBCO*)obj );
+ break;
+
+ case AP:
+ {
+ StgAP* ap = stgCast(StgAP*,obj);
+ StgWord i;
+ debugBelch("AP("); printPtr((StgPtr)ap->fun);
+ for (i = 0; i < ap->n_args; ++i) {
+ debugBelch(", ");
+ printPtr((P_)ap->payload[i]);
+ }
+ debugBelch(")\n");
+ break;
+ }
+
+ case PAP:
+ {
+ StgPAP* pap = stgCast(StgPAP*,obj);
+ StgWord i;
+ debugBelch("PAP/%d(",pap->arity);
+ printPtr((StgPtr)pap->fun);
+ for (i = 0; i < pap->n_args; ++i) {
+ debugBelch(", ");
+ printPtr((StgPtr)pap->payload[i]);
+ }
+ debugBelch(")\n");
+ break;
+ }
+
+ case AP_STACK:
+ {
+ StgAP_STACK* ap = stgCast(StgAP_STACK*,obj);
+ StgWord i;
+ debugBelch("AP_STACK("); printPtr((StgPtr)ap->fun);
+ for (i = 0; i < ap->size; ++i) {
+ debugBelch(", ");
+ printPtr((P_)ap->payload[i]);
+ }
+ debugBelch(")\n");
+ break;
+ }
+
+ /* all flavours of indirection just print the indirectee */
+ case IND:
+ debugBelch("IND(");
+ printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+ debugBelch(")\n");
+ break;
+
+ case IND_OLDGEN:
+ debugBelch("IND_OLDGEN(");
+ printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+ debugBelch(")\n");
+ break;
+
+ case IND_PERM:
+ debugBelch("IND(");
+ printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+ debugBelch(")\n");
+ break;
+
+ case IND_OLDGEN_PERM:
+ debugBelch("IND_OLDGEN_PERM(");
+ printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+ debugBelch(")\n");
+ break;
+
+ case IND_STATIC:
+ debugBelch("IND_STATIC(");
+ printPtr((StgPtr)stgCast(StgInd*,obj)->indirectee);
+ debugBelch(")\n");
+ break;
+
+ /* Cannot happen -- use default case.
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ case RET_FUN:
+ */
+
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame* u = stgCast(StgUpdateFrame*,obj);
+ debugBelch("UPDATE_FRAME(");
+ printPtr((StgPtr)GET_INFO(u));
+ debugBelch(",");
+ printPtr((StgPtr)u->updatee);
+ debugBelch(")\n");
+ break;
+ }
+
+ case CATCH_FRAME:
+ {
+ StgCatchFrame* u = stgCast(StgCatchFrame*,obj);
+ debugBelch("CATCH_FRAME(");
+ printPtr((StgPtr)GET_INFO(u));
+ debugBelch(",");
+ printPtr((StgPtr)u->handler);
+ debugBelch(")\n");
+ break;
+ }
+
+ case STOP_FRAME:
+ {
+ StgStopFrame* u = stgCast(StgStopFrame*,obj);
+ debugBelch("STOP_FRAME(");
+ printPtr((StgPtr)GET_INFO(u));
+ debugBelch(")\n");
+ break;
+ }
+
+ case CAF_BLACKHOLE:
+ debugBelch("CAF_BH");
+ break;
+
+ case BLACKHOLE:
+ debugBelch("BH\n");
+ break;
+
+ case SE_BLACKHOLE:
+ debugBelch("SE_BH\n");
+ break;
+
+ case SE_CAF_BLACKHOLE:
+ debugBelch("SE_CAF_BH\n");
+ break;
+
+ case ARR_WORDS:
+ {
+ StgWord i;
+ debugBelch("ARR_WORDS(\"");
+ /* ToDo: we can't safely assume that this is a string!
+ for (i = 0; arrWordsGetChar(obj,i); ++i) {
+ putchar(arrWordsGetChar(obj,i));
+ } */
+ for (i=0; i<((StgArrWords *)obj)->words; i++)
+ debugBelch("%lu", (lnat)((StgArrWords *)obj)->payload[i]);
+ debugBelch("\")\n");
+ break;
+ }
+
+ case MUT_ARR_PTRS_CLEAN:
+ debugBelch("MUT_ARR_PTRS_CLEAN(size=%lu)\n", (lnat)((StgMutArrPtrs *)obj)->ptrs);
+ break;
+
+ case MUT_ARR_PTRS_DIRTY:
+ debugBelch("MUT_ARR_PTRS_DIRTY(size=%lu)\n", (lnat)((StgMutArrPtrs *)obj)->ptrs);
+ break;
+
+ case MUT_ARR_PTRS_FROZEN:
+ debugBelch("MUT_ARR_PTRS_FROZEN(size=%lu)\n", (lnat)((StgMutArrPtrs *)obj)->ptrs);
+ break;
+
+ case MVAR:
+ {
+ StgMVar* mv = (StgMVar*)obj;
+ debugBelch("MVAR(head=%p, tail=%p, value=%p)\n", mv->head, mv->tail, mv->value);
+ break;
+ }
+
+ case MUT_VAR_CLEAN:
+ {
+ StgMutVar* mv = (StgMutVar*)obj;
+ debugBelch("MUT_VAR_CLEAN(var=%p)\n", mv->var);
+ break;
+ }
+
+ case MUT_VAR_DIRTY:
+ {
+ StgMutVar* mv = (StgMutVar*)obj;
+ debugBelch("MUT_VAR_DIRTY(var=%p)\n", mv->var);
+ break;
+ }
+
+ case WEAK:
+ debugBelch("WEAK(");
+ debugBelch(" key=%p value=%p finalizer=%p",
+ (StgPtr)(((StgWeak*)obj)->key),
+ (StgPtr)(((StgWeak*)obj)->value),
+ (StgPtr)(((StgWeak*)obj)->finalizer));
+ debugBelch(")\n");
+ /* ToDo: chase 'link' ? */
+ break;
+
+ case STABLE_NAME:
+ debugBelch("STABLE_NAME(%lu)\n", (lnat)((StgStableName*)obj)->sn);
+ break;
+
+ case TSO:
+ debugBelch("TSO(");
+ debugBelch("%d (%p)",((StgTSO*)obj)->id, (StgTSO*)obj);
+ debugBelch(")\n");
+ break;
+
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ debugBelch("BLOCKED_FETCH(");
+ printGA(&(stgCast(StgBlockedFetch*,obj)->ga));
+ printPtr((StgPtr)(stgCast(StgBlockedFetch*,obj)->node));
+ debugBelch(")\n");
+ break;
+
+ case FETCH_ME:
+ debugBelch("FETCH_ME(");
+ printGA((globalAddr *)stgCast(StgFetchMe*,obj)->ga);
+ debugBelch(")\n");
+ break;
+
+ case FETCH_ME_BQ:
+ debugBelch("FETCH_ME_BQ(");
+ // printGA((globalAddr *)stgCast(StgFetchMe*,obj)->ga);
+ printPtr((StgPtr)stgCast(StgFetchMeBlockingQueue*,obj)->blocking_queue);
+ debugBelch(")\n");
+ break;
+#endif
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ debugBelch("RBH(");
+ printPtr((StgPtr)stgCast(StgRBH*,obj)->blocking_queue);
+ debugBelch(")\n");
+ break;
+
+#endif
+
+#if 0
+ /* Symptomatic of a problem elsewhere, have it fall-through & fail */
+ case EVACUATED:
+ debugBelch("EVACUATED(");
+ printClosure((StgEvacuated*)obj->evacuee);
+ debugBelch(")\n");
+ break;
+#endif
+
+#if defined(PAR) && defined(DIST)
+ case REMOTE_REF:
+ debugBelch("REMOTE_REF(");
+ printGA((globalAddr *)stgCast(StgFetchMe*,obj)->ga);
+ debugBelch(")\n");
+ break;
+#endif
+
+ default:
+ //barf("printClosure %d",get_itbl(obj)->type);
+ debugBelch("*** printClosure: unknown type %d ****\n",
+ get_itbl(obj)->type );
+ barf("printClosure %d",get_itbl(obj)->type);
+ return;
+ }
+}
+
+/*
+void printGraph( StgClosure *obj )
+{
+ printClosure(obj);
+}
+*/
+
+/*
+ * Print the single stack word at 'sp' and return the next stack
+ * position.  The well-known byte-code-interpreter return continuations
+ * are recognised by comparing info-table addresses; BCOs are
+ * abbreviated; anything else is printed as a full closure.
+ */
+StgPtr
+printStackObj( StgPtr sp )
+{
+ /*debugBelch("Stack[%d] = ", &stgStack[STACK_SIZE] - sp); */
+
+ StgClosure* c = (StgClosure*)(*sp);
+ printPtr((StgPtr)*sp);
+ if (c == (StgClosure*)&stg_ctoi_R1p_info) {
+ debugBelch("\t\t\tstg_ctoi_ret_R1p_info\n" );
+ } else
+ if (c == (StgClosure*)&stg_ctoi_R1n_info) {
+ debugBelch("\t\t\tstg_ctoi_ret_R1n_info\n" );
+ } else
+ if (c == (StgClosure*)&stg_ctoi_F1_info) {
+ debugBelch("\t\t\tstg_ctoi_ret_F1_info\n" );
+ } else
+ if (c == (StgClosure*)&stg_ctoi_D1_info) {
+ debugBelch("\t\t\tstg_ctoi_ret_D1_info\n" );
+ } else
+ if (c == (StgClosure*)&stg_ctoi_V_info) {
+ debugBelch("\t\t\tstg_ctoi_ret_V_info\n" );
+ } else
+ if (get_itbl(c)->type == BCO) {
+ debugBelch("\t\t\t");
+ debugBelch("BCO(...)\n");
+ }
+ else {
+ debugBelch("\t\t\t");
+ printClosure ( (StgClosure*)(*sp));
+ }
+ sp += 1;
+
+ return sp;
+
+}
+
+/*
+ * Print 'size' stack words starting at 'payload', consulting 'bitmap'
+ * for each: a clear bit means the word is a pointer (printed
+ * symbolically), a set bit means a raw word.  'spBottom' is the stack
+ * base, used only to compute the stk[n] index in the output.
+ */
+static void
+printSmallBitmap( StgPtr spBottom, StgPtr payload, StgWord bitmap, nat size )
+{
+ nat i;
+
+ /* removed unused local 'p' (was assigned 'payload' and never read) */
+ for(i = 0; i < size; i++, bitmap >>= 1 ) {
+ debugBelch(" stk[%ld] (%p) = ", (long)(spBottom-(payload+i)), payload+i);
+ if ((bitmap & 1) == 0) {
+ printPtr((P_)payload[i]);
+ debugBelch("\n");
+ } else {
+ debugBelch("Word# %lu\n", (lnat)payload[i]);
+ }
+ }
+}
+
+/*
+ * As printSmallBitmap, but the liveness bits come from a multi-word
+ * StgLargeBitmap; 'bmp' indexes the bitmap words, 'i' counts stack
+ * words and 'j' counts bits consumed within the current bitmap word.
+ */
+static void
+printLargeBitmap( StgPtr spBottom, StgPtr payload, StgLargeBitmap* large_bitmap, nat size )
+{
+ StgWord bmp;
+ nat i, j;
+
+ i = 0;
+ for (bmp=0; i < size; bmp++) {
+ StgWord bitmap = large_bitmap->bitmap[bmp];
+ j = 0;
+ for(; i < size && j < BITS_IN(W_); j++, i++, bitmap >>= 1 ) {
+ debugBelch(" stk[%lu] (%p) = ", (lnat)(spBottom-(payload+i)), payload+i);
+ if ((bitmap & 1) == 0) {
+ printPtr((P_)payload[i]);
+ debugBelch("\n");
+ } else {
+ debugBelch("Word# %lu\n", (lnat)payload[i]);
+ }
+ }
+ }
+}
+
+/*
+ * Walk the stack from 'sp' (topmost frame) down to 'spBottom',
+ * printing each frame.  Returns early at a STOP_FRAME, which marks the
+ * bottom of the stack.
+ */
+void
+printStackChunk( StgPtr sp, StgPtr spBottom )
+{
+ StgWord bitmap;
+ const StgInfoTable *info;
+
+ ASSERT(sp <= spBottom);
+ for (; sp < spBottom; sp += stack_frame_sizeW((StgClosure *)sp)) {
+
+ info = get_itbl((StgClosure *)sp);
+
+ switch (info->type) {
+
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ printObj((StgClosure*)sp);
+ continue;
+
+ case STOP_FRAME:
+ /* last frame on the stack; nothing below it */
+ printObj((StgClosure*)sp);
+ return;
+
+ case RET_DYN:
+ {
+ StgRetDyn* r;
+ StgPtr p;
+ StgWord dyn;
+ nat size;
+
+ r = (StgRetDyn *)sp;
+ dyn = r->liveness;
+ debugBelch("RET_DYN (%p)\n", r);
+
+ p = (P_)(r->payload);
+ /* BUG FIX: the bitmapped words begin at r->payload (p), not at
+ * sp, which still points at the frame header words. */
+ printSmallBitmap(spBottom, p,
+ RET_DYN_LIVENESS(r->liveness),
+ RET_DYN_BITMAP_SIZE);
+ p += RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE;
+
+ /* saved non-pointer registers, then saved pointer registers */
+ for (size = RET_DYN_NONPTRS(dyn); size > 0; size--) {
+ debugBelch(" stk[%ld] (%p) = ", (long)(spBottom-p), p);
+ debugBelch("Word# %ld\n", (long)*p);
+ p++;
+ }
+
+ for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
+ debugBelch(" stk[%ld] (%p) = ", (long)(spBottom-p), p);
+ printPtr(p);
+ p++;
+ }
+ continue;
+ }
+
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ debugBelch("RET_SMALL (%p)\n", info);
+ bitmap = info->layout.bitmap;
+ printSmallBitmap(spBottom, sp+1,
+ BITMAP_BITS(bitmap), BITMAP_SIZE(bitmap));
+ continue;
+
+ case RET_BCO: {
+ StgBCO *bco;
+
+ bco = ((StgBCO *)sp[1]);
+
+ debugBelch("RET_BCO (%p)\n", sp);
+ printLargeBitmap(spBottom, sp+2,
+ BCO_BITMAP(bco), BCO_BITMAP_SIZE(bco));
+ continue;
+ }
+
+ case RET_BIG:
+ case RET_VEC_BIG:
+ barf("todo");
+
+ case RET_FUN:
+ {
+ StgFunInfoTable *fun_info;
+ StgRetFun *ret_fun;
+ nat size;
+
+ ret_fun = (StgRetFun *)sp;
+ fun_info = get_fun_itbl(ret_fun->fun);
+ size = ret_fun->size;
+ debugBelch("RET_FUN (%p) (type=%d)\n", ret_fun->fun, fun_info->f.fun_type);
+ /* the argument layout depends on the function's calling convention */
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ printSmallBitmap(spBottom, sp+2,
+ BITMAP_BITS(fun_info->f.b.bitmap),
+ BITMAP_SIZE(fun_info->f.b.bitmap));
+ break;
+ case ARG_GEN_BIG:
+ printLargeBitmap(spBottom, sp+2,
+ GET_FUN_LARGE_BITMAP(fun_info),
+ GET_FUN_LARGE_BITMAP(fun_info)->size);
+ break;
+ default:
+ printSmallBitmap(spBottom, sp+2,
+ BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]),
+ BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]));
+ break;
+ }
+ continue;
+ }
+
+ default:
+ debugBelch("unknown object %d\n", info->type);
+ barf("printStackChunk");
+ }
+ }
+}
+
+/* Dump every frame on 'tso's stack, from the current sp to the base. */
+void printTSO( StgTSO *tso )
+{
+    StgPtr bottom = tso->stack + tso->stack_size;
+    printStackChunk( tso->sp, bottom );
+}
+
+/* -----------------------------------------------------------------------------
+ Closure types
+
+ NOTE: must be kept in sync with the closure types in includes/ClosureTypes.h
+ -------------------------------------------------------------------------- */
+
+static char *closure_type_names[] = {
+ "INVALID_OBJECT",
+ "CONSTR",
+ /* BUG FIX: the five specialised CONSTR names below had their
+ * ptr/nonptr suffixes truncated (e.g. "CONSTR_1"); restored to match
+ * the ordering in includes/ClosureTypes.h. */
+ "CONSTR_1_0",
+ "CONSTR_0_1",
+ "CONSTR_2_0",
+ "CONSTR_1_1",
+ "CONSTR_0_2",
+ "CONSTR_INTLIKE",
+ "CONSTR_CHARLIKE",
+ "CONSTR_STATIC",
+ "CONSTR_NOCAF_STATIC",
+ "FUN",
+ "FUN_1_0",
+ "FUN_0_1",
+ "FUN_2_0",
+ "FUN_1_1",
+ "FUN_0_2",
+ "FUN_STATIC",
+ "THUNK",
+ "THUNK_1_0",
+ "THUNK_0_1",
+ "THUNK_2_0",
+ "THUNK_1_1",
+ "THUNK_0_2",
+ "THUNK_STATIC",
+ "THUNK_SELECTOR",
+ "BCO",
+ /* NOTE(review): "AP_UPD" is the historical name for the AP closure
+ * type; confirm against ClosureTypes.h before renaming. */
+ "AP_UPD",
+ "PAP",
+ "AP_STACK",
+ "IND",
+ "IND_OLDGEN",
+ "IND_PERM",
+ "IND_OLDGEN_PERM",
+ "IND_STATIC",
+ "RET_BCO",
+ "RET_SMALL",
+ "RET_VEC_SMALL",
+ "RET_BIG",
+ "RET_VEC_BIG",
+ "RET_DYN",
+ "RET_FUN",
+ "UPDATE_FRAME",
+ "CATCH_FRAME",
+ "STOP_FRAME",
+ "CAF_BLACKHOLE",
+ "BLACKHOLE",
+ "BLACKHOLE_BQ",
+ "SE_BLACKHOLE",
+ "SE_CAF_BLACKHOLE",
+ "MVAR",
+ "ARR_WORDS",
+ "MUT_ARR_PTRS_CLEAN",
+ "MUT_ARR_PTRS_DIRTY",
+ "MUT_ARR_PTRS_FROZEN",
+ "MUT_VAR_CLEAN",
+ "MUT_VAR_DIRTY",
+ "MUT_CONS",
+ "WEAK",
+ "FOREIGN",
+ "STABLE_NAME",
+ "TSO",
+ "BLOCKED_FETCH",
+ "FETCH_ME",
+ "FETCH_ME_BQ",
+ "RBH",
+ "EVACUATED",
+ "REMOTE_REF",
+ "TVAR_WAIT_QUEUE",
+ "TVAR",
+ "TREC_CHUNK",
+ "TREC_HEADER",
+ "ATOMICALLY_FRAME",
+ "CATCH_RETRY_FRAME"
+};
+
+
+/* Return the printable name of 'closure's closure type. */
+char *
+info_type(StgClosure *closure){
+ return closure_type_names[get_itbl(closure)->type];
+}
+
+/* Return the printable name of the closure type in info table 'ip'. */
+char *
+info_type_by_ip(StgInfoTable *ip){
+ return closure_type_names[ip->type];
+}
+
+/* Copy the closure-type name of 'closure' into caller-supplied buffer
+ * 'res'; the caller must provide enough space for the longest name. */
+void
+info_hdr_type(StgClosure *closure, char *res){
+ strcpy(res,closure_type_names[get_itbl(closure)->type]);
+}
+
+/* --------------------------------------------------------------------------
+ * Address printing code
+ *
+ * Uses symbol table in (unstripped executable)
+ * ------------------------------------------------------------------------*/
+
+/* --------------------------------------------------------------------------
+ * Simple lookup table
+ *
+ * Current implementation is pretty dumb!
+ * ------------------------------------------------------------------------*/
+
+struct entry {
+ nat value;
+ const char *name;
+};
+
+static nat table_size;
+static struct entry* table;
+
+#ifdef USING_LIBBFD
+static nat max_table_size;
+
+/* Discard any previous symbol table and allocate room for 'size'
+ * entries.  NOTE(review): the old 'table' allocation, if any, is not
+ * freed here — acceptable for a one-shot debug load, but a leak if
+ * called repeatedly. */
+static void reset_table( int size )
+{
+ max_table_size = size;
+ table_size = 0;
+ table = (struct entry *)stgMallocBytes(size * sizeof(struct entry), "Printer.c:reset_table()");
+}
+
+/* Hook for post-processing the symbol table after loading; currently a
+ * no-op (lookups are linear scans). */
+static void prepare_table( void )
+{
+ /* Could sort it... */
+}
+
+/* Append one (address, name) pair to the symbol table; fatal if the
+ * capacity fixed by reset_table() is exceeded.  'name' is stored by
+ * reference, so it must outlive the table. */
+static void insert( unsigned value, const char *name )
+{
+ if ( table_size >= max_table_size ) {
+ barf( "Symbol table overflow\n" );
+ }
+ table[table_size].value = value;
+ table[table_size].name = name;
+ table_size = table_size + 1;
+}
+#endif
+
+#if 0
+static rtsBool lookup_name( char *name, unsigned *result )
+{
+ int i;
+ for( i = 0; i < table_size && strcmp(name,table[i].name) != 0; ++i ) {
+ }
+ if (i < table_size) {
+ *result = table[i].value;
+ return rtsTrue;
+ } else {
+ return rtsFalse;
+ }
+}
+#endif
+
+/* Code from somewhere inside GHC (circa 1994)
+ * * Z-escapes:
+ * "std"++xs -> "Zstd"++xs
+ * char_to_c 'Z' = "ZZ"
+ * char_to_c '&' = "Za"
+ * char_to_c '|' = "Zb"
+ * char_to_c ':' = "Zc"
+ * char_to_c '/' = "Zd"
+ * char_to_c '=' = "Ze"
+ * char_to_c '>' = "Zg"
+ * char_to_c '#' = "Zh"
+ * char_to_c '<' = "Zl"
+ * char_to_c '-' = "Zm"
+ * char_to_c '!' = "Zn"
+ * char_to_c '.' = "Zo"
+ * char_to_c '+' = "Zp"
+ * char_to_c '\'' = "Zq"
+ * char_to_c '*' = "Zt"
+ * char_to_c '_' = "Zu"
+ * char_to_c c = "Z" ++ show (ord c)
+ */
+/* Decode the character following a 'Z' in a Z-encoded name (see the
+ * escape table in the comment above).  Unrecognised escape characters
+ * are returned unchanged; 'ZZ' and a trailing lone 'Z' decode to 'Z'. */
+static char unZcode( char ch )
+{
+ switch (ch) {
+ case 'a' : return ('&');
+ case 'b' : return ('|');
+ case 'c' : return (':');
+ case 'd' : return ('/');
+ case 'e' : return ('=');
+ case 'g' : return ('>');
+ case 'h' : return ('#');
+ case 'l' : return ('<');
+ case 'm' : return ('-');
+ case 'n' : return ('!');
+ case 'o' : return ('.');
+ case 'p' : return ('+');
+ case 'q' : return ('\'');
+ case 't' : return ('*');
+ case 'u' : return ('_');
+ case 'Z' :
+ case '\0' : return ('Z');
+ default : return (ch);
+ }
+}
+
+#if 0
+/* Precondition: out big enough to handle output (about twice length of in) */
+static void enZcode( char *in, char *out )
+{
+ int i, j;
+
+ j = 0;
+ out[ j++ ] = '_';
+ for( i = 0; in[i] != '\0'; ++i ) {
+ switch (in[i]) {
+ case 'Z' :
+ out[j++] = 'Z';
+ out[j++] = 'Z';
+ break;
+ case '&' :
+ out[j++] = 'Z';
+ out[j++] = 'a';
+ break;
+ case '|' :
+ out[j++] = 'Z';
+ out[j++] = 'b';
+ break;
+ case ':' :
+ out[j++] = 'Z';
+ out[j++] = 'c';
+ break;
+ case '/' :
+ out[j++] = 'Z';
+ out[j++] = 'd';
+ break;
+ case '=' :
+ out[j++] = 'Z';
+ out[j++] = 'e';
+ break;
+ case '>' :
+ out[j++] = 'Z';
+ out[j++] = 'g';
+ break;
+ case '#' :
+ out[j++] = 'Z';
+ out[j++] = 'h';
+ break;
+ case '<' :
+ out[j++] = 'Z';
+ out[j++] = 'l';
+ break;
+ case '-' :
+ out[j++] = 'Z';
+ out[j++] = 'm';
+ break;
+ case '!' :
+ out[j++] = 'Z';
+ out[j++] = 'n';
+ break;
+ case '.' :
+ out[j++] = 'Z';
+ out[j++] = 'o';
+ break;
+ case '+' :
+ out[j++] = 'Z';
+ out[j++] = 'p';
+ break;
+ case '\'' :
+ out[j++] = 'Z';
+ out[j++] = 'q';
+ break;
+ case '*' :
+ out[j++] = 'Z';
+ out[j++] = 't';
+ break;
+ case '_' :
+ out[j++] = 'Z';
+ out[j++] = 'u';
+ break;
+ default :
+ out[j++] = in[i];
+ break;
+ }
+ }
+ out[j] = '\0';
+}
+#endif
+
+/* Linear-scan the loaded symbol table for 'addr'; return the (still
+ * Z-encoded) symbol name, or NULL if not found.
+ * NOTE(review): the cast to 'unsigned' truncates pointers on LP64
+ * platforms, so lookups can alias or miss there — confirm intent. */
+const char *lookupGHCName( void *addr )
+{
+ nat i;
+ for( i = 0; i < table_size && table[i].value != (unsigned) addr; ++i ) {
+ }
+ if (i < table_size) {
+ return table[i].name;
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * Print a Z-encoded symbol name in decoded form: a 'Z' introduces a
+ * two-character escape (decoded by unZcode); other characters are
+ * passed through.
+ */
+static void printZcoded( const char *raw )
+{
+ nat j = 0;
+
+ while ( raw[j] != '\0' ) {
+ if (raw[j] == 'Z') {
+ debugBelch("%c", unZcode(raw[j+1]));
+ j = j + 2;
+ } else {
+ /* BUG FIX: print the current character — the code previously
+ * read raw[j+1] here, skipping every other character. */
+ debugBelch("%c", unZcode(raw[j]));
+ j = j + 1;
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Symbol table loading
+ * ------------------------------------------------------------------------*/
+
+/* Causing linking trouble on Win32 plats, so I'm
+ disabling this for now.
+*/
+#ifdef USING_LIBBFD
+
+#include <bfd.h>
+
+/* Fairly ad-hoc piece of code that seems to filter out a lot of
+ * rubbish like the obj-splitting symbols
+ */
+
+/* Heuristic filter for symbols worth keeping: rejects empty names and
+ * compiler-generated "gcc..."/"cc."-prefixed symbols.  'flags' is
+ * currently ignored (see the disabled N_TYPE-based version). */
+static rtsBool isReal( flagword flags STG_UNUSED, const char *name )
+{
+#if 0
+ /* ToDo: make this work on BFD */
+ int tp = type & N_TYPE;
+ if (tp == N_TEXT || tp == N_DATA) {
+ return (name[0] == '_' && name[1] != '_');
+ } else {
+ return rtsFalse;
+ }
+#else
+ if (*name == '\0' ||
+ (name[0] == 'g' && name[1] == 'c' && name[2] == 'c') ||
+ (name[0] == 'c' && name[1] == 'c' && name[2] == '.')) {
+ return rtsFalse;
+ }
+ return rtsTrue;
+#endif
+}
+
+/*
+ * Load the symbol table of executable 'name' via libbfd so that
+ * printPtr/lookupGHCName can print addresses symbolically.  Makes two
+ * passes over the canonicalised symbols: one to count the "real"
+ * symbols (see isReal) so the table can be sized, one to insert them.
+ * Fatal (barf) on any BFD error.
+ */
+extern void DEBUG_LoadSymbols( char *name )
+{
+ bfd* abfd;
+ char **matching;
+
+ bfd_init();
+ abfd = bfd_openr(name, "default");
+ if (abfd == NULL) {
+ barf("can't open executable %s to get symbol table", name);
+ }
+ if (!bfd_check_format_matches (abfd, bfd_object, &matching)) {
+ barf("mismatch");
+ }
+
+ {
+ long storage_needed;
+ asymbol **symbol_table;
+ long number_of_symbols;
+ long num_real_syms = 0;
+ long i;
+
+ storage_needed = bfd_get_symtab_upper_bound (abfd);
+
+ if (storage_needed < 0) {
+ barf("can't read symbol table");
+ }
+#if 0
+ if (storage_needed == 0) {
+ debugBelch("no storage needed");
+ }
+#endif
+ symbol_table = (asymbol **) stgMallocBytes(storage_needed,"DEBUG_LoadSymbols");
+
+ number_of_symbols = bfd_canonicalize_symtab (abfd, symbol_table);
+
+ if (number_of_symbols < 0) {
+ barf("can't canonicalise symbol table");
+ }
+
+ /* first pass: count the symbols we are going to keep */
+ for( i = 0; i != number_of_symbols; ++i ) {
+ symbol_info info;
+ bfd_get_symbol_info(abfd,symbol_table[i],&info);
+ /*debugBelch("\t%c\t0x%x \t%s\n",info.type,(nat)info.value,info.name); */
+ if (isReal(info.type, info.name)) {
+ num_real_syms += 1;
+ }
+ }
+
+ IF_DEBUG(interpreter,
+ debugBelch("Loaded %ld symbols. Of which %ld are real symbols\n",
+ number_of_symbols, num_real_syms)
+ );
+
+ reset_table( num_real_syms );
+
+ /* second pass: populate the lookup table */
+ for( i = 0; i != number_of_symbols; ++i ) {
+ symbol_info info;
+ bfd_get_symbol_info(abfd,symbol_table[i],&info);
+ if (isReal(info.type, info.name)) {
+ insert( info.value, info.name );
+ }
+ }
+
+ stgFree(symbol_table);
+ }
+ prepare_table();
+}
+
+#else /* !USING_LIBBFD */
+
+/* Stub used when libbfd is unavailable: symbol lookup stays empty and
+ * printPtr falls back to raw addresses. */
+extern void DEBUG_LoadSymbols( char *name STG_UNUSED )
+{
+ /* nothing, yet */
+}
+
+#endif /* USING_LIBBFD */
+
+void findPtr(P_ p, int); /* keep gcc -Wall happy */
+
+/*
+ * Debugger helper: scan every block of every generation/step for heap
+ * words equal to 'p', i.e. find the closures that reference 'p'.  For
+ * each hit, walk backwards to the nearest word that looks like an info
+ * pointer (a heuristic for the enclosing closure's start) and print
+ * that closure.  At most arr_size referrers are recorded; if 'follow'
+ * is set and exactly one referrer was found, recurse on it to chase
+ * the chain of unique referrers.
+ */
+void
+findPtr(P_ p, int follow)
+{
+ nat s, g;
+ P_ q, r;
+ bdescr *bd;
+#if defined(__GNUC__)
+ const int arr_size = 1024;
+#else
+#define arr_size 1024
+#endif
+ StgPtr arr[arr_size];
+ int i = 0;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ bd = generations[g].steps[s].blocks;
+ for (; bd; bd = bd->link) {
+ for (q = bd->start; q < bd->free; q++) {
+ if (*q == (W_)p) {
+ if (i < arr_size) {
+ r = q;
+ /* back up to something resembling a closure header */
+ while (!LOOKS_LIKE_INFO_PTR(*r) || (P_)*r == NULL) {
+ r--;
+ }
+ debugBelch("%p = ", r);
+ printClosure((StgClosure *)r);
+ arr[i++] = r;
+ } else {
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+ if (follow && i == 1) {
+ debugBelch("-->\n");
+ findPtr(arr[0], 1);
+ }
+}
+
+#else /* DEBUG */
+/* Non-DEBUG build: no symbol table, just print the raw address. */
+void printPtr( StgPtr p )
+{
+ debugBelch("ptr 0x%p (enable -DDEBUG for more info) " , p );
+}
+
+/* Non-DEBUG build: closure printing is unavailable, print the address. */
+void printObj( StgClosure *obj )
+{
+ debugBelch("obj 0x%p (enable -DDEBUG for more info) " , obj );
+}
+#endif /* DEBUG */
diff --git a/rts/Printer.h b/rts/Printer.h
new file mode 100644
index 0000000000..54bf611250
--- /dev/null
+++ b/rts/Printer.h
@@ -0,0 +1,31 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Prototypes for functions in Printer.c
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PRINTER_H
+#define PRINTER_H
+
+extern void printPtr ( StgPtr p );
+extern void printObj ( StgClosure *obj );
+
+#ifdef DEBUG
+extern void printClosure ( StgClosure *obj );
+extern StgStackPtr printStackObj ( StgStackPtr sp );
+extern void printStackChunk ( StgStackPtr sp, StgStackPtr spLim );
+extern void printTSO ( StgTSO *tso );
+
+void info_hdr_type ( StgClosure *closure, char *res );
+char * info_type ( StgClosure *closure );
+char * info_type_by_ip ( StgInfoTable *ip );
+
+extern void DEBUG_LoadSymbols( char *name );
+
+extern const char *lookupGHCName( void *addr );
+#endif
+
+#endif /* PRINTER_H */
+
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
new file mode 100644
index 0000000000..312bee735c
--- /dev/null
+++ b/rts/ProfHeap.c
@@ -0,0 +1,1156 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2003
+ *
+ * Support for heap profiling
+ *
+ * ---------------------------------------------------------------------------*/
+
+#if defined(DEBUG) && !defined(PROFILING)
+#define DEBUG_HEAP_PROF
+#else
+#undef DEBUG_HEAP_PROF
+#endif
+
+#if defined(PROFILING) || defined(DEBUG_HEAP_PROF)
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Profiling.h"
+#include "Storage.h"
+#include "ProfHeap.h"
+#include "Stats.h"
+#include "Hash.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+#include "Arena.h"
+#include "Printer.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* -----------------------------------------------------------------------------
+ * era stores the current time period. It is the same as the
+ * number of censuses that have been performed.
+ *
+ * RESTRICTION:
+ * era must be no longer than LDV_SHIFT (15 or 30) bits.
+ * Invariants:
+ * era is initialized to 1 in initHeapProfiling().
+ *
+ * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
+ * When era reaches max_era, the profiling stops because a closure can
+ * store only up to (max_era - 1) as its creation or last use time.
+ * -------------------------------------------------------------------------- */
+unsigned int era;
+static nat max_era;
+
+/* -----------------------------------------------------------------------------
+ * Counters
+ *
+ * For most heap profiles each closure identity gets a simple count
+ * of live words in the heap at each census. However, if we're
+ * selecting by biography, then we have to keep the various
+ * lag/drag/void counters for each identity.
+ * -------------------------------------------------------------------------- */
+typedef struct _counter {
+ void *identity;
+ union {
+ nat resid;
+ struct {
+ int prim; // total size of 'inherently used' closures
+ int not_used; // total size of 'never used' closures
+ int used; // total size of 'used at least once' closures
+ int void_total; // current total size of 'destroyed without being used' closures
+ int drag_total; // current total size of 'used at least once and waiting to die'
+ } ldv;
+ } c;
+ struct _counter *next;
+} counter;
+
+/* Zero all lag/drag/void fields of an LDV counter; used when a counter
+ * is freshly allocated from a census arena. */
+STATIC_INLINE void
+initLDVCtr( counter *ctr )
+{
+ ctr->c.ldv.prim = 0;
+ ctr->c.ldv.not_used = 0;
+ ctr->c.ldv.used = 0;
+ ctr->c.ldv.void_total = 0;
+ ctr->c.ldv.drag_total = 0;
+}
+
+typedef struct {
+ double time; // the time in MUT time when the census is made
+ HashTable * hash;
+ counter * ctrs;
+ Arena * arena;
+
+ // for LDV profiling, when just displaying by LDV
+ int prim;
+ int not_used;
+ int used;
+ int void_total;
+ int drag_total;
+} Census;
+
+static Census *censuses = NULL;
+static nat n_censuses = 0;
+
+#ifdef PROFILING
+static void aggregateCensusInfo( void );
+#endif
+
+static void dumpCensus( Census *census );
+
+/* -----------------------------------------------------------------------------
+ Closure Type Profiling;
+
+ PROBABLY TOTALLY OUT OF DATE -- ToDo (SDM)
+ -------------------------------------------------------------------------- */
+
+#ifdef DEBUG_HEAP_PROF
+static char *type_names[] = {
+ "INVALID_OBJECT"
+ , "CONSTR"
+ , "CONSTR_INTLIKE"
+ , "CONSTR_CHARLIKE"
+ , "CONSTR_STATIC"
+ , "CONSTR_NOCAF_STATIC"
+
+ , "FUN"
+ , "FUN_STATIC"
+
+ , "THUNK"
+ , "THUNK_STATIC"
+ , "THUNK_SELECTOR"
+
+ , "BCO"
+ , "AP_STACK"
+ , "AP"
+
+ , "PAP"
+
+ , "IND"
+ , "IND_OLDGEN"
+ , "IND_PERM"
+ , "IND_OLDGEN_PERM"
+ , "IND_STATIC"
+
+ , "RET_BCO"
+ , "RET_SMALL"
+ , "RET_VEC_SMALL"
+ , "RET_BIG"
+ , "RET_VEC_BIG"
+ , "RET_DYN"
+ , "UPDATE_FRAME"
+ , "CATCH_FRAME"
+ , "STOP_FRAME"
+
+ , "BLACKHOLE"
+ , "MVAR"
+
+ , "ARR_WORDS"
+
+ , "MUT_ARR_PTRS_CLEAN"
+ , "MUT_ARR_PTRS_DIRTY"
+ , "MUT_ARR_PTRS_FROZEN"
+ , "MUT_VAR_CLEAN"
+ , "MUT_VAR_DIRTY"
+
+ , "WEAK"
+
+ , "TSO"
+
+ , "BLOCKED_FETCH"
+ , "FETCH_ME"
+
+ , "EVACUATED"
+};
+
+#endif /* DEBUG_HEAP_PROF */
+
+/* -----------------------------------------------------------------------------
+ * Find the "closure identity", which is a unique pointer representing
+ * the band to which this closure's heap space is attributed in the
+ * heap profile.
+ * ------------------------------------------------------------------------- */
+/* Return the "closure identity" of 'p': the pointer that names the
+ * band this closure's space is charged to, chosen by the active
+ * heap-profile mode (+RTS -h<x>).  May return NULL for retainer
+ * profiling when the retainer-set field is invalid (e.g. DEAD_WEAK). */
+STATIC_INLINE void *
+closureIdentity( StgClosure *p )
+{
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+
+#ifdef PROFILING
+ case HEAP_BY_CCS:
+ return p->header.prof.ccs;
+ case HEAP_BY_MOD:
+ return p->header.prof.ccs->cc->module;
+ case HEAP_BY_DESCR:
+ return get_itbl(p)->prof.closure_desc;
+ case HEAP_BY_TYPE:
+ return get_itbl(p)->prof.closure_type;
+ case HEAP_BY_RETAINER:
+ // AFAIK, the only closures in the heap which might not have a
+ // valid retainer set are DEAD_WEAK closures.
+ if (isRetainerSetFieldValid(p))
+ return retainerSetOf(p);
+ else
+ return NULL;
+
+#else // DEBUG
+ case HEAP_BY_INFOPTR:
+ return (void *)((StgClosure *)p)->header.info;
+ case HEAP_BY_CLOSURE_TYPE:
+ return type_names[get_itbl(p)->type];
+
+#endif
+ default:
+ barf("closureIdentity");
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Profiling type predicates
+ * ----------------------------------------------------------------------- */
+#ifdef PROFILING
+/* True when this run performs lag/drag/void (biographical) profiling,
+ * either directly (-hb) or via a biography selector. */
+STATIC_INLINE rtsBool
+doingLDVProfiling( void )
+{
+ return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
+ || RtsFlags.ProfFlags.bioSelector != NULL);
+}
+
+/* True when this run performs retainer profiling, either directly
+ * (-hr) or via a retainer selector. */
+STATIC_INLINE rtsBool
+doingRetainerProfiling( void )
+{
+ return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
+ || RtsFlags.ProfFlags.retainerSelector != NULL);
+}
+#endif /* PROFILING */
+
+// Processes a closure 'c' being destroyed whose size is 'size'.
+// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
+// such as TSO; they should not be involved in computing dragNew or voidNew.
+//
+// Even though era is checked in both LdvCensusForDead() and
+// LdvCensusKillAll(), we still need to make sure that era is > 0 because
+// LDV_recordDead() may be called from elsewhere in the runtime system. E.g.,
+// when a thunk is replaced by an indirection object.
+
+#ifdef PROFILING
+/*
+ * Record the death of closure 'c' of size 'size' (in words) for LDV
+ * profiling.  A closure still in the CREATE state died without ever
+ * being used (void); one in the USE state was used and has been
+ * dragging since its last use (drag).  The size is moved from the
+ * census of the relevant past era into the current one, either on the
+ * global census totals (no biography selector) or on the per-identity
+ * counters, creating the current era's counter on demand.
+ */
+void
+LDV_recordDead( StgClosure *c, nat size )
+{
+ void *id;
+ nat t;
+ counter *ctr;
+
+ if (era > 0 && closureSatisfiesConstraints(c)) {
+ /* exclude the profiling header from the accounted size */
+ size -= sizeofW(StgProfHeader);
+ ASSERT(LDVW(c) != 0);
+ if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
+ /* never used: charge 'void' from creation era t to now */
+ t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
+ if (t < era) {
+ if (RtsFlags.ProfFlags.bioSelector == NULL) {
+ censuses[t].void_total += (int)size;
+ censuses[era].void_total -= (int)size;
+ ASSERT(censuses[t].void_total < censuses[t].not_used);
+ } else {
+ id = closureIdentity(c);
+ ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
+ ASSERT( ctr != NULL );
+ ctr->c.ldv.void_total += (int)size;
+ ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
+ if (ctr == NULL) {
+ ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
+ initLDVCtr(ctr);
+ insertHashTable(censuses[era].hash, (StgWord)id, ctr);
+ ctr->identity = id;
+ ctr->next = censuses[era].ctrs;
+ censuses[era].ctrs = ctr;
+ }
+ ctr->c.ldv.void_total -= (int)size;
+ }
+ }
+ } else {
+ /* used at least once: charge 'drag' from the era after last use */
+ t = LDVW((c)) & LDV_LAST_MASK;
+ if (t + 1 < era) {
+ if (RtsFlags.ProfFlags.bioSelector == NULL) {
+ censuses[t+1].drag_total += size;
+ censuses[era].drag_total -= size;
+ } else {
+ void *id;
+ id = closureIdentity(c);
+ ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
+ ASSERT( ctr != NULL );
+ ctr->c.ldv.drag_total += (int)size;
+ ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
+ if (ctr == NULL) {
+ ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
+ initLDVCtr(ctr);
+ insertHashTable(censuses[era].hash, (StgWord)id, ctr);
+ ctr->identity = id;
+ ctr->next = censuses[era].ctrs;
+ censuses[era].ctrs = ctr;
+ }
+ ctr->c.ldv.drag_total -= (int)size;
+ }
+ }
+ }
+ }
+}
+#endif
+
+/* --------------------------------------------------------------------------
+ * Initialize censuses[era];
+ * ----------------------------------------------------------------------- */
+STATIC_INLINE void
+initEra(Census *census)
+{
+ census->hash = allocHashTable();
+ census->ctrs = NULL;
+ census->arena = newArena();
+
+ census->not_used = 0;
+ census->used = 0;
+ census->prim = 0;
+ census->void_total = 0;
+ census->drag_total = 0;
+}
+
+/* --------------------------------------------------------------------------
+ * Increases era by 1 and initialize census[era].
+ * Reallocates gi[] and increases its size if needed.
+ * ----------------------------------------------------------------------- */
+static void
+nextEra( void )
+{
+#ifdef PROFILING
+ if (doingLDVProfiling()) {
+ era++;
+
+ if (era == max_era) {
+ errorBelch("maximum number of censuses reached; use +RTS -i to reduce");
+ stg_exit(EXIT_FAILURE);
+ }
+
+ if (era == n_censuses) {
+ n_censuses *= 2;
+ censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
+ "nextEra");
+ }
+ }
+#endif /* PROFILING */
+
+ initEra( &censuses[era] );
+}
+
+/* -----------------------------------------------------------------------------
+ * DEBUG heap profiling, by info table
+ * -------------------------------------------------------------------------- */
+
+#ifdef DEBUG_HEAP_PROF
+FILE *hp_file;
+static char *hp_filename;
+
+void initProfiling1( void )
+{
+}
+
+void initProfiling2( void )
+{
+ if (RtsFlags.ProfFlags.doHeapProfile) {
+ /* Initialise the log file name */
+ hp_filename = stgMallocBytes(strlen(prog_name) + 6, "hpFileName");
+ sprintf(hp_filename, "%s.hp", prog_name);
+
+ /* open the log file */
+ if ((hp_file = fopen(hp_filename, "w")) == NULL) {
+ debugBelch("Can't open profiling report file %s\n",
+ hp_filename);
+ RtsFlags.ProfFlags.doHeapProfile = 0;
+ return;
+ }
+ }
+
+ initHeapProfiling();
+}
+
+void endProfiling( void )
+{
+ endHeapProfiling();
+}
+#endif /* DEBUG_HEAP_PROF */
+
+static void
+printSample(rtsBool beginSample, StgDouble sampleValue)
+{
+ StgDouble fractionalPart, integralPart;
+ fractionalPart = modf(sampleValue, &integralPart);
+ fprintf(hp_file, "%s %d.%02d\n",
+ (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
+ (int)integralPart, (int)(fractionalPart * 100));
+}
+
+/* --------------------------------------------------------------------------
+ * Initialize the heap profilier
+ * ----------------------------------------------------------------------- */
+nat
+initHeapProfiling(void)
+{
+ if (! RtsFlags.ProfFlags.doHeapProfile) {
+ return 0;
+ }
+
+#ifdef PROFILING
+ if (doingLDVProfiling() && doingRetainerProfiling()) {
+ errorBelch("cannot mix -hb and -hr");
+ stg_exit(EXIT_FAILURE);
+ }
+#endif
+
+ // we only count eras if we're doing LDV profiling. Otherwise era
+ // is fixed at zero.
+#ifdef PROFILING
+ if (doingLDVProfiling()) {
+ era = 1;
+ } else
+#endif
+ {
+ era = 0;
+ }
+
+ { // max_era = 2^LDV_SHIFT
+ nat p;
+ max_era = 1;
+ for (p = 0; p < LDV_SHIFT; p++)
+ max_era *= 2;
+ }
+
+ n_censuses = 32;
+ censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
+
+ initEra( &censuses[era] );
+
+ /* initProfilingLogFile(); */
+ fprintf(hp_file, "JOB \"%s", prog_name);
+
+#ifdef PROFILING
+ {
+ int count;
+ for(count = 1; count < prog_argc; count++)
+ fprintf(hp_file, " %s", prog_argv[count]);
+ fprintf(hp_file, " +RTS");
+ for(count = 0; count < rts_argc; count++)
+ fprintf(hp_file, " %s", rts_argv[count]);
+ }
+#endif /* PROFILING */
+
+ fprintf(hp_file, "\"\n" );
+
+ fprintf(hp_file, "DATE \"%s\"\n", time_str());
+
+ fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
+ fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");
+
+ printSample(rtsTrue, 0);
+ printSample(rtsFalse, 0);
+
+#ifdef DEBUG_HEAP_PROF
+ DEBUG_LoadSymbols(prog_name);
+#endif
+
+#ifdef PROFILING
+ if (doingRetainerProfiling()) {
+ initRetainerProfiling();
+ }
+#endif
+
+ return 0;
+}
+
// Finish heap profiling: flush any deferred censuses and close the
// .hp file.  No-op when heap profiling is not enabled.
void
endHeapProfiling(void)
{
    StgDouble seconds;

    if (! RtsFlags.ProfFlags.doHeapProfile) {
        return;
    }

#ifdef PROFILING
    if (doingRetainerProfiling()) {
        endRetainerProfiling();
    }
#endif

#ifdef PROFILING
    if (doingLDVProfiling()) {
        nat t;
        // Biographical info is only complete at program end: mark
        // everything still alive as dead, aggregate the counters, then
        // dump every census collected during the run.
        LdvCensusKillAll();
        aggregateCensusInfo();
        for (t = 1; t < era; t++) {
            dumpCensus( &censuses[t] );
        }
    }
#endif

    seconds = mut_user_time();
    printSample(rtsTrue, seconds);
    printSample(rtsFalse, seconds);
    fclose(hp_file);
}
+
+
+
+#ifdef PROFILING
// Copy the NUL-terminated string 'q' into the buffer at 'p', writing no
// byte at or beyond 'end'.  Returns how far the caller's write cursor
// should advance: the length of 'q' when it (and its NUL) fit, or the
// number of bytes written when the buffer filled first -- the caller
// detects truncation by checking p >= end afterwards.  A successful
// append copies the NUL but does not count it, so the next append
// overwrites the terminator.
static size_t
buf_append(char *p, const char *q, char *end)
{
    size_t m;   // was 'int': mismatched the declared size_t return type

    for (m = 0; p < end; p++, q++, m++) {
        *p = *q;
        if (*q == '\0') { break; }
    }
    return m;
}
+
// Pretty-print a cost-centre stack to 'fp': an "(id)" prefix followed by
// up to 'max_length' characters of "/"-separated stack entries, ending
// with "..." on truncation.
static void
fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
{
    // VLA sized by the caller; assumes a modest max_length (>= 4, since
    // the truncation path writes at buf+max_length-4).
    char buf[max_length+1], *p, *buf_end;

    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
    if (ccs == CCS_MAIN) {
        fprintf(fp, "MAIN");
        return;
    }

    // NOTE(review): "%ld" requires ccsID to be 'long' -- confirm its
    // declared type in CostCentreStack.
    fprintf(fp, "(%ld)", ccs->ccsID);

    p = buf;
    buf_end = buf + max_length + 1;

    // keep printing components of the stack until we run out of space
    // in the buffer.  If we run out of space, end with "...".
    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {

        // CAF cost centres print as M.CAF, but we leave the module
        // name out of all the others to save space.
        if (!strcmp(ccs->cc->label,"CAF")) {
            p += buf_append(p, ccs->cc->module, buf_end);
            p += buf_append(p, ".CAF", buf_end);
        } else {
            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
                p += buf_append(p, "/", buf_end);
            }
            p += buf_append(p, ccs->cc->label, buf_end);
        }

        if (p >= buf_end) {
            // buffer full: overwrite the tail with "..." (which also
            // NUL-terminates buf) and stop
            sprintf(buf+max_length-4, "...");
            break;
        }
    }
    fprintf(fp, "%s", buf);
}
#endif /* PROFILING */
+
+rtsBool
+strMatchesSelector( char* str, char* sel )
+{
+ char* p;
+ // debugBelch("str_matches_selector %s %s\n", str, sel);
+ while (1) {
+ // Compare str against wherever we've got to in sel.
+ p = str;
+ while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
+ p++; sel++;
+ }
+ // Match if all of str used and have reached the end of a sel fragment.
+ if (*p == '\0' && (*sel == ',' || *sel == '\0'))
+ return rtsTrue;
+
+ // No match. Advance sel to the start of the next elem.
+ while (*sel != ',' && *sel != '\0') sel++;
+ if (*sel == ',') sel++;
+
+ /* Run out of sel ?? */
+ if (*sel == '\0') return rtsFalse;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Figure out whether a closure should be counted in this census, by
+ * testing against all the specified constraints.
+ * -------------------------------------------------------------------------- */
+rtsBool
+closureSatisfiesConstraints( StgClosure* p )
+{
+#ifdef DEBUG_HEAP_PROF
+ (void)p; /* keep gcc -Wall happy */
+ return rtsTrue;
+#else
+ rtsBool b;
+
+ // The CCS has a selected field to indicate whether this closure is
+ // deselected by not being mentioned in the module, CC, or CCS
+ // selectors.
+ if (!p->header.prof.ccs->selected) {
+ return rtsFalse;
+ }
+
+ if (RtsFlags.ProfFlags.descrSelector) {
+ b = strMatchesSelector( (get_itbl((StgClosure *)p))->prof.closure_desc,
+ RtsFlags.ProfFlags.descrSelector );
+ if (!b) return rtsFalse;
+ }
+ if (RtsFlags.ProfFlags.typeSelector) {
+ b = strMatchesSelector( (get_itbl((StgClosure *)p))->prof.closure_type,
+ RtsFlags.ProfFlags.typeSelector );
+ if (!b) return rtsFalse;
+ }
+ if (RtsFlags.ProfFlags.retainerSelector) {
+ RetainerSet *rs;
+ nat i;
+ // We must check that the retainer set is valid here. One
+ // reason it might not be valid is if this closure is a
+ // a newly deceased weak pointer (i.e. a DEAD_WEAK), since
+ // these aren't reached by the retainer profiler's traversal.
+ if (isRetainerSetFieldValid((StgClosure *)p)) {
+ rs = retainerSetOf((StgClosure *)p);
+ if (rs != NULL) {
+ for (i = 0; i < rs->num; i++) {
+ b = strMatchesSelector( rs->element[i]->cc->label,
+ RtsFlags.ProfFlags.retainerSelector );
+ if (b) return rtsTrue;
+ }
+ }
+ }
+ return rtsFalse;
+ }
+ return rtsTrue;
+#endif /* PROFILING */
+}
+
+/* -----------------------------------------------------------------------------
+ * Aggregate the heap census info for biographical profiling
+ * -------------------------------------------------------------------------- */
+#ifdef PROFILING
+static void
+aggregateCensusInfo( void )
+{
+ HashTable *acc;
+ nat t;
+ counter *c, *d, *ctrs;
+ Arena *arena;
+
+ if (!doingLDVProfiling()) return;
+
+ // Aggregate the LDV counters when displaying by biography.
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+ int void_total, drag_total;
+
+ // Now we compute void_total and drag_total for each census
+ // After the program has finished, the void_total field of
+ // each census contains the count of words that were *created*
+ // in this era and were eventually void. Conversely, if a
+ // void closure was destroyed in this era, it will be
+ // represented by a negative count of words in void_total.
+ //
+ // To get the count of live words that are void at each
+ // census, just propagate the void_total count forwards:
+
+ void_total = 0;
+ drag_total = 0;
+ for (t = 1; t < era; t++) { // note: start at 1, not 0
+ void_total += censuses[t].void_total;
+ drag_total += censuses[t].drag_total;
+ censuses[t].void_total = void_total;
+ censuses[t].drag_total = drag_total;
+
+ ASSERT( censuses[t].void_total <= censuses[t].not_used );
+ // should be true because: void_total is the count of
+ // live words that are void at this census, which *must*
+ // be less than the number of live words that have not
+ // been used yet.
+
+ ASSERT( censuses[t].drag_total <= censuses[t].used );
+ // similar reasoning as above.
+ }
+
+ return;
+ }
+
+ // otherwise... we're doing a heap profile that is restricted to
+ // some combination of lag, drag, void or use. We've kept all the
+ // census info for all censuses so far, but we still need to
+ // aggregate the counters forwards.
+
+ arena = newArena();
+ acc = allocHashTable();
+ ctrs = NULL;
+
+ for (t = 1; t < era; t++) {
+
+ // first look through all the counters we're aggregating
+ for (c = ctrs; c != NULL; c = c->next) {
+ // if one of the totals is non-zero, then this closure
+ // type must be present in the heap at this census time...
+ d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);
+
+ if (d == NULL) {
+ // if this closure identity isn't present in the
+ // census for this time period, then our running
+ // totals *must* be zero.
+ ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);
+
+ // debugCCS(c->identity);
+ // debugBelch(" census=%d void_total=%d drag_total=%d\n",
+ // t, c->c.ldv.void_total, c->c.ldv.drag_total);
+ } else {
+ d->c.ldv.void_total += c->c.ldv.void_total;
+ d->c.ldv.drag_total += c->c.ldv.drag_total;
+ c->c.ldv.void_total = d->c.ldv.void_total;
+ c->c.ldv.drag_total = d->c.ldv.drag_total;
+
+ ASSERT( c->c.ldv.void_total >= 0 );
+ ASSERT( c->c.ldv.drag_total >= 0 );
+ }
+ }
+
+ // now look through the counters in this census to find new ones
+ for (c = censuses[t].ctrs; c != NULL; c = c->next) {
+ d = lookupHashTable(acc, (StgWord)c->identity);
+ if (d == NULL) {
+ d = arenaAlloc( arena, sizeof(counter) );
+ initLDVCtr(d);
+ insertHashTable( acc, (StgWord)c->identity, d );
+ d->identity = c->identity;
+ d->next = ctrs;
+ ctrs = d;
+ d->c.ldv.void_total = c->c.ldv.void_total;
+ d->c.ldv.drag_total = c->c.ldv.drag_total;
+ }
+ ASSERT( c->c.ldv.void_total >= 0 );
+ ASSERT( c->c.ldv.drag_total >= 0 );
+ }
+ }
+
+ freeHashTable(acc, NULL);
+ arenaFree(arena);
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Print out the results of a heap census.
+ * -------------------------------------------------------------------------- */
+static void
+dumpCensus( Census *census )
+{
+ counter *ctr;
+ int count;
+
+ printSample(rtsTrue, census->time);
+
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+ fprintf(hp_file, "VOID\t%lu\n", (unsigned long)(census->void_total) * sizeof(W_));
+ fprintf(hp_file, "LAG\t%lu\n",
+ (unsigned long)(census->not_used - census->void_total) * sizeof(W_));
+ fprintf(hp_file, "USE\t%lu\n",
+ (unsigned long)(census->used - census->drag_total) * sizeof(W_));
+ fprintf(hp_file, "INHERENT_USE\t%lu\n",
+ (unsigned long)(census->prim) * sizeof(W_));
+ fprintf(hp_file, "DRAG\t%lu\n",
+ (unsigned long)(census->drag_total) * sizeof(W_));
+ printSample(rtsFalse, census->time);
+ return;
+ }
+#endif
+
+ for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
+
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.bioSelector != NULL) {
+ count = 0;
+ if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
+ count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
+ if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
+ count += ctr->c.ldv.drag_total;
+ if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
+ count += ctr->c.ldv.void_total;
+ if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
+ count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
+ } else
+#endif
+ {
+ count = ctr->c.resid;
+ }
+
+ ASSERT( count >= 0 );
+
+ if (count == 0) continue;
+
+#ifdef DEBUG_HEAP_PROF
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_INFOPTR:
+ fprintf(hp_file, "%s", lookupGHCName(ctr->identity));
+ break;
+ case HEAP_BY_CLOSURE_TYPE:
+ fprintf(hp_file, "%s", (char *)ctr->identity);
+ break;
+ }
+#endif
+
+#ifdef PROFILING
+ switch (RtsFlags.ProfFlags.doHeapProfile) {
+ case HEAP_BY_CCS:
+ fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, 25);
+ break;
+ case HEAP_BY_MOD:
+ case HEAP_BY_DESCR:
+ case HEAP_BY_TYPE:
+ fprintf(hp_file, "%s", (char *)ctr->identity);
+ break;
+ case HEAP_BY_RETAINER:
+ {
+ RetainerSet *rs = (RetainerSet *)ctr->identity;
+
+ // it might be the distinguished retainer set rs_MANY:
+ if (rs == &rs_MANY) {
+ fprintf(hp_file, "MANY");
+ break;
+ }
+
+ // Mark this retainer set by negating its id, because it
+ // has appeared in at least one census. We print the
+ // values of all such retainer sets into the log file at
+ // the end. A retainer set may exist but not feature in
+ // any censuses if it arose as the intermediate retainer
+ // set for some closure during retainer set calculation.
+ if (rs->id > 0)
+ rs->id = -(rs->id);
+
+ // report in the unit of bytes: * sizeof(StgWord)
+ printRetainerSetShort(hp_file, rs);
+ break;
+ }
+ default:
+ barf("dumpCensus; doHeapProfile");
+ }
+#endif
+
+ fprintf(hp_file, "\t%lu\n", (unsigned long)count * sizeof(W_));
+ }
+
+ printSample(rtsFalse, census->time);
+}
+
+/* -----------------------------------------------------------------------------
+ * Code to perform a heap census.
+ * -------------------------------------------------------------------------- */
+static void
+heapCensusChain( Census *census, bdescr *bd )
+{
+ StgPtr p;
+ StgInfoTable *info;
+ void *identity;
+ nat size;
+ counter *ctr;
+ nat real_size;
+ rtsBool prim;
+
+ for (; bd != NULL; bd = bd->link) {
+
+ // HACK: ignore pinned blocks, because they contain gaps.
+ // It's not clear exactly what we'd like to do here, since we
+ // can't tell which objects in the block are actually alive.
+ // Perhaps the whole block should be counted as SYSTEM memory.
+ if (bd->flags & BF_PINNED) {
+ continue;
+ }
+
+ p = bd->start;
+ while (p < bd->free) {
+ info = get_itbl((StgClosure *)p);
+ prim = rtsFalse;
+
+ switch (info->type) {
+
+ case THUNK:
+ size = thunk_sizeW_fromITBL(info);
+ break;
+
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_2_0:
+ size = sizeofW(StgThunkHeader) + 2;
+ break;
+
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_SELECTOR:
+ size = sizeofW(StgThunkHeader) + 1;
+ break;
+
+ case CONSTR:
+ case FUN:
+ case IND_PERM:
+ case IND_OLDGEN:
+ case IND_OLDGEN_PERM:
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_1_1:
+ case FUN_0_2:
+ case FUN_2_0:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ case CONSTR_2_0:
+ size = sizeW_fromITBL(info);
+ break;
+
+ case IND:
+ // Special case/Delicate Hack: INDs don't normally
+ // appear, since we're doing this heap census right
+ // after GC. However, GarbageCollect() also does
+ // resurrectThreads(), which can update some
+ // blackholes when it calls raiseAsync() on the
+ // resurrected threads. So we know that any IND will
+ // be the size of a BLACKHOLE.
+ size = BLACKHOLE_sizeW();
+ break;
+
+ case BCO:
+ prim = rtsTrue;
+ size = bco_sizeW((StgBCO *)p);
+ break;
+
+ case MVAR:
+ case WEAK:
+ case STABLE_NAME:
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ prim = rtsTrue;
+ size = sizeW_fromITBL(info);
+ break;
+
+ case AP:
+ size = ap_sizeW((StgAP *)p);
+ break;
+
+ case PAP:
+ size = pap_sizeW((StgPAP *)p);
+ break;
+
+ case AP_STACK:
+ size = ap_stack_sizeW((StgAP_STACK *)p);
+ break;
+
+ case ARR_WORDS:
+ prim = rtsTrue;
+ size = arr_words_sizeW(stgCast(StgArrWords*,p));
+ break;
+
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+ prim = rtsTrue;
+ size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
+ break;
+
+ case TSO:
+ prim = rtsTrue;
+#ifdef DEBUG_HEAP_PROF
+ size = tso_sizeW((StgTSO *)p);
+ break;
+#else
+ if (RtsFlags.ProfFlags.includeTSOs) {
+ size = tso_sizeW((StgTSO *)p);
+ break;
+ } else {
+ // Skip this TSO and move on to the next object
+ p += tso_sizeW((StgTSO *)p);
+ continue;
+ }
+#endif
+
+ case TREC_HEADER:
+ prim = rtsTrue;
+ size = sizeofW(StgTRecHeader);
+ break;
+
+ case TVAR_WAIT_QUEUE:
+ prim = rtsTrue;
+ size = sizeofW(StgTVarWaitQueue);
+ break;
+
+ case TVAR:
+ prim = rtsTrue;
+ size = sizeofW(StgTVar);
+ break;
+
+ case TREC_CHUNK:
+ prim = rtsTrue;
+ size = sizeofW(StgTRecChunk);
+ break;
+
+ default:
+ barf("heapCensus, unknown object: %d", info->type);
+ }
+
+ identity = NULL;
+
+#ifdef DEBUG_HEAP_PROF
+ real_size = size;
+#else
+ // subtract the profiling overhead
+ real_size = size - sizeofW(StgProfHeader);
+#endif
+
+ if (closureSatisfiesConstraints((StgClosure*)p)) {
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+ if (prim)
+ census->prim += real_size;
+ else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+ census->not_used += real_size;
+ else
+ census->used += real_size;
+ } else
+#endif
+ {
+ identity = closureIdentity((StgClosure *)p);
+
+ if (identity != NULL) {
+ ctr = lookupHashTable( census->hash, (StgWord)identity );
+ if (ctr != NULL) {
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.bioSelector != NULL) {
+ if (prim)
+ ctr->c.ldv.prim += real_size;
+ else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+ ctr->c.ldv.not_used += real_size;
+ else
+ ctr->c.ldv.used += real_size;
+ } else
+#endif
+ {
+ ctr->c.resid += real_size;
+ }
+ } else {
+ ctr = arenaAlloc( census->arena, sizeof(counter) );
+ initLDVCtr(ctr);
+ insertHashTable( census->hash, (StgWord)identity, ctr );
+ ctr->identity = identity;
+ ctr->next = census->ctrs;
+ census->ctrs = ctr;
+
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.bioSelector != NULL) {
+ if (prim)
+ ctr->c.ldv.prim = real_size;
+ else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+ ctr->c.ldv.not_used = real_size;
+ else
+ ctr->c.ldv.used = real_size;
+ } else
+#endif
+ {
+ ctr->c.resid = real_size;
+ }
+ }
+ }
+ }
+ }
+
+ p += size;
+ }
+ }
+}
+
// Perform one heap census over the whole heap (called after GC), dump
// it unless we're deferring for biographical profiling, and advance to
// the next era.
void
heapCensus( void )
{
    nat g, s;
    Census *census;

    census = &censuses[era];
    census->time = mut_user_time();

    // calculate retainer sets if necessary
#ifdef PROFILING
    if (doingRetainerProfiling()) {
        retainerProfile();
    }
#endif

#ifdef PROFILING
    stat_startHeapCensus();
#endif

    // Traverse the heap, collecting the census info

    // First the small_alloc_list: we have to fix the free pointer at
    // the end by calling tidyAllocateLists() first.
    tidyAllocateLists();
    heapCensusChain( census, small_alloc_list );

    // Now traverse the heap in each generation/step.
    if (RtsFlags.GcFlags.generations == 1) {
        heapCensusChain( census, g0s0->blocks );
    } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
            for (s = 0; s < generations[g].n_steps; s++) {
                heapCensusChain( census, generations[g].steps[s].blocks );
                // Are we interested in large objects?  might be
                // confusing to include the stack in a heap profile.
                heapCensusChain( census, generations[g].steps[s].large_objects );
            }
        }
    }

    // dump out the census info
#ifdef PROFILING
    // We can't generate any info for LDV profiling until
    // the end of the run...
    if (!doingLDVProfiling())
        dumpCensus( census );
#else
    dumpCensus( census );
#endif


    // free our storage, unless we're keeping all the census info for
    // future restriction by biography.
#ifdef PROFILING
    if (RtsFlags.ProfFlags.bioSelector == NULL)
#endif
    {
        freeHashTable( census->hash, NULL/* don't free the elements */ );
        arenaFree( census->arena );
        census->hash = NULL;
        census->arena = NULL;
    }

    // we're into the next time period now
    nextEra();

#ifdef PROFILING
    stat_endHeapCensus();
#endif
}
+
+#endif /* PROFILING || DEBUG_HEAP_PROF */
+
diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h
new file mode 100644
index 0000000000..0251416762
--- /dev/null
+++ b/rts/ProfHeap.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Support for heap profiling
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PROFHEAP_H
+#define PROFHEAP_H
+
+extern void heapCensus( void );
+extern nat initHeapProfiling( void );
+extern void endHeapProfiling( void );
+extern rtsBool closureSatisfiesConstraints( StgClosure* p );
+extern void LDV_recordDead( StgClosure *c, nat size );
+extern rtsBool strMatchesSelector( char* str, char* sel );
+
+#endif /* PROFHEAP_H */
diff --git a/rts/Profiling.c b/rts/Profiling.c
new file mode 100644
index 0000000000..028dc5a509
--- /dev/null
+++ b/rts/Profiling.c
@@ -0,0 +1,941 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2000
+ *
+ * Support for profiling
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Profiling.h"
+#include "Storage.h"
+#include "Proftimer.h"
+#include "Timer.h"
+#include "ProfHeap.h"
+#include "Arena.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+
+#include <string.h>
+
+/*
+ * Profiling allocation arena.
+ */
+Arena *prof_arena;
+
+/*
+ * Global variables used to assign unique IDs to cc's, ccs's, and
+ * closure_cats
+ */
+
+unsigned int CC_ID;
+unsigned int CCS_ID;
+unsigned int HP_ID;
+
+/* figures for the profiling report.
+ */
+static ullong total_alloc;
+static lnat total_prof_ticks;
+
+/* Globals for opening the profiling log file(s)
+ */
+static char *prof_filename; /* prof report file name = <program>.prof */
+FILE *prof_file;
+
+static char *hp_filename; /* heap profile (hp2ps style) log file */
+FILE *hp_file;
+
+/* The Current Cost Centre Stack (for attributing costs)
+ */
+CostCentreStack *CCCS;
+
+/* Linked lists to keep track of cc's and ccs's that haven't
+ * been declared in the log file yet
+ */
+CostCentre *CC_LIST;
+CostCentreStack *CCS_LIST;
+
+/*
+ * Built-in cost centres and cost-centre stacks:
+ *
+ * MAIN is the root of the cost-centre stack tree. If there are
+ * no _scc_s in the program, all costs will be attributed
+ * to MAIN.
+ *
+ * SYSTEM is the RTS in general (scheduler, etc.). All costs for
+ * RTS operations apart from garbage collection are attributed
+ * to SYSTEM.
+ *
+ * GC is the storage manager / garbage collector.
+ *
+ * OVERHEAD gets all costs generated by the profiling system
+ * itself. These are costs that would not be incurred
+ * during non-profiled execution of the program.
+ *
+ * SUBSUMED is the one-and-only CCS placed on top-level functions.
+ * It indicates that all costs are to be attributed to the
+ * enclosing cost centre stack. SUBSUMED never accumulates
+ * any costs. The is_caf flag is set on the subsumed cost
+ * centre.
+ *
+ * DONT_CARE is a placeholder cost-centre we assign to static
+ * constructors. It should *never* accumulate any costs.
+ */
+
+CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, );
+CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, );
+CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, );
+CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, );
+CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, );
+CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, );
+
+CCS_DECLARE(CCS_MAIN, CC_MAIN, );
+CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, );
+CCS_DECLARE(CCS_GC, CC_GC, );
+CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, );
+CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, );
+CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, );
+
+/*
+ * Uniques for the XML log-file format
+ */
+#define CC_UQ 1
+#define CCS_UQ 2
+#define TC_UQ 3
+#define HEAP_OBJ_UQ 4
+#define TIME_UPD_UQ 5
+#define HEAP_UPD_UQ 6
+
+/*
+ * Static Functions
+ */
+
+static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc,
+ CostCentreStack *new_ccs );
+static rtsBool ccs_to_ignore ( CostCentreStack *ccs );
+static void count_ticks ( CostCentreStack *ccs );
+static void inherit_costs ( CostCentreStack *ccs );
+static void reportCCS ( CostCentreStack *ccs, nat indent );
+static void DecCCS ( CostCentreStack *ccs );
+static void DecBackEdge ( CostCentreStack *ccs,
+ CostCentreStack *oldccs );
+static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc );
+static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs );
+static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * );
+static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * );
+static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *,
+ CostCentre *, unsigned int );
+static void ccsSetSelected ( CostCentreStack *ccs );
+
+static void initTimeProfiling ( void );
+static void initProfilingLogFile( void );
+
+static void reportCCS_XML ( CostCentreStack *ccs );
+
+/* -----------------------------------------------------------------------------
+ Initialise the profiling environment
+ -------------------------------------------------------------------------- */
+
+void
+initProfiling1 (void)
+{
+ // initialise our arena
+ prof_arena = newArena();
+
+ /* for the benefit of allocate()... */
+ CCCS = CCS_SYSTEM;
+
+ /* Initialize counters for IDs */
+ CC_ID = 1;
+ CCS_ID = 1;
+ HP_ID = 1;
+
+ /* Initialize Declaration lists to NULL */
+ CC_LIST = NULL;
+ CCS_LIST = NULL;
+
+ /* Register all the cost centres / stacks in the program
+ * CC_MAIN gets link = 0, all others have non-zero link.
+ */
+ REGISTER_CC(CC_MAIN);
+ REGISTER_CC(CC_SYSTEM);
+ REGISTER_CC(CC_GC);
+ REGISTER_CC(CC_OVERHEAD);
+ REGISTER_CC(CC_SUBSUMED);
+ REGISTER_CC(CC_DONT_CARE);
+ REGISTER_CCS(CCS_MAIN);
+ REGISTER_CCS(CCS_SYSTEM);
+ REGISTER_CCS(CCS_GC);
+ REGISTER_CCS(CCS_OVERHEAD);
+ REGISTER_CCS(CCS_SUBSUMED);
+ REGISTER_CCS(CCS_DONT_CARE);
+
+ CCCS = CCS_OVERHEAD;
+
+ /* cost centres are registered by the per-module
+ * initialisation code now...
+ */
+}
+
// Phase 2: runs after the per-module registration code.  Opens the log
// files, re-parents all registered CCSs under CCS_MAIN, and starts the
// time and/or heap profilers.
void
initProfiling2 (void)
{
    CostCentreStack *ccs, *next;

    CCCS = CCS_SYSTEM;

    /* Set up the log file, and dump the header and cost centre
     * information into it. */
    initProfilingLogFile();

    /* find all the "special" cost centre stacks, and make them children
     * of CCS_MAIN.
     */
    ASSERT(CCS_MAIN->prevStack == 0);
    CCS_MAIN->root = CC_MAIN;
    ccsSetSelected(CCS_MAIN);
    DecCCS(CCS_MAIN);

    // CCS_LIST is threaded through prevStack at this point, so read the
    // link before each node's prevStack is reset.
    for (ccs = CCS_LIST; ccs != CCS_MAIN; ) {
        next = ccs->prevStack;
        ccs->prevStack = 0;
        ActualPush_(CCS_MAIN,ccs->cc,ccs);
        ccs->root = ccs->cc;
        ccs = next;
    }

    if (RtsFlags.CcFlags.doCostCentres) {
        initTimeProfiling();
    }

    if (RtsFlags.ProfFlags.doHeapProfile) {
        initHeapProfiling();
    }
}
+
+// Decide whether closures with this CCS should contribute to the heap
+// profile.
+static void
+ccsSetSelected( CostCentreStack *ccs )
+{
+ if (RtsFlags.ProfFlags.modSelector) {
+ if (! strMatchesSelector( ccs->cc->module,
+ RtsFlags.ProfFlags.modSelector ) ) {
+ ccs->selected = 0;
+ return;
+ }
+ }
+ if (RtsFlags.ProfFlags.ccSelector) {
+ if (! strMatchesSelector( ccs->cc->label,
+ RtsFlags.ProfFlags.ccSelector ) ) {
+ ccs->selected = 0;
+ return;
+ }
+ }
+ if (RtsFlags.ProfFlags.ccsSelector) {
+ CostCentreStack *c;
+ for (c = ccs; c != NULL; c = c->prevStack) {
+ if ( strMatchesSelector( c->cc->label,
+ RtsFlags.ProfFlags.ccsSelector )) {
+ break;
+ }
+ }
+ if (c == NULL) {
+ ccs->selected = 0;
+ return;
+ }
+ }
+
+ ccs->selected = 1;
+ return;
+}
+
+
// Open "<prog>.prof" (and, if heap profiling, "<prog>.hp").  On failure
// the corresponding profile is switched off rather than aborting.
static void
initProfilingLogFile(void)
{
    /* Initialise the log file name */
    // ".prof" plus NUL is exactly 6 extra bytes
    prof_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
    sprintf(prof_filename, "%s.prof", prog_name);

    /* open the log file */
    if ((prof_file = fopen(prof_filename, "w")) == NULL) {
        debugBelch("Can't open profiling report file %s\n", prof_filename);
        RtsFlags.CcFlags.doCostCentres = 0;
        // The following line was added by Sung; retainer/LDV profiling may need
        // two output files, i.e., <program>.prof/hp.
        if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
            RtsFlags.ProfFlags.doHeapProfile = 0;
        return;
    }

    if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
        /* dump the time, and the profiling interval */
        fprintf(prof_file, "\"%s\"\n", time_str());
        fprintf(prof_file, "\"%d ms\"\n", TICK_MILLISECS);

        /* declare all the cost centres */
        {
            CostCentre *cc;
            for (cc = CC_LIST; cc != NULL; cc = cc->link) {
                fprintf(prof_file, "%d %d \"%s\" \"%s\"\n",
                        CC_UQ, cc->ccID, cc->label, cc->module);
            }
        }
    }

    if (RtsFlags.ProfFlags.doHeapProfile) {
        /* Initialise the log file name */
        // ".hp" plus NUL needs 4 bytes; +6 leaves a little slack
        hp_filename = arenaAlloc(prof_arena, strlen(prog_name) + 6);
        sprintf(hp_filename, "%s.hp", prog_name);

        /* open the log file */
        if ((hp_file = fopen(hp_filename, "w")) == NULL) {
            debugBelch("Can't open profiling report file %s\n",
                       hp_filename);
            RtsFlags.ProfFlags.doHeapProfile = 0;
            return;
        }
    }
}
+
// Begin time profiling by starting the periodic profiling timer.
// Marked 'static' to match the file-scope prototype above; the stray
// ';' after the closing brace (an illegal empty file-scope declaration
// in strict C) has been removed.
static void
initTimeProfiling(void)
{
    /* Start ticking */
    startProfTimer();
}
+
// Shut down whichever profilers were running: stop the timer for
// cost-centre profiling and finalise the heap profile.
void
endProfiling ( void )
{
    if (RtsFlags.CcFlags.doCostCentres) {
        stopProfTimer();
    }
    if (RtsFlags.ProfFlags.doHeapProfile) {
        endHeapProfiling();
    }
}
+
+/* -----------------------------------------------------------------------------
+ Set cost centre stack when entering a function.
+ -------------------------------------------------------------------------- */
+rtsBool entering_PAP;
+
+void
+EnterFunCCS ( CostCentreStack *ccsfn )
+{
+ /* PAP_entry has already set CCCS for us */
+ if (entering_PAP) {
+ entering_PAP = rtsFalse;
+ return;
+ }
+
+ if (ccsfn->root->is_caf == CC_IS_CAF) {
+ CCCS = AppendCCS(CCCS,ccsfn);
+ } else {
+ CCCS = ccsfn;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Cost-centre stack manipulation
+ -------------------------------------------------------------------------- */
+
+#ifdef DEBUG
+/* Debug wrapper: trace every push, then call the real implementation.
+ * The #define placed between this wrapper's signature and its body
+ * renames every later occurrence of PushCostCentre to _PushCostCentre,
+ * so the definition further down becomes _PushCostCentre and the call
+ * inside this body reaches it. */
+CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc );
+CostCentreStack *
+PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
+#define PushCostCentre _PushCostCentre
+{
+ IF_DEBUG(prof,
+ debugBelch("Pushing %s on ", cc->label);
+ debugCCS(ccs);
+ debugBelch("\n"));
+ return PushCostCentre(ccs,cc);
+}
+#endif
+
+/* Push cost centre cc onto stack ccs, memoizing the result:
+ * - pushing onto the empty stack, or a cc not seen before, allocates
+ * a new CCS via ActualPush;
+ * - a repeated top element returns the stack unchanged;
+ * - a previously-memoized child is returned from the index table;
+ * - recursion back to an older CCS records a back edge instead of
+ * growing the stack without bound. */
+CostCentreStack *
+PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
+{
+ CostCentreStack *temp_ccs;
+
+ if (ccs == EMPTY_STACK)
+ return ActualPush(ccs,cc);
+ else {
+ if (ccs->cc == cc)
+ return ccs;
+ else {
+ /* check if we've already memoized this stack */
+ temp_ccs = IsInIndexTable(ccs->indexTable,cc);
+
+ if (temp_ccs != EMPTY_STACK)
+ return temp_ccs;
+ else {
+ temp_ccs = CheckLoop(ccs,cc);
+ if (temp_ccs != NULL) {
+ /* we have recursed to an older CCS. Mark this in
+ * the index table, and emit a "back edge" into the
+ * log file.
+ */
+ ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1);
+ DecBackEdge(temp_ccs,ccs);
+ return temp_ccs;
+ } else {
+ return ActualPush(ccs,cc);
+ }
+ }
+ }
+ }
+}
+
+/* Walk the prevStack chain of ccs looking for an entry whose cost
+ * centre is cc; returns that (older) CCS if found, else NULL. Used by
+ * PushCostCentre to detect recursion (back edges). */
+static CostCentreStack *
+CheckLoop ( CostCentreStack *ccs, CostCentre *cc )
+{
+ while (ccs != EMPTY_STACK) {
+ if (ccs->cc == cc)
+ return ccs;
+ ccs = ccs->prevStack;
+ }
+ return NULL;
+}
+
+/* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1) */
+
+#ifdef DEBUG
+/* Debug wrapper for AppendCCS; same #define-renaming trick as the
+ * PushCostCentre wrapper above: the real definition below becomes
+ * _AppendCCS, which the call in this body reaches. */
+CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 );
+CostCentreStack *
+AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+#define AppendCCS _AppendCCS
+{
+ IF_DEBUG(prof,
+ if (ccs1 != ccs2) {
+ debugBelch("Appending ");
+ debugCCS(ccs1);
+ debugBelch(" to ");
+ debugCCS(ccs2);
+ debugBelch("\n");});
+ return AppendCCS(ccs1,ccs2);
+}
+#endif
+
+/* Push the cost centres of ccs2 (outermost first, via recursion on
+ * prevStack) onto ccs1. Identical stacks are returned unchanged, and
+ * the walk stops at a CAF cost centre in ccs2, returning ccs1. */
+CostCentreStack *
+AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
+{
+ CostCentreStack *ccs = NULL;
+
+ if (ccs1 == ccs2) {
+ return ccs1;
+ }
+
+ if (ccs2->cc->is_caf == CC_IS_CAF) {
+ return ccs1;
+ }
+
+ /* append the rest of ccs2 first, then push its own cost centre */
+ if (ccs2->prevStack != NULL) {
+ ccs = AppendCCS(ccs1, ccs2->prevStack);
+ }
+
+ return PushCostCentre(ccs,ccs2->cc);
+}
+
+/* Allocate a fresh CostCentreStack node from the profiling arena and
+ * initialise it via ActualPush_. */
+static CostCentreStack *
+ActualPush ( CostCentreStack *ccs, CostCentre *cc )
+{
+ CostCentreStack *new_ccs;
+
+ /* allocate space for a new CostCentreStack */
+ new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack));
+
+ return ActualPush_(ccs, cc, new_ccs);
+}
+
+/* Fill in new_ccs as a child of ccs carrying cost centre cc, zero its
+ * counters, link it into the parent's memoization table, and emit its
+ * declaration into the log.
+ * NOTE(review): ccs->root is read before the EMPTY_STACK check below —
+ * assumes EMPTY_STACK is a dereferenceable sentinel rather than NULL;
+ * confirm against the EMPTY_STACK definition. */
+static CostCentreStack *
+ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs )
+{
+ /* assign values to each member of the structure */
+ new_ccs->ccsID = CCS_ID++;
+ new_ccs->cc = cc;
+ new_ccs->prevStack = ccs;
+
+ new_ccs->indexTable = EMPTY_TABLE;
+
+ /* Initialise the various _scc_ counters to zero
+ */
+ new_ccs->scc_count = 0;
+
+ /* Initialize all other stats here. There should be a quick way
+ * that's easily used elsewhere too
+ */
+ new_ccs->time_ticks = 0;
+ new_ccs->mem_alloc = 0;
+ new_ccs->inherited_ticks = 0;
+ new_ccs->inherited_alloc = 0;
+
+ new_ccs->root = ccs->root;
+
+ // Set the selected field.
+ ccsSetSelected(new_ccs);
+
+ /* update the memoization table for the parent stack */
+ if (ccs != EMPTY_STACK)
+ ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc,
+ 0/*not a back edge*/);
+
+ /* make sure this CC is declared at the next heap/time sample */
+ DecCCS(new_ccs);
+
+ /* return a pointer to the new stack */
+ return new_ccs;
+}
+
+
+/* Linear search of a memoization table for cost centre cc; returns the
+ * memoized child CCS, or EMPTY_TABLE if cc has not been pushed from
+ * this stack before. */
+static CostCentreStack *
+IsInIndexTable(IndexTable *it, CostCentre *cc)
+{
+ while (it!=EMPTY_TABLE)
+ {
+ if (it->cc==cc)
+ return it->ccs;
+ else
+ it = it->next;
+ }
+
+ /* otherwise we never found it so return EMPTY_TABLE */
+ return EMPTY_TABLE;
+}
+
+/* Prepend a new (cc -> new_ccs) entry, arena-allocated, onto table it;
+ * back_edge marks entries created by recursion detection. Returns the
+ * new head of the list. */
+static IndexTable *
+AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs,
+ CostCentre *cc, unsigned int back_edge)
+{
+ IndexTable *new_it;
+
+ new_it = arenaAlloc(prof_arena, sizeof(IndexTable));
+
+ new_it->cc = cc;
+ new_it->ccs = new_ccs;
+ new_it->next = it;
+ new_it->back_edge = back_edge;
+ return new_it;
+}
+
+
+/* Declare a cost-centre stack in the XML log: "1" records a root CCS,
+ * "2" records a CCS plus its parent's ID. Only emitted when the XML
+ * output mode is selected and the log file opened successfully. */
+static void
+DecCCS(CostCentreStack *ccs)
+{
+ if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
+ if (ccs->prevStack == EMPTY_STACK)
+ fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
+ ccs->ccsID, ccs->cc->ccID);
+ else
+ fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
+ ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID);
+ }
+}
+
+/* Like DecCCS, but the parent recorded is oldccs — the stack from which
+ * the back edge was taken — rather than ccs's own prevStack. */
+static void
+DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs )
+{
+ if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) {
+ if (ccs->prevStack == EMPTY_STACK)
+ fprintf(prof_file, "%d %d 1 %d\n", CCS_UQ,
+ ccs->ccsID, ccs->cc->ccID);
+ else
+ fprintf(prof_file, "%d %d 2 %d %d\n", CCS_UQ,
+ ccs->ccsID, ccs->cc->ccID, oldccs->ccsID);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Generating a time & allocation profiling report.
+ -------------------------------------------------------------------------- */
+
+/* We omit certain system-related CCs and CCSs from the default
+ * reports, so as not to cause confusion.
+ */
+/* True for the built-in system cost centres that are suppressed from
+ * default reports (see the note above). */
+static rtsBool
+cc_to_ignore (CostCentre *cc)
+{
+ if ( cc == CC_OVERHEAD
+ || cc == CC_DONT_CARE
+ || cc == CC_GC
+ || cc == CC_SYSTEM) {
+ return rtsTrue;
+ } else {
+ return rtsFalse;
+ }
+}
+
+/* CCS counterpart of cc_to_ignore: the corresponding built-in stacks. */
+static rtsBool
+ccs_to_ignore (CostCentreStack *ccs)
+{
+ if ( ccs == CCS_OVERHEAD
+ || ccs == CCS_DONT_CARE
+ || ccs == CCS_GC
+ || ccs == CCS_SYSTEM) {
+ return rtsTrue;
+ } else {
+ return rtsFalse;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Generating the aggregated per-cost-centre time/alloc report.
+ -------------------------------------------------------------------------- */
+
+/* Head of the per-cost-centre report list, ordered by descending
+ * time_ticks; threaded through CostCentre.link. */
+static CostCentre *sorted_cc_list;
+
+/* Fold each CCS's ticks/allocations into its cost centre, walking the
+ * tree via the memoization tables and skipping back edges (which would
+ * otherwise double-count). */
+static void
+aggregate_cc_costs( CostCentreStack *ccs )
+{
+ IndexTable *i;
+
+ ccs->cc->mem_alloc += ccs->mem_alloc;
+ ccs->cc->time_ticks += ccs->time_ticks;
+
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (!i->back_edge) {
+ aggregate_cc_costs(i->ccs);
+ }
+ }
+}
+
+/* Insertion into sorted_cc_list, keeping descending time_ticks order.
+ * Mutates new_cc->link, so callers must not rely on the CC_LIST
+ * threading of new_cc afterwards. */
+static void
+insert_cc_in_sorted_list( CostCentre *new_cc )
+{
+ CostCentre **prev, *cc;
+
+ prev = &sorted_cc_list;
+ for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
+ if (new_cc->time_ticks > cc->time_ticks) {
+ new_cc->link = cc;
+ *prev = new_cc;
+ return;
+ } else {
+ prev = &(cc->link);
+ }
+ }
+ new_cc->link = NULL;
+ *prev = new_cc;
+}
+
+/* Emit the aggregated per-cost-centre table: aggregate costs over the
+ * CCS tree, select centres above the 1% time or allocation threshold
+ * (or all of them under -P and higher), sort by time, and print. */
+static void
+report_per_cc_costs( void )
+{
+ CostCentre *cc, *next;
+
+ aggregate_cc_costs(CCS_MAIN);
+ sorted_cc_list = NULL;
+
+ for (cc = CC_LIST; cc != NULL; cc = next) {
+ /* save the successor first: insert_cc_in_sorted_list rewrites
+ * cc->link, which is also the CC_LIST threading */
+ next = cc->link;
+ if (cc->time_ticks > total_prof_ticks/100
+ || cc->mem_alloc > total_alloc/100
+ || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) {
+ insert_cc_in_sorted_list(cc);
+ }
+ }
+
+ fprintf(prof_file, "%-30s %-20s", "COST CENTRE", "MODULE");
+ fprintf(prof_file, "%6s %6s", "%time", "%alloc");
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5s %9s", "ticks", "bytes");
+ }
+ fprintf(prof_file, "\n\n");
+
+ for (cc = sorted_cc_list; cc != NULL; cc = cc->link) {
+ if (cc_to_ignore(cc)) {
+ continue;
+ }
+ fprintf(prof_file, "%-30s %-20s", cc->label, cc->module);
+ /* guard against division by zero when no ticks/allocs recorded */
+ fprintf(prof_file, "%6.1f %6.1f",
+ total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100),
+ total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat)
+ total_alloc * 100)
+ );
+
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5llu %9llu", (StgWord64)(cc->time_ticks), cc->mem_alloc);
+ }
+ fprintf(prof_file, "\n");
+ }
+
+ fprintf(prof_file,"\n\n");
+}
+
+/* -----------------------------------------------------------------------------
+ Generate the cost-centre-stack time/alloc report
+ -------------------------------------------------------------------------- */
+
+/* Print the column headings for the cost-centre-stack report that
+ * reportCCS produces below. */
+static void
+fprint_header( void )
+{
+ fprintf(prof_file, "%-24s %-10s individual inherited\n", "", "");
+
+ fprintf(prof_file, "%-24s %-50s", "COST CENTRE", "MODULE");
+ fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc");
+
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5s %9s", "ticks", "bytes");
+#if defined(PROFILING_DETAIL_COUNTS)
+ fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s",
+ "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub");
+#endif
+ }
+
+ fprintf(prof_file, "\n\n");
+}
+
+/* Produce the final time/allocation profiling report: stop the timer,
+ * total up ticks and allocation, then either emit the XML log or the
+ * human-readable .prof report (banner, command line, totals,
+ * per-cost-centre table, and the CCS tree). */
+void
+reportCCSProfiling( void )
+{
+ nat count;
+ char temp[128]; /* sigh: magic constant */
+
+ stopProfTimer();
+
+ total_prof_ticks = 0;
+ total_alloc = 0;
+ count_ticks(CCS_MAIN);
+
+ switch (RtsFlags.CcFlags.doCostCentres) {
+ case 0:
+ return;
+ case COST_CENTRES_XML:
+ gen_XML_logfile();
+ return;
+ default:
+ break;
+ }
+
+ fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n",
+ time_str(), "Final");
+
+ fprintf(prof_file, "\n\t ");
+ fprintf(prof_file, " %s", prog_name);
+ fprintf(prof_file, " +RTS");
+ for (count = 0; rts_argv[count]; count++)
+ fprintf(prof_file, " %s", rts_argv[count]);
+ fprintf(prof_file, " -RTS");
+ /* start at 1: prog_argv[0] is the program name, printed above */
+ for (count = 1; prog_argv[count]; count++)
+ fprintf(prof_file, " %s", prog_argv[count]);
+ fprintf(prof_file, "\n\n");
+
+ /* NOTE(review): "%lu" assumes total_prof_ticks is unsigned long —
+ * confirm against its declaration (not visible here) */
+ fprintf(prof_file, "\ttotal time = %11.2f secs (%lu ticks @ %d ms)\n",
+ total_prof_ticks / (StgFloat) TICK_FREQUENCY,
+ total_prof_ticks, TICK_MILLISECS);
+
+ fprintf(prof_file, "\ttotal alloc = %11s bytes",
+ ullong_format_string(total_alloc * sizeof(W_),
+ temp, rtsTrue/*commas*/));
+
+#if defined(PROFILING_DETAIL_COUNTS)
+ fprintf(prof_file, " (%lu closures)", total_allocs);
+#endif
+ fprintf(prof_file, " (excludes profiling overheads)\n\n");
+
+ report_per_cc_costs();
+
+ inherit_costs(CCS_MAIN);
+
+ fprint_header();
+ reportCCS(pruneCCSTree(CCS_MAIN), 0);
+}
+
+/* Recursively print one line per cost-centre stack, indenting by tree
+ * depth; children are found through the memoization tables, skipping
+ * back edges. System CCSs are suppressed unless -P -P is given.
+ * NOTE(review): the "24-indent" field width goes negative for stacks
+ * deeper than 24 — confirm fprintf's behaviour is acceptable here. */
+static void
+reportCCS(CostCentreStack *ccs, nat indent)
+{
+ CostCentre *cc;
+ IndexTable *i;
+
+ cc = ccs->cc;
+
+ /* Only print cost centres with non 0 data ! */
+
+ if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL ||
+ ! ccs_to_ignore(ccs))
+ /* force printing of *all* cost centres if -P -P */
+ {
+
+ fprintf(prof_file, "%-*s%-*s %-50s",
+ indent, "", 24-indent, cc->label, cc->module);
+
+ /* percentages guard against zero totals */
+ fprintf(prof_file, "%6d %11.0f %5.1f %5.1f %5.1f %5.1f",
+ ccs->ccsID, (double) ccs->scc_count,
+ total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0),
+ total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0),
+ total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0),
+ total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0)
+ );
+
+ if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) {
+ fprintf(prof_file, " %5llu %9llu", (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_));
+#if defined(PROFILING_DETAIL_COUNTS)
+ fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld",
+ ccs->mem_allocs, ccs->thunk_count,
+ ccs->function_count, ccs->pap_count,
+ ccs->subsumed_fun_count, ccs->subsumed_caf_count,
+ ccs->caffun_subsumed);
+#endif
+ }
+ fprintf(prof_file, "\n");
+ }
+
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (!i->back_edge) {
+ reportCCS(i->ccs, indent+1);
+ }
+ }
+}
+
+
+/* Traverse the cost centre stack tree and accumulate
+ * ticks/allocations.
+ */
+/* Sum mem_alloc and time_ticks over the whole CCS tree into
+ * total_alloc/total_prof_ticks, skipping system CCSs and back edges. */
+static void
+count_ticks(CostCentreStack *ccs)
+{
+ IndexTable *i;
+
+ if (!ccs_to_ignore(ccs)) {
+ total_alloc += ccs->mem_alloc;
+ total_prof_ticks += ccs->time_ticks;
+ }
+ for (i = ccs->indexTable; i != NULL; i = i->next)
+ if (!i->back_edge) {
+ count_ticks(i->ccs);
+ }
+}
+
+/* Traverse the cost centre stack tree and inherit ticks & allocs.
+ */
+/* Post-order accumulation: each CCS's inherited figures are its own
+ * costs plus the inherited figures of all non-back-edge children. */
+static void
+inherit_costs(CostCentreStack *ccs)
+{
+ IndexTable *i;
+
+ if (ccs_to_ignore(ccs)) { return; }
+
+ ccs->inherited_ticks += ccs->time_ticks;
+ ccs->inherited_alloc += ccs->mem_alloc;
+
+ for (i = ccs->indexTable; i != NULL; i = i->next)
+ if (!i->back_edge) {
+ inherit_costs(i->ccs);
+ ccs->inherited_ticks += i->ccs->inherited_ticks;
+ ccs->inherited_alloc += i->ccs->inherited_alloc;
+ }
+
+ return;
+}
+
+/* Drop subtrees with no recorded activity from the report: a CCS
+ * survives if -P -P was given, or it still has children after pruning,
+ * or any of its own counters is non-zero. Returns ccs or NULL.
+ * NOTE(review): the "continue" on back edges skips the prev-pointer
+ * update, so unlinking a later pruned entry (*prev = i->next) would
+ * also drop intervening back-edge entries — confirm whether back-edge
+ * entries are ever consulted after pruning. */
+static CostCentreStack *
+pruneCCSTree( CostCentreStack *ccs )
+{
+ CostCentreStack *ccs1;
+ IndexTable *i, **prev;
+
+ prev = &ccs->indexTable;
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (i->back_edge) { continue; }
+
+ ccs1 = pruneCCSTree(i->ccs);
+ if (ccs1 == NULL) {
+ *prev = i->next;
+ } else {
+ prev = &(i->next);
+ }
+ }
+
+ if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL
+ /* force printing of *all* cost centres if -P -P */ )
+
+ || ( ccs->indexTable != 0 )
+ || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc )
+ ) {
+ return ccs;
+ } else {
+ return NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Generate the XML time/allocation profile
+ -------------------------------------------------------------------------- */
+
+/* Append the final time-sample record to the XML log — total ticks,
+ * one entry per surviving CCS, a terminating "0" — then close the log.
+ * NOTE(review): "%lu" assumes total_prof_ticks is unsigned long;
+ * confirm against its declaration. */
+void
+gen_XML_logfile( void )
+{
+ fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks);
+
+ reportCCS_XML(pruneCCSTree(CCS_MAIN));
+
+ fprintf(prof_file, " 0\n");
+
+ fclose(prof_file);
+}
+
+/* Emit one XML sample entry (id, entry count, ticks, allocation) per
+ * CCS, recursing over non-back-edge children and skipping system CCSs. */
+static void
+reportCCS_XML(CostCentreStack *ccs)
+{
+ CostCentre *cc;
+ IndexTable *i;
+
+ if (ccs_to_ignore(ccs)) { return; }
+
+ cc = ccs->cc;
+
+ fprintf(prof_file, " 1 %d %llu %llu %llu",
+ ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc);
+
+ for (i = ccs->indexTable; i != 0; i = i->next) {
+ if (!i->back_edge) {
+ reportCCS_XML(i->ccs);
+ }
+ }
+}
+
+/* Print a CCS to f as "<Mod.label,Mod.label,...>", innermost first,
+ * stopping at (and omitting) CCS_MAIN. */
+void
+fprintCCS( FILE *f, CostCentreStack *ccs )
+{
+ fprintf(f,"<");
+ for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
+ fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
+ if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
+ fprintf(f,",");
+ }
+ }
+ fprintf(f,">");
+}
+
+/* For calling from .cmm code, where we can't reliably refer to stderr */
+void
+fprintCCS_stderr( CostCentreStack *ccs )
+{
+ fprintCCS(stderr, ccs);
+}
+
+#ifdef DEBUG
+/* Debug-channel twin of fprintCCS, using debugBelch instead of a FILE. */
+void
+debugCCS( CostCentreStack *ccs )
+{
+ debugBelch("<");
+ for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
+ debugBelch("%s.%s", ccs->cc->module, ccs->cc->label);
+ if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
+ debugBelch(",");
+ }
+ }
+ debugBelch(">");
+}
+#endif /* DEBUG */
+
+#endif /* PROFILING */
diff --git a/rts/Profiling.h b/rts/Profiling.h
new file mode 100644
index 0000000000..d968349a52
--- /dev/null
+++ b/rts/Profiling.h
@@ -0,0 +1,39 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Support for profiling
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PROFILING_H
+#define PROFILING_H
+
+#include <stdio.h>
+
+/* Initialisation/teardown and the log-file handles are available in
+ * DEBUG builds as well as PROFILING builds. */
+#if defined(PROFILING) || defined(DEBUG)
+void initProfiling1 ( void );
+void initProfiling2 ( void );
+void endProfiling ( void );
+
+/* output streams for the .prof and .hp logs respectively */
+extern FILE *prof_file;
+extern FILE *hp_file;
+#endif
+
+#ifdef PROFILING
+
+void gen_XML_logfile ( void );
+void reportCCSProfiling ( void );
+
+void PrintNewStackDecls ( void );
+
+extern void fprintCCS( FILE *f, CostCentreStack *ccs );
+extern void fprintCCS_stderr( CostCentreStack *ccs );
+
+#ifdef DEBUG
+extern void debugCCS( CostCentreStack *ccs );
+#endif
+
+#endif
+
+#endif /* PROFILING_H */
diff --git a/rts/Proftimer.c b/rts/Proftimer.c
new file mode 100644
index 0000000000..3b499152d6
--- /dev/null
+++ b/rts/Proftimer.c
@@ -0,0 +1,85 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Profiling interval timer
+ *
+ * ---------------------------------------------------------------------------*/
+
+#if defined (PROFILING)
+
+#include "PosixSource.h"
+
+#include "Rts.h"
+#include "Profiling.h"
+#include "Timer.h"
+#include "Proftimer.h"
+#include "RtsFlags.h"
+
+static rtsBool do_prof_ticks = rtsFalse; // enable profiling ticks
+static rtsBool do_heap_prof_ticks = rtsFalse; // enable heap profiling ticks
+
+// Number of ticks until next heap census
+static int ticks_to_heap_profile;
+
+// Time for a heap profile on the next context switch
+rtsBool performHeapProfile;
+
+/* Disable cost-centre tick accounting. */
+void
+stopProfTimer( void )
+{
+ do_prof_ticks = rtsFalse;
+}
+
+/* Enable cost-centre tick accounting. */
+void
+startProfTimer( void )
+{
+ do_prof_ticks = rtsTrue;
+}
+
+/* Disable heap-census countdown ticks. */
+void
+stopHeapProfTimer( void )
+{
+ do_heap_prof_ticks = rtsFalse;
+}
+
+/* Enable heap-census countdown ticks, but only when heap profiling is
+ * requested and the sampling interval is at least one tick. */
+void
+startHeapProfTimer( void )
+{
+ if (RtsFlags.ProfFlags.doHeapProfile &&
+ RtsFlags.ProfFlags.profileIntervalTicks > 0) {
+ do_heap_prof_ticks = rtsTrue;
+ }
+}
+
+/* Convert the requested heap-sample interval (ms) into ticks and arm
+ * the countdown.
+ * NOTE(review): integer division — an interval smaller than
+ * TICK_MILLISECS rounds to 0 ticks, so startHeapProfTimer will leave
+ * heap sampling disabled. */
+void
+initProfTimer( void )
+{
+ performHeapProfile = rtsFalse;
+
+ RtsFlags.ProfFlags.profileIntervalTicks =
+ RtsFlags.ProfFlags.profileInterval / TICK_MILLISECS;
+
+ ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks;
+
+ startHeapProfTimer();
+}
+
+
+/* Timer interrupt body: charge one tick to the current cost-centre
+ * stack, and count down to the next heap census, requesting one (via
+ * performHeapProfile) when the countdown expires. */
+void
+handleProfTick(void)
+{
+ if (do_prof_ticks) {
+ CCCS->time_ticks++;
+ }
+
+ if (do_heap_prof_ticks) {
+ ticks_to_heap_profile--;
+ if (ticks_to_heap_profile <= 0) {
+ ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks;
+ performHeapProfile = rtsTrue;
+ }
+ }
+}
+
+#endif /* PROFILING */
diff --git a/rts/Proftimer.h b/rts/Proftimer.h
new file mode 100644
index 0000000000..c837b855f9
--- /dev/null
+++ b/rts/Proftimer.h
@@ -0,0 +1,22 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Profiling interval timer
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PROFTIMER_H
+#define PROFTIMER_H
+
+extern void initProfTimer ( void );
+extern void handleProfTick ( void );
+
+/* toggles for cost-centre and heap-census ticking; see Proftimer.c */
+extern void stopProfTimer ( void );
+extern void startProfTimer ( void );
+extern void stopHeapProfTimer ( void );
+extern void startHeapProfTimer ( void );
+
+/* set by handleProfTick when a heap census is due */
+extern rtsBool performHeapProfile;
+
+#endif /* PROFTIMER_H */
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
new file mode 100644
index 0000000000..c5c3de5314
--- /dev/null
+++ b/rts/RetainerProfile.c
@@ -0,0 +1,2338 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+// Turn off inlining when debugging - it obfuscates things
+#ifdef DEBUG
+#define INLINE
+#else
+#define INLINE inline
+#endif
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RetainerProfile.h"
+#include "RetainerSet.h"
+#include "Schedule.h"
+#include "Printer.h"
+#include "Storage.h"
+#include "RtsFlags.h"
+#include "Weak.h"
+#include "Sanity.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "BlockAlloc.h"
+#include "ProfHeap.h"
+#include "Apply.h"
+
+/*
+ Note: what to change in order to plug-in a new retainer profiling scheme?
+ (1) type retainer in ../includes/StgRetainerProf.h
+ (2) retainer function R(), i.e., getRetainerFrom()
+ (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
+ in RetainerSet.h, if needed.
+ (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
+ */
+
+/* -----------------------------------------------------------------------------
+ * Declarations...
+ * -------------------------------------------------------------------------- */
+
+static nat retainerGeneration; // generation
+
+static nat numObjectVisited; // total number of objects visited
+static nat timesAnyObjectVisited; // number of times any objects are visited
+
+/*
+ The rs field in the profile header of any object points to its retainer
+ set in an indirect way: if flip is 0, it points to the retainer set;
+ if flip is 1, it points to the next byte after the retainer set (even
+ for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
+ pointer. See retainerSetOf().
+ */
+
+StgWord flip = 0; // flip bit
+ // must be 0 if DEBUG_RETAINER is on (for static closures)
+
+#define setRetainerSetToNull(c) \
+ (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
+
+static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
+static void retainClosure(StgClosure *, StgClosure *, retainer);
+#ifdef DEBUG_RETAINER
+static void belongToHeap(StgPtr p);
+#endif
+
+#ifdef DEBUG_RETAINER
+/*
+ cStackSize records how many times retainStack() has been invoked recursively,
+ that is, the number of activation records for retainStack() on the C stack.
+ maxCStackSize records its max value.
+ Invariants:
+ cStackSize <= maxCStackSize
+ */
+static nat cStackSize, maxCStackSize;
+
+static nat sumOfNewCost; // sum of the cost of each object, computed
+ // when the object is first visited
+static nat sumOfNewCostExtra; // for those objects not visited during
+ // retainer profiling, e.g., MUT_VAR
+static nat costArray[N_CLOSURE_TYPES];
+
+nat sumOfCostLinear; // sum of the costs of all object, computed
+ // when linearly traversing the heap after
+ // retainer profiling
+nat costArrayLinear[N_CLOSURE_TYPES];
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Retainer stack - header
+ * Note:
+ * Although the retainer stack implementation could be separated
+ * from the retainer profiling engine, there does not seem to be
+ * any advantage in doing that; retainer stack is an integral part
+ * of the retainer profiling engine and cannot be used elsewhere at
+ * all.
+ * -------------------------------------------------------------------------- */
+
+/* Which kind of child-cursor a stack entry carries. */
+typedef enum {
+ posTypeStep,
+ posTypePtrs,
+ posTypeSRT,
+ posTypeLargeSRT,
+} nextPosType;
+
+/* Cursor over the remaining children of a closure: either a plain step
+ * counter, a position within a pointer payload, or a position within a
+ * small or large SRT bitmap. */
+typedef union {
+ // fixed layout or layout specified by a field in the closure
+ StgWord step;
+
+ // layout.payload
+ struct {
+ // See StgClosureInfo in InfoTables.h
+#if SIZEOF_VOID_P == 8
+ StgWord32 pos;
+ StgWord32 ptrs;
+#else
+ StgWord16 pos;
+ StgWord16 ptrs;
+#endif
+ StgPtr payload;
+ } ptrs;
+
+ // SRT
+ struct {
+ StgClosure **srt;
+ StgWord srt_bitmap;
+ } srt;
+
+ // Large SRT
+ struct {
+ StgLargeSRT *srt;
+ StgWord offset;
+ } large_srt;
+
+} nextPos;
+
+/* Tagged cursor: type selects which member of next is valid. */
+typedef struct {
+ nextPosType type;
+ nextPos next;
+} stackPos;
+
+/* One traverse-stack entry: the closure being visited, the retainer to
+ * charge its children to, and the cursor over its remaining children. */
+typedef struct {
+ StgClosure *c;
+ retainer c_child_r;
+ stackPos info;
+} stackElement;
+
+/*
+ Invariants:
+ firstStack points to the first block group.
+ currentStack points to the block group currently being used.
+ currentStack->free == stackLimit.
+ stackTop points to the topmost byte in the stack of currentStack.
+ Unless the whole stack is empty, stackTop must point to the topmost
+ object (or byte) in the whole stack. Thus, it is only when the whole stack
+ is empty that stackTop == stackLimit (not during the execution of push()
+ and pop()).
+ stackBottom == currentStack->start.
+ stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
+ Note:
+ When a current stack becomes empty, stackTop is set to point to
+ the topmost element on the previous block group so as to satisfy
+ the invariants described above.
+ */
+static bdescr *firstStack = NULL;
+static bdescr *currentStack;
+static stackElement *stackBottom, *stackTop, *stackLimit;
+
+/*
+ currentStackBoundary is used to mark the current stack chunk.
+ If stackTop == currentStackBoundary, it means that the current stack chunk
+ is empty. It is the responsibility of the user to keep currentStackBoundary
+ valid all the time if it is to be employed.
+ */
+static stackElement *currentStackBoundary;
+
+/*
+ stackSize records the current size of the stack.
+ maxStackSize records its high water mark.
+ Invariants:
+ stackSize <= maxStackSize
+ Note:
+ stackSize is just an estimate measure of the depth of the graph. The reason
+ is that some heap objects have only a single child and may not result
+ in a new element being pushed onto the stack. Therefore, at the end of
+ retainer profiling, maxStackSize + maxCStackSize is some value no greater
+ than the actual depth of the graph.
+ */
+#ifdef DEBUG_RETAINER
+static int stackSize, maxStackSize;
+#endif
+
+// number of blocks allocated for one stack
+#define BLOCKS_IN_STACK 1
+
+/* -----------------------------------------------------------------------------
+ * Add a new block group to the stack.
+ * Invariants:
+ * currentStack->link == s.
+ * -------------------------------------------------------------------------- */
+/* Make bd the current stack block: the stack grows downward, so an
+ * empty block has stackTop == stackLimit at the block's top. */
+static INLINE void
+newStackBlock( bdescr *bd )
+{
+ currentStack = bd;
+ stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+ stackBottom = (stackElement *)bd->start;
+ stackLimit = (stackElement *)stackTop;
+ bd->free = (StgPtr)stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Return to the previous block group.
+ * Invariants:
+ * s->link == currentStack.
+ * -------------------------------------------------------------------------- */
+/* Reinstate bd as the current stack block; bd->free records where its
+ * topmost element was when we left it. */
+static INLINE void
+returnToOldStack( bdescr *bd )
+{
+ currentStack = bd;
+ stackTop = (stackElement *)bd->free;
+ stackBottom = (stackElement *)bd->start;
+ stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+ bd->free = (StgPtr)stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes the traverse stack.
+ * -------------------------------------------------------------------------- */
+/* (Re)create the traverse stack with a single block group, freeing any
+ * chain left over from a previous profiling pass. */
+static void
+initializeTraverseStack( void )
+{
+ if (firstStack != NULL) {
+ freeChain(firstStack);
+ }
+
+ firstStack = allocGroup(BLOCKS_IN_STACK);
+ firstStack->link = NULL;
+ firstStack->u.back = NULL;
+
+ newStackBlock(firstStack);
+}
+
+/* -----------------------------------------------------------------------------
+ * Frees all the block groups in the traverse stack.
+ * Invariants:
+ * firstStack != NULL
+ * -------------------------------------------------------------------------- */
+/* Release every block group of the traverse stack. */
+static void
+closeTraverseStack( void )
+{
+ freeChain(firstStack);
+ firstStack = NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if the whole stack is empty.
+ * -------------------------------------------------------------------------- */
+/* Empty means we are on the first block and its downward-growing top
+ * sits at the limit (see the invariants above). */
+static INLINE rtsBool
+isEmptyRetainerStack( void )
+{
+ return (firstStack == currentStack) && stackTop == stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns size of stack
+ * -------------------------------------------------------------------------- */
+/* Total number of blocks across all stack block groups (DEBUG only). */
+#ifdef DEBUG
+lnat
+retainerStackBlocks( void )
+{
+ bdescr* bd;
+ lnat res = 0;
+
+ for (bd = firstStack; bd != NULL; bd = bd->link)
+ res += bd->blocks;
+
+ return res;
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
+ * i.e., if the current stack chunk is empty.
+ * -------------------------------------------------------------------------- */
+static INLINE rtsBool
+isOnBoundary( void )
+{
+ return stackTop == currentStackBoundary;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from ptrs and payload.
+ * Invariants:
+ * payload[] begins with ptrs pointers followed by non-pointers.
+ * -------------------------------------------------------------------------- */
+/* Set up a pointer-payload cursor starting at element 0 of ptrs
+ * pointers beginning at payload. */
+static INLINE void
+init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
+{
+ info->type = posTypePtrs;
+ info->next.ptrs.pos = 0;
+ info->next.ptrs.ptrs = ptrs;
+ info->next.ptrs.payload = payload;
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+/* Advance the pointer-payload cursor; NULL when exhausted. */
+static INLINE StgClosure *
+find_ptrs( stackPos *info )
+{
+ if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
+ return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
+ } else {
+ return NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from SRT information stored in *infoTable.
+ * -------------------------------------------------------------------------- */
+/* A srt_bitmap of all-ones ((StgHalfWord)(-1)) marks a "large" SRT with
+ * an out-of-line bitmap; otherwise the bitmap is carried inline. */
+static INLINE void
+init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
+{
+ if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
+ info->type = posTypeLargeSRT;
+ info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
+ info->next.large_srt.offset = 0;
+ } else {
+ info->type = posTypeSRT;
+ info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
+ info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
+ }
+}
+
+/* Same large/small SRT dispatch as init_srt_fun, but for thunk info
+ * tables (GET_SRT instead of GET_FUN_SRT). */
+static INLINE void
+init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
+{
+ if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
+ info->type = posTypeLargeSRT;
+ info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
+ info->next.large_srt.offset = 0;
+ } else {
+ info->type = posTypeSRT;
+ info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
+ info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+/* Yield the next closure referenced from the SRT cursor in *info, or
+ * NULL when the SRT is exhausted. Small SRTs scan an inline bitmap;
+ * large SRTs rescan their out-of-line bitmap from the saved offset on
+ * each call.
+ * NOTE(review): the inner "bitmap" in the large-SRT branch shadows the
+ * outer declaration — harmless here, but worth cleaning up. */
+static INLINE StgClosure *
+find_srt( stackPos *info )
+{
+ StgClosure *c;
+ StgWord bitmap;
+
+ if (info->type == posTypeSRT) {
+ // Small SRT bitmap
+ bitmap = info->next.srt.srt_bitmap;
+ while (bitmap != 0) {
+ if ((bitmap & 1) != 0) {
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+
+ /* low bit tags a DLL-import indirection — presumably the
+ * entry must be dereferenced once more; confirm against
+ * the Windows DLL linking scheme */
+ if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
+ c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
+ else
+ c = *(info->next.srt.srt);
+#else
+ c = *(info->next.srt.srt);
+#endif
+ /* save the advanced cursor before yielding c */
+ bitmap = bitmap >> 1;
+ info->next.srt.srt++;
+ info->next.srt.srt_bitmap = bitmap;
+ return c;
+ }
+ bitmap = bitmap >> 1;
+ info->next.srt.srt++;
+ }
+ // bitmap is now zero...
+ return NULL;
+ }
+ else {
+ // Large SRT bitmap
+ nat i = info->next.large_srt.offset;
+ StgWord bitmap;
+
+ // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
+ bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
+ bitmap = bitmap >> (i % BITS_IN(StgWord));
+ while (i < info->next.large_srt.srt->l.size) {
+ if ((bitmap & 1) != 0) {
+ c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
+ i++;
+ info->next.large_srt.offset = i;
+ return c;
+ }
+ i++;
+ if (i % BITS_IN(W_) == 0) {
+ bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
+ } else {
+ bitmap = bitmap >> 1;
+ }
+ }
+ // reached the end of this bitmap.
+ info->next.large_srt.offset = i;
+ return NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * push() pushes a stackElement representing the next child of *c
+ * onto the traverse stack. If *c has no child, *first_child is set
+ * to NULL and nothing is pushed onto the stack. If *c has only one
+ * child, *c_child is set to that child and nothing is pushed onto
+ * the stack. If *c has more than two children, *first_child is set
+ * to the first child and a stackElement representing the second
+ * child is pushed onto the stack.
+
+ * Invariants:
+ * *c_child_r is the most recent retainer of *c's children.
+ * *c is not any of TSO, AP, PAP, AP_STACK, which means that
+ * there cannot be any stack objects.
+ * Note: SRTs are considered to be children as well.
+ * -------------------------------------------------------------------------- */
+static INLINE void
+push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
+{
+ stackElement se;
+ bdescr *nbd; // Next Block Descriptor
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+ ASSERT(get_itbl(c)->type != TSO);
+ ASSERT(get_itbl(c)->type != AP_STACK);
+
+ //
+ // fill in se
+ //
+
+ se.c = c;
+ se.c_child_r = c_child_r;
+
+ // fill in se.info
+ switch (get_itbl(c)->type) {
+ // no child, no SRT
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case ARR_WORDS:
+ *first_child = NULL;
+ return;
+
+ // one child (fixed), no SRT
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ *first_child = ((StgMutVar *)c)->var;
+ return;
+ case THUNK_SELECTOR:
+ *first_child = ((StgSelector *)c)->selectee;
+ return;
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_OLDGEN:
+ *first_child = ((StgInd *)c)->indirectee;
+ return;
+ case CONSTR_1_0:
+ case CONSTR_1_1:
+ *first_child = c->payload[0];
+ return;
+
+ // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
+ // of the next child. We do not write a separate initialization code.
+ // Also we do not have to initialize info.type;
+
+ // two children (fixed), no SRT
+ // need to push a stackElement, but nothing to store in se.info
+ case CONSTR_2_0:
+ *first_child = c->payload[0]; // return the first pointer
+ // se.info.type = posTypeStep;
+ // se.info.next.step = 2; // 2 = second
+ break;
+
+ // three children (fixed), no SRT
+ // need to push a stackElement
+ case MVAR:
+ // head must be TSO and the head of a linked list of TSOs.
+ // Shoule it be a child? Seems to be yes.
+ *first_child = (StgClosure *)((StgMVar *)c)->head;
+ // se.info.type = posTypeStep;
+ se.info.next.step = 2; // 2 = second
+ break;
+
+ // three children (fixed), no SRT
+ case WEAK:
+ *first_child = ((StgWeak *)c)->key;
+ // se.info.type = posTypeStep;
+ se.info.next.step = 2;
+ break;
+
+ // layout.payload.ptrs, no SRT
+ case CONSTR:
+ case STABLE_NAME:
+ case BCO:
+ case CONSTR_STATIC:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ (StgPtr)c->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ // StgMutArrPtr.ptrs, no SRT
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+ init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
+ (StgPtr)(((StgMutArrPtrs *)c)->payload));
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return;
+ break;
+
+ // layout.payload.ptrs, SRT
+ case FUN: // *c is a heap object.
+ case FUN_2_0:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ // no child from ptrs, so check SRT
+ goto fun_srt_only;
+ break;
+
+ case THUNK:
+ case THUNK_2_0:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ (StgPtr)((StgThunk *)c)->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ // no child from ptrs, so check SRT
+ goto thunk_srt_only;
+ break;
+
+ // 1 fixed child, SRT
+ case FUN_1_0:
+ case FUN_1_1:
+ *first_child = c->payload[0];
+ ASSERT(*first_child != NULL);
+ init_srt_fun(&se.info, get_fun_itbl(c));
+ break;
+
+ case THUNK_1_0:
+ case THUNK_1_1:
+ *first_child = ((StgThunk *)c)->payload[0];
+ ASSERT(*first_child != NULL);
+ init_srt_thunk(&se.info, get_thunk_itbl(c));
+ break;
+
+ case FUN_STATIC: // *c is a heap object.
+ ASSERT(get_itbl(c)->srt_bitmap != 0);
+ case FUN_0_1:
+ case FUN_0_2:
+ fun_srt_only:
+ init_srt_fun(&se.info, get_fun_itbl(c));
+ *first_child = find_srt(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ // SRT only
+ case THUNK_STATIC:
+ ASSERT(get_itbl(c)->srt_bitmap != 0);
+ case THUNK_0_1:
+ case THUNK_0_2:
+ thunk_srt_only:
+ init_srt_thunk(&se.info, get_thunk_itbl(c));
+ *first_child = find_srt(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ case TVAR_WAIT_QUEUE:
+ *first_child = (StgClosure *)((StgTVarWaitQueue *)c)->waiting_tso;
+ se.info.next.step = 2; // 2 = second
+ break;
+ case TVAR:
+ *first_child = (StgClosure *)((StgTVar *)c)->current_value;
+ break;
+ case TREC_HEADER:
+ *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec;
+ break;
+ case TREC_CHUNK:
+ *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
+ se.info.next.step = 0; // entry no.
+ break;
+
+ // cannot appear
+ case PAP:
+ case AP:
+ case AP_STACK:
+ case TSO:
+ case IND_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // invalid objects
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object *c in push()");
+ return;
+ }
+
+ if (stackTop - 1 < stackBottom) {
+#ifdef DEBUG_RETAINER
+ // debugBelch("push() to the next stack.\n");
+#endif
+ // currentStack->free is updated when the active stack is switched
+ // to the next stack.
+ currentStack->free = (StgPtr)stackTop;
+
+ if (currentStack->link == NULL) {
+ nbd = allocGroup(BLOCKS_IN_STACK);
+ nbd->link = NULL;
+ nbd->u.back = currentStack;
+ currentStack->link = nbd;
+ } else
+ nbd = currentStack->link;
+
+ newStackBlock(nbd);
+ }
+
+ // adjust stackTop (acutal push)
+ stackTop--;
+ // If the size of stackElement was huge, we would better replace the
+ // following statement by either a memcpy() call or a switch statement
+ // on the type of the element. Currently, the size of stackElement is
+ // small enough (5 words) that this direct assignment seems to be enough.
+ *stackTop = se;
+
+#ifdef DEBUG_RETAINER
+ stackSize++;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ // ASSERT(stackSize >= 0);
+ // debugBelch("stackSize = %d\n", stackSize);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * popOff() and popOffReal(): Pop a stackElement off the traverse stack.
+ * Invariants:
+ * stackTop cannot be equal to stackLimit unless the whole stack is
+ * empty, in which case popOff() is not allowed.
+ * Note:
+ * You can think of popOffReal() as a part of popOff() which is
+ * executed at the end of popOff() if necessary. Since popOff() is
+ * likely to be executed quite often while popOffReal() is not, we
+ * separate popOffReal() from popOff(), which is declared as an
+ * INLINE function (for the sake of execution speed). popOffReal()
+ * is called only within popOff() and nowhere else.
+ * -------------------------------------------------------------------------- */
+static void
+popOffReal(void)
+{
+ bdescr *pbd; // Previous Block Descriptor
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("pop() to the previous stack.\n");
+#endif
+
+ // Only reached when the current chunk has just been emptied.
+ ASSERT(stackTop + 1 == stackLimit);
+ ASSERT(stackBottom == (stackElement *)currentStack->start);
+
+ if (firstStack == currentStack) {
+ // The stack is completely empty.
+ stackTop++;
+ ASSERT(stackTop == stackLimit);
+#ifdef DEBUG_RETAINER
+ stackSize--;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ /*
+ ASSERT(stackSize >= 0);
+ debugBelch("stackSize = %d\n", stackSize);
+ */
+#endif
+ return;
+ }
+
+ // currentStack->free is updated when the active stack is switched back
+ // to the previous stack.
+ currentStack->free = (StgPtr)stackLimit;
+
+ // find the previous block descriptor
+ pbd = currentStack->u.back;
+ ASSERT(pbd != NULL);
+
+ returnToOldStack(pbd);
+
+#ifdef DEBUG_RETAINER
+ stackSize--;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ /*
+ ASSERT(stackSize >= 0);
+ debugBelch("stackSize = %d\n", stackSize);
+ */
+#endif
+}
+
+// Pop one stackElement off the traverse stack; delegates to popOffReal()
+// when the pop empties the current stack chunk.
+static INLINE void
+popOff(void) {
+#ifdef DEBUG_RETAINER
+ // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+ ASSERT(stackTop != stackLimit);
+ ASSERT(!isEmptyRetainerStack());
+
+ // <= (instead of <) is wrong!
+ if (stackTop + 1 < stackLimit) {
+ // fast path: still inside the current stack chunk
+ stackTop++;
+#ifdef DEBUG_RETAINER
+ stackSize--;
+ if (stackSize > maxStackSize) maxStackSize = stackSize;
+ /*
+ ASSERT(stackSize >= 0);
+ debugBelch("stackSize = %d\n", stackSize);
+ */
+#endif
+ return;
+ }
+
+ // slow path: this pop exhausts the chunk
+ popOffReal();
+}
+
+/* -----------------------------------------------------------------------------
+ * Finds the next object to be considered for retainer profiling and stores
+ * its pointer to *c.
+ * Test if the topmost stack element indicates that more objects are left,
+ * and if so, retrieve the first object and store its pointer to *c. Also,
+ * set *cp and *r appropriately, both of which are stored in the stack element.
+ * The topmost stack element then is overwritten so as for it to now denote
+ * the next object.
+ * If the topmost stack element indicates no more objects are left, pop
+ * off the stack element until either an object can be retrieved or
+ * the current stack chunk becomes empty, indicated by rtsTrue returned by
+ * isOnBoundary(), in which case *c is set to NULL.
+ * Note:
+ * It is okay to call this function even when the current stack chunk
+ * is empty.
+ * -------------------------------------------------------------------------- */
+static INLINE void
+pop( StgClosure **c, StgClosure **cp, retainer *r )
+{
+ stackElement *se;
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+ do {
+ if (isOnBoundary()) { // if the current stack chunk is depleted
+ *c = NULL;
+ return;
+ }
+
+ se = stackTop;
+
+ switch (get_itbl(se->c)->type) {
+ // two children (fixed), no SRT
+ // nothing in se.info
+ case CONSTR_2_0:
+ *c = se->c->payload[1];
+ *cp = se->c;
+ *r = se->c_child_r;
+ popOff();
+ return;
+
+ // three children (fixed), no SRT
+ // need to push a stackElement
+ case MVAR:
+ if (se->info.next.step == 2) {
+ *c = (StgClosure *)((StgMVar *)se->c)->tail;
+ se->info.next.step++; // move to the next step
+ // no popOff
+ } else {
+ *c = ((StgMVar *)se->c)->value;
+ popOff();
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ // three children (fixed), no SRT
+ case WEAK:
+ if (se->info.next.step == 2) {
+ *c = ((StgWeak *)se->c)->value;
+ se->info.next.step++;
+ // no popOff
+ } else {
+ *c = ((StgWeak *)se->c)->finalizer;
+ popOff();
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ case TVAR_WAIT_QUEUE:
+ if (se->info.next.step == 2) {
+ *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->next_queue_entry;
+ se->info.next.step++; // move to the next step
+ // no popOff
+ } else {
+ *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->prev_queue_entry;
+ popOff();
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ case TVAR:
+ *c = (StgClosure *)((StgTVar *)se->c)->first_wait_queue_entry;
+ *cp = se->c;
+ *r = se->c_child_r;
+ popOff();
+ return;
+
+ case TREC_HEADER:
+ *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk;
+ *cp = se->c;
+ *r = se->c_child_r;
+ popOff();
+ return;
+
+ case TREC_CHUNK: {
+ // These are pretty complicated: we have N entries, each
+ // of which contains 3 fields that we want to follow. So
+ // we divide the step counter: the 2 low bits indicate
+ // which field, and the rest of the bits indicate the
+ // entry number (starting from zero).
+ nat entry_no = se->info.next.step >> 2;
+ nat field_no = se->info.next.step & 3;
+ if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
+ *c = NULL;
+ popOff();
+ // NOTE(review): returning NULL here ends the caller's pop loop
+ // even though older elements may remain on the traverse stack;
+ // later GHC versions 'break' here so the do-while keeps popping
+ // -- verify the intended behavior.
+ return;
+ }
+ TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no];
+ if (field_no == 0) {
+ *c = (StgClosure *)entry->tvar;
+ } else if (field_no == 1) {
+ *c = entry->expected_value;
+ } else {
+ *c = entry->new_value;
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ se->info.next.step++;
+ return;
+ }
+
+ case CONSTR:
+ case STABLE_NAME:
+ case BCO:
+ case CONSTR_STATIC:
+ // StgMutArrPtr.ptrs, no SRT
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+ *c = find_ptrs(&se->info);
+ if (*c == NULL) {
+ popOff();
+ break; // keep popping: try the next stack element
+ }
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+
+ // layout.payload.ptrs, SRT
+ case FUN: // always a heap object
+ case FUN_2_0:
+ if (se->info.type == posTypePtrs) {
+ *c = find_ptrs(&se->info);
+ if (*c != NULL) {
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+ }
+ // pointers exhausted: switch this element over to its SRT
+ init_srt_fun(&se->info, get_fun_itbl(se->c));
+ }
+ goto do_srt;
+
+ case THUNK:
+ case THUNK_2_0:
+ if (se->info.type == posTypePtrs) {
+ *c = find_ptrs(&se->info);
+ if (*c != NULL) {
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+ }
+ // pointers exhausted: switch this element over to its SRT
+ init_srt_thunk(&se->info, get_thunk_itbl(se->c));
+ }
+ goto do_srt;
+
+ // SRT
+ do_srt:
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ case FUN_0_1:
+ case FUN_0_2:
+ case THUNK_0_1:
+ case THUNK_0_2:
+ case FUN_1_0:
+ case FUN_1_1:
+ case THUNK_1_0:
+ case THUNK_1_1:
+ *c = find_srt(&se->info);
+ if (*c != NULL) {
+ *cp = se->c;
+ *r = se->c_child_r;
+ return;
+ }
+ popOff();
+ break;
+
+ // no child (fixed), no SRT
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case ARR_WORDS:
+ // one child (fixed), no SRT
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ case THUNK_SELECTOR:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_OLDGEN:
+ case CONSTR_1_1:
+ // cannot appear
+ case PAP:
+ case AP:
+ case AP_STACK:
+ case TSO:
+ case IND_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // stack objects
+ case RET_DYN:
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // invalid objects
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object *c in pop()");
+ return;
+ }
+ } while (rtsTrue);
+}
+
+/* -----------------------------------------------------------------------------
+ * RETAINER PROFILING ENGINE
+ * -------------------------------------------------------------------------- */
+
+// Prepare for retainer profiling: reset the retainer-set store and restart
+// the census counter at generation 0.
+void
+initRetainerProfiling( void )
+{
+ initializeAllRetainerSet();
+ retainerGeneration = 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * This function must be called before f-closing prof_file.
+ * -------------------------------------------------------------------------- */
+void
+endRetainerProfiling( void )
+{
+#ifdef SECOND_APPROACH
+ // Dump every retainer set accumulated during the run to the profile file.
+ outputAllRetainerSet(prof_file);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Ensures the retainer-set field of the closure *c is valid, i.e. that it
+ * conforms to the current value of flip.
+ * Side effects:
+ * RSET(c) is initialized to NULL if its current value does not
+ * conform to flip.
+ * Note:
+ * Even though this function has side effects, they CAN be ignored because
+ * subsequent calls to retainerSetOf() always result in the same return value
+ * and retainerSetOf() is the only way to retrieve retainerSet of a given
+ * closure.
+ * We have to perform an XOR (^) operation each time a closure is examined.
+ * The reason is that we do not know when a closure is visited last.
+ * -------------------------------------------------------------------------- */
+static INLINE void
+maybeInitRetainerSet( StgClosure *c )
+{
+ if (!isRetainerSetFieldValid(c)) {
+ setRetainerSetToNull(c);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if *c is a retainer.
+ * Pure classification by closure type; barfs on types that can never be
+ * examined by the retainer profiling loop.
+ * -------------------------------------------------------------------------- */
+static INLINE rtsBool
+isRetainer( StgClosure *c )
+{
+ switch (get_itbl(c)->type) {
+ //
+ // True case
+ //
+ // TSOs MUST be retainers: they constitute the set of roots.
+ case TSO:
+
+ // mutable objects
+ case MVAR:
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_ARR_PTRS_FROZEN0:
+
+ // thunks are retainers.
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ case THUNK_SELECTOR:
+ case AP:
+ case AP_STACK:
+
+ // Static thunks, or CAFs, are obviously retainers.
+ case THUNK_STATIC:
+
+ // WEAK objects are roots; there is separate code in which traversing
+ // begins from WEAK objects.
+ case WEAK:
+
+ // Since the other mutvar-type things are retainers, seems
+ // like the right thing to do:
+ case TVAR:
+ return rtsTrue;
+
+ //
+ // False case
+ //
+
+ // constructors
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ // functions
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ // partial applications
+ case PAP:
+ // blackholes
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ // indirection
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_OLDGEN:
+ // static objects
+ case CONSTR_STATIC:
+ case FUN_STATIC:
+ // misc
+ case STABLE_NAME:
+ case BCO:
+ case ARR_WORDS:
+ // STM
+ case TVAR_WAIT_QUEUE:
+ case TREC_HEADER:
+ case TREC_CHUNK:
+ return rtsFalse;
+
+ //
+ // Error case
+ //
+ // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
+ case IND_STATIC:
+ // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
+ // cannot be *c, *cp, *r in the retainer profiling loop.
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ // Stack objects are invalid because they are never treated as
+ // legal objects during retainer profiling.
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case STOP_FRAME:
+ case RET_DYN:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ // other cases
+ case IND:
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ case RBH:
+ case REMOTE_REF:
+ case EVACUATED:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
+ return rtsFalse;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns the retainer function value for the closure *c, i.e., R(*c).
+ * This function does NOT return the retainer(s) of *c.
+ * Invariants:
+ * *c must be a retainer.
+ * Note:
+ * Depending on the definition of this function, the maintenance of retainer
+ * sets can be made easier. If most retainer sets are likely to be created
+ * again across garbage collections, refreshAllRetainerSet() in
+ * RetainerSet.c can simply do nothing.
+ * If this is not the case, we can free all the retainer sets and
+ * re-initialize the hash table.
+ * See refreshAllRetainerSet() in RetainerSet.c.
+ * -------------------------------------------------------------------------- */
+static INLINE retainer
+getRetainerFrom( StgClosure *c )
+{
+ ASSERT(isRetainer(c));
+
+ // Exactly one RETAINER_SCHEME_* macro must be defined by the build;
+ // otherwise this function falls off the end without returning a value.
+#if defined(RETAINER_SCHEME_INFO)
+ // Retainer scheme 1: retainer = info table
+ return get_itbl(c);
+#elif defined(RETAINER_SCHEME_CCS)
+ // Retainer scheme 2: retainer = cost centre stack
+ return c->header.prof.ccs;
+#elif defined(RETAINER_SCHEME_CC)
+ // Retainer scheme 3: retainer = cost centre
+ return c->header.prof.ccs->cc;
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Associates the retainer set *s with the closure *c, that is, *s becomes
+ * the retainer set of *c.
+ * Invariants:
+ * c != NULL
+ * s != NULL
+ * -------------------------------------------------------------------------- */
+static INLINE void
+associate( StgClosure *c, RetainerSet *s )
+{
+ // StgWord has the same size as pointers, so the following type
+ // casting is okay.
+ // The current flip bit is OR-ed in so that the stored field reads as
+ // valid for this census (see maybeInitRetainerSet()).
+ RSET(c) = (RetainerSet *)((StgWord)s | flip);
+}
+
+/* -----------------------------------------------------------------------------
+ Call retainClosure for each of the closures covered by a large bitmap.
+ -------------------------------------------------------------------------- */
+
+// Walks 'size' stack words starting at p. A CLEAR bit marks a pointer
+// word here (note: the opposite convention from SRT bitmaps, where a set
+// bit marks a live entry); each pointer word is retained with parent c
+// and retainer c_child_r.
+static void
+retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
+ StgClosure *c, retainer c_child_r)
+{
+ nat i, b;
+ StgWord bitmap;
+
+ b = 0;
+ bitmap = large_bitmap->bitmap[b];
+ for (i = 0; i < size; ) {
+ if ((bitmap & 1) == 0) {
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ }
+ i++;
+ p++;
+ if (i % BITS_IN(W_) == 0) {
+ // crossed a word boundary: load the next bitmap word
+ b++;
+ bitmap = large_bitmap->bitmap[b];
+ } else {
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+// Walks 'size' stack words at p: a clear bit in the small bitmap marks a
+// pointer word, which is retained with parent c and retainer c_child_r.
+// Returns the address just past the last word examined.
+static INLINE StgPtr
+retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
+ StgClosure *c, retainer c_child_r)
+{
+ nat remaining;
+
+ for (remaining = size; remaining > 0; remaining--) {
+ if ((bitmap & 1) == 0) {
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ }
+ bitmap >>= 1;
+ p++;
+ }
+ return p;
+}
+
+/* -----------------------------------------------------------------------------
+ * Call retainClosure for each of the closures in an SRT.
+ * ------------------------------------------------------------------------- */
+
+// Large-SRT variant: the live entries of the SRT are described by the
+// multi-word bitmap in srt->l; a SET bit marks a live entry.
+static void
+retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
+{
+ nat i, b, size;
+ StgWord bitmap;
+ StgClosure **p;
+
+ b = 0;
+ p = (StgClosure **)srt->srt;
+ size = srt->l.size;
+ bitmap = srt->l.bitmap[b];
+ for (i = 0; i < size; ) {
+ if ((bitmap & 1) != 0) {
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ }
+ i++;
+ p++;
+ if (i % BITS_IN(W_) == 0) {
+ // crossed a word boundary: load the next bitmap word
+ b++;
+ bitmap = srt->l.bitmap[b];
+ } else {
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+// Walks a small (half-word) SRT bitmap, retaining each closure whose bit
+// is set; delegates to retain_large_srt_bitmap() when the bitmap is the
+// all-ones sentinel meaning "this SRT uses a large bitmap".
+// BUG FIX: the loop previously dereferenced the fixed base pointer 'srt'
+// while advancing the cursor 'p', so every set bit retained only the
+// FIRST SRT entry; the cursor 'p' must be dereferenced instead.
+static INLINE void
+retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
+{
+ nat bitmap;
+ StgClosure **p;
+
+ bitmap = srt_bitmap;
+ p = srt;
+
+ if (bitmap == (StgHalfWord)(-1)) {
+ retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
+ return;
+ }
+
+ while (bitmap != 0) {
+ if ((bitmap & 1) != 0) {
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+ // A tagged (low-bit-set) entry points at a DLL import slot which
+ // in turn holds the closure address; untag before dereferencing.
+ if ( (unsigned long)(*p) & 0x1 ) {
+ retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *p) & ~0x1)),
+ c, c_child_r);
+ } else {
+ retainClosure(*p,c,c_child_r);
+ }
+#else
+ retainClosure(*p,c,c_child_r);
+#endif
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Process all the objects in the stack chunk from stackStart to stackEnd
+ * with *c and *c_child_r being their parent and their most recent retainer,
+ * respectively. Treat stackOptionalFun as another child of *c if it is
+ * not NULL.
+ * Invariants:
+ * *c is one of the following: TSO, AP_STACK.
+ * If *c is TSO, c == c_child_r.
+ * stackStart < stackEnd.
+ * RSET(c) and RSET(c_child_r) are valid, i.e., their
+ * interpretation conforms to the current value of flip (even when they
+ * are interpreted to be NULL).
+ * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
+ * or ThreadKilled, which means that its stack is ready to process.
+ * Note:
+ * This code was almost plagiarized from GC.c! For each pointer,
+ * retainClosure() is invoked instead of evacuate().
+ * -------------------------------------------------------------------------- */
+static void
+retainStack( StgClosure *c, retainer c_child_r,
+ StgPtr stackStart, StgPtr stackEnd )
+{
+ stackElement *oldStackBoundary;
+ StgPtr p;
+ StgRetInfoTable *info;
+ StgWord32 bitmap;
+ nat size;
+
+#ifdef DEBUG_RETAINER
+ cStackSize++;
+ if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
+#endif
+
+ /*
+ Each invocation of retainStack() creates a new virtual
+ stack. Since all such stacks share a single common stack, we
+ record the current currentStackBoundary, which will be restored
+ at the exit.
+ */
+ oldStackBoundary = currentStackBoundary;
+ currentStackBoundary = stackTop;
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
+#endif
+
+ ASSERT(get_itbl(c)->type != TSO ||
+ (((StgTSO *)c)->what_next != ThreadRelocated &&
+ ((StgTSO *)c)->what_next != ThreadComplete &&
+ ((StgTSO *)c)->what_next != ThreadKilled));
+
+ // walk the stack one frame at a time, dispatching on the frame's
+ // return info table
+ p = stackStart;
+ while (p < stackEnd) {
+ info = get_ret_itbl((StgClosure *)p);
+
+ switch(info->i.type) {
+
+ case UPDATE_FRAME:
+ retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
+ p += sizeofW(StgUpdateFrame);
+ continue;
+
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case CATCH_STM_FRAME:
+ case CATCH_RETRY_FRAME:
+ case ATOMICALLY_FRAME:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ bitmap = BITMAP_BITS(info->i.layout.bitmap);
+ size = BITMAP_SIZE(info->i.layout.bitmap);
+ p++;
+ p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+
+ follow_srt:
+ retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
+ continue;
+
+ case RET_BCO: {
+ StgBCO *bco;
+
+ p++;
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ bco = (StgBCO *)*p;
+ p++;
+ size = BCO_BITMAP_SIZE(bco);
+ retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
+ p += size;
+ continue;
+ }
+
+ // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
+ case RET_BIG:
+ case RET_VEC_BIG:
+ size = GET_LARGE_BITMAP(&info->i)->size;
+ p++;
+ retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
+ size, c, c_child_r);
+ p += size;
+ // and don't forget to follow the SRT
+ goto follow_srt;
+
+ // Dynamic bitmap: the mask is stored on the stack
+ case RET_DYN: {
+ StgWord dyn;
+ dyn = ((StgRetDyn *)p)->liveness;
+
+ // traverse the bitmap first
+ bitmap = RET_DYN_LIVENESS(dyn);
+ p = (P_)&((StgRetDyn *)p)->payload[0];
+ size = RET_DYN_BITMAP_SIZE;
+ p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+
+ // skip over the non-ptr words
+ p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
+
+ // follow the ptr words
+ for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
+ retainClosure((StgClosure *)*p, c, c_child_r);
+ p++;
+ }
+ continue;
+ }
+
+ case RET_FUN: {
+ StgRetFun *ret_fun = (StgRetFun *)p;
+ StgFunInfoTable *fun_info;
+
+ retainClosure(ret_fun->fun, c, c_child_r);
+ fun_info = get_fun_itbl(ret_fun->fun);
+
+ // the argument layout comes from the function's info table
+ p = (P_)&ret_fun->payload;
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+ break;
+ case ARG_GEN_BIG:
+ size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
+ size, c, c_child_r);
+ p += size;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+ break;
+ }
+ goto follow_srt;
+ }
+
+ default:
+ barf("Invalid object found in retainStack(): %d",
+ (int)(info->i.type));
+ }
+ }
+
+ // restore currentStackBoundary
+ currentStackBoundary = oldStackBoundary;
+#ifdef DEBUG_RETAINER
+ // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
+#endif
+
+#ifdef DEBUG_RETAINER
+ cStackSize--;
+#endif
+}
+
+/* ----------------------------------------------------------------------------
+ * Call retainClosure for each of the children of a PAP/AP
+ * ------------------------------------------------------------------------- */
+
+// Retains the function closure and then every pointer argument in the
+// payload, using the function's argument bitmap to distinguish pointers
+// from non-pointers. Returns the address just past the payload consumed.
+static INLINE StgPtr
+retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
+ StgClosure** payload, StgWord n_args)
+{
+ StgPtr p;
+ StgWord bitmap;
+ StgFunInfoTable *fun_info;
+
+ retainClosure(fun, pap, c_child_r);
+ fun_info = get_fun_itbl(fun);
+ ASSERT(fun_info->i.type != PAP);
+
+ p = (StgPtr)payload;
+
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ p = retain_small_bitmap(p, n_args, bitmap,
+ pap, c_child_r);
+ break;
+ case ARG_GEN_BIG:
+ retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
+ n_args, pap, c_child_r);
+ p += n_args;
+ break;
+ case ARG_BCO:
+ retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
+ n_args, pap, c_child_r);
+ p += n_args;
+ break;
+ default:
+ // standard argument pattern: bitmap comes from the static table
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
+ break;
+ }
+ return p;
+}
+
+/* -----------------------------------------------------------------------------
+ * Compute the retainer set of *c0 and all its descendants by traversing.
+ * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
+ * Invariants:
+ * c0 = cp0 = r0 holds only for root objects.
+ * RSET(cp0) and RSET(r0) are valid, i.e., their
+ * interpretation conforms to the current value of flip (even when they
+ * are interpreted to be NULL).
+ * However, RSET(c0) may be corrupt, i.e., it may not conform to
+ * the current value of flip. If it does not, during the execution
+ * of this function, RSET(c0) must be initialized as well as all
+ * its descendants.
+ * Note:
+ * stackTop must be the same at the beginning and the exit of this function.
+ * *c0 can be TSO (as well as AP_STACK).
+ * -------------------------------------------------------------------------- */
+static void
+retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
+{
+ // c = Current closure
+ // cp = Current closure's Parent
+ // r = current closures' most recent Retainer
+ // c_child_r = current closure's children's most recent retainer
+ // first_child = first child of c
+ StgClosure *c, *cp, *first_child;
+ RetainerSet *s, *retainerSetOfc;
+ retainer r, c_child_r;
+ StgWord typeOfc;
+
+#ifdef DEBUG_RETAINER
+ // StgPtr oldStackTop;
+#endif
+
+#ifdef DEBUG_RETAINER
+ // oldStackTop = stackTop;
+ // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
+#endif
+
+ // (c, cp, r) = (c0, cp0, r0)
+ c = c0;
+ cp = cp0;
+ r = r0;
+ goto inner_loop;
+
+loop:
+ //debugBelch("loop");
+ // pop to (c, cp, r);
+ pop(&c, &cp, &r);
+
+ if (c == NULL) {
+#ifdef DEBUG_RETAINER
+ // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
+#endif
+ return;
+ }
+
+ //debugBelch("inner_loop");
+
+inner_loop:
+ // c = current closure under consideration,
+ // cp = current closure's parent,
+ // r = current closure's most recent retainer
+ //
+ // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
+ // RSET(cp) and RSET(r) are valid.
+ // RSET(c) is valid only if c has been visited before.
+ //
+ // Loop invariants (on the relation between c, cp, and r)
+ // if cp is not a retainer, r belongs to RSET(cp).
+ // if cp is a retainer, r == cp.
+
+ typeOfc = get_itbl(c)->type;
+
+#ifdef DEBUG_RETAINER
+ switch (typeOfc) {
+ case IND_STATIC:
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ case CONSTR_STATIC:
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ break;
+ default:
+ if (retainerSetOf(c) == NULL) { // first visit?
+ costArray[typeOfc] += cost(c);
+ sumOfNewCost += cost(c);
+ }
+ break;
+ }
+#endif
+
+ // special cases
+ switch (typeOfc) {
+ case TSO:
+ if (((StgTSO *)c)->what_next == ThreadComplete ||
+ ((StgTSO *)c)->what_next == ThreadKilled) {
+#ifdef DEBUG_RETAINER
+ debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
+#endif
+ goto loop;
+ }
+ if (((StgTSO *)c)->what_next == ThreadRelocated) {
+#ifdef DEBUG_RETAINER
+ debugBelch("ThreadRelocated encountered in retainClosure()\n");
+#endif
+ c = (StgClosure *)((StgTSO *)c)->link;
+ goto inner_loop;
+ }
+ break;
+
+ case IND_STATIC:
+ // We just skip IND_STATIC, so its retainer set is never computed.
+ c = ((StgIndStatic *)c)->indirectee;
+ goto inner_loop;
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ // static objects with no pointers out, so goto loop.
+ case CONSTR_NOCAF_STATIC:
+ // It is not just enough not to compute the retainer set for *c; it is
+ // mandatory because CONSTR_NOCAF_STATIC are not reachable from
+ // scavenged_static_objects, the list from which is assumed to traverse
+ // all static objects after major garbage collections.
+ goto loop;
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ if (get_itbl(c)->srt_bitmap == 0) {
+ // No need to compute the retainer set; no dynamic objects
+ // are reachable from *c.
+ //
+ // Static objects: if we traverse all the live closures,
+ // including static closures, during each heap census then
+ // we will observe that some static closures appear and
+ // disappear. eg. a closure may contain a pointer to a
+ // static function 'f' which is not otherwise reachable
+ // (it doesn't indirectly point to any CAFs, so it doesn't
+ // appear in any SRTs), so we would find 'f' during
+ // traversal. However on the next sweep there may be no
+ // closures pointing to 'f'.
+ //
+ // We must therefore ignore static closures whose SRT is
+ // empty, because these are exactly the closures that may
+ // "appear". A closure with a non-empty SRT, and which is
+ // still required, will always be reachable.
+ //
+ // But what about CONSTR_STATIC? Surely these may be able
+ // to appear, and they don't have SRTs, so we can't
+ // check. So for now, we're calling
+ // resetStaticObjectForRetainerProfiling() from the
+ // garbage collector to reset the retainer sets in all the
+ // reachable static objects.
+ goto loop;
+ }
+ default:
+ break;
+ }
+
+ // The above objects are ignored in computing the average number of times
+ // an object is visited.
+ timesAnyObjectVisited++;
+
+ // If this is the first visit to c, initialize its retainer set.
+ maybeInitRetainerSet(c);
+ retainerSetOfc = retainerSetOf(c);
+
+ // Now compute s:
+ // isRetainer(cp) == rtsTrue => s == NULL
+ // isRetainer(cp) == rtsFalse => s == cp.retainer
+ if (isRetainer(cp))
+ s = NULL;
+ else
+ s = retainerSetOf(cp);
+
+ // (c, cp, r, s) is available.
+
+ // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
+ if (retainerSetOfc == NULL) {
+ // This is the first visit to *c.
+ numObjectVisited++;
+
+ if (s == NULL)
+ associate(c, singleton(r));
+ else
+ // s is actually the retainer set of *c!
+ associate(c, s);
+
+ // compute c_child_r
+ c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
+ } else {
+ // This is not the first visit to *c.
+ if (isMember(r, retainerSetOfc))
+ goto loop; // no need to process child
+
+ if (s == NULL)
+ associate(c, addElement(r, retainerSetOfc));
+ else {
+ // s is not NULL and cp is not a retainer. This means that
+ // each time *cp is visited, so is *c. Thus, if s has
+ // exactly one more element in its retainer set than c, s
+ // is also the new retainer set for *c.
+ if (s->num == retainerSetOfc->num + 1) {
+ associate(c, s);
+ }
+ // Otherwise, just add R_r to the current retainer set of *c.
+ else {
+ associate(c, addElement(r, retainerSetOfc));
+ }
+ }
+
+ if (isRetainer(c))
+ goto loop; // no need to process child
+
+ // compute c_child_r
+ c_child_r = r;
+ }
+
+ // now, RSET() of all of *c, *cp, and *r is valid.
+ // (c, c_child_r) are available.
+
+ // process child
+
+ // Special case closures: we process these all in one go rather
+ // than attempting to save the current position, because doing so
+ // would be hard.
+ switch (typeOfc) {
+ case TSO:
+ retainStack(c, c_child_r,
+ ((StgTSO *)c)->sp,
+ ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
+ goto loop;
+
+ case PAP:
+ {
+ StgPAP *pap = (StgPAP *)c;
+ retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
+ goto loop;
+ }
+
+ case AP:
+ {
+ StgAP *ap = (StgAP *)c;
+ retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
+ goto loop;
+ }
+
+ case AP_STACK:
+ retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
+ retainStack(c, c_child_r,
+ (StgPtr)((StgAP_STACK *)c)->payload,
+ (StgPtr)((StgAP_STACK *)c)->payload +
+ ((StgAP_STACK *)c)->size);
+ goto loop;
+ }
+
+ push(c, c_child_r, &first_child);
+
+ // If first_child is null, c has no child.
+ // If first_child is not null, the top stack element points to the next
+ // object. push() may or may not push a stackElement on the stack.
+ if (first_child == NULL)
+ goto loop;
+
+ // (c, cp, r) = (first_child, c, c_child_r)
+ r = c_child_r;
+ cp = c;
+ c = first_child;
+ goto inner_loop;
+}
+
+/* -----------------------------------------------------------------------------
+ * Compute the retainer set for every object reachable from *tl.
+ * -------------------------------------------------------------------------- */
+static void
+retainRoot( StgClosure **tl )
+{
+ // We no longer assume that only TSOs and WEAKs are roots; any closure can
+ // be a root.
+
+ ASSERT(isEmptyRetainerStack());
+ currentStackBoundary = stackTop;
+
+ if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) {
+ retainClosure(*tl, *tl, getRetainerFrom(*tl));
+ } else {
+ retainClosure(*tl, *tl, CCS_SYSTEM);
+ }
+
+ // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
+ // *tl might be a TSO which is ThreadComplete, in which
+ // case we ignore it for the purposes of retainer profiling.
+}
+
+/* -----------------------------------------------------------------------------
+ * Compute the retainer set for each of the objects in the heap.
+ * -------------------------------------------------------------------------- */
+static void
+computeRetainerSet( void )
+{
+ StgWeak *weak;
+ RetainerSet *rtl;
+ nat g;
+ StgPtr ml;
+ bdescr *bd;
+#ifdef DEBUG_RETAINER
+ RetainerSet tmpRetainerSet;
+#endif
+
+ GetRoots(retainRoot); // for scheduler roots
+
+ // This function is called after a major GC, when key, value, and finalizer
+ // all are guaranteed to be valid, or reachable.
+ //
+ // The following code assumes that WEAK objects are considered to be roots
+    // for retainer profiling.
+ for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
+ // retainRoot((StgClosure *)weak);
+ retainRoot((StgClosure **)&weak);
+
+ // Consider roots from the stable ptr table.
+ markStablePtrTable(retainRoot);
+
+ // The following code resets the rs field of each unvisited mutable
+ // object (computing sumOfNewCostExtra and updating costArray[] when
+ // debugging retainer profiler).
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ // NOT TRUE: even G0 has a block on its mutable list
+ // ASSERT(g != 0 || (generations[g].mut_list == NULL));
+
+ // Traversing through mut_list is necessary
+ // because we can find MUT_VAR objects which have not been
+ // visited during retainer profiling.
+ for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+ for (ml = bd->start; ml < bd->free; ml++) {
+
+ maybeInitRetainerSet((StgClosure *)*ml);
+ rtl = retainerSetOf((StgClosure *)*ml);
+
+#ifdef DEBUG_RETAINER
+ if (rtl == NULL) {
+ // first visit to *ml
+ // This is a violation of the interface rule!
+ RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
+
+ switch (get_itbl((StgClosure *)ml)->type) {
+ case IND_STATIC:
+ // no cost involved
+ break;
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_NOCAF_STATIC:
+ case CONSTR_STATIC:
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
+ break;
+ default:
+ // dynamic objects
+ costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
+ sumOfNewCostExtra += cost((StgClosure *)ml);
+ break;
+ }
+ }
+#endif
+ }
+ }
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Traverse all static objects for which we compute retainer sets,
+ * and reset their rs fields to NULL, which is accomplished by
+ * invoking maybeInitRetainerSet(). This function must be called
+ * before zeroing all objects reachable from scavenged_static_objects
+ * in the case of major garbage collections. See GarbageCollect() in
+ * GC.c.
+ * Note:
+ * The mut_once_list of the oldest generation must also be traversed?
+ * Why? Because if the evacuation of an object pointed to by a static
+ * indirection object fails, it is put back to the mut_once_list of
+ * the oldest generation.
+ * However, this is not necessary because any static indirection objects
+ * are just traversed through to reach dynamic objects. In other words,
+ * they are not taken into consideration in computing retainer sets.
+ * -------------------------------------------------------------------------- */
+void
+resetStaticObjectForRetainerProfiling( void )
+{
+#ifdef DEBUG_RETAINER
+ nat count;
+#endif
+ StgClosure *p;
+
+#ifdef DEBUG_RETAINER
+ count = 0;
+#endif
+ p = scavenged_static_objects;
+ while (p != END_OF_STATIC_LIST) {
+#ifdef DEBUG_RETAINER
+ count++;
+#endif
+ switch (get_itbl(p)->type) {
+ case IND_STATIC:
+ // Since we do not compute the retainer set of any
+ // IND_STATIC object, we don't have to reset its retainer
+ // field.
+ p = (StgClosure*)*IND_STATIC_LINK(p);
+ break;
+ case THUNK_STATIC:
+ maybeInitRetainerSet(p);
+ p = (StgClosure*)*THUNK_STATIC_LINK(p);
+ break;
+ case FUN_STATIC:
+ maybeInitRetainerSet(p);
+ p = (StgClosure*)*FUN_STATIC_LINK(p);
+ break;
+ case CONSTR_STATIC:
+ maybeInitRetainerSet(p);
+ p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
+ break;
+ default:
+ barf("resetStaticObjectForRetainerProfiling: %p (%s)",
+ p, get_itbl(p)->type);
+ break;
+ }
+ }
+#ifdef DEBUG_RETAINER
+ // debugBelch("count in scavenged_static_objects = %d\n", count);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Perform retainer profiling.
+ * N is the oldest generation being profiled, where the generations are
+ * numbered starting at 0.
+ * Invariants:
+ * Note:
+ * This function should be called only immediately after major garbage
+ * collection.
+ * ------------------------------------------------------------------------- */
+void
+retainerProfile(void)
+{
+#ifdef DEBUG_RETAINER
+ nat i;
+ nat totalHeapSize; // total raw heap size (computed by linear scanning)
+#endif
+
+#ifdef DEBUG_RETAINER
+ debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
+#endif
+
+ stat_startRP();
+
+ // We haven't flipped the bit yet.
+#ifdef DEBUG_RETAINER
+ debugBelch("Before traversing:\n");
+ sumOfCostLinear = 0;
+ for (i = 0;i < N_CLOSURE_TYPES; i++)
+ costArrayLinear[i] = 0;
+ totalHeapSize = checkHeapSanityForRetainerProfiling();
+
+ debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+ /*
+ debugBelch("costArrayLinear[] = ");
+ for (i = 0;i < N_CLOSURE_TYPES; i++)
+ debugBelch("[%u:%u] ", i, costArrayLinear[i]);
+ debugBelch("\n");
+ */
+
+ ASSERT(sumOfCostLinear == totalHeapSize);
+
+/*
+#define pcostArrayLinear(index) \
+ if (costArrayLinear[index] > 0) \
+ debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
+ pcostArrayLinear(THUNK_STATIC);
+ pcostArrayLinear(FUN_STATIC);
+ pcostArrayLinear(CONSTR_STATIC);
+ pcostArrayLinear(CONSTR_NOCAF_STATIC);
+ pcostArrayLinear(CONSTR_INTLIKE);
+ pcostArrayLinear(CONSTR_CHARLIKE);
+*/
+#endif
+
+    // Now we flip the 'flip' bit.
+ flip = flip ^ 1;
+
+#ifdef DEBUG_RETAINER
+ stackSize = 0;
+ maxStackSize = 0;
+ cStackSize = 0;
+ maxCStackSize = 0;
+#endif
+ numObjectVisited = 0;
+ timesAnyObjectVisited = 0;
+
+#ifdef DEBUG_RETAINER
+ debugBelch("During traversing:\n");
+ sumOfNewCost = 0;
+ sumOfNewCostExtra = 0;
+ for (i = 0;i < N_CLOSURE_TYPES; i++)
+ costArray[i] = 0;
+#endif
+
+ /*
+ We initialize the traverse stack each time the retainer profiling is
+ performed (because the traverse stack size varies on each retainer profiling
+ and this operation is not costly anyhow). However, we just refresh the
+ retainer sets.
+ */
+ initializeTraverseStack();
+#ifdef DEBUG_RETAINER
+ initializeAllRetainerSet();
+#else
+ refreshAllRetainerSet();
+#endif
+ computeRetainerSet();
+
+#ifdef DEBUG_RETAINER
+ debugBelch("After traversing:\n");
+ sumOfCostLinear = 0;
+ for (i = 0;i < N_CLOSURE_TYPES; i++)
+ costArrayLinear[i] = 0;
+ totalHeapSize = checkHeapSanityForRetainerProfiling();
+
+ debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+ ASSERT(sumOfCostLinear == totalHeapSize);
+
+ // now, compare the two results
+ /*
+ Note:
+ costArray[] must be exactly the same as costArrayLinear[].
+ Known exceptions:
+ 1) Dead weak pointers, whose type is CONSTR. These objects are not
+ reachable from any roots.
+ */
+ debugBelch("Comparison:\n");
+ debugBelch("\tcostArrayLinear[] (must be empty) = ");
+ for (i = 0;i < N_CLOSURE_TYPES; i++)
+ if (costArray[i] != costArrayLinear[i])
+ // nothing should be printed except MUT_VAR after major GCs
+ debugBelch("[%u:%u] ", i, costArrayLinear[i]);
+ debugBelch("\n");
+
+ debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
+ debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
+ debugBelch("\tcostArray[] (must be empty) = ");
+ for (i = 0;i < N_CLOSURE_TYPES; i++)
+ if (costArray[i] != costArrayLinear[i])
+ // nothing should be printed except MUT_VAR after major GCs
+ debugBelch("[%u:%u] ", i, costArray[i]);
+ debugBelch("\n");
+
+ // only for major garbage collection
+ ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
+#endif
+
+ // post-processing
+ closeTraverseStack();
+#ifdef DEBUG_RETAINER
+ closeAllRetainerSet();
+#else
+ // Note that there is no post-processing for the retainer sets.
+#endif
+ retainerGeneration++;
+
+ stat_endRP(
+ retainerGeneration - 1, // retainerGeneration has just been incremented!
+#ifdef DEBUG_RETAINER
+ maxCStackSize, maxStackSize,
+#endif
+ (double)timesAnyObjectVisited / numObjectVisited);
+}
+
+/* -----------------------------------------------------------------------------
+ * DEBUGGING CODE
+ * -------------------------------------------------------------------------- */
+
+#ifdef DEBUG_RETAINER
+
+#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
+ ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
+ ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
+
+static nat
+sanityCheckHeapClosure( StgClosure *c )
+{
+ StgInfoTable *info;
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
+ ASSERT(!closure_STATIC(c));
+ ASSERT(LOOKS_LIKE_PTR(c));
+
+ if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
+ if (get_itbl(c)->type == CONSTR &&
+ !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
+ !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
+ debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
+ costArray[get_itbl(c)->type] += cost(c);
+ sumOfNewCost += cost(c);
+ } else
+ debugBelch(
+ "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
+ flip, c, get_itbl(c)->type,
+ get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
+ RSET(c));
+ } else {
+ // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
+ }
+
+ return closure_sizeW(c);
+}
+
+static nat
+heapCheck( bdescr *bd )
+{
+ StgPtr p;
+ static nat costSum, size;
+
+ costSum = 0;
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ size = sanityCheckHeapClosure((StgClosure *)p);
+ sumOfCostLinear += size;
+ costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+ p += size;
+ // no need for slop check; I think slops are not used currently.
+ }
+ ASSERT(p == bd->free);
+ costSum += bd->free - bd->start;
+ bd = bd->link;
+ }
+
+ return costSum;
+}
+
+static nat
+smallObjectPoolCheck(void)
+{
+ bdescr *bd;
+ StgPtr p;
+ static nat costSum, size;
+
+ bd = small_alloc_list;
+ costSum = 0;
+
+ // first block
+ if (bd == NULL)
+ return costSum;
+
+ p = bd->start;
+ while (p < alloc_Hp) {
+ size = sanityCheckHeapClosure((StgClosure *)p);
+ sumOfCostLinear += size;
+ costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+ p += size;
+ }
+ ASSERT(p == alloc_Hp);
+ costSum += alloc_Hp - bd->start;
+
+ bd = bd->link;
+ while (bd != NULL) {
+ p = bd->start;
+ while (p < bd->free) {
+ size = sanityCheckHeapClosure((StgClosure *)p);
+ sumOfCostLinear += size;
+ costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+ p += size;
+ }
+ ASSERT(p == bd->free);
+ costSum += bd->free - bd->start;
+ bd = bd->link;
+ }
+
+ return costSum;
+}
+
+static nat
+chainCheck(bdescr *bd)
+{
+ nat costSum, size;
+
+ costSum = 0;
+ while (bd != NULL) {
+ // bd->free - bd->start is not an accurate measurement of the
+ // object size. Actually it is always zero, so we compute its
+ // size explicitly.
+ size = sanityCheckHeapClosure((StgClosure *)bd->start);
+ sumOfCostLinear += size;
+ costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
+ costSum += size;
+ bd = bd->link;
+ }
+
+ return costSum;
+}
+
+static nat
+checkHeapSanityForRetainerProfiling( void )
+{
+ nat costSum, g, s;
+
+ costSum = 0;
+ debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ if (RtsFlags.GcFlags.generations == 1) {
+ costSum += heapCheck(g0s0->to_blocks);
+ debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ costSum += chainCheck(g0s0->large_objects);
+ debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ } else {
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+ for (s = 0; s < generations[g].n_steps; s++) {
+ /*
+ After all live objects have been scavenged, the garbage
+ collector may create some objects in
+	      scheduleFinalizers(). These objects are created through
+ allocate(), so the small object pool or the large object
+ pool of the g0s0 may not be empty.
+ */
+ if (g == 0 && s == 0) {
+ costSum += smallObjectPoolCheck();
+ debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ costSum += chainCheck(generations[g].steps[s].large_objects);
+ debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ } else {
+ costSum += heapCheck(generations[g].steps[s].blocks);
+ debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ costSum += chainCheck(generations[g].steps[s].large_objects);
+ debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+ }
+ }
+ }
+
+ return costSum;
+}
+
+void
+findPointer(StgPtr p)
+{
+ StgPtr q, r, e;
+ bdescr *bd;
+ nat g, s;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ // if (g == 0 && s == 0) continue;
+ bd = generations[g].steps[s].blocks;
+ for (; bd; bd = bd->link) {
+ for (q = bd->start; q < bd->free; q++) {
+ if (*q == (StgWord)p) {
+ r = q;
+ while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
+ debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
+ // return;
+ }
+ }
+ }
+ bd = generations[g].steps[s].large_objects;
+ for (; bd; bd = bd->link) {
+ e = bd->start + cost((StgClosure *)bd->start);
+ for (q = bd->start; q < e; q++) {
+ if (*q == (StgWord)p) {
+ r = q;
+ while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
+ debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
+ // return;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+belongToHeap(StgPtr p)
+{
+ bdescr *bd;
+ nat g, s;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ // if (g == 0 && s == 0) continue;
+ bd = generations[g].steps[s].blocks;
+ for (; bd; bd = bd->link) {
+ if (bd->start <= p && p < bd->free) {
+ debugBelch("Belongs to gen[%d], step[%d]", g, s);
+ return;
+ }
+ }
+ bd = generations[g].steps[s].large_objects;
+ for (; bd; bd = bd->link) {
+ if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
+ debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
+ return;
+ }
+ }
+ }
+ }
+}
+#endif /* DEBUG_RETAINER */
+
+#endif /* PROFILING */
diff --git a/rts/RetainerProfile.h b/rts/RetainerProfile.h
new file mode 100644
index 0000000000..827daa8ef4
--- /dev/null
+++ b/rts/RetainerProfile.h
@@ -0,0 +1,47 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer profiling interface.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RETAINERPROFILE_H
+#define RETAINERPROFILE_H
+
+#ifdef PROFILING
+
+#include "RetainerSet.h"
+
+extern void initRetainerProfiling ( void );
+extern void endRetainerProfiling ( void );
+extern void printRetainer ( FILE *, retainer );
+extern void retainerProfile ( void );
+extern void resetStaticObjectForRetainerProfiling ( void );
+
+extern StgWord RTS_VAR(flip);
+
+// extract the retainer set field from c
+#define RSET(c) ((c)->header.prof.hp.rs)
+
+#define isRetainerSetFieldValid(c) \
+ ((((StgWord)(c)->header.prof.hp.rs & 1) ^ flip) == 0)
+
+static inline RetainerSet *
+retainerSetOf( StgClosure *c )
+{
+ ASSERT( isRetainerSetFieldValid(c) );
+ // StgWord has the same size as pointers, so the following type
+ // casting is okay.
+ return (RetainerSet *)((StgWord)RSET(c) ^ flip);
+}
+
+// Used by Storage.c:memInventory()
+#ifdef DEBUG
+extern lnat retainerStackBlocks ( void );
+#endif
+
+#endif /* PROFILING */
+
+#endif /* RETAINERPROFILE_H */
diff --git a/rts/RetainerSet.c b/rts/RetainerSet.c
new file mode 100644
index 0000000000..bfa0bc8acf
--- /dev/null
+++ b/rts/RetainerSet.c
@@ -0,0 +1,498 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer set implementation for retainer profiling (see RetainerProfile.c)
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "Stats.h"
+#include "RtsUtils.h"
+#include "RetainerSet.h"
+#include "Arena.h"
+#include "Profiling.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define HASH_TABLE_SIZE 255
+#define hash(hk) (hk % HASH_TABLE_SIZE)
+static RetainerSet *hashTable[HASH_TABLE_SIZE];
+
+static Arena *arena; // arena in which we store retainer sets
+
+static int nextId; // id of next retainer set
+
+/* -----------------------------------------------------------------------------
+ * rs_MANY is a distinguished retainer set, such that
+ *
+ * isMember(e, rs_MANY) = True
+ *
+ * addElement(e, rs) = rs_MANY, if rs->num >= maxRetainerSetSize
+ * addElement(e, rs_MANY) = rs_MANY
+ *
+ * The point of rs_MANY is to keep the total number of retainer sets
+ * from growing too large.
+ * -------------------------------------------------------------------------- */
+RetainerSet rs_MANY = {
+ num : 0,
+ hashKey : 0,
+ link : NULL,
+ id : 1,
+ element : {}
+};
+
+/* -----------------------------------------------------------------------------
+ * calculate the size of a RetainerSet structure
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE size_t
+sizeofRetainerSet( int elems )
+{
+ return (sizeof(RetainerSet) + elems * sizeof(retainer));
+}
+
+/* -----------------------------------------------------------------------------
+ * Creates the first pool and initializes hashTable[].
+ * Frees all pools if any.
+ * -------------------------------------------------------------------------- */
+void
+initializeAllRetainerSet(void)
+{
+ int i;
+
+ arena = newArena();
+
+ for (i = 0; i < HASH_TABLE_SIZE; i++)
+ hashTable[i] = NULL;
+ nextId = 2; // Initial value must be positive, 2 is MANY.
+}
+
+/* -----------------------------------------------------------------------------
+ * Refreshes all pools for reuse and initializes hashTable[].
+ * -------------------------------------------------------------------------- */
+void
+refreshAllRetainerSet(void)
+{
+#ifdef FIRST_APPROACH
+ int i;
+
+ // first approach: completely refresh
+ arenaFree(arena);
+ arena = newArena();
+
+ for (i = 0; i < HASH_TABLE_SIZE; i++)
+ hashTable[i] = NULL;
+ nextId = 2;
+#endif /* FIRST_APPROACH */
+}
+
+/* -----------------------------------------------------------------------------
+ * Frees all pools.
+ * -------------------------------------------------------------------------- */
+void
+closeAllRetainerSet(void)
+{
+ arenaFree(arena);
+}
+
+/* -----------------------------------------------------------------------------
+ * Finds or creates if needed a singleton retainer set.
+ * -------------------------------------------------------------------------- */
+RetainerSet *
+singleton(retainer r)
+{
+ RetainerSet *rs;
+ StgWord hk;
+
+ hk = hashKeySingleton(r);
+ for (rs = hashTable[hash(hk)]; rs != NULL; rs = rs->link)
+ if (rs->num == 1 && rs->element[0] == r) return rs; // found it
+
+ // create it
+ rs = arenaAlloc( arena, sizeofRetainerSet(1) );
+ rs->num = 1;
+ rs->hashKey = hk;
+ rs->link = hashTable[hash(hk)];
+ rs->id = nextId++;
+ rs->element[0] = r;
+
+ // The new retainer set is placed at the head of the linked list.
+ hashTable[hash(hk)] = rs;
+
+ return rs;
+}
+
+/* -----------------------------------------------------------------------------
+ * Finds or creates a retainer set *rs augmented with r.
+ * Invariants:
+ * r is not a member of rs, i.e., isMember(r, rs) returns rtsFalse.
+ * rs is not NULL.
+ * Note:
+ * We could check if rs is NULL, in which case this function call
+ * reverts to singleton(). We do not choose this strategy because
+ * in most cases addElement() is invoked with non-NULL rs.
+ * -------------------------------------------------------------------------- */
+RetainerSet *
+addElement(retainer r, RetainerSet *rs)
+{
+ nat i;
+ nat nl; // Number of retainers in *rs Less than r
+ RetainerSet *nrs; // New Retainer Set
+ StgWord hk; // Hash Key
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("addElement(%p, %p) = ", r, rs);
+#endif
+
+ ASSERT(rs != NULL);
+ ASSERT(rs->num <= RtsFlags.ProfFlags.maxRetainerSetSize);
+
+ if (rs == &rs_MANY || rs->num == RtsFlags.ProfFlags.maxRetainerSetSize) {
+ return &rs_MANY;
+ }
+
+ ASSERT(!isMember(r, rs));
+
+ for (nl = 0; nl < rs->num; nl++)
+ if (r < rs->element[nl]) break;
+ // Now nl is the index for r into the new set.
+ // Also it denotes the number of retainers less than r in *rs.
+ // Thus, compare the first nl retainers, then r itself, and finally the
+ // remaining (rs->num - nl) retainers.
+
+ hk = hashKeyAddElement(r, rs);
+ for (nrs = hashTable[hash(hk)]; nrs != NULL; nrs = nrs->link) {
+ // test *rs and *nrs for equality
+
+ // check their size
+ if (rs->num + 1 != nrs->num) continue;
+
+ // compare the first nl retainers and find the first non-matching one.
+ for (i = 0; i < nl; i++)
+ if (rs->element[i] != nrs->element[i]) break;
+ if (i < nl) continue;
+
+ // compare r itself
+ if (r != nrs->element[i]) continue; // i == nl
+
+ // compare the remaining retainers
+ for (; i < rs->num; i++)
+ if (rs->element[i] != nrs->element[i + 1]) break;
+ if (i < rs->num) continue;
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("%p\n", nrs);
+#endif
+ // The set we are seeking already exists!
+ return nrs;
+ }
+
+ // create a new retainer set
+ nrs = arenaAlloc( arena, sizeofRetainerSet(rs->num + 1) );
+ nrs->num = rs->num + 1;
+ nrs->hashKey = hk;
+ nrs->link = hashTable[hash(hk)];
+ nrs->id = nextId++;
+ for (i = 0; i < nl; i++) { // copy the first nl retainers
+ nrs->element[i] = rs->element[i];
+ }
+ nrs->element[i] = r; // copy r
+ for (; i < rs->num; i++) { // copy the remaining retainers
+ nrs->element[i + 1] = rs->element[i];
+ }
+
+ hashTable[hash(hk)] = nrs;
+
+#ifdef DEBUG_RETAINER
+ // debugBelch("%p\n", nrs);
+#endif
+ return nrs;
+}
+
+/* -----------------------------------------------------------------------------
+ * Call f() for each retainer set.
+ * -------------------------------------------------------------------------- */
+void
+traverseAllRetainerSet(void (*f)(RetainerSet *))
+{
+ int i;
+ RetainerSet *rs;
+
+ (*f)(&rs_MANY);
+ for (i = 0; i < HASH_TABLE_SIZE; i++)
+ for (rs = hashTable[i]; rs != NULL; rs = rs->link)
+ (*f)(rs);
+}
+
+
+/* -----------------------------------------------------------------------------
+ * printRetainer() prints the full information on a given retainer,
+ * not a retainer set.
+ * -------------------------------------------------------------------------- */
+#if defined(RETAINER_SCHEME_INFO)
+// Retainer scheme 1: retainer = info table
+void
+printRetainer(FILE *f, retainer itbl)
+{
+ fprintf(f, "%s[%s]", itbl->prof.closure_desc, itbl->prof.closure_type);
+}
+#elif defined(RETAINER_SCHEME_CCS)
+// Retainer scheme 2: retainer = cost centre stack
+void
+printRetainer(FILE *f, retainer ccs)
+{
+ fprintCCS(f, ccs);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+void
+printRetainer(FILE *f, retainer cc)
+{
+ fprintf(f,"%s.%s", cc->module, cc->label);
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * printRetainerSetShort() should always display the same output for
+ * a given retainer set regardless of the time of invocation.
+ * -------------------------------------------------------------------------- */
+#ifdef SECOND_APPROACH
+#if defined(RETAINER_SCHEME_INFO)
+// Retainer scheme 1: retainer = info table
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+ char tmp[MAX_RETAINER_SET_SPACE + 1];
+ int size;
+ nat j;
+
+ ASSERT(rs->id < 0);
+
+ tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+ // No blank characters are allowed.
+ sprintf(tmp + 0, "(%d)", -(rs->id));
+ size = strlen(tmp);
+ ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+ for (j = 0; j < rs->num; j++) {
+ if (j < rs->num - 1) {
+ strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size);
+ size = strlen(tmp);
+ if (size == MAX_RETAINER_SET_SPACE)
+ break;
+ strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+ size = strlen(tmp);
+ if (size == MAX_RETAINER_SET_SPACE)
+ break;
+ }
+ else {
+ strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size);
+ // size = strlen(tmp);
+ }
+ }
+ fprintf(f, tmp);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+ char tmp[MAX_RETAINER_SET_SPACE + 1];
+ int size;
+ nat j;
+
+}
+#elif defined(RETAINER_SCHEME_CCS)
+// Retainer scheme 2: retainer = cost centre stack
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+ char tmp[MAX_RETAINER_SET_SPACE + 1];
+ int size;
+ nat j;
+
+ ASSERT(rs->id < 0);
+
+ tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+ // No blank characters are allowed.
+ sprintf(tmp + 0, "(%d)", -(rs->id));
+ size = strlen(tmp);
+ ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+ for (j = 0; j < rs->num; j++) {
+ if (j < rs->num - 1) {
+ strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size);
+ size = strlen(tmp);
+ if (size == MAX_RETAINER_SET_SPACE)
+ break;
+ strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+ size = strlen(tmp);
+ if (size == MAX_RETAINER_SET_SPACE)
+ break;
+ }
+ else {
+ strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size);
+ // size = strlen(tmp);
+ }
+ }
+ fprintf(f, tmp);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+static void
+printRetainerSetShort(FILE *f, retainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE 24
+ char tmp[MAX_RETAINER_SET_SPACE + 1];
+ int size;
+ nat j;
+
+ ASSERT(rs->id < 0);
+
+ tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+ // No blank characters are allowed.
+ sprintf(tmp + 0, "(%d)", -(rs->id));
+ size = strlen(tmp);
+ ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+ for (j = 0; j < rs->num; j++) {
+ if (j < rs->num - 1) {
+ strncpy(tmp + size, rs->element[j]->label,
+ MAX_RETAINER_SET_SPACE - size);
+ size = strlen(tmp);
+ if (size == MAX_RETAINER_SET_SPACE)
+ break;
+ strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+ size = strlen(tmp);
+ if (size == MAX_RETAINER_SET_SPACE)
+ break;
+ }
+ else {
+ strncpy(tmp + size, rs->element[j]->label,
+ MAX_RETAINER_SET_SPACE - size);
+ // size = strlen(tmp);
+ }
+ }
+ fprintf(f, tmp);
+/*
+ #define MAX_RETAINER_SET_SPACE 24
+ #define DOT_NUMBER 3
+ // 1. 32 > MAX_RETAINER_SET_SPACE + 1 (1 for '\0')
+ // 2. (MAX_RETAINER_SET_SPACE - DOT_NUMBER ) characters should be enough for
+ // printing one natural number (plus '(' and ')').
+ char tmp[32];
+ int size, ts;
+ nat j;
+
+ ASSERT(rs->id < 0);
+
+ // No blank characters are allowed.
+ sprintf(tmp + 0, "(%d)", -(rs->id));
+ size = strlen(tmp);
+ ASSERT(size < MAX_RETAINER_SET_SPACE - DOT_NUMBER);
+
+ for (j = 0; j < rs->num; j++) {
+ ts = strlen(rs->element[j]->label);
+ if (j < rs->num - 1) {
+ if (size + ts + 1 > MAX_RETAINER_SET_SPACE - DOT_NUMBER) {
+ sprintf(tmp + size, "...");
+ break;
+ }
+ sprintf(tmp + size, "%s,", rs->element[j]->label);
+ size += ts + 1;
+ }
+ else {
+ if (size + ts > MAX_RETAINER_SET_SPACE - DOT_NUMBER) {
+ sprintf(tmp + size, "...");
+ break;
+ }
+ sprintf(tmp + size, "%s", rs->element[j]->label);
+ size += ts;
+ }
+ }
+ fprintf(f, tmp);
+*/
+}
+#endif /* RETAINER_SCHEME_CC */
+#endif /* SECOND_APPROACH */
+
+/* -----------------------------------------------------------------------------
+ * Dump the contents of each retainer set into the log file at the end
+ * of the run, so the user can find out for a given retainer set ID
+ * the full contents of that set.
+ * --------------------------------------------------------------------------- */
+#ifdef SECOND_APPROACH
+void
+outputAllRetainerSet(FILE *prof_file)
+{
+ nat i, j;
+ nat numSet;
+ RetainerSet *rs, **rsArray, *tmp;
+
+ // find out the number of retainer sets which have had a non-zero cost at
+ // least once during retainer profiling
+ numSet = 0;
+ for (i = 0; i < HASH_TABLE_SIZE; i++)
+ for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+ if (rs->id < 0)
+ numSet++;
+ }
+
+ if (numSet == 0) // retainer profiling was not done at all.
+ return;
+
+ // allocate memory
+ rsArray = stgMallocBytes(numSet * sizeof(RetainerSet *),
+ "outputAllRetainerSet()");
+
+ // prepare for sorting
+ j = 0;
+ for (i = 0; i < HASH_TABLE_SIZE; i++)
+ for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+ if (rs->id < 0) {
+ rsArray[j] = rs;
+ j++;
+ }
+ }
+
+ ASSERT(j == numSet);
+
+ // sort rsArray[] according to the id of each retainer set
+ for (i = numSet - 1; i > 0; i--) {
+ for (j = 0; j <= i - 1; j++) {
+ // if (-(rsArray[j]->id) < -(rsArray[j + 1]->id))
+ if (rsArray[j]->id < rsArray[j + 1]->id) {
+ tmp = rsArray[j];
+ rsArray[j] = rsArray[j + 1];
+ rsArray[j + 1] = tmp;
+ }
+ }
+ }
+
+ fprintf(prof_file, "\nRetainer sets created during profiling:\n");
+ for (i = 0;i < numSet; i++) {
+ fprintf(prof_file, "SET %u = {", -(rsArray[i]->id));
+ for (j = 0; j < rsArray[i]->num - 1; j++) {
+ printRetainer(prof_file, rsArray[i]->element[j]);
+ fprintf(prof_file, ", ");
+ }
+ printRetainer(prof_file, rsArray[i]->element[j]);
+ fprintf(prof_file, "}\n");
+ }
+
+ stgFree(rsArray);
+}
+#endif /* SECOND_APPROACH */
+
+#endif /* PROFILING */
diff --git a/rts/RetainerSet.h b/rts/RetainerSet.h
new file mode 100644
index 0000000000..6a00e1395e
--- /dev/null
+++ b/rts/RetainerSet.h
@@ -0,0 +1,201 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer set interface for retainer profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RETAINERSET_H
+#define RETAINERSET_H
+
+#include <stdio.h>
+
+#ifdef PROFILING
+
+/*
+ Type 'retainer' defines the retainer identity.
+
+ Invariant:
+ 1. The retainer identity of a given retainer cannot change during
+ program execution, no matter where it is actually stored.
+ For instance, the memory address of a retainer cannot be used as
+ its retainer identity because its location may change during garbage
+ collections.
+ 2. Type 'retainer' must come with comparison operations as well as
+ an equality operation. That is, <, >, and == must be supported -
+ this is necessary to store retainers in a sorted order in retainer sets.
+ Therefore, you cannot use a huge structure type as 'retainer', for instance.
+
+ We illustrate three possibilities of defining 'retainer identity'.
+ Choose one of the following three compiler directives:
+
+ Retainer scheme 1 (RETAINER_SCHEME_INFO) : retainer = info table
+ Retainer scheme 2 (RETAINER_SCHEME_CCS) : retainer = cost centre stack
+ Retainer scheme 3 (RETAINER_SCHEME_CC) : retainer = cost centre
+*/
+
+// #define RETAINER_SCHEME_INFO
+#define RETAINER_SCHEME_CCS
+// #define RETAINER_SCHEME_CC
+
+#ifdef RETAINER_SCHEME_INFO
+struct _StgInfoTable;
+typedef struct _StgInfoTable *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CCS
+typedef CostCentreStack *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CC
+typedef CostCentre *retainer;
+#endif
+
+/*
+ Type 'retainerSet' defines an abstract datatype for sets of retainers.
+
+ Invariants:
+ A retainer set stores its elements in increasing order (in element[] array).
+ */
+
+typedef struct _RetainerSet {
+ nat num; // number of elements
+ StgWord hashKey; // hash key for this retainer set
+ struct _RetainerSet *link; // link to the next retainer set in the bucket
+ int id; // unique id of this retainer set (used when printing)
+ // Its absolute value is interpreted as its true id; if id is
+ // negative, it indicates that this retainer set has had a positive
+ // cost after some retainer profiling.
+ retainer element[0]; // elements of this retainer set
+ // do not put anything below here!
+} RetainerSet;
+
+/*
+ Note:
+ There are two ways of maintaining all retainer sets. The first is simply by
+ freeing all the retainer sets and re-initialize the hash table at each
+ retainer profiling. The second is by setting the cost field of each
+ retainer set. The second is preferred to the first if most retainer sets
+ are likely to be observed again during the next retainer profiling. Note
+ that in the first approach, we do not free the memory allocated for
+ retainer sets; we just invalidate all retainer sets.
+ */
+#ifdef DEBUG_RETAINER
+// In this case, FIRST_APPROACH must be turned on because the memory pool
+// for retainer sets is freed each time.
+#define FIRST_APPROACH
+#else
+// #define FIRST_APPROACH
+#define SECOND_APPROACH
+#endif
+
+// Creates the first pool and initializes a hash table. Frees all pools if any.
+void initializeAllRetainerSet(void);
+
+// Refreshes all pools for reuse and initializes a hash table.
+void refreshAllRetainerSet(void);
+
+// Frees all pools.
+void closeAllRetainerSet(void);
+
+// Finds or creates if needed a singleton retainer set.
+RetainerSet *singleton(retainer r);
+
+extern RetainerSet rs_MANY;
+
+// Checks if a given retainer is a member of the retainer set.
+//
+// Note & (maybe) Todo:
+// This function needs to be declared as an inline function, so it is declared
+// as an inline static function here.
+// This makes the interface really bad, but isMember() returns a value, so
+// it is not easy either to write it as a macro (due to my lack of C
+// programming experience). Sungwoo
+//
+// rtsBool isMember(retainer, retainerSet *);
+/*
+ Returns rtsTrue if r is a member of *rs.
+ Invariants:
+ rs is not NULL.
+ Note:
+ The efficiency of this function is subject to the typical size of
+ retainer sets. If it is small, linear scan is better. If it
+ is large in most cases, binary scan is better.
+ The current implementation mixes the two search strategies.
+ */
+
+#define BINARY_SEARCH_THRESHOLD 8
+INLINE_HEADER rtsBool
+isMember(retainer r, RetainerSet *rs)
+{
+ int i, left, right; // must be int, not nat (because -1 can appear)
+ retainer ri;
+
+ if (rs == &rs_MANY) { return rtsTrue; }
+
+ if (rs->num < BINARY_SEARCH_THRESHOLD) {
+ for (i = 0; i < (int)rs->num; i++) {
+ ri = rs->element[i];
+ if (r == ri) return rtsTrue;
+ else if (r < ri) return rtsFalse;
+ }
+ } else {
+ left = 0;
+ right = rs->num - 1;
+ while (left <= right) {
+ i = (left + right) / 2;
+ ri = rs->element[i];
+ if (r == ri) return rtsTrue;
+ else if (r < ri) right = i - 1;
+ else left = i + 1;
+ }
+ }
+ return rtsFalse;
+}
+
+// Finds or creates a retainer set augmented with a new retainer.
+RetainerSet *addElement(retainer, RetainerSet *);
+
+// Call f() for each retainer set.
+void traverseAllRetainerSet(void (*f)(RetainerSet *));
+
+#ifdef SECOND_APPROACH
+// Prints a single retainer set.
+void printRetainerSetShort(FILE *, RetainerSet *);
+#endif
+
+// Print the statistics on all the retainer sets.
+// store the sum of all costs and the number of all retainer sets.
+void outputRetainerSet(FILE *, nat *, nat *);
+
+#ifdef SECOND_APPROACH
+// Print all retainer sets at the exit of the program.
+void outputAllRetainerSet(FILE *);
+#endif
+
+// Hashing functions
+/*
+ Invariants:
+ Once either initializeAllRetainerSet() or refreshAllRetainerSet()
+ is called, there exists only one copy of any retainer set created
+ through singleton() and addElement(). The pool (the storage for
+ retainer sets) is consumed linearly. All the retainer sets of the
+ same hash function value are linked together from an element in
+ hashTable[]. See the invariants of allocateInPool() for the
+ maximum size of retainer sets. The hashing function is defined by
+ hashKeySingleton() and hashKeyAddElement(). The hash key for a set
+ must be unique regardless of the order its elements are inserted,
+ i.e., the hashing function must be additive(?).
+*/
+#define hashKeySingleton(r) ((StgWord)(r))
+#define hashKeyAddElement(r, s) (hashKeySingleton((r)) + (s)->hashKey)
+
+// Prints the full information on a given retainer.
+// Note: This function is not part of retainerSet interface, but this is
+// the best place to define it.
+void printRetainer(FILE *, retainer);
+
+#endif /* PROFILING */
+#endif /* RETAINERSET_H */
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
new file mode 100644
index 0000000000..b1b1d9c52d
--- /dev/null
+++ b/rts/RtsAPI.c
@@ -0,0 +1,597 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2001
+ *
+ * API for invoking Haskell functions via the RTS
+ *
+ * --------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "OSThreads.h"
+#include "Storage.h"
+#include "RtsAPI.h"
+#include "SchedAPI.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Prelude.h"
+#include "Schedule.h"
+#include "Capability.h"
+
+#include <stdlib.h>
+
+/* ----------------------------------------------------------------------------
+ Building Haskell objects from C datatypes.
+ ------------------------------------------------------------------------- */
+HaskellObj
+rts_mkChar (Capability *cap, HsChar c)
+{
+ StgClosure *p = (StgClosure *)allocateLocal(cap, CONSTR_sizeW(0,1));
+ SET_HDR(p, Czh_con_info, CCS_SYSTEM);
+ p->payload[0] = (StgClosure *)(StgWord)(StgChar)c;
+ return p;
+}
+
+HaskellObj
+rts_mkInt (Capability *cap, HsInt i)
+{
+ StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ SET_HDR(p, Izh_con_info, CCS_SYSTEM);
+ p->payload[0] = (StgClosure *)(StgInt)i;
+ return p;
+}
+
+HaskellObj
+rts_mkInt8 (Capability *cap, HsInt8 i)
+{
+ StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
+ /* Make sure we mask out the bits above the lowest 8 */
+ p->payload[0] = (StgClosure *)(StgInt)((unsigned)i & 0xff);
+ return p;
+}
+
+HaskellObj
+rts_mkInt16 (Capability *cap, HsInt16 i)
+{
+ StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
+ /* Make sure we mask out the relevant bits */
+ p->payload[0] = (StgClosure *)(StgInt)((unsigned)i & 0xffff);
+ return p;
+}
+
+HaskellObj
+rts_mkInt32 (Capability *cap, HsInt32 i)
+{
+ StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+ SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
+ p->payload[0] = (StgClosure *)(StgInt)((unsigned)i & 0xffffffff);
+ return p;
+}
+
HaskellObj
rts_mkInt64 (Capability *cap, HsInt64 i)
{
  llong *tmp;
  // two payload words: a 64-bit value may span two machine words
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2));
  SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
  // Store the 64-bit value through an llong pointer into the payload.
  // NOTE(review): assumes the payload is sufficiently aligned for a
  // 64-bit store -- TODO confirm on 32-bit targets.
  tmp = (llong*)&(p->payload[0]);
  *tmp = (StgInt64)i;
  return p;
}

HaskellObj
rts_mkWord (Capability *cap, HsWord i)
{
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
  SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)(StgWord)i;
  return p;
}

HaskellObj
rts_mkWord8 (Capability *cap, HsWord8 w)
{
  /* see rts_mkInt* comments: mask down to the low 8 bits */
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
  SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)(StgWord)(w & 0xff);
  return p;
}

HaskellObj
rts_mkWord16 (Capability *cap, HsWord16 w)
{
  /* see rts_mkInt* comments: mask down to the low 16 bits */
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
  SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)(StgWord)(w & 0xffff);
  return p;
}

HaskellObj
rts_mkWord32 (Capability *cap, HsWord32 w)
{
  /* see rts_mkInt* comments: mask down to the low 32 bits */
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
  SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)(StgWord)(w & 0xffffffff);
  return p;
}

HaskellObj
rts_mkWord64 (Capability *cap, HsWord64 w)
{
  ullong *tmp;

  // two payload words, as in rts_mkInt64
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2));
  /* see mk_Int8 comment */
  SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
  // NOTE(review): same alignment assumption as rts_mkInt64.
  tmp = (ullong*)&(p->payload[0]);
  *tmp = (StgWord64)w;
  return p;
}
+
HaskellObj
rts_mkFloat (Capability *cap, HsFloat f)
{
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
  SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
  // ASSIGN_FLT writes the float value into the payload word(s)
  ASSIGN_FLT((P_)p->payload, (StgFloat)f);
  return p;
}

HaskellObj
rts_mkDouble (Capability *cap, HsDouble d)
{
  // payload sized for a full StgDouble (may be more than one word)
  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
  SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
  ASSIGN_DBL((P_)p->payload, (StgDouble)d);
  return p;
}

HaskellObj
rts_mkStablePtr (Capability *cap, HsStablePtr s)
{
  // header plus one payload word holding the stable pointer
  StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
  SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)s;
  return p;
}

HaskellObj
rts_mkPtr (Capability *cap, HsPtr a)
{
  // header plus one payload word holding the raw address
  StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
  SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)a;
  return p;
}

HaskellObj
rts_mkFunPtr (Capability *cap, HsFunPtr a)
{
  // header plus one payload word holding the function address
  StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
  SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
  p->payload[0] = (StgClosure *)a;
  return p;
}
+
+HaskellObj
+rts_mkBool (Capability *cap STG_UNUSED, HsBool b)
+{
+ if (b) {
+ return (StgClosure *)True_closure;
+ } else {
+ return (StgClosure *)False_closure;
+ }
+}
+
+HaskellObj
+rts_mkString (Capability *cap, char *s)
+{
+ return rts_apply(cap, (StgClosure *)unpackCString_closure, rts_mkPtr(cap,s));
+}
+
+HaskellObj
+rts_apply (Capability *cap, HaskellObj f, HaskellObj arg)
+{
+ StgThunk *ap;
+
+ ap = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk) + 2);
+ SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_SYSTEM);
+ ap->payload[0] = f;
+ ap->payload[1] = arg;
+ return (StgClosure *)ap;
+}
+
/* ----------------------------------------------------------------------------
   Deconstructing Haskell objects

   We would like to assert that we have the right kind of object in
   each case, but this is problematic because in GHCi the info table
   for the D# constructor (say) might be dynamically loaded.  Hence we
   omit these assertions for now.
   ------------------------------------------------------------------------- */

HsChar
rts_getChar (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == Czh_con_info ||
  //        p->header.info == Czh_static_info);
  // payload word holds the character; narrow it back to StgChar
  return (StgChar)(StgWord)(p->payload[0]);
}

HsInt
rts_getInt (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == Izh_con_info ||
  //        p->header.info == Izh_static_info);
  return (HsInt)(p->payload[0]);
}

HsInt8
rts_getInt8 (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == I8zh_con_info ||
  //        p->header.info == I8zh_static_info);
  // narrow via HsInt, mirroring how rts_mkInt8 stored the value
  return (HsInt8)(HsInt)(p->payload[0]);
}

HsInt16
rts_getInt16 (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == I16zh_con_info ||
  //        p->header.info == I16zh_static_info);
  return (HsInt16)(HsInt)(p->payload[0]);
}

HsInt32
rts_getInt32 (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == I32zh_con_info ||
  //        p->header.info == I32zh_static_info);
  return (HsInt32)(HsInt)(p->payload[0]);
}

HsInt64
rts_getInt64 (HaskellObj p)
{
  HsInt64* tmp;
  // See comment above:
  // ASSERT(p->header.info == I64zh_con_info ||
  //        p->header.info == I64zh_static_info);
  // read the 64-bit value back through a pointer, as rts_mkInt64 wrote it
  tmp = (HsInt64*)&(p->payload[0]);
  return *tmp;
}
HsWord
rts_getWord (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == Wzh_con_info ||
  //        p->header.info == Wzh_static_info);
  return (HsWord)(p->payload[0]);
}

HsWord8
rts_getWord8 (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == W8zh_con_info ||
  //        p->header.info == W8zh_static_info);
  // narrow via HsWord, mirroring how rts_mkWord8 stored the value
  return (HsWord8)(HsWord)(p->payload[0]);
}

HsWord16
rts_getWord16 (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == W16zh_con_info ||
  //        p->header.info == W16zh_static_info);
  return (HsWord16)(HsWord)(p->payload[0]);
}

HsWord32
rts_getWord32 (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == W32zh_con_info ||
  //        p->header.info == W32zh_static_info);
  return (HsWord32)(HsWord)(p->payload[0]);
}


HsWord64
rts_getWord64 (HaskellObj p)
{
  HsWord64* tmp;
  // See comment above:
  // ASSERT(p->header.info == W64zh_con_info ||
  //        p->header.info == W64zh_static_info);
  // read the 64-bit value back through a pointer, as rts_mkWord64 wrote it
  tmp = (HsWord64*)&(p->payload[0]);
  return *tmp;
}
+
HsFloat
rts_getFloat (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == Fzh_con_info ||
  //        p->header.info == Fzh_static_info);
  // PK_FLT reads the float back out of the payload (see ASSIGN_FLT)
  return (float)(PK_FLT((P_)p->payload));
}

HsDouble
rts_getDouble (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == Dzh_con_info ||
  //        p->header.info == Dzh_static_info);
  return (double)(PK_DBL((P_)p->payload));
}

HsStablePtr
rts_getStablePtr (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == StablePtr_con_info ||
  //        p->header.info == StablePtr_static_info);
  return (StgStablePtr)(p->payload[0]);
}
+
+HsPtr
+rts_getPtr (HaskellObj p)
+{
+ // See comment above:
+ // ASSERT(p->header.info == Ptr_con_info ||
+ // p->header.info == Ptr_static_info);
+ return (Capability *)(p->payload[0]);
+}
+
HsFunPtr
rts_getFunPtr (HaskellObj p)
{
  // See comment above:
  // ASSERT(p->header.info == FunPtr_con_info ||
  //        p->header.info == FunPtr_static_info);
  return (void *)(p->payload[0]);
}

// Decode a Bool closure: the constructor tag is read out of the info
// table's srt_bitmap field; tag 0 is False, anything else is True.
HsBool
rts_getBool (HaskellObj p)
{
  StgInfoTable *info;

  info = get_itbl((StgClosure *)p);
  if (info->srt_bitmap == 0) { // srt_bitmap is the constructor tag
    return 0;
  } else {
    return 1;
  }
}
+
+/* -----------------------------------------------------------------------------
+ Creating threads
+ -------------------------------------------------------------------------- */
+
+INLINE_HEADER void pushClosure (StgTSO *tso, StgWord c) {
+ tso->sp--;
+ tso->sp[0] = (W_) c;
+}
+
// Create a thread that evaluates 'closure' to WHNF: the stack holds
// the closure with an stg_enter frame on top.
StgTSO *
createGenThread (Capability *cap, nat stack_size, StgClosure *closure)
{
  StgTSO *t;
#if defined(GRAN)
  t = createThread (cap, stack_size, NO_PRI);  // GranSim takes a priority
#else
  t = createThread (cap, stack_size);
#endif
  // push the closure, then the enter frame on top of it
  pushClosure(t, (W_)closure);
  pushClosure(t, (W_)&stg_enter_info);
  return t;
}

// Create a thread that runs the IO action 'closure' without forcing
// its result (stg_noforceIO sits under the application frame).
StgTSO *
createIOThread (Capability *cap, nat stack_size, StgClosure *closure)
{
  StgTSO *t;
#if defined(GRAN)
  t = createThread (cap, stack_size, NO_PRI);
#else
  t = createThread (cap, stack_size);
#endif
  // stack, bottom to top: noforceIO frame, ap_v frame, closure, enter frame
  pushClosure(t, (W_)&stg_noforceIO_info);
  pushClosure(t, (W_)&stg_ap_v_info);
  pushClosure(t, (W_)closure);
  pushClosure(t, (W_)&stg_enter_info);
  return t;
}

/*
 * Same as above, but also evaluate the result of the IO action
 * to whnf while we're at it.
 */

StgTSO *
createStrictIOThread(Capability *cap, nat stack_size, StgClosure *closure)
{
  StgTSO *t;
#if defined(GRAN)
  t = createThread(cap, stack_size, NO_PRI);
#else
  t = createThread(cap, stack_size);
#endif
  // as createIOThread, but with a forceIO frame at the bottom so the
  // action's result is evaluated to WHNF
  pushClosure(t, (W_)&stg_forceIO_info);
  pushClosure(t, (W_)&stg_ap_v_info);
  pushClosure(t, (W_)closure);
  pushClosure(t, (W_)&stg_enter_info);
  return t;
}
+
+/* ----------------------------------------------------------------------------
+ Evaluating Haskell expressions
+ ------------------------------------------------------------------------- */
+
+Capability *
+rts_eval (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
+{
+ StgTSO *tso;
+
+ tso = createGenThread(cap, RtsFlags.GcFlags.initialStkSize, p);
+ return scheduleWaitThread(tso,ret,cap);
+}
+
+Capability *
+rts_eval_ (Capability *cap, HaskellObj p, unsigned int stack_size,
+ /*out*/HaskellObj *ret)
+{
+ StgTSO *tso;
+
+ tso = createGenThread(cap, stack_size, p);
+ return scheduleWaitThread(tso,ret,cap);
+}
+
+/*
+ * rts_evalIO() evaluates a value of the form (IO a), forcing the action's
+ * result to WHNF before returning.
+ */
+Capability *
+rts_evalIO (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
+{
+ StgTSO* tso;
+
+ tso = createStrictIOThread(cap, RtsFlags.GcFlags.initialStkSize, p);
+ return scheduleWaitThread(tso,ret,cap);
+}
+
/*
 * rts_evalStableIO() is suitable for calling from Haskell.  It
 * evaluates a value of the form (StablePtr (IO a)), forcing the
 * action's result to WHNF before returning.  The result is returned
 * in a StablePtr.
 */
Capability *
rts_evalStableIO (Capability *cap, HsStablePtr s, /*out*/HsStablePtr *ret)
{
  StgTSO* tso;
  StgClosure *p, *r;
  SchedulerStatus stat;

  p = (StgClosure *)deRefStablePtr(s);
  tso = createStrictIOThread(cap, RtsFlags.GcFlags.initialStkSize, p);
  // scheduleWaitThread() may hand back a different Capability
  cap = scheduleWaitThread(tso,&r,cap);
  stat = rts_getSchedStatus(cap);

  // only produce a result on success; 'ret' may be NULL if the
  // caller does not want one
  if (stat == Success && ret != NULL) {
    ASSERT(r != NULL);
    *ret = getStablePtr((StgPtr)r);
  }

  return cap;
}
+
+/*
+ * Like rts_evalIO(), but doesn't force the action's result.
+ */
+Capability *
+rts_evalLazyIO (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
+{
+ StgTSO *tso;
+
+ tso = createIOThread(cap, RtsFlags.GcFlags.initialStkSize, p);
+ return scheduleWaitThread(tso,ret,cap);
+}
+
+Capability *
+rts_evalLazyIO_ (Capability *cap, HaskellObj p, unsigned int stack_size,
+ /*out*/HaskellObj *ret)
+{
+ StgTSO *tso;
+
+ tso = createIOThread(cap, stack_size, p);
+ return scheduleWaitThread(tso,ret,cap);
+}
+
+/* Convenience function for decoding the returned status. */
+
+void
+rts_checkSchedStatus (char* site, Capability *cap)
+{
+ SchedulerStatus rc = cap->running_task->stat;
+ switch (rc) {
+ case Success:
+ return;
+ case Killed:
+ errorBelch("%s: uncaught exception",site);
+ stg_exit(EXIT_FAILURE);
+ case Interrupted:
+ errorBelch("%s: interrupted", site);
+ stg_exit(EXIT_FAILURE);
+ default:
+ errorBelch("%s: Return code (%d) not ok",(site),(rc));
+ stg_exit(EXIT_FAILURE);
+ }
+}
+
+SchedulerStatus
+rts_getSchedStatus (Capability *cap)
+{
+ return cap->running_task->stat;
+}
+
// Enter the RTS: allocate a bound Task for the calling OS thread and
// wait for a free Capability.  Pair every rts_lock() with rts_unlock().
Capability *
rts_lock (void)
{
    Capability *cap;
    Task *task;

    // ToDo: get rid of this lock in the common case.  We could store
    // a free Task in thread-local storage, for example.  That would
    // leave just one lock on the path into the RTS: cap->lock when
    // acquiring the Capability.
    ACQUIRE_LOCK(&sched_mutex);
    task = newBoundTask();      // called under sched_mutex
    RELEASE_LOCK(&sched_mutex);

    cap = NULL;
    // blocks until a Capability is handed to this task
    waitForReturnCapability(&cap, task);
    return (Capability *)cap;
}
+
// Exiting the RTS: we hold a Capability that is not necessarily the
// same one that was originally returned by rts_lock(), because
// rts_evalIO() etc. may return a new one.  Now that we have
// investigated the return value, we can release the Capability,
// and free the Task (in that order).

void
rts_unlock (Capability *cap)
{
    Task *task;

    task = cap->running_task;
    ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);

    // slightly delicate ordering of operations below, pay attention!

    // We are no longer a bound task/thread.  This is important,
    // because the GC can run when we release the Capability below,
    // and we don't want it to treat this as a live TSO pointer.
    task->tso = NULL;

    // Now release the Capability.  With the capability released, GC
    // may happen.  NB. does not try to put the current Task on the
    // worker queue.
    releaseCapability(cap);

    // Finally, we can release the Task to the free list.
    boundTaskExiting(task);
}
diff --git a/rts/RtsDllMain.c b/rts/RtsDllMain.c
new file mode 100644
index 0000000000..af3c5090de
--- /dev/null
+++ b/rts/RtsDllMain.c
@@ -0,0 +1,39 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1999-2000
+ *
+ * Entry point for RTS-in-a-DLL
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsAPI.h"
+
+#ifdef HAVE_WINDOWS_H
+#include <windows.h>
+#endif
+
/* I'd be mildly surprised if this wasn't defined, but still. */
#ifdef ENABLE_WIN32_DLL_SUPPORT

/* DLL entry point: shuts the Haskell RTS down when the process
 * detaches from the DLL.  Startup is deliberately left to the caller
 * (see the note in the body).
 */
BOOL
WINAPI
DllMain ( HINSTANCE hInstance
        , DWORD reason
        , LPVOID reserved
        )
{
  /*
   * Note: the DllMain() doesn't call startupHaskell() for you,
   *       that is the task of users of the RTS. The reason is
   *       that *you* want to be able to control the arguments
   *       you pass to the RTS.
   */
  switch (reason) {
  case DLL_PROCESS_DETACH:
      shutdownHaskell();
      break;    // explicit break: guards against accidental fall-through
                // if further cases are ever added
  default:
      break;    // other notifications (attach etc.) need no action
  }
  return TRUE;
}

#endif /* ENABLE_WIN32_DLL_SUPPORT */
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
new file mode 100644
index 0000000000..0f83b3356c
--- /dev/null
+++ b/rts/RtsFlags.c
@@ -0,0 +1,2281 @@
+
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The AQUA Project, Glasgow University, 1994-1997
+ * (c) The GHC Team, 1998-1999
+ *
+ * Functions for parsing the argument list.
+ *
+ * ---------------------------------------------------------------------------*/
+
+//@menu
+//* Includes::
+//* Constants::
+//* Static function decls::
+//* Command-line option parsing routines::
+//* GranSim specific options::
+//* Aux fcts::
+//@end menu
+//*/
+
+//@node Includes, Constants
+//@subsection Includes
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "BlockAlloc.h"
+#include "Timer.h" /* CS_MIN_MILLISECS */
+#include "Profiling.h"
+
+#ifdef HAVE_CTYPE_H
+#include <ctype.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+// Flag Structure
+RTS_FLAGS RtsFlags;
+
+/*
+ * Split argument lists
+ */
+int prog_argc = 0; /* an "int" so as to match normal "argc" */
+char **prog_argv = NULL;
+char *prog_name = NULL; /* 'basename' of prog_argv[0] */
+int rts_argc = 0; /* ditto */
+char *rts_argv[MAX_RTS_ARGS];
+
+//@node Constants, Static function decls, Includes
+//@subsection Constants
+
+/*
+ * constants, used later
+ */
+#define RTS 1
+#define PGM 0
+
+#if defined(GRAN)
+
+static char *gran_debug_opts_strs[] = {
+ "DEBUG (-bDe, -bD1): event_trace; printing event trace.\n",
+ "DEBUG (-bDE, -bD2): event_stats; printing event statistics.\n",
+ "DEBUG (-bDb, -bD4): bq; check blocking queues\n",
+ "DEBUG (-bDG, -bD8): pack; routines for (un-)packing graph structures.\n",
+ "DEBUG (-bDq, -bD16): checkSparkQ; check consistency of the spark queues.\n",
+ "DEBUG (-bDf, -bD32): thunkStealing; print forwarding of fetches.\n",
+ "DEBUG (-bDr, -bD64): randomSteal; stealing sparks/threads from random PEs.\n",
+ "DEBUG (-bDF, -bD128): findWork; searching spark-pools (local & remote), thread queues for work.\n",
+ "DEBUG (-bDu, -bD256): unused; currently unused flag.\n",
+ "DEBUG (-bDS, -bD512): pri; priority sparking or scheduling.\n",
+ "DEBUG (-bD:, -bD1024): checkLight; check GranSim-Light setup.\n",
+ "DEBUG (-bDo, -bD2048): sortedQ; check whether spark/thread queues are sorted.\n",
+ "DEBUG (-bDz, -bD4096): blockOnFetch; check for blocked on fetch.\n",
+ "DEBUG (-bDP, -bD8192): packBuffer; routines handling pack buffer (GranSim internal!).\n",
+ "DEBUG (-bDt, -bD16384): blockOnFetch_sanity; check for TSO asleep on fetch.\n",
+};
+
+/* one character codes for the available debug options */
+static char gran_debug_opts_flags[] = {
+ 'e', 'E', 'b', 'G', 'q', 'f', 'r', 'F', 'u', 'S', ':', 'o', 'z', 'P', 't'
+};
+
+#elif defined(PAR)
+
+static char *par_debug_opts_strs[] = {
+ "DEBUG (-qDv, -qD1): verbose; be generally verbose with parallel related stuff.\n",
+ "DEBUG (-qDq, -qD2): bq; print blocking queues.\n",
+ "DEBUG (-qDs, -qD4): schedule; scheduling of parallel threads.\n",
+ "DEBUG (-qDe, -qD8): free; free messages.\n",
+ "DEBUG (-qDr, -qD16): resume; resume messages.\n",
+ "DEBUG (-qDw, -qD32): weight; print weights and distrib GC stuff.\n",
+ "DEBUG (-qDF, -qD64): fetch; fetch messages.\n",
+ // "DEBUG (-qDa, -qD128): ack; ack messages.\n",
+ "DEBUG (-qDf, -qD128): fish; fish messages.\n",
+ //"DEBUG (-qDo, -qD512): forward; forwarding messages to other PEs.\n",
+ "DEBUG (-qDl, -qD256): tables; print internal LAGA etc tables.\n",
+ "DEBUG (-qDo, -qD512): packet; packets and graph structures when packing.\n",
+ "DEBUG (-qDp, -qD1024): pack; packing and unpacking graphs.\n",
+ "DEBUG (-qDz, -qD2048): paranoia; ridiculously detailed output (excellent for filling a partition).\n"
+};
+
+/* one character codes for the available debug options */
+static char par_debug_opts_flags[] = {
+ 'v', 'q', 's', 'e', 'r', 'w', 'F', 'f', 'l', 'o', 'p', 'z'
+};
+
+#endif /* PAR */
+
+//@node Static function decls, Command-line option parsing routines, Constants
+//@subsection Static function decls
+
+/* -----------------------------------------------------------------------------
+ Static function decls
+ -------------------------------------------------------------------------- */
+
+static int /* return NULL on error */
+open_stats_file (
+ I_ arg,
+ int argc, char *argv[],
+ int rts_argc, char *rts_argv[],
+ const char *FILENAME_FMT,
+ FILE **file_ret);
+
+static I_ decode(const char *s);
+static void bad_option(const char *s);
+
+#if defined(GRAN)
+static void enable_GranSimLight(void);
+static void process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error);
+static void set_GranSim_debug_options(nat n);
+static void help_GranSim_debug_options(nat n);
+#elif defined(PAR)
+static void process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error);
+static void set_par_debug_options(nat n);
+static void help_par_debug_options(nat n);
+#endif
+
+//@node Command-line option parsing routines, GranSim specific options, Static function decls
+//@subsection Command-line option parsing routines
+
+/* -----------------------------------------------------------------------------
+ * Command-line option parsing routines.
+ * ---------------------------------------------------------------------------*/
+
+void initRtsFlagsDefaults(void)
+{
+ RtsFlags.GcFlags.statsFile = NULL;
+ RtsFlags.GcFlags.giveStats = NO_GC_STATS;
+
+ RtsFlags.GcFlags.maxStkSize = (8 * 1024 * 1024) / sizeof(W_);
+ RtsFlags.GcFlags.initialStkSize = 1024 / sizeof(W_);
+
+ RtsFlags.GcFlags.minAllocAreaSize = (512 * 1024) / BLOCK_SIZE;
+ RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE;
+ RtsFlags.GcFlags.maxHeapSize = 0; /* off by default */
+ RtsFlags.GcFlags.heapSizeSuggestion = 0; /* none */
+ RtsFlags.GcFlags.pcFreeHeap = 3; /* 3% */
+ RtsFlags.GcFlags.oldGenFactor = 2;
+#if defined(PAR)
+ /* A hack currently needed for GUM -- HWL */
+ RtsFlags.GcFlags.generations = 1;
+ RtsFlags.GcFlags.steps = 2;
+ RtsFlags.GcFlags.squeezeUpdFrames = rtsFalse;
+#else
+ RtsFlags.GcFlags.generations = 2;
+ RtsFlags.GcFlags.steps = 2;
+ RtsFlags.GcFlags.squeezeUpdFrames = rtsTrue;
+#endif
+ RtsFlags.GcFlags.compact = rtsFalse;
+ RtsFlags.GcFlags.compactThreshold = 30.0;
+#ifdef RTS_GTK_FRONTPANEL
+ RtsFlags.GcFlags.frontpanel = rtsFalse;
+#endif
+ RtsFlags.GcFlags.idleGCDelayTicks = 300 / TICK_MILLISECS; /* ticks */
+
+#ifdef DEBUG
+ RtsFlags.DebugFlags.scheduler = rtsFalse;
+ RtsFlags.DebugFlags.interpreter = rtsFalse;
+ RtsFlags.DebugFlags.codegen = rtsFalse;
+ RtsFlags.DebugFlags.weak = rtsFalse;
+ RtsFlags.DebugFlags.gccafs = rtsFalse;
+ RtsFlags.DebugFlags.gc = rtsFalse;
+ RtsFlags.DebugFlags.block_alloc = rtsFalse;
+ RtsFlags.DebugFlags.sanity = rtsFalse;
+ RtsFlags.DebugFlags.stable = rtsFalse;
+ RtsFlags.DebugFlags.stm = rtsFalse;
+ RtsFlags.DebugFlags.prof = rtsFalse;
+ RtsFlags.DebugFlags.gran = rtsFalse;
+ RtsFlags.DebugFlags.par = rtsFalse;
+ RtsFlags.DebugFlags.linker = rtsFalse;
+ RtsFlags.DebugFlags.squeeze = rtsFalse;
+#endif
+
+#if defined(PROFILING) || defined(PAR)
+ RtsFlags.CcFlags.doCostCentres = 0;
+#endif /* PROFILING or PAR */
+
+#ifdef PROFILING
+ RtsFlags.ProfFlags.doHeapProfile = rtsFalse;
+ RtsFlags.ProfFlags.profileInterval = 100;
+ RtsFlags.ProfFlags.includeTSOs = rtsFalse;
+ RtsFlags.ProfFlags.showCCSOnException = rtsFalse;
+ RtsFlags.ProfFlags.maxRetainerSetSize = 8;
+ RtsFlags.ProfFlags.modSelector = NULL;
+ RtsFlags.ProfFlags.descrSelector = NULL;
+ RtsFlags.ProfFlags.typeSelector = NULL;
+ RtsFlags.ProfFlags.ccSelector = NULL;
+ RtsFlags.ProfFlags.ccsSelector = NULL;
+ RtsFlags.ProfFlags.retainerSelector = NULL;
+ RtsFlags.ProfFlags.bioSelector = NULL;
+
+#elif defined(DEBUG)
+ RtsFlags.ProfFlags.doHeapProfile = rtsFalse;
+#endif
+
+ RtsFlags.ConcFlags.ctxtSwitchTime = CS_MIN_MILLISECS; /* In milliseconds */
+
+#ifdef THREADED_RTS
+ RtsFlags.ParFlags.nNodes = 1;
+ RtsFlags.ParFlags.migrate = rtsTrue;
+ RtsFlags.ParFlags.wakeupMigrate = rtsFalse;
+#endif
+
+#ifdef PAR
+ RtsFlags.ParFlags.ParStats.Full = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Suppressed = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Binary = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Sparks = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Heap = rtsFalse;
+ RtsFlags.ParFlags.ParStats.NewLogfile = rtsFalse;
+ RtsFlags.ParFlags.ParStats.Global = rtsFalse;
+
+ RtsFlags.ParFlags.outputDisabled = rtsFalse;
+#ifdef DIST
+ RtsFlags.ParFlags.doFairScheduling = rtsTrue; /* fair sched by def */
+#else
+ RtsFlags.ParFlags.doFairScheduling = rtsFalse; /* unfair sched by def */
+#endif
+ RtsFlags.ParFlags.packBufferSize = 1024;
+ RtsFlags.ParFlags.thunksToPack = 1; /* 0 ... infinity; */
+ RtsFlags.ParFlags.globalising = 1; /* 0 ... everything */
+ RtsFlags.ParFlags.maxThreads = 1024;
+ RtsFlags.ParFlags.maxFishes = MAX_FISHES;
+ RtsFlags.ParFlags.fishDelay = FISH_DELAY;
+#endif
+
+#if defined(PAR) || defined(THREADED_RTS)
+ RtsFlags.ParFlags.maxLocalSparks = 4096;
+#endif /* PAR || THREADED_RTS */
+
+#if defined(GRAN)
+ /* ToDo: check defaults for GranSim and GUM */
+ RtsFlags.GcFlags.maxStkSize = (8 * 1024 * 1024) / sizeof(W_);
+ RtsFlags.GcFlags.initialStkSize = 1024 / sizeof(W_);
+
+ RtsFlags.GranFlags.maxThreads = 65536; // refers to mandatory threads
+ RtsFlags.GranFlags.GranSimStats.Full = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Suppressed = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Binary = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Sparks = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Heap = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsFalse;
+ RtsFlags.GranFlags.GranSimStats.Global = rtsFalse;
+
+ RtsFlags.GranFlags.packBufferSize = 1024;
+ RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE;
+
+ RtsFlags.GranFlags.proc = MAX_PROC;
+ RtsFlags.GranFlags.Fishing = rtsFalse;
+ RtsFlags.GranFlags.maxFishes = MAX_FISHES;
+ RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE;
+ RtsFlags.GranFlags.Light = rtsFalse;
+
+ RtsFlags.GranFlags.Costs.latency = LATENCY;
+ RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY;
+ RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME;
+ RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME;
+ RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME;
+ RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME;
+ RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME;
+ RtsFlags.GranFlags.Costs.mtidytime = MSGTIDYTIME;
+
+ RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME;
+ RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME;
+ RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME;
+ RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME;
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = THREADCONTEXTSWITCHTIME;
+
+ RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST;
+ RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST;
+ RtsFlags.GranFlags.Costs.load_cost = LOAD_COST;
+ RtsFlags.GranFlags.Costs.store_cost = STORE_COST;
+ RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST;
+
+ RtsFlags.GranFlags.Costs.heapalloc_cost = HEAPALLOC_COST;
+
+ RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD;
+ RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD;
+
+ RtsFlags.GranFlags.DoFairSchedule = rtsFalse;
+ RtsFlags.GranFlags.DoAsyncFetch = rtsFalse;
+ RtsFlags.GranFlags.DoStealThreadsFirst = rtsFalse;
+ RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsFalse;
+ RtsFlags.GranFlags.DoBulkFetching = rtsFalse;
+ RtsFlags.GranFlags.DoThreadMigration = rtsFalse;
+ RtsFlags.GranFlags.FetchStrategy = 2;
+ RtsFlags.GranFlags.PreferSparksOfLocalNodes = rtsFalse;
+ RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
+ RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse;
+ RtsFlags.GranFlags.SparkPriority = 0;
+ RtsFlags.GranFlags.SparkPriority2 = 0;
+ RtsFlags.GranFlags.RandomPriorities = rtsFalse;
+ RtsFlags.GranFlags.InversePriorities = rtsFalse;
+ RtsFlags.GranFlags.IgnorePriorities = rtsFalse;
+ RtsFlags.GranFlags.ThunksToPack = 0;
+ RtsFlags.GranFlags.RandomSteal = rtsTrue;
+#endif
+
+#ifdef TICKY_TICKY
+ RtsFlags.TickyFlags.showTickyStats = rtsFalse;
+ RtsFlags.TickyFlags.tickyFile = NULL;
+#endif
+}
+
+/* NULL-terminated table of usage lines printed (one per line, via
+   errorBelch) when RTS flag parsing fails.  The set of lines shown is
+   selected at compile time by the same feature macros (PROFILING, PAR,
+   THREADED_RTS, GRAN, TICKY_TICKY, DEBUG, ...) that guard the flags
+   themselves, so the help text always matches the build. */
+static const char *
+usage_text[] = {
+"",
+"Usage: <prog> <args> [+RTS <rtsopts> | -RTS <args>] ... --RTS <args>",
+"",
+" +RTS Indicates run time system options follow",
+" -RTS Indicates program arguments follow",
+" --RTS Indicates that ALL subsequent arguments will be given to the",
+" program (including any of these RTS flags)",
+"",
+"The following run time system options are available:",
+"",
+" -? Prints this message and exits; the program is not executed",
+"",
+" -K<size> Sets the maximum stack size (default 8M) Egs: -K32k -K512k",
+" -k<size> Sets the initial thread stack size (default 1k) Egs: -k4k -k2m",
+"",
+" -A<size> Sets the minimum allocation area size (default 256k) Egs: -A1m -A10k",
+" -M<size> Sets the maximum heap size (default unlimited) Egs: -M256k -M1G",
+" -H<size> Sets the minimum heap size (default 0M) Egs: -H24m -H1G",
+" -m<n> Minimum % of heap which must be available (default 3%)",
+" -G<n> Number of generations (default: 2)",
+" -T<n> Number of steps in younger generations (default: 2)",
+" -c<n> Auto-enable compaction of the oldest generation when live data is",
+" at least <n>% of the maximum heap size set with -M (default: 30%)",
+" -c Enable compaction for all major collections",
+#if defined(THREADED_RTS)
+" -I<sec> Perform full GC after <sec> idle time (default: 0.3, 0 == off)",
+#endif
+"",
+" -t<file> One-line GC statistics (default file: <program>.stat)",
+" -s<file> Summary GC statistics (with -Sstderr going to stderr)",
+" -S<file> Detailed GC statistics",
+#ifdef RTS_GTK_FRONTPANEL
+" -f Display front panel (requires X11 & GTK+)",
+#endif
+"",
+"",
+" -Z Don't squeeze out update frames on stack overflow",
+" -B Sound the bell at the start of each garbage collection",
+#if defined(PROFILING) || defined(PAR)
+"",
+" -px Time/allocation profile (XML) (output file <program>.prof)",
+" -p Time/allocation profile (output file <program>.prof)",
+" -P More detailed Time/Allocation profile",
+" -Pa Give information about *all* cost centres",
+
+# if defined(PROFILING)
+"",
+" -hx Heap residency profile (XML) (output file <program>.prof)",
+" -h<break-down> Heap residency profile (hp2ps) (output file <program>.hp)",
+" break-down: c = cost centre stack (default)",
+" m = module",
+" d = closure description",
+" y = type description",
+" r = retainer",
+" b = biography (LAG,DRAG,VOID,USE)",
+" A subset of closures may be selected thusly:",
+" -hc<cc>,... specific cost centre(s) (top of stack only)",
+" -hC<cc>,... specific cost centre(s) (anywhere in stack)",
+" -hm<mod>... all cost centres from the specified module(s)",
+" -hd<des>,... closures with specified closure descriptions",
+" -hy<typ>... closures with specified type descriptions",
+" -hr<cc>... closures with specified retainers",
+" -hb<bio>... closures with specified biographies (lag,drag,void,use)",
+"",
+" -R<size> Set the maximum retainer set size (default: 8)",
+"",
+" -i<sec> Time between heap samples (seconds, default: 0.1)",
+"",
+" -xt Include threads (TSOs) in a heap profile",
+"",
+" -xc Show current cost centre stack on raising an exception",
+# endif
+#endif /* PROFILING or PAR */
+#if !defined(PROFILING) && defined(DEBUG)
+"",
+" -h<break-down> Debugging Heap residency profile",
+" (output file <program>.hp)",
+" break-down: L = closure label (default)",
+" T = closure type (constructor, thunk etc.)",
+#endif
+"",
+#if defined(TICKY_TICKY)
+" -r<file> Produce reduction profiling statistics (with -rstderr for stderr)",
+"",
+#endif
+#if defined(PAR)
+" -N<n> Use <n> PVMish processors in parallel (default: 2)",
+/* NB: the -N<n> is implemented by the driver!! */
+#endif
+" -C<secs> Context-switch interval in seconds",
+" (0 or no argument means switch as often as possible)",
+" the default is .02 sec; resolution is .02 sec",
+"",
+#if defined(DEBUG)
+" -Ds DEBUG: scheduler",
+" -Di DEBUG: interpreter",
+" -Dc DEBUG: codegen",
+" -Dw DEBUG: weak",
+" -DG DEBUG: gccafs",
+" -Dg DEBUG: gc",
+" -Db DEBUG: block",
+" -DS DEBUG: sanity",
+" -Dt DEBUG: stable",
+" -Dp DEBUG: prof",
+" -Dr DEBUG: gran",
+" -DP DEBUG: par",
+" -Dl DEBUG: linker",
+" -Dm DEBUG: stm",
+" -Dz DEBUG: stack squeezing",
+"",
+#endif /* DEBUG */
+#if defined(THREADED_RTS)
+" -N<n> Use <n> OS threads (default: 1)",
+" -qm Don't automatically migrate threads between CPUs",
+" -qw Migrate a thread to the current CPU when it is woken up",
+#endif
+#if defined(THREADED_RTS) || defined(PAR)
+" -e<size> Size of spark pools (default 100)",
+#endif
+#if defined(PAR)
+" -t<num> Set maximum number of advisory threads per PE (default 32)",
+" -qP Enable activity profile (output files in ~/<program>*.gr)",
+" -qQ<size> Set pack-buffer size (default: 1024)",
+" -qd Turn on PVM-ish debugging",
+" -qO Disable output for performance measurement",
+#endif
+#if defined(THREADED_RTS) || defined(PAR)
+" -e<n> Maximum number of outstanding local sparks (default: 4096)",
+#endif
+#if defined(PAR)
+" -d Turn on PVM-ish debugging",
+" -O Disable output for performance measurement",
+#endif /* PAR */
+#if defined(GRAN) /* ToDo: fill in decent Docu here */
+" -b... All GranSim options start with -b; see GranSim User's Guide for details",
+#endif
+"",
+"RTS options may also be specified using the GHCRTS environment variable.",
+"",
+"Other RTS options may be available for programs compiled a different way.",
+"The GHC User's Guide has full details.",
+"",
+0
+};
+
+/* Returns rtsTrue iff the two NUL-terminated strings are equal
+   (thin wrapper over strcmp, used by the +RTS/-RTS marker matching
+   in setupRtsFlags). */
+STATIC_INLINE rtsBool
+strequal(const char *a, const char * b)
+{
+ return(strcmp(a, b) == 0);
+}
+
+/* Tokenise the whitespace-separated flag string 's' (as obtained from
+ * the ghc_rts_opts global or the GHCRTS environment variable) and
+ * append a freshly-allocated copy of each token to rts_argv, bumping
+ * *rts_argc for each one.  The caller owns (and in practice never
+ * frees) the allocated tokens, which live as long as the RTS.
+ * Aborts via barf() if MAX_RTS_ARGS-1 arguments would be exceeded.
+ */
+static void
+splitRtsFlags(char *s, int *rts_argc, char *rts_argv[])
+{
+    char *c1, *c2;
+
+    c1 = s;
+    do {
+        /* Cast to unsigned char before isspace(): passing a plain
+           (possibly negative) char value is undefined behaviour
+           (CERT STR37-C). */
+        while (isspace((unsigned char)*c1)) { c1++; };
+        c2 = c1;
+        while (!isspace((unsigned char)*c2) && *c2 != '\0') { c2++; };
+
+        /* no token found: only trailing whitespace (or empty input) left */
+        if (c1 == c2) { break; }
+
+        if (*rts_argc < MAX_RTS_ARGS-1) {
+            /* copy the token [c1,c2) into its own NUL-terminated buffer */
+            s = stgMallocBytes(c2-c1+1, "RtsFlags.c:splitRtsFlags()");
+            strncpy(s, c1, c2-c1);
+            s[c2-c1] = '\0';
+            rts_argv[(*rts_argc)++] = s;
+        } else {
+            barf("too many RTS arguments (max %d)", MAX_RTS_ARGS-1);
+        }
+
+        c1 = c2;
+    } while (*c1 != '\0');
+}
+
+/* Split the command line (argv) into program arguments (left in argv)
+ * and RTS arguments (collected in rts_argv), honouring the
+ * +RTS / -RTS / --RTS / -- markers, then parse each RTS argument into
+ * the global RtsFlags structure.  Precedence, lowest first: the
+ * ghc_rts_opts global, the GHCRTS environment variable, the command
+ * line.  On any parse error the usage_text table is printed and the
+ * process exits via stg_exit(EXIT_FAILURE).
+ */
+void
+setupRtsFlags(int *argc, char *argv[], int *rts_argc, char *rts_argv[])
+{
+ rtsBool error = rtsFalse;
+ I_ mode;
+ I_ arg, total_arg;
+
+ setProgName (argv);
+ total_arg = *argc;
+ arg = 1;
+
+ *argc = 1;
+ *rts_argc = 0;
+
+ // process arguments from the ghc_rts_opts global variable first.
+ // (arguments from the GHCRTS environment variable and the command
+ // line override these).
+ {
+ if (ghc_rts_opts != NULL) {
+ splitRtsFlags(ghc_rts_opts, rts_argc, rts_argv);
+ }
+ }
+
+ // process arguments from the GHCRTS environment variable next
+ // (arguments from the command line override these).
+ {
+ char *ghc_rts = getenv("GHCRTS");
+
+ if (ghc_rts != NULL) {
+ splitRtsFlags(ghc_rts, rts_argc, rts_argv);
+ }
+ }
+
+ // Split arguments (argv) into PGM (argv) and RTS (rts_argv) parts
+ // argv[0] must be PGM argument -- leave in argv
+
+ for (mode = PGM; arg < total_arg; arg++) {
+ // The '--RTS' argument disables all future +RTS ... -RTS processing.
+ if (strequal("--RTS", argv[arg])) {
+ arg++;
+ break;
+ }
+ // The '--' argument is passed through to the program, but
+ // disables all further +RTS ... -RTS processing.
+ else if (strequal("--", argv[arg])) {
+ break;
+ }
+ else if (strequal("+RTS", argv[arg])) {
+ mode = RTS;
+ }
+ else if (strequal("-RTS", argv[arg])) {
+ mode = PGM;
+ }
+ else if (mode == RTS && *rts_argc < MAX_RTS_ARGS-1) {
+ rts_argv[(*rts_argc)++] = argv[arg];
+ }
+ else if (mode == PGM) {
+ argv[(*argc)++] = argv[arg];
+ }
+ else {
+ barf("too many RTS arguments (max %d)", MAX_RTS_ARGS-1);
+ }
+ }
+ // process remaining program arguments
+ for (; arg < total_arg; arg++) {
+ argv[(*argc)++] = argv[arg];
+ }
+ argv[*argc] = (char *) 0;
+ rts_argv[*rts_argc] = (char *) 0;
+
+ // Process RTS (rts_argv) part: mainly to determine statsfile
+ for (arg = 0; arg < *rts_argc; arg++) {
+ if (rts_argv[arg][0] != '-') {
+ fflush(stdout);
+ errorBelch("unexpected RTS argument: %s", rts_argv[arg]);
+ error = rtsTrue;
+
+ } else {
+ switch(rts_argv[arg][1]) {
+
+ /* process: general args, then PROFILING-only ones,
+ then CONCURRENT-only, PARallel-only, GRAN-only,
+ TICKY-only (same order as defined in RtsFlags.lh);
+ within those groups, mostly in case-insensitive
+ alphabetical order.
+ Final group is x*, which allows for more options.
+ */
+
+ /* Each *_BUILD_ONLY(x) macro expands to its argument when this
+ RTS was built with the corresponding feature, and to an
+ error message + error-flag assignment otherwise. */
+#ifdef TICKY_TICKY
+# define TICKY_BUILD_ONLY(x) x
+#else
+# define TICKY_BUILD_ONLY(x) \
+errorBelch("not built for: ticky-ticky stats"); \
+error = rtsTrue;
+#endif
+
+#if defined(PROFILING)
+# define COST_CENTRE_USING_BUILD_ONLY(x) x
+#else
+# define COST_CENTRE_USING_BUILD_ONLY(x) \
+errorBelch("not built for: -prof or -parallel"); \
+error = rtsTrue;
+#endif
+
+#ifdef PROFILING
+# define PROFILING_BUILD_ONLY(x) x
+#else
+# define PROFILING_BUILD_ONLY(x) \
+errorBelch("not built for: -prof"); \
+error = rtsTrue;
+#endif
+
+#ifdef PAR
+# define PAR_BUILD_ONLY(x) x
+#else
+# define PAR_BUILD_ONLY(x) \
+errorBelch("not built for: -parallel"); \
+error = rtsTrue;
+#endif
+
+#ifdef THREADED_RTS
+# define THREADED_BUILD_ONLY(x) x
+#else
+# define THREADED_BUILD_ONLY(x) \
+errorBelch("not built for: -smp"); \
+error = rtsTrue;
+#endif
+
+#if defined(THREADED_RTS) || defined(PAR)
+# define PAR_OR_THREADED_BUILD_ONLY(x) x
+#else
+# define PAR_OR_THREADED_BUILD_ONLY(x) \
+errorBelch("not built for: -parallel or -smp"); \
+error = rtsTrue;
+#endif
+
+#ifdef GRAN
+# define GRAN_BUILD_ONLY(x) x
+#else
+# define GRAN_BUILD_ONLY(x) \
+errorBelch("not built for: -gransim"); \
+error = rtsTrue;
+#endif
+
+ /* =========== GENERAL ========================== */
+ case '?':
+ error = rtsTrue;
+ break;
+
+ case 'A':
+ /* decode() presumably parses a size with k/m/g-style suffixes;
+ it is defined elsewhere in this file (not visible here). */
+ RtsFlags.GcFlags.minAllocAreaSize
+ = decode(rts_argv[arg]+2) / BLOCK_SIZE;
+ if (RtsFlags.GcFlags.minAllocAreaSize <= 0) {
+ bad_option(rts_argv[arg]);
+ }
+ break;
+
+ case 'B':
+ RtsFlags.GcFlags.ringBell = rtsTrue;
+ break;
+
+ case 'c':
+ if (rts_argv[arg][2] != '\0') {
+ RtsFlags.GcFlags.compactThreshold =
+ atof(rts_argv[arg]+2);
+ } else {
+ RtsFlags.GcFlags.compact = rtsTrue;
+ }
+ break;
+
+ case 'F':
+ RtsFlags.GcFlags.oldGenFactor = atof(rts_argv[arg]+2);
+
+ if (RtsFlags.GcFlags.oldGenFactor < 0)
+ bad_option( rts_argv[arg] );
+ break;
+
+#ifdef DEBUG
+ case 'D':
+ {
+ char *c;
+
+ /* -D takes a string of single-letter debug-channel selectors;
+ each letter enables one DebugFlags field. */
+ for (c = rts_argv[arg] + 2; *c != '\0'; c++) {
+ switch (*c) {
+ case 's':
+ RtsFlags.DebugFlags.scheduler = rtsTrue;
+ break;
+ case 'i':
+ RtsFlags.DebugFlags.interpreter = rtsTrue;
+ break;
+ case 'c':
+ RtsFlags.DebugFlags.codegen = rtsTrue;
+ break;
+ case 'w':
+ RtsFlags.DebugFlags.weak = rtsTrue;
+ break;
+ case 'G':
+ RtsFlags.DebugFlags.gccafs = rtsTrue;
+ break;
+ case 'g':
+ RtsFlags.DebugFlags.gc = rtsTrue;
+ break;
+ case 'b':
+ RtsFlags.DebugFlags.block_alloc = rtsTrue;
+ break;
+ case 'S':
+ RtsFlags.DebugFlags.sanity = rtsTrue;
+ break;
+ case 't':
+ RtsFlags.DebugFlags.stable = rtsTrue;
+ break;
+ case 'p':
+ RtsFlags.DebugFlags.prof = rtsTrue;
+ break;
+ case 'r':
+ RtsFlags.DebugFlags.gran = rtsTrue;
+ break;
+ case 'P':
+ RtsFlags.DebugFlags.par = rtsTrue;
+ break;
+ case 'l':
+ RtsFlags.DebugFlags.linker = rtsTrue;
+ break;
+ case 'a':
+ RtsFlags.DebugFlags.apply = rtsTrue;
+ break;
+ case 'm':
+ RtsFlags.DebugFlags.stm = rtsTrue;
+ break;
+ case 'z':
+ RtsFlags.DebugFlags.squeeze = rtsTrue;
+ break;
+ default:
+ bad_option( rts_argv[arg] );
+ }
+ }
+ break;
+ }
+#endif
+
+ case 'K':
+ RtsFlags.GcFlags.maxStkSize =
+ decode(rts_argv[arg]+2) / sizeof(W_);
+
+ if (RtsFlags.GcFlags.maxStkSize == 0)
+ bad_option( rts_argv[arg] );
+ break;
+
+ case 'k':
+ RtsFlags.GcFlags.initialStkSize =
+ decode(rts_argv[arg]+2) / sizeof(W_);
+
+ if (RtsFlags.GcFlags.initialStkSize == 0)
+ bad_option( rts_argv[arg] );
+ break;
+
+ case 'M':
+ RtsFlags.GcFlags.maxHeapSize =
+ decode(rts_argv[arg]+2) / BLOCK_SIZE;
+ /* the user gives the size in *bytes*, but "maxHeapSize" is in *blocks* */
+
+ if (RtsFlags.GcFlags.maxHeapSize <= 0) {
+ bad_option(rts_argv[arg]);
+ }
+ break;
+
+ case 'm':
+ RtsFlags.GcFlags.pcFreeHeap = atof(rts_argv[arg]+2);
+
+ if (RtsFlags.GcFlags.pcFreeHeap < 0 ||
+ RtsFlags.GcFlags.pcFreeHeap > 100)
+ bad_option( rts_argv[arg] );
+ break;
+
+ case 'G':
+ RtsFlags.GcFlags.generations = decode(rts_argv[arg]+2);
+ if (RtsFlags.GcFlags.generations < 1) {
+ bad_option(rts_argv[arg]);
+ }
+ break;
+
+ case 'T':
+ RtsFlags.GcFlags.steps = decode(rts_argv[arg]+2);
+ if (RtsFlags.GcFlags.steps < 1) {
+ bad_option(rts_argv[arg]);
+ }
+ break;
+
+ case 'H':
+ RtsFlags.GcFlags.heapSizeSuggestion =
+ decode(rts_argv[arg]+2) / BLOCK_SIZE;
+
+ if (RtsFlags.GcFlags.heapSizeSuggestion <= 0) {
+ bad_option(rts_argv[arg]);
+ }
+ break;
+
+#ifdef RTS_GTK_FRONTPANEL
+ case 'f':
+ RtsFlags.GcFlags.frontpanel = rtsTrue;
+ break;
+#endif
+
+ case 'I': /* idle GC delay */
+ if (rts_argv[arg][2] == '\0') {
+ /* use default */
+ } else {
+ I_ cst; /* tmp */
+
+ /* Convert to ticks */
+ cst = (I_) ((atof(rts_argv[arg]+2) * 1000));
+ if (cst > 0 && cst < TICK_MILLISECS) {
+ cst = TICK_MILLISECS;
+ } else {
+ cst = cst / TICK_MILLISECS;
+ }
+ RtsFlags.GcFlags.idleGCDelayTicks = cst;
+ }
+ break;
+
+ case 'S':
+ RtsFlags.GcFlags.giveStats = VERBOSE_GC_STATS;
+ goto stats;
+
+ case 's':
+ RtsFlags.GcFlags.giveStats = SUMMARY_GC_STATS;
+ goto stats;
+
+ case 't':
+ RtsFlags.GcFlags.giveStats = ONELINE_GC_STATS;
+ goto stats;
+
+ /* -S/-s/-t all jump here to open the shared stats file */
+ stats:
+#ifdef PAR
+ /* Opening all those files would almost certainly fail... */
+ // RtsFlags.ParFlags.ParStats.Full = rtsTrue;
+ RtsFlags.GcFlags.statsFile = NULL; /* temporary; ToDo: rm */
+#else
+ {
+ int r;
+ r = open_stats_file(arg, *argc, argv,
+ *rts_argc, rts_argv, STAT_FILENAME_FMT,
+ &RtsFlags.GcFlags.statsFile);
+ if (r == -1) { error = rtsTrue; }
+ }
+#endif
+ break;
+
+ case 'Z':
+ RtsFlags.GcFlags.squeezeUpdFrames = rtsFalse;
+ break;
+
+ /* =========== PROFILING ========================== */
+
+ case 'P': /* detailed cost centre profiling (time/alloc) */
+ case 'p': /* cost centre profiling (time/alloc) */
+ COST_CENTRE_USING_BUILD_ONLY(
+ switch (rts_argv[arg][2]) {
+ case 'x':
+ RtsFlags.CcFlags.doCostCentres = COST_CENTRES_XML;
+ break;
+ case 'a':
+ RtsFlags.CcFlags.doCostCentres = COST_CENTRES_ALL;
+ break;
+ default:
+ if (rts_argv[arg][1] == 'P') {
+ RtsFlags.CcFlags.doCostCentres =
+ COST_CENTRES_VERBOSE;
+ } else {
+ RtsFlags.CcFlags.doCostCentres =
+ COST_CENTRES_SUMMARY;
+ }
+ break;
+ }
+ ) break;
+
+ case 'R':
+ PROFILING_BUILD_ONLY(
+ RtsFlags.ProfFlags.maxRetainerSetSize = atof(rts_argv[arg]+2);
+ ) break;
+
+ case 'h': /* serial heap profile */
+#if !defined(PROFILING) && defined(DEBUG)
+ switch (rts_argv[arg][2]) {
+ case '\0':
+ case 'L':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_INFOPTR;
+ break;
+ case 'T':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CLOSURE_TYPE;
+ break;
+ default:
+ errorBelch("invalid heap profile option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ }
+#else
+ PROFILING_BUILD_ONLY(
+ switch (rts_argv[arg][2]) {
+ case '\0':
+ case 'C':
+ case 'c':
+ case 'M':
+ case 'm':
+ case 'D':
+ case 'd':
+ case 'Y':
+ case 'y':
+ case 'R':
+ case 'r':
+ case 'B':
+ case 'b':
+ if (rts_argv[arg][2] != '\0' && rts_argv[arg][3] != '\0') {
+ {
+ char *left = strchr(rts_argv[arg], '{');
+ char *right = strrchr(rts_argv[arg], '}');
+
+ // curly braces are optional, for
+ // backwards compat.
+ if (left)
+ left = left+1;
+ else
+ left = rts_argv[arg] + 3;
+
+ if (!right)
+ right = rts_argv[arg] + strlen(rts_argv[arg]);
+
+ // NOTE(review): this truncates the argument string
+ // in place; the selector pointers stored below alias
+ // rts_argv storage, which must stay live for the
+ // lifetime of the program.
+ *right = '\0';
+
+ switch (rts_argv[arg][2]) {
+ case 'c': // cost centre label select
+ RtsFlags.ProfFlags.ccSelector = left;
+ break;
+ case 'C':
+ RtsFlags.ProfFlags.ccsSelector = left;
+ break;
+ case 'M':
+ case 'm': // cost centre module select
+ RtsFlags.ProfFlags.modSelector = left;
+ break;
+ case 'D':
+ case 'd': // closure descr select
+ RtsFlags.ProfFlags.descrSelector = left;
+ break;
+ case 'Y':
+ case 'y': // closure type select
+ RtsFlags.ProfFlags.typeSelector = left;
+ break;
+ case 'R':
+ case 'r': // retainer select
+ RtsFlags.ProfFlags.retainerSelector = left;
+ break;
+ case 'B':
+ case 'b': // biography select
+ RtsFlags.ProfFlags.bioSelector = left;
+ break;
+ }
+ }
+ break;
+ }
+
+ if (RtsFlags.ProfFlags.doHeapProfile != 0) {
+ errorBelch("multiple heap profile options");
+ error = rtsTrue;
+ break;
+ }
+
+ switch (rts_argv[arg][2]) {
+ case '\0':
+ case 'C':
+ case 'c':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
+ break;
+ case 'M':
+ case 'm':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
+ break;
+ case 'D':
+ case 'd':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
+ break;
+ case 'Y':
+ case 'y':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
+ break;
+ case 'R':
+ case 'r':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_RETAINER;
+ break;
+ case 'B':
+ case 'b':
+ RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV;
+ break;
+ }
+ break;
+
+ default:
+ errorBelch("invalid heap profile option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ }
+ )
+#endif /* PROFILING */
+ break;
+
+#if defined(PROFILING)
+ case 'i': /* heap sample interval */
+ if (rts_argv[arg][2] == '\0') {
+ /* use default */
+ } else {
+ I_ cst; /* tmp */
+
+ /* Convert to milliseconds */
+ cst = (I_) ((atof(rts_argv[arg]+2) * 1000));
+ cst = (cst / CS_MIN_MILLISECS) * CS_MIN_MILLISECS;
+ if (cst != 0 && cst < CS_MIN_MILLISECS)
+ cst = CS_MIN_MILLISECS;
+
+ RtsFlags.ProfFlags.profileInterval = cst;
+ }
+ break;
+#endif
+
+ /* =========== CONCURRENT ========================= */
+ case 'C': /* context switch interval */
+ if (rts_argv[arg][2] == '\0')
+ RtsFlags.ConcFlags.ctxtSwitchTime = 0;
+ else {
+ I_ cst; /* tmp */
+
+ /* Convert to milliseconds */
+ cst = (I_) ((atof(rts_argv[arg]+2) * 1000));
+ cst = (cst / CS_MIN_MILLISECS) * CS_MIN_MILLISECS;
+ if (cst != 0 && cst < CS_MIN_MILLISECS)
+ cst = CS_MIN_MILLISECS;
+
+ RtsFlags.ConcFlags.ctxtSwitchTime = cst;
+ }
+ break;
+
+#ifdef THREADED_RTS
+ case 'N':
+ THREADED_BUILD_ONLY(
+ if (rts_argv[arg][2] != '\0') {
+ RtsFlags.ParFlags.nNodes
+ = strtol(rts_argv[arg]+2, (char **) NULL, 10);
+ if (RtsFlags.ParFlags.nNodes <= 0) {
+ errorBelch("bad value for -N");
+ error = rtsTrue;
+ }
+ }
+ ) break;
+
+ case 'q':
+ switch (rts_argv[arg][2]) {
+ case '\0':
+ errorBelch("incomplete RTS option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ break;
+ case 'm':
+ RtsFlags.ParFlags.migrate = rtsFalse;
+ break;
+ case 'w':
+ RtsFlags.ParFlags.wakeupMigrate = rtsTrue;
+ break;
+ default:
+ errorBelch("unknown RTS option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ break;
+ }
+ break;
+#endif
+ /* =========== PARALLEL =========================== */
+ case 'e':
+ PAR_OR_THREADED_BUILD_ONLY(
+ if (rts_argv[arg][2] != '\0') {
+ RtsFlags.ParFlags.maxLocalSparks
+ = strtol(rts_argv[arg]+2, (char **) NULL, 10);
+ if (RtsFlags.ParFlags.maxLocalSparks <= 0) {
+ errorBelch("bad value for -e");
+ error = rtsTrue;
+ }
+ }
+ ) break;
+
+#ifdef PAR
+ case 'q':
+ PAR_BUILD_ONLY(
+ process_par_option(arg, rts_argc, rts_argv, &error);
+ ) break;
+#endif
+
+ /* =========== GRAN =============================== */
+
+ case 'b':
+ GRAN_BUILD_ONLY(
+ process_gran_option(arg, rts_argc, rts_argv, &error);
+ ) break;
+
+ /* =========== TICKY ============================== */
+
+ case 'r': /* Basic profiling stats */
+ TICKY_BUILD_ONLY(
+
+ RtsFlags.TickyFlags.showTickyStats = rtsTrue;
+
+ {
+ int r;
+ r = open_stats_file(arg, *argc, argv,
+ *rts_argc, rts_argv, TICKY_FILENAME_FMT,
+ &RtsFlags.TickyFlags.tickyFile);
+ if (r == -1) { error = rtsTrue; }
+ }
+ ) break;
+
+ /* =========== EXTENDED OPTIONS =================== */
+
+ case 'x': /* Extend the argument space */
+ switch(rts_argv[arg][2]) {
+ case '\0':
+ errorBelch("incomplete RTS option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ break;
+
+ case 'c': /* Debugging tool: show current cost centre on an exception */
+ PROFILING_BUILD_ONLY(
+ RtsFlags.ProfFlags.showCCSOnException = rtsTrue;
+ );
+ break;
+
+ case 't': /* Include memory used by TSOs in a heap profile */
+ PROFILING_BUILD_ONLY(
+ RtsFlags.ProfFlags.includeTSOs = rtsTrue;
+ );
+ break;
+
+ /* The option prefix '-xx' is reserved for future extension. KSW 1999-11. */
+
+ default:
+ errorBelch("unknown RTS option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ break;
+ }
+ break; /* defensive programming */
+
+ /* =========== OH DEAR ============================ */
+ default:
+ errorBelch("unknown RTS option: %s",rts_argv[arg]);
+ error = rtsTrue;
+ break;
+ }
+ }
+ }
+ /* on any parse error: dump the usage table and abort the program */
+ if (error) {
+ const char **p;
+
+ fflush(stdout);
+ for (p = usage_text; *p; p++)
+ errorBelch("%s", *p);
+ stg_exit(EXIT_FAILURE);
+ }
+}
+
+#if defined(GRAN)
+
+//@node GranSim specific options, Aux fcts, Command-line option parsing routines
+//@subsection GranSim specific options
+
+/* Switch the simulator into GrAnSim-Light mode: one processor, all
+ * communication and thread-management costs zeroed, fair scheduling
+ * and eager thread creation enabled.  Invoked from process_gran_option
+ * (the "-b:" flag, and "-bp0"). */
+static void
+enable_GranSimLight(void) {
+
+ debugBelch("GrAnSim Light enabled (infinite number of processors; 0 communication costs)\n");
+ RtsFlags.GranFlags.Light=rtsTrue;
+ /* zero every latency / fetch / (un)block / thread-management cost */
+ RtsFlags.GranFlags.Costs.latency =
+ RtsFlags.GranFlags.Costs.fetchtime =
+ RtsFlags.GranFlags.Costs.additional_latency =
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.lunblocktime =
+ RtsFlags.GranFlags.Costs.threadcreatetime =
+ RtsFlags.GranFlags.Costs.threadqueuetime =
+ RtsFlags.GranFlags.Costs.threadscheduletime =
+ RtsFlags.GranFlags.Costs.threaddescheduletime =
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0;
+
+ RtsFlags.GranFlags.Costs.mpacktime =
+ RtsFlags.GranFlags.Costs.munpacktime = 0;
+
+ RtsFlags.GranFlags.DoFairSchedule = rtsTrue;
+ RtsFlags.GranFlags.DoAsyncFetch = rtsFalse;
+ RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsTrue;
+ /* FetchStrategy is irrelevant in GrAnSim-Light */
+
+ /* GrAnSim Light often creates an abundance of parallel threads,
+ each with its own stack etc. Therefore, it's in general a good
+ idea to use small stack chunks (use the -o<size> option to
+ increase it again).
+ */
+ // RtsFlags.ConcFlags.stkChunkSize = 100;
+
+ RtsFlags.GranFlags.proc = 1;
+}
+
+static void
+process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error)
+{
+ if (rts_argv[arg][1] != 'b') /* All GranSim options start with -b */
+ return;
+
+ /* or a ridiculously idealised simulator */
+ if(strcmp((rts_argv[arg]+2),"oring")==0) {
+ RtsFlags.GranFlags.Costs.latency =
+ RtsFlags.GranFlags.Costs.fetchtime =
+ RtsFlags.GranFlags.Costs.additional_latency =
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.lunblocktime =
+ RtsFlags.GranFlags.Costs.threadcreatetime =
+ RtsFlags.GranFlags.Costs.threadqueuetime =
+ RtsFlags.GranFlags.Costs.threadscheduletime =
+ RtsFlags.GranFlags.Costs.threaddescheduletime =
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0;
+
+ RtsFlags.GranFlags.Costs.mpacktime =
+ RtsFlags.GranFlags.Costs.munpacktime = 0;
+
+ RtsFlags.GranFlags.Costs.arith_cost =
+ RtsFlags.GranFlags.Costs.float_cost =
+ RtsFlags.GranFlags.Costs.load_cost =
+ RtsFlags.GranFlags.Costs.store_cost =
+ RtsFlags.GranFlags.Costs.branch_cost = 0;
+
+ RtsFlags.GranFlags.Costs.heapalloc_cost = 1;
+
+ /* ++RtsFlags.GranFlags.DoFairSchedule; */
+ RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue; /* -bZ */
+ RtsFlags.GranFlags.DoThreadMigration = rtsTrue; /* -bM */
+ RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; /* -bP */
+ return;
+ }
+
+ /* or a somewhat idealised simulator */
+ if(strcmp((rts_argv[arg]+2),"onzo")==0) {
+ RtsFlags.GranFlags.Costs.latency =
+ RtsFlags.GranFlags.Costs.fetchtime =
+ RtsFlags.GranFlags.Costs.additional_latency =
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.lunblocktime =
+ RtsFlags.GranFlags.Costs.threadcreatetime =
+ RtsFlags.GranFlags.Costs.threadqueuetime =
+ RtsFlags.GranFlags.Costs.threadscheduletime =
+ RtsFlags.GranFlags.Costs.threaddescheduletime =
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0;
+
+ RtsFlags.GranFlags.Costs.mpacktime =
+ RtsFlags.GranFlags.Costs.munpacktime = 0;
+
+ RtsFlags.GranFlags.Costs.heapalloc_cost = 1;
+
+ /* RtsFlags.GranFlags.DoFairSchedule = rtsTrue; */ /* -b-R */
+ /* RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue; */ /* -b-T */
+ RtsFlags.GranFlags.DoAsyncFetch = rtsTrue; /* -bZ */
+ RtsFlags.GranFlags.DoThreadMigration = rtsTrue; /* -bM */
+ RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; /* -bP */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ RtsFlags.GranFlags.Debug.event_stats = rtsTrue; /* print event statistics */
+# endif
+ return;
+ }
+
+ /* Communication and task creation cost parameters */
+ switch(rts_argv[arg][2]) {
+ case '.':
+ IgnoreYields = rtsTrue; // HWL HACK
+ break;
+
+ case ':':
+ enable_GranSimLight(); /* set flags for GrAnSim-Light mode */
+ break;
+
+ case 'l':
+ if (rts_argv[arg][3] != '\0')
+ {
+ RtsFlags.GranFlags.Costs.gunblocktime =
+ RtsFlags.GranFlags.Costs.latency = decode(rts_argv[arg]+3);
+ RtsFlags.GranFlags.Costs.fetchtime = 2*RtsFlags.GranFlags.Costs.latency;
+ }
+ else
+ RtsFlags.GranFlags.Costs.latency = LATENCY;
+ break;
+
+ case 'a':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.additional_latency = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY;
+ break;
+
+ case 'm':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.mpacktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME;
+ break;
+
+ case 'x':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.mtidytime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.mtidytime = 0;
+ break;
+
+ case 'r':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.munpacktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME;
+ break;
+
+ case 'g':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.fetchtime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME;
+ break;
+
+ case 'n':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.gunblocktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME;
+ break;
+
+ case 'u':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.lunblocktime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME;
+ break;
+
+ /* Thread-related metrics */
+ case 't':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threadcreatetime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME;
+ break;
+
+ case 'q':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threadqueuetime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME;
+ break;
+
+ case 'c':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threadscheduletime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME;
+
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime
+ + RtsFlags.GranFlags.Costs.threaddescheduletime;
+ break;
+
+ case 'd':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.threaddescheduletime = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME;
+
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime
+ + RtsFlags.GranFlags.Costs.threaddescheduletime;
+ break;
+
+ /* Instruction Cost Metrics */
+ case 'A':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.arith_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST;
+ break;
+
+ case 'F':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.float_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST;
+ break;
+
+ case 'B':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.branch_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST;
+ break;
+
+ case 'L':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.load_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.load_cost = LOAD_COST;
+ break;
+
+ case 'S':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.store_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.store_cost = STORE_COST;
+ break;
+
+ case 'H':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.heapalloc_cost = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.heapalloc_cost = 0;
+ break;
+
+ case 'y':
+ RtsFlags.GranFlags.DoAsyncFetch = rtsTrue;
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.FetchStrategy = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.FetchStrategy = 2;
+ if (RtsFlags.GranFlags.FetchStrategy == 0)
+ RtsFlags.GranFlags.DoAsyncFetch = rtsFalse;
+ break;
+
+ case 'K': /* sort overhead (per elem in spark list) */
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.pri_spark_overhead = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD;
+ debugBelch("Overhead for pri spark: %d (per elem).\n",
+ RtsFlags.GranFlags.Costs.pri_spark_overhead);
+ break;
+
+ case 'O': /* sort overhead (per elem in spark list) */
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.Costs.pri_sched_overhead = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD;
+ debugBelch("Overhead for pri sched: %d (per elem).\n",
+ RtsFlags.GranFlags.Costs.pri_sched_overhead);
+ break;
+
+ /* General Parameters */
+ case 'p':
+ if (rts_argv[arg][3] != '\0')
+ {
+ RtsFlags.GranFlags.proc = decode(rts_argv[arg]+3);
+ if (RtsFlags.GranFlags.proc==0) {
+ enable_GranSimLight(); /* set flags for GrAnSim-Light mode */
+ } else if (RtsFlags.GranFlags.proc > MAX_PROC ||
+ RtsFlags.GranFlags.proc < 1)
+ {
+ debugBelch("setupRtsFlags: no more than %u processors allowed\n",
+ MAX_PROC);
+ *error = rtsTrue;
+ }
+ }
+ else
+ RtsFlags.GranFlags.proc = MAX_PROC;
+ break;
+
+ case 'f':
+ RtsFlags.GranFlags.Fishing = rtsTrue;
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.maxFishes = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.maxFishes = MAX_FISHES;
+ break;
+
+ case 'w':
+ if (rts_argv[arg][3] != '\0')
+ RtsFlags.GranFlags.time_slice = decode(rts_argv[arg]+3);
+ else
+ RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE;
+ break;
+
+ case 'C':
+ RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsTrue;
+ RtsFlags.GranFlags.DoThreadMigration=rtsTrue;
+ break;
+
+ case 'G':
+ debugBelch("Bulk fetching enabled.\n");
+ RtsFlags.GranFlags.DoBulkFetching=rtsTrue;
+ break;
+
+ case 'M':
+ debugBelch("Thread migration enabled.\n");
+ RtsFlags.GranFlags.DoThreadMigration=rtsTrue;
+ break;
+
+ case 'R':
+ debugBelch("Fair Scheduling enabled.\n");
+ RtsFlags.GranFlags.DoFairSchedule=rtsTrue;
+ break;
+
+ case 'I':
+ debugBelch("Priority Scheduling enabled.\n");
+ RtsFlags.GranFlags.DoPriorityScheduling=rtsTrue;
+ break;
+
+ case 'T':
+ RtsFlags.GranFlags.DoStealThreadsFirst=rtsTrue;
+ RtsFlags.GranFlags.DoThreadMigration=rtsTrue;
+ break;
+
+ case 'Z':
+ RtsFlags.GranFlags.DoAsyncFetch=rtsTrue;
+ break;
+
+/* case 'z': */
+/* RtsFlags.GranFlags.SimplifiedFetch=rtsTrue; */
+/* break; */
+
+ case 'N':
+ RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsTrue;
+ break;
+
+ case 'b':
+ RtsFlags.GranFlags.GranSimStats.Binary=rtsTrue;
+ break;
+
+ case 'P':
+ /* format is -bP<c> where <c> is one char describing kind of profile */
+ RtsFlags.GranFlags.GranSimStats.Full = rtsTrue;
+ switch(rts_argv[arg][3]) {
+ case '\0': break; // nothing special, just an ordinary profile
+ case '0': RtsFlags.GranFlags.GranSimStats.Suppressed = rtsTrue;
+ break;
+ case 'b': RtsFlags.GranFlags.GranSimStats.Binary = rtsTrue;
+ break;
+ case 's': RtsFlags.GranFlags.GranSimStats.Sparks = rtsTrue;
+ break;
+ case 'h': RtsFlags.GranFlags.GranSimStats.Heap = rtsTrue;
+ break;
+ case 'n': RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsTrue;
+ break;
+ case 'g': RtsFlags.GranFlags.GranSimStats.Global = rtsTrue;
+ break;
+ default: barf("Unknown option -bP%c", rts_argv[arg][3]);
+ }
+ break;
+
+ case 's':
+ RtsFlags.GranFlags.GranSimStats.Sparks=rtsTrue;
+ break;
+
+ case 'h':
+ RtsFlags.GranFlags.GranSimStats.Heap=rtsTrue;
+ break;
+
+ case 'Y': /* syntax: -bY<n>[,<n>] n ... pos int */
+ if (rts_argv[arg][3] != '\0') {
+ char *arg0, *tmp;
+
+ arg0 = rts_argv[arg]+3;
+ if ((tmp = strstr(arg0,","))==NULL) {
+ RtsFlags.GranFlags.SparkPriority = decode(arg0);
+ debugBelch("SparkPriority: %u.\n",RtsFlags.GranFlags.SparkPriority);
+ } else {
+ *(tmp++) = '\0';
+ RtsFlags.GranFlags.SparkPriority = decode(arg0);
+ RtsFlags.GranFlags.SparkPriority2 = decode(tmp);
+ debugBelch("SparkPriority: %u.\n",
+ RtsFlags.GranFlags.SparkPriority);
+ debugBelch("SparkPriority2:%u.\n",
+ RtsFlags.GranFlags.SparkPriority2);
+ if (RtsFlags.GranFlags.SparkPriority2 <
+ RtsFlags.GranFlags.SparkPriority) {
+ debugBelch("WARNING: 2nd pri < main pri (%u<%u); 2nd pri has no effect\n",
+ RtsFlags.GranFlags.SparkPriority2,
+ RtsFlags.GranFlags.SparkPriority);
+ }
+ }
+ } else {
+ /* plain pri spark is now invoked with -bX
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ debugBelch("PrioritySparking.\n");
+ */
+ }
+ break;
+
+ case 'Q':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.GranFlags.ThunksToPack = decode(rts_argv[arg]+3);
+ } else {
+ RtsFlags.GranFlags.ThunksToPack = 1;
+ }
+ debugBelch("Thunks To Pack in one packet: %u.\n",
+ RtsFlags.GranFlags.ThunksToPack);
+ break;
+
+ case 'e':
+ RtsFlags.GranFlags.RandomSteal = rtsFalse;
+ debugBelch("Deterministic mode (no random stealing)\n");
+ break;
+
+ /* The following class of options contains eXperimental */
+ /* features in connection with exploiting granularity */
+ /* information. I.e. if -bY is chosen these options */
+ /* tell the RTS what to do with the supplied info --HWL */
+
+ case 'W':
+ if (rts_argv[arg][3] != '\0') {
+ RtsFlags.GranFlags.packBufferSize_internal = decode(rts_argv[arg]+3);
+ } else {
+ RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE;
+ }
+ debugBelch("Size of GranSim internal pack buffer: %u.\n",
+ RtsFlags.GranFlags.packBufferSize_internal);
+ break;
+
+ case 'X':
+ switch(rts_argv[arg][3]) {
+
+ case '\0':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ debugBelch("Priority Sparking with Normal Priorities.\n");
+ RtsFlags.GranFlags.InversePriorities = rtsFalse;
+ RtsFlags.GranFlags.RandomPriorities = rtsFalse;
+ RtsFlags.GranFlags.IgnorePriorities = rtsFalse;
+ break;
+
+ case 'I':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ debugBelch("Priority Sparking with Inverse Priorities.\n");
+ RtsFlags.GranFlags.InversePriorities++;
+ break;
+
+ case 'R':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ debugBelch("Priority Sparking with Random Priorities.\n");
+ RtsFlags.GranFlags.RandomPriorities++;
+ break;
+
+ case 'N':
+ RtsFlags.GranFlags.DoPrioritySparking = 1;
+ debugBelch("Priority Sparking with No Priorities.\n");
+ RtsFlags.GranFlags.IgnorePriorities++;
+ break;
+
+ default:
+ bad_option( rts_argv[arg] );
+ break;
+ }
+ break;
+
+ case '-':
+ switch(rts_argv[arg][3]) {
+
+ case 'C':
+ RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsFalse;
+ RtsFlags.GranFlags.DoThreadMigration=rtsFalse;
+ break;
+
+ case 'G':
+ RtsFlags.GranFlags.DoBulkFetching=rtsFalse;
+ break;
+
+ case 'M':
+ RtsFlags.GranFlags.DoThreadMigration=rtsFalse;
+ break;
+
+ case 'R':
+ RtsFlags.GranFlags.DoFairSchedule=rtsFalse;
+ break;
+
+ case 'T':
+ RtsFlags.GranFlags.DoStealThreadsFirst=rtsFalse;
+ RtsFlags.GranFlags.DoThreadMigration=rtsFalse;
+ break;
+
+ case 'Z':
+ RtsFlags.GranFlags.DoAsyncFetch=rtsFalse;
+ break;
+
+ case 'N':
+ RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsFalse;
+ break;
+
+ case 'P':
+ RtsFlags.GranFlags.GranSimStats.Suppressed=rtsTrue;
+ break;
+
+ case 's':
+ RtsFlags.GranFlags.GranSimStats.Sparks=rtsFalse;
+ break;
+
+ case 'h':
+ RtsFlags.GranFlags.GranSimStats.Heap=rtsFalse;
+ break;
+
+ case 'b':
+ RtsFlags.GranFlags.GranSimStats.Binary=rtsFalse;
+ break;
+
+ case 'X':
+ RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
+ break;
+
+ case 'Y':
+ RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;
+ RtsFlags.GranFlags.SparkPriority = rtsFalse;
+ break;
+
+ case 'I':
+ RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse;
+ break;
+
+ case 'e':
+ RtsFlags.GranFlags.RandomSteal = rtsFalse;
+ break;
+
+ default:
+ bad_option( rts_argv[arg] );
+ break;
+ }
+ break;
+
+# if defined(GRAN_CHECK) && defined(GRAN)
+ case 'D':
+ switch(rts_argv[arg][3]) {
+ case 'Q': /* Set pack buffer size (same as 'Q' in GUM) */
+ if (rts_argv[arg][4] != '\0') {
+ RtsFlags.GranFlags.packBufferSize = decode(rts_argv[arg]+4);
+ debugBelch("Pack buffer size: %d\n",
+ RtsFlags.GranFlags.packBufferSize);
+ } else {
+ debugBelch("setupRtsFlags: missing size of PackBuffer (for -Q)\n");
+ *error = rtsTrue;
+ }
+ break;
+
+ default:
+ if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */
+ /* hack warning: interpret the flags as a binary number */
+ nat n = decode(rts_argv[arg]+3);
+ set_GranSim_debug_options(n);
+ } else {
+ nat i;
+ for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)
+ if (rts_argv[arg][3] == gran_debug_opts_flags[i])
+ break;
+
+ if (i==MAX_GRAN_DEBUG_OPTION+1) {
+ debugBelch("Valid GranSim debug options are:\n");
+ help_GranSim_debug_options(MAX_GRAN_DEBUG_MASK);
+ bad_option( rts_argv[arg] );
+ } else { // flag found; now set it
+ set_GranSim_debug_options(GRAN_DEBUG_MASK(i)); // 2^i
+ }
+ }
+ break;
+
+#if 0
+ case 'e': /* event trace; also -bD1 */
+ debugBelch("DEBUG: event_trace; printing event trace.\n");
+ RtsFlags.GranFlags.Debug.event_trace = rtsTrue;
+ /* RtsFlags.GranFlags.event_trace=rtsTrue; */
+ break;
+
+ case 'E': /* event statistics; also -bD2 */
+ debugBelch("DEBUG: event_stats; printing event statistics.\n");
+ RtsFlags.GranFlags.Debug.event_stats = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x20; print event statistics */
+ break;
+
+ case 'f': /* thunkStealing; also -bD4 */
+ debugBelch("DEBUG: thunkStealing; printing forwarding of FETCHNODES.\n");
+ RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x2; print fwd messages */
+ break;
+
+ case 'z': /* blockOnFetch; also -bD8 */
+ debugBelch("DEBUG: blockOnFetch; check for blocked on fetch.\n");
+ RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x4; debug non-reschedule-on-fetch */
+ break;
+
+ case 't': /* blockOnFetch_sanity; also -bD16 */
+ debugBelch("DEBUG: blockOnFetch_sanity; check for TSO asleep on fetch.\n");
+ RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue;
+ /* RtsFlags.GranFlags.Debug |= 0x10; debug TSO asleep for fetch */
+ break;
+
+ case 'S': /* priSpark; also -bD32 */
+ debugBelch("DEBUG: priSpark; priority sparking.\n");
+ RtsFlags.GranFlags.Debug.priSpark = rtsTrue;
+ break;
+
+ case 's': /* priSched; also -bD64 */
+ debugBelch("DEBUG: priSched; priority scheduling.\n");
+ RtsFlags.GranFlags.Debug.priSched = rtsTrue;
+ break;
+
+ case 'F': /* findWork; also -bD128 */
+ debugBelch("DEBUG: findWork; searching spark-pools (local & remote), thread queues for work.\n");
+ RtsFlags.GranFlags.Debug.findWork = rtsTrue;
+ break;
+
+ case 'g': /* globalBlock; also -bD256 */
+ debugBelch("DEBUG: globalBlock; blocking on remote closures (FETCHMEs etc in GUM).\n");
+ RtsFlags.GranFlags.Debug.globalBlock = rtsTrue;
+ break;
+
+ case 'G': /* pack; also -bD512 */
+ debugBelch("DEBUG: pack; routines for (un-)packing graph structures.\n");
+ RtsFlags.GranFlags.Debug.pack = rtsTrue;
+ break;
+
+ case 'P': /* packBuffer; also -bD1024 */
+ debugBelch("DEBUG: packBuffer; routines handling pack buffer (GranSim internal!).\n");
+ RtsFlags.GranFlags.Debug.packBuffer = rtsTrue;
+ break;
+
+ case 'o': /* sortedQ; also -bD2048 */
+ debugBelch("DEBUG: sortedQ; check whether spark/thread queues are sorted.\n");
+ RtsFlags.GranFlags.Debug.sortedQ = rtsTrue;
+ break;
+
+ case 'r': /* randomSteal; also -bD4096 */
+ debugBelch("DEBUG: randomSteal; stealing sparks/threads from random PEs.\n");
+ RtsFlags.GranFlags.Debug.randomSteal = rtsTrue;
+ break;
+
+ case 'q': /* checkSparkQ; also -bD8192 */
+ debugBelch("DEBUG: checkSparkQ; check consistency of the spark queues.\n");
+ RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue;
+ break;
+
+ case ':': /* checkLight; also -bD16384 */
+ debugBelch("DEBUG: checkLight; check GranSim-Light setup.\n");
+ RtsFlags.GranFlags.Debug.checkLight = rtsTrue;
+ break;
+
+ case 'b': /* bq; also -bD32768 */
+ debugBelch("DEBUG: bq; check blocking queues\n");
+ RtsFlags.GranFlags.Debug.bq = rtsTrue;
+ break;
+
+ case 'd': /* all options turned on */
+ debugBelch("DEBUG: all options turned on.\n");
+ set_GranSim_debug_options(MAX_GRAN_DEBUG_MASK);
+ /* RtsFlags.GranFlags.Debug |= 0x40; */
+ break;
+
+/* case '\0': */
+/* RtsFlags.GranFlags.Debug = 1; */
+/* break; */
+#endif
+
+ }
+ break;
+# endif /* GRAN_CHECK */
+ default:
+ bad_option( rts_argv[arg] );
+ break;
+ }
+}
+
+/*
+ Interpret n as a binary number masking GranSim debug options and set the
+  corresponding option. See gran_debug_opts_strs for explanations of the flags.
+*/
+/* Turn on every GranSim debug option whose bit is set in the mask n,
+   printing the one-line description of each as it is enabled. */
+static void
+set_GranSim_debug_options(nat n) {
+  nat i;
+
+  for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)
+    if ((n>>i)&1) {
+      /* "%s" guard: never use the description itself as a format string */
+      errorBelch("%s", gran_debug_opts_strs[i]);
+      switch (i) {
+        case 0: RtsFlags.GranFlags.Debug.event_trace = rtsTrue; break;
+        case 1: RtsFlags.GranFlags.Debug.event_stats = rtsTrue; break;
+        case 2: RtsFlags.GranFlags.Debug.bq = rtsTrue; break;
+        case 3: RtsFlags.GranFlags.Debug.pack = rtsTrue; break;
+        case 4: RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue; break;
+        case 5: RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue; break;
+        case 6: RtsFlags.GranFlags.Debug.randomSteal = rtsTrue; break;
+        case 7: RtsFlags.GranFlags.Debug.findWork = rtsTrue; break;
+        case 8: RtsFlags.GranFlags.Debug.unused = rtsTrue; break;
+        case 9: RtsFlags.GranFlags.Debug.pri = rtsTrue; break;
+        case 10: RtsFlags.GranFlags.Debug.checkLight = rtsTrue; break;
+        case 11: RtsFlags.GranFlags.Debug.sortedQ = rtsTrue; break;
+        case 12: RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue; break;
+        case 13: RtsFlags.GranFlags.Debug.packBuffer = rtsTrue; break;
+        case 14: RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue; break;
+        /* BUGFIX: the "%d" previously had no matching argument (UB);
+           supply the count, as the PAR variant of this function does */
+        default: barf("set_GranSim_debug_options: only %d debug options expected",
+                      MAX_GRAN_DEBUG_OPTION);
+      } /* switch */
+    } /* if */
+}
+
+/*
+ Print one line explanation for each of the GranSim debug options specified
+ in the bitmask n.
+*/
+/* Print the one-line explanation of each GranSim debug option whose
+   bit is set in the mask n. */
+static void
+help_GranSim_debug_options(nat n) {
+  nat i;
+
+  for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)
+    if ((n>>i)&1)
+      /* "%s" guard: never use the description itself as a format string */
+      debugBelch("%s", gran_debug_opts_strs[i]);
+}
+
+# elif defined(PAR)
+
+/*
+  Process one GUM (parallel) RTS option; all GUM options begin with -q.
+  Malformed values set *error so that the caller can abort startup;
+  unknown options are merely reported.
+*/
+static void
+process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error)
+{
+
+  if (rts_argv[arg][1] != 'q') { /* All GUM options start with -q */
+    errorBelch("Warning: GUM option does not start with -q: %s", rts_argv[arg]);
+    return;
+  }
+
+  /* Communication and task creation cost parameters */
+  switch(rts_argv[arg][2]) {
+  case 'e':  /* -qe<n>  ... allow <n> local sparks */
+    if (rts_argv[arg][3] != '\0') { /* otherwise, stick w/ the default */
+      RtsFlags.ParFlags.maxLocalSparks
+        = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+
+      if (RtsFlags.ParFlags.maxLocalSparks <= 0) {
+        errorBelch("setupRtsFlags: bad value for -e\n");
+        *error = rtsTrue;
+      }
+    }
+    IF_PAR_DEBUG(verbose,
+                 errorBelch("-qe<n>: max %d local sparks",
+                            RtsFlags.ParFlags.maxLocalSparks));
+    break;
+
+  case 't':  /* -qt<n> ... upper bound on number of threads */
+    if (rts_argv[arg][3] != '\0') {
+      RtsFlags.ParFlags.maxThreads
+        = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+    } else {
+      errorBelch("missing size for -qt\n");
+      *error = rtsTrue;
+    }
+    IF_PAR_DEBUG(verbose,
+                 errorBelch("-qt<n>: max %d threads",
+                            RtsFlags.ParFlags.maxThreads));
+    break;
+
+  case 'f':  /* -qf<n> ... max number of fishes out at one time */
+    if (rts_argv[arg][3] != '\0')
+      RtsFlags.ParFlags.maxFishes = decode(rts_argv[arg]+3);
+    else
+      RtsFlags.ParFlags.maxFishes = MAX_FISHES;
+    /* BUGFIX: a stray "break" used to sit before this debug message,
+       making it unreachable */
+    IF_PAR_DEBUG(verbose,
+                 errorBelch("-qf<n>: max %d fishes sent out at one time",
+                            RtsFlags.ParFlags.maxFishes));
+    break;
+
+  case 'F':  /* -qF<n> ... fish delay time (us) */
+    if (rts_argv[arg][3] != '\0') {
+      RtsFlags.ParFlags.fishDelay
+        = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+    } else {
+      errorBelch("missing fish delay time for -qF\n");
+      *error = rtsTrue;
+    }
+    IF_PAR_DEBUG(verbose,
+                 errorBelch("-qF<n>: fish delay time %d us",
+                            RtsFlags.ParFlags.fishDelay));
+    break;
+
+  case 'O':  /* -qO ... disable output */
+    RtsFlags.ParFlags.outputDisabled = rtsTrue;
+    IF_PAR_DEBUG(verbose,
+                 errorBelch("-qO: output disabled"));
+    break;
+
+  case 'g': /* -qg<n> ... globalisation scheme */
+    if (rts_argv[arg][3] != '\0') {
+      RtsFlags.ParFlags.globalising = decode(rts_argv[arg]+3);
+    } else {
+      errorBelch("missing identifier for globalisation scheme (for -qg)\n");
+      *error = rtsTrue;
+    }
+    IF_PAR_DEBUG(verbose,
+                 debugBelch("-qg<n>: globalisation scheme set to %d",
+                            RtsFlags.ParFlags.globalising));
+    break;
+
+  case 'h': /* -qh<n> ... max number of thunks (except root) in packet */
+    if (rts_argv[arg][3] != '\0') {
+      RtsFlags.ParFlags.thunksToPack = decode(rts_argv[arg]+3);
+    } else {
+      errorBelch("missing number of thunks per packet (for -qh)\n");
+      *error = rtsTrue;
+    }
+    IF_PAR_DEBUG(verbose,
+                 debugBelch("-qh<n>: thunks per packet set to %d",
+                            RtsFlags.ParFlags.thunksToPack));
+    break;
+
+  case 'P': /* -qP for writing a log file */
+    //RtsFlags.ParFlags.ParStats.Full = rtsFalse;
+    /* same encoding as in GranSim after -bP */
+    switch(rts_argv[arg][3]) {
+    case '\0': RtsFlags.ParFlags.ParStats.Full = rtsTrue;
+      break; // nothing special, just an ordinary profile
+    case '0': RtsFlags.ParFlags.ParStats.Suppressed = rtsTrue;
+      RtsFlags.ParFlags.ParStats.Full = rtsFalse;
+      break;
+    case 'b': RtsFlags.ParFlags.ParStats.Binary = rtsTrue;
+      break;
+    case 's': RtsFlags.ParFlags.ParStats.Sparks = rtsTrue;
+      break;
+    //case 'h': RtsFlags.parFlags.ParStats.Heap = rtsTrue;
+    //  break;
+    case 'n': RtsFlags.ParFlags.ParStats.NewLogfile = rtsTrue;
+      break;
+    case 'g':
+# if defined(PAR_TICKY)
+      RtsFlags.ParFlags.ParStats.Global = rtsTrue;
+# else
+      errorBelch("-qPg is only possible for a PAR_TICKY RTS, which this is not");
+      stg_exit(EXIT_FAILURE);
+# endif
+      break;
+      /* BUGFIX: report the sub-option character ([3]); [2] always
+         printed 'P' itself */
+    default: barf("Unknown option -qP%c", rts_argv[arg][3]);
+    }
+    IF_PAR_DEBUG(verbose,
+                 debugBelch("(-qP) writing to log-file (RtsFlags.ParFlags.ParStats.Full=%s)",
+                            (RtsFlags.ParFlags.ParStats.Full ? "rtsTrue" : "rtsFalse")));
+    break;
+
+  case 'Q': /* -qQ<n> ... set pack buffer size to <n> */
+    if (rts_argv[arg][3] != '\0') {
+      RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+3);
+    } else {
+      errorBelch("missing size of PackBuffer (for -qQ)\n");
+      *error = rtsTrue;
+    }
+    IF_PAR_DEBUG(verbose,
+                 debugBelch("-qQ<n>: pack buffer size set to %d",
+                            RtsFlags.ParFlags.packBufferSize));
+    break;
+
+  case 'R':  /* -qR ... fair-ish scheduling */
+    RtsFlags.ParFlags.doFairScheduling = rtsTrue;
+    IF_PAR_DEBUG(verbose,
+                 debugBelch("-qR: fair-ish scheduling"));
+    break;
+
+# if defined(DEBUG)
+  case 'w':  /* -qw<n> ... wait-loop length after synchronisation */
+    if (rts_argv[arg][3] != '\0') {
+      RtsFlags.ParFlags.wait
+        = strtol(rts_argv[arg]+3, (char **) NULL, 10);
+    } else {
+      RtsFlags.ParFlags.wait = 1000;
+    }
+    IF_PAR_DEBUG(verbose,
+                 debugBelch("-qw<n>: length of wait loop after synchr before reduction: %d",
+                            RtsFlags.ParFlags.wait));
+    break;
+
+  case 'D':  /* -qD ... all the debugging options */
+    if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */
+      /* hack warning: interpret the flags as a binary number */
+      nat n = decode(rts_argv[arg]+3);
+      set_par_debug_options(n);
+    } else {
+      nat i;
+      for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)
+        if (rts_argv[arg][3] == par_debug_opts_flags[i])
+          break;
+
+      if (i==MAX_PAR_DEBUG_OPTION+1) {
+        errorBelch("Valid GUM debug options are:\n");
+        help_par_debug_options(MAX_PAR_DEBUG_MASK);
+        bad_option( rts_argv[arg] );
+      } else { // flag found; now set it
+        set_par_debug_options(PAR_DEBUG_MASK(i));  // 2^i
+      }
+    }
+    break;
+# endif
+  default:
+    errorBelch("Unknown option -q%c (%d opts in total)",
+               rts_argv[arg][2], *rts_argc);
+    break;
+  } /* switch */
+}
+
+/*
+ Interpret n as a binary number masking Par debug options and set the
+  corresponding option. See par_debug_opts_strs for explanations of the flags.
+*/
+/* Turn on every GUM (parallel) debug option whose bit is set in the
+   mask n, printing the one-line description of each as it is enabled. */
+static void
+set_par_debug_options(nat n) {
+  nat i;
+
+  for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)
+    if ((n>>i)&1) {
+      /* "%s" guard: never use the description itself as a format string */
+      debugBelch("%s", par_debug_opts_strs[i]);
+      switch (i) {
+        case 0: RtsFlags.ParFlags.Debug.verbose = rtsTrue; break;
+        case 1: RtsFlags.ParFlags.Debug.bq = rtsTrue; break;
+        case 2: RtsFlags.ParFlags.Debug.schedule = rtsTrue; break;
+        case 3: RtsFlags.ParFlags.Debug.free = rtsTrue; break;
+        case 4: RtsFlags.ParFlags.Debug.resume = rtsTrue; break;
+        case 5: RtsFlags.ParFlags.Debug.weight = rtsTrue; break;
+        case 6: RtsFlags.ParFlags.Debug.fetch = rtsTrue; break;
+        //case 7: RtsFlags.ParFlags.Debug.ack = rtsTrue; break;
+        case 7: RtsFlags.ParFlags.Debug.fish = rtsTrue; break;
+        case 8: RtsFlags.ParFlags.Debug.tables = rtsTrue; break;
+        case 9: RtsFlags.ParFlags.Debug.packet = rtsTrue; break;
+        case 10: RtsFlags.ParFlags.Debug.pack = rtsTrue; break;
+        case 11: RtsFlags.ParFlags.Debug.paranoia = rtsTrue; break;
+        default: barf("set_par_debug_options: only %d debug options expected",
+                      MAX_PAR_DEBUG_OPTION);
+      } /* switch */
+    } /* if */
+}
+
+/*
+  Print one line of explanation for each of the GUM (parallel) debug options
+ in the bitmask n.
+*/
+/* Print the one-line explanation of each GUM debug option whose bit
+   is set in the mask n. */
+static void
+help_par_debug_options(nat n) {
+  nat i;
+
+  for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)
+    if ((n>>i)&1)
+      /* "%s" guard: never use the description itself as a format string */
+      debugBelch("%s", par_debug_opts_strs[i]);
+}
+
+#endif /* PAR */
+
+//@node Aux fcts, , GranSim specific options
+//@subsection Aux fcts
+
+/* printf-style output for the stats machinery: writes to the stream f,
+   or to the RTS debug channel when f is NULL. */
+static void
+stats_fprintf(FILE *f, char *s, ...)
+{
+    va_list args;
+
+    va_start(args, s);
+    if (f != NULL) {
+        vfprintf(f, s, args);
+    } else {
+        /* a NULL stream means "send it to debugBelch" */
+        vdebugBelch(s, args);
+    }
+    va_end(args);
+}
+
+/* Open the statistics output file named by RTS option `arg` (or the
+   default <program>.<ext> given by FILENAME_FMT), write the full
+   command line into it, and return it via *file_ret.  A NULL stream in
+   *file_ret means "use debugBelch".  Returns -1 if the file could not
+   be opened, 0 otherwise. */
+static int /* return -1 on error */
+open_stats_file (
+    I_ arg,
+    int argc, char *argv[],
+    int rts_argc, char *rts_argv[],
+    const char *FILENAME_FMT,
+    FILE **file_ret)
+{
+    FILE *f = NULL;
+
+    if (strequal(rts_argv[arg]+2, "stderr")) { /* use debugBelch */
+        f = NULL; /* NULL means use debugBelch */
+    } else {
+        char stats_filename[STATS_FILENAME_MAXLEN]; /* default <program>.<ext> */
+        const char *name;
+
+        if (rts_argv[arg][2] != '\0') { /* stats file specified */
+            name = rts_argv[arg]+2;
+        } else {
+            /* snprintf, not sprintf: argv[0] may exceed the buffer */
+            snprintf(stats_filename, STATS_FILENAME_MAXLEN,
+                     FILENAME_FMT, argv[0]);
+            name = stats_filename;
+        }
+        f = fopen(name,"w");
+        if (f == NULL) {
+            /* BUGFIX: report the name we actually tried to open; the old
+               code always printed the (possibly empty) argument text */
+            errorBelch("Can't open stats file %s\n", name);
+            return -1;
+        }
+    }
+    *file_ret = f;
+
+    {
+        /* Write argv and rtsv into start of stats file */
+        int count;
+        for(count = 0; count < argc; count++) {
+            stats_fprintf(f, "%s ", argv[count]);
+        }
+        stats_fprintf(f, "+RTS ");
+        for(count = 0; count < rts_argc; count++)
+            stats_fprintf(f, "%s ", rts_argv[count]);
+        stats_fprintf(f, "\n");
+    }
+    return 0;
+}
+
+
+
+/* Parse a numeric RTS-option value with an optional size suffix:
+   g/G = 10^9, m/M = 10^6, k/K = 10^3, w/W = word size.  An empty
+   string decodes to 0. */
+static I_
+decode(const char *s)
+{
+    StgDouble val;
+
+    if (*s == '\0')
+        return 0;
+
+    val = atof(s);
+
+    /* Decimal (not 1024-based) multipliers, to avoid possible bad
+       effects on a direct-mapped cache; the 'g' case is UNchecked
+       for overflow. */
+    switch (s[strlen(s)-1]) {
+    case 'g': case 'G': val *= 1000*1000*1000; break;
+    case 'm': case 'M': val *= 1000*1000;      break;
+    case 'k': case 'K': val *= 1000;           break;
+    case 'w': case 'W': val *= sizeof(W_);     break;
+    default:            break; /* plain number, no suffix */
+    }
+
+    return (I_)val;
+}
+
+/* Report an unrecognised or malformed RTS option and terminate the
+   program with a failure exit code. */
+static void
+bad_option(const char *s)
+{
+ errorBelch("bad RTS option: %s", s);
+ stg_exit(EXIT_FAILURE);
+}
+
+/* -----------------------------------------------------------------------------
+ Getting/Setting the program's arguments.
+
+ These are used by System.Environment, and parts of the RTS.
+ -------------------------------------------------------------------------- */
+
+/* Set prog_name to the basename of argv[0], so that default output
+   files are created in the current directory. */
+void
+setProgName(char *argv[])
+{
+#if !defined(mingw32_HOST_OS)
+    char *sep = strrchr(argv[0], '/');
+    prog_name = (sep == NULL) ? argv[0] : sep + 1;
+#else
+    /* On Windows both '/' and '\\' separate path components, so scan
+       backwards for whichever occurs last. */
+    char *p;
+    for (p = argv[0] + strlen(argv[0]) - 1; p > argv[0]; p--) {
+        if (*p == '/' || *p == '\\') {
+            prog_name = p + 1;
+            return;
+        }
+    }
+    prog_name = argv[0];
+#endif
+}
+
+/* Return the program's saved argument count/vector (as recorded at
+   startup or by setProgArgv).  Either out-parameter may be NULL if the
+   caller is not interested in that value. */
+void
+getProgArgv(int *argc, char **argv[])
+{
+ if (argc) { *argc = prog_argc; }
+ if (argv) { *argv = prog_argv; }
+}
+
+/* Record the arguments later returned by getProgArgv, and recompute
+   prog_name from the new argv[0].  NOTE: the argv array is stored, not
+   copied — the caller must keep it alive. */
+void
+setProgArgv(int argc, char *argv[])
+{
+ /* Usually this is done by startupHaskell, so we don't need to call this.
+ However, sometimes Hugs wants to change the arguments which Haskell
+ getArgs >>= ... will be fed. So you can do that by calling here
+ _after_ calling startupHaskell.
+ */
+ prog_argc = argc;
+ prog_argv = argv;
+ setProgName(prog_argv);
+}
diff --git a/rts/RtsMessages.c b/rts/RtsMessages.c
new file mode 100644
index 0000000000..1242d886eb
--- /dev/null
+++ b/rts/RtsMessages.c
@@ -0,0 +1,201 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * General utility functions used in the RTS.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+#include <stdio.h>
+
+#ifdef HAVE_WINDOWS_H
+#include <windows.h>
+#endif
+
+/* -----------------------------------------------------------------------------
+ General message generation functions
+
+ All messages should go through here. We can't guarantee that
+ stdout/stderr will be available - e.g. in a Windows program there
+ is no console for generating messages, so they have to either go to
+ to the debug console, or pop up message boxes.
+ -------------------------------------------------------------------------- */
+
+// Default to the stdio implementation of these hooks.
+RtsMsgFunction *fatalInternalErrorFn = rtsFatalInternalErrorFn;
+RtsMsgFunction *debugMsgFn = rtsDebugMsgFn;
+RtsMsgFunction *errorMsgFn = rtsErrorMsgFn;
+
+void
+barf(char *s, ...)
+{
+ va_list ap;
+ va_start(ap,s);
+ (*fatalInternalErrorFn)(s,ap);
+ stg_exit(EXIT_INTERNAL_ERROR); // just in case fatalInternalErrorFn() returns
+ va_end(ap);
+}
+
+/* va_list variant of barf(): report a fatal internal error via the
+   installed hook and terminate.  The caller owns ap (and its va_end). */
+void
+vbarf(char *s, va_list ap)
+{
+ (*fatalInternalErrorFn)(s,ap);
+ stg_exit(EXIT_INTERNAL_ERROR); // just in case fatalInternalErrorFn() returns
+}
+
+/* Called when an ASSERT fails: reports the source location and dies
+   via barf(). */
+void
+_assertFail(char *filename, unsigned int linenum)
+{
+ barf("ASSERTION FAILED: file %s, line %u\n", filename, linenum);
+}
+
+void
+errorBelch(char *s, ...)
+{
+ va_list ap;
+ va_start(ap,s);
+ (*errorMsgFn)(s,ap);
+ va_end(ap);
+}
+
+/* va_list variant of errorBelch(): forward the message to the
+   installed errorMsgFn hook.  The caller owns ap. */
+void
+verrorBelch(char *s, va_list ap)
+{
+ (*errorMsgFn)(s,ap);
+}
+
+/* Emit a debug message through the pluggable debugMsgFn hook
+   (stdio-based rtsDebugMsgFn by default). */
+void
+debugBelch(char *s, ...)
+{
+  va_list args;
+
+  va_start(args, s);
+  (*debugMsgFn)(s, args);
+  va_end(args);
+}
+
+/* va_list variant of debugBelch(): forward the message to the
+   installed debugMsgFn hook.  The caller owns ap. */
+void
+vdebugBelch(char *s, va_list ap)
+{
+ (*debugMsgFn)(s,ap);
+}
+
+/* -----------------------------------------------------------------------------
+ stdio versions of the message functions
+ -------------------------------------------------------------------------- */
+
+#define BUFSIZE 512
+
+#if defined(cygwin32_TARGET_OS) || defined (mingw32_TARGET_OS)
+/* Windows only: return non-zero if the running image was linked as a
+   GUI-subsystem application (and therefore has no console to print
+   messages to).  Inspects the PE header of the current module. */
+static int
+isGUIApp(void)             /* (void): C prototype, not old-style () */
+{
+    PIMAGE_DOS_HEADER pDOSHeader;
+    PIMAGE_NT_HEADERS pPEHeader;
+
+    /* the module handle of the current image points at its DOS header */
+    pDOSHeader = (PIMAGE_DOS_HEADER) GetModuleHandleA(NULL);
+    if (pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE)
+        return 0;
+
+    /* e_lfanew is the byte offset of the PE header from the DOS header */
+    pPEHeader = (PIMAGE_NT_HEADERS) ((char *)pDOSHeader + pDOSHeader->e_lfanew);
+    if (pPEHeader->Signature != IMAGE_NT_SIGNATURE)
+        return 0;
+
+    return (pPEHeader->OptionalHeader.Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI);
+}
+#endif
+
+#define xstr(s) str(s)
+#define str(s) #s
+
+/* Default fatalInternalErrorFn hook: report an "internal error" with
+   the GHC version and a bug-report URL, then abort().  On Windows GUI
+   apps (which have no console) the report goes to a message box;
+   everywhere else it goes to stderr.  abort() rather than stg_exit()
+   — presumably so a debugger/core dump can catch the failure. */
+void
+rtsFatalInternalErrorFn(char *s, va_list ap)
+{
+#if defined(cygwin32_TARGET_OS) || defined (mingw32_TARGET_OS)
+ if (isGUIApp())
+ {
+ char title[BUFSIZE], message[BUFSIZE];
+
+ snprintf(title, BUFSIZE, "%s: internal error", prog_name);
+ vsnprintf(message, BUFSIZE, s, ap);
+
+ MessageBox(NULL /* hWnd */,
+ message,
+ title,
+ MB_OK | MB_ICONERROR | MB_TASKMODAL
+ );
+ }
+ else
+#endif
+ {
+ /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+ if (prog_argv != NULL && prog_name != NULL) {
+ fprintf(stderr, "%s: internal error: ", prog_name);
+ } else {
+ fprintf(stderr, "internal error: ");
+ }
+ vfprintf(stderr, s, ap);
+ fprintf(stderr, "\n");
+ fprintf(stderr, " (GHC version %s for %s)\n", ProjectVersion, xstr(HostPlatform_TYPE));
+ fprintf(stderr, " Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug\n");
+ fflush(stderr);
+ }
+
+ abort();
+ // stg_exit(EXIT_INTERNAL_ERROR);
+}
+
+/* Default errorMsgFn hook: write "<prog>: <msg>\n" to stderr, or show
+   a message box for Windows GUI apps that have no console. */
+void
+rtsErrorMsgFn(char *s, va_list ap)
+{
+#if defined(cygwin32_TARGET_OS) || defined (mingw32_TARGET_OS)
+ if (isGUIApp())
+ {
+ char buf[BUFSIZE];
+ int r;
+
+ /* only pop up the box if the message formatted without truncation */
+ r = vsnprintf(buf, BUFSIZE, s, ap);
+ if (r > 0 && r < BUFSIZE) {
+ MessageBox(NULL /* hWnd */,
+ buf,
+ prog_name,
+ MB_OK | MB_ICONERROR | MB_TASKMODAL
+ );
+ }
+ }
+ else
+#endif
+ {
+ /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+ if (prog_argv != NULL && prog_name != NULL) {
+ fprintf(stderr, "%s: ", prog_name);
+ }
+ vfprintf(stderr, s, ap);
+ fprintf(stderr, "\n");
+ }
+}
+
+/* Default debugMsgFn hook: write the message to stderr (flushed), or
+   to the Windows debugger output stream for GUI apps. */
+void
+rtsDebugMsgFn(char *s, va_list ap)
+{
+#if defined(cygwin32_TARGET_OS) || defined (mingw32_TARGET_OS)
+ if (isGUIApp())
+ {
+ char buf[BUFSIZE];
+ int r;
+
+ /* only emit if the message formatted without truncation */
+ r = vsnprintf(buf, BUFSIZE, s, ap);
+ if (r > 0 && r < BUFSIZE) {
+ OutputDebugString(buf);
+ }
+ }
+ else
+#endif
+ {
+ /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+ vfprintf(stderr, s, ap);
+ fflush(stderr);
+ }
+}
diff --git a/rts/RtsSignals.h b/rts/RtsSignals.h
new file mode 100644
index 0000000000..eafeeaaf55
--- /dev/null
+++ b/rts/RtsSignals.h
@@ -0,0 +1,78 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Signal processing / handling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTS_SIGNALS_H
+#define RTS_SIGNALS_H
+
+#if !defined(PAR) && !defined(mingw32_HOST_OS)
+
+#include "posix/Signals.h"
+
+#elif defined(mingw32_HOST_OS)
+
+#include "win32/ConsoleHandler.h"
+
+#else /* PAR */
+
+#define signals_pending() (rtsFalse)
+
+#endif /* PAR */
+
+
+#if RTS_USER_SIGNALS
+
+/*
+ * Function: initUserSignals()
+ *
+ * Initialize the console handling substrate.
+ */
+extern void initUserSignals(void);
+
+/*
+ * Function: initDefaultHandlers()
+ *
+ * Install any default signal/console handlers. Currently we install a
+ * Ctrl+C handler that shuts down the RTS in an orderly manner.
+ */
+extern void initDefaultHandlers(void);
+
+/*
+ * Function: blockUserSignals()
+ *
+ * Temporarily block the delivery of further console events. Needed to
+ * avoid race conditions when GCing the queue of outstanding handlers or
+ * when emptying the queue by running the handlers.
+ *
+ */
+extern void blockUserSignals(void);
+
+/*
+ * Function: unblockUserSignals()
+ *
+ * The inverse of blockUserSignals(); re-enable the delivery of console events.
+ */
+extern void unblockUserSignals(void);
+
+/*
+ * Function: awaitUserSignals()
+ *
+ * Wait for the next console event. Currently a NOP (returns immediately.)
+ */
+extern void awaitUserSignals(void);
+
+/*
+ * Function: markSignalHandlers()
+ *
+ * Evacuate the handler queue. _Assumes_ that console event delivery
+ * has already been blocked.
+ */
+extern void markSignalHandlers (evac_fn evac);
+
+#endif /* RTS_USER_SIGNALS */
+
+#endif /* RTS_SIGNALS_H */
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
new file mode 100644
index 0000000000..147de7b857
--- /dev/null
+++ b/rts/RtsStartup.c
@@ -0,0 +1,457 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2002
+ *
+ * Main function for a standalone Haskell program.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsAPI.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "OSThreads.h"
+#include "Storage.h" /* initStorage, exitStorage */
+#include "Schedule.h" /* initScheduler */
+#include "Stats.h" /* initStats */
+#include "STM.h" /* initSTM */
+#include "Signals.h"
+#include "RtsSignals.h"
+#include "Timer.h" /* startTimer, stopTimer */
+#include "Weak.h"
+#include "Ticky.h"
+#include "StgRun.h"
+#include "Prelude.h" /* fixupRTStoPreludeRefs */
+#include "HsFFI.h"
+#include "Linker.h"
+#include "ThreadLabels.h"
+#include "BlockAlloc.h"
+
+#if defined(RTS_GTK_FRONTPANEL)
+#include "FrontPanel.h"
+#endif
+
+#if defined(PROFILING) || defined(DEBUG)
+# include "Profiling.h"
+# include "ProfHeap.h"
+# include "RetainerProfile.h"
+#endif
+
+#if defined(GRAN)
+# include "GranSimRts.h"
+#endif
+
+#if defined(GRAN) || defined(PAR)
+# include "ParallelRts.h"
+#endif
+
+#if defined(PAR)
+# include "Parallel.h"
+# include "LLC.h"
+#endif
+
+#if defined(mingw32_HOST_OS)
+#include "win32/AsyncIO.h"
+#endif
+
+#include <stdlib.h>
+
+#ifdef HAVE_TERMIOS_H
+#include <termios.h>
+#endif
+#ifdef HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+
+// Count of how many outstanding hs_init()s there have been.
+static int hs_init_count = 0;
+
+// Here we save the terminal settings on the standard file
+// descriptors, if we need to change them (eg. to support NoBuffering
+// input).
+static void *saved_termios[3] = {NULL,NULL,NULL};
+
+void*
+__hscore_get_saved_termios(int fd)
+{
+  if (fd < 0 || fd >= (int)(sizeof(saved_termios) / sizeof(*saved_termios)))
+    return NULL; /* out-of-range descriptors have no saved settings */
+  return saved_termios[fd];
+}
+
+void
+__hscore_set_saved_termios(int fd, void* ts)
+{
+  if (fd < 0 || fd >= (int)(sizeof(saved_termios) / sizeof(*saved_termios)))
+    return; /* ignore out-of-range descriptors */
+  saved_termios[fd] = ts;
+}
+
+/* -----------------------------------------------------------------------------
+ Initialise floating point unit on x86 (currently disabled. why?)
+ (see comment in ghc/compiler/nativeGen/MachInstrs.lhs).
+ -------------------------------------------------------------------------- */
+
+#define X86_INIT_FPU 0
+
+#if X86_INIT_FPU
+static void
+x86_init_fpu ( void )
+{
+ __volatile unsigned short int fpu_cw;
+
+ // Grab the control word
+ __asm __volatile ("fnstcw %0" : "=m" (fpu_cw));
+
+#if 0
+ printf("fpu_cw: %x\n", fpu_cw);
+#endif
+
+ // Set bits 8-9 to 10 (64-bit precision).
+ fpu_cw = (fpu_cw & 0xfcff) | 0x0200;
+
+ // Store the new control word back
+ __asm __volatile ("fldcw %0" : : "m" (fpu_cw));
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ Starting up the RTS
+ -------------------------------------------------------------------------- */
+
+void
+hs_init(int *argc, char **argv[])
+{
+ hs_init_count++;
+ if (hs_init_count > 1) {
+ // second and subsequent inits are ignored
+ return;
+ }
+
+ /* The very first thing we do is grab the start time...just in case we're
+ * collecting timing statistics.
+ */
+ stat_startInit();
+
+#ifdef PAR
+ /*
+ * The parallel system needs to be initialised and synchronised before
+ * the program is run.
+ */
+ startupParallelSystem(argv);
+
+ if (*argv[0] == '-') { /* Strip off mainPE flag argument */
+ argv++;
+ argc--;
+ }
+
+ argv[1] = argv[0]; /* ignore the nPEs argument */
+ argv++; argc--;
+#endif
+
+ /* Set the RTS flags to default values. */
+ initRtsFlagsDefaults();
+
+ /* Call the user hook to reset defaults, if present */
+ defaultsHook();
+
+ /* Parse the flags, separating the RTS flags from the programs args */
+ if (argc != NULL && argv != NULL) {
+ setupRtsFlags(argc, *argv, &rts_argc, rts_argv);
+ setProgArgv(*argc,*argv);
+ }
+
+#if defined(PAR)
+ /* NB: this really must be done after processing the RTS flags */
+ IF_PAR_DEBUG(verbose,
+ debugBelch("==== Synchronising system (%d PEs)\n", nPEs));
+ synchroniseSystem(); // calls initParallelSystem etc
+#endif /* PAR */
+
+ /* Perform initialisation of adjustor thunk layer. */
+ initAdjustor();
+
+ /* initialise scheduler data structures (needs to be done before
+ * initStorage()).
+ */
+ initScheduler();
+
+#if defined(GRAN)
+ /* And start GranSim profiling if required: */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv);
+#elif defined(PAR)
+ /* And start GUM profiling if required: */
+ if (RtsFlags.ParFlags.ParStats.Full)
+ init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv);
+#endif /* PAR || GRAN */
+
+ /* initialize the storage manager */
+ initStorage();
+
+ /* initialise the stable pointer table */
+ initStablePtrTable();
+
+#if defined(DEBUG)
+ /* initialise thread label table (tso->char*) */
+ initThreadLabelTable();
+#endif
+
+#if defined(PROFILING) || defined(DEBUG)
+ initProfiling1();
+#endif
+
+ /* start the virtual timer 'subsystem'. */
+ startTimer(TICK_MILLISECS);
+
+ /* Initialise the stats department */
+ initStats();
+
+#if defined(RTS_USER_SIGNALS)
+ /* Initialise the user signal handler set */
+ initUserSignals();
+ /* Set up handler to run on SIGINT, etc. */
+ initDefaultHandlers();
+#endif
+
+#if defined(mingw32_HOST_OS)
+ startupAsyncIO();
+#endif
+
+#ifdef RTS_GTK_FRONTPANEL
+ if (RtsFlags.GcFlags.frontpanel) {
+ initFrontPanel();
+ }
+#endif
+
+#if X86_INIT_FPU
+ x86_init_fpu();
+#endif
+
+ /* Record initialization times */
+ stat_endInit();
+}
+
+// Compatibility interface
+void
+startupHaskell(int argc, char *argv[], void (*init_root)(void))
+{
+ hs_init(&argc, &argv);
+ if(init_root)
+ hs_add_root(init_root);
+}
+
+
+/* -----------------------------------------------------------------------------
+ Per-module initialisation
+
+ This process traverses all the compiled modules in the program
+ starting with "Main", and performing per-module initialisation for
+ each one.
+
+ So far, two things happen at initialisation time:
+
+ - we register stable names for each foreign-exported function
+ in that module. This prevents foreign-exported entities, and
+ things they depend on, from being garbage collected.
+
+ - we supply a unique integer to each statically declared cost
+ centre and cost centre stack in the program.
+
+ The code generator inserts a small function "__stginit_<module>" in each
+ module and calls the registration functions in each of the modules it
+ imports.
+
+ The init* functions are compiled in the same way as STG code,
+ i.e. without normal C call/return conventions. Hence we must use
+ StgRun to call this stuff.
+ -------------------------------------------------------------------------- */
+
+/* The init functions use an explicit stack...
+ */
+#define INIT_STACK_BLOCKS 4
+static F_ *init_stack = NULL;
+
+void
+hs_add_root(void (*init_root)(void))
+{
+ bdescr *bd;
+ nat init_sp;
+ Capability *cap = &MainCapability;
+
+ if (hs_init_count <= 0) {
+ barf("hs_add_root() must be called after hs_init()");
+ }
+
+ /* The initialisation stack grows downward, with sp pointing
+ to the last occupied word */
+ init_sp = INIT_STACK_BLOCKS*BLOCK_SIZE_W;
+ bd = allocGroup_lock(INIT_STACK_BLOCKS);
+ init_stack = (F_ *)bd->start;
+ init_stack[--init_sp] = (F_)stg_init_finish;
+ if (init_root != NULL) {
+ init_stack[--init_sp] = (F_)init_root;
+ }
+
+ cap->r.rSp = (P_)(init_stack + init_sp);
+ StgRun((StgFunPtr)stg_init, &cap->r);
+
+ freeGroup_lock(bd);
+
+#if defined(PROFILING) || defined(DEBUG)
+ // This must be done after module initialisation.
+ // ToDo: make this work in the presence of multiple hs_add_root()s.
+ initProfiling2();
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Shutting down the RTS
+ -------------------------------------------------------------------------- */
+
+void
+hs_exit(void)
+{
+ if (hs_init_count <= 0) {
+ errorBelch("warning: too many hs_exit()s");
+ return;
+ }
+ hs_init_count--;
+ if (hs_init_count > 0) {
+ // ignore until it's the last one
+ return;
+ }
+
+ /* start timing the shutdown */
+ stat_startExit();
+
+ /* stop all running tasks */
+ exitScheduler();
+
+#if defined(GRAN)
+ /* end_gr_simulation prints global stats if requested -- HWL */
+ if (!RtsFlags.GranFlags.GranSimStats.Suppressed)
+ end_gr_simulation();
+#endif
+
+ /* stop the ticker */
+ stopTimer();
+
+ /* reset the standard file descriptors to blocking mode */
+ resetNonBlockingFd(0);
+ resetNonBlockingFd(1);
+ resetNonBlockingFd(2);
+
+#if HAVE_TERMIOS_H
+ // Reset the terminal settings on the standard file descriptors,
+ // if we changed them. See System.Posix.Internals.tcSetAttr for
+ // more details, including the reason we temporarily disable
+ // SIGTTOU here.
+ {
+ int fd;
+ sigset_t sigset, old_sigset;
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGTTOU);
+ sigprocmask(SIG_BLOCK, &sigset, &old_sigset);
+ for (fd = 0; fd <= 2; fd++) {
+ struct termios* ts = (struct termios*)__hscore_get_saved_termios(fd);
+ if (ts != NULL) {
+ tcsetattr(fd,TCSANOW,ts);
+ }
+ }
+ sigprocmask(SIG_SETMASK, &old_sigset, NULL);
+ }
+#endif
+
+#if defined(PAR)
+ /* controlled exit; good thread! */
+ shutdownParallelSystem(0);
+
+ /* global statistics in parallel system */
+ PAR_TICKY_PAR_END();
+#endif
+
+ /* stop timing the shutdown, we're about to print stats */
+ stat_endExit();
+
+ // clean up things from the storage manager's point of view.
+ // also outputs the stats (+RTS -s) info.
+ exitStorage();
+
+#ifdef RTS_GTK_FRONTPANEL
+ if (RtsFlags.GcFlags.frontpanel) {
+ stopFrontPanel();
+ }
+#endif
+
+#if defined(PROFILING)
+ reportCCSProfiling();
+#endif
+
+#if defined(PROFILING) || defined(DEBUG)
+ endProfiling();
+#endif
+
+#ifdef PROFILING
+ // Originally, this was in report_ccs_profiling(). Now, retainer
+ // profiling might tack some extra stuff on to the end of this file
+ // during endProfiling().
+ fclose(prof_file);
+#endif
+
+#if defined(TICKY_TICKY)
+ if (RtsFlags.TickyFlags.showTickyStats) PrintTickyInfo();
+#endif
+
+#if defined(mingw32_HOST_OS)
+ shutdownAsyncIO();
+#endif
+
+ // Finally, free all our storage.
+ freeStorage();
+}
+
+// Compatibility interfaces
+void
+shutdownHaskell(void)
+{
+ hs_exit();
+}
+
+void
+shutdownHaskellAndExit(int n)
+{
+ if (hs_init_count == 1) {
+ OnExitHook();
+ hs_exit();
+#if defined(PAR)
+ /* really exit (stg_exit() would call shutdownParallelSystem() again) */
+ exit(n);
+#else
+ stg_exit(n);
+#endif
+ }
+}
+
+/*
+ * called from STG-land to exit the program
+ */
+
+#ifdef PAR
+static int exit_started=rtsFalse;
+#endif
+
+void
+stg_exit(int n)
+{
+#ifdef PAR
+ /* HACK: avoid a loop when exiting due to a stupid error */
+ if (exit_started)
+ return;
+ exit_started=rtsTrue;
+
+ IF_PAR_DEBUG(verbose, debugBelch("==-- stg_exit %d on [%x]...", n, mytid));
+ shutdownParallelSystem(n);
+#endif
+ exit(n);
+}
diff --git a/rts/RtsUtils.c b/rts/RtsUtils.c
new file mode 100644
index 0000000000..3e7e225dda
--- /dev/null
+++ b/rts/RtsUtils.c
@@ -0,0 +1,367 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * General utility functions used in the RTS.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* gettimeofday isn't POSIX */
+/* #include "PosixSource.h" */
+
+#include "Rts.h"
+#include "RtsAPI.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Ticky.h"
+
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#ifdef HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+
+#if defined(THREADED_RTS) && defined(openbsd_HOST_OS) && defined(HAVE_PTHREAD_H)
+#include <pthread.h>
+#endif
+
+#if defined(openbsd_HOST_OS) || defined(linux_HOST_OS) || defined(darwin_HOST_OS)
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+/* no C99 header stdint.h on OpenBSD? */
+#if defined(openbsd_HOST_OS)
+typedef unsigned long my_uintptr_t;
+#else
+#include <stdint.h>
+typedef uintptr_t my_uintptr_t;
+#endif
+#endif
+
+#if defined(_WIN32)
+#include <windows.h>
+#endif
+
+/* -----------------------------------------------------------------------------
+ Result-checking malloc wrappers.
+ -------------------------------------------------------------------------- */
+
+void *
+stgMallocBytes (int n, char *msg)
+{
+  /* malloc with fail-fast policy: report via MallocFailHook and exit on NULL */
+  void *result = malloc((size_t) n);
+  if (result == NULL) {
+    /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+    MallocFailHook((W_) n, msg);
+    stg_exit(EXIT_INTERNAL_ERROR);
+  }
+  return result;
+}
+
+void *
+stgReallocBytes (void *p, int n, char *msg)
+{
+  /* realloc with fail-fast policy: report via MallocFailHook and exit on NULL */
+  void *result = realloc(p, (size_t) n);
+  if (result == NULL) {
+    /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+    MallocFailHook((W_) n, msg);
+    stg_exit(EXIT_INTERNAL_ERROR);
+  }
+  return result;
+}
+
+void *
+stgCallocBytes (int n, int m, char *msg)
+{
+  /* zero-initialising allocation with fail-fast policy, mirroring stgMallocBytes */
+  void *result = calloc((size_t) n, (size_t) m);
+  if (result == NULL) {
+    /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+    MallocFailHook((W_) n*m, msg);
+    stg_exit(EXIT_INTERNAL_ERROR);
+  }
+  return result;
+}
+
+/* To simplify changing the underlying allocator used
+ * by stgMallocBytes(), provide stgFree() as well.
+ */
+void
+stgFree(void* p)
+{
+ free(p); /* free(NULL) is a no-op, so callers need not check */
+}
+
+/* -----------------------------------------------------------------------------
+ Stack overflow
+
+ Not sure if this belongs here.
+ -------------------------------------------------------------------------- */
+
+void
+stackOverflow(void)
+{
+ StackOverflowHook(RtsFlags.GcFlags.maxStkSize * sizeof(W_));
+
+#if defined(TICKY_TICKY)
+ if (RtsFlags.TickyFlags.showTickyStats) PrintTickyInfo();
+#endif
+}
+
+void
+heapOverflow(void)
+{
+ /* don't fflush(stdout); WORKAROUND bug in Linux glibc */
+ OutOfHeapHook(0/*unknown request size*/,
+ RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE);
+
+#if defined(TICKY_TICKY)
+ if (RtsFlags.TickyFlags.showTickyStats) PrintTickyInfo();
+#endif
+
+ stg_exit(EXIT_HEAPOVERFLOW);
+}
+
+/* -----------------------------------------------------------------------------
+ Out-of-line strlen.
+
+ Used in addr2Integer because the C compiler on x86 chokes on
+ strlen, trying to inline it with not enough registers available.
+ -------------------------------------------------------------------------- */
+
+nat stg_strlen(char *s)
+{
+  /* Hand-rolled strlen; kept out of line (see the comment above). */
+  char *end = s;
+  while (*end != '\0') { end++; }
+  return end - s;
+}
+
+
+/* -----------------------------------------------------------------------------
+ genSym stuff, used by GHC itself for its splitting unique supply.
+
+ ToDo: put this somewhere sensible.
+ ------------------------------------------------------------------------- */
+
+static I_ __GenSymCounter = 0;
+
+I_
+genSymZh(void)
+{
+ return(__GenSymCounter++); /* NOTE(review): not atomic; assumes callers serialise -- confirm for THREADED_RTS */
+}
+I_
+resetGenSymZh(void) /* it's your funeral */
+{
+ __GenSymCounter=0;
+ return(__GenSymCounter);
+}
+
+/* -----------------------------------------------------------------------------
+ Get the current time as a string. Used in profiling reports.
+ -------------------------------------------------------------------------- */
+
+#if defined(PROFILING) || defined(DEBUG) || defined(PAR) || defined(GRAN)
+char *
+time_str(void)
+{
+ static time_t now = 0;
+ static char nowstr[26];
+
+ if (now == 0) {
+ time(&now);
+#if HAVE_CTIME_R
+ ctime_r(&now, nowstr);
+#else
+ strcpy(nowstr, ctime(&now));
+#endif
+ memmove(nowstr+16,nowstr+19,7);
+ nowstr[21] = '\0'; // removes the \n
+ }
+ return nowstr;
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Reset a file handle to blocking mode. We do this for the standard
+ * file descriptors before exiting, because the shell doesn't always
+ * clean up for us.
+ * -------------------------------------------------------------------------- */
+
+#if !defined(mingw32_HOST_OS)
+void
+resetNonBlockingFd(int fd)
+{
+  long fd_flags;
+
+  /* clear the non-blocking flag on this file descriptor */
+  fd_flags = fcntl(fd, F_GETFL);
+  if (fd_flags != -1 && (fd_flags & O_NONBLOCK)) {
+    /* fcntl returns -1 on failure; don't write a garbage flag word back */
+    fcntl(fd, F_SETFL, fd_flags & ~O_NONBLOCK);
+  }
+}
+
+void
+setNonBlockingFd(int fd)
+{
+  long fd_flags;
+
+  /* set the non-blocking flag on this file descriptor */
+  fd_flags = fcntl(fd, F_GETFL);
+  if (fd_flags != -1 && !(fd_flags & O_NONBLOCK)) {
+    /* fcntl returns -1 on failure; don't write a garbage flag word back */
+    fcntl(fd, F_SETFL, fd_flags | O_NONBLOCK);
+  }
+}
+#else
+/* Stub defns -- async / non-blocking IO is not done
+ * via O_NONBLOCK and select() under Win32.
+ */
+void resetNonBlockingFd(int fd STG_UNUSED) {}
+void setNonBlockingFd(int fd STG_UNUSED) {}
+#endif
+
+#ifdef PAR
+static ullong startTime = 0;
+
+/* used in a parallel setup */
+ullong
+msTime(void)
+{
+# if defined(HAVE_GETCLOCK) && !defined(alpha_HOST_ARCH) && !defined(hppa1_1_HOST_ARCH)
+ struct timespec tv;
+
+ if (getclock(TIMEOFDAY, &tv) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime;
+# elif HAVE_GETTIMEOFDAY && !defined(alpha_HOST_ARCH)
+ struct timeval tv;
+
+ if (gettimeofday(&tv, NULL) != 0) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime;
+# else
+ time_t t;
+ if ((t = time(NULL)) == (time_t) -1) {
+ fflush(stdout);
+ fprintf(stderr, "Clock failed\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ return t * LL(1000) - startTime;
+# endif
+}
+#endif /* PAR */
+
+/* -----------------------------------------------------------------------------
+ Print large numbers, with punctuation.
+ -------------------------------------------------------------------------- */
+
+char *
+ullong_format_string(ullong x, char *s, rtsBool with_commas)
+{
+ if (x < (ullong)1000)
+ sprintf(s, "%lu", (lnat)x);
+ else if (x < (ullong)1000000)
+ sprintf(s, (with_commas) ? "%lu,%3.3lu" : "%lu%3.3lu",
+ (lnat)((x)/(ullong)1000),
+ (lnat)((x)%(ullong)1000));
+ else if (x < (ullong)1000000000)
+ sprintf(s, (with_commas) ? "%lu,%3.3lu,%3.3lu" : "%lu%3.3lu%3.3lu",
+ (lnat)((x)/(ullong)1000000),
+ (lnat)((x)/(ullong)1000%(ullong)1000),
+ (lnat)((x)%(ullong)1000));
+ else
+ sprintf(s, (with_commas) ? "%lu,%3.3lu,%3.3lu,%3.3lu" : "%lu%3.3lu%3.3lu%3.3lu",
+ (lnat)((x)/(ullong)1000000000),
+ (lnat)((x)/(ullong)1000000%(ullong)1000),
+ (lnat)((x)/(ullong)1000%(ullong)1000),
+ (lnat)((x)%(ullong)1000));
+ return s;
+}
+
+
+// Can be used as a breakpoint to set on every heap check failure.
+#ifdef DEBUG
+void
+heapCheckFail( void )
+{
+}
+#endif
+
+/*
+ * It seems that pthreads and signals interact oddly in OpenBSD & FreeBSD
+ * pthreads (and possibly others). When linking with -lpthreads, we
+ * have to use pthread_kill to send blockable signals. So use that
+ * when we have a threaded rts. So System.Posix.Signals will call
+ * genericRaise(), rather than raise(3).
+ */
+int genericRaise(int sig) {
+#if defined(THREADED_RTS) && (defined(openbsd_HOST_OS) || defined(freebsd_HOST_OS))
+ return pthread_kill(pthread_self(), sig);
+#else
+ return raise(sig);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Allocating executable memory
+ -------------------------------------------------------------------------- */
+
+/* Heavily arch-specific, I'm afraid.. */
+
+/*
+ * Allocate len bytes which are readable, writable, and executable.
+ *
+ * ToDo: If this turns out to be a performance bottleneck, one could
+ * e.g. cache the last VirtualProtect/mprotect-ed region and do
+ * nothing in case of a cache hit.
+ */
+void*
+stgMallocBytesRWX(int len)
+{
+ void *addr = stgMallocBytes(len, "mallocBytesRWX");
+#if defined(i386_HOST_ARCH) && defined(_WIN32)
+ /* This could be necessary for processors which distinguish between READ and
+ EXECUTE memory accesses, e.g. Itaniums. */
+ DWORD dwOldProtect = 0;
+ if (VirtualProtect (addr, len, PAGE_EXECUTE_READWRITE, &dwOldProtect) == 0) {
+ barf("mallocBytesRWX: failed to protect 0x%p; error=%lu; old protection: %lu\n",
+ addr, (unsigned long)GetLastError(), (unsigned long)dwOldProtect);
+ }
+#elif defined(openbsd_HOST_OS) || defined(linux_HOST_OS) || defined(darwin_HOST_OS)
+ /* malloced memory isn't executable by default on OpenBSD */
+ my_uintptr_t pageSize = sysconf(_SC_PAGESIZE);
+ my_uintptr_t mask = ~(pageSize - 1);
+ my_uintptr_t startOfFirstPage = ((my_uintptr_t)addr ) & mask;
+ my_uintptr_t startOfLastPage = ((my_uintptr_t)addr + len - 1) & mask;
+ my_uintptr_t size = startOfLastPage - startOfFirstPage + pageSize;
+ if (mprotect((void*)startOfFirstPage, (size_t)size, PROT_EXEC | PROT_READ | PROT_WRITE) != 0) {
+ barf("mallocBytesRWX: failed to protect 0x%p\n", addr);
+ }
+#endif
+ return addr;
+}
diff --git a/rts/RtsUtils.h b/rts/RtsUtils.h
new file mode 100644
index 0000000000..96a5f0d82f
--- /dev/null
+++ b/rts/RtsUtils.h
@@ -0,0 +1,54 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * General utility functions used in the RTS.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RTSUTILS_H
+#define RTSUTILS_H
+
+/* -----------------------------------------------------------------------------
+ * (Checked) dynamic allocation
+ * -------------------------------------------------------------------------- */
+
+extern void *stgMallocBytes(int n, char *msg)
+ GNUC3_ATTRIBUTE(__malloc__);
+
+extern void* stgMallocBytesRWX(int len)
+ GNUC3_ATTRIBUTE(__malloc__);
+
+extern void *stgReallocBytes(void *p, int n, char *msg);
+
+extern void *stgCallocBytes(int n, int m, char *msg)
+ GNUC3_ATTRIBUTE(__malloc__);
+
+extern void stgFree(void* p);
+
+/* -----------------------------------------------------------------------------
+ * Misc other utilities
+ * -------------------------------------------------------------------------- */
+
+extern void heapOverflow(void);
+
+extern void setNonBlockingFd(int fd);
+extern void resetNonBlockingFd(int fd);
+
+extern nat stg_strlen(char *str);
+
+extern char *time_str(void);
+extern char *ullong_format_string(ullong, char *, rtsBool);
+
+#ifdef PAR
+extern ullong msTime(void);
+#endif
+
+#ifdef DEBUG
+extern void heapCheckFail( void );
+#endif
+
+extern void* __hscore_get_saved_termios(int fd);
+extern void __hscore_set_saved_termios(int fd, void* ts);
+
+#endif /* RTSUTILS_H */
diff --git a/rts/STM.c b/rts/STM.c
new file mode 100644
index 0000000000..d3283a92f0
--- /dev/null
+++ b/rts/STM.c
@@ -0,0 +1,1261 @@
+/* -----------------------------------------------------------------------------
+ * (c) The GHC Team 1998-2005
+ *
+ * STM implementation.
+ *
+ * Overview
+ * --------
+ *
+ * See the PPoPP 2005 paper "Composable memory transactions". In summary,
+ * each transaction has a TRec (transaction record) holding entries for each of the
+ * TVars (transactional variables) that it has accessed. Each entry records
+ * (a) the TVar, (b) the expected value seen in the TVar, (c) the new value that
+ * the transaction wants to write to the TVar, (d) during commit, the identity of
+ * the TRec that wrote the expected value.
+ *
+ * Separate TRecs are used for each level in a nest of transactions. This allows
+ * a nested transaction to be aborted without condemning its enclosing transactions.
+ * This is needed in the implementation of catchRetry. Note that the "expected value"
+ * in a nested transaction's TRec is the value expected to be *held in memory* if
+ * the transaction commits -- not the "new value" stored in one of the enclosing
+ * transactions. This means that validation can be done without searching through
+ * a nest of TRecs.
+ *
+ * Concurrency control
+ * -------------------
+ *
+ * Three different concurrency control schemes can be built according to the settings
+ * in STM.h:
+ *
+ * STM_UNIPROC assumes that the caller serialises invocations on the STM interface.
+ * In the Haskell RTS this means it is suitable only for non-THREADED_RTS builds.
+ *
+ * STM_CG_LOCK uses coarse-grained locking -- a single 'stm lock' is acquired during
+ * an invocation on the STM interface. Note that this does not mean that
+ * transactions are simply serialized -- the lock is only held *within* the
+ * implementation of stmCommitTransaction, stmWait etc.
+ *
+ * STM_FG_LOCKS uses fine-grained locking -- locking is done on a per-TVar basis
+ * and, when committing a transaction, no locks are acquired for TVars that have
+ * been read but not updated.
+ *
+ * Concurrency control is implemented in the functions:
+ *
+ * lock_stm
+ * unlock_stm
+ * lock_tvar / cond_lock_tvar
+ * unlock_tvar
+ *
+ * The choice between STM_UNIPROC / STM_CG_LOCK / STM_FG_LOCKS affects the
+ * implementation of these functions.
+ *
+ * lock_stm & unlock_stm are straightforward : they acquire a simple spin-lock
+ * using STM_CG_LOCK, and otherwise they are no-ops.
+ *
+ * lock_tvar / cond_lock_tvar and unlock_tvar are more complex because they
+ * have other effects (present in STM_UNIPROC and STM_CG_LOCK builds) as well
+ * as the actual business of manipulating a lock (present only in STM_FG_LOCKS
+ * builds). This is because locking a TVar is implemented by writing the lock
+ * holder's TRec into the TVar's current_value field:
+ *
+ * lock_tvar - lock a specified TVar (STM_FG_LOCKS only), returning the value
+ * it contained.
+ *
+ * cond_lock_tvar - lock a specified TVar (STM_FG_LOCKS only) if it
+ * contains a specified value. Return TRUE if this succeeds,
+ * FALSE otherwise.
+ *
+ * unlock_tvar - release the lock on a specified TVar (STM_FG_LOCKS only),
+ * storing a specified value in place of the lock entry.
+ *
+ * Using these operations, the typical pattern of a commit/validate/wait operation
+ * is to (a) lock the STM, (b) lock all the TVars being updated, (c) check that
+ * the TVars that were only read from still contain their expected values,
+ * (d) release the locks on the TVars, writing updates to them in the case of a
+ * commit, (e) unlock the STM.
+ *
+ * Queues of waiting threads hang off the first_wait_queue_entry field of each
+ * TVar. This may only be manipulated when holding that TVar's lock. In
+ * particular, when a thread is putting itself to sleep, it mustn't release
+ * the TVar's lock until it has added itself to the wait queue and marked its
+ * TSO as BlockedOnSTM -- this makes sure that other threads will know to wake it.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Schedule.h"
+#include "SMP.h"
+#include "STM.h"
+#include "Storage.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#define TRUE 1
+#define FALSE 0
+
+// ACQ_ASSERT is used for assertions which are only required for
+// THREADED_RTS builds with fine-grained locking.
+
+#if defined(STM_FG_LOCKS)
+#define ACQ_ASSERT(_X) ASSERT(_X)
+#define NACQ_ASSERT(_X) /*Nothing*/
+#else
+#define ACQ_ASSERT(_X) /*Nothing*/
+#define NACQ_ASSERT(_X) ASSERT(_X)
+#endif
+
+/*......................................................................*/
+
+// If SHAKE is defined then validation will sometimes spuriously fail. This helps
+// test unusual code paths when genuine contention is rare
+
+#if defined(DEBUG)
+#define SHAKE
+#if defined(THREADED_RTS)
+#define TRACE(_x...) IF_DEBUG(stm, debugBelch("STM (task %p): ", (void *)(unsigned long)(unsigned int)osThreadId()); debugBelch ( _x ))
+#else
+#define TRACE(_x...) IF_DEBUG(stm, debugBelch ( _x ))
+#endif
+#else
+#define TRACE(_x...) /*Nothing*/
+#endif
+
+#ifdef SHAKE
+static const int do_shake = TRUE;
+#else
+static const int do_shake = FALSE;
+#endif
+static int shake_ctr = 0;
+static int shake_lim = 1;
+
+static int shake(void) {
+  /* With SHAKE: return TRUE at ever-lengthening intervals, never otherwise. */
+  if (!do_shake) {
+    return FALSE;
+  }
+  if (((shake_ctr++) % shake_lim) != 0) {
+    return FALSE;
+  }
+  shake_ctr = 1;
+  shake_lim ++;
+  return TRUE;
+}
+
+/*......................................................................*/
+
+// Helper macros for iterating over entries within a transaction
+// record
+
+#define FOR_EACH_ENTRY(_t,_x,CODE) do { \
+ StgTRecHeader *__t = (_t); \
+ StgTRecChunk *__c = __t -> current_chunk; \
+ StgWord __limit = __c -> next_entry_idx; \
+ TRACE("%p : FOR_EACH_ENTRY, current_chunk=%p limit=%ld\n", __t, __c, __limit); \
+ while (__c != END_STM_CHUNK_LIST) { \
+ StgWord __i; \
+ for (__i = 0; __i < __limit; __i ++) { \
+ TRecEntry *_x = &(__c -> entries[__i]); \
+ do { CODE } while (0); \
+ } \
+ __c = __c -> prev_chunk; \
+ __limit = TREC_CHUNK_NUM_ENTRIES; \
+ } \
+ exit_for_each: \
+ if (FALSE) goto exit_for_each; \
+} while (0)
+
+#define BREAK_FOR_EACH goto exit_for_each
+
+/*......................................................................*/
+
+// if REUSE_MEMORY is defined then attempt to re-use descriptors, log chunks,
+// and wait queue entries without GC
+
+#define REUSE_MEMORY
+
+/*......................................................................*/
+
+#define IF_STM_UNIPROC(__X) do { } while (0)
+#define IF_STM_CG_LOCK(__X) do { } while (0)
+#define IF_STM_FG_LOCKS(__X) do { } while (0)
+
+#if defined(STM_UNIPROC)
+#undef IF_STM_UNIPROC
+#define IF_STM_UNIPROC(__X) do { __X } while (0)
+static const StgBool use_read_phase = FALSE;
+
+static void lock_stm(StgTRecHeader *trec STG_UNUSED) {
+ TRACE("%p : lock_stm()\n", trec);
+}
+
+static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
+ TRACE("%p : unlock_stm()\n", trec);
+}
+
+static StgClosure *lock_tvar(StgTRecHeader *trec STG_UNUSED,
+ StgTVar *s STG_UNUSED) {
+ StgClosure *result;
+ TRACE("%p : lock_tvar(%p)\n", trec, s);
+ result = s -> current_value;
+ return result;
+}
+
+static void unlock_tvar(StgTRecHeader *trec STG_UNUSED,
+ StgTVar *s STG_UNUSED,
+ StgClosure *c,
+ StgBool force_update) {
+ TRACE("%p : unlock_tvar(%p)\n", trec, s);
+ if (force_update) {
+ s -> current_value = c;
+ }
+}
+
+static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED,
+ StgTVar *s STG_UNUSED,
+ StgClosure *expected) {
+ StgClosure *result;
+ TRACE("%p : cond_lock_tvar(%p, %p)\n", trec, s, expected);
+ result = s -> current_value;
+ TRACE("%p : %s\n", trec, (result == expected) ? "success" : "failure");
+ return (result == expected);
+}
+#endif
+
+#if defined(STM_CG_LOCK) /*........................................*/
+
+#undef IF_STM_CG_LOCK
+#define IF_STM_CG_LOCK(__X) do { __X } while (0)
+static const StgBool use_read_phase = FALSE;
+static volatile StgTRecHeader *smp_locked = NULL;
+
+static void lock_stm(StgTRecHeader *trec) {
+ while (cas(&smp_locked, NULL, trec) != NULL) { }
+ TRACE("%p : lock_stm()\n", trec);
+}
+
+static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
+ TRACE("%p : unlock_stm()\n", trec);
+ ASSERT (smp_locked == trec);
+ smp_locked = 0;
+}
+
+static StgClosure *lock_tvar(StgTRecHeader *trec STG_UNUSED,
+ StgTVar *s STG_UNUSED) {
+ StgClosure *result;
+ TRACE("%p : lock_tvar(%p)\n", trec, s);
+ ASSERT (smp_locked == trec);
+ result = s -> current_value;
+ return result;
+}
+
+static void unlock_tvar(StgTRecHeader *trec STG_UNUSED,
+                        StgTVar *s STG_UNUSED,
+                        StgClosure *c,
+                        StgBool force_update) {
+  /* Return type fixed: was declared 'void *' but returned nothing
+     (UB if the value were used); the other build variants are 'void'. */
+  TRACE("%p : unlock_tvar(%p, %p)\n", trec, s, c);
+  ASSERT (smp_locked == trec);
+  if (force_update) {
+    s -> current_value = c;
+  }
+}
+
+static StgBool cond_lock_tvar(StgTRecHeader *trec STG_UNUSED,
+                              StgTVar *s STG_UNUSED,
+                              StgClosure *expected) {
+  StgClosure *result;
+  TRACE("%p : cond_lock_tvar(%p, %p)\n", trec, s, expected);
+  ASSERT (smp_locked == trec);
+  result = s -> current_value;
+  /* Fixed TRACE: was "%p : %d" with a string arg and a missing first
+     argument (format/argument mismatch); now matches the UNIPROC variant. */
+  TRACE("%p : %s\n", trec, (result == expected) ? "success" : "failure");
+  return (result == expected);
+}
+#endif
+
+#if defined(STM_FG_LOCKS) /*...................................*/
+
+#undef IF_STM_FG_LOCKS
+#define IF_STM_FG_LOCKS(__X) do { __X } while (0)
+static const StgBool use_read_phase = TRUE;
+
// Fine-grained locking: there is no global STM lock, so this is a no-op
// (kept so callers are identical across the three locking schemes).
static void lock_stm(StgTRecHeader *trec STG_UNUSED) {
  TRACE("%p : lock_stm()\n", trec);
}
+
// No-op counterpart of lock_stm for the fine-grained scheme.
static void unlock_stm(StgTRecHeader *trec STG_UNUSED) {
  TRACE("%p : unlock_stm()\n", trec);
}
+
// Acquire an individual TVar by CAS-ing the transaction record itself into
// current_value; the displaced value is returned to the caller.  The inner
// loop skips over values that are themselves TRec headers, i.e. TVars
// currently locked by some transaction, and the outer loop retries until
// our CAS wins the race.
static StgClosure *lock_tvar(StgTRecHeader *trec,
                             StgTVar *s STG_UNUSED) {
  StgClosure *result;
  TRACE("%p : lock_tvar(%p)\n", trec, s);
  do {
    do {
      // Spin while another transaction holds the TVar.
      result = s -> current_value;
    } while (GET_INFO(result) == &stg_TREC_HEADER_info);
  } while (cas(&(s -> current_value), result, trec) != result);
  return result;
}
+
// Release a TVar we hold (current_value must be our own trec) by storing
// the closure "c".  Under fine-grained locking the store itself is the
// unlock, so force_update is irrelevant here.
static void unlock_tvar(StgTRecHeader *trec STG_UNUSED,
                        StgTVar *s,
                        StgClosure *c,
                        StgBool force_update STG_UNUSED) {
  TRACE("%p : unlock_tvar(%p, %p)\n", trec, s, c);
  ASSERT(s -> current_value == trec);
  s -> current_value = c;
}
+
// Try to acquire a TVar iff it still holds "expected": a single CAS swaps
// our trec in, and the value the CAS observed tells us whether we won.
static StgBool cond_lock_tvar(StgTRecHeader *trec,
                              StgTVar *s,
                              StgClosure *expected) {
  StgClosure *result;
  TRACE("%p : cond_lock_tvar(%p, %p)\n", trec, s, expected);
  result = cas(&(s -> current_value), expected, trec);
  TRACE("%p : %s\n", trec, result ? "success" : "failure");
  return (result == expected);
}
+#endif
+
+/*......................................................................*/
+
+// Helper functions for thread blocking and unblocking
+
// Put a thread to sleep on STM: mark it BlockedOnSTM so the scheduler will
// not run it until unpark_tso wakes it.  The thread must not already be
// blocked on anything else.
static void park_tso(StgTSO *tso) {
  ASSERT(tso -> why_blocked == NotBlocked);
  tso -> why_blocked = BlockedOnSTM;
  tso -> block_info.closure = (StgClosure *) END_TSO_QUEUE;
  TRACE("park_tso on tso=%p\n", tso);
}
+
// Wake a thread previously parked by park_tso.  A thread may appear on
// several TVar wait queues, so it can receive redundant wake-ups; only the
// first one (while still BlockedOnSTM) actually unblocks it.
static void unpark_tso(Capability *cap, StgTSO *tso) {
  // We will continue unparking threads while they remain on one of the wait
  // queues: it's up to the thread itself to remove it from the wait queues
  // if it decides to do so when it is scheduled.
  if (tso -> why_blocked == BlockedOnSTM) {
    TRACE("unpark_tso on tso=%p\n", tso);
    unblockOne(cap,tso);
  } else {
    TRACE("spurious unpark_tso on tso=%p\n", tso);
  }
}
+
// Wake every thread on the wait queue of TVar "s" (called after an update
// is committed to the TVar, so waiters can re-run their transactions).
static void unpark_waiters_on(Capability *cap, StgTVar *s) {
  StgTVarWaitQueue *q;
  TRACE("unpark_waiters_on tvar=%p\n", s);
  for (q = s -> first_wait_queue_entry;
       q != END_STM_WAIT_QUEUE;
       q = q -> next_queue_entry) {
    unpark_tso(cap, q -> waiting_tso);
  }
}
+
+/*......................................................................*/
+
+// Helper functions for downstream allocation and initialization
+
// Allocate a fresh wait-queue entry on this capability's heap and record
// the waiting thread in it.  Queue links are set up later by the caller.
static StgTVarWaitQueue *new_stg_tvar_wait_queue(Capability *cap,
                                                 StgTSO *waiting_tso) {
  StgTVarWaitQueue *result;
  result = (StgTVarWaitQueue *)allocateLocal(cap, sizeofW(StgTVarWaitQueue));
  SET_HDR (result, &stg_TVAR_WAIT_QUEUE_info, CCS_SYSTEM);
  result -> waiting_tso = waiting_tso;
  return result;
}
+
// Allocate a fresh, empty chunk of transaction-record entries.
static StgTRecChunk *new_stg_trec_chunk(Capability *cap) {
  StgTRecChunk *result;
  result = (StgTRecChunk *)allocateLocal(cap, sizeofW(StgTRecChunk));
  SET_HDR (result, &stg_TREC_CHUNK_info, CCS_SYSTEM);
  result -> prev_chunk = END_STM_CHUNK_LIST;
  result -> next_entry_idx = 0;
  return result;
}
+
// Allocate a fresh transaction record nested inside "enclosing_trec"
// (NO_TREC for a top-level transaction).  A nested record inherits its
// parent's state so that a condemned parent condemns its children too.
static StgTRecHeader *new_stg_trec_header(Capability *cap,
                                          StgTRecHeader *enclosing_trec) {
  StgTRecHeader *result;
  result = (StgTRecHeader *) allocateLocal(cap, sizeofW(StgTRecHeader));
  SET_HDR (result, &stg_TREC_HEADER_info, CCS_SYSTEM);

  result -> enclosing_trec = enclosing_trec;
  result -> current_chunk = new_stg_trec_chunk(cap);

  if (enclosing_trec == NO_TREC) {
    result -> state = TREC_ACTIVE;
  } else {
    ASSERT(enclosing_trec -> state == TREC_ACTIVE ||
           enclosing_trec -> state == TREC_CONDEMNED);
    result -> state = enclosing_trec -> state;
  }

  return result;
}
+
+/*......................................................................*/
+
+// Allocation / deallocation functions that retain per-capability lists
+// of closures that can be re-used
+
// Get a wait-queue entry, reusing one from this capability's free list if
// possible, otherwise allocating a new one.  The free list is threaded
// through next_queue_entry.
static StgTVarWaitQueue *alloc_stg_tvar_wait_queue(Capability *cap,
                                                   StgTSO *waiting_tso) {
  StgTVarWaitQueue *result = NULL;
  if (cap -> free_tvar_wait_queues == END_STM_WAIT_QUEUE) {
    result = new_stg_tvar_wait_queue(cap, waiting_tso);
  } else {
    result = cap -> free_tvar_wait_queues;
    result -> waiting_tso = waiting_tso;
    cap -> free_tvar_wait_queues = result -> next_queue_entry;
  }
  return result;
}
+
// Return a wait-queue entry to the capability's free list.  When memory
// reuse is disabled this is a no-op and the GC reclaims the entry.
static void free_stg_tvar_wait_queue(Capability *cap,
                                     StgTVarWaitQueue *wq) {
#if defined(REUSE_MEMORY)
  wq -> next_queue_entry = cap -> free_tvar_wait_queues;
  cap -> free_tvar_wait_queues = wq;
#endif
}
+
// Get an empty TRec chunk, preferring the capability's free list (threaded
// through prev_chunk); reused chunks are re-initialized to empty.
static StgTRecChunk *alloc_stg_trec_chunk(Capability *cap) {
  StgTRecChunk *result = NULL;
  if (cap -> free_trec_chunks == END_STM_CHUNK_LIST) {
    result = new_stg_trec_chunk(cap);
  } else {
    result = cap -> free_trec_chunks;
    cap -> free_trec_chunks = result -> prev_chunk;
    result -> prev_chunk = END_STM_CHUNK_LIST;
    result -> next_entry_idx = 0;
  }
  return result;
}
+
// Return a TRec chunk to the capability's free list (no-op unless
// REUSE_MEMORY is enabled).
static void free_stg_trec_chunk(Capability *cap,
                                StgTRecChunk *c) {
#if defined(REUSE_MEMORY)
  c -> prev_chunk = cap -> free_trec_chunks;
  cap -> free_trec_chunks = c;
#endif
}
+
// Get a transaction record, reusing one from the capability's free list
// (threaded through enclosing_trec) when possible.  A reused record keeps
// its current_chunk but resets it to empty, and re-derives its state from
// the enclosing transaction exactly as new_stg_trec_header does.
static StgTRecHeader *alloc_stg_trec_header(Capability *cap,
                                            StgTRecHeader *enclosing_trec) {
  StgTRecHeader *result = NULL;
  if (cap -> free_trec_headers == NO_TREC) {
    result = new_stg_trec_header(cap, enclosing_trec);
  } else {
    result = cap -> free_trec_headers;
    cap -> free_trec_headers = result -> enclosing_trec;
    result -> enclosing_trec = enclosing_trec;
    result -> current_chunk -> next_entry_idx = 0;
    if (enclosing_trec == NO_TREC) {
      result -> state = TREC_ACTIVE;
    } else {
      ASSERT(enclosing_trec -> state == TREC_ACTIVE ||
             enclosing_trec -> state == TREC_CONDEMNED);
      result -> state = enclosing_trec -> state;
    }
  }
  return result;
}
+
// Return a transaction record to the capability's free list (no-op unless
// REUSE_MEMORY).  All chunks except the current one are freed individually;
// the record keeps its (now unlinked) current chunk so a later reuse via
// alloc_stg_trec_header starts with one empty chunk already attached.
static void free_stg_trec_header(Capability *cap,
                                 StgTRecHeader *trec) {
#if defined(REUSE_MEMORY)
  StgTRecChunk *chunk = trec -> current_chunk -> prev_chunk;
  while (chunk != END_STM_CHUNK_LIST) {
    StgTRecChunk *prev_chunk = chunk -> prev_chunk;
    free_stg_trec_chunk(cap, chunk);
    chunk = prev_chunk;
  }
  trec -> current_chunk -> prev_chunk = END_STM_CHUNK_LIST;
  trec -> enclosing_trec = cap -> free_trec_headers;
  cap -> free_trec_headers = trec;
#endif
}
+
+/*......................................................................*/
+
+// Helper functions for managing waiting lists
+
// Add "tso" to the wait queue of every TVar read by this (top-level,
// active) transaction, pushing each new entry at the head of the TVar's
// doubly-linked queue.  The queue entry is stashed in the TRec entry's
// new_value field so remove_wait_queue_entries_for_trec can find it later.
static void build_wait_queue_entries_for_trec(Capability *cap,
                                              StgTSO *tso,
                                              StgTRecHeader *trec) {
  ASSERT(trec != NO_TREC);
  ASSERT(trec -> enclosing_trec == NO_TREC);
  ASSERT(trec -> state == TREC_ACTIVE);

  TRACE("%p : build_wait_queue_entries_for_trec()\n", trec);

  FOR_EACH_ENTRY(trec, e, {
    StgTVar *s;
    StgTVarWaitQueue *q;
    StgTVarWaitQueue *fq;
    s = e -> tvar;
    TRACE("%p : adding tso=%p to wait queue for tvar=%p\n", trec, tso, s);
    // Under fine-grained locking the caller holds the TVar (current_value
    // is our trec); otherwise it must still hold the expected value.
    ACQ_ASSERT(s -> current_value == trec);
    NACQ_ASSERT(s -> current_value == e -> expected_value);
    fq = s -> first_wait_queue_entry;
    q = alloc_stg_tvar_wait_queue(cap, tso);
    q -> next_queue_entry = fq;
    q -> prev_queue_entry = END_STM_WAIT_QUEUE;
    if (fq != END_STM_WAIT_QUEUE) {
      fq -> prev_queue_entry = q;
    }
    s -> first_wait_queue_entry = q;
    // Remember our queue entry so it can be unlinked on wake-up/abort.
    e -> new_value = (StgClosure *) q;
  });
}
+
+static void remove_wait_queue_entries_for_trec(Capability *cap,
+ StgTRecHeader *trec) {
+ ASSERT(trec != NO_TREC);
+ ASSERT(trec -> enclosing_trec == NO_TREC);
+ ASSERT(trec -> state == TREC_WAITING ||
+ trec -> state == TREC_CONDEMNED);
+
+ TRACE("%p : remove_wait_queue_entries_for_trec()\n", trec);
+
+ FOR_EACH_ENTRY(trec, e, {
+ StgTVar *s;
+ StgTVarWaitQueue *pq;
+ StgTVarWaitQueue *nq;
+ StgTVarWaitQueue *q;
+ s = e -> tvar;
+ StgClosure *saw = lock_tvar(trec, s);
+ q = (StgTVarWaitQueue *) (e -> new_value);
+ TRACE("%p : removing tso=%p from wait queue for tvar=%p\n", trec, q -> waiting_tso, s);
+ ACQ_ASSERT(s -> current_value == trec);
+ nq = q -> next_queue_entry;
+ pq = q -> prev_queue_entry;
+ if (nq != END_STM_WAIT_QUEUE) {
+ nq -> prev_queue_entry = pq;
+ }
+ if (pq != END_STM_WAIT_QUEUE) {
+ pq -> next_queue_entry = nq;
+ } else {
+ ASSERT (s -> first_wait_queue_entry == q);
+ s -> first_wait_queue_entry = nq;
+ }
+ free_stg_tvar_wait_queue(cap, q);
+ unlock_tvar(trec, s, saw, FALSE);
+ });
+}
+
+/*......................................................................*/
+
// Reserve the next free TRec entry in transaction "t", allocating and
// linking a fresh chunk when the current one is full.
static TRecEntry *get_new_entry(Capability *cap,
                                StgTRecHeader *t) {
  TRecEntry *result;
  StgTRecChunk *c;
  int i;

  c = t -> current_chunk;
  i = c -> next_entry_idx;
  ASSERT(c != END_STM_CHUNK_LIST);

  if (i < TREC_CHUNK_NUM_ENTRIES) {
    // Continue to use current chunk
    result = &(c -> entries[i]);
    c -> next_entry_idx ++;
  } else {
    // Current chunk is full: allocate a fresh one
    StgTRecChunk *nc;
    nc = alloc_stg_trec_chunk(cap);
    nc -> prev_chunk = c;
    // Entry 0 of the new chunk is handed out below, so the next free
    // slot is index 1.
    nc -> next_entry_idx = 1;
    t -> current_chunk = nc;
    result = &(nc -> entries[0]);
  }

  return result;
}
+
+/*......................................................................*/
+
// Merge an (expected_value -> new_value) update on "tvar" into transaction
// record "t" — used when committing a nested transaction into its parent.
// If the parent already has an entry for the TVar the new value is folded
// into it; conflicting expected values condemn the parent.  Otherwise a
// fresh entry is appended.
static void merge_update_into(Capability *cap,
                              StgTRecHeader *t,
                              StgTVar *tvar,
                              StgClosure *expected_value,
                              StgClosure *new_value) {
  int found;

  // Look for an entry in this trec
  found = FALSE;
  FOR_EACH_ENTRY(t, e, {
    StgTVar *s;
    s = e -> tvar;
    if (s == tvar) {
      found = TRUE;
      if (e -> expected_value != expected_value) {
        // Must abort if the two entries start from different values
        TRACE("%p : entries inconsistent at %p (%p vs %p)\n",
              t, tvar, e -> expected_value, expected_value);
        t -> state = TREC_CONDEMNED;
      }
      e -> new_value = new_value;
      BREAK_FOR_EACH;
    }
  });

  if (!found) {
    // No entry so far in this trec
    TRecEntry *ne;
    ne = get_new_entry(cap, t);
    ne -> tvar = tvar;
    ne -> expected_value = expected_value;
    ne -> new_value = new_value;
  }
}
+
+/*......................................................................*/
+
+static StgBool entry_is_update(TRecEntry *e) {
+ StgBool result;
+ result = (e -> expected_value != e -> new_value);
+ return result;
+}
+
+#if defined(STM_FG_LOCKS)
// TRUE iff this entry only read its TVar (new value equals expected value).
// Complement of entry_is_update; used only under fine-grained locking.
static StgBool entry_is_read_only(TRecEntry *e) {
  StgBool result;
  result = (e -> expected_value == e -> new_value);
  return result;
}
+
// TRUE iff TVar "s" is currently locked by transaction "h" (under
// fine-grained locking a TVar is locked by storing the holder's trec
// in its current_value field).
static StgBool tvar_is_locked(StgTVar *s, StgTRecHeader *h) {
  StgClosure *c;
  StgBool result;
  c = s -> current_value;
  result = (c == (StgClosure *) h);
  return result;
}
+#endif
+
+// revert_ownership : release a lock on a TVar, storing back
+// the value that it held when the lock was acquired. "revert_all"
+// is set in stmWait and stmReWait when we acquired locks on all of
+// the TVars involved. "revert_all" is not set in commit operations
+// where we don't lock TVars that have been read from but not updated.
+
// Release every TVar lock held by "trec", writing back the value each TVar
// held when the lock was taken (e->expected_value).  With revert_all we
// release locks on all entries (wait paths lock everything); otherwise
// only update entries were locked.  Only fine-grained locking actually
// holds per-TVar locks, hence the #if.
static void revert_ownership(StgTRecHeader *trec STG_UNUSED,
                             StgBool revert_all STG_UNUSED) {
#if defined(STM_FG_LOCKS)
  FOR_EACH_ENTRY(trec, e, {
    if (revert_all || entry_is_update(e)) {
      StgTVar *s;
      s = e -> tvar;
      if (tvar_is_locked(s, trec)) {
        unlock_tvar(trec, s, e -> expected_value, TRUE);
      }
    }
  });
#endif
}
+
+/*......................................................................*/
+
+// validate_and_acquire_ownership : this performs the twin functions
+// of checking that the TVars referred to by entries in trec hold the
+// expected values and:
+//
+// - locking the TVar (on updated TVars during commit, or all TVars
+// during wait)
+//
+// - recording the identity of the TRec who wrote the value seen in the
+// TVar (on non-updated TVars during commit). These values are
+// stashed in the TRec entries and are then checked in check_read_only
+// to ensure that an atomic snapshot of all of these locations has been
+// seen.
+
// Check that every TVar in "trec" still holds its expected value, locking
// the TVars as we go: all of them when acquire_all is set (wait paths), or
// just the updated ones during commit.  Read-only entries (fine-grained
// locking only) are not locked; instead their version number is recorded
// for the later check_read_only pass — note the deliberate
// check / record-version / re-check sequence, which closes the race where
// the TVar is updated between reading its value and its version.
// On failure, or when retain_ownership is false, any locks taken are
// reverted before returning.  Returns TRUE iff validation succeeded.
static StgBool validate_and_acquire_ownership (StgTRecHeader *trec,
                                               int acquire_all,
                                               int retain_ownership) {
  StgBool result;

  if (shake()) {
    // Debug aid: randomly fail validation to exercise the retry paths.
    TRACE("%p : shake, pretending trec is invalid when it may not be\n", trec);
    return FALSE;
  }

  ASSERT ((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));
  result = !((trec -> state) == TREC_CONDEMNED);
  if (result) {
    FOR_EACH_ENTRY(trec, e, {
      StgTVar *s;
      s = e -> tvar;
      if (acquire_all || entry_is_update(e)) {
        TRACE("%p : trying to acquire %p\n", trec, s);
        if (!cond_lock_tvar(trec, s, e -> expected_value)) {
          TRACE("%p : failed to acquire %p\n", trec, s);
          result = FALSE;
          BREAK_FOR_EACH;
        }
      } else {
        // Read-only entry: only possible when a read phase is in use.
        ASSERT(use_read_phase);
        IF_STM_FG_LOCKS({
          TRACE("%p : will need to check %p\n", trec, s);
          if (s -> current_value != e -> expected_value) {
            TRACE("%p : doesn't match\n", trec);
            result = FALSE;
            BREAK_FOR_EACH;
          }
          e -> num_updates = s -> num_updates;
          // Re-check after sampling the version to catch an update that
          // raced between the two reads above.
          if (s -> current_value != e -> expected_value) {
            TRACE("%p : doesn't match (race)\n", trec);
            result = FALSE;
            BREAK_FOR_EACH;
          } else {
            TRACE("%p : need to check version %d\n", trec, e -> num_updates);
          }
        });
      }
    });
  }

  if ((!result) || (!retain_ownership)) {
    revert_ownership(trec, acquire_all);
  }

  return result;
}
+
+// check_read_only : check that we've seen an atomic snapshot of the
+// non-updated TVars accessed by a trec. This checks that the last TRec to
+// commit an update to the TVar is unchanged since the value was stashed in
+// validate_and_acquire_ownership. If no update is seen to any TVar then
+// all of them contained their expected values at the start of the call to
+// check_read_only.
+//
+// The paper "Concurrent programming without locks" (under submission), or
+// Keir Fraser's PhD dissertation "Practical lock-free programming" discuss
+// this kind of algorithm.
+
// Second half of the read phase: verify that no TVar read (but not
// updated) by "trec" has been committed to since its version number was
// recorded in validate_and_acquire_ownership.  Returns TRUE iff all
// read-only entries are unchanged, i.e. the reads form an atomic snapshot.
static StgBool check_read_only(StgTRecHeader *trec STG_UNUSED) {
  StgBool result = TRUE;

  ASSERT (use_read_phase);
  IF_STM_FG_LOCKS({
    FOR_EACH_ENTRY(trec, e, {
      StgTVar *s;
      s = e -> tvar;
      if (entry_is_read_only(e)) {
        TRACE("%p : check_read_only for TVar %p, saw %d\n", trec, s, e -> num_updates);
        if (s -> num_updates != e -> num_updates) {
          // ||s -> current_value != e -> expected_value) {
          TRACE("%p : mismatch\n", trec);
          result = FALSE;
          BREAK_FOR_EACH;
        }
      }
    });
  });

  return result;
}
+
+
+/************************************************************************/
+
+void stmPreGCHook() {
+ nat i;
+
+ lock_stm(NO_TREC);
+ TRACE("stmPreGCHook\n");
+ for (i = 0; i < n_capabilities; i ++) {
+ Capability *cap = &capabilities[i];
+ cap -> free_tvar_wait_queues = END_STM_WAIT_QUEUE;
+ cap -> free_trec_chunks = END_STM_CHUNK_LIST;
+ cap -> free_trec_headers = NO_TREC;
+ }
+ unlock_stm(NO_TREC);
+}
+
+/************************************************************************/
+
+// check_read_only relies on version numbers held in TVars' "num_updates"
+// fields not wrapping around while a transaction is committed. The version
+// number is incremented each time an update is committed to the TVar
+// This is unlikely to wrap around when 32-bit integers are used for the counts,
+// but to ensure correctness we maintain a shared count on the maximum
+// number of commit operations that may occur and check that this has
+// not increased by more than 2^32 during a commit.
+
+#define TOKEN_BATCH_SIZE 1024
+
+static volatile StgInt64 max_commits = 0;
+
+static volatile StgBool token_locked = FALSE;
+
+#if defined(THREADED_RTS)
// Grab a batch of commit tokens for this capability, bumping the shared
// max_commits counter under a simple CAS spinlock.  max_commits bounds how
// many commits can be in flight; see the wrap-around note above.
static void getTokenBatch(Capability *cap) {
  while (cas(&token_locked, FALSE, TRUE) == TRUE) { /* nothing */ }
  max_commits += TOKEN_BATCH_SIZE;
  cap -> transaction_tokens = TOKEN_BATCH_SIZE;
  token_locked = FALSE;
}
+
// Consume one commit token, refilling from the shared counter when this
// capability's local batch is exhausted.
static void getToken(Capability *cap) {
  if (cap -> transaction_tokens == 0) {
    getTokenBatch(cap);
  }
  cap -> transaction_tokens --;
}
+#else
// Non-threaded build: commit counting is unnecessary, so this is a no-op.
static void getToken(Capability *cap STG_UNUSED) {
  // Nothing
}
+#endif
+
+/*......................................................................*/
+
// Begin a transaction nested inside "outer" (NO_TREC at top level) and
// return its fresh record.  Takes a commit token first — see the
// commentary on max_commits above.
StgTRecHeader *stmStartTransaction(Capability *cap,
                                   StgTRecHeader *outer) {
  StgTRecHeader *t;
  TRACE("%p : stmStartTransaction with %d tokens\n",
        outer,
        cap -> transaction_tokens);

  getToken(cap);

  t = alloc_stg_trec_header(cap, outer);
  TRACE("%p : stmStartTransaction()=%p\n", outer, t);
  return t;
}
+
+/*......................................................................*/
+
// Abandon a transaction: if it was waiting, first remove it from all the
// TVar wait queues; then mark it aborted and recycle its record.
void stmAbortTransaction(Capability *cap,
                         StgTRecHeader *trec) {
  TRACE("%p : stmAbortTransaction\n", trec);
  ASSERT (trec != NO_TREC);
  ASSERT ((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));

  lock_stm(trec);
  if (trec -> state == TREC_WAITING) {
    ASSERT (trec -> enclosing_trec == NO_TREC);
    TRACE("%p : stmAbortTransaction aborting waiting transaction\n", trec);
    remove_wait_queue_entries_for_trec(cap, trec);
  }
  trec -> state = TREC_ABORTED;
  unlock_stm(trec);

  free_stg_trec_header(cap, trec);

  TRACE("%p : stmAbortTransaction done\n", trec);
}
+
+/*......................................................................*/
+
// Mark a transaction as condemned (doomed to fail validation) — e.g. when
// an asynchronous exception is delivered.  Unlike stmAbortTransaction the
// record is kept alive; the transaction will be torn down later.
void stmCondemnTransaction(Capability *cap,
                           StgTRecHeader *trec) {
  TRACE("%p : stmCondemnTransaction\n", trec);
  ASSERT (trec != NO_TREC);
  ASSERT ((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));

  lock_stm(trec);
  if (trec -> state == TREC_WAITING) {
    ASSERT (trec -> enclosing_trec == NO_TREC);
    TRACE("%p : stmCondemnTransaction condemning waiting transaction\n", trec);
    remove_wait_queue_entries_for_trec(cap, trec);
  }
  trec -> state = TREC_CONDEMNED;
  unlock_stm(trec);

  TRACE("%p : stmCondemnTransaction done\n", trec);
}
+
+/*......................................................................*/
+
+StgTRecHeader *stmGetEnclosingTRec(StgTRecHeader *trec) {
+ StgTRecHeader *outer;
+ TRACE("%p : stmGetEnclosingTRec\n", trec);
+ outer = trec -> enclosing_trec;
+ TRACE("%p : stmGetEnclosingTRec()=%p\n", trec, outer);
+ return outer;
+}
+
+/*......................................................................*/
+
// Validate "trec" and every transaction enclosing it, without retaining
// any TVar locks.  An invalid nest is condemned unless the innermost
// transaction is waiting (a waiting trec is handled by the wake-up path).
// Returns TRUE iff the whole nest is still valid.
StgBool stmValidateNestOfTransactions(StgTRecHeader *trec) {
  StgTRecHeader *t;
  StgBool result;

  TRACE("%p : stmValidateNestOfTransactions\n", trec);
  ASSERT(trec != NO_TREC);
  ASSERT((trec -> state == TREC_ACTIVE) ||
         (trec -> state == TREC_WAITING) ||
         (trec -> state == TREC_CONDEMNED));

  lock_stm(trec);

  t = trec;
  result = TRUE;
  while (t != NO_TREC) {
    result &= validate_and_acquire_ownership(t, TRUE, FALSE);
    t = t -> enclosing_trec;
  }

  if (!result && trec -> state != TREC_WAITING) {
    trec -> state = TREC_CONDEMNED;
  }

  unlock_stm(trec);

  TRACE("%p : stmValidateNestOfTransactions()=%d\n", trec, result);
  return result;
}
+
+/*......................................................................*/
+
// Attempt to commit a top-level transaction.  Steps: (1) validate and lock
// the updated TVars; (2) under a read phase, confirm an atomic snapshot of
// the read-only TVars, and abort if the global commit counter could have
// wrapped the 32-bit per-TVar version numbers since we started; (3) write
// the new values back, waking any waiters.  On any failure the acquired
// locks are reverted.  The trec is recycled either way; returns TRUE iff
// the commit succeeded.
StgBool stmCommitTransaction(Capability *cap, StgTRecHeader *trec) {
  int result;
  StgInt64 max_commits_at_start = max_commits;

  TRACE("%p : stmCommitTransaction()\n", trec);
  ASSERT (trec != NO_TREC);

  lock_stm(trec);

  ASSERT (trec -> enclosing_trec == NO_TREC);
  ASSERT ((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_CONDEMNED));

  result = validate_and_acquire_ownership(trec, (!use_read_phase), TRUE);
  if (result) {
    // We now know that all the updated locations hold their expected values.
    ASSERT (trec -> state == TREC_ACTIVE);

    if (use_read_phase) {
      TRACE("%p : doing read check\n", trec);
      result = check_read_only(trec);
      TRACE("%p : read-check %s\n", trec, result ? "succeeded" : "failed");

      // Guard against num_updates wrap-around: if more than 2^32 commits
      // could have happened since we started, the version check above is
      // unreliable, so fail conservatively and retry.
      StgInt64 max_commits_at_end = max_commits;
      StgInt64 max_concurrent_commits;
      max_concurrent_commits = ((max_commits_at_end - max_commits_at_start) +
                                (n_capabilities * TOKEN_BATCH_SIZE));
      if (((max_concurrent_commits >> 32) > 0) || shake()) {
        result = FALSE;
      }
    }

    if (result) {
      // We now know that all of the read-only locations held their expected
      // values at the end of the call to validate_and_acquire_ownership.
      // This forms the linearization point of the commit.

      FOR_EACH_ENTRY(trec, e, {
        StgTVar *s;
        s = e -> tvar;
        if (e -> new_value != e -> expected_value) {
          // Entry is an update: write the value back to the TVar, unlocking it if
          // necessary.

          ACQ_ASSERT(tvar_is_locked(s, trec));
          TRACE("%p : writing %p to %p, waking waiters\n", trec, e -> new_value, s);
          unpark_waiters_on(cap,s);
          IF_STM_FG_LOCKS({
            s -> num_updates ++;
          });
          unlock_tvar(trec, s, e -> new_value, TRUE);
        }
        ACQ_ASSERT(!tvar_is_locked(s, trec));
      });
    } else {
      revert_ownership(trec, FALSE);
    }
  }

  unlock_stm(trec);

  free_stg_trec_header(cap, trec);

  TRACE("%p : stmCommitTransaction()=%d\n", trec, result);

  return result;
}
+
+/*......................................................................*/
+
+StgBool stmCommitNestedTransaction(Capability *cap, StgTRecHeader *trec) {
+ StgTRecHeader *et;
+ int result;
+ ASSERT (trec != NO_TREC && trec -> enclosing_trec != NO_TREC);
+ TRACE("%p : stmCommitNestedTransaction() into %p\n", trec, trec -> enclosing_trec);
+ ASSERT ((trec -> state == TREC_ACTIVE) || (trec -> state == TREC_CONDEMNED));
+
+ lock_stm(trec);
+
+ et = trec -> enclosing_trec;
+ result = validate_and_acquire_ownership(trec, (!use_read_phase), TRUE);
+ if (result) {
+ // We now know that all the updated locations hold their expected values.
+
+ if (use_read_phase) {
+ TRACE("%p : doing read check\n", trec);
+ result = check_read_only(trec);
+ }
+ if (result) {
+ // We now know that all of the read-only locations held their exepcted values
+ // at the end of the call to validate_and_acquire_ownership. This forms the
+ // linearization point of the commit.
+
+ if (result) {
+ TRACE("%p : read-check succeeded\n", trec);
+ FOR_EACH_ENTRY(trec, e, {
+ // Merge each entry into the enclosing transaction record, release all
+ // locks.
+
+ StgTVar *s;
+ s = e -> tvar;
+ if (entry_is_update(e)) {
+ unlock_tvar(trec, s, e -> expected_value, FALSE);
+ }
+ merge_update_into(cap, et, s, e -> expected_value, e -> new_value);
+ ACQ_ASSERT(s -> current_value != trec);
+ });
+ } else {
+ revert_ownership(trec, FALSE);
+ }
+ }
+ }
+
+ unlock_stm(trec);
+
+ free_stg_trec_header(cap, trec);
+
+ TRACE("%p : stmCommitNestedTransaction()=%d\n", trec, result);
+
+ return result;
+}
+
+/*......................................................................*/
+
// Begin waiting (the "retry" primitive): validate the transaction while
// acquiring locks on ALL of its TVars, enqueue the thread on each TVar's
// wait queue, park it, and mark the trec TREC_WAITING.  The locks are NOT
// released here — stmWaitUnlock does that once the thread is safely
// asleep.  Returns FALSE (recycling the trec) if validation failed and the
// transaction must simply be retried.
StgBool stmWait(Capability *cap, StgTSO *tso, StgTRecHeader *trec) {
  int result;
  TRACE("%p : stmWait(%p)\n", trec, tso);
  ASSERT (trec != NO_TREC);
  ASSERT (trec -> enclosing_trec == NO_TREC);
  ASSERT ((trec -> state == TREC_ACTIVE) ||
          (trec -> state == TREC_CONDEMNED));

  lock_stm(trec);
  result = validate_and_acquire_ownership(trec, TRUE, TRUE);
  if (result) {
    // The transaction is valid so far so we can actually start waiting.
    // (Otherwise the transaction was not valid and the thread will have to
    // retry it).

    // Put ourselves to sleep.  We retain locks on all the TVars involved
    // until we are sound asleep : (a) on the wait queues, (b) BlockedOnSTM
    // in the TSO, (c) TREC_WAITING in the Trec.
    build_wait_queue_entries_for_trec(cap, tso, trec);
    park_tso(tso);
    trec -> state = TREC_WAITING;

    // We haven't released ownership of the transaction yet.  The TSO
    // has been put on the wait queue for the TVars it is waiting for,
    // but we haven't yet tidied up the TSO's stack and made it safe
    // to wake up the TSO.  Therefore, we must wait until the TSO is
    // safe to wake up before we release ownership - when all is well,
    // the runtime will call stmWaitUnlock() below, with the same
    // TRec.

  } else {
    unlock_stm(trec);
    free_stg_trec_header(cap, trec);
  }

  TRACE("%p : stmWait(%p)=%d\n", trec, tso, result);
  return result;
}
+
+
// Second half of stmWait: called once the parked TSO is safe to wake,
// releasing all the TVar locks stmWait retained and then the STM lock.
void
stmWaitUnlock(Capability *cap STG_UNUSED, StgTRecHeader *trec) {
  revert_ownership(trec, TRUE);
  unlock_stm(trec);
}
+
+/*......................................................................*/
+
// Called when a waiting thread is woken: re-validate its transaction.  If
// still valid the wake-up was spurious — the thread stays on the wait
// queues and is parked again (TRUE).  Otherwise the transaction must be
// re-run: unhook it from the wait queues (unless already condemned, in
// which case that was done at condemn time) and recycle the trec (FALSE).
StgBool stmReWait(Capability *cap, StgTSO *tso) {
  int result;
  StgTRecHeader *trec = tso->trec;

  TRACE("%p : stmReWait\n", trec);
  ASSERT (trec != NO_TREC);
  ASSERT (trec -> enclosing_trec == NO_TREC);
  ASSERT ((trec -> state == TREC_WAITING) ||
          (trec -> state == TREC_CONDEMNED));

  lock_stm(trec);
  result = validate_and_acquire_ownership(trec, TRUE, TRUE);
  TRACE("%p : validation %s\n", trec, result ? "succeeded" : "failed");
  if (result) {
    // The transaction remains valid -- do nothing because it is already on
    // the wait queues
    ASSERT (trec -> state == TREC_WAITING);
    park_tso(tso);
    revert_ownership(trec, TRUE);
  } else {
    // The transaction has become invalid.  We can now remove it from the wait
    // queues.
    if (trec -> state != TREC_CONDEMNED) {
      remove_wait_queue_entries_for_trec (cap, trec);
    }
    free_stg_trec_header(cap, trec);
  }
  unlock_stm(trec);

  TRACE("%p : stmReWait()=%d\n", trec, result);
  return result;
}
+
+/*......................................................................*/
+
// Search "trec" and each enclosing transaction for an entry on "tvar".
// Returns the entry (or NULL) and, via "in" when non-NULL, the trec the
// entry was found in — callers use that to distinguish their own entries
// from a parent's.
static TRecEntry *get_entry_for(StgTRecHeader *trec, StgTVar *tvar, StgTRecHeader **in) {
  TRecEntry *result = NULL;

  TRACE("%p : get_entry_for TVar %p\n", trec, tvar);
  ASSERT(trec != NO_TREC);

  do {
    FOR_EACH_ENTRY(trec, e, {
      if (e -> tvar == tvar) {
        result = e;
        if (in != NULL) {
          *in = trec;
        }
        BREAK_FOR_EACH;
      }
    });
    trec = trec -> enclosing_trec;
  } while (result == NULL && trec != NO_TREC);

  return result;
}
+
// Read a TVar's current value.  Under fine-grained locking a locked TVar
// holds the owning trec in current_value, so spin until we see a real
// closure rather than a TREC_HEADER.
static StgClosure *read_current_value(StgTRecHeader *trec STG_UNUSED, StgTVar *tvar) {
  StgClosure *result;
  result = tvar -> current_value;

#if defined(STM_FG_LOCKS)
  while (GET_INFO(result) == &stg_TREC_HEADER_info) {
    // TVar is locked by some committing transaction; wait it out.
    TRACE("%p : read_current_value(%p) saw %p\n", trec, tvar, result);
    result = tvar -> current_value;
  }
#endif

  TRACE("%p : read_current_value(%p)=%p\n", trec, tvar, result);
  return result;
}
+
+/*......................................................................*/
+
// The readTVar primitive.  Three cases: (1) our own trec already has an
// entry — return its new_value; (2) an enclosing trec has one — copy it
// into our trec so nested commits can detect conflicts, and return its
// new_value; (3) no entry anywhere — read the TVar and record a read-only
// entry (expected == new) in our trec.
StgClosure *stmReadTVar(Capability *cap,
                        StgTRecHeader *trec,
                        StgTVar *tvar) {
  StgTRecHeader *entry_in;
  StgClosure *result = NULL;
  TRecEntry *entry = NULL;
  TRACE("%p : stmReadTVar(%p)\n", trec, tvar);
  ASSERT (trec != NO_TREC);
  ASSERT (trec -> state == TREC_ACTIVE ||
          trec -> state == TREC_CONDEMNED);

  entry = get_entry_for(trec, tvar, &entry_in);

  if (entry != NULL) {
    if (entry_in == trec) {
      // Entry found in our trec
      result = entry -> new_value;
    } else {
      // Entry found in another trec
      TRecEntry *new_entry = get_new_entry(cap, trec);
      new_entry -> tvar = tvar;
      new_entry -> expected_value = entry -> expected_value;
      new_entry -> new_value = entry -> new_value;
      result = new_entry -> new_value;
    }
  } else {
    // No entry found
    StgClosure *current_value = read_current_value(trec, tvar);
    TRecEntry *new_entry = get_new_entry(cap, trec);
    new_entry -> tvar = tvar;
    new_entry -> expected_value = current_value;
    new_entry -> new_value = current_value;
    result = current_value;
  }

  TRACE("%p : stmReadTVar(%p)=%p\n", trec, tvar, result);
  return result;
}
+
+/*......................................................................*/
+
// The writeTVar primitive.  Mirrors stmReadTVar's three cases, but stores
// "new_value" into the entry: update our own entry in place, or copy a
// parent's entry (keeping its expected_value) into our trec, or create a
// fresh entry whose expected_value is the TVar's current value.
void stmWriteTVar(Capability *cap,
                  StgTRecHeader *trec,
                  StgTVar *tvar,
                  StgClosure *new_value) {

  StgTRecHeader *entry_in;
  TRecEntry *entry = NULL;
  TRACE("%p : stmWriteTVar(%p, %p)\n", trec, tvar, new_value);
  ASSERT (trec != NO_TREC);
  ASSERT (trec -> state == TREC_ACTIVE ||
          trec -> state == TREC_CONDEMNED);

  entry = get_entry_for(trec, tvar, &entry_in);

  if (entry != NULL) {
    if (entry_in == trec) {
      // Entry found in our trec
      entry -> new_value = new_value;
    } else {
      // Entry found in another trec
      TRecEntry *new_entry = get_new_entry(cap, trec);
      new_entry -> tvar = tvar;
      new_entry -> expected_value = entry -> expected_value;
      new_entry -> new_value = new_value;
    }
  } else {
    // No entry found
    StgClosure *current_value = read_current_value(trec, tvar);
    TRecEntry *new_entry = get_new_entry(cap, trec);
    new_entry -> tvar = tvar;
    new_entry -> expected_value = current_value;
    new_entry -> new_value = new_value;
  }

  TRACE("%p : stmWriteTVar done\n", trec);
}
+
+/*......................................................................*/
+
// Allocate a fresh TVar holding "new_value", with an empty wait queue.
// The version counter only exists (and is only needed) in threaded builds.
StgTVar *stmNewTVar(Capability *cap,
                    StgClosure *new_value) {
  StgTVar *result;
  result = (StgTVar *)allocateLocal(cap, sizeofW(StgTVar));
  SET_HDR (result, &stg_TVAR_info, CCS_SYSTEM);
  result -> current_value = new_value;
  result -> first_wait_queue_entry = END_STM_WAIT_QUEUE;
#if defined(THREADED_RTS)
  result -> num_updates = 0;
#endif
  return result;
}
+
+/*......................................................................*/
diff --git a/rts/Sanity.c b/rts/Sanity.c
new file mode 100644
index 0000000000..0e68a86ba7
--- /dev/null
+++ b/rts/Sanity.c
@@ -0,0 +1,948 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2006
+ *
+ * Sanity checking code for the heap and stack.
+ *
+ * Used when debugging: check that everything reasonable.
+ *
+ * - All things that are supposed to be pointers look like pointers.
+ *
+ * - Objects in text space are marked as static closures, those
+ * in the heap are dynamic.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+#ifdef DEBUG /* whole file */
+
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "BlockAlloc.h"
+#include "Sanity.h"
+#include "MBlock.h"
+#include "Storage.h"
+#include "Schedule.h"
+#include "Apply.h"
+
+/* -----------------------------------------------------------------------------
+ Forward decls.
+ -------------------------------------------------------------------------- */
+
+static void checkSmallBitmap ( StgPtr payload, StgWord bitmap, nat );
+static void checkLargeBitmap ( StgPtr payload, StgLargeBitmap*, nat );
+static void checkClosureShallow ( StgClosure * );
+
+/* -----------------------------------------------------------------------------
+ Check stack sanity
+ -------------------------------------------------------------------------- */
+
+/* Walk 'size' payload words under a small (single-word) bitmap.
+ * A clear bit marks a pointer word, which is shallow-checked as a
+ * closure; words with the bit set are non-pointers and are skipped.
+ *
+ * Fix: the original declared and assigned a local StgPtr p that was
+ * never read (dead store); it has been removed.
+ */
+static void
+checkSmallBitmap( StgPtr payload, StgWord bitmap, nat size )
+{
+    nat i;
+
+    for(i = 0; i < size; i++, bitmap >>= 1 ) {
+        if ((bitmap & 1) == 0) {
+            checkClosureShallow((StgClosure *)payload[i]);
+        }
+    }
+}
+
+/* As checkSmallBitmap, but for bitmaps wider than one word: the bits
+ * live in large_bitmap->bitmap[] and are consumed one word at a time.
+ * A clear bit marks a pointer word to be shallow-checked. */
+static void
+checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap, nat size )
+{
+    StgWord bmp;
+    nat i, j;
+
+    i = 0;
+    for (bmp=0; i < size; bmp++) {
+        StgWord bitmap = large_bitmap->bitmap[bmp];
+        j = 0;
+        // scan at most one word's worth of bits per outer iteration
+        for(; i < size && j < BITS_IN(W_); j++, i++, bitmap >>= 1 ) {
+            if ((bitmap & 1) == 0) {
+                checkClosureShallow((StgClosure *)payload[i]);
+            }
+        }
+    }
+}
+
+/*
+ * Validate that p looks like a closure without descending into its
+ * payload.  Keeping this check shallow breaks the recursion that
+ * would otherwise arise between checking PAPs and checking stack
+ * chunks.
+ */
+static void
+checkClosureShallow( StgClosure* p )
+{
+    ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+
+    // dynamic closures live in the heap; static ones outside it
+    if (HEAP_ALLOCED(p)) {
+        ASSERT(!closure_STATIC(p));
+    } else {
+        ASSERT(closure_STATIC(p));
+    }
+}
+
+// Check an individual stack object and return its size in words, so
+// the caller can step over it to the next frame.
+StgOffset
+checkStackFrame( StgPtr c )
+{
+    nat size;
+    const StgRetInfoTable* info;
+
+    info = get_ret_itbl((StgClosure *)c);
+
+    /* All activation records have 'bitmap' style layout info. */
+    switch (info->i.type) {
+    case RET_DYN: /* Dynamic bitmap: the mask is stored on the stack */
+    {
+        StgWord dyn;
+        StgPtr p;
+        StgRetDyn* r;
+
+        r = (StgRetDyn *)c;
+        dyn = r->liveness;
+
+        p = (P_)(r->payload);
+        checkSmallBitmap(p,RET_DYN_LIVENESS(r->liveness),RET_DYN_BITMAP_SIZE);
+        p += RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE;
+
+        // skip over the non-pointers
+        p += RET_DYN_NONPTRS(dyn);
+
+        // follow the ptr words
+        for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
+            checkClosureShallow((StgClosure *)*p);
+            p++;
+        }
+
+        return sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE +
+            RET_DYN_NONPTR_REGS_SIZE +
+            RET_DYN_NONPTRS(dyn) + RET_DYN_PTRS(dyn);
+    }
+
+    case UPDATE_FRAME:
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgUpdateFrame*)c)->updatee));
+        /* fallthrough: after checking the updatee, an update frame is
+         * handled like the other small-bitmap frames below */
+    case ATOMICALLY_FRAME:
+    case CATCH_RETRY_FRAME:
+    case CATCH_STM_FRAME:
+    case CATCH_FRAME:
+      // small bitmap cases (<= 32 entries)
+    case STOP_FRAME:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+        size = BITMAP_SIZE(info->i.layout.bitmap);
+        checkSmallBitmap((StgPtr)c + 1,
+                         BITMAP_BITS(info->i.layout.bitmap), size);
+        return 1 + size;
+
+    case RET_BCO: {
+        StgBCO *bco;
+        nat size;  // NB: deliberately shadows the outer 'size'
+        // frame layout: return address, then the BCO, then the args
+        bco = (StgBCO *)*(c+1);
+        size = BCO_BITMAP_SIZE(bco);
+        checkLargeBitmap((StgPtr)c + 2, BCO_BITMAP(bco), size);
+        return 2 + size;
+    }
+
+    case RET_BIG: // large bitmap (> 32 entries)
+    case RET_VEC_BIG:
+        size = GET_LARGE_BITMAP(&info->i)->size;
+        checkLargeBitmap((StgPtr)c + 1, GET_LARGE_BITMAP(&info->i), size);
+        return 1 + size;
+
+    case RET_FUN:
+    {
+        StgFunInfoTable *fun_info;
+        StgRetFun *ret_fun;
+
+        ret_fun = (StgRetFun *)c;
+        fun_info = get_fun_itbl(ret_fun->fun);
+        size = ret_fun->size;
+        // which bitmap applies depends on the function's calling
+        // convention (fun_type)
+        switch (fun_info->f.fun_type) {
+        case ARG_GEN:
+            checkSmallBitmap((StgPtr)ret_fun->payload,
+                             BITMAP_BITS(fun_info->f.b.bitmap), size);
+            break;
+        case ARG_GEN_BIG:
+            checkLargeBitmap((StgPtr)ret_fun->payload,
+                             GET_FUN_LARGE_BITMAP(fun_info), size);
+            break;
+        default:
+            // standard argument pattern: bitmap comes from the
+            // stg_arg_bitmaps table
+            checkSmallBitmap((StgPtr)ret_fun->payload,
+                             BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]),
+                             size);
+            break;
+        }
+        return sizeofW(StgRetFun) + size;
+    }
+
+    default:
+        barf("checkStackFrame: weird activation record found on stack (%p %d).",c,info->i.type);
+    }
+}
+
+// Walk and sanity-check every stack frame in the half-open range
+// [sp, stack_end) — e.g. the section of stack between update frames.
+void
+checkStackChunk( StgPtr sp, StgPtr stack_end )
+{
+    StgPtr frame = sp;
+
+    while (frame < stack_end) {
+        frame += checkStackFrame( frame );
+    }
+    // ASSERT( frame == stack_end ); -- HWL
+}
+
+/* Check the payload of a PAP or AP closure against its function's
+ * argument bitmap; fun itself must look like a closure.  The bitmap
+ * used depends on the function's calling convention (fun_type).
+ *
+ * Fix: the original declared and assigned a local StgClosure *p that
+ * was never read (dead store); it has been removed.
+ */
+static void
+checkPAP (StgClosure *fun, StgClosure** payload, StgWord n_args)
+{
+    StgFunInfoTable *fun_info;
+
+    ASSERT(LOOKS_LIKE_CLOSURE_PTR(fun));
+    fun_info = get_fun_itbl(fun);
+
+    switch (fun_info->f.fun_type) {
+    case ARG_GEN:
+        // bitmap stored directly in the function's info table
+        checkSmallBitmap( (StgPtr)payload,
+                          BITMAP_BITS(fun_info->f.b.bitmap), n_args );
+        break;
+    case ARG_GEN_BIG:
+        checkLargeBitmap( (StgPtr)payload,
+                          GET_FUN_LARGE_BITMAP(fun_info),
+                          n_args );
+        break;
+    case ARG_BCO:
+        // byte-code objects carry their own bitmap
+        checkLargeBitmap( (StgPtr)payload,
+                          BCO_BITMAP(fun),
+                          n_args );
+        break;
+    default:
+        // standard argument pattern: bitmap from stg_arg_bitmaps
+        checkSmallBitmap( (StgPtr)payload,
+                          BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]),
+                          n_args );
+        break;
+    }
+}
+
+
+/* Sanity-check a single closure and return its size in words.
+ * Asserts that its info pointer and all payload pointers look
+ * plausible, and barfs on closure types that should never appear as
+ * heap objects here (stack frames, EVACUATED). */
+StgOffset
+checkClosure( StgClosure* p )
+{
+    const StgInfoTable *info;
+
+    ASSERT(LOOKS_LIKE_INFO_PTR(p->header.info));
+
+    /* Is it a static closure (i.e. in the data segment)? */
+    if (!HEAP_ALLOCED(p)) {
+        ASSERT(closure_STATIC(p));
+    } else {
+        ASSERT(!closure_STATIC(p));
+    }
+
+    info = get_itbl(p);
+    switch (info->type) {
+
+    case MVAR:
+    {
+        StgMVar *mvar = (StgMVar *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(mvar->head));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(mvar->tail));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(mvar->value));
+#if 0
+#if defined(PAR)
+        checkBQ((StgBlockingQueueElement *)mvar->head, p);
+#else
+        checkBQ(mvar->head, p);
+#endif
+#endif
+        return sizeofW(StgMVar);
+    }
+
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_2_0:
+    {
+        // thunks have a distinct layout (StgThunk), hence the
+        // separate case from FUN/CONSTR below
+        nat i;
+        for (i = 0; i < info->layout.payload.ptrs; i++) {
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgThunk *)p)->payload[i]));
+        }
+        return thunk_sizeW_fromITBL(info);
+    }
+
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_1_1:
+    case FUN_0_2:
+    case FUN_2_0:
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case CONSTR_2_0:
+    case IND_PERM:
+    case IND_OLDGEN:
+    case IND_OLDGEN_PERM:
+#ifdef TICKY_TICKY
+    case SE_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+#endif
+    case BLACKHOLE:
+    case CAF_BLACKHOLE:
+    case STABLE_NAME:
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_STATIC:
+    case CONSTR_NOCAF_STATIC:
+    case THUNK_STATIC:
+    case FUN_STATIC:
+    {
+        // generic case: info table describes the pointer payload
+        nat i;
+        for (i = 0; i < info->layout.payload.ptrs; i++) {
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(p->payload[i]));
+        }
+        return sizeW_fromITBL(info);
+    }
+
+    case BCO: {
+        StgBCO *bco = (StgBCO *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(bco->instrs));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(bco->literals));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(bco->ptrs));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(bco->itbls));
+        return bco_sizeW(bco);
+    }
+
+    case IND_STATIC: /* (1, 0) closure */
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgIndStatic*)p)->indirectee));
+        return sizeW_fromITBL(info);
+
+    case WEAK:
+        /* deal with these specially - the info table isn't
+         * representative of the actual layout.
+         */
+    {   StgWeak *w = (StgWeak *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(w->key));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(w->value));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(w->finalizer));
+        if (w->link) {
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(w->link));
+        }
+        return sizeW_fromITBL(info);
+    }
+
+    case THUNK_SELECTOR:
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgSelector *)p)->selectee));
+        return THUNK_SELECTOR_sizeW();
+
+    case IND:
+    {
+        /* we don't expect to see any of these after GC
+         * but they might appear during execution
+         */
+        StgInd *ind = (StgInd *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(ind->indirectee));
+        return sizeofW(StgInd);
+    }
+
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+    case RET_DYN:
+    case UPDATE_FRAME:
+    case STOP_FRAME:
+    case CATCH_FRAME:
+    case ATOMICALLY_FRAME:
+    case CATCH_RETRY_FRAME:
+    case CATCH_STM_FRAME:
+        // stack frames are checked by checkStackFrame, never here
+        barf("checkClosure: stack frame");
+
+    case AP:
+    {
+        StgAP* ap = (StgAP *)p;
+        checkPAP (ap->fun, ap->payload, ap->n_args);
+        return ap_sizeW(ap);
+    }
+
+    case PAP:
+    {
+        StgPAP* pap = (StgPAP *)p;
+        checkPAP (pap->fun, pap->payload, pap->n_args);
+        return pap_sizeW(pap);
+    }
+
+    case AP_STACK:
+    {
+        // the payload of an AP_STACK is a chunk of suspended stack
+        StgAP_STACK *ap = (StgAP_STACK *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(ap->fun));
+        checkStackChunk((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size);
+        return ap_stack_sizeW(ap);
+    }
+
+    case ARR_WORDS:
+        // raw words, no pointers to check
+        return arr_words_sizeW((StgArrWords *)p);
+
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
+    {
+        StgMutArrPtrs* a = (StgMutArrPtrs *)p;
+        nat i;
+        for (i = 0; i < a->ptrs; i++) {
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(a->payload[i]));
+        }
+        return mut_arr_ptrs_sizeW(a);
+    }
+
+    case TSO:
+        checkTSO((StgTSO *)p);
+        return tso_sizeW((StgTSO *)p);
+
+#if defined(PAR)
+
+    case BLOCKED_FETCH:
+        ASSERT(LOOKS_LIKE_GA(&(((StgBlockedFetch *)p)->ga)));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR((((StgBlockedFetch *)p)->node)));
+        return sizeofW(StgBlockedFetch);  // see size used in evacuate()
+
+#ifdef DIST
+    case REMOTE_REF:
+        return sizeofW(StgFetchMe);
+#endif /*DIST */
+
+    case FETCH_ME:
+        ASSERT(LOOKS_LIKE_GA(((StgFetchMe *)p)->ga));
+        return sizeofW(StgFetchMe);  // see size used in evacuate()
+
+    case FETCH_ME_BQ:
+        checkBQ(((StgFetchMeBlockingQueue *)p)->blocking_queue, (StgClosure *)p);
+        return sizeofW(StgFetchMeBlockingQueue);  // see size used in evacuate()
+
+    case RBH:
+        /* In an RBH the BQ may be empty (ie END_BQ_QUEUE) but not NULL */
+        ASSERT(((StgRBH *)p)->blocking_queue!=NULL);
+        if (((StgRBH *)p)->blocking_queue!=END_BQ_QUEUE)
+            checkBQ(((StgRBH *)p)->blocking_queue, p);
+        ASSERT(LOOKS_LIKE_INFO_PTR(REVERT_INFOPTR(get_itbl((StgClosure *)p))));
+        return BLACKHOLE_sizeW();   // see size used in evacuate()
+        // sizeW_fromITBL(REVERT_INFOPTR(get_itbl((StgClosure *)p)));
+
+#endif
+
+    case TVAR_WAIT_QUEUE:
+    {
+        StgTVarWaitQueue *wq = (StgTVarWaitQueue *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(wq->next_queue_entry));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(wq->prev_queue_entry));
+        return sizeofW(StgTVarWaitQueue);
+    }
+
+    case TVAR:
+    {
+        StgTVar *tv = (StgTVar *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(tv->current_value));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(tv->first_wait_queue_entry));
+        return sizeofW(StgTVar);
+    }
+
+    case TREC_CHUNK:
+    {
+        // only entries up to next_entry_idx are initialised
+        nat i;
+        StgTRecChunk *tc = (StgTRecChunk *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(tc->prev_chunk));
+        for (i = 0; i < tc -> next_entry_idx; i ++) {
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(tc->entries[i].tvar));
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(tc->entries[i].expected_value));
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(tc->entries[i].new_value));
+        }
+        return sizeofW(StgTRecChunk);
+    }
+
+    case TREC_HEADER:
+    {
+        StgTRecHeader *trec = (StgTRecHeader *)p;
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(trec -> enclosing_trec));
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(trec -> current_chunk));
+        return sizeofW(StgTRecHeader);
+    }
+
+
+    case EVACUATED:
+        // forwarding pointers should all have been followed by now
+        barf("checkClosure: found EVACUATED closure %d",
+             info->type);
+    default:
+        barf("checkClosure (closure type %d)", info->type);
+    }
+}
+
+#if defined(PAR)
+
+/* Plausibility checks for PVM task ids, slot numbers and global
+ * addresses (GUM only). */
+
+#define PVM_PE_MASK    0xfffc0000
+#define MAX_PVM_PES    MAX_PES
+#define MAX_PVM_TIDS   MAX_PES
+#define MAX_SLOTS      100000
+
+rtsBool
+looks_like_tid(StgInt tid)
+{
+  // split a PVM tid into PE number (hi) and per-PE task id (lo)
+  StgInt hi = (tid & PVM_PE_MASK) >> 18;
+  StgInt lo = (tid & ~PVM_PE_MASK);
+  // NOTE(review): hi is bounded by MAX_PVM_TIDS rather than
+  // MAX_PVM_PES; the two are defined equal here so this is harmless,
+  // but confirm which limit is intended.
+  rtsBool ok = (hi != 0) && (lo < MAX_PVM_TIDS) && (hi < MAX_PVM_TIDS);
+  return ok;
+}
+
+rtsBool
+looks_like_slot(StgInt slot)
+{
+  /* if tid is known better use looks_like_ga!! */
+  rtsBool ok = slot<MAX_SLOTS;
+  // This refers only to the no. of slots on the current PE
+  // rtsBool ok = slot<=highest_slot();
+  return ok;
+}
+
+rtsBool
+looks_like_ga(globalAddr *ga)
+{
+  rtsBool is_tid = looks_like_tid((ga)->payload.gc.gtid);
+  // local GAs get the tighter per-PE bound; remote ones only the
+  // global MAX_SLOTS bound
+  rtsBool is_slot = ((ga)->payload.gc.gtid==mytid) ?
+             (ga)->payload.gc.slot<=highest_slot() :
+             (ga)->payload.gc.slot<MAX_SLOTS;
+  rtsBool ok = is_tid && is_slot;
+  return ok;
+}
+
+#endif
+
+
+/* -----------------------------------------------------------------------------
+ Check Heap Sanity
+
+ After garbage collection, the live heap is in a state where we can
+ run through and check that all the pointers point to the right
+ place. This function starts at a given position and sanity-checks
+ all the objects in the remainder of the chain.
+ -------------------------------------------------------------------------- */
+
+/* Sanity-check every closure in the chain of blocks starting at bd.
+ * A no-op under THREADED_RTS (see comment below). */
+void
+checkHeap(bdescr *bd)
+{
+    StgPtr p;
+
+#if defined(THREADED_RTS)
+    // heap sanity checking doesn't work with SMP, because we can't
+    // zero the slop (see Updates.h).
+    return;
+#endif
+
+    for (; bd != NULL; bd = bd->link) {
+        p = bd->start;
+        while (p < bd->free) {
+            nat size = checkClosure((StgClosure *)p);
+            /* This is the smallest size of closure that can live in the heap */
+            ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) );
+            p += size;
+
+            /* skip over slop: slop words hold small values or other
+             * non-info-pointer garbage, never a valid info pointer */
+            while (p < bd->free &&
+                   (*p < 0x1000 || !LOOKS_LIKE_INFO_PTR((void*)*p))) { p++; }
+        }
+    }
+}
+
+#if defined(PAR)
+/*
+  Check heap between start and end. Used after unpacking graphs.
+*/
+void
+checkHeapChunk(StgPtr start, StgPtr end)
+{
+    extern globalAddr *LAGAlookup(StgClosure *addr);
+    StgPtr p;
+    nat size;
+
+    for (p=start; p<end; p+=size) {
+        ASSERT(LOOKS_LIKE_INFO_PTR((void*)*p));
+        if (get_itbl((StgClosure*)p)->type == FETCH_ME &&
+            *(p+1) == 0x0000eeee /* ie. unpack garbage (see SetGAandCommonUp) */) {
+            /* if it's a FM created during unpack and commoned up, it's not global */
+            ASSERT(LAGAlookup((StgClosure*)p)==NULL);
+            size = sizeofW(StgFetchMe);
+        } else if (get_itbl((StgClosure*)p)->type == IND) {
+            *(p+2) = 0x0000ee11; /* mark slop in IND as garbage */
+            size = sizeofW(StgInd);
+        } else {
+            size = checkClosure((StgClosure *)p);
+            /* This is the smallest size of closure that can live in the heap. */
+            ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) );
+        }
+    }
+}
+#else /* !PAR */
+/* Sanity-check every closure in the half-open range [start, end). */
+void
+checkHeapChunk(StgPtr start, StgPtr end)
+{
+    StgPtr p;
+    nat size;
+
+    for (p=start; p<end; p+=size) {
+        ASSERT(LOOKS_LIKE_INFO_PTR((void*)*p));
+        size = checkClosure((StgClosure *)p);
+        /* This is the smallest size of closure that can live in the heap. */
+        ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) );
+    }
+}
+#endif
+
+/* Sanity-check the first closure in each block of the chain bd
+ * (used for chains of large objects, one object per block). */
+void
+checkChain(bdescr *bd)
+{
+    bdescr *b;
+
+    for (b = bd; b != NULL; b = b->link) {
+        checkClosure((StgClosure *)b->start);
+    }
+}
+
+/* Sanity-check a TSO: stack bounds, (PAR only) blocking state, and
+ * every frame on its stack.  Relocated TSOs are followed to their new
+ * incarnation; killed TSOs are skipped entirely. */
+void
+checkTSO(StgTSO *tso)
+{
+    StgPtr sp = tso->sp;
+    StgPtr stack = tso->stack;
+    StgOffset stack_size = tso->stack_size;
+    StgPtr stack_end = stack + stack_size;
+
+    if (tso->what_next == ThreadRelocated) {
+        // this TSO has moved; check the new copy instead
+        checkTSO(tso->link);
+        return;
+    }
+
+    if (tso->what_next == ThreadKilled) {
+        /* The garbage collector doesn't bother following any pointers
+         * from dead threads, so don't check sanity here.
+         */
+        return;
+    }
+
+    ASSERT(stack <= sp && sp < stack_end);
+
+#if defined(PAR)
+    ASSERT(tso->par.magic==TSO_MAGIC);
+
+    switch (tso->why_blocked) {
+    case BlockedOnGA:
+        checkClosureShallow(tso->block_info.closure);
+        ASSERT(/* Can't be a FETCH_ME because *this* closure is on its BQ */
+               get_itbl(tso->block_info.closure)->type==FETCH_ME_BQ);
+        break;
+    case BlockedOnGA_NoSend:
+        checkClosureShallow(tso->block_info.closure);
+        ASSERT(get_itbl(tso->block_info.closure)->type==FETCH_ME_BQ);
+        break;
+    case BlockedOnBlackHole:
+        checkClosureShallow(tso->block_info.closure);
+        ASSERT(get_itbl(tso->block_info.closure)->type==BLACKHOLE ||
+               get_itbl(tso->block_info.closure)->type==RBH);
+        break;
+    case BlockedOnRead:
+    case BlockedOnWrite:
+    case BlockedOnDelay:
+#if defined(mingw32_HOST_OS)
+    case BlockedOnDoProc:
+#endif
+        /* isOnBQ(blocked_queue) */
+        break;
+    case BlockedOnException:
+        /* isOnSomeBQ(tso) */
+        ASSERT(get_itbl(tso->block_info.tso)->type==TSO);
+        break;
+    case BlockedOnMVar:
+        ASSERT(get_itbl(tso->block_info.closure)->type==MVAR);
+        break;
+    case BlockedOnSTM:
+        ASSERT(tso->block_info.closure == END_TSO_QUEUE);
+        break;
+    default:
+        /*
+          Could check other values of why_blocked but I am more
+          lazy than paranoid (bad combination) -- HWL
+        */
+    }
+
+    /* if the link field is non-nil it must point to one of these
+       three closure types */
+    ASSERT(tso->link == END_TSO_QUEUE ||
+           get_itbl(tso->link)->type == TSO ||
+           get_itbl(tso->link)->type == BLOCKED_FETCH ||
+           get_itbl(tso->link)->type == CONSTR);
+#endif
+
+    checkStackChunk(sp, stack_end);
+}
+
+#if defined(GRAN)
+/* Sanity-check every runnable TSO on every PE's run queue. */
+void
+checkTSOsSanity(void) {
+    nat i, tsos;
+    StgTSO *tso;
+
+    debugBelch("Checking sanity of all runnable TSOs:");
+
+    for (i=0, tsos=0; i<RtsFlags.GranFlags.proc; i++) {
+        for (tso=run_queue_hds[i]; tso!=END_TSO_QUEUE; tso=tso->link) {
+            debugBelch("TSO %p on PE %d ...", tso, i);
+            checkTSO(tso);
+            debugBelch("OK, ");
+            tsos++;
+        }
+    }
+
+    debugBelch(" checked %d TSOs on %d PEs; ok\n", tsos, RtsFlags.GranFlags.proc);
+}
+
+
+// still GRAN only
+
+/* Check the integrity of one PE's run queue.  All checks are ASSERTs,
+ * so success is reported simply by completing; returns rtsTrue.
+ * Fix: the function is declared rtsBool but previously fell off the
+ * end without returning a value, which is undefined behaviour if the
+ * result is used. */
+rtsBool
+checkThreadQSanity (PEs proc, rtsBool check_TSO_too)
+{
+    StgTSO *tso, *prev;
+
+    /* the NIL value for TSOs is END_TSO_QUEUE; thus, finding NULL is an error */
+    ASSERT(run_queue_hds[proc]!=NULL);
+    ASSERT(run_queue_tls[proc]!=NULL);
+    /* if either head or tail is NIL then the other one must be NIL, too */
+    ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE || run_queue_tls[proc]==END_TSO_QUEUE);
+    ASSERT(run_queue_tls[proc]!=END_TSO_QUEUE || run_queue_hds[proc]==END_TSO_QUEUE);
+    for (tso=run_queue_hds[proc], prev=END_TSO_QUEUE;
+         tso!=END_TSO_QUEUE;
+         prev=tso, tso=tso->link) {
+        ASSERT((prev!=END_TSO_QUEUE || tso==run_queue_hds[proc]) &&
+               (prev==END_TSO_QUEUE || prev->link==tso));
+        if (check_TSO_too)
+            checkTSO(tso);
+    }
+    ASSERT(prev==run_queue_tls[proc]);
+    return rtsTrue;
+}
+
+/* Check the run queues of all PEs; returns rtsTrue on completion.
+ * Fix: previously fell off the end of a non-void function. */
+rtsBool
+checkThreadQsSanity (rtsBool check_TSO_too)
+{
+    PEs p;
+
+    for (p=0; p<RtsFlags.GranFlags.proc; p++)
+        checkThreadQSanity(p, check_TSO_too);
+    return rtsTrue;
+}
+#endif /* GRAN */
+
+/*
+  Walk the global list of all threads, asserting that every element
+  still looks like an evacuated TSO.  Optionally run the full TSO
+  sanity check on each one as well.
+*/
+void
+checkGlobalTSOList (rtsBool checkTSOs)
+{
+    extern StgTSO *all_threads;
+    StgTSO *t;
+
+    for (t = all_threads; t != END_TSO_QUEUE; t = t->global_link) {
+        ASSERT(LOOKS_LIKE_CLOSURE_PTR(t));
+        ASSERT(get_itbl(t)->type == TSO);
+        if (checkTSOs) {
+            checkTSO(t);
+        }
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ Check mutable list sanity.
+ -------------------------------------------------------------------------- */
+
+/* Check a generation's mutable list: every entry must either be a
+ * static closure or live in generation 'gen'. */
+void
+checkMutableList( bdescr *mut_bd, nat gen )
+{
+    bdescr *bd;
+    StgPtr q;
+
+    for (bd = mut_bd; bd != NULL; bd = bd->link) {
+        for (q = bd->start; q < bd->free; q++) {
+            StgClosure *c = (StgClosure *)*q;
+            ASSERT(!HEAP_ALLOCED(c) || Bdescr((P_)c)->gen_no == gen);
+        }
+    }
+}
+
+/*
+  Check the static objects list: sanity-check each object and follow
+  its per-type static link field to the next one.
+
+  Fix: the barf message misspelled the function name
+  ("checkStaticObjetcs").
+*/
+void
+checkStaticObjects ( StgClosure* static_objects )
+{
+    StgClosure *p = static_objects;
+    StgInfoTable *info;
+
+    while (p != END_OF_STATIC_LIST) {
+        checkClosure(p);
+        info = get_itbl(p);
+        switch (info->type) {
+        case IND_STATIC:
+        {
+            StgClosure *indirectee = ((StgIndStatic *)p)->indirectee;
+
+            ASSERT(LOOKS_LIKE_CLOSURE_PTR(indirectee));
+            ASSERT(LOOKS_LIKE_INFO_PTR(indirectee->header.info));
+            p = *IND_STATIC_LINK((StgClosure *)p);
+            break;
+        }
+
+        case THUNK_STATIC:
+            p = *THUNK_STATIC_LINK((StgClosure *)p);
+            break;
+
+        case FUN_STATIC:
+            p = *FUN_STATIC_LINK((StgClosure *)p);
+            break;
+
+        case CONSTR_STATIC:
+            p = *STATIC_LINK(info,(StgClosure *)p);
+            break;
+
+        default:
+            barf("checkStaticObjects: strange closure %p (%s)",
+                 p, info_type(p));
+        }
+    }
+}
+
+/*
+  Check the sanity of a blocking queue starting at bqe with closure being
+  the closure holding the blocking queue.
+  Note that in GUM we can have several different closure types in a
+  blocking queue
+*/
+#if defined(PAR)
+void
+checkBQ (StgBlockingQueueElement *bqe, StgClosure *closure)
+{
+    rtsBool end = rtsFalse;
+    StgInfoTable *info = get_itbl(closure);
+
+    // only these closure types carry a blocking queue in GUM
+    ASSERT(info->type == MVAR || info->type == FETCH_ME_BQ || info->type == RBH);
+
+    do {
+        switch (get_itbl(bqe)->type) {
+        case BLOCKED_FETCH:
+        case TSO:
+            checkClosure((StgClosure *)bqe);
+            bqe = bqe->link;
+            end = (bqe==END_BQ_QUEUE);
+            break;
+
+        case CONSTR:
+            // a CONSTR terminates the queue
+            checkClosure((StgClosure *)bqe);
+            end = rtsTrue;
+            break;
+
+        default:
+            barf("checkBQ: strange closure %d in blocking queue for closure %p (%s)\n",
+                 get_itbl(bqe)->type, closure, info_type(closure));
+        }
+    } while (!end);
+}
+#elif defined(GRAN)
+void
+checkBQ (StgTSO *bqe, StgClosure *closure)
+{
+    rtsBool end = rtsFalse;
+    StgInfoTable *info = get_itbl(closure);
+
+    // in GranSim only MVars carry a blocking queue here
+    ASSERT(info->type == MVAR);
+
+    do {
+        switch (get_itbl(bqe)->type) {
+        case BLOCKED_FETCH:
+        case TSO:
+            checkClosure((StgClosure *)bqe);
+            bqe = bqe->link;
+            end = (bqe==END_BQ_QUEUE);
+            break;
+
+        default:
+            barf("checkBQ: strange closure %d in blocking queue for closure %p (%s)\n",
+                 get_itbl(bqe)->type, closure, info_type(closure));
+        }
+    } while (!end);
+}
+#endif
+
+
+
+/*
+ This routine checks the sanity of the LAGA and GALA tables. They are
+ implemented as lists through one hash table, LAtoGALAtable, because entries
+ in both tables have the same structure:
+ - the LAGA table maps local addresses to global addresses; it starts
+ with liveIndirections
+ - the GALA table maps global addresses to local addresses; it starts
+ with liveRemoteGAs
+*/
+
+#if defined(PAR)
+#include "Hash.h"
+
+/* hidden in parallel/Global.c; only accessed for testing here */
+extern GALA *liveIndirections;
+extern GALA *liveRemoteGAs;
+extern HashTable *LAtoGALAtable;
+
+/* Walk the LAGA list (liveIndirections) and the GALA list
+ * (liveRemoteGAs), checking each entry's hash-table mapping, the info
+ * pointer of its local closure, and that the list has no direct
+ * self-loops; optionally run checkClosure on the local closures. */
+void
+checkLAGAtable(rtsBool check_closures)
+{
+    GALA *gala, *gala0;
+    nat n=0, m=0; // debugging
+
+    for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+        n++;
+        gala0 = lookupHashTable(LAtoGALAtable, (StgWord) gala->la);
+        // a preferred entry must be the one the hash table maps to
+        ASSERT(!gala->preferred || gala == gala0);
+        ASSERT(LOOKS_LIKE_INFO_PTR(((StgClosure *)gala->la)->header.info));
+        ASSERT(gala->next!=gala); // detect direct loops
+        if ( check_closures ) {
+            checkClosure((StgClosure *)gala->la);
+        }
+    }
+
+    for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+        m++;
+        gala0 = lookupHashTable(LAtoGALAtable, (StgWord) gala->la);
+        ASSERT(!gala->preferred || gala == gala0);
+        ASSERT(LOOKS_LIKE_INFO_PTR(((StgClosure *)gala->la)->header.info));
+        ASSERT(gala->next!=gala); // detect direct loops
+        /*
+        if ( check_closures ) {
+            checkClosure((StgClosure *)gala->la);
+        }
+        */
+    }
+}
+#endif
+
+#endif /* DEBUG */
diff --git a/rts/Sanity.h b/rts/Sanity.h
new file mode 100644
index 0000000000..8cf3f9e52e
--- /dev/null
+++ b/rts/Sanity.h
@@ -0,0 +1,56 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Prototypes for functions in Sanity.c
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef SANITY_H
+#define SANITY_H
+/* Fix: the include guard tested SANITY_H but never defined it, so the
+ * guard was ineffective against multiple inclusion.  Also removed a
+ * duplicate checkHeapChunk prototype from the PAR section (it is
+ * already declared unconditionally above). */
+
+#ifdef DEBUG
+
+# if defined(PAR)
+# define PVM_PE_MASK    0xfffc0000
+# define MAX_PVM_PES    MAX_PES
+# define MAX_PVM_TIDS   MAX_PES
+# define MAX_SLOTS      100000
+# endif
+
+/* debugging routines */
+extern void checkHeap      ( bdescr *bd );
+extern void checkHeapChunk ( StgPtr start, StgPtr end );
+extern void checkChain     ( bdescr *bd );
+extern void checkTSO       ( StgTSO* tso );
+extern void checkGlobalTSOList ( rtsBool checkTSOs );
+extern void checkStaticObjects ( StgClosure* static_objects );
+extern void checkStackChunk    ( StgPtr sp, StgPtr stack_end );
+extern StgOffset checkStackFrame ( StgPtr sp );
+extern StgOffset checkClosure    ( StgClosure* p );
+
+extern void checkMutableList ( bdescr *bd, nat gen );
+
+#if defined(GRAN)
+extern void checkTSOsSanity(void);
+extern rtsBool checkThreadQSanity (PEs proc, rtsBool check_TSO_too);
+extern rtsBool checkThreadQsSanity (rtsBool check_TSO_too);
+#endif
+
+#if defined(PAR)
+extern void checkBQ (StgBlockingQueueElement *bqe, StgClosure *closure);
+#else
+extern void checkBQ (StgTSO *bqe, StgClosure *closure);
+#endif
+
+#if defined(PAR)
+extern void checkLAGAtable(rtsBool check_closures);
+#endif
+
+/* test whether an object is already on update list */
+extern rtsBool isBlackhole( StgTSO* tso, StgClosure* p );
+
+#endif /* DEBUG */
+
+#endif /* SANITY_H */
+
diff --git a/rts/Schedule.c b/rts/Schedule.c
new file mode 100644
index 0000000000..52fd4d5df6
--- /dev/null
+++ b/rts/Schedule.c
@@ -0,0 +1,4589 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * The scheduler and thread-related functionality
+ *
+ * --------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "SchedAPI.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "BlockAlloc.h"
+#include "OSThreads.h"
+#include "Storage.h"
+#include "StgRun.h"
+#include "Hooks.h"
+#include "Schedule.h"
+#include "StgMiscClosures.h"
+#include "Interpreter.h"
+#include "Exception.h"
+#include "Printer.h"
+#include "RtsSignals.h"
+#include "Sanity.h"
+#include "Stats.h"
+#include "STM.h"
+#include "Timer.h"
+#include "Prelude.h"
+#include "ThreadLabels.h"
+#include "LdvProfile.h"
+#include "Updates.h"
+#ifdef PROFILING
+#include "Proftimer.h"
+#include "ProfHeap.h"
+#endif
+#if defined(GRAN) || defined(PARALLEL_HASKELL)
+# include "GranSimRts.h"
+# include "GranSim.h"
+# include "ParallelRts.h"
+# include "Parallel.h"
+# include "ParallelDebug.h"
+# include "FetchMe.h"
+# include "HLC.h"
+#endif
+#include "Sparks.h"
+#include "Capability.h"
+#include "Task.h"
+#include "AwaitEvent.h"
+#if defined(mingw32_HOST_OS)
+#include "win32/IOManager.h"
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+// Turn off inlining when debugging - it obfuscates things
+#ifdef DEBUG
+# undef STATIC_INLINE
+# define STATIC_INLINE static
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Global variables
+ * -------------------------------------------------------------------------- */
+
+#if defined(GRAN)
+
+StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */
+/* rtsTime TimeOfNextEvent, EndOfTimeSlice; now in GranSim.c */
+
+/*
+ In GranSim we have a runnable and a blocked queue for each processor.
+ In order to minimise code changes new arrays run_queue_hds/tls
+ are created. run_queue_hd is then a short cut (macro) for
+ run_queue_hds[CurrentProc] (see GranSim.h).
+ -- HWL
+*/
+StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC];
+StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC];
+StgTSO *ccalling_threadss[MAX_PROC];
+/* We use the same global list of threads (all_threads) in GranSim as in
+ the std RTS (i.e. we are cheating). However, we don't use this list in
+ the GranSim specific code at the moment (so we are only potentially
+ cheating). */
+
+#else /* !GRAN */
+
+#if !defined(THREADED_RTS)
+// Blocked/sleeping threads
+StgTSO *blocked_queue_hd = NULL;
+StgTSO *blocked_queue_tl = NULL;
+StgTSO *sleeping_queue = NULL; // perhaps replace with a hash table?
+#endif
+
+/* Threads blocked on blackholes.
+ * LOCK: sched_mutex+capability, or all capabilities
+ */
+StgTSO *blackhole_queue = NULL;
+#endif
+
+/* The blackhole_queue should be checked for threads to wake up. See
+ * Schedule.h for more thorough comment.
+ * LOCK: none (doesn't matter if we miss an update)
+ */
+rtsBool blackholes_need_checking = rtsFalse;
+
+/* Linked list of all threads.
+ * Used for detecting garbage collected threads.
+ * LOCK: sched_mutex+capability, or all capabilities
+ */
+StgTSO *all_threads = NULL;
+
+/* flag set by signal handler to precipitate a context switch
+ * LOCK: none (just an advisory flag)
+ */
+int context_switch = 0;
+
+/* flag that tracks whether we have done any execution in this time slice.
+ * LOCK: currently none, perhaps we should lock (but needs to be
+ * updated in the fast path of the scheduler).
+ */
+nat recent_activity = ACTIVITY_YES;
+
+/* if this flag is set as well, give up execution
+ * LOCK: none (changes once, from false->true)
+ */
+rtsBool sched_state = SCHED_RUNNING;
+
+/* Next thread ID to allocate.
+ * LOCK: sched_mutex
+ */
+static StgThreadID next_thread_id = 1;
+
+/* The smallest stack size that makes any sense is:
+ * RESERVED_STACK_WORDS (so we can get back from the stack overflow)
+ * + sizeofW(StgStopFrame) (the stg_stop_thread_info frame)
+ * + 1 (the closure to enter)
+ * + 1 (stg_ap_v_ret)
+ * + 1 (spare slot req'd by stg_ap_v_ret)
+ *
+ * A thread with this stack will bomb immediately with a stack
+ * overflow, which will increase its stack size.
+ */
+#define MIN_STACK_WORDS (RESERVED_STACK_WORDS + sizeofW(StgStopFrame) + 3)
+
+#if defined(GRAN)
+StgTSO *CurrentTSO;
+#endif
+
+/* This is used in `TSO.h' and gcc 2.96 insists that this variable actually
+ * exists - earlier gccs apparently didn't.
+ * -= chak
+ */
+StgTSO dummy_tso;
+
+/*
+ * Set to TRUE when entering a shutdown state (via shutdownHaskellAndExit()) --
+ * in an MT setting, needed to signal that a worker thread shouldn't hang around
+ * in the scheduler when it is out of work.
+ */
+rtsBool shutting_down_scheduler = rtsFalse;
+
+/*
+ * This mutex protects most of the global scheduler data in
+ * the THREADED_RTS runtime.
+ */
+#if defined(THREADED_RTS)
+Mutex sched_mutex;
+#endif
+
+#if defined(PARALLEL_HASKELL)
+StgTSO *LastTSO;
+rtsTime TimeOfLastYield;
+rtsBool emitSchedule = rtsTrue;
+#endif
+
+/* -----------------------------------------------------------------------------
+ * static function prototypes
+ * -------------------------------------------------------------------------- */
+
+static Capability *schedule (Capability *initialCapability, Task *task);
+
+//
+// These function all encapsulate parts of the scheduler loop, and are
+// abstracted only to make the structure and control flow of the
+// scheduler clearer.
+//
+static void schedulePreLoop (void);
+#if defined(THREADED_RTS)
+static void schedulePushWork(Capability *cap, Task *task);
+#endif
+static void scheduleStartSignalHandlers (Capability *cap);
+static void scheduleCheckBlockedThreads (Capability *cap);
+static void scheduleCheckWakeupThreads(Capability *cap USED_IF_NOT_THREADS);
+static void scheduleCheckBlackHoles (Capability *cap);
+static void scheduleDetectDeadlock (Capability *cap, Task *task);
+#if defined(GRAN)
+static StgTSO *scheduleProcessEvent(rtsEvent *event);
+#endif
+#if defined(PARALLEL_HASKELL)
+static StgTSO *scheduleSendPendingMessages(void);
+static void scheduleActivateSpark(void);
+static rtsBool scheduleGetRemoteWork(rtsBool *receivedFinish);
+#endif
+#if defined(PAR) || defined(GRAN)
+static void scheduleGranParReport(void);
+#endif
+static void schedulePostRunThread(void);
+static rtsBool scheduleHandleHeapOverflow( Capability *cap, StgTSO *t );
+static void scheduleHandleStackOverflow( Capability *cap, Task *task,
+ StgTSO *t);
+static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t,
+ nat prev_what_next );
+static void scheduleHandleThreadBlocked( StgTSO *t );
+static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task,
+ StgTSO *t );
+static rtsBool scheduleDoHeapProfile(rtsBool ready_to_gc);
+static Capability *scheduleDoGC(Capability *cap, Task *task,
+ rtsBool force_major,
+ void (*get_roots)(evac_fn));
+
+static void unblockThread(Capability *cap, StgTSO *tso);
+static rtsBool checkBlackHoles(Capability *cap);
+static void AllRoots(evac_fn evac);
+
+static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso);
+
+static void raiseAsync_(Capability *cap, StgTSO *tso, StgClosure *exception,
+ rtsBool stop_at_atomically, StgPtr stop_here);
+
+static void deleteThread (Capability *cap, StgTSO *tso);
+static void deleteAllThreads (Capability *cap);
+
+#ifdef DEBUG
+static void printThreadBlockage(StgTSO *tso);
+static void printThreadStatus(StgTSO *tso);
+void printThreadQueue(StgTSO *tso);
+#endif
+
+#if defined(PARALLEL_HASKELL)
+StgTSO * createSparkThread(rtsSpark spark);
+StgTSO * activateSpark (rtsSpark spark);
+#endif
+
+#ifdef DEBUG
+static char *whatNext_strs[] = {
+ "(unknown)",
+ "ThreadRunGHC",
+ "ThreadInterpret",
+ "ThreadKilled",
+ "ThreadRelocated",
+ "ThreadComplete"
+};
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Putting a thread on the run queue: different scheduling policies
+ * -------------------------------------------------------------------------- */
+
+STATIC_INLINE void
+addToRunQueue( Capability *cap, StgTSO *t )
+{
+#if defined(PARALLEL_HASKELL)
+ if (RtsFlags.ParFlags.doFairScheduling) {
+ // this does round-robin scheduling; good for concurrency
+ appendToRunQueue(cap,t);
+ } else {
+ // this does unfair scheduling; good for parallelism
+ pushOnRunQueue(cap,t);
+ }
+#else
+ // this does round-robin scheduling; good for concurrency
+ appendToRunQueue(cap,t);
+#endif
+}
+
+/* ---------------------------------------------------------------------------
+ Main scheduling loop.
+
+ We use round-robin scheduling, each thread returning to the
+ scheduler loop when one of these conditions is detected:
+
+ * out of heap space
+ * timer expires (thread yields)
+ * thread blocks
+ * thread ends
+ * stack overflow
+
+ GRAN version:
+ In a GranSim setup this loop iterates over the global event queue.
+ This revolves around the global event queue, which determines what
+ to do next. Therefore, it's more complicated than either the
+ concurrent or the parallel (GUM) setup.
+
+ GUM version:
+ GUM iterates over incoming messages.
+ It starts with nothing to do (thus CurrentTSO == END_TSO_QUEUE),
+ and sends out a fish whenever it has nothing to do; in-between
+ doing the actual reductions (shared code below) it processes the
+ incoming messages and deals with delayed operations
+ (see PendingFetches).
+ This is not the ugliest code you could imagine, but it's bloody close.
+
+ ------------------------------------------------------------------------ */
+
+static Capability *
+schedule (Capability *initialCapability, Task *task)
+{
+ StgTSO *t;
+ Capability *cap;
+ StgThreadReturnCode ret;
+#if defined(GRAN)
+ rtsEvent *event;
+#elif defined(PARALLEL_HASKELL)
+ StgTSO *tso;
+ GlobalTaskId pe;
+ rtsBool receivedFinish = rtsFalse;
+# if defined(DEBUG)
+ nat tp_size, sp_size; // stats only
+# endif
+#endif
+ nat prev_what_next;
+ rtsBool ready_to_gc;
+#if defined(THREADED_RTS)
+ rtsBool first = rtsTrue;
+#endif
+
+ cap = initialCapability;
+
+ // Pre-condition: this task owns initialCapability.
+ // The sched_mutex is *NOT* held
+ // NB. on return, we still hold a capability.
+
+ IF_DEBUG(scheduler,
+ sched_belch("### NEW SCHEDULER LOOP (task: %p, cap: %p)",
+ task, initialCapability);
+ );
+
+ schedulePreLoop();
+
+ // -----------------------------------------------------------
+ // Scheduler loop starts here:
+
+#if defined(PARALLEL_HASKELL)
+#define TERMINATION_CONDITION (!receivedFinish)
+#elif defined(GRAN)
+#define TERMINATION_CONDITION ((event = get_next_event()) != (rtsEvent*)NULL)
+#else
+#define TERMINATION_CONDITION rtsTrue
+#endif
+
+ while (TERMINATION_CONDITION) {
+
+#if defined(GRAN)
+ /* Choose the processor with the next event */
+ CurrentProc = event->proc;
+ CurrentTSO = event->tso;
+#endif
+
+#if defined(THREADED_RTS)
+ if (first) {
+ // don't yield the first time, we want a chance to run this
+ // thread for a bit, even if there are others banging at the
+ // door.
+ first = rtsFalse;
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+ } else {
+ // Yield the capability to higher-priority tasks if necessary.
+ yieldCapability(&cap, task);
+ }
+#endif
+
+#if defined(THREADED_RTS)
+ schedulePushWork(cap,task);
+#endif
+
+ // Check whether we have re-entered the RTS from Haskell without
+ // going via suspendThread()/resumeThread (i.e. a 'safe' foreign
+ // call).
+ if (cap->in_haskell) {
+ errorBelch("schedule: re-entered unsafely.\n"
+ " Perhaps a 'foreign import unsafe' should be 'safe'?");
+ stg_exit(EXIT_FAILURE);
+ }
+
+ // The interruption / shutdown sequence.
+ //
+ // In order to cleanly shut down the runtime, we want to:
+ // * make sure that all main threads return to their callers
+ // with the state 'Interrupted'.
+ // * clean up all OS threads associated with the runtime
+ // * free all memory etc.
+ //
+ // So the sequence for ^C goes like this:
+ //
+ // * ^C handler sets sched_state := SCHED_INTERRUPTING and
+ // arranges for some Capability to wake up
+ //
+ // * all threads in the system are halted, and the zombies are
+ // placed on the run queue for cleaning up. We acquire all
+ // the capabilities in order to delete the threads, this is
+ // done by scheduleDoGC() for convenience (because GC already
+ // needs to acquire all the capabilities). We can't kill
+ // threads involved in foreign calls.
+ //
+ // * sched_state := SCHED_INTERRUPTED
+ //
+ // * somebody calls shutdownHaskell(), which calls exitScheduler()
+ //
+ // * sched_state := SCHED_SHUTTING_DOWN
+ //
+ // * all workers exit when the run queue on their capability
+ // drains. All main threads will also exit when their TSO
+ // reaches the head of the run queue and they can return.
+ //
+ // * eventually all Capabilities will shut down, and the RTS can
+ // exit.
+ //
+ // * We might be left with threads blocked in foreign calls,
+ // we should really attempt to kill these somehow (TODO);
+
+ switch (sched_state) {
+ case SCHED_RUNNING:
+ break;
+ case SCHED_INTERRUPTING:
+ IF_DEBUG(scheduler, sched_belch("SCHED_INTERRUPTING"));
+#if defined(THREADED_RTS)
+ discardSparksCap(cap);
+#endif
+ /* scheduleDoGC() deletes all the threads */
+ cap = scheduleDoGC(cap,task,rtsFalse,GetRoots);
+ break;
+ case SCHED_INTERRUPTED:
+ IF_DEBUG(scheduler, sched_belch("SCHED_INTERRUPTED"));
+ break;
+ case SCHED_SHUTTING_DOWN:
+ IF_DEBUG(scheduler, sched_belch("SCHED_SHUTTING_DOWN"));
+ // If we are a worker, just exit. If we're a bound thread
+ // then we will exit below when we've removed our TSO from
+ // the run queue.
+ if (task->tso == NULL && emptyRunQueue(cap)) {
+ return cap;
+ }
+ break;
+ default:
+ barf("sched_state: %d", sched_state);
+ }
+
+#if defined(THREADED_RTS)
+ // If the run queue is empty, take a spark and turn it into a thread.
+ {
+ if (emptyRunQueue(cap)) {
+ StgClosure *spark;
+ spark = findSpark(cap);
+ if (spark != NULL) {
+ IF_DEBUG(scheduler,
+ sched_belch("turning spark of closure %p into a thread",
+ (StgClosure *)spark));
+ createSparkThread(cap,spark);
+ }
+ }
+ }
+#endif // THREADED_RTS
+
+ scheduleStartSignalHandlers(cap);
+
+ // Only check the black holes here if we've nothing else to do.
+ // During normal execution, the black hole list only gets checked
+ // at GC time, to avoid repeatedly traversing this possibly long
+ // list each time around the scheduler.
+ if (emptyRunQueue(cap)) { scheduleCheckBlackHoles(cap); }
+
+ scheduleCheckWakeupThreads(cap);
+
+ scheduleCheckBlockedThreads(cap);
+
+ scheduleDetectDeadlock(cap,task);
+#if defined(THREADED_RTS)
+ cap = task->cap; // reload cap, it might have changed
+#endif
+
+ // Normally, the only way we can get here with no threads to
+ // run is if a keyboard interrupt received during
+ // scheduleCheckBlockedThreads() or scheduleDetectDeadlock().
+ // Additionally, it is not fatal for the
+ // threaded RTS to reach here with no threads to run.
+ //
+ // win32: might be here due to awaitEvent() being abandoned
+ // as a result of a console event having been delivered.
+ if ( emptyRunQueue(cap) ) {
+#if !defined(THREADED_RTS) && !defined(mingw32_HOST_OS)
+ ASSERT(sched_state >= SCHED_INTERRUPTING);
+#endif
+ continue; // nothing to do
+ }
+
+#if defined(PARALLEL_HASKELL)
+ scheduleSendPendingMessages();
+ if (emptyRunQueue(cap) && scheduleActivateSpark())
+ continue;
+
+#if defined(SPARKS)
+ ASSERT(next_fish_to_send_at==0); // i.e. no delayed fishes left!
+#endif
+
+ /* If we still have no work we need to send a FISH to get a spark
+ from another PE */
+ if (emptyRunQueue(cap)) {
+ if (!scheduleGetRemoteWork(&receivedFinish)) continue;
+ ASSERT(rtsFalse); // should not happen at the moment
+ }
+ // from here: non-empty run queue.
+ // TODO: merge above case with this, only one call processMessages() !
+ if (PacketsWaiting()) { /* process incoming messages, if
+ any pending... only in else
+ because getRemoteWork waits for
+ messages as well */
+ receivedFinish = processMessages();
+ }
+#endif
+
+#if defined(GRAN)
+ scheduleProcessEvent(event);
+#endif
+
+ //
+ // Get a thread to run
+ //
+ t = popRunQueue(cap);
+
+#if defined(GRAN) || defined(PAR)
+ scheduleGranParReport(); // some kind of debugging output
+#else
+ // Sanity check the thread we're about to run. This can be
+ // expensive if there is lots of thread switching going on...
+ IF_DEBUG(sanity,checkTSO(t));
+#endif
+
+#if defined(THREADED_RTS)
+ // Check whether we can run this thread in the current task.
+ // If not, we have to pass our capability to the right task.
+ {
+ Task *bound = t->bound;
+
+ if (bound) {
+ if (bound == task) {
+ IF_DEBUG(scheduler,
+ sched_belch("### Running thread %d in bound thread",
+ t->id));
+ // yes, the Haskell thread is bound to the current native thread
+ } else {
+ IF_DEBUG(scheduler,
+ sched_belch("### thread %d bound to another OS thread",
+ t->id));
+ // no, bound to a different Haskell thread: pass to that thread
+ pushOnRunQueue(cap,t);
+ continue;
+ }
+ } else {
+ // The thread we want to run is unbound.
+ if (task->tso) {
+ IF_DEBUG(scheduler,
+ sched_belch("### this OS thread cannot run thread %d", t->id));
+ // no, the current native thread is bound to a different
+ // Haskell thread, so pass it to any worker thread
+ pushOnRunQueue(cap,t);
+ continue;
+ }
+ }
+ }
+#endif
+
+ cap->r.rCurrentTSO = t;
+
+ /* context switches are initiated by the timer signal, unless
+ * the user specified "context switch as often as possible", with
+ * +RTS -C0
+ */
+ if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
+ && !emptyThreadQueues(cap)) {
+ context_switch = 1;
+ }
+
+run_thread:
+
+ IF_DEBUG(scheduler, sched_belch("-->> running thread %ld %s ...",
+ (long)t->id, whatNext_strs[t->what_next]));
+
+#if defined(PROFILING)
+ startHeapProfTimer();
+#endif
+
+ // ----------------------------------------------------------------------
+ // Run the current thread
+
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+ ASSERT(t->cap == cap);
+
+ prev_what_next = t->what_next;
+
+ errno = t->saved_errno;
+ cap->in_haskell = rtsTrue;
+
+ dirtyTSO(t);
+
+ recent_activity = ACTIVITY_YES;
+
+ switch (prev_what_next) {
+
+ case ThreadKilled:
+ case ThreadComplete:
+ /* Thread already finished, return to scheduler. */
+ ret = ThreadFinished;
+ break;
+
+ case ThreadRunGHC:
+ {
+ StgRegTable *r;
+ r = StgRun((StgFunPtr) stg_returnToStackTop, &cap->r);
+ cap = regTableToCapability(r);
+ ret = r->rRet;
+ break;
+ }
+
+ case ThreadInterpret:
+ cap = interpretBCO(cap);
+ ret = cap->r.rRet;
+ break;
+
+ default:
+ barf("schedule: invalid what_next field");
+ }
+
+ cap->in_haskell = rtsFalse;
+
+ // The TSO might have moved, eg. if it re-entered the RTS and a GC
+ // happened. So find the new location:
+ t = cap->r.rCurrentTSO;
+
+ // We have run some Haskell code: there might be blackhole-blocked
+ // threads to wake up now.
+ // Lock-free test here should be ok, we're just setting a flag.
+ if ( blackhole_queue != END_TSO_QUEUE ) {
+ blackholes_need_checking = rtsTrue;
+ }
+
+ // And save the current errno in this thread.
+ // XXX: possibly bogus for SMP because this thread might already
+ // be running again, see code below.
+ t->saved_errno = errno;
+
+#if defined(THREADED_RTS)
+ // If ret is ThreadBlocked, and this Task is bound to the TSO that
+ // blocked, we are in limbo - the TSO is now owned by whatever it
+ // is blocked on, and may in fact already have been woken up,
+ // perhaps even on a different Capability. It may be the case
+ // that task->cap != cap. We better yield this Capability
+ // immediately and return to normality.
+ if (ret == ThreadBlocked) {
+ IF_DEBUG(scheduler,
+ sched_belch("--<< thread %d (%s) stopped: blocked\n",
+ t->id, whatNext_strs[t->what_next]));
+ continue;
+ }
+#endif
+
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+ ASSERT(t->cap == cap);
+
+ // ----------------------------------------------------------------------
+
+ // Costs for the scheduler are assigned to CCS_SYSTEM
+#if defined(PROFILING)
+ stopHeapProfTimer();
+ CCCS = CCS_SYSTEM;
+#endif
+
+#if defined(THREADED_RTS)
+ IF_DEBUG(scheduler,debugBelch("sched (task %p): ", (void *)(unsigned long)(unsigned int)osThreadId()););
+#elif !defined(GRAN) && !defined(PARALLEL_HASKELL)
+ IF_DEBUG(scheduler,debugBelch("sched: "););
+#endif
+
+ schedulePostRunThread();
+
+ ready_to_gc = rtsFalse;
+
+ switch (ret) {
+ case HeapOverflow:
+ ready_to_gc = scheduleHandleHeapOverflow(cap,t);
+ break;
+
+ case StackOverflow:
+ scheduleHandleStackOverflow(cap,task,t);
+ break;
+
+ case ThreadYielding:
+ if (scheduleHandleYield(cap, t, prev_what_next)) {
+ // shortcut for switching between compiler/interpreter:
+ goto run_thread;
+ }
+ break;
+
+ case ThreadBlocked:
+ scheduleHandleThreadBlocked(t);
+ break;
+
+ case ThreadFinished:
+ if (scheduleHandleThreadFinished(cap, task, t)) return cap;
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+ break;
+
+ default:
+ barf("schedule: invalid thread return code %d", (int)ret);
+ }
+
+ if (scheduleDoHeapProfile(ready_to_gc)) { ready_to_gc = rtsFalse; }
+ if (ready_to_gc) {
+ cap = scheduleDoGC(cap,task,rtsFalse,GetRoots);
+ }
+ } /* end of while() */
+
+ IF_PAR_DEBUG(verbose,
+ debugBelch("== Leaving schedule() after having received Finish\n"));
+}
+
+/* ----------------------------------------------------------------------------
+ * Setting up the scheduler loop
+ * ------------------------------------------------------------------------- */
+
+static void
+schedulePreLoop(void)
+{
+#if defined(GRAN)
+ /* set up first event to get things going */
+ /* ToDo: assign costs for system setup and init MainTSO ! */
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
+ ContinueThread,
+ CurrentTSO, (StgClosure*)NULL, (rtsSpark*)NULL);
+
+ IF_DEBUG(gran,
+ debugBelch("GRAN: Init CurrentTSO (in schedule) = %p\n",
+ CurrentTSO);
+ G_TSO(CurrentTSO, 5));
+
+ if (RtsFlags.GranFlags.Light) {
+ /* Save current time; GranSim Light only */
+ CurrentTSO->gran.clock = CurrentTime[CurrentProc];
+ }
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * schedulePushWork()
+ *
+ * Push work to other Capabilities if we have some.
+ * -------------------------------------------------------------------------- */
+
+#if defined(THREADED_RTS)
+static void
+schedulePushWork(Capability *cap USED_IF_THREADS,
+ Task *task USED_IF_THREADS)
+{
+ Capability *free_caps[n_capabilities], *cap0;
+ nat i, n_free_caps;
+
+ // migration can be turned off with +RTS -qg
+ if (!RtsFlags.ParFlags.migrate) return;
+
+ // Check whether we have more threads on our run queue, or sparks
+ // in our pool, that we could hand to another Capability.
+ if ((emptyRunQueue(cap) || cap->run_queue_hd->link == END_TSO_QUEUE)
+ && sparkPoolSizeCap(cap) < 2) {
+ return;
+ }
+
+ // First grab as many free Capabilities as we can.
+ for (i=0, n_free_caps=0; i < n_capabilities; i++) {
+ cap0 = &capabilities[i];
+ if (cap != cap0 && tryGrabCapability(cap0,task)) {
+ if (!emptyRunQueue(cap0) || cap->returning_tasks_hd != NULL) {
+ // it already has some work, we just grabbed it at
+ // the wrong moment. Or maybe it's deadlocked!
+ releaseCapability(cap0);
+ } else {
+ free_caps[n_free_caps++] = cap0;
+ }
+ }
+ }
+
+ // we now have n_free_caps free capabilities stashed in
+ // free_caps[]. Share our run queue equally with them. This is
+ // probably the simplest thing we could do; improvements we might
+ // want to do include:
+ //
+ // - giving high priority to moving relatively new threads, on
+ // the grounds that they haven't had time to build up a
+ // working set in the cache on this CPU/Capability.
+ //
+ // - giving low priority to moving long-lived threads
+
+ if (n_free_caps > 0) {
+ StgTSO *prev, *t, *next;
+ rtsBool pushed_to_all;
+
+ IF_DEBUG(scheduler, sched_belch("excess threads on run queue and %d free capabilities, sharing...", n_free_caps));
+
+ i = 0;
+ pushed_to_all = rtsFalse;
+
+ if (cap->run_queue_hd != END_TSO_QUEUE) {
+ prev = cap->run_queue_hd;
+ t = prev->link;
+ prev->link = END_TSO_QUEUE;
+ for (; t != END_TSO_QUEUE; t = next) {
+ next = t->link;
+ t->link = END_TSO_QUEUE;
+ if (t->what_next == ThreadRelocated
+ || t->bound == task // don't move my bound thread
+ || tsoLocked(t)) { // don't move a locked thread
+ prev->link = t;
+ prev = t;
+ } else if (i == n_free_caps) {
+ pushed_to_all = rtsTrue;
+ i = 0;
+ // keep one for us
+ prev->link = t;
+ prev = t;
+ } else {
+ IF_DEBUG(scheduler, sched_belch("pushing thread %d to capability %d", t->id, free_caps[i]->no));
+ appendToRunQueue(free_caps[i],t);
+ if (t->bound) { t->bound->cap = free_caps[i]; }
+ t->cap = free_caps[i];
+ i++;
+ }
+ }
+ cap->run_queue_tl = prev;
+ }
+
+ // If there are some free capabilities that we didn't push any
+ // threads to, then try to push a spark to each one.
+ if (!pushed_to_all) {
+ StgClosure *spark;
+ // i is the next free capability to push to
+ for (; i < n_free_caps; i++) {
+ if (emptySparkPoolCap(free_caps[i])) {
+ spark = findSpark(cap);
+ if (spark != NULL) {
+ IF_DEBUG(scheduler, sched_belch("pushing spark %p to capability %d", spark, free_caps[i]->no));
+ newSpark(&(free_caps[i]->r), spark);
+ }
+ }
+ }
+ }
+
+ // release the capabilities
+ for (i = 0; i < n_free_caps; i++) {
+ task->cap = free_caps[i];
+ releaseCapability(free_caps[i]);
+ }
+ }
+ task->cap = cap; // reset to point to our Capability.
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Start any pending signal handlers
+ * ------------------------------------------------------------------------- */
+
+#if defined(RTS_USER_SIGNALS) && (!defined(THREADED_RTS) || defined(mingw32_HOST_OS))
+static void
+scheduleStartSignalHandlers(Capability *cap)
+{
+ if (signals_pending()) { // safe outside the lock
+ startSignalHandlers(cap);
+ }
+}
+#else
+static void
+scheduleStartSignalHandlers(Capability *cap STG_UNUSED)
+{
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Check for blocked threads that can be woken up.
+ * ------------------------------------------------------------------------- */
+
+static void
+scheduleCheckBlockedThreads(Capability *cap USED_IF_NOT_THREADS)
+{
+#if !defined(THREADED_RTS)
+ //
+ // Check whether any waiting threads need to be woken up. If the
+ // run queue is empty, and there are no other tasks running, we
+ // can wait indefinitely for something to happen.
+ //
+ if ( !emptyQueue(blocked_queue_hd) || !emptyQueue(sleeping_queue) )
+ {
+ awaitEvent( emptyRunQueue(cap) && !blackholes_need_checking );
+ }
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------
+ * Check for threads woken up by other Capabilities
+ * ------------------------------------------------------------------------- */
+
+static void
+scheduleCheckWakeupThreads(Capability *cap USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ // Any threads that were woken up by other Capabilities get
+ // appended to our run queue.
+ if (!emptyWakeupQueue(cap)) {
+ ACQUIRE_LOCK(&cap->lock);
+ if (emptyRunQueue(cap)) {
+ cap->run_queue_hd = cap->wakeup_queue_hd;
+ cap->run_queue_tl = cap->wakeup_queue_tl;
+ } else {
+ cap->run_queue_tl->link = cap->wakeup_queue_hd;
+ cap->run_queue_tl = cap->wakeup_queue_tl;
+ }
+ cap->wakeup_queue_hd = cap->wakeup_queue_tl = END_TSO_QUEUE;
+ RELEASE_LOCK(&cap->lock);
+ }
+#endif
+}
+
+/* ----------------------------------------------------------------------------
+ * Check for threads blocked on BLACKHOLEs that can be woken up
+ * ------------------------------------------------------------------------- */
+static void
+scheduleCheckBlackHoles (Capability *cap)
+{
+ if ( blackholes_need_checking ) // check without the lock first
+ {
+ ACQUIRE_LOCK(&sched_mutex);
+ if ( blackholes_need_checking ) {
+ checkBlackHoles(cap);
+ blackholes_need_checking = rtsFalse;
+ }
+ RELEASE_LOCK(&sched_mutex);
+ }
+}
+
+/* ----------------------------------------------------------------------------
+ * Detect deadlock conditions and attempt to resolve them.
+ * ------------------------------------------------------------------------- */
+
+static void
+scheduleDetectDeadlock (Capability *cap, Task *task)
+{
+
+#if defined(PARALLEL_HASKELL)
+ // ToDo: add deadlock detection in GUM (similar to THREADED_RTS) -- HWL
+ return;
+#endif
+
+ /*
+ * Detect deadlock: when we have no threads to run, there are no
+ * threads blocked, waiting for I/O, or sleeping, and all the
+ * other tasks are waiting for work, we must have a deadlock of
+ * some description.
+ */
+ if ( emptyThreadQueues(cap) )
+ {
+#if defined(THREADED_RTS)
+ /*
+ * In the threaded RTS, we only check for deadlock if there
+ * has been no activity in a complete timeslice. This means
+ * we won't eagerly start a full GC just because we don't have
+ * any threads to run currently.
+ */
+ if (recent_activity != ACTIVITY_INACTIVE) return;
+#endif
+
+ IF_DEBUG(scheduler, sched_belch("deadlocked, forcing major GC..."));
+
+ // Garbage collection can release some new threads due to
+ // either (a) finalizers or (b) threads resurrected because
+ // they are unreachable and will therefore be sent an
+ // exception. Any threads thus released will be immediately
+ // runnable.
+ cap = scheduleDoGC (cap, task, rtsTrue/*force major GC*/, GetRoots);
+
+ recent_activity = ACTIVITY_DONE_GC;
+
+ if ( !emptyRunQueue(cap) ) return;
+
+#if defined(RTS_USER_SIGNALS) && (!defined(THREADED_RTS) || defined(mingw32_HOST_OS))
+ /* If we have user-installed signal handlers, then wait
+ * for signals to arrive rather then bombing out with a
+ * deadlock.
+ */
+ if ( anyUserHandlers() ) {
+ IF_DEBUG(scheduler,
+ sched_belch("still deadlocked, waiting for signals..."));
+
+ awaitUserSignals();
+
+ if (signals_pending()) {
+ startSignalHandlers(cap);
+ }
+
+ // either we have threads to run, or we were interrupted:
+ ASSERT(!emptyRunQueue(cap) || sched_state >= SCHED_INTERRUPTING);
+ }
+#endif
+
+#if !defined(THREADED_RTS)
+ /* Probably a real deadlock. Send the current main thread the
+ * Deadlock exception.
+ */
+ if (task->tso) {
+ switch (task->tso->why_blocked) {
+ case BlockedOnSTM:
+ case BlockedOnBlackHole:
+ case BlockedOnException:
+ case BlockedOnMVar:
+ raiseAsync(cap, task->tso, (StgClosure *)NonTermination_closure);
+ return;
+ default:
+ barf("deadlock: main thread blocked in a strange way");
+ }
+ }
+ return;
+#endif
+ }
+}
+
+/* ----------------------------------------------------------------------------
+ * Process an event (GRAN only)
+ * ------------------------------------------------------------------------- */
+
+#if defined(GRAN)
+/*
+ * scheduleProcessEvent (GranSim only)
+ *
+ * Main event dispatcher for the GranSim simulator.  Advances simulated
+ * time to the event's timestamp, lets idle PEs issue FindWork/MoveSpark
+ * events, then dispatches on the event type.  Every event kind except
+ * ContinueThread is handled by a do_the_*() helper and loops back via
+ * "goto next_thread"; ContinueThread falls through to actually run a
+ * thread, which is popped off the run queue and made current.
+ *
+ * NOTE(review): declared as returning StgTSO* (presumably the selected
+ * thread t) but control falls off the end without a return statement --
+ * undefined behaviour if the caller uses the result.  There is also no
+ * "next_thread" label in this function, so the gotos below cannot
+ * compile as written.  Both suggest this GRAN code is bit-rotted;
+ * TODO confirm against an actual GRAN build before relying on it.
+ */
+static StgTSO *
+scheduleProcessEvent(rtsEvent *event)
+{
+ StgTSO *t;
+
+ if (RtsFlags.GranFlags.Light)
+ GranSimLight_enter_system(event, &ActiveTSO); // adjust ActiveTSO etc
+
+ /* adjust time based on time-stamp */
+ if (event->time > CurrentTime[CurrentProc] &&
+ event->evttype != ContinueThread)
+ CurrentTime[CurrentProc] = event->time;
+
+ /* Deal with the idle PEs (may issue FindWork or MoveSpark events) */
+ if (!RtsFlags.GranFlags.Light)
+ handleIdlePEs();
+
+ IF_DEBUG(gran, debugBelch("GRAN: switch by event-type\n"));
+
+ /* main event dispatcher in GranSim */
+ switch (event->evttype) {
+ /* Should just be continuing execution */
+ case ContinueThread:
+ IF_DEBUG(gran, debugBelch("GRAN: doing ContinueThread\n"));
+ /* ToDo: check assertion
+ ASSERT(run_queue_hd != (StgTSO*)NULL &&
+ run_queue_hd != END_TSO_QUEUE);
+ */
+ /* Ignore ContinueThreads for fetching threads (if synchr comm) */
+ if (!RtsFlags.GranFlags.DoAsyncFetch &&
+ procStatus[CurrentProc]==Fetching) {
+ debugBelch("ghuH: Spurious ContinueThread while Fetching ignored; TSO %d (%p) [PE %d]\n",
+ CurrentTSO->id, CurrentTSO, CurrentProc);
+ goto next_thread;
+ }
+ /* Ignore ContinueThreads for completed threads */
+ if (CurrentTSO->what_next == ThreadComplete) {
+ debugBelch("ghuH: found a ContinueThread event for completed thread %d (%p) [PE %d] (ignoring ContinueThread)\n",
+ CurrentTSO->id, CurrentTSO, CurrentProc);
+ goto next_thread;
+ }
+ /* Ignore ContinueThreads for threads that are being migrated */
+ if (PROCS(CurrentTSO)==Nowhere) {
+ debugBelch("ghuH: trying to run the migrating TSO %d (%p) [PE %d] (ignoring ContinueThread)\n",
+ CurrentTSO->id, CurrentTSO, CurrentProc);
+ goto next_thread;
+ }
+ /* The thread should be at the beginning of the run queue */
+ if (CurrentTSO!=run_queue_hds[CurrentProc]) {
+ debugBelch("ghuH: TSO %d (%p) [PE %d] is not at the start of the run_queue when doing a ContinueThread\n",
+ CurrentTSO->id, CurrentTSO, CurrentProc);
+ break; // run the thread anyway
+ }
+ /*
+ new_event(proc, proc, CurrentTime[proc],
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ goto next_thread;
+ */ /* Catches superfluous CONTINUEs -- should be unnecessary */
+ break; // now actually run the thread; DaH Qu'vam yImuHbej
+
+ case FetchNode:
+ do_the_fetchnode(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case GlobalBlock:
+ do_the_globalblock(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case FetchReply:
+ do_the_fetchreply(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case UnblockThread: /* Move from the blocked queue to the tail of */
+ do_the_unblock(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case ResumeThread: /* Move from the blocked queue to the tail of */
+ /* the runnable queue ( i.e. Qu' SImqa'lu') */
+ event->tso->gran.blocktime +=
+ CurrentTime[CurrentProc] - event->tso->gran.blockedat;
+ do_the_startthread(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case StartThread:
+ do_the_startthread(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case MoveThread:
+ do_the_movethread(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case MoveSpark:
+ do_the_movespark(event);
+ goto next_thread; /* handle next event in event queue */
+
+ case FindWork:
+ do_the_findwork(event);
+ goto next_thread; /* handle next event in event queue */
+
+ default:
+ barf("Illegal event type %u\n", event->evttype);
+ } /* switch */
+
+ /* This point was scheduler_loop in the old RTS */
+
+ IF_DEBUG(gran, debugBelch("GRAN: after main switch\n"));
+
+ TimeOfLastEvent = CurrentTime[CurrentProc];
+ TimeOfNextEvent = get_time_of_next_event();
+ IgnoreEvents=(TimeOfNextEvent==0); // HWL HACK
+ // CurrentTSO = ThreadQueueHd;
+
+ IF_DEBUG(gran, debugBelch("GRAN: time of next event is: %ld\n",
+ TimeOfNextEvent));
+
+ if (RtsFlags.GranFlags.Light)
+ GranSimLight_leave_system(event, &ActiveTSO);
+
+ EndOfTimeSlice = CurrentTime[CurrentProc]+RtsFlags.GranFlags.time_slice;
+
+ IF_DEBUG(gran,
+ debugBelch("GRAN: end of time-slice is %#lx\n", EndOfTimeSlice));
+
+ /* in a GranSim setup the TSO stays on the run queue */
+ t = CurrentTSO;
+ /* Take a thread from the run queue. */
+ POP_RUN_QUEUE(t); // take_off_run_queue(t);
+
+ IF_DEBUG(gran,
+ debugBelch("GRAN: About to run current thread, which is\n");
+ G_TSO(t,5));
+
+ context_switch = 0; // turned on via GranYield, checking events and time slice
+
+ IF_DEBUG(gran,
+ DumpGranEvent(GR_SCHEDULE, t));
+
+ procStatus[CurrentProc] = Busy;
+ /* NOTE(review): t is selected above but never returned; see header note */
+}
+#endif // GRAN
+
+/* ----------------------------------------------------------------------------
+ * Send pending messages (PARALLEL_HASKELL only)
+ * ------------------------------------------------------------------------- */
+
+#if defined(PARALLEL_HASKELL)
+/*
+ * scheduleSendPendingMessages (PARALLEL_HASKELL only)
+ *
+ * Flush outgoing communication: process any pending fetches (PAR builds)
+ * and, when message buffering is enabled, send away message packets that
+ * have become too old.
+ *
+ * NOTE(review): declared as returning StgTSO* but never returns a value,
+ * and the locals pool/spark/t are unused.  The return type should
+ * probably be void -- looks bit-rotted; TODO confirm in a PAR build.
+ */
+static StgTSO *
+scheduleSendPendingMessages(void)
+{
+ StgSparkPool *pool;
+ rtsSpark spark;
+ StgTSO *t;
+
+# if defined(PAR) // global Mem.Mgmt., omit for now
+ if (PendingFetches != END_BF_QUEUE) {
+ processFetches();
+ }
+# endif
+
+ if (RtsFlags.ParFlags.BufferTime) {
+ // if we use message buffering, we must send away all message
+ // packets which have become too old...
+ sendOldBuffers();
+ }
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * Activate spark threads (PARALLEL_HASKELL only)
+ * ------------------------------------------------------------------------- */
+
+#if defined(PARALLEL_HASKELL)
+/*
+ * scheduleActivateSpark (PARALLEL_HASKELL only)
+ *
+ * Called when the run queue is empty: try to turn a local spark into a
+ * thread and add it to the run queue, from where the scheduler loop will
+ * pick it up on its next iteration.
+ *
+ * NOTE(review): declared void, yet every exit path does
+ * "return rtsTrue/rtsFalse" -- that cannot compile.  The variables
+ * pool, spark and tso are also used without local declarations.
+ * Presumably they were locals before this code was split out of
+ * schedule(); TODO confirm the intended signature (rtsBool?) against
+ * the callers.
+ */
+static void
+scheduleActivateSpark(void)
+{
+#if defined(SPARKS)
+ ASSERT(emptyRunQueue());
+/* We get here if the run queue is empty and want some work.
+ We try to turn a spark into a thread, and add it to the run queue,
+ from where it will be picked up in the next iteration of the scheduler
+ loop.
+*/
+
+ /* :-[ no local threads => look out for local sparks */
+ /* the spark pool for the current PE */
+ pool = &(cap.r.rSparks); // JB: cap = (old) MainCap
+ if (advisory_thread_count < RtsFlags.ParFlags.maxThreads &&
+ pool->hd < pool->tl) {
+ /*
+ * ToDo: add GC code check that we really have enough heap afterwards!!
+ * Old comment:
+ * If we're here (no runnable threads) and we have pending
+ * sparks, we must have a space problem. Get enough space
+ * to turn one of those pending sparks into a
+ * thread...
+ */
+
+ spark = findSpark(rtsFalse); /* get a spark */
+ if (spark != (rtsSpark) NULL) {
+ tso = createThreadFromSpark(spark); /* turn the spark into a thread */
+ IF_PAR_DEBUG(fish, // schedule,
+ debugBelch("==== schedule: Created TSO %d (%p); %d threads active\n",
+ tso->id, tso, advisory_thread_count));
+
+ if (tso==END_TSO_QUEUE) { /* failed to activate spark->back to loop */
+ IF_PAR_DEBUG(fish, // schedule,
+ debugBelch("==^^ failed to create thread from spark @ %lx\n",
+ spark));
+ return rtsFalse; /* failed to generate a thread */
+ } /* otherwise fall through & pick-up new tso */
+ } else {
+ IF_PAR_DEBUG(fish, // schedule,
+ debugBelch("==^^ no local sparks (spark pool contains only NFs: %d)\n",
+ spark_queue_len(pool)));
+ return rtsFalse; /* failed to generate a thread */
+ }
+ return rtsTrue; /* success in generating a thread */
+ } else { /* no more threads permitted or pool empty */
+ return rtsFalse; /* failed to generateThread */
+ }
+#else
+ tso = NULL; // avoid compiler warning only
+ return rtsFalse; /* dummy in non-PAR setup */
+#endif // SPARKS
+}
+#endif // PARALLEL_HASKELL
+
+/* ----------------------------------------------------------------------------
+ * Get work from a remote node (PARALLEL_HASKELL only)
+ * ------------------------------------------------------------------------- */
+
+#if defined(PARALLEL_HASKELL)
+/*
+ * scheduleGetRemoteWork (PARALLEL_HASKELL only)
+ *
+ * Called when there is no local work at all: flush pending message
+ * buffers and then either (no SPARKS, i.e. Eden) block waiting for
+ * incoming messages, or (SPARKS) send out a FISH message asking other
+ * PEs for work, rate-limited by fishDelay.  *receivedFinish is set when
+ * a finish message arrives during message processing.
+ *
+ * Always returns rtsFalse, so the scheduler loop simply continues with
+ * its next iteration (see rationale comment at the end).
+ *
+ * NOTE(review): `delay` and `next_fish_to_send_at` are used without
+ * local declarations here -- presumably file-scope in PAR builds;
+ * TODO confirm they are declared elsewhere in this file.
+ */
+static rtsBool
+scheduleGetRemoteWork(rtsBool *receivedFinish)
+{
+ ASSERT(emptyRunQueue());
+
+ if (RtsFlags.ParFlags.BufferTime) {
+ IF_PAR_DEBUG(verbose,
+ debugBelch("...send all pending data,"));
+ {
+ nat i;
+ for (i=1; i<=nPEs; i++)
+ sendImmediately(i); // send all messages away immediately
+ }
+ }
+# ifndef SPARKS
+ //++EDEN++ idle() , i.e. send all buffers, wait for work
+ // suppress fishing in EDEN... just look for incoming messages
+ // (blocking receive)
+ IF_PAR_DEBUG(verbose,
+ debugBelch("...wait for incoming messages...\n"));
+ *receivedFinish = processMessages(); // blocking receive...
+
+ // and reenter scheduling loop after having received something
+ // (return rtsFalse below)
+
+# else /* activate SPARKS machinery */
+/* We get here, if we have no work, tried to activate a local spark, but still
+ have no work. We try to get a remote spark, by sending a FISH message.
+ Thread migration should be added here, and triggered when a sequence of
+ fishes returns without work. */
+ delay = (RtsFlags.ParFlags.fishDelay!=0ll ? RtsFlags.ParFlags.fishDelay : 0ll);
+
+ /* =8-[ no local sparks => look for work on other PEs */
+ /*
+ * We really have absolutely no work. Send out a fish
+ * (there may be some out there already), and wait for
+ * something to arrive. We clearly can't run any threads
+ * until a SCHEDULE or RESUME arrives, and so that's what
+ * we're hoping to see. (Of course, we still have to
+ * respond to other types of messages.)
+ */
+ rtsTime now = msTime() /*CURRENT_TIME*/;
+ IF_PAR_DEBUG(verbose,
+ debugBelch("-- now=%ld\n", now));
+ IF_PAR_DEBUG(fish, // verbose,
+ if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
+ (last_fish_arrived_at!=0 &&
+ last_fish_arrived_at+delay > now)) {
+ debugBelch("--$$ <%llu> delaying FISH until %llu (last fish %llu, delay %llu)\n",
+ now, last_fish_arrived_at+delay,
+ last_fish_arrived_at,
+ delay);
+ });
+
+ if (outstandingFishes < RtsFlags.ParFlags.maxFishes &&
+ advisory_thread_count < RtsFlags.ParFlags.maxThreads) { // send a FISH, but when?
+ if (last_fish_arrived_at==0 ||
+ (last_fish_arrived_at+delay <= now)) { // send FISH now!
+ /* outstandingFishes is set in sendFish, processFish;
+ avoid flooding system with fishes via delay */
+ next_fish_to_send_at = 0;
+ } else {
+ /* ToDo: this should be done in the main scheduling loop to avoid the
+ busy wait here; not so bad if fish delay is very small */
+ int iq = 0; // DEBUGGING -- HWL
+ next_fish_to_send_at = last_fish_arrived_at+delay; // remember when to send
+ /* send a fish when ready, but process messages that arrive in the meantime */
+ do {
+ if (PacketsWaiting()) {
+ iq++; // DEBUGGING
+ *receivedFinish = processMessages();
+ }
+ now = msTime();
+ } while (!*receivedFinish || now<next_fish_to_send_at);
+ // JB: This means the fish could become obsolete, if we receive
+ // work. Better check for work again?
+ // last line: while (!receivedFinish || !haveWork || now<...)
+ // next line: if (receivedFinish || haveWork )
+
+ if (*receivedFinish) // no need to send a FISH if we are finishing anyway
+ return rtsFalse; // NB: this will leave scheduler loop
+ // immediately after return!
+
+ IF_PAR_DEBUG(fish, // verbose,
+ debugBelch("--$$ <%llu> sent delayed fish (%d processMessages); active/total threads=%d/%d\n",now,iq,run_queue_len(),advisory_thread_count));
+
+ }
+
+ // JB: IMHO, this should all be hidden inside sendFish(...)
+ /* pe = choosePE();
+ sendFish(pe, thisPE, NEW_FISH_AGE, NEW_FISH_HISTORY,
+ NEW_FISH_HUNGER);
+
+ // Global statistics: count no. of fishes
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+ */
+
+ /* delayed fishes must have been sent by now! */
+ next_fish_to_send_at = 0;
+ }
+
+ *receivedFinish = processMessages();
+# endif /* SPARKS */
+
+ return rtsFalse;
+ /* NB: this function always returns rtsFalse, meaning the scheduler
+ loop continues with the next iteration;
+ rationale:
+ return code means success in finding work; we enter this function
+ if there is no local work, thus have to send a fish which takes
+ time until it arrives with work; in the meantime we should process
+ messages in the main loop;
+ */
+}
+#endif // PARALLEL_HASKELL
+
+/* ----------------------------------------------------------------------------
+ * PAR/GRAN: Report stats & debugging info(?)
+ * ------------------------------------------------------------------------- */
+
+#if defined(PAR) || defined(GRAN)
+/*
+ * scheduleGranParReport (PAR/GRAN only)
+ *
+ * Take the next thread off the run queue and emit scheduling statistics
+ * and debug output for it; when full ParStats logging is on, write a
+ * GR_SCHEDULE event to the log if we are switching to a different
+ * (spark-generated) TSO or an emit was forced via emitSchedule.
+ *
+ * NOTE(review): `t`, `pool`, `emitSchedule` and `LastTSO` are used
+ * without local declarations -- presumably file-scope in PAR/GRAN
+ * builds; TODO confirm.
+ */
+static void
+scheduleGranParReport(void)
+{
+ ASSERT(run_queue_hd != END_TSO_QUEUE);
+
+ /* Take a thread from the run queue, if we have work */
+ POP_RUN_QUEUE(t); // take_off_run_queue(END_TSO_QUEUE);
+
+ /* If this TSO has got its outport closed in the meantime,
+ * it mustn't be run. Instead, we have to clean it up as if it was finished.
+ * It has to be marked as TH_DEAD for this purpose.
+ * If it is TH_TERM instead, it is supposed to have finished in the normal way.
+
+JB: TODO: investigate whether state change field could be nuked
+ entirely and replaced by the normal tso state (whatnext
+ field). All we want to do is to kill tsos from outside.
+ */
+
+ /* ToDo: write something to the log-file
+ if (RTSflags.ParFlags.granSimStats && !sameThread)
+ DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd);
+
+ CurrentTSO = t;
+ */
+ /* the spark pool for the current PE */
+ pool = &(cap.r.rSparks); // cap = (old) MainCap
+
+ IF_DEBUG(scheduler,
+ debugBelch("--=^ %d threads, %d sparks on [%#x]\n",
+ run_queue_len(), spark_queue_len(pool), CURRENT_PROC));
+
+ IF_PAR_DEBUG(fish,
+ debugBelch("--=^ %d threads, %d sparks on [%#x]\n",
+ run_queue_len(), spark_queue_len(pool), CURRENT_PROC));
+
+ if (RtsFlags.ParFlags.ParStats.Full &&
+ (t->par.sparkname != (StgInt)0) && // only log spark generated threads
+ (emitSchedule || // forced emit
+ (t && LastTSO && t->id != LastTSO->id))) {
+ /*
+ we are running a different TSO, so write a schedule event to log file
+ NB: If we use fair scheduling we also have to write a deschedule
+ event for LastTSO; with unfair scheduling we know that the
+ previous tso has blocked whenever we switch to another tso, so
+ we don't need it in GUM for now
+ */
+ IF_PAR_DEBUG(fish, // schedule,
+ debugBelch("____ scheduling spark generated thread %d (%lx) (%lx) via a forced emit\n",t->id,t,t->par.sparkname));
+
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0);
+ emitSchedule = rtsFalse;
+ }
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ * After running a thread...
+ * ------------------------------------------------------------------------- */
+
+/*
+ * schedulePostRunThread
+ *
+ * Bookkeeping after a thread returns to the scheduler.  In sequential
+ * and THREADED builds this is a no-op; in PAR/GRAN/EDEN builds it
+ * records LastTSO / yield time and updates per-return-code global
+ * statistics (heap overflows, stack overflows, yields, blocks).
+ *
+ * NOTE(review): `t` and `ret` are not parameters or locals here --
+ * presumably file-scope variables in the PAR/GRAN configurations;
+ * TODO confirm they exist in those builds.
+ */
+static void
+schedulePostRunThread(void)
+{
+#if defined(PAR)
+ /* HACK 675: if the last thread didn't yield, make sure to print a
+ SCHEDULE event to the log file when StgRunning the next thread, even
+ if it is the same one as before */
+ LastTSO = t;
+ TimeOfLastYield = CURRENT_TIME;
+#endif
+
+ /* some statistics gathering in the parallel case */
+
+#if defined(GRAN) || defined(PAR) || defined(EDEN)
+ switch (ret) {
+ case HeapOverflow:
+# if defined(GRAN)
+ IF_DEBUG(gran, DumpGranEvent(GR_DESCHEDULE, t));
+ globalGranStats.tot_heapover++;
+# elif defined(PAR)
+ globalParStats.tot_heapover++;
+# endif
+ break;
+
+ case StackOverflow:
+# if defined(GRAN)
+ IF_DEBUG(gran,
+ DumpGranEvent(GR_DESCHEDULE, t));
+ globalGranStats.tot_stackover++;
+# elif defined(PAR)
+ // IF_DEBUG(par,
+ // DumpGranEvent(GR_DESCHEDULE, t);
+ globalParStats.tot_stackover++;
+# endif
+ break;
+
+ case ThreadYielding:
+# if defined(GRAN)
+ IF_DEBUG(gran,
+ DumpGranEvent(GR_DESCHEDULE, t));
+ globalGranStats.tot_yields++;
+# elif defined(PAR)
+ // IF_DEBUG(par,
+ // DumpGranEvent(GR_DESCHEDULE, t);
+ globalParStats.tot_yields++;
+# endif
+ break;
+
+ case ThreadBlocked:
+# if defined(GRAN)
+ IF_DEBUG(scheduler,
+ debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p [PE %d] with BQ: ",
+ t->id, t, whatNext_strs[t->what_next], t->block_info.closure,
+ (t->block_info.closure==(StgClosure*)NULL ? 99 : where_is(t->block_info.closure)));
+ if (t->block_info.closure!=(StgClosure*)NULL)
+ print_bq(t->block_info.closure);
+ debugBelch("\n"));
+
+ // ??? needed; should emit block before
+ IF_DEBUG(gran,
+ DumpGranEvent(GR_DESCHEDULE, t));
+ prune_eventq(t, (StgClosure *)NULL); // prune ContinueThreads for t
+ /*
+ ngoq Dogh!
+ ASSERT(procStatus[CurrentProc]==Busy ||
+ ((procStatus[CurrentProc]==Fetching) &&
+ (t->block_info.closure!=(StgClosure*)NULL)));
+ if (run_queue_hds[CurrentProc] == END_TSO_QUEUE &&
+ !(!RtsFlags.GranFlags.DoAsyncFetch &&
+ procStatus[CurrentProc]==Fetching))
+ procStatus[CurrentProc] = Idle;
+ */
+# elif defined(PAR)
+//++PAR++ blockThread() writes the event (change?)
+# endif
+ break;
+
+ case ThreadFinished:
+ break;
+
+ default:
+ barf("parGlobalStats: unknown return code");
+ break;
+ }
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Handle a thread that returned to the scheduler with ThreadHeapOverflow
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleHandleHeapOverflow
+ *
+ * cap: capability whose nursery the thread was allocating from.
+ * t:   the thread that stopped with HeapOverflow.
+ *
+ * If the thread merely asked for a large block (rHpAlloc > BLOCK_SIZE)
+ * and the nursery is not nearly full, allocate the block group, splice
+ * it in front of the current nursery block, push the thread back on the
+ * run queue and return rtsFalse (no GC needed).  Otherwise push the
+ * thread back and return rtsTrue: the caller performs the GC at the end
+ * of the schedule() loop.
+ */
+static rtsBool
+scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
+{
+ // did the task ask for a large block?
+ if (cap->r.rHpAlloc > BLOCK_SIZE) {
+ // if so, get one and push it on the front of the nursery.
+ bdescr *bd;
+ lnat blocks;
+
+ blocks = (lnat)BLOCK_ROUND_UP(cap->r.rHpAlloc) / BLOCK_SIZE;
+
+ IF_DEBUG(scheduler,
+ debugBelch("--<< thread %ld (%s) stopped: requesting a large block (size %ld)\n",
+ (long)t->id, whatNext_strs[t->what_next], blocks));
+
+ // don't do this if the nursery is (nearly) full, we'll GC first.
+ if (cap->r.rCurrentNursery->link != NULL ||
+ cap->r.rNursery->n_blocks == 1) { // paranoia to prevent infinite loop
+ // if the nursery has only one block.
+
+ // NOTE: ACQUIRE/RELEASE_SM_LOCK are statement macros (no ';').
+ ACQUIRE_SM_LOCK
+ bd = allocGroup( blocks );
+ RELEASE_SM_LOCK
+ cap->r.rNursery->n_blocks += blocks;
+
+ // link the new group into the list
+ bd->link = cap->r.rCurrentNursery;
+ bd->u.back = cap->r.rCurrentNursery->u.back;
+ if (cap->r.rCurrentNursery->u.back != NULL) {
+ cap->r.rCurrentNursery->u.back->link = bd;
+ } else {
+#if !defined(THREADED_RTS)
+ ASSERT(g0s0->blocks == cap->r.rCurrentNursery &&
+ g0s0 == cap->r.rNursery);
+#endif
+ cap->r.rNursery->blocks = bd;
+ }
+ cap->r.rCurrentNursery->u.back = bd;
+
+ // initialise it as a nursery block. We initialise the
+ // step, gen_no, and flags field of *every* sub-block in
+ // this large block, because this is easier than making
+ // sure that we always find the block head of a large
+ // block whenever we call Bdescr() (eg. evacuate() and
+ // isAlive() in the GC would both have to do this, at
+ // least).
+ {
+ bdescr *x;
+ for (x = bd; x < bd + blocks; x++) {
+ x->step = cap->r.rNursery;
+ x->gen_no = 0;
+ x->flags = 0;
+ }
+ }
+
+ // This assert can be a killer if the app is doing lots
+ // of large block allocations.
+ IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
+
+ // now update the nursery to point to the new block
+ cap->r.rCurrentNursery = bd;
+
+ // we might be unlucky and have another thread get on the
+ // run queue before us and steal the large block, but in that
+ // case the thread will just end up requesting another large
+ // block.
+ pushOnRunQueue(cap,t);
+ return rtsFalse; /* not actually GC'ing */
+ }
+ }
+
+ IF_DEBUG(scheduler,
+ debugBelch("--<< thread %ld (%s) stopped: HeapOverflow\n",
+ (long)t->id, whatNext_strs[t->what_next]));
+#if defined(GRAN)
+ ASSERT(!is_on_queue(t,CurrentProc));
+#elif defined(PARALLEL_HASKELL)
+ /* Currently we emit a DESCHEDULE event before GC in GUM.
+ ToDo: either add separate event to distinguish SYSTEM time from rest
+ or just nuke this DESCHEDULE (and the following SCHEDULE) */
+ if (0 && RtsFlags.ParFlags.ParStats.Full) {
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_DESCHEDULE, t, (StgClosure *)NULL, 0, 0);
+ emitSchedule = rtsTrue;
+ }
+#endif
+
+ pushOnRunQueue(cap,t);
+ return rtsTrue;
+ /* actual GC is done at the end of the while loop in schedule() */
+}
+
+/* -----------------------------------------------------------------------------
+ * Handle a thread that returned to the scheduler with ThreadStackOverflow
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleHandleStackOverflow
+ *
+ * Enlarge the stack of thread t (threadStackOverflow may relocate the
+ * TSO), fix up the Task's TSO pointer if t was this task's bound
+ * thread, and push the (possibly new) TSO back on the run queue.
+ */
+static void
+scheduleHandleStackOverflow (Capability *cap, Task *task, StgTSO *t)
+{
+ IF_DEBUG(scheduler,debugBelch("--<< thread %ld (%s) stopped, StackOverflow\n",
+ (long)t->id, whatNext_strs[t->what_next]));
+ /* just adjust the stack for this thread, then pop it back
+ * on the run queue.
+ */
+ {
+ /* enlarge the stack */
+ StgTSO *new_t = threadStackOverflow(cap, t);
+
+ /* The TSO attached to this Task may have moved, so update the
+ * pointer to it.
+ */
+ if (task->tso == t) {
+ task->tso = new_t;
+ }
+ pushOnRunQueue(cap,new_t);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Handle a thread that returned to the scheduler with ThreadYielding
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleHandleYield
+ *
+ * Handle a thread that stopped with ThreadYielding.  Resets the
+ * context_switch flag and puts t back on the run queue.
+ *
+ * Returns rtsTrue if the thread should be run again immediately
+ * (it only stopped to switch evaluators, what_next changed), in which
+ * case it is NOT put back on the run queue; rtsFalse otherwise.
+ */
+static rtsBool
+scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next )
+{
+ // Reset the context switch flag. We don't do this just before
+ // running the thread, because that would mean we would lose ticks
+ // during GC, which can lead to unfair scheduling (a thread hogs
+ // the CPU because the tick always arrives during GC). This way
+ // penalises threads that do a lot of allocation, but that seems
+ // better than the alternative.
+ context_switch = 0;
+
+ /* put the thread back on the run queue. Then, if we're ready to
+ * GC, check whether this is the last task to stop. If so, wake
+ * up the GC thread. getThread will block during a GC until the
+ * GC is finished.
+ */
+ IF_DEBUG(scheduler,
+ if (t->what_next != prev_what_next) {
+ debugBelch("--<< thread %ld (%s) stopped to switch evaluators\n",
+ (long)t->id, whatNext_strs[t->what_next]);
+ } else {
+ debugBelch("--<< thread %ld (%s) stopped, yielding\n",
+ (long)t->id, whatNext_strs[t->what_next]);
+ }
+ );
+
+ IF_DEBUG(sanity,
+ //debugBelch("&& Doing sanity check on yielding TSO %ld.", t->id);
+ checkTSO(t));
+ ASSERT(t->link == END_TSO_QUEUE);
+
+ // Shortcut if we're just switching evaluators: don't bother
+ // doing stack squeezing (which can be expensive), just run the
+ // thread.
+ if (t->what_next != prev_what_next) {
+ return rtsTrue;
+ }
+
+#if defined(GRAN)
+ ASSERT(!is_on_queue(t,CurrentProc));
+
+ IF_DEBUG(sanity,
+ //debugBelch("&& Doing sanity check on all ThreadQueues (and their TSOs).");
+ checkThreadQsSanity(rtsTrue));
+
+#endif
+
+ addToRunQueue(cap,t);
+
+#if defined(GRAN)
+ /* add a ContinueThread event to actually process the thread */
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
+ ContinueThread,
+ t, (StgClosure*)NULL, (rtsSpark*)NULL);
+ IF_GRAN_DEBUG(bq,
+ debugBelch("GRAN: eventq and runnableq after adding yielded thread to queue again:\n");
+ G_EVENTQ(0);
+ G_CURR_THREADQ(0));
+#endif
+ return rtsFalse;
+}
+
+/* -----------------------------------------------------------------------------
+ * Handle a thread that returned to the scheduler with ThreadBlocked
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleHandleThreadBlocked
+ *
+ * Handle a thread that stopped with ThreadBlocked.  In GRAN/PAR builds
+ * this emits deschedule events and (PAR) calls blockThread(); in the
+ * standard builds there is nothing to do -- the blocked thread has
+ * already tidied its stack and queued itself wherever it needs to be,
+ * so only debug output is produced.
+ */
+static void
+scheduleHandleThreadBlocked( StgTSO *t
+#if !defined(GRAN) && !defined(DEBUG)
+ STG_UNUSED
+#endif
+ )
+{
+#if defined(GRAN)
+ IF_DEBUG(scheduler,
+ debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p [PE %d] with BQ: \n",
+ t->id, t, whatNext_strs[t->what_next], t->block_info.closure, (t->block_info.closure==(StgClosure*)NULL ? 99 : where_is(t->block_info.closure)));
+ if (t->block_info.closure!=(StgClosure*)NULL) print_bq(t->block_info.closure));
+
+ // ??? needed; should emit block before
+ IF_DEBUG(gran,
+ DumpGranEvent(GR_DESCHEDULE, t));
+ prune_eventq(t, (StgClosure *)NULL); // prune ContinueThreads for t
+ /*
+ ngoq Dogh!
+ ASSERT(procStatus[CurrentProc]==Busy ||
+ ((procStatus[CurrentProc]==Fetching) &&
+ (t->block_info.closure!=(StgClosure*)NULL)));
+ if (run_queue_hds[CurrentProc] == END_TSO_QUEUE &&
+ !(!RtsFlags.GranFlags.DoAsyncFetch &&
+ procStatus[CurrentProc]==Fetching))
+ procStatus[CurrentProc] = Idle;
+ */
+#elif defined(PAR)
+ IF_DEBUG(scheduler,
+ debugBelch("--<< thread %ld (%p; %s) stopped, blocking on node %p with BQ: \n",
+ t->id, t, whatNext_strs[t->what_next], t->block_info.closure));
+ IF_PAR_DEBUG(bq,
+
+ if (t->block_info.closure!=(StgClosure*)NULL)
+ print_bq(t->block_info.closure));
+
+ /* Send a fetch (if BlockedOnGA) and dump event to log file */
+ blockThread(t);
+
+ /* whatever we schedule next, we must log that schedule */
+ emitSchedule = rtsTrue;
+
+#else /* !GRAN */
+
+ // We don't need to do anything. The thread is blocked, and it
+ // has tidied up its stack and placed itself on whatever queue
+ // it needs to be on.
+
+#if !defined(THREADED_RTS)
+ ASSERT(t->why_blocked != NotBlocked);
+ // This might not be true under THREADED_RTS: we don't have
+ // exclusive access to this TSO, so someone might have
+ // woken it up by now. This actually happens: try
+ // conc023 +RTS -N2.
+#endif
+
+ IF_DEBUG(scheduler,
+ debugBelch("--<< thread %d (%s) stopped: ",
+ t->id, whatNext_strs[t->what_next]);
+ printThreadBlockage(t);
+ debugBelch("\n"));
+
+ /* Only for dumping event to log file
+ ToDo: do I need this in GranSim, too?
+ blockThread(t);
+ */
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Handle a thread that returned to the scheduler with ThreadFinished
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleHandleThreadFinished
+ *
+ * Handle a thread that stopped with ThreadFinished (including threads
+ * that killed themselves with an uncaught exception).  If t is the
+ * bound thread of `task`, store the result/status into the Task and
+ * return rtsTrue, which tells schedule() to return to the caller.
+ * Returns rtsFalse when the scheduler should simply carry on.
+ */
+static rtsBool
+scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t)
+{
+ /* Need to check whether this was a main thread, and if so,
+ * return with the return value.
+ *
+ * We also end up here if the thread kills itself with an
+ * uncaught exception, see Exception.cmm.
+ */
+ IF_DEBUG(scheduler,debugBelch("--++ thread %d (%s) finished\n",
+ t->id, whatNext_strs[t->what_next]));
+
+#if defined(GRAN)
+ endThread(t, CurrentProc); // clean-up the thread
+#elif defined(PARALLEL_HASKELL)
+ /* For now all are advisory -- HWL */
+ //if(t->priority==AdvisoryPriority) ??
+ advisory_thread_count--; // JB: Caution with this counter, buggy!
+
+# if defined(DIST)
+ if(t->dist.priority==RevalPriority)
+ FinishReval(t);
+# endif
+
+# if defined(EDENOLD)
+ // the thread could still have an outport... (BUG)
+ if (t->eden.outport != -1) {
+ // delete the outport for the tso which has finished...
+ IF_PAR_DEBUG(eden_ports,
+ debugBelch("WARNING: Scheduler removes outport %d for TSO %d.\n",
+ t->eden.outport, t->id));
+ deleteOPT(t);
+ }
+ // thread still in the process (HEAVY BUG! since outport has just been closed...)
+ if (t->eden.epid != -1) {
+ IF_PAR_DEBUG(eden_ports,
+ debugBelch("WARNING: Scheduler removes TSO %d from process %d .\n",
+ t->id, t->eden.epid));
+ removeTSOfromProcess(t);
+ }
+# endif
+
+# if defined(PAR)
+ if (RtsFlags.ParFlags.ParStats.Full &&
+ !RtsFlags.ParFlags.ParStats.Suppressed)
+ DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */);
+
+ // t->par only contains statistics: left out for now...
+ IF_PAR_DEBUG(fish,
+ debugBelch("**** end thread: ended sparked thread %d (%lx); sparkname: %lx\n",
+ t->id,t,t->par.sparkname));
+# endif
+#endif // PARALLEL_HASKELL
+
+ //
+ // Check whether the thread that just completed was a bound
+ // thread, and if so return with the result.
+ //
+ // There is an assumption here that all thread completion goes
+ // through this point; we need to make sure that if a thread
+ // ends up in the ThreadKilled state, that it stays on the run
+ // queue so it can be dealt with here.
+ //
+
+ if (t->bound) {
+
+ if (t->bound != task) {
+#if !defined(THREADED_RTS)
+ // Must be a bound thread that is not the topmost one. Leave
+ // it on the run queue until the stack has unwound to the
+ // point where we can deal with this. Leaving it on the run
+ // queue also ensures that the garbage collector knows about
+ // this thread and its return value (it gets dropped from the
+ // all_threads list so there's no other way to find it).
+ appendToRunQueue(cap,t);
+ return rtsFalse;
+#else
+ // this cannot happen in the threaded RTS, because a
+ // bound thread can only be run by the appropriate Task.
+ barf("finished bound thread that isn't mine");
+#endif
+ }
+
+ ASSERT(task->tso == t);
+
+ if (t->what_next == ThreadComplete) {
+ if (task->ret) {
+ // NOTE: return val is tso->sp[1] (see StgStartup.hc)
+ *(task->ret) = (StgClosure *)task->tso->sp[1];
+ }
+ task->stat = Success;
+ } else {
+ if (task->ret) {
+ *(task->ret) = NULL;
+ }
+ if (sched_state >= SCHED_INTERRUPTING) {
+ task->stat = Interrupted;
+ } else {
+ task->stat = Killed;
+ }
+ }
+#ifdef DEBUG
+ removeThreadLabel((StgWord)task->tso->id);
+#endif
+ return rtsTrue; // tells schedule() to return
+ }
+
+ return rtsFalse;
+}
+
+/* -----------------------------------------------------------------------------
+ * Perform a heap census, if PROFILING
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleDoHeapProfile
+ *
+ * PROFILING builds only (no-op otherwise): if a heap census is due --
+ * either requested via performHeapProfile, or on every GC when
+ * +RTS -i0 heap profiling is active -- run a major GC followed by the
+ * census.  Returns rtsTrue when it already GC'd (so the caller must
+ * not GC again), rtsFalse otherwise.
+ */
+static rtsBool
+scheduleDoHeapProfile( rtsBool ready_to_gc STG_UNUSED )
+{
+#if defined(PROFILING)
+ // When we have +RTS -i0 and we're heap profiling, do a census at
+ // every GC. This lets us get repeatable runs for debugging.
+ if (performHeapProfile ||
+ (RtsFlags.ProfFlags.profileInterval==0 &&
+ RtsFlags.ProfFlags.doHeapProfile && ready_to_gc)) {
+
+ // checking black holes is necessary before GC, otherwise
+ // there may be threads that are unreachable except by the
+ // blackhole queue, which the GC will consider to be
+ // deadlocked.
+ scheduleCheckBlackHoles(&MainCapability);
+
+ IF_DEBUG(scheduler, sched_belch("garbage collecting before heap census"));
+ GarbageCollect(GetRoots, rtsTrue);
+
+ IF_DEBUG(scheduler, sched_belch("performing heap census"));
+ heapCensus();
+
+ performHeapProfile = rtsFalse;
+ return rtsTrue; // true <=> we already GC'd
+ }
+#endif
+ return rtsFalse;
+}
+
+/* -----------------------------------------------------------------------------
+ * Perform a garbage collection if necessary
+ * -------------------------------------------------------------------------- */
+
+/*
+ * scheduleDoGC
+ *
+ * Perform a garbage collection.  In THREADED_RTS the calling task first
+ * wins a CAS on the function-static waiting_for_gc flag (losers just
+ * yield their capability until the GC is over), then grabs *all*
+ * capabilities so no Haskell code can be running.  Before the GC it
+ * aborts invalid STM transactions and, when interrupting, deletes all
+ * threads.  Afterwards the stashed capabilities are released.
+ *
+ * cap may be NULL (task has no capability); returns the capability the
+ * task holds afterwards -- NOTE it may differ from the one passed in,
+ * because yieldCapability can migrate the task.
+ */
+static Capability *
+scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS,
+ rtsBool force_major, void (*get_roots)(evac_fn))
+{
+ StgTSO *t;
+#ifdef THREADED_RTS
+ static volatile StgWord waiting_for_gc;
+ rtsBool was_waiting;
+ nat i;
+#endif
+
+#ifdef THREADED_RTS
+ // In order to GC, there must be no threads running Haskell code.
+ // Therefore, the GC thread needs to hold *all* the capabilities,
+ // and release them after the GC has completed.
+ //
+ // This seems to be the simplest way: previous attempts involved
+ // making all the threads with capabilities give up their
+ // capabilities and sleep except for the *last* one, which
+ // actually did the GC. But it's quite hard to arrange for all
+ // the other tasks to sleep and stay asleep.
+ //
+
+ was_waiting = cas(&waiting_for_gc, 0, 1);
+ if (was_waiting) {
+ do {
+ IF_DEBUG(scheduler, sched_belch("someone else is trying to GC..."));
+ if (cap) yieldCapability(&cap,task);
+ } while (waiting_for_gc);
+ return cap; // NOTE: task->cap might have changed here
+ }
+
+ for (i=0; i < n_capabilities; i++) {
+ IF_DEBUG(scheduler, sched_belch("ready_to_gc, grabbing all the capabilies (%d/%d)", i, n_capabilities));
+ if (cap != &capabilities[i]) {
+ Capability *pcap = &capabilities[i];
+ // we better hope this task doesn't get migrated to
+ // another Capability while we're waiting for this one.
+ // It won't, because load balancing happens while we have
+ // all the Capabilities, but even so it's a slightly
+ // unsavoury invariant.
+ task->cap = pcap;
+ context_switch = 1;
+ waitForReturnCapability(&pcap, task);
+ if (pcap != &capabilities[i]) {
+ barf("scheduleDoGC: got the wrong capability");
+ }
+ }
+ }
+
+ waiting_for_gc = rtsFalse;
+#endif
+
+ /* Kick any transactions which are invalid back to their
+ * atomically frames. When next scheduled they will try to
+ * commit, this commit will fail and they will retry.
+ */
+ {
+ StgTSO *next;
+
+ for (t = all_threads; t != END_TSO_QUEUE; t = next) {
+ if (t->what_next == ThreadRelocated) {
+ next = t->link;
+ } else {
+ next = t->global_link;
+ if (t -> trec != NO_TREC && t -> why_blocked == NotBlocked) {
+ if (!stmValidateNestOfTransactions (t -> trec)) {
+ IF_DEBUG(stm, sched_belch("trec %p found wasting its time", t));
+
+ // strip the stack back to the
+ // ATOMICALLY_FRAME, aborting the (nested)
+ // transaction, and saving the stack of any
+ // partially-evaluated thunks on the heap.
+ raiseAsync_(&capabilities[0], t, NULL, rtsTrue, NULL);
+
+#ifdef REG_R1
+ ASSERT(get_itbl((StgClosure *)t->sp)->type == ATOMICALLY_FRAME);
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ // so this happens periodically:
+ if (cap) scheduleCheckBlackHoles(cap);
+
+ IF_DEBUG(scheduler, printAllThreads());
+
+ /*
+ * We now have all the capabilities; if we're in an interrupting
+ * state, then we should take the opportunity to delete all the
+ * threads in the system.
+ */
+ if (sched_state >= SCHED_INTERRUPTING) {
+ deleteAllThreads(&capabilities[0]);
+ sched_state = SCHED_INTERRUPTED;
+ }
+
+ /* everybody back, start the GC.
+ * Could do it in this thread, or signal a condition var
+ * to do it in another thread. Either way, we need to
+ * broadcast on gc_pending_cond afterward.
+ */
+#if defined(THREADED_RTS)
+ IF_DEBUG(scheduler,sched_belch("doing GC"));
+#endif
+ GarbageCollect(get_roots, force_major);
+
+#if defined(THREADED_RTS)
+ // release our stash of capabilities.
+ for (i = 0; i < n_capabilities; i++) {
+ if (cap != &capabilities[i]) {
+ task->cap = &capabilities[i];
+ releaseCapability(&capabilities[i]);
+ }
+ }
+ if (cap) {
+ task->cap = cap;
+ } else {
+ task->cap = NULL;
+ }
+#endif
+
+#if defined(GRAN)
+ /* add a ContinueThread event to continue execution of current thread */
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc],
+ ContinueThread,
+ t, (StgClosure*)NULL, (rtsSpark*)NULL);
+ IF_GRAN_DEBUG(bq,
+ debugBelch("GRAN: eventq and runnableq after Garbage collection:\n\n");
+ G_EVENTQ(0);
+ G_CURR_THREADQ(0));
+#endif /* GRAN */
+
+ return cap;
+}
+
+/* ---------------------------------------------------------------------------
+ * rtsSupportsBoundThreads(): is the RTS built to support bound threads?
+ * used by Control.Concurrent for error checking.
+ * ------------------------------------------------------------------------- */
+
+StgBool
+rtsSupportsBoundThreads(void)
+{
+#if defined(THREADED_RTS)
+ // Bound threads need a dedicated OS thread per Task, which only
+ // the threaded RTS provides.
+ return rtsTrue;
+#else
+ return rtsFalse;
+#endif
+}
+
+/* ---------------------------------------------------------------------------
+ * isThreadBound(tso): check whether tso is bound to an OS thread.
+ * ------------------------------------------------------------------------- */
+
+StgBool
+isThreadBound(StgTSO* tso USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ // tso->bound is the Task bound to this TSO (set in
+ // scheduleWaitThread()); non-NULL means the thread is bound.
+ return (tso->bound != NULL);
+#endif
+ // Non-threaded RTS: there are no bound threads.
+ return rtsFalse;
+}
+
+/* ---------------------------------------------------------------------------
+ * Singleton fork(). Do not copy any running threads.
+ * ------------------------------------------------------------------------- */
+
+#if !defined(mingw32_HOST_OS)
+#define FORKPROCESS_PRIMOP_SUPPORTED
+#endif
+
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
+static void
+deleteThread_(Capability *cap, StgTSO *tso);
+#endif
+// forkProcess: implementation of the forkProcess# primop.
+//
+// In the parent we simply return the child's pid. In the child, all
+// OS threads other than the forker are gone, so we must delete every
+// Haskell thread and discard every Task before running the given
+// action. Returns the child pid in the parent; never returns in the
+// child (it exits via stg_exit).
+StgInt
+forkProcess(HsStablePtr *entry
+#ifndef FORKPROCESS_PRIMOP_SUPPORTED
+ STG_UNUSED
+#endif
+ )
+{
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
+ Task *task;
+ pid_t pid;
+ StgTSO* t,*next;
+ Capability *cap;
+
+#if defined(THREADED_RTS)
+ // Refuse to fork with more than one Capability: we only clean up
+ // the single Capability we hold below.
+ if (RtsFlags.ParFlags.nNodes > 1) {
+ errorBelch("forking not supported with +RTS -N<n> greater than 1");
+ stg_exit(EXIT_FAILURE);
+ }
+#endif
+
+ IF_DEBUG(scheduler,sched_belch("forking!"));
+
+ // ToDo: for SMP, we should probably acquire *all* the capabilities
+ cap = rts_lock();
+
+ pid = fork();
+
+ if (pid) { // parent
+
+ // just return the pid
+ rts_unlock(cap);
+ return pid;
+
+ } else { // child
+
+ // Now, all OS threads except the thread that forked are
+ // stopped. We need to stop all Haskell threads, including
+ // those involved in foreign calls. Also we need to delete
+ // all Tasks, because they correspond to OS threads that are
+ // now gone.
+
+ for (t = all_threads; t != END_TSO_QUEUE; t = next) {
+ if (t->what_next == ThreadRelocated) {
+ // relocated TSOs chain via t->link, not global_link
+ next = t->link;
+ } else {
+ next = t->global_link;
+ // don't allow threads to catch the ThreadKilled
+ // exception, but we do want to raiseAsync() because these
+ // threads may be evaluating thunks that we need later.
+ deleteThread_(cap,t);
+ }
+ }
+
+ // Empty the run queue. It seems tempting to let all the
+ // killed threads stay on the run queue as zombies to be
+ // cleaned up later, but some of them correspond to bound
+ // threads for which the corresponding Task does not exist.
+ cap->run_queue_hd = END_TSO_QUEUE;
+ cap->run_queue_tl = END_TSO_QUEUE;
+
+ // Any suspended C-calling Tasks are no more, their OS threads
+ // don't exist now:
+ cap->suspended_ccalling_tasks = NULL;
+
+ // Empty the all_threads list. Otherwise, the garbage
+ // collector may attempt to resurrect some of these threads.
+ all_threads = END_TSO_QUEUE;
+
+ // Wipe the task list, except the current Task.
+ ACQUIRE_LOCK(&sched_mutex);
+ for (task = all_tasks; task != NULL; task=task->all_link) {
+ if (task != cap->running_task) {
+ discardTask(task);
+ }
+ }
+ RELEASE_LOCK(&sched_mutex);
+
+#if defined(THREADED_RTS)
+ // Wipe our spare workers list, they no longer exist. New
+ // workers will be created if necessary.
+ cap->spare_workers = NULL;
+ cap->returning_tasks_hd = NULL;
+ cap->returning_tasks_tl = NULL;
+#endif
+
+ cap = rts_evalStableIO(cap, entry, NULL); // run the action
+ rts_checkSchedStatus("forkProcess",cap);
+
+ rts_unlock(cap);
+ hs_exit(); // clean up and exit
+ stg_exit(EXIT_SUCCESS);
+ }
+#else /* !FORKPROCESS_PRIMOP_SUPPORTED */
+ barf("forkProcess#: primop not supported on this platform, sorry!\n");
+ return -1;
+#endif
+}
+
+/* ---------------------------------------------------------------------------
+ * Delete all the threads in the system
+ * ------------------------------------------------------------------------- */
+
+// Delete every thread on the all_threads list by sending it an
+// asynchronous ThreadKilled (via deleteThread). Used when shutting
+// down or interrupting the system (see scheduleDoGC).
+static void
+deleteAllThreads ( Capability *cap )
+{
+ StgTSO* t, *next;
+ IF_DEBUG(scheduler,sched_belch("deleting all threads"));
+ for (t = all_threads; t != END_TSO_QUEUE; t = next) {
+ if (t->what_next == ThreadRelocated) {
+ // relocated TSOs chain via t->link; skip to the new TSO
+ next = t->link;
+ } else {
+ next = t->global_link;
+ deleteThread(cap,t);
+ }
+ }
+
+ // The run queue now contains a bunch of ThreadKilled threads. We
+ // must not throw these away: the main thread(s) will be in there
+ // somewhere, and the main scheduler loop has to deal with it.
+ // Also, the run queue is the only thing keeping these threads from
+ // being GC'd, and we don't want the "main thread has been GC'd" panic.
+
+#if !defined(THREADED_RTS)
+ ASSERT(blocked_queue_hd == END_TSO_QUEUE);
+ ASSERT(sleeping_queue == END_TSO_QUEUE);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ Managing the suspended_ccalling_tasks list.
+ Locks required: sched_mutex
+ -------------------------------------------------------------------------- */
+
+// Push task onto the front of cap's doubly-linked list of suspended
+// C-calling tasks. The caller holds cap->lock (see suspendThread()).
+STATIC_INLINE void
+suspendTask (Capability *cap, Task *task)
+{
+ // task must not already be on any suspended list
+ ASSERT(task->next == NULL && task->prev == NULL);
+ task->next = cap->suspended_ccalling_tasks;
+ task->prev = NULL;
+ if (cap->suspended_ccalling_tasks) {
+ cap->suspended_ccalling_tasks->prev = task;
+ }
+ cap->suspended_ccalling_tasks = task;
+}
+
+// Unlink task from cap's doubly-linked suspended_ccalling_tasks list
+// (inverse of suspendTask); called on return from a safe foreign call.
+STATIC_INLINE void
+recoverSuspendedTask (Capability *cap, Task *task)
+{
+ if (task->prev) {
+ task->prev->next = task->next;
+ } else {
+ // task had no predecessor, so it must be the list head
+ ASSERT(cap->suspended_ccalling_tasks == task);
+ cap->suspended_ccalling_tasks = task->next;
+ }
+ if (task->next) {
+ task->next->prev = task->prev;
+ }
+ task->next = task->prev = NULL;
+}
+
+/* ---------------------------------------------------------------------------
+ * Suspending & resuming Haskell threads.
+ *
+ * When making a "safe" call to C (aka _ccall_GC), the task gives back
+ * its capability before calling the C function. This allows another
+ * task to pick up the capability and carry on running Haskell
+ * threads. It also means that if the C call blocks, it won't lock
+ * the whole system.
+ *
+ * The Haskell thread making the C call is put to sleep for the
+ * duration of the call, on the suspended_ccalling_threads queue. We
+ * give out a token to the task, which it can use to resume the thread
+ * on return from the C function.
+ * ------------------------------------------------------------------------- */
+
+// Called when a Haskell thread makes a "safe" foreign call: park the
+// current TSO, hand the Capability back so other tasks can run, and
+// return an opaque token (the Task) that resumeThread() accepts.
+void *
+suspendThread (StgRegTable *reg)
+{
+ Capability *cap;
+ int saved_errno = errno; // preserve errno across RTS bookkeeping
+ StgTSO *tso;
+ Task *task;
+
+ /* assume that *reg is a pointer to the StgRegTable part of a Capability.
+ */
+ cap = regTableToCapability(reg);
+
+ task = cap->running_task;
+ tso = cap->r.rCurrentTSO;
+
+ IF_DEBUG(scheduler,
+ sched_belch("thread %d did a safe foreign call", cap->r.rCurrentTSO->id));
+
+ // XXX this might not be necessary --SDM
+ tso->what_next = ThreadRunGHC;
+
+ threadPaused(cap,tso);
+
+ if(tso->blocked_exceptions == NULL) {
+ // exceptions currently unblocked: allow async exceptions to be
+ // delivered while blocked on the C call
+ tso->why_blocked = BlockedOnCCall;
+ tso->blocked_exceptions = END_TSO_QUEUE;
+ } else {
+ tso->why_blocked = BlockedOnCCall_NoUnblockExc;
+ }
+
+ // Hand back capability
+ task->suspended_tso = tso;
+
+ ACQUIRE_LOCK(&cap->lock);
+
+ suspendTask(cap,task);
+ cap->in_haskell = rtsFalse;
+ releaseCapability_(cap);
+
+ RELEASE_LOCK(&cap->lock);
+
+#if defined(THREADED_RTS)
+ /* Preparing to leave the RTS, so ensure there's a native thread/task
+ waiting to take over.
+ */
+ IF_DEBUG(scheduler, sched_belch("thread %d: leaving RTS", tso->id));
+#endif
+
+ errno = saved_errno;
+ return task; // opaque token, passed back to resumeThread()
+}
+
+// Re-enter the RTS after a safe foreign call: task_ is the opaque
+// token from suspendThread(). Reacquires a Capability, restores the
+// suspended TSO as the current thread, and returns the register table
+// for the STG machine to continue with.
+StgRegTable *
+resumeThread (void *task_)
+{
+ StgTSO *tso;
+ Capability *cap;
+ int saved_errno = errno; // preserve errno across RTS bookkeeping
+ Task *task = task_;
+
+ cap = task->cap;
+ // Wait for permission to re-enter the RTS with the result.
+ waitForReturnCapability(&cap,task);
+ // we might be on a different capability now... but if so, our
+ // entry on the suspended_ccalling_tasks list will also have been
+ // migrated.
+
+ // Remove the thread from the suspended list
+ recoverSuspendedTask(cap,task);
+
+ tso = task->suspended_tso;
+ task->suspended_tso = NULL;
+ tso->link = END_TSO_QUEUE;
+ IF_DEBUG(scheduler, sched_belch("thread %d: re-entering RTS", tso->id));
+
+ if (tso->why_blocked == BlockedOnCCall) {
+ // wake anything that blocked on us while we were in C land
+ awakenBlockedQueue(cap,tso->blocked_exceptions);
+ tso->blocked_exceptions = NULL;
+ }
+
+ /* Reset blocking status */
+ tso->why_blocked = NotBlocked;
+
+ cap->r.rCurrentTSO = tso;
+ cap->in_haskell = rtsTrue;
+ errno = saved_errno;
+
+ /* We might have GC'd, mark the TSO dirty again */
+ dirtyTSO(tso);
+
+ IF_DEBUG(sanity, checkTSO(tso));
+
+ return &cap->r;
+}
+
+/* ---------------------------------------------------------------------------
+ * Comparing Thread ids.
+ *
+ * This is used from STG land in the implementation of the
+ * instances of Eq/Ord for ThreadIds.
+ * ------------------------------------------------------------------------ */
+
+// Three-way compare of two thread ids; returns -1/0/1 in the usual
+// qsort-style convention. Used by the Eq/Ord instances for ThreadId.
+int
+cmp_thread(StgPtr tso1, StgPtr tso2)
+{
+ StgThreadID id1 = ((StgTSO *)tso1)->id;
+ StgThreadID id2 = ((StgTSO *)tso2)->id;
+
+ if (id1 < id2) return (-1);
+ if (id1 > id2) return 1;
+ return 0;
+}
+
+/* ---------------------------------------------------------------------------
+ * Fetching the ThreadID from an StgTSO.
+ *
+ * This is used in the implementation of Show for ThreadIds.
+ * ------------------------------------------------------------------------ */
+// Return the numeric id of a TSO; used by Show for ThreadId.
+int
+rts_getThreadId(StgPtr tso)
+{
+ return ((StgTSO *)tso)->id;
+}
+
+#ifdef DEBUG
+// Attach a debug label to a thread (DEBUG builds only). The label is
+// copied; updateThreadLabel takes ownership of the copy.
+void
+labelThread(StgPtr tso, char *label)
+{
+ int len;
+ void *buf;
+
+ /* Caveat: Once set, you can only set the thread name to "" */
+ len = strlen(label)+1; // +1 so the NUL terminator is copied too
+ buf = stgMallocBytes(len * sizeof(char), "Schedule.c:labelThread()");
+ strncpy(buf,label,len);
+ /* Update will free the old memory for us */
+ updateThreadLabel(((StgTSO *)tso)->id,buf);
+}
+#endif /* DEBUG */
+
+/* ---------------------------------------------------------------------------
+ Create a new thread.
+
+ The new thread starts with the given stack size. Before the
+ scheduler can run, however, this thread needs to have a closure
+ (and possibly some arguments) pushed on its stack. See
+ pushClosure() in Schedule.h.
+
+ createGenThread() and createIOThread() (in SchedAPI.h) are
+ convenient packaged versions of this function.
+
+ currently pri (priority) is only used in a GRAN setup -- HWL
+ ------------------------------------------------------------------------ */
+// createThread: allocate and initialise a new TSO with the given total
+// size (in words, including the TSO header). The thread is linked onto
+// all_threads but NOT put on a run queue; callers push a closure and
+// then schedule it separately (see scheduleThread / pushClosure).
+#if defined(GRAN)
+/* currently pri (priority) is only used in a GRAN setup -- HWL */
+StgTSO *
+createThread(nat size, StgInt pri)
+#else
+StgTSO *
+createThread(Capability *cap, nat size)
+#endif
+{
+ StgTSO *tso;
+ nat stack_size;
+
+ /* sched_mutex is *not* required */
+
+ /* First check whether we should create a thread at all */
+#if defined(PARALLEL_HASKELL)
+ /* check that no more than RtsFlags.ParFlags.maxThreads threads are created */
+ if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads) {
+ threadsIgnored++;
+ debugBelch("{createThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)\n",
+ RtsFlags.ParFlags.maxThreads, advisory_thread_count);
+ return END_TSO_QUEUE;
+ }
+ threadsCreated++;
+#endif
+
+#if defined(GRAN)
+ ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
+#endif
+
+ // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW
+
+ /* catch ridiculously small stack sizes */
+ if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) {
+ size = MIN_STACK_WORDS + TSO_STRUCT_SIZEW;
+ }
+
+ // the stack is whatever is left after the TSO struct itself
+ stack_size = size - TSO_STRUCT_SIZEW;
+
+ tso = (StgTSO *)allocateLocal(cap, size);
+ TICK_ALLOC_TSO(stack_size, 0);
+
+ SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM);
+#if defined(GRAN)
+ SET_GRAN_HDR(tso, ThisPE);
+#endif
+
+ // Always start with the compiled code evaluator
+ tso->what_next = ThreadRunGHC;
+
+ tso->why_blocked = NotBlocked;
+ tso->blocked_exceptions = NULL;
+ tso->flags = TSO_DIRTY;
+
+ tso->saved_errno = 0;
+ tso->bound = NULL;
+ tso->cap = cap;
+
+ tso->stack_size = stack_size;
+ tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize)
+ - TSO_STRUCT_SIZEW;
+ tso->sp = (P_)&(tso->stack) + stack_size; // stack grows downwards
+
+ tso->trec = NO_TREC;
+
+#ifdef PROFILING
+ tso->prof.CCCS = CCS_MAIN;
+#endif
+
+ /* put a stop frame on the stack */
+ tso->sp -= sizeofW(StgStopFrame);
+ SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM);
+ tso->link = END_TSO_QUEUE;
+
+ // ToDo: check this
+#if defined(GRAN)
+ /* uses more flexible routine in GranSim */
+ insertThread(tso, CurrentProc);
+#else
+ /* In a non-GranSim setup the pushing of a TSO onto the runq is separated
+ * from its creation
+ */
+#endif
+
+#if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpGranEvent(GR_START,tso);
+#elif defined(PARALLEL_HASKELL)
+ if (RtsFlags.ParFlags.ParStats.Full)
+ DumpGranEvent(GR_STARTQ,tso);
+ /* HACK to avoid SCHEDULE
+ LastTSO = tso; */
+#endif
+
+ /* Link the new thread on the global thread list.
+ */
+ ACQUIRE_LOCK(&sched_mutex);
+ tso->id = next_thread_id++; // while we have the mutex
+ tso->global_link = all_threads;
+ all_threads = tso;
+ RELEASE_LOCK(&sched_mutex);
+
+#if defined(DIST)
+ tso->dist.priority = MandatoryPriority; //by default that is...
+#endif
+
+#if defined(GRAN)
+ tso->gran.pri = pri;
+# if defined(DEBUG)
+ tso->gran.magic = TSO_MAGIC; // debugging only
+# endif
+ tso->gran.sparkname = 0;
+ tso->gran.startedat = CURRENT_TIME;
+ tso->gran.exported = 0;
+ tso->gran.basicblocks = 0;
+ tso->gran.allocs = 0;
+ tso->gran.exectime = 0;
+ tso->gran.fetchtime = 0;
+ tso->gran.fetchcount = 0;
+ tso->gran.blocktime = 0;
+ tso->gran.blockcount = 0;
+ tso->gran.blockedat = 0;
+ tso->gran.globalsparks = 0;
+ tso->gran.localsparks = 0;
+ if (RtsFlags.GranFlags.Light)
+ tso->gran.clock = Now; /* local clock */
+ else
+ tso->gran.clock = 0;
+
+ IF_DEBUG(gran,printTSO(tso));
+#elif defined(PARALLEL_HASKELL)
+# if defined(DEBUG)
+ tso->par.magic = TSO_MAGIC; // debugging only
+# endif
+ tso->par.sparkname = 0;
+ tso->par.startedat = CURRENT_TIME;
+ tso->par.exported = 0;
+ tso->par.basicblocks = 0;
+ tso->par.allocs = 0;
+ tso->par.exectime = 0;
+ tso->par.fetchtime = 0;
+ tso->par.fetchcount = 0;
+ tso->par.blocktime = 0;
+ tso->par.blockcount = 0;
+ tso->par.blockedat = 0;
+ tso->par.globalsparks = 0;
+ tso->par.localsparks = 0;
+#endif
+
+#if defined(GRAN)
+ globalGranStats.tot_threads_created++;
+ globalGranStats.threads_created_on_PE[CurrentProc]++;
+ globalGranStats.tot_sq_len += spark_queue_len(CurrentProc);
+ globalGranStats.tot_sq_probes++;
+#elif defined(PARALLEL_HASKELL)
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ //debugBelch("Creating thread %d @ %11.2f\n", tso->id, usertime());
+ globalParStats.tot_threads_created++;
+ }
+#endif
+
+#if defined(GRAN)
+ IF_GRAN_DEBUG(pri,
+ sched_belch("==__ schedule: Created TSO %d (%p);",
+ CurrentProc, tso, tso->id));
+#elif defined(PARALLEL_HASKELL)
+ IF_PAR_DEBUG(verbose,
+ sched_belch("==__ schedule: Created TSO %d (%p); %d threads active",
+ (long)tso->id, tso, advisory_thread_count));
+#else
+ IF_DEBUG(scheduler,sched_belch("created thread %ld, stack size = %lx words",
+ (long)tso->id, (long)tso->stack_size));
+#endif
+ return tso;
+}
+
+#if defined(PAR)
+/* RFP:
+ all parallel thread creation calls should fall through the following routine.
+*/
+// Turn a spark into a runnable thread (PAR builds). Returns the new
+// TSO, or END_TSO_QUEUE if the thread limit has been reached.
+// NOTE(review): calls createThread with a single argument, which does
+// not match the visible non-GRAN signature createThread(Capability *,
+// nat) above — presumably PAR builds use a different prototype; verify.
+StgTSO *
+createThreadFromSpark(rtsSpark spark)
+{ StgTSO *tso;
+ ASSERT(spark != (rtsSpark)NULL);
+// JB: TAKE CARE OF THIS COUNTER! BUGGY
+ if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads)
+ { threadsIgnored++;
+ barf("{createSparkThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)",
+ RtsFlags.ParFlags.maxThreads, advisory_thread_count);
+ return END_TSO_QUEUE;
+ }
+ else
+ { threadsCreated++;
+ tso = createThread(RtsFlags.GcFlags.initialStkSize);
+ if (tso==END_TSO_QUEUE)
+ barf("createSparkThread: Cannot create TSO");
+#if defined(DIST)
+ tso->priority = AdvisoryPriority;
+#endif
+ pushClosure(tso,spark);
+ addToRunQueue(tso);
+ advisory_thread_count++; // JB: TAKE CARE OF THIS COUNTER! BUGGY
+ }
+ return tso;
+}
+#endif
+
+/*
+ Turn a spark into a thread.
+ ToDo: fix for SMP (needs to acquire SCHED_MUTEX!)
+*/
+#if 0
+// activateSpark: turn a spark into a thread. Currently compiled out
+// (enclosing #if 0); kept for reference only.
+StgTSO *
+activateSpark (rtsSpark spark)
+{
+ StgTSO *tso;
+
+ tso = createSparkThread(spark);
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ //ASSERT(run_queue_hd == END_TSO_QUEUE); // I think ...
+ IF_PAR_DEBUG(verbose,
+ debugBelch("==^^ activateSpark: turning spark of closure %p (%s) into a thread\n",
+ (StgClosure *)spark, info_type((StgClosure *)spark)));
+ }
+ // ToDo: fwd info on local/global spark to thread -- HWL
+ // tso->gran.exported = spark->exported;
+ // tso->gran.locked = !spark->global;
+ // tso->gran.sparkname = spark->name;
+
+ return tso;
+}
+#endif
+
+/* ---------------------------------------------------------------------------
+ * scheduleThread()
+ *
+ * scheduleThread puts a thread on the end of the runnable queue.
+ * This will usually be done immediately after a thread is created.
+ * The caller of scheduleThread must create the thread using e.g.
+ * createThread and push an appropriate closure
+ * on this thread's stack before the scheduler is invoked.
+ * ------------------------------------------------------------------------ */
+
+// Put tso on the end of cap's run queue, making it runnable.
+void
+scheduleThread(Capability *cap, StgTSO *tso)
+{
+ // The thread goes at the *end* of the run-queue, to avoid possible
+ // starvation of any threads already on the queue.
+ appendToRunQueue(cap,tso);
+}
+
+// Make tso runnable on a specific Capability (cpu modulo the number
+// of Capabilities), pinning it there via TSO_LOCKED. Falls back to
+// the local run queue in non-threaded builds.
+void
+scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
+{
+#if defined(THREADED_RTS)
+ tso->flags |= TSO_LOCKED; // we requested explicit affinity; don't
+ // move this thread from now on.
+ cpu %= RtsFlags.ParFlags.nNodes;
+ if (cpu == cap->no) {
+ // already on the right Capability: just enqueue locally
+ appendToRunQueue(cap,tso);
+ } else {
+ Capability *target_cap = &capabilities[cpu];
+ if (tso->bound) {
+ // keep the bound Task's notion of its Capability in sync
+ tso->bound->cap = target_cap;
+ }
+ tso->cap = target_cap;
+ wakeupThreadOnCapability(target_cap,tso);
+ }
+#else
+ appendToRunQueue(cap,tso);
+#endif
+}
+
+// Bind tso to the current Task, enqueue it, and run the scheduler
+// until the bound thread completes. *ret receives the result (via
+// task->ret); returns the Capability we end up holding.
+Capability *
+scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability *cap)
+{
+ Task *task;
+
+ // We already created/initialised the Task
+ task = cap->running_task;
+
+ // This TSO is now a bound thread; make the Task and TSO
+ // point to each other.
+ tso->bound = task;
+ tso->cap = cap;
+
+ task->tso = tso;
+ task->ret = ret;
+ task->stat = NoStatus;
+
+ appendToRunQueue(cap,tso);
+
+ IF_DEBUG(scheduler, sched_belch("new bound thread (%d)", tso->id));
+
+#if defined(GRAN)
+ /* GranSim specific init */
+ // NOTE(review): `m` is not declared in this function; this GRAN
+ // branch looks stale — confirm before enabling GRAN builds.
+ CurrentTSO = m->tso; // the TSO to run
+ procStatus[MainProc] = Busy; // status of main PE
+ CurrentProc = MainProc; // PE to run it on
+#endif
+
+ cap = schedule(cap,task);
+
+ ASSERT(task->stat != NoStatus);
+ ASSERT_FULL_CAPABILITY_INVARIANTS(cap,task);
+
+ IF_DEBUG(scheduler, sched_belch("bound thread (%d) finished", task->tso->id));
+ return cap;
+}
+
+/* ----------------------------------------------------------------------------
+ * Starting Tasks
+ * ------------------------------------------------------------------------- */
+
+#if defined(THREADED_RTS)
+// Entry point for a worker OS thread (threaded RTS): pick up the
+// Capability assigned by startWorkerTask(), run the scheduler, then
+// release the Capability and retire the Task on exit.
+void
+workerStart(Task *task)
+{
+ Capability *cap;
+
+ // See startWorkerTask().
+ ACQUIRE_LOCK(&task->lock);
+ cap = task->cap;
+ RELEASE_LOCK(&task->lock);
+
+ // set the thread-local pointer to the Task:
+ taskEnter(task);
+
+ // schedule() runs without a lock.
+ cap = schedule(cap,task);
+
+ // On exit from schedule(), we have a Capability.
+ releaseCapability(cap);
+ taskStop(task);
+}
+#endif
+
+/* ---------------------------------------------------------------------------
+ * initScheduler()
+ *
+ * Initialise the scheduler. This resets all the queues - if the
+ * queues contained any threads, they'll be garbage collected at the
+ * next pass.
+ *
+ * ------------------------------------------------------------------------ */
+
+void
+initScheduler(void)
+{
+#if defined(GRAN)
+ nat i;
+ // NOTE(review): blackhole_queue is indexed here but assigned as a
+ // scalar below (blackhole_queue = END_TSO_QUEUE) — the GRAN branch
+ // looks inconsistent; confirm before enabling GRAN builds.
+ for (i=0; i<=MAX_PROC; i++) {
+ run_queue_hds[i] = END_TSO_QUEUE;
+ run_queue_tls[i] = END_TSO_QUEUE;
+ blocked_queue_hds[i] = END_TSO_QUEUE;
+ blocked_queue_tls[i] = END_TSO_QUEUE;
+ ccalling_threadss[i] = END_TSO_QUEUE;
+ blackhole_queue[i] = END_TSO_QUEUE;
+ sleeping_queue = END_TSO_QUEUE;
+ }
+#elif !defined(THREADED_RTS)
+ blocked_queue_hd = END_TSO_QUEUE;
+ blocked_queue_tl = END_TSO_QUEUE;
+ sleeping_queue = END_TSO_QUEUE;
+#endif
+
+ blackhole_queue = END_TSO_QUEUE;
+ all_threads = END_TSO_QUEUE;
+
+ context_switch = 0;
+ sched_state = SCHED_RUNNING;
+
+ // convert the context-switch interval from time to timer ticks
+ RtsFlags.ConcFlags.ctxtSwitchTicks =
+ RtsFlags.ConcFlags.ctxtSwitchTime / TICK_MILLISECS;
+
+#if defined(THREADED_RTS)
+ /* Initialise the mutex and condition variables used by
+ * the scheduler. */
+ initMutex(&sched_mutex);
+#endif
+
+ ACQUIRE_LOCK(&sched_mutex);
+
+ /* A capability holds the state a native thread needs in
+ * order to execute STG code. At least one capability is
+ * floating around (only THREADED_RTS builds have more than one).
+ */
+ initCapabilities();
+
+ initTaskManager();
+
+#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL)
+ initSparkPools();
+#endif
+
+#if defined(THREADED_RTS)
+ /*
+ * Eagerly start one worker to run each Capability, except for
+ * Capability 0. The idea is that we're probably going to start a
+ * bound thread on Capability 0 pretty soon, so we don't want a
+ * worker task hogging it.
+ */
+ {
+ nat i;
+ Capability *cap;
+ for (i = 1; i < n_capabilities; i++) {
+ cap = &capabilities[i];
+ ACQUIRE_LOCK(&cap->lock);
+ startWorkerTask(cap, workerStart);
+ RELEASE_LOCK(&cap->lock);
+ }
+ }
+#endif
+
+ RELEASE_LOCK(&sched_mutex);
+}
+
+// Shut the scheduler down: kill remaining Haskell threads (via a
+// final GC in the INTERRUPTING state), then shut down every
+// Capability and stop the task manager.
+void
+exitScheduler( void )
+{
+ Task *task = NULL;
+
+#if defined(THREADED_RTS)
+ // we need a bound Task to hold Capabilities during shutdown
+ ACQUIRE_LOCK(&sched_mutex);
+ task = newBoundTask();
+ RELEASE_LOCK(&sched_mutex);
+#endif
+
+ // If we haven't killed all the threads yet, do it now.
+ if (sched_state < SCHED_INTERRUPTED) {
+ sched_state = SCHED_INTERRUPTING;
+ // scheduleDoGC() deletes all threads when interrupting
+ scheduleDoGC(NULL,task,rtsFalse,GetRoots);
+ }
+ sched_state = SCHED_SHUTTING_DOWN;
+
+#if defined(THREADED_RTS)
+ {
+ nat i;
+
+ for (i = 0; i < n_capabilities; i++) {
+ shutdownCapability(&capabilities[i], task);
+ }
+ boundTaskExiting(task);
+ stopTaskManager();
+ }
+#endif
+}
+
+/* ---------------------------------------------------------------------------
+ Where are the roots that we know about?
+
+ - all the threads on the runnable queue
+ - all the threads on the blocked queue
+ - all the threads on the sleeping queue
+ - all the threads currently executing a _ccall_GC
+ - all the "main threads"
+
+ ------------------------------------------------------------------------ */
+
+/* This has to be protected either by the scheduler monitor, or by the
+ garbage collection monitor (probably the latter).
+ KH @ 25/10/99
+*/
+
+// GetRoots: apply evac to every GC root the scheduler knows about:
+// the run/wakeup queues of each Capability, the suspended C-calling
+// TSOs, the (non-threaded) blocked/sleeping queues, the spark pools
+// and the signal handlers.
+void
+GetRoots( evac_fn evac )
+{
+ nat i;
+ Capability *cap;
+ Task *task;
+
+#if defined(GRAN)
+ for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
+ if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
+ evac((StgClosure **)&run_queue_hds[i]);
+ if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
+ evac((StgClosure **)&run_queue_tls[i]);
+
+ if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
+ evac((StgClosure **)&blocked_queue_hds[i]);
+ if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
+ evac((StgClosure **)&blocked_queue_tls[i]);
+ if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
+ evac((StgClosure **)&ccalling_threadss[i]); // fix: was ccalling_threads[i] (undeclared; see initScheduler)
+ }
+
+ markEventQueue();
+
+#else /* !GRAN */
+
+ for (i = 0; i < n_capabilities; i++) {
+ cap = &capabilities[i];
+ evac((StgClosure **)(void *)&cap->run_queue_hd);
+ evac((StgClosure **)(void *)&cap->run_queue_tl);
+#if defined(THREADED_RTS)
+ evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
+ evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
+#endif
+ // TSOs suspended during safe foreign calls are only reachable
+ // from their Task, so mark them explicitly here.
+ for (task = cap->suspended_ccalling_tasks; task != NULL;
+ task=task->next) {
+ IF_DEBUG(scheduler,sched_belch("evac'ing suspended TSO %d", task->suspended_tso->id));
+ evac((StgClosure **)(void *)&task->suspended_tso);
+ }
+
+ }
+
+
+#if !defined(THREADED_RTS)
+ evac((StgClosure **)(void *)&blocked_queue_hd);
+ evac((StgClosure **)(void *)&blocked_queue_tl);
+ evac((StgClosure **)(void *)&sleeping_queue);
+#endif
+#endif
+
+ // evac((StgClosure **)&blackhole_queue);
+
+#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
+ markSparkQueue(evac);
+#endif
+
+#if defined(RTS_USER_SIGNALS)
+ // mark the signal handlers (signals should be already blocked)
+ markSignalHandlers(evac);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ performGC
+
+ This is the interface to the garbage collector from Haskell land.
+ We provide this so that external C code can allocate and garbage
+ collect when called from Haskell via _ccall_GC.
+
+ It might be useful to provide an interface whereby the programmer
+ can specify more roots (ToDo).
+
+ This needs to be protected by the GC condition variable above. KH.
+ -------------------------------------------------------------------------- */
+
+static void (*extra_roots)(evac_fn);
+
+// Common implementation for performGC / performMajorGC /
+// performGCWithRoots: enter the scheduler's GC path, creating a
+// temporary bound Task if the caller doesn't already have one.
+static void
+performGC_(rtsBool force_major, void (*get_roots)(evac_fn))
+{
+ Task *task = myTask();
+
+ if (task == NULL) {
+ // not called from a known Task: make one for the duration
+ ACQUIRE_LOCK(&sched_mutex);
+ task = newBoundTask();
+ RELEASE_LOCK(&sched_mutex);
+ scheduleDoGC(NULL,task,force_major, get_roots);
+ boundTaskExiting(task);
+ } else {
+ scheduleDoGC(NULL,task,force_major, get_roots);
+ }
+}
+
+// External entry point: request a (minor) garbage collection.
+void
+performGC(void)
+{
+ performGC_(rtsFalse, GetRoots);
+}
+
+// External entry point: request a major garbage collection.
+void
+performMajorGC(void)
+{
+ performGC_(rtsTrue, GetRoots);
+}
+
+// Root-marking function combining the scheduler's roots with the
+// user-supplied extra_roots (set by performGCWithRoots).
+static void
+AllRoots(evac_fn evac)
+{
+ GetRoots(evac); // the scheduler's roots
+ extra_roots(evac); // the user's roots
+}
+
+// External entry point: GC with additional user-supplied roots.
+// Stashes get_roots in the file-scope extra_roots for AllRoots().
+void
+performGCWithRoots(void (*get_roots)(evac_fn))
+{
+ extra_roots = get_roots;
+ performGC_(rtsFalse, AllRoots);
+}
+
+/* -----------------------------------------------------------------------------
+ Stack overflow
+
+ If the thread has reached its maximum stack size, then raise the
+ StackOverflow exception in the offending thread. Otherwise
+ relocate the TSO into a larger chunk of memory and adjust its stack
+ size appropriately.
+ -------------------------------------------------------------------------- */
+
+// Handle a stack overflow on tso: either raise StackOverflow in the
+// thread (if it is already at its maximum stack size) or copy it into
+// a larger TSO, leaving the old one marked ThreadRelocated with a
+// forwarding link. Returns the TSO to continue with.
+static StgTSO *
+threadStackOverflow(Capability *cap, StgTSO *tso)
+{
+ nat new_stack_size, stack_words;
+ lnat new_tso_size;
+ StgPtr new_sp;
+ StgTSO *dest;
+
+ IF_DEBUG(sanity,checkTSO(tso));
+ if (tso->stack_size >= tso->max_stack_size) {
+
+ IF_DEBUG(gc,
+ debugBelch("@@ threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)\n",
+ (long)tso->id, tso, (long)tso->stack_size, (long)tso->max_stack_size);
+ /* If we're debugging, just print out the top of the stack */
+ printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size,
+ tso->sp+64)));
+
+ /* Send this thread the StackOverflow exception */
+ raiseAsync(cap, tso, (StgClosure *)stackOverflow_closure);
+ return tso;
+ }
+
+ /* Try to double the current stack size. If that takes us over the
+ * maximum stack size for this thread, then use the maximum instead.
+ * Finally round up so the TSO ends up as a whole number of blocks.
+ */
+ new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size);
+ new_tso_size = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) +
+ TSO_STRUCT_SIZE)/sizeof(W_);
+ new_tso_size = round_to_mblocks(new_tso_size); /* Be MBLOCK-friendly */
+ new_stack_size = new_tso_size - TSO_STRUCT_SIZEW;
+
+ IF_DEBUG(scheduler, sched_belch("increasing stack size from %ld words to %d.\n", (long)tso->stack_size, new_stack_size));
+
+ dest = (StgTSO *)allocate(new_tso_size);
+ TICK_ALLOC_TSO(new_stack_size,0);
+
+ /* copy the TSO block and the old stack into the new area */
+ memcpy(dest,tso,TSO_STRUCT_SIZE);
+ stack_words = tso->stack + tso->stack_size - tso->sp;
+ // place the live stack at the *top* of the new, larger area
+ new_sp = (P_)dest + new_tso_size - stack_words;
+ memcpy(new_sp, tso->sp, stack_words * sizeof(W_));
+
+ /* relocate the stack pointers... */
+ dest->sp = new_sp;
+ dest->stack_size = new_stack_size;
+
+ /* Mark the old TSO as relocated. We have to check for relocated
+ * TSOs in the garbage collector and any primops that deal with TSOs.
+ *
+ * It's important to set the sp value to just beyond the end
+ * of the stack, so we don't attempt to scavenge any part of the
+ * dead TSO's stack.
+ */
+ tso->what_next = ThreadRelocated;
+ tso->link = dest;
+ tso->sp = (P_)&(tso->stack[tso->stack_size]);
+ tso->why_blocked = NotBlocked;
+
+ IF_PAR_DEBUG(verbose,
+ debugBelch("@@ threadStackOverflow of TSO %d (now at %p): stack size increased to %ld\n",
+ tso->id, tso, tso->stack_size);
+ /* If we're debugging, just print out the top of the stack */
+ printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size,
+ tso->sp+64)));
+
+ IF_DEBUG(sanity,checkTSO(tso));
+#if 0
+ IF_DEBUG(scheduler,printTSO(dest));
+#endif
+
+ return dest;
+}
+
+/* ---------------------------------------------------------------------------
+ Wake up a queue that was blocked on some resource.
+ ------------------------------------------------------------------------ */
+
+// unblockCount: bookkeeping performed when a blocking-queue element
+// is woken. The GRAN variant is a no-op; the PARALLEL_HASKELL variant
+// logs a RESUME event and charges block/fetch time to the TSO based
+// on the closure type it was blocked on.
+#if defined(GRAN)
+STATIC_INLINE void
+unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node )
+{
+}
+#elif defined(PARALLEL_HASKELL)
+STATIC_INLINE void
+unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node )
+{
+ /* write RESUME events to log file and
+ update blocked and fetch time (depending on type of the orig closure) */
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure,
+ 0, 0 /* spark_queue_len(ADVISORY_POOL) */);
+ if (emptyRunQueue())
+ emitSchedule = rtsTrue;
+
+ switch (get_itbl(node)->type) {
+ case FETCH_ME_BQ:
+ // blocked on a remote fetch: charge the wait to fetchtime
+ ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
+ break;
+ case RBH:
+ case FETCH_ME:
+ case BLACKHOLE_BQ:
+ // blocked on evaluation: charge the wait to blocktime
+ ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat;
+ break;
+#ifdef DIST
+ case MVAR:
+ break;
+#endif
+ default:
+ barf("{unblockOne}Daq Qagh: unexpected closure in blocking queue");
+ }
+ }
+}
+#endif
+
+#if defined(GRAN)
+// unblockOne (GRAN variant): wake one element of a blocking queue by
+// posting a ResumeThread (local) or UnblockThread (remote) event.
+// NOTE(review): declared to return StgBlockingQueueElement * but no
+// return statement is present — falling off the end is undefined
+// behaviour if callers use the result; confirm the intended value
+// (likely the next queue element) before enabling GRAN builds.
+StgBlockingQueueElement *
+unblockOne(StgBlockingQueueElement *bqe, StgClosure *node)
+{
+ StgTSO *tso;
+ PEs node_loc, tso_loc;
+
+ node_loc = where_is(node); // should be lifted out of loop
+ tso = (StgTSO *)bqe; // wastes an assignment to get the type right
+ tso_loc = where_is((StgClosure *)tso);
+ if (IS_LOCAL_TO(PROCS(node),tso_loc)) { // TSO is local
+ /* !fake_fetch => TSO is on CurrentProc is same as IS_LOCAL_TO */
+ ASSERT(CurrentProc!=node_loc || tso_loc==CurrentProc);
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.lunblocktime;
+ // insertThread(tso, node_loc);
+ new_event(tso_loc, tso_loc, CurrentTime[CurrentProc],
+ ResumeThread,
+ tso, node, (rtsSpark*)NULL);
+ tso->link = END_TSO_QUEUE; // overwrite link just to be sure
+ // len_local++;
+ // len++;
+ } else { // TSO is remote (actually should be FMBQ)
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime +
+ RtsFlags.GranFlags.Costs.gunblocktime +
+ RtsFlags.GranFlags.Costs.latency;
+ new_event(tso_loc, CurrentProc, CurrentTime[CurrentProc],
+ UnblockThread,
+ tso, node, (rtsSpark*)NULL);
+ tso->link = END_TSO_QUEUE; // overwrite link just to be sure
+ // len++;
+ }
+ /* the thread-queue-overhead is accounted for in either Resume or UnblockThread */
+ IF_GRAN_DEBUG(bq,
+ debugBelch(" %s TSO %d (%p) [PE %d] (block_info.closure=%p) (next=%p) ,",
+ (node_loc==tso_loc ? "Local" : "Global"),
+ tso->id, tso, CurrentProc, tso->block_info.closure, tso->link));
+ tso->block_info.closure = NULL;
+ IF_DEBUG(scheduler,debugBelch("-- Waking up thread %ld (%p)\n",
+ tso->id, tso));
+}
+#elif defined(PARALLEL_HASKELL)
/* Wake one element of a blocking queue (PARALLEL_HASKELL variant).
 *
 * A TSO goes back on the run queue; a BLOCKED_FETCH goes onto the
 * PendingFetches list.  Returns the successor of 'bqe' in the queue.
 *
 * NOTE(review): in the DEBUG CONSTR case (an RBHSave closure) and, in a
 * non-DEBUG build, for any unexpected closure type, 'next' is returned
 * uninitialized — presumably callers stop walking before that happens;
 * verify against awakenBlockedQueue's loop condition.
 */
StgBlockingQueueElement *
unblockOne(StgBlockingQueueElement *bqe, StgClosure *node)
{
    StgBlockingQueueElement *next;

    switch (get_itbl(bqe)->type) {
    case TSO:
        ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked);
        /* if it's a TSO just push it onto the run_queue */
        next = bqe->link;
        ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging?
        APPEND_TO_RUN_QUEUE((StgTSO *)bqe);
        threadRunnable();
        unblockCount(bqe, node);
        /* reset blocking status after dumping event */
        ((StgTSO *)bqe)->why_blocked = NotBlocked;
        break;

    case BLOCKED_FETCH:
        /* if it's a BLOCKED_FETCH put it on the PendingFetches list */
        next = bqe->link;
        bqe->link = (StgBlockingQueueElement *)PendingFetches;
        PendingFetches = (StgBlockedFetch *)bqe;
        break;

# if defined(DEBUG)
        /* can ignore this case in a non-debugging setup;
           see comments on RBHSave closures above */
    case CONSTR:
        /* check that the closure is an RBHSave closure */
        ASSERT(get_itbl((StgClosure *)bqe) == &stg_RBH_Save_0_info ||
               get_itbl((StgClosure *)bqe) == &stg_RBH_Save_1_info ||
               get_itbl((StgClosure *)bqe) == &stg_RBH_Save_2_info);
        break;

    default:
        barf("{unblockOne}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n",
             get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe),
             (StgClosure *)bqe);
# endif
    }
    IF_PAR_DEBUG(bq, debugBelch(", %p (%s)\n", bqe, info_type((StgClosure*)bqe)));
    return next;
}
+#endif
+
/* Wake a single blocked TSO and make it runnable.
 *
 * Unlinks 'tso' from whatever chain it heads (returning its successor so
 * callers can walk a queue), clears its blocked status, and puts it on a
 * run queue.  In the threaded RTS the thread is either migrated to the
 * current Capability (if it already lives here, or it is unlocked and
 * wakeupMigrate is set) or handed back to the Capability it was last on.
 */
StgTSO *
unblockOne(Capability *cap, StgTSO *tso)
{
    StgTSO *next;

    ASSERT(get_itbl(tso)->type == TSO);
    ASSERT(tso->why_blocked != NotBlocked);

    // detach from the queue before requeueing
    tso->why_blocked = NotBlocked;
    next = tso->link;
    tso->link = END_TSO_QUEUE;

#if defined(THREADED_RTS)
    if (tso->cap == cap || (!tsoLocked(tso) && RtsFlags.ParFlags.wakeupMigrate)) {
        // We are waking up this thread on the current Capability, which
        // might involve migrating it from the Capability it was last on.
        if (tso->bound) {
            // keep the bound-task record consistent with the migration
            ASSERT(tso->bound->cap == tso->cap);
            tso->bound->cap = cap;
        }
        tso->cap = cap;
        appendToRunQueue(cap,tso);
        // we're holding a newly woken thread, make sure we context switch
        // quickly so we can migrate it if necessary.
        context_switch = 1;
    } else {
        // we'll try to wake it up on the Capability it was last on.
        wakeupThreadOnCapability(tso->cap, tso);
    }
#else
    appendToRunQueue(cap,tso);
    context_switch = 1;
#endif

    IF_DEBUG(scheduler,sched_belch("waking up thread %ld on cap %d", (long)tso->id, tso->cap->no));
    return next;
}
+
+
+#if defined(GRAN)
/* Wake an entire blocking queue (GranSim variant).
 *
 * Walks the queue rooted at 'q', waking each TSO via unblockOne.  If the
 * node was (in simulation terms) on another PE, a "fake fetch" first
 * adds the current PE to the node's bitmask.  If the queue belonged to
 * an RBH, the trailing RBHSave closure's payload is copied back into the
 * node's header.  Gathers queue-length statistics when enabled.
 */
void
awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
{
    StgBlockingQueueElement *bqe;
    PEs node_loc;
    nat len = 0;

    IF_GRAN_DEBUG(bq,
                  debugBelch("##-_ AwBQ for node %p on PE %d @ %ld by TSO %d (%p): \n", \
                             node, CurrentProc, CurrentTime[CurrentProc],
                             CurrentTSO->id, CurrentTSO));

    node_loc = where_is(node);

    ASSERT(q == END_BQ_QUEUE ||
           get_itbl(q)->type == TSO ||   // q is either a TSO or an RBHSave
           get_itbl(q)->type == CONSTR); // closure (type constructor)
    ASSERT(is_unique(node));

    /* FAKE FETCH: magically copy the node to the tso's proc;
       no Fetch necessary because in reality the node should not have been
       moved to the other PE in the first place
    */
    if (CurrentProc!=node_loc) {
        IF_GRAN_DEBUG(bq,
                      debugBelch("## node %p is on PE %d but CurrentProc is %d (TSO %d); assuming fake fetch and adjusting bitmask (old: %#x)\n",
                                 node, node_loc, CurrentProc, CurrentTSO->id,
                                 // CurrentTSO, where_is(CurrentTSO),
                                 node->header.gran.procs));
        node->header.gran.procs = (node->header.gran.procs) | PE_NUMBER(CurrentProc);
        IF_GRAN_DEBUG(bq,
                      debugBelch("## new bitmask of node %p is %#x\n",
                                 node, node->header.gran.procs));
        if (RtsFlags.GranFlags.GranSimStats.Global) {
            globalGranStats.tot_fake_fetches++;
        }
    }

    bqe = q;
    // ToDo: check: ASSERT(CurrentProc==node_loc);
    while (get_itbl(bqe)->type==TSO) { // q != END_TSO_QUEUE) {
        //next = bqe->link;
        /*
          bqe points to the current element in the queue
          next points to the next element in the queue
        */
        //tso = (StgTSO *)bqe; // wastes an assignment to get the type right
        //tso_loc = where_is(tso);
        len++;
        bqe = unblockOne(bqe, node);
    }

    /* if this is the BQ of an RBH, we have to put back the info ripped out of
       the closure to make room for the anchor of the BQ */
    if (bqe!=END_BQ_QUEUE) {
        ASSERT(get_itbl(node)->type == RBH && get_itbl(bqe)->type == CONSTR);
        /*
          ASSERT((info_ptr==&RBH_Save_0_info) ||
          (info_ptr==&RBH_Save_1_info) ||
          (info_ptr==&RBH_Save_2_info));
        */
        /* cf. convertToRBH in RBH.c for writing the RBHSave closure */
        ((StgRBH *)node)->blocking_queue = (StgBlockingQueueElement *)((StgRBHSave *)bqe)->payload[0];
        ((StgRBH *)node)->mut_link       = (StgMutClosure *)((StgRBHSave *)bqe)->payload[1];

        IF_GRAN_DEBUG(bq,
                      debugBelch("## Filled in RBH_Save for %p (%s) at end of AwBQ\n",
                                 node, info_type(node)));
    }

    /* statistics gathering */
    if (RtsFlags.GranFlags.GranSimStats.Global) {
        // globalGranStats.tot_bq_processing_time += bq_processing_time;
        globalGranStats.tot_bq_len += len;      // total length of all bqs awakened
        // globalGranStats.tot_bq_len_local += len_local; // same for local TSOs only
        globalGranStats.tot_awbq++;             // total no. of bqs awakened
    }
    IF_GRAN_DEBUG(bq,
                  debugBelch("## BQ Stats of %p: [%d entries] %s\n",
                             node, len, (bqe!=END_BQ_QUEUE) ? "RBH" : ""));
}
+#elif defined(PARALLEL_HASKELL)
/* Wake an entire blocking queue (PARALLEL_HASKELL variant).
 *
 * Walks the queue rooted at 'q', waking each TSO/BLOCKED_FETCH via
 * unblockOne; stops at the first element of any other type (END_BQ_QUEUE
 * or an RBHSave CONSTR).
 */
void
awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node)
{
    StgBlockingQueueElement *bqe;

    IF_PAR_DEBUG(verbose,
                 debugBelch("##-_ AwBQ for node %p on [%x]: \n",
                            node, mytid));
#ifdef DIST
    //RFP
    if(get_itbl(q)->type == CONSTR || q==END_BQ_QUEUE) {
        IF_PAR_DEBUG(verbose, debugBelch("## ... nothing to unblock so lets just return. RFP (BUG?)\n"));
        return;
    }
#endif

    ASSERT(q == END_BQ_QUEUE ||
           get_itbl(q)->type == TSO ||
           get_itbl(q)->type == BLOCKED_FETCH ||
           get_itbl(q)->type == CONSTR);

    bqe = q;
    while (get_itbl(bqe)->type==TSO ||
           get_itbl(bqe)->type==BLOCKED_FETCH) {
        bqe = unblockOne(bqe, node);
    }
}
+
+#else /* !GRAN && !PARALLEL_HASKELL */
+
+void
+awakenBlockedQueue(Capability *cap, StgTSO *tso)
+{
+ if (tso == NULL) return; // hack; see bug #1235728, and comments in
+ // Exception.cmm
+ while (tso != END_TSO_QUEUE) {
+ tso = unblockOne(cap,tso);
+ }
+}
+#endif
+
+/* ---------------------------------------------------------------------------
+ Interrupt execution
+ - usually called inside a signal handler so it mustn't do anything fancy.
+ ------------------------------------------------------------------------ */
+
/* Ask the RTS to stop running Haskell threads.
 *
 * Usually called from a signal handler (see the comment block above), so
 * it only sets flags — sched_state first, then the context-switch flag —
 * and, in the threaded RTS, prods all Capabilities so sleeping workers
 * notice the change.
 */
void
interruptStgRts(void)
{
    sched_state = SCHED_INTERRUPTING;
    context_switch = 1;
#if defined(THREADED_RTS)
    prodAllCapabilities();
#endif
}
+
+/* -----------------------------------------------------------------------------
+ Unblock a thread
+
+ This is for use when we raise an exception in another thread, which
+ may be blocked.
+ This has nothing to do with the UnblockThread event in GranSim. -- HWL
+ -------------------------------------------------------------------------- */
+
+#if defined(GRAN) || defined(PARALLEL_HASKELL)
+/*
+ NB: only the type of the blocking queue is different in GranSim and GUM
+ the operations on the queue-elements are the same
+ long live polymorphism!
+
+ Locks: sched_mutex is held upon entry and exit.
+
+*/
/* Remove 'tso' from whatever blocking queue it is on and make it
 * runnable (GranSim/PARALLEL_HASKELL variant, operating on
 * StgBlockingQueueElement queues).
 *
 * Each case does an in-place unlink from the relevant queue (MVar,
 * blackhole BQ, exception queue, I/O queues, sleeping queue), then falls
 * to 'done:', which resets the TSO's blocking state and pushes it on the
 * run queue.  Locks: sched_mutex is held upon entry and exit (see the
 * comment block above).
 */
static void
unblockThread(Capability *cap, StgTSO *tso)
{
    StgBlockingQueueElement *t, **last;

    switch (tso->why_blocked) {

    case NotBlocked:
        return;  /* not blocked */

    case BlockedOnSTM:
        // Be careful: nothing to do here!  We tell the scheduler that the thread
        // is runnable and we leave it to the stack-walking code to abort the
        // transaction while unwinding the stack.  We should perhaps have a debugging
        // test to make sure that this really happens and that the 'zombie' transaction
        // does not get committed.
        goto done;

    case BlockedOnMVar:
        ASSERT(get_itbl(tso->block_info.closure)->type == MVAR);
        {
            // unlink from the MVar's waiting list, fixing up 'tail' if we
            // removed the last element
            StgBlockingQueueElement *last_tso = END_BQ_QUEUE;
            StgMVar *mvar = (StgMVar *)(tso->block_info.closure);

            last = (StgBlockingQueueElement **)&mvar->head;
            for (t = (StgBlockingQueueElement *)mvar->head;
                 t != END_BQ_QUEUE;
                 last = &t->link, last_tso = t, t = t->link) {
                if (t == (StgBlockingQueueElement *)tso) {
                    *last = (StgBlockingQueueElement *)tso->link;
                    if (mvar->tail == tso) {
                        mvar->tail = (StgTSO *)last_tso;
                    }
                    goto done;
                }
            }
            barf("unblockThread (MVAR): TSO not found");
        }

    case BlockedOnBlackHole:
        ASSERT(get_itbl(tso->block_info.closure)->type == BLACKHOLE_BQ);
        {
            // unlink from the blackhole's blocking queue
            StgBlockingQueue *bq = (StgBlockingQueue *)(tso->block_info.closure);

            last = &bq->blocking_queue;
            for (t = bq->blocking_queue;
                 t != END_BQ_QUEUE;
                 last = &t->link, t = t->link) {
                if (t == (StgBlockingQueueElement *)tso) {
                    *last = (StgBlockingQueueElement *)tso->link;
                    goto done;
                }
            }
            barf("unblockThread (BLACKHOLE): TSO not found");
        }

    case BlockedOnException:
        {
            // unlink from the target thread's blocked_exceptions queue,
            // following ThreadRelocated forwarding first
            StgTSO *target  = tso->block_info.tso;

            ASSERT(get_itbl(target)->type == TSO);

            if (target->what_next == ThreadRelocated) {
                target = target->link;
                ASSERT(get_itbl(target)->type == TSO);
            }

            ASSERT(target->blocked_exceptions != NULL);

            last = (StgBlockingQueueElement **)&target->blocked_exceptions;
            for (t = (StgBlockingQueueElement *)target->blocked_exceptions;
                 t != END_BQ_QUEUE;
                 last = &t->link, t = t->link) {
                ASSERT(get_itbl(t)->type == TSO);
                if (t == (StgBlockingQueueElement *)tso) {
                    *last = (StgBlockingQueueElement *)tso->link;
                    goto done;
                }
            }
            barf("unblockThread (Exception): TSO not found");
        }

    case BlockedOnRead:
    case BlockedOnWrite:
#if defined(mingw32_HOST_OS)
    case BlockedOnDoProc:
#endif
        {
            /* take TSO off blocked_queue */
            StgBlockingQueueElement *prev = NULL;
            for (t = (StgBlockingQueueElement *)blocked_queue_hd; t != END_BQ_QUEUE;
                 prev = t, t = t->link) {
                if (t == (StgBlockingQueueElement *)tso) {
                    if (prev == NULL) {
                        blocked_queue_hd = (StgTSO *)t->link;
                        if ((StgBlockingQueueElement *)blocked_queue_tl == t) {
                            blocked_queue_tl = END_TSO_QUEUE;
                        }
                    } else {
                        prev->link = t->link;
                        if ((StgBlockingQueueElement *)blocked_queue_tl == t) {
                            blocked_queue_tl = (StgTSO *)prev;
                        }
                    }
#if defined(mingw32_HOST_OS)
                    /* (Cooperatively) signal that the worker thread should abort
                     * the request.
                     */
                    abandonWorkRequest(tso->block_info.async_result->reqID);
#endif
                    goto done;
                }
            }
            barf("unblockThread (I/O): TSO not found");
        }

    case BlockedOnDelay:
        {
            /* take TSO off sleeping_queue */
            StgBlockingQueueElement *prev = NULL;
            for (t = (StgBlockingQueueElement *)sleeping_queue; t != END_BQ_QUEUE;
                 prev = t, t = t->link) {
                if (t == (StgBlockingQueueElement *)tso) {
                    if (prev == NULL) {
                        sleeping_queue = (StgTSO *)t->link;
                    } else {
                        prev->link = t->link;
                    }
                    goto done;
                }
            }
            barf("unblockThread (delay): TSO not found");
        }

    default:
        barf("unblockThread");
    }

done:
    // common exit: clear blocking state and make the thread runnable
    tso->link = END_TSO_QUEUE;
    tso->why_blocked = NotBlocked;
    tso->block_info.closure = NULL;
    pushOnRunQueue(cap,tso);
}
+#else
/* Remove 'tso' from whatever blocking queue it is on and make it
 * runnable (standard RTS variant).
 *
 * Each case unlinks the TSO in place from the relevant queue (MVar
 * waiting list, global blackhole queue, a target thread's
 * blocked_exceptions, or — in the non-threaded RTS — the I/O and
 * sleeping queues), then jumps to 'done:', which clears the blocking
 * state, appends the TSO to this Capability's run queue, and migrates
 * the TSO (and its bound task, if any) to this Capability.
 */
static void
unblockThread(Capability *cap, StgTSO *tso)
{
    StgTSO *t, **last;

    /* To avoid locking unnecessarily. */
    if (tso->why_blocked == NotBlocked) {
        return;
    }

    switch (tso->why_blocked) {

    case BlockedOnSTM:
        // Be careful: nothing to do here!  We tell the scheduler that the thread
        // is runnable and we leave it to the stack-walking code to abort the
        // transaction while unwinding the stack.  We should perhaps have a debugging
        // test to make sure that this really happens and that the 'zombie' transaction
        // does not get committed.
        goto done;

    case BlockedOnMVar:
        ASSERT(get_itbl(tso->block_info.closure)->type == MVAR);
        {
            // unlink from the MVar's waiting list, fixing up 'tail' if we
            // removed the last element
            StgTSO *last_tso = END_TSO_QUEUE;
            StgMVar *mvar = (StgMVar *)(tso->block_info.closure);

            last = &mvar->head;
            for (t = mvar->head; t != END_TSO_QUEUE;
                 last = &t->link, last_tso = t, t = t->link) {
                if (t == tso) {
                    *last = tso->link;
                    if (mvar->tail == tso) {
                        mvar->tail = last_tso;
                    }
                    goto done;
                }
            }
            barf("unblockThread (MVAR): TSO not found");
        }

    case BlockedOnBlackHole:
        {
            // unlink from the global blackhole queue
            last = &blackhole_queue;
            for (t = blackhole_queue; t != END_TSO_QUEUE;
                 last = &t->link, t = t->link) {
                if (t == tso) {
                    *last = tso->link;
                    goto done;
                }
            }
            barf("unblockThread (BLACKHOLE): TSO not found");
        }

    case BlockedOnException:
        {
            // unlink from the target thread's blocked_exceptions queue,
            // following any chain of ThreadRelocated forwardings first
            StgTSO *target  = tso->block_info.tso;

            ASSERT(get_itbl(target)->type == TSO);

            while (target->what_next == ThreadRelocated) {
                target = target->link;
                ASSERT(get_itbl(target)->type == TSO);
            }

            ASSERT(target->blocked_exceptions != NULL);

            last = &target->blocked_exceptions;
            for (t = target->blocked_exceptions; t != END_TSO_QUEUE;
                 last = &t->link, t = t->link) {
                ASSERT(get_itbl(t)->type == TSO);
                if (t == tso) {
                    *last = tso->link;
                    goto done;
                }
            }
            barf("unblockThread (Exception): TSO not found");
        }

#if !defined(THREADED_RTS)
    case BlockedOnRead:
    case BlockedOnWrite:
#if defined(mingw32_HOST_OS)
    case BlockedOnDoProc:
#endif
        {
            // take TSO off blocked_queue, maintaining the tail pointer
            StgTSO *prev = NULL;
            for (t = blocked_queue_hd; t != END_TSO_QUEUE;
                 prev = t, t = t->link) {
                if (t == tso) {
                    if (prev == NULL) {
                        blocked_queue_hd = t->link;
                        if (blocked_queue_tl == t) {
                            blocked_queue_tl = END_TSO_QUEUE;
                        }
                    } else {
                        prev->link = t->link;
                        if (blocked_queue_tl == t) {
                            blocked_queue_tl = prev;
                        }
                    }
#if defined(mingw32_HOST_OS)
                    /* (Cooperatively) signal that the worker thread should abort
                     * the request.
                     */
                    abandonWorkRequest(tso->block_info.async_result->reqID);
#endif
                    goto done;
                }
            }
            barf("unblockThread (I/O): TSO not found");
        }

    case BlockedOnDelay:
        {
            // take TSO off sleeping_queue
            StgTSO *prev = NULL;
            for (t = sleeping_queue; t != END_TSO_QUEUE;
                 prev = t, t = t->link) {
                if (t == tso) {
                    if (prev == NULL) {
                        sleeping_queue = t->link;
                    } else {
                        prev->link = t->link;
                    }
                    goto done;
                }
            }
            barf("unblockThread (delay): TSO not found");
        }
#endif

    default:
        barf("unblockThread");
    }

done:
    // common exit: clear blocking state and make the thread runnable here
    tso->link = END_TSO_QUEUE;
    tso->why_blocked = NotBlocked;
    tso->block_info.closure = NULL;
    appendToRunQueue(cap,tso);

    // We might have just migrated this TSO to our Capability:
    if (tso->bound) {
        tso->bound->cap = cap;
    }
    tso->cap = cap;
}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * checkBlackHoles()
+ *
+ * Check the blackhole_queue for threads that can be woken up. We do
+ * this periodically: before every GC, and whenever the run queue is
+ * empty.
+ *
+ * An elegant solution might be to just wake up all the blocked
+ * threads with awakenBlockedQueue occasionally: they'll go back to
+ * sleep again if the object is still a BLACKHOLE. Unfortunately this
+ * doesn't give us a way to tell whether we've actually managed to
+ * wake up any threads, so we would be busy-waiting.
+ *
+ * -------------------------------------------------------------------------- */
+
/* Walk the global blackhole_queue, waking any thread whose blackhole has
 * since been updated (i.e. its block_info closure is no longer a
 * BLACKHOLE/CAF_BLACKHOLE).  Woken threads are spliced out of the queue
 * via the 'prev' back-pointer and unblockOne's returned successor.
 * Returns rtsTrue iff at least one thread was woken.  Caller must hold
 * sched_mutex (asserted below).
 */
static rtsBool
checkBlackHoles (Capability *cap)
{
    StgTSO **prev, *t;
    rtsBool any_woke_up = rtsFalse;
    StgHalfWord type;

    // blackhole_queue is global:
    ASSERT_LOCK_HELD(&sched_mutex);

    IF_DEBUG(scheduler, sched_belch("checking threads blocked on black holes"));

    // ASSUMES: sched_mutex
    prev = &blackhole_queue;
    t = blackhole_queue;
    while (t != END_TSO_QUEUE) {
        ASSERT(t->why_blocked == BlockedOnBlackHole);
        type = get_itbl(t->block_info.closure)->type;
        if (type != BLACKHOLE && type != CAF_BLACKHOLE) {
            // closure has been updated: wake the thread and splice it out
            IF_DEBUG(sanity,checkTSO(t));
            t = unblockOne(cap, t);
            // urk, the threads migrate to the current capability
            // here, but we'd like to keep them on the original one.
            *prev = t;
            any_woke_up = rtsTrue;
        } else {
            prev = &t->link;
            t = t->link;
        }
    }

    return any_woke_up;
}
+
+/* -----------------------------------------------------------------------------
+ * raiseAsync()
+ *
+ * The following function implements the magic for raising an
+ * asynchronous exception in an existing thread.
+ *
+ * We first remove the thread from any queue on which it might be
+ * blocked. The possible blockages are MVARs and BLACKHOLE_BQs.
+ *
+ * We strip the stack down to the innermost CATCH_FRAME, building
+ * thunks in the heap for all the active computations, so they can
+ * be restarted if necessary. When we reach a CATCH_FRAME, we build
+ * an application of the handler to the exception, and push it on
+ * the top of the stack.
+ *
+ * How exactly do we save all the active computations? We create an
+ * AP_STACK for every UpdateFrame on the stack. Entering one of these
+ * AP_STACKs pushes everything from the corresponding update frame
+ * upwards onto the stack. (Actually, it pushes everything up to the
 * next update frame plus a pointer to the next AP_STACK object.)
+ * Entering the next AP_STACK object pushes more onto the stack until we
+ * reach the last AP_STACK object - at which point the stack should look
+ * exactly as it did when we killed the TSO and we can continue
+ * execution by entering the closure on top of the stack.
+ *
+ * We can also kill a thread entirely - this happens if either (a) the
+ * exception passed to raiseAsync is NULL, or (b) there's no
+ * CATCH_FRAME on the stack. In either case, we strip the entire
+ * stack and replace the thread with a zombie.
+ *
+ * ToDo: in THREADED_RTS mode, this function is only safe if either
+ * (a) we hold all the Capabilities (eg. in GC, or if there is only
+ * one Capability), or (b) we own the Capability that the TSO is
+ * currently blocked on or on the run queue of.
+ *
+ * -------------------------------------------------------------------------- */
+
/* Raise the given exception asynchronously in 'tso' (see the comment
 * block above for the full description).  A NULL 'exception' kills the
 * thread outright.  Thin wrapper over raiseAsync_ with no atomically
 * stop-point and no stop_here limit.
 */
void
raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception)
{
    raiseAsync_(cap, tso, exception, rtsFalse, NULL);
}
+
/* Strip 'tso's stack down to (but not past) 'stop_here', saving the
 * suspended computation in AP_STACK closures, without raising any
 * exception (NULL is passed as the exception to raiseAsync_).
 */
void
suspendComputation(Capability *cap, StgTSO *tso, StgPtr stop_here)
{
    raiseAsync_(cap, tso, NULL, rtsFalse, stop_here);
}
+
/* Worker for raiseAsync/suspendComputation; see the long comment block
 * above for the overall algorithm.  Walks the stack frame by frame:
 * UPDATE_FRAMEs are converted into AP_STACK closures (freezing the
 * computation), a CATCH_FRAME receives the exception, a STOP_FRAME kills
 * the thread, and ATOMICALLY/CATCH_RETRY frames condemn or abort the
 * enclosing STM transaction.  Stops early at 'stop_here' if non-NULL.
 */
static void
raiseAsync_(Capability *cap, StgTSO *tso, StgClosure *exception,
            rtsBool stop_at_atomically, StgPtr stop_here)
{
    StgRetInfoTable *info;
    StgPtr sp, frame;
    nat i;

    // Thread already dead?
    if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
        return;
    }

    IF_DEBUG(scheduler,
             sched_belch("raising exception in thread %ld.", (long)tso->id));

    // Remove it from any blocking queues
    unblockThread(cap,tso);

    // mark it dirty; we're about to change its stack.
    dirtyTSO(tso);

    sp = tso->sp;

    // The stack freezing code assumes there's a closure pointer on
    // the top of the stack, so we have to arrange that this is the case...
    //
    if (sp[0] == (W_)&stg_enter_info) {
        sp++;
    } else {
        sp--;
        sp[0] = (W_)&stg_dummy_ret_closure;
    }

    frame = sp + 1;
    while (stop_here == NULL || frame < stop_here) {

        // 1. Let the top of the stack be the "current closure"
        //
        // 2. Walk up the stack until we find either an UPDATE_FRAME or a
        // CATCH_FRAME.
        //
        // 3. If it's an UPDATE_FRAME, then make an AP_STACK containing the
        // current closure applied to the chunk of stack up to (but not
        // including) the update frame.  This closure becomes the "current
        // closure".  Go back to step 2.
        //
        // 4. If it's a CATCH_FRAME, then leave the exception handler on
        // top of the stack applied to the exception.
        //
        // 5. If it's a STOP_FRAME, then kill the thread.
        //
        // NB: if we pass an ATOMICALLY_FRAME then abort the associated
        // transaction

        info = get_ret_itbl((StgClosure *)frame);

        switch (info->i.type) {

        case UPDATE_FRAME:
        {
            StgAP_STACK * ap;
            nat words;

            // First build an AP_STACK consisting of the stack chunk above the
            // current update frame, with the top word on the stack as the
            // fun field.
            //
            words = frame - sp - 1;
            ap = (StgAP_STACK *)allocateLocal(cap,AP_STACK_sizeW(words));

            ap->size = words;
            ap->fun  = (StgClosure *)sp[0];
            sp++;
            for(i=0; i < (nat)words; ++i) {
                ap->payload[i] = (StgClosure *)*sp++;
            }

            SET_HDR(ap,&stg_AP_STACK_info,
                    ((StgClosure *)frame)->header.prof.ccs /* ToDo */);
            TICK_ALLOC_UP_THK(words+1,0);

            IF_DEBUG(scheduler,
                     debugBelch("sched: Updating ");
                     printPtr((P_)((StgUpdateFrame *)frame)->updatee);
                     debugBelch(" with ");
                     printObj((StgClosure *)ap);
                );

            // Replace the updatee with an indirection
            //
            // Warning: if we're in a loop, more than one update frame on
            // the stack may point to the same object.  Be careful not to
            // overwrite an IND_OLDGEN in this case, because we'll screw
            // up the mutable lists.  To be on the safe side, don't
            // overwrite any kind of indirection at all.  See also
            // threadSqueezeStack in GC.c, where we have to make a similar
            // check.
            //
            if (!closure_IND(((StgUpdateFrame *)frame)->updatee)) {
                // revert the black hole
                UPD_IND_NOLOCK(((StgUpdateFrame *)frame)->updatee,
                               (StgClosure *)ap);
            }
            sp += sizeofW(StgUpdateFrame) - 1;
            sp[0] = (W_)ap; // push onto stack
            frame = sp + 1;
            continue; //no need to bump frame
        }

        case STOP_FRAME:
            // We've stripped the entire stack, the thread is now dead.
            tso->what_next = ThreadKilled;
            tso->sp = frame + sizeofW(StgStopFrame);
            return;

        case CATCH_FRAME:
            // If we find a CATCH_FRAME, and we've got an exception to raise,
            // then build the THUNK raise(exception), and leave it on
            // top of the CATCH_FRAME ready to enter.
            //
        {
#ifdef PROFILING
            StgCatchFrame *cf = (StgCatchFrame *)frame;
#endif
            StgThunk *raise;

            if (exception == NULL) break;

            // we've got an exception to raise, so let's pass it to the
            // handler in this frame.
            //
            raise = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
            TICK_ALLOC_SE_THK(1,0);
            SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
            raise->payload[0] = exception;

            // throw away the stack from Sp up to the CATCH_FRAME.
            //
            sp = frame - 1;

            /* Ensure that async exceptions are blocked now, so we don't get
             * a surprise exception before we get around to executing the
             * handler.
             */
            if (tso->blocked_exceptions == NULL) {
                tso->blocked_exceptions = END_TSO_QUEUE;
            }

            /* Put the newly-built THUNK on top of the stack, ready to execute
             * when the thread restarts.
             */
            sp[0] = (W_)raise;
            sp[-1] = (W_)&stg_enter_info;
            tso->sp = sp-1;
            tso->what_next = ThreadRunGHC;
            IF_DEBUG(sanity, checkTSO(tso));
            return;
        }

        case ATOMICALLY_FRAME:
            if (stop_at_atomically) {
                ASSERT(stmGetEnclosingTRec(tso->trec) == NO_TREC);
                stmCondemnTransaction(cap, tso -> trec);
#ifdef REG_R1
                tso->sp = frame;
#else
                // R1 is not a register: the return convention for IO in
                // this case puts the return value on the stack, so we
                // need to set up the stack to return to the atomically
                // frame properly...
                tso->sp = frame - 2;
                tso->sp[1] = (StgWord) &stg_NO_FINALIZER_closure; // why not?
                tso->sp[0] = (StgWord) &stg_ut_1_0_unreg_info;
#endif
                tso->what_next = ThreadRunGHC;
                return;
            }
            // Not stop_at_atomically... fall through and abort the
            // transaction.
            /* fallthrough */

        case CATCH_RETRY_FRAME:
            // IF we find an ATOMICALLY_FRAME then we abort the
            // current transaction and propagate the exception.  In
            // this case (unlike ordinary exceptions) we do not care
            // whether the transaction is valid or not because its
            // possible validity cannot have caused the exception
            // and will not be visible after the abort.
            IF_DEBUG(stm,
                     debugBelch("Found atomically block delivering async exception\n"));
            StgTRecHeader *trec = tso -> trec;
            StgTRecHeader *outer = stmGetEnclosingTRec(trec);
            stmAbortTransaction(cap, trec);
            tso -> trec = outer;
            break;

        default:
            break;
        }

        // move on to the next stack frame
        frame += stack_frame_sizeW((StgClosure *)frame);
    }

    // if we got here, then we stopped at stop_here
    ASSERT(stop_here != NULL);
}
+
+/* -----------------------------------------------------------------------------
+ Deleting threads
+
+ This is used for interruption (^C) and forking, and corresponds to
+ raising an exception but without letting the thread catch the
+ exception.
+ -------------------------------------------------------------------------- */
+
+static void
+deleteThread (Capability *cap, StgTSO *tso)
+{
+ if (tso->why_blocked != BlockedOnCCall &&
+ tso->why_blocked != BlockedOnCCall_NoUnblockExc) {
+ raiseAsync(cap,tso,NULL);
+ }
+}
+
+#ifdef FORKPROCESS_PRIMOP_SUPPORTED
+static void
+deleteThread_(Capability *cap, StgTSO *tso)
+{ // for forkProcess only:
+ // like deleteThread(), but we delete threads in foreign calls, too.
+
+ if (tso->why_blocked == BlockedOnCCall ||
+ tso->why_blocked == BlockedOnCCall_NoUnblockExc) {
+ unblockOne(cap,tso);
+ tso->what_next = ThreadKilled;
+ } else {
+ deleteThread(cap,tso);
+ }
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ raiseExceptionHelper
+
   This function is called by the raise# primitive, just so that we can
   move some of the tricky bits of raising an exception from C-- into
   C.  Who knows, it might be a useful reusable thing here too.
+ -------------------------------------------------------------------------- */
+
/* Helper for the raise# primitive (see comment block above).
 *
 * Walks 'tso's stack looking for a frame that handles the exception;
 * every UPDATE_FRAME passed on the way has its updatee overwritten with
 * an indirection to a shared 'raise# exception' thunk (built lazily, at
 * most once).  Leaves tso->sp pointing at the frame found and returns
 * its frame type (ATOMICALLY_FRAME / CATCH_FRAME / CATCH_STM_FRAME /
 * STOP_FRAME) so the C-- caller can dispatch on it.
 */
StgWord
raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
{
    Capability *cap = regTableToCapability(reg);
    StgThunk *raise_closure = NULL;
    StgPtr p, next;
    StgRetInfoTable *info;
    //
    // This closure represents the expression 'raise# E' where E
    // is the exception raise.  It is used to overwrite all the
    // thunks which are currently under evaluation.
    //

    // OLD COMMENT (we don't have MIN_UPD_SIZE now):
    // LDV profiling: stg_raise_info has THUNK as its closure
    // type. Since a THUNK takes at least MIN_UPD_SIZE words in its
    // payload, MIN_UPD_SIZE is more appropriate than 1.  It seems that
    // 1 does not cause any problem unless profiling is performed.
    // However, when LDV profiling goes on, we need to linearly scan
    // small object pool, where raise_closure is stored, so we should
    // use MIN_UPD_SIZE.
    //
    // raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
    //                                 sizeofW(StgClosure)+1);
    //

    //
    // Walk up the stack, looking for the catch frame.  On the way,
    // we update any closures pointed to from update frames with the
    // raise closure that we just built.
    //
    p = tso->sp;
    while(1) {
        info = get_ret_itbl((StgClosure *)p);
        next = p + stack_frame_sizeW((StgClosure *)p);
        switch (info->i.type) {

        case UPDATE_FRAME:
            // Only create raise_closure if we need to.
            if (raise_closure == NULL) {
                raise_closure =
                    (StgThunk *)allocateLocal(cap,sizeofW(StgThunk)+1);
                SET_HDR(raise_closure, &stg_raise_info, CCCS);
                raise_closure->payload[0] = exception;
            }
            UPD_IND(((StgUpdateFrame *)p)->updatee,(StgClosure *)raise_closure);
            p = next;
            continue;

        case ATOMICALLY_FRAME:
            IF_DEBUG(stm, debugBelch("Found ATOMICALLY_FRAME at %p\n", p));
            tso->sp = p;
            return ATOMICALLY_FRAME;

        case CATCH_FRAME:
            tso->sp = p;
            return CATCH_FRAME;

        case CATCH_STM_FRAME:
            IF_DEBUG(stm, debugBelch("Found CATCH_STM_FRAME at %p\n", p));
            tso->sp = p;
            return CATCH_STM_FRAME;

        case STOP_FRAME:
            tso->sp = p;
            return STOP_FRAME;

        case CATCH_RETRY_FRAME:
        default:
            p = next;
            continue;
        }
    }
}
+
+
+/* -----------------------------------------------------------------------------
+ findRetryFrameHelper
+
+ This function is called by the retry# primitive. It traverses the stack
+ leaving tso->sp referring to the frame which should handle the retry.
+
+ This should either be a CATCH_RETRY_FRAME (if the retry# is within an orElse#)
+ or should be a ATOMICALLY_FRAME (if the retry# reaches the top level).
+
+ We skip CATCH_STM_FRAMEs because retries are not considered to be exceptions,
+ despite the similar implementation.
+
+ We should not expect to see CATCH_FRAME or STOP_FRAME because those should
+ not be created within memory transactions.
+ -------------------------------------------------------------------------- */
+
+StgWord
+findRetryFrameHelper (StgTSO *tso)
+{
+ StgPtr p, next;
+ StgRetInfoTable *info;
+
+ p = tso -> sp;
+ while (1) {
+ info = get_ret_itbl((StgClosure *)p);
+ next = p + stack_frame_sizeW((StgClosure *)p);
+ switch (info->i.type) {
+
+ case ATOMICALLY_FRAME:
+ IF_DEBUG(stm, debugBelch("Found ATOMICALLY_FRAME at %p during retrry\n", p));
+ tso->sp = p;
+ return ATOMICALLY_FRAME;
+
+ case CATCH_RETRY_FRAME:
+ IF_DEBUG(stm, debugBelch("Found CATCH_RETRY_FRAME at %p during retrry\n", p));
+ tso->sp = p;
+ return CATCH_RETRY_FRAME;
+
+ case CATCH_STM_FRAME:
+ default:
+ ASSERT(info->i.type != CATCH_FRAME);
+ ASSERT(info->i.type != STOP_FRAME);
+ p = next;
+ continue;
+ }
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ resurrectThreads is called after garbage collection on the list of
+ threads found to be garbage. Each of these threads will be woken
+ up and sent a signal: BlockedOnDeadMVar if the thread was blocked
+ on an MVar, or NonTermination if the thread was blocked on a Black
+ Hole.
+
+ Locks: assumes we hold *all* the capabilities.
+ -------------------------------------------------------------------------- */
+
/* Called after GC with the list of threads found to be garbage (see the
 * comment block above).  Each is relinked onto all_threads and woken on
 * the Capability it was last on with an appropriate exception:
 * BlockedOnDeadMVar for MVar/exception blockage, NonTermination for a
 * black hole, BlockedIndefinitely for STM.  Locks: assumes we hold
 * *all* the capabilities.
 */
void
resurrectThreads (StgTSO *threads)
{
    StgTSO *tso, *next;
    Capability *cap;

    for (tso = threads; tso != END_TSO_QUEUE; tso = next) {
        next = tso->global_link;
        // put it back on the global thread list
        tso->global_link = all_threads;
        all_threads = tso;
        IF_DEBUG(scheduler, sched_belch("resurrecting thread %d", tso->id));

        // Wake up the thread on the Capability it was last on
        cap = tso->cap;

        switch (tso->why_blocked) {
        case BlockedOnMVar:
        case BlockedOnException:
            /* Called by GC - sched_mutex lock is currently held. */
            raiseAsync(cap, tso,(StgClosure *)BlockedOnDeadMVar_closure);
            break;
        case BlockedOnBlackHole:
            raiseAsync(cap, tso,(StgClosure *)NonTermination_closure);
            break;
        case BlockedOnSTM:
            raiseAsync(cap, tso,(StgClosure *)BlockedIndefinitely_closure);
            break;
        case NotBlocked:
            /* This might happen if the thread was blocked on a black hole
             * belonging to a thread that we've just woken up (raiseAsync
             * can wake up threads, remember...).
             */
            continue;
        default:
            barf("resurrectThreads: thread blocked in a strange way");
        }
    }
}
+
+/* ----------------------------------------------------------------------------
+ * Debugging: why is a thread blocked
+ * [Also provides useful information when debugging threaded programs
+ * at the Haskell source code level, so enable outside of DEBUG. --sof 7/02]
+ ------------------------------------------------------------------------- */
+
+#if DEBUG
/* Debug helper: print (without newline) a human-readable description of
 * why 'tso' is blocked, based on tso->why_blocked and the matching
 * member of tso->block_info.
 */
static void
printThreadBlockage(StgTSO *tso)
{
    switch (tso->why_blocked) {
    case BlockedOnRead:
        debugBelch("is blocked on read from fd %d", (int)(tso->block_info.fd));
        break;
    case BlockedOnWrite:
        debugBelch("is blocked on write to fd %d", (int)(tso->block_info.fd));
        break;
#if defined(mingw32_HOST_OS)
    case BlockedOnDoProc:
        debugBelch("is blocked on proc (request: %ld)", tso->block_info.async_result->reqID);
        break;
#endif
    case BlockedOnDelay:
        debugBelch("is blocked until %ld", (long)(tso->block_info.target));
        break;
    case BlockedOnMVar:
        debugBelch("is blocked on an MVar @ %p", tso->block_info.closure);
        break;
    case BlockedOnException:
        debugBelch("is blocked on delivering an exception to thread %d",
                   tso->block_info.tso->id);
        break;
    case BlockedOnBlackHole:
        debugBelch("is blocked on a black hole");
        break;
    case NotBlocked:
        debugBelch("is not blocked");
        break;
#if defined(PARALLEL_HASKELL)
    case BlockedOnGA:
        debugBelch("is blocked on global address; local FM_BQ is %p (%s)",
                   tso->block_info.closure, info_type(tso->block_info.closure));
        break;
    case BlockedOnGA_NoSend:
        debugBelch("is blocked on global address (no send); local FM_BQ is %p (%s)",
                   tso->block_info.closure, info_type(tso->block_info.closure));
        break;
#endif
    case BlockedOnCCall:
        debugBelch("is blocked on an external call");
        break;
    case BlockedOnCCall_NoUnblockExc:
        debugBelch("is blocked on an external call (exceptions were already blocked)");
        break;
    case BlockedOnSTM:
        debugBelch("is blocked on an STM operation");
        break;
    default:
        barf("printThreadBlockage: strange tso->why_blocked: %d for TSO %d (%d)",
             tso->why_blocked, tso->id, tso);
    }
}
+
+void
+printThreadStatus(StgTSO *t)
+{
+ debugBelch("\tthread %4d @ %p ", t->id, (void *)t);
+ {
+ void *label = lookupThreadLabel(t->id);
+ if (label) debugBelch("[\"%s\"] ",(char *)label);
+ }
+ if (t->what_next == ThreadRelocated) {
+ debugBelch("has been relocated...\n");
+ } else {
+ switch (t->what_next) {
+ case ThreadKilled:
+ debugBelch("has been killed");
+ break;
+ case ThreadComplete:
+ debugBelch("has completed");
+ break;
+ default:
+ printThreadBlockage(t);
+ }
+ debugBelch("\n");
+ }
+}
+
+void
+printAllThreads(void)
+{
+ StgTSO *t, *next;
+ nat i;
+ Capability *cap;
+
+# if defined(GRAN)
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+ ullong_format_string(TIME_ON_PROC(CurrentProc),
+ time_string, rtsFalse/*no commas!*/);
+
+ debugBelch("all threads at [%s]:\n", time_string);
+# elif defined(PARALLEL_HASKELL)
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+ ullong_format_string(CURRENT_TIME,
+ time_string, rtsFalse/*no commas!*/);
+
+ debugBelch("all threads at [%s]:\n", time_string);
+# else
+ debugBelch("all threads:\n");
+# endif
+
+ for (i = 0; i < n_capabilities; i++) {
+ cap = &capabilities[i];
+ debugBelch("threads on capability %d:\n", cap->no);
+ for (t = cap->run_queue_hd; t != END_TSO_QUEUE; t = t->link) {
+ printThreadStatus(t);
+ }
+ }
+
+ debugBelch("other threads:\n");
+ for (t = all_threads; t != END_TSO_QUEUE; t = next) {
+ if (t->why_blocked != NotBlocked) {
+ printThreadStatus(t);
+ }
+ if (t->what_next == ThreadRelocated) {
+ next = t->link;
+ } else {
+ next = t->global_link;
+ }
+ }
+}
+
+// useful from gdb
+void
+printThreadQueue(StgTSO *t)
+{
+ nat i = 0;
+ for (; t != END_TSO_QUEUE; t = t->link) {
+ printThreadStatus(t);
+ i++;
+ }
+ debugBelch("%d threads on queue\n", i);
+}
+
+/*
+ Print a whole blocking queue attached to node (debugging only).
+*/
+# if defined(PARALLEL_HASKELL)
+void
+print_bq (StgClosure *node)
+{
+ StgBlockingQueueElement *bqe;
+ StgTSO *tso;
+ rtsBool end;
+
+ debugBelch("## BQ of closure %p (%s): ",
+ node, info_type(node));
+
+ /* should cover all closures that may have a blocking queue */
+ ASSERT(get_itbl(node)->type == BLACKHOLE_BQ ||
+ get_itbl(node)->type == FETCH_ME_BQ ||
+ get_itbl(node)->type == RBH ||
+ get_itbl(node)->type == MVAR);
+
+ ASSERT(node!=(StgClosure*)NULL); // sanity check
+
+ print_bqe(((StgBlockingQueue*)node)->blocking_queue);
+}
+
+/*
+ Print a whole blocking queue starting with the element bqe.
+*/
+void
+print_bqe (StgBlockingQueueElement *bqe)
+{
+ rtsBool end;
+
+ /*
+ NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure;
+ */
+ for (end = (bqe==END_BQ_QUEUE);
+ !end; // iterate until bqe points to a CONSTR
+ end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE),
+ bqe = end ? END_BQ_QUEUE : bqe->link) {
+ ASSERT(bqe != END_BQ_QUEUE); // sanity check
+ ASSERT(bqe != (StgBlockingQueueElement *)NULL); // sanity check
+ /* types of closures that may appear in a blocking queue */
+ ASSERT(get_itbl(bqe)->type == TSO ||
+ get_itbl(bqe)->type == BLOCKED_FETCH ||
+ get_itbl(bqe)->type == CONSTR);
+ /* only BQs of an RBH end with an RBH_Save closure */
+ //ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH);
+
+ switch (get_itbl(bqe)->type) {
+ case TSO:
+ debugBelch(" TSO %u (%x),",
+ ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
+ break;
+ case BLOCKED_FETCH:
+ debugBelch(" BF (node=%p, ga=((%x, %d, %x)),",
+ ((StgBlockedFetch *)bqe)->node,
+ ((StgBlockedFetch *)bqe)->ga.payload.gc.gtid,
+ ((StgBlockedFetch *)bqe)->ga.payload.gc.slot,
+ ((StgBlockedFetch *)bqe)->ga.weight);
+ break;
+ case CONSTR:
+ debugBelch(" %s (IP %p),",
+ (get_itbl(bqe) == &stg_RBH_Save_0_info ? "RBH_Save_0" :
+ get_itbl(bqe) == &stg_RBH_Save_1_info ? "RBH_Save_1" :
+ get_itbl(bqe) == &stg_RBH_Save_2_info ? "RBH_Save_2" :
+ "RBH_Save_?"), get_itbl(bqe));
+ break;
+ default:
+ barf("Unexpected closure type %s in blocking queue", // of %p (%s)",
+ info_type((StgClosure *)bqe)); // , node, info_type(node));
+ break;
+ }
+ } /* for */
+ debugBelch("\n");
+}
+# elif defined(GRAN)
+void
+print_bq (StgClosure *node)
+{
+ StgBlockingQueueElement *bqe;
+ PEs node_loc, tso_loc;
+ rtsBool end;
+
+ /* should cover all closures that may have a blocking queue */
+ ASSERT(get_itbl(node)->type == BLACKHOLE_BQ ||
+ get_itbl(node)->type == FETCH_ME_BQ ||
+ get_itbl(node)->type == RBH);
+
+ ASSERT(node!=(StgClosure*)NULL); // sanity check
+ node_loc = where_is(node);
+
+ debugBelch("## BQ of closure %p (%s) on [PE %d]: ",
+ node, info_type(node), node_loc);
+
+ /*
+ NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure;
+ */
+ for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE);
+ !end; // iterate until bqe points to a CONSTR
+ end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) {
+ ASSERT(bqe != END_BQ_QUEUE); // sanity check
+ ASSERT(bqe != (StgBlockingQueueElement *)NULL); // sanity check
+ /* types of closures that may appear in a blocking queue */
+ ASSERT(get_itbl(bqe)->type == TSO ||
+ get_itbl(bqe)->type == CONSTR);
+ /* only BQs of an RBH end with an RBH_Save closure */
+ ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH);
+
+ tso_loc = where_is((StgClosure *)bqe);
+ switch (get_itbl(bqe)->type) {
+ case TSO:
+ debugBelch(" TSO %d (%p) on [PE %d],",
+ ((StgTSO *)bqe)->id, (StgTSO *)bqe, tso_loc);
+ break;
+ case CONSTR:
+ debugBelch(" %s (IP %p),",
+ (get_itbl(bqe) == &stg_RBH_Save_0_info ? "RBH_Save_0" :
+ get_itbl(bqe) == &stg_RBH_Save_1_info ? "RBH_Save_1" :
+ get_itbl(bqe) == &stg_RBH_Save_2_info ? "RBH_Save_2" :
+ "RBH_Save_?"), get_itbl(bqe));
+ break;
+ default:
+ barf("Unexpected closure type %s in blocking queue of %p (%s)",
+ info_type((StgClosure *)bqe), node, info_type(node));
+ break;
+ }
+ } /* for */
+ debugBelch("\n");
+}
+# endif
+
+#if defined(PARALLEL_HASKELL)
+static nat
+run_queue_len(void)
+{
+ nat i;
+ StgTSO *tso;
+
+ for (i=0, tso=run_queue_hd;
+ tso != END_TSO_QUEUE;
+ i++, tso=tso->link) {
+ /* nothing */
+ }
+
+ return i;
+}
+#endif
+
+void
+sched_belch(char *s, ...)
+{
+ va_list ap;
+ va_start(ap,s);
+#ifdef THREADED_RTS
+ debugBelch("sched (task %p): ", (void *)(unsigned long)(unsigned int)osThreadId());
+#elif defined(PARALLEL_HASKELL)
+ debugBelch("== ");
+#else
+ debugBelch("sched: ");
+#endif
+ vdebugBelch(s, ap);
+ debugBelch("\n");
+ va_end(ap);
+}
+
+#endif /* DEBUG */
diff --git a/rts/Schedule.h b/rts/Schedule.h
new file mode 100644
index 0000000000..37b07941f4
--- /dev/null
+++ b/rts/Schedule.h
@@ -0,0 +1,332 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2005
+ *
+ * Prototypes for functions in Schedule.c
+ * (RTS internal scheduler interface)
+ *
+ * -------------------------------------------------------------------------*/
+
+#ifndef SCHEDULE_H
+#define SCHEDULE_H
+
+#include "OSThreads.h"
+#include "Capability.h"
+
+/* initScheduler(), exitScheduler()
+ * Called from STG : no
+ * Locks assumed : none
+ */
+void initScheduler (void);
+void exitScheduler (void);
+
+// Place a new thread on the run queue of the current Capability
+void scheduleThread (Capability *cap, StgTSO *tso);
+
+// Place a new thread on the run queue of a specified Capability
+// (cap is the currently owned Capability, cpu is the number of
+// the desired Capability).
+void scheduleThreadOn(Capability *cap, StgWord cpu, StgTSO *tso);
+
+/* awakenBlockedQueue()
+ *
+ * Takes a pointer to the beginning of a blocked TSO queue, and
+ * wakes up the entire queue.
+ * Called from STG : yes
+ * Locks assumed : none
+ */
+#if defined(GRAN)
+void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#elif defined(PAR)
+void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#else
+void awakenBlockedQueue (Capability *cap, StgTSO *tso);
+#endif
+
+/* unblockOne()
+ *
+ * Put the specified thread on the run queue of the given Capability.
+ * Called from STG : yes
+ * Locks assumed : we own the Capability.
+ */
+StgTSO * unblockOne(Capability *cap, StgTSO *tso);
+
+/* raiseAsync()
+ *
+ * Raises an exception asynchronously in the specified thread.
+ *
+ * Called from STG : yes
+ * Locks assumed : none
+ */
+void raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception);
+
+/* suspendComputation()
+ *
+ * A variant of raiseAsync(), this strips the stack of the specified
+ * thread down to the stop_here point, leaving a current closure on
+ * top of the stack at [stop_here - 1].
+ */
+void suspendComputation(Capability *cap, StgTSO *tso, StgPtr stop_here);
+
+/* raiseExceptionHelper */
+StgWord raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception);
+
+/* findRetryFrameHelper */
+StgWord findRetryFrameHelper (StgTSO *tso);
+
+/* GetRoots(evac_fn f)
+ *
+ * Call f() for each root known to the scheduler.
+ *
+ * Called from STG : NO
+ * Locks assumed : ????
+ */
+void GetRoots(evac_fn);
+
+/* workerStart()
+ *
+ * Entry point for a new worker task.
+ * Called from STG : NO
+ * Locks assumed : none
+ */
+void workerStart(Task *task);
+
+#if defined(GRAN)
+void awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node);
+void unlink_from_bq(StgTSO* tso, StgClosure* node);
+void initThread(StgTSO *tso, nat stack_size, StgInt pri);
+#elif defined(PAR)
+nat run_queue_len(void);
+void awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node);
+void initThread(StgTSO *tso, nat stack_size);
+#else
+char *info_type(StgClosure *closure); // dummy
+char *info_type_by_ip(StgInfoTable *ip); // dummy
+void awaken_blocked_queue(StgTSO *q);
+void initThread(StgTSO *tso, nat stack_size);
+#endif
+
+/* Context switch flag.
+ * Locks required : none (conflicts are harmless)
+ */
+extern int RTS_VAR(context_switch);
+
+/* The state of the scheduler. This is used to control the sequence
+ * of events during shutdown, and when the runtime is interrupted
+ * using ^C.
+ */
+#define SCHED_RUNNING 0 /* running as normal */
+#define SCHED_INTERRUPTING 1 /* ^C detected, before threads are deleted */
+#define SCHED_INTERRUPTED 2 /* ^C detected, after threads deleted */
+#define SCHED_SHUTTING_DOWN 3 /* final shutdown */
+
+extern rtsBool RTS_VAR(sched_state);
+
+/*
+ * flag that tracks whether we have done any execution in this time slice.
+ */
+#define ACTIVITY_YES 0 /* there has been activity in the current slice */
+#define ACTIVITY_MAYBE_NO 1 /* no activity in the current slice */
+#define ACTIVITY_INACTIVE 2 /* a complete slice has passed with no activity */
+#define ACTIVITY_DONE_GC 3 /* like 2, but we've done a GC too */
+
+/* Recent activity flag.
+ * Locks required : Transition from MAYBE_NO to INACTIVE
+ * happens in the timer signal, so it is atomic. Transition from
+ * INACTIVE to DONE_GC happens under sched_mutex. No lock required
+ * to set it to ACTIVITY_YES.
+ */
+extern nat recent_activity;
+
+/* Thread queues.
+ * Locks required : sched_mutex
+ *
+ * In GranSim we have one run/blocked_queue per PE.
+ */
+#if defined(GRAN)
+// run_queue_hds defined in GranSim.h
+#else
+extern StgTSO *RTS_VAR(blackhole_queue);
+#if !defined(THREADED_RTS)
+extern StgTSO *RTS_VAR(blocked_queue_hd), *RTS_VAR(blocked_queue_tl);
+extern StgTSO *RTS_VAR(sleeping_queue);
+#endif
+#endif
+
+/* Linked list of all threads.
+ * Locks required : sched_mutex
+ */
+extern StgTSO *RTS_VAR(all_threads);
+
+/* Set to rtsTrue if there are threads on the blackhole_queue, and
+ * it is possible that one or more of them may be available to run.
+ * This flag is set to rtsFalse after we've checked the queue, and
+ * set to rtsTrue just before we run some Haskell code. It is used
+ * to decide whether we should yield the Capability or not.
+ * Locks required : none (see scheduleCheckBlackHoles()).
+ */
+extern rtsBool blackholes_need_checking;
+
+#if defined(THREADED_RTS)
+extern Mutex RTS_VAR(sched_mutex);
+#endif
+
+StgBool isThreadBound(StgTSO *tso);
+
+SchedulerStatus rts_mainLazyIO(HaskellObj p, /*out*/HaskellObj *ret);
+
+/* Called by shutdown_handler(). */
+void interruptStgRts (void);
+
+nat run_queue_len (void);
+
+void resurrectThreads (StgTSO *);
+
+void printAllThreads(void);
+
+/* debugging only
+ */
+#ifdef DEBUG
+void print_bq (StgClosure *node);
+#endif
+#if defined(PAR)
+void print_bqe (StgBlockingQueueElement *bqe);
+#endif
+
+void labelThread(StgPtr tso, char *label);
+
+/* -----------------------------------------------------------------------------
+ * Some convenient macros/inline functions...
+ */
+
+#if !IN_STG_CODE
+
+/* END_TSO_QUEUE and friends now defined in includes/StgMiscClosures.h */
+
+/* Add a thread to the end of the run queue.
+ * NOTE: tso->link should be END_TSO_QUEUE before calling this macro.
+ * ASSUMES: cap->running_task is the current task.
+ */
+STATIC_INLINE void
+appendToRunQueue (Capability *cap, StgTSO *tso)
+{
+ ASSERT(tso->link == END_TSO_QUEUE);
+ if (cap->run_queue_hd == END_TSO_QUEUE) {
+ cap->run_queue_hd = tso;
+ } else {
+ cap->run_queue_tl->link = tso;
+ }
+ cap->run_queue_tl = tso;
+}
+
+/* Push a thread on the beginning of the run queue. Used for
+ * newly awakened threads, so they get run as soon as possible.
+ * ASSUMES: cap->running_task is the current task.
+ */
+STATIC_INLINE void
+pushOnRunQueue (Capability *cap, StgTSO *tso)
+{
+ tso->link = cap->run_queue_hd;
+ cap->run_queue_hd = tso;
+ if (cap->run_queue_tl == END_TSO_QUEUE) {
+ cap->run_queue_tl = tso;
+ }
+}
+
+/* Pop the first thread off the runnable queue.
+ */
+STATIC_INLINE StgTSO *
+popRunQueue (Capability *cap)
+{
+ StgTSO *t = cap->run_queue_hd;
+ ASSERT(t != END_TSO_QUEUE);
+ cap->run_queue_hd = t->link;
+ t->link = END_TSO_QUEUE;
+ if (cap->run_queue_hd == END_TSO_QUEUE) {
+ cap->run_queue_tl = END_TSO_QUEUE;
+ }
+ return t;
+}
+
+/* Add a thread to the end of the blocked queue.
+ */
+#if !defined(THREADED_RTS)
+STATIC_INLINE void
+appendToBlockedQueue(StgTSO *tso)
+{
+ ASSERT(tso->link == END_TSO_QUEUE);
+ if (blocked_queue_hd == END_TSO_QUEUE) {
+ blocked_queue_hd = tso;
+ } else {
+ blocked_queue_tl->link = tso;
+ }
+ blocked_queue_tl = tso;
+}
+#endif
+
+#if defined(THREADED_RTS)
+STATIC_INLINE void
+appendToWakeupQueue (Capability *cap, StgTSO *tso)
+{
+ ASSERT(tso->link == END_TSO_QUEUE);
+ if (cap->wakeup_queue_hd == END_TSO_QUEUE) {
+ cap->wakeup_queue_hd = tso;
+ } else {
+ cap->wakeup_queue_tl->link = tso;
+ }
+ cap->wakeup_queue_tl = tso;
+}
+#endif
+
+/* Check whether various thread queues are empty
+ */
+STATIC_INLINE rtsBool
+emptyQueue (StgTSO *q)
+{
+ return (q == END_TSO_QUEUE);
+}
+
+STATIC_INLINE rtsBool
+emptyRunQueue(Capability *cap)
+{
+ return emptyQueue(cap->run_queue_hd);
+}
+
+#if defined(THREADED_RTS)
+STATIC_INLINE rtsBool
+emptyWakeupQueue(Capability *cap)
+{
+ return emptyQueue(cap->wakeup_queue_hd);
+}
+#endif
+
+#if !defined(THREADED_RTS)
+#define EMPTY_BLOCKED_QUEUE() (emptyQueue(blocked_queue_hd))
+#define EMPTY_SLEEPING_QUEUE() (emptyQueue(sleeping_queue))
+#endif
+
+STATIC_INLINE rtsBool
+emptyThreadQueues(Capability *cap)
+{
+ return emptyRunQueue(cap)
+#if !defined(THREADED_RTS)
+ && EMPTY_BLOCKED_QUEUE() && EMPTY_SLEEPING_QUEUE()
+#endif
+ ;
+}
+
+#ifdef DEBUG
+void sched_belch(char *s, ...)
+ GNU_ATTRIBUTE(format (printf, 1, 2));
+#endif
+
+#endif /* !IN_STG_CODE */
+
+STATIC_INLINE void
+dirtyTSO (StgTSO *tso)
+{
+ tso->flags |= TSO_DIRTY;
+}
+
+#endif /* SCHEDULE_H */
+
diff --git a/rts/Sparks.c b/rts/Sparks.c
new file mode 100644
index 0000000000..615d832e33
--- /dev/null
+++ b/rts/Sparks.c
@@ -0,0 +1,881 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2000-2006
+ *
+ * Sparking support for PARALLEL_HASKELL and THREADED_RTS versions of the RTS.
+ *
+ * -------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "Schedule.h"
+#include "SchedAPI.h"
+#include "Storage.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParTicky.h"
+# if defined(PARALLEL_HASKELL)
+# include "ParallelRts.h"
+# include "GranSimRts.h" // for GR_...
+# elif defined(GRAN)
+# include "GranSimRts.h"
+# endif
+#include "Sparks.h"
+
+#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL)
+
+static INLINE_ME void bump_hd (StgSparkPool *p)
+{ p->hd++; if (p->hd == p->lim) p->hd = p->base; }
+
+static INLINE_ME void bump_tl (StgSparkPool *p)
+{ p->tl++; if (p->tl == p->lim) p->tl = p->base; }
+
+/* -----------------------------------------------------------------------------
+ *
+ * Initialising spark pools.
+ *
+ * -------------------------------------------------------------------------- */
+
+static void
+initSparkPool(StgSparkPool *pool)
+{
+ pool->base = stgMallocBytes(RtsFlags.ParFlags.maxLocalSparks
+ * sizeof(StgClosure *),
+ "initSparkPools");
+ pool->lim = pool->base + RtsFlags.ParFlags.maxLocalSparks;
+ pool->hd = pool->base;
+ pool->tl = pool->base;
+}
+
+void
+initSparkPools( void )
+{
+#ifdef THREADED_RTS
+ /* walk over the capabilities, allocating a spark pool for each one */
+ nat i;
+ for (i = 0; i < n_capabilities; i++) {
+ initSparkPool(&capabilities[i].r.rSparks);
+ }
+#else
+ /* allocate a single spark pool */
+ initSparkPool(&MainCapability.r.rSparks);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ *
+ * findSpark: find a spark on the current Capability that we can fork
+ * into a thread.
+ *
+ * -------------------------------------------------------------------------- */
+
+StgClosure *
+findSpark (Capability *cap)
+{
+ StgSparkPool *pool;
+ StgClosure *spark;
+
+ pool = &(cap->r.rSparks);
+ ASSERT_SPARK_POOL_INVARIANTS(pool);
+
+ while (pool->hd != pool->tl) {
+ spark = *pool->hd;
+ bump_hd(pool);
+ if (closure_SHOULD_SPARK(spark)) {
+#ifdef GRAN
+ if (RtsFlags.ParFlags.ParStats.Sparks)
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_STEALING, ((StgTSO *)NULL), spark,
+ 0, 0 /* spark_queue_len(ADVISORY_POOL) */);
+#endif
+ return spark;
+ }
+ }
+ // spark pool is now empty
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
+ * implicit slide i.e. after marking all sparks are at the beginning of the
+ * spark pool and the spark pool only contains sparkable closures
+ * -------------------------------------------------------------------------- */
+
+void
+markSparkQueue (evac_fn evac)
+{
+ StgClosure **sparkp, **to_sparkp;
+ nat i, n, pruned_sparks; // stats only
+ StgSparkPool *pool;
+ Capability *cap;
+
+ PAR_TICKY_MARK_SPARK_QUEUE_START();
+
+ n = 0;
+ pruned_sparks = 0;
+ for (i = 0; i < n_capabilities; i++) {
+ cap = &capabilities[i];
+ pool = &(cap->r.rSparks);
+
+ ASSERT_SPARK_POOL_INVARIANTS(pool);
+
+#if defined(PARALLEL_HASKELL)
+ // stats only
+ n = 0;
+ pruned_sparks = 0;
+#endif
+
+ sparkp = pool->hd;
+ to_sparkp = pool->hd;
+ while (sparkp != pool->tl) {
+ ASSERT(to_sparkp<=sparkp);
+ ASSERT(*sparkp!=NULL);
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
+ // ToDo?: statistics gathering here (also for GUM!)
+ if (closure_SHOULD_SPARK(*sparkp)) {
+ evac(sparkp);
+ *to_sparkp++ = *sparkp;
+ n++;
+ } else {
+ pruned_sparks++;
+ }
+ sparkp++;
+ if (sparkp == pool->lim) {
+ sparkp = pool->base;
+ }
+ }
+ pool->tl = to_sparkp;
+
+ PAR_TICKY_MARK_SPARK_QUEUE_END(n);
+
+#if defined(PARALLEL_HASKELL)
+ IF_DEBUG(scheduler,
+ debugBelch("markSparkQueue: marked %d sparks and pruned %d sparks on [%x]",
+ n, pruned_sparks, mytid));
+#else
+ IF_DEBUG(scheduler,
+ debugBelch("markSparkQueue: marked %d sparks and pruned %d sparks\n",
+ n, pruned_sparks));
+#endif
+
+ IF_DEBUG(scheduler,
+ debugBelch("markSparkQueue: new spark queue len=%d; (hd=%p; tl=%p)\n",
+ sparkPoolSize(pool), pool->hd, pool->tl));
+
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ *
+ * Turn a spark into a real thread
+ *
+ * -------------------------------------------------------------------------- */
+
+void
+createSparkThread (Capability *cap, StgClosure *p)
+{
+ StgTSO *tso;
+
+ tso = createGenThread (cap, RtsFlags.GcFlags.initialStkSize, p);
+ appendToRunQueue(cap,tso);
+}
+
+/* -----------------------------------------------------------------------------
+ *
+ * Create a new spark
+ *
+ * -------------------------------------------------------------------------- */
+
+#define DISCARD_NEW
+
+StgInt
+newSpark (StgRegTable *reg, StgClosure *p)
+{
+ StgSparkPool *pool = &(reg->rSparks);
+
+ ASSERT_SPARK_POOL_INVARIANTS(pool);
+
+ if (closure_SHOULD_SPARK(p)) {
+#ifdef DISCARD_NEW
+ StgClosure **new_tl;
+ new_tl = pool->tl + 1;
+ if (new_tl == pool->lim) { new_tl = pool->base; }
+ if (new_tl != pool->hd) {
+ *pool->tl = p;
+ pool->tl = new_tl;
+ } else if (!closure_SHOULD_SPARK(*pool->hd)) {
+ // if the old closure is not sparkable, discard it and
+ // keep the new one. Otherwise, keep the old one.
+ *pool->tl = p;
+ bump_hd(pool);
+ }
+#else /* DISCARD OLD */
+ *pool->tl = p;
+ bump_tl(pool);
+ if (pool->tl == pool->hd) { bump_hd(pool); }
+#endif
+ }
+
+ ASSERT_SPARK_POOL_INVARIANTS(pool);
+ return 1;
+}
+
+#else
+
+StgInt
+newSpark (StgRegTable *reg STG_UNUSED, StgClosure *p STG_UNUSED)
+{
+ /* nothing */
+ return 1;
+}
+
+#endif /* PARALLEL_HASKELL || THREADED_RTS */
+
+
+/* -----------------------------------------------------------------------------
+ *
+ * GRAN & PARALLEL_HASKELL stuff beyond here.
+ *
+ * -------------------------------------------------------------------------- */
+
+#if defined(PARALLEL_HASKELL) || defined(GRAN)
+
+static void slide_spark_pool( StgSparkPool *pool );
+
+rtsBool
+add_to_spark_queue( StgClosure *closure, StgSparkPool *pool )
+{
+ if (pool->tl == pool->lim)
+ slide_spark_pool(pool);
+
+ if (closure_SHOULD_SPARK(closure) &&
+ pool->tl < pool->lim) {
+ *(pool->tl++) = closure;
+
+#if defined(PARALLEL_HASKELL)
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ // debugBelch("Creating spark for %x @ %11.2f\n", closure, usertime());
+ globalParStats.tot_sparks_created++;
+ }
+#endif
+ return rtsTrue;
+ } else {
+#if defined(PARALLEL_HASKELL)
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ //debugBelch("Ignoring spark for %x @ %11.2f\n", closure, usertime());
+ globalParStats.tot_sparks_ignored++;
+ }
+#endif
+ return rtsFalse;
+ }
+}
+
+static void
+slide_spark_pool( StgSparkPool *pool )
+{
+ StgClosure **sparkp, **to_sparkp;
+
+ sparkp = pool->hd;
+ to_sparkp = pool->base;
+ while (sparkp < pool->tl) {
+ ASSERT(to_sparkp<=sparkp);
+ ASSERT(*sparkp!=NULL);
+ ASSERT(LOOKS_LIKE_GHC_INFO((*sparkp)->header.info));
+
+ if (closure_SHOULD_SPARK(*sparkp)) {
+ *to_sparkp++ = *sparkp++;
+ } else {
+ sparkp++;
+ }
+ }
+ pool->hd = pool->base;
+ pool->tl = to_sparkp;
+}
+
+void
+disposeSpark(spark)
+StgClosure *spark;
+{
+#if !defined(THREADED_RTS)
+ Capability *cap;
+ StgSparkPool *pool;
+
+ cap = &MainRegTable;
+ pool = &(cap->rSparks);
+ ASSERT(pool->hd <= pool->tl && pool->tl <= pool->lim);
+#endif
+ ASSERT(spark != (StgClosure *)NULL);
+ /* Do nothing */
+}
+
+
+#elif defined(GRAN)
+
+/*
+ Search the spark queue of the proc in event for a spark that's worth
+ turning into a thread
+ (was gimme_spark in the old RTS)
+*/
+void
+findLocalSpark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res)
+{
+ PEs proc = event->proc, /* proc to search for work */
+ creator = event->creator; /* proc that requested work */
+ StgClosure* node;
+ rtsBool found;
+ rtsSparkQ spark_of_non_local_node = NULL,
+ spark_of_non_local_node_prev = NULL,
+ low_priority_spark = NULL,
+ low_priority_spark_prev = NULL,
+ spark = NULL, prev = NULL;
+
+ /* Choose a spark from the local spark queue */
+ prev = (rtsSpark*)NULL;
+ spark = pending_sparks_hds[proc];
+ found = rtsFalse;
+
+ // ToDo: check this code & implement local sparking !! -- HWL
+ while (!found && spark != (rtsSpark*)NULL)
+ {
+ ASSERT((prev!=(rtsSpark*)NULL || spark==pending_sparks_hds[proc]) &&
+ (prev==(rtsSpark*)NULL || prev->next==spark) &&
+ (spark->prev==prev));
+ node = spark->node;
+ if (!closure_SHOULD_SPARK(node))
+ {
+ IF_GRAN_DEBUG(checkSparkQ,
+ debugBelch("^^ pruning spark %p (node %p) in gimme_spark",
+ spark, node));
+
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc, (PEs)0, SP_PRUNED,(StgTSO*)NULL,
+ spark->node, spark->name, spark_queue_len(proc));
+
+ ASSERT(spark != (rtsSpark*)NULL);
+ ASSERT(SparksAvail>0);
+ --SparksAvail;
+
+ ASSERT(prev==(rtsSpark*)NULL || prev->next==spark);
+ spark = delete_from_sparkq (spark, proc, rtsTrue);
+ if (spark != (rtsSpark*)NULL)
+ prev = spark->prev;
+ continue;
+ }
+ /* -- node should eventually be sparked */
+ else if (RtsFlags.GranFlags.PreferSparksOfLocalNodes &&
+ !IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ barf("Local sparking not yet implemented");
+
+ /* Remember first low priority spark */
+ if (spark_of_non_local_node==(rtsSpark*)NULL) {
+ spark_of_non_local_node_prev = prev;
+ spark_of_non_local_node = spark;
+ }
+
+ if (spark->next == (rtsSpark*)NULL) {
+ /* ASSERT(spark==SparkQueueTl); just for testing */
+ prev = spark_of_non_local_node_prev;
+ spark = spark_of_non_local_node;
+ found = rtsTrue;
+ break;
+ }
+
+# if defined(GRAN) && defined(GRAN_CHECK)
+ /* Should never happen; just for testing
+ if (spark==pending_sparks_tl) {
+ debugBelch("ReSchedule: Last spark != SparkQueueTl\n");
+ stg_exit(EXIT_FAILURE);
+ } */
+# endif
+ prev = spark;
+ spark = spark->next;
+ ASSERT(SparksAvail>0);
+ --SparksAvail;
+ continue;
+ }
+ else if ( RtsFlags.GranFlags.DoPrioritySparking ||
+ (spark->gran_info >= RtsFlags.GranFlags.SparkPriority2) )
+ {
+ if (RtsFlags.GranFlags.DoPrioritySparking)
+ barf("Priority sparking not yet implemented");
+
+ found = rtsTrue;
+ }
+#if 0
+ else /* only used if SparkPriority2 is defined */
+ {
+ /* ToDo: fix the code below and re-integrate it */
+ /* Remember first low priority spark */
+ if (low_priority_spark==(rtsSpark*)NULL) {
+ low_priority_spark_prev = prev;
+ low_priority_spark = spark;
+ }
+
+ if (spark->next == (rtsSpark*)NULL) {
+ /* ASSERT(spark==spark_queue_tl); just for testing */
+ prev = low_priority_spark_prev;
+ spark = low_priority_spark;
+ found = rtsTrue; /* take low pri spark => rc is 2 */
+ break;
+ }
+
+ /* Should never happen; just for testing
+ if (spark==pending_sparks_tl) {
+ debugBelch("ReSchedule: Last spark != SparkQueueTl\n");
+ stg_exit(EXIT_FAILURE);
+ break;
+ } */
+ prev = spark;
+ spark = spark->next;
+
+ IF_GRAN_DEBUG(pri,
+ debugBelch("++ Ignoring spark of priority %u (SparkPriority=%u); node=%p; name=%u\n",
+ spark->gran_info, RtsFlags.GranFlags.SparkPriority,
+ spark->node, spark->name);)
+ }
+#endif
+ } /* while (spark!=NULL && !found) */
+
+ *spark_res = spark;
+ *found_res = found;
+}
+
+/*
+ Turn the spark into a thread.
+ In GranSim this basically means scheduling a StartThread event for the
+ node pointed to by the spark at some point in the future.
+ (was munch_spark in the old RTS)
+*/
+rtsBool
+activateSpark (rtsEvent *event, rtsSparkQ spark)
+{
+ PEs proc = event->proc, /* proc to search for work */
+ creator = event->creator; /* proc that requested work */
+ StgTSO* tso;
+ StgClosure* node;
+ rtsTime spark_arrival_time;
+
+ /*
+ We've found a node on PE proc requested by PE creator.
+ If proc==creator we can turn the spark into a thread immediately;
+ otherwise we schedule a MoveSpark event on the requesting PE
+ */
+
+ /* DaH Qu' yIchen */
+ if (proc!=creator) {
+
+ /* only possible if we simulate GUM style fishing */
+ ASSERT(RtsFlags.GranFlags.Fishing);
+
+ /* Message packing costs for sending a Fish; qeq jabbI'ID */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc, (PEs)0, SP_EXPORTED,
+ (StgTSO*)NULL, spark->node,
+ spark->name, spark_queue_len(proc));
+
+ /* time of the spark arrival on the remote PE */
+ spark_arrival_time = CurrentTime[proc] + RtsFlags.GranFlags.Costs.latency;
+
+ new_event(creator, proc, spark_arrival_time,
+ MoveSpark,
+ (StgTSO*)NULL, spark->node, spark);
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ } else { /* proc==creator i.e. turn the spark into a thread */
+
+ if ( RtsFlags.GranFlags.GranSimStats.Global &&
+ spark->gran_info < RtsFlags.GranFlags.SparkPriority2 ) {
+
+ globalGranStats.tot_low_pri_sparks++;
+ IF_GRAN_DEBUG(pri,
+ debugBelch("++ No high priority spark available; low priority (%u) spark chosen: node=%p; name=%u\n",
+ spark->gran_info,
+ spark->node, spark->name));
+ }
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcreatetime;
+
+ node = spark->node;
+
+# if 0
+ /* ToDo: fix the GC interface and move to StartThread handling-- HWL */
+ if (GARBAGE COLLECTION IS NECESSARY) {
+ /* Some kind of backoff needed here in case there's too little heap */
+# if defined(GRAN_CHECK) && defined(GRAN)
+ if (RtsFlags.GcFlags.giveStats)
+ fprintf(RtsFlags.GcFlags.statsFile,"***** vIS Qu' chen veQ boSwI'; spark=%p, node=%p; name=%u\n",
+ /* (found==2 ? "no hi pri spark" : "hi pri spark"), */
+ spark, node, spark->name);
+# endif
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc]+1,
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ barf("//// activateSpark: out of heap ; ToDo: call GarbageCollect()");
+ GarbageCollect(GetRoots, rtsFalse);
+ // HWL old: ReallyPerformThreadGC(TSO_HS+TSO_CTS_SIZE,rtsFalse);
+ // HWL old: SAVE_Hp -= TSO_HS+TSO_CTS_SIZE;
+ spark = NULL;
+ return; /* was: continue; */ /* to the next event, eventually */
+ }
+# endif
+
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(CurrentProc,(PEs)0,SP_USED,(StgTSO*)NULL,
+ spark->node, spark->name,
+ spark_queue_len(CurrentProc));
+
+ new_event(proc, proc, CurrentTime[proc],
+ StartThread,
+ END_TSO_QUEUE, node, spark); // (rtsSpark*)NULL);
+
+ procStatus[proc] = Starting;
+ }
+}
+
+/* -------------------------------------------------------------------------
+ This is the main point where handling granularity information comes into
+ play.
+ ------------------------------------------------------------------------- */
+
+/* RAND() below yields a pseudo-random priority in [1..MAX_RAND_PRI]. */
+#define MAX_RAND_PRI    100
+
+/*
+  Granularity info transformers.
+  Applied to the GRAN_INFO field of a spark to derive its priority;
+  which one is used is selected by the Random/Inverse/IgnorePriorities
+  RTS flags (see newSpark below).
+*/
+STATIC_INLINE nat  ID(nat x) { return(x); };       /* identity: keep gran_info as-is */
+STATIC_INLINE nat INV(nat x) { return(-x); };      /* NOTE(review): nat is unsigned, so -x wraps around;
+                                                      presumably intended only as an order-inverting key -- confirm */
+STATIC_INLINE nat IGNORE(nat x) { return (0); };   /* drop priority information entirely */
+STATIC_INLINE nat  RAND(nat x) { return ((random() % MAX_RAND_PRI) + 1); }  /* x is unused */
+
+/* NB: size_info and par_info are currently unused (what a shame!) -- HWL */
+/*
+  Allocate and initialise a new spark for closure node.  The spark's
+  priority is derived from gran_info via one of the transformers above.
+  Returns NULL (allocating nothing) when the derived priority falls
+  below RtsFlags.GranFlags.SparkPriority.  The caller owns the returned
+  spark; free it with disposeSpark().
+*/
+rtsSpark *
+newSpark(node,name,gran_info,size_info,par_info,local)
+StgClosure *node;
+nat name, gran_info, size_info, par_info, local;
+{
+  nat pri;
+  rtsSpark *newspark;
+
+  /* choose the priority transformer; plain ID is the default */
+  pri = RtsFlags.GranFlags.RandomPriorities ? RAND(gran_info) :
+        RtsFlags.GranFlags.InversePriorities ? INV(gran_info) :
+        RtsFlags.GranFlags.IgnorePriorities ? IGNORE(gran_info) :
+        ID(gran_info);
+
+  /* discard sparks whose priority is below the configured threshold */
+  if ( RtsFlags.GranFlags.SparkPriority!=0 &&
+       pri<RtsFlags.GranFlags.SparkPriority ) {
+    IF_GRAN_DEBUG(pri,
+                  debugBelch(",, NewSpark: Ignoring spark of priority %u (SparkPriority=%u); node=%#x; name=%u\n",
+                             pri, RtsFlags.GranFlags.SparkPriority, node, name));
+    return ((rtsSpark*)NULL);
+  }
+
+  newspark = (rtsSpark*) stgMallocBytes(sizeof(rtsSpark), "NewSpark");
+  newspark->prev = newspark->next = (rtsSpark*)NULL;
+  newspark->node = node;
+  /* name==1 means "inherit the spark name of the current TSO" */
+  newspark->name = (name==1) ? CurrentTSO->gran.sparkname : name;
+  newspark->gran_info = pri;
+  newspark->global = !local;      /* Check that with parAt, parAtAbs !!*/
+
+  if (RtsFlags.GranFlags.GranSimStats.Global) {
+    globalGranStats.tot_sparks_created++;
+    globalGranStats.sparks_created_on_PE[CurrentProc]++;
+  }
+
+  return(newspark);
+}
+
+void
+disposeSpark(spark)
+rtsSpark *spark;
+{
+  /* Free a single (non-NULL) spark previously allocated by newSpark. */
+  ASSERT(spark!=NULL);
+  stgFree(spark);
+}
+
+void
+disposeSparkQ(spark)
+rtsSparkQ spark;
+{
+  /* Free an entire spark queue, following the next links.
+     Iterative rather than recursive (as it used to be) so that a long
+     queue cannot overflow the C stack. */
+  while (spark != NULL) {
+    rtsSpark *next = spark->next;
+
+# ifdef GRAN_CHECK
+    if (SparksAvail < 0) {
+      /* print the spark itself, not the address of the local variable */
+      debugBelch("disposeSparkQ: SparksAvail<0 after disposing sparkq @ %p\n", spark);
+      print_spark(spark);
+    }
+# endif
+
+    stgFree(spark);
+    spark = next;
+  }
+}
+
+/*
+ With PrioritySparking add_to_spark_queue performs an insert sort to keep
+ the spark queue sorted. Otherwise the spark is just added to the end of
+ the queue.
+*/
+
+void
+add_to_spark_queue(spark)
+rtsSpark *spark;
+{
+ rtsSpark *prev = NULL, *next = NULL;
+ nat count = 0;
+ rtsBool found = rtsFalse;
+
+ if ( spark == (rtsSpark *)NULL ) {
+ return;
+ }
+
+ if (RtsFlags.GranFlags.DoPrioritySparking && (spark->gran_info != 0) ) {
+ /* Priority sparking is enabled i.e. spark queues must be sorted */
+
+ for (prev = NULL, next = pending_sparks_hd, count=0;
+ (next != NULL) &&
+ !(found = (spark->gran_info >= next->gran_info));
+ prev = next, next = next->next, count++)
+ {}
+
+ } else { /* 'utQo' */
+ /* Priority sparking is disabled */
+
+ found = rtsFalse; /* to add it at the end */
+
+ }
+
+ if (found) {
+ /* next points to the first spark with a gran_info smaller than that
+ of spark; therefore, add spark before next into the spark queue */
+ spark->next = next;
+ if ( next == NULL ) {
+ pending_sparks_tl = spark;
+ } else {
+ next->prev = spark;
+ }
+ spark->prev = prev;
+ if ( prev == NULL ) {
+ pending_sparks_hd = spark;
+ } else {
+ prev->next = spark;
+ }
+ } else { /* (RtsFlags.GranFlags.DoPrioritySparking && !found) || !DoPrioritySparking */
+ /* add the spark at the end of the spark queue */
+ spark->next = NULL;
+ spark->prev = pending_sparks_tl;
+ if (pending_sparks_hd == NULL)
+ pending_sparks_hd = spark;
+ else
+ pending_sparks_tl->next = spark;
+ pending_sparks_tl = spark;
+ }
+ ++SparksAvail;
+
+ /* add costs for search in priority sparking */
+ if (RtsFlags.GranFlags.DoPrioritySparking) {
+ CurrentTime[CurrentProc] += count * RtsFlags.GranFlags.Costs.pri_spark_overhead;
+ }
+
+ IF_GRAN_DEBUG(checkSparkQ,
+ debugBelch("++ Spark stats after adding spark %p (node %p) to queue on PE %d",
+ spark, spark->node, CurrentProc);
+ print_sparkq_stats());
+
+# if defined(GRAN_CHECK)
+ if (RtsFlags.GranFlags.Debug.checkSparkQ) {
+ for (prev = NULL, next = pending_sparks_hd;
+ (next != NULL);
+ prev = next, next = next->next)
+ {}
+ if ( (prev!=NULL) && (prev!=pending_sparks_tl) )
+ debugBelch("SparkQ inconsistency after adding spark %p: (PE %u) pending_sparks_tl (%p) not end of queue (%p)\n",
+ spark,CurrentProc,
+ pending_sparks_tl, prev);
+ }
+# endif
+
+# if defined(GRAN_CHECK)
+ /* Check if the sparkq is still sorted. Just for testing, really! */
+ if ( RtsFlags.GranFlags.Debug.checkSparkQ &&
+ RtsFlags.GranFlags.Debug.pri ) {
+ rtsBool sorted = rtsTrue;
+ rtsSpark *prev, *next;
+
+ if (pending_sparks_hd == NULL ||
+ pending_sparks_hd->next == NULL ) {
+ /* just 1 elem => ok */
+ } else {
+ for (prev = pending_sparks_hd,
+ next = pending_sparks_hd->next;
+ (next != NULL) ;
+ prev = next, next = next->next) {
+ sorted = sorted &&
+ (prev->gran_info >= next->gran_info);
+ }
+ }
+ if (!sorted) {
+ debugBelch("ghuH: SPARKQ on PE %d is not sorted:\n",
+ CurrentProc);
+ print_sparkq(CurrentProc);
+ }
+ }
+# endif
+}
+
+nat
+spark_queue_len(proc)
+PEs proc;
+{
+  /* Return the number of sparks currently queued on PE proc.
+     'last' trails the scan and is only needed for the sanity check. */
+  rtsSpark *last = NULL;
+  rtsSpark *s = pending_sparks_hds[proc];
+  nat n = 0;
+
+  while (s != NULL) {
+    last = s;
+    s = s->next;
+    n++;
+  }
+
+# if defined(GRAN_CHECK)
+  /* the last element we saw must be the recorded tail of the queue */
+  if ( RtsFlags.GranFlags.Debug.checkSparkQ )
+    if ( (last!=NULL) && (last!=pending_sparks_tls[proc]) )
+      debugBelch("ERROR in spark_queue_len: (PE %u) pending_sparks_tl (%p) not end of queue (%p)\n",
+              proc, pending_sparks_tls[proc], last);
+# endif
+
+  return (n);
+}
+
+/*
+ Take spark out of the spark queue on PE p and nuke the spark. Adjusts
+ hd and tl pointers of the spark queue. Returns a pointer to the next
+ spark in the queue.
+*/
+rtsSpark *
+delete_from_sparkq (spark, p, dispose_too) /* unlink and dispose spark */
+rtsSpark *spark;
+PEs p;
+rtsBool dispose_too;
+{
+ rtsSpark *new_spark;
+
+ if (spark==NULL)
+ barf("delete_from_sparkq: trying to delete NULL spark\n");
+
+# if defined(GRAN_CHECK)
+ if ( RtsFlags.GranFlags.Debug.checkSparkQ ) {
+ debugBelch("## |%p:%p| (%p)<-spark=%p->(%p) <-(%p)\n",
+ pending_sparks_hd, pending_sparks_tl,
+ spark->prev, spark, spark->next,
+ (spark->next==NULL ? 0 : spark->next->prev));
+ }
+# endif
+
+ if (spark->prev==NULL) {
+ /* spark is first spark of queue => adjust hd pointer */
+ ASSERT(pending_sparks_hds[p]==spark);
+ pending_sparks_hds[p] = spark->next;
+ } else {
+ spark->prev->next = spark->next;
+ }
+ if (spark->next==NULL) {
+ ASSERT(pending_sparks_tls[p]==spark);
+ /* spark is first spark of queue => adjust tl pointer */
+ pending_sparks_tls[p] = spark->prev;
+ } else {
+ spark->next->prev = spark->prev;
+ }
+ new_spark = spark->next;
+
+# if defined(GRAN_CHECK)
+ if ( RtsFlags.GranFlags.Debug.checkSparkQ ) {
+ debugBelch("## |%p:%p| (%p)<-spark=%p->(%p) <-(%p); spark=%p will be deleted NOW \n",
+ pending_sparks_hd, pending_sparks_tl,
+ spark->prev, spark, spark->next,
+ (spark->next==NULL ? 0 : spark->next->prev), spark);
+ }
+# endif
+
+ if (dispose_too)
+ disposeSpark(spark);
+
+ return new_spark;
+}
+
+/* Mark all nodes pointed to by sparks in the spark queues (for GC) */
+void
+markSparkQueue(void)
+{
+  StgClosure *MarkRoot(StgClosure *root); // prototype
+  PEs p;
+  rtsSpark *sp;
+
+  /* Walk every PE's spark queue, treating each sparked closure as a GC
+     root and updating the node pointer with the (possibly moved)
+     address returned by MarkRoot. */
+  for (p=0; p<RtsFlags.GranFlags.proc; p++)
+    for (sp=pending_sparks_hds[p]; sp!=NULL; sp=sp->next) {
+      ASSERT(sp->node!=NULL);
+      ASSERT(LOOKS_LIKE_GHC_INFO(sp->node->header.info));
+      // ToDo?: statistics gathering here (also for GUM!)
+      sp->node = (StgClosure *)MarkRoot(sp->node);
+    }
+  IF_DEBUG(gc,
+           debugBelch("@@ markSparkQueue: spark statistics at start of GC:");
+           print_sparkq_stats());
+}
+
+void
+print_spark(spark)
+rtsSpark *spark;
+{
+  /* Debug pretty-printer for a single spark; NULL prints as NIL.
+     The node address is pre-formatted into str so that a NULL node can
+     be rendered as "______" in the same column. */
+  char str[32];  /* was 16: "%#6lx" of a 64-bit pointer needs up to 18 chars + NUL */
+
+  if (spark==NULL) {
+    debugBelch("Spark: NIL\n");
+    return;
+  } else {
+    sprintf(str,
+              ((spark->node==NULL) ? "______" : "%#6lx"),
+              stgCast(StgPtr,spark->node));
+
+    debugBelch("Spark: Node %8s, Name %#6x, Global %5s, Creator %5x, Prev %6p, Next %6p\n",
+            str, spark->name,
+            ((spark->global)==rtsTrue?"True":"False"), spark->creator,
+            spark->prev, spark->next);
+  }
+}
+
+void
+print_sparkq(proc)
+PEs proc;
+{
+  /* Dump every spark in the queue of PE proc, head first. */
+  rtsSpark *s = pending_sparks_hds[proc];
+
+  debugBelch("Spark Queue of PE %d with root at %p:\n", proc, s);
+  while (s != (rtsSpark*)NULL) {
+    print_spark(s);
+    s = s->next;
+  }
+}
+
+/*
+ Print a statistics of all spark queues.
+*/
+void
+print_sparkq_stats(void)
+{
+ PEs p;
+
+ debugBelch("SparkQs: [");
+ for (p=0; p<RtsFlags.GranFlags.proc; p++)
+ debugBelch(", PE %d: %d", p, spark_queue_len(p));
+ debugBelch("\n");
+}
+
+#endif
diff --git a/rts/Sparks.h b/rts/Sparks.h
new file mode 100644
index 0000000000..77d280bea8
--- /dev/null
+++ b/rts/Sparks.h
@@ -0,0 +1,104 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2000-2006
+ *
+ * Sparking support for GRAN, PAR and THREADED_RTS versions of the RTS.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef SPARKS_H
+#define SPARKS_H
+
+#if !defined(GRAN)
+StgInt newSpark (StgRegTable *reg, StgClosure *p);
+#endif
+
+#if defined(PARALLEL_HASKELL) || defined(THREADED_RTS)
+StgClosure * findSpark (Capability *cap);
+void initSparkPools (void);
+void markSparkQueue (evac_fn evac);
+void createSparkThread (Capability *cap, StgClosure *p);
+
+INLINE_HEADER void discardSparks (StgSparkPool *pool);
+INLINE_HEADER nat sparkPoolSize (StgSparkPool *pool);
+INLINE_HEADER rtsBool emptySparkPool (StgSparkPool *pool);
+
+INLINE_HEADER void discardSparksCap (Capability *cap);
+INLINE_HEADER nat sparkPoolSizeCap (Capability *cap);
+INLINE_HEADER rtsBool emptySparkPoolCap (Capability *cap);
+#endif
+
+#if defined(PARALLEL_HASKELL)
+StgTSO *activateSpark (rtsSpark spark) ;
+rtsBool add_to_spark_queue( StgClosure *closure, StgSparkPool *pool );
+void markSparkQueue( void );
+nat spark_queue_len( StgSparkPool *pool );
+void disposeSpark( StgClosure *spark );
+#endif
+
+#if defined(GRAN)
+void findLocalSpark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res);
+rtsBool activateSpark (rtsEvent *event, rtsSparkQ spark);
+rtsSpark *newSpark(StgClosure *node, nat name, nat gran_info,
+ nat size_info, nat par_info, nat local);
+void add_to_spark_queue(rtsSpark *spark);
+rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too);
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void print_spark(rtsSpark *spark);
+void print_sparkq(PEs proc);
+void print_sparkq_stats(void);
+nat spark_queue_len(PEs proc);
+void markSparkQueue(void);
+#endif
+
+/* -----------------------------------------------------------------------------
+ * PRIVATE below here
+ * -------------------------------------------------------------------------- */
+
+#if defined(PARALLEL_HASKELL) || defined(THREADED_RTS)
+
+INLINE_HEADER rtsBool
+emptySparkPool (StgSparkPool *pool)
+{
+    // The pool is a circular buffer: empty exactly when head == tail.
+    return (pool->hd == pool->tl);
+}
+
+INLINE_HEADER rtsBool
+emptySparkPoolCap (Capability *cap)
+{ return emptySparkPool(&cap->r.rSparks); }
+
+INLINE_HEADER nat
+sparkPoolSize (StgSparkPool *pool)
+{
+    // Number of sparks in the pool: hd chases tl around the circular
+    // buffer delimited by [base, lim).
+    if (pool->hd <= pool->tl) {
+	return (pool->tl - pool->hd);
+	// was (pool->hd - pool->tl): that is <= 0, which wraps to a huge
+	// value under the unsigned nat type whenever the pool is non-empty
+    } else {
+	return (pool->lim - pool->hd + pool->tl - pool->base);
+    }
+}
+
+INLINE_HEADER nat
+sparkPoolSizeCap (Capability *cap)
+{ return sparkPoolSize(&cap->r.rSparks); }
+
+INLINE_HEADER void
+discardSparks (StgSparkPool *pool)
+{
+    // Drop all queued sparks by snapping the head forward to the tail.
+    pool->hd = pool->tl;
+}
+
+INLINE_HEADER void
+discardSparksCap (Capability *cap)
+{ discardSparks(&cap->r.rSparks); }
+// no 'return' of a void expression: that is a C constraint violation
+// (only accepted as a GNU/C++ extension)
+
+
+#elif defined(THREADED_RTS)
+
+/* NOTE(review): this branch is unreachable -- THREADED_RTS is already
+   matched by the #if at the top of this section, so presumably this was
+   meant to be a plain #else; as written, builds that are neither
+   PARALLEL_HASKELL nor THREADED_RTS get no emptySparkPoolCap at all.
+   Confirm against the build configurations. */
+INLINE_HEADER rtsBool
+emptySparkPoolCap (Capability *cap STG_UNUSED)
+{ return rtsTrue; }
+
+#endif
+
+#endif /* SPARKS_H */
diff --git a/rts/Stable.c b/rts/Stable.c
new file mode 100644
index 0000000000..a4db5cd749
--- /dev/null
+++ b/rts/Stable.c
@@ -0,0 +1,460 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2002
+ *
+ * Stable names and stable pointers.
+ *
+ * ---------------------------------------------------------------------------*/
+
+// Make static versions of inline functions in Stable.h:
+#define RTS_STABLE_C
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "Hash.h"
+#include "RtsUtils.h"
+#include "OSThreads.h"
+#include "Storage.h"
+#include "RtsAPI.h"
+#include "RtsFlags.h"
+#include "OSThreads.h"
+
+/* Comment from ADR's implementation in old RTS:
+
+ This file (together with @ghc/runtime/storage/PerformIO.lhc@ and a
+ small change in @HpOverflow.lc@) consists of the changes in the
+ runtime system required to implement "Stable Pointers". But we're
+ getting a bit ahead of ourselves --- what is a stable pointer and what
+ is it used for?
+
+ When Haskell calls C, it normally just passes over primitive integers,
+ floats, bools, strings, etc. This doesn't cause any problems at all
+ for garbage collection because the act of passing them makes a copy
+ from the heap, stack or wherever they are onto the C-world stack.
+ However, if we were to pass a heap object such as a (Haskell) @String@
+ and a garbage collection occurred before we finished using it, we'd run
+ into problems since the heap object might have been moved or even
+ deleted.
+
+ So, if a C call is able to cause a garbage collection or we want to
+ store a pointer to a heap object between C calls, we must be careful
+ when passing heap objects. Our solution is to keep a table of all
+ objects we've given to the C-world and to make sure that the garbage
+ collector collects these objects --- updating the table as required to
+ make sure we can still find the object.
+
+
+ Of course, all this rather begs the question: why would we want to
+ pass a boxed value?
+
+ One very good reason is to preserve laziness across the language
+ interface. Rather than evaluating an integer or a string because it
+ {\em might\/} be required by the C function, we can wait until the C
+ function actually wants the value and then force an evaluation.
+
+ Another very good reason (the motivating reason!) is that the C code
+ might want to execute an object of sort $IO ()$ for the side-effects
+ it will produce. For example, this is used when interfacing to an X
+ widgets library to allow a direct implementation of callbacks.
+
+
+ The @makeStablePointer :: a -> IO (StablePtr a)@ function
+ converts a value into a stable pointer. It is part of the @PrimIO@
+ monad, because we want to be sure we don't allocate one twice by
+ accident, and then only free one of the copies.
+
+ \begin{verbatim}
+ makeStablePtr# :: a -> State# RealWorld -> (# RealWorld, a #)
+ freeStablePtr# :: StablePtr# a -> State# RealWorld -> State# RealWorld
+ deRefStablePtr# :: StablePtr# a -> State# RealWorld ->
+ (# State# RealWorld, a #)
+ \end{verbatim}
+
+ There may be additional functions on the C side to allow evaluation,
+ application, etc of a stable pointer.
+
+*/
+
+snEntry *stable_ptr_table = NULL;
+static snEntry *stable_ptr_free = NULL;
+
+static unsigned int SPT_size = 0;
+
+#ifdef THREADED_RTS
+static Mutex stable_mutex;
+#endif
+
+/* This hash table maps Haskell objects to stable names, so that every
+ * call to lookupStableName on a given object will return the same
+ * stable name.
+ *
+ * OLD COMMENTS about reference counting follow. The reference count
+ * in a stable name entry is now just a counter.
+ *
+ * Reference counting
+ * ------------------
+ * A plain stable name entry has a zero reference count, which means
+ * the entry will dissappear when the object it points to is
+ * unreachable. For stable pointers, we need an entry that sticks
+ * around and keeps the object it points to alive, so each stable name
+ * entry has an associated reference count.
+ *
+ * A stable pointer has a weighted reference count N attached to it
+ * (actually in its upper 5 bits), which represents the weight
+ * 2^(N-1). The stable name entry keeps a 32-bit reference count, which
+ * represents any weight between 1 and 2^32 (represented as zero).
+ * When the weight is 2^32, the stable name table owns "all" of the
+ * stable pointers to this object, and the entry can be garbage
+ * collected if the object isn't reachable.
+ *
+ * A new stable pointer is given the weight log2(W/2), where W is the
+ * weight stored in the table entry. The new weight in the table is W
+ * - 2^log2(W/2).
+ *
+ * A stable pointer can be "split" into two stable pointers, by
+ * dividing the weight by 2 and giving each pointer half.
+ * When freeing a stable pointer, the weight of the pointer is added
+ * to the weight stored in the table entry.
+ * */
+
+static HashTable *addrToStableHash = NULL;
+
+#define INIT_SPT_SIZE 64
+
+STATIC_INLINE void
+initFreeList(snEntry *table, nat n, snEntry *free)
+{
+  /* Thread the n entries of table into a free list (linked through the
+     addr field), working backwards so the list ends up in address order
+     with 'free' (the previous free-list head, or NULL) appended at the
+     tail.  Side effect: resets the global stable_ptr_free to the first
+     entry of table. */
+  snEntry *p;
+
+  for (p = table + n - 1; p >= table; p--) {
+    p->addr = (P_)free;
+    p->old = NULL;
+    p->ref = 0;
+    p->sn_obj = NULL;
+    free = p;
+  }
+  stable_ptr_free = table;
+}
+
+void
+initStablePtrTable(void)
+{
+  /* Idempotent: a non-zero SPT_size means the table already exists. */
+  if (SPT_size > 0)
+    return;
+
+  SPT_size = INIT_SPT_SIZE;
+  stable_ptr_table = stgMallocBytes(SPT_size * sizeof(snEntry),
+                                    "initStablePtrTable");
+
+  /* we don't use index 0 in the stable name table, because that
+   * would conflict with the hash table lookup operations which
+   * return NULL if an entry isn't found in the hash table.
+   */
+  initFreeList(stable_ptr_table+1,INIT_SPT_SIZE-1,NULL);
+  addrToStableHash = allocHashTable();
+
+#ifdef THREADED_RTS
+  initMutex(&stable_mutex);
+#endif
+}
+
+/*
+ * get at the real stuff...remove indirections.
+ *
+ * ToDo: move to a better home.
+ */
+static
+StgClosure*
+removeIndirections(StgClosure* p)
+{
+ StgClosure* q = p;
+
+ while (get_itbl(q)->type == IND ||
+ get_itbl(q)->type == IND_STATIC ||
+ get_itbl(q)->type == IND_OLDGEN ||
+ get_itbl(q)->type == IND_PERM ||
+ get_itbl(q)->type == IND_OLDGEN_PERM ) {
+ q = ((StgInd *)q)->indirectee;
+ }
+ return q;
+}
+
+/* Core of lookupStableName.  Must be called with the table initialised
+ * and (in the threaded RTS) stable_mutex held. */
+static StgWord
+lookupStableName_(StgPtr p)
+{
+  StgWord sn;
+  void* sn_tmp;
+
+  if (stable_ptr_free == NULL) {
+    enlargeStablePtrTable();
+  }
+
+  /* removing indirections increases the likelihood
+   * of finding a match in the stable name hash table.
+   */
+  p = (StgPtr)removeIndirections((StgClosure*)p);
+
+  sn_tmp = lookupHashTable(addrToStableHash,(W_)p);
+  sn = (StgWord)sn_tmp;
+
+  if (sn != 0) {
+    /* already have a stable name for this object: reuse it */
+    ASSERT(stable_ptr_table[sn].addr == p);
+    IF_DEBUG(stable,debugBelch("cached stable name %ld at %p\n",sn,p));
+    return sn;
+  } else {
+    /* pop a fresh entry off the free list; its index is the stable name */
+    sn = stable_ptr_free - stable_ptr_table;
+    stable_ptr_free = (snEntry*)(stable_ptr_free->addr);
+    stable_ptr_table[sn].ref = 0;
+    stable_ptr_table[sn].addr = p;
+    stable_ptr_table[sn].sn_obj = NULL;
+    /* IF_DEBUG(stable,debugBelch("new stable name %d at %p\n",sn,p)); */
+
+    /* add the new stable name to the hash table */
+    insertHashTable(addrToStableHash, (W_)p, (void *)sn);
+
+    return sn;
+  }
+}
+
+/* Return the stable name (table index) for closure p, allocating a new
+ * entry if p has none yet.  Thread-safe via stable_mutex. */
+StgWord
+lookupStableName(StgPtr p)
+{
+  StgWord res;
+
+  initStablePtrTable();
+  ACQUIRE_LOCK(&stable_mutex);
+  res = lookupStableName_(p);
+  RELEASE_LOCK(&stable_mutex);
+  return res;
+}
+
+/* Return entry sn to the free list.  Only legal once its StableName
+ * object is dead (sn_obj == NULL); also removes the address from the
+ * addr->stable-name hash table if the target is still recorded. */
+STATIC_INLINE void
+freeStableName(snEntry *sn)
+{
+  ASSERT(sn->sn_obj == NULL);
+  if (sn->addr != NULL) {
+      removeHashTable(addrToStableHash, (W_)sn->addr, NULL);
+  }
+  sn->addr = (P_)stable_ptr_free;   /* link into the free list */
+  stable_ptr_free = sn;
+}
+
+/* Create (or reuse) a stable pointer for p.  A stable pointer is just a
+ * stable-name table index with a positive reference count, which keeps
+ * the target alive across GCs.  Pair with freeStablePtr. */
+StgStablePtr
+getStablePtr(StgPtr p)
+{
+  StgWord sn;
+
+  initStablePtrTable();
+  ACQUIRE_LOCK(&stable_mutex);
+  sn = lookupStableName_(p);
+  stable_ptr_table[sn].ref++;
+  RELEASE_LOCK(&stable_mutex);
+  return (StgStablePtr)(sn);
+}
+
+/* Drop one reference to the stable pointer sp. */
+void
+freeStablePtr(StgStablePtr sp)
+{
+    snEntry *sn;
+
+    initStablePtrTable();
+    ACQUIRE_LOCK(&stable_mutex);
+
+    sn = &stable_ptr_table[(StgWord)sp];
+
+    ASSERT((StgWord)sp < SPT_size  &&  sn->addr != NULL  &&  sn->ref > 0);
+
+    sn->ref--;
+
+    // If this entry has no StableName attached, then just free it
+    // immediately.  This is important; it might be a while before the
+    // next major GC which actually collects the entry.
+    if (sn->sn_obj == NULL && sn->ref == 0) {
+	freeStableName(sn);
+    }
+
+    RELEASE_LOCK(&stable_mutex);
+}
+
+/* Double the stable-pointer table and thread the new upper half onto
+ * the free list.  Existing indices stay valid because the table is
+ * reallocated in place (entries keep their offsets). */
+void
+enlargeStablePtrTable(void)
+{
+  nat old_SPT_size = SPT_size;
+
+  // 2nd and subsequent times
+  SPT_size *= 2;
+  stable_ptr_table =
+    stgReallocBytes(stable_ptr_table,
+		    SPT_size * sizeof(snEntry),
+		    "enlargeStablePtrTable");
+
+  initFreeList(stable_ptr_table + old_SPT_size, old_SPT_size, NULL);
+}
+
+/* -----------------------------------------------------------------------------
+ * Treat stable pointers as roots for the garbage collector.
+ *
+ * A stable pointer is any stable name entry with a ref > 0. We'll
+ * take the opportunity to zero the "keep" flags at the same time.
+ * -------------------------------------------------------------------------- */
+
+void
+markStablePtrTable(evac_fn evac)
+{
+    /* Evacuate every entry with ref > 0 (i.e. every stable *pointer*);
+       also snapshot each live entry's address into 'old' so
+       updateStablePtrTable can tell later whether the object moved. */
+    snEntry *p, *end_stable_ptr_table;
+    StgPtr q;
+
+    end_stable_ptr_table = &stable_ptr_table[SPT_size];
+
+    // Mark all the stable *pointers* (not stable names).
+    // _starting_ at index 1; index 0 is unused.
+    for (p = stable_ptr_table+1; p < end_stable_ptr_table; p++) {
+	q = p->addr;
+
+	// Internal pointers are free slots.  If q == NULL, it's a
+	// stable name where the object has been GC'd, but the
+	// StableName object (sn_obj) is still alive.
+	if (q && (q < (P_)stable_ptr_table || q >= (P_)end_stable_ptr_table)) {
+
+	    // save the current addr away: we need to be able to tell
+	    // whether the objects moved in order to be able to update
+	    // the hash table later.
+	    p->old = p->addr;
+
+	    // if the ref is non-zero, treat addr as a root
+	    if (p->ref != 0) {
+		evac((StgClosure **)&p->addr);
+	    }
+	}
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ * Thread the stable pointer table for compacting GC.
+ *
+ * Here we must call the supplied evac function for each pointer into
+ * the heap from the stable pointer table, because the compacting
+ * collector may move the object it points to.
+ * -------------------------------------------------------------------------- */
+
+void
+threadStablePtrTable( evac_fn evac )
+{
+    /* For compacting GC: thread both the StableName object pointer and
+       the target address of every live entry, since the compactor may
+       move either.  Free slots (in-table addr links) are skipped. */
+    snEntry *p, *end_stable_ptr_table;
+    StgPtr q;
+
+    end_stable_ptr_table = &stable_ptr_table[SPT_size];
+
+    for (p = stable_ptr_table+1; p < end_stable_ptr_table; p++) {
+
+	if (p->sn_obj != NULL) {
+	    evac((StgClosure **)&p->sn_obj);
+	}
+
+	q = p->addr;
+	if (q && (q < (P_)stable_ptr_table || q >= (P_)end_stable_ptr_table)) {
+	    evac((StgClosure **)&p->addr);
+	}
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ * Garbage collect any dead entries in the stable pointer table.
+ *
+ * A dead entry has:
+ *
+ * - a zero reference count
+ * - a dead sn_obj
+ *
+ * Both of these conditions must be true in order to re-use the stable
+ * name table entry. We can re-use stable name table entries for live
+ * heap objects, as long as the program has no StableName objects that
+ * refer to the entry.
+ * -------------------------------------------------------------------------- */
+
+void
+gcStablePtrTable( void )
+{
+    /* Reclaim entries whose StableName object is dead and whose ref
+       count is zero; for surviving zero-ref entries, update addr with
+       the target's post-GC location (NULL if the target died). */
+    snEntry *p, *end_stable_ptr_table;
+    StgPtr q;
+
+    end_stable_ptr_table = &stable_ptr_table[SPT_size];
+
+    // NOTE: _starting_ at index 1; index 0 is unused.
+    for (p = stable_ptr_table + 1; p < end_stable_ptr_table; p++) {
+
+	// Update the pointer to the StableName object, if there is one
+	if (p->sn_obj != NULL) {
+	    p->sn_obj = isAlive(p->sn_obj);
+	}
+
+	// Internal pointers are free slots.  If q == NULL, it's a
+	// stable name where the object has been GC'd, but the
+	// StableName object (sn_obj) is still alive.
+	q = p->addr;
+	if (q && (q < (P_)stable_ptr_table || q >= (P_)end_stable_ptr_table)) {
+
+	    // StableNames only:
+	    if (p->ref == 0) {
+		if (p->sn_obj == NULL) {
+		    // StableName object is dead
+		    freeStableName(p);
+		    IF_DEBUG(stable, debugBelch("GC'd Stable name %ld\n",
+						p - stable_ptr_table));
+		    continue;
+
+		} else {
+		    p->addr = (StgPtr)isAlive((StgClosure *)p->addr);
+		    IF_DEBUG(stable, debugBelch("Stable name %ld still alive at %p, ref %ld\n", p - stable_ptr_table, p->addr, p->ref));
+		}
+	    }
+	}
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ * Update the StablePtr/StableName hash table
+ *
+ * The boolean argument 'full' indicates that a major collection is
+ * being done, so we might as well throw away the hash table and build
+ * a new one. For a minor collection, we just re-hash the elements
+ * that changed.
+ * -------------------------------------------------------------------------- */
+
+void
+updateStablePtrTable(rtsBool full)
+{
+    /* Bring addrToStableHash back in sync with the table after GC,
+       using the 'old' addresses saved by markStablePtrTable. */
+    snEntry *p, *end_stable_ptr_table;
+
+    if (full && addrToStableHash != NULL) {
+	/* major GC: cheaper to rebuild the hash table from scratch */
+	freeHashTable(addrToStableHash,NULL);
+	addrToStableHash = allocHashTable();
+    }
+
+    end_stable_ptr_table = &stable_ptr_table[SPT_size];
+
+    // NOTE: _starting_ at index 1; index 0 is unused.
+    for (p = stable_ptr_table + 1; p < end_stable_ptr_table; p++) {
+
+	if (p->addr == NULL) {
+	    if (p->old != NULL) {
+		// The target has been garbage collected.  Remove its
+		// entry from the hash table.
+		removeHashTable(addrToStableHash, (W_)p->old, NULL);
+		p->old = NULL;
+	    }
+	}
+	else if (p->addr < (P_)stable_ptr_table
+		 || p->addr >= (P_)end_stable_ptr_table) {
+	    // Target still alive, Re-hash this stable name
+	    if (full) {
+		insertHashTable(addrToStableHash, (W_)p->addr,
+				(void *)(p - stable_ptr_table));
+	    } else if (p->addr != p->old) {
+		// minor GC and the object moved: re-key its entry
+		removeHashTable(addrToStableHash, (W_)p->old, NULL);
+		insertHashTable(addrToStableHash, (W_)p->addr,
+				(void *)(p - stable_ptr_table));
+	    }
+	}
+    }
+}
diff --git a/rts/Stats.c b/rts/Stats.c
new file mode 100644
index 0000000000..28d09bdbed
--- /dev/null
+++ b/rts/Stats.c
@@ -0,0 +1,632 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Statistics and timing-related functions.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "MBlock.h"
+#include "Schedule.h"
+#include "Stats.h"
+#include "ParTicky.h" /* ToDo: move into Rts.h */
+#include "Profiling.h"
+#include "Storage.h"
+#include "GetTime.h"
+
+/* huh? */
+#define BIG_STRING_LEN 512
+
+#define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND)
+
+static Ticks ElapsedTimeStart = 0;
+
+static Ticks InitUserTime = 0;
+static Ticks InitElapsedTime = 0;
+static Ticks InitElapsedStamp = 0;
+
+static Ticks MutUserTime = 0;
+static Ticks MutElapsedTime = 0;
+static Ticks MutElapsedStamp = 0;
+
+static Ticks ExitUserTime = 0;
+static Ticks ExitElapsedTime = 0;
+
+static ullong GC_tot_alloc = 0;
+static ullong GC_tot_copied = 0;
+static ullong GC_tot_scavd_copied = 0;
+
+static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */
+static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */
+
+#ifdef PROFILING
+static Ticks RP_start_time = 0, RP_tot_time = 0; /* retainer prof user time */
+static Ticks RPe_start_time = 0, RPe_tot_time = 0; /* retainer prof elap time */
+
+static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time
+static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time
+#endif
+
+#ifdef PROFILING
+#define PROF_VAL(x) (x)
+#else
+#define PROF_VAL(x) 0
+#endif
+
+static lnat MaxResidency = 0; // in words; for stats only
+static lnat AvgResidency = 0;
+static lnat ResidencySamples = 0; // for stats only
+
+static lnat GC_start_faults = 0, GC_end_faults = 0;
+
+static Ticks *GC_coll_times;
+
+static void statsPrintf( char *s, ... )
+ GNUC3_ATTRIBUTE(format (printf, 1, 2));
+
+static void statsFlush( void );
+static void statsClose( void );
+
+/* Total elapsed (wall-clock) time spent in GC so far. */
+Ticks stat_getElapsedGCTime(void)
+{
+    return GCe_tot_time;
+}
+
+/* mut_user_time_during_GC() and mut_user_time()
+ *
+ * The former function can be used to get the current mutator time
+ * *during* a GC, i.e. between stat_startGC and stat_endGC. This is
+ * used in the heap profiler for accurately time stamping the heap
+ * sample.
+ *
+ * ATTENTION: mut_user_time_during_GC() relies on GC_start_time being
+ * defined in stat_startGC() - to minimise system calls,
+ * GC_start_time is, however, only defined when really needed (check
+ * stat_startGC() for details)
+ */
+double
+mut_user_time_during_GC( void )
+{
+  /* Mutator CPU time as of the start of the current GC: the CPU time at
+     GC entry minus everything previously accounted to GC (and, when
+     profiling, to retainer/heap-census profiling).  Only meaningful
+     between stat_startGC and stat_endGC -- see the comment above about
+     when GC_start_time is defined. */
+  return TICK_TO_DBL(GC_start_time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time));
+}
+
+double
+mut_user_time( void )
+{
+  /* Current mutator CPU time: total process CPU time minus GC (and
+     profiling) overheads. */
+  Ticks user;
+  user = getProcessCPUTime();
+  return TICK_TO_DBL(user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time));
+}
+
+#ifdef PROFILING
+/*
+  mut_user_time_during_RP() is similar to mut_user_time_during_GC();
+  it returns the MUT time during retainer profiling.
+  The same is for mut_user_time_during_HC();
+ */
+double
+mut_user_time_during_RP( void )
+{
+  return TICK_TO_DBL(RP_start_time - GC_tot_time - RP_tot_time - HC_tot_time);
+}
+
+double
+mut_user_time_during_heap_census( void )
+{
+  return TICK_TO_DBL(HC_start_time - GC_tot_time - RP_tot_time - HC_tot_time);
+}
+#endif /* PROFILING */
+
+void
+initStats(void)
+{
+    /* Print the column headers for per-GC statistics (verbose mode
+       only) and allocate the per-generation GC time accumulators. */
+    nat i;
+
+    if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
+	statsPrintf("    Alloc    Copied     Live    GC    GC     TOT     TOT  Page Flts\n");
+	statsPrintf("    bytes     bytes     bytes  user  elap    user    elap\n");
+    }
+    GC_coll_times =
+	(Ticks *)stgMallocBytes(
+	    sizeof(Ticks)*RtsFlags.GcFlags.generations,
+	    "initStats");
+    for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
+	GC_coll_times[i] = 0;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ Initialisation time...
+ -------------------------------------------------------------------------- */
+
+void
+stat_startInit(void)
+{
+    /* Record the wall-clock timestamp at RTS startup; it is the
+       baseline for all elapsed-time figures reported later. */
+    ElapsedTimeStart = getProcessElapsedTime();
+}
+
+void
+stat_endInit(void)
+{
+    /* Record CPU and elapsed time consumed by RTS initialisation. */
+    Ticks user, elapsed;
+
+    getProcessTimes(&user, &elapsed);
+
+    InitUserTime = user;
+    InitElapsedStamp = elapsed;
+    /* clamp at zero -- the elapsed clock can appear to go backwards
+       (presumably timer granularity or clock adjustment; that is why
+       this guard exists) */
+    if (ElapsedTimeStart > elapsed) {
+	InitElapsedTime = 0;
+    } else {
+	InitElapsedTime = elapsed - ElapsedTimeStart;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ stat_startExit and stat_endExit
+
+ These two measure the time taken in shutdownHaskell().
+ -------------------------------------------------------------------------- */
+
+void
+stat_startExit(void)
+{
+    /* Called at the start of shutdownHaskell(): close off the mutator
+       accounting period.  Mutator time = total so far minus init, GC
+       and profiling overheads. */
+    Ticks user, elapsed;
+
+    getProcessTimes(&user, &elapsed);
+
+    MutElapsedStamp = elapsed;
+    MutElapsedTime = elapsed - GCe_tot_time -
+	PROF_VAL(RPe_tot_time + HCe_tot_time) - InitElapsedStamp;
+    if (MutElapsedTime < 0) { MutElapsedTime = 0; }	/* sometimes -0.00 */
+
+    MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
+    if (MutUserTime < 0) { MutUserTime = 0; }
+}
+
+void
+stat_endExit(void)
+{
+    /* Called at the end of shutdownHaskell(): attribute everything
+       since stat_startExit to the exit phase, clamping at zero. */
+    Ticks user, elapsed;
+
+    getProcessTimes(&user, &elapsed);
+
+    ExitUserTime = user - MutUserTime - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
+    ExitElapsedTime = elapsed - MutElapsedStamp;
+    if (ExitUserTime < 0) {
+	ExitUserTime = 0;
+    }
+    if (ExitElapsedTime < 0) {
+	ExitElapsedTime = 0;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ Called at the beginning of each GC
+ -------------------------------------------------------------------------- */
+
+/* non-zero while the " GC " marker printed by -B2 is on screen, so
+   stat_endGC knows to rub it out again */
+static nat rub_bell = 0;
+
+/* initialise global variables needed during GC
+ *
+ * * GC_start_time is read in mut_user_time_during_GC(), which in turn is
+ *   needed if either PROFILING or DEBUGing is enabled
+ */
+void
+stat_startGC(void)
+{
+    nat bell = RtsFlags.GcFlags.ringBell;
+
+    if (bell) {
+	if (bell > 1) {
+	    debugBelch(" GC ");
+	    rub_bell = 1;
+	} else {
+	    debugBelch("\007");
+	}
+    }
+
+#if defined(PROFILING) || defined(DEBUG)
+    GC_start_time = getProcessCPUTime();  // needed in mut_user_time_during_GC()
+#endif
+
+    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
+#if !defined(PROFILING) && !defined(DEBUG)
+	/* not already taken above: only pay for the system call when
+	   statistics were actually requested */
+        GC_start_time = getProcessCPUTime();
+#endif
+	GCe_start_time = getProcessElapsedTime();
+	if (RtsFlags.GcFlags.giveStats) {
+	    GC_start_faults = getPageFaults();
+	}
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ Called at the end of each GC
+ -------------------------------------------------------------------------- */
+
+/* Accumulate per-GC statistics: CPU/elapsed time for this collection,
+ * bytes allocated/copied/live (all in words, scaled by sizeof(W_) when
+ * printed), and residency samples on major GCs. With -S (verbose stats)
+ * also prints one line per GC. */
+void
+stat_endGC (lnat alloc, lnat live, lnat copied,
+ lnat scavd_copied, lnat gen)
+{
+ if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
+ Ticks time, etime, gc_time, gc_etime;
+
+ getProcessTimes(&time, &etime);
+ gc_time = time - GC_start_time;
+ gc_etime = etime - GCe_start_time;
+
+ if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
+ nat faults = getPageFaults();
+
+ statsPrintf("%9ld %9ld %9ld",
+ alloc*sizeof(W_), (copied+scavd_copied)*sizeof(W_),
+ live*sizeof(W_));
+ statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2ld)\n",
+ TICK_TO_DBL(gc_time),
+ TICK_TO_DBL(gc_etime),
+ TICK_TO_DBL(time),
+ TICK_TO_DBL(etime - ElapsedTimeStart),
+ faults - GC_start_faults,
+ GC_start_faults - GC_end_faults,
+ gen);
+
+ GC_end_faults = faults;
+ statsFlush();
+ }
+
+ /* per-generation accumulated GC CPU time (printed by stat_exit) */
+ GC_coll_times[gen] += gc_time;
+
+ GC_tot_copied += (ullong) copied;
+ GC_tot_scavd_copied += (ullong) scavd_copied;
+ GC_tot_alloc += (ullong) alloc;
+ GC_tot_time += gc_time;
+ GCe_tot_time += gc_etime;
+
+#if defined(THREADED_RTS)
+ {
+ /* also charge the GC time to the Task that performed it, if any */
+ Task *task;
+ if ((task = myTask()) != NULL) {
+ task->gc_time += gc_time;
+ task->gc_etime += gc_etime;
+ }
+ }
+#endif
+
+ if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */
+ if (live > MaxResidency) {
+ MaxResidency = live;
+ }
+ ResidencySamples++;
+ AvgResidency += live;
+ }
+ }
+
+ /* undo the " GC " marker printed by stat_startGC with bell > 1 */
+ if (rub_bell) {
+ debugBelch("\b\b\b \b\b\b");
+ rub_bell = 0;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Called at the beginning of each Retainer Profiling
+ -------------------------------------------------------------------------- */
+#ifdef PROFILING
+/* Record the user/elapsed timestamps at the start of a retainer-profiling
+ * pass; stat_endRP() subtracts these to accumulate RP_tot_time/RPe_tot_time. */
+void
+stat_startRP(void)
+{
+ Ticks user, elapsed;
+ getProcessTimes( &user, &elapsed );
+
+ RP_start_time = user;
+ RPe_start_time = elapsed;
+}
+#endif /* PROFILING */
+
+/* -----------------------------------------------------------------------------
+ Called at the end of each Retainer Profiling
+ -------------------------------------------------------------------------- */
+
+#ifdef PROFILING
+/* Close off one retainer-profiling pass: accumulate its user/elapsed time
+ * into the RP totals and write a summary line to the profile file.
+ * The extra DEBUG_RETAINER parameters report stack high-water marks. */
+void
+stat_endRP(
+ nat retainerGeneration,
+#ifdef DEBUG_RETAINER
+ nat maxCStackSize,
+ int maxStackSize,
+#endif
+ double averageNumVisit)
+{
+ Ticks user, elapsed;
+ getProcessTimes( &user, &elapsed );
+
+ RP_tot_time += user - RP_start_time;
+ RPe_tot_time += elapsed - RPe_start_time;
+
+ fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n",
+ retainerGeneration, mut_user_time_during_RP());
+#ifdef DEBUG_RETAINER
+ fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
+ /* NOTE(review): maxStackSize is an int printed with %u — works on common
+ * ABIs but %d would match the declared type; confirm intent. */
+ fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
+#endif
+ fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
+}
+#endif /* PROFILING */
+
+/* -----------------------------------------------------------------------------
+ Called at the beginning of each heap census
+ -------------------------------------------------------------------------- */
+#ifdef PROFILING
+/* Record the user/elapsed timestamps at the start of a heap census;
+ * stat_endHeapCensus() subtracts these to accumulate HC totals. */
+void
+stat_startHeapCensus(void)
+{
+ Ticks user, elapsed;
+ getProcessTimes( &user, &elapsed );
+
+ HC_start_time = user;
+ HCe_start_time = elapsed;
+}
+#endif /* PROFILING */
+
+/* -----------------------------------------------------------------------------
+ Called at the end of each heap census
+ -------------------------------------------------------------------------- */
+#ifdef PROFILING
+/* Close off one heap census: add its duration to the heap-census (PROF)
+ * time totals reported by stat_exit(). */
+void
+stat_endHeapCensus(void)
+{
+ Ticks user, elapsed;
+ getProcessTimes( &user, &elapsed );
+
+ HC_tot_time += user - HC_start_time;
+ HCe_tot_time += elapsed - HCe_start_time;
+}
+#endif /* PROFILING */
+
+/* -----------------------------------------------------------------------------
+ Called at the end of execution
+
+ NOTE: number of allocations is not entirely accurate: it doesn't
+ take into account the few bytes at the end of the heap that
+ were left unused when the heap-check failed.
+ -------------------------------------------------------------------------- */
+
+/* Print the final statistics report, according to the -s/-S/-t flag level
+ * (SUMMARY_GC_STATS / VERBOSE_GC_STATS / ONELINE_GC_STATS), then flush and
+ * close the stats file. 'alloc' is the allocation (in words) made since
+ * the last GC, added to GC_tot_alloc for the grand total. */
+void
+stat_exit(int alloc)
+{
+ if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
+
+ char temp[BIG_STRING_LEN];
+ Ticks time;
+ Ticks etime;
+ nat g, total_collections = 0;
+
+ getProcessTimes( &time, &etime );
+ etime -= ElapsedTimeStart;
+
+ GC_tot_alloc += alloc;
+
+ /* Count total garbage collections */
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+ total_collections += generations[g].collections;
+
+ /* avoid divide by zero if time is measured as 0.00 seconds -- SDM
+ * (NOTE(review): Ticks compared against the double literal 0.0 —
+ * harmless, but an integer 0 would match the type) */
+ if (time == 0.0) time = 1;
+ if (etime == 0.0) etime = 1;
+
+ if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
+ /* final line of the -S table, covering allocation since last GC */
+ statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
+ statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
+ }
+
+ if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
+ /* byte totals, comma-formatted into temp[] */
+ ullong_format_string(GC_tot_alloc*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%11s bytes allocated in the heap\n", temp);
+
+ ullong_format_string(GC_tot_copied*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%11s bytes copied during GC (scavenged)\n", temp);
+
+ ullong_format_string(GC_tot_scavd_copied*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%11s bytes copied during GC (not scavenged)\n", temp);
+
+ /* residency is only sampled on major GCs (see stat_endGC) */
+ if ( ResidencySamples > 0 ) {
+ ullong_format_string(MaxResidency*sizeof(W_),
+ temp, rtsTrue/*commas*/);
+ statsPrintf("%11s bytes maximum residency (%ld sample(s))\n",
+ temp, ResidencySamples);
+ }
+ statsPrintf("\n");
+
+ /* Print garbage collections in each gen */
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ statsPrintf("%11d collections in generation %d (%6.2fs)\n",
+ generations[g].collections, g,
+ TICK_TO_DBL(GC_coll_times[g]));
+ }
+
+ statsPrintf("\n%11ld Mb total memory in use\n\n",
+ mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
+
+#if defined(THREADED_RTS)
+ {
+ /* per-Task MUT/GC breakdown, walking the all_tasks list */
+ nat i;
+ Task *task;
+ for (i = 0, task = all_tasks;
+ task != NULL;
+ i++, task = task->all_link) {
+ statsPrintf(" Task %2d %-8s : MUT time: %6.2fs (%6.2fs elapsed)\n"
+ " GC time: %6.2fs (%6.2fs elapsed)\n\n",
+ i,
+ (task->tso == NULL) ? "(worker)" : "(bound)",
+ TICK_TO_DBL(task->mut_time),
+ TICK_TO_DBL(task->mut_etime),
+ TICK_TO_DBL(task->gc_time),
+ TICK_TO_DBL(task->gc_etime));
+ }
+ }
+#endif
+
+ /* per-phase time breakdown (INIT/MUT/GC[/RP/PROF]/EXIT/Total) */
+ statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(InitUserTime), TICK_TO_DBL(InitElapsedTime));
+ statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime));
+ statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
+#ifdef PROFILING
+ statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
+ statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
+#endif
+ statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n",
+ TICK_TO_DBL(ExitUserTime), TICK_TO_DBL(ExitElapsedTime));
+ statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n",
+ TICK_TO_DBL(time), TICK_TO_DBL(etime));
+ statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n",
+ TICK_TO_DBL(GC_tot_time)*100/TICK_TO_DBL(time),
+ TICK_TO_DBL(GCe_tot_time)*100/TICK_TO_DBL(etime));
+
+ /* allocation rate = total bytes allocated / mutator CPU time,
+ * guarding against a zero mutator time */
+ if (time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
+ ullong_format_string(0, temp, rtsTrue/*commas*/);
+ else
+ ullong_format_string(
+ (ullong)((GC_tot_alloc*sizeof(W_))/
+ TICK_TO_DBL(time - GC_tot_time -
+ PROF_VAL(RP_tot_time + HC_tot_time))),
+ temp, rtsTrue/*commas*/);
+
+ statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp);
+
+ statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
+ TICK_TO_DBL(time - GC_tot_time -
+ PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100
+ / TICK_TO_DBL(time),
+ TICK_TO_DBL(time - GC_tot_time -
+ PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100
+ / TICK_TO_DBL(etime));
+ }
+
+ if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
+ /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
+ statsPrintf("<<ghc: %llu bytes, ", GC_tot_alloc*(ullong)sizeof(W_));
+ statsPrintf("%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n",
+ total_collections,
+ ResidencySamples == 0 ? 0 :
+ AvgResidency*sizeof(W_)/ResidencySamples,
+ MaxResidency*sizeof(W_),
+ ResidencySamples,
+ (unsigned long)(mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
+ TICK_TO_DBL(InitUserTime), TICK_TO_DBL(InitElapsedTime),
+ TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime),
+ TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
+ }
+
+ statsFlush();
+ statsClose();
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ stat_describe_gens
+
+ Produce some detailed info on the state of the generational GC.
+ -------------------------------------------------------------------------- */
+#ifdef DEBUG
+/* Debug dump (DEBUG builds only): for every generation, print the mutable
+ * list size, and for every step the block count, approximate live bytes,
+ * and the number of large objects. */
+void
+statDescribeGens(void)
+{
+ nat g, s, mut, lge;
+ lnat live;
+ bdescr *bd;
+ step *step; /* NOTE(review): variable shadows the 'step' type name */
+
+ debugBelch(
+" Gen Steps Max Mutable Step Blocks Live Large\n"
+" Blocks Closures Objects\n");
+
+ mut = 0;
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ /* size of this generation's mutable list, in words */
+ for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+ mut += bd->free - bd->start;
+ }
+
+ debugBelch("%8d %8d %8d %9d", g, generations[g].n_steps,
+ generations[g].max_blocks, mut);
+
+ for (s = 0; s < generations[g].n_steps; s++) {
+ step = &generations[g].steps[s];
+ live = 0; /* NOTE(review): immediately overwritten below */
+ for (bd = step->large_objects, lge = 0; bd; bd = bd->link) {
+ lge++;
+ }
+ live = step->n_large_blocks * BLOCK_SIZE;
+ bd = step->blocks;
+ // This live figure will be slightly less than the "live" figure
+ // given by +RTS -Sstderr, because we don't count the
+ // slop at the end of each block.
+ for (; bd; bd = bd->link) {
+ live += (bd->free - bd->start) * sizeof(W_);
+ }
+ if (s != 0) {
+ /* align continuation rows under the Step column */
+ debugBelch("%36s","");
+ }
+ debugBelch("%6d %8d %8d %8d\n", s, step->n_blocks,
+ live, lge);
+ }
+ }
+ debugBelch("\n");
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ Stats available via a programmatic interface, so eg. GHCi can time
+ each compilation and expression evaluation.
+ -------------------------------------------------------------------------- */
+
+/* Total bytes allocated so far (words * word size), for the programmatic
+ * stats interface (e.g. GHCi timing). */
+extern HsInt64 getAllocations( void )
+{ return (HsInt64)total_allocated * sizeof(W_); }
+
+/* -----------------------------------------------------------------------------
+ Dumping stuff in the stats file, or via the debug message interface
+ -------------------------------------------------------------------------- */
+
+/* printf-style output routed to the stats file if one was given with
+ * +RTS --stats-file, otherwise to the debug/error channel. */
+static void
+statsPrintf( char *s, ... )
+{
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ va_list ap;
+
+ va_start(ap,s);
+ if (sf == NULL) {
+ vdebugBelch(s,ap);
+ } else {
+ vfprintf(sf, s, ap);
+ }
+ va_end(ap);
+}
+
+/* Flush the stats file, if any (output to debugBelch needs no flushing here). */
+static void
+statsFlush( void )
+{
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ if (sf != NULL) {
+ fflush(sf);
+ }
+}
+
+/* Close the stats file, if any; called once from stat_exit(). */
+static void
+statsClose( void )
+{
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ if (sf != NULL) {
+ fclose(sf);
+ }
+}
diff --git a/rts/Stats.h b/rts/Stats.h
new file mode 100644
index 0000000000..20bc0155ad
--- /dev/null
+++ b/rts/Stats.h
@@ -0,0 +1,56 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Statistics and timing-related functions.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STATS_H
+#define STATS_H
+
+#include "GetTime.h"
+
+void stat_startInit(void);
+void stat_endInit(void);
+
+void stat_startGC(void);
+void stat_endGC (lnat alloc, lnat live,
+ lnat copied, lnat scavd_copied, lnat gen);
+
+#ifdef PROFILING
+void stat_startRP(void);
+void stat_endRP(nat,
+#ifdef DEBUG_RETAINER
+ nat, int,
+#endif
+ double);
+#endif /* PROFILING */
+
+#if defined(PROFILING) || defined(DEBUG)
+void stat_startHeapCensus(void);
+void stat_endHeapCensus(void);
+#endif
+
+void stat_startExit(void);
+void stat_endExit(void);
+
+void stat_exit(int alloc);
+void stat_workerStop(void);
+
+void initStats(void);
+
+double mut_user_time_during_GC(void);
+double mut_user_time(void);
+
+#ifdef PROFILING
+double mut_user_time_during_RP(void);
+double mut_user_time_during_heap_census(void);
+#endif /* PROFILING */
+
+void statDescribeGens( void );
+HsInt64 getAllocations( void );
+
+Ticks stat_getElapsedGCTime(void);
+
+#endif /* STATS_H */
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
new file mode 100644
index 0000000000..c1afc16559
--- /dev/null
+++ b/rts/StgCRun.c
@@ -0,0 +1,897 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2003
+ *
+ * STG-to-C glue.
+ *
+ * To run an STG function from C land, call
+ *
+ * rv = StgRun(f,BaseReg);
+ *
+ * where "f" is the STG function to call, and BaseReg is the address of the
+ * RegTable for this run (we might have separate RegTables if we're running
+ * multiple threads on an SMP machine).
+ *
+ * In the end, "f" must JMP to StgReturn (defined below),
+ * passing the return-value "rv" in R1,
+ * to return to the caller of StgRun returning "rv" in
+ * the whatever way C returns a value.
+ *
+ * NOTE: StgRun/StgReturn do *NOT* load or store Hp or any
+ * other registers (other than saving the C callee-saves
+ * registers). Instead, the called function "f" must do that
+ * in STG land.
+ *
+ * GCC will have assumed that pushing/popping of C-stack frames is
+ * going on when it generated its code, and used stack space
+ * accordingly. However, we actually {\em post-process away} all
+ * such stack-framery (see \tr{ghc/driver/ghc-asm.lprl}). Things will
+ * be OK however, if we initially make sure there are
+ * @RESERVED_C_STACK_BYTES@ on the C-stack to begin with, for local
+ * variables.
+ *
+ * -------------------------------------------------------------------------- */
+
+#include "PosixSource.h"
+
+
+/*
+ * We define the following (unused) global register variables, because for
+ * some reason gcc generates sub-optimal code for StgRun() on the Alpha
+ * (unnecessarily saving extra registers on the stack) if we don't.
+ *
+ * Why do it at the top of this file, rather than near StgRun() below? Because
+ * gcc doesn't let us define global register variables after any function
+ * definition has been read. Any point after #include "Stg.h" would be too
+ * late.
+ *
+ * We define alpha_EXTRA_CAREFUL here to save $s6, $f8 and $f9 -- registers
+ * that we don't use but which are callee-save registers. The __divq() routine
+ * in libc.a clobbers $s6.
+ */
+#include "ghcconfig.h"
+#ifdef alpha_HOST_ARCH
+#define alpha_EXTRA_CAREFUL
+register long fake_ra __asm__("$26");
+register long fake_gp __asm__("$29");
+#ifdef alpha_EXTRA_CAREFUL
+register long fake_s6 __asm__("$15");
+register double fake_f8 __asm__("$f8");
+register double fake_f9 __asm__("$f9");
+#endif
+#endif
+
+/* include Stg.h first because we want real machine regs in here: we
+ * have to get the value of R1 back from Stg land to C land intact.
+ */
+#include "Stg.h"
+#include "Rts.h"
+#include "StgRun.h"
+#include "RtsFlags.h"
+#include "OSThreads.h"
+#include "Capability.h"
+
+#ifdef DEBUG
+#include "RtsUtils.h"
+#include "Printer.h"
+#endif
+
+#ifdef USE_MINIINTERPRETER
+
+/* -----------------------------------------------------------------------------
+ any architecture (using miniinterpreter)
+ -------------------------------------------------------------------------- */
+
+/* Miniinterpreter version of StgRun: instead of a real tail-jump, each STG
+ * "function" returns its continuation, which we keep calling until NULL,
+ * then fetch the result from R1. */
+StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg STG_UNUSED)
+{
+ while (f) {
+ IF_DEBUG(interpreter,
+ debugBelch("Jumping to ");
+ printPtr((P_)f); fflush(stdout);
+ debugBelch("\n");
+ );
+ f = (StgFunPtr) (f)();
+ }
+ return (StgRegTable *)R1.p;
+}
+
+/* Returning NULL terminates the dispatch loop in StgRun above. */
+StgFunPtr StgReturn(void)
+{
+ return 0;
+}
+
+#else /* !USE_MINIINTERPRETER */
+
+#ifdef LEADING_UNDERSCORE
+#define STG_RETURN "_StgReturn"
+#else
+#define STG_RETURN "StgReturn"
+#endif
+
+/* -----------------------------------------------------------------------------
+ x86 architecture
+ -------------------------------------------------------------------------- */
+
+#ifdef i386_HOST_ARCH
+
+#ifdef darwin_TARGET_OS
+#define STG_GLOBAL ".globl "
+#else
+#define STG_GLOBAL ".global "
+#endif
+
+/* i386 StgRun: reserve RESERVED_C_STACK_BYTES plus room for four saved
+ * registers on the C stack, save the C callee-saves regs (ebx/esi/edi/ebp)
+ * there, load BaseReg into %ebx and jump into STG code. STG code jumps
+ * back to the StgReturn label defined inside this asm block. */
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg) {
+
+ unsigned char space[ RESERVED_C_STACK_BYTES + 4*sizeof(void *) ];
+ StgRegTable * r;
+
+ __asm__ volatile (
+ /*
+ * save callee-saves registers on behalf of the STG code.
+ */
+ "movl %%esp, %%eax\n\t"
+ "addl %4, %%eax\n\t"
+ "movl %%ebx,0(%%eax)\n\t"
+ "movl %%esi,4(%%eax)\n\t"
+ "movl %%edi,8(%%eax)\n\t"
+ "movl %%ebp,12(%%eax)\n\t"
+ /*
+ * Set BaseReg
+ */
+ "movl %3,%%ebx\n\t"
+ /*
+ * grab the function argument from the stack
+ */
+ "movl %2,%%eax\n\t"
+
+ /*
+ * Darwin note:
+ * The stack pointer has to be aligned to a multiple of 16 bytes at
+ * this point. This works out correctly with gcc 4.0.1, but it might
+ * break at any time in the future. TODO: Make this future-proof.
+ */
+
+ /*
+ * jump to it
+ */
+ "jmp *%%eax\n\t"
+
+ STG_GLOBAL STG_RETURN "\n"
+ STG_RETURN ":\n\t"
+
+ "movl %%esi, %%eax\n\t" /* Return value in R1 */
+
+ /*
+ * restore callee-saves registers. (Don't stomp on %%eax!)
+ */
+ "movl %%esp, %%edx\n\t"
+ "addl %4, %%edx\n\t"
+ "movl 0(%%edx),%%ebx\n\t" /* restore the registers saved above */
+ "movl 4(%%edx),%%esi\n\t"
+ "movl 8(%%edx),%%edi\n\t"
+ "movl 12(%%edx),%%ebp\n\t"
+
+ : "=&a" (r), "=m" (space)
+ : "m" (f), "m" (basereg), "i" (RESERVED_C_STACK_BYTES)
+ : "edx" /* stomps on %edx */
+ );
+
+ return r;
+}
+
+#endif
+
+/* ----------------------------------------------------------------------------
+ x86-64 is almost the same as plain x86.
+
+ I've done it using entirely inline assembler, because I couldn't
+ get gcc to generate the correct subtraction from %rsp by using
+ the local array variable trick. It didn't seem to reserve
+ enough space. Oh well, it's not much harder this way.
+
+ ------------------------------------------------------------------------- */
+
+#ifdef x86_64_HOST_ARCH
+
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+/* x86-64 StgRun: defined entirely in inline asm (the function below only
+ * emits the code; it is never called). Saves the six callee-save GP regs
+ * above the reserved STG stack area, puts BaseReg (arg 2, %rsi) into %r13,
+ * and jumps to f (arg 1, %rdi). See the HACK comment below for why the
+ * frame size includes an extra 8 bytes. */
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ /*
+ * save callee-saves registers on behalf of the STG code.
+ */
+ ".globl StgRun\n"
+ "StgRun:\n\t"
+ "subq %0, %%rsp\n\t"
+ "movq %%rsp, %%rax\n\t"
+ "addq %0-48, %%rax\n\t"
+ "movq %%rbx,0(%%rax)\n\t"
+ "movq %%rbp,8(%%rax)\n\t"
+ "movq %%r12,16(%%rax)\n\t"
+ "movq %%r13,24(%%rax)\n\t"
+ "movq %%r14,32(%%rax)\n\t"
+ "movq %%r15,40(%%rax)\n\t"
+ /*
+ * Set BaseReg
+ */
+ "movq %%rsi,%%r13\n\t"
+ /*
+ * grab the function argument from the stack, and jump to it.
+ */
+ "movq %%rdi,%%rax\n\t"
+ "jmp *%%rax\n\t"
+
+ ".global " STG_RETURN "\n"
+ STG_RETURN ":\n\t"
+
+ "movq %%rbx, %%rax\n\t" /* Return value in R1 */
+
+ /*
+ * restore callee-saves registers. (Don't stomp on %%rax!)
+ */
+ "movq %%rsp, %%rdx\n\t"
+ "addq %0-48, %%rdx\n\t"
+ "movq 0(%%rdx),%%rbx\n\t" /* restore the registers saved above */
+ "movq 8(%%rdx),%%rbp\n\t"
+ "movq 16(%%rdx),%%r12\n\t"
+ "movq 24(%%rdx),%%r13\n\t"
+ "movq 32(%%rdx),%%r14\n\t"
+ "movq 40(%%rdx),%%r15\n\t"
+ "addq %0, %%rsp\n\t"
+ "retq"
+
+ : : "i"(RESERVED_C_STACK_BYTES+48+8 /*stack frame size*/));
+ /*
+ HACK alert!
+
+ The x86_64 ABI specifies that on a procedure call, %rsp is
+ aligned on a 16-byte boundary + 8. That is, the first
+ argument on the stack after the return address will be
+ 16-byte aligned.
+
+ Which should be fine: RESERVED_C_STACK_BYTES+48 is a multiple
+ of 16 bytes.
+
+ BUT... when we do a C-call from STG land, gcc likes to put the
+ stack alignment adjustment in the prolog. eg. if we're calling
+ a function with arguments in regs, gcc will insert 'subq $8,%rsp'
+ in the prolog, to keep %rsp aligned (the return address is 8
+ bytes, remember). The mangler throws away the prolog, so we
+ lose the stack alignment.
+
+ The hack is to add this extra 8 bytes to our %rsp adjustment
+ here, so that throughout STG code, %rsp is 16-byte aligned,
+ ready for a C-call.
+
+ A quick way to see if this is wrong is to compile this code:
+
+ main = System.Exit.exitWith ExitSuccess
+
+ And run it with +RTS -sstderr. The stats code in the RTS, in
+ particular statsPrintf(), relies on the stack alignment because
+ it saves the %xmm regs on the stack, so it'll fall over if the
+ stack isn't aligned, and calling exitWith from Haskell invokes
+ shutdownHaskellAndExit using a C call.
+
+ Future gcc releases will almost certainly break this hack...
+ */
+}
+
+#endif /* x86-64 */
+
+/* -----------------------------------------------------------------------------
+ Sparc architecture
+
+ --
+ OLD COMMENT from GHC-3.02:
+
+ We want tailjumps to be calls, because `call xxx' is the only Sparc
+ branch that allows an arbitrary label as a target. (Gcc's ``goto
+ *target'' construct ends up loading the label into a register and
+ then jumping, at the cost of two extra instructions for the 32-bit
+ load.)
+
+ When entering the threaded world, we stash our return address in a
+ known location so that \tr{%i7} is available as an extra
+ callee-saves register. Of course, we have to restore this when
+ coming out of the threaded world.
+
+ I hate this god-forsaken architecture. Since the top of the
+ reserved stack space is used for globals and the bottom is reserved
+ for outgoing arguments, we have to stick our return address
+ somewhere in the middle. Currently, I'm allowing 100 extra
+ outgoing arguments beyond the first 6. --JSM
+
+ Updated info (GHC 4.06): we don't appear to use %i7 any more, so
+ I'm not sure whether we still need to save it. Incidentally, what
+ does the last paragraph above mean when it says "the top of the
+ stack is used for globals"? What globals? --SDM
+
+ Updated info (GHC 4.08.2): not saving %i7 any more (see below).
+ -------------------------------------------------------------------------- */
+
+#ifdef sparc_HOST_ARCH
+
+/* Sparc StgRun: reserves the STG C-stack area as a local array, calls f()
+ * directly (tailjumps are calls on Sparc — see the comment above), and
+ * places the StgReturn label after the call. The result comes back in R1. */
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg) {
+
+ unsigned char space[RESERVED_C_STACK_BYTES];
+#if 0
+ register void *i7 __asm__("%i7");
+ ((void **)(space))[100] = i7;
+#endif
+ f();
+ __asm__ volatile (
+ ".align 4\n"
+ ".global " STG_RETURN "\n"
+ STG_RETURN ":"
+ : : : "l0","l1","l2","l3","l4","l5","l6","l7");
+ /* we tell the C compiler that l0-l7 are clobbered on return to
+ * StgReturn, otherwise it tries to use these to save eg. the
+ * address of space[100] across the call. The correct thing
+ * to do would be to save all the callee-saves regs, but we
+ * can't be bothered to do that.
+ *
+ * The code that gcc generates for this little fragment is now
+ * terrible. We could do much better by coding it directly in
+ * assembler.
+ */
+#if 0
+ /* updated 4.08.2: we don't save %i7 in the middle of the reserved
+ * space any more, since gcc tries to save its address across the
+ * call to f(), this gets clobbered in STG land and we end up
+ * dereferencing a bogus pointer in StgReturn.
+ */
+ __asm__ volatile ("ld %1,%0"
+ : "=r" (i7) : "m" (((void **)(space))[100]));
+#endif
+ return (StgRegTable *)R1.i;
+}
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ alpha architecture
+
+ "The stack pointer (SP) must at all times denote an address that has octaword
+ alignment. (This restriction has the side effect that the in-memory portion
+ of the argument list, if any, will start on an octaword boundary.) Note that
+ the stack grows toward lower addresses. During a procedure invocation, SP
+ can never be set to a value that is higher than the value of SP at entry to
+ that procedure invocation.
+
+ "The contents of the stack, located above the portion of the argument list
+ (if any) that is passed in memory, belong to the calling procedure. Because
+ they are part of the calling procedure, they should not be read or written
+ by the called procedure, except as specified by indirect arguments or
+ language-controlled up-level references.
+
+ "The SP value might be used by the hardware when raising exceptions and
+ asynchronous interrupts. It must be assumed that the contents of the stack
+ below the current SP value and within the stack for the current thread are
+ continually and unpredictably modified, as specified in the _Alpha
+ Architecture Reference Manual_, and as a result of asynchronous software
+ actions."
+
+ -- Compaq Computer Corporation, Houston. Tru64 UNIX Calling Standard for
+ Alpha Systems, 5.1 edition, August 2000, section 3.2.1. http://www.
+ tru64unix.compaq.com/docs/base_doc/DOCUMENTATION/V51_PDF/ARH9MBTE.PDF
+ -------------------------------------------------------------------------- */
+
+#ifdef alpha_HOST_ARCH
+
+/* Alpha StgRun: manually spill every callee-save integer and FP register
+ * (plus $ra/$gp, and $s6/$f8/$f9 under alpha_EXTRA_CAREFUL) into volatile
+ * locals, drop the stack by RESERVED_C_STACK_BYTES, jump to f via $27 (pv),
+ * and restore everything after STG code jumps back to StgReturn. */
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg)
+{
+ register long real_ra __asm__("$26"); volatile long save_ra;
+ register long real_gp __asm__("$29"); volatile long save_gp;
+
+ register long real_s0 __asm__("$9" ); volatile long save_s0;
+ register long real_s1 __asm__("$10"); volatile long save_s1;
+ register long real_s2 __asm__("$11"); volatile long save_s2;
+ register long real_s3 __asm__("$12"); volatile long save_s3;
+ register long real_s4 __asm__("$13"); volatile long save_s4;
+ register long real_s5 __asm__("$14"); volatile long save_s5;
+#ifdef alpha_EXTRA_CAREFUL
+ register long real_s6 __asm__("$15"); volatile long save_s6;
+#endif
+
+ register double real_f2 __asm__("$f2"); volatile double save_f2;
+ register double real_f3 __asm__("$f3"); volatile double save_f3;
+ register double real_f4 __asm__("$f4"); volatile double save_f4;
+ register double real_f5 __asm__("$f5"); volatile double save_f5;
+ register double real_f6 __asm__("$f6"); volatile double save_f6;
+ register double real_f7 __asm__("$f7"); volatile double save_f7;
+#ifdef alpha_EXTRA_CAREFUL
+ register double real_f8 __asm__("$f8"); volatile double save_f8;
+ register double real_f9 __asm__("$f9"); volatile double save_f9;
+#endif
+
+ register StgFunPtr real_pv __asm__("$27");
+
+ StgRegTable * ret;
+
+ save_ra = real_ra;
+ save_gp = real_gp;
+
+ save_s0 = real_s0;
+ save_s1 = real_s1;
+ save_s2 = real_s2;
+ save_s3 = real_s3;
+ save_s4 = real_s4;
+ save_s5 = real_s5;
+#ifdef alpha_EXTRA_CAREFUL
+ save_s6 = real_s6;
+#endif
+
+ save_f2 = real_f2;
+ save_f3 = real_f3;
+ save_f4 = real_f4;
+ save_f5 = real_f5;
+ save_f6 = real_f6;
+ save_f7 = real_f7;
+#ifdef alpha_EXTRA_CAREFUL
+ save_f8 = real_f8;
+ save_f9 = real_f9;
+#endif
+
+ /* Alpha calling convention: callee address goes in $27 (pv). */
+ real_pv = f;
+
+ __asm__ volatile( "lda $30,-%0($30)" "\n"
+ "\t" "jmp ($27)" "\n"
+ "\t" ".align 3" "\n"
+ ".globl " STG_RETURN "\n"
+ STG_RETURN ":" "\n"
+ "\t" "lda $30,%0($30)" "\n"
+ : : "K" (RESERVED_C_STACK_BYTES));
+
+ /* Return value comes back in $14 (s5); see the R1 mapping for Alpha.
+ * NOTE(review): implicit long -> pointer conversion here. */
+ ret = real_s5;
+
+ real_s0 = save_s0;
+ real_s1 = save_s1;
+ real_s2 = save_s2;
+ real_s3 = save_s3;
+ real_s4 = save_s4;
+ real_s5 = save_s5;
+#ifdef alpha_EXTRA_CAREFUL
+ real_s6 = save_s6;
+#endif
+
+ real_f2 = save_f2;
+ real_f3 = save_f3;
+ real_f4 = save_f4;
+ real_f5 = save_f5;
+ real_f6 = save_f6;
+ real_f7 = save_f7;
+#ifdef alpha_EXTRA_CAREFUL
+ real_f8 = save_f8;
+ real_f9 = save_f9;
+#endif
+
+ real_ra = save_ra;
+ real_gp = save_gp;
+
+ return ret;
+}
+
+#endif /* alpha_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ HP-PA architecture
+ -------------------------------------------------------------------------- */
+
+#ifdef hppa1_1_HOST_ARCH
+
+/* HP-PA StgRun: save callee-save integer regs r3-r18 and FP regs fr12-fr21
+ * into the reserved stack area above RESERVED_C_STACK_BYTES, call f(), and
+ * restore them after STG code returns to the StgReturn label; the result
+ * is copied out of r11 (R1 on this platform). */
+StgRegTable *
+StgRun(StgFunPtr f, StgRegTable *basereg)
+{
+ StgChar space[RESERVED_C_STACK_BYTES+16*sizeof(long)+10*sizeof(double)];
+ StgRegTable * ret;
+
+ __asm__ volatile ("ldo %0(%%r30),%%r19\n"
+ "\tstw %%r3, 0(0,%%r19)\n"
+ "\tstw %%r4, 4(0,%%r19)\n"
+ "\tstw %%r5, 8(0,%%r19)\n"
+ "\tstw %%r6,12(0,%%r19)\n"
+ "\tstw %%r7,16(0,%%r19)\n"
+ "\tstw %%r8,20(0,%%r19)\n"
+ "\tstw %%r9,24(0,%%r19)\n"
+ "\tstw %%r10,28(0,%%r19)\n"
+ "\tstw %%r11,32(0,%%r19)\n"
+ "\tstw %%r12,36(0,%%r19)\n"
+ "\tstw %%r13,40(0,%%r19)\n"
+ "\tstw %%r14,44(0,%%r19)\n"
+ "\tstw %%r15,48(0,%%r19)\n"
+ "\tstw %%r16,52(0,%%r19)\n"
+ "\tstw %%r17,56(0,%%r19)\n"
+ "\tstw %%r18,60(0,%%r19)\n"
+ "\tldo 80(%%r19),%%r19\n"
+ "\tfstds %%fr12,-16(0,%%r19)\n"
+ "\tfstds %%fr13, -8(0,%%r19)\n"
+ "\tfstds %%fr14, 0(0,%%r19)\n"
+ "\tfstds %%fr15, 8(0,%%r19)\n"
+ "\tldo 32(%%r19),%%r19\n"
+ "\tfstds %%fr16,-16(0,%%r19)\n"
+ "\tfstds %%fr17, -8(0,%%r19)\n"
+ "\tfstds %%fr18, 0(0,%%r19)\n"
+ "\tfstds %%fr19, 8(0,%%r19)\n"
+ "\tldo 32(%%r19),%%r19\n"
+ "\tfstds %%fr20,-16(0,%%r19)\n"
+ "\tfstds %%fr21, -8(0,%%r19)\n" : :
+ "n" (-(116 * sizeof(long) + 10 * sizeof(double))) : "%r19"
+ );
+
+ f();
+
+ __asm__ volatile (".align 4\n"
+ "\t.EXPORT " STG_RETURN ",CODE\n"
+ "\t.EXPORT " STG_RETURN ",ENTRY,PRIV_LEV=3\n"
+ STG_RETURN "\n"
+ /* "\tldo %0(%%r3),%%r19\n" */
+ "\tldo %1(%%r30),%%r19\n"
+ "\tcopy %%r11, %0\n" /* save R1 */
+ "\tldw 0(0,%%r19),%%r3\n"
+ "\tldw 4(0,%%r19),%%r4\n"
+ "\tldw 8(0,%%r19),%%r5\n"
+ "\tldw 12(0,%%r19),%%r6\n"
+ "\tldw 16(0,%%r19),%%r7\n"
+ "\tldw 20(0,%%r19),%%r8\n"
+ "\tldw 24(0,%%r19),%%r9\n"
+ "\tldw 28(0,%%r19),%%r10\n"
+ "\tldw 32(0,%%r19),%%r11\n"
+ "\tldw 36(0,%%r19),%%r12\n"
+ "\tldw 40(0,%%r19),%%r13\n"
+ "\tldw 44(0,%%r19),%%r14\n"
+ "\tldw 48(0,%%r19),%%r15\n"
+ "\tldw 52(0,%%r19),%%r16\n"
+ "\tldw 56(0,%%r19),%%r17\n"
+ "\tldw 60(0,%%r19),%%r18\n"
+ "\tldo 80(%%r19),%%r19\n"
+ "\tfldds -16(0,%%r19),%%fr12\n"
+ "\tfldds -8(0,%%r19),%%fr13\n"
+ "\tfldds 0(0,%%r19),%%fr14\n"
+ "\tfldds 8(0,%%r19),%%fr15\n"
+ "\tldo 32(%%r19),%%r19\n"
+ "\tfldds -16(0,%%r19),%%fr16\n"
+ "\tfldds -8(0,%%r19),%%fr17\n"
+ "\tfldds 0(0,%%r19),%%fr18\n"
+ "\tfldds 8(0,%%r19),%%fr19\n"
+ "\tldo 32(%%r19),%%r19\n"
+ "\tfldds -16(0,%%r19),%%fr20\n"
+ "\tfldds -8(0,%%r19),%%fr21\n"
+ : "=r" (ret)
+ : "n" (-(116 * sizeof(long) + 10 * sizeof(double)))
+ : "%r19"
+ );
+
+ return ret;
+}
+
+#endif /* hppa1_1_HOST_ARCH */
+
+/* -----------------------------------------------------------------------------
+ PowerPC architecture
+
+ Everything is in assembler, so we don't have to deal with GCC...
+
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc_HOST_ARCH
+
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+#ifdef darwin_HOST_OS
+/* PowerPC (Darwin) StgRun: emitted entirely as asm; uses the Darwin
+ * saveFP/restFP helpers for the FP callee-saves and stmw/lmw for r13-r31.
+ * BaseReg lives in r27; the result returns in r14 (R1). */
+void StgRunIsImplementedInAssembler(void)
+{
+#if HAVE_SUBSECTIONS_VIA_SYMBOLS
+ // if the toolchain supports deadstripping, we have to
+ // prevent it here (it tends to get confused here).
+ __asm__ volatile (".no_dead_strip _StgRunIsImplementedInAssembler");
+#endif
+ __asm__ volatile (
+ "\n.globl _StgRun\n"
+ "_StgRun:\n"
+ "\tmflr r0\n"
+ "\tbl saveFP # f14\n"
+ "\tstmw r13,-220(r1)\n"
+ "\tstwu r1,-%0(r1)\n"
+ "\tmr r27,r4\n" // BaseReg == r27
+ "\tmtctr r3\n"
+ "\tmr r12,r3\n"
+ "\tbctr\n"
+ ".globl _StgReturn\n"
+ "_StgReturn:\n"
+ "\tmr r3,r14\n"
+ "\tla r1,%0(r1)\n"
+ "\tlmw r13,-220(r1)\n"
+ "\tb restFP # f14\n"
+ : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
+}
+#else
+
+// This version is for PowerPC Linux.
+
+// Differences from the Darwin/Mac OS X version:
+// *) Different Assembler Syntax
+// *) Doesn't use Register Saving Helper Functions (although they exist somewhere)
+// *) We may not access positive stack offsets
+// (no "Red Zone" as in the Darwin ABI)
+// *) The Link Register is saved to a different offset in the caller's stack frame
+// (Linux: 4(r1), Darwin 8(r1))
+
+/* PowerPC (Linux) StgRun: like the Darwin version above but with ELF asm
+ * syntax and explicit stfd/lfd for the FP callee-saves (no saveFP/restFP
+ * helpers); the link register is saved at 4(r1) per the Linux ABI. */
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+ __asm__ volatile (
+ "\t.globl StgRun\n"
+ "\t.type StgRun,@function\n"
+ "StgRun:\n"
+ "\tmflr 0\n"
+ "\tstw 0,4(1)\n"
+ "\tmr 5,1\n"
+ "\tstwu 1,-%0(1)\n"
+ "\tstmw 13,-220(5)\n"
+ "\tstfd 14,-144(5)\n"
+ "\tstfd 15,-136(5)\n"
+ "\tstfd 16,-128(5)\n"
+ "\tstfd 17,-120(5)\n"
+ "\tstfd 18,-112(5)\n"
+ "\tstfd 19,-104(5)\n"
+ "\tstfd 20,-96(5)\n"
+ "\tstfd 21,-88(5)\n"
+ "\tstfd 22,-80(5)\n"
+ "\tstfd 23,-72(5)\n"
+ "\tstfd 24,-64(5)\n"
+ "\tstfd 25,-56(5)\n"
+ "\tstfd 26,-48(5)\n"
+ "\tstfd 27,-40(5)\n"
+ "\tstfd 28,-32(5)\n"
+ "\tstfd 29,-24(5)\n"
+ "\tstfd 30,-16(5)\n"
+ "\tstfd 31,-8(5)\n"
+ "\tmr 27,4\n" // BaseReg == r27
+ "\tmtctr 3\n"
+ "\tmr 12,3\n"
+ "\tbctr\n"
+ ".globl StgReturn\n"
+ "\t.type StgReturn,@function\n"
+ "StgReturn:\n"
+ "\tmr 3,14\n"
+ "\tla 5,%0(1)\n"
+ "\tlmw 13,-220(5)\n"
+ "\tlfd 14,-144(5)\n"
+ "\tlfd 15,-136(5)\n"
+ "\tlfd 16,-128(5)\n"
+ "\tlfd 17,-120(5)\n"
+ "\tlfd 18,-112(5)\n"
+ "\tlfd 19,-104(5)\n"
+ "\tlfd 20,-96(5)\n"
+ "\tlfd 21,-88(5)\n"
+ "\tlfd 22,-80(5)\n"
+ "\tlfd 23,-72(5)\n"
+ "\tlfd 24,-64(5)\n"
+ "\tlfd 25,-56(5)\n"
+ "\tlfd 26,-48(5)\n"
+ "\tlfd 27,-40(5)\n"
+ "\tlfd 28,-32(5)\n"
+ "\tlfd 29,-24(5)\n"
+ "\tlfd 30,-16(5)\n"
+ "\tlfd 31,-8(5)\n"
+ "\tmr 1,5\n"
+ "\tlwz 0,4(1)\n"
+ "\tmtlr 0\n"
+ "\tblr\n"
+ : : "i"(RESERVED_C_STACK_BYTES+224 /*stack frame size*/));
+}
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ PowerPC 64 architecture
+
+ Everything is in assembler, so we don't have to deal with GCC...
+
+ -------------------------------------------------------------------------- */
+
+#ifdef powerpc64_HOST_ARCH
+
+#ifdef linux_HOST_OS
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+ // r0 volatile
+ // r1 stack pointer
+ // r2 toc - needs to be saved
+ // r3-r10 argument passing, volatile
+ // r11, r12 very volatile (not saved across cross-module calls)
+ // r13 thread local state (never modified, don't need to save)
+ // r14-r31 callee-save
+ /* PPC64 ELF (v1) StgRun/StgReturn.  The .opd section holds the two
+  * function descriptors (entry address, TOC, environment); the real
+  * code lives at the dot-prefixed labels.  r5 keeps the caller's SP so
+  * TOC (r2), r14-r31 and f14-f31 can be saved relative to it inside the
+  * new frame. */
+ __asm__ volatile (
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+ ".globl StgRun\n"
+ "StgRun:\n"
+ "\t.quad\t.StgRun,.TOC.@tocbase,0\n"
+ "\t.size StgRun,24\n"
+ ".globl StgReturn\n"
+ "StgReturn:\n"
+ "\t.quad\t.StgReturn,.TOC.@tocbase,0\n"
+ "\t.size StgReturn,24\n"
+ ".previous\n"
+ ".globl .StgRun\n"
+ ".type .StgRun,@function\n"
+ ".StgRun:\n"
+ "\tmflr 0\n"
+ "\tmr 5, 1\n"
+ "\tstd 0, 16(1)\n"             // LR save slot is 16(r1) in the 64-bit ABI
+ "\tstdu 1, -%0(1)\n"
+ "\tstd 2, -296(5)\n"           // save TOC pointer
+ "\tstd 14, -288(5)\n"
+ "\tstd 15, -280(5)\n"
+ "\tstd 16, -272(5)\n"
+ "\tstd 17, -264(5)\n"
+ "\tstd 18, -256(5)\n"
+ "\tstd 19, -248(5)\n"
+ "\tstd 20, -240(5)\n"
+ "\tstd 21, -232(5)\n"
+ "\tstd 22, -224(5)\n"
+ "\tstd 23, -216(5)\n"
+ "\tstd 24, -208(5)\n"
+ "\tstd 25, -200(5)\n"
+ "\tstd 26, -192(5)\n"
+ "\tstd 27, -184(5)\n"
+ "\tstd 28, -176(5)\n"
+ "\tstd 29, -168(5)\n"
+ "\tstd 30, -160(5)\n"
+ "\tstd 31, -152(5)\n"
+ "\tstfd 14, -144(5)\n"
+ "\tstfd 15, -136(5)\n"
+ "\tstfd 16, -128(5)\n"
+ "\tstfd 17, -120(5)\n"
+ "\tstfd 18, -112(5)\n"
+ "\tstfd 19, -104(5)\n"
+ "\tstfd 20, -96(5)\n"
+ "\tstfd 21, -88(5)\n"
+ "\tstfd 22, -80(5)\n"
+ "\tstfd 23, -72(5)\n"
+ "\tstfd 24, -64(5)\n"
+ "\tstfd 25, -56(5)\n"
+ "\tstfd 26, -48(5)\n"
+ "\tstfd 27, -40(5)\n"
+ "\tstfd 28, -32(5)\n"
+ "\tstfd 29, -24(5)\n"
+ "\tstfd 30, -16(5)\n"
+ "\tstfd 31, -8(5)\n"
+ "\tmr 27, 4\n" // BaseReg == r27
+ "\tld 2, 8(3)\n"               // r3 is a function descriptor: load its TOC...
+ "\tld 3, 0(3)\n"               // ...and its entry address
+ "\tmtctr 3\n"
+ "\tbctr\n"
+ ".globl .StgReturn\n"
+ ".type .StgReturn,@function\n"
+ ".StgReturn:\n"
+ "\tmr 3,14\n"                  // return value comes back in R1 == r14
+ "\tla 5, %0(1)\n" // load address == addi r5, r1, %0
+ "\tld 2, -296(5)\n"
+ "\tld 14, -288(5)\n"
+ "\tld 15, -280(5)\n"
+ "\tld 16, -272(5)\n"
+ "\tld 17, -264(5)\n"
+ "\tld 18, -256(5)\n"
+ "\tld 19, -248(5)\n"
+ "\tld 20, -240(5)\n"
+ "\tld 21, -232(5)\n"
+ "\tld 22, -224(5)\n"
+ "\tld 23, -216(5)\n"
+ "\tld 24, -208(5)\n"
+ "\tld 25, -200(5)\n"
+ "\tld 26, -192(5)\n"
+ "\tld 27, -184(5)\n"
+ "\tld 28, -176(5)\n"
+ "\tld 29, -168(5)\n"
+ "\tld 30, -160(5)\n"
+ "\tld 31, -152(5)\n"
+ "\tlfd 14, -144(5)\n"
+ "\tlfd 15, -136(5)\n"
+ "\tlfd 16, -128(5)\n"
+ "\tlfd 17, -120(5)\n"
+ "\tlfd 18, -112(5)\n"
+ "\tlfd 19, -104(5)\n"
+ "\tlfd 20, -96(5)\n"
+ "\tlfd 21, -88(5)\n"
+ "\tlfd 22, -80(5)\n"
+ "\tlfd 23, -72(5)\n"
+ "\tlfd 24, -64(5)\n"
+ "\tlfd 25, -56(5)\n"
+ "\tlfd 26, -48(5)\n"
+ "\tlfd 27, -40(5)\n"
+ "\tlfd 28, -32(5)\n"
+ "\tlfd 29, -24(5)\n"
+ "\tlfd 30, -16(5)\n"
+ "\tlfd 31, -8(5)\n"
+ "\tmr 1, 5\n"
+ "\tld 0, 16(1)\n"
+ "\tmtlr 0\n"
+ "\tblr\n"
+ : : "i"(RESERVED_C_STACK_BYTES+304 /*stack frame size*/));
+}
+#else // linux_HOST_OS
+#error Only linux support for power64 right now.
+#endif
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ IA64 architecture
+
+ Again, in assembler - so we can fiddle with the register stack, and because
+ gcc doesn't handle asm-clobbered callee-saves correctly.
+
+ loc0 - loc15: preserved locals
+ loc16 - loc28: STG registers
+ loc29: saved ar.pfs
+ loc30: saved b0
+ loc31: saved gp (gcc 3.3 uses this slot)
+ -------------------------------------------------------------------------- */
+
+#ifdef ia64_HOST_ARCH
+
+/* the memory stack is rarely used, so 16K is excessive */
+#undef RESERVED_C_STACK_BYTES
+#define RESERVED_C_STACK_BYTES 1024
+
+#if ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)
+/* gcc 3.3+: leave an extra slot for gp saves */
+#define LOCALS 32
+#else
+#define LOCALS 31
+#endif
+
+static void GNUC3_ATTRIBUTE(used)
+StgRunIsImplementedInAssembler(void)
+{
+ /* IA64 StgRun/StgReturn.  The register stack engine handles the
+  * general registers (alloc reserves %1 == LOCALS locals); only the
+  * callee-saved FP registers f16-f21 are spilled/filled manually,
+  * interleaved through two pointers (r16/r17) advancing in 32-byte
+  * steps.  ar.pfs and b0 are stashed in loc29/loc30 as per the header
+  * comment above. */
+ __asm__ volatile(
+ ".global StgRun\n"
+ "StgRun:\n"
+ "\talloc loc29 = ar.pfs, 0, %1, 8, 0\n" /* setup register frame */
+ "\tld8 r18 = [r32],8\n" /* get procedure address */
+ "\tadds sp = -%0, sp ;;\n" /* setup stack */
+ "\tld8 gp = [r32]\n" /* get procedure GP */
+ "\tadds r16 = %0-(6*16), sp\n"
+ "\tadds r17 = %0-(5*16), sp ;;\n"
+ "\tstf.spill [r16] = f16,32\n" /* spill callee-saved fp regs */
+ "\tstf.spill [r17] = f17,32\n"
+ "\tmov b6 = r18 ;;\n" /* set target address */
+ "\tstf.spill [r16] = f18,32\n"
+ "\tstf.spill [r17] = f19,32\n"
+ "\tmov loc30 = b0 ;;\n" /* save return address */
+ "\tstf.spill [r16] = f20,32\n"
+ "\tstf.spill [r17] = f21,32\n"
+ "\tbr.few b6 ;;\n" /* branch to function */
+ ".global StgReturn\n"
+ "StgReturn:\n"
+ "\tmov r8 = loc16\n" /* return value in r8 */
+ "\tadds r16 = %0-(6*16), sp\n"
+ "\tadds r17 = %0-(5*16), sp ;;\n"
+ "\tldf.fill f16 = [r16],32\n" /* start restoring fp regs */
+ "\tldf.fill f17 = [r17],32\n"
+ "\tmov ar.pfs = loc29 ;;\n" /* restore register frame */
+ "\tldf.fill f18 = [r16],32\n"
+ "\tldf.fill f19 = [r17],32\n"
+ "\tmov b0 = loc30 ;;\n" /* restore return address */
+ "\tldf.fill f20 = [r16],32\n"
+ "\tldf.fill f21 = [r17],32\n"
+ "\tadds sp = %0, sp\n" /* restore stack */
+ "\tbr.ret.sptk.many b0 ;;\n" /* return */
+ : : "i"(RESERVED_C_STACK_BYTES + 6*16), "i"(LOCALS));
+}
+
+#endif
+
+#endif /* !USE_MINIINTERPRETER */
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
new file mode 100644
index 0000000000..70d08aeb0e
--- /dev/null
+++ b/rts/StgMiscClosures.cmm
@@ -0,0 +1,953 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Entry code for various built-in closure types.
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * --------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+/* ----------------------------------------------------------------------------
+ Support for the bytecode interpreter.
+ ------------------------------------------------------------------------- */
+
+/* 9 bits of return code for constructors created by the interpreter. */
+
+/* Direct (non-vectored) return: R1 holds the constructor, the return
+   address is on top of the stack. */
+stg_interp_constr_entry
+{
+ /* R1 points at the constructor */
+ jump %ENTRY_CODE(Sp(0));
+}
+
+/* Vectored returns: one entry point per return vector 0..7. */
+stg_interp_constr1_entry { jump %RET_VEC(Sp(0),0); }
+stg_interp_constr2_entry { jump %RET_VEC(Sp(0),1); }
+stg_interp_constr3_entry { jump %RET_VEC(Sp(0),2); }
+stg_interp_constr4_entry { jump %RET_VEC(Sp(0),3); }
+stg_interp_constr5_entry { jump %RET_VEC(Sp(0),4); }
+stg_interp_constr6_entry { jump %RET_VEC(Sp(0),5); }
+stg_interp_constr7_entry { jump %RET_VEC(Sp(0),6); }
+stg_interp_constr8_entry { jump %RET_VEC(Sp(0),7); }
+
+/* Some info tables to be used when compiled code returns a value to
+ the interpreter, i.e. the interpreter pushes one of these onto the
+ stack before entering a value. What the code does is to
+ impedance-match the compiled return convention (in R1p/R1n/F1/D1 etc) to
+ the interpreter's convention (returned value is on top of stack),
+ and then cause the scheduler to enter the interpreter.
+
+ On entry, the stack (growing down) looks like this:
+
+ ptr to BCO holding return continuation
+ ptr to one of these info tables.
+
+ The info table code, both direct and vectored, must:
+ * push R1/F1/D1 on the stack, and its tag if necessary
+ * push the BCO (so it's now on the stack twice)
+ * Yield, ie, go to the scheduler.
+
+ Scheduler examines the t.o.s, discovers it is a BCO, and proceeds
+ directly to the bytecode interpreter. That pops the top element
+ (the BCO, containing the return continuation), and interprets it.
+ Net result: return continuation gets interpreted, with the
+ following stack:
+
+ ptr to this BCO
+ ptr to the info table just jumped thru
+ return value
+
+ which is just what we want -- the "standard" return layout for the
+ interpreter. Hurrah!
+
+ Don't ask me how unboxed tuple returns are supposed to work. We
+ haven't got a good story about that yet.
+*/
+
+/* Compiled code returns a lifted pointer in R1 to the interpreter:
+   push the value plus stg_enter_info and yield.  All eight return
+   vectors point back at this same code. */
+INFO_TABLE_RET( stg_ctoi_R1p,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO,
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p),
+ RET_LBL(stg_ctoi_R1p))
+{
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_enter_info;
+ jump stg_yield_to_interpreter;
+}
+
+#if MAX_VECTORED_RTN != 8
+#error MAX_VECTORED_RTN has changed: please modify stg_ctoi_R1p too.
+#endif
+
+/*
+ * When the returned value is a pointer, but unlifted, in R1 ...
+ */
+INFO_TABLE_RET( stg_ctoi_R1unpt,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* Push the unlifted pointer and a frame that won't re-enter it,
+    then let the scheduler run the BCO underneath. */
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_gc_unpt_r1_info;
+ jump stg_yield_to_interpreter;
+}
+
+/*
+ * When the returned value is a non-pointer in R1 ...
+ */
+INFO_TABLE_RET( stg_ctoi_R1n,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* Non-pointer in R1: push it with a non-pointer frame descriptor. */
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_gc_unbx_r1_info;
+ jump stg_yield_to_interpreter;
+}
+
+/*
+ * When the returned value is in F1
+ */
+INFO_TABLE_RET( stg_ctoi_F1,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* Float return: store F1 into the word above the frame descriptor. */
+ Sp_adj(-2);
+ F_[Sp + WDS(1)] = F1;
+ Sp(0) = stg_gc_f1_info;
+ jump stg_yield_to_interpreter;
+}
+
+/*
+ * When the returned value is in D1
+ */
+INFO_TABLE_RET( stg_ctoi_D1,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* Double return: make room for one word plus SIZEOF_DOUBLE.
+    NOTE(review): this relies on Sp_adj(-1) expanding to an assignment
+    whose right-hand side the "- SIZEOF_DOUBLE" extends — confirm
+    against the Sp_adj definition in Cmm.h. */
+ Sp_adj(-1) - SIZEOF_DOUBLE;
+ D_[Sp + WDS(1)] = D1;
+ Sp(0) = stg_gc_d1_info;
+ jump stg_yield_to_interpreter;
+}
+
+/*
+ * When the returned value is in L1
+ */
+INFO_TABLE_RET( stg_ctoi_L1,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* 64-bit (long) return: one word plus 8 bytes for the payload.
+    NOTE(review): same Sp_adj-expansion trick as stg_ctoi_D1 above. */
+ Sp_adj(-1) - 8;
+ L_[Sp + WDS(1)] = L1;
+ Sp(0) = stg_gc_l1_info;
+ jump stg_yield_to_interpreter;
+}
+
+/*
+ * When the returned value is a void
+ */
+INFO_TABLE_RET( stg_ctoi_V,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* Void return: nothing to push except the frame descriptor. */
+ Sp_adj(-1);
+ Sp(0) = stg_gc_void_info;
+ jump stg_yield_to_interpreter;
+}
+
+/*
+ * Dummy info table pushed on the top of the stack when the interpreter
+ * should apply the BCO on the stack to its arguments, also on the
+ * stack.
+ */
+INFO_TABLE_RET( stg_apply_interp,
+ 0/*size*/, 0/*bitmap*/, /* special layout! */
+ RET_BCO )
+{
+ /* Just in case we end up in here... (we shouldn't) */
+ jump stg_yield_to_interpreter;
+}
+
+/* ----------------------------------------------------------------------------
+ Entry code for a BCO
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO )
+{
+ /* entering a BCO means "apply it", same as a function */
+ /* Push the BCO under an stg_apply_interp frame and yield; the
+    scheduler hands the whole thing to the bytecode interpreter. */
+ Sp_adj(-2);
+ Sp(1) = R1;
+ Sp(0) = stg_apply_interp_info;
+ jump stg_yield_to_interpreter;
+}
+
+/* ----------------------------------------------------------------------------
+ Info tables for indirections.
+
+ SPECIALISED INDIRECTIONS: we have a specialised indirection for each
+ kind of return (direct, vectored 0-7), so that we can avoid entering
+ the object when we know what kind of return it will do. The update
+ code (Updates.hc) updates objects with the appropriate kind of
+ indirection. We only do this for young-gen indirections.
+ ------------------------------------------------------------------------- */
+
+/* Generic indirection: follow the indirectee and enter it. */
+INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
+{
+ TICK_ENT_DYN_IND(); /* tick */
+ R1 = StgInd_indirectee(R1);
+ TICK_ENT_VIA_NODE();
+ jump %GET_ENTRY(R1);
+}
+
+/* Specialised indirections: same as stg_IND but jump straight to the
+   known return address/vector instead of entering the indirectee. */
+#define IND_SPEC(label,ret) \
+INFO_TABLE(label,1,0,IND,"IND","IND") \
+{ \
+ TICK_ENT_DYN_IND(); /* tick */ \
+ R1 = StgInd_indirectee(R1); \
+ TICK_ENT_VIA_NODE(); \
+ jump ret; \
+}
+
+IND_SPEC(stg_IND_direct, %ENTRY_CODE(Sp(0)))
+IND_SPEC(stg_IND_0, %RET_VEC(Sp(0),0))
+IND_SPEC(stg_IND_1, %RET_VEC(Sp(0),1))
+IND_SPEC(stg_IND_2, %RET_VEC(Sp(0),2))
+IND_SPEC(stg_IND_3, %RET_VEC(Sp(0),3))
+IND_SPEC(stg_IND_4, %RET_VEC(Sp(0),4))
+IND_SPEC(stg_IND_5, %RET_VEC(Sp(0),5))
+IND_SPEC(stg_IND_6, %RET_VEC(Sp(0),6))
+IND_SPEC(stg_IND_7, %RET_VEC(Sp(0),7))
+
+/* Static indirection (a CAF that has been evaluated). */
+INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC")
+{
+ TICK_ENT_STATIC_IND(); /* tick */
+ R1 = StgInd_indirectee(R1);
+ TICK_ENT_VIA_NODE();
+ jump %GET_ENTRY(R1);
+}
+
+/* Permanent indirection: kept alive (not shorted out by the GC) so
+   that profiling/ticky can attribute entries; otherwise behaves like
+   stg_IND. */
+INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM")
+{
+ /* Don't add INDs to granularity cost */
+
+ /* Don't: TICK_ENT_STATIC_IND(Node); for ticky-ticky; this ind is
+ here only to help profiling */
+
+#if defined(TICKY_TICKY) && !defined(PROFILING)
+ /* TICKY_TICKY && !PROFILING means PERM_IND *replaces* an IND, rather than
+ being extra */
+ TICK_ENT_PERM_IND();
+#endif
+
+ LDV_ENTER(R1);
+
+ /* Enter PAP cost centre */
+ ENTER_CCS_PAP_CL(R1);
+
+ /* For ticky-ticky, change the perm_ind to a normal ind on first
+ * entry, so the number of ent_perm_inds is the number of *thunks*
+ * entered again, not the number of subsequent entries.
+ *
+ * Since this screws up cost centres, we die if profiling and
+ * ticky_ticky are on at the same time. KSW 1999-01.
+ */
+#ifdef TICKY_TICKY
+# ifdef PROFILING
+# error Profiling and ticky-ticky do not mix at present!
+# endif /* PROFILING */
+ StgHeader_info(R1) = stg_IND_info;
+#endif /* TICKY_TICKY */
+
+ R1 = StgInd_indirectee(R1);
+
+#if defined(TICKY_TICKY) && !defined(PROFILING)
+ TICK_ENT_VIA_NODE();
+#endif
+
+ jump %GET_ENTRY(R1);
+}
+
+
+/* Old-generation indirection (left by the GC when an old-gen thunk is
+   updated); entry behaviour is identical to stg_IND. */
+INFO_TABLE(stg_IND_OLDGEN,1,0,IND_OLDGEN,"IND_OLDGEN","IND_OLDGEN")
+{
+ TICK_ENT_STATIC_IND(); /* tick */
+ R1 = StgInd_indirectee(R1);
+ TICK_ENT_VIA_NODE();
+ jump %GET_ENTRY(R1);
+}
+
+/* Old-generation flavour of stg_IND_PERM; see the comments there. */
+INFO_TABLE(stg_IND_OLDGEN_PERM,1,0,IND_OLDGEN_PERM,"IND_OLDGEN_PERM","IND_OLDGEN_PERM")
+{
+ /* Don't: TICK_ENT_STATIC_IND(Node); for ticky-ticky;
+ this ind is here only to help profiling */
+
+#if defined(TICKY_TICKY) && !defined(PROFILING)
+ /* TICKY_TICKY && !PROFILING means PERM_IND *replaces* an IND,
+ rather than being extra */
+ TICK_ENT_PERM_IND(R1); /* tick */
+#endif
+
+ LDV_ENTER(R1);
+
+ /* Enter PAP cost centre -- lexical scoping only */
+ ENTER_CCS_PAP_CL(R1);
+
+ /* see comment in IND_PERM */
+#ifdef TICKY_TICKY
+# ifdef PROFILING
+# error Profiling and ticky-ticky do not mix at present!
+# endif /* PROFILING */
+ StgHeader_info(R1) = stg_IND_OLDGEN_info;
+#endif /* TICKY_TICKY */
+
+ R1 = StgInd_indirectee(R1);
+
+ TICK_ENT_VIA_NODE();
+ jump %GET_ENTRY(R1);
+}
+
+/* ----------------------------------------------------------------------------
+ Black holes.
+
+ Entering a black hole normally causes a cyclic data dependency, but
+ in the concurrent world, black holes are synchronization points,
+ and they are turned into blocking queues when there are threads
+ waiting for the evaluation of the closure to finish.
+ ------------------------------------------------------------------------- */
+
+/* Note: a BLACKHOLE must be big enough to be
+ * overwritten with an indirection/evacuee/catch. Thus we claim it
+ * has 1 non-pointer word of payload.
+ */
+INFO_TABLE(stg_BLACKHOLE,0,1,BLACKHOLE,"BLACKHOLE","BLACKHOLE")
+{
+ /* Entering a black hole: another thread is evaluating this closure,
+    so put the current TSO on the global blackhole queue and block. */
+#if defined(GRAN)
+ /* Before overwriting TSO_LINK */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1 /*Node*/);
+#endif
+
+ TICK_ENT_BH();
+
+#ifdef THREADED_RTS
+ // foreign "C" debugBelch("BLACKHOLE entry\n");
+#endif
+
+ /* Actually this is not necessary because R1 is about to be destroyed. */
+ LDV_ENTER(R1);
+
+#if defined(THREADED_RTS)
+ foreign "C" ACQUIRE_LOCK(sched_mutex "ptr");
+ // released in stg_block_blackhole_finally
+#endif
+
+ /* Put ourselves on the blackhole queue */
+ StgTSO_link(CurrentTSO) = W_[blackhole_queue];
+ W_[blackhole_queue] = CurrentTSO;
+
+ /* jot down why and on what closure we are blocked */
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnBlackHole::I16;
+ StgTSO_block_info(CurrentTSO) = R1;
+
+ jump stg_block_blackhole;
+}
+
+#if defined(PAR) || defined(GRAN)
+
+/* Revertible black hole (PAR/GRAN only): block the current TSO on the
+   closure's own blocking queue rather than the global one. */
+INFO_TABLE(stg_RBH,1,1,RBH,"RBH","RBH")
+{
+# if defined(GRAN)
+ /* mainly statistics gathering for GranSim simulation */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1 /*Node*/);
+# endif
+
+ /* exactly the same as a BLACKHOLE_BQ_entry -- HWL */
+ /* Put ourselves on the blocking queue for this black hole */
+ TSO_link(CurrentTSO) = StgBlockingQueue_blocking_queue(R1);
+ StgBlockingQueue_blocking_queue(R1) = CurrentTSO;
+ /* jot down why and on what closure we are blocked */
+ TSO_why_blocked(CurrentTSO) = BlockedOnBlackHole::I16;
+ TSO_block_info(CurrentTSO) = R1;
+
+ /* PAR: dumping of event now done in blockThread -- HWL */
+
+ /* stg_gen_block is too heavyweight, use a specialised one */
+ jump stg_block_1;
+}
+
+/* Closures that save the words overwritten when a thunk is turned into
+   an RBH; they are never entered.  Note: the stray ';' that followed
+   the INFO_TABLE headers of _1 and _2 (a C-- syntax error, latent only
+   because PAR/GRAN is never compiled) has been removed so all three
+   match the form of stg_RBH_Save_0. */
+INFO_TABLE(stg_RBH_Save_0,0,2,CONSTR,"RBH_Save_0","RBH_Save_0")
+{ foreign "C" barf("RBH_Save_0 object entered!"); }
+
+INFO_TABLE(stg_RBH_Save_1,1,1,CONSTR,"RBH_Save_1","RBH_Save_1")
+{ foreign "C" barf("RBH_Save_1 object entered!"); }
+
+INFO_TABLE(stg_RBH_Save_2,2,0,CONSTR,"RBH_Save_2","RBH_Save_2")
+{ foreign "C" barf("RBH_Save_2 object entered!"); }
+
+#endif /* defined(PAR) || defined(GRAN) */
+
+/* identical to BLACKHOLEs except for the infotag */
+/* identical to BLACKHOLEs except for the infotag */
+INFO_TABLE(stg_CAF_BLACKHOLE,0,1,CAF_BLACKHOLE,"CAF_BLACKHOLE","CAF_BLACKHOLE")
+{
+ /* Same blocking protocol as stg_BLACKHOLE above. */
+#if defined(GRAN)
+ /* mainly statistics gathering for GranSim simulation */
+ STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1 /*Node*/);
+#endif
+
+ TICK_ENT_BH();
+ LDV_ENTER(R1);
+
+#if defined(THREADED_RTS)
+ // foreign "C" debugBelch("BLACKHOLE entry\n");
+#endif
+
+#if defined(THREADED_RTS)
+ foreign "C" ACQUIRE_LOCK(sched_mutex "ptr");
+ // released in stg_block_blackhole_finally
+#endif
+
+ /* Put ourselves on the blackhole queue */
+ StgTSO_link(CurrentTSO) = W_[blackhole_queue];
+ W_[blackhole_queue] = CurrentTSO;
+
+ /* jot down why and on what closure we are blocked */
+ StgTSO_why_blocked(CurrentTSO) = BlockedOnBlackHole::I16;
+ StgTSO_block_info(CurrentTSO) = R1;
+
+ jump stg_block_blackhole;
+}
+
+#ifdef EAGER_BLACKHOLING
+/* "Single-entry" black holes (eager blackholing); entering one is a bug. */
+INFO_TABLE(stg_SE_BLACKHOLE,0,1,SE_BLACKHOLE,"SE_BLACKHOLE","SE_BLACKHOLE")
+{ foreign "C" barf("SE_BLACKHOLE object entered!"); }
+
+INFO_TABLE(stg_SE_CAF_BLACKHOLE,0,1,SE_CAF_BLACKHOLE,"SE_CAF_BLACKHOLE","SE_CAF_BLACKHOLE")
+{ foreign "C" barf("SE_CAF_BLACKHOLE object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Whiteholes are used for the "locked" state of a closure (see lockClosure())
+
+ The closure type is BLACKHOLE, just because we need a valid closure type
+ for sanity checking.
+ ------------------------------------------------------------------------- */
+
+/* Placeholder for a locked closure; must never be entered. */
+INFO_TABLE(stg_WHITEHOLE, 0,0, BLACKHOLE, "WHITEHOLE", "WHITEHOLE")
+{ foreign "C" barf("WHITEHOLE object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Some static info tables for things that don't get entered, and
+ therefore don't need entry code (i.e. boxed but unpointed objects)
+ NON_ENTERABLE_ENTRY_CODE now defined at the beginning of the file
+ ------------------------------------------------------------------------- */
+
+/* Unpointed objects: entering any of these is a bug, so they barf. */
+INFO_TABLE(stg_TSO, 0,0,TSO, "TSO", "TSO")
+{ foreign "C" barf("TSO object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Evacuees are left behind by the garbage collector. Any attempt to enter
+ one is a real bug.
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_EVACUATED,1,0,EVACUATED,"EVACUATED","EVACUATED")
+{ foreign "C" barf("EVACUATED object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Weak pointers
+
+ Live weak pointers have a special closure type. Dead ones are just
+ nullary constructors (although they live on the heap - we overwrite
+ live weak pointers with dead ones).
+ ------------------------------------------------------------------------- */
+
+/* Weak pointers are unpointed (never entered). */
+INFO_TABLE(stg_WEAK,0,4,WEAK,"WEAK","WEAK")
+{ foreign "C" barf("WEAK object entered!"); }
+
+/*
+ * It's important when turning an existing WEAK into a DEAD_WEAK
+ * (which is what finalizeWeak# does) that we don't lose the link
+ * field and break the linked list of weak pointers. Hence, we give
+ * DEAD_WEAK 4 non-pointer fields, the same as WEAK.
+ */
+INFO_TABLE_CONSTR(stg_DEAD_WEAK,0,4,0,CONSTR,"DEAD_WEAK","DEAD_WEAK")
+{ foreign "C" barf("DEAD_WEAK object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ NO_FINALIZER
+
+ This is a static nullary constructor (like []) that we use to mark an empty
+ finalizer in a weak pointer object.
+ ------------------------------------------------------------------------- */
+
+/* Static nullary constructor marking "no finalizer" in a weak pointer. */
+INFO_TABLE_CONSTR(stg_NO_FINALIZER,0,0,0,CONSTR_NOCAF_STATIC,"NO_FINALIZER","NO_FINALIZER")
+{ foreign "C" barf("NO_FINALIZER object entered!"); }
+
+CLOSURE(stg_NO_FINALIZER_closure,stg_NO_FINALIZER);
+
+/* ----------------------------------------------------------------------------
+ Stable Names are unlifted too.
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_STABLE_NAME,0,1,STABLE_NAME,"STABLE_NAME","STABLE_NAME")
+{ foreign "C" barf("STABLE_NAME object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ MVars
+
+ There are two kinds of these: full and empty. We need an info table
+ and entry code for each type.
+ ------------------------------------------------------------------------- */
+
+/* MVars (full and empty share the layout) — unpointed, never entered. */
+INFO_TABLE(stg_FULL_MVAR,3,0,MVAR,"MVAR","MVAR")
+{ foreign "C" barf("FULL_MVAR object entered!"); }
+
+INFO_TABLE(stg_EMPTY_MVAR,3,0,MVAR,"MVAR","MVAR")
+{ foreign "C" barf("EMPTY_MVAR object entered!"); }
+
+/* -----------------------------------------------------------------------------
+ STM
+ -------------------------------------------------------------------------- */
+
+/* STM objects are likewise unpointed; see STM.c for their use. */
+INFO_TABLE(stg_TVAR, 0, 0, TVAR, "TVAR", "TVAR")
+{ foreign "C" barf("TVAR object entered!"); }
+
+INFO_TABLE(stg_TVAR_WAIT_QUEUE, 0, 0, TVAR_WAIT_QUEUE, "TVAR_WAIT_QUEUE", "TVAR_WAIT_QUEUE")
+{ foreign "C" barf("TVAR_WAIT_QUEUE object entered!"); }
+
+INFO_TABLE(stg_TREC_CHUNK, 0, 0, TREC_CHUNK, "TREC_CHUNK", "TREC_CHUNK")
+{ foreign "C" barf("TREC_CHUNK object entered!"); }
+
+INFO_TABLE(stg_TREC_HEADER, 0, 0, TREC_HEADER, "TREC_HEADER", "TREC_HEADER")
+{ foreign "C" barf("TREC_HEADER object entered!"); }
+
+INFO_TABLE_CONSTR(stg_END_STM_WAIT_QUEUE,0,0,0,CONSTR_NOCAF_STATIC,"END_STM_WAIT_QUEUE","END_STM_WAIT_QUEUE")
+{ foreign "C" barf("END_STM_WAIT_QUEUE object entered!"); }
+
+INFO_TABLE_CONSTR(stg_END_STM_CHUNK_LIST,0,0,0,CONSTR_NOCAF_STATIC,"END_STM_CHUNK_LIST","END_STM_CHUNK_LIST")
+{ foreign "C" barf("END_STM_CHUNK_LIST object entered!"); }
+
+INFO_TABLE_CONSTR(stg_NO_TREC,0,0,0,CONSTR_NOCAF_STATIC,"NO_TREC","NO_TREC")
+{ foreign "C" barf("NO_TREC object entered!"); }
+
+/* Static end-of-list / sentinel closures for the tables above. */
+CLOSURE(stg_END_STM_WAIT_QUEUE_closure,stg_END_STM_WAIT_QUEUE);
+
+CLOSURE(stg_END_STM_CHUNK_LIST_closure,stg_END_STM_CHUNK_LIST);
+
+CLOSURE(stg_NO_TREC_closure,stg_NO_TREC);
+
+/* ----------------------------------------------------------------------------
+ END_TSO_QUEUE
+
+ This is a static nullary constructor (like []) that we use to mark the
+ end of a linked TSO queue.
+ ------------------------------------------------------------------------- */
+
+/* Static sentinel marking the end of a TSO queue. */
+INFO_TABLE_CONSTR(stg_END_TSO_QUEUE,0,0,0,CONSTR_NOCAF_STATIC,"END_TSO_QUEUE","END_TSO_QUEUE")
+{ foreign "C" barf("END_TSO_QUEUE object entered!"); }
+
+CLOSURE(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE);
+
+/* ----------------------------------------------------------------------------
+ Exception lists
+ ------------------------------------------------------------------------- */
+
+/* Sentinel and cons cell for the blocked-exceptions list. */
+INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST,0,0,0,CONSTR_NOCAF_STATIC,"END_EXCEPTION_LIST","END_EXCEPTION_LIST")
+{ foreign "C" barf("END_EXCEPTION_LIST object entered!"); }
+
+CLOSURE(stg_END_EXCEPTION_LIST_closure,stg_END_EXCEPTION_LIST);
+
+INFO_TABLE(stg_EXCEPTION_CONS,1,1,CONSTR,"EXCEPTION_CONS","EXCEPTION_CONS")
+{ foreign "C" barf("EXCEPTION_CONS object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Arrays
+
+ These come in two basic flavours: arrays of data (StgArrWords) and arrays of
+ pointers (StgArrPtrs). They all have a similar layout:
+
+ ___________________________
+ | Info | No. of | data....
+ | Ptr | Words |
+ ---------------------------
+
+ These are *unpointed* objects: i.e. they cannot be entered.
+
+ ------------------------------------------------------------------------- */
+
+/* Arrays are unpointed; entering any of them is a bug. */
+INFO_TABLE(stg_ARR_WORDS, 0, 0, ARR_WORDS, "ARR_WORDS", "ARR_WORDS")
+{ foreign "C" barf("ARR_WORDS object entered!"); }
+
+INFO_TABLE(stg_MUT_ARR_PTRS_CLEAN, 0, 0, MUT_ARR_PTRS_CLEAN, "MUT_ARR_PTRS_CLEAN", "MUT_ARR_PTRS_CLEAN")
+{ foreign "C" barf("MUT_ARR_PTRS_CLEAN object entered!"); }
+
+INFO_TABLE(stg_MUT_ARR_PTRS_DIRTY, 0, 0, MUT_ARR_PTRS_DIRTY, "MUT_ARR_PTRS_DIRTY", "MUT_ARR_PTRS_DIRTY")
+{ foreign "C" barf("MUT_ARR_PTRS_DIRTY object entered!"); }
+
+INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN, 0, 0, MUT_ARR_PTRS_FROZEN, "MUT_ARR_PTRS_FROZEN", "MUT_ARR_PTRS_FROZEN")
+{ foreign "C" barf("MUT_ARR_PTRS_FROZEN object entered!"); }
+
+INFO_TABLE(stg_MUT_ARR_PTRS_FROZEN0, 0, 0, MUT_ARR_PTRS_FROZEN0, "MUT_ARR_PTRS_FROZEN0", "MUT_ARR_PTRS_FROZEN0")
+{ foreign "C" barf("MUT_ARR_PTRS_FROZEN0 object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Mutable Variables
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_MUT_VAR_CLEAN, 1, 0, MUT_VAR_CLEAN, "MUT_VAR_CLEAN", "MUT_VAR_CLEAN")
+{ foreign "C" barf("MUT_VAR_CLEAN object entered!"); }
+INFO_TABLE(stg_MUT_VAR_DIRTY, 1, 0, MUT_VAR_DIRTY, "MUT_VAR_DIRTY", "MUT_VAR_DIRTY")
+{ foreign "C" barf("MUT_VAR_DIRTY object entered!"); }
+
+/* ----------------------------------------------------------------------------
+ Dummy return closure
+
+ Entering this closure will just return to the address on the top of the
+ stack. Useful for getting a thread in a canonical form where we can
+ just enter the top stack word to start the thread. (see deleteThread)
+ ------------------------------------------------------------------------- */
+
+/* Entering this simply returns to the address on top of the stack. */
+INFO_TABLE( stg_dummy_ret, 0, 0, CONSTR_NOCAF_STATIC, "DUMMY_RET", "DUMMY_RET")
+{
+ jump %ENTRY_CODE(Sp(0));
+}
+CLOSURE(stg_dummy_ret_closure,stg_dummy_ret);
+
+/* ----------------------------------------------------------------------------
+ CHARLIKE and INTLIKE closures.
+
+ These are static representations of Chars and small Ints, so that
+ we can remove dynamic Chars and Ints during garbage collection and
+ replace them with references to the static objects.
+ ------------------------------------------------------------------------- */
+
+#if defined(ENABLE_WIN32_DLL_SUPPORT)
+/*
+ * When sticking the RTS in a DLL, we delay populating the
+ * Charlike and Intlike tables until load-time, which is only
+ * when we've got the real addresses to the C# and I# closures.
+ *
+ */
+static INFO_TBL_CONST StgInfoTable czh_static_info;
+static INFO_TBL_CONST StgInfoTable izh_static_info;
+#define Char_hash_static_info czh_static_info
+#define Int_hash_static_info izh_static_info
+#else
+/* Non-DLL case: point straight at the GHC.Base C#/I# static info tables. */
+#define Char_hash_static_info GHCziBase_Czh_static
+#define Int_hash_static_info GHCziBase_Izh_static
+#endif
+
+
+/* One static C#/I# closure with payload n. */
+#define CHARLIKE_HDR(n) CLOSURE(Char_hash_static_info, n)
+#define INTLIKE_HDR(n) CLOSURE(Int_hash_static_info, n)
+
+/* put these in the *data* section, since the garbage collector relies
+ * on the fact that static closures live in the data section.
+ */
+
+/* end the name with _closure, to convince the mangler this is a closure */
+
+section "data" {
+ stg_CHARLIKE_closure:
+ /* 256 static C# closures, one per character code 0..255; the GC
+    replaces dynamic Chars in this range with references to these. */
+ CHARLIKE_HDR(0)
+ CHARLIKE_HDR(1)
+ CHARLIKE_HDR(2)
+ CHARLIKE_HDR(3)
+ CHARLIKE_HDR(4)
+ CHARLIKE_HDR(5)
+ CHARLIKE_HDR(6)
+ CHARLIKE_HDR(7)
+ CHARLIKE_HDR(8)
+ CHARLIKE_HDR(9)
+ CHARLIKE_HDR(10)
+ CHARLIKE_HDR(11)
+ CHARLIKE_HDR(12)
+ CHARLIKE_HDR(13)
+ CHARLIKE_HDR(14)
+ CHARLIKE_HDR(15)
+ CHARLIKE_HDR(16)
+ CHARLIKE_HDR(17)
+ CHARLIKE_HDR(18)
+ CHARLIKE_HDR(19)
+ CHARLIKE_HDR(20)
+ CHARLIKE_HDR(21)
+ CHARLIKE_HDR(22)
+ CHARLIKE_HDR(23)
+ CHARLIKE_HDR(24)
+ CHARLIKE_HDR(25)
+ CHARLIKE_HDR(26)
+ CHARLIKE_HDR(27)
+ CHARLIKE_HDR(28)
+ CHARLIKE_HDR(29)
+ CHARLIKE_HDR(30)
+ CHARLIKE_HDR(31)
+ CHARLIKE_HDR(32)
+ CHARLIKE_HDR(33)
+ CHARLIKE_HDR(34)
+ CHARLIKE_HDR(35)
+ CHARLIKE_HDR(36)
+ CHARLIKE_HDR(37)
+ CHARLIKE_HDR(38)
+ CHARLIKE_HDR(39)
+ CHARLIKE_HDR(40)
+ CHARLIKE_HDR(41)
+ CHARLIKE_HDR(42)
+ CHARLIKE_HDR(43)
+ CHARLIKE_HDR(44)
+ CHARLIKE_HDR(45)
+ CHARLIKE_HDR(46)
+ CHARLIKE_HDR(47)
+ CHARLIKE_HDR(48)
+ CHARLIKE_HDR(49)
+ CHARLIKE_HDR(50)
+ CHARLIKE_HDR(51)
+ CHARLIKE_HDR(52)
+ CHARLIKE_HDR(53)
+ CHARLIKE_HDR(54)
+ CHARLIKE_HDR(55)
+ CHARLIKE_HDR(56)
+ CHARLIKE_HDR(57)
+ CHARLIKE_HDR(58)
+ CHARLIKE_HDR(59)
+ CHARLIKE_HDR(60)
+ CHARLIKE_HDR(61)
+ CHARLIKE_HDR(62)
+ CHARLIKE_HDR(63)
+ CHARLIKE_HDR(64)
+ CHARLIKE_HDR(65)
+ CHARLIKE_HDR(66)
+ CHARLIKE_HDR(67)
+ CHARLIKE_HDR(68)
+ CHARLIKE_HDR(69)
+ CHARLIKE_HDR(70)
+ CHARLIKE_HDR(71)
+ CHARLIKE_HDR(72)
+ CHARLIKE_HDR(73)
+ CHARLIKE_HDR(74)
+ CHARLIKE_HDR(75)
+ CHARLIKE_HDR(76)
+ CHARLIKE_HDR(77)
+ CHARLIKE_HDR(78)
+ CHARLIKE_HDR(79)
+ CHARLIKE_HDR(80)
+ CHARLIKE_HDR(81)
+ CHARLIKE_HDR(82)
+ CHARLIKE_HDR(83)
+ CHARLIKE_HDR(84)
+ CHARLIKE_HDR(85)
+ CHARLIKE_HDR(86)
+ CHARLIKE_HDR(87)
+ CHARLIKE_HDR(88)
+ CHARLIKE_HDR(89)
+ CHARLIKE_HDR(90)
+ CHARLIKE_HDR(91)
+ CHARLIKE_HDR(92)
+ CHARLIKE_HDR(93)
+ CHARLIKE_HDR(94)
+ CHARLIKE_HDR(95)
+ CHARLIKE_HDR(96)
+ CHARLIKE_HDR(97)
+ CHARLIKE_HDR(98)
+ CHARLIKE_HDR(99)
+ CHARLIKE_HDR(100)
+ CHARLIKE_HDR(101)
+ CHARLIKE_HDR(102)
+ CHARLIKE_HDR(103)
+ CHARLIKE_HDR(104)
+ CHARLIKE_HDR(105)
+ CHARLIKE_HDR(106)
+ CHARLIKE_HDR(107)
+ CHARLIKE_HDR(108)
+ CHARLIKE_HDR(109)
+ CHARLIKE_HDR(110)
+ CHARLIKE_HDR(111)
+ CHARLIKE_HDR(112)
+ CHARLIKE_HDR(113)
+ CHARLIKE_HDR(114)
+ CHARLIKE_HDR(115)
+ CHARLIKE_HDR(116)
+ CHARLIKE_HDR(117)
+ CHARLIKE_HDR(118)
+ CHARLIKE_HDR(119)
+ CHARLIKE_HDR(120)
+ CHARLIKE_HDR(121)
+ CHARLIKE_HDR(122)
+ CHARLIKE_HDR(123)
+ CHARLIKE_HDR(124)
+ CHARLIKE_HDR(125)
+ CHARLIKE_HDR(126)
+ CHARLIKE_HDR(127)
+ CHARLIKE_HDR(128)
+ CHARLIKE_HDR(129)
+ CHARLIKE_HDR(130)
+ CHARLIKE_HDR(131)
+ CHARLIKE_HDR(132)
+ CHARLIKE_HDR(133)
+ CHARLIKE_HDR(134)
+ CHARLIKE_HDR(135)
+ CHARLIKE_HDR(136)
+ CHARLIKE_HDR(137)
+ CHARLIKE_HDR(138)
+ CHARLIKE_HDR(139)
+ CHARLIKE_HDR(140)
+ CHARLIKE_HDR(141)
+ CHARLIKE_HDR(142)
+ CHARLIKE_HDR(143)
+ CHARLIKE_HDR(144)
+ CHARLIKE_HDR(145)
+ CHARLIKE_HDR(146)
+ CHARLIKE_HDR(147)
+ CHARLIKE_HDR(148)
+ CHARLIKE_HDR(149)
+ CHARLIKE_HDR(150)
+ CHARLIKE_HDR(151)
+ CHARLIKE_HDR(152)
+ CHARLIKE_HDR(153)
+ CHARLIKE_HDR(154)
+ CHARLIKE_HDR(155)
+ CHARLIKE_HDR(156)
+ CHARLIKE_HDR(157)
+ CHARLIKE_HDR(158)
+ CHARLIKE_HDR(159)
+ CHARLIKE_HDR(160)
+ CHARLIKE_HDR(161)
+ CHARLIKE_HDR(162)
+ CHARLIKE_HDR(163)
+ CHARLIKE_HDR(164)
+ CHARLIKE_HDR(165)
+ CHARLIKE_HDR(166)
+ CHARLIKE_HDR(167)
+ CHARLIKE_HDR(168)
+ CHARLIKE_HDR(169)
+ CHARLIKE_HDR(170)
+ CHARLIKE_HDR(171)
+ CHARLIKE_HDR(172)
+ CHARLIKE_HDR(173)
+ CHARLIKE_HDR(174)
+ CHARLIKE_HDR(175)
+ CHARLIKE_HDR(176)
+ CHARLIKE_HDR(177)
+ CHARLIKE_HDR(178)
+ CHARLIKE_HDR(179)
+ CHARLIKE_HDR(180)
+ CHARLIKE_HDR(181)
+ CHARLIKE_HDR(182)
+ CHARLIKE_HDR(183)
+ CHARLIKE_HDR(184)
+ CHARLIKE_HDR(185)
+ CHARLIKE_HDR(186)
+ CHARLIKE_HDR(187)
+ CHARLIKE_HDR(188)
+ CHARLIKE_HDR(189)
+ CHARLIKE_HDR(190)
+ CHARLIKE_HDR(191)
+ CHARLIKE_HDR(192)
+ CHARLIKE_HDR(193)
+ CHARLIKE_HDR(194)
+ CHARLIKE_HDR(195)
+ CHARLIKE_HDR(196)
+ CHARLIKE_HDR(197)
+ CHARLIKE_HDR(198)
+ CHARLIKE_HDR(199)
+ CHARLIKE_HDR(200)
+ CHARLIKE_HDR(201)
+ CHARLIKE_HDR(202)
+ CHARLIKE_HDR(203)
+ CHARLIKE_HDR(204)
+ CHARLIKE_HDR(205)
+ CHARLIKE_HDR(206)
+ CHARLIKE_HDR(207)
+ CHARLIKE_HDR(208)
+ CHARLIKE_HDR(209)
+ CHARLIKE_HDR(210)
+ CHARLIKE_HDR(211)
+ CHARLIKE_HDR(212)
+ CHARLIKE_HDR(213)
+ CHARLIKE_HDR(214)
+ CHARLIKE_HDR(215)
+ CHARLIKE_HDR(216)
+ CHARLIKE_HDR(217)
+ CHARLIKE_HDR(218)
+ CHARLIKE_HDR(219)
+ CHARLIKE_HDR(220)
+ CHARLIKE_HDR(221)
+ CHARLIKE_HDR(222)
+ CHARLIKE_HDR(223)
+ CHARLIKE_HDR(224)
+ CHARLIKE_HDR(225)
+ CHARLIKE_HDR(226)
+ CHARLIKE_HDR(227)
+ CHARLIKE_HDR(228)
+ CHARLIKE_HDR(229)
+ CHARLIKE_HDR(230)
+ CHARLIKE_HDR(231)
+ CHARLIKE_HDR(232)
+ CHARLIKE_HDR(233)
+ CHARLIKE_HDR(234)
+ CHARLIKE_HDR(235)
+ CHARLIKE_HDR(236)
+ CHARLIKE_HDR(237)
+ CHARLIKE_HDR(238)
+ CHARLIKE_HDR(239)
+ CHARLIKE_HDR(240)
+ CHARLIKE_HDR(241)
+ CHARLIKE_HDR(242)
+ CHARLIKE_HDR(243)
+ CHARLIKE_HDR(244)
+ CHARLIKE_HDR(245)
+ CHARLIKE_HDR(246)
+ CHARLIKE_HDR(247)
+ CHARLIKE_HDR(248)
+ CHARLIKE_HDR(249)
+ CHARLIKE_HDR(250)
+ CHARLIKE_HDR(251)
+ CHARLIKE_HDR(252)
+ CHARLIKE_HDR(253)
+ CHARLIKE_HDR(254)
+ CHARLIKE_HDR(255)
+}
+
+section "data" {
+ stg_INTLIKE_closure:
+ /* Static I# closures for the small ints MIN_INTLIKE..MAX_INTLIKE. */
+ INTLIKE_HDR(-16) /* MIN_INTLIKE == -16 */
+ INTLIKE_HDR(-15)
+ INTLIKE_HDR(-14)
+ INTLIKE_HDR(-13)
+ INTLIKE_HDR(-12)
+ INTLIKE_HDR(-11)
+ INTLIKE_HDR(-10)
+ INTLIKE_HDR(-9)
+ INTLIKE_HDR(-8)
+ INTLIKE_HDR(-7)
+ INTLIKE_HDR(-6)
+ INTLIKE_HDR(-5)
+ INTLIKE_HDR(-4)
+ INTLIKE_HDR(-3)
+ INTLIKE_HDR(-2)
+ INTLIKE_HDR(-1)
+ INTLIKE_HDR(0)
+ INTLIKE_HDR(1)
+ INTLIKE_HDR(2)
+ INTLIKE_HDR(3)
+ INTLIKE_HDR(4)
+ INTLIKE_HDR(5)
+ INTLIKE_HDR(6)
+ INTLIKE_HDR(7)
+ INTLIKE_HDR(8)
+ INTLIKE_HDR(9)
+ INTLIKE_HDR(10)
+ INTLIKE_HDR(11)
+ INTLIKE_HDR(12)
+ INTLIKE_HDR(13)
+ INTLIKE_HDR(14)
+ INTLIKE_HDR(15)
+ INTLIKE_HDR(16) /* MAX_INTLIKE == 16 */
+}
diff --git a/rts/StgPrimFloat.c b/rts/StgPrimFloat.c
new file mode 100644
index 0000000000..5bd6aebb1c
--- /dev/null
+++ b/rts/StgPrimFloat.c
@@ -0,0 +1,491 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2000
+ *
+ * Miscellaneous support for floating-point primitives
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+
+#include <math.h>
+
+/*
+ * Encoding and decoding Doubles. Code based on the HBC code
+ * (lib/fltcode.c).
+ */
+
+#ifdef _SHORT_LIMB
+#define SIZEOF_LIMB_T SIZEOF_UNSIGNED_INT
+#else
+#ifdef _LONG_LONG_LIMB
+#define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG_LONG
+#else
+#define SIZEOF_LIMB_T SIZEOF_UNSIGNED_LONG
+#endif
+#endif
+
+#if SIZEOF_LIMB_T == 4
+#define GMP_BASE 4294967296.0
+#elif SIZEOF_LIMB_T == 8
+#define GMP_BASE 18446744073709551616.0
+#else
+#error Cannot cope with SIZEOF_LIMB_T -- please add definition of GMP_BASE
+#endif
+
+#define DNBIGIT ((SIZEOF_DOUBLE+SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
+#define FNBIGIT ((SIZEOF_FLOAT +SIZEOF_LIMB_T-1)/SIZEOF_LIMB_T)
+
+#if IEEE_FLOATING_POINT
+#define MY_DMINEXP ((DBL_MIN_EXP) - (DBL_MANT_DIG) - 1)
+/* DMINEXP is defined in values.h on Linux (for example) */
+#define DHIGHBIT 0x00100000
+#define DMSBIT 0x80000000
+
+#define MY_FMINEXP ((FLT_MIN_EXP) - (FLT_MANT_DIG) - 1)
+#define FHIGHBIT 0x00800000
+#define FMSBIT 0x80000000
+#endif
+
+#ifdef WORDS_BIGENDIAN
+#define L 1
+#define H 0
+#else
+#define L 0
+#define H 1
+#endif
+
+#define __abs(a) (( (a) >= 0 ) ? (a) : (-(a)))
+
+StgDouble
+__encodeDouble (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
+{
+ StgDouble r;
+ const mp_limb_t *const arr = (const mp_limb_t *)ba;
+ I_ i;
+
+ /* Convert MP_INT to a double; knows a lot about internal rep! */
+ for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
+ r = (r * GMP_BASE) + arr[i];
+
+ /* Now raise to the exponent */
+ if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
+ r = ldexp(r, e);
+
+ /* sign is encoded in the size */
+ if (size < 0)
+ r = -r;
+
+ return r;
+}
+
+/* Special version for small Integers */
+StgDouble
+__int_encodeDouble (I_ j, I_ e)
+{
+ StgDouble r;
+
+ r = (StgDouble)__abs(j);
+
+ /* Now raise to the exponent */
+ if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
+ r = ldexp(r, e);
+
+ /* sign is encoded in the size */
+ if (j < 0)
+ r = -r;
+
+ return r;
+}
+
+StgFloat
+__encodeFloat (I_ size, StgByteArray ba, I_ e) /* result = s * 2^e */
+{
+ StgFloat r;
+ const mp_limb_t *arr = (const mp_limb_t *)ba;
+ I_ i;
+
+ /* Convert MP_INT to a float; knows a lot about internal rep! */
+ for(r = 0.0, i = __abs(size)-1; i >= 0; i--)
+ r = (r * GMP_BASE) + arr[i];
+
+ /* Now raise to the exponent */
+ if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
+ r = ldexp(r, e);
+
+ /* sign is encoded in the size */
+ if (size < 0)
+ r = -r;
+
+ return r;
+}
+
+/* Special version for small Integers */
+StgFloat
+__int_encodeFloat (I_ j, I_ e)
+{
+ StgFloat r;
+
+ r = (StgFloat)__abs(j);
+
+ /* Now raise to the exponent */
+ if ( r != 0.0 ) /* Lennart suggests this avoids a bug in MIPS's ldexp */
+ r = ldexp(r, e);
+
+ /* sign is encoded in the size */
+ if (j < 0)
+ r = -r;
+
+ return r;
+}
+
+/* This only supports IEEE floating point */
+
+void
+__decodeDouble (MP_INT *man, I_ *exp, StgDouble dbl)
+{
+ /* Do some bit fiddling on IEEE */
+ unsigned int low, high; /* assuming 32 bit ints */
+ int sign, iexp;
+ union { double d; unsigned int i[2]; } u; /* assuming 32 bit ints, 64 bit double */
+
+ ASSERT(sizeof(unsigned int ) == 4 );
+ ASSERT(sizeof(dbl ) == SIZEOF_DOUBLE);
+ ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
+ ASSERT(DNBIGIT*SIZEOF_LIMB_T >= SIZEOF_DOUBLE);
+
+ u.d = dbl; /* grab chunks of the double */
+ low = u.i[L];
+ high = u.i[H];
+
+ /* we know the MP_INT* passed in has size zero, so we realloc
+ no matter what.
+ */
+ man->_mp_alloc = DNBIGIT;
+
+ if (low == 0 && (high & ~DMSBIT) == 0) {
+ man->_mp_size = 0;
+ *exp = 0L;
+ } else {
+ man->_mp_size = DNBIGIT;
+ iexp = ((high >> 20) & 0x7ff) + MY_DMINEXP;
+ sign = high;
+
+ high &= DHIGHBIT-1;
+ if (iexp != MY_DMINEXP) /* don't add hidden bit to denorms */
+ high |= DHIGHBIT;
+ else {
+ iexp++;
+ /* A denorm, normalize the mantissa */
+ while (! (high & DHIGHBIT)) {
+ high <<= 1;
+ if (low & DMSBIT)
+ high++;
+ low <<= 1;
+ iexp--;
+ }
+ }
+ *exp = (I_) iexp;
+#if DNBIGIT == 2
+ man->_mp_d[0] = (mp_limb_t)low;
+ man->_mp_d[1] = (mp_limb_t)high;
+#else
+#if DNBIGIT == 1
+ man->_mp_d[0] = ((mp_limb_t)high) << 32 | (mp_limb_t)low;
+#else
+#error Cannot cope with DNBIGIT
+#endif
+#endif
+ if (sign < 0)
+ man->_mp_size = -man->_mp_size;
+ }
+}
+
+void
+__decodeFloat (MP_INT *man, I_ *exp, StgFloat flt)
+{
+ /* Do some bit fiddling on IEEE */
+ int high, sign; /* assuming 32 bit ints */
+ union { float f; int i; } u; /* assuming 32 bit float and int */
+
+ ASSERT(sizeof(int ) == 4 );
+ ASSERT(sizeof(flt ) == SIZEOF_FLOAT );
+ ASSERT(sizeof(man->_mp_d[0]) == SIZEOF_LIMB_T);
+ ASSERT(FNBIGIT*SIZEOF_LIMB_T >= SIZEOF_FLOAT );
+
+ u.f = flt; /* grab the float */
+ high = u.i;
+
+ /* we know the MP_INT* passed in has size zero, so we realloc
+ no matter what.
+ */
+ man->_mp_alloc = FNBIGIT;
+
+ if ((high & ~FMSBIT) == 0) {
+ man->_mp_size = 0;
+ *exp = 0;
+ } else {
+ man->_mp_size = FNBIGIT;
+ *exp = ((high >> 23) & 0xff) + MY_FMINEXP;
+ sign = high;
+
+ high &= FHIGHBIT-1;
+ if (*exp != MY_FMINEXP) /* don't add hidden bit to denorms */
+ high |= FHIGHBIT;
+ else {
+ (*exp)++;
+ /* A denorm, normalize the mantissa */
+ while (! (high & FHIGHBIT)) {
+ high <<= 1;
+ (*exp)--;
+ }
+ }
+#if FNBIGIT == 1
+ man->_mp_d[0] = (mp_limb_t)high;
+#else
+#error Cannot cope with FNBIGIT
+#endif
+ if (sign < 0)
+ man->_mp_size = -man->_mp_size;
+ }
+}
+
+/* Convenient union types for checking the layout of IEEE 754 types -
+ based on defs in GNU libc <ieee754.h>
+*/
+
+union stg_ieee754_flt
+{
+ float f;
+ struct {
+
+#if WORDS_BIGENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:8;
+ unsigned int mantissa:23;
+#else
+ unsigned int mantissa:23;
+ unsigned int exponent:8;
+ unsigned int negative:1;
+#endif
+ } ieee;
+ struct {
+
+#if WORDS_BIGENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:8;
+ unsigned int quiet_nan:1;
+ unsigned int mantissa:22;
+#else
+ unsigned int mantissa:22;
+ unsigned int quiet_nan:1;
+ unsigned int exponent:8;
+ unsigned int negative:1;
+#endif
+ } ieee_nan;
+};
+
+/*
+
+ To recap, here's the representation of a double precision
+ IEEE floating point number:
+
+ sign 63 sign bit (0==positive, 1==negative)
+ exponent 62-52 exponent (biased by 1023)
+ fraction 51-0 fraction (bits to right of binary point)
+*/
+
+union stg_ieee754_dbl
+{
+ double d;
+ struct {
+
+#if WORDS_BIGENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:11;
+ unsigned int mantissa0:20;
+ unsigned int mantissa1:32;
+#else
+ unsigned int mantissa1:32;
+ unsigned int mantissa0:20;
+ unsigned int exponent:11;
+ unsigned int negative:1;
+#endif
+ } ieee;
+ /* This format makes it easier to see if a NaN is a signalling NaN. */
+ struct {
+
+#if WORDS_BIGENDIAN
+ unsigned int negative:1;
+ unsigned int exponent:11;
+ unsigned int quiet_nan:1;
+ unsigned int mantissa0:19;
+ unsigned int mantissa1:32;
+#else
+ unsigned int mantissa1:32;
+ unsigned int mantissa0:19;
+ unsigned int quiet_nan:1;
+ unsigned int exponent:11;
+ unsigned int negative:1;
+#endif
+ } ieee_nan;
+};
+
+/*
+ * Predicates for testing for extreme IEEE fp values. Used
+ * by the bytecode evaluator and the Prelude.
+ *
+ */
+
+/* In case you don't support IEEE, you'll just get dummy defs.. */
+#ifdef IEEE_FLOATING_POINT
+
+StgInt
+isDoubleNaN(StgDouble d)
+{
+ union stg_ieee754_dbl u;
+
+ u.d = d;
+
+ return (
+ u.ieee.exponent == 2047 /* 2^11 - 1 */ && /* Is the exponent all ones? */
+ (u.ieee.mantissa0 != 0 || u.ieee.mantissa1 != 0)
+ /* and the mantissa non-zero? */
+ );
+}
+
+StgInt
+isDoubleInfinite(StgDouble d)
+{
+ union stg_ieee754_dbl u;
+
+ u.d = d;
+
+ /* Inf iff exponent is all ones, mantissa all zeros */
+ return (
+ u.ieee.exponent == 2047 /* 2^11 - 1 */ &&
+ u.ieee.mantissa0 == 0 &&
+ u.ieee.mantissa1 == 0
+ );
+}
+
+StgInt
+isDoubleDenormalized(StgDouble d)
+{
+ union stg_ieee754_dbl u;
+
+ u.d = d;
+
+ /* A (single/double/quad) precision floating point number
+ is denormalised iff:
+ - exponent is zero
+ - mantissa is non-zero.
+ - (don't care about setting of sign bit.)
+
+ */
+ return (
+ u.ieee.exponent == 0 &&
+ (u.ieee.mantissa0 != 0 ||
+ u.ieee.mantissa1 != 0)
+ );
+
+}
+
+StgInt
+isDoubleNegativeZero(StgDouble d)
+{
+ union stg_ieee754_dbl u;
+
+ u.d = d;
+ /* sign (bit 63) set (only) => negative zero */
+
+ return (
+ u.ieee.negative == 1 &&
+ u.ieee.exponent == 0 &&
+ u.ieee.mantissa0 == 0 &&
+ u.ieee.mantissa1 == 0);
+}
+
+/* Same tests, this time for StgFloats. */
+
+/*
+ To recap, here's the representation of a single precision
+ IEEE floating point number:
+
+ sign 31 sign bit (0 == positive, 1 == negative)
+ exponent 30-23 exponent (biased by 127)
+ fraction 22-0 fraction (bits to right of binary point)
+*/
+
+
+StgInt
+isFloatNaN(StgFloat f)
+{
+ union stg_ieee754_flt u;
+ u.f = f;
+
+ /* Floating point NaN iff exponent is all ones, mantissa is
+ non-zero (but see below.) */
+ return (
+ u.ieee.exponent == 255 /* 2^8 - 1 */ &&
+ u.ieee.mantissa != 0);
+}
+
+StgInt
+isFloatInfinite(StgFloat f)
+{
+ union stg_ieee754_flt u;
+ u.f = f;
+
+ /* A float is Inf iff exponent is max (all ones),
+ and mantissa is min(all zeros.) */
+ return (
+ u.ieee.exponent == 255 /* 2^8 - 1 */ &&
+ u.ieee.mantissa == 0);
+}
+
+StgInt
+isFloatDenormalized(StgFloat f)
+{
+ union stg_ieee754_flt u;
+ u.f = f;
+
+ /* A (single/double/quad) precision floating point number
+ is denormalised iff:
+ - exponent is zero
+ - mantissa is non-zero.
+ - (don't care about setting of sign bit.)
+
+ */
+ return (
+ u.ieee.exponent == 0 &&
+ u.ieee.mantissa != 0);
+}
+
+StgInt
+isFloatNegativeZero(StgFloat f)
+{
+ union stg_ieee754_flt u;
+ u.f = f;
+
+ /* sign (bit 31) set (only) => negative zero */
+ return (
+ u.ieee.negative &&
+ u.ieee.exponent == 0 &&
+ u.ieee.mantissa == 0);
+}
+
+#else /* ! IEEE_FLOATING_POINT */
+
+/* Dummy definitions of predicates - they all return false */
+StgInt isDoubleNaN(d) StgDouble d; { return 0; }
+StgInt isDoubleInfinite(d) StgDouble d; { return 0; }
+StgInt isDoubleDenormalized(d) StgDouble d; { return 0; }
+StgInt isDoubleNegativeZero(d) StgDouble d; { return 0; }
+StgInt isFloatNaN(f) StgFloat f; { return 0; }
+StgInt isFloatInfinite(f) StgFloat f; { return 0; }
+StgInt isFloatDenormalized(f) StgFloat f; { return 0; }
+StgInt isFloatNegativeZero(f) StgFloat f; { return 0; }
+
+#endif /* ! IEEE_FLOATING_POINT */
diff --git a/rts/StgRun.h b/rts/StgRun.h
new file mode 100644
index 0000000000..da376b4971
--- /dev/null
+++ b/rts/StgRun.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Tiny assembler 'layer' between the C and STG worlds.
+ *
+ ---------------------------------------------------------------------------- */
+
+#ifndef STGRUN_H
+#define STGRUN_H
+
+extern StgRegTable * StgRun(StgFunPtr f, StgRegTable *basereg);
+
+RTS_FUN(StgReturn);
+
+#endif /* STGRUN_H */
diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm
new file mode 100644
index 0000000000..2f2a759c81
--- /dev/null
+++ b/rts/StgStartup.cmm
@@ -0,0 +1,218 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Code for starting, stopping and restarting threads.
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+/*
+ * This module contains the two entry points and the final exit point
+ * to/from the Haskell world. We can enter either by:
+ *
+ * a) returning to the address on the top of the stack, or
+ * b) entering the closure on the top of the stack
+ *
+ * the function stg_stop_thread_entry is the final exit for a
+ * thread: it is the last return address on the stack. It returns
+ * to the scheduler marking the thread as finished.
+ */
+
+#define CHECK_SENSIBLE_REGS() \
+ ASSERT(Hp != 0); \
+ ASSERT(Sp != 0); \
+ ASSERT(SpLim != 0); \
+ ASSERT(HpLim != 0); \
+ ASSERT(SpLim - WDS(RESERVED_STACK_WORDS) <= Sp); \
+ ASSERT(HpLim >= Hp);
+
+/* -----------------------------------------------------------------------------
+ Returning from the STG world.
+
+ This is a polymorphic return address, meaning that any old constructor
+ can be returned, we don't care (actually, it's probably going to be
+ an IOok constructor, which will indirect through the vector table
+ slot 0).
+ -------------------------------------------------------------------------- */
+
+#if defined(PROFILING)
+#define STOP_THREAD_BITMAP 3
+#define STOP_THREAD_WORDS 2
+#else
+#define STOP_THREAD_BITMAP 0
+#define STOP_THREAD_WORDS 0
+#endif
+
+/* A polymorphic return address, where all the vector slots point to the
+ direct entry point. */
+INFO_TABLE_RET( stg_stop_thread, STOP_THREAD_WORDS, STOP_THREAD_BITMAP,
+ STOP_FRAME,
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread),
+ RET_LBL(stg_stop_thread) )
+{
+ /*
+ The final exit.
+
+ The top-top-level closures (e.g., "main") are of type "IO a".
+ When entered, they perform an IO action and return an 'a' in R1.
+
+ We save R1 on top of the stack where the scheduler can find it,
+ tidy up the registers and return to the scheduler.
+
+ We Leave the stack looking like this:
+
+ +----------------+
+ | -------------------> return value
+ +----------------+
+ | stg_enter_info |
+ +----------------+
+
+ The stg_enter_info is just a dummy info table so that the
+ garbage collector can understand the stack (there must always
+ be an info table on top of the stack).
+ */
+
+ Sp = Sp + SIZEOF_StgStopFrame - WDS(2);
+ Sp(1) = R1;
+ Sp(0) = stg_enter_info;
+
+ StgTSO_what_next(CurrentTSO) = ThreadComplete::I16;
+
+ SAVE_THREAD_STATE();
+
+ /* The return code goes in BaseReg->rRet, and BaseReg is returned in R1 */
+ StgRegTable_rRet(BaseReg) = ThreadFinished;
+ R1 = BaseReg;
+
+ jump StgReturn;
+}
+
+/* -----------------------------------------------------------------------------
+ Start a thread from the scheduler by returning to the address on
+ the top of the stack. This is used for all entries to STG code
+ from C land.
+
+ On the way back, we (usually) pass through stg_returnToSched which saves
+ the thread's state away nicely.
+ -------------------------------------------------------------------------- */
+
+stg_returnToStackTop
+{
+ LOAD_THREAD_STATE();
+ CHECK_SENSIBLE_REGS();
+ jump %ENTRY_CODE(Sp(0));
+}
+
+stg_returnToSched
+{
+ SAVE_THREAD_STATE();
+ foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO);
+ jump StgReturn;
+}
+
+// A variant of stg_returnToSched that doesn't call threadPaused() on the
+// current thread. This is used for switching from compiled execution to the
+// interpreter, where calling threadPaused() on every switch would be too
+// expensive.
+stg_returnToSchedNotPaused
+{
+ SAVE_THREAD_STATE();
+ jump StgReturn;
+}
+
+// A variant of stg_returnToSched, but instead of returning directly to the
+// scheduler, we jump to the code fragment pointed to by R2. This lets us
+// perform some final actions after making the thread safe, such as unlocking
+// the MVar on which we are about to block in SMP mode.
+stg_returnToSchedButFirst
+{
+ SAVE_THREAD_STATE();
+ foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO);
+ jump R2;
+}
+
+/* -----------------------------------------------------------------------------
+ Strict IO application - performing an IO action and entering its result.
+
+ rts_evalIO() lets you perform Haskell IO actions from outside of
+ Haskell-land, returning back to you their result. Want this result
+ to be evaluated to WHNF by that time, so that we can easily get at
+ the int/char/whatever using the various get{Ty} functions provided
+ by the RTS API.
+
+ forceIO takes care of this, performing the IO action and entering the
+ results that comes back.
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE_RET( stg_forceIO, 0/*size*/, 0/*bitmap*/, RET_SMALL)
+
+#ifdef REG_R1
+{
+ Sp_adj(1);
+ ENTER();
+}
+#else
+{
+ R1 = Sp(0);
+ Sp_adj(2);
+ ENTER();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ Non-strict IO application.
+
+ This stack frame works like stg_forceIO_info except that it
+ doesn't evaluate the return value. We need the layer because the
+ return convention for an IO action differs depending on whether R1
+ is a register or not.
+ ------------------------------------------------------------------------- */
+
+INFO_TABLE_RET( stg_noforceIO, 0/*size*/, 0/*bitmap*/, RET_SMALL )
+
+#ifdef REG_R1
+{
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(0));
+}
+#else
+{
+ R1 = Sp(0);
+ Sp_adj(2);
+ jump %ENTRY_CODE(Sp(0));
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ Special STG entry points for module registration.
+ -------------------------------------------------------------------------- */
+
+stg_init_finish
+{
+ jump StgReturn;
+}
+
+/* On entry to stg_init:
+ * init_stack[0] = &stg_init_ret;
+ * init_stack[1] = __stginit_Something;
+ */
+stg_init
+{
+ W_ next;
+ Sp = W_[BaseReg + OFFSET_StgRegTable_rSp];
+ next = W_[Sp];
+ Sp_adj(1);
+ jump next;
+}
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
new file mode 100644
index 0000000000..342a6eb164
--- /dev/null
+++ b/rts/StgStdThunks.cmm
@@ -0,0 +1,274 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow, 1998-2004
+ *
+ * Canned "Standard Form" Thunks
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+
+/* -----------------------------------------------------------------------------
+ The code for a thunk that simply extracts a field from a
+ single-constructor datatype depends only on the offset of the field
+ to be selected.
+
+ Here we define some canned "selector" thunks that do just that; any
+ selector thunk appearing in a program will refer to one of these
+ instead of being compiled independently.
+
+ The garbage collector spots selector thunks and reduces them if
+ possible, in order to avoid space leaks resulting from lazy pattern
+ matching.
+ -------------------------------------------------------------------------- */
+
+#define WITHUPD_FRAME_SIZE (SIZEOF_StgUpdateFrame + SIZEOF_StgHeader)
+#define NOUPD_FRAME_SIZE (SIZEOF_StgHeader)
+
+#ifdef PROFILING
+#define SAVE_CCCS(fs) StgHeader_ccs(Sp-fs) = W_[CCCS]
+#define GET_SAVED_CCCS W_[CCCS] = StgHeader_ccs(Sp)
+#define RET_BITMAP 3
+#define RET_FRAMESIZE 2
+#else
+#define SAVE_CCCS(fs) /* empty */
+#define GET_SAVED_CCCS /* empty */
+#define RET_BITMAP 0
+#define RET_FRAMESIZE 0
+#endif
+
+#define SELECTOR_CODE_UPD(offset) \
+ INFO_TABLE_RET(stg_sel_ret_##offset##_upd, RET_FRAMESIZE, RET_BITMAP, RET_SMALL) \
+ { \
+ R1 = StgClosure_payload(R1,offset); \
+ GET_SAVED_CCCS; \
+ Sp = Sp + SIZEOF_StgHeader; \
+ ENTER(); \
+ } \
+ \
+ INFO_TABLE_SELECTOR(stg_sel_##offset##_upd, offset, THUNK_SELECTOR, "stg_sel_upd", "stg_sel_upd") \
+ { \
+ TICK_ENT_DYN_THK(); \
+ STK_CHK_NP(WITHUPD_FRAME_SIZE); \
+ UPD_BH_UPDATABLE(); \
+ LDV_ENTER(R1); \
+ PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1); \
+ ENTER_CCS_THUNK(R1); \
+ SAVE_CCCS(WITHUPD_FRAME_SIZE); \
+ W_[Sp-WITHUPD_FRAME_SIZE] = stg_sel_ret_##offset##_upd_info; \
+ R1 = StgThunk_payload(R1,0); \
+ Sp = Sp - WITHUPD_FRAME_SIZE; \
+ jump %GET_ENTRY(R1); \
+ }
+ /* NOTE: no need to ENTER() here, we know the closure cannot evaluate to a function,
+ because we're going to do a field selection on the result. */
+
+SELECTOR_CODE_UPD(0)
+SELECTOR_CODE_UPD(1)
+SELECTOR_CODE_UPD(2)
+SELECTOR_CODE_UPD(3)
+SELECTOR_CODE_UPD(4)
+SELECTOR_CODE_UPD(5)
+SELECTOR_CODE_UPD(6)
+SELECTOR_CODE_UPD(7)
+SELECTOR_CODE_UPD(8)
+SELECTOR_CODE_UPD(9)
+SELECTOR_CODE_UPD(10)
+SELECTOR_CODE_UPD(11)
+SELECTOR_CODE_UPD(12)
+SELECTOR_CODE_UPD(13)
+SELECTOR_CODE_UPD(14)
+SELECTOR_CODE_UPD(15)
+
+#define SELECTOR_CODE_NOUPD(offset) \
+ INFO_TABLE_RET(stg_sel_ret_##offset##_noupd, RET_FRAMESIZE, RET_BITMAP, RET_SMALL) \
+ { \
+ R1 = StgClosure_payload(R1,offset); \
+ GET_SAVED_CCCS; \
+ Sp = Sp + SIZEOF_StgHeader; \
+ jump %GET_ENTRY(R1); \
+ } \
+ \
+ INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd")\
+ { \
+ TICK_ENT_DYN_THK(); \
+ STK_CHK_NP(NOUPD_FRAME_SIZE); \
+ UPD_BH_SINGLE_ENTRY(); \
+ LDV_ENTER(R1); \
+ TICK_UPDF_OMITTED(); \
+ ENTER_CCS_THUNK(R1); \
+ SAVE_CCCS(NOUPD_FRAME_SIZE); \
+ W_[Sp-NOUPD_FRAME_SIZE] = stg_sel_ret_##offset##_noupd_info; \
+ R1 = StgThunk_payload(R1,0); \
+ Sp = Sp - NOUPD_FRAME_SIZE; \
+ jump %GET_ENTRY(R1); \
+ }
+
+SELECTOR_CODE_NOUPD(0)
+SELECTOR_CODE_NOUPD(1)
+SELECTOR_CODE_NOUPD(2)
+SELECTOR_CODE_NOUPD(3)
+SELECTOR_CODE_NOUPD(4)
+SELECTOR_CODE_NOUPD(5)
+SELECTOR_CODE_NOUPD(6)
+SELECTOR_CODE_NOUPD(7)
+SELECTOR_CODE_NOUPD(8)
+SELECTOR_CODE_NOUPD(9)
+SELECTOR_CODE_NOUPD(10)
+SELECTOR_CODE_NOUPD(11)
+SELECTOR_CODE_NOUPD(12)
+SELECTOR_CODE_NOUPD(13)
+SELECTOR_CODE_NOUPD(14)
+SELECTOR_CODE_NOUPD(15)
+
+/* -----------------------------------------------------------------------------
+ Apply thunks
+
+ An apply thunk is a thunk of the form
+
+ let z = [x1...xn] \u x1...xn
+ in ...
+
+ We pre-compile some of these because the code is always the same.
+
+ These have to be independent of the update frame size, so the code
+ works when profiling etc.
+ -------------------------------------------------------------------------- */
+
+/* stg_ap_1_upd_info is a bit redundant, but there appears to be a bug
+ * in the compiler that means stg_ap_1 is generated occasionally (ToDo)
+ */
+
+INFO_TABLE(stg_ap_1_upd,1,1,THUNK_1_0,"stg_ap_1_upd_info","stg_ap_1_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(1));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame;
+ jump stg_ap_0_fast;
+}
+
+INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(2));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(1);
+ Sp_adj(-1); // for stg_ap_*_ret
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_p();
+ jump RET_LBL(stg_ap_p);
+}
+
+INFO_TABLE(stg_ap_3_upd,3,0,THUNK,"stg_ap_3_upd_info","stg_ap_3_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(3));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,2);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(2);
+ Sp_adj(-1); // for stg_ap_*_ret
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_pp();
+ jump RET_LBL(stg_ap_pp);
+}
+
+INFO_TABLE(stg_ap_4_upd,4,0,THUNK,"stg_ap_4_upd_info","stg_ap_4_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(4));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,3);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,2);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(3);
+ Sp_adj(-1); // for stg_ap_*_ret
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_ppp();
+ jump RET_LBL(stg_ap_ppp);
+}
+
+INFO_TABLE(stg_ap_5_upd,5,0,THUNK,"stg_ap_5_upd_info","stg_ap_5_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(5));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,4);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,3);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,2);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(4);
+ Sp_adj(-1); // for stg_ap_*_ret
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_pppp();
+ jump RET_LBL(stg_ap_pppp);
+}
+
+INFO_TABLE(stg_ap_6_upd,6,0,THUNK,"stg_ap_6_upd_info","stg_ap_6_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(6));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,5);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,4);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,3);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,2);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(5);
+ Sp_adj(-1); // for stg_ap_*_ret
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_ppppp();
+ jump RET_LBL(stg_ap_ppppp);
+}
+
+INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
+{
+ TICK_ENT_DYN_THK();
+ STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(7));
+ UPD_BH_UPDATABLE();
+ LDV_ENTER(R1);
+ ENTER_CCS_THUNK(R1);
+ PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,6);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,5);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,4);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,3);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,2);
+ W_[Sp-SIZEOF_StgUpdateFrame-WDS(6)] = StgThunk_payload(R1,1);
+ R1 = StgThunk_payload(R1,0);
+ Sp = Sp - SIZEOF_StgUpdateFrame - WDS(6);
+ Sp_adj(-1); // for stg_ap_*_ret
+ TICK_UNKNOWN_CALL();
+ TICK_SLOW_CALL_pppppp();
+ jump RET_LBL(stg_ap_pppppp);
+}
diff --git a/rts/Storage.c b/rts/Storage.c
new file mode 100644
index 0000000000..974be45f10
--- /dev/null
+++ b/rts/Storage.c
@@ -0,0 +1,1137 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Storage manager front end
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Stats.h"
+#include "Hooks.h"
+#include "BlockAlloc.h"
+#include "MBlock.h"
+#include "Weak.h"
+#include "Sanity.h"
+#include "Arena.h"
+#include "OSThreads.h"
+#include "Capability.h"
+#include "Storage.h"
+#include "Schedule.h"
+#include "RetainerProfile.h" // for counting memory blocks (memInventory)
+
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * All these globals require sm_mutex to access in THREADED_RTS mode.
+ */
+StgClosure *caf_list = NULL; /* CAFs retained for GHCi (see newCAF) */
+StgClosure *revertible_caf_list = NULL; /* CAFs GHCi may revert (see newDynCAF) */
+rtsBool keepCAFs;
+
+bdescr *small_alloc_list; /* allocate()d small objects */
+bdescr *pinned_object_block; /* allocate pinned objects into this block */
+nat alloc_blocks; /* number of allocate()d blocks since GC */
+nat alloc_blocks_lim; /* approximate limit on alloc_blocks */
+
+StgPtr alloc_Hp = NULL; /* next free byte in small_alloc_list */
+StgPtr alloc_HpLim = NULL; /* end of block at small_alloc_list */
+
+generation *generations = NULL; /* all the generations */
+generation *g0 = NULL; /* generation 0, for convenience */
+generation *oldest_gen = NULL; /* oldest generation, for convenience */
+step *g0s0 = NULL; /* generation 0, step 0, for convenience */
+
+ullong total_allocated = 0; /* total memory allocated during run */
+
+nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
+step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */
+
+#ifdef THREADED_RTS
+/*
+ * Storage manager mutex: protects all the above state from
+ * simultaneous access by two STG threads.
+ */
+Mutex sm_mutex;
+/*
+ * This mutex is used by atomicModifyMutVar# only
+ */
+Mutex atomic_modify_mutvar_mutex;
+#endif
+
+
+/*
+ * Forward references
+ */
+/* Custom allocators registered with GMP in initStorage(). */
+static void *stgAllocForGMP (size_t size_in_bytes);
+static void *stgReallocForGMP (void *ptr, size_t old_size, size_t new_size);
+static void stgDeallocForGMP (void *ptr, size_t size);
+
+/*
+ * Reset every field of a step structure to its empty/initial state and
+ * link it to generation g (step number s). Called from initStorage()
+ * for every generation step and for each nursery.
+ */
+static void
+initStep (step *stp, int g, int s)
+{
+ stp->no = s;
+ stp->blocks = NULL;
+ stp->n_blocks = 0;
+ stp->old_blocks = NULL;
+ stp->n_old_blocks = 0;
+ stp->gen = &generations[g];
+ stp->gen_no = g;
+ stp->hp = NULL;
+ stp->hpLim = NULL;
+ stp->hp_bd = NULL;
+ stp->scavd_hp = NULL;
+ stp->scavd_hpLim = NULL;
+ stp->scan = NULL;
+ stp->scan_bd = NULL;
+ stp->large_objects = NULL;
+ stp->n_large_blocks = 0;
+ stp->new_large_objects = NULL;
+ stp->scavenged_large_objects = NULL;
+ stp->n_scavenged_large_blocks = 0;
+ stp->is_compacted = 0;
+ stp->bitmap = NULL;
+}
+
+/*
+ * Initialise the storage manager: validate the GC flags, set up the
+ * generation/step structures and the per-capability nurseries, prime
+ * the allocate() state, and register our allocators with GMP.
+ * Idempotent: a second call returns immediately. Must run before any
+ * Haskell code executes.
+ */
+void
+initStorage( void )
+{
+ nat g, s;
+ generation *gen;
+
+ if (generations != NULL) {
+ // multi-init protection
+ return;
+ }
+
+ /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
+ * doing something reasonable.
+ */
+ ASSERT(LOOKS_LIKE_INFO_PTR(&stg_BLACKHOLE_info));
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
+ ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
+
+ /* -H cannot exceed -M: clamp the suggestion down to the maximum */
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.heapSizeSuggestion >
+ RtsFlags.GcFlags.maxHeapSize) {
+ RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
+ }
+
+ if (RtsFlags.GcFlags.maxHeapSize != 0 &&
+ RtsFlags.GcFlags.minAllocAreaSize >
+ RtsFlags.GcFlags.maxHeapSize) {
+ errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
+ exit(1);
+ }
+
+ initBlockAllocator();
+
+#if defined(THREADED_RTS)
+ initMutex(&sm_mutex);
+ initMutex(&atomic_modify_mutvar_mutex);
+#endif
+
+ ACQUIRE_SM_LOCK;
+
+ /* allocate generation info array */
+ generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
+ * sizeof(struct generation_),
+ "initStorage: gens");
+
+ /* Initialise all generations */
+ for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ gen = &generations[g];
+ gen->no = g;
+ gen->mut_list = allocBlock();
+ gen->collections = 0;
+ gen->failed_promotions = 0;
+ gen->max_blocks = 0;
+ }
+
+ /* A couple of convenience pointers */
+ g0 = &generations[0];
+ oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
+
+ /* Allocate step structures in each generation */
+ if (RtsFlags.GcFlags.generations > 1) {
+ /* Only for multiple-generations */
+
+ /* Oldest generation: one step */
+ oldest_gen->n_steps = 1;
+ oldest_gen->steps =
+ stgMallocBytes(1 * sizeof(struct step_), "initStorage: last step");
+
+ /* set up all except the oldest generation with 2 steps */
+ for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
+ generations[g].n_steps = RtsFlags.GcFlags.steps;
+ generations[g].steps =
+ stgMallocBytes (RtsFlags.GcFlags.steps * sizeof(struct step_),
+ "initStorage: steps");
+ }
+
+ } else {
+ /* single generation, i.e. a two-space collector */
+ g0->n_steps = 1;
+ g0->steps = stgMallocBytes (sizeof(struct step_), "initStorage: steps");
+ }
+
+#ifdef THREADED_RTS
+ /* one nursery per capability, so each OS thread can allocate
+ * without taking sm_mutex */
+ n_nurseries = n_capabilities;
+ nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_),
+ "initStorage: nurseries");
+#else
+ n_nurseries = 1;
+ nurseries = g0->steps; // just share nurseries[0] with g0s0
+#endif
+
+ /* Initialise all steps */
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ initStep(&generations[g].steps[s], g, s);
+ }
+ }
+
+#ifdef THREADED_RTS
+ for (s = 0; s < n_nurseries; s++) {
+ initStep(&nurseries[s], 0, s);
+ }
+#endif
+
+ /* Set up the destination pointers in each younger gen. step */
+ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
+ for (s = 0; s < generations[g].n_steps-1; s++) {
+ generations[g].steps[s].to = &generations[g].steps[s+1];
+ }
+ // last step of gen g promotes into the first step of gen g+1
+ generations[g].steps[s].to = &generations[g+1].steps[0];
+ }
+ oldest_gen->steps[0].to = &oldest_gen->steps[0];
+
+#ifdef THREADED_RTS
+ for (s = 0; s < n_nurseries; s++) {
+ nurseries[s].to = generations[0].steps[0].to;
+ }
+#endif
+
+ /* The oldest generation has one step. */
+ if (RtsFlags.GcFlags.compact) {
+ if (RtsFlags.GcFlags.generations == 1) {
+ errorBelch("WARNING: compaction is incompatible with -G1; disabled");
+ } else {
+ oldest_gen->steps[0].is_compacted = 1;
+ }
+ }
+
+#ifdef THREADED_RTS
+ if (RtsFlags.GcFlags.generations == 1) {
+ errorBelch("-G1 is incompatible with -threaded");
+ stg_exit(EXIT_FAILURE);
+ }
+#endif
+
+ /* generation 0 is special: that's the nursery */
+ generations[0].max_blocks = 0;
+
+ /* G0S0: the allocation area. Policy: keep the allocation area
+ * small to begin with, even if we have a large suggested heap
+ * size. Reason: we're going to do a major collection first, and we
+ * don't want it to be a big one. This vague idea is borne out by
+ * rigorous experimental evidence.
+ */
+ g0s0 = &generations[0].steps[0];
+
+ allocNurseries();
+
+ weak_ptr_list = NULL;
+ caf_list = NULL;
+ revertible_caf_list = NULL;
+
+ /* initialise the allocate() interface */
+ small_alloc_list = NULL;
+ alloc_blocks = 0;
+ alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
+
+ /* Tell GNU multi-precision pkg about our custom alloc functions */
+ mp_set_memory_functions(stgAllocForGMP, stgReallocForGMP, stgDeallocForGMP);
+
+ IF_DEBUG(gc, statDescribeGens());
+
+ RELEASE_SM_LOCK;
+}
+
+/* Shut down the storage manager's statistics: flush the final
+ * allocation figure into the stats subsystem. Does not free memory
+ * (see freeStorage() for that). */
+void
+exitStorage (void)
+{
+ stat_exit(calcAllocated());
+}
+
+/* Return all megablocks to the operating system. Only safe once the
+ * heap will never be used again. */
+void
+freeStorage (void)
+{
+ freeAllMBlocks();
+}
+
+/* -----------------------------------------------------------------------------
+ CAF management.
+
+ The entry code for every CAF does the following:
+
+ - builds a CAF_BLACKHOLE in the heap
+ - pushes an update frame pointing to the CAF_BLACKHOLE
+ - invokes UPD_CAF(), which:
+ - calls newCaf, below
+ - updates the CAF with a static indirection to the CAF_BLACKHOLE
+
+ Why do we build a BLACKHOLE in the heap rather than just updating
+ the thunk directly? It's so that we only need one kind of update
+ frame - otherwise we'd need a static version of the update frame too.
+
+ newCaf() does the following:
+
+ - it puts the CAF on the oldest generation's mut-once list.
+ This is so that we can treat the CAF as a root when collecting
+ younger generations.
+
+ For GHCI, we have additional requirements when dealing with CAFs:
+
+ - we must *retain* all dynamically-loaded CAFs ever entered,
+ just in case we need them again.
+ - we must be able to *revert* CAFs that have been evaluated, to
+ their pre-evaluated form.
+
+ To do this, we use an additional CAF list. When newCaf() is
+ called on a dynamically-loaded CAF, we add it to the CAF list
+ instead of the old-generation mutable list, and save away its
+ old info pointer (in caf->saved_info) for later reversion.
+
+ To revert all the CAFs, we traverse the CAF list and reset the
+ info pointer to caf->saved_info, then throw away the CAF list.
+ (see GC.c:revertCAFs()).
+
+ -- SDM 29/1/01
+
+ -------------------------------------------------------------------------- */
+
+/*
+ * Called (via the CAF entry code) the first time a CAF is entered.
+ * With keepCAFs set, the CAF is chained onto caf_list with its original
+ * info pointer saved for possible reversion; otherwise it is recorded
+ * on the oldest generation's mutable list so the GC treats it as a root
+ * for younger generations. Takes/releases sm_mutex internally.
+ */
+void
+newCAF(StgClosure* caf)
+{
+ ACQUIRE_SM_LOCK;
+
+ if(keepCAFs)
+ {
+ // HACK:
+ // If we are in GHCi _and_ we are using dynamic libraries,
+ // then we can't redirect newCAF calls to newDynCAF (see below),
+ // so we make newCAF behave almost like newDynCAF.
+ // The dynamic libraries might be used by both the interpreted
+ // program and GHCi itself, so they must not be reverted.
+ // This also means that in GHCi with dynamic libraries, CAFs are not
+ // garbage collected. If this turns out to be a problem, we could
+ // do another hack here and do an address range test on caf to figure
+ // out whether it is from a dynamic library.
+ ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
+ ((StgIndStatic *)caf)->static_link = caf_list;
+ caf_list = caf;
+ }
+ else
+ {
+ /* Put this CAF on the mutable list for the old generation.
+ * This is a HACK - the IND_STATIC closure doesn't really have
+ * a mut_link field, but we pretend it has - in fact we re-use
+ * the STATIC_LINK field for the time being, because when we
+ * come to do a major GC we won't need the mut_link field
+ * any more and can use it as a STATIC_LINK.
+ */
+ ((StgIndStatic *)caf)->saved_info = NULL;
+ recordMutableGen(caf, oldest_gen);
+ }
+
+ RELEASE_SM_LOCK;
+
+#ifdef PAR
+ /* If we are PAR or DIST then we never forget a CAF */
+ { globalAddr *newGA;
+ //debugBelch("<##> Globalising CAF %08x %s",caf,info_type(caf));
+ newGA=makeGlobal(caf,rtsTrue); /*given full weight*/
+ ASSERT(newGA);
+ }
+#endif /* PAR */
+}
+
+// An alternate version of newCaf which is used for dynamically loaded
+// object code in GHCi. In this case we want to retain *all* CAFs in
+// the object code, because they might be demanded at any time from an
+// expression evaluated on the command line.
+// Also, GHCi might want to revert CAFs, so we add these to the
+// revertible_caf_list.
+//
+// The linker hackily arranges that references to newCaf from dynamic
+// code end up pointing to newDynCAF.
+/* Retain a dynamically-loaded CAF on the revertible list, saving its
+ * original info pointer so GHCi can later revert it to unevaluated
+ * form. See the comment block above for how the linker redirects
+ * newCAF calls here. Takes/releases sm_mutex internally. */
+void
+newDynCAF(StgClosure *caf)
+{
+ ACQUIRE_SM_LOCK;
+
+ ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info;
+ ((StgIndStatic *)caf)->static_link = revertible_caf_list;
+ revertible_caf_list = caf;
+
+ RELEASE_SM_LOCK;
+}
+
+/* -----------------------------------------------------------------------------
+ Nursery management.
+ -------------------------------------------------------------------------- */
+
+/*
+ * Prepend 'blocks' freshly-allocated single blocks to the (doubly
+ * linked) nursery list 'tail' for step 'stp', and return the new head.
+ * Caller must hold sm_mutex (calls allocBlock()).
+ */
+static bdescr *
+allocNursery (step *stp, bdescr *tail, nat blocks)
+{
+ bdescr *bd;
+ nat i;
+
+ if (blocks == 0) {
+ // Nothing to add. Return early: falling through would
+ // dereference tail (possibly NULL) and wrongly clear the
+ // back-pointer of a list we haven't extended.
+ return tail;
+ }
+
+ // Allocate a nursery: we allocate fresh blocks one at a time and
+ // cons them on to the front of the list, not forgetting to update
+ // the back pointer on the tail of the list to point to the new block.
+ for (i=0; i < blocks; i++) {
+ // @LDV profiling
+ /*
+ processNursery() in LdvProfile.c assumes that every block group in
+ the nursery contains only a single block. So, if a block group is
+ given multiple blocks, change processNursery() accordingly.
+ */
+ bd = allocBlock();
+ bd->link = tail;
+ // double-link the nursery: we might need to insert blocks
+ if (tail != NULL) {
+ tail->u.back = bd;
+ }
+ bd->step = stp;
+ bd->gen_no = 0;
+ bd->flags = 0;
+ bd->free = bd->start;
+ tail = bd;
+ }
+ tail->u.back = NULL; // tail is now the head of the list
+ return tail;
+}
+
+/*
+ * Point each capability's nursery registers (rNursery,
+ * rCurrentNursery, rCurrentAlloc) at its nursery; in the
+ * non-threaded RTS there is only MainCapability and nurseries[0].
+ */
+static void
+assignNurseriesToCapabilities (void)
+{
+#ifdef THREADED_RTS
+ nat i;
+
+ for (i = 0; i < n_nurseries; i++) {
+ capabilities[i].r.rNursery = &nurseries[i];
+ capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
+ capabilities[i].r.rCurrentAlloc = NULL;
+ }
+#else /* THREADED_RTS */
+ MainCapability.r.rNursery = &nurseries[0];
+ MainCapability.r.rCurrentNursery = nurseries[0].blocks;
+ MainCapability.r.rCurrentAlloc = NULL;
+#endif
+}
+
+/*
+ * Allocate every nursery at the -A size (minAllocAreaSize blocks) and
+ * hand them out to the capabilities. Called from initStorage().
+ */
+void
+allocNurseries( void )
+{
+ nat i;
+
+ for (i = 0; i < n_nurseries; i++) {
+ nurseries[i].blocks =
+ allocNursery(&nurseries[i], NULL,
+ RtsFlags.GcFlags.minAllocAreaSize);
+ nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ nurseries[i].old_blocks = NULL;
+ nurseries[i].n_old_blocks = 0;
+ /* hp, hpLim, hp_bd, to_space etc. aren't used in the nursery */
+ }
+ assignNurseriesToCapabilities();
+}
+
+/*
+ * Mark every nursery block empty again (free = start) after a GC and
+ * re-attach the nurseries to the capabilities. No blocks are freed or
+ * allocated here. With sanity checking on, scribbles 0xaa over the
+ * reclaimed space to catch stale references.
+ */
+void
+resetNurseries( void )
+{
+ nat i;
+ bdescr *bd;
+ step *stp;
+
+ for (i = 0; i < n_nurseries; i++) {
+ stp = &nurseries[i];
+ for (bd = stp->blocks; bd; bd = bd->link) {
+ bd->free = bd->start;
+ ASSERT(bd->gen_no == 0);
+ ASSERT(bd->step == stp);
+ IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
+ }
+ }
+ assignNurseriesToCapabilities();
+}
+
+/* Total number of blocks currently held by all nurseries. */
+lnat
+countNurseryBlocks (void)
+{
+ lnat total = 0;
+ nat idx;
+
+ for (idx = 0; idx < n_nurseries; idx++) {
+ total += nurseries[idx].n_blocks;
+ }
+ return total;
+}
+
+/*
+ * Grow or shrink one nursery to exactly 'blocks' blocks. Growing
+ * prepends fresh blocks; shrinking frees groups from the front of the
+ * list, which may overshoot when a large (multi-block) group is freed,
+ * in which case the difference is re-allocated. Caller must hold
+ * sm_mutex.
+ */
+static void
+resizeNursery ( step *stp, nat blocks )
+{
+ bdescr *bd;
+ nat nursery_blocks;
+
+ nursery_blocks = stp->n_blocks;
+ if (nursery_blocks == blocks) return;
+
+ if (nursery_blocks < blocks) {
+ IF_DEBUG(gc, debugBelch("Increasing size of nursery to %d blocks\n",
+ blocks));
+ stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
+ }
+ else {
+ bdescr *next_bd;
+
+ IF_DEBUG(gc, debugBelch("Decreasing size of nursery to %d blocks\n",
+ blocks));
+
+ bd = stp->blocks;
+ while (nursery_blocks > blocks) {
+ next_bd = bd->link;
+ next_bd->u.back = NULL; // new head has no predecessor
+ nursery_blocks -= bd->blocks; // might be a large block
+ freeGroup(bd);
+ bd = next_bd;
+ }
+ stp->blocks = bd;
+ // might have gone just under, by freeing a large block, so make
+ // up the difference.
+ if (nursery_blocks < blocks) {
+ stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks);
+ }
+ }
+
+ stp->n_blocks = blocks;
+ ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
+}
+
+//
+// Resize each of the nurseries to the specified size.
+//
+//
+// Resize each of the nurseries to the specified size.
+//
+void
+resizeNurseriesFixed (nat blocks)
+{
+ nat i;
+ for (i = 0; i < n_nurseries; i++) {
+ resizeNursery(&nurseries[i], blocks);
+ }
+}
+
+//
+// Resize the nurseries to the total specified size.
+//
+//
+// Resize the nurseries to the total specified size.
+//
+void
+resizeNurseries (nat blocks)
+{
+ // If there are multiple nurseries, then we just divide the number
+ // of available blocks between them. (Integer division: any
+ // remainder blocks are simply not handed out.)
+ resizeNurseriesFixed(blocks / n_nurseries);
+}
+
+/* -----------------------------------------------------------------------------
+ The allocate() interface
+
+ allocate(n) always succeeds, and returns a chunk of memory n words
+ long. n can be larger than the size of a block if necessary, in
+ which case a contiguous block group will be allocated.
+ -------------------------------------------------------------------------- */
+
+/*
+ * Allocate n words from the storage manager's private allocation area
+ * (small_alloc_list), or as a large object in g0s0 when n crosses
+ * LARGE_OBJECT_THRESHOLD. Always succeeds. Takes/releases sm_mutex,
+ * so it must not be called with that lock already held; prefer
+ * allocateLocal() from STG-land.
+ */
+StgPtr
+allocate( nat n )
+{
+ bdescr *bd;
+ StgPtr p;
+
+ ACQUIRE_SM_LOCK;
+
+ TICK_ALLOC_HEAP_NOCTR(n);
+ CCS_ALLOC(CCCS,n);
+
+ /* big allocation (>=LARGE_OBJECT_THRESHOLD) */
+ /* ToDo: allocate directly into generation 1 */
+ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+ nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+ bd = allocGroup(req_blocks);
+ dbl_link_onto(bd, &g0s0->large_objects);
+ g0s0->n_large_blocks += req_blocks;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = BF_LARGE;
+ bd->free = bd->start + n;
+ alloc_blocks += req_blocks;
+ RELEASE_SM_LOCK;
+ return bd->start;
+
+ /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+ } else if (small_alloc_list == NULL || alloc_Hp + n > alloc_HpLim) {
+ // current block exhausted (or none yet): record how far we got
+ // in the old block, then start a fresh one.
+ if (small_alloc_list) {
+ small_alloc_list->free = alloc_Hp;
+ }
+ bd = allocBlock();
+ bd->link = small_alloc_list;
+ small_alloc_list = bd;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = 0;
+ alloc_Hp = bd->start;
+ alloc_HpLim = bd->start + BLOCK_SIZE_W;
+ alloc_blocks++;
+ }
+
+ p = alloc_Hp;
+ alloc_Hp += n;
+ RELEASE_SM_LOCK;
+ return p;
+}
+
+/*
+ * Words allocated via allocate() since the last GC: blocks handed out
+ * minus the unused tail of the current block, and minus the unused
+ * tail of the pinned-object block, which is also accounted in
+ * alloc_blocks. (Despite the name, the result is in words, matching
+ * its use in calcAllocated().)
+ */
+lnat
+allocated_bytes( void )
+{
+ lnat allocated;
+
+ allocated = alloc_blocks * BLOCK_SIZE_W - (alloc_HpLim - alloc_Hp);
+ if (pinned_object_block != NULL) {
+ allocated -= (pinned_object_block->start + BLOCK_SIZE_W) -
+ pinned_object_block->free;
+ }
+
+ return allocated;
+}
+
+/*
+ * Flush the cached alloc_Hp back into the current small-allocation
+ * block's free pointer, so the block descriptors are consistent
+ * (e.g. before a GC walks them).
+ */
+void
+tidyAllocateLists (void)
+{
+ if (small_alloc_list != NULL) {
+ ASSERT(alloc_Hp >= small_alloc_list->start &&
+ alloc_Hp <= small_alloc_list->start + BLOCK_SIZE);
+ small_alloc_list->free = alloc_Hp;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ allocateLocal()
+
+ This allocates memory in the current thread - it is intended for
+ use primarily from STG-land where we have a Capability. It is
+ better than allocate() because it doesn't require taking the
+ sm_mutex lock in the common case.
+
+ Memory is allocated directly from the nursery if possible (but not
+ from the current nursery block, so as not to interfere with
+ Hp/HpLim).
+ -------------------------------------------------------------------------- */
+
+/*
+ * Allocate n words from the given capability's nursery. Only takes
+ * sm_mutex when a fresh block (or a large object) is needed, so the
+ * common path is lock-free. Never returns space in the capability's
+ * current Hp/HpLim block (rCurrentNursery), so it can't interfere
+ * with in-progress STG allocation.
+ */
+StgPtr
+allocateLocal (Capability *cap, nat n)
+{
+ bdescr *bd;
+ StgPtr p;
+
+ TICK_ALLOC_HEAP_NOCTR(n);
+ CCS_ALLOC(CCCS,n);
+
+ /* big allocation (>=LARGE_OBJECT_THRESHOLD) */
+ /* ToDo: allocate directly into generation 1 */
+ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+ nat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
+ ACQUIRE_SM_LOCK;
+ bd = allocGroup(req_blocks);
+ dbl_link_onto(bd, &g0s0->large_objects);
+ g0s0->n_large_blocks += req_blocks;
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = BF_LARGE;
+ bd->free = bd->start + n;
+ alloc_blocks += req_blocks;
+ RELEASE_SM_LOCK;
+ return bd->start;
+
+ /* small allocation (<LARGE_OBJECT_THRESHOLD) */
+ } else {
+
+ bd = cap->r.rCurrentAlloc;
+ if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+
+ // The CurrentAlloc block is full, we need to find another
+ // one. First, we try taking the next block from the
+ // nursery:
+ bd = cap->r.rCurrentNursery->link;
+
+ if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+ // The nursery is empty, or the next block is already
+ // full: allocate a fresh block (we can't fail here).
+ ACQUIRE_SM_LOCK;
+ bd = allocBlock();
+ cap->r.rNursery->n_blocks++;
+ RELEASE_SM_LOCK;
+ bd->gen_no = 0;
+ bd->step = cap->r.rNursery;
+ bd->flags = 0;
+ } else {
+ // we have a block in the nursery: take it and put
+ // it at the *front* of the nursery list, and use it
+ // to allocate() from.
+ cap->r.rCurrentNursery->link = bd->link;
+ if (bd->link != NULL) {
+ bd->link->u.back = cap->r.rCurrentNursery;
+ }
+ }
+ dbl_link_onto(bd, &cap->r.rNursery->blocks);
+ cap->r.rCurrentAlloc = bd;
+ IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
+ }
+ }
+ p = bd->free;
+ bd->free += n;
+ return p;
+}
+
+/* ---------------------------------------------------------------------------
+ Allocate a fixed/pinned object.
+
+ We allocate small pinned objects into a single block, allocating a
+ new block when the current one overflows. The block is chained
+ onto the large_object_list of generation 0 step 0.
+
+ NOTE: The GC can't in general handle pinned objects. This
+ interface is only safe to use for ByteArrays, which have no
+ pointers and don't require scavenging. It works because the
+ block's descriptor has the BF_LARGE flag set, so the block is
+ treated as a large object and chained onto various lists, rather
+ than the individual objects being copied. However, when it comes
+ to scavenge the block, the GC will only scavenge the first object.
+ The reason is that the GC can't linearly scan a block of pinned
+ objects at the moment (doing so would require using the
+ mostly-copying techniques). But since we're restricting ourselves
+ to pinned ByteArrays, not scavenging is ok.
+
+ This function is called by newPinnedByteArray# which immediately
+ fills the allocated memory with a MutableByteArray#.
+ ------------------------------------------------------------------------- */
+
+/*
+ * Allocate n words of pinned (non-moving) memory. Small requests are
+ * bump-allocated from pinned_object_block; requests at or above
+ * LARGE_OBJECT_THRESHOLD go through allocate(), which pins large
+ * objects anyway. Takes/releases sm_mutex. Returns 8-byte-aligned
+ * memory (see the rounding below).
+ */
+StgPtr
+allocatePinned( nat n )
+{
+ StgPtr p;
+ bdescr *bd = pinned_object_block;
+
+ // If the request is for a large object, then allocate()
+ // will give us a pinned object anyway.
+ if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
+ return allocate(n);
+ }
+
+ ACQUIRE_SM_LOCK;
+
+ TICK_ALLOC_HEAP_NOCTR(n);
+ CCS_ALLOC(CCCS,n);
+
+ // we always return 8-byte aligned memory. bd->free must be
+ // 8-byte aligned to begin with, so we just round up n to
+ // the nearest multiple of 8 bytes.
+ if (sizeof(StgWord) == 4) {
+ n = (n+1) & ~1; // round word count up to even on 32-bit
+ }
+
+ // If we don't have a block of pinned objects yet, or the current
+ // one isn't large enough to hold the new object, allocate a new one.
+ if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+ pinned_object_block = bd = allocBlock();
+ dbl_link_onto(bd, &g0s0->large_objects);
+ bd->gen_no = 0;
+ bd->step = g0s0;
+ bd->flags = BF_PINNED | BF_LARGE;
+ bd->free = bd->start;
+ alloc_blocks++;
+ }
+
+ p = bd->free;
+ bd->free += n;
+ RELEASE_SM_LOCK;
+ return p;
+}
+
+/* -----------------------------------------------------------------------------
+ This is the write barrier for MUT_VARs, a.k.a. IORefs. A
+ MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
+ is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
+ and is put on the mutable list.
+ -------------------------------------------------------------------------- */
+
+/*
+ * Write barrier for MUT_VARs: on the first write to a clean MUT_VAR,
+ * flip its info pointer to MUT_VAR_DIRTY and, if it lives in an old
+ * generation, record it on the capability's mutable list so younger
+ * generations can be collected without scanning the whole heap.
+ */
+void
+dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
+{
+ Capability *cap = regTableToCapability(reg);
+ bdescr *bd;
+ if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
+ p->header.info = &stg_MUT_VAR_DIRTY_info;
+ bd = Bdescr((StgPtr)p);
+ if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Allocation functions for GMP.
+
+ These all use the allocate() interface - we can't have any garbage
+ collection going on during a gmp operation, so we use allocate()
+ which always succeeds. The gmp operations which might need to
+ allocate will ask the storage manager (via doYouWantToGC()) whether
+ a garbage collection is required, in case we get into a loop doing
+ only allocate() style allocation.
+ -------------------------------------------------------------------------- */
+
+/*
+ * GMP allocation hook: wrap the request in an ARR_WORDS heap object
+ * and return a pointer to its payload, so GMP limb data is ordinary
+ * GC-managed heap.
+ */
+static void *
+stgAllocForGMP (size_t size_in_bytes)
+{
+ StgArrWords* arr;
+ nat data_size_in_words, total_size_in_words;
+
+ /* round up to a whole number of words */
+ /* NOTE(review): (size + sizeof(W_) - 1) / sizeof(W_) would be the
+ * exact ceiling; the "+ 1" here over-allocates by up to one extra
+ * word. It never under-allocates, so it is safe, but confirm
+ * whether the slop is intentional. */
+ data_size_in_words = (size_in_bytes + sizeof(W_) + 1) / sizeof(W_);
+ total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
+
+ /* allocate and fill it in. */
+#if defined(THREADED_RTS)
+ arr = (StgArrWords *)allocateLocal(myTask()->cap, total_size_in_words);
+#else
+ arr = (StgArrWords *)allocateLocal(&MainCapability, total_size_in_words);
+#endif
+ SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, data_size_in_words);
+
+ /* and return a ptr to the goods inside the array */
+ return arr->payload;
+}
+
+/*
+ * GMP reallocation hook: allocate a fresh heap array and copy the data
+ * across. GMP's reallocate function may be called to shrink as well as
+ * grow a block, so copy only min(old_size, new_size) bytes - copying
+ * old_size bytes unconditionally would overrun a smaller new buffer.
+ * The old block is simply abandoned to the garbage collector.
+ */
+static void *
+stgReallocForGMP (void *ptr, size_t old_size, size_t new_size)
+{
+ void *new_stuff_ptr = stgAllocForGMP(new_size);
+ size_t n_copy = old_size < new_size ? old_size : new_size;
+
+ memcpy(new_stuff_ptr, ptr, n_copy);
+
+ return new_stuff_ptr;
+}
+
+/* GMP deallocation hook: intentionally a no-op, because GMP blocks are
+ * ordinary heap objects (see stgAllocForGMP) and the GC reclaims them. */
+static void
+stgDeallocForGMP (void *ptr STG_UNUSED,
+ size_t size STG_UNUSED)
+{
+ /* easy for us: the garbage collector does the dealloc'n */
+}
+
+/* -----------------------------------------------------------------------------
+ * Stats and stuff
+ * -------------------------------------------------------------------------- */
+
+/* -----------------------------------------------------------------------------
+ * calcAllocated()
+ *
+ * Approximate how much we've allocated: number of blocks in the
+ * nursery + blocks allocated via allocate() - unused nursery blocks.
+ * This leaves a little slop at the end of each block, and doesn't
+ * take into account large objects (ToDo).
+ * -------------------------------------------------------------------------- */
+
+/*
+ * Approximate words allocated since the last GC: allocate() usage plus
+ * the whole nursery, minus nursery blocks not yet reached and the
+ * unused tail of each capability's current block. Also accumulates
+ * into total_allocated. Result is in words, despite feeding
+ * allocated_bytes() in.
+ */
+lnat
+calcAllocated( void )
+{
+ nat allocated;
+ bdescr *bd;
+
+ allocated = allocated_bytes();
+ allocated += countNurseryBlocks() * BLOCK_SIZE_W;
+
+ {
+#ifdef THREADED_RTS
+ nat i;
+ for (i = 0; i < n_nurseries; i++) {
+ Capability *cap;
+ // blocks after the current one were never allocated into
+ for ( bd = capabilities[i].r.rCurrentNursery->link;
+ bd != NULL; bd = bd->link ) {
+ allocated -= BLOCK_SIZE_W;
+ }
+ cap = &capabilities[i];
+ // subtract the unused tail of the current block
+ if (cap->r.rCurrentNursery->free <
+ cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
+ allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
+ - cap->r.rCurrentNursery->free;
+ }
+ }
+#else
+ bdescr *current_nursery = MainCapability.r.rCurrentNursery;
+
+ for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) {
+ allocated -= BLOCK_SIZE_W;
+ }
+ if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) {
+ allocated -= (current_nursery->start + BLOCK_SIZE_W)
+ - current_nursery->free;
+ }
+#endif
+ }
+
+ total_allocated += allocated;
+ return allocated;
+}
+
+/* Approximate the amount of live data in the heap. To be called just
+ * after garbage collection (see GarbageCollect()).
+ */
+/* Approximate the amount of live data in the heap (in words). To be
+ * called just after garbage collection (see GarbageCollect()), when
+ * everything outside g0s0 is live data. Ignores per-block slop.
+ */
+extern lnat
+calcLive(void)
+{
+ nat g, s;
+ lnat live = 0;
+ step *stp;
+
+ if (RtsFlags.GcFlags.generations == 1) {
+ // two-space collector: count the to-space directly
+ live = (g0s0->n_blocks - 1) * BLOCK_SIZE_W +
+ ((lnat)g0s0->hp_bd->free - (lnat)g0s0->hp_bd->start) / sizeof(W_);
+ return live;
+ }
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ /* approximate amount of live data (doesn't take into account slop
+ * at end of each block).
+ */
+ if (g == 0 && s == 0) {
+ continue; // g0s0 is the (dead) allocation area
+ }
+ stp = &generations[g].steps[s];
+ live += (stp->n_large_blocks + stp->n_blocks - 1) * BLOCK_SIZE_W;
+ if (stp->hp_bd != NULL) {
+ live += ((lnat)stp->hp_bd->free - (lnat)stp->hp_bd->start)
+ / sizeof(W_);
+ }
+ if (stp->scavd_hp != NULL) {
+ live -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp;
+ }
+ }
+ }
+ return live;
+}
+
+/* Approximate the number of blocks that will be needed at the next
+ * garbage collection.
+ *
+ * Assume: all data currently live will remain live. Steps that will
+ * be collected next time will therefore need twice as many blocks
+ * since all the data will be copied.
+ */
+/* Approximate the number of blocks that will be needed at the next
+ * garbage collection.
+ *
+ * Assume: all data currently live will remain live. Steps that will
+ * be collected next time (i.e. whose generation is over its block
+ * budget and which are not compacted) will therefore need twice as
+ * many blocks, since all the data will be copied.
+ */
+extern lnat
+calcNeeded(void)
+{
+ lnat needed = 0;
+ nat g, s;
+ step *stp;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0) { continue; }
+ stp = &generations[g].steps[s];
+ if (generations[g].steps[0].n_blocks +
+ generations[g].steps[0].n_large_blocks
+ > generations[g].max_blocks
+ && stp->is_compacted == 0) {
+ needed += 2 * stp->n_blocks;
+ } else {
+ needed += stp->n_blocks;
+ }
+ }
+ }
+ return needed;
+}
+
+/* -----------------------------------------------------------------------------
+ Debugging
+
+ memInventory() checks for memory leaks by counting up all the
+ blocks we know about and comparing that to the number of blocks
+ allegedly floating around in the system.
+ -------------------------------------------------------------------------- */
+
+#ifdef DEBUG
+
+/* Count all blocks owned by one step: ordinary blocks, from-space
+ * blocks left over during GC, and large objects. Used only by
+ * memInventory() below (DEBUG builds).
+ */
+static lnat
+stepBlocks (step *stp)
+{
+ lnat total_blocks;
+ bdescr *bd;
+
+ total_blocks = stp->n_blocks;
+ total_blocks += stp->n_old_blocks;
+ for (bd = stp->large_objects; bd; bd = bd->link) {
+ total_blocks += bd->blocks;
+ /* hack for megablock groups: they have an extra block or two in
+ the second and subsequent megablocks where the block
+ descriptors would normally go.
+ */
+ if (bd->blocks > BLOCKS_PER_MBLOCK) {
+ total_blocks -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK)
+ * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE));
+ }
+ }
+ return total_blocks;
+}
+
+/*
+ * Leak check (DEBUG builds): total up every block the RTS knows about
+ * (generations, mutable lists, nurseries, allocate() blocks, retainer
+ * profiler, arenas, free list) and assert the sum equals everything
+ * the megablock allocator has handed out.
+ */
+void
+memInventory(void)
+{
+ nat g, s, i;
+ step *stp;
+ bdescr *bd;
+ lnat total_blocks = 0, free_blocks = 0;
+
+ /* count the blocks we current have */
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (i = 0; i < n_capabilities; i++) {
+ for (bd = capabilities[i].mut_lists[g]; bd != NULL; bd = bd->link) {
+ total_blocks += bd->blocks;
+ }
+ }
+ for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+ total_blocks += bd->blocks;
+ }
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g==0 && s==0) continue;
+ stp = &generations[g].steps[s];
+ total_blocks += stepBlocks(stp);
+ }
+ }
+
+ for (i = 0; i < n_nurseries; i++) {
+ total_blocks += stepBlocks(&nurseries[i]);
+ }
+#ifdef THREADED_RTS
+ // We put pinned object blocks in g0s0, so better count blocks there too.
+ total_blocks += stepBlocks(g0s0);
+#endif
+
+ /* any blocks held by allocate() */
+ for (bd = small_alloc_list; bd; bd = bd->link) {
+ total_blocks += bd->blocks;
+ }
+
+#ifdef PROFILING
+ if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+ total_blocks += retainerStackBlocks();
+ }
+#endif
+
+ // count the blocks allocated by the arena allocator
+ total_blocks += arenaBlocks();
+
+ /* count the blocks on the free list */
+ free_blocks = countFreeList();
+
+ /* NOTE(review): %ld assumes lnat is long-sized; verify this holds on
+ * all supported platforms (e.g. 64-bit Windows). */
+ if (total_blocks + free_blocks != mblocks_allocated *
+ BLOCKS_PER_MBLOCK) {
+ debugBelch("Blocks: %ld live + %ld free = %ld total (%ld around)\n",
+ total_blocks, free_blocks, total_blocks + free_blocks,
+ mblocks_allocated * BLOCKS_PER_MBLOCK);
+ }
+
+ ASSERT(total_blocks + free_blocks == mblocks_allocated * BLOCKS_PER_MBLOCK);
+}
+
+
+/* Walk a block-descriptor chain and return the total block count. */
+nat
+countBlocks(bdescr *bd)
+{
+ nat total = 0;
+
+ while (bd != NULL) {
+ total += bd->blocks;
+ bd = bd->link;
+ }
+ return total;
+}
+
+/* Full heap sanity check. */
+/* Full heap sanity check (DEBUG builds): verify block counts match the
+ * cached n_blocks/n_large_blocks, and walk every heap block, large
+ * object list, mutable list, nursery, and the free list. g0s0 is
+ * skipped in the generational case because the nurseries are checked
+ * separately. */
+void
+checkSanity( void )
+{
+ nat g, s;
+
+ if (RtsFlags.GcFlags.generations == 1) {
+ checkHeap(g0s0->blocks);
+ checkChain(g0s0->large_objects);
+ } else {
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ if (g == 0 && s == 0) { continue; }
+ ASSERT(countBlocks(generations[g].steps[s].blocks)
+ == generations[g].steps[s].n_blocks);
+ ASSERT(countBlocks(generations[g].steps[s].large_objects)
+ == generations[g].steps[s].n_large_blocks);
+ checkHeap(generations[g].steps[s].blocks);
+ checkChain(generations[g].steps[s].large_objects);
+ if (g > 0) {
+ checkMutableList(generations[g].mut_list, g);
+ }
+ }
+ }
+
+ for (s = 0; s < n_nurseries; s++) {
+ ASSERT(countBlocks(nurseries[s].blocks)
+ == nurseries[s].n_blocks);
+ ASSERT(countBlocks(nurseries[s].large_objects)
+ == nurseries[s].n_large_blocks);
+ }
+
+ checkFreeListSanity();
+ }
+}
+
+/* Nursery sanity check */
+/* Nursery sanity check (DEBUG builds): verify the doubly-linked block
+ * list is well-formed (each back pointer matches the previous block)
+ * and the block count matches n_blocks. */
+void
+checkNurserySanity( step *stp )
+{
+ bdescr *bd, *prev;
+ nat blocks = 0;
+
+ prev = NULL;
+ for (bd = stp->blocks; bd != NULL; bd = bd->link) {
+ ASSERT(bd->u.back == prev);
+ prev = bd;
+ blocks += bd->blocks;
+ }
+ ASSERT(blocks == stp->n_blocks);
+}
+
+// handy function for use in gdb, because Bdescr() is inlined.
+// (Declared extern here so the symbol survives into the binary.)
+extern bdescr *_bdescr( StgPtr p );
+
+bdescr *
+_bdescr( StgPtr p )
+{
+ return Bdescr(p);
+}
+
+#endif
diff --git a/rts/Task.c b/rts/Task.c
new file mode 100644
index 0000000000..7366480094
--- /dev/null
+++ b/rts/Task.c
@@ -0,0 +1,315 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2001-2005
+ *
+ * The task manager subsystem. Tasks execute STG code, with this
+ * module providing the API which the Scheduler uses to control their
+ * creation and destruction.
+ *
+ * -------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "OSThreads.h"
+#include "Task.h"
+#include "Capability.h"
+#include "Stats.h"
+#include "RtsFlags.h"
+#include "Schedule.h"
+#include "Hash.h"
+
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+
+// Task lists and global counters.
+// Locks required: sched_mutex.
+Task *all_tasks = NULL; // every Task ever created, linked via all_link
+static Task *task_free_list = NULL; // singly-linked
+static nat taskCount; // number of Task structures allocated so far
+#define DEFAULT_MAX_WORKERS 64
+static nat maxWorkers; // we won't create more workers than this
+static nat tasksRunning; // Tasks currently executing (not stopped or on the free list)
+static nat workerCount; // workers created; checked against maxWorkers in startWorkerTask
+
+/* -----------------------------------------------------------------------------
+ * Remembering the current thread's Task
+ * -------------------------------------------------------------------------- */
+
+// A thread-local-storage key that we can use to get access to the
+// current thread's Task structure.
+#if defined(THREADED_RTS)
+ThreadLocalKey currentTaskKey;
+#else
+Task *my_task;
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Rest of the Task API
+ * -------------------------------------------------------------------------- */
+
+// Initialise the task manager's counters and, in the threaded RTS, the
+// thread-local key used by myTask()/setMyTask().  Idempotent: only the
+// first call has any effect.
+void
+initTaskManager (void)
+{
+ static int initialized = 0;
+
+ if (!initialized) {
+ taskCount = 0;
+ workerCount = 0;
+ tasksRunning = 0;
+ maxWorkers = DEFAULT_MAX_WORKERS;
+ initialized = 1;
+#if defined(THREADED_RTS)
+ newThreadLocalKey(&currentTaskKey);
+#endif
+ }
+}
+
+
+// Shut down the task manager.  Currently this only emits a debug
+// trace; outstanding Tasks are not waited for or freed here.
+void
+stopTaskManager (void)
+{
+ IF_DEBUG(scheduler, sched_belch("stopping task manager, %d tasks still running", tasksRunning));
+}
+
+
+// Allocate and initialise a fresh Task structure, link it onto the
+// all_tasks list, and bump taskCount.  Used for both bound tasks
+// (via newBoundTask) and workers (via startWorkerTask).
+// Requires: sched_mutex (protects all_tasks and the counters).
+static Task*
+newTask (void)
+{
+#if defined(THREADED_RTS)
+ Ticks currentElapsedTime, currentUserTime;
+#endif
+ Task *task;
+
+ task = stgMallocBytes(sizeof(Task), "newTask");
+
+ task->cap = NULL;
+ task->stopped = rtsFalse;
+ task->suspended_tso = NULL;
+ task->tso = NULL;
+ task->stat = NoStatus;
+ task->ret = NULL;
+
+#if defined(THREADED_RTS)
+ initCondition(&task->cond);
+ initMutex(&task->lock);
+ task->wakeup = rtsFalse;
+#endif
+
+#if defined(THREADED_RTS)
+ // start the mutator-time stopwatch for this Task
+ currentUserTime = getThreadCPUTime();
+ currentElapsedTime = getProcessElapsedTime();
+ task->mut_time = 0.0;
+ task->mut_etime = 0.0;
+ task->gc_time = 0.0;
+ task->gc_etime = 0.0;
+ task->muttimestart = currentUserTime;
+ task->elapsedtimestart = currentElapsedTime;
+#endif
+
+ task->prev = NULL;
+ task->next = NULL;
+ task->return_link = NULL;
+
+ task->all_link = all_tasks;
+ all_tasks = task;
+
+ taskCount++;
+ // Fix: do NOT bump workerCount here.  newTask() is also called for
+ // bound tasks, and startWorkerTask() already increments workerCount
+ // once per worker, so incrementing it here counted bound tasks as
+ // workers and counted each real worker twice against maxWorkers.
+
+ return task;
+}
+
+// Create (or recycle from task_free_list) a Task for the current OS
+// thread, which is entering the RTS as a bound thread, and make it the
+// thread's current Task via taskEnter().
+// Requires: sched_mutex.
+Task *
+newBoundTask (void)
+{
+ Task *task;
+
+ ASSERT_LOCK_HELD(&sched_mutex);
+ if (task_free_list == NULL) {
+ task = newTask();
+ } else {
+ // reuse a stopped Task; its accumulated stats carry over
+ task = task_free_list;
+ task_free_list = task->next;
+ task->next = NULL;
+ task->prev = NULL;
+ task->stopped = rtsFalse;
+ }
+#if defined(THREADED_RTS)
+ task->id = osThreadId(); // the Task may previously have belonged to another thread
+#endif
+ ASSERT(task->cap == NULL);
+
+ tasksRunning++;
+
+ taskEnter(task);
+
+ IF_DEBUG(scheduler,sched_belch("new task (taskCount: %d)", taskCount););
+ return task;
+}
+
+// The current bound task is leaving the RTS: mark it stopped, restore
+// the previous Task for this OS thread (the prev_stack saved by
+// taskEnter), and return the Task to the free list for reuse.
+void
+boundTaskExiting (Task *task)
+{
+ task->stopped = rtsTrue;
+ task->cap = NULL;
+
+#if defined(THREADED_RTS)
+ ASSERT(osThreadId() == task->id);
+#endif
+ ASSERT(myTask() == task);
+ setMyTask(task->prev_stack); // pop back to the enclosing Task, if any
+
+ tasksRunning--;
+
+ // sadly, we need a lock around the free task list. Todo: eliminate.
+ ACQUIRE_LOCK(&sched_mutex);
+ task->next = task_free_list;
+ task_free_list = task;
+ RELEASE_LOCK(&sched_mutex);
+
+ IF_DEBUG(scheduler,sched_belch("task exiting"));
+}
+
+// Printable identifier for a Task in debug messages: the OS thread id
+// in the threaded RTS, the Task pointer itself otherwise.
+#ifdef THREADED_RTS
+#define TASK_ID(t) (t)->id
+#else
+#define TASK_ID(t) (t)
+#endif
+
+// Mark a Task stopped and put it on the free list without its
+// cooperation; used by forkProcess().  No-op if already stopped.
+// Requires: sched_mutex (which also covers task_free_list here).
+void
+discardTask (Task *task)
+{
+ ASSERT_LOCK_HELD(&sched_mutex);
+ if (!task->stopped) {
+ IF_DEBUG(scheduler,sched_belch("discarding task %p", TASK_ID(task)));
+ task->cap = NULL;
+ task->tso = NULL;
+ task->stopped = rtsTrue;
+ tasksRunning--;
+ task->next = task_free_list;
+ task_free_list = task;
+ }
+}
+
+// Record that a Task has stopped running Haskell code: in the threaded
+// RTS, fold the mutator CPU/elapsed time since the Task started into
+// its stats, then mark it stopped and decrement tasksRunning.
+void
+taskStop (Task *task)
+{
+#if defined(THREADED_RTS)
+ OSThreadId id;
+ Ticks currentElapsedTime, currentUserTime, elapsedGCTime;
+
+ id = osThreadId();
+ ASSERT(task->id == id);
+ ASSERT(myTask() == task);
+
+ currentUserTime = getThreadCPUTime();
+ currentElapsedTime = getProcessElapsedTime();
+
+ // XXX this is wrong; we want elapsed GC time since the
+ // Task started.
+ elapsedGCTime = stat_getElapsedGCTime();
+
+ task->mut_time = 
+ currentUserTime - task->muttimestart - task->gc_time;
+ task->mut_etime = 
+ currentElapsedTime - task->elapsedtimestart - elapsedGCTime;
+
+ // clamp: the subtractions above can go negative (see XXX above)
+ if (task->mut_time < 0.0) { task->mut_time = 0.0; }
+ if (task->mut_etime < 0.0) { task->mut_etime = 0.0; }
+#endif
+
+ task->stopped = rtsTrue;
+ tasksRunning--;
+}
+
+// Called in the child after fork(): OS threads do not survive fork, so
+// the Task bookkeeping must be reset.  Known-incomplete (see #warning):
+// only taskCount is reset; the task lists still reference dead threads.
+void
+resetTaskManagerAfterFork (void)
+{
+#warning TODO!
+ taskCount = 0;
+}
+
+#if defined(THREADED_RTS)
+
+// Create a new worker OS thread attached to a fresh Task.  The
+// supplied Capability is handed directly to the worker: cap->lock must
+// be held and cap must have no running_task, which the new Task
+// becomes.  task->lock is held across thread creation so the worker
+// cannot observe a half-initialised Task.  taskStart is the worker's
+// entry point.
+// Requires: sched_mutex.
+void
+startWorkerTask (Capability *cap, 
+ void OSThreadProcAttr (*taskStart)(Task *task))
+{
+ int r;
+ OSThreadId tid;
+ Task *task;
+
+ if (workerCount >= maxWorkers) {
+ barf("too many workers; runaway worker creation?");
+ }
+ workerCount++;
+
+ // A worker always gets a fresh Task structure.
+ task = newTask();
+
+ tasksRunning++;
+
+ // The lock here is to synchronise with taskStart(), to make sure
+ // that we have finished setting up the Task structure before the
+ // worker thread reads it.
+ ACQUIRE_LOCK(&task->lock);
+
+ task->cap = cap;
+
+ // Give the capability directly to the worker; we can't let anyone
+ // else get in, because the new worker Task has nowhere to go to
+ // sleep so that it could be woken up again.
+ ASSERT_LOCK_HELD(&cap->lock);
+ cap->running_task = task;
+
+ r = createOSThread(&tid, (OSThreadProc *)taskStart, task);
+ if (r != 0) {
+ // Fix: the message previously said "startTask", which is not this
+ // function's name and made the failure hard to trace.
+ barf("startWorkerTask: Can't create new task");
+ }
+
+ IF_DEBUG(scheduler,sched_belch("new worker task (taskCount: %d)", taskCount););
+
+ task->id = tid;
+
+ // ok, finished with the Task struct.
+ RELEASE_LOCK(&task->lock);
+}
+
+#endif /* THREADED_RTS */
+
+#ifdef DEBUG
+
+// Debug-only: printable identity of a Task -- the OS thread id in the
+// threaded RTS, the Task's own address otherwise.
+static void *taskId(Task *task)
+{
+#ifdef THREADED_RTS
+ return (void *)task->id;
+#else
+ return (void *)task;
+#endif
+}
+
+void printAllTasks(void);
+
+// Debug dump of every Task on all_tasks: whether it is alive or
+// stopped, which Capability it is on, and whether it is bound to a
+// Haskell thread or is a worker.
+void
+printAllTasks(void)
+{
+ Task *task;
+ for (task = all_tasks; task != NULL; task = task->all_link) {
+ debugBelch("task %p is %s, ", taskId(task), task->stopped ? "stopped" : "alive");
+ if (!task->stopped) {
+ if (task->cap) {
+ debugBelch("on capability %d, ", task->cap->no);
+ }
+ if (task->tso) {
+ debugBelch("bound to thread %d", task->tso->id);
+ } else {
+ debugBelch("worker");
+ }
+ }
+ debugBelch("\n");
+ }
+}
+
+#endif
+
diff --git a/rts/Task.h b/rts/Task.h
new file mode 100644
index 0000000000..ca71d2809a
--- /dev/null
+++ b/rts/Task.h
@@ -0,0 +1,271 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2001-2005
+ *
+ * Tasks
+ *
+ * -------------------------------------------------------------------------*/
+
+#ifndef TASK_H
+#define TASK_H
+
+#include "GetTime.h"
+
+/*
+ Definition of a Task
+ --------------------
+
+ A task is an OSThread that runs Haskell code. Every OSThread
+ created by the RTS for the purposes of running Haskell code is a
+ Task, and OS threads that enter the Haskell RTS for the purposes of
+ making a call-in are also Tasks.
+
+ The relationship between the number of tasks and capabilities, and
+ the runtime build (-threaded, -smp etc.) is summarised by the
+ following table:
+
+ build Tasks Capabilities
+ ---------------------------------
+ normal 1 1
+ -threaded N N
+
+ The non-threaded build has a single Task and a single global
+ Capability.
+
+ The THREADED_RTS build allows multiple tasks and multiple Capabilities.
+ Multiple Tasks may all be running Haskell code simultaneously. A task
+ relinquishes its Capability when it is asked to evaluate an external
+ (C) call.
+
+ In general, there may be multiple Tasks for an OS thread. This
+ happens if one Task makes a foreign call from Haskell, and
+ subsequently calls back in to create a new bound thread.
+
+ A particular Task structure can belong to more than one OS thread
+ over its lifetime. This is to avoid creating an unbounded number
+ of Task structures. The stats just accumulate.
+
+ Ownership of Task
+ -----------------
+
+ The OS thread named in the Task structure has exclusive access to
+ the structure, as long as it is the running_task of its Capability.
+ That is, if (task->cap->running_task == task), then task->id owns
+ the Task. Otherwise the Task is owned by the owner of the parent
+ data structure on which it is sleeping; for example, if the task is
+ sleeping on spare_workers field of a Capability, then the owner of the
+ Capability has access to the Task.
+
+ When a task is migrated from sleeping on one Capability to another,
+ its task->cap field must be modified. When the task wakes up, it
+ will read the new value of task->cap to find out which Capability
+ it belongs to. Hence some synchronisation is required on
+ task->cap, and this is why we have task->lock.
+
+ If the Task is not currently owned by task->id, then the thread is
+ either
+
+ (a) waiting on the condition task->cond. The Task is either
+ (1) a bound Task, the TSO will be on a queue somewhere
+ (2) a worker task, on the spare_workers queue of task->cap.
+
+ (b) making a foreign call. The Task will be on the
+ suspended_ccalling_tasks list.
+
+ We re-establish ownership in each case by respectively
+
+ (a) the task is currently blocked in yieldCapability().
+ This call will return when we have ownership of the Task and
+ a Capability. The Capability we get might not be the same
+ as the one we had when we called yieldCapability().
+
+ (b) we must call resumeThread(task), which will safely establish
+ ownership of the Task and a Capability.
+*/
+
+typedef struct Task_ {
+#if defined(THREADED_RTS)
+ OSThreadId id; // The OS Thread ID of this task
+#endif
+
+ // This points to the Capability that the Task "belongs" to. If
+ // the Task owns a Capability, then task->cap points to it. If
+ // the task does not own a Capability, then either (a) if the task
+ // is a worker, then task->cap points to the Capability it belongs
+ // to, or (b) it is returning from a foreign call, then task->cap
+ // points to the Capability with the returning_worker queue that
+ // this Task is on.
+ //
+ // When a task goes to sleep, it may be migrated to a different
+ // Capability. Hence, we always check task->cap on wakeup. To
+ // synchronise between the migrater and the migratee, task->lock
+ // must be held when modifying task->cap.
+ struct Capability_ *cap;
+
+ rtsBool stopped; // this task has stopped or exited Haskell
+ StgTSO * suspended_tso; // the TSO is stashed here when we
+ // make a foreign call (NULL otherwise);
+
+ // The following 3 fields are used by bound threads:
+ StgTSO * tso; // the bound TSO (or NULL)
+ SchedulerStatus stat; // return status
+ StgClosure ** ret; // return value
+
+#if defined(THREADED_RTS)
+ Condition cond; // used for sleeping & waking up this task
+ Mutex lock; // lock for the condition variable
+
+ // this flag tells the task whether it should wait on task->cond
+ // or just continue immediately. It's a workaround for the fact
+ // that signalling a condition variable doesn't do anything if the
+ // thread is already running, but we want it to be sticky.
+ rtsBool wakeup;
+#endif
+
+ // Stats that we collect about this task
+ // ToDo: we probably want to put this in a separate TaskStats
+ // structure, so we can share it between multiple Tasks. We don't
+ // really want separate stats for each call in a nested chain of
+ // foreign->haskell->foreign->haskell calls, but we'll get a
+ // separate Task for each of the haskell calls.
+ Ticks elapsedtimestart; // process elapsed time when the Task was created
+ Ticks muttimestart; // thread CPU time when the Task was created
+ Ticks mut_time; // mutator CPU time (filled in by taskStop)
+ Ticks mut_etime; // mutator elapsed time (filled in by taskStop)
+ Ticks gc_time;
+ Ticks gc_etime;
+
+ // Links tasks onto various lists. (ToDo: do we need double
+ // linking now?)
+ struct Task_ *prev;
+ struct Task_ *next;
+
+ // Links tasks on the returning_tasks queue of a Capability.
+ struct Task_ *return_link;
+
+ // Links tasks on the all_tasks list
+ struct Task_ *all_link;
+
+ // When a Haskell thread makes a foreign call that re-enters
+ // Haskell, we end up with another Task associated with the
+ // current thread. We have to remember the whole stack of Tasks
+ // associated with the current thread so that we can correctly
+ // save & restore the thread-local current task pointer.
+ struct Task_ *prev_stack;
+} Task;
+
+// A Task is "bound" iff it has a Haskell thread (TSO) attached;
+// otherwise it is a worker.
+INLINE_HEADER rtsBool
+isBoundTask (Task *task) 
+{
+ return (task->tso != NULL);
+}
+
+
+// Linked list of all tasks.
+//
+extern Task *all_tasks;
+
+// Start and stop the task manager.
+// Requires: sched_mutex.
+//
+void initTaskManager (void);
+void stopTaskManager (void);
+
+// Create a new Task for a bound thread
+// Requires: sched_mutex.
+//
+Task *newBoundTask (void);
+
+// The current task is a bound task that is exiting.
+// Requires: sched_mutex.
+//
+void boundTaskExiting (Task *task);
+
+// This must be called when a new Task is associated with the current
+// thread. It sets up the thread-local current task pointer so that
+// myTask() can work.
+INLINE_HEADER void taskEnter (Task *task);
+
+// Notify the task manager that a task has stopped. This is used
+// mainly for stats-gathering purposes.
+// Requires: sched_mutex.
+//
+void taskStop (Task *task);
+
+// Put the task back on the free list, mark it stopped. Used by
+// forkProcess().
+//
+void discardTask (Task *task);
+
+// Get the Task associated with the current OS thread (or NULL if none).
+//
+INLINE_HEADER Task *myTask (void);
+
+// After a fork, the tasks are not carried into the child process, so
+// we must tell the task manager.
+// Requires: sched_mutex.
+//
+void resetTaskManagerAfterFork (void);
+
+#if defined(THREADED_RTS)
+
+// Workers are attached to the supplied Capability. This Capability
+// should not currently have a running_task, because the new task
+// will become the running_task for that Capability.
+// Requires: sched_mutex.
+//
+void startWorkerTask (struct Capability_ *cap,
+ void OSThreadProcAttr (*taskStart)(Task *task));
+
+#endif /* THREADED_RTS */
+
+// -----------------------------------------------------------------------------
+// INLINE functions... private from here on down:
+
+// A thread-local-storage key that we can use to get access to the
+// current thread's Task structure.
+#if defined(THREADED_RTS)
+extern ThreadLocalKey currentTaskKey;
+#else
+extern Task *my_task;
+#endif
+
+//
+// myTask() uses thread-local storage to find the Task associated with
+// the current OS thread. If the current OS thread has multiple
+// Tasks, because it has re-entered the RTS, then the task->prev_stack
+// field is used to store the previous Task.
+//
+// NOTE(review): presumably returns NULL before the first taskEnter()
+// on this thread -- verify getThreadLocalVar's initial value.
+//
+INLINE_HEADER Task *
+myTask (void)
+{
+#if defined(THREADED_RTS)
+ return getThreadLocalVar(&currentTaskKey);
+#else
+ return my_task;
+#endif
+}
+
+// Set the current OS thread's Task pointer (thread-local storage in
+// the threaded RTS, a plain global otherwise).
+INLINE_HEADER void
+setMyTask (Task *task)
+{
+#if defined(THREADED_RTS)
+ setThreadLocalVar(&currentTaskKey,task);
+#else
+ my_task = task;
+#endif
+}
+
+// This must be called when a new Task is associated with the current
+// thread. It sets up the thread-local current task pointer so that
+// myTask() can work.  The saved prev_stack is restored again by
+// boundTaskExiting().
+INLINE_HEADER void
+taskEnter (Task *task)
+{
+ // save the current value, just in case this Task has been created
+ // as a result of re-entering the RTS (defaults to NULL):
+ task->prev_stack = myTask();
+ setMyTask(task);
+}
+
+#endif /* TASK_H */
diff --git a/rts/ThreadLabels.c b/rts/ThreadLabels.c
new file mode 100644
index 0000000000..9b9f1723ff
--- /dev/null
+++ b/rts/ThreadLabels.c
@@ -0,0 +1,50 @@
+/* -----------------------------------------------------------------------------
+ * ThreadLabels.c
+ *
+ * (c) The GHC Team 2002-2003
+ *
+ * Table of thread labels.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#include "ThreadLabels.h"
+#include "RtsUtils.h"
+
+#include <stdlib.h>
+
+#if defined(DEBUG)
+/* to the end */
+static HashTable * threadLabels = NULL;
+
+// Allocate the thread-label hash table on first use; subsequent calls
+// are no-ops.
+void
+initThreadLabelTable(void)
+{
+ if (threadLabels == NULL) {
+ threadLabels = allocHashTable();
+ }
+}
+
+// Set (or replace) the label for thread id `key`.  Any existing label
+// is removed and freed first.  The table takes ownership of `data`,
+// which is later released with stgFree (see removeThreadLabel), so it
+// should come from the RTS allocator.
+void
+updateThreadLabel(StgWord key, void *data)
+{
+ removeThreadLabel(key);
+ insertHashTable(threadLabels,key,data);
+}
+
+// Return the label for thread id `key`, or NULL if none.
+// NOTE(review): assumes initThreadLabelTable() has already run;
+// threadLabels is NULL before that -- confirm callers guarantee this.
+void *
+lookupThreadLabel(StgWord key)
+{
+ return lookupHashTable(threadLabels,key);
+}
+
+// Remove and free the label for thread id `key`, if one is present.
+void
+removeThreadLabel(StgWord key)
+{
+ void * old = NULL;
+ if ((old = lookupHashTable(threadLabels,key))) {
+ removeHashTable(threadLabels,key,old);
+ stgFree(old); // labels are owned by the table
+ } 
+}
+#endif /* DEBUG */
diff --git a/rts/ThreadLabels.h b/rts/ThreadLabels.h
new file mode 100644
index 0000000000..97d3d0d241
--- /dev/null
+++ b/rts/ThreadLabels.h
@@ -0,0 +1,27 @@
+/* -----------------------------------------------------------------------------
+ * ThreadLabels.h
+ *
+ * (c) The GHC Team 2002-2003
+ *
+ * Table of thread labels.
+ *
+ * ---------------------------------------------------------------------------*/
+#ifndef __THREADLABELS_H__
+#define __THREADLABELS_H__
+
+#include "Rts.h"
+#include "Hash.h"
+
+void
+initThreadLabelTable(void);
+
+void
+updateThreadLabel(StgWord key, void *data);
+
+void *
+lookupThreadLabel(StgWord key);
+
+void
+removeThreadLabel(StgWord key);
+
+#endif /* __THREADLABELS_H__ */
diff --git a/rts/Ticker.h b/rts/Ticker.h
new file mode 100644
index 0000000000..f9555768b5
--- /dev/null
+++ b/rts/Ticker.h
@@ -0,0 +1,15 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2005
+ *
+ * Ticker interface (implementation is OS-specific)
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef TICKER_H
+#define TICKER_H
+
+extern int startTicker( nat ms, TickProc handle_tick );
+extern int stopTicker ( void );
+
+#endif /* TICKER_H */
diff --git a/rts/Ticky.c b/rts/Ticky.c
new file mode 100644
index 0000000000..294e12bdda
--- /dev/null
+++ b/rts/Ticky.c
@@ -0,0 +1,628 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The AQUA project, Glasgow University, 1992-1997
+ * (c) The GHC Team, 1998-1999
+ *
+ * Ticky-ticky profiling
+ *-------------------------------------------------------------------------- */
+
+#if defined(TICKY_TICKY)
+
+#define TICKY_C /* define those variables */
+#include "PosixSource.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "Ticky.h"
+
+/* -----------------------------------------------------------------------------
+ Print out all the counters
+ -------------------------------------------------------------------------- */
+
+static void printRegisteredCounterInfo (FILE *); /* fwd decl */
+
+// INTAVG(a,b): ratio of two integer counters as a double, 0.0 when the
+// denominator is zero.  PC(a): fraction -> percentage.
+// Fix: arguments are now fully parenthesised; previously `b == 0` and
+// `100.0 * a` expanded unparenthesised, which mis-groups for compound
+// arguments such as INTAVG(x, y + z).
+#define INTAVG(a,b) (((b) == 0) ? 0.0 : ((double) (a) / (double) (b)))
+#define PC(a) (100.0 * (a))
+
+#define AVG(thing) \
+ StgDouble avg##thing = INTAVG(tot##thing,ctr##thing)
+
+void
+PrintTickyInfo(void)
+{
+ unsigned long i;
+ unsigned long tot_allocs = /* total number of things allocated */
+ ALLOC_FUN_ctr + ALLOC_SE_THK_ctr + ALLOC_UP_THK_ctr + ALLOC_CON_ctr + ALLOC_TUP_ctr +
+ + ALLOC_TSO_ctr + ALLOC_BH_ctr + ALLOC_PAP_ctr + ALLOC_PRIM_ctr
+#ifdef PAR
+ + ALLOC_FMBQ_ctr + ALLOC_FME_ctr + ALLOC_BF_ctr
+#endif
+ ;
+
+ unsigned long tot_adm_wds = /* total number of admin words allocated */
+ ALLOC_FUN_adm + ALLOC_THK_adm + ALLOC_CON_adm + ALLOC_TUP_adm
+ + ALLOC_TSO_adm + ALLOC_BH_adm + ALLOC_PAP_adm + ALLOC_PRIM_adm
+#ifdef PAR
+ + ALLOC_FMBQ_adm + ALLOC_FME_adm + ALLOC_BF_adm
+#endif
+ ;
+
+ unsigned long tot_gds_wds = /* total number of words of ``good stuff'' allocated */
+ ALLOC_FUN_gds + ALLOC_THK_gds + ALLOC_CON_gds + ALLOC_TUP_gds
+ + ALLOC_TSO_gds + ALLOC_BH_gds + ALLOC_PAP_gds + ALLOC_PRIM_gds
+#ifdef PAR
+ + ALLOC_FMBQ_gds + ALLOC_FME_gds + ALLOC_BF_gds
+#endif
+ ;
+
+ unsigned long tot_slp_wds = /* total number of ``slop'' words allocated */
+ ALLOC_FUN_slp + ALLOC_THK_slp + ALLOC_CON_slp + ALLOC_TUP_slp
+ + ALLOC_TSO_slp + ALLOC_BH_slp + ALLOC_PAP_slp + ALLOC_PRIM_slp
+#ifdef PAR
+ + ALLOC_FMBQ_slp + ALLOC_FME_slp + ALLOC_BF_slp
+#endif
+ ;
+
+ unsigned long tot_wds = /* total words */
+ tot_adm_wds + tot_gds_wds + tot_slp_wds;
+
+ unsigned long tot_thk_enters = ENT_STATIC_THK_ctr + ENT_DYN_THK_ctr;
+ unsigned long tot_con_enters = ENT_STATIC_CON_ctr + ENT_DYN_CON_ctr;
+ unsigned long tot_fun_direct_enters = ENT_STATIC_FUN_DIRECT_ctr + ENT_DYN_FUN_DIRECT_ctr;
+ unsigned long tot_ind_enters = ENT_STATIC_IND_ctr + ENT_DYN_IND_ctr;
+
+ // This is the number of times we entered a function via some kind
+ // of slow call. It amounts to all the slow applications, not
+ // counting those that were to too few arguments.
+ unsigned long tot_fun_slow_enters =
+ SLOW_CALL_ctr -
+ SLOW_CALL_FUN_TOO_FEW_ctr -
+ SLOW_CALL_PAP_TOO_FEW_ctr;
+
+ unsigned long tot_known_calls =
+ KNOWN_CALL_ctr + KNOWN_CALL_TOO_FEW_ARGS_ctr +
+ + KNOWN_CALL_EXTRA_ARGS_ctr;
+ unsigned long tot_tail_calls =
+ UNKNOWN_CALL_ctr + tot_known_calls;
+
+ unsigned long tot_enters =
+ tot_con_enters + tot_fun_direct_enters +
+ tot_ind_enters + ENT_PERM_IND_ctr + ENT_PAP_ctr + tot_thk_enters;
+ unsigned long jump_direct_enters =
+ tot_enters - ENT_VIA_NODE_ctr;
+
+ unsigned long tot_returns =
+ RET_NEW_ctr + RET_OLD_ctr + RET_UNBOXED_TUP_ctr;
+
+ unsigned long tot_returns_of_new = RET_NEW_ctr;
+
+ unsigned long con_updates = UPD_CON_IN_NEW_ctr + UPD_CON_IN_PLACE_ctr;
+ unsigned long pap_updates = UPD_PAP_IN_NEW_ctr + UPD_PAP_IN_PLACE_ctr;
+
+ unsigned long tot_updates = UPD_SQUEEZED_ctr + pap_updates + con_updates;
+
+ unsigned long tot_new_updates = UPD_NEW_IND_ctr + UPD_NEW_PERM_IND_ctr;
+ unsigned long tot_old_updates = UPD_OLD_IND_ctr + UPD_OLD_PERM_IND_ctr;
+ unsigned long tot_gengc_updates = tot_new_updates + tot_old_updates;
+
+ FILE *tf = RtsFlags.TickyFlags.tickyFile;
+
+ fprintf(tf,"\n\nALLOCATIONS: %ld (%ld words total: %ld admin, %ld goods, %ld slop)\n",
+ tot_allocs, tot_wds, tot_adm_wds, tot_gds_wds, tot_slp_wds);
+ fprintf(tf,"\t\t\t\ttotal words:\t 2 3 4 5 6+\n");
+
+#define ALLOC_HISTO_MAGIC(categ) \
+ (PC(INTAVG(ALLOC_##categ##_hst[0], ALLOC_##categ##_ctr))), \
+ (PC(INTAVG(ALLOC_##categ##_hst[1], ALLOC_##categ##_ctr))), \
+ (PC(INTAVG(ALLOC_##categ##_hst[2], ALLOC_##categ##_ctr))), \
+ (PC(INTAVG(ALLOC_##categ##_hst[3], ALLOC_##categ##_ctr))), \
+ (PC(INTAVG(ALLOC_##categ##_hst[4], ALLOC_##categ##_ctr)))
+
+ fprintf(tf,"%7ld (%5.1f%%) function values",
+ ALLOC_FUN_ctr,
+ PC(INTAVG(ALLOC_FUN_ctr, tot_allocs)));
+ if (ALLOC_FUN_ctr != 0)
+ fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FUN));
+
+ fprintf(tf,"\n%7ld (%5.1f%%) thunks",
+ ALLOC_SE_THK_ctr + ALLOC_UP_THK_ctr,
+ PC(INTAVG(ALLOC_SE_THK_ctr + ALLOC_UP_THK_ctr, tot_allocs)));
+
+#define ALLOC_THK_ctr (ALLOC_UP_THK_ctr + ALLOC_SE_THK_ctr)
+ /* hack to make ALLOC_HISTO_MAGIC still work for THK */
+ if ((ALLOC_SE_THK_ctr + ALLOC_UP_THK_ctr) != 0)
+ fprintf(tf,"\t\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(THK));
+#undef ALLOC_THK_ctr
+
+ fprintf(tf,"\n%7ld (%5.1f%%) data values",
+ ALLOC_CON_ctr,
+ PC(INTAVG(ALLOC_CON_ctr, tot_allocs)));
+ if (ALLOC_CON_ctr != 0)
+ fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(CON));
+
+ fprintf(tf,"\n%7ld (%5.1f%%) big tuples",
+ ALLOC_TUP_ctr,
+ PC(INTAVG(ALLOC_TUP_ctr, tot_allocs)));
+ if (ALLOC_TUP_ctr != 0)
+ fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TUP));
+
+ fprintf(tf,"\n%7ld (%5.1f%%) black holes",
+ ALLOC_BH_ctr,
+ PC(INTAVG(ALLOC_BH_ctr, tot_allocs)));
+ if (ALLOC_BH_ctr != 0)
+ fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BH));
+
+ fprintf(tf,"\n%7ld (%5.1f%%) prim things",
+ ALLOC_PRIM_ctr,
+ PC(INTAVG(ALLOC_PRIM_ctr, tot_allocs)));
+ if (ALLOC_PRIM_ctr != 0)
+ fprintf(tf,"\t\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PRIM));
+
+ fprintf(tf,"\n%7ld (%5.1f%%) partial applications",
+ ALLOC_PAP_ctr,
+ PC(INTAVG(ALLOC_PAP_ctr, tot_allocs)));
+ if (ALLOC_PAP_ctr != 0)
+ fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(PAP));
+
+ fprintf(tf,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_TSO_ctr,
+ PC(INTAVG(ALLOC_TSO_ctr, tot_allocs)));
+ if (ALLOC_TSO_ctr != 0)
+ fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(TSO));
+#ifdef PAR
+ fprintf(tf,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_FMBQ_ctr,
+ PC(INTAVG(ALLOC_FMBQ_ctr, tot_allocs)));
+ if (ALLOC_FMBQ_ctr != 0)
+ fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FMBQ));
+ fprintf(tf,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_FME_ctr,
+ PC(INTAVG(ALLOC_FME_ctr, tot_allocs)));
+ if (ALLOC_FME_ctr != 0)
+ fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(FME));
+ fprintf(tf,"\n%7ld (%5.1f%%) thread state objects",
+ ALLOC_BF_ctr,
+ PC(INTAVG(ALLOC_BF_ctr, tot_allocs)));
+ if (ALLOC_BF_ctr != 0)
+ fprintf(tf,"\t\t%5.1f %5.1f %5.1f %5.1f %5.1f", ALLOC_HISTO_MAGIC(BF));
+#endif
+ fprintf(tf,"\n");
+
+ fprintf(tf,"\nTotal storage-manager allocations: %ld (%ld words)\n\t[%ld words lost to speculative heap-checks]\n", ALLOC_HEAP_ctr, ALLOC_HEAP_tot, ALLOC_HEAP_tot - tot_wds);
+
+ fprintf(tf,"\nSTACK USAGE:\n"); /* NB: some bits are direction sensitive */
+
+ fprintf(tf,"\nENTERS: %ld of which %ld (%.1f%%) direct to the entry code\n\t\t [the rest indirected via Node's info ptr]\n",
+ tot_enters,
+ jump_direct_enters,
+ PC(INTAVG(jump_direct_enters,tot_enters)));
+ fprintf(tf,"%7ld (%5.1f%%) thunks\n",
+ tot_thk_enters,
+ PC(INTAVG(tot_thk_enters,tot_enters)));
+ fprintf(tf,"%7ld (%5.1f%%) data values\n",
+ tot_con_enters,
+ PC(INTAVG(tot_con_enters,tot_enters)));
+ fprintf(tf,"%7ld (%5.1f%%) normal indirections\n",
+ tot_ind_enters,
+ PC(INTAVG(tot_ind_enters,tot_enters)));
+ fprintf(tf,"%7ld (%5.1f%%) permanent indirections\n",
+ ENT_PERM_IND_ctr,
+ PC(INTAVG(ENT_PERM_IND_ctr,tot_enters)));
+
+ fprintf(tf,"\nFUNCTION ENTRIES: %ld\n", tot_fun_direct_enters);
+
+ fprintf(tf, "\nTAIL CALLS: %ld, of which %ld (%.lf%%) were to known functions\n",
+ tot_tail_calls, tot_known_calls,
+ PC(INTAVG(tot_known_calls,tot_tail_calls)));
+
+ fprintf(tf, "\nSLOW APPLICATIONS: %ld evaluated, %ld unevaluated\n",
+ SLOW_CALL_ctr, SLOW_CALL_UNEVALD_ctr);
+ fprintf(tf, "\n");
+ fprintf(tf, " Too few args Correct args Too many args\n");
+ fprintf(tf, " FUN %8ld %8ld %8ld\n",
+ SLOW_CALL_FUN_TOO_FEW_ctr, SLOW_CALL_FUN_CORRECT_ctr, SLOW_CALL_FUN_TOO_MANY_ctr);
+ fprintf(tf, " PAP %8ld %8ld %8ld\n",
+ SLOW_CALL_PAP_TOO_FEW_ctr, SLOW_CALL_PAP_CORRECT_ctr, SLOW_CALL_PAP_TOO_MANY_ctr);
+ fprintf(tf, "\n");
+
+ fprintf(tf,"\nRETURNS: %ld\n", tot_returns);
+ fprintf(tf,"%7ld (%5.1f%%) from entering a new constructor\n\t\t [the rest from entering an existing constructor]\n",
+ tot_returns_of_new,
+ PC(INTAVG(tot_returns_of_new,tot_returns)));
+ fprintf(tf,"%7ld (%5.1f%%) vectored [the rest unvectored]\n",
+ VEC_RETURN_ctr,
+ PC(INTAVG(VEC_RETURN_ctr,tot_returns)));
+
+ fprintf(tf, "\nRET_NEW: %7ld: ", RET_NEW_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%",
+ PC(INTAVG(RET_NEW_hst[i],RET_NEW_ctr))); }
+ fprintf(tf, "\n");
+ fprintf(tf, "RET_OLD: %7ld: ", RET_OLD_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%",
+ PC(INTAVG(RET_OLD_hst[i],RET_OLD_ctr))); }
+ fprintf(tf, "\n");
+ fprintf(tf, "RET_UNBOXED_TUP: %7ld: ", RET_UNBOXED_TUP_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%",
+ PC(INTAVG(RET_UNBOXED_TUP_hst[i],
+ RET_UNBOXED_TUP_ctr))); }
+ fprintf(tf, "\n");
+ fprintf(tf, "\nRET_VEC_RETURN : %7ld: ", VEC_RETURN_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%5.1f%%",
+ PC(INTAVG(RET_VEC_RETURN_hst[i],VEC_RETURN_ctr))); }
+ fprintf(tf, "\n");
+
+ fprintf(tf,"\nUPDATE FRAMES: %ld (%ld omitted from thunks)",
+ UPDF_PUSHED_ctr,
+ UPDF_OMITTED_ctr);
+
+ fprintf(tf,"\nCATCH FRAMES: %ld", CATCHF_PUSHED_ctr);
+
+ if (UPDF_RCC_PUSHED_ctr != 0)
+ fprintf(tf,"%7ld restore cost centre frames (%ld omitted)\n",
+ UPDF_RCC_PUSHED_ctr,
+ UPDF_RCC_OMITTED_ctr);
+
+ fprintf(tf,"\nUPDATES: %ld\n", tot_updates);
+ fprintf(tf,"%7ld (%5.1f%%) data values\n\t\t [%ld in place, %ld allocated new space]\n",
+ con_updates,
+ PC(INTAVG(con_updates,tot_updates)),
+ UPD_CON_IN_PLACE_ctr, UPD_CON_IN_NEW_ctr);
+ fprintf(tf,"%7ld (%5.1f%%) partial applications\n\t\t [%ld in place, %ld allocated new space]\n",
+ pap_updates,
+ PC(INTAVG(pap_updates,tot_updates)),
+ UPD_PAP_IN_PLACE_ctr, UPD_PAP_IN_NEW_ctr);
+ fprintf(tf,"%7ld (%5.1f%%) updates by squeezing\n",
+ UPD_SQUEEZED_ctr,
+ PC(INTAVG(UPD_SQUEEZED_ctr, tot_updates)));
+
+ fprintf(tf, "\nUPD_CON_IN_NEW: %7ld: ", UPD_CON_IN_NEW_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%7ld", UPD_CON_IN_NEW_hst[i]); }
+ fprintf(tf, "\n");
+ fprintf(tf, "UPD_CON_IN_PLACE: %7ld: ", UPD_CON_IN_PLACE_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%7ld", UPD_CON_IN_PLACE_hst[i]); }
+ fprintf(tf, "\n");
+ fprintf(tf, "UPD_PAP_IN_NEW: %7ld: ", UPD_PAP_IN_NEW_ctr);
+ for (i = 0; i < 9; i++) { fprintf(tf, "%7ld", UPD_PAP_IN_NEW_hst[i]); }
+ fprintf(tf, "\n");
+
+ if (tot_gengc_updates != 0) {
+ fprintf(tf,"\nNEW GEN UPDATES: %9ld (%5.1f%%)\n",
+ tot_new_updates,
+ PC(INTAVG(tot_new_updates,tot_gengc_updates)));
+ fprintf(tf,"OLD GEN UPDATES: %9ld (%5.1f%%)\n",
+ tot_old_updates,
+ PC(INTAVG(tot_old_updates,tot_gengc_updates)));
+ }
+
+ fprintf(tf,"\nTotal bytes copied during GC: %ld\n",
+ GC_WORDS_COPIED_ctr * sizeof(W_));
+
+ printRegisteredCounterInfo(tf);
+
+ fprintf(tf,"\n**************************************************\n");
+
+ /* here, we print out all the raw numbers; these are really
+ more useful when we want to snag them for subsequent
+ rdb-etc processing. WDP 95/11
+ */
+
+#define PR_CTR(ctr) \
+ do { fprintf(tf,"%7ld " #ctr "\n", ctr); } while(0)
+/* COND_PR_CTR takes a boolean; if false then msg is the printname rather than ctr */
+#define COND_PR_CTR(ctr,b,msg) \
+ if (b) { fprintf(tf,"%7ld " #ctr "\n", ctr); } else { fprintf(tf,"%7ld " msg "\n", ctr); }
+#define PR_HST(hst,i) \
+ do { fprintf(tf,"%7ld " #hst "_" #i "\n", hst[i]); } while(0)
+
+ PR_CTR(ALLOC_HEAP_ctr);
+ PR_CTR(ALLOC_HEAP_tot);
+
+ PR_CTR(ALLOC_FUN_ctr);
+ PR_CTR(ALLOC_FUN_adm);
+ PR_CTR(ALLOC_FUN_gds);
+ PR_CTR(ALLOC_FUN_slp);
+ PR_HST(ALLOC_FUN_hst,0);
+ PR_HST(ALLOC_FUN_hst,1);
+ PR_HST(ALLOC_FUN_hst,2);
+ PR_HST(ALLOC_FUN_hst,3);
+ PR_HST(ALLOC_FUN_hst,4);
+ PR_CTR(ALLOC_UP_THK_ctr);
+ PR_CTR(ALLOC_SE_THK_ctr);
+ PR_CTR(ALLOC_THK_adm);
+ PR_CTR(ALLOC_THK_gds);
+ PR_CTR(ALLOC_THK_slp);
+ PR_HST(ALLOC_THK_hst,0);
+ PR_HST(ALLOC_THK_hst,1);
+ PR_HST(ALLOC_THK_hst,2);
+ PR_HST(ALLOC_THK_hst,3);
+ PR_HST(ALLOC_THK_hst,4);
+ PR_CTR(ALLOC_CON_ctr);
+ PR_CTR(ALLOC_CON_adm);
+ PR_CTR(ALLOC_CON_gds);
+ PR_CTR(ALLOC_CON_slp);
+ PR_HST(ALLOC_CON_hst,0);
+ PR_HST(ALLOC_CON_hst,1);
+ PR_HST(ALLOC_CON_hst,2);
+ PR_HST(ALLOC_CON_hst,3);
+ PR_HST(ALLOC_CON_hst,4);
+ PR_CTR(ALLOC_TUP_ctr);
+ PR_CTR(ALLOC_TUP_adm);
+ PR_CTR(ALLOC_TUP_gds);
+ PR_CTR(ALLOC_TUP_slp);
+ PR_HST(ALLOC_TUP_hst,0);
+ PR_HST(ALLOC_TUP_hst,1);
+ PR_HST(ALLOC_TUP_hst,2);
+ PR_HST(ALLOC_TUP_hst,3);
+ PR_HST(ALLOC_TUP_hst,4);
+ PR_CTR(ALLOC_BH_ctr);
+ PR_CTR(ALLOC_BH_adm);
+ PR_CTR(ALLOC_BH_gds);
+ PR_CTR(ALLOC_BH_slp);
+ PR_HST(ALLOC_BH_hst,0);
+ PR_HST(ALLOC_BH_hst,1);
+ PR_HST(ALLOC_BH_hst,2);
+ PR_HST(ALLOC_BH_hst,3);
+ PR_HST(ALLOC_BH_hst,4);
+ PR_CTR(ALLOC_PRIM_ctr);
+ PR_CTR(ALLOC_PRIM_adm);
+ PR_CTR(ALLOC_PRIM_gds);
+ PR_CTR(ALLOC_PRIM_slp);
+ PR_HST(ALLOC_PRIM_hst,0);
+ PR_HST(ALLOC_PRIM_hst,1);
+ PR_HST(ALLOC_PRIM_hst,2);
+ PR_HST(ALLOC_PRIM_hst,3);
+ PR_HST(ALLOC_PRIM_hst,4);
+ PR_CTR(ALLOC_PAP_ctr);
+ PR_CTR(ALLOC_PAP_adm);
+ PR_CTR(ALLOC_PAP_gds);
+ PR_CTR(ALLOC_PAP_slp);
+ PR_HST(ALLOC_PAP_hst,0);
+ PR_HST(ALLOC_PAP_hst,1);
+ PR_HST(ALLOC_PAP_hst,2);
+ PR_HST(ALLOC_PAP_hst,3);
+ PR_HST(ALLOC_PAP_hst,4);
+
+ PR_CTR(ALLOC_TSO_ctr);
+ PR_CTR(ALLOC_TSO_adm);
+ PR_CTR(ALLOC_TSO_gds);
+ PR_CTR(ALLOC_TSO_slp);
+ PR_HST(ALLOC_TSO_hst,0);
+ PR_HST(ALLOC_TSO_hst,1);
+ PR_HST(ALLOC_TSO_hst,2);
+ PR_HST(ALLOC_TSO_hst,3);
+ PR_HST(ALLOC_TSO_hst,4);
+
+#ifdef PAR
+ PR_CTR(ALLOC_FMBQ_ctr);
+ PR_CTR(ALLOC_FMBQ_adm);
+ PR_CTR(ALLOC_FMBQ_gds);
+ PR_CTR(ALLOC_FMBQ_slp);
+ PR_HST(ALLOC_FMBQ_hst,0);
+ PR_HST(ALLOC_FMBQ_hst,1);
+ PR_HST(ALLOC_FMBQ_hst,2);
+ PR_HST(ALLOC_FMBQ_hst,3);
+ PR_HST(ALLOC_FMBQ_hst,4);
+ PR_CTR(ALLOC_FME_ctr);
+ PR_CTR(ALLOC_FME_adm);
+ PR_CTR(ALLOC_FME_gds);
+ PR_CTR(ALLOC_FME_slp);
+ PR_HST(ALLOC_FME_hst,0);
+ PR_HST(ALLOC_FME_hst,1);
+ PR_HST(ALLOC_FME_hst,2);
+ PR_HST(ALLOC_FME_hst,3);
+ PR_HST(ALLOC_FME_hst,4);
+ PR_CTR(ALLOC_BF_ctr);
+ PR_CTR(ALLOC_BF_adm);
+ PR_CTR(ALLOC_BF_gds);
+ PR_CTR(ALLOC_BF_slp);
+ PR_HST(ALLOC_BF_hst,0);
+ PR_HST(ALLOC_BF_hst,1);
+ PR_HST(ALLOC_BF_hst,2);
+ PR_HST(ALLOC_BF_hst,3);
+ PR_HST(ALLOC_BF_hst,4);
+#endif
+
+ PR_CTR(ENT_VIA_NODE_ctr);
+ PR_CTR(ENT_STATIC_CON_ctr);
+ PR_CTR(ENT_DYN_CON_ctr);
+ PR_CTR(ENT_STATIC_FUN_DIRECT_ctr);
+ PR_CTR(ENT_DYN_FUN_DIRECT_ctr);
+ PR_CTR(ENT_STATIC_IND_ctr);
+ PR_CTR(ENT_DYN_IND_ctr);
+
+/* The counters ENT_PERM_IND and UPD_{NEW,OLD}_PERM_IND are not dumped
+ * at the end of execution unless update squeezing is turned off (+RTS
+ * -Z =RtsFlags.GcFlags.squeezeUpdFrames), as they will be wrong
+ * otherwise. Why? Because for each update frame squeezed out, we
+ * count an UPD_NEW_PERM_IND *at GC time* (i.e., too early). And
+ * further, when we enter the closure that has been updated, we count
+ * the ENT_PERM_IND, but we then enter the PERM_IND that was built for
+ * the next update frame below, and so on down the chain until we
+ * finally reach the value. Thus we count many new ENT_PERM_INDs too
+ * early.
+ *
+ * This of course refers to the -ticky version that uses PERM_INDs to
+ * determine the number of closures entered 0/1/>1. KSW 1999-04. */
+ COND_PR_CTR(ENT_PERM_IND_ctr,RtsFlags.GcFlags.squeezeUpdFrames == rtsFalse,"E!NT_PERM_IND_ctr requires +RTS -Z");
+
+ PR_CTR(ENT_AP_ctr);
+ PR_CTR(ENT_PAP_ctr);
+ PR_CTR(ENT_AP_STACK_ctr);
+ PR_CTR(ENT_BH_ctr);
+ PR_CTR(ENT_STATIC_THK_ctr);
+ PR_CTR(ENT_DYN_THK_ctr);
+
+ PR_CTR(SLOW_CALL_v_ctr);
+ PR_CTR(SLOW_CALL_f_ctr);
+ PR_CTR(SLOW_CALL_d_ctr);
+ PR_CTR(SLOW_CALL_l_ctr);
+ PR_CTR(SLOW_CALL_n_ctr);
+ PR_CTR(SLOW_CALL_p_ctr);
+ PR_CTR(SLOW_CALL_pv_ctr);
+ PR_CTR(SLOW_CALL_pp_ctr);
+ PR_CTR(SLOW_CALL_ppv_ctr);
+ PR_CTR(SLOW_CALL_ppp_ctr);
+ PR_CTR(SLOW_CALL_pppv_ctr);
+ PR_CTR(SLOW_CALL_pppp_ctr);
+ PR_CTR(SLOW_CALL_ppppp_ctr);
+ PR_CTR(SLOW_CALL_pppppp_ctr);
+ PR_CTR(SLOW_CALL_OTHER_ctr);
+
+ PR_CTR(UNKNOWN_CALL_ctr);
+ PR_CTR(KNOWN_CALL_ctr);
+ PR_CTR(KNOWN_CALL_TOO_FEW_ARGS_ctr);
+ PR_CTR(KNOWN_CALL_EXTRA_ARGS_ctr);
+ PR_CTR(MULTI_CHUNK_SLOW_CALL_ctr);
+ PR_CTR(MULTI_CHUNK_SLOW_CALL_CHUNKS_ctr);
+ PR_CTR(SLOW_CALL_ctr);
+ PR_CTR(SLOW_CALL_FUN_TOO_FEW_ctr);
+ PR_CTR(SLOW_CALL_FUN_CORRECT_ctr);
+ PR_CTR(SLOW_CALL_FUN_TOO_MANY_ctr);
+ PR_CTR(SLOW_CALL_PAP_TOO_FEW_ctr);
+ PR_CTR(SLOW_CALL_PAP_CORRECT_ctr);
+ PR_CTR(SLOW_CALL_PAP_TOO_MANY_ctr);
+ PR_CTR(SLOW_CALL_UNEVALD_ctr);
+ PR_HST(SLOW_CALL_hst,0);
+ PR_HST(SLOW_CALL_hst,1);
+ PR_HST(SLOW_CALL_hst,2);
+ PR_HST(SLOW_CALL_hst,3);
+ PR_HST(SLOW_CALL_hst,4);
+ PR_HST(SLOW_CALL_hst,5);
+ PR_HST(SLOW_CALL_hst,6);
+ PR_HST(SLOW_CALL_hst,7);
+
+ PR_CTR(RET_NEW_ctr);
+ PR_CTR(RET_OLD_ctr);
+ PR_CTR(RET_UNBOXED_TUP_ctr);
+ PR_CTR(VEC_RETURN_ctr);
+
+ PR_HST(RET_NEW_hst,0);
+ PR_HST(RET_NEW_hst,1);
+ PR_HST(RET_NEW_hst,2);
+ PR_HST(RET_NEW_hst,3);
+ PR_HST(RET_NEW_hst,4);
+ PR_HST(RET_NEW_hst,5);
+ PR_HST(RET_NEW_hst,6);
+ PR_HST(RET_NEW_hst,7);
+ PR_HST(RET_NEW_hst,8);
+ PR_HST(RET_OLD_hst,0);
+ PR_HST(RET_OLD_hst,1);
+ PR_HST(RET_OLD_hst,2);
+ PR_HST(RET_OLD_hst,3);
+ PR_HST(RET_OLD_hst,4);
+ PR_HST(RET_OLD_hst,5);
+ PR_HST(RET_OLD_hst,6);
+ PR_HST(RET_OLD_hst,7);
+ PR_HST(RET_OLD_hst,8);
+ PR_HST(RET_UNBOXED_TUP_hst,0);
+ PR_HST(RET_UNBOXED_TUP_hst,1);
+ PR_HST(RET_UNBOXED_TUP_hst,2);
+ PR_HST(RET_UNBOXED_TUP_hst,3);
+ PR_HST(RET_UNBOXED_TUP_hst,4);
+ PR_HST(RET_UNBOXED_TUP_hst,5);
+ PR_HST(RET_UNBOXED_TUP_hst,6);
+ PR_HST(RET_UNBOXED_TUP_hst,7);
+ PR_HST(RET_UNBOXED_TUP_hst,8);
+ PR_HST(RET_VEC_RETURN_hst,0);
+ PR_HST(RET_VEC_RETURN_hst,1);
+ PR_HST(RET_VEC_RETURN_hst,2);
+ PR_HST(RET_VEC_RETURN_hst,3);
+ PR_HST(RET_VEC_RETURN_hst,4);
+ PR_HST(RET_VEC_RETURN_hst,5);
+ PR_HST(RET_VEC_RETURN_hst,6);
+ PR_HST(RET_VEC_RETURN_hst,7);
+ PR_HST(RET_VEC_RETURN_hst,8);
+
+ PR_CTR(UPDF_OMITTED_ctr);
+ PR_CTR(UPDF_PUSHED_ctr);
+ PR_CTR(CATCHF_PUSHED_ctr);
+
+ PR_CTR(UPDF_RCC_PUSHED_ctr);
+ PR_CTR(UPDF_RCC_OMITTED_ctr);
+
+ PR_CTR(UPD_SQUEEZED_ctr);
+ PR_CTR(UPD_CON_IN_NEW_ctr);
+ PR_CTR(UPD_CON_IN_PLACE_ctr);
+ PR_CTR(UPD_PAP_IN_NEW_ctr);
+ PR_CTR(UPD_PAP_IN_PLACE_ctr);
+
+ PR_CTR(UPD_BH_UPDATABLE_ctr);
+ PR_CTR(UPD_BH_SINGLE_ENTRY_ctr);
+ PR_CTR(UPD_CAF_BH_UPDATABLE_ctr);
+ PR_CTR(UPD_CAF_BH_SINGLE_ENTRY_ctr);
+
+ PR_HST(UPD_CON_IN_NEW_hst,0);
+ PR_HST(UPD_CON_IN_NEW_hst,1);
+ PR_HST(UPD_CON_IN_NEW_hst,2);
+ PR_HST(UPD_CON_IN_NEW_hst,3);
+ PR_HST(UPD_CON_IN_NEW_hst,4);
+ PR_HST(UPD_CON_IN_NEW_hst,5);
+ PR_HST(UPD_CON_IN_NEW_hst,6);
+ PR_HST(UPD_CON_IN_NEW_hst,7);
+ PR_HST(UPD_CON_IN_NEW_hst,8);
+ PR_HST(UPD_PAP_IN_NEW_hst,0);
+ PR_HST(UPD_PAP_IN_NEW_hst,1);
+ PR_HST(UPD_PAP_IN_NEW_hst,2);
+ PR_HST(UPD_PAP_IN_NEW_hst,3);
+ PR_HST(UPD_PAP_IN_NEW_hst,4);
+ PR_HST(UPD_PAP_IN_NEW_hst,5);
+ PR_HST(UPD_PAP_IN_NEW_hst,6);
+ PR_HST(UPD_PAP_IN_NEW_hst,7);
+ PR_HST(UPD_PAP_IN_NEW_hst,8);
+
+ PR_CTR(UPD_NEW_IND_ctr);
+ /* see comment on ENT_PERM_IND_ctr */
+ COND_PR_CTR(UPD_NEW_PERM_IND_ctr,RtsFlags.GcFlags.squeezeUpdFrames == rtsFalse,"U!PD_NEW_PERM_IND_ctr requires +RTS -Z");
+ PR_CTR(UPD_OLD_IND_ctr);
+ /* see comment on ENT_PERM_IND_ctr */
+ COND_PR_CTR(UPD_OLD_PERM_IND_ctr,RtsFlags.GcFlags.squeezeUpdFrames == rtsFalse,"U!PD_OLD_PERM_IND_ctr requires +RTS -Z");
+
+ PR_CTR(GC_SEL_ABANDONED_ctr);
+ PR_CTR(GC_SEL_MINOR_ctr);
+ PR_CTR(GC_SEL_MAJOR_ctr);
+ PR_CTR(GC_FAILED_PROMOTION_ctr);
+ PR_CTR(GC_WORDS_COPIED_ctr);
+}
+
+/* Data structure used in ``registering'' one of these counters. */
+
+StgEntCounter *ticky_entry_ctrs = NULL; /* root of list of them */
+
+/* To print out all the registered-counter info: */
+
+static void
+printRegisteredCounterInfo (FILE *tf)
+{
+ StgEntCounter *p;
+
+ if ( ticky_entry_ctrs != NULL ) {
+ fprintf(tf,"\n**************************************************\n\n");
+ }
+ fprintf(tf, "%11s%11s %6s%6s %-11s%-30s\n",
+ "Entries", "Allocs", "Arity", "Stack", "Kinds", "Function");
+ fprintf(tf, "--------------------------------------------------------------------------------\n");
+ /* Function name at the end so it doesn't mess up the tabulation */
+
+ for (p = ticky_entry_ctrs; p != NULL; p = p->link) {
+ fprintf(tf, "%11ld%11ld %6u%6u %-11s%-30s",
+ p->entry_count,
+ p->allocs,
+ p->arity,
+ p->stk_args,
+ p->arg_kinds,
+ p->str);
+
+ fprintf(tf, "\n");
+
+ }
+}
+
+/* Catch-all top-level counter struct. Allocations from CAFs will go
+ * here.
+ */
+StgEntCounter top_ct
+ = { 0, 0, 0,
+ "TOP", "",
+ 0, 0, NULL };
+
+#endif /* TICKY_TICKY */
+
diff --git a/rts/Ticky.h b/rts/Ticky.h
new file mode 100644
index 0000000000..21765e4bbb
--- /dev/null
+++ b/rts/Ticky.h
@@ -0,0 +1,9 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1999
+ *
+ * Header for Ticky.c
+ *
+ * ---------------------------------------------------------------------------*/
+
+extern void PrintTickyInfo(void);
diff --git a/rts/Timer.c b/rts/Timer.c
new file mode 100644
index 0000000000..0bfea2d6fd
--- /dev/null
+++ b/rts/Timer.c
@@ -0,0 +1,102 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2005
+ *
+ * Interval timer service for profiling and pre-emptive scheduling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/*
+ * The interval timer is used for profiling and for context switching in the
+ * threaded build.
+ *
+ * This file defines the platform-independent view of interval timing, relying
+ * on platform-specific services to install and run the timers.
+ *
+ */
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "Proftimer.h"
+#include "Schedule.h"
+#include "Timer.h"
+#include "Ticker.h"
+#include "Capability.h"
+
+/* ticks left before next pre-emptive context switch */
+static int ticks_to_ctxt_switch = 0;
+
+#if defined(THREADED_RTS)
+/* idle ticks left before we perform a GC */
+static int ticks_to_gc = 0;
+#endif
+
+/*
+ * Function: handle_tick()
+ *
+ * At each occurrence of a tick, the OS timer will invoke
+ * handle_tick().
+ */
+static
+void
+handle_tick(int unused STG_UNUSED)
+{
+#ifdef PROFILING
+ handleProfTick();
+#endif
+ if (RtsFlags.ConcFlags.ctxtSwitchTicks > 0) {
+ ticks_to_ctxt_switch--;
+ if (ticks_to_ctxt_switch <= 0) {
+ ticks_to_ctxt_switch = RtsFlags.ConcFlags.ctxtSwitchTicks;
+ context_switch = 1; /* schedule a context switch */
+ }
+ }
+
+#if defined(THREADED_RTS)
+ /*
+ * If we've been inactive for idleGCDelayTicks (set by +RTS
+ * -I), tell the scheduler to wake up and do a GC, to check
+ * for threads that are deadlocked.
+ */
+ switch (recent_activity) {
+ case ACTIVITY_YES:
+ recent_activity = ACTIVITY_MAYBE_NO;
+ ticks_to_gc = RtsFlags.GcFlags.idleGCDelayTicks;
+ break;
+ case ACTIVITY_MAYBE_NO:
+ if (ticks_to_gc == 0) break; /* 0 ==> no idle GC */
+ ticks_to_gc--;
+ if (ticks_to_gc == 0) {
+ ticks_to_gc = RtsFlags.GcFlags.idleGCDelayTicks;
+ recent_activity = ACTIVITY_INACTIVE;
+ blackholes_need_checking = rtsTrue;
+ /* hack: re-use the blackholes_need_checking flag */
+
+ /* ToDo: this doesn't work. Can't invoke
+ * pthread_cond_signal from a signal handler.
+ * Furthermore, we can't prod a capability that we
+ * might be holding. What can we do?
+ */
+ prodOneCapability();
+ }
+ break;
+ default:
+ break;
+ }
+#endif
+}
+
+int
+startTimer(nat ms)
+{
+#ifdef PROFILING
+ initProfTimer();
+#endif
+
+ return startTicker(ms, handle_tick);
+}
+
+int
+stopTimer()
+{
+ return stopTicker();
+}
diff --git a/rts/Timer.h b/rts/Timer.h
new file mode 100644
index 0000000000..ae26653462
--- /dev/null
+++ b/rts/Timer.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2005
+ *
+ * Interval timer service for profiling and pre-emptive scheduling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef TIMER_H
+#define TIMER_H
+
+# define TICK_MILLISECS (1000/TICK_FREQUENCY) /* ms per tick */
+
+/* Context switch timing constants. Context switches happen after a
+ * whole number of ticks, the default being every tick.
+ */
+#define CS_MIN_MILLISECS TICK_MILLISECS /* milliseconds per slice */
+
+typedef void (*TickProc)(int);
+
+extern int startTimer(nat ms);
+extern int stopTimer(void);
+
+#endif /* TIMER_H */
diff --git a/rts/Updates.cmm b/rts/Updates.cmm
new file mode 100644
index 0000000000..1d2fc5fe0f
--- /dev/null
+++ b/rts/Updates.cmm
@@ -0,0 +1,153 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Code to perform updates.
+ *
+ * This file is written in a subset of C--, extended with various
+ * features specific to GHC. It is compiled by GHC directly. For the
+ * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Cmm.h"
+#include "Updates.h"
+#include "StgLdvProf.h"
+
+/*
+ The update frame return address must be *polymorphic*, that means
+ we have to cope with both vectored and non-vectored returns. This
+ is done by putting the return vector right before the info table, and
+ having a standard direct return address after the info table (pointed
+ to by the return address itself, as usual).
+
+ Each entry in the vector table points to a specialised entry code fragment
+ that knows how to return after doing the update. It would be possible to
+ use a single generic piece of code that simply entered the return value
+ to return, but it's quicker this way. The direct return code of course
+ just does another direct return when it's finished.
+*/
+
+/* on entry to the update code
+ (1) R1 points to the closure being returned
+ (2) Sp points to the update frame
+*/
+
+/* The update fragment has been tuned so as to generate good
+ code with gcc, which accounts for some of the strangeness in the
+ way it is written.
+
+ In particular, the JMP_(ret) bit is passed down and pinned on the
+ end of each branch (there end up being two major branches in the
+ code), since we don't mind duplicating this jump.
+*/
+
+#define UPD_FRAME_ENTRY_TEMPLATE(label,ind_info,ret) \
+ label \
+ { \
+ W_ updatee; \
+ \
+ updatee = StgUpdateFrame_updatee(Sp); \
+ \
+ /* remove the update frame from the stack */ \
+ Sp = Sp + SIZEOF_StgUpdateFrame; \
+ \
+ /* ToDo: it might be a PAP, so we should check... */ \
+ TICK_UPD_CON_IN_NEW(sizeW_fromITBL(%GET_STD_INFO(updatee))); \
+ \
+ UPD_SPEC_IND(updatee, ind_info, R1, jump (ret)); \
+ }
+
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_0_ret,stg_IND_0_info,%RET_VEC(Sp(0),0))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_1_ret,stg_IND_1_info,%RET_VEC(Sp(0),1))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_2_ret,stg_IND_2_info,%RET_VEC(Sp(0),2))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_3_ret,stg_IND_3_info,%RET_VEC(Sp(0),3))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_4_ret,stg_IND_4_info,%RET_VEC(Sp(0),4))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_5_ret,stg_IND_5_info,%RET_VEC(Sp(0),5))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_6_ret,stg_IND_6_info,%RET_VEC(Sp(0),6))
+UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_7_ret,stg_IND_7_info,%RET_VEC(Sp(0),7))
+
+#if MAX_VECTORED_RTN > 8
+#error MAX_VECTORED_RTN has changed: please modify stg_upd_frame too.
+#endif
+
+/*
+ Make sure this table is big enough to handle the maximum vectored
+ return size!
+ */
+
+#if defined(PROFILING)
+#define UPD_FRAME_BITMAP 3
+#define UPD_FRAME_WORDS 3
+#else
+#define UPD_FRAME_BITMAP 0
+#define UPD_FRAME_WORDS 1
+#endif
+
+/* this bitmap indicates that the first word of an update frame is a
+ * non-pointer - this is the update frame link. (for profiling,
+ * there's a cost-centre-stack in there too).
+ */
+
+INFO_TABLE_RET( stg_upd_frame,
+ UPD_FRAME_WORDS, UPD_FRAME_BITMAP, UPDATE_FRAME,
+ stg_upd_frame_0_ret,
+ stg_upd_frame_1_ret,
+ stg_upd_frame_2_ret,
+ stg_upd_frame_3_ret,
+ stg_upd_frame_4_ret,
+ stg_upd_frame_5_ret,
+ stg_upd_frame_6_ret,
+ stg_upd_frame_7_ret
+ )
+UPD_FRAME_ENTRY_TEMPLATE(,stg_IND_direct_info,%ENTRY_CODE(Sp(0)))
+
+
+INFO_TABLE_RET( stg_marked_upd_frame,
+ UPD_FRAME_WORDS, UPD_FRAME_BITMAP, UPDATE_FRAME,
+ stg_upd_frame_0_ret,
+ stg_upd_frame_1_ret,
+ stg_upd_frame_2_ret,
+ stg_upd_frame_3_ret,
+ stg_upd_frame_4_ret,
+ stg_upd_frame_5_ret,
+ stg_upd_frame_6_ret,
+ stg_upd_frame_7_ret
+ )
+UPD_FRAME_ENTRY_TEMPLATE(,stg_IND_direct_info,%ENTRY_CODE(Sp(0)))
+
+/*-----------------------------------------------------------------------------
+ Seq frames
+
+ We don't have a primitive seq# operator: it is just a 'case'
+ expression whose scrutinee has either a polymorphic or function type
+ (constructor types can be handled by normal 'case' expressions).
+
+ To handle a polymorphic/function typed seq, we push a SEQ frame on
+ the stack. This is a polymorphic activation record that just pops
+ itself and returns (in a non-vectored way) when entered. The
+ purpose of the SEQ frame is to avoid having to make a polymorphic return
+ point for each polymorphic case expression.
+
+ Another way of looking at it: the SEQ frame turns a vectored return
+ into a direct one.
+ -------------------------------------------------------------------------- */
+
+#if MAX_VECTORED_RTN > 8
+#error MAX_VECTORED_RTN has changed: please modify stg_seq_frame too.
+#endif
+
+INFO_TABLE_RET( stg_seq_frame, 0/* words */, 0/* bitmap */, RET_SMALL,
+ RET_LBL(stg_seq_frame), /* 0 */
+ RET_LBL(stg_seq_frame), /* 1 */
+ RET_LBL(stg_seq_frame), /* 2 */
+ RET_LBL(stg_seq_frame), /* 3 */
+ RET_LBL(stg_seq_frame), /* 4 */
+ RET_LBL(stg_seq_frame), /* 5 */
+ RET_LBL(stg_seq_frame), /* 6 */
+ RET_LBL(stg_seq_frame) /* 7 */
+ )
+{
+ Sp_adj(1);
+ jump %ENTRY_CODE(Sp(0));
+}
diff --git a/rts/Updates.h b/rts/Updates.h
new file mode 100644
index 0000000000..5872157c81
--- /dev/null
+++ b/rts/Updates.h
@@ -0,0 +1,361 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Performing updates.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef UPDATES_H
+#define UPDATES_H
+
+/* -----------------------------------------------------------------------------
+ Updates
+
+ We have two layers of update macros. The top layer, UPD_IND() and
+ friends perform all the work of an update. In detail:
+
+ - if the closure being updated is a blocking queue, then all the
+ threads waiting on the blocking queue are updated.
+
+ - then the lower level updateWithIndirection() macro is invoked
+ to actually replace the closure with an indirection (see below).
+
+ -------------------------------------------------------------------------- */
+
+#ifdef TICKY_TICKY
+# define UPD_IND(updclosure, heapptr) \
+ UPD_PERM_IND(updclosure,heapptr)
+# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
+ UPD_PERM_IND(updclosure,heapptr); and_then
+#else
+# define SEMI ;
+# define UPD_IND(updclosure, heapptr) \
+ UPD_REAL_IND(updclosure,INFO_PTR(stg_IND_info),heapptr,SEMI)
+# define UPD_SPEC_IND(updclosure, ind_info, heapptr, and_then) \
+ UPD_REAL_IND(updclosure,ind_info,heapptr,and_then)
+#endif
+
+/* These macros have to work in both C and C--, so here's the
+ * impedence matching:
+ */
+#ifdef CMINUSMINUS
+#define BLOCK_BEGIN
+#define BLOCK_END
+#define DECLARE_IPTR(info) W_ info
+#define FCALL foreign "C"
+#define INFO_PTR(info) info
+#define ARG_PTR "ptr"
+#else
+#define BLOCK_BEGIN {
+#define BLOCK_END }
+#define DECLARE_IPTR(info) const StgInfoTable *(info)
+#define FCALL /* nothing */
+#define INFO_PTR(info) &info
+#define StgBlockingQueue_blocking_queue(closure) \
+ (((StgBlockingQueue *)closure)->blocking_queue)
+#define ARG_PTR /* nothing */
+#endif
+
+/* UPD_IND actually does a PERM_IND if TICKY_TICKY is on;
+ if you *really* need an IND use UPD_REAL_IND
+ */
+#define UPD_REAL_IND(updclosure, ind_info, heapptr, and_then) \
+ BLOCK_BEGIN \
+ DECLARE_IPTR(info); \
+ info = GET_INFO(updclosure); \
+ updateWithIndirection(ind_info, \
+ updclosure, \
+ heapptr, \
+ and_then); \
+ BLOCK_END
+
+#if defined(PROFILING) || defined(TICKY_TICKY)
+#define UPD_PERM_IND(updclosure, heapptr) \
+ BLOCK_BEGIN \
+ updateWithPermIndirection(updclosure, \
+ heapptr); \
+ BLOCK_END
+#endif
+
+#if defined(RTS_SUPPORTS_THREADS)
+
+# ifdef TICKY_TICKY
+# define UPD_IND_NOLOCK(updclosure, heapptr) \
+ BLOCK_BEGIN \
+ updateWithPermIndirection(updclosure, \
+ heapptr); \
+ BLOCK_END
+# else
+# define UPD_IND_NOLOCK(updclosure, heapptr) \
+ BLOCK_BEGIN \
+ updateWithIndirection(INFO_PTR(stg_IND_info), \
+ updclosure, \
+ heapptr,); \
+ BLOCK_END
+# endif
+
+#else
+#define UPD_IND_NOLOCK(updclosure,heapptr) UPD_IND(updclosure,heapptr)
+#endif
+
+/* -----------------------------------------------------------------------------
+ Awaken any threads waiting on a blocking queue (BLACKHOLE_BQ).
+ -------------------------------------------------------------------------- */
+
+#if defined(PAR)
+
+/*
+ In a parallel setup several types of closures might have a blocking queue:
+ BLACKHOLE_BQ ... same as in the default concurrent setup; it will be
+ reawakened via calling UPD_IND on that closure after
+ having finished the computation of the graph
+ FETCH_ME_BQ ... a global indirection (FETCH_ME) may be entered by a
+ local TSO, turning it into a FETCH_ME_BQ; it will be
+ reawakened via calling processResume
+ RBH ... a revertible black hole may be entered by another
+ local TSO, putting it onto its blocking queue; since
+ RBHs only exist while the corresponding closure is in
+ transit, they will be reawakened via calling
+ convertToFetchMe (upon processing an ACK message)
+
+ In a parallel setup a blocking queue may contain 3 types of closures:
+ TSO ... as in the default concurrent setup
+ BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for
+ the result of the current computation
+ CONSTR ... an RBHSave closure (which contains data ripped out of
+ the closure to make room for a blocking queue; since
+                   it only contains data we use the existing type of
+ a CONSTR closure); this closure is the end of a
+ blocking queue for an RBH closure; it only exists in
+ this kind of blocking queue and must be at the end
+ of the queue
+*/
+extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#define DO_AWAKEN_BQ(bqe, node) STGCALL2(awakenBlockedQueue, bqe, node);
+
+#define AWAKEN_BQ(info,closure) \
+ if (info == &stg_BLACKHOLE_BQ_info || \
+ info == &stg_FETCH_ME_BQ_info || \
+ get_itbl(closure)->type == RBH) { \
+ DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure); \
+ }
+
+#elif defined(GRAN)
+
+extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node);
+#define DO_AWAKEN_BQ(bq, node) STGCALL2(awakenBlockedQueue, bq, node);
+
+/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are
+ not checked. The rest of the code is the same as for GUM.
+*/
+#define AWAKEN_BQ(info,closure) \
+ if (info == &stg_BLACKHOLE_BQ_info || \
+ get_itbl(closure)->type == RBH) { \
+ DO_AWAKEN_BQ(((StgBlockingQueue *)closure)->blocking_queue, closure); \
+ }
+
+#endif /* GRAN || PAR */
+
+
+/* -----------------------------------------------------------------------------
+ Updates: lower-level macros which update a closure with an
+ indirection to another closure.
+
+ There are several variants of this code.
+
+ PROFILING:
+ -------------------------------------------------------------------------- */
+
+/* LDV profiling:
+ * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in
+ * which p1 resides.
+ *
+ * Note:
+ * After all, we do *NOT* need to call LDV_RECORD_CREATE() for both IND and
+ * IND_OLDGEN closures because they are inherently used. But, it corrupts
+ * the invariants that every closure keeps its creation time in the profiling
+ * field. So, we call LDV_RECORD_CREATE().
+ */
+
+/* In the DEBUG case, we also zero out the slop of the old closure,
+ * so that the sanity checker can tell where the next closure is.
+ *
+ * Two important invariants: we should never try to update a closure
+ * to point to itself, and the closure being updated should not
+ * already have been updated (the mutable list will get messed up
+ * otherwise).
+ *
+ * NB. We do *not* do this in THREADED_RTS mode, because when we have the
+ * possibility of multiple threads entering the same closure, zeroing
+ * the slop in one of the threads would have a disastrous effect on
+ * the other (seen in the wild!).
+ */
+#ifdef CMINUSMINUS
+
+#define FILL_SLOP(p) \
+ W_ inf; \
+ W_ sz; \
+ W_ i; \
+ inf = %GET_STD_INFO(p); \
+ if (%INFO_TYPE(inf) != HALF_W_(THUNK_SELECTOR) \
+ && %INFO_TYPE(inf) != HALF_W_(BLACKHOLE) \
+ && %INFO_TYPE(inf) != HALF_W_(CAF_BLACKHOLE)) { \
+ if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) { \
+ sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \
+ } else { \
+ if (%INFO_TYPE(inf) == HALF_W_(AP)) { \
+ sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \
+ } else { \
+ sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \
+ } \
+ } \
+ i = 0; \
+ for: \
+ if (i < sz) { \
+ StgThunk_payload(p,i) = 0; \
+ i = i + 1; \
+ goto for; \
+ } \
+ }
+
+#else /* !CMINUSMINUS */
+
+INLINE_HEADER void
+FILL_SLOP(StgClosure *p)
+{
+ StgInfoTable *inf = get_itbl(p);
+ nat i, sz;
+
+ switch (inf->type) {
+ case BLACKHOLE:
+ case CAF_BLACKHOLE:
+ case THUNK_SELECTOR:
+ return;
+ case AP:
+ sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader);
+ break;
+ case AP_STACK:
+ sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader);
+ break;
+ default:
+ sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+ break;
+ }
+ for (i = 0; i < sz; i++) {
+ ((StgThunk *)p)->payload[i] = 0;
+ }
+}
+
+#endif /* CMINUSMINUS */
+
+#if !defined(DEBUG) || defined(THREADED_RTS)
+#define DEBUG_FILL_SLOP(p) /* do nothing */
+#else
+#define DEBUG_FILL_SLOP(p) FILL_SLOP(p)
+#endif
+
+/* We have two versions of this macro (sadly), one for use in C-- code,
+ * and the other for C.
+ *
+ * The and_then argument is a performance hack so that we can paste in
+ * the continuation code directly. It helps shave a couple of
+ * instructions off the common case in the update code, which is
+ * worthwhile (the update code is often part of the inner loop).
+ * (except that gcc now appears to common up this code again and
+ * invert the optimisation. Grrrr --SDM).
+ */
+#ifdef CMINUSMINUS
+#define generation(n) (W_[generations] + n*SIZEOF_generation)
+#define updateWithIndirection(ind_info, p1, p2, and_then) \
+ W_ bd; \
+ \
+ DEBUG_FILL_SLOP(p1); \
+ LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1); \
+ StgInd_indirectee(p1) = p2; \
+ foreign "C" wb() []; \
+ bd = Bdescr(p1); \
+ if (bdescr_gen_no(bd) != 0 :: CInt) { \
+ foreign "C" recordMutableCap(p1 "ptr", \
+ MyCapability() "ptr", \
+ bdescr_gen_no(bd)) [R1]; \
+ SET_INFO(p1, stg_IND_OLDGEN_info); \
+ LDV_RECORD_CREATE(p1); \
+ TICK_UPD_OLD_IND(); \
+ and_then; \
+ } else { \
+ SET_INFO(p1, ind_info); \
+ LDV_RECORD_CREATE(p1); \
+ TICK_UPD_NEW_IND(); \
+ and_then; \
+ }
+#else
+#define updateWithIndirection(ind_info, p1, p2, and_then) \
+ { \
+ bdescr *bd; \
+ \
+ /* cas(p1, 0, &stg_WHITEHOLE_info); */ \
+ ASSERT( (P_)p1 != (P_)p2 && !closure_IND(p1) ); \
+ DEBUG_FILL_SLOP(p1); \
+ LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1); \
+ ((StgInd *)p1)->indirectee = p2; \
+ wb(); \
+ bd = Bdescr((P_)p1); \
+ if (bd->gen_no != 0) { \
+ recordMutableGenLock(p1, &generations[bd->gen_no]); \
+ SET_INFO(p1, &stg_IND_OLDGEN_info); \
+ TICK_UPD_OLD_IND(); \
+ and_then; \
+ } else { \
+ SET_INFO(p1, ind_info); \
+ LDV_RECORD_CREATE(p1); \
+ TICK_UPD_NEW_IND(); \
+ and_then; \
+ } \
+ }
+#endif
+
+/* The permanent indirection version isn't performance critical. We
+ * therefore use an inline C function instead of the C-- macro.
+ */
+#ifndef CMINUSMINUS
+INLINE_HEADER void
+updateWithPermIndirection(StgClosure *p1,
+ StgClosure *p2)
+{
+ bdescr *bd;
+
+ ASSERT( p1 != p2 && !closure_IND(p1) );
+
+ /*
+ * @LDV profiling
+ * Destroy the old closure.
+ * Nb: LDV_* stuff cannot mix with ticky-ticky
+ */
+ LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1);
+
+ bd = Bdescr((P_)p1);
+ if (bd->gen_no != 0) {
+ recordMutableGenLock(p1, &generations[bd->gen_no]);
+ ((StgInd *)p1)->indirectee = p2;
+ SET_INFO(p1, &stg_IND_OLDGEN_PERM_info);
+ /*
+ * @LDV profiling
+ * We have just created a new closure.
+ */
+ LDV_RECORD_CREATE(p1);
+ TICK_UPD_OLD_PERM_IND();
+ } else {
+ ((StgInd *)p1)->indirectee = p2;
+ SET_INFO(p1, &stg_IND_PERM_info);
+ /*
+ * @LDV profiling
+ * We have just created a new closure.
+ */
+ LDV_RECORD_CREATE(p1);
+ TICK_UPD_NEW_PERM_IND(p1);
+ }
+}
+#endif
+
+#endif /* UPDATES_H */
diff --git a/rts/VisCallbacks.c b/rts/VisCallbacks.c
new file mode 100644
index 0000000000..8e3c6ceb6c
--- /dev/null
+++ b/rts/VisCallbacks.c
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2000
+ *
+ * RTS GTK Front Panel (callbacks)
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef RTS_GTK_FRONTPANEL
+
+#include "Rts.h"
+
+#include <gtk/gtk.h>
+
+#include "VisCallbacks.h"
+#include "VisWindow.h"
+#include "VisSupport.h"
+#include "FrontPanel.h"
+
+void
+on_cont_radio_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ update_mode = Continuous;
+}
+
+
+void
+on_stop_before_radio_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ update_mode = BeforeGC;
+}
+
+
+void
+on_stop_after_radio_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ update_mode = AfterGC;
+}
+
+
+void
+on_stop_both_radio_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ update_mode = BeforeAfterGC;
+}
+
+
+void
+on_stop_but_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ stop_now = TRUE;
+}
+
+
+void
+on_continue_but_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ continue_now = TRUE;
+}
+
+
+void
+on_quit_but_clicked (GtkButton *button,
+ gpointer user_data)
+{
+ quit = TRUE;
+}
+
+#endif /* RTS_GTK_FRONTPANEL */
diff --git a/rts/VisCallbacks.h b/rts/VisCallbacks.h
new file mode 100644
index 0000000000..d242010fad
--- /dev/null
+++ b/rts/VisCallbacks.h
@@ -0,0 +1,30 @@
+#include <gtk/gtk.h>
+
+
+void
+on_cont_radio_clicked (GtkButton *button,
+ gpointer user_data);
+
+void
+on_stop_before_radio_clicked (GtkButton *button,
+ gpointer user_data);
+
+void
+on_stop_after_radio_clicked (GtkButton *button,
+ gpointer user_data);
+
+void
+on_stop_both_radio_clicked (GtkButton *button,
+ gpointer user_data);
+
+void
+on_stop_but_clicked (GtkButton *button,
+ gpointer user_data);
+
+void
+on_continue_but_clicked (GtkButton *button,
+ gpointer user_data);
+
+void
+on_quit_but_clicked (GtkButton *button,
+ gpointer user_data);
diff --git a/rts/VisSupport.c b/rts/VisSupport.c
new file mode 100644
index 0000000000..a85c5f43a4
--- /dev/null
+++ b/rts/VisSupport.c
@@ -0,0 +1,144 @@
+/*
+ * DO NOT EDIT THIS FILE - it is generated by Glade.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <gtk/gtk.h>
+
+#include "VisSupport.h"
+
+GtkWidget*
+lookup_widget (GtkWidget *widget,
+ const gchar *widget_name)
+{
+ GtkWidget *parent, *found_widget;
+
+ for (;;)
+ {
+ if (GTK_IS_MENU (widget))
+ parent = gtk_menu_get_attach_widget (GTK_MENU (widget));
+ else
+ parent = widget->parent;
+ if (!parent)
+ parent = (GtkWidget*) g_object_get_data (G_OBJECT (widget), "GladeParentKey");
+ if (parent == NULL)
+ break;
+ widget = parent;
+ }
+
+ found_widget = (GtkWidget*) g_object_get_data (G_OBJECT (widget),
+ widget_name);
+ if (!found_widget)
+ g_warning ("Widget not found: %s", widget_name);
+ return found_widget;
+}
+
+static GList *pixmaps_directories = NULL;
+
+/* Use this function to set the directory containing installed pixmaps. */
+void
+add_pixmap_directory (const gchar *directory)
+{
+ pixmaps_directories = g_list_prepend (pixmaps_directories,
+ g_strdup (directory));
+}
+
+/* This is an internally used function to find pixmap files. */
+static gchar*
+find_pixmap_file (const gchar *filename)
+{
+ GList *elem;
+
+ /* We step through each of the pixmaps directory to find it. */
+ elem = pixmaps_directories;
+ while (elem)
+ {
+ gchar *pathname = g_strdup_printf ("%s%s%s", (gchar*)elem->data,
+ G_DIR_SEPARATOR_S, filename);
+ if (g_file_test (pathname, G_FILE_TEST_EXISTS))
+ return pathname;
+ g_free (pathname);
+ elem = elem->next;
+ }
+ return NULL;
+}
+
+/* This is an internally used function to create pixmaps. */
+GtkWidget*
+create_pixmap (GtkWidget *widget,
+ const gchar *filename)
+{
+ gchar *pathname = NULL;
+ GtkWidget *pixmap;
+
+ if (!filename || !filename[0])
+ return gtk_image_new ();
+
+ pathname = find_pixmap_file (filename);
+
+ if (!pathname)
+ {
+ g_warning ("Couldn't find pixmap file: %s", filename);
+ return gtk_image_new ();
+ }
+
+ pixmap = gtk_image_new_from_file (pathname);
+ g_free (pathname);
+ return pixmap;
+}
+
+/* This is an internally used function to create pixmaps. */
+GdkPixbuf*
+create_pixbuf (const gchar *filename)
+{
+ gchar *pathname = NULL;
+ GdkPixbuf *pixbuf;
+ GError *error = NULL;
+
+ if (!filename || !filename[0])
+ return NULL;
+
+ pathname = find_pixmap_file (filename);
+
+ if (!pathname)
+ {
+ g_warning ("Couldn't find pixmap file: %s", filename);
+ return NULL;
+ }
+
+ pixbuf = gdk_pixbuf_new_from_file (pathname, &error);
+ if (!pixbuf)
+ {
+ fprintf (stderr, "Failed to load pixbuf file: %s: %s\n",
+ pathname, error->message);
+ g_error_free (error);
+ }
+ g_free (pathname);
+ return pixbuf;
+}
+
+/* This is used to set ATK action descriptions. */
+void
+glade_set_atk_action_description (AtkAction *action,
+ const gchar *action_name,
+ const gchar *description)
+{
+ gint n_actions, i;
+
+ n_actions = atk_action_get_n_actions (action);
+ for (i = 0; i < n_actions; i++)
+ {
+ if (!strcmp (atk_action_get_name (action, i), action_name))
+ atk_action_set_description (action, i, description);
+ }
+}
+
diff --git a/rts/VisSupport.h b/rts/VisSupport.h
new file mode 100644
index 0000000000..2dea079c2a
--- /dev/null
+++ b/rts/VisSupport.h
@@ -0,0 +1,44 @@
+/*
+ * DO NOT EDIT THIS FILE - it is generated by Glade.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <gtk/gtk.h>
+
+/*
+ * Public Functions.
+ */
+
+/*
+ * This function returns a widget in a component created by Glade.
+ * Call it with the toplevel widget in the component (i.e. a window/dialog),
+ * or alternatively any widget in the component, and the name of the widget
+ * you want returned.
+ */
+GtkWidget* lookup_widget (GtkWidget *widget,
+ const gchar *widget_name);
+
+
+/* Use this function to set the directory containing installed pixmaps. */
+void add_pixmap_directory (const gchar *directory);
+
+
+/*
+ * Private Functions.
+ */
+
+/* This is used to create the pixmaps used in the interface. */
+GtkWidget* create_pixmap (GtkWidget *widget,
+ const gchar *filename);
+
+/* This is used to create the pixbufs used in the interface. */
+GdkPixbuf* create_pixbuf (const gchar *filename);
+
+/* This is used to set ATK action descriptions. */
+void glade_set_atk_action_description (AtkAction *action,
+ const gchar *action_name,
+ const gchar *description);
+
diff --git a/rts/VisWindow.c b/rts/VisWindow.c
new file mode 100644
index 0000000000..188b88976e
--- /dev/null
+++ b/rts/VisWindow.c
@@ -0,0 +1,747 @@
+/*
+ * DO NOT EDIT THIS FILE - it is generated by Glade.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <gdk/gdkkeysyms.h>
+#include <gtk/gtk.h>
+
+#include "VisCallbacks.h"
+#include "VisWindow.h"
+#include "VisSupport.h"
+
+/* Register WIDGET on COMPONENT under NAME (for lookup_widget()),
+ * taking a reference so the widget stays retrievable; the reference is
+ * released automatically when COMPONENT drops the data entry. */
+#define GLADE_HOOKUP_OBJECT(component,widget,name) \
+  g_object_set_data_full (G_OBJECT (component), name, \
+    gtk_widget_ref (widget), (GDestroyNotify) gtk_widget_unref)
+
+/* As above but without taking a reference -- used below only for the
+ * top-level window registering itself, where holding a self-reference
+ * would be circular. */
+#define GLADE_HOOKUP_OBJECT_NO_REF(component,widget,name) \
+  g_object_set_data (G_OBJECT (component), name, widget)
+
+/*
+ * Build the complete GHC front-panel window hierarchy and connect its
+ * signal handlers (handlers live in VisCallbacks.h).
+ *
+ * Returns the top-level GtkWindow.  Every named child widget is also
+ * registered on that window via the GLADE_HOOKUP_OBJECT macros so it
+ * can later be fetched with lookup_widget().
+ *
+ * NOTE(review): Glade-generated code -- the construction order mirrors
+ * the .glade project file; hand edits here would normally be lost on
+ * regeneration.
+ */
+GtkWidget*
+create_GHC_Front_Panel (void)
+{
+  GtkWidget *GHC_Front_Panel;
+  GtkWidget *vbox1;
+  GtkWidget *hbox1;
+  GtkWidget *vbox4;
+  GtkWidget *frame3;
+  GtkWidget *hbox3;
+  GtkWidget *label40;
+  GtkWidget *map_ruler;
+  GtkWidget *memmap;
+  GtkWidget *label1;
+  GtkWidget *frame8;
+  GtkWidget *vbox14;
+  GtkWidget *table4;
+  GtkWidget *gen_ruler;
+  GtkWidget *gen_hbox;
+  GtkWidget *generations;
+  GtkWidget *label39;
+  GtkWidget *label41;
+  GtkWidget *frame7;
+  GtkWidget *table3;
+  GtkWidget *res_hruler;
+  GtkWidget *res_vruler;
+  GtkWidget *res_drawingarea;
+  GtkWidget *label37;
+  GtkWidget *label38;
+  GtkWidget *label42;
+  GtkWidget *vbox5;
+  GtkWidget *frame5;
+  GtkWidget *vbox6;
+  GtkWidget *table1;
+  GtkWidget *label12;
+  GtkWidget *label13;
+  GtkWidget *label14;
+  GtkWidget *label15;
+  GtkWidget *label16;
+  GtkWidget *label17;
+  GtkWidget *label18;
+  GtkWidget *label19;
+  GtkWidget *live_label;
+  GtkWidget *allocated_label;
+  GtkWidget *footprint_label;
+  GtkWidget *alloc_rate_label;
+  GtkWidget *label43;
+  GtkWidget *frame9;
+  GtkWidget *table5;
+  GtkWidget *label20;
+  GtkWidget *label21;
+  GtkWidget *label22;
+  GtkWidget *label24;
+  GtkWidget *label26;
+  GtkWidget *label25;
+  GtkWidget *label27;
+  GtkWidget *running_label;
+  GtkWidget *blockread_label;
+  GtkWidget *blockwrite_label;
+  GtkWidget *blockmvar_label;
+  GtkWidget *blockthrowto_label;
+  GtkWidget *blockbh_label;
+  GtkWidget *sleeping_label;
+  GtkWidget *hseparator1;
+  GtkWidget *hseparator2;
+  GtkWidget *label35;
+  GtkWidget *total_label;
+  GtkWidget *label44;
+  GtkWidget *frame6;
+  GtkWidget *vbox7;
+  GtkWidget *vbox9;
+  GtkWidget *cont_radio;
+  GSList *cont_radio_group = NULL;
+  GtkWidget *stop_before_radio;
+  GtkWidget *stop_after_radio;
+  GtkWidget *stop_both_radio;
+  GtkWidget *vbox8;
+  GtkWidget *stop_but;
+  GtkWidget *continue_but;
+  GtkWidget *label45;
+  GtkWidget *quit_but;
+  GtkWidget *statusbar;
+
+  /* Top-level window. */
+  GHC_Front_Panel = gtk_window_new (GTK_WINDOW_TOPLEVEL);
+  gtk_widget_set_name (GHC_Front_Panel, "GHC_Front_Panel");
+  gtk_window_set_title (GTK_WINDOW (GHC_Front_Panel), "GHC Front Panel");
+  gtk_window_set_default_size (GTK_WINDOW (GHC_Front_Panel), 450, 600);
+
+  vbox1 = gtk_vbox_new (FALSE, 0);
+  gtk_widget_set_name (vbox1, "vbox1");
+  gtk_widget_show (vbox1);
+  gtk_container_add (GTK_CONTAINER (GHC_Front_Panel), vbox1);
+
+  hbox1 = gtk_hbox_new (FALSE, 10);
+  gtk_widget_set_name (hbox1, "hbox1");
+  gtk_widget_show (hbox1);
+  gtk_box_pack_start (GTK_BOX (vbox1), hbox1, TRUE, TRUE, 0);
+  gtk_container_set_border_width (GTK_CONTAINER (hbox1), 10);
+
+  vbox4 = gtk_vbox_new (FALSE, 10);
+  gtk_widget_set_name (vbox4, "vbox4");
+  gtk_widget_show (vbox4);
+  gtk_box_pack_start (GTK_BOX (hbox1), vbox4, TRUE, TRUE, 0);
+
+  /* "Memory Map" frame: a Mb ruler beside the memmap drawing area. */
+  frame3 = gtk_frame_new (NULL);
+  gtk_widget_set_name (frame3, "frame3");
+  gtk_widget_show (frame3);
+  gtk_box_pack_start (GTK_BOX (vbox4), frame3, TRUE, TRUE, 0);
+
+  hbox3 = gtk_hbox_new (FALSE, 0);
+  gtk_widget_set_name (hbox3, "hbox3");
+  gtk_widget_show (hbox3);
+  gtk_container_add (GTK_CONTAINER (frame3), hbox3);
+
+  label40 = gtk_label_new ("Mb");
+  gtk_widget_set_name (label40, "label40");
+  gtk_widget_show (label40);
+  gtk_box_pack_start (GTK_BOX (hbox3), label40, FALSE, FALSE, 0);
+  gtk_label_set_justify (GTK_LABEL (label40), GTK_JUSTIFY_CENTER);
+
+  map_ruler = gtk_vruler_new ();
+  gtk_widget_set_name (map_ruler, "map_ruler");
+  gtk_widget_show (map_ruler);
+  gtk_box_pack_start (GTK_BOX (hbox3), map_ruler, FALSE, FALSE, 0);
+  gtk_ruler_set_range (GTK_RULER (map_ruler), 0, 10, 1.40845, 10);
+
+  memmap = gtk_drawing_area_new ();
+  gtk_widget_set_name (memmap, "memmap");
+  gtk_widget_show (memmap);
+  gtk_box_pack_start (GTK_BOX (hbox3), memmap, TRUE, TRUE, 0);
+
+  label1 = gtk_label_new ("Memory Map");
+  gtk_widget_set_name (label1, "label1");
+  gtk_widget_show (label1);
+  gtk_frame_set_label_widget (GTK_FRAME (frame3), label1);
+
+  /* "Generations" frame: ruler + drawing area laid out in a table. */
+  frame8 = gtk_frame_new (NULL);
+  gtk_widget_set_name (frame8, "frame8");
+  gtk_widget_show (frame8);
+  gtk_box_pack_start (GTK_BOX (vbox4), frame8, TRUE, TRUE, 0);
+
+  vbox14 = gtk_vbox_new (FALSE, 0);
+  gtk_widget_set_name (vbox14, "vbox14");
+  gtk_widget_show (vbox14);
+  gtk_container_add (GTK_CONTAINER (frame8), vbox14);
+
+  table4 = gtk_table_new (2, 3, FALSE);
+  gtk_widget_set_name (table4, "table4");
+  gtk_widget_show (table4);
+  gtk_box_pack_start (GTK_BOX (vbox14), table4, TRUE, TRUE, 0);
+
+  gen_ruler = gtk_vruler_new ();
+  gtk_widget_set_name (gen_ruler, "gen_ruler");
+  gtk_widget_show (gen_ruler);
+  gtk_table_attach (GTK_TABLE (table4), gen_ruler, 1, 2, 0, 1,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (GTK_EXPAND | GTK_FILL), 0, 0);
+  gtk_ruler_set_range (GTK_RULER (gen_ruler), 0, 10, 1.69935, 10);
+
+  gen_hbox = gtk_hbox_new (FALSE, 0);
+  gtk_widget_set_name (gen_hbox, "gen_hbox");
+  gtk_widget_show (gen_hbox);
+  gtk_table_attach (GTK_TABLE (table4), gen_hbox, 2, 3, 1, 2,
+                    (GtkAttachOptions) (GTK_EXPAND | GTK_FILL),
+                    (GtkAttachOptions) (GTK_FILL), 0, 0);
+
+  generations = gtk_drawing_area_new ();
+  gtk_widget_set_name (generations, "generations");
+  gtk_widget_show (generations);
+  gtk_table_attach (GTK_TABLE (table4), generations, 2, 3, 0, 1,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (GTK_FILL), 0, 0);
+
+  label39 = gtk_label_new ("Mb");
+  gtk_widget_set_name (label39, "label39");
+  gtk_widget_show (label39);
+  gtk_table_attach (GTK_TABLE (table4), label39, 0, 1, 0, 1,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label39), GTK_JUSTIFY_CENTER);
+
+  label41 = gtk_label_new ("Generations");
+  gtk_widget_set_name (label41, "label41");
+  gtk_widget_show (label41);
+  gtk_frame_set_label_widget (GTK_FRAME (frame8), label41);
+
+  /* "Residency" frame: time/size rulers around a drawing area. */
+  frame7 = gtk_frame_new (NULL);
+  gtk_widget_set_name (frame7, "frame7");
+  gtk_widget_show (frame7);
+  gtk_box_pack_start (GTK_BOX (vbox4), frame7, TRUE, TRUE, 0);
+
+  table3 = gtk_table_new (3, 3, FALSE);
+  gtk_widget_set_name (table3, "table3");
+  gtk_widget_show (table3);
+  gtk_container_add (GTK_CONTAINER (frame7), table3);
+  gtk_container_set_border_width (GTK_CONTAINER (table3), 2);
+
+  res_hruler = gtk_hruler_new ();
+  gtk_widget_set_name (res_hruler, "res_hruler");
+  gtk_widget_show (res_hruler);
+  gtk_table_attach (GTK_TABLE (table3), res_hruler, 2, 3, 1, 2,
+                    (GtkAttachOptions) (GTK_EXPAND | GTK_FILL),
+                    (GtkAttachOptions) (GTK_FILL), 0, 0);
+  gtk_ruler_set_range (GTK_RULER (res_hruler), 0, 10, 8.35443, 10);
+
+  res_vruler = gtk_vruler_new ();
+  gtk_widget_set_name (res_vruler, "res_vruler");
+  gtk_widget_show (res_vruler);
+  gtk_table_attach (GTK_TABLE (table3), res_vruler, 1, 2, 2, 3,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (GTK_EXPAND | GTK_FILL), 0, 0);
+  gtk_ruler_set_range (GTK_RULER (res_vruler), 0, 10, 9.69925, 10);
+
+  res_drawingarea = gtk_drawing_area_new ();
+  gtk_widget_set_name (res_drawingarea, "res_drawingarea");
+  gtk_widget_show (res_drawingarea);
+  gtk_table_attach (GTK_TABLE (table3), res_drawingarea, 2, 3, 2, 3,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (GTK_FILL), 0, 0);
+
+  label37 = gtk_label_new ("Secs");
+  gtk_widget_set_name (label37, "label37");
+  gtk_widget_show (label37);
+  gtk_table_attach (GTK_TABLE (table3), label37, 2, 3, 0, 1,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label37), GTK_JUSTIFY_CENTER);
+
+  label38 = gtk_label_new ("Mb");
+  gtk_widget_set_name (label38, "label38");
+  gtk_widget_show (label38);
+  gtk_table_attach (GTK_TABLE (table3), label38, 0, 1, 2, 3,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label38), GTK_JUSTIFY_CENTER);
+
+  label42 = gtk_label_new ("Residency");
+  gtk_widget_set_name (label42, "label42");
+  gtk_widget_show (label42);
+  gtk_frame_set_label_widget (GTK_FRAME (frame7), label42);
+
+  /* Right-hand column of the window. */
+  vbox5 = gtk_vbox_new (FALSE, 10);
+  gtk_widget_set_name (vbox5, "vbox5");
+  gtk_widget_show (vbox5);
+  gtk_box_pack_end (GTK_BOX (hbox1), vbox5, FALSE, FALSE, 0);
+
+  /* "Stats" frame: live/allocated/footprint/allocation-rate readouts. */
+  frame5 = gtk_frame_new (NULL);
+  gtk_widget_set_name (frame5, "frame5");
+  gtk_widget_show (frame5);
+  gtk_box_pack_start (GTK_BOX (vbox5), frame5, FALSE, TRUE, 0);
+
+  vbox6 = gtk_vbox_new (FALSE, 0);
+  gtk_widget_set_name (vbox6, "vbox6");
+  gtk_widget_show (vbox6);
+  gtk_container_add (GTK_CONTAINER (frame5), vbox6);
+  gtk_container_set_border_width (GTK_CONTAINER (vbox6), 5);
+
+  table1 = gtk_table_new (4, 3, FALSE);
+  gtk_widget_set_name (table1, "table1");
+  gtk_widget_show (table1);
+  gtk_box_pack_start (GTK_BOX (vbox6), table1, TRUE, TRUE, 0);
+  gtk_table_set_col_spacings (GTK_TABLE (table1), 7);
+
+  label12 = gtk_label_new ("Allocated");
+  gtk_widget_set_name (label12, "label12");
+  gtk_widget_show (label12);
+  gtk_table_attach (GTK_TABLE (table1), label12, 0, 1, 1, 2,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label12), GTK_JUSTIFY_RIGHT);
+  gtk_misc_set_alignment (GTK_MISC (label12), 1, 0.5);
+
+  label13 = gtk_label_new ("Live");
+  gtk_widget_set_name (label13, "label13");
+  gtk_widget_show (label13);
+  gtk_table_attach (GTK_TABLE (table1), label13, 0, 1, 0, 1,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label13), GTK_JUSTIFY_RIGHT);
+  gtk_misc_set_alignment (GTK_MISC (label13), 1, 0.5);
+
+  label14 = gtk_label_new ("Allocation Rate");
+  gtk_widget_set_name (label14, "label14");
+  gtk_widget_show (label14);
+  gtk_table_attach (GTK_TABLE (table1), label14, 0, 1, 3, 4,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label14), GTK_JUSTIFY_RIGHT);
+  gtk_misc_set_alignment (GTK_MISC (label14), 1, 0.5);
+
+  label15 = gtk_label_new ("\t\tFootprint");
+  gtk_widget_set_name (label15, "label15");
+  gtk_widget_show (label15);
+  gtk_table_attach (GTK_TABLE (table1), label15, 0, 1, 2, 3,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label15), GTK_JUSTIFY_RIGHT);
+  gtk_misc_set_alignment (GTK_MISC (label15), 1, 0.5);
+
+  label16 = gtk_label_new ("M/sec");
+  gtk_widget_set_name (label16, "label16");
+  gtk_widget_show (label16);
+  gtk_table_attach (GTK_TABLE (table1), label16, 2, 3, 3, 4,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label16), GTK_JUSTIFY_CENTER);
+
+  label17 = gtk_label_new ("M");
+  gtk_widget_set_name (label17, "label17");
+  gtk_widget_show (label17);
+  gtk_table_attach (GTK_TABLE (table1), label17, 2, 3, 2, 3,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_misc_set_alignment (GTK_MISC (label17), 7.45058e-09, 0.5);
+
+  label18 = gtk_label_new ("M");
+  gtk_widget_set_name (label18, "label18");
+  gtk_widget_show (label18);
+  gtk_table_attach (GTK_TABLE (table1), label18, 2, 3, 1, 2,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label18), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label18), 7.45058e-09, 0.5);
+
+  label19 = gtk_label_new ("M");
+  gtk_widget_set_name (label19, "label19");
+  gtk_widget_show (label19);
+  gtk_table_attach (GTK_TABLE (table1), label19, 2, 3, 0, 1,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label19), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label19), 7.45058e-09, 0.5);
+
+  /* Value labels start empty; the RTS fills them in at run time. */
+  live_label = gtk_label_new ("");
+  gtk_widget_set_name (live_label, "live_label");
+  gtk_widget_show (live_label);
+  gtk_table_attach (GTK_TABLE (table1), live_label, 1, 2, 0, 1,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (live_label), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (live_label), 1, 0.5);
+
+  allocated_label = gtk_label_new ("");
+  gtk_widget_set_name (allocated_label, "allocated_label");
+  gtk_widget_show (allocated_label);
+  gtk_table_attach (GTK_TABLE (table1), allocated_label, 1, 2, 1, 2,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (allocated_label), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (allocated_label), 1, 0.5);
+
+  footprint_label = gtk_label_new ("");
+  gtk_widget_set_name (footprint_label, "footprint_label");
+  gtk_widget_show (footprint_label);
+  gtk_table_attach (GTK_TABLE (table1), footprint_label, 1, 2, 2, 3,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (footprint_label), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (footprint_label), 1, 0.5);
+
+  alloc_rate_label = gtk_label_new ("");
+  gtk_widget_set_name (alloc_rate_label, "alloc_rate_label");
+  gtk_widget_show (alloc_rate_label);
+  gtk_table_attach (GTK_TABLE (table1), alloc_rate_label, 1, 2, 3, 4,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (alloc_rate_label), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (alloc_rate_label), 1, 0.5);
+
+  label43 = gtk_label_new ("Stats");
+  gtk_widget_set_name (label43, "label43");
+  gtk_widget_show (label43);
+  gtk_frame_set_label_widget (GTK_FRAME (frame5), label43);
+
+  /* "Threads" frame: per-state thread counts plus a total row. */
+  frame9 = gtk_frame_new (NULL);
+  gtk_widget_set_name (frame9, "frame9");
+  gtk_widget_show (frame9);
+  gtk_box_pack_start (GTK_BOX (vbox5), frame9, FALSE, TRUE, 0);
+
+  table5 = gtk_table_new (9, 2, FALSE);
+  gtk_widget_set_name (table5, "table5");
+  gtk_widget_show (table5);
+  gtk_container_add (GTK_CONTAINER (frame9), table5);
+  gtk_container_set_border_width (GTK_CONTAINER (table5), 6);
+  gtk_table_set_col_spacings (GTK_TABLE (table5), 10);
+
+  label20 = gtk_label_new ("Running");
+  gtk_widget_set_name (label20, "label20");
+  gtk_widget_show (label20);
+  gtk_table_attach (GTK_TABLE (table5), label20, 0, 1, 0, 1,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label20), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label20), 1, 0.5);
+
+  label21 = gtk_label_new ("Blocked on I/O (Read)");
+  gtk_widget_set_name (label21, "label21");
+  gtk_widget_show (label21);
+  gtk_table_attach (GTK_TABLE (table5), label21, 0, 1, 1, 2,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label21), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label21), 1, 0.5);
+
+  label22 = gtk_label_new ("Blocked on MVar");
+  gtk_widget_set_name (label22, "label22");
+  gtk_widget_show (label22);
+  gtk_table_attach (GTK_TABLE (table5), label22, 0, 1, 3, 4,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label22), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label22), 1, 0.5);
+
+  label24 = gtk_label_new ("Blocked on throwTo");
+  gtk_widget_set_name (label24, "label24");
+  gtk_widget_show (label24);
+  gtk_table_attach (GTK_TABLE (table5), label24, 0, 1, 4, 5,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label24), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label24), 1, 0.5);
+
+  label26 = gtk_label_new ("Blocked on Black Hole");
+  gtk_widget_set_name (label26, "label26");
+  gtk_widget_show (label26);
+  gtk_table_attach (GTK_TABLE (table5), label26, 0, 1, 5, 6,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label26), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label26), 1, 0.5);
+
+  label25 = gtk_label_new ("Sleeping");
+  gtk_widget_set_name (label25, "label25");
+  gtk_widget_show (label25);
+  gtk_table_attach (GTK_TABLE (table5), label25, 0, 1, 6, 7,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label25), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label25), 1, 0.5);
+
+  label27 = gtk_label_new ("Blocked on I/O (Write)");
+  gtk_widget_set_name (label27, "label27");
+  gtk_widget_show (label27);
+  gtk_table_attach (GTK_TABLE (table5), label27, 0, 1, 2, 3,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label27), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label27), 1, 0.5);
+
+  /* Count labels carry Glade's placeholder text ("label28" etc.);
+   * the RTS overwrites them with real counts at run time. */
+  running_label = gtk_label_new ("label28");
+  gtk_widget_set_name (running_label, "running_label");
+  gtk_widget_show (running_label);
+  gtk_table_attach (GTK_TABLE (table5), running_label, 1, 2, 0, 1,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (running_label), GTK_JUSTIFY_CENTER);
+
+  blockread_label = gtk_label_new ("label29");
+  gtk_widget_set_name (blockread_label, "blockread_label");
+  gtk_widget_show (blockread_label);
+  gtk_table_attach (GTK_TABLE (table5), blockread_label, 1, 2, 1, 2,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (blockread_label), GTK_JUSTIFY_CENTER);
+
+  blockwrite_label = gtk_label_new ("label30");
+  gtk_widget_set_name (blockwrite_label, "blockwrite_label");
+  gtk_widget_show (blockwrite_label);
+  gtk_table_attach (GTK_TABLE (table5), blockwrite_label, 1, 2, 2, 3,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (blockwrite_label), GTK_JUSTIFY_CENTER);
+
+  blockmvar_label = gtk_label_new ("label31");
+  gtk_widget_set_name (blockmvar_label, "blockmvar_label");
+  gtk_widget_show (blockmvar_label);
+  gtk_table_attach (GTK_TABLE (table5), blockmvar_label, 1, 2, 3, 4,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (blockmvar_label), GTK_JUSTIFY_CENTER);
+
+  blockthrowto_label = gtk_label_new ("label32");
+  gtk_widget_set_name (blockthrowto_label, "blockthrowto_label");
+  gtk_widget_show (blockthrowto_label);
+  gtk_table_attach (GTK_TABLE (table5), blockthrowto_label, 1, 2, 4, 5,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (blockthrowto_label), GTK_JUSTIFY_CENTER);
+
+  blockbh_label = gtk_label_new ("label33");
+  gtk_widget_set_name (blockbh_label, "blockbh_label");
+  gtk_widget_show (blockbh_label);
+  gtk_table_attach (GTK_TABLE (table5), blockbh_label, 1, 2, 5, 6,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (blockbh_label), GTK_JUSTIFY_CENTER);
+
+  sleeping_label = gtk_label_new ("label34");
+  gtk_widget_set_name (sleeping_label, "sleeping_label");
+  gtk_widget_show (sleeping_label);
+  gtk_table_attach (GTK_TABLE (table5), sleeping_label, 1, 2, 6, 7,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (sleeping_label), GTK_JUSTIFY_CENTER);
+
+  hseparator1 = gtk_hseparator_new ();
+  gtk_widget_set_name (hseparator1, "hseparator1");
+  gtk_widget_show (hseparator1);
+  gtk_table_attach (GTK_TABLE (table5), hseparator1, 0, 1, 7, 8,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (GTK_EXPAND | GTK_FILL), 0, 0);
+
+  hseparator2 = gtk_hseparator_new ();
+  gtk_widget_set_name (hseparator2, "hseparator2");
+  gtk_widget_show (hseparator2);
+  gtk_table_attach (GTK_TABLE (table5), hseparator2, 1, 2, 7, 8,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (GTK_FILL), 0, 0);
+
+  label35 = gtk_label_new ("Total");
+  gtk_widget_set_name (label35, "label35");
+  gtk_widget_show (label35);
+  gtk_table_attach (GTK_TABLE (table5), label35, 0, 1, 8, 9,
+                    (GtkAttachOptions) (GTK_FILL),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (label35), GTK_JUSTIFY_CENTER);
+  gtk_misc_set_alignment (GTK_MISC (label35), 1, 0.5);
+
+  total_label = gtk_label_new ("label36");
+  gtk_widget_set_name (total_label, "total_label");
+  gtk_widget_show (total_label);
+  gtk_table_attach (GTK_TABLE (table5), total_label, 1, 2, 8, 9,
+                    (GtkAttachOptions) (0),
+                    (GtkAttachOptions) (0), 0, 0);
+  gtk_label_set_justify (GTK_LABEL (total_label), GTK_JUSTIFY_CENTER);
+
+  label44 = gtk_label_new ("Threads");
+  gtk_widget_set_name (label44, "label44");
+  gtk_widget_show (label44);
+  gtk_frame_set_label_widget (GTK_FRAME (frame9), label44);
+
+  /* "Updates" frame: GC stop-mode radio group and stop/continue buttons. */
+  frame6 = gtk_frame_new (NULL);
+  gtk_widget_set_name (frame6, "frame6");
+  gtk_widget_show (frame6);
+  gtk_box_pack_start (GTK_BOX (vbox5), frame6, FALSE, FALSE, 0);
+
+  vbox7 = gtk_vbox_new (FALSE, 10);
+  gtk_widget_set_name (vbox7, "vbox7");
+  gtk_widget_show (vbox7);
+  gtk_container_add (GTK_CONTAINER (frame6), vbox7);
+  gtk_container_set_border_width (GTK_CONTAINER (vbox7), 5);
+
+  vbox9 = gtk_vbox_new (FALSE, 0);
+  gtk_widget_set_name (vbox9, "vbox9");
+  gtk_widget_show (vbox9);
+  gtk_box_pack_start (GTK_BOX (vbox7), vbox9, TRUE, TRUE, 0);
+
+  /* All four radios share cont_radio_group; "Continuous" starts active. */
+  cont_radio = gtk_radio_button_new_with_mnemonic (NULL, "Continuous");
+  gtk_widget_set_name (cont_radio, "cont_radio");
+  gtk_widget_show (cont_radio);
+  gtk_box_pack_start (GTK_BOX (vbox9), cont_radio, FALSE, FALSE, 0);
+  gtk_radio_button_set_group (GTK_RADIO_BUTTON (cont_radio), cont_radio_group);
+  cont_radio_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (cont_radio));
+  gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (cont_radio), TRUE);
+
+  stop_before_radio = gtk_radio_button_new_with_mnemonic (NULL, "Stop before GC");
+  gtk_widget_set_name (stop_before_radio, "stop_before_radio");
+  gtk_widget_show (stop_before_radio);
+  gtk_box_pack_start (GTK_BOX (vbox9), stop_before_radio, FALSE, FALSE, 0);
+  gtk_radio_button_set_group (GTK_RADIO_BUTTON (stop_before_radio), cont_radio_group);
+  cont_radio_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (stop_before_radio));
+
+  stop_after_radio = gtk_radio_button_new_with_mnemonic (NULL, "Stop after GC");
+  gtk_widget_set_name (stop_after_radio, "stop_after_radio");
+  gtk_widget_show (stop_after_radio);
+  gtk_box_pack_start (GTK_BOX (vbox9), stop_after_radio, FALSE, FALSE, 0);
+  gtk_radio_button_set_group (GTK_RADIO_BUTTON (stop_after_radio), cont_radio_group);
+  cont_radio_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (stop_after_radio));
+
+  stop_both_radio = gtk_radio_button_new_with_mnemonic (NULL, "Stop before & after GC");
+  gtk_widget_set_name (stop_both_radio, "stop_both_radio");
+  gtk_widget_show (stop_both_radio);
+  gtk_box_pack_start (GTK_BOX (vbox9), stop_both_radio, FALSE, FALSE, 0);
+  gtk_radio_button_set_group (GTK_RADIO_BUTTON (stop_both_radio), cont_radio_group);
+  cont_radio_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (stop_both_radio));
+
+  vbox8 = gtk_vbox_new (FALSE, 0);
+  gtk_widget_set_name (vbox8, "vbox8");
+  gtk_widget_show (vbox8);
+  gtk_box_pack_start (GTK_BOX (vbox7), vbox8, FALSE, FALSE, 0);
+
+  stop_but = gtk_button_new_with_mnemonic ("Stop");
+  gtk_widget_set_name (stop_but, "stop_but");
+  gtk_widget_show (stop_but);
+  gtk_box_pack_start (GTK_BOX (vbox8), stop_but, FALSE, FALSE, 0);
+
+  continue_but = gtk_button_new_with_mnemonic ("Continue");
+  gtk_widget_set_name (continue_but, "continue_but");
+  gtk_widget_show (continue_but);
+  gtk_box_pack_start (GTK_BOX (vbox8), continue_but, FALSE, FALSE, 0);
+
+  label45 = gtk_label_new ("Updates");
+  gtk_widget_set_name (label45, "label45");
+  gtk_widget_show (label45);
+  gtk_frame_set_label_widget (GTK_FRAME (frame6), label45);
+
+  quit_but = gtk_button_new_with_mnemonic ("Quit");
+  gtk_widget_set_name (quit_but, "quit_but");
+  gtk_widget_show (quit_but);
+  gtk_box_pack_end (GTK_BOX (vbox5), quit_but, FALSE, FALSE, 0);
+
+  statusbar = gtk_statusbar_new ();
+  gtk_widget_set_name (statusbar, "statusbar");
+  gtk_widget_show (statusbar);
+  gtk_box_pack_start (GTK_BOX (vbox1), statusbar, FALSE, FALSE, 0);
+
+  /* Wire the radio group and buttons up to the VisCallbacks handlers. */
+  g_signal_connect ((gpointer) cont_radio, "clicked",
+                    G_CALLBACK (on_cont_radio_clicked),
+                    NULL);
+  g_signal_connect ((gpointer) stop_before_radio, "clicked",
+                    G_CALLBACK (on_stop_before_radio_clicked),
+                    NULL);
+  g_signal_connect ((gpointer) stop_after_radio, "clicked",
+                    G_CALLBACK (on_stop_after_radio_clicked),
+                    NULL);
+  g_signal_connect ((gpointer) stop_both_radio, "clicked",
+                    G_CALLBACK (on_stop_both_radio_clicked),
+                    NULL);
+  g_signal_connect ((gpointer) stop_but, "clicked",
+                    G_CALLBACK (on_stop_but_clicked),
+                    NULL);
+  g_signal_connect ((gpointer) continue_but, "clicked",
+                    G_CALLBACK (on_continue_but_clicked),
+                    NULL);
+  g_signal_connect ((gpointer) quit_but, "clicked",
+                    G_CALLBACK (on_quit_but_clicked),
+                    NULL);
+
+  /* Store pointers to all widgets, for use by lookup_widget(). */
+  GLADE_HOOKUP_OBJECT_NO_REF (GHC_Front_Panel, GHC_Front_Panel, "GHC_Front_Panel");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox1, "vbox1");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, hbox1, "hbox1");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox4, "vbox4");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, frame3, "frame3");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, hbox3, "hbox3");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label40, "label40");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, map_ruler, "map_ruler");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, memmap, "memmap");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label1, "label1");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, frame8, "frame8");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox14, "vbox14");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, table4, "table4");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, gen_ruler, "gen_ruler");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, gen_hbox, "gen_hbox");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, generations, "generations");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label39, "label39");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label41, "label41");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, frame7, "frame7");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, table3, "table3");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, res_hruler, "res_hruler");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, res_vruler, "res_vruler");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, res_drawingarea, "res_drawingarea");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label37, "label37");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label38, "label38");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label42, "label42");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox5, "vbox5");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, frame5, "frame5");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox6, "vbox6");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, table1, "table1");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label12, "label12");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label13, "label13");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label14, "label14");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label15, "label15");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label16, "label16");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label17, "label17");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label18, "label18");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label19, "label19");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, live_label, "live_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, allocated_label, "allocated_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, footprint_label, "footprint_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, alloc_rate_label, "alloc_rate_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label43, "label43");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, frame9, "frame9");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, table5, "table5");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label20, "label20");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label21, "label21");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label22, "label22");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label24, "label24");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label26, "label26");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label25, "label25");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label27, "label27");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, running_label, "running_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, blockread_label, "blockread_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, blockwrite_label, "blockwrite_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, blockmvar_label, "blockmvar_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, blockthrowto_label, "blockthrowto_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, blockbh_label, "blockbh_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, sleeping_label, "sleeping_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, hseparator1, "hseparator1");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, hseparator2, "hseparator2");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label35, "label35");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, total_label, "total_label");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label44, "label44");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, frame6, "frame6");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox7, "vbox7");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox9, "vbox9");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, cont_radio, "cont_radio");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, stop_before_radio, "stop_before_radio");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, stop_after_radio, "stop_after_radio");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, stop_both_radio, "stop_both_radio");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, vbox8, "vbox8");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, stop_but, "stop_but");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, continue_but, "continue_but");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, label45, "label45");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, quit_but, "quit_but");
+  GLADE_HOOKUP_OBJECT (GHC_Front_Panel, statusbar, "statusbar");
+
+  return GHC_Front_Panel;
+}
+
diff --git a/rts/VisWindow.h b/rts/VisWindow.h
new file mode 100644
index 0000000000..c646c40c02
--- /dev/null
+++ b/rts/VisWindow.h
@@ -0,0 +1,5 @@
+/*
+ * DO NOT EDIT THIS FILE - it is generated by Glade.
+ */
+
+GtkWidget* create_GHC_Front_Panel (void);
diff --git a/rts/Weak.c b/rts/Weak.c
new file mode 100644
index 0000000000..f010395221
--- /dev/null
+++ b/rts/Weak.c
@@ -0,0 +1,97 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Weak pointers / finalizers
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "PosixSource.h"
+#define COMPILING_RTS_MAIN
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "SchedAPI.h"
+#include "RtsFlags.h"
+#include "Weak.h"
+#include "Storage.h"
+#include "Schedule.h"
+#include "Prelude.h"
+#include "RtsAPI.h"
+
+StgWeak *weak_ptr_list;
+
+/*
+ * scheduleFinalizers() is called on the list of weak pointers found
+ * to be dead after a garbage collection. It overwrites each object
+ * with DEAD_WEAK, and creates a new thread to run the pending finalizers.
+ *
+ * This function is called just after GC. The weak pointers on the
+ * argument list are those whose keys were found to be not reachable,
+ * however the value and finalizer fields have by now been marked live.
+ * The weak pointer object itself may not be alive - i.e. we may be
+ * looking at either an object in from-space or one in to-space. It
+ * doesn't really matter either way.
+ *
+ * Pre-condition: sched_mutex _not_ held.
+ */
+
+void
+scheduleFinalizers(Capability *cap, StgWeak *list)
+{
+ StgWeak *w;
+ StgTSO *t;
+ StgMutArrPtrs *arr;
+ nat n;
+
+ // count number of finalizers, and kill all the weak pointers first...
+ n = 0;
+ for (w = list; w; w = w->link) {
+
+ // Better not be a DEAD_WEAK at this stage; the garbage
+ // collector removes DEAD_WEAKs from the weak pointer list.
+ ASSERT(w->header.info != &stg_DEAD_WEAK_info);
+
+ if (w->finalizer != &stg_NO_FINALIZER_closure) {
+ n++;
+ }
+
+#ifdef PROFILING
+ // A weak pointer is inherently used, so we do not need to call
+ // LDV_recordDead().
+ //
+ // Furthermore, when PROFILING is turned on, dead weak
+ // pointers are exactly as large as weak pointers, so there is
+ // no need to fill the slop, either. See stg_DEAD_WEAK_info
+ // in StgMiscClosures.hc.
+#endif
+ SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);
+ }
+
+ // No finalizers to run?
+ if (n == 0) return;
+
+ IF_DEBUG(weak,debugBelch("weak: batching %d finalizers\n", n));
+
+ arr = (StgMutArrPtrs *)allocateLocal(cap, sizeofW(StgMutArrPtrs) + n);
+ TICK_ALLOC_PRIM(sizeofW(StgMutArrPtrs), n, 0);
+ SET_HDR(arr, &stg_MUT_ARR_PTRS_FROZEN_info, CCS_SYSTEM);
+ arr->ptrs = n;
+
+ n = 0;
+ for (w = list; w; w = w->link) {
+ if (w->finalizer != &stg_NO_FINALIZER_closure) {
+ arr->payload[n] = w->finalizer;
+ n++;
+ }
+ }
+
+ t = createIOThread(cap,
+ RtsFlags.GcFlags.initialStkSize,
+ rts_apply(cap,
+ rts_apply(cap,
+ (StgClosure *)runFinalizerBatch_closure,
+ rts_mkInt(cap,n)),
+ (StgClosure *)arr)
+ );
+ scheduleThread(cap,t);
+}
diff --git a/rts/Weak.h b/rts/Weak.h
new file mode 100644
index 0000000000..ba8c1ca913
--- /dev/null
+++ b/rts/Weak.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Weak pointers / finalizers
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef WEAK_H
+#define WEAK_H
+
+#include "Capability.h"
+
+void scheduleFinalizers(Capability *cap, StgWeak *w);
+void markWeakList(void);
+
+#endif
diff --git a/rts/dotnet/Invoke.c b/rts/dotnet/Invoke.c
new file mode 100644
index 0000000000..585dcacaad
--- /dev/null
+++ b/rts/dotnet/Invoke.c
@@ -0,0 +1,1081 @@
+/*
+ * C callable bridge to the .NET object model
+ *
+ * Managed C++ is used to access the .NET object model via
+ * System.Reflection. Here we provide C callable functions
+ * to that functionality, which we then export via a COM
+ * component.
+ *
+ * Note: the _only_ reason why we're going via COM and not simply
+ * exposing the required via some DLL entry points, is that COM
+ * gives us location independence (i.e., the RTS doesn't need
+ * be told where this interop layer resides in order to hoik
+ * it in, the CLSID suffices (provided the component has been
+ * registered, of course.)) It is a bit tiresome to have to play
+ * by the .NET COM Interop's rules as regards argument arrays,
+ * so we may want to revisit this issue at some point.
+ *
+ * [ But why not simply use MC++ and provide C-callable entry
+ * points to the relevant functionality, and avoid COM interop
+ * altogether? Because we have to be able to (statically)
+ * link with gcc-compiled code, and linking MC++ and gcc-compiled
+ * object files doesn't work.]
+ *
+ * Note: you need something newer than gcc-2.95 to compile this
+ * code (I'm using gcc-3.2, which comes with mingw-2).
+ */
+#define _WIN32_DCOM
+#define COBJMACROS
+#include <stdio.h>
+#include <stdlib.h>
+#include <wtypes.h>
+#ifndef _MSC_VER
+#include <oaidl.h>
+#include <objbase.h>
+#include <oleauto.h>
+# if defined(COBJMACROS) && !defined(_MSC_VER)
+#define IErrorInfo_QueryInterface(T,r,O) (T)->lpVtbl->QueryInterface(T,r,O)
+#define IErrorInfo_AddRef(T) (T)->lpVtbl->AddRef(T)
+#define IErrorInfo_Release(T) (T)->lpVtbl->Release(T)
+#define IErrorInfo_GetSource(T,pbstr) (T)->lpVtbl->GetSource(T,pbstr)
+#define IErrorInfo_GetDescription(T,pbstr) (T)->lpVtbl->GetDescription(T,pbstr)
+
+#define ISupportErrorInfo_QueryInterface(T,r,O) (T)->lpVtbl->QueryInterface(T,r,O)
+#define ISupportErrorInfo_AddRef(T) (T)->lpVtbl->AddRef(T)
+#define ISupportErrorInfo_Release(T) (T)->lpVtbl->Release(T)
+#define ISupportErrorInfo_InterfaceSupportsErrorInfo(T,iid) (T)->lpVtbl->InterfaceSupportsErrorInfo(T,iid)
+# endif
+#endif
+#include "DNInvoke.h"
+#define WANT_UUID_DECLS
+#include "InvokerClient.h"
+#include "Dotnet.h"
+
+/* Local prototypes */
+static void genError( IUnknown* pUnk,
+ HRESULT hr,
+ char* loc,
+ char** pErrMsg);
+static int startBridge(char**);
+static int fromVariant
+ ( DotnetType resTy,
+ VARIANT* pVar,
+ void* res,
+ char** pErrMsg);
+static VARIANT* toVariant ( DotnetArg* p );
+
+/* Pointer to .NET COM component instance; instantiated on demand. */
+static InvokeBridge* pBridge = NULL;
+
+/* convert a char* to a BSTR, copied from the HDirect comlib/ sources */
+static
+HRESULT
+stringToBSTR( /*[in,ptr]*/const char* pstrz
+ , /*[out]*/ BSTR* pbstr
+ )
+{
+ int i;
+
+ if (!pbstr) {
+ return E_FAIL;
+ } else {
+ *pbstr = NULL;
+ }
+ if (!pstrz) {
+ return S_OK;
+ }
+
+ i = MultiByteToWideChar(CP_ACP, 0, pstrz, -1, NULL, 0);
+ if ( i < 0 ) {
+ return E_FAIL;
+ }
+ *pbstr = SysAllocStringLen(NULL,i-1);
+ if (*pbstr != NULL) {
+ MultiByteToWideChar(CP_ACP, 0, pstrz, -1, *pbstr, i-1);
+ // (*pbstr)[i]=0;
+ return S_OK;
+ } else {
+ return E_FAIL;
+ }
+}
+
+static
+char*
+bstrToString( BSTR bstr )
+{
+ int i,len;
+ char *res;
+ int blen;
+
+ if (!bstr) {
+ return NULL;
+ }
+
+ blen = SysStringLen(bstr);
+
+ /* pass in NULL for the multi-byte arg in order to compute length first */
+ len = WideCharToMultiByte(CP_ACP, 0, bstr, blen,
+ NULL, 0, NULL, NULL);
+ if (len == 0) return NULL;
+
+ /* Allocate string of required length. */
+ res = (char*)malloc(sizeof(char) * (len + 1));
+ if (!res) return NULL;
+
+ i = WideCharToMultiByte(CP_ACP, 0, bstr, blen,
+ res, (len+1), NULL, NULL);
+
+ /* Poor error handling to map this to NULL. */
+ if ( i == 0 ) return NULL;
+
+ /* Terminate and return */
+ res[i] = '\0';
+ return res;
+}
+
+static
+void
+freeArgs ( SAFEARRAY* psa )
+{
+ /* The argument SAFEARRAYs contain dynamically allocated
+ * VARIANTs. Release the VARIANT contents and its memory here.
+ */
+ long lb,ub;
+ int i;
+ HRESULT hr;
+ VARIANT *pv = NULL;
+
+ hr = SafeArrayGetLBound(psa, 1, &lb);
+ if (FAILED(hr)) {
+ fprintf(stderr, "freeArgs: failed fetching lower bound\n");
+ SafeArrayDestroy(psa);
+ return;
+ }
+ hr = SafeArrayGetUBound(psa, 1, &ub);
+ if (FAILED(hr)) {
+ fprintf(stderr, "freeArgs: failed fetching upper bound\n");
+ SafeArrayDestroy(psa);
+ return;
+ }
+ for ( i = 0; i < (ub - lb); i++ ) {
+ hr = SafeArrayGetElement(psa,(long*)&i,(void*)pv);
+ if (FAILED(hr)) {
+ fprintf(stderr, "freeArgs: unable to fetch element %d\n", i);
+ SafeArrayDestroy(psa);
+ return;
+ }
+ VariantClear(pv);
+ free(pv);
+ }
+ SafeArrayDestroy(psa);
+}
+
+static
+SAFEARRAY*
+marshalArgs ( DotnetArg* args,
+ unsigned int n_args )
+{
+ SAFEARRAY *psa;
+ SAFEARRAYBOUND rgsabound[1];
+ int i;
+ long idxArr[1];
+ HRESULT hr;
+ VARIANT* var;
+
+ rgsabound[0].lLbound = 0;
+ rgsabound[0].cElements = n_args;
+ psa = SafeArrayCreate(VT_VARIANT, 1, rgsabound);
+
+ for(i=0;i < n_args; i++) {
+ idxArr[0] = i;
+ var = toVariant(&args[i]);
+ hr = SafeArrayPutElement(psa, idxArr, (void*)var);
+ }
+ return psa;
+}
+
+/*
+ * ***** Accessing the .NET object model *****
+ *
+ * General remarks:
+ *
+ * - the functions report error conditions via their return value; a char*.
+ * If NULL, the call was successful. If not, the returned string
+ * contains the (dynamically allocated) error message.
+ *
+ * This unorthodox calling convention is used to simplify the task
+ * of interfacing to these funs from GHC-generated code.
+ */
+
+/*
+ * Function: DN_invokeStatic()
+ *
+ * Given assembly and fully-qualified name of a static .NET method,
+ * invoke it using the supplied arguments.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_invokeStatic ( char *assemName,
+ char *methName,
+ DotnetArg *args,
+ int n_args,
+ DotnetType resultTy,
+ void *res)
+{
+ SAFEARRAY* psa;
+ VARIANT result;
+ HRESULT hr;
+ BSTR b_assemName;
+ BSTR b_methName;
+ char* errMsg = NULL;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ /* Package up arguments */
+ psa = marshalArgs(args, n_args);
+ VariantInit(&result);
+
+ hr = stringToBSTR(assemName, &b_assemName);
+ hr = stringToBSTR(methName, &b_methName);
+
+ hr = InvokeBridge_InvokeStaticMethod(pBridge,
+ b_assemName,
+ b_methName,
+ psa,
+ &result);
+ SysFreeString(b_assemName);
+ SysFreeString(b_methName);
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "DInvoke.invokeStatic", &errMsg);
+ return errMsg;
+ }
+
+ fromVariant(resultTy, &result, res, &errMsg);
+ freeArgs(psa);
+
+ return errMsg;
+}
+
+/*
+ * Function: DN_invokeMethod()
+ *
+ * Given method name and arguments, invoke .NET method on an object.
+ * The object ref / this-pointer is passed in as the last argument.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_invokeMethod ( char *clsAndMethName,
+ DotnetArg *args,
+ int n_args,
+ DotnetType resultTy,
+ void *res)
+{
+ SAFEARRAY* psa;
+ VARIANT result;
+ HRESULT hr;
+ char* methName;
+ BSTR b_methName;
+ char* errMsg = NULL;
+ VARIANT *thisPtr;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ if (n_args <= 0) {
+ genError(NULL, 0x0, "Invoke.invokeMethod - missing this pointer", &errMsg);
+ return errMsg;
+ }
+
+ /* The this-pointer is last */
+ thisPtr = toVariant(&args[n_args-1]);
+
+ /* Package up arguments */
+ psa = marshalArgs(args, n_args-1);
+ VariantInit(&result);
+
+ /* If the user has qualified method with class, ignore the class bit. */
+ if ( (methName = strrchr(clsAndMethName, '.')) == NULL) {
+ methName = clsAndMethName;
+ } else {
+ /* Skip past '.' */
+ methName++;
+ }
+
+ hr = stringToBSTR(methName, &b_methName);
+ hr = InvokeBridge_InvokeMethod(pBridge,
+ *thisPtr,
+ b_methName,
+ psa,
+ &result);
+ SysFreeString(b_methName);
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "Invoke.invokeMethod", &errMsg);
+ return errMsg;
+ }
+
+ fromVariant(resultTy, &result, res, &errMsg);
+ freeArgs(psa);
+
+ return errMsg;
+}
+
+/*
+ * Function: DN_getField()
+ *
+ * Given a field name and an object pointer, read a field value.
+ * The object ref / this-pointer is passed in as the last argument.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_getField ( char *clsAndMethName,
+ DotnetArg *args,
+ int n_args,
+ DotnetType resultTy,
+ void *res)
+{
+ VARIANT result;
+ HRESULT hr;
+ char* methName;
+ BSTR b_methName;
+ char* errMsg = NULL;
+ VARIANT *thisPtr;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ if (n_args <= 0) {
+ genError(NULL, 0x0, "Invoke.getField - missing this pointer", &errMsg);
+ return errMsg;
+ }
+
+ /* The this-pointer is last */
+ thisPtr = toVariant(&args[n_args-1]);
+ VariantInit(&result);
+
+ /* If the user has qualified method with class, ignore the class bit. */
+ if ( (methName = strrchr(clsAndMethName, '.')) == NULL) {
+ methName = clsAndMethName;
+ } else {
+ /* Skip past '.' */
+ methName++;
+ }
+
+ hr = stringToBSTR(methName, &b_methName);
+ hr = InvokeBridge_GetField(pBridge,
+ *thisPtr,
+ b_methName,
+ &result);
+ SysFreeString(b_methName);
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "Invoke.getField", &errMsg);
+ return errMsg;
+ }
+
+ fromVariant(resultTy, &result, res, &errMsg);
+ return errMsg;
+}
+
+/*
+ * Function: DN_setField()
+ *
+ * Given field name, a value and an object reference, set the field value of
+ * an object.
+ * The object ref / this-pointer is passed in as the last argument.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_setField ( char *clsAndMethName,
+ DotnetArg *args,
+ int n_args,
+ /* next two args are ignored */
+ DotnetType resultTy,
+ void *res)
+{
+ HRESULT hr;
+ char* methName;
+ BSTR b_methName;
+ char* errMsg = NULL;
+ VARIANT *thisPtr;
+ VARIANT *pVal;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ if (n_args != 2) {
+ genError(NULL, 0x0, "Invoke.setField - missing this pointer", &errMsg);
+ return errMsg;
+ }
+
+ /* The this-pointer is last */
+ thisPtr = toVariant(&args[1]);
+
+ /* Package up arguments */
+ pVal = toVariant(&args[0]);
+
+ /* If the user has qualified method with class, ignore the class bit. */
+ if ( (methName = strrchr(clsAndMethName, '.')) == NULL) {
+ methName = clsAndMethName;
+ } else {
+ /* Skip past '.' */
+ methName++;
+ }
+
+ hr = stringToBSTR(methName, &b_methName);
+ hr = InvokeBridge_SetField(pBridge,
+ *thisPtr,
+ b_methName,
+ *pVal);
+ SysFreeString(b_methName);
+ VariantClear(pVal);
+ free(pVal);
+ free(thisPtr);
+
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "Invoke.setField", &errMsg);
+ return errMsg;
+ }
+ return errMsg;
+}
+
+
+/*
+ * Function: DN_createObject()
+ *
+ * Given assembly and fully-qualified name of a type,
+ * invoke its (possibly parameterised) constructor.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_createObject ( char *assemName,
+ char *methName,
+ DotnetArg *args,
+ int n_args,
+ DotnetType resultTy,
+ void *res)
+{
+ SAFEARRAY* psa;
+ VARIANT result;
+ HRESULT hr;
+ BSTR b_assemName;
+ BSTR b_methName;
+ char* errMsg = NULL;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ /* Package up arguments */
+ psa = marshalArgs(args, n_args);
+ VariantInit(&result);
+
+ hr = stringToBSTR(assemName, &b_assemName);
+ hr = stringToBSTR(methName, &b_methName);
+
+ hr = InvokeBridge_CreateObject(pBridge,
+ b_assemName,
+ b_methName,
+ psa,
+ &result);
+ SysFreeString(b_assemName);
+ SysFreeString(b_methName);
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "DN_createObject", &errMsg);
+ return errMsg;
+ }
+
+ fromVariant(resultTy, &result, res, &errMsg);
+ freeArgs(psa);
+
+ return errMsg;
+}
+
+/*
+ * Function: DN_getStatic()
+ *
+ * Given assembly and fully-qualified field name, fetch value of static
+ * field.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_getStatic ( char *assemName,
+ char *fieldClsName,
+ /* the next two args are ignored */
+ DotnetArg *args,
+ int n_args,
+ DotnetType resultTy,
+ void *res)
+{
+ VARIANT result;
+ HRESULT hr;
+ BSTR b_assemName;
+ BSTR b_clsName;
+ BSTR b_fieldName;
+ char* errMsg = NULL;
+ char* fieldName;
+ char* clsName = fieldName;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ fieldName = (char*)malloc(sizeof(char) * (strlen(fieldClsName) + 1));
+ strcpy(fieldName, fieldClsName);
+ clsName = fieldName;
+
+ if (( fieldName = strrchr(fieldName, '.')) == NULL ) {
+ genError((IUnknown*)pBridge, 0x0, "Invoke.getStatic - malformed field spec", &errMsg);
+ return errMsg;
+ }
+ *fieldName = '\0';
+ fieldName++;
+
+ VariantInit(&result);
+
+ hr = stringToBSTR(assemName, &b_assemName);
+ hr = stringToBSTR(fieldName, &b_fieldName);
+ hr = stringToBSTR(clsName, &b_clsName);
+ /* ToDo: honour assembly spec */
+ hr = InvokeBridge_GetStaticField(pBridge,
+ b_clsName,
+ b_fieldName,
+ &result);
+ SysFreeString(b_assemName);
+ SysFreeString(b_clsName);
+ SysFreeString(b_fieldName);
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "Invoke.getStatic", &errMsg);
+ return errMsg;
+ }
+ fromVariant(resultTy, &result, res, &errMsg);
+
+ return errMsg;
+}
+
+/*
+ * Function: DN_setStatic()
+ *
+ * Given assembly and fully-qualified field name, set value of static
+ * field.
+ *
+ * Returns NULL on success, pointer to error message if an error.
+ *
+ */
+char*
+DN_setStatic ( char *assemName,
+ char *fieldClsName,
+ DotnetArg *args,
+ int n_args,
+ /* the next two args are ignored */
+ DotnetType resultTy,
+ void *res)
+{
+ VARIANT result;
+ VARIANT *pVal;
+ HRESULT hr;
+ BSTR b_assemName;
+ BSTR b_clsName;
+ BSTR b_fieldName;
+ char* errMsg = NULL;
+ char* fieldName;
+ char* clsName = fieldName;
+
+ if (!pBridge && !startBridge(&errMsg)) {
+ return errMsg;
+ }
+
+ fieldName = (char*)malloc(sizeof(char) * (strlen(fieldClsName) + 1));
+ strcpy(fieldName, fieldClsName);
+ clsName = fieldName;
+
+ if (( fieldName = strrchr(fieldName, '.')) == NULL ) {
+ genError((IUnknown*)pBridge, 0x0, "Invoke.setStatic - malformed field spec", &errMsg);
+ return errMsg;
+ }
+ *fieldName = '\0';
+ fieldName++;
+
+ pVal = toVariant(&args[0]);
+ VariantInit(&result);
+
+ hr = stringToBSTR(assemName, &b_assemName);
+ hr = stringToBSTR(fieldName, &b_fieldName);
+ hr = stringToBSTR(clsName, &b_clsName);
+ /* ToDo: honour assembly spec */
+ hr = InvokeBridge_SetStaticField(pBridge,
+ b_clsName,
+ b_fieldName,
+ *pVal);
+ SysFreeString(b_assemName);
+ SysFreeString(b_clsName);
+ SysFreeString(b_fieldName);
+ VariantClear(pVal);
+ free(pVal);
+ if (FAILED(hr)) {
+ genError((IUnknown*)pBridge, hr, "Invoke.setStatic", &errMsg);
+ return errMsg;
+ }
+ fromVariant(resultTy, &result, res, &errMsg);
+
+ return errMsg;
+}
+
+
+
+
+/*
+ * Function: startBridge(pErrMsg)
+ *
+ * Instantiates an InvokeBridge component, which is then
+ * used to interact with the .NET world.
+ *
+ * If the component isn't available locally, zero is returned.
+ * Otherwise, 1.
+ */
+static
+int
+startBridge(char** pErrMsg)
+{
+ HRESULT hr;
+ IUnknown *pUnk;
+
+ hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.createBridge.CoInitializeEx", pErrMsg);
+ return FALSE;
+ }
+
+ hr = CoCreateInstance( &CLSID_InvokeBridge,
+ NULL,
+ CLSCTX_INPROC_SERVER,
+ &IID_IUnknown,
+ (void**)&pUnk);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.createBridge.CoCreateInstance", pErrMsg);
+ return 0;
+ }
+
+ hr = IUnknown_QueryInterface(pUnk, &IID_InvokeBridge, (void**)&pBridge);
+ IUnknown_Release(pUnk);
+ if (FAILED(hr)) {
+ genError(pUnk, hr, "DInvoke.createBridge.QueryInterface.InvokeBridge", pErrMsg);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Function: stopBridge()
+ *
+ * Releases the InvokeBridge object and closes the COM library.
+ *
+ */
+void
+stopDotnetBridge()
+{
+ if (pBridge) {
+ InvokeBridge_Release(pBridge);
+ pBridge = NULL;
+ CoUninitialize();
+ }
+ /* Match up the call to CoInitializeEx() in startBridge(). */
+}
+
+/*
+ * Function: genError()
+ *
+ * Construct a string describing an error condition given
+ * an HRESULT and a location.
+ *
+ * If an interface pointer is passed in via the first arg,
+ * attempts are made to get at richer error information through
+ * the IErrorInfo interface. (Note: we don't currently look for
+ * the _Exception interface for even more detailed info.)
+ *
+ */
+#define LOCATION_HDR "Location: "
+#define HRESULT_HDR "HRESULT: "
+#define SOURCE_HDR "Source: "
+#define DESCR_HDR "Description: "
+#define NEWLINE_EXTRA 3
+
+static
+void
+genError(IUnknown* pUnk,
+ HRESULT err,
+ char* loc,
+ char** pErrMsg)
+{
+ HRESULT hr;
+ HRESULT invoke_hr = err;
+ char* invoke_src = NULL;
+ char* invoke_descr = NULL;
+ char* buf;
+ int bufLen;
+
+ /* If an interface pointer has been supplied, look for
+ * IErrorInfo in order to get more detailed information
+ * on the failure.
+ *
+ * The CLR's .NET COM Interop implementation does provide
+ * IErrorInfo, so we're not really clutching at straws here..
+ *
+ * Note: CLR also reflects .NET exceptions via the _Exception*
+ * interface..
+ *
+ */
+ if (pUnk) {
+ ISupportErrorInfo *pSupp;
+ IErrorInfo *pErrInfo;
+ BSTR src = NULL;
+ BSTR descr = NULL;
+
+ hr = IUnknown_QueryInterface(pUnk,
+ &IID_ISupportErrorInfo,
+ (void**)&pSupp);
+ if ( SUCCEEDED(hr) ) {
+ hr = ISupportErrorInfo_InterfaceSupportsErrorInfo(pSupp,
+ &IID_InvokeBridge);
+ if ( SUCCEEDED(hr) ) {
+ hr = GetErrorInfo(0,&pErrInfo);
+ if ( SUCCEEDED(hr) ) {
+ IErrorInfo_GetSource(pErrInfo,&src);
+ IErrorInfo_GetDescription(pErrInfo,&descr);
+ invoke_src = bstrToString(src);
+ invoke_descr = bstrToString(descr);
+
+ IErrorInfo_Release(pErrInfo);
+ if (src) { SysFreeString(src); src = NULL; }
+ if (descr) { SysFreeString(descr); descr = NULL; }
+ }
+ ISupportErrorInfo_Release(pSupp);
+ }
+ }
+ }
+ /* Putting it all together.. */
+ bufLen = sizeof(LOCATION_HDR) + strlen(loc) + NEWLINE_EXTRA +
+ sizeof(HRESULT_HDR) + 16 + NEWLINE_EXTRA +
+ sizeof(SOURCE_HDR) + (invoke_src ? strlen(invoke_src) : 16) + NEWLINE_EXTRA +
+ sizeof(DESCR_HDR) + (invoke_descr ? strlen(invoke_descr) : 16) + NEWLINE_EXTRA;
+ buf = (char*) malloc(sizeof(char) * (bufLen + 1));
+ if (!buf) {
+ fprintf(stderr, "Unable to allocate %d for error message", (bufLen + 1));
+ *pErrMsg = NULL;
+ return;
+ }
+
+ _snprintf(buf, bufLen, "%s%s\n%s0x%08x\n%s%s\n%s%s",
+ LOCATION_HDR, loc,
+ HRESULT_HDR, invoke_hr,
+ SOURCE_HDR, invoke_src,
+ DESCR_HDR, invoke_descr);
+
+ /* Done with these chaps */
+ if (invoke_src) free(invoke_src);
+ if (invoke_descr) free(invoke_descr);
+
+ if (pErrMsg) *pErrMsg = buf;
+ fprintf(stderr, "**InvokeBridge Error:\n%s", buf); fflush(stderr);
+}
+
+/* Converting to/from VARIANTs */
+
+/*
+ * Function: fromVariant()
+ *
+ * Unmarshal the contents of a VARIANT, converting its embedded value
+ * into the desired DotnetType (if possible.)
+ *
+ * Returns 1 if successful, 0 otherwise. If the conversion fails,
+ * *pErrMsg holds the error message string.
+ */
+static
+int
+fromVariant (DotnetType resTy,
+ VARIANT* pVar,
+ void* res,
+ char** pErrMsg)
+{
+ VARIANT vNew;
+ HRESULT hr;
+
+ VariantInit(&vNew);
+ switch(resTy) {
+ case Dotnet_Byte:
+ case Dotnet_Char:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_UI1);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_UI1}", pErrMsg);
+ return FALSE;
+ }
+ *((unsigned char*)res) = vNew.bVal;
+ return 1;
+ case Dotnet_Boolean:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_BOOL);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_BOOL}", pErrMsg);
+ return 0;
+ }
+ *((unsigned char*)res) = vNew.bVal;
+ return 1;
+ case Dotnet_Int:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_INT);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_INT}", pErrMsg);
+ return 0;
+ }
+ *((int*)res) = vNew.intVal;
+ return 1;
+ case Dotnet_Int8:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_I1);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_I1}", pErrMsg);
+ return 0;
+ }
+ *((signed char*)res) = vNew.bVal;
+ return 1;
+ case Dotnet_Int16:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_I2);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_I2}", pErrMsg);
+ return 0;
+ }
+ *((signed short*)res) = vNew.iVal;
+ return 1;
+ case Dotnet_Int32:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_I4);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_I4}", pErrMsg);
+ return 0;
+ }
+ *((signed int*)res) = vNew.lVal;
+ return 1;
+ case Dotnet_Int64:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_I8);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_I8}", pErrMsg);
+ return 0;
+ }
+#ifdef _MSC_VER
+ *((__int64*)res) = vNew.llVal;
+#else
+ *((long long*)res) = vNew.lVal;
+#endif
+ return 1;
+ case Dotnet_Float:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_R4);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_R4}", pErrMsg);
+ return 0;
+ }
+ *((float*)res) = vNew.fltVal;
+ return 1;
+ case Dotnet_Double:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_R8);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_R4}", pErrMsg);
+ return 0;
+ }
+ *((double*)res) = vNew.dblVal;
+ return 1;
+ case Dotnet_Word8:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_UI1);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_UI1}", pErrMsg);
+ return 0;
+ }
+ *((unsigned char*)res) = vNew.bVal;
+ return 1;
+ case Dotnet_Word16:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_UI2);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_UI2}", pErrMsg);
+ return 0;
+ }
+ *((unsigned short*)res) = vNew.uiVal;
+ return 1;
+ case Dotnet_Word32:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_UI4);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_UI4}", pErrMsg);
+ return 0;
+ }
+ *((unsigned int*)res) = vNew.ulVal;
+ return 1;
+ case Dotnet_Word64:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_UI8);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_UI8}", pErrMsg);
+ return 0;
+ }
+#ifdef _MSC_VER
+ *((unsigned __int64*)res) = vNew.ullVal;
+#else
+ *((unsigned long long*)res) = vNew.lVal;
+#endif
+ return 1;
+ case Dotnet_Ptr:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_BYREF);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_BYREF}", pErrMsg);
+ return 0;
+ }
+ *((void**)res) = vNew.byref;
+ return 1;
+ case Dotnet_Unit:
+ return 0;
+ case Dotnet_Object:
+ if ( pVar->vt == VT_BSTR ) {
+ /* Special handling for strings. If the user has asked for
+ * the string in object form, give him/her that.
+ */
+ VARIANT res;
+
+ VariantInit(&res);
+ hr = InvokeBridge_NewString(pBridge,
+ pVar->bstrVal,
+ &res);
+ if (SUCCEEDED(hr)) {
+ pVar = &res;
+ }
+ }
+ hr = VariantChangeType (&vNew, pVar, 0, VT_UNKNOWN);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_UNKNOWN}", pErrMsg);
+ return 0;
+ }
+ *((IUnknown**)res) = vNew.punkVal;
+ return 1;
+ case Dotnet_String:
+ hr = VariantChangeType (&vNew, pVar, 0, VT_BSTR);
+ if (FAILED(hr)) {
+ genError(NULL, hr, "DInvoke.fromVariant{VT_BSTR}", pErrMsg);
+ return 0;
+ }
+ /* Storage is allocated by malloc(), caller is resp for freeing. */
+ *((char**)res) = bstrToString(vNew.bstrVal);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Function: toVariant()
+ *
+ * Convert a DotnetArg into a VARIANT. The VARIANT
+ * is dynamically allocated.
+ *
+ * The result is the pointer to the filled-in VARIANT structure;
+ * NULL if allocation failed.
+ *
+ */
+static
+VARIANT*
+toVariant ( DotnetArg* p )
+{
+ VARIANT* v = (VARIANT*)malloc(sizeof(VARIANT));
+ if (!v) return NULL;
+ VariantInit(v);
+ switch (p->arg_type) {
+ case Dotnet_Byte:
+ v->vt = VT_UI1;
+ v->bVal = p->arg.arg_byte;
+ break;
+ case Dotnet_Char:
+ v->vt = VT_UI1;
+ v->bVal = p->arg.arg_char;
+ break;
+ case Dotnet_Boolean:
+ v->vt = VT_BOOL;
+ v->boolVal = p->arg.arg_bool;
+ break;
+ case Dotnet_Int:
+ v->vt = VT_INT;
+ v->intVal = p->arg.arg_int;
+ break;
+ case Dotnet_Int8:
+ v->vt = VT_I1;
+ v->bVal = p->arg.arg_int8;
+ break;
+ case Dotnet_Int16:
+ v->vt = VT_I2;
+ v->iVal = p->arg.arg_int16;
+ break;
+ case Dotnet_Int32:
+ v->vt = VT_I4;
+ v->lVal = p->arg.arg_int32;
+ break;
+ case Dotnet_Int64:
+ v->vt = VT_I8;
+#ifdef _MSC_VER
+ v->llVal = p->arg.arg_int64;
+#else
+ (long long*)(v->lVal) = p->arg.arg_int64;
+#endif
+ break;
+ case Dotnet_Float:
+ v->vt = VT_R4;
+ v->fltVal = p->arg.arg_float;
+ break;
+ case Dotnet_Double:
+ v->vt = VT_R8;
+ v->dblVal = p->arg.arg_double;
+ break;
+ case Dotnet_Word8:
+ v->vt = VT_UI1;
+ v->bVal = p->arg.arg_word8;
+ break;
+ case Dotnet_Word16:
+ v->vt = VT_UI2;
+ v->uiVal = p->arg.arg_word16;
+ break;
+ case Dotnet_Word32:
+ v->vt = VT_UI4;
+ v->ulVal = p->arg.arg_word32;
+ break;
+ case Dotnet_Word64:
+ v->vt = VT_UI8;
+#ifdef _MSC_VER
+ v->ullVal = p->arg.arg_word64;
+#else
+ (unsigned long long*)(v->lVal) = p->arg.arg_word64;
+#endif
+ break;
+ case Dotnet_Ptr:
+ v->vt = VT_BYREF;
+ v->byref = p->arg.arg_ptr;
+ break;
+ case Dotnet_Unit:
+ v->vt = VT_EMPTY;
+ break;
+ case Dotnet_Object:
+ v->vt = VT_UNKNOWN;
+ v->punkVal = (IUnknown*)p->arg.arg_obj;
+ break;
+ case Dotnet_String: {
+ BSTR b;
+ HRESULT hr;
+ v->vt = VT_BSTR;
+ hr = stringToBSTR((const char*)p->arg.arg_str,&b);
+ v->bstrVal = b;
+ break; }
+ }
+ return v;
+}
diff --git a/rts/dotnet/Invoker.cpp b/rts/dotnet/Invoker.cpp
new file mode 100644
index 0000000000..d8ad87212d
--- /dev/null
+++ b/rts/dotnet/Invoker.cpp
@@ -0,0 +1,338 @@
+//
+// (c) 2002-2003, sof.
+//
+// Dynamic invocation helper classes. The details of how
+// to access the .NET object model via the Reflection API
+// is taken care of by Invoker.{h,cpp}
+//
+#include "Invoker.h"
+
+namespace DynInvoke {
+
+static TypeName* ParseType(String* str) {
+ int curPos = 0;
+ int endPos;
+
+ // Console::WriteLine("x{0}y", str);
+ TypeName* typeName = new TypeName();
+
+ if ( str->get_Chars(0) == '[' ) {
+ endPos = str->IndexOf(']');
+ curPos = endPos + 1;
+ typeName->m_assembly = str->Substring(1,endPos-1);
+ typeName->m_length = endPos+1;
+ }
+ String* delimStr = " ,()";
+ Char delims __gc [] = delimStr->ToCharArray();
+
+ endPos = str->IndexOfAny(delims,curPos);
+ // Console::WriteLine("{0} {1} x{2}x", __box(endPos), __box(curPos), str);
+ if ( endPos == -1 ) {
+ typeName->m_class = str->Substring(curPos);
+ } else {
+ typeName->m_class = str->Substring(curPos,endPos-curPos);
+ }
+
+ // typeName->m_class = str->Substring(curPos,endPos-curPos);
+ typeName->m_length += endPos-curPos;
+
+ return typeName;
+}
+
+// Method: GetType(String* typeName);
+//
+// Purpose: Assembly-savvy version of Type::GetType()
+//
+Type* InvokeBridge::GetType(String* typeName) {
+
+ try {
+ Type* t = Type::GetType(typeName);
+ if (t) return t;
+ } catch (Exception*) {
+ ;
+ }
+
+ for (int i=0;i < InvokeBridge::m_assemblies->Count; i++) {
+ try {
+ String* stuff = String::Format("{0},{1}",typeName,InvokeBridge::m_assemblies->get_Item(i)->ToString());
+ // Console::WriteLine(stuff);
+ Type* t = Type::GetType(stuff);
+ if (t) {
+ return t;
+ }
+ } catch (Exception*) {
+ continue;
+ }
+ }
+ return 0;
+}
+
+//
+// Method: CreateInstance(String* typeName, Object* [])
+//
+// Purpose: Assembly-savvy invocation of Activator::CreateInstance
+Object* InvokeBridge::CreateInstance(TypeName* typeName,
+ Object* args[]) {
+
+ Object* instance = 0;
+ Type* t = InvokeBridge::GetType(typeName->toStdString());
+
+ // Console::WriteLine("x{0} y{1}", typeName->toStdString(), t);
+ if (!t) {
+ try {
+ Assembly* localA = Assembly::LoadFrom(typeName->m_assembly);
+ t = localA->GetType(typeName->m_class);
+ } catch (Exception* e) {
+ ;
+ }
+ }
+
+ if (!t) {
+ try {
+ AppDomain* currentDomain = AppDomain::CurrentDomain;
+
+ // Assembly* stuff[] = currentDomain->GetAssemblies();
+ // for (int i=0;i < stuff.Length; i++) {
+ // Console::WriteLine("x{0} y{1}", stuff[i]->ToString(), stuff[i]->FullName);
+ // }
+ // Console::WriteLine("x{0} y{1}", typeName->toStdString(), t);
+ Assembly* localA = Assembly::LoadWithPartialName("HugsAssembly");
+ t = localA->GetType(typeName->m_class);
+ // Console::WriteLine("x{0} y{1}", typeName->toStdString(), t);
+ } catch (Exception*) {
+ ;
+ }
+ }
+
+  if (!t) return 0; /* type not found in any assembly: fail explicitly */
+  try {
+    Object* o = Activator::CreateInstance(t,(Object* [])args);
+    return o;
+  } catch (Exception* e) {
+    Console::WriteLine("Failure: {0}", e);
+    return 0;
+  }
+  return 0; /* not reached; ensures every control path returns a value */
+}
+
+//
+// Method: CreateObject(String* objSpec, Object* args[])
+//
+// Purpose: Given a fully qualified name of a class/type, try
+// to create an instance of it.
+//
+Object* InvokeBridge::CreateObject(String* assemName,
+ String* objSpec,
+ Object* args[]) {
+
+ Object* instance = 0;
+
+ // Unravel the name of the class/type.
+ TypeName* typeName = ParseType(objSpec);
+
+ if (assemName != 0 && assemName->Length > 0) {
+ typeName->m_assembly = assemName;
+ }
+
+ // Try creating the instance..
+ try {
+ instance = InvokeBridge::CreateInstance(typeName,(Object* [])args);
+ } catch (Exception* e) {
+ Console::WriteLine("Unable to create instance \"{0}\" {1}", objSpec, e);
+ throw(e);
+ }
+ if (!instance) {
+ Console::WriteLine("Unable to create instance \"{0}\"", objSpec);
+ }
+ return instance;
+}
+
+//
+// Method: InvokeMethod
+//
+// Purpose: Given a pointer to an already created object, look up
+// one of its method. If found, invoke the method passing it
+// 'args' as arguments.
+//
+Object*
+InvokeBridge::InvokeMethod(Object* obj,
+ String* methName,
+ Object* args[]) {
+ // Get the methods from the type
+ MethodInfo* methods __gc[] = obj->GetType()->GetMethods();
+ MethodInfo* mInfo;
+
+ if (!methods) {
+ Console::WriteLine("InvokeMethod: No matching types found");
+ return 0;
+ }
+
+ System::Reflection::BindingFlags flgs
+ = (System::Reflection::BindingFlags) // why do I need to cast?
+ (System::Reflection::BindingFlags::Public |
+ System::Reflection::BindingFlags::NonPublic |
+ System::Reflection::BindingFlags::Instance |
+ System::Reflection::BindingFlags::Static |
+ System::Reflection::BindingFlags::InvokeMethod);
+
+ /* Caller is assumed to catch any exceptions raised. */
+ return obj->GetType()->InvokeMember(methName,
+ flgs,
+ 0,
+ obj,
+ (Object __gc* [])args);
+}
+
+//
+// Method: InvokeStaticMethod
+//
+// Purpose: Invoke a static method, given the fully qualified name
+// of the method (and its arguments). If found, invoke the
+// method passing it 'args' as arguments.
+//
+Object* InvokeBridge::InvokeStaticMethod(String* assemName,
+ String* typeAndMethName,
+ Object* args[]) {
+
+ // Get the methods from the type
+ MethodInfo* methods __gc[];
+ MethodInfo* mInfo;
+
+ int lastDot = typeAndMethName->LastIndexOf('.');
+ String* className = typeAndMethName->Substring(0,lastDot);
+ String* methName = typeAndMethName->Substring(lastDot+1);
+
+ // Unravel the name of the class/type.
+ TypeName* typeName = ParseType(className);
+ Type* t;
+
+ if (assemName != 0 && assemName->Length > 0) {
+ typeName->m_assembly = assemName;
+ }
+
+ try {
+ t = InvokeBridge::GetType(typeName->toStdString());
+
+ if (!t) {
+ try {
+ Assembly* localA = Assembly::LoadFrom(typeName->m_assembly);
+ t = localA->GetType(typeName->m_class);
+ // Console::WriteLine("InvokeStaticMethod: Type {0} found", t);
+ } catch (Exception* e) {
+ ;
+ }
+ }
+
+ if (t) {
+ methods = t->GetMethods();
+ } else {
+ Console::WriteLine("InvokeStaticMethod: Type {0} not found", className);
+ return 0;
+ }
+ } catch (Exception *e) {
+ Console::WriteLine("InvokeStaticMethod: Type {0} not found", className);
+ throw(e);
+ }
+
+ System::Reflection::BindingFlags flgs
+ = (System::Reflection::BindingFlags) // why do I need to cast?
+ (System::Reflection::BindingFlags::DeclaredOnly |
+ System::Reflection::BindingFlags::Public |
+ System::Reflection::BindingFlags::NonPublic |
+ System::Reflection::BindingFlags::Static |
+ System::Reflection::BindingFlags::InvokeMethod);
+
+ return t->InvokeMember(methName,
+ flgs,
+ 0,
+ 0,
+ (Object __gc* [])args);
+}
+
+//
+// Method: GetField
+//
+// Purpose: Fetch the (boxed) value of named field of a given object.
+//
+Object* InvokeBridge::GetField(Object* obj, System::String* fieldName) {
+
+ FieldInfo* fInfo = obj->GetType()->GetField(fieldName);
+ return fInfo->GetValue(obj);
+}
+
+//
+// Method: GetStaticField
+//
+// Purpose: Fetch the (boxed) value of named static field.
+//
+Object* InvokeBridge::GetStaticField(System::String* clsName,
+ System::String* fieldName) {
+
+ Type* ty = InvokeBridge::GetType(clsName);
+ System::Reflection::BindingFlags static_field_flgs
+ = (System::Reflection::BindingFlags)
+ (System::Reflection::BindingFlags::Public |
+ System::Reflection::BindingFlags::NonPublic |
+ System::Reflection::BindingFlags::FlattenHierarchy |
+ System::Reflection::BindingFlags::Static);
+
+ FieldInfo* fInfo = ty->GetField(fieldName, static_field_flgs);
+ return fInfo->GetValue(0); // according to doc, ok to pass any val here.
+}
+
+//
+// Method: SetField
+//
+// Purpose: Replace the (boxed) value of named field of a given object.
+//
+void InvokeBridge::SetField(Object* obj, System::String* fieldName, Object* val) {
+
+ FieldInfo* fInfo = obj->GetType()->GetField(fieldName);
+ fInfo->SetValue(obj,val);
+ return;
+}
+
+//
+// Method: SetStaticField
+//
+// Purpose: Replace the (boxed) value of named static field.
+//
+void InvokeBridge::SetStaticField(System::String* clsName,
+ System::String* fieldName,
+ Object* val) {
+
+ Type* ty = InvokeBridge::GetType(clsName);
+ System::Reflection::BindingFlags static_field_flgs
+ = (System::Reflection::BindingFlags)
+ (System::Reflection::BindingFlags::Public |
+ System::Reflection::BindingFlags::NonPublic |
+ System::Reflection::BindingFlags::FlattenHierarchy |
+ System::Reflection::BindingFlags::Static);
+
+ FieldInfo* fInfo = ty->GetField(fieldName,static_field_flgs);
+ fInfo->SetValue(0,val);
+ return;
+}
+
+Object* InvokeBridge::NewString(System::String* s)
+{
+ System::String* c = System::String::Copy(s);
+ return dynamic_cast<Object*>(c);
+}
+
+Array* InvokeBridge::NewArgArray(int sz)
+{
+ return Array::CreateInstance(__typeof(Object), sz);
+}
+
+void InvokeBridge::SetArg(Object* arr[], Object* val, int idx)
+{
+ arr->SetValue(val,idx);
+}
+
+Object* InvokeBridge::GetArg(Object* arr[], int idx)
+{
+ return arr->GetValue(idx);
+}
+
+} /* namespace */
diff --git a/rts/dotnet/Invoker.h b/rts/dotnet/Invoker.h
new file mode 100644
index 0000000000..d649a4c716
--- /dev/null
+++ b/rts/dotnet/Invoker.h
@@ -0,0 +1,197 @@
+//
+// (c) 2003, sof.
+//
+// Dynamic invocation helper classes. The details of how
+// to access the .NET object model via the Reflection API
+// is taken care of by Invoker.{h,cpp}
+//
+#pragma once
+#using <mscorlib.dll>
+
+using namespace System;
+using namespace System::Reflection;
+using namespace System::Text;
+using namespace System::Runtime::InteropServices;
+
+[assembly:AssemblyKeyFileAttribute(S"invoker.snk")];
+
+namespace DynInvoke {
+
+//
+// Class: TypeName
+//
+// Purpose: pairing up an assembly name and the type/class name.
+//
+[ComVisible(false)]
+public __gc class TypeName {
+
+public:
+ System::String* m_assembly;
+ System::String* m_class;
+ int m_length;
+
+ TypeName() {
+ m_assembly = String::Empty;
+ m_class = String::Empty;
+ m_length = 0;
+ }
+
+ void Print() {
+ if (m_assembly && m_assembly != String::Empty ) {
+ Console::Write("[");
+ Console::Write(m_assembly);
+ Console::Write("]");
+ }
+ Console::WriteLine(m_class);
+ }
+
+ int Length() { return m_length; }
+
+ System::String* toStdString() {
+ System::String* res = new System::String(m_class->ToCharArray());
+
+ if (m_assembly && m_assembly != String::Empty ){
+ res = String::Concat(res, S",");
+ res = String::Concat(res, m_assembly);
+ }
+ return res;
+ }
+};
+
+//
+// Class: InvokeBridge
+//
+// Purpose: Collection of (static) methods for dynamically creating
+// objects and accessing methods/fields on them.
+//
+[ClassInterface(ClassInterfaceType::AutoDual),
+GuidAttribute("39D497D9-60E0-3525-B7F2-7BC096D3A2A3"),
+ComVisible(true)
+]
+public __gc class InvokeBridge {
+public:
+ InvokeBridge() {
+ Assembly* corAss = Assembly::Load("mscorlib.dll");
+ System::String* dir = System::IO::Path::GetDirectoryName(corAss->Location);
+
+ m_assemblies = new System::Collections::ArrayList();
+
+ System::String* fs[] = System::IO::Directory::GetFiles(dir, "*.dll");
+ for (int i=0;i < fs->Length; i++) {
+ try {
+ Assembly* tAss = Assembly::LoadFrom(fs[i]);
+ m_assemblies->Add(tAss->FullName);
+ } catch (Exception* e) {
+ continue;
+ }
+ }
+ }
+
+ //
+ // Method: CreateObject(String* assemName, String* objSpec, Object* args[])
+ //
+ // Purpose: Given a fully qualified name of a class/type, try
+ // to create an instance of it.
+ //
+ Object* CreateObject(System::String* assemName,
+ System::String* objSpec,
+ Object* args[]);
+
+ //
+ // Method: InvokeMethod
+ //
+ // Purpose: Given a pointer to an already created object, look up
+ // one of its method. If found, invoke the method passing it
+ // 'args' as arguments.
+ //
+ // Comments: the format of the method-spec is "methodName(type1,..,typeN)" [N>=0]
+ //
+ Object* InvokeMethod(Object* obj,
+ System::String* methSpec,
+ Object* args[]);
+
+ //
+ // Method: InvokeStaticMethod
+ //
+ // Purpose: Invoke a static method, given the fully qualified name
+ // of the method (and its arguments). If found, invoke the
+ // method passing it 'args' as arguments.
+ //
+ // Comments: the format of the method-spec is
+ // "T1.T2.<..>.Tn.methodName(type1,..,typeN)" [N>=0]
+ //
+ Object* InvokeStaticMethod(System::String* assemName,
+ System::String* methSpec,
+ Object* args[]);
+
+ //
+ // Method: GetField
+ //
+ // Purpose: Fetch the (boxed) value of named field of a given object.
+ //
+ Object* GetField(Object* obj, System::String* fieldSpec);
+
+ //
+ // Method: GetField
+ //
+ // Purpose: Fetch the (boxed) value of named static field.
+ //
+ Object* GetStaticField(System::String* clsName,
+ System::String* fieldSpec);
+
+ //
+ // Method: SetField
+ //
+ // Purpose: Replace the (boxed) value of named field of a given object.
+ //
+ void SetField(Object* obj, System::String* fieldSpec, Object* val);
+
+ //
+ // Method: SetStaticField
+ //
+ // Purpose: Replace the (boxed) value of named field of a given object.
+ //
+ void SetStaticField(System::String* clsName,
+ System::String* fieldSpec,
+ Object* val);
+
+
+ //
+ // Method: NewString
+ //
+ // Purpose: construct a System.String object copy in a manner that avoids
+ // COM Interop from deconstructing it to a BSTR.
+ //
+ System::Object* NewString( System::String* s);
+
+ //
+ // Method: NewArgArray
+ //
+ // Purpose: create a new array for holding (boxed) arguments to constructors/
+ // methods.
+ //
+ Array* NewArgArray(int sz);
+
+ //
+ // Method: SetArg
+ //
+ // Purpose: set an entry in the argument vector.
+ //
+ void SetArg(Object* arr[], Object* val, int idx);
+
+ //
+ // Method: GetArg
+ //
+ // Purpose: get an entry in the argument vector.
+ //
+ Object* GetArg(Object* arr[], int idx);
+
+ System::Type* InvokeBridge::GetType(System::String* typeName);
+
+protected:
+ System::Collections::ArrayList __gc* m_assemblies;
+ Object* InvokeBridge::CreateInstance(TypeName* typeName,
+ Object* args[]);
+};
+
+} /* namespace */
diff --git a/rts/dotnet/InvokerClient.h b/rts/dotnet/InvokerClient.h
new file mode 100644
index 0000000000..122f455c01
--- /dev/null
+++ b/rts/dotnet/InvokerClient.h
@@ -0,0 +1,180 @@
+/*
+ * InvokerClient interface defns for use with gcc.
+ *
+ * Note: These declarations mirror those of the InvokeBridge
+ * class declaration.
+ *
+ */
+
+#include <windows.h>
+#include <wtypes.h>
+#include <oaidl.h>
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+#ifndef STDCALL
+#define STDCALL __stdcall
+#endif
+
+extern const CLSID CLSID_InvokeBridge;
+extern const IID IID_IUnknown;
+extern const IID IID_NULL;
+extern const IID IID_InvokeBridge;
+
+#ifdef WANT_UUID_DECLS
+const CLSID CLSID_InvokeBridge = { 0x39D497D9,0x60E0,0x3525,{0xB7,0xF2,0x7B,0xC0,0x96,0xD3,0xA2,0xA3}};
+//const IID IID_NULL = {0x00000000L, 0x0000, 0x0000, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
+//const IID IID_IUnknown = {0x00000000L, 0x0000, 0x0000, {0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}};
+const IID IID_InvokeBridge = { 0xAFF5FFCA, 0xC5C2, 0x3D5B, {0xAF, 0xD5, 0xED, 0x8E, 0x4B, 0x38, 0xDB, 0x7B}};
+ //0x3A85D703, 0xFAE4,0x3C5E, {0x9F,0x7E,0x20,0x98,0x31,0xCD,0x61,0x7A}};
+#endif
+
+#ifndef __InvokeBridge_INTERFACE_DEFINED__
+#define __InvokeBridge_INTERFACE_DEFINED__
+#undef INTERFACE
+#define INTERFACE InvokeBridge
+DECLARE_INTERFACE(InvokeBridge)
+{
+ STDMETHOD(QueryInterface)(THIS_ REFIID,PVOID*) PURE;
+ STDMETHOD_(ULONG,AddRef)(THIS) PURE;
+ STDMETHOD_(ULONG,Release)(THIS) PURE;
+ STDMETHOD(GetTypeInfoCount)(THIS_ UINT*) PURE;
+ STDMETHOD(GetTypeInfo)(THIS_ UINT,LCID,LPTYPEINFO*) PURE;
+ STDMETHOD(GetIDsOfNames)(THIS_ REFIID,LPOLESTR*,UINT,LCID,DISPID*) PURE;
+ STDMETHOD(Invoke)(THIS_ DISPID,REFIID,LCID,WORD,DISPPARAMS*,VARIANT*,EXCEPINFO*,UINT*) PURE;
+
+ STDMETHOD(ToString)(THIS_ BSTR*) PURE;
+ STDMETHOD(Equals)(THIS_ BSTR*) PURE;
+ STDMETHOD(GetHashCode)(THIS_ long*) PURE;
+  STDMETHOD(GetType)(THIS_ IUnknown**) PURE;
+ STDMETHOD(CreateObject)(THIS_ BSTR,BSTR,SAFEARRAY*, VARIANT*) PURE;
+ STDMETHOD(InvokeMethod)(THIS_ VARIANT,BSTR,SAFEARRAY*,VARIANT*) PURE;
+ STDMETHOD(InvokeStaticMethod)(THIS_ BSTR,BSTR,SAFEARRAY*,VARIANT*) PURE;
+
+ HRESULT ( STDCALL *GetField )(
+ InvokeBridge * This,
+ /* [in] */ VARIANT obj,
+ /* [in] */ BSTR fieldSpec,
+ /* [retval][out] */ VARIANT *pRetVal);
+
+ HRESULT ( STDCALL *GetStaticField )(
+ InvokeBridge * This,
+ /* [in] */ BSTR clsName,
+ /* [in] */ BSTR fieldSpec,
+ /* [retval][out] */ VARIANT *pRetVal);
+
+ HRESULT ( STDCALL *SetField )(
+ InvokeBridge * This,
+ /* [in] */ VARIANT obj,
+ /* [in] */ BSTR fieldSpec,
+ /* [in] */ VARIANT val);
+
+ HRESULT ( STDCALL *SetStaticField )(
+ InvokeBridge * This,
+ /* [in] */ BSTR clsName,
+ /* [in] */ BSTR fieldSpec,
+ /* [in] */ VARIANT val);
+
+ HRESULT ( STDCALL *NewString )(
+ InvokeBridge * This,
+ /* [in] */ BSTR s,
+ /* [retval][out] */VARIANT* pRetVal);
+
+ HRESULT ( STDCALL *NewArgArray )(
+ InvokeBridge * This,
+ /* [in] */ long sz,
+ /* [retval][out] */IUnknown **pRetVal);
+
+ HRESULT ( STDCALL *SetArg )(
+ InvokeBridge * This,
+ /* [in] */ SAFEARRAY * arr,
+ /* [in] */ VARIANT val,
+ /* [in] */ long idx);
+
+ HRESULT ( STDCALL *GetArg )(
+ InvokeBridge * This,
+ /* [in] */ SAFEARRAY * arr,
+ /* [in] */ long idx,
+ /* [retval][out] */ VARIANT *pRetVal);
+
+ HRESULT ( STDCALL *GetType_2 )(
+ InvokeBridge * This,
+ /* [in] */ BSTR typeName,
+ /* [retval][out] */ IUnknown **pRetVal);
+};
+#endif
+
+#define InvokeBridge_QueryInterface(This,riid,ppvObject) \
+ (This)->lpVtbl->QueryInterface(This,riid,ppvObject)
+
+#define InvokeBridge_AddRef(This) \
+ (This)->lpVtbl->AddRef(This)
+
+#define InvokeBridge_Release(This) \
+ (This)->lpVtbl->Release(This)
+
+#define InvokeBridge_GetTypeInfoCount(This,pctinfo) \
+ (This)->lpVtbl->GetTypeInfoCount(This,pctinfo)
+
+#define InvokeBridge_GetTypeInfo(This,iTInfo,lcid,ppTInfo) \
+ (This)->lpVtbl->GetTypeInfo(This,iTInfo,lcid,ppTInfo)
+
+#define InvokeBridge_GetIDsOfNames(This,riid,rgszNames,cNames,lcid,rgDispId) \
+ (This)->lpVtbl->GetIDsOfNames(This,riid,rgszNames,cNames,lcid,rgDispId)
+
+#define InvokeBridge_Invoke(This,dispIdMember,riid,lcid,wFlags,pDispParams,pVarResult,pExcepInfo,puArgErr) \
+ (This)->lpVtbl->Invoke(This,dispIdMember,riid,lcid,wFlags,pDispParams,pVarResult,pExcepInfo,puArgErr)
+
+#define InvokeBridge_get_ToString(This,pRetVal) \
+ (This)->lpVtbl->get_ToString(This,pRetVal)
+
+#define InvokeBridge_Equals(This,obj,pRetVal) \
+ (This)->lpVtbl->Equals(This,obj,pRetVal)
+
+#define InvokeBridge_GetHashCode(This,pRetVal) \
+ (This)->lpVtbl->GetHashCode(This,pRetVal)
+
+#define InvokeBridge_GetType(This,pRetVal) \
+ (This)->lpVtbl->GetType(This,pRetVal)
+
+#define InvokeBridge_CreateObject(This,assemName,objSpec,args,pRetVal) \
+ (This)->lpVtbl->CreateObject(This,assemName,objSpec,args,pRetVal)
+
+#define InvokeBridge_InvokeMethod(This,obj,methSpec,args,pRetVal) \
+ (This)->lpVtbl->InvokeMethod(This,obj,methSpec,args,pRetVal)
+
+#define InvokeBridge_InvokeStaticMethod(This,assemName,methSpec,args,pRetVal) \
+ (This)->lpVtbl->InvokeStaticMethod(This,assemName,methSpec,args,pRetVal)
+
+#define InvokeBridge_GetField(This,obj,fieldSpec,pRetVal) \
+ (This)->lpVtbl->GetField(This,obj,fieldSpec,pRetVal)
+
+#define InvokeBridge_GetStaticField(This,clsName,fieldSpec,pRetVal) \
+ (This)->lpVtbl->GetStaticField(This,clsName,fieldSpec,pRetVal)
+
+#define InvokeBridge_SetField(This,obj,fieldSpec,val) \
+ (This)->lpVtbl->SetField(This,obj,fieldSpec,val)
+
+#define InvokeBridge_SetStaticField(This,clsName,fieldSpec,val) \
+ (This)->lpVtbl->SetStaticField(This,clsName,fieldSpec,val)
+
+#define InvokeBridge_NewString(This,s,pRetVal) \
+ (This)->lpVtbl->NewString(This,s,pRetVal)
+
+#define InvokeBridge_NewArgArray(This,sz,pRetVal) \
+ (This)->lpVtbl->NewArgArray(This,sz,pRetVal)
+
+#define InvokeBridge_SetArg(This,arr,val,idx) \
+ (This)->lpVtbl->SetArg(This,arr,val,idx)
+
+#define InvokeBridge_GetArg(This,arr,idx,pRetVal) \
+ (This)->lpVtbl->GetArg(This,arr,idx,pRetVal)
+
+#define InvokeBridge_GetType_2(This,typeName,pRetVal) \
+ (This)->lpVtbl->GetType_2(This,typeName,pRetVal)
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/rts/dotnet/Makefile b/rts/dotnet/Makefile
new file mode 100644
index 0000000000..95b6c38890
--- /dev/null
+++ b/rts/dotnet/Makefile
@@ -0,0 +1,53 @@
+#
+# .NET interop for GHC.
+#
+# (c) 2003, sof.
+#
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+
+all :: Invoker.dll Invoke.o
+
+#
+# To compile the dotnet interop bits, you need to have the
+# .NET Framework SDK or VS.NET installed. The following
+# apps are used:
+#
+MCPP=cl
+TLBEXP=tlbexp
+REGASM=regasm
+GACUTIL=gacutil
+
+Invoker.dll : Invoker.obj
+ $(MCPP) /LD /clr /o Invoker.dll Invoker.obj
+ $(TLBEXP) Invoker.dll
+ $(REGASM) Invoker.dll
+ $(GACUTIL) /i Invoker.dll
+
+Invoker.obj : Invoker.cpp Invoker.h
+ $(MCPP) /LD /clr /c Invoker.cpp
+
+CLEAN_FILES += $(wildcard *.obj *.dll *.tlb)
+
+# ToDo:
+# - switch to /ir (i.e., copy it into the GAC.)
+# - sort out installation story.
+
+# drop the assembly
+remove :
+ $(GACUTIL) /u Invoker
+
+#
+# NOTE: For DotnetCc a version of gcc later than gcc-2.95 is
+# required (I'm using the gcc-3.2 snapshot that comes with mingw-2)
+#
+ifeq "$(DotnetCc)" ""
+DotnetCc=$(CC)
+endif
+DotnetCcOpts=$(CC_OPTS) $(DOTNET_EXTRA_CC_OPTS)
+SRC_CC_OPTS += -I$(TOP)/includes
+
+Invoke.o : Invoke.c
+ $(DotnetCc) $(DotnetCcOpts) -c $< -o $@
+
+include $(TOP)/mk/target.mk
diff --git a/rts/dotnet/invoker.snk b/rts/dotnet/invoker.snk
new file mode 100644
index 0000000000..05a222178a
--- /dev/null
+++ b/rts/dotnet/invoker.snk
Binary files differ
diff --git a/rts/ghc-frontpanel.glade b/rts/ghc-frontpanel.glade
new file mode 100644
index 0000000000..9b73afce47
--- /dev/null
+++ b/rts/ghc-frontpanel.glade
@@ -0,0 +1,1622 @@
+<?xml version="1.0" standalone="no"?> <!--*- mode: xml -*-->
+<!DOCTYPE glade-interface SYSTEM "http://glade.gnome.org/glade-2.0.dtd">
+
+<glade-interface>
+
+<widget class="GtkWindow" id="GHC Front Panel">
+ <property name="visible">True</property>
+ <property name="title" translatable="yes">GHC Front Panel</property>
+ <property name="type">GTK_WINDOW_TOPLEVEL</property>
+ <property name="window_position">GTK_WIN_POS_NONE</property>
+ <property name="modal">False</property>
+ <property name="default_width">450</property>
+ <property name="default_height">600</property>
+ <property name="resizable">True</property>
+ <property name="destroy_with_parent">False</property>
+ <property name="decorated">True</property>
+ <property name="skip_taskbar_hint">False</property>
+ <property name="skip_pager_hint">False</property>
+ <property name="type_hint">GDK_WINDOW_TYPE_HINT_NORMAL</property>
+ <property name="gravity">GDK_GRAVITY_NORTH_WEST</property>
+ <property name="focus_on_map">True</property>
+
+ <child>
+ <widget class="GtkVBox" id="vbox1">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <widget class="GtkHBox" id="hbox1">
+ <property name="border_width">10</property>
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">10</property>
+
+ <child>
+ <widget class="GtkVBox" id="vbox4">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">10</property>
+
+ <child>
+ <widget class="GtkFrame" id="frame3">
+ <property name="visible">True</property>
+ <property name="label_xalign">0</property>
+ <property name="label_yalign">0.5</property>
+ <property name="shadow_type">GTK_SHADOW_ETCHED_IN</property>
+
+ <child>
+ <widget class="GtkHBox" id="hbox3">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <widget class="GtkLabel" id="label40">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Mb</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkVRuler" id="map_ruler">
+ <property name="visible">True</property>
+ <property name="metric">GTK_PIXELS</property>
+ <property name="lower">0</property>
+ <property name="upper">10</property>
+ <property name="position">1.40845072269</property>
+ <property name="max_size">10</property>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkDrawingArea" id="memmap">
+ <property name="visible">True</property>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label1">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Memory Map</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="type">label_item</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkFrame" id="frame8">
+ <property name="visible">True</property>
+ <property name="label_xalign">0</property>
+ <property name="label_yalign">0.5</property>
+ <property name="shadow_type">GTK_SHADOW_ETCHED_IN</property>
+
+ <child>
+ <widget class="GtkVBox" id="vbox14">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <widget class="GtkTable" id="table4">
+ <property name="visible">True</property>
+ <property name="n_rows">2</property>
+ <property name="n_columns">3</property>
+ <property name="homogeneous">False</property>
+ <property name="row_spacing">0</property>
+ <property name="column_spacing">0</property>
+
+ <child>
+ <widget class="GtkVRuler" id="gen_ruler">
+ <property name="visible">True</property>
+ <property name="metric">GTK_PIXELS</property>
+ <property name="lower">0</property>
+ <property name="upper">10</property>
+ <property name="position">1.69934999943</property>
+ <property name="max_size">10</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkHBox" id="gen_hbox">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <placeholder/>
+ </child>
+
+ <child>
+ <placeholder/>
+ </child>
+
+ <child>
+ <placeholder/>
+ </child>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="y_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkDrawingArea" id="generations">
+ <property name="visible">True</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options">fill</property>
+ <property name="y_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label39">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Mb</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label41">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Generations</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="type">label_item</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkFrame" id="frame7">
+ <property name="visible">True</property>
+ <property name="label_xalign">0</property>
+ <property name="label_yalign">0.5</property>
+ <property name="shadow_type">GTK_SHADOW_ETCHED_IN</property>
+
+ <child>
+ <widget class="GtkTable" id="table3">
+ <property name="border_width">2</property>
+ <property name="visible">True</property>
+ <property name="n_rows">3</property>
+ <property name="n_columns">3</property>
+ <property name="homogeneous">False</property>
+ <property name="row_spacing">0</property>
+ <property name="column_spacing">0</property>
+
+ <child>
+ <widget class="GtkHRuler" id="res_hruler">
+ <property name="visible">True</property>
+ <property name="metric">GTK_PIXELS</property>
+ <property name="lower">0</property>
+ <property name="upper">10</property>
+ <property name="position">8.35443019867</property>
+ <property name="max_size">10</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="y_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkVRuler" id="res_vruler">
+ <property name="visible">True</property>
+ <property name="metric">GTK_PIXELS</property>
+ <property name="lower">0</property>
+ <property name="upper">10</property>
+ <property name="position">9.69925022125</property>
+ <property name="max_size">10</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkDrawingArea" id="res_drawingarea">
+ <property name="visible">True</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options">fill</property>
+ <property name="y_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label37">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Secs</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label38">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Mb</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label42">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Residency</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="type">label_item</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkVBox" id="vbox5">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">10</property>
+
+ <child>
+ <widget class="GtkFrame" id="frame5">
+ <property name="visible">True</property>
+ <property name="label_xalign">0</property>
+ <property name="label_yalign">0.5</property>
+ <property name="shadow_type">GTK_SHADOW_ETCHED_IN</property>
+
+ <child>
+ <widget class="GtkVBox" id="vbox6">
+ <property name="border_width">5</property>
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <placeholder/>
+ </child>
+
+ <child>
+ <placeholder/>
+ </child>
+
+ <child>
+ <widget class="GtkTable" id="table1">
+ <property name="visible">True</property>
+ <property name="n_rows">4</property>
+ <property name="n_columns">3</property>
+ <property name="homogeneous">False</property>
+ <property name="row_spacing">0</property>
+ <property name="column_spacing">7</property>
+
+ <child>
+ <widget class="GtkLabel" id="label12">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Allocated</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_RIGHT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label13">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Live</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_RIGHT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label14">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Allocation Rate</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_RIGHT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">3</property>
+ <property name="bottom_attach">4</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label15">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes"> Footprint</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_RIGHT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label16">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">M/sec</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">3</property>
+ <property name="bottom_attach">4</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label17">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">M</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">7.45058015283e-09</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label18">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">M</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">7.45058015283e-09</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label19">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">M</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">7.45058015283e-09</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">2</property>
+ <property name="right_attach">3</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="live_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes"></property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="allocated_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes"></property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="footprint_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes"></property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="alloc_rate_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes"></property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">3</property>
+ <property name="bottom_attach">4</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label43">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Stats</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="type">label_item</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkFrame" id="frame9">
+ <property name="visible">True</property>
+ <property name="label_xalign">0</property>
+ <property name="label_yalign">0.5</property>
+ <property name="shadow_type">GTK_SHADOW_ETCHED_IN</property>
+
+ <child>
+ <widget class="GtkTable" id="table5">
+ <property name="border_width">6</property>
+ <property name="visible">True</property>
+ <property name="n_rows">9</property>
+ <property name="n_columns">2</property>
+ <property name="homogeneous">False</property>
+ <property name="row_spacing">0</property>
+ <property name="column_spacing">10</property>
+
+ <child>
+ <widget class="GtkLabel" id="label20">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Running</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label21">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Blocked on I/O (Read)</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label22">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Blocked on MVar</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">3</property>
+ <property name="bottom_attach">4</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label24">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Blocked on throwTo</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">4</property>
+ <property name="bottom_attach">5</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label26">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Blocked on Black Hole</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">5</property>
+ <property name="bottom_attach">6</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label25">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Sleeping</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">6</property>
+ <property name="bottom_attach">7</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label27">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Blocked on I/O (Write)</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="running_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label28</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">0</property>
+ <property name="bottom_attach">1</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="blockread_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label29</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">1</property>
+ <property name="bottom_attach">2</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="blockwrite_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label30</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">2</property>
+ <property name="bottom_attach">3</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="blockmvar_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label31</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">3</property>
+ <property name="bottom_attach">4</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="blockthrowto_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label32</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">4</property>
+ <property name="bottom_attach">5</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="blockbh_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label33</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">5</property>
+ <property name="bottom_attach">6</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="sleeping_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label34</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">6</property>
+ <property name="bottom_attach">7</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkHSeparator" id="hseparator1">
+ <property name="visible">True</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">7</property>
+ <property name="bottom_attach">8</property>
+ <property name="x_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkHSeparator" id="hseparator2">
+ <property name="visible">True</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">7</property>
+ <property name="bottom_attach">8</property>
+ <property name="x_options">fill</property>
+ <property name="y_options">fill</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label35">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Total</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">1</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">0</property>
+ <property name="right_attach">1</property>
+ <property name="top_attach">8</property>
+ <property name="bottom_attach">9</property>
+ <property name="x_options">fill</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="total_label">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">label36</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_CENTER</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">8</property>
+ <property name="bottom_attach">9</property>
+ <property name="x_options"></property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label44">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Threads</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="type">label_item</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkFrame" id="frame6">
+ <property name="visible">True</property>
+ <property name="label_xalign">0</property>
+ <property name="label_yalign">0.5</property>
+ <property name="shadow_type">GTK_SHADOW_ETCHED_IN</property>
+
+ <child>
+ <widget class="GtkVBox" id="vbox7">
+ <property name="border_width">5</property>
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">10</property>
+
+ <child>
+ <widget class="GtkVBox" id="vbox9">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <widget class="GtkRadioButton" id="cont_radio">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Continuous</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <property name="active">True</property>
+ <property name="inconsistent">False</property>
+ <property name="draw_indicator">True</property>
+ <signal name="clicked" handler="on_cont_radio_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkRadioButton" id="stop_before_radio">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Stop before GC</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <property name="active">False</property>
+ <property name="inconsistent">False</property>
+ <property name="draw_indicator">True</property>
+ <property name="group">cont_radio</property>
+ <signal name="clicked" handler="on_stop_before_radio_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkRadioButton" id="stop_after_radio">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Stop after GC</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <property name="active">False</property>
+ <property name="inconsistent">False</property>
+ <property name="draw_indicator">True</property>
+ <property name="group">cont_radio</property>
+ <signal name="clicked" handler="on_stop_after_radio_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkRadioButton" id="stop_both_radio">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Stop before &amp; after GC</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <property name="active">False</property>
+ <property name="inconsistent">False</property>
+ <property name="draw_indicator">True</property>
+ <property name="group">cont_radio</property>
+ <signal name="clicked" handler="on_stop_both_radio_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkVBox" id="vbox8">
+ <property name="visible">True</property>
+ <property name="homogeneous">False</property>
+ <property name="spacing">0</property>
+
+ <child>
+ <widget class="GtkButton" id="stop_but">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Stop</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <signal name="clicked" handler="on_stop_but_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkButton" id="continue_but">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Continue</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <signal name="clicked" handler="on_continue_but_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+
+ <child>
+ <widget class="GtkLabel" id="label45">
+ <property name="visible">True</property>
+ <property name="label" translatable="yes">Updates</property>
+ <property name="use_underline">False</property>
+ <property name="use_markup">False</property>
+ <property name="justify">GTK_JUSTIFY_LEFT</property>
+ <property name="wrap">False</property>
+ <property name="selectable">False</property>
+ <property name="xalign">0.5</property>
+ <property name="yalign">0.5</property>
+ <property name="xpad">0</property>
+ <property name="ypad">0</property>
+ <property name="ellipsize">PANGO_ELLIPSIZE_NONE</property>
+ <property name="width_chars">-1</property>
+ <property name="single_line_mode">False</property>
+ <property name="angle">0</property>
+ </widget>
+ <packing>
+ <property name="type">label_item</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkButton" id="quit_but">
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="label" translatable="yes">Quit</property>
+ <property name="use_underline">True</property>
+ <property name="relief">GTK_RELIEF_NORMAL</property>
+ <property name="focus_on_click">True</property>
+ <signal name="clicked" handler="on_quit_but_clicked"/>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ <property name="pack_type">GTK_PACK_END</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ <property name="pack_type">GTK_PACK_END</property>
+ </packing>
+ </child>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">True</property>
+ <property name="fill">True</property>
+ </packing>
+ </child>
+
+ <child>
+ <widget class="GtkStatusbar" id="statusbar">
+ <property name="visible">True</property>
+ <property name="has_resize_grip">True</property>
+ </widget>
+ <packing>
+ <property name="padding">0</property>
+ <property name="expand">False</property>
+ <property name="fill">False</property>
+ </packing>
+ </child>
+ </widget>
+ </child>
+</widget>
+
+</glade-interface>
diff --git a/rts/gmp/.gdbinit b/rts/gmp/.gdbinit
new file mode 100644
index 0000000000..843c109e89
--- /dev/null
+++ b/rts/gmp/.gdbinit
@@ -0,0 +1,34 @@
+# Copyright (C) 1999 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+define pz
+set __gmpz_dump ($)
+end
+
+define pq
+set __gmpz_dump ($->_mp_num)
+echo /
+set __gmpz_dump ($->_mp_den)
+end
+
+define pf
+set __gmpf_dump ($)
+end
+
diff --git a/rts/gmp/AUTHORS b/rts/gmp/AUTHORS
new file mode 100644
index 0000000000..1fa057af6c
--- /dev/null
+++ b/rts/gmp/AUTHORS
@@ -0,0 +1,12 @@
+Authors of GNU MP (in chronological order)
+Torbjörn Granlund
+John Amanatides
+Paul Zimmermann
+Ken Weber
+Bennet Yee
+Andreas Schwab
+Robert Harley
+Linus Nordberg
+Kent Boortz
+Kevin Ryde
+Guillaume Hanrot
diff --git a/rts/gmp/COPYING b/rts/gmp/COPYING
new file mode 100644
index 0000000000..a6d7d0188a
--- /dev/null
+++ b/rts/gmp/COPYING
@@ -0,0 +1,336 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/rts/gmp/COPYING.LIB b/rts/gmp/COPYING.LIB
new file mode 100644
index 0000000000..c4792dd27a
--- /dev/null
+++ b/rts/gmp/COPYING.LIB
@@ -0,0 +1,515 @@
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+^L
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+^L
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+^L
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+^L
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+^L
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+^L
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+^L
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+^L
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+ <one line to give the library's name and a brief idea of what it
+does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+Also add information on how to contact you by electronic and paper
+mail.
+
+You should also get your employer (if you work as a programmer) or
+your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James
+Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/rts/gmp/INSTALL b/rts/gmp/INSTALL
new file mode 100644
index 0000000000..62faa1a2e3
--- /dev/null
+++ b/rts/gmp/INSTALL
@@ -0,0 +1,146 @@
+
+ INSTALLING GNU MP
+ =================
+
+
+These instructions are only for the impatient. Others should read the install
+instructions in the manual, gmp.info. Use
+
+ info -f ./gmp.info
+
+or in emacs
+
+ C-u C-h i gmp.info
+
+
+Here are some brief instructions on how to install GMP, and some examples to
+help you get started using GMP.
+
+First, you need to compile, and optionally install, GMP. Since you're
+impatient, try this:
+
+ ./configure; make
+
+If that fails, or you care about the performance of GMP, you need to read the
+full instructions in the chapter "Installing GMP", in the manual.
+
+Next, try some small test programs, for example the ones below.
+
+In GMP programs, all variables need to be initialized before they are
+assigned, and cleared out before program flow leaves the scope in which they
+were declared. Here is an example program that reads two numbers from the
+command line, multiplies them, and prints the result to stdout.
+
+
+ #include <stdio.h>
+ #include <gmp.h> /* All GMP programs need to include gmp.h */
+
+ main (int argc, char **argv)
+ {
+ mpz_t a, b, p;
+
+ if (argc != 3)
+ { printf ("Usage: %s <number> <number>\n", argv[0]); exit (1); }
+
+ /* Initialize variables */
+ mpz_init (a);
+ mpz_init (b);
+ mpz_init (p);
+
+ /* Assign a and b from base 10 strings in argv */
+ mpz_set_str (a, argv[1], 10);
+ mpz_set_str (b, argv[2], 10);
+
+ /* Multiply a and b and put the result in p */
+ mpz_mul (p, a, b);
+
+ /* Print p in base 10 */
+ mpz_out_str (stdout, 10, p);
+ fputc ('\n', stdout);
+
+ /* Clear out variables */
+ mpz_clear (a);
+ mpz_clear (b);
+ mpz_clear (p);
+ exit (0);
+ }
+
+
+This might look tedious, with all the initializing and clearing. Fortunately
+some of these operations can be combined, and other operations can often be
+avoided. An experienced GMP user might write:
+
+
+ #include <stdio.h>
+ #include <gmp.h>
+
+ main (int argc, char **argv)
+ {
+ mpz_t a, b, p;
+
+ if (argc != 3)
+ { printf ("Usage: %s <number> <number>\n", argv[0]); exit (1); }
+
+ /* Initialize and assign a and b from base 10 strings in argv */
+ mpz_init_set_str (a, argv[1], 10);
+ mpz_init_set_str (b, argv[2], 10);
+ /* Initialize p */
+ mpz_init (p);
+
+ /* Multiply a and b and put the result in p */
+ mpz_mul (p, a, b);
+
+ /* Print p in base 10 */
+ mpz_out_str (stdout, 10, p);
+ fputc ('\n', stdout);
+
+ /* Since we're about to exit, no need to clear out variables */
+ exit (0);
+ }
+
+
+Now you have to compile your test program, and link it with the GMP library.
+Assuming your working directory is still the gmp source directory, and your
+source file is called example.c, enter:
+
+ gcc -g -I. example.c .libs/libgmp.a
+
+After installing, the command becomes: "gcc -g example.c -lgmp". Also, GMP is
+libtool based so you can use that to link if you want.
+
+Now try to run the example:
+
+ ./a.out 98365871231256752134 319378318340103345227
+ 31415926535897932384618573336104570964418
+
+The functions used here all operate on signed integers, and have names
+starting with "mpz_". There are many more such functions than used in these
+examples. See the chapter "Integer Functions" in the manual, for a complete
+list.
+
+There are two other main classes of functions in GMP. They operate on
+rational numbers and floating-point numbers, respectively. The chapters
+"Rational Number Functions", and "Floating-point Functions" document these
+classes.
+
+To run a set of tests, do "make check". This will take a while.
+
+To create the printable documentation from the texinfo source, type "make
+gmp.dvi" or "make gmp.ps". This requires various "tex" commands.
+
+To install the library, do "make install" (then you can use -lgmp instead of
+.libs/libgmp.a).
+
+If you decide to use GMP, it is a good idea you at least read the chapter "GMP
+Basics" in the manual.
+
+Some known build problems are noted in the "Installing GMP" chapter of
+the manual. Please report other problems to bug-gmp@gnu.org.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 78
+End:
diff --git a/rts/gmp/Makefile.am b/rts/gmp/Makefile.am
new file mode 100644
index 0000000000..b73b805c6e
--- /dev/null
+++ b/rts/gmp/Makefile.am
@@ -0,0 +1,197 @@
+## Process this file with automake to generate Makefile.in
+
+
+# Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# make check
+#
+# It'd be good if "make check" first did a "make all" or whatever to
+# build libgmp.la, but it's not clear how best to do this. Putting a
+# "check:" target is overridden by automake, and a "check-local:" runs
+# too late (due to depth-first subdirectory processing). For now it's
+# necessary to do "make && make check".
+#
+# MPF_OBJECTS etc
+#
+# Libtool needs all the .lo files passed to it if it's going to build
+# both a static and shared library. If a convenience library like
+# mpf/libmpf.la is passed then the resulting libgmp.a gets the PIC .lo
+# objects rather than the non-PIC .o's.
+#
+# Unfortunately this leads to the big lists of objects below. Something
+# like mpz/*.lo would probably work, but might risk missing something
+# out or getting something extra. The source files for each .lo are
+# listed in the Makefile.am's in the subdirectories.
+
+
+# Libtool -version-info for libgmp.la and libmp.la. See (libtool)Versioning
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+# AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+# CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp and libmp, only do it just before a release.
+#
+# GMP -version-info
+# release libgmp libmp
+# 2.0.x - -
+# 3.0 3:0:0 3:0:0
+# 3.0.1 3:1:0 3:0:0
+# 3.1 4:0:1 4:0:1
+# 3.1.1 4:1:1 4:1:1
+#
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which is what has been used on
+# Debian GNU/Linux packages of gmp 2. Pretend gmp 2 was 2:0:0, so the
+# interface changes for gmp 3 mean 3:0:0 is right.
+
+LIBGMP_LT_CURRENT = 4
+LIBGMP_LT_REVISION = 1
+LIBGMP_LT_AGE = 1
+
+LIBMP_LT_CURRENT = 4
+LIBMP_LT_REVISION = 1
+LIBMP_LT_AGE = 1
+
+
+AUTOMAKE_OPTIONS = gnu check-news no-dependencies ansi2knr
+
+SUBDIRS = mpn mpz mpq mpf mpbsd mpfr tests demos tune
+
+include_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION) $(MPFR_HEADERS_OPTION)
+EXTRA_HEADERS = mp.h
+
+lib_LTLIBRARIES = libgmp.la $(MPBSD_LTLIBRARIES_OPTION)
+
+EXTRA_DIST = .gdbinit gmp-impl.h longlong.h stack-alloc.h urandom.h doc macos
+
+DISTCLEANFILES = asm-syntax.h config.m4 @gmp_srclinks@
+
+
+MPF_OBJECTS = mpf/init.lo mpf/init2.lo mpf/set.lo mpf/set_ui.lo mpf/set_si.lo \
+ mpf/set_str.lo mpf/set_d.lo mpf/set_z.lo mpf/iset.lo mpf/iset_ui.lo \
+ mpf/iset_si.lo mpf/iset_str.lo mpf/iset_d.lo mpf/clear.lo mpf/get_str.lo \
+ mpf/dump.lo mpf/size.lo mpf/eq.lo mpf/reldiff.lo mpf/sqrt.lo mpf/random2.lo \
+ mpf/inp_str.lo mpf/out_str.lo mpf/add.lo mpf/add_ui.lo mpf/sub.lo \
+ mpf/sub_ui.lo mpf/ui_sub.lo mpf/mul.lo mpf/mul_ui.lo mpf/div.lo \
+ mpf/div_ui.lo mpf/cmp.lo mpf/cmp_ui.lo mpf/cmp_si.lo mpf/mul_2exp.lo \
+ mpf/div_2exp.lo mpf/abs.lo mpf/neg.lo mpf/set_q.lo mpf/get_d.lo \
+ mpf/set_dfl_prec.lo mpf/set_prc.lo mpf/set_prc_raw.lo mpf/get_prc.lo \
+ mpf/ui_div.lo mpf/sqrt_ui.lo mpf/floor.lo mpf/ceil.lo mpf/trunc.lo \
+ mpf/pow_ui.lo mpf/urandomb.lo mpf/swap.lo
+MPZ_OBJECTS = mpz/abs.lo mpz/add.lo mpz/add_ui.lo mpz/addmul_ui.lo mpz/and.lo \
+ mpz/array_init.lo mpz/bin_ui.lo mpz/bin_uiui.lo mpz/cdiv_q.lo \
+ mpz/cdiv_q_ui.lo mpz/cdiv_qr.lo mpz/cdiv_qr_ui.lo mpz/cdiv_r.lo \
+ mpz/cdiv_r_ui.lo mpz/cdiv_ui.lo mpz/clear.lo mpz/clrbit.lo mpz/cmp.lo \
+ mpz/cmp_si.lo mpz/cmp_ui.lo mpz/cmpabs.lo mpz/cmpabs_ui.lo mpz/com.lo \
+ mpz/divexact.lo mpz/dump.lo mpz/fac_ui.lo mpz/fdiv_q.lo mpz/fdiv_q_2exp.lo \
+ mpz/fdiv_q_ui.lo mpz/fdiv_qr.lo mpz/fdiv_qr_ui.lo mpz/fdiv_r.lo \
+ mpz/fdiv_r_2exp.lo mpz/fdiv_r_ui.lo mpz/fdiv_ui.lo mpz/fib_ui.lo \
+ mpz/fits_sint_p.lo mpz/fits_slong_p.lo mpz/fits_sshort_p.lo \
+ mpz/fits_uint_p.lo mpz/fits_ulong_p.lo mpz/fits_ushort_p.lo mpz/gcd.lo \
+ mpz/gcd_ui.lo mpz/gcdext.lo mpz/get_d.lo mpz/get_si.lo mpz/get_str.lo \
+ mpz/get_ui.lo mpz/getlimbn.lo mpz/hamdist.lo mpz/init.lo mpz/inp_raw.lo \
+ mpz/inp_str.lo mpz/invert.lo mpz/ior.lo mpz/iset.lo mpz/iset_d.lo \
+ mpz/iset_si.lo mpz/iset_str.lo mpz/iset_ui.lo mpz/jacobi.lo \
+ mpz/kronsz.lo mpz/kronuz.lo mpz/kronzs.lo mpz/kronzu.lo \
+ mpz/lcm.lo mpz/legendre.lo \
+ mpz/mod.lo mpz/mul.lo mpz/mul_2exp.lo mpz/mul_si.lo mpz/mul_ui.lo \
+ mpz/neg.lo mpz/nextprime.lo mpz/out_raw.lo mpz/out_str.lo mpz/perfpow.lo mpz/perfsqr.lo \
+ mpz/popcount.lo mpz/pow_ui.lo mpz/powm.lo mpz/powm_ui.lo mpz/pprime_p.lo \
+ mpz/random.lo mpz/random2.lo mpz/realloc.lo mpz/remove.lo mpz/root.lo \
+ mpz/rrandomb.lo \
+ mpz/scan0.lo mpz/scan1.lo mpz/set.lo mpz/set_d.lo mpz/set_f.lo mpz/set_q.lo \
+ mpz/set_si.lo mpz/set_str.lo mpz/set_ui.lo mpz/setbit.lo mpz/size.lo \
+ mpz/sizeinbase.lo mpz/sqrt.lo mpz/sqrtrem.lo mpz/sub.lo mpz/sub_ui.lo \
+ mpz/swap.lo mpz/tdiv_ui.lo mpz/tdiv_q.lo mpz/tdiv_q_2exp.lo mpz/tdiv_q_ui.lo \
+ mpz/tdiv_qr.lo mpz/tdiv_qr_ui.lo mpz/tdiv_r.lo mpz/tdiv_r_2exp.lo \
+ mpz/tdiv_r_ui.lo mpz/tstbit.lo mpz/ui_pow_ui.lo mpz/urandomb.lo \
+ mpz/urandomm.lo mpz/xor.lo
+MPQ_OBJECTS = mpq/add.lo mpq/canonicalize.lo mpq/clear.lo mpq/cmp.lo \
+ mpq/cmp_ui.lo mpq/div.lo mpq/get_d.lo mpq/get_den.lo mpq/get_num.lo \
+ mpq/init.lo mpq/inv.lo mpq/mul.lo mpq/neg.lo mpq/out_str.lo \
+ mpq/set.lo mpq/set_den.lo \
+ mpq/set_num.lo mpq/set_si.lo mpq/set_ui.lo mpq/sub.lo mpq/equal.lo \
+ mpq/set_z.lo mpq/set_d.lo mpq/swap.lo
+MPN_OBJECTS = @mpn_objs_in_libgmp@
+
+MPBSD_OBJECTS = mpbsd/add.lo mpbsd/tdiv_qr.lo mpbsd/move.lo mpbsd/powm.lo \
+ mpbsd/sub.lo mpbsd/cmp.lo mpbsd/mfree.lo mpbsd/mtox.lo mpbsd/realloc.lo \
+ mpbsd/gcd.lo mpbsd/itom.lo mpbsd/min.lo mpbsd/mul.lo mpbsd/mout.lo \
+ mpbsd/pow_ui.lo mpbsd/sdiv.lo mpbsd/sqrtrem.lo mpbsd/xtom.lo
+
+# FIXME: Add mpfr/rnd_mode.lo when it's clean.
+MPFR_OBJECTS = mpfr/add.lo mpfr/div_2exp.lo mpfr/neg.lo mpfr/set_dfl_prec.lo \
+ mpfr/set_str_raw.lo mpfr/agm.lo mpfr/get_str.lo mpfr/print_raw.lo \
+ mpfr/set_dfl_rnd.lo mpfr/sqrt.lo mpfr/clear.lo mpfr/init.lo \
+ mpfr/set_f.lo mpfr/sub.lo mpfr/cmp.lo mpfr/mul.lo mpfr/round.lo \
+ mpfr/set_prec.lo mpfr/cmp_ui.lo mpfr/mul_2exp.lo mpfr/set.lo mpfr/set_si.lo \
+ mpfr/div.lo mpfr/mul_ui.lo mpfr/set_d.lo mpfr/pow.lo mpfr/out_str.lo \
+ mpfr/pi.lo mpfr/set_z.lo mpfr/add_ulp.lo mpfr/log2.lo mpfr/random.lo \
+ mpfr/log.lo mpfr/exp.lo mpfr/div_ui.lo mpfr/zeta.lo mpfr/karadiv.lo \
+ mpfr/karasqrt.lo mpfr/print_rnd_mode.lo
+
+
+if WANT_MPFR
+MPFR_HEADERS_OPTION = mpfr/mpfr.h
+MPFR_OBJECTS_OPTION = $(MPFR_OBJECTS)
+MPFR_LIBADD_OPTION = -lm
+endif
+libgmp_la_SOURCES = assert.c compat.c errno.c memory.c mp_set_fns.c \
+ mp_clz_tab.c mp_minv_tab.c \
+ rand.c randclr.c randlc.c randlc2x.c randraw.c randsd.c \
+ randsdui.c version.c stack-alloc.c mp_bpl.c extract-dbl.c insert-dbl.c
+libgmp_la_DEPENDENCIES = \
+ $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPN_OBJECTS) $(MPQ_OBJECTS) \
+ $(MPFR_OBJECTS_OPTION)
+libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES) $(MPFR_LIBADD_OPTION)
+libgmp_la_LDFLAGS = \
+ -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
+
+
+if WANT_MPBSD
+MPBSD_HEADERS_OPTION = mp.h
+MPBSD_LTLIBRARIES_OPTION = libmp.la
+endif
+libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \
+ mp_minv_tab.c mp_set_fns.c stack-alloc.c
+libmp_la_DEPENDENCIES = $(MPBSD_OBJECTS) $(MPN_OBJECTS) \
+ mpz/add.lo mpz/clear.lo mpz/cmp.lo mpz/init.lo mpz/mod.lo mpz/mul.lo \
+ mpz/mul_2exp.lo mpz/realloc.lo mpz/set.lo mpz/set_ui.lo mpz/tdiv_r.lo \
+ mpz/sub.lo
+libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
+libmp_la_LDFLAGS = \
+ -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)
+
+
+info_TEXINFOS = gmp.texi
+
+
+# Don't ship CVS directories or emacs backups.
+dist-hook:
+ -find $(distdir) \( -name CVS -type d \) -o -name "*.~*" \
+ | xargs rm -rf
diff --git a/rts/gmp/Makefile.in b/rts/gmp/Makefile.in
new file mode 100644
index 0000000000..e63383e7a7
--- /dev/null
+++ b/rts/gmp/Makefile.in
@@ -0,0 +1,932 @@
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = .
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+
+@SET_MAKE@
+build_alias = @build_alias@
+build_triplet = @build@
+host_alias = @host_alias@
+host_triplet = @host@
+target_alias = @target_alias@
+target_triplet = @target@
+AMDEP = @AMDEP@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+EXEEXT = @EXEEXT@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+gmp_srclinks = @gmp_srclinks@
+install_sh = @install_sh@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+
+# Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+# Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+# make check
+#
+# It'd be good if "make check" first did a "make all" or whatever to
+# build libgmp.la, but it's not clear how best to do this. Putting a
+# "check:" target is overridden by automake, and a "check-local:" runs
+# too late (due to depth-first subdirectory processing). For now it's
+# necessary to do "make && make check".
+#
+# MPF_OBJECTS etc
+#
+# Libtool needs all the .lo files passed to it if it's going to build
+# both a static and shared library. If a convenience library like
+# mpf/libmpf.la is passed then the resulting libgmp.a gets the PIC .lo
+# objects rather than the non-PIC .o's.
+#
+# Unfortunately this leads to the big lists of objects below. Something
+# like mpz/*.lo would probably work, but might risk missing something
+# out or getting something extra. The source files for each .lo are
+# listed in the Makefile.am's in the subdirectories.
+
+# Libtool -version-info for libgmp.la and libmp.la. See (libtool)Versioning
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+# AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+# CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp and libmp, only do it just before a release.
+#
+# GMP -version-info
+# release libgmp libmp
+# 2.0.x - -
+# 3.0 3:0:0 3:0:0
+# 3.0.1 3:1:0 3:0:0
+# 3.1 4:0:1 4:0:1
+# 3.1.1 4:1:1 4:1:1
+#
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which is what has been used on
+# Debian GNU/Linux packages of gmp 2. Pretend gmp 2 was 2:0:0, so the
+# interface changes for gmp 3 mean 3:0:0 is right.
+
+
+LIBGMP_LT_CURRENT = 4
+LIBGMP_LT_REVISION = 1
+LIBGMP_LT_AGE = 1
+
+LIBMP_LT_CURRENT = 4
+LIBMP_LT_REVISION = 1
+LIBMP_LT_AGE = 1
+
+AUTOMAKE_OPTIONS = gnu check-news no-dependencies ansi2knr
+
+SUBDIRS = mpn mpz
+
+include_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION) $(MPFR_HEADERS_OPTION)
+EXTRA_HEADERS = mp.h
+
+lib_LTLIBRARIES = libgmp.la $(MPBSD_LTLIBRARIES_OPTION)
+
+EXTRA_DIST = .gdbinit gmp-impl.h longlong.h stack-alloc.h urandom.h doc macos
+
+DISTCLEANFILES = asm-syntax.h config.m4 @gmp_srclinks@
+
+MPZ_OBJECTS = mpz/abs.lo mpz/add.lo mpz/add_ui.lo mpz/addmul_ui.lo mpz/and.lo \
+ mpz/array_init.lo mpz/bin_ui.lo mpz/bin_uiui.lo mpz/cdiv_q.lo \
+ mpz/cdiv_q_ui.lo mpz/cdiv_qr.lo mpz/cdiv_qr_ui.lo mpz/cdiv_r.lo \
+ mpz/cdiv_r_ui.lo mpz/cdiv_ui.lo mpz/clear.lo mpz/clrbit.lo mpz/cmp.lo \
+ mpz/cmp_si.lo mpz/cmp_ui.lo mpz/cmpabs.lo mpz/cmpabs_ui.lo mpz/com.lo \
+ mpz/divexact.lo mpz/dump.lo mpz/fac_ui.lo mpz/fdiv_q.lo mpz/fdiv_q_2exp.lo \
+ mpz/fdiv_q_ui.lo mpz/fdiv_qr.lo mpz/fdiv_qr_ui.lo mpz/fdiv_r.lo \
+ mpz/fdiv_r_2exp.lo mpz/fdiv_r_ui.lo mpz/fdiv_ui.lo mpz/fib_ui.lo \
+ mpz/fits_sint_p.lo mpz/fits_slong_p.lo mpz/fits_sshort_p.lo \
+ mpz/fits_uint_p.lo mpz/fits_ulong_p.lo mpz/fits_ushort_p.lo mpz/gcd.lo \
+ mpz/gcd_ui.lo mpz/gcdext.lo mpz/get_d.lo mpz/get_si.lo mpz/get_str.lo \
+ mpz/get_ui.lo mpz/getlimbn.lo mpz/hamdist.lo mpz/init.lo mpz/inp_raw.lo \
+ mpz/inp_str.lo mpz/invert.lo mpz/ior.lo mpz/iset.lo mpz/iset_d.lo \
+ mpz/iset_si.lo mpz/iset_str.lo mpz/iset_ui.lo mpz/jacobi.lo \
+ mpz/kronsz.lo mpz/kronuz.lo mpz/kronzs.lo mpz/kronzu.lo \
+ mpz/lcm.lo mpz/legendre.lo \
+ mpz/mod.lo mpz/mul.lo mpz/mul_2exp.lo mpz/mul_si.lo mpz/mul_ui.lo \
+ mpz/neg.lo mpz/nextprime.lo mpz/out_raw.lo mpz/out_str.lo mpz/perfpow.lo mpz/perfsqr.lo \
+ mpz/popcount.lo mpz/pow_ui.lo mpz/powm.lo mpz/powm_ui.lo mpz/pprime_p.lo \
+ mpz/random.lo mpz/random2.lo mpz/realloc.lo mpz/remove.lo mpz/root.lo \
+ mpz/rrandomb.lo \
+ mpz/scan0.lo mpz/scan1.lo mpz/set.lo mpz/set_d.lo mpz/set_f.lo mpz/set_q.lo \
+ mpz/set_si.lo mpz/set_str.lo mpz/set_ui.lo mpz/setbit.lo mpz/size.lo \
+ mpz/sizeinbase.lo mpz/sqrt.lo mpz/sqrtrem.lo mpz/sub.lo mpz/sub_ui.lo \
+ mpz/swap.lo mpz/tdiv_ui.lo mpz/tdiv_q.lo mpz/tdiv_q_2exp.lo mpz/tdiv_q_ui.lo \
+ mpz/tdiv_qr.lo mpz/tdiv_qr_ui.lo mpz/tdiv_r.lo mpz/tdiv_r_2exp.lo \
+ mpz/tdiv_r_ui.lo mpz/tstbit.lo mpz/ui_pow_ui.lo mpz/urandomb.lo \
+ mpz/urandomm.lo mpz/xor.lo
+
+MPN_OBJECTS = @mpn_objs_in_libgmp@
+
+MPBSD_OBJECTS = mpbsd/add.lo mpbsd/tdiv_qr.lo mpbsd/move.lo mpbsd/powm.lo \
+ mpbsd/sub.lo mpbsd/cmp.lo mpbsd/mfree.lo mpbsd/mtox.lo mpbsd/realloc.lo \
+ mpbsd/gcd.lo mpbsd/itom.lo mpbsd/min.lo mpbsd/mul.lo mpbsd/mout.lo \
+ mpbsd/pow_ui.lo mpbsd/sdiv.lo mpbsd/sqrtrem.lo mpbsd/xtom.lo
+
+
+
+@WANT_MPFR_TRUE@MPFR_HEADERS_OPTION = @WANT_MPFR_TRUE@mpfr/mpfr.h
+@WANT_MPFR_TRUE@MPFR_OBJECTS_OPTION = @WANT_MPFR_TRUE@$(MPFR_OBJECTS)
+@WANT_MPFR_TRUE@MPFR_LIBADD_OPTION = @WANT_MPFR_TRUE@-lm
+libgmp_la_SOURCES = assert.c compat.c errno.c memory.c mp_set_fns.c \
+ mp_clz_tab.c mp_minv_tab.c \
+ version.c stack-alloc.c mp_bpl.c extract-dbl.c insert-dbl.c
+
+libgmp_la_DEPENDENCIES = \
+ $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPN_OBJECTS) $(MPQ_OBJECTS) \
+ $(MPFR_OBJECTS_OPTION)
+
+libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES) $(MPFR_LIBADD_OPTION)
+libgmp_la_LDFLAGS = \
+ -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
+
+
+@WANT_MPBSD_TRUE@MPBSD_HEADERS_OPTION = @WANT_MPBSD_TRUE@mp.h
+@WANT_MPBSD_TRUE@MPBSD_LTLIBRARIES_OPTION = @WANT_MPBSD_TRUE@libmp.la
+libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \
+ mp_minv_tab.c mp_set_fns.c stack-alloc.c
+
+libmp_la_DEPENDENCIES = $(MPBSD_OBJECTS) $(MPN_OBJECTS) \
+ mpz/add.lo mpz/clear.lo mpz/cmp.lo mpz/init.lo mpz/mod.lo mpz/mul.lo \
+ mpz/mul_2exp.lo mpz/realloc.lo mpz/set.lo mpz/set_ui.lo mpz/tdiv_r.lo \
+ mpz/sub.lo
+
+libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
+libmp_la_LDFLAGS = \
+ -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)
+
+
+info_TEXINFOS = gmp.texi
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(lib_LTLIBRARIES)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I.
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+ANSI2KNR = @ANSI2KNR@
+am_libgmp_la_OBJECTS = assert$U.lo compat$U.lo errno$U.lo memory$U.lo \
+mp_set_fns$U.lo mp_clz_tab$U.lo mp_minv_tab$U.lo rand$U.lo randclr$U.lo \
+randlc$U.lo randlc2x$U.lo randraw$U.lo randsd$U.lo randsdui$U.lo \
+version$U.lo stack-alloc$U.lo mp_bpl$U.lo extract-dbl$U.lo \
+insert-dbl$U.lo
+libgmp_la_OBJECTS = $(am_libgmp_la_OBJECTS)
+am_libmp_la_OBJECTS = assert$U.lo errno$U.lo memory$U.lo mp_bpl$U.lo \
+mp_clz_tab$U.lo mp_minv_tab$U.lo mp_set_fns$U.lo stack-alloc$U.lo
+libmp_la_OBJECTS = $(am_libmp_la_OBJECTS)
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CFLAGS = @CFLAGS@
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libgmp_la_SOURCES) $(libmp_la_SOURCES)
+TEXI2DVI = texi2dvi
+# INFO_DEPS = gmp.info
+DVIS = gmp.dvi
+TEXINFOS = gmp.texi
+HEADERS = $(include_HEADERS)
+
+DIST_COMMON = README $(EXTRA_HEADERS) $(include_HEADERS) ./stamp-h.in \
+AUTHORS COPYING COPYING.LIB ChangeLog INSTALL Makefile.am Makefile.in \
+NEWS acconfig.h acinclude.m4 aclocal.m4 ansi2knr.1 ansi2knr.c \
+config.guess config.in config.sub configure configure.in depcomp \
+install-sh ltconfig ltmain.sh mdate-sh missing mkinstalldirs stamp-vti \
+texinfo.tex version.texi
+
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+GZIP_ENV = --best
+depcomp =
+SOURCES = $(libgmp_la_SOURCES) $(libmp_la_SOURCES)
+OBJECTS = $(am_libgmp_la_OBJECTS) $(am_libmp_la_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .c .dvi .info .lo .o .obj .ps .texi .texinfo .txi
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ configure.in acinclude.m4
+ cd $(srcdir) && $(ACLOCAL)
+
+config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ $(SHELL) ./config.status --recheck
+$(srcdir)/configure: @MAINTAINER_MODE_TRUE@$(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)
+ cd $(srcdir) && $(AUTOCONF)
+
+config.h: stamp-h
+ @if test ! -f $@; then \
+ rm -f stamp-h; \
+ $(MAKE) stamp-h; \
+ else :; fi
+stamp-h: $(srcdir)/config.in $(top_builddir)/config.status
+ @rm -f stamp-h stamp-hT
+ @echo timestamp > stamp-hT 2> /dev/null
+ cd $(top_builddir) \
+ && CONFIG_FILES= CONFIG_HEADERS=config.h:config.in \
+ $(SHELL) ./config.status
+ @mv stamp-hT stamp-h
+$(srcdir)/config.in: @MAINTAINER_MODE_TRUE@$(srcdir)/./stamp-h.in
+ @if test ! -f $@; then \
+ rm -f $(srcdir)/./stamp-h.in; \
+ $(MAKE) $(srcdir)/./stamp-h.in; \
+ else :; fi
+$(srcdir)/./stamp-h.in: $(top_srcdir)/configure.in $(ACLOCAL_M4) acconfig.h
+ @rm -f $(srcdir)/./stamp-h.in $(srcdir)/./stamp-h.inT
+ @echo timestamp > $(srcdir)/./stamp-h.inT 2> /dev/null
+ cd $(top_srcdir) && $(AUTOHEADER)
+ @mv $(srcdir)/./stamp-h.inT $(srcdir)/./stamp-h.in
+
+mostlyclean-hdr:
+
+clean-hdr:
+
+distclean-hdr:
+ -rm -f config.h
+
+maintainer-clean-hdr:
+
+mostlyclean-libLTLIBRARIES:
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+
+distclean-libLTLIBRARIES:
+
+maintainer-clean-libLTLIBRARIES:
+
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(libdir)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ echo " $(LIBTOOL) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libdir)/$$p; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ echo " $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p"; \
+ $(LIBTOOL) --mode=uninstall rm -f $(DESTDIR)$(libdir)/$$p; \
+ done
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+ -rm -f *.$(OBJEXT)
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+
+maintainer-clean-libtool:
+
+mostlyclean-krextra:
+
+clean-krextra:
+ -rm -f ansi2knr
+
+distclean-krextra:
+
+maintainer-clean-krextra:
+ansi2knr: ansi2knr.$(OBJEXT)
+ $(LINK) ansi2knr.$(OBJEXT) $(LIBS)
+ansi2knr.$(OBJEXT): $(CONFIG_HEADER)
+
+
+mostlyclean-kr:
+ -rm -f *_.c
+
+clean-kr:
+
+distclean-kr:
+
+maintainer-clean-kr:
+
+gmp.dll: libgmp.a
+ dllwrap -mno-cygwin --target=i386-unknown-mingw32 \
+ --export-all --dllname gmp.dll --output-lib=libgmp_imp.a \
+ -o gmp.dll libgmp.a
+
+libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libgmp_la_LDFLAGS) $(libgmp_la_OBJECTS) $(libgmp_la_LIBADD) $(LIBS)
+
+libmp.la: $(libmp_la_OBJECTS) $(libmp_la_DEPENDENCIES)
+ $(LINK) -rpath $(libdir) $(libmp_la_LDFLAGS) $(libmp_la_OBJECTS) $(libmp_la_LIBADD) $(LIBS)
+.c.o:
+ $(COMPILE) -c $<
+.c.obj:
+ $(COMPILE) -c `cygpath -w $<`
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+assert_.c: assert.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/assert.c; then echo $(srcdir)/assert.c; else echo assert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > assert_.c
+compat_.c: compat.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/compat.c; then echo $(srcdir)/compat.c; else echo compat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > compat_.c
+errno_.c: errno.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/errno.c; then echo $(srcdir)/errno.c; else echo errno.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > errno_.c
+extract-dbl_.c: extract-dbl.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/extract-dbl.c; then echo $(srcdir)/extract-dbl.c; else echo extract-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > extract-dbl_.c
+insert-dbl_.c: insert-dbl.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/insert-dbl.c; then echo $(srcdir)/insert-dbl.c; else echo insert-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > insert-dbl_.c
+memory_.c: memory.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > memory_.c
+mp_bpl_.c: mp_bpl.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bpl.c; then echo $(srcdir)/mp_bpl.c; else echo mp_bpl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_bpl_.c
+mp_clz_tab_.c: mp_clz_tab.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_clz_tab.c; then echo $(srcdir)/mp_clz_tab.c; else echo mp_clz_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_clz_tab_.c
+mp_minv_tab_.c: mp_minv_tab.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_minv_tab.c; then echo $(srcdir)/mp_minv_tab.c; else echo mp_minv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_minv_tab_.c
+mp_set_fns_.c: mp_set_fns.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_set_fns.c; then echo $(srcdir)/mp_set_fns.c; else echo mp_set_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > mp_set_fns_.c
+rand_.c: rand.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rand.c; then echo $(srcdir)/rand.c; else echo rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > rand_.c
+randclr_.c: randclr.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randclr.c; then echo $(srcdir)/randclr.c; else echo randclr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randclr_.c
+randlc_.c: randlc.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc.c; then echo $(srcdir)/randlc.c; else echo randlc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randlc_.c
+randlc2x_.c: randlc2x.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2x.c; then echo $(srcdir)/randlc2x.c; else echo randlc2x.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randlc2x_.c
+randraw_.c: randraw.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randraw.c; then echo $(srcdir)/randraw.c; else echo randraw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randraw_.c
+randsd_.c: randsd.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsd.c; then echo $(srcdir)/randsd.c; else echo randsd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randsd_.c
+randsdui_.c: randsdui.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsdui.c; then echo $(srcdir)/randsdui.c; else echo randsdui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > randsdui_.c
+stack-alloc_.c: stack-alloc.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/stack-alloc.c; then echo $(srcdir)/stack-alloc.c; else echo stack-alloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > stack-alloc_.c
+version_.c: version.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > version_.c
+assert_.$(OBJEXT) assert_.lo compat_.$(OBJEXT) compat_.lo \
+errno_.$(OBJEXT) errno_.lo extract-dbl_.$(OBJEXT) extract-dbl_.lo \
+insert-dbl_.$(OBJEXT) insert-dbl_.lo memory_.$(OBJEXT) memory_.lo \
+mp_bpl_.$(OBJEXT) mp_bpl_.lo mp_clz_tab_.$(OBJEXT) mp_clz_tab_.lo \
+mp_minv_tab_.$(OBJEXT) mp_minv_tab_.lo mp_set_fns_.$(OBJEXT) \
+mp_set_fns_.lo rand_.$(OBJEXT) rand_.lo randclr_.$(OBJEXT) randclr_.lo \
+randlc_.$(OBJEXT) randlc_.lo randlc2x_.$(OBJEXT) randlc2x_.lo \
+randraw_.$(OBJEXT) randraw_.lo randsd_.$(OBJEXT) randsd_.lo \
+randsdui_.$(OBJEXT) randsdui_.lo stack-alloc_.$(OBJEXT) stack-alloc_.lo \
+version_.$(OBJEXT) version_.lo : $(ANSI2KNR)
+
+$(srcdir)/version.texi: @MAINTAINER_MODE_TRUE@stamp-vti
+ @:
+
+$(srcdir)/stamp-vti: gmp.texi $(top_srcdir)/configure.in
+ @echo "@set UPDATED `$(SHELL) $(srcdir)/mdate-sh $(srcdir)/gmp.texi`" > vti.tmp
+ @echo "@set EDITION $(VERSION)" >> vti.tmp
+ @echo "@set VERSION $(VERSION)" >> vti.tmp
+ @cmp -s vti.tmp $(srcdir)/version.texi \
+ || (echo "Updating $(srcdir)/version.texi"; \
+ cp vti.tmp $(srcdir)/version.texi)
+ -@rm -f vti.tmp
+ @cp $(srcdir)/version.texi $@
+
+mostlyclean-vti:
+ -rm -f vti.tmp
+
+clean-vti:
+
+distclean-vti:
+
+maintainer-clean-vti:
+ -@MAINTAINER_MODE_TRUE@rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi
+
+# gmp.info: gmp.texi version.texi
+# gmp.dvi: gmp.texi version.texi
+
+
+DVIPS = dvips
+
+.texi.info:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texi.dvi:
+ TEXINPUTS=$(srcdir):$$TEXINPUTS \
+ MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $<
+
+.texi:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texinfo.info:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texinfo:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.texinfo.dvi:
+ TEXINPUTS=$(srcdir):$$TEXINPUTS \
+ MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $<
+
+.txi.info:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+
+.txi.dvi:
+ TEXINPUTS=$(srcdir):$$TEXINPUTS \
+ MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $<
+
+.txi:
+ @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9]
+ cd $(srcdir) \
+ && $(MAKEINFO) `echo $< | sed 's,.*/,,'`
+.dvi.ps:
+ $(DVIPS) $< -o $@
+
+install-info-am: $(INFO_DEPS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(infodir)
+ @list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ d=$(srcdir); \
+ for ifile in `CDPATH=: && cd $$d && echo $$file $$file-[0-9] $$file-[0-9][0-9]`; do \
+ if test -f $$d/$$ifile; then \
+ echo " $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile"; \
+ $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile; \
+ else : ; fi; \
+ done; \
+ done
+ @$(POST_INSTALL)
+ @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \
+ list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ echo " install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file";\
+ install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file || :;\
+ done; \
+ else : ; fi
+
+uninstall-info:
+ $(PRE_UNINSTALL)
+ @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \
+ list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ echo " install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file"; \
+ install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file; \
+ done; \
+ else :; fi
+ @$(NORMAL_UNINSTALL)
+ @list='$(INFO_DEPS)'; \
+ for file in $$list; do \
+ (if cd $(DESTDIR)$(infodir); then \
+ echo " rm -f $$file $$file-[0-9] $$file-[0-9][0-9])"; \
+ rm -f $$file $$file-[0-9] $$file-[0-9][0-9]; \
+ else :; fi); \
+ done
+
+dist-info: $(INFO_DEPS)
+ list='$(INFO_DEPS)'; \
+ for base in $$list; do \
+ d=$(srcdir); \
+ for file in `CDPATH=: && cd $$d && eval echo $$base*`; do \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file; \
+ done; \
+ done
+
+mostlyclean-aminfo:
+ -rm -f gmp.aux gmp.cp gmp.cps gmp.dvi gmp.fn gmp.fns gmp.pgs gmp.ky \
+ gmp.kys gmp.ps gmp.log gmp.pg gmp.toc gmp.tp gmp.tps gmp.vr \
+ gmp.vrs gmp.op gmp.tr gmp.cv gmp.cn gmp.cm gmp.ov
+
+clean-aminfo:
+
+distclean-aminfo:
+
+maintainer-clean-aminfo:
+ cd $(srcdir) && for i in $(INFO_DEPS); do \
+ rm -f $$i; \
+ if test "`echo $$i-[0-9]*`" != "$$i-[0-9]*"; then \
+ rm -f $$i-[0-9]*; \
+ fi; \
+ done
+
+install-includeHEADERS: $(include_HEADERS)
+ @$(NORMAL_INSTALL)
+ $(mkinstalldirs) $(DESTDIR)$(includedir)
+ @list='$(include_HEADERS)'; for p in $$list; do \
+ if test -f "$$p"; then d= ; else d="$(srcdir)/"; fi; \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$f"; \
+ $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/$$f; \
+ done
+
+uninstall-includeHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(include_HEADERS)'; for p in $$list; do \
+ f="`echo $$p | sed -e 's|^.*/||'`"; \
+ echo " rm -f $(DESTDIR)$(includedir)/$$f"; \
+ rm -f $(DESTDIR)$(includedir)/$$f; \
+ done
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+
+all-recursive install-data-recursive install-exec-recursive \
+installdirs-recursive install-recursive uninstall-recursive \
+check-recursive installcheck-recursive info-recursive dvi-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \
+ rev="$$subdir $$rev"; \
+ if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \
+ done; \
+ test "$$dot_seen" = "no" && rev=". $$rev"; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) config.in $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)config.in$$unique$(LISP)$$tags" \
+ || etags $(ETAGS_ARGS) $$tags config.in $$unique $(LISP)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(PACKAGE)-$(VERSION)
+top_distdir = $(distdir)
+
+
+# This target untars the dist file and tries a VPATH configuration. Then
+# it guarantees that the distribution is self-contained by making another
+# tarfile.
+distcheck: dist
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+ GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(AMTAR) xf -
+ chmod -R a-w $(distdir); chmod a+w $(distdir)
+ mkdir $(distdir)/=build
+ mkdir $(distdir)/=inst
+ chmod a-w $(distdir)
+ dc_install_base=`CDPATH=: && cd $(distdir)/=inst && pwd` \
+ && cd $(distdir)/=build \
+ && ../configure --srcdir=.. --prefix=$$dc_install_base \
+ && $(MAKE) $(AM_MAKEFLAGS) \
+ && $(MAKE) $(AM_MAKEFLAGS) dvi \
+ && $(MAKE) $(AM_MAKEFLAGS) check \
+ && $(MAKE) $(AM_MAKEFLAGS) install \
+ && $(MAKE) $(AM_MAKEFLAGS) installcheck \
+ && $(MAKE) $(AM_MAKEFLAGS) uninstall \
+ && test `find $$dc_install_base -type f -print | wc -l` -le 1 \
+ && $(MAKE) $(AM_MAKEFLAGS) dist \
+ && $(MAKE) $(AM_MAKEFLAGS) distclean \
+ && rm -f $(distdir).tar.gz \
+ && test `find . -type f -print | wc -l` -eq 0
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+ @banner="$(distdir).tar.gz is ready for distribution"; \
+ dashes=`echo "$$banner" | sed s/./=/g`; \
+ echo "$$dashes"; \
+ echo "$$banner"; \
+ echo "$$dashes"
+dist: distdir
+ -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
+ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
+ || chmod -R a+r $(distdir)
+ $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+dist-all: distdir
+ -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
+ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \
+ || chmod -R a+r $(distdir)
+ $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+distdir: $(DISTFILES)
+ @if sed 15q $(srcdir)/NEWS | fgrep -e "$(VERSION)" > /dev/null; then :; else \
+ echo "NEWS not updated; not releasing" 1>&2; \
+ exit 1; \
+ fi
+ -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir)
+ mkdir $(distdir)
+ $(mkinstalldirs) $(distdir)/mpfr
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pR $$d/$$file $(distdir); \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+ for subdir in $(SUBDIRS); do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \
+ || exit 1; \
+ fi; \
+ done
+ $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-info
+ $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-hook
+info-am: $(INFO_DEPS)
+info: info-recursive
+dvi-am: $(DVIS)
+dvi: dvi-recursive
+check-am: all-am
+check: check-recursive
+installcheck-am:
+installcheck: installcheck-recursive
+all-recursive-am: config.h
+ $(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+install-exec-am: install-libLTLIBRARIES
+install-exec: install-exec-recursive
+
+install-data-am: install-info-am install-includeHEADERS
+install-data: install-data-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-recursive
+uninstall-am: uninstall-libLTLIBRARIES uninstall-info \
+ uninstall-includeHEADERS
+uninstall: uninstall-recursive
+all-am: Makefile $(INFO_DEPS) $(ANSI2KNR) $(LTLIBRARIES) $(HEADERS) \
+ config.h
+all-redirect: all-recursive-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
+installdirs: installdirs-recursive
+installdirs-am:
+ $(mkinstalldirs) $(DESTDIR)$(libdir) $(DESTDIR)$(infodir) \
+ $(DESTDIR)$(includedir)
+
+
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+ -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
+
+maintainer-clean-generic:
+ -rm -f Makefile.in
+mostlyclean-am: mostlyclean-hdr mostlyclean-libLTLIBRARIES \
+ mostlyclean-compile mostlyclean-libtool \
+ mostlyclean-krextra mostlyclean-kr mostlyclean-vti \
+ mostlyclean-aminfo mostlyclean-tags mostlyclean-generic
+
+mostlyclean: mostlyclean-recursive
+
+clean-am: clean-hdr clean-libLTLIBRARIES clean-compile clean-libtool \
+ clean-krextra clean-kr clean-vti clean-aminfo \
+ clean-tags clean-generic mostlyclean-am
+
+clean: clean-recursive
+
+distclean-am: distclean-hdr distclean-libLTLIBRARIES distclean-compile \
+ distclean-libtool distclean-krextra distclean-kr \
+ distclean-vti distclean-aminfo distclean-tags \
+ distclean-generic clean-am
+ -rm -f libtool
+
+distclean: distclean-recursive
+ -rm -f config.status
+
+maintainer-clean-am: maintainer-clean-hdr \
+ maintainer-clean-libLTLIBRARIES \
+ maintainer-clean-compile maintainer-clean-libtool \
+ maintainer-clean-krextra maintainer-clean-kr \
+ maintainer-clean-vti maintainer-clean-aminfo \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f config.status
+
+.PHONY: mostlyclean-hdr distclean-hdr clean-hdr maintainer-clean-hdr \
+mostlyclean-libLTLIBRARIES distclean-libLTLIBRARIES \
+clean-libLTLIBRARIES maintainer-clean-libLTLIBRARIES \
+uninstall-libLTLIBRARIES install-libLTLIBRARIES mostlyclean-compile \
+distclean-compile clean-compile maintainer-clean-compile \
+mostlyclean-libtool distclean-libtool clean-libtool \
+maintainer-clean-libtool mostlyclean-krextra distclean-krextra \
+clean-krextra maintainer-clean-krextra mostlyclean-kr distclean-kr \
+clean-kr maintainer-clean-kr mostlyclean-vti distclean-vti clean-vti \
+maintainer-clean-vti install-info-am uninstall-info mostlyclean-aminfo \
+distclean-aminfo clean-aminfo maintainer-clean-aminfo \
+uninstall-includeHEADERS install-includeHEADERS install-recursive \
+uninstall-recursive install-data-recursive uninstall-data-recursive \
+install-exec-recursive uninstall-exec-recursive installdirs-recursive \
+uninstalldirs-recursive all-recursive check-recursive \
+installcheck-recursive info-recursive dvi-recursive \
+mostlyclean-recursive distclean-recursive clean-recursive \
+maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
+distclean-tags clean-tags maintainer-clean-tags distdir info-am info \
+dvi-am dvi check check-am installcheck-am installcheck all-recursive-am \
+install-exec-am install-exec install-data-am install-data install-am \
+install uninstall-am uninstall all-redirect all-am all install-strip \
+installdirs-am installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+# Don't ship CVS directories or emacs backups.
+dist-hook:
+ -find $(distdir) \( -name CVS -type d \) -o -name "*.~*" \
+ | xargs rm -rf
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rts/gmp/NEWS b/rts/gmp/NEWS
new file mode 100644
index 0000000000..3b549d59f3
--- /dev/null
+++ b/rts/gmp/NEWS
@@ -0,0 +1,136 @@
+Changes between MP version 3.1 and 3.1.1
+
+* Bug fixes for division (rare), mpf_get_str, FFT, and miscellaneous minor
+ things.
+
+Changes between MP version 3.0 and 3.1
+
+* Bug fixes.
+* Improved `make check' running more tests.
+* Tuned algorithm cutoff points for many machines. This will improve speed for
+ a lot of operations, in some cases by a large amount.
+* Major speed improvements: Alpha 21264.
+* Some speed improvements: Cray vector computers, AMD K6 and Athlon, Intel P5
+ and Pentium Pro/II/III.
+* The mpf_get_prec function now works as it did in GMP 2.
+* New utilities for auto-tuning and speed measuring.
+* Multiplication now optionally uses FFT for very large operands. (To enable
+ it, pass --enable-fft to configure.)
+* Support for new systems: Solaris running on x86, FreeBSD 5, HP-UX 11, Cray
+ vector computers, Rhapsody, Nextstep/Openstep, MacOS.
+* Support for shared libraries on 32-bit HPPA.
+* New integer functions: mpz_mul_si, mpz_odd_p, mpz_even_p.
+* New Kronecker symbol functions: mpz_kronecker_si, mpz_kronecker_ui,
+ mpz_si_kronecker, mpz_ui_kronecker.
+* New rational functions: mpq_out_str, mpq_swap.
+* New float functions: mpf_swap.
+* New mpn functions: mpn_divexact_by3c, mpn_tdiv_qr.
+* New EXPERIMENTAL function layer for accurate floating-point arithmetic, mpfr.
+ To try it, pass --enable-mpfr to configure. See the mpfr subdirectory for
+ more information; it is not documented in the main GMP manual.
+
+Changes between MP version 3.0 and 3.0.1
+
+* Memory leaks in gmp_randinit and mpz_probab_prime_p fixed.
+* Documentation for gmp_randinit fixed. Misc documentation errors fixed.
+
+Changes between MP version 2.0 and 3.0
+
+* Source level compatibility with past releases (except mpn_gcd).
+* Bug fixes.
+* Much improved speed thanks to both host independent and host dependent
+ optimizations.
+* Switch to autoconf/automake/libtool.
+* Support for building libgmp as a shared library.
+* Multiplication and squaring using 3-way Toom-Cook.
+* Division using the Burnikel-Ziegler method.
+* New functions computing binomial coefficients: mpz_bin_ui, mpz_bin_uiui.
+* New function computing Fibonacci numbers: mpz_fib_ui.
+* New random number generators: mpf_urandomb, mpz_rrandomb, mpz_urandomb,
+ mpz_urandomm, gmp_randclear, gmp_randinit, gmp_randinit_lc_2exp, gmp_randseed,
+ gmp_randseed_ui.
+* New function for quickly extracting limbs: mpz_getlimbn.
+* New functions performing integer size tests: mpz_fits_sint_p,
+ mpz_fits_slong_p, mpz_fits_sshort_p, mpz_fits_uint_p, mpz_fits_ulong_p,
+ mpz_fits_ushort_p.
+* New mpf functions: mpf_ceil, mpf_floor, mpf_pow_ui, mpf_trunc.
+* New mpq function: mpq_set_d.
+* New mpz functions: mpz_addmul_ui, mpz_cmpabs, mpz_cmpabs_ui, mpz_lcm,
+ mpz_nextprime, mpz_perfect_power_p, mpz_remove, mpz_root, mpz_swap,
+ mpz_tdiv_ui, mpz_tstbit, mpz_xor.
+* New mpn function: mpn_divexact_by3.
+* New CPU support: DEC Alpha 21264, AMD K6 and Athlon, HPPA 2.0 and 64,
+ Intel Pentium Pro and Pentium-II/III, Sparc 64, PowerPC 64.
+* Almost 10 times faster mpz_invert and mpn_gcdext.
+* The interface of mpn_gcd has changed.
+* Better support for MIPS R4x000 and R5000 under Irix 6.
+* Improved support for SPARCv8 and SPARCv9 processors.
+
+Changes between MP version 2.0 and 2.0.2
+
+* Many bug fixes.
+
+Changes between MP version 1.3.2 and 2.0
+
+* Division routines in the mpz class have changed. There are three classes of
+  functions, which round the quotient to -infinity, 0, and +infinity,
+ respectively. The first class of functions have names that begin with
+ mpz_fdiv (f is short for floor), the second class' names begin with mpz_tdiv
+ (t is short for trunc), and the third class' names begin with mpz_cdiv (c is
+ short for ceil).
+
+ The old division routines beginning with mpz_m are similar to the new
+ mpz_fdiv, with the exception that some of the new functions return useful
+ values.
+
+ The old function names can still be used. All the old functions names will
+ now do floor division, not trunc division as some of them used to. This was
+ changed to make the functions more compatible with common mathematical
+ practice.
+
+ The mpz_mod and mpz_mod_ui functions now compute the mathematical mod
+ function. I.e., the sign of the 2nd argument is ignored.
+
+* The mpq assignment functions do not canonicalize their results. A new
+ function, mpq_canonicalize must be called by the user if the result is not
+ known to be canonical.
+* The mpn functions are now documented. These functions are intended for
+ very time critical applications, or applications that need full control over
+ memory allocation. Note that the mpn interface is irregular and hard to
+ use.
+* New functions for arbitrary precision floating point arithmetic. Names
+ begin with `mpf_'. Associated type mpf_t.
+* New and improved mpz functions, including much faster GCD, fast exact
+ division (mpz_divexact), bit scan (mpz_scan0 and mpz_scan1), and number
+ theoretical functions like Jacobi (mpz_jacobi) and multiplicative inverse
+ (mpz_invert).
+* New variable types (mpz_t and mpq_t) are available that makes syntax of
+ mpz and mpq calls nicer (no need for & before variables). The MP_INT and
+ MP_RAT types are still available for compatibility.
+* Uses GNU configure. This makes it possible to choose target architecture
+ and CPU variant, and to compile into a separate object directory.
+* Carefully optimized assembly for important inner loops. Support for DEC
+ Alpha, Amd 29000, HPPA 1.0 and 1.1, Intel Pentium and generic x86, Intel
+ i960, Motorola MC68000, MC68020, MC88100, and MC88110, Motorola/IBM
+ PowerPC, National NS32000, IBM POWER, MIPS R3000, R4000, SPARCv7,
+ SuperSPARC, generic SPARCv8, and DEC VAX. Some support also for ARM,
+ Clipper, IBM ROMP (RT), and Pyramid AP/XP.
+* Faster. Thanks to the assembler code, new algorithms, and general tuning.
+ In particular, the speed on machines without GCC is improved.
+* Support for machines without alloca.
+* Now under the LGPL.
+
+INCOMPATIBILITIES BETWEEN GMP 1 AND GMP 2
+
+* mpq assignment functions do not canonicalize their results.
+* mpz division functions round differently.
+* mpz mod functions now really compute mod.
+* mpz_powm and mpz_powm_ui now really use mod for reduction.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/README b/rts/gmp/README
new file mode 100644
index 0000000000..177c97eb12
--- /dev/null
+++ b/rts/gmp/README
@@ -0,0 +1,84 @@
+
+ THE GNU MP LIBRARY
+
+
+GNU MP is a library for arbitrary precision arithmetic, operating on signed
+integers, rational numbers, and floating point numbers. It has a rich set of
+functions, and the functions have a regular interface.
+
+GNU MP is designed to be as fast as possible, both for small operands and huge
+operands. The speed is achieved by using fullwords as the basic arithmetic
+type, by using fast algorithms, with carefully optimized assembly code for the
+most common inner loops for lots of CPUs, and by a general emphasis on speed
+(instead of simplicity or elegance).
+
+GNU MP is believed to be faster than any other similar library. Its advantage
+increases with operand sizes for certain operations, since GNU MP in many
+cases has asymptotically faster algorithms.
+
+GNU MP is free software and may be freely copied on the terms contained in the
+files COPYING.LIB and COPYING (most of GNU MP is under the former, some under
+the latter).
+
+
+
+ OVERVIEW OF GNU MP
+
+There are five classes of functions in GNU MP.
+
+ 1. Signed integer arithmetic functions (mpz). These functions are intended
+ to be easy to use, with their regular interface. The associated type is
+ `mpz_t'.
+
+ 2. Rational arithmetic functions (mpq). For now, just a small set of
+ functions necessary for basic rational arithmetics. The associated type
+ is `mpq_t'.
+
+ 3. Floating-point arithmetic functions (mpf). If the C type `double'
+ doesn't give enough precision for your application, declare your
+ variables as `mpf_t' instead, set the precision to any number desired,
+ and call the functions in the mpf class for the arithmetic operations.
+
+ 4. Positive-integer, hard-to-use, very low overhead functions are in the
+ mpn class. No memory management is performed. The caller must ensure
+ enough space is available for the results. The set of functions is not
+ regular, nor is the calling interface. These functions accept input
+ arguments in the form of pairs consisting of a pointer to the least
+ significant word, and an integral size telling how many limbs (= words)
+ the pointer points to.
+
+ Almost all calculations, in the entire package, are made by calling these
+ low-level functions.
+
+ 5. Berkeley MP compatible functions.
+
+ To use these functions, include the file "mp.h". You can test if you are
+ using the GNU version by testing if the symbol __GNU_MP__ is defined.
+
+For more information on how to use GNU MP, please refer to the documentation.
+It is composed from the file gmp.texi, and can be displayed on the screen or
+printed. How to do that, as well how to build the library, is described in
+the INSTALL file in this directory.
+
+
+
+ REPORTING BUGS
+
+If you find a bug in the library, please make sure to tell us about it!
+
+You should first check the GNU MP web pages at http://www.swox.com/gmp/,
+under "Status of the current release". There will be patches for all known
+serious bugs there.
+
+Report bugs to bug-gmp@gnu.org. What information is needed in a good bug
+report is described in the manual. The same address can be used for
+suggesting modifications and enhancements.
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 78
+End:
diff --git a/rts/gmp/acconfig.h b/rts/gmp/acconfig.h
new file mode 100644
index 0000000000..dfb1b0b039
--- /dev/null
+++ b/rts/gmp/acconfig.h
@@ -0,0 +1,92 @@
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+@TOP@
+
+/* Define if a limb is long long. */
+#undef _LONG_LONG_LIMB
+
+/* Define if we have native implementation of function. */
+#undef HAVE_NATIVE_
+#undef HAVE_NATIVE_mpn_add
+#undef HAVE_NATIVE_mpn_add_1
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addmul_1
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addsub_n
+#undef HAVE_NATIVE_mpn_addsub_nc
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdivmod
+#undef HAVE_NATIVE_mpn_cmp
+#undef HAVE_NATIVE_mpn_com_n
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_divrem_newton
+#undef HAVE_NATIVE_mpn_divrem_classic
+#undef HAVE_NATIVE_mpn_dump
+#undef HAVE_NATIVE_mpn_gcd
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_gcdext
+#undef HAVE_NATIVE_mpn_get_str
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mul
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_mul_n
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_perfect_square_p
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_random2
+#undef HAVE_NATIVE_mpn_random
+#undef HAVE_NATIVE_mpn_rawrandom
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_scan0
+#undef HAVE_NATIVE_mpn_scan1
+#undef HAVE_NATIVE_mpn_set_str
+#undef HAVE_NATIVE_mpn_sqrtrem
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sub
+#undef HAVE_NATIVE_mpn_sub_1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_submul_1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_udiv_w_sdiv
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n
diff --git a/rts/gmp/acinclude.m4 b/rts/gmp/acinclude.m4
new file mode 100644
index 0000000000..a02394a963
--- /dev/null
+++ b/rts/gmp/acinclude.m4
@@ -0,0 +1,835 @@
+dnl GMP specific autoconf macros
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl GMP_HEADER_GETVAL(NAME,FILE)
+dnl ----------------------------
+dnl Expand to the value of a "#define NAME" from the given FILE.
+dnl The regexps here aren't very rugged, but are enough for gmp.
+dnl /dev/null as a parameter prevents a hang if $2 is accidentally omitted.
+
+define(GMP_HEADER_GETVAL,
+[patsubst(patsubst(
+esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]),
+[^.*$1[ ]+],[]),
+[[
+ ]*$],[])])
+
+
+dnl GMP_VERSION
+dnl -----------
+dnl The gmp version number, extracted from the #defines in gmp.h.
+dnl Two digits like 3.0 if patchlevel <= 0, or three digits like 3.0.1 if
+dnl patchlevel > 0.
+
+define(GMP_VERSION,
+[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp.h)[]dnl
+.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp.h)[]dnl
+ifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h) > 0),1,
+[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h)])])
+
+
+dnl GMP_PROG_M4()
+dnl -------------
+dnl
+dnl Find a working m4, either in $PATH or likely locations, and setup $M4
+dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user
+dnl choice and is accepted with no checks. GMP_PROG_M4 is like
+dnl AC_PATH_PROG or AC_CHECK_PROG, but it tests each m4 found to see if
+dnl it's good enough.
+dnl
+dnl See mpn/asm-defs.m4 for details on the known bad m4s.
+
+AC_DEFUN(GMP_PROG_M4,
+[AC_CACHE_CHECK([for suitable m4],
+ gmp_cv_prog_m4,
+[if test -n "$M4"; then
+ gmp_cv_prog_m4="$M4"
+else
+ cat >conftest.m4 <<\EOF
+dnl must protect this against being expanded during autoconf m4!
+[define(dollarhash,``$][#'')dnl
+ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $][# not supported (SunOS /usr/bin/m4)
+'')dnl
+ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')dnl
+ifelse(t1`'t2,YY,`good
+')dnl]
+EOF
+ echo "trying m4" 1>&AC_FD_CC
+ gmp_tmp_val="`(m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="m4"
+ else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="$PATH:/usr/5bin"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ echo "trying $ac_dir/m4" 1>&AC_FD_CC
+ gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="$ac_dir/m4"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ if test -z "$gmp_cv_prog_m4"; then
+ AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).])
+ fi
+ fi
+ rm -f conftest.m4
+fi])
+M4="$gmp_cv_prog_m4"
+AC_SUBST(M4)
+])
+
+
+dnl GMP_PROG_CC_FIND([CC_LIST], [REQ_64BIT_CC])
+dnl Find first working compiler in CC_LIST.
+dnl If REQ_64BIT_CC is "yes", the compiler is required to be able to
+dnl produce 64-bit code.
+dnl NOTE: If a compiler needs any special flags for producing 64-bit code,
+dnl these have to be found in shell variable `gmp_cflags64_{cc}', where `{cc}'
+dnl is the name of the compiler.
+dnl Set CC to the name of the first working compiler.
+dnl If a 64-bit compiler is found, set CC64 to the name of the compiler and
+dnl CFLAGS64 to flags to use.
+dnl This macro does not test if any of the compilers found is a GNU compiler.
+dnl To do this, when you have finally made up your mind on which one to use,
+dnl and set CC accordingly, invoke [GMP_PROG_CC_SELECT]. That macro will
+dnl also make sure that your selection of CFLAGS is valid.
+dnl
+AC_DEFUN(GMP_PROG_CC_FIND,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+ifelse([$1], , gmp_cc_list="gcc cc", gmp_cc_list="[$1]")
+ifelse([$2], , gmp_req_64bit_cc="no", gmp_req_64bit_cc="[$2]")
+
+CC32=
+CC64=
+for c in $gmp_cc_list; do
+ # Avoid cache hits.
+ unset CC
+ unset ac_cv_prog_CC
+ AC_CHECK_TOOL(CC, $c, $c)
+ if test -n "$CC"; then
+ eval c_flags=\$gmp_cflags_$c
+ GMP_PROG_CC_WORKS($CC, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" != "yes"; then
+ continue
+ fi
+
+ # Save first working compiler, whether 32- or 64-bit capable.
+ if test -z "$CC32"; then
+ CC32="$CC"
+ fi
+ if test "$gmp_req_64bit_cc" = "yes"; then
+ eval c_flags=\$gmp_cflags64_$c
+
+ # Verify that the compiler works in 64-bit mode as well.
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link.
+ GMP_PROG_CC_WORKS($c, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" = "yes"; then
+ GMP_CHECK_CC_64BIT($c, $c_flags)
+ if test "$gmp_cv_cc_64bit" = "yes"; then
+ test -z "$CC64" && CC64="$c"
+ test -z "$CFLAGS64" && CFLAGS64="$c_flags"
+ # We have CC64 so we're done.
+ break
+ fi
+ fi
+ else
+ # We have CC32, and we don't need a 64-bit compiler so we're done.
+ break
+ fi
+ fi
+done
+CC="$CC32"
+])dnl
+
+dnl GMP_PROG_CC_SELECT
+dnl Check that `CC' works with `CFLAGS'. Check if `CC' is a GNU compiler.
+dnl Cache the result as `ac_cv_prog_CC'.
+AC_DEFUN(GMP_PROG_CC_SELECT,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+AC_PROG_CC_WORKS
+AC_PROG_CC_GNU
+
+if test "$ac_cv_prog_gcc" = "yes"; then
+ GCC=yes
+else
+ GCC=
+fi
+
+# Set CFLAGS if not already set.
+if test -z "$CFLAGS"; then
+ CFLAGS="-g"
+ if test "$GCC" = "yes"; then
+ CFLAGS="$CFLAGS -O2"
+ fi
+fi
+
+AC_SUBST(CC)
+AC_CACHE_VAL(ac_cv_prog_CC, ac_cv_prog_CC="$CC")
+AC_PROVIDE([AC_PROG_CC])
+])dnl
+
+dnl GMP_CHECK_CC_64BIT(cc, cflags64)
+dnl Find out if `CC' can produce 64-bit code.
+dnl Requires NM to be set to nm for target.
+dnl FIXME: Cache result.
+AC_DEFUN(GMP_CHECK_CC_64BIT,
+[
+ gmp_tmp_CC_save="$CC"
+ CC="[$1]"
+ AC_MSG_CHECKING([whether the C compiler ($CC) is 64-bit capable])
+ if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+ fi
+ gmp_tmp_CFLAGS_save="$CFLAGS"
+ CFLAGS="[$2]"
+
+ case "$target" in
+ hppa2.0*-*-*)
+ # FIXME: If gcc is installed under another name than "gcc", we will
+ # test the wrong thing.
+ if test "$CC" != "gcc"; then
+ dnl Let compiler version A.10.32.30 or higher be ok.
+ dnl Bad compiler output:
+ dnl ccom: HP92453-01 G.10.32.05 HP C Compiler
+ dnl Good compiler output:
+ dnl ccom: HP92453-01 A.10.32.30 HP C Compiler
+ echo >conftest.c
+ gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"`
+ rm conftest*
+ gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'`
+ gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+ gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+ gmp_cv_cc_64bit=no
+ test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \
+ && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \
+ && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \
+ && gmp_cv_cc_64bit=yes
+ else # gcc
+ # FIXME: Compile a minimal file and determine if the resulting object
+ # file is an ELF file. If so, gcc can produce 64-bit code.
+ # Do we have file(1) for target?
+ gmp_cv_cc_64bit=no
+ fi
+ ;;
+ mips-sgi-irix6.*)
+ # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit
+ # arithmetic but not 64-bit pointers, so the general test for sizeof
+ # (void *) is not valid.
+ # Simply try to compile an empty main. If that succeeds return
+ # true.
+ AC_TRY_COMPILE( , ,
+ gmp_cv_cc_64bit=yes, gmp_cv_cc_64bit=no,
+ gmp_cv_cc_64bit=no)
+ ;;
+ *-*-*)
+ # Allocate an array of size sizeof (void *) and use nm to determine its
+ # size. We depend on the first declared variable being put at address 0.
+ cat >conftest.c <<EOF
+[char arr[sizeof (void *)]={0};
+char post=0;]
+EOF
+ gmp_compile="$CC $CFLAGS -c conftest.c 1>&AC_FD_CC"
+ if AC_TRY_EVAL(gmp_compile); then
+ changequote(<,>)dnl
+ gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "8"; then
+ gmp_cv_cc_64bit=yes
+ else
+ gmp_cv_cc_64bit=no
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+ gmp_cv_cc_64bit=no
+ fi
+ rm -f conftest*
+ ;;
+ esac
+
+ CC="$gmp_tmp_CC_save"
+ CFLAGS="$gmp_tmp_CFLAGS_save"
+ AC_MSG_RESULT($gmp_cv_cc_64bit)
+])dnl
+
+dnl GMP_INIT([M4-DEF-FILE])
+dnl
+AC_DEFUN(GMP_INIT,
+[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]")
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
+test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
+test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p
+])dnl
+
+dnl GMP_FINISH
+dnl ----------
+dnl Create config.m4 from its accumulated parts.
+dnl
+dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include
+dnl of config.m4 is harmless.
+dnl
+dnl A separate ifdef on the angle bracket quoted part ensures the quoting
+dnl style there is respected. The basic defines from gmp_tmpconfigm4 are
+dnl fully quoted but are still put under an ifdef in case any have been
+dnl redefined by one of the m4 include files.
+dnl
+dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't
+dnl work, since it'd interpret parentheses and quotes in dnl comments, and
+dnl having a whole file as a macro argument would overflow the string space
+dnl on BSD m4.
+
+AC_DEFUN(GMP_FINISH,
+[AC_REQUIRE([GMP_INIT])
+echo "creating $gmp_configm4"
+echo ["dnl $gmp_configm4. Generated automatically by configure."] > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+ echo ["changequote(<,>)dnl"] >> $gmp_configm4
+ echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4
+ cat $gmp_tmpconfigm4 >> $gmp_configm4
+ echo [">)"] >> $gmp_configm4
+ echo ["changequote(\`,')dnl"] >> $gmp_configm4
+ rm $gmp_tmpconfigm4
+fi
+echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+ cat $gmp_tmpconfigm4i >> $gmp_configm4
+ rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+ cat $gmp_tmpconfigm4p >> $gmp_configm4
+ rm $gmp_tmpconfigm4p
+fi
+echo ["')"] >> $gmp_configm4
+echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4
+])dnl
+
+dnl GMP_INCLUDE(FILE)
+AC_DEFUN(GMP_INCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["include(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_SINCLUDE(FILE)
+AC_DEFUN(GMP_SINCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["sinclude(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION])
+dnl [ Define M4 macro MACRO as DEFINITION in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Mind the quoting! No shell variables will get expanded. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+dnl [ config.m4 uses `<' and '>' as quote characters for all defines. ]
+AC_DEFUN(GMP_DEFINE,
+[AC_REQUIRE([GMP_INIT])
+echo ['define(<$1>, <$2>)'] >> ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_DEFINE_RAW(STRING, [, LOCATION])
+dnl [ Put STRING in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+AC_DEFUN(GMP_DEFINE_RAW,
+[AC_REQUIRE([GMP_INIT])
+echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_CHECK_ASM_LABEL_SUFFIX
+dnl Should a label have a colon or not?
+AC_DEFUN(GMP_CHECK_ASM_LABEL_SUFFIX,
+[AC_CACHE_CHECK([what assembly label suffix to use],
+ gmp_cv_check_asm_label_suffix,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_label_suffix=[""] ;;
+ *) gmp_cv_check_asm_label_suffix=[":"] ;;
+esac
+])
+echo ["define(<LABEL_SUFFIX>, <\$][1$gmp_cv_check_asm_label_suffix>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_UNDERSCORE([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Shamelessly borrowed from glibc.
+AC_DEFUN(GMP_CHECK_ASM_UNDERSCORE,
+[AC_CACHE_CHECK([if symbols are prefixed by underscore],
+ gmp_cv_check_asm_underscore,
+[cat > conftest.$ac_ext <<EOF
+dnl This sometimes fails to find confdefs.h, for some reason.
+dnl [#]line __oline__ "[$]0"
+[#]line __oline__ "configure"
+#include "confdefs.h"
+int underscore_test() {
+return; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ if grep _underscore_test conftest* >/dev/null; then
+ gmp_cv_check_asm_underscore=yes
+ else
+ gmp_cv_check_asm_underscore=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_underscore" = "yes"; then
+ GMP_DEFINE(GSYM_PREFIX, [_])
+ ifelse([$1], , :, [$1])
+else
+ GMP_DEFINE(GSYM_PREFIX, [])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_ALIGN_LOG([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Is parameter to `.align' logarithmic?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_LOG,
+[AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([if .align assembly directive is logarithmic],
+ gmp_cv_check_asm_align_log,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ .align 4
+ $gmp_cv_check_asm_globl foo
+ .byte 1
+ .align 4
+foo$gmp_cv_check_asm_label_suffix
+ .byte 2
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+ gmp_cv_check_asm_align_log=yes
+ else
+ gmp_cv_check_asm_align_log=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)"])
+if test "$gmp_cv_check_asm_align_log" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+
+dnl GMP_CHECK_ASM_ALIGN_FILL_0x90
+dnl -----------------------------
+dnl Determine whether a ",0x90" suffix works on a .align directive.
+dnl This is only meant for use on x86, where 0x90 is a "nop".
+dnl
+dnl Old gas, eg. 1.92.3 - needs ",0x90" or else the fill is an invalid 0x00.
+dnl New gas, eg. 2.91 - generates the good multibyte nop fills even when
+dnl ",0x90" is given.
+dnl Solaris 2.6 as - doesn't allow ",0x90", gives a fatal error.
+dnl Solaris 2.8 as - gives a warning for ",0x90", no ill effect.
+dnl
+dnl Note that both solaris "as"s only care about ",0x90" if they actually
+dnl have to use it to fill something, hence the .byte in the sample. It's
+dnl only the second .align that provokes an error or warning.
+dnl
+dnl We prefer to suppress the warning from solaris 2.8 to stop anyone
+dnl worrying something might be wrong.
+
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_FILL_0x90,
+[AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text],
+ gmp_cv_check_asm_align_fill_0x90,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ .align 4, 0x90
+ .byte 0
+ .align 4, 0x90
+EOF
+gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`"
+if test $? = 0; then
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then
+    echo "Suppressing this warning by omitting 0x90" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+ else
+ gmp_cv_check_asm_align_fill_0x90=yes
+ fi
+else
+ echo "Non-zero exit code" 1>&AC_FD_CC
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(
+["define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)"])
+])
+
+
+dnl GMP_CHECK_ASM_TEXT
+AC_DEFUN(GMP_CHECK_ASM_TEXT,
+[AC_CACHE_CHECK([how to switch to text section], gmp_cv_check_asm_text,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_text={".csect .text[PR]"}
+ changequote([, ])
+ ;;
+ *-*-hpux*) gmp_cv_check_asm_text=[".code"] ;;
+ *) gmp_cv_check_asm_text=[".text"] ;;
+esac
+])
+echo ["define(<TEXT>, <$gmp_cv_check_asm_text>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_DATA
+dnl Can we say `.data'?
+AC_DEFUN(GMP_CHECK_ASM_DATA,
+[AC_CACHE_CHECK([how to switch to data section], gmp_cv_check_asm_data,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_data={".csect .data[RW]"}
+ changequote([, ])
+ ;;
+ *) gmp_cv_check_asm_data=[".data"] ;;
+esac
+])
+echo ["define(<DATA>, <$gmp_cv_check_asm_data>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_GLOBL
+dnl Can we say `.global'?
+AC_DEFUN(GMP_CHECK_ASM_GLOBL,
+[AC_CACHE_CHECK([how to export a symbol], gmp_cv_check_asm_globl,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_globl=[".export"] ;;
+ *) gmp_cv_check_asm_globl=[".globl"] ;;
+esac
+])
+echo ["define(<GLOBL>, <$gmp_cv_check_asm_globl>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_TYPE
+dnl Can we say `.type'?
+AC_DEFUN(GMP_CHECK_ASM_TYPE,
+[AC_CACHE_CHECK([how the .type assembly directive should be used],
+gmp_cv_check_asm_type,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+for gmp_tmp_prefix in @ \# %; do
+ echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+ if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_type="[.type \$][1,${gmp_tmp_prefix}\$][2]"
+ break
+ fi
+done
+if test -z "$gmp_cv_check_asm_type"; then
+ gmp_cv_check_asm_type="[dnl]"
+fi
+])
+echo ["define(<TYPE>, <$gmp_cv_check_asm_type>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_SIZE
+dnl Can we say `.size'?
+AC_DEFUN(GMP_CHECK_ASM_SIZE,
+[AC_CACHE_CHECK([if the .size assembly directive works], gmp_cv_check_asm_size,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+echo ' .size sym,1' > conftest.s
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_size="[.size \$][1,\$][2]"
+else
+ gmp_cv_check_asm_size="[dnl]"
+fi
+])
+echo ["define(<SIZE>, <$gmp_cv_check_asm_size>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_LSYM_PREFIX
+dnl What is the prefix for a local label?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_LSYM_PREFIX,
+[AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([what prefix to use for a local label],
+gmp_cv_check_asm_lsym_prefix,
+[if test -z "$NM"; then
+ echo; echo ["$0: fatal: need nm"]
+ exit 1
+fi
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+gmp_cv_check_asm_lsym_prefix="L"
+for gmp_tmp_pre in L .L $ L$; do
+ cat > conftest.s <<EOF
+dummy${gmp_cv_check_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ $NM conftest.o >/dev/null 2>&1
+ gmp_rc=$?
+ if test "$gmp_rc" != "0"; then
+ echo "configure: $NM failure, using default"
+ break
+ fi
+ if $NM conftest.o | grep gurkmacka >/dev/null; then true; else
+ gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre"
+ break
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+ # Use default.
+ fi
+done
+rm -f conftest*
+])
+echo ["define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_W32
+dnl How to [define] a 32-bit word.
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_W32,
+[AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([how to [define] a 32-bit word],
+ gmp_cv_check_asm_w32,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+
+# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+# that it's at 0x0. We'll have to declare another symbol before the
+# .long/.word and look at the distance between the two symbols. The
+# only problem is that the sed expression(s) barfs (on Solaris, for
+# example) for the symbol with value 0. For now, HPUX uses .word.
+
+case "$target" in
+ *-*-hpux*)
+ gmp_cv_check_asm_w32=".word"
+ ;;
+ *-*-*)
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+ for gmp_tmp_op in .long .word; do
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ $gmp_cv_check_asm_globl foo
+ $gmp_tmp_op 0
+foo${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "4"; then
+ gmp_cv_check_asm_w32="$gmp_tmp_op"
+ break
+ fi
+ fi
+ done
+ ;;
+esac
+
+if test -z "$gmp_cv_check_asm_w32"; then
+ echo; echo ["configure: $0: fatal: do not know how to define a 32-bit word"]
+ exit 1
+fi
+rm -f conftest*
+])
+echo ["define(<W32>, <$gmp_cv_check_asm_w32>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_MMX([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+dnl Can we assemble MMX insns?
+AC_DEFUN(GMP_CHECK_ASM_MMX,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler knows about MMX instructions],
+ gmp_cv_check_asm_mmx,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ por %mm0, %mm0
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_mmx=yes
+else
+ gmp_cv_check_asm_mmx=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_mmx" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ AC_MSG_WARN([+----------------------------------------------------------])
+ AC_MSG_WARN([| WARNING WARNING WARNING])
+ AC_MSG_WARN([| Target CPU has MMX code, but it can't be assembled by])
+ AC_MSG_WARN([| $CCAS $CFLAGS])
+ AC_MSG_WARN([| Non-MMX replacements will be used.])
+ AC_MSG_WARN([| This will be an inferior build.])
+ AC_MSG_WARN([+----------------------------------------------------------])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_SHLDL_CL([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+AC_DEFUN(GMP_CHECK_ASM_SHLDL_CL,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler takes cl with shldl],
+ gmp_cv_check_asm_shldl_cl,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ shldl %cl, %eax, %ebx
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_shldl_cl=yes
+else
+ gmp_cv_check_asm_shldl_cl=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_PROG_CC_WORKS(CC, CFLAGS, ACTION-IF-WORKS, [ACTION-IF-NOT-WORKS])
+dnl Check if CC can compile and link. Perform various target specific tests.
+dnl FIXME: Require `$target'.
+AC_DEFUN(GMP_PROG_CC_WORKS,
+[AC_LANG_C dnl Note: Destructive.
+CC="[$1]"
+CFLAGS="[$2]"
+AC_MSG_CHECKING([if the C compiler ($CC) works with flags $CFLAGS])
+
+# Simple test for all targets.
+AC_TRY_COMPILER([int main(){return(0);}],
+ tmp_works, tmp_cross)
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ AC_TRY_COMPILE( , [} void *g(); void *f() { return g(); } int bar(){],
+ tmp_works=yes, tmp_works=no)
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ [$3]
+else
+ ifelse([$4], , :, [$4])
+fi
+
+AC_MSG_RESULT($tmp_works)
+])dnl
+
+
+dnl GMP_C_ANSI2KNR
+dnl --------------
+dnl Setup to use ansi2knr if necessary.
+dnl
+dnl The test here is simply that if an ANSI style function works then
+dnl ansi2knr isn't needed. The normal tests for whether $CC works mean we
+dnl don't need to worry here about anything badly broken.
+dnl
+dnl AM_C_PROTOTYPES is the normal way to set up ansi2knr, but (in automake
+dnl March 2000) it gives the wrong answer on a C++ compiler because its
+dnl test requires that the compiler accept both ANSI and K&R, or otherwise
+dnl ansi2knr is used. A C++ compiler fails on the K&R part, which makes
+dnl AM_C_PROTOTYPES think it needs ansi2knr! GMP has no bare K&R so we
+dnl only need ANSI or K&R to work, not both.
+
+AC_DEFUN(GMP_C_ANSI2KNR,
+[AC_CACHE_CHECK([if ansi2knr should be used],
+ gmp_cv_c_ansi2knr,
+[cat >conftest.c <<EOF
+int main (int argc, char *argv[]) { return 0; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ gmp_cv_c_ansi2knr=no
+else
+ gmp_cv_c_ansi2knr=yes
+fi
+rm -f conftest.*
+])
+if test $gmp_cv_c_ansi2knr = no; then
+ U= ANSI2KNR=
+else
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+ AC_HEADER_STDC
+ AC_CHECK_HEADERS(string.h)
+fi
+AC_SUBST(U)
+AC_SUBST(ANSI2KNR)
+])
+
+
+dnl Deal with bad synchronization of Autoconf with Libtool.
+AC_DEFUN(AC_CANONICAL_BUILD, [_AC_CANONICAL_BUILD])
+AC_DEFUN(AC_CHECK_TOOL_PREFIX, [_AC_CHECK_TOOL_PREFIX])
diff --git a/rts/gmp/aclocal.m4 b/rts/gmp/aclocal.m4
new file mode 100644
index 0000000000..086c77915c
--- /dev/null
+++ b/rts/gmp/aclocal.m4
@@ -0,0 +1,1963 @@
+dnl aclocal.m4 generated automatically by aclocal 1.4a
+
+dnl Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+dnl even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+dnl PARTICULAR PURPOSE.
+
+dnl GMP specific autoconf macros
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl GMP_HEADER_GETVAL(NAME,FILE)
+dnl ----------------------------
+dnl Expand to the value of a "#define NAME" from the given FILE.
+dnl The regexps here aren't very rugged, but are enough for gmp.
+dnl /dev/null as a parameter prevents a hang if $2 is accidentally omitted.
+
+define(GMP_HEADER_GETVAL,
+[patsubst(patsubst(
+esyscmd([grep "^#define $1 " $2 /dev/null 2>/dev/null]),
+[^.*$1[ ]+],[]),
+[[
+ ]*$],[])])
+
+
+dnl GMP_VERSION
+dnl -----------
+dnl The gmp version number, extracted from the #defines in gmp.h.
+dnl Two digits like 3.0 if patchlevel <= 0, or three digits like 3.0.1 if
+dnl patchlevel > 0.
+
+define(GMP_VERSION,
+[GMP_HEADER_GETVAL(__GNU_MP_VERSION,gmp.h)[]dnl
+.GMP_HEADER_GETVAL(__GNU_MP_VERSION_MINOR,gmp.h)[]dnl
+ifelse(m4_eval(GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h) > 0),1,
+[.GMP_HEADER_GETVAL(__GNU_MP_VERSION_PATCHLEVEL,gmp.h)])])
+
+
+dnl GMP_PROG_M4()
+dnl -------------
+dnl
+dnl Find a working m4, either in $PATH or likely locations, and setup $M4
+dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user
+dnl choice and is accepted with no checks. GMP_PROG_M4 is like
+dnl AC_PATH_PROG or AC_CHECK_PROG, but it tests each m4 found to see if
+dnl it's good enough.
+dnl
+dnl See mpn/asm-defs.m4 for details on the known bad m4s.
+
+AC_DEFUN(GMP_PROG_M4,
+[AC_CACHE_CHECK([for suitable m4],
+ gmp_cv_prog_m4,
+[if test -n "$M4"; then
+ gmp_cv_prog_m4="$M4"
+else
+ cat >conftest.m4 <<\EOF
+dnl must protect this against being expanded during autoconf m4!
+[define(dollarhash,``$][#'')dnl
+ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $][# not supported (SunOS /usr/bin/m4)
+'')dnl
+ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')dnl
+ifelse(t1`'t2,YY,`good
+')dnl]
+EOF
+ echo "trying m4" 1>&AC_FD_CC
+ gmp_tmp_val="`(m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="m4"
+ else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="$PATH:/usr/5bin"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ echo "trying $ac_dir/m4" 1>&AC_FD_CC
+ gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC`"
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="$ac_dir/m4"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ if test -z "$gmp_cv_prog_m4"; then
+ AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).])
+ fi
+ fi
+ rm -f conftest.m4
+fi])
+M4="$gmp_cv_prog_m4"
+AC_SUBST(M4)
+])
+
+
+dnl GMP_PROG_CC_FIND([CC_LIST], [REQ_64BIT_CC])
+dnl Find first working compiler in CC_LIST.
+dnl If REQ_64BIT_CC is "yes", the compiler is required to be able to
+dnl produce 64-bit code.
+dnl NOTE: If a compiler needs any special flags for producing 64-bit code,
+dnl these have to be found in shell variable `gmp_cflags64_{cc}', where `{cc}'
+dnl is the name of the compiler.
+dnl Set CC to the name of the first working compiler.
+dnl If a 64-bit compiler is found, set CC64 to the name of the compiler and
+dnl CFLAGS64 to flags to use.
+dnl This macro does not test if any of the compilers found is a GNU compiler.
+dnl To do this, when you have finally made up your mind on which one to use,
+dnl and set CC accordingly, invoke [GMP_PROG_CC_SELECT]. That macro will
+dnl also make sure that your selection of CFLAGS is valid.
+dnl
+AC_DEFUN(GMP_PROG_CC_FIND,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+ifelse([$1], , gmp_cc_list="gcc cc", gmp_cc_list="[$1]")
+ifelse([$2], , gmp_req_64bit_cc="no", gmp_req_64bit_cc="[$2]")
+
+CC32=
+CC64=
+for c in $gmp_cc_list; do
+ # Avoid cache hits.
+ unset CC
+ unset ac_cv_prog_CC
+ AC_CHECK_TOOL(CC, $c, $c)
+ if test -n "$CC"; then
+ eval c_flags=\$gmp_cflags_$c
+ GMP_PROG_CC_WORKS($CC, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" != "yes"; then
+ continue
+ fi
+
+ # Save first working compiler, whether 32- or 64-bit capable.
+ if test -z "$CC32"; then
+ CC32="$CC"
+ fi
+ if test "$gmp_req_64bit_cc" = "yes"; then
+ eval c_flags=\$gmp_cflags64_$c
+
+ # Verify that the compiler works in 64-bit mode as well.
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link.
+ GMP_PROG_CC_WORKS($c, $c_flags,
+ gmp_prog_cc_works=yes,
+ gmp_prog_cc_works=no)
+
+ if test "$gmp_prog_cc_works" = "yes"; then
+ GMP_CHECK_CC_64BIT($c, $c_flags)
+ if test "$gmp_cv_cc_64bit" = "yes"; then
+ test -z "$CC64" && CC64="$c"
+ test -z "$CFLAGS64" && CFLAGS64="$c_flags"
+ # We have CC64 so we're done.
+ break
+ fi
+ fi
+ else
+ # We have CC32, and we don't need a 64-bit compiler so we're done.
+ break
+ fi
+ fi
+done
+CC="$CC32"
+])dnl
+
+dnl GMP_PROG_CC_SELECT
+dnl Check that `CC' works with `CFLAGS'. Check if `CC' is a GNU compiler.
+dnl Cache the result as `ac_cv_prog_CC'.
+AC_DEFUN(GMP_PROG_CC_SELECT,
+[AC_BEFORE([$0], [CC_PROG_CPP])
+AC_PROG_CC_WORKS
+AC_PROG_CC_GNU
+
+if test "$ac_cv_prog_gcc" = "yes"; then
+ GCC=yes
+else
+ GCC=
+fi
+
+# Set CFLAGS if not already set.
+if test -z "$CFLAGS"; then
+ CFLAGS="-g"
+ if test "$GCC" = "yes"; then
+ CFLAGS="$CFLAGS -O2"
+ fi
+fi
+
+AC_SUBST(CC)
+AC_CACHE_VAL(ac_cv_prog_CC, ac_cv_prog_CC="$CC")
+AC_PROVIDE([AC_PROG_CC])
+])dnl
+
+dnl GMP_CHECK_CC_64BIT(cc, cflags64)
+dnl Find out if `CC' can produce 64-bit code.
+dnl Requires NM to be set to nm for target.
+dnl FIXME: Cache result.
+AC_DEFUN(GMP_CHECK_CC_64BIT,
+[
+ gmp_tmp_CC_save="$CC"
+ CC="[$1]"
+ AC_MSG_CHECKING([whether the C compiler ($CC) is 64-bit capable])
+ if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+ fi
+ gmp_tmp_CFLAGS_save="$CFLAGS"
+ CFLAGS="[$2]"
+
+ case "$target" in
+ hppa2.0*-*-*)
+ # FIXME: If gcc is installed under another name than "gcc", we will
+ # test the wrong thing.
+ if test "$CC" != "gcc"; then
+ dnl Let compiler version A.10.32.30 or higher be ok.
+ dnl Bad compiler output:
+ dnl ccom: HP92453-01 G.10.32.05 HP C Compiler
+ dnl Good compiler output:
+ dnl ccom: HP92453-01 A.10.32.30 HP C Compiler
+ echo >conftest.c
+ gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"`
+ rm conftest*
+ gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'`
+ gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+ gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+ gmp_cv_cc_64bit=no
+ test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \
+ && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \
+ && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \
+ && gmp_cv_cc_64bit=yes
+ else # gcc
+ # FIXME: Compile a minimal file and determine if the resulting object
+ # file is an ELF file. If so, gcc can produce 64-bit code.
+ # Do we have file(1) for target?
+ gmp_cv_cc_64bit=no
+ fi
+ ;;
+ mips-sgi-irix6.*)
+ # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit
+ # arithmetic but not 64-bit pointers, so the general test for sizeof
+ # (void *) is not valid.
+ # Simply try to compile an empty main. If that succeeds return
+ # true.
+ AC_TRY_COMPILE( , ,
+ gmp_cv_cc_64bit=yes, gmp_cv_cc_64bit=no,
+ gmp_cv_cc_64bit=no)
+ ;;
+ *-*-*)
+ # Allocate an array of size sizeof (void *) and use nm to determine its
+ # size. We depend on the first declared variable being put at address 0.
+ cat >conftest.c <<EOF
+[char arr[sizeof (void *)]={0};
+char post=0;]
+EOF
+ gmp_compile="$CC $CFLAGS -c conftest.c 1>&AC_FD_CC"
+ if AC_TRY_EVAL(gmp_compile); then
+ changequote(<,>)dnl
+ gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "8"; then
+ gmp_cv_cc_64bit=yes
+ else
+ gmp_cv_cc_64bit=no
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+ gmp_cv_cc_64bit=no
+ fi
+ rm -f conftest*
+ ;;
+ esac
+
+ CC="$gmp_tmp_CC_save"
+ CFLAGS="$gmp_tmp_CFLAGS_save"
+ AC_MSG_RESULT($gmp_cv_cc_64bit)
+])dnl
+
+dnl GMP_INIT([M4-DEF-FILE])
+dnl
+AC_DEFUN(GMP_INIT,
+[ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]")
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
+test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
+test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p
+])dnl
+
+dnl GMP_FINISH
+dnl ----------
+dnl Create config.m4 from its accumulated parts.
+dnl
+dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include
+dnl of config.m4 is harmless.
+dnl
+dnl A separate ifdef on the angle bracket quoted part ensures the quoting
+dnl style there is respected. The basic defines from gmp_tmpconfigm4 are
+dnl fully quoted but are still put under an ifdef in case any have been
+dnl redefined by one of the m4 include files.
+dnl
+dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't
+dnl work, since it'd interpret parentheses and quotes in dnl comments, and
+dnl having a whole file as a macro argument would overflow the string space
+dnl on BSD m4.
+
+AC_DEFUN(GMP_FINISH,
+[AC_REQUIRE([GMP_INIT])
+echo "creating $gmp_configm4"
+echo ["dnl $gmp_configm4. Generated automatically by configure."] > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+ echo ["changequote(<,>)dnl"] >> $gmp_configm4
+ echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4
+ cat $gmp_tmpconfigm4 >> $gmp_configm4
+ echo [">)"] >> $gmp_configm4
+ echo ["changequote(\`,')dnl"] >> $gmp_configm4
+ rm $gmp_tmpconfigm4
+fi
+echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+ cat $gmp_tmpconfigm4i >> $gmp_configm4
+ rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+ cat $gmp_tmpconfigm4p >> $gmp_configm4
+ rm $gmp_tmpconfigm4p
+fi
+echo ["')"] >> $gmp_configm4
+echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4
+])dnl
+
+dnl GMP_INCLUDE(FILE)
+AC_DEFUN(GMP_INCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["include(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_SINCLUDE(FILE)
+AC_DEFUN(GMP_SINCLUDE,
+[AC_REQUIRE([GMP_INIT])
+echo ["sinclude(\`$1')"] >> $gmp_tmpconfigm4i
+])dnl
+
+dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION])
+dnl [ Define M4 macro MACRO as DEFINITION in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Mind the quoting! No shell variables will get expanded. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+dnl [ config.m4 uses `<' and '>' as quote characters for all defines. ]
+AC_DEFUN(GMP_DEFINE,
+[AC_REQUIRE([GMP_INIT])
+echo ['define(<$1>, <$2>)'] >> ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_DEFINE_RAW(STRING, [, LOCATION])
+dnl [ Put STRING in temporary file. ]
+dnl [ If LOCATION is `POST', the definition will appear after any ]
+dnl [ include() directives inserted by GMP_INCLUDE/GMP_SINCLUDE. ]
+dnl [ Don't forget to invoke GMP_FINISH to create file config.m4. ]
+AC_DEFUN(GMP_DEFINE_RAW,
+[AC_REQUIRE([GMP_INIT])
+echo [$1] >> ifelse([$2], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4)
+])dnl
+
+dnl GMP_CHECK_ASM_LABEL_SUFFIX
+dnl Should a label have a colon or not?
+AC_DEFUN(GMP_CHECK_ASM_LABEL_SUFFIX,
+[AC_CACHE_CHECK([what assembly label suffix to use],
+ gmp_cv_check_asm_label_suffix,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_label_suffix=[""] ;;
+ *) gmp_cv_check_asm_label_suffix=[":"] ;;
+esac
+])
+echo ["define(<LABEL_SUFFIX>, <\$][1$gmp_cv_check_asm_label_suffix>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_UNDERSCORE([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Shamelessly borrowed from glibc.
+AC_DEFUN(GMP_CHECK_ASM_UNDERSCORE,
+[AC_CACHE_CHECK([if symbols are prefixed by underscore],
+ gmp_cv_check_asm_underscore,
+[cat > conftest.$ac_ext <<EOF
+dnl This sometimes fails to find confdefs.h, for some reason.
+dnl [#]line __oline__ "[$]0"
+[#]line __oline__ "configure"
+#include "confdefs.h"
+int underscore_test() {
+return; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ if grep _underscore_test conftest* >/dev/null; then
+ gmp_cv_check_asm_underscore=yes
+ else
+ gmp_cv_check_asm_underscore=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.$ac_ext >&AC_FD_CC
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_underscore" = "yes"; then
+ GMP_DEFINE(GSYM_PREFIX, [_])
+ ifelse([$1], , :, [$1])
+else
+ GMP_DEFINE(GSYM_PREFIX, [])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_ALIGN_LOG([ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND]])
+dnl Is parameter to `.align' logarithmic?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_LOG,
+[AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([if .align assembly directive is logarithmic],
+ gmp_cv_check_asm_align_log,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ .align 4
+ $gmp_cv_check_asm_globl foo
+ .byte 1
+ .align 4
+foo$gmp_cv_check_asm_label_suffix
+ .byte 2
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+ gmp_cv_check_asm_align_log=yes
+ else
+ gmp_cv_check_asm_align_log=no
+ fi
+else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(["define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)"])
+if test "$gmp_cv_check_asm_align_log" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+
+dnl GMP_CHECK_ASM_ALIGN_FILL_0x90
+dnl -----------------------------
+dnl Determine whether a ",0x90" suffix works on a .align directive.
+dnl This is only meant for use on x86, where 0x90 is a "nop".
+dnl
+dnl Old gas, eg. 1.92.3 - needs ",0x90" or else the fill is an invalid 0x00.
+dnl New gas, eg. 2.91 - generates the good multibyte nop fills even when
+dnl ",0x90" is given.
+dnl Solaris 2.6 as - doesn't allow ",0x90", gives a fatal error.
+dnl Solaris 2.8 as - gives a warning for ",0x90", no ill effect.
+dnl
+dnl Note that both solaris "as"s only care about ",0x90" if they actually
+dnl have to use it to fill something, hence the .byte in the sample. It's
+dnl only the second .align that provokes an error or warning.
+dnl
+dnl We prefer to suppress the warning from solaris 2.8 to stop anyone
+dnl worrying something might be wrong.
+
+AC_DEFUN(GMP_CHECK_ASM_ALIGN_FILL_0x90,
+[AC_CACHE_CHECK([if the .align directive accepts an 0x90 fill in .text],
+ gmp_cv_check_asm_align_fill_0x90,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ .align 4, 0x90
+ .byte 0
+ .align 4, 0x90
+EOF
+gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`"
+if test $? = 0; then
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then
+ echo "Supressing this warning by omitting 0x90" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+ else
+ gmp_cv_check_asm_align_fill_0x90=yes
+ fi
+else
+ echo "Non-zero exit code" 1>&AC_FD_CC
+ echo "$gmp_tmp_val" 1>&AC_FD_CC
+ gmp_cv_check_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+])
+GMP_DEFINE_RAW(
+["define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)"])
+])
+
+
+dnl GMP_CHECK_ASM_TEXT
+AC_DEFUN(GMP_CHECK_ASM_TEXT,
+[AC_CACHE_CHECK([how to switch to text section], gmp_cv_check_asm_text,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_text={".csect .text[PR]"}
+ changequote([, ])
+ ;;
+ *-*-hpux*) gmp_cv_check_asm_text=[".code"] ;;
+ *) gmp_cv_check_asm_text=[".text"] ;;
+esac
+])
+echo ["define(<TEXT>, <$gmp_cv_check_asm_text>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_DATA
+dnl Can we say `.data'?
+AC_DEFUN(GMP_CHECK_ASM_DATA,
+[AC_CACHE_CHECK([how to switch to data section], gmp_cv_check_asm_data,
+[case "$target" in
+ *-*-aix*)
+ changequote({, })
+ gmp_cv_check_asm_data={".csect .data[RW]"}
+ changequote([, ])
+ ;;
+ *) gmp_cv_check_asm_data=[".data"] ;;
+esac
+])
+echo ["define(<DATA>, <$gmp_cv_check_asm_data>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_GLOBL
+dnl Can we say `.global'?
+AC_DEFUN(GMP_CHECK_ASM_GLOBL,
+[AC_CACHE_CHECK([how to export a symbol], gmp_cv_check_asm_globl,
+[case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_globl=[".export"] ;;
+ *) gmp_cv_check_asm_globl=[".globl"] ;;
+esac
+])
+echo ["define(<GLOBL>, <$gmp_cv_check_asm_globl>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_TYPE
+dnl Can we say `.type'?
+AC_DEFUN(GMP_CHECK_ASM_TYPE,
+[AC_CACHE_CHECK([how the .type assembly directive should be used],
+gmp_cv_check_asm_type,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+for gmp_tmp_prefix in @ \# %; do
+ echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+ if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_type="[.type \$][1,${gmp_tmp_prefix}\$][2]"
+ break
+ fi
+done
+if test -z "$gmp_cv_check_asm_type"; then
+ gmp_cv_check_asm_type="[dnl]"
+fi
+])
+echo ["define(<TYPE>, <$gmp_cv_check_asm_type>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_SIZE
+dnl Can we say `.size'?
+AC_DEFUN(GMP_CHECK_ASM_SIZE,
+[AC_CACHE_CHECK([if the .size assembly directive works], gmp_cv_check_asm_size,
+[ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+echo ' .size sym,1' > conftest.s
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_size="[.size \$][1,\$][2]"
+else
+ gmp_cv_check_asm_size="[dnl]"
+fi
+])
+echo ["define(<SIZE>, <$gmp_cv_check_asm_size>)"] >> $gmp_tmpconfigm4
+])dnl
+
+dnl GMP_CHECK_ASM_LSYM_PREFIX
+dnl What is the prefix for a local label?
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_LSYM_PREFIX,
+[AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([what prefix to use for a local label],
+gmp_cv_check_asm_lsym_prefix,
+[if test -z "$NM"; then
+ echo; echo ["$0: fatal: need nm"]
+ exit 1
+fi
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+gmp_cv_check_asm_lsym_prefix="L"
+for gmp_tmp_pre in L .L $ L$; do
+ cat > conftest.s <<EOF
+dummy${gmp_cv_check_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ $NM conftest.o >/dev/null 2>&1
+ gmp_rc=$?
+ if test "$gmp_rc" != "0"; then
+ echo "configure: $NM failure, using default"
+ break
+ fi
+ if $NM conftest.o | grep gurkmacka >/dev/null; then true; else
+ gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre"
+ break
+ fi
+ else
+ echo "configure: failed program was:" >&AC_FD_CC
+ cat conftest.s >&AC_FD_CC
+ # Use default.
+ fi
+done
+rm -f conftest*
+])
+echo ["define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_W32
+dnl How to [define] a 32-bit word.
+dnl Requires NM to be set to nm for target.
+AC_DEFUN(GMP_CHECK_ASM_W32,
+[AC_REQUIRE([GMP_CHECK_ASM_DATA])
+AC_REQUIRE([GMP_CHECK_ASM_GLOBL])
+AC_REQUIRE([GMP_CHECK_ASM_LABEL_SUFFIX])
+AC_CACHE_CHECK([how to [define] a 32-bit word],
+ gmp_cv_check_asm_w32,
+[if test -z "$NM"; then
+ echo; echo ["configure: $0: fatal: need nm"]
+ exit 1
+fi
+
+# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+# that it's at 0x0. We'll have to declare another symbol before the
+# .long/.word and look at the distance between the two symbols. The
+# only problem is that the sed expression(s) barfs (on Solaris, for
+# example) for the symbol with value 0. For now, HPUX uses .word.
+
+case "$target" in
+ *-*-hpux*)
+ gmp_cv_check_asm_w32=".word"
+ ;;
+ *-*-*)
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+ for gmp_tmp_op in .long .word; do
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ $gmp_cv_check_asm_globl foo
+ $gmp_tmp_op 0
+foo${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if AC_TRY_EVAL(ac_assemble); then
+ changequote(<,>)
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ changequote([, ])dnl
+ if test "$gmp_tmp_val" = "4"; then
+ gmp_cv_check_asm_w32="$gmp_tmp_op"
+ break
+ fi
+ fi
+ done
+ ;;
+esac
+
+if test -z "$gmp_cv_check_asm_w32"; then
+ echo; echo ["configure: $0: fatal: do not know how to define a 32-bit word"]
+ exit 1
+fi
+rm -f conftest*
+])
+echo ["define(<W32>, <$gmp_cv_check_asm_w32>)"] >> $gmp_tmpconfigm4
+])
+
+dnl GMP_CHECK_ASM_MMX([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+dnl Can we assemble MMX insns?
+AC_DEFUN(GMP_CHECK_ASM_MMX,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler knows about MMX instructions],
+ gmp_cv_check_asm_mmx,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ por %mm0, %mm0
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_mmx=yes
+else
+ gmp_cv_check_asm_mmx=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_mmx" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ AC_MSG_WARN([+----------------------------------------------------------])
+ AC_MSG_WARN([| WARNING WARNING WARNING])
+ AC_MSG_WARN([| Target CPU has MMX code, but it can't be assembled by])
+ AC_MSG_WARN([| $CCAS $CFLAGS])
+ AC_MSG_WARN([| Non-MMX replacements will be used.])
+ AC_MSG_WARN([| This will be an inferior build.])
+ AC_MSG_WARN([+----------------------------------------------------------])
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_CHECK_ASM_SHLDL_CL([ACTION-IF-FOUND, [ACTION-IF-NOT-FOUND]])
+AC_DEFUN(GMP_CHECK_ASM_SHLDL_CL,
+[AC_REQUIRE([GMP_CHECK_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler takes cl with shldl],
+ gmp_cv_check_asm_shldl_cl,
+[cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ shldl %cl, %eax, %ebx
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&AC_FD_CC"
+if AC_TRY_EVAL(ac_assemble); then
+ gmp_cv_check_asm_shldl_cl=yes
+else
+ gmp_cv_check_asm_shldl_cl=no
+fi
+rm -f conftest*
+])
+if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then
+ ifelse([$1], , :, [$1])
+else
+ ifelse([$2], , :, [$2])
+fi
+])dnl
+
+dnl GMP_PROG_CC_WORKS(CC, CFLAGS, ACTION-IF-WORKS, [ACTION-IF-NOT-WORKS])
+dnl Check if CC can compile and link. Perform various target specific tests.
+dnl FIXME: Require `$target'.
+AC_DEFUN(GMP_PROG_CC_WORKS,
+[AC_LANG_C dnl Note: Destructive.
+CC="[$1]"
+CFLAGS="[$2]"
+AC_MSG_CHECKING([if the C compiler ($CC) works with flags $CFLAGS])
+
+# Simple test for all targets.
+AC_TRY_COMPILER([int main(){return(0);}],
+ tmp_works, tmp_cross)
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ AC_TRY_COMPILE( , [} void *g(); void *f() { return g(); } int bar(){],
+ tmp_works=yes, tmp_works=no)
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ [$3]
+else
+ ifelse([$4], , :, [$4])
+fi
+
+AC_MSG_RESULT($tmp_works)
+])dnl
+
+
+dnl GMP_C_ANSI2KNR
+dnl --------------
+dnl Setup to use ansi2knr if necessary.
+dnl
+dnl The test here is simply that if an ANSI style function works then
+dnl ansi2knr isn't needed. The normal tests for whether $CC works mean we
+dnl don't need to worry here about anything badly broken.
+dnl
+dnl AM_C_PROTOTYPES is the normal way to set up ansi2knr, but (in automake
+dnl March 2000) it gives the wrong answer on a C++ compiler because its
+dnl test requires that the compiler accept both ANSI and K&R, or otherwise
+dnl ansi2knr is used. A C++ compiler fails on the K&R part, which makes
+dnl AM_C_PROTOTYPES think it needs ansi2knr! GMP has no bare K&R so we
+dnl only need ANSI or K&R to work, not both.
+
+AC_DEFUN(GMP_C_ANSI2KNR,
+[AC_CACHE_CHECK([if ansi2knr should be used],
+ gmp_cv_c_ansi2knr,
+[cat >conftest.c <<EOF
+int main (int argc, char *argv[]) { return 0; }
+EOF
+if AC_TRY_EVAL(ac_compile); then
+ gmp_cv_c_ansi2knr=no
+else
+ gmp_cv_c_ansi2knr=yes
+fi
+rm -f conftest.*
+])
+if test $gmp_cv_c_ansi2knr = no; then
+ U= ANSI2KNR=
+else
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+ AC_HEADER_STDC
+ AC_CHECK_HEADERS(string.h)
+fi
+AC_SUBST(U)
+AC_SUBST(ANSI2KNR)
+])
+
+
+dnl Deal with bad synchronization of Autoconf with Libtool.
+AC_DEFUN(AC_CANONICAL_BUILD, [_AC_CANONICAL_BUILD])
+AC_DEFUN(AC_CHECK_TOOL_PREFIX, [_AC_CHECK_TOOL_PREFIX])
+
+
+# serial 1
+
+AC_DEFUN(AM_C_PROTOTYPES,
+[AC_REQUIRE([AM_PROG_CC_STDC])
+AC_REQUIRE([AC_PROG_CPP])
+AC_MSG_CHECKING([for function prototypes])
+if test "$am_cv_prog_cc_stdc" != no; then
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(PROTOTYPES,1,[Define if compiler has function prototypes])
+ U= ANSI2KNR=
+else
+ AC_MSG_RESULT(no)
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+ AC_HEADER_STDC
+ AC_CHECK_HEADERS(string.h)
+fi
+AC_SUBST(U)dnl
+AC_SUBST(ANSI2KNR)dnl
+])
+
+
+# serial 1
+
+# @defmac AC_PROG_CC_STDC
+# @maindex PROG_CC_STDC
+# @ovindex CC
+# If the C compiler in not in ANSI C mode by default, try to add an option
+# to output variable @code{CC} to make it so. This macro tries various
+# options that select ANSI C on some system or another. It considers the
+# compiler to be in ANSI C mode if it handles function prototypes correctly.
+#
+# If you use this macro, you should check after calling it whether the C
+# compiler has been set to accept ANSI C; if not, the shell variable
+# @code{am_cv_prog_cc_stdc} is set to @samp{no}. If you wrote your source
+# code in ANSI C, you can make an un-ANSIfied copy of it by using the
+# program @code{ansi2knr}, which comes with Ghostscript.
+# @end defmac
+
+AC_DEFUN(AM_PROG_CC_STDC,
+[AC_REQUIRE([AC_PROG_CC])
+AC_BEFORE([$0], [AC_C_INLINE])
+AC_BEFORE([$0], [AC_C_CONST])
+dnl Force this before AC_PROG_CPP. Some cpp's, eg on HPUX, require
+dnl a magic option to avoid problems with ANSI preprocessor commands
+dnl like #elif.
+dnl FIXME: can't do this because then AC_AIX won't work due to a
+dnl circular dependency.
+dnl AC_BEFORE([$0], [AC_PROG_CPP])
+AC_MSG_CHECKING(for ${CC-cc} option to accept ANSI C)
+AC_CACHE_VAL(am_cv_prog_cc_stdc,
+[am_cv_prog_cc_stdc=no
+ac_save_CC="$CC"
+# Don't try gcc -ansi; that turns off useful extensions and
+# breaks some systems' header files.
+# AIX -qlanglvl=ansi
+# Ultrix and OSF/1 -std1
+# HP-UX 10.20 and later -Ae
+# HP-UX older versions -Aa -D_HPUX_SOURCE
+# SVR4 -Xc -D__EXTENSIONS__
+for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ AC_TRY_COMPILE(
+[#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+], [
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+],
+[am_cv_prog_cc_stdc="$ac_arg"; break])
+done
+CC="$ac_save_CC"
+])
+if test -z "$am_cv_prog_cc_stdc"; then
+ AC_MSG_RESULT([none needed])
+else
+ AC_MSG_RESULT($am_cv_prog_cc_stdc)
+fi
+case "x$am_cv_prog_cc_stdc" in
+ x|xno) ;;
+ *) CC="$CC $am_cv_prog_cc_stdc" ;;
+esac
+])
+
+# Do all the work for Automake. This macro actually does too much --
+# some checks are only needed if your package does certain things.
+# But this isn't really a big deal.
+
+# serial 1
+
+dnl Usage:
+dnl AM_INIT_AUTOMAKE(package,version, [no-define])
+
+AC_DEFUN(AM_INIT_AUTOMAKE,
+[AC_REQUIRE([AC_PROG_INSTALL])
+dnl We require 2.13 because we rely on SHELL being computed by configure.
+AC_PREREQ([2.13])
+PACKAGE=[$1]
+AC_SUBST(PACKAGE)
+VERSION=[$2]
+AC_SUBST(VERSION)
+dnl test to see if srcdir already configured
+if test "`CDPATH=: && cd $srcdir && pwd`" != "`pwd`" &&
+ test -f $srcdir/config.status; then
+ AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
+fi
+ifelse([$3],,
+AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
+AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package]))
+AC_REQUIRE([AM_SANITY_CHECK])
+AC_REQUIRE([AC_ARG_PROGRAM])
+AM_MISSING_PROG(ACLOCAL, aclocal)
+AM_MISSING_PROG(AUTOCONF, autoconf)
+AM_MISSING_PROG(AUTOMAKE, automake)
+AM_MISSING_PROG(AUTOHEADER, autoheader)
+AM_MISSING_PROG(MAKEINFO, makeinfo)
+AM_MISSING_PROG(AMTAR, tar)
+AM_MISSING_INSTALL_SH
+dnl We need awk for the "check" target. The system "awk" is bad on
+dnl some platforms.
+AC_REQUIRE([AC_PROG_AWK])
+AC_REQUIRE([AC_PROG_MAKE_SET])
+AC_REQUIRE([AM_DEP_TRACK])
+AC_REQUIRE([AM_SET_DEPDIR])
+ifdef([AC_PROVIDE_AC_PROG_CC], [AM_DEPENDENCIES(CC)], [
+ define([AC_PROG_CC], defn([AC_PROG_CC])[AM_DEPENDENCIES(CC)])])
+ifdef([AC_PROVIDE_AC_PROG_CXX], [AM_DEPENDENCIES(CXX)], [
+ define([AC_PROG_CXX], defn([AC_PROG_CXX])[AM_DEPENDENCIES(CXX)])])
+])
+
+#
+# Check to make sure that the build environment is sane.
+#
+
+AC_DEFUN(AM_SANITY_CHECK,
+[AC_MSG_CHECKING([whether build environment is sane])
+# Just in case
+sleep 1
+echo timestamp > conftestfile
+# Do `set' in a subshell so we don't clobber the current shell's
+# arguments. Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+ set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null`
+ if test "[$]*" = "X"; then
+ # -L didn't work.
+ set X `ls -t $srcdir/configure conftestfile`
+ fi
+ if test "[$]*" != "X $srcdir/configure conftestfile" \
+ && test "[$]*" != "X conftestfile $srcdir/configure"; then
+
+ # If neither matched, then we have a broken ls. This can happen
+ # if, for instance, CONFIG_SHELL is bash and it inherits a
+ # broken ls alias from the environment. This has actually
+ # happened. Such a system could not be considered "sane".
+ AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
+alias in your environment])
+ fi
+
+ test "[$]2" = conftestfile
+ )
+then
+ # Ok.
+ :
+else
+ AC_MSG_ERROR([newly created file is older than distributed files!
+Check your system clock])
+fi
+rm -f conftest*
+AC_MSG_RESULT(yes)])
+
+dnl AM_MISSING_PROG(NAME, PROGRAM)
+AC_DEFUN(AM_MISSING_PROG, [
+AC_REQUIRE([AM_MISSING_HAS_RUN])
+$1=${$1-"${am_missing_run}$2"}
+AC_SUBST($1)])
+
+dnl Like AM_MISSING_PROG, but only looks for install-sh.
+dnl AM_MISSING_INSTALL_SH()
+AC_DEFUN(AM_MISSING_INSTALL_SH, [
+AC_REQUIRE([AM_MISSING_HAS_RUN])
+if test -z "$install_sh"; then
+ install_sh="$ac_aux_dir/install-sh"
+ test -f "$install_sh" || install_sh="$ac_aux_dir/install.sh"
+ test -f "$install_sh" || install_sh="${am_missing_run}${ac_auxdir}/install-sh"
+ dnl FIXME: an evil hack: we remove the SHELL invocation from
+ dnl install_sh because automake adds it back in. Sigh.
+ install_sh="`echo $install_sh | sed -e 's/\${SHELL}//'`"
+fi
+AC_SUBST(install_sh)])
+
+dnl AM_MISSING_HAS_RUN.
+dnl Define MISSING if not defined so far and test if it supports --run.
+dnl If it does, set am_missing_run to use it, otherwise, to nothing.
+AC_DEFUN([AM_MISSING_HAS_RUN], [
+test x"${MISSING+set}" = xset || \
+ MISSING="\${SHELL} `CDPATH=: && cd $ac_aux_dir && pwd`/missing"
+dnl Use eval to expand $SHELL
+if eval "$MISSING --run :"; then
+ am_missing_run="$MISSING --run "
+else
+ am_missing_run=
+ am_backtick='`'
+ AC_MSG_WARN([${am_backtick}missing' script is too old or missing])
+fi
+])
+
+dnl See how the compiler implements dependency checking.
+dnl Usage:
+dnl AM_DEPENDENCIES(NAME)
+dnl NAME is "CC", "CXX" or "OBJC".
+
+dnl We try a few techniques and use that to set a single cache variable.
+
+AC_DEFUN(AM_DEPENDENCIES,[
+AC_REQUIRE([AM_SET_DEPDIR])
+AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])
+ifelse([$1],CC,[
+AC_REQUIRE([AC_PROG_CC])
+AC_REQUIRE([AC_PROG_CPP])
+depcc="$CC"
+depcpp="$CPP"],[$1],CXX,[
+AC_REQUIRE([AC_PROG_CXX])
+AC_REQUIRE([AC_PROG_CXXCPP])
+depcc="$CXX"
+depcpp="$CXXCPP"],[$1],OBJC,[
+am_cv_OBJC_dependencies_compiler_type=gcc],[
+AC_REQUIRE([AC_PROG_][$1])
+depcc="$[$1]"
+depcpp=""])
+AC_MSG_CHECKING([dependency style of $depcc])
+AC_CACHE_VAL(am_cv_[$1]_dependencies_compiler_type,[
+if test -z "$AMDEP"; then
+ echo '#include "conftest.h"' > conftest.c
+ echo 'int i;' > conftest.h
+
+ am_cv_[$1]_dependencies_compiler_type=none
+ for depmode in `sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < "$am_depcomp"`; do
+ case "$depmode" in
+ nosideeffect)
+ # after this tag, mechanisms are not by side-effect, so they'll
+ # only be used when explicitly requested
+ if test "x$enable_dependency_tracking" = xyes; then
+ continue
+ else
+ break
+ fi
+ ;;
+ none) break ;;
+ esac
+ if depmode="$depmode" \
+ source=conftest.c object=conftest.o \
+ depfile=conftest.Po tmpdepfile=conftest.TPo \
+ $SHELL $am_depcomp $depcc -c conftest.c 2>/dev/null &&
+ grep conftest.h conftest.Po > /dev/null 2>&1; then
+ am_cv_[$1]_dependencies_compiler_type="$depmode"
+ break
+ fi
+ done
+
+ rm -f conftest.*
+else
+ am_cv_[$1]_dependencies_compiler_type=none
+fi
+])
+AC_MSG_RESULT($am_cv_[$1]_dependencies_compiler_type)
+[$1]DEPMODE="depmode=$am_cv_[$1]_dependencies_compiler_type"
+AC_SUBST([$1]DEPMODE)
+])
+
+dnl Choose a directory name for dependency files.
+dnl This macro is AC_REQUIREd in AM_DEPENDENCIES
+
+AC_DEFUN(AM_SET_DEPDIR,[
+if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then
+ DEPDIR=.deps
+else
+ DEPDIR=_deps
+fi
+AC_SUBST(DEPDIR)
+])
+
+AC_DEFUN(AM_DEP_TRACK,[
+AC_ARG_ENABLE(dependency-tracking,
+[ --disable-dependency-tracking Speeds up one-time builds
+ --enable-dependency-tracking Do not reject slow dependency extractors])
+if test "x$enable_dependency_tracking" = xno; then
+ AMDEP="#"
+else
+ am_depcomp="$ac_aux_dir/depcomp"
+ if test ! -f "$am_depcomp"; then
+ AMDEP="#"
+ else
+ AMDEP=
+ fi
+fi
+AC_SUBST(AMDEP)
+if test -z "$AMDEP"; then
+ AMDEPBACKSLASH='\'
+else
+ AMDEPBACKSLASH=
+fi
+pushdef([subst], defn([AC_SUBST]))
+subst(AMDEPBACKSLASH)
+popdef([subst])
+])
+
+dnl Generate code to set up dependency tracking.
+dnl This macro should only be invoked once -- use via AC_REQUIRE.
+dnl Usage:
+dnl AM_OUTPUT_DEPENDENCY_COMMANDS
+
+dnl
+dnl This code is only required when automatic dependency tracking
+dnl is enabled. FIXME. This creates each `.P' file that we will
+dnl need in order to bootstrap the dependency handling code.
+AC_DEFUN(AM_OUTPUT_DEPENDENCY_COMMANDS,[
+AC_OUTPUT_COMMANDS([
+test x"$AMDEP" != x"" ||
+for mf in $CONFIG_FILES; do
+ case "$mf" in
+ Makefile) dirpart=.;;
+ */Makefile) dirpart=`echo "$mf" | sed -e 's|/[^/]*$||'`;;
+ *) continue;;
+ esac
+ grep '^DEP_FILES *= *[^ #]' < "$mf" > /dev/null || continue
+ # Extract the definition of DEP_FILES from the Makefile without
+ # running `make'.
+ DEPDIR=`sed -n -e '/^DEPDIR = / s///p' < "$mf"`
+ test -z "$DEPDIR" && continue
+ # When using ansi2knr, U may be empty or an underscore; expand it
+ U=`sed -n -e '/^U = / s///p' < "$mf"`
+ test -d "$dirpart/$DEPDIR" || mkdir "$dirpart/$DEPDIR"
+ # We invoke sed twice because it is the simplest approach to
+ # changing $(DEPDIR) to its actual value in the expansion.
+ for file in `sed -n -e '
+ /^DEP_FILES = .*\\\\$/ {
+ s/^DEP_FILES = //
+ :loop
+ s/\\\\$//
+ p
+ n
+ /\\\\$/ b loop
+ p
+ }
+ /^DEP_FILES = / s/^DEP_FILES = //p' < "$mf" | \
+ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
+ # Make sure the directory exists.
+ test -f "$dirpart/$file" && continue
+ fdir=`echo "$file" | sed -e 's|/[^/]*$||'`
+ $ac_aux_dir/mkinstalldirs "$dirpart/$fdir" > /dev/null 2>&1
+ # echo "creating $dirpart/$file"
+ echo '# dummy' > "$dirpart/$file"
+ done
+done
+], [AMDEP="$AMDEP"
+ac_aux_dir="$ac_aux_dir"])])
+
+# Like AC_CONFIG_HEADER, but automatically create stamp file.
+
+AC_DEFUN(AM_CONFIG_HEADER,
+[AC_PREREQ([2.12])
+AC_CONFIG_HEADER([$1])
+dnl When config.status generates a header, we must update the stamp-h file.
+dnl This file resides in the same directory as the config header
+dnl that is generated. We must strip everything past the first ":",
+dnl and everything past the last "/".
+AC_OUTPUT_COMMANDS(changequote(<<,>>)dnl
+ifelse(patsubst(<<$1>>, <<[^ ]>>, <<>>), <<>>,
+<<test -z "<<$>>CONFIG_HEADERS" || echo timestamp > patsubst(<<$1>>, <<^\([^:]*/\)?.*>>, <<\1>>)stamp-h<<>>dnl>>,
+<<am_indx=1
+for am_file in <<$1>>; do
+ case " <<$>>CONFIG_HEADERS " in
+ *" <<$>>am_file "*<<)>>
+ echo timestamp > `echo <<$>>am_file | sed -e 's%:.*%%' -e 's%[^/]*$%%'`stamp-h$am_indx
+ ;;
+ esac
+ am_indx=`expr "<<$>>am_indx" + 1`
+done<<>>dnl>>)
+changequote([,]))])
+
+# Add --enable-maintainer-mode option to configure.
+# From Jim Meyering
+
+# serial 1
+
+AC_DEFUN(AM_MAINTAINER_MODE,
+[AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
+ dnl maintainer-mode is disabled by default
+ AC_ARG_ENABLE(maintainer-mode,
+[ --enable-maintainer-mode enable make rules and dependencies not useful
+ (and sometimes confusing) to the casual installer],
+ USE_MAINTAINER_MODE=$enableval,
+ USE_MAINTAINER_MODE=no)
+ AC_MSG_RESULT($USE_MAINTAINER_MODE)
+ AM_CONDITIONAL(MAINTAINER_MODE, test $USE_MAINTAINER_MODE = yes)
+ MAINT=$MAINTAINER_MODE_TRUE
+ AC_SUBST(MAINT)dnl
+]
+)
+
+# Define a conditional.
+
+AC_DEFUN(AM_CONDITIONAL,
+[AC_SUBST($1_TRUE)
+AC_SUBST($1_FALSE)
+if $2; then
+ $1_TRUE=
+ $1_FALSE='#'
+else
+ $1_TRUE='#'
+ $1_FALSE=
+fi])
+
+
+# serial 42 AC_PROG_LIBTOOL
+AC_DEFUN(AC_PROG_LIBTOOL,
+[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl
+
+# Save cache, so that ltconfig can load it
+AC_CACHE_SAVE
+
+# Actually configure libtool. ac_aux_dir is where install-sh is found.
+AR="$AR" CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" \
+MAGIC="$MAGIC" LD="$LD" LDFLAGS="$LDFLAGS" LIBS="$LIBS" \
+LN_S="$LN_S" NM="$NM" RANLIB="$RANLIB" STRIP="$STRIP" \
+AS="$AS" DLLTOOL="$DLLTOOL" OBJDUMP="$OBJDUMP" \
+objext="$OBJEXT" exeext="$EXEEXT" reload_flag="$reload_flag" \
+deplibs_check_method="$deplibs_check_method" file_magic_cmd="$file_magic_cmd" \
+${CONFIG_SHELL-/bin/sh} $ac_aux_dir/ltconfig --no-reexec \
+$libtool_flags --no-verify --build="$build" $ac_aux_dir/ltmain.sh $lt_target \
+|| AC_MSG_ERROR([libtool configure failed])
+
+# Reload cache, that may have been modified by ltconfig
+AC_CACHE_LOAD
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltconfig $ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+AC_SUBST(LIBTOOL)dnl
+
+# Redirect the config.log output again, so that the ltconfig log is not
+# clobbered by the next message.
+exec 5>>./config.log
+])
+
+AC_DEFUN(AC_LIBTOOL_SETUP,
+[AC_PREREQ(2.13)dnl
+AC_REQUIRE([AC_ENABLE_SHARED])dnl
+AC_REQUIRE([AC_ENABLE_STATIC])dnl
+AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_LD])dnl
+AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl
+AC_REQUIRE([AC_PROG_NM])dnl
+AC_REQUIRE([AC_PROG_LN_S])dnl
+AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl
+AC_REQUIRE([AC_OBJEXT])dnl
+AC_REQUIRE([AC_EXEEXT])dnl
+dnl
+
+# Only perform the check for file, if the check method requires it
+case "$deplibs_check_method" in
+file_magic*)
+ if test "$file_magic_cmd" = '${MAGIC}'; then
+ AC_PATH_MAGIC
+ fi
+ ;;
+esac
+
+case "$target" in
+NONE) lt_target="$host" ;;
+*) lt_target="$target" ;;
+esac
+
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+AC_CHECK_TOOL(STRIP, strip, :)
+
+# Check for any special flags to pass to ltconfig.
+libtool_flags="--cache-file=$cache_file"
+test "$enable_shared" = no && libtool_flags="$libtool_flags --disable-shared"
+test "$enable_static" = no && libtool_flags="$libtool_flags --disable-static"
+test "$enable_fast_install" = no && libtool_flags="$libtool_flags --disable-fast-install"
+test "$ac_cv_prog_gcc" = yes && libtool_flags="$libtool_flags --with-gcc"
+test "$ac_cv_prog_gnu_ld" = yes && libtool_flags="$libtool_flags --with-gnu-ld"
+ifdef([AC_PROVIDE_AC_LIBTOOL_DLOPEN],
+[libtool_flags="$libtool_flags --enable-dlopen"])
+ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL],
+[libtool_flags="$libtool_flags --enable-win32-dll"])
+AC_ARG_ENABLE(libtool-lock,
+ [ --disable-libtool-lock avoid locking (might break parallel builds)])
+test "x$enable_libtool_lock" = xno && libtool_flags="$libtool_flags --disable-lock"
+test x"$silent" = xyes && libtool_flags="$libtool_flags --silent"
+
+AC_ARG_WITH(pic,
+ [ --with-pic try to use only PIC/non-PIC objects [default=use both]],
+ pic_mode="$withval", pic_mode=default)
+test x"$pic_mode" = xyes && libtool_flags="$libtool_flags --prefer-pic"
+test x"$pic_mode" = xno && libtool_flags="$libtool_flags --prefer-non-pic"
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case "$lt_target" in
+*-*-irix6*)
+ # Find out which ABI we are using.
+ echo '[#]line __oline__ "configure"' > conftest.$ac_ext
+ if AC_TRY_EVAL(ac_compile); then
+ case "`/usr/bin/file conftest.o`" in
+ *32-bit*)
+ LD="${LD-ld} -32"
+ ;;
+ *N32*)
+ LD="${LD-ld} -n32"
+ ;;
+ *64-bit*)
+ LD="${LD-ld} -64"
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+
+*-*-sco3.2v5*)
+ # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -belf"
+ AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+ [AC_LANG_SAVE
+ AC_LANG_C
+ AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+ AC_LANG_RESTORE])
+ if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+ # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+ CFLAGS="$SAVE_CFLAGS"
+ fi
+ ;;
+
+ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL],
+[*-*-cygwin* | *-*-mingw*)
+ AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+ AC_CHECK_TOOL(AS, as, false)
+ AC_CHECK_TOOL(OBJDUMP, objdump, false)
+
+ # recent cygwin and mingw systems supply a stub DllMain which the user
+ # can override, but on older systems we have to supply one
+ AC_CACHE_CHECK([if libtool should supply DllMain function], lt_cv_need_dllmain,
+ [AC_TRY_LINK([],
+ [extern int __attribute__((__stdcall__)) DllMain(void*, int, void*);
+ DllMain (0, 0, 0);],
+ [lt_cv_need_dllmain=no],[lt_cv_need_dllmain=yes])])
+
+ case "$lt_target/$CC" in
+ *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*)
+ # old mingw systems require "-dll" to link a DLL, while more recent ones
+ # require "-mdll"
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -mdll"
+ AC_CACHE_CHECK([how to link DLLs], lt_cv_cc_dll_switch,
+ [AC_TRY_LINK([], [], [lt_cv_cc_dll_switch=-mdll],[lt_cv_cc_dll_switch=-dll])])
+ CFLAGS="$SAVE_CFLAGS" ;;
+ *-*-cygwin*)
+ # cygwin systems need to pass --dll to the linker, and not link
+ # crt.o which will require a WinMain@16 definition.
+ lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;;
+ esac
+ ;;
+ ])
+esac
+])
+
+# AC_LIBTOOL_DLOPEN - enable checks for dlopen support
+AC_DEFUN(AC_LIBTOOL_DLOPEN, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])])
+
+# AC_LIBTOOL_WIN32_DLL - declare package support for building win32 dll's
+AC_DEFUN(AC_LIBTOOL_WIN32_DLL, [AC_BEFORE([$0], [AC_LIBTOOL_SETUP])])
+
+# AC_ENABLE_SHARED - implement the --enable-shared flag
+# Usage: AC_ENABLE_SHARED[(DEFAULT)]
+# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to
+# `yes'.
+AC_DEFUN(AC_ENABLE_SHARED, [dnl
+define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(shared,
+changequote(<<, >>)dnl
+<< --enable-shared[=PKGS] build shared libraries [default=>>AC_ENABLE_SHARED_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_shared=yes ;;
+no) enable_shared=no ;;
+*)
+ enable_shared=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_shared=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac],
+enable_shared=AC_ENABLE_SHARED_DEFAULT)dnl
+])
+
+# AC_DISABLE_SHARED - set the default shared flag to --disable-shared
+AC_DEFUN(AC_DISABLE_SHARED, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_SHARED(no)])
+
+# AC_ENABLE_STATIC - implement the --enable-static flag
+# Usage: AC_ENABLE_STATIC[(DEFAULT)]
+# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to
+# `yes'.
+AC_DEFUN(AC_ENABLE_STATIC, [dnl
+define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(static,
+changequote(<<, >>)dnl
+<< --enable-static[=PKGS] build static libraries [default=>>AC_ENABLE_STATIC_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_static=yes ;;
+no) enable_static=no ;;
+*)
+ enable_static=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_static=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac],
+enable_static=AC_ENABLE_STATIC_DEFAULT)dnl
+])
+
+# AC_DISABLE_STATIC - set the default static flag to --disable-static
+AC_DEFUN(AC_DISABLE_STATIC, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_STATIC(no)])
+
+
+# AC_ENABLE_FAST_INSTALL - implement the --enable-fast-install flag
+# Usage: AC_ENABLE_FAST_INSTALL[(DEFAULT)]
+# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to
+# `yes'.
+AC_DEFUN(AC_ENABLE_FAST_INSTALL, [dnl
+define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE(fast-install,
+changequote(<<, >>)dnl
+<< --enable-fast-install[=PKGS] optimize for fast installation [default=>>AC_ENABLE_FAST_INSTALL_DEFAULT],
+changequote([, ])dnl
+[p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_fast_install=yes ;;
+no) enable_fast_install=no ;;
+*)
+ enable_fast_install=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_fast_install=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac],
+enable_fast_install=AC_ENABLE_FAST_INSTALL_DEFAULT)dnl
+])
+
+# AC_ENABLE_FAST_INSTALL - set the default to --disable-fast-install
+AC_DEFUN(AC_DISABLE_FAST_INSTALL, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_FAST_INSTALL(no)])
+
+
+# AC_PATH_TOOL_PREFIX - find a file program which can recognise shared library
+AC_DEFUN(AC_PATH_TOOL_PREFIX,
+[AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC,
+[case "$MAGIC" in
+ /*)
+ lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path.
+ ;;
+ ?:/*)
+ ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path.
+ ;;
+ *)
+ ac_save_MAGIC="$MAGIC"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="ifelse([$2], , $PATH, [$2])"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$1; then
+ lt_cv_path_MAGIC="$ac_dir/$1"
+ if test -n "$file_magic_test_file"; then
+ case "$deplibs_check_method" in
+ "file_magic "*)
+ file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+ MAGIC="$lt_cv_path_MAGIC"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ egrep "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ MAGIC="$ac_save_MAGIC"
+ ;;
+esac])
+MAGIC="$lt_cv_path_MAGIC"
+if test -n "$MAGIC"; then
+ AC_MSG_RESULT($MAGIC)
+else
+ AC_MSG_RESULT(no)
+fi
+])
+
+
+# AC_PATH_MAGIC - find a file program which can recognise a shared library
+AC_DEFUN(AC_PATH_MAGIC,
+[AC_REQUIRE([AC_CHECK_TOOL_PREFIX])dnl
+AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin:$PATH)
+if test -z "$lt_cv_path_MAGIC"; then
+ if test -n "$ac_tool_prefix"; then
+ AC_PATH_TOOL_PREFIX(file, /usr/bin:$PATH)
+ else
+ MAGIC=:
+ fi
+fi
+])
+
+
+# AC_PROG_LD - find the path to the GNU or non-GNU linker
+AC_DEFUN(AC_PROG_LD,
+[AC_ARG_WITH(gnu-ld,
+[ --with-gnu-ld assume the C compiler uses GNU ld [default=no]],
+test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no)
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+ac_prog=ld
+if test "$ac_cv_prog_gcc" = yes; then
+ # Check if gcc -print-prog-name=ld gives a path.
+ AC_MSG_CHECKING([for ld used by GCC])
+ case $lt_target in
+ *-*-mingw*)
+ # gcc leaves a trailing carriage return which upsets mingw
+ ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+ *)
+ ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+ esac
+ case "$ac_prog" in
+ # Accept absolute paths.
+changequote(,)dnl
+ [\\/]* | [A-Za-z]:[\\/]*)
+ re_direlt='/[^/][^/]*/\.\./'
+changequote([,])dnl
+ # Canonicalize the path of ld
+ ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
+ while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+ ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"`
+ done
+ test -z "$LD" && LD="$ac_prog"
+ ;;
+ "")
+ # If it fails, then pretend we aren't using GCC.
+ ac_prog=ld
+ ;;
+ *)
+ # If it is relative, then search for the first ld in PATH.
+ with_gnu_ld=unknown
+ ;;
+ esac
+elif test "$with_gnu_ld" = yes; then
+ AC_MSG_CHECKING([for GNU ld])
+else
+ AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(ac_cv_path_LD,
+[if test -z "$LD"; then
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+ ac_cv_path_LD="$ac_dir/$ac_prog"
+ # Check to see if the program is GNU ld. I'd rather use --version,
+ # but apparently some GNU ld's only accept -v.
+ # Break only if it was the GNU/non-GNU ld that we prefer.
+ if "$ac_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
+ test "$with_gnu_ld" != no && break
+ else
+ test "$with_gnu_ld" != yes && break
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+else
+ ac_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$ac_cv_path_LD"
+if test -n "$LD"; then
+ AC_MSG_RESULT($LD)
+else
+ AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+AC_PROG_LD_GNU
+])
+
+AC_DEFUN(AC_PROG_LD_GNU,
+[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], ac_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+ ac_cv_prog_gnu_ld=yes
+else
+ ac_cv_prog_gnu_ld=no
+fi])
+with_gnu_ld=$ac_cv_prog_gnu_ld
+])
+
+# AC_PROG_LD_RELOAD_FLAG - find reload flag for linker
+# -- PORTME Some linkers may need a different reload flag.
+AC_DEFUN(AC_PROG_LD_RELOAD_FLAG,
+[AC_CACHE_CHECK([for $LD option to reload object files], lt_cv_ld_reload_flag,
+[lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+])
+
+# AC_DEPLIBS_CHECK_METHOD - how to check for library dependencies
+# -- PORTME fill in with the dynamic library characteristics
+AC_DEFUN(AC_DEPLIBS_CHECK_METHOD,
+[AC_CACHE_CHECK([how to recognise dependant libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='${MAGIC}'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [regex]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case "$host_os" in
+aix4* | beos*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+bsdi4*)
+ changequote(,)dnl
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+ changequote([, ])dnl
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ ;;
+
+cygwin* | mingw*)
+ lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+ lt_cv_file_magic_cmd='${OBJDUMP} -f'
+ ;;
+
+freebsd*)
+ case "$version_type" in
+ freebsd-elf*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ esac
+ ;;
+
+gnu*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+irix5* | irix6*)
+ case "$host_os" in
+ irix5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
+ ;;
+ *)
+ case "$LD" in
+ *-32|*"-32 ") libmagic=32-bit;;
+ *-n32|*"-n32 ") libmagic=N32;;
+ *-64|*"-64 ") libmagic=64-bit;;
+ *) libmagic=never-match;;
+ esac
+ # this will be overridden with pass_all, but let us keep it just in case
+ changequote(,)dnl
+ lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"
+ changequote([, ])dnl
+ ;;
+ esac
+ lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+ case "$host_cpu" in
+ alpha* | i*86 | powerpc* | sparc* )
+ lt_cv_deplibs_check_method=pass_all ;;
+ *)
+ # glibc up to 2.1.1 does not perform some relocations on ARM
+ changequote(,)dnl
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;
+ changequote([, ])dnl
+ esac
+ lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
+ ;;
+
+osf3* | osf4* | osf5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+sco3.2v5*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+solaris*)
+ lt_cv_deplibs_check_method=pass_all
+ lt_cv_file_magic_test_file=/lib/libc.so
+ ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ case "$host_vendor" in
+ ncr)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ motorola)
+ changequote(,)dnl
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+ changequote([, ])dnl
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+ ;;
+ esac
+ ;;
+esac
+])
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+])
+
+
+# AC_PROG_NM - find the path to a BSD-compatible name lister
+AC_DEFUN(AC_PROG_NM,
+[AC_MSG_CHECKING([for BSD-compatible nm])
+AC_CACHE_VAL(ac_cv_path_NM,
+[if test -n "$NM"; then
+ # Let the user override the test.
+ ac_cv_path_NM="$NM"
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext ; then
+ # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+ # nm: unknown option "B" ignored
+ if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -B"
+ break
+ elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -p"
+ break
+ else
+ ac_cv_path_NM=${ac_cv_path_NM="$ac_dir/nm"} # keep the first match, but
+ continue # so that we can try to find one that supports BSD flags
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+ test -z "$ac_cv_path_NM" && ac_cv_path_NM=nm
+fi])
+NM="$ac_cv_path_NM"
+AC_MSG_RESULT([$NM])
+])
+
+# AC_CHECK_LIBM - check for math library
+AC_DEFUN(AC_CHECK_LIBM,
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case "$lt_target" in
+*-*-beos* | *-*-cygwin*)
+ # These system don't have libm
+ ;;
+*-ncr-sysv4.3*)
+ AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+ AC_CHECK_LIB(m, main, LIBM="$LIBM -lm")
+ ;;
+*)
+ AC_CHECK_LIB(m, main, LIBM="-lm")
+ ;;
+esac
+])
+
+# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for
+# the libltdl convenience library, adds --enable-ltdl-convenience to
+# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
+# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
+# to be `${top_builddir}/libltdl'. Make sure you start DIR with
+# '${top_builddir}/' (note the single quotes!) if your package is not
+# flat, and, if you're not using automake, define top_builddir as
+# appropriate in the Makefiles.
+AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+ case "$enable_ltdl_convenience" in
+ no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
+ "") enable_ltdl_convenience=yes
+ ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
+ esac
+ LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la
+ INCLTDL=ifelse($#,1,-I$1,['-I${top_srcdir}/libltdl'])
+])
+
+# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for
+# the libltdl installable library, and adds --enable-ltdl-install to
+# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
+# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
+# to be `${top_builddir}/libltdl'. Make sure you start DIR with
+# '${top_builddir}/' (note the single quotes!) if your package is not
+# flat, and, if you're not using automake, define top_builddir as
+# appropriate in the Makefiles.
+# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
+AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+ AC_CHECK_LIB(ltdl, main,
+ [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no],
+ [if test x"$enable_ltdl_install" = xno; then
+ AC_MSG_WARN([libltdl not installed, but installation disabled])
+ else
+ enable_ltdl_install=yes
+ fi
+ ])
+ if test x"$enable_ltdl_install" = x"yes"; then
+ ac_configure_args="$ac_configure_args --enable-ltdl-install"
+ LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la
+ INCLTDL=ifelse($#,1,-I$1,['-I${top_srcdir}/libltdl'])
+ else
+ ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
+ LIBLTDL="-lltdl"
+ INCLTDL=
+ fi
+])
+
+dnl old names
+AC_DEFUN(AM_PROG_LIBTOOL, [indir([AC_PROG_LIBTOOL])])dnl
+AC_DEFUN(AM_ENABLE_SHARED, [indir([AC_ENABLE_SHARED], $@)])dnl
+AC_DEFUN(AM_ENABLE_STATIC, [indir([AC_ENABLE_STATIC], $@)])dnl
+AC_DEFUN(AM_DISABLE_SHARED, [indir([AC_DISABLE_SHARED], $@)])dnl
+AC_DEFUN(AM_DISABLE_STATIC, [indir([AC_DISABLE_STATIC], $@)])dnl
+AC_DEFUN(AM_PROG_LD, [indir([AC_PROG_LD])])dnl
+AC_DEFUN(AM_PROG_NM, [indir([AC_PROG_NM])])dnl
+
+dnl This is just to silence aclocal about the macro not being used
+ifelse([AC_DISABLE_FAST_INSTALL])dnl
+
diff --git a/rts/gmp/ansi2knr.1 b/rts/gmp/ansi2knr.1
new file mode 100644
index 0000000000..f9ee5a631c
--- /dev/null
+++ b/rts/gmp/ansi2knr.1
@@ -0,0 +1,36 @@
+.TH ANSI2KNR 1 "19 Jan 1996"
+.SH NAME
+ansi2knr \- convert ANSI C to Kernighan & Ritchie C
+.SH SYNOPSIS
+.I ansi2knr
+[--varargs] input_file [output_file]
+.SH DESCRIPTION
+If no output_file is supplied, output goes to stdout.
+.br
+There are no error messages.
+.sp
+.I ansi2knr
+recognizes function definitions by seeing a non-keyword identifier at the left
+margin, followed by a left parenthesis, with a right parenthesis as the last
+character on the line, and with a left brace as the first token on the
+following line (ignoring possible intervening comments). It will recognize a
+multi-line header provided that no intervening line ends with a left or right
+brace or a semicolon. These algorithms ignore whitespace and comments, except
+that the function name must be the first thing on the line.
+.sp
+The following constructs will confuse it:
+.br
+ - Any other construct that starts at the left margin and follows the
+above syntax (such as a macro or function call).
+.br
+ - Some macros that tinker with the syntax of the function header.
+.sp
+The --varargs switch is obsolete, and is recognized only for
+backwards compatibility. The present version of
+.I ansi2knr
+will always attempt to convert a ... argument to va_alist and va_dcl.
+.SH AUTHOR
+L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and
+continues to maintain the current version; most of the code in the current
+version is his work. ansi2knr also includes contributions by Francois
+Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>.
diff --git a/rts/gmp/ansi2knr.c b/rts/gmp/ansi2knr.c
new file mode 100644
index 0000000000..937c731886
--- /dev/null
+++ b/rts/gmp/ansi2knr.c
@@ -0,0 +1,677 @@
+/* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved. */
+
+/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
+
+/*
+ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY. No author or distributor accepts responsibility to anyone for the
+consequences of using it or for whether it serves any particular purpose or
+works at all, unless he says so in writing. Refer to the GNU General Public
+License (the "GPL") for full details.
+
+Everyone is granted permission to copy, modify and redistribute ansi2knr,
+but only under the conditions described in the GPL. A copy of this license
+is supposed to have been given to you along with ansi2knr so you can know
+your rights and responsibilities. It should be in a file named COPYLEFT,
+or, if there is no file named COPYLEFT, a file named COPYING. Among other
+things, the copyright notice and this notice must be preserved on all
+copies.
+
+We explicitly state here what we believe is already implied by the GPL: if
+the ansi2knr program is distributed as a separate set of sources and a
+separate executable file which are aggregated on a storage medium together
+with another program, this in itself does not bring the other program under
+the GPL, nor does the mere fact that such a program or the procedures for
+constructing it invoke the ansi2knr executable bring any other part of the
+program under the GPL.
+*/
+
+/*
+ * Usage:
+ ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
+ * --filename provides the file name for the #line directive in the output,
+ * overriding input_file (if present).
+ * If no input_file is supplied, input is read from stdin.
+ * If no output_file is supplied, output goes to stdout.
+ * There are no error messages.
+ *
+ * ansi2knr recognizes function definitions by seeing a non-keyword
+ * identifier at the left margin, followed by a left parenthesis,
+ * with a right parenthesis as the last character on the line,
+ * and with a left brace as the first token on the following line
+ * (ignoring possible intervening comments), except that a line
+ * consisting of only
+ * identifier1(identifier2)
+ * will not be considered a function definition unless identifier2 is
+ * the word "void", and a line consisting of
+ * identifier1(identifier2, <<arbitrary>>)
+ * will not be considered a function definition.
+ * ansi2knr will recognize a multi-line header provided
+ * that no intervening line ends with a left or right brace or a semicolon.
+ * These algorithms ignore whitespace and comments, except that
+ * the function name must be the first thing on the line.
+ * The following constructs will confuse it:
+ * - Any other construct that starts at the left margin and
+ * follows the above syntax (such as a macro or function call).
+ * - Some macros that tinker with the syntax of function headers.
+ */
+
+/*
+ * The original and principal author of ansi2knr is L. Peter Deutsch
+ * <ghost@aladdin.com>. Other authors are noted in the change history
+ * that follows (in reverse chronological order):
+ lpd 1999-04-12 added minor fixes from Pavel Roskin
+ <pavel_roskin@geocities.com> for clean compilation with
+ gcc -W -Wall
+ lpd 1999-03-22 added hack to recognize lines consisting of
+ identifier1(identifier2, xxx) as *not* being procedures
+ lpd 1999-02-03 made indentation of preprocessor commands consistent
+ lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
+ endless loop; quoted strings within an argument list
+ confused the parser
+ lpd 1999-01-24 added a check for write errors on the output,
+ suggested by Jim Meyering <meyering@ascend.com>
+ lpd 1998-11-09 added further hack to recognize identifier(void)
+ as being a procedure
+ lpd 1998-10-23 added hack to recognize lines consisting of
+ identifier1(identifier2) as *not* being procedures
+ lpd 1997-12-08 made input_file optional; only closes input and/or
+ output file if not stdin or stdout respectively; prints
+ usage message on stderr rather than stdout; adds
+ --filename switch (changes suggested by
+ <ceder@lysator.liu.se>)
+ lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
+ compilers that don't understand void, as suggested by
+ Tom Lane
+ lpd 1996-01-15 changed to require that the first non-comment token
+ on the line following a function header be a left brace,
+ to reduce sensitivity to macros, as suggested by Tom Lane
+ <tgl@sss.pgh.pa.us>
+ lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
+ undefined preprocessor symbols as 0; changed all #ifdefs
+ for configuration symbols to #ifs
+ lpd 1995-04-05 changed copyright notice to make it clear that
+ including ansi2knr in a program does not bring the entire
+ program under the GPL
+ lpd 1994-12-18 added conditionals for systems where ctype macros
+ don't handle 8-bit characters properly, suggested by
+ Francois Pinard <pinard@iro.umontreal.ca>;
+ removed --varargs switch (this is now the default)
+ lpd 1994-10-10 removed CONFIG_BROKETS conditional
+ lpd 1994-07-16 added some conditionals to help GNU `configure',
+ suggested by Francois Pinard <pinard@iro.umontreal.ca>;
+ properly erase prototype args in function parameters,
+ contributed by Jim Avera <jima@netcom.com>;
+ correct error in writeblanks (it shouldn't erase EOLs)
+ lpd 1989-xx-xx original version
+ */
+
+/* Most of the conditionals here are to make ansi2knr work with */
+/* or without the GNU configure machinery. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <ctype.h>
+
+#if HAVE_CONFIG_H
+
+/*
+ For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
+ This will define HAVE_CONFIG_H and so activate the following lines.
+ */
+
+# if STDC_HEADERS || HAVE_STRING_H
+# include <string.h>
+# else
+# include <strings.h>
+# endif
+
+#else /* not HAVE_CONFIG_H */
+
+/* Otherwise do it the hard way */
+
+# ifdef BSD
+# include <strings.h>
+# else
+# ifdef VMS
+ extern int strlen(), strncmp();
+# else
+# include <string.h>
+# endif
+# endif
+
+#endif /* not HAVE_CONFIG_H */
+
+#if STDC_HEADERS
+# include <stdlib.h>
+#else
+/*
+ malloc and free should be declared in stdlib.h,
+ but if you've got a K&R compiler, they probably aren't.
+ */
+# ifdef MSDOS
+# include <malloc.h>
+# else
+# ifdef VMS
+ extern char *malloc();
+ extern void free();
+# else
+ extern char *malloc();
+ extern int free();
+# endif
+# endif
+
+#endif
+
+/* Define NULL (for *very* old compilers). */
+#ifndef NULL
+# define NULL (0)
+#endif
+
+/*
+ * The ctype macros don't always handle 8-bit characters correctly.
+ * Compensate for this here.
+ */
+#ifdef isascii
+# undef HAVE_ISASCII /* just in case */
+# define HAVE_ISASCII 1
+#else
+#endif
+#if STDC_HEADERS || !HAVE_ISASCII
+# define is_ascii(c) 1
+#else
+# define is_ascii(c) isascii(c)
+#endif
+
+#define is_space(c) (is_ascii(c) && isspace(c))
+#define is_alpha(c) (is_ascii(c) && isalpha(c))
+#define is_alnum(c) (is_ascii(c) && isalnum(c))
+
+/* Scanning macros */
+#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
+#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
+
+/* Forward references */
+char *skipspace();
+char *scanstring();
+int writeblanks();
+int test1();
+int convert1();
+
+/* The main program */
+int
+main(argc, argv)
+ int argc;
+ char *argv[];
+{ FILE *in = stdin;
+ FILE *out = stdout;
+ char *filename = 0;
+ char *program_name = argv[0];
+ char *output_name = 0;
+#define bufsize 5000 /* arbitrary size */
+ char *buf;
+ char *line;
+ char *more;
+ char *usage =
+ "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
+ /*
+ * In previous versions, ansi2knr recognized a --varargs switch.
+ * If this switch was supplied, ansi2knr would attempt to convert
+ * a ... argument to va_alist and va_dcl; if this switch was not
+ * supplied, ansi2knr would simply drop any such arguments.
+ * Now, ansi2knr always does this conversion, and we only
+ * check for this switch for backward compatibility.
+ */
+ int convert_varargs = 1;
+ int output_error;
+
+ while ( argc > 1 && argv[1][0] == '-' ) {
+ if ( !strcmp(argv[1], "--varargs") ) {
+ convert_varargs = 1;
+ argc--;
+ argv++;
+ continue;
+ }
+ if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
+ filename = argv[2];
+ argc -= 2;
+ argv += 2;
+ continue;
+ }
+ fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
+ argv[1]);
+ fprintf(stderr, usage);
+ exit(1);
+ }
+ switch ( argc )
+ {
+ default:
+ fprintf(stderr, usage);
+ exit(0);
+ case 3:
+ output_name = argv[2];
+ out = fopen(output_name, "w");
+ if ( out == NULL ) {
+ fprintf(stderr, "%s: Cannot open output file %s\n",
+ program_name, output_name);
+ exit(1);
+ }
+ /* falls through */
+ case 2:
+ in = fopen(argv[1], "r");
+ if ( in == NULL ) {
+ fprintf(stderr, "%s: Cannot open input file %s\n",
+ program_name, argv[1]);
+ exit(1);
+ }
+ if ( filename == 0 )
+ filename = argv[1];
+ /* falls through */
+ case 1:
+ break;
+ }
+ if ( filename )
+ fprintf(out, "#line 1 \"%s\"\n", filename);
+ buf = malloc(bufsize);
+ if ( buf == NULL )
+ {
+ fprintf(stderr, "Unable to allocate read buffer!\n");
+ exit(1);
+ }
+ line = buf;
+ while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
+ {
+test: line += strlen(line);
+ switch ( test1(buf) )
+ {
+ case 2: /* a function header */
+ convert1(buf, out, 1, convert_varargs);
+ break;
+ case 1: /* a function */
+ /* Check for a { at the start of the next line. */
+ more = ++line;
+f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
+ goto wl;
+ if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
+ goto wl;
+ switch ( *skipspace(more, 1) )
+ {
+ case '{':
+ /* Definitely a function header. */
+ convert1(buf, out, 0, convert_varargs);
+ fputs(more, out);
+ break;
+ case 0:
+ /* The next line was blank or a comment: */
+ /* keep scanning for a non-comment. */
+ line += strlen(line);
+ goto f;
+ default:
+ /* buf isn't a function header, but */
+ /* more might be. */
+ fputs(buf, out);
+ strcpy(buf, more);
+ line = buf;
+ goto test;
+ }
+ break;
+ case -1: /* maybe the start of a function */
+ if ( line != buf + (bufsize - 1) ) /* overflow check */
+ continue;
+ /* falls through */
+ default: /* not a function */
+wl: fputs(buf, out);
+ break;
+ }
+ line = buf;
+ }
+ if ( line != buf )
+ fputs(buf, out);
+ free(buf);
+ if ( output_name ) {
+ output_error = ferror(out);
+ output_error |= fclose(out);
+ } else { /* out == stdout */
+ fflush(out);
+ output_error = ferror(out);
+ }
+ if ( output_error ) {
+ fprintf(stderr, "%s: error writing to %s\n", program_name,
+ (output_name ? output_name : "stdout"));
+ exit(1);
+ }
+ if ( in != stdin )
+ fclose(in);
+ return 0;
+}
+
+/* Skip over whitespace and comments, in either direction. */
+char *
+skipspace(p, dir)
+ register char *p;
+ register int dir; /* 1 for forward, -1 for backward */
+{ for ( ; ; )
+ { while ( is_space(*p) )
+ p += dir;
+ if ( !(*p == '/' && p[dir] == '*') )
+ break;
+ p += dir; p += dir;
+ while ( !(*p == '*' && p[dir] == '/') )
+ { if ( *p == 0 )
+ return p; /* multi-line comment?? */
+ p += dir;
+ }
+ p += dir; p += dir;
+ }
+ return p;
+}
+
+/* Scan over a quoted string, in either direction. */
+char *
+scanstring(p, dir)
+ register char *p;
+ register int dir;
+{
+ for (p += dir; ; p += dir)
+ if (*p == '"' && p[-dir] != '\\')
+ return p + dir;
+}
+
+/*
+ * Write blanks over part of a string.
+ * Don't overwrite end-of-line characters.
+ */
+int
+writeblanks(start, end)
+ char *start;
+ char *end;
+{ char *p;
+ for ( p = start; p < end; p++ )
+ if ( *p != '\r' && *p != '\n' )
+ *p = ' ';
+ return 0;
+}
+
+/*
+ * Test whether the string in buf is a function definition.
+ * The string may contain and/or end with a newline.
+ * Return as follows:
+ * 0 - definitely not a function definition;
+ * 1 - definitely a function definition;
+ * 2 - definitely a function prototype (NOT USED);
+ * -1 - may be the beginning of a function definition,
+ * append another line and look again.
+ * The reason we don't attempt to convert function prototypes is that
+ * Ghostscript's declaration-generating macros look too much like
+ * prototypes, and confuse the algorithms.
+ */
+int
+test1(buf)
+ char *buf;
+{ register char *p = buf;
+ char *bend;
+ char *endfn;
+ int contin;
+
+ if ( !isidfirstchar(*p) )
+ return 0; /* no name at left margin */
+ bend = skipspace(buf + strlen(buf) - 1, -1);
+ switch ( *bend )
+ {
+ case ';': contin = 0 /*2*/; break;
+ case ')': contin = 1; break;
+ case '{': return 0; /* not a function */
+ case '}': return 0; /* not a function */
+ default: contin = -1;
+ }
+ while ( isidchar(*p) )
+ p++;
+ endfn = p;
+ p = skipspace(p, 1);
+ if ( *p++ != '(' )
+ return 0; /* not a function */
+ p = skipspace(p, 1);
+ if ( *p == ')' )
+ return 0; /* no parameters */
+ /* Check that the apparent function name isn't a keyword. */
+ /* We only need to check for keywords that could be followed */
+ /* by a left parenthesis (which, unfortunately, is most of them). */
+ { static char *words[] =
+ { "asm", "auto", "case", "char", "const", "double",
+ "extern", "float", "for", "if", "int", "long",
+ "register", "return", "short", "signed", "sizeof",
+ "static", "switch", "typedef", "unsigned",
+ "void", "volatile", "while", 0
+ };
+ char **key = words;
+ char *kp;
+ unsigned len = endfn - buf;
+
+ while ( (kp = *key) != 0 )
+ { if ( strlen(kp) == len && !strncmp(kp, buf, len) )
+ return 0; /* name is a keyword */
+ key++;
+ }
+ }
+ {
+ char *id = p;
+ int len;
+ /*
+ * Check for identifier1(identifier2) and not
+ * identifier1(void), or identifier1(identifier2, xxxx).
+ */
+
+ while ( isidchar(*p) )
+ p++;
+ len = p - id;
+ p = skipspace(p, 1);
+ if (*p == ',' ||
+ (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
+ )
+ return 0; /* not a function */
+ }
+ /*
+ * If the last significant character was a ), we need to count
+ * parentheses, because it might be part of a formal parameter
+ * that is a procedure.
+ */
+ if (contin > 0) {
+ int level = 0;
+
+ for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
+ level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
+ if (level > 0)
+ contin = -1;
+ }
+ return contin;
+}
+
+/* Convert a recognized function definition or header to K&R syntax. */
+int
+convert1(buf, out, header, convert_varargs)
+ char *buf;
+ FILE *out;
+ int header; /* Boolean */
+ int convert_varargs; /* Boolean */
+{ char *endfn;
+ register char *p;
+ /*
+ * The breaks table contains pointers to the beginning and end
+ * of each argument.
+ */
+ char **breaks;
+ unsigned num_breaks = 2; /* for testing */
+ char **btop;
+ char **bp;
+ char **ap;
+ char *vararg = 0;
+
+ /* Pre-ANSI implementations don't agree on whether strchr */
+ /* is called strchr or index, so we open-code it here. */
+ for ( endfn = buf; *(endfn++) != '('; )
+ ;
+top: p = endfn;
+ breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
+ if ( breaks == NULL )
+ { /* Couldn't allocate break table, give up */
+ fprintf(stderr, "Unable to allocate break table!\n");
+ fputs(buf, out);
+ return -1;
+ }
+ btop = breaks + num_breaks * 2 - 2;
+ bp = breaks;
+ /* Parse the argument list */
+ do
+ { int level = 0;
+ char *lp = NULL;
+ char *rp = NULL;
+ char *end = NULL;
+
+ if ( bp >= btop )
+ { /* Filled up break table. */
+ /* Allocate a bigger one and start over. */
+ free((char *)breaks);
+ num_breaks <<= 1;
+ goto top;
+ }
+ *bp++ = p;
+ /* Find the end of the argument */
+ for ( ; end == NULL; p++ )
+ { switch(*p)
+ {
+ case ',':
+ if ( !level ) end = p;
+ break;
+ case '(':
+ if ( !level ) lp = p;
+ level++;
+ break;
+ case ')':
+ if ( --level < 0 ) end = p;
+ else rp = p;
+ break;
+ case '/':
+ if (p[1] == '*')
+ p = skipspace(p, 1) - 1;
+ break;
+ case '"':
+ p = scanstring(p, 1) - 1;
+ break;
+ default:
+ ;
+ }
+ }
+ /* Erase any embedded prototype parameters. */
+ if ( lp && rp )
+ writeblanks(lp + 1, rp);
+ p--; /* back up over terminator */
+ /* Find the name being declared. */
+ /* This is complicated because of procedure and */
+ /* array modifiers. */
+ for ( ; ; )
+ { p = skipspace(p - 1, -1);
+ switch ( *p )
+ {
+ case ']': /* skip array dimension(s) */
+ case ')': /* skip procedure args OR name */
+ { int level = 1;
+ while ( level )
+ switch ( *--p )
+ {
+ case ']': case ')':
+ level++;
+ break;
+ case '[': case '(':
+ level--;
+ break;
+ case '/':
+ if (p > buf && p[-1] == '*')
+ p = skipspace(p, -1) + 1;
+ break;
+ case '"':
+ p = scanstring(p, -1) + 1;
+ break;
+ default: ;
+ }
+ }
+ if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
+ { /* We found the name being declared */
+ while ( !isidfirstchar(*p) )
+ p = skipspace(p, 1) + 1;
+ goto found;
+ }
+ break;
+ default:
+ goto found;
+ }
+ }
+found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
+ { if ( convert_varargs )
+ { *bp++ = "va_alist";
+ vararg = p-2;
+ }
+ else
+ { p++;
+ if ( bp == breaks + 1 ) /* sole argument */
+ writeblanks(breaks[0], p);
+ else
+ writeblanks(bp[-1] - 1, p);
+ bp--;
+ }
+ }
+ else
+ { while ( isidchar(*p) ) p--;
+ *bp++ = p+1;
+ }
+ p = end;
+ }
+ while ( *p++ == ',' );
+ *bp = p;
+ /* Make a special check for 'void' arglist */
+ if ( bp == breaks+2 )
+ { p = skipspace(breaks[0], 1);
+ if ( !strncmp(p, "void", 4) )
+ { p = skipspace(p+4, 1);
+ if ( p == breaks[2] - 1 )
+ { bp = breaks; /* yup, pretend arglist is empty */
+ writeblanks(breaks[0], p + 1);
+ }
+ }
+ }
+ /* Put out the function name and left parenthesis. */
+ p = buf;
+ while ( p != endfn ) putc(*p, out), p++;
+ /* Put out the declaration. */
+ if ( header )
+ { fputs(");", out);
+ for ( p = breaks[0]; *p; p++ )
+ if ( *p == '\r' || *p == '\n' )
+ putc(*p, out);
+ }
+ else
+ { for ( ap = breaks+1; ap < bp; ap += 2 )
+ { p = *ap;
+ while ( isidchar(*p) )
+ putc(*p, out), p++;
+ if ( ap < bp - 1 )
+ fputs(", ", out);
+ }
+ fputs(") ", out);
+ /* Put out the argument declarations */
+ for ( ap = breaks+2; ap <= bp; ap += 2 )
+ (*ap)[-1] = ';';
+ if ( vararg != 0 )
+ { *vararg = 0;
+ fputs(breaks[0], out); /* any prior args */
+ fputs("va_dcl", out); /* the final arg */
+ fputs(bp[0], out);
+ }
+ else
+ fputs(breaks[0], out);
+ }
+ free((char *)breaks);
+ return 0;
+}
diff --git a/rts/gmp/assert.c b/rts/gmp/assert.c
new file mode 100644
index 0000000000..65eccfa30b
--- /dev/null
+++ b/rts/gmp/assert.c
@@ -0,0 +1,52 @@
+/* GMP assertion failure handler. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+int
+#if __STDC__
+__gmp_assert_fail (const char *filename, int linenum,
+ const char *expr)
+#else
+__gmp_assert_fail (filename, linenum, expr)
+char *filename;
+int linenum;
+char *expr;
+#endif
+{
+ if (filename != NULL && filename[0] != '\0')
+ {
+ fprintf (stderr, "%s:", filename);
+ if (linenum != -1)
+ fprintf (stderr, "%d: ", linenum);
+ }
+
+ fprintf (stderr, "GNU MP assertion failed: %s\n", expr);
+ abort();
+
+ /*NOTREACHED*/
+ return 0;
+}
diff --git a/rts/gmp/compat.c b/rts/gmp/compat.c
new file mode 100644
index 0000000000..ab7529f52f
--- /dev/null
+++ b/rts/gmp/compat.c
@@ -0,0 +1,46 @@
+/* Old function entrypoints retained for binary compatibility. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* mpn_divexact_by3 was a function in gmp 3.0, but as of gmp 3.1 it's a
+ macro calling mpn_divexact_by3c. */
+int
+__MPN (divexact_by3) (mp_ptr dst, mp_srcptr src, mp_size_t size)
+{
+ mpn_divexact_by3 (dst, src, size);
+}
+
+
+/* mpn_divmod_1 was a function in gmp 3.0 and earlier, but marked obsolete
+ in gmp 2 and 3. As of gmp 3.1 it's a macro calling mpn_divrem_1. */
+int
+__MPN (divmod_1) (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
+{
+ mpn_divmod_1 (dst, src, size, divisor);
+}
+
+
diff --git a/rts/gmp/config.guess b/rts/gmp/config.guess
new file mode 100644
index 0000000000..08018f497d
--- /dev/null
+++ b/rts/gmp/config.guess
@@ -0,0 +1,1373 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+# Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Written by Per Bothner <bothner@cygnus.com>.
+# Please send patches to <config-patches@gnu.org>.
+#
+# This script attempts to guess a canonical system name similar to
+# config.sub. If it succeeds, it prints the system name on stdout, and
+# exits with 0. Otherwise, it exits with 1.
+#
+# The plan is that this can be called by configure scripts if you
+# don't specify an explicit system type (host/target name).
+#
+# Only a few systems have been added to this list; please add others
+# (but try to keep the structure clean).
+#
+
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 8/24/94.)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+ PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+dummy=dummy-$$
+trap 'rm -f $dummy.c $dummy.o $dummy ${dummy}1.s ${dummy}2.c ; exit 1' 1 2 15
+
+# Use $HOST_CC if defined. $CC may point to a cross-compiler
+if test x"$CC_FOR_BUILD" = x; then
+ if test x"$HOST_CC" != x; then
+ CC_FOR_BUILD="$HOST_CC"
+ else
+ if test x"$CC" != x; then
+ CC_FOR_BUILD="$CC"
+ else
+ echo 'dummy(){}' >$dummy.c
+ for c in cc c89 gcc; do
+ ($c $dummy.c -c) >/dev/null 2>&1
+ if test $? = 0; then
+ CC_FOR_BUILD="$c"; break
+ fi
+ done
+ rm -f $dummy.c $dummy.o
+ if test x"$CC_FOR_BUILD" = x; then
+ CC_FOR_BUILD=no_compiler_found
+ fi
+ fi
+ fi
+fi
+
+
+# First make a best effort at recognizing x86 CPU type and leave it in X86CPU.
+# If we fail, set X86CPU to UNAME_MACHINE
+#
+# DJGPP v2 (or 2.03 at least) always gives "pc" for uname -m, and the
+# OEM for uname -s. Eg. pc:MS-DOS:6:2 on MS-DOS 6.21. The list of
+# possible OEMs is in src/libc/dos/dos/getdos_v.c of djlsr203.zip, but
+# just pc:*:*:* seems ok.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+ i?86:*:*:* | i86pc:*:*:* | pc:*:*:*)
+ case "${UNAME_MACHINE}" in
+ i86pc | pc) UNAME_MACHINE=i386 ;;
+ esac
+ cat <<EOF >${dummy}1.s
+ .globl cpuid
+ .globl _cpuid
+cpuid:
+_cpuid:
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%eax
+ .byte 0x0f
+ .byte 0xa2
+ movl 12(%esp),%esi
+ movl %ebx,(%esi)
+ movl %edx,4(%esi)
+ movl %ecx,8(%esi)
+ popl %ebx
+ popl %esi
+ ret
+EOF
+ cat <<EOF >${dummy}2.c
+main ()
+{
+ char vendor_string[13];
+ char dummy_string[12];
+ long fms;
+ int family, model;
+ char *modelstr;
+
+ cpuid (vendor_string, 0);
+ vendor_string[12] = 0;
+
+ fms = cpuid (dummy_string, 1);
+
+ family = (fms >> 8) & 15;
+ model = (fms >> 4) & 15;
+
+ modelstr = "i486";
+ if (strcmp (vendor_string, "GenuineIntel") == 0)
+ {
+ switch (family)
+ {
+ case 5:
+ if (model <= 2)
+ modelstr = "pentium";
+ else if (model >= 4)
+ modelstr = "pentiummmx";
+ break;
+ case 6:
+ if (model == 1)
+ modelstr = "pentiumpro";
+ else if (model <= 6)
+ modelstr = "pentium2";
+ else
+ modelstr = "pentium3";
+ break;
+ }
+ }
+ else if (strcmp (vendor_string, "AuthenticAMD") == 0)
+ {
+ switch (family)
+ {
+ case 5:
+ if (model <= 3)
+ modelstr = "k5";
+ else if (model <= 7)
+ modelstr = "k6";
+ else if (model <= 8)
+ modelstr = "k62";
+ else if (model <= 9)
+ modelstr = "k63";
+ break;
+ case 6:
+ modelstr = "athlon";
+ break;
+ }
+ }
+ else if (strcmp (vendor_string, "CyrixInstead") == 0)
+ {
+ /* Should recognize Cyrix' processors too. */
+ }
+
+ printf ("%s\n", modelstr);
+ return 0;
+}
+EOF
+ $CC_FOR_BUILD ${dummy}1.s ${dummy}2.c -o $dummy >/dev/null 2>&1
+ if test "$?" = 0 ; then
+ X86CPU=`./$dummy`
+ fi
+
+
+ # Default to believing uname -m if the program fails to compile or
+ # run. Will fail to run on 386 since cpuid was only added on 486.
+ if test -z "$X86CPU"
+ then
+ X86CPU="$UNAME_MACHINE"
+ fi
+ rm -f ${dummy}1.s ${dummy}2.c $dummy
+ ;;
+esac
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+ alpha:OSF1:*:*)
+ if test $UNAME_RELEASE = "V4.0"; then
+ UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+ fi
+ # A Vn.n version is a released version.
+ # A Tn.n version is a released field test version.
+ # A Xn.n version is an unreleased experimental baselevel.
+ # 1.2 uses "1.2" for uname -r.
+ cat <<EOF >$dummy.s
+ .data
+\$Lformat:
+ .byte 37,100,45,37,120,10,0 # "%d-%x\n"
+
+ .text
+ .globl main
+ .align 4
+ .ent main
+main:
+ .frame \$30,16,\$26,0
+ ldgp \$29,0(\$27)
+ .prologue 1
+ .long 0x47e03d80 # implver \$0
+ lda \$2,-1
+ .long 0x47e20c21 # amask \$2,\$1
+ lda \$16,\$Lformat
+ mov \$0,\$17
+ not \$1,\$18
+ jsr \$26,printf
+ ldgp \$29,0(\$26)
+ mov 0,\$16
+ jsr \$26,exit
+ .end main
+EOF
+ $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ case `./$dummy` in
+ 0-0)
+ UNAME_MACHINE="alpha"
+ ;;
+ 1-0)
+ UNAME_MACHINE="alphaev5"
+ ;;
+ 1-1)
+ UNAME_MACHINE="alphaev56"
+ ;;
+ 1-101)
+ UNAME_MACHINE="alphapca56"
+ ;;
+ 2-303)
+ UNAME_MACHINE="alphaev6"
+ ;;
+ 2-307)
+ UNAME_MACHINE="alphaev67"
+ ;;
+ esac
+ fi
+ rm -f $dummy.s $dummy
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ exit 0 ;;
+ alpha:NetBSD:*:* | alpha:FreeBSD:*:*)
+ cat <<EOF >$dummy.s
+ .globl main
+ .ent main
+main:
+ .frame \$30,0,\$26,0
+ .prologue 0
+ .long 0x47e03d80 # implver $0
+ lda \$2,259
+ .long 0x47e20c21 # amask $2,$1
+ srl \$1,8,\$2
+ sll \$2,2,\$2
+ sll \$0,3,\$0
+ addl \$1,\$0,\$0
+ addl \$2,\$0,\$0
+ ret \$31,(\$26),1
+ .end main
+EOF
+ $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ ./$dummy
+ case "$?" in
+ 7)
+ UNAME_MACHINE="alpha"
+ ;;
+ 15)
+ UNAME_MACHINE="alphaev5"
+ ;;
+ 14)
+ UNAME_MACHINE="alphaev56"
+ ;;
+ 10)
+ UNAME_MACHINE="alphapca56"
+ ;;
+ 16)
+ UNAME_MACHINE="alphaev6"
+ ;;
+ esac
+ fi
+ rm -f $dummy.s $dummy
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM}${UNAME_RELEASE} | sed -e 's/^[VTX]//' -e 's/[-(].*//' | tr [[A-Z]] [[a-z]]`
+ exit 0 ;;
+ Alpha\ *:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # Should we change UNAME_MACHINE based on the output of uname instead
+ # of the specific Alpha model?
+ echo alpha-pc-interix
+ exit 0 ;;
+ 21064:Windows_NT:50:3)
+ echo alpha-dec-winnt3.5
+ exit 0 ;;
+ Amiga*:UNIX_System_V:4.0:*)
+ echo m68k-cbm-sysv4
+ exit 0;;
+ amiga:NetBSD:*:*)
+ echo m68k-cbm-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ amiga:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ *:[Aa]miga[Oo][Ss]:*:*)
+ echo ${UNAME_MACHINE}-unknown-amigaos
+ exit 0 ;;
+ arc64:OpenBSD:*:*)
+ echo mips64el-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ arc:OpenBSD:*:*)
+ echo mipsel-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ hkmips:OpenBSD:*:*)
+ echo mips-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ pmax:OpenBSD:*:*)
+ echo mipsel-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ sgi:OpenBSD:*:*)
+ echo mips-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ wgrisc:OpenBSD:*:*)
+ echo mipsel-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ *:OS/390:*:*)
+ echo i370-ibm-openedition
+ exit 0 ;;
+ arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+ echo arm-acorn-riscix${UNAME_RELEASE}
+ exit 0;;
+ arm32:NetBSD:*:*)
+ echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ SR2?01:HI-UX/MPP:*:*)
+ echo hppa1.1-hitachi-hiuxmpp
+ exit 0;;
+ Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+ # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+ if test "`(/bin/universe) 2>/dev/null`" = att ; then
+ echo pyramid-pyramid-sysv3
+ else
+ echo pyramid-pyramid-bsd
+ fi
+ exit 0 ;;
+ NILE*:*:*:dcosx)
+ echo pyramid-pyramid-svr4
+ exit 0 ;;
+ sun4H:SunOS:5.*:*)
+ echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4[md]:SunOS:5.*:*)
+ echo sparcv8-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4u:SunOS:5.*:*)
+ echo sparcv9-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+ echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ i386:SunOS:5.*:*)
+ echo ${X86CPU}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ sun4[md]:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+ UNAME_RELEASE=`uname -v`
+ ;;
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparcv8-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ exit 0 ;;
+ sun4*:SunOS:*:*)
+ case "`/usr/bin/arch -k`" in
+ Series*|S4*)
+ UNAME_RELEASE=`uname -v`
+ ;;
+ esac
+ # Japanese Language versions have a version number like `4.1.3-JL'.
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ exit 0 ;;
+ sun3*:SunOS:*:*)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ exit 0 ;;
+ sun*:*:4.2BSD:*)
+ UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+ test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+ case "`/bin/arch`" in
+ sun3)
+ echo m68k-sun-sunos${UNAME_RELEASE}
+ ;;
+ sun4)
+ echo sparc-sun-sunos${UNAME_RELEASE}
+ ;;
+ esac
+ exit 0 ;;
+ aushp:SunOS:*:*)
+ echo sparc-auspex-sunos${UNAME_RELEASE}
+ exit 0 ;;
+ atari*:NetBSD:*:*)
+ echo m68k-atari-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ atari*:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ # The situation for MiNT is a little confusing. The machine name
+ # can be virtually everything (everything which is not
+ # "atarist" or "atariste" at least should have a processor
+ # > m68000). The system name ranges from "MiNT" over "FreeMiNT"
+ # to the lowercase version "mint" (or "freemint"). Finally
+ # the system name "TOS" denotes a system which is actually not
+ # MiNT. But MiNT is downward compatible to TOS, so this should
+ # be no problem.
+ atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit 0 ;;
+ atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit 0 ;;
+ *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+ echo m68k-atari-mint${UNAME_RELEASE}
+ exit 0 ;;
+ milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+ echo m68k-milan-mint${UNAME_RELEASE}
+ exit 0 ;;
+ hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+ echo m68k-hades-mint${UNAME_RELEASE}
+ exit 0 ;;
+ *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+ echo m68k-unknown-mint${UNAME_RELEASE}
+ exit 0 ;;
+ sun3*:NetBSD:*:*)
+ echo m68k-sun-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ sun3*:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mac68k:NetBSD:*:*)
+ echo m68k-apple-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mac68k:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ macppc:NetBSD:*:*)
+ echo powerpc-apple-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mvme68k:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ mvme88k:OpenBSD:*:*)
+ echo m88k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ powerpc:machten:*:*)
+ echo powerpc-apple-machten${UNAME_RELEASE}
+ exit 0 ;;
+ RISC*:Mach:*:*)
+ echo mips-dec-mach_bsd4.3
+ exit 0 ;;
+ RISC*:ULTRIX:*:*)
+ echo mips-dec-ultrix${UNAME_RELEASE}
+ exit 0 ;;
+ VAX*:ULTRIX*:*:*)
+ echo vax-dec-ultrix${UNAME_RELEASE}
+ exit 0 ;;
+ 2020:CLIX:*:* | 2430:CLIX:*:*)
+ echo clipper-intergraph-clix${UNAME_RELEASE}
+ exit 0 ;;
+ mips:*:*:UMIPS | mips:*:*:RISCos)
+ sed 's/^ //' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+ #if defined (host_mips) && defined (MIPSEB)
+ #if defined (SYSTYPE_SYSV)
+ printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_SVR4)
+ printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+ #endif
+ #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+ printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+ #endif
+ #endif
+ exit (-1);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy \
+ && ./$dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
+ && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ echo mips-mips-riscos${UNAME_RELEASE}
+ exit 0 ;;
+ Night_Hawk:Power_UNIX:*:*)
+ echo powerpc-harris-powerunix
+ exit 0 ;;
+ m88k:CX/UX:7*:*)
+ echo m88k-harris-cxux7
+ exit 0 ;;
+ m88k:*:4*:R4*)
+ echo m88k-motorola-sysv4
+ exit 0 ;;
+ m88k:*:3*:R3*)
+ echo m88k-motorola-sysv3
+ exit 0 ;;
+ AViiON:dgux:*:*)
+ # DG/UX returns AViiON for all architectures
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+ then
+ if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+ [ ${TARGET_BINARY_INTERFACE}x = x ]
+ then
+ echo m88k-dg-dgux${UNAME_RELEASE}
+ else
+ echo m88k-dg-dguxbcs${UNAME_RELEASE}
+ fi
+ else
+ echo i586-dg-dgux${UNAME_RELEASE}
+ fi
+ exit 0 ;;
+ M88*:DolphinOS:*:*) # DolphinOS (SVR3)
+ echo m88k-dolphin-sysv3
+ exit 0 ;;
+ M88*:*:R3*:*)
+ # Delta 88k system running SVR3
+ echo m88k-motorola-sysv3
+ exit 0 ;;
+ XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+ echo m88k-tektronix-sysv3
+ exit 0 ;;
+ Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+ echo m68k-tektronix-bsd
+ exit 0 ;;
+ *:IRIX*:*:*)
+ echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+ exit 0 ;;
+ ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+ echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
+ exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ i?86:AIX:*:*)
+ echo i386-ibm-aix
+ exit 0 ;;
+ *:AIX:2:3)
+ if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+ sed 's/^ //' << EOF >$dummy.c
+ #include <sys/systemcfg.h>
+
+ main()
+ {
+ if (!__power_pc())
+ exit(1);
+ puts("powerpc-ibm-aix3.2.5");
+ exit(0);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ echo rs6000-ibm-aix3.2.5
+ elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+ echo rs6000-ibm-aix3.2.4
+ else
+ echo rs6000-ibm-aix3.2
+ fi
+ exit 0 ;;
+ *:AIX:*:4)
+ sed 's/^ //' << EOF >$dummy.c
+ #include <stdio.h>
+ #include <sys/systemcfg.h>
+ main ()
+ {
+ if (_system_configuration.architecture == POWER_RS
+ || _system_configuration.implementation == POWER_601)
+ puts ("power");
+ else
+ {
+ if (_system_configuration.width == 64)
+ puts ("powerpc64");
+ else
+ puts ("powerpc");
+ }
+ exit (0);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy
+ IBM_ARCH=`./$dummy`
+ rm -f $dummy.c $dummy
+ if [ -x /usr/bin/oslevel ] ; then
+ IBM_REV=`/usr/bin/oslevel`
+ else
+ IBM_REV=4.${UNAME_RELEASE}
+ fi
+ echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+ exit 0 ;;
+ *:AIX:*:*)
+ echo rs6000-ibm-aix
+ exit 0 ;;
+ ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+ echo romp-ibm-bsd4.4
+ exit 0 ;;
+ ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
+ echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
+ exit 0 ;; # report: romp-ibm BSD 4.3
+ *:BOSX:*:*)
+ echo rs6000-bull-bosx
+ exit 0 ;;
+ DPX/2?00:B.O.S.:*:*)
+ echo m68k-bull-sysv3
+ exit 0 ;;
+ 9000/[34]??:4.3bsd:1.*:*)
+ echo m68k-hp-bsd
+ exit 0 ;;
+ hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+ echo m68k-hp-bsd4.4
+ exit 0 ;;
+ 9000/[34678]??:HP-UX:*:*)
+ case "${UNAME_MACHINE}" in
+ 9000/31? ) HP_ARCH=m68000 ;;
+ 9000/[34]?? ) HP_ARCH=m68k ;;
+ 9000/[678][0-9][0-9])
+ sed 's/^ //' << EOF >$dummy.c
+
+ #define _HPUX_SOURCE
+ #include <stdlib.h>
+ #include <unistd.h>
+
+ int main ()
+ {
+ #if defined(_SC_KERNEL_BITS)
+ long bits = sysconf(_SC_KERNEL_BITS);
+ #endif
+ long cpu = sysconf (_SC_CPU_VERSION);
+
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+ case CPU_PA_RISC2_0:
+ #if defined(_SC_KERNEL_BITS)
+ switch (bits)
+ {
+ case 64: puts ("hppa2.0w"); break;
+ case 32: puts ("hppa2.0n"); break;
+ default: puts ("hppa2.0"); break;
+ } break;
+ #else /* !defined(_SC_KERNEL_BITS) */
+ puts ("hppa2.0"); break;
+ #endif
+ default: puts ("hppa1.0"); break;
+ }
+ exit (0);
+ }
+EOF
+ (CCOPTS= $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null ) && HP_ARCH=`./$dummy`
+ rm -f $dummy.c $dummy
+ esac
+ HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+ echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+ exit 0 ;;
+ 3050*:HI-UX:*:*)
+ sed 's/^ //' << EOF >$dummy.c
+ #include <unistd.h>
+ int
+ main ()
+ {
+ long cpu = sysconf (_SC_CPU_VERSION);
+ /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+ true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
+ results, however. */
+ if (CPU_IS_PA_RISC (cpu))
+ {
+ switch (cpu)
+ {
+ case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+ case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+ default: puts ("hppa-hitachi-hiuxwe2"); break;
+ }
+ }
+ else if (CPU_IS_HP_MC68K (cpu))
+ puts ("m68k-hitachi-hiuxwe2");
+ else puts ("unknown-hitachi-hiuxwe2");
+ exit (0);
+ }
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy && ./$dummy && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ echo unknown-hitachi-hiuxwe2
+ exit 0 ;;
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+ echo hppa1.1-hp-bsd
+ exit 0 ;;
+ 9000/8??:4.3bsd:*:*)
+ echo hppa1.0-hp-bsd
+ exit 0 ;;
+ *9??*:MPE/iX:*:*)
+ echo hppa1.0-hp-mpeix
+ exit 0 ;;
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+ echo hppa1.1-hp-osf
+ exit 0 ;;
+ hp8??:OSF1:*:*)
+ echo hppa1.0-hp-osf
+ exit 0 ;;
+ i?86:OSF1:*:*)
+ if [ -x /usr/sbin/sysversion ] ; then
+ echo ${UNAME_MACHINE}-unknown-osf1mk
+ else
+ echo ${UNAME_MACHINE}-unknown-osf1
+ fi
+ exit 0 ;;
+ parisc*:Lites*:*:*)
+ echo hppa1.1-hp-lites
+ exit 0 ;;
+ hppa*:OpenBSD:*:*)
+ echo hppa-unknown-openbsd
+ exit 0 ;;
+ C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+ echo c1-convex-bsd
+ exit 0 ;;
+ C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit 0 ;;
+ C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+ echo c34-convex-bsd
+ exit 0 ;;
+ C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+ echo c38-convex-bsd
+ exit 0 ;;
+ C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+ echo c4-convex-bsd
+ exit 0 ;;
+ CRAY*X-MP:*:*:*)
+ echo xmp-cray-unicos
+ exit 0 ;;
+ CRAY*Y-MP:*:*:*)
+ echo ymp-cray-unicos${UNAME_RELEASE}
+ exit 0 ;;
+ CRAY*[A-Z]90:*:*:*)
+ echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
+ exit 0 ;;
+ CRAY*TS:*:*:*)
+ echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit 0 ;;
+ CRAY*T3D:*:*:*)
+ echo alpha-cray-unicos
+ exit 0 ;;
+ CRAY*T3E:*:*:*)
+ echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit 0 ;;
+ CRAY*SV1:*:*:*)
+ echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ exit 0 ;;
+ CRAY-2:*:*:*)
+ echo cray2-cray-unicos
+ exit 0 ;;
+ F300:UNIX_System_V:*:*)
+ FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+ exit 0 ;;
+ F301:UNIX_System_V:*:*)
+ echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'`
+ exit 0 ;;
+ hp3[0-9][05]:NetBSD:*:*)
+ echo m68k-hp-netbsd${UNAME_RELEASE}
+ exit 0 ;;
+ hp300:OpenBSD:*:*)
+ echo m68k-unknown-openbsd${UNAME_RELEASE}
+ exit 0 ;;
+ i?86:BSD/386:*:* | i?86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+ echo ${X86CPU}-pc-bsdi${UNAME_RELEASE}
+ exit 0 ;;
+ sparc*:BSD/OS:*:*)
+ echo sparc-unknown-bsdi${UNAME_RELEASE}
+ exit 0 ;;
+ *:BSD/OS:*:*)
+ echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+ exit 0 ;;
+ i386:FreeBSD:*:*)
+ echo ${X86CPU}-pc-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ exit 0 ;;
+ *:FreeBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ exit 0 ;;
+ i386:NetBSD:*:*)
+ echo ${X86CPU}-pc-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ *:NetBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ i386:OpenBSD:*:*)
+ echo ${X86CPU}-pc-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ *:OpenBSD:*:*)
+ echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ exit 0 ;;
+ i*:CYGWIN*:*)
+ echo ${X86CPU}-pc-cygwin
+ exit 0 ;;
+ i*:MINGW*:*)
+ echo ${UNAME_MACHINE}-pc-mingw32
+ exit 0 ;;
+ i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+ # How do we know it's Interix rather than the generic POSIX subsystem?
+ # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+	# change UNAME_MACHINE based on the output of uname instead of i386?
+ echo i386-pc-interix
+ exit 0 ;;
+ i*:UWIN*:*)
+ echo ${UNAME_MACHINE}-pc-uwin
+ exit 0 ;;
+ p*:CYGWIN*:*)
+ echo powerpcle-unknown-cygwin
+ exit 0 ;;
+ prep*:SunOS:5.*:*)
+ echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ exit 0 ;;
+ *:GNU:*:*)
+ echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+ exit 0 ;;
+ *:Linux:*:*)
+
+ # The BFD linker knows what the default object file format is, so
+ # first see if it will tell us. cd to the root directory to prevent
+ # problems with other programs or directories called `ld' in the path.
+ ld_help_string=`cd /; ld --help 2>&1`
+ ld_supported_emulations=`echo $ld_help_string \
+ | sed -ne '/supported emulations:/!d
+ s/[ ][ ]*/ /g
+ s/.*supported emulations: *//
+ s/ .*//
+ p'`
+ case "$ld_supported_emulations" in
+ *ia64)
+ echo "${UNAME_MACHINE}-unknown-linux"
+ exit 0
+ ;;
+ i?86linux)
+ echo "${X86CPU}-pc-linux-gnuaout"
+ exit 0
+ ;;
+ i?86coff)
+ echo "${X86CPU}-pc-linux-gnucoff"
+ exit 0
+ ;;
+ sparclinux)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuaout"
+ exit 0
+ ;;
+ armlinux)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuaout"
+ exit 0
+ ;;
+ elf32arm*)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuoldld"
+ exit 0
+ ;;
+ armelf_linux*)
+ echo "${UNAME_MACHINE}-unknown-linux-gnu"
+ exit 0
+ ;;
+ m68klinux)
+ echo "${UNAME_MACHINE}-unknown-linux-gnuaout"
+ exit 0
+ ;;
+ elf32ppc | elf32ppclinux)
+ # Determine Lib Version
+ cat >$dummy.c <<EOF
+#include <features.h>
+#if defined(__GLIBC__)
+extern char __libc_version[];
+extern char __libc_release[];
+#endif
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+#if defined(__GLIBC__)
+ printf("%s %s\n", __libc_version, __libc_release);
+#else
+ printf("unkown\n");
+#endif
+ return 0;
+}
+EOF
+ LIBC=""
+ $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ ./$dummy | grep 1\.99 > /dev/null
+ if test "$?" = 0 ; then
+ LIBC="libc1"
+ fi
+ fi
+ rm -f $dummy.c $dummy
+ echo powerpc-unknown-linux-gnu${LIBC}
+ exit 0
+ ;;
+ esac
+
+ if test "${UNAME_MACHINE}" = "alpha" ; then
+ cat <<EOF >$dummy.s
+ .data
+ \$Lformat:
+ .byte 37,100,45,37,120,10,0 # "%d-%x\n"
+
+ .text
+ .globl main
+ .align 4
+ .ent main
+ main:
+ .frame \$30,16,\$26,0
+ ldgp \$29,0(\$27)
+ .prologue 1
+ .long 0x47e03d80 # implver \$0
+ lda \$2,-1
+ .long 0x47e20c21 # amask \$2,\$1
+ lda \$16,\$Lformat
+ mov \$0,\$17
+ not \$1,\$18
+ jsr \$26,printf
+ ldgp \$29,0(\$26)
+ mov 0,\$16
+ jsr \$26,exit
+ .end main
+EOF
+ LIBC=""
+ $CC_FOR_BUILD $dummy.s -o $dummy 2>/dev/null
+ if test "$?" = 0 ; then
+ case `./$dummy` in
+ 0-0)
+ UNAME_MACHINE="alpha"
+ ;;
+ 1-0)
+ UNAME_MACHINE="alphaev5"
+ ;;
+ 1-1)
+ UNAME_MACHINE="alphaev56"
+ ;;
+ 1-101)
+ UNAME_MACHINE="alphapca56"
+ ;;
+ 2-303)
+ UNAME_MACHINE="alphaev6"
+ ;;
+ 2-307)
+ UNAME_MACHINE="alphaev67"
+ ;;
+ esac
+
+ objdump --private-headers $dummy | \
+ grep ld.so.1 > /dev/null
+ if test "$?" = 0 ; then
+ LIBC="libc1"
+ fi
+ fi
+ rm -f $dummy.s $dummy
+ echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0
+ elif test "${UNAME_MACHINE}" = "mips" ; then
+ cat >$dummy.c <<EOF
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+#ifdef __MIPSEB__
+ printf ("%s-unknown-linux-gnu\n", argv[1]);
+#endif
+#ifdef __MIPSEL__
+ printf ("%sel-unknown-linux-gnu\n", argv[1]);
+#endif
+ return 0;
+}
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ elif test "${UNAME_MACHINE}" = "s390"; then
+ echo s390-ibm-linux && exit 0
+ else
+ # Either a pre-BFD a.out linker (linux-gnuoldld)
+ # or one that does not give us useful --help.
+ # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout.
+ # If ld does not provide *any* "supported emulations:"
+ # that means it is gnuoldld.
+ echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:"
+ test $? != 0 && echo "${X86CPU}-pc-linux-gnuoldld" && exit 0
+
+ case "${UNAME_MACHINE}" in
+ i?86)
+ VENDOR=pc;
+ UNAME_MACHINE=${X86CPU}
+ ;;
+ *)
+ VENDOR=unknown;
+ ;;
+ esac
+ # Determine whether the default compiler is a.out or elf
+ cat >$dummy.c <<EOF
+#include <features.h>
+#ifdef __cplusplus
+#include <stdio.h> /* for printf() prototype */
+ int main (int argc, char *argv[]) {
+#else
+ int main (argc, argv) int argc; char *argv[]; {
+#endif
+#ifdef __ELF__
+# ifdef __GLIBC__
+# if __GLIBC__ >= 2
+ printf ("%s-${VENDOR}-linux-gnu\n", argv[1]);
+# else
+ printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
+# endif
+# else
+ printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
+# endif
+#else
+ printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]);
+#endif
+ return 0;
+}
+EOF
+ $CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy "${UNAME_MACHINE}" && rm $dummy.c $dummy && exit 0
+ rm -f $dummy.c $dummy
+ fi ;;
+# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. earlier versions
+# are messed up and put the nodename in both sysname and nodename.
+ i?86:DYNIX/ptx:4*:*)
+ echo i386-sequent-sysv4
+ exit 0 ;;
+ i?86:UNIX_SV:4.2MP:2.*)
+ # Unixware is an offshoot of SVR4, but it has its own version
+ # number series starting with 2...
+ # I am not positive that other SVR4 systems won't match this,
+ # I just have to hope. -- rms.
+ # Use sysv4.2uw... so that sysv4* matches it.
+ echo ${X86CPU}-pc-sysv4.2uw${UNAME_VERSION}
+ exit 0 ;;
+ i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*)
+ UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+ if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+ echo ${X86CPU}-univel-sysv${UNAME_REL}
+ else
+ echo ${X86CPU}-pc-sysv${UNAME_REL}
+ fi
+ exit 0 ;;
+ i?86:*:5:7*)
+ # Fixed at (any) Pentium or better
+ UNAME_MACHINE=i586
+ if [ ${UNAME_SYSTEM} = "UnixWare" ] ; then
+ echo ${X86CPU}-sco-sysv${UNAME_RELEASE}uw${UNAME_VERSION}
+ else
+ echo ${X86CPU}-pc-sysv${UNAME_RELEASE}
+ fi
+ exit 0 ;;
+ i?86:*:3.2:*)
+ if test -f /usr/options/cb.name; then
+ UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+ echo ${X86CPU}-pc-isc$UNAME_REL
+ elif /bin/uname -X 2>/dev/null >/dev/null ; then
+ UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')`
+ (/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486
+ (/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \
+ && UNAME_MACHINE=i586
+ (/bin/uname -X|egrep '^Machine.*Pent ?II' >/dev/null) \
+ && UNAME_MACHINE=i686
+ (/bin/uname -X|egrep '^Machine.*Pentium Pro' >/dev/null) \
+ && UNAME_MACHINE=i686
+ echo ${X86CPU}-pc-sco$UNAME_REL
+ else
+ echo ${X86CPU}-pc-sysv32
+ fi
+ exit 0 ;;
+ i?86:*DOS:*:*)
+ echo ${X86CPU}-pc-msdosdjgpp
+ exit 0 ;;
+ pc:*:*:*)
+ # Left here for compatibility:
+	# uname -m always prints 'pc' for DJGPP, but it prints nothing about
+ # the processor, so we play safe by assuming i386.
+ echo i386-pc-msdosdjgpp
+ exit 0 ;;
+ Intel:Mach:3*:*)
+ echo i386-pc-mach3
+ exit 0 ;;
+ paragon:*:*:*)
+ echo i860-intel-osf1
+ exit 0 ;;
+ i860:*:4.*:*) # i860-SVR4
+ if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+ echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+ else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
+ fi
+ exit 0 ;;
+ mini*:CTIX:SYS*5:*)
+ # "miniframe"
+ echo m68010-convergent-sysv
+ exit 0 ;;
+ M68*:*:R3V[567]*:*)
+ test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
+ 3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0)
+ OS_REL=''
+ test -r /etc/.relid \
+ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && echo i486-ncr-sysv4.3${OS_REL} && exit 0
+ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+ && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
+ 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+ && echo i486-ncr-sysv4 && exit 0 ;;
+ m68*:LynxOS:2.*:*)
+ echo m68k-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ mc68030:UNIX_System_V:4.*:*)
+ echo m68k-atari-sysv4
+ exit 0 ;;
+ i?86:LynxOS:2.*:* | i?86:LynxOS:3.[01]*:*)
+ echo i386-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ TSUNAMI:LynxOS:2.*:*)
+ echo sparc-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*)
+ echo rs6000-unknown-lynxos${UNAME_RELEASE}
+ exit 0 ;;
+ SM[BE]S:UNIX_SV:*:*)
+ echo mips-dde-sysv${UNAME_RELEASE}
+ exit 0 ;;
+ RM*:ReliantUNIX-*:*:*)
+ echo mips-sni-sysv4
+ exit 0 ;;
+ RM*:SINIX-*:*:*)
+ echo mips-sni-sysv4
+ exit 0 ;;
+ *:SINIX-*:*:*)
+ if uname -p 2>/dev/null >/dev/null ; then
+ UNAME_MACHINE=`(uname -p) 2>/dev/null`
+ echo ${UNAME_MACHINE}-sni-sysv4
+ else
+ echo ns32k-sni-sysv
+ fi
+ exit 0 ;;
+ PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+ # says <Richard.M.Bartel@ccMail.Census.GOV>
+ echo i586-unisys-sysv4
+ exit 0 ;;
+ *:UNIX_System_V:4*:FTX*)
+ # From Gerald Hewes <hewes@openmarket.com>.
+ # How about differentiating between stratus architectures? -djm
+ echo hppa1.1-stratus-sysv4
+ exit 0 ;;
+ *:*:*:FTX*)
+ # From seanf@swdc.stratus.com.
+ echo i860-stratus-sysv4
+ exit 0 ;;
+ mc68*:A/UX:*:*)
+ echo m68k-apple-aux${UNAME_RELEASE}
+ exit 0 ;;
+ news*:NEWS-OS:*:6*)
+ echo mips-sony-newsos6
+ exit 0 ;;
+ R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+ if [ -d /usr/nec ]; then
+ echo mips-nec-sysv${UNAME_RELEASE}
+ else
+ echo mips-unknown-sysv${UNAME_RELEASE}
+ fi
+ exit 0 ;;
+ BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
+ echo powerpc-be-beos
+ exit 0 ;;
+ BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only.
+ echo powerpc-apple-beos
+ exit 0 ;;
+ BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos
+ exit 0 ;;
+ SX-4:SUPER-UX:*:*)
+ echo sx4-nec-superux${UNAME_RELEASE}
+ exit 0 ;;
+ SX-5:SUPER-UX:*:*)
+ echo sx5-nec-superux${UNAME_RELEASE}
+ exit 0 ;;
+ Power*:Rhapsody:*:*)
+ echo powerpc-apple-rhapsody${UNAME_RELEASE}
+ exit 0 ;;
+ *:Rhapsody:*:*)
+ echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+ exit 0 ;;
+ Power*:Mac*OS:*:*)
+ echo powerpc-apple-macos${UNAME_RELEASE}
+ exit 0 ;;
+ *:Mac*OS:*:*)
+ echo ${UNAME_MACHINE}-apple-macos${UNAME_RELEASE}
+ exit 0 ;;
+ *:Darwin:*:*)
+ echo `uname -p`-apple-darwin${UNAME_RELEASE}
+ exit 0 ;;
+ *:procnto*:*:* | *:QNX:[0123456789]*:*)
+ if test "${UNAME_MACHINE}" = "x86pc"; then
+ UNAME_MACHINE=pc
+ fi
+ echo `uname -p`-${UNAME_MACHINE}-nto-qnx
+ exit 0 ;;
+ *:QNX:*:4*)
+ echo i386-pc-qnx
+ exit 0 ;;
+ NSR-W:NONSTOP_KERNEL:*:*)
+ echo nsr-tandem-nsk${UNAME_RELEASE}
+ exit 0 ;;
+ BS2000:POSIX*:*:*)
+ echo bs2000-siemens-sysv
+ exit 0 ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+ /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+ I don't know.... */
+ printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+ printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+ "4"
+#else
+ ""
+#endif
+ ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+ printf ("arm-acorn-riscix"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+ printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+ int version;
+ version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+ if (version < 4)
+ printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+ else
+ printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+ exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+ printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+ printf ("ns32k-encore-mach\n"); exit (0);
+#else
+ printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+ printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+ printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+ printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+ struct utsname un;
+
+ uname(&un);
+
+ if (strncmp(un.version, "V2", 2) == 0) {
+ printf ("i386-sequent-ptx2\n"); exit (0);
+ }
+ if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+ printf ("i386-sequent-ptx1\n"); exit (0);
+ }
+ printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+#if !defined (ultrix)
+ printf ("vax-dec-bsd\n"); exit (0);
+#else
+ printf ("vax-dec-ultrix\n"); exit (0);
+#endif
+#endif
+
+#if defined (alliant) && defined (i860)
+ printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+ exit (1);
+}
+EOF
+
+$CC_FOR_BUILD $dummy.c -o $dummy 2>/dev/null && ./$dummy && rm $dummy.c $dummy && exit 0
+rm -f $dummy.c $dummy
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+ case `getsysinfo -f cpu_type` in
+ c1*)
+ echo c1-convex-bsd
+ exit 0 ;;
+ c2*)
+ if getsysinfo -f scalar_acc
+ then echo c32-convex-bsd
+ else echo c2-convex-bsd
+ fi
+ exit 0 ;;
+ c34*)
+ echo c34-convex-bsd
+ exit 0 ;;
+ c38*)
+ echo c38-convex-bsd
+ exit 0 ;;
+ c4*)
+ echo c4-convex-bsd
+ exit 0 ;;
+ esac
+fi
+
+#echo '(Unable to guess system type)' 1>&2
+
+exit 1
diff --git a/rts/gmp/config.in b/rts/gmp/config.in
new file mode 100644
index 0000000000..8b2546ef16
--- /dev/null
+++ b/rts/gmp/config.in
@@ -0,0 +1,162 @@
+/* config.in. Generated automatically from configure.in by autoheader. */
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+
+/* Define if a limb is long long. */
+#undef _LONG_LONG_LIMB
+
+/* Define if we have native implementation of function. */
+#undef HAVE_NATIVE_
+#undef HAVE_NATIVE_mpn_add
+#undef HAVE_NATIVE_mpn_add_1
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addmul_1
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addsub_n
+#undef HAVE_NATIVE_mpn_addsub_nc
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdivmod
+#undef HAVE_NATIVE_mpn_cmp
+#undef HAVE_NATIVE_mpn_com_n
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_divrem_newton
+#undef HAVE_NATIVE_mpn_divrem_classic
+#undef HAVE_NATIVE_mpn_dump
+#undef HAVE_NATIVE_mpn_gcd
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_gcdext
+#undef HAVE_NATIVE_mpn_get_str
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mul
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_mul_n
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_perfect_square_p
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_random2
+#undef HAVE_NATIVE_mpn_random
+#undef HAVE_NATIVE_mpn_rawrandom
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_scan0
+#undef HAVE_NATIVE_mpn_scan1
+#undef HAVE_NATIVE_mpn_set_str
+#undef HAVE_NATIVE_mpn_sqrtrem
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sub
+#undef HAVE_NATIVE_mpn_sub_1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_submul_1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_udiv_w_sdiv
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n
+
+/* Define to 1 if you have the declaration of `optarg', and to 0 if you don't.
+ */
+#undef HAVE_DECL_OPTARG
+
+/* ./configure --enable-assert option, to enable some ASSERT()s */
+#undef WANT_ASSERT
+
+/* Define if you have the <sys/sysctl.h> header file. */
+#undef HAVE_SYS_SYSCTL_H
+
+/* Define if you have the `strtoul' function. */
+#undef HAVE_STRTOUL
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define if you have the `sysctlbyname' function. */
+#undef HAVE_SYSCTLBYNAME
+
+/* Define if the system has the type `void'. */
+#undef HAVE_VOID
+
+/* Define if you have the `popen' function. */
+#undef HAVE_POPEN
+
+/* ./configure --disable-alloca option, to use stack-alloc.c, not alloca */
+#undef USE_STACK_ALLOC
+
+/* Define if cpp supports the ANSI # stringizing operator. */
+#undef HAVE_STRINGIZE
+
+/* Define if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define if you have the `processor_info' function. */
+#undef HAVE_PROCESSOR_INFO
+
+/* Version number of package */
+#undef VERSION
+
+/* Define if you have the `getopt_long' function. */
+#undef HAVE_GETOPT_LONG
+
+/* Define if you have the <getopt.h> header file. */
+#undef HAVE_GETOPT_H
+
+/* Define if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define if a speed_cyclecounter exists (for the tune programs) */
+#undef HAVE_SPEED_CYCLECOUNTER
+
+/* Define if mpn/tests has calling conventions checking for the CPU */
+#undef HAVE_CALLING_CONVENTIONS
+
+/* ./configure --enable-fft option, to enable FFTs for multiplication */
+#undef WANT_FFT
+
+/* Define if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
diff --git a/rts/gmp/config.sub b/rts/gmp/config.sub
new file mode 100644
index 0000000000..c4123f28ff
--- /dev/null
+++ b/rts/gmp/config.sub
@@ -0,0 +1,1273 @@
+#! /bin/sh
+# Configuration validation subroutine script, version 1.1.
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+# Free Software Foundation, Inc.
+#
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine. It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Written by Per Bothner <bothner@cygnus.com>.
+# Please send patches to <config-patches@gnu.org>.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support. The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+if [ x$1 = x ]
+then
+ echo Configuration name missing. 1>&2
+ echo "Usage: $0 CPU-MFR-OPSYS" 1>&2
+ echo "or $0 ALIAS" 1>&2
+ echo where ALIAS is a recognized configuration type. 1>&2
+ exit 1
+fi
+
+# First pass through any local machine types.
+case $1 in
+ *local*)
+ echo $1
+ exit 0
+ ;;
+ *)
+ ;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+ nto-qnx* | linux-gnu*)
+ os=-$maybe_os
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+ ;;
+ *)
+ basic_machine=`echo $1 | sed 's/-[^-]*$//'`
+ if [ $basic_machine != $1 ]
+ then os=`echo $1 | sed 's/.*-/-/'`
+ else os=; fi
+ ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work. We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+ -sun*os*)
+ # Prevent following clause from handling this invalid input.
+ ;;
+ -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+ -apple)
+ os=
+ basic_machine=$1
+ ;;
+ -sim | -cisco | -oki | -wec | -winbond)
+ os=
+ basic_machine=$1
+ ;;
+ -scout)
+ ;;
+ -wrs)
+ os=-vxworks
+ basic_machine=$1
+ ;;
+ -hiux*)
+ os=-hiuxwe2
+ ;;
+ -sco5)
+ os=-sco3.2v5
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco4)
+ os=-sco3.2v4
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2.[4-9]*)
+ os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco3.2v[4-9]*)
+ # Don't forget version if it is 3.2v4 or newer.
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -sco*)
+ os=-sco3.2v2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -udk*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -isc)
+ os=-isc2.2
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -clix*)
+ basic_machine=clipper-intergraph
+ ;;
+ -isc*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+ ;;
+ -lynx*)
+ os=-lynxos
+ ;;
+ -ptx*)
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+ ;;
+ -windowsnt*)
+ os=`echo $os | sed -e 's/windowsnt/winnt/'`
+ ;;
+ -psos*)
+ os=-psos
+ ;;
+ -mint | -mint[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+ # Recognize the basic CPU types without company name.
+ # Some are omitted here because they have special meanings below.
+ tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \
+ | arme[lb] | pyramid | mn10200 | mn10300 | tron | a29k \
+ | 580 | i960 | h8300 \
+ | x86 | ppcbe | mipsbe | mipsle | shbe | shle | armbe | armle \
+ | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \
+ | alpha | alphaev[4-8] | alphaev56 | alphapca5[67] \
+ | alphaev6[78] \
+ | we32k | ns16k | clipper | i370 | sh | powerpc | powerpcle \
+ | 1750a | dsp16xx | pdp11 | mips16 | mips64 | mipsel | mips64el \
+ | mips64orion | mips64orionel | mipstx39 | mipstx39el \
+ | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \
+ | mips64vr5000 | miprs64vr5000el | mcore \
+ | sparc | sparclet | sparclite | sparc64 | sparcv9 | v850 | c4x \
+ | powerpc64 | sparcv8 | supersparc | microsparc | ultrasparc \
+ | thumb | d10v | fr30 | avr)
+ basic_machine=$basic_machine-unknown
+ ;;
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | z8k | v70 | h8500 | w65 | pj | pjl)
+ ;;
+
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+ # (2) the word "unknown" tends to confuse beginning users.
+ i[34567]86 | pentium[23] | k[56] | k6[23] | athlon)
+ basic_machine=$basic_machine-pc
+ ;;
+ # Object if more than one company name word.
+ *-*-*)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+ # Recognize the basic CPU types with company name.
+ vax-* | tahoe-* | i[34567]86-* | pentium[23]-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \
+ | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \
+ | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \
+ | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \
+ | xmp-* | ymp-* \
+ | x86-* | ppcbe-* | mipsbe-* | mipsle-* | shbe-* | shle-* | armbe-* | armle-* \
+ | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* | hppa2.0n-* \
+ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphapca5[67]-* \
+ | alphaev6[78]-* \
+ | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \
+ | clipper-* | orion-* \
+ | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \
+ | sparc64-* | sparcv9-* | sparc86x-* | mips16-* | mips64-* | mipsel-* \
+ | mips64el-* | mips64orion-* | mips64orionel-* \
+ | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \
+ | mipstx39-* | mipstx39el-* | mcore-* \
+ | f301-* | armv*-* | s390-* | sv1-* | t3e-* \
+ | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \
+ | k[56]-* | k6[23]-* | athlon-* | powerpc64-* \
+ | sparcv8-* | supersparc-* | microsparc-* | ultrasparc-* \
+ | thumb-* | v850-* | d30v-* | tic30-* | c30-* | fr30-* )
+ ;;
+ # Recognize the various machine names and aliases which stand
+ # for a CPU type and a company and sometimes even an OS.
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+ basic_machine=m68000-att
+ ;;
+ 3b*)
+ basic_machine=we32k-att
+ ;;
+ a29khif)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ os=-scout
+ ;;
+ alliant | fx80)
+ basic_machine=fx80-alliant
+ ;;
+ altos | altos3068)
+ basic_machine=m68k-altos
+ ;;
+ am29k)
+ basic_machine=a29k-none
+ os=-bsd
+ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ os=-sysv
+ ;;
+ amiga | amiga-*)
+ basic_machine=m68k-cbm
+ ;;
+ amigaos | amigados)
+ basic_machine=m68k-cbm
+ os=-amigaos
+ ;;
+ amigaunix | amix)
+ basic_machine=m68k-cbm
+ os=-sysv4
+ ;;
+ apollo68)
+ basic_machine=m68k-apollo
+ os=-sysv
+ ;;
+ apollo68bsd)
+ basic_machine=m68k-apollo
+ os=-bsd
+ ;;
+ aux)
+ basic_machine=m68k-apple
+ os=-aux
+ ;;
+ balance)
+ basic_machine=ns32k-sequent
+ os=-dynix
+ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ os=-bsd
+ ;;
+ convex-c2)
+ basic_machine=c2-convex
+ os=-bsd
+ ;;
+ convex-c32)
+ basic_machine=c32-convex
+ os=-bsd
+ ;;
+ convex-c34)
+ basic_machine=c34-convex
+ os=-bsd
+ ;;
+ convex-c38)
+ basic_machine=c38-convex
+ os=-bsd
+ ;;
+ cray | ymp)
+ basic_machine=ymp-cray
+ os=-unicos
+ ;;
+ cray2)
+ basic_machine=cray2-cray
+ os=-unicos
+ ;;
+ [ctj]90-cray)
+ basic_machine=c90-cray
+ os=-unicos
+ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ ;;
+ da30 | da30-*)
+ basic_machine=m68k-da30
+ ;;
+ decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+ basic_machine=mips-dec
+ ;;
+ delta | 3300 | motorola-3300 | motorola-delta \
+ | 3300-motorola | delta-motorola)
+ basic_machine=m68k-motorola
+ ;;
+ delta88)
+ basic_machine=m88k-motorola
+ os=-sysv3
+ ;;
+ dpx20 | dpx20-*)
+ basic_machine=rs6000-bull
+ os=-bosx
+ ;;
+ dpx2* | dpx2*-bull)
+ basic_machine=m68k-bull
+ os=-sysv3
+ ;;
+ ebmon29k)
+ basic_machine=a29k-amd
+ os=-ebmon
+ ;;
+ elxsi)
+ basic_machine=elxsi-elxsi
+ os=-bsd
+ ;;
+ encore | umax | mmax)
+ basic_machine=ns32k-encore
+ ;;
+ es1800 | OSE68k | ose68k | ose | OSE)
+ basic_machine=m68k-ericsson
+ os=-ose
+ ;;
+ fx2800)
+ basic_machine=i860-alliant
+ ;;
+ genix)
+ basic_machine=ns32k-ns
+ ;;
+ gmicro)
+ basic_machine=tron-gmicro
+ os=-sysv
+ ;;
+ h3050r* | hiux*)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ h8300hms)
+ basic_machine=h8300-hitachi
+ os=-hms
+ ;;
+ h8300xray)
+ basic_machine=h8300-hitachi
+ os=-xray
+ ;;
+ h8500hms)
+ basic_machine=h8500-hitachi
+ os=-hms
+ ;;
+ harris)
+ basic_machine=m88k-harris
+ os=-sysv3
+ ;;
+ hp300-*)
+ basic_machine=m68k-hp
+ ;;
+ hp300bsd)
+ basic_machine=m68k-hp
+ os=-bsd
+ ;;
+ hp300hpux)
+ basic_machine=m68k-hp
+ os=-hpux
+ ;;
+ hp3k9[0-9][0-9] | hp9[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k2[0-9][0-9] | hp9k31[0-9])
+ basic_machine=m68000-hp
+ ;;
+ hp9k3[2-9][0-9])
+ basic_machine=m68k-hp
+ ;;
+ hp9k6[0-9][0-9] | hp6[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hp9k7[0-79][0-9] | hp7[0-79][0-9])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k78[0-9] | hp78[0-9])
+ basic_machine=hppa2.0-hp
+ ;;
+ hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+ basic_machine=hppa2.0-hp
+ ;;
+ hp9k8[0-9][13679] | hp8[0-9][13679])
+ basic_machine=hppa1.1-hp
+ ;;
+ hp9k8[0-9][0-9] | hp8[0-9][0-9])
+ basic_machine=hppa1.0-hp
+ ;;
+ hppa-next)
+ os=-nextstep3
+ ;;
+ hppaosf)
+ basic_machine=hppa1.1-hp
+ os=-osf
+ ;;
+ hppro)
+ basic_machine=hppa1.1-hp
+ os=-proelf
+ ;;
+ i370-ibm* | ibm*)
+ basic_machine=i370-ibm
+ ;;
+# I'm not sure what "Sysv32" means. Should this be sysv3.2?
+ i[34567]86v32)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv32
+ ;;
+ i[34567]86v4*)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv4
+ ;;
+ i[34567]86v)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-sysv
+ ;;
+ i[34567]86sol2)
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+ os=-solaris2
+ ;;
+ i386mach)
+ basic_machine=i386-mach
+ os=-mach
+ ;;
+ i386-vsta | vsta)
+ basic_machine=i386-unknown
+ os=-vsta
+ ;;
+ i386-go32 | go32)
+ basic_machine=i386-unknown
+ os=-go32
+ ;;
+ i386-mingw32 | mingw32)
+ basic_machine=i386-unknown
+ os=-mingw32
+ ;;
+ iris | iris4d)
+ basic_machine=mips-sgi
+ case $os in
+ -irix*)
+ ;;
+ *)
+ os=-irix4
+ ;;
+ esac
+ ;;
+ isi68 | isi)
+ basic_machine=m68k-isi
+ os=-sysv
+ ;;
+ macppc*)
+ basic_machine=powerpc-apple
+ ;;
+ m88k-omron*)
+ basic_machine=m88k-omron
+ ;;
+ magnum | m3230)
+ basic_machine=mips-mips
+ os=-sysv
+ ;;
+ merlin)
+ basic_machine=ns32k-utek
+ os=-sysv
+ ;;
+ miniframe)
+ basic_machine=m68000-convergent
+ ;;
+ *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+ basic_machine=m68k-atari
+ os=-mint
+ ;;
+ mipsel*-linux*)
+ basic_machine=mipsel-unknown
+ os=-linux-gnu
+ ;;
+ mips*-linux*)
+ basic_machine=mips-unknown
+ os=-linux-gnu
+ ;;
+ mips3*-*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+ ;;
+ mips3*)
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+ ;;
+ mmix*)
+ basic_machine=mmix-knuth
+ os=-mmixware
+ ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ msdos)
+ basic_machine=i386-unknown
+ os=-msdos
+ ;;
+ mvs)
+ basic_machine=i370-ibm
+ os=-mvs
+ ;;
+ ncr3000)
+ basic_machine=i486-ncr
+ os=-sysv4
+ ;;
+ netbsd386)
+ basic_machine=i386-unknown
+ os=-netbsd
+ ;;
+ netwinder)
+ basic_machine=armv4l-rebel
+ os=-linux
+ ;;
+ news | news700 | news800 | news900)
+ basic_machine=m68k-sony
+ os=-newsos
+ ;;
+ news1000)
+ basic_machine=m68030-sony
+ os=-newsos
+ ;;
+ news-3600 | risc-news)
+ basic_machine=mips-sony
+ os=-newsos
+ ;;
+ necv70)
+ basic_machine=v70-nec
+ os=-sysv
+ ;;
+ next | m*-next )
+ basic_machine=m68k-next
+ case $os in
+ -nextstep* )
+ ;;
+ -ns2*)
+ os=-nextstep2
+ ;;
+ *)
+ os=-nextstep3
+ ;;
+ esac
+ ;;
+ nh3000)
+ basic_machine=m68k-harris
+ os=-cxux
+ ;;
+ nh[45]000)
+ basic_machine=m88k-harris
+ os=-cxux
+ ;;
+ nindy960)
+ basic_machine=i960-intel
+ os=-nindy
+ ;;
+ mon960)
+ basic_machine=i960-intel
+ os=-mon960
+ ;;
+ np1)
+ basic_machine=np1-gould
+ ;;
+ nsr-tandem)
+ basic_machine=nsr-tandem
+ ;;
+ op50n-* | op60c-*)
+ basic_machine=hppa1.1-oki
+ os=-proelf
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+ os=-ose
+ ;;
+ os68k)
+ basic_machine=m68k-none
+ os=-os68k
+ ;;
+ pa-hitachi)
+ basic_machine=hppa1.1-hitachi
+ os=-hiuxwe2
+ ;;
+ paragon)
+ basic_machine=i860-intel
+ os=-osf
+ ;;
+ pbd)
+ basic_machine=sparc-tti
+ ;;
+ pbb)
+ basic_machine=m68k-tti
+ ;;
+ pc532 | pc532-*)
+ basic_machine=ns32k-pc532
+ ;;
+ pentiummmx | p55)
+ basic_machine=pentiummmx-pc
+ ;;
+ pentium | p5 | i586)
+ basic_machine=pentium-pc
+ ;;
+ pentiumpro | p6)
+ basic_machine=pentiumpro-pc
+ ;;
+ pentiummmx-* | p55-*)
+ basic_machine=pentiummmx-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentium-* | p5-* | i586-*)
+ basic_machine=pentium-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ pentiumpro-* | p6-*)
+ basic_machine=pentiumpro-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ nexen)
+ # We don't have specific support for Nexgen yet, so just call it a Pentium
+ basic_machine=i586-nexgen
+ ;;
+ pn)
+ basic_machine=pn-gould
+ ;;
+ power) basic_machine=rs6000-ibm
+ ;;
+ ppc) basic_machine=powerpc-unknown
+ ;;
+ ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppc64) basic_machine=powerpc64-unknown
+ ;;
+ ppc64-*)
+ basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ppcle | powerpclittle | ppc-le | powerpc-little)
+ basic_machine=powerpcle-unknown
+ ;;
+ ppcle-* | powerpclittle-*)
+ basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+ ;;
+ ps2)
+ basic_machine=i386-ibm
+ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ os=-coff
+ ;;
+ rm[46]00)
+ basic_machine=mips-siemens
+ ;;
+ rtpc | rtpc-*)
+ basic_machine=romp-ibm
+ ;;
+ sa29200)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ sequent)
+ basic_machine=i386-sequent
+ ;;
+ sh)
+ basic_machine=sh-hitachi
+ os=-hms
+ ;;
+ sparclite-wrs)
+ basic_machine=sparclite-wrs
+ os=-vxworks
+ ;;
+ sps7)
+ basic_machine=m68k-bull
+ os=-sysv2
+ ;;
+ spur)
+ basic_machine=spur-unknown
+ ;;
+ st2000)
+ basic_machine=m68k-tandem
+ ;;
+ stratus)
+ basic_machine=i860-stratus
+ os=-sysv4
+ ;;
+ sun2)
+ basic_machine=m68000-sun
+ ;;
+ sun2os3)
+ basic_machine=m68000-sun
+ os=-sunos3
+ ;;
+ sun2os4)
+ basic_machine=m68000-sun
+ os=-sunos4
+ ;;
+ sun3os3)
+ basic_machine=m68k-sun
+ os=-sunos3
+ ;;
+ sun3os4)
+ basic_machine=m68k-sun
+ os=-sunos4
+ ;;
+ sun4os3)
+ basic_machine=sparc-sun
+ os=-sunos3
+ ;;
+ sun4os4)
+ basic_machine=sparc-sun
+ os=-sunos4
+ ;;
+ sun4sol2)
+ basic_machine=sparc-sun
+ os=-solaris2
+ ;;
+ sun3 | sun3-*)
+ basic_machine=m68k-sun
+ ;;
+ sun4)
+ basic_machine=sparc-sun
+ ;;
+ sun386 | sun386i | roadrunner)
+ basic_machine=i386-sun
+ ;;
+ sv1)
+ basic_machine=sv1-cray
+ os=-unicos
+ ;;
+ symmetry)
+ basic_machine=i386-sequent
+ os=-dynix
+ ;;
+ t3e)
+ basic_machine=t3e-cray
+ os=-unicos
+ ;;
+ tx39)
+ basic_machine=mipstx39-unknown
+ ;;
+ tx39el)
+ basic_machine=mipstx39el-unknown
+ ;;
+ tower | tower-32)
+ basic_machine=m68k-ncr
+ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ os=-udi
+ ;;
+ ultra3)
+ basic_machine=a29k-nyu
+ os=-sym1
+ ;;
+ v810 | necv810)
+ basic_machine=v810-nec
+ os=-none
+ ;;
+ vaxv)
+ basic_machine=vax-dec
+ os=-sysv
+ ;;
+ vms)
+ basic_machine=vax-dec
+ os=-vms
+ ;;
+ vpp*|vx|vx-*)
+ basic_machine=f301-fujitsu
+ ;;
+ vxworks960)
+ basic_machine=i960-wrs
+ os=-vxworks
+ ;;
+ vxworks68)
+ basic_machine=m68k-wrs
+ os=-vxworks
+ ;;
+ vxworks29k)
+ basic_machine=a29k-wrs
+ os=-vxworks
+ ;;
+ w65*)
+ basic_machine=w65-wdc
+ os=-none
+ ;;
+ w89k-*)
+ basic_machine=hppa1.1-winbond
+ os=-proelf
+ ;;
+ xmp)
+ basic_machine=xmp-cray
+ os=-unicos
+ ;;
+ xps | xps100)
+ basic_machine=xps100-honeywell
+ ;;
+ z8k-*-coff)
+ basic_machine=z8k-unknown
+ os=-sim
+ ;;
+ none)
+ basic_machine=none-none
+ os=-none
+ ;;
+
+# Here we handle the default manufacturer of certain CPU types. It is in
+# some cases the only manufacturer, in others, it is the most popular.
+ w89k)
+ basic_machine=hppa1.1-winbond
+ ;;
+ op50n)
+ basic_machine=hppa1.1-oki
+ ;;
+ op60c)
+ basic_machine=hppa1.1-oki
+ ;;
+ mips)
+ if [ x$os = x-linux-gnu ]; then
+ basic_machine=mips-unknown
+ else
+ basic_machine=mips-mips
+ fi
+ ;;
+ romp)
+ basic_machine=romp-ibm
+ ;;
+ rs6000)
+ basic_machine=rs6000-ibm
+ ;;
+ vax)
+ basic_machine=vax-dec
+ ;;
+ pdp11)
+ basic_machine=pdp11-dec
+ ;;
+ we32k)
+ basic_machine=we32k-att
+ ;;
+ sparc | sparcv9)
+ basic_machine=sparc-sun
+ ;;
+ cydra)
+ basic_machine=cydra-cydrome
+ ;;
+ orion)
+ basic_machine=orion-highlevel
+ ;;
+ orion105)
+ basic_machine=clipper-highlevel
+ ;;
+ mac | mpw | mac-mpw)
+ basic_machine=m68k-apple
+ ;;
+ pmac | pmac-mpw)
+ basic_machine=powerpc-apple
+ ;;
+ c4x*)
+ basic_machine=c4x-none
+ os=-coff
+ ;;
+ *)
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers.
+case $basic_machine in
+ *-digital*)
+ basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+ ;;
+ *-commodore*)
+ basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+ ;;
+ *)
+ ;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+ # First match some system type aliases
+ # that might get confused with valid system types.
+ # -solaris* is a basic system type, with this one exception.
+ -solaris1 | -solaris1.*)
+ os=`echo $os | sed -e 's|solaris1|sunos4|'`
+ ;;
+ -solaris)
+ os=-solaris2
+ ;;
+ -svr4*)
+ os=-sysv4
+ ;;
+ -unixware*)
+ os=-sysv4.2uw
+ ;;
+ -gnu/linux*)
+ os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+ ;;
+ # First accept the basic system types.
+ # The portable systems comes first.
+ # Each alternative MUST END IN A *, to match a version number.
+ # -sysv* is not here because it comes later, after sysvr4.
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+ | -aos* \
+ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+ | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
+ | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+ | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \
+ | -openstep* | -oskit*)
+ # Remember, each alternative MUST END IN *, to match a version number.
+ ;;
+ -qnx*)
+ case $basic_machine in
+ x86-* | i[34567]86-*)
+ ;;
+ *)
+ os=-nto$os
+ ;;
+ esac
+ ;;
+ -nto*)
+ os=-nto-qnx
+ ;;
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
+ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ ;;
+ -mac*)
+ os=`echo $os | sed -e 's|mac|macos|'`
+ ;;
+ -linux*)
+ os=`echo $os | sed -e 's|linux|linux-gnu|'`
+ ;;
+ -sunos5*)
+ os=`echo $os | sed -e 's|sunos5|solaris2|'`
+ ;;
+ -sunos6*)
+ os=`echo $os | sed -e 's|sunos6|solaris3|'`
+ ;;
+ -opened*)
+ os=-openedition
+ ;;
+ -wince*)
+ os=-wince
+ ;;
+ -osfrose*)
+ os=-osfrose
+ ;;
+ -osf*)
+ os=-osf
+ ;;
+ -utek*)
+ os=-bsd
+ ;;
+ -dynix*)
+ os=-bsd
+ ;;
+ -acis*)
+ os=-aos
+ ;;
+ -386bsd)
+ os=-bsd
+ ;;
+ -ctix* | -uts*)
+ os=-sysv
+ ;;
+ -ns2 )
+ os=-nextstep2
+ ;;
+ -nsk)
+ os=-nsk
+ ;;
+ # Preserve the version number of sinix5.
+ -sinix5.*)
+ os=`echo $os | sed -e 's|sinix|sysv|'`
+ ;;
+ -sinix*)
+ os=-sysv4
+ ;;
+ -triton*)
+ os=-sysv3
+ ;;
+ -oss*)
+ os=-sysv3
+ ;;
+ -svr4)
+ os=-sysv4
+ ;;
+ -svr3)
+ os=-sysv3
+ ;;
+ -sysvr4)
+ os=-sysv4
+ ;;
+ # This must come after -sysvr4.
+ -sysv*)
+ ;;
+ -ose*)
+ os=-ose
+ ;;
+ -es1800*)
+ os=-ose
+ ;;
+ -xenix)
+ os=-xenix
+ ;;
+ -*mint | -*MiNT)
+ os=-mint
+ ;;
+ -none)
+ ;;
+ *)
+ # Get rid of the `-' at the beginning of $os.
+ os=`echo $os | sed 's/[^-]*-//'`
+ echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or put another way, the most popular os provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system. Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+ *-acorn)
+ os=-riscix1.2
+ ;;
+ arm*-rebel)
+ os=-linux
+ ;;
+ arm*-semi)
+ os=-aout
+ ;;
+ pdp11-*)
+ os=-none
+ ;;
+ *-dec | vax-*)
+ os=-ultrix4.2
+ ;;
+ m68*-apollo)
+ os=-domain
+ ;;
+ i386-sun)
+ os=-sunos4.0.2
+ ;;
+ m68000-sun)
+ os=-sunos3
+ # This also exists in the configure program, but was not the
+ # default.
+ # os=-sunos4
+ ;;
+ m68*-cisco)
+ os=-aout
+ ;;
+ mips*-cisco)
+ os=-elf
+ ;;
+ mips*-*)
+ os=-elf
+ ;;
+ *-tti) # must be before sparc entry or we get the wrong os.
+ os=-sysv3
+ ;;
+ sparc-* | *-sun)
+ os=-sunos4.1.1
+ ;;
+ *-be)
+ os=-beos
+ ;;
+ *-ibm)
+ os=-aix
+ ;;
+ *-wec)
+ os=-proelf
+ ;;
+ *-winbond)
+ os=-proelf
+ ;;
+ *-oki)
+ os=-proelf
+ ;;
+ *-hp)
+ os=-hpux
+ ;;
+ *-hitachi)
+ os=-hiux
+ ;;
+ i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+ os=-sysv
+ ;;
+ *-cbm)
+ os=-amigaos
+ ;;
+ *-dg)
+ os=-dgux
+ ;;
+ *-dolphin)
+ os=-sysv3
+ ;;
+ m68k-ccur)
+ os=-rtu
+ ;;
+ m88k-omron*)
+ os=-luna
+ ;;
+ *-next )
+ os=-nextstep
+ ;;
+ *-sequent)
+ os=-ptx
+ ;;
+ *-crds)
+ os=-unos
+ ;;
+ *-ns)
+ os=-genix
+ ;;
+ i370-*)
+ os=-mvs
+ ;;
+ *-next)
+ os=-nextstep3
+ ;;
+ *-gould)
+ os=-sysv
+ ;;
+ *-highlevel)
+ os=-bsd
+ ;;
+ *-encore)
+ os=-bsd
+ ;;
+ *-sgi)
+ os=-irix
+ ;;
+ *-siemens)
+ os=-sysv4
+ ;;
+ *-masscomp)
+ os=-rtu
+ ;;
+ f301-fujitsu)
+ os=-uxpv
+ ;;
+ *-rom68k)
+ os=-coff
+ ;;
+ *-*bug)
+ os=-coff
+ ;;
+ *-apple)
+ os=-macos
+ ;;
+ *-atari*)
+ os=-mint
+ ;;
+ *)
+ os=-none
+ ;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer. We pick the logical manufacturer.
+vendor=unknown
+case $basic_machine in
+ *-unknown)
+ case $os in
+ -riscix*)
+ vendor=acorn
+ ;;
+ -sunos*)
+ vendor=sun
+ ;;
+ -aix*)
+ vendor=ibm
+ ;;
+ -beos*)
+ vendor=be
+ ;;
+ -hpux*)
+ vendor=hp
+ ;;
+ -mpeix*)
+ vendor=hp
+ ;;
+ -hiux*)
+ vendor=hitachi
+ ;;
+ -unos*)
+ vendor=crds
+ ;;
+ -dgux*)
+ vendor=dg
+ ;;
+ -luna*)
+ vendor=omron
+ ;;
+ -genix*)
+ vendor=ns
+ ;;
+ -mvs* | -opened*)
+ vendor=ibm
+ ;;
+ -ptx*)
+ vendor=sequent
+ ;;
+ -vxsim* | -vxworks*)
+ vendor=wrs
+ ;;
+ -aux*)
+ vendor=apple
+ ;;
+ -hms*)
+ vendor=hitachi
+ ;;
+ -mpw* | -macos*)
+ vendor=apple
+ ;;
+ -*mint | -*MiNT)
+ vendor=atari
+ ;;
+ esac
+ basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
+ ;;
+esac
+
+echo $basic_machine$os
diff --git a/rts/gmp/configure b/rts/gmp/configure
new file mode 100644
index 0000000000..8294680486
--- /dev/null
+++ b/rts/gmp/configure
@@ -0,0 +1,5216 @@
+#! /bin/sh
+# From configure.in Revision: 1.129.2.2
+# Guess values for system-dependent variables and create Makefiles.
+# Generated automatically using Autoconf version 2.14a.
+# Copyright (C) 1992, 93, 94, 95, 96, 98, 99, 2000
+# Free Software Foundation, Inc.
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+# Defaults:
+ac_default_prefix=/usr/local
+# Any additions from configure.in:
+
+# Initialize some variables set by options.
+ac_init_help=false
+ac_init_version=false
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+build=NONE
+cache_file=./config.cache
+exec_prefix=NONE
+host=NONE
+no_create=
+nonopt=NONE
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+target=NONE
+verbose=
+x_includes=NONE
+x_libraries=NONE
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+# Initialize some other variables.
+subdirs=
+MFLAGS= MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+# Maximum number of lines to put in a shell here document.
+: ${ac_max_here_lines=48}
+# Sed expression to map a string onto a valid sh and CPP variable names.
+ac_tr_sh='sed -e y%*+%pp%;s%[^a-zA-Z0-9_]%_%g'
+ac_tr_cpp='sed -e y%*abcdefghijklmnopqrstuvwxyz%PABCDEFGHIJKLMNOPQRSTUVWXYZ%;s%[^A-Z0-9_]%_%g'
+
+ac_prev=
+for ac_option
+do
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ ac_optarg=`echo "$ac_option" | sed -n 's/^[^=]*=//p'`
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case "$ac_option" in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir="$ac_optarg" ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build="$ac_optarg" ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file="$ac_optarg" ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir="$ac_optarg" ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`echo "$ac_option"|sed -e 's/-*disable-//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_feature" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid feature: $ac_feature" 1>&2; exit 1; }
+ fi
+ ac_feature=`echo $ac_feature| sed 's/-/_/g'`
+ eval "enable_${ac_feature}=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`echo "$ac_option"|sed -e 's/-*enable-//' -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_feature" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid feature: $ac_feature" 1>&2; exit 1; }
+ fi
+ ac_feature=`echo $ac_feature| sed 's/-/_/g'`
+ case "$ac_option" in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_${ac_feature}='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix="$ac_optarg" ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he | -h)
+ ac_init_help=: ;;
+ -host | --host | --hos | --ho)
+ ac_prev=host ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host="$ac_optarg" ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir="$ac_optarg" ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir="$ac_optarg" ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir="$ac_optarg" ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir="$ac_optarg" ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir="$ac_optarg" ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir="$ac_optarg" ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir="$ac_optarg" ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix="$ac_optarg" ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix="$ac_optarg" ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix="$ac_optarg" ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name="$ac_optarg" ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir="$ac_optarg" ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir="$ac_optarg" ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site="$ac_optarg" ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir="$ac_optarg" ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir="$ac_optarg" ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target="$ac_optarg" ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers | -V)
+ ac_init_version=: ;;
+
+ -with-* | --with-*)
+ ac_package=`echo "$ac_option"|sed -e 's/-*with-//' -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_package" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid package: $ac_package" 1>&2; exit 1; }
+ fi
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case "$ac_option" in
+ *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_${ac_package}='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`echo "$ac_option"|sed -e 's/-*without-//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_package" | grep '[^-a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid package: $ac_package" 1>&2; exit 1; }
+ fi
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ eval "with_${ac_package}=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes="$ac_optarg" ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries="$ac_optarg" ;;
+
+ -*) { echo "configure: error: unrecognized option: $ac_option
+Try \`configure --help' for more information." 1>&2; exit 1; }
+ ;;
+
+ *=*)
+ ac_envvar=`echo "$ac_option" | sed -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if echo "$ac_envvar" | grep '[^a-zA-Z0-9_]' >/dev/null 2>&1; then
+ { echo "configure: error: invalid variable name: $ac_envvar" 1>&2; exit 1; }
+ fi
+ ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
+ eval "$ac_envvar='$ac_optarg'"
+ export $ac_envvar ;;
+
+ *)
+ if echo "$ac_option" | grep '[^-a-zA-Z0-9.]' >/dev/null 2>&1; then
+ echo "configure: warning: invalid host type: $ac_option" 1>&2
+ fi
+ if test "x$nonopt" != xNONE; then
+ { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; }
+ fi
+ nonopt="$ac_option"
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ { echo "configure: error: missing argument to --\`echo $ac_prev | sed 's/_/-/g'\`" 1>&2; exit 1; }
+fi
+if $ac_init_help; then
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat <<\EOF
+`configure' configures software source code packages to adapt to many kinds
+of systems.
+
+Usage: configure [OPTION]... [VAR=VALUE]... [HOST]
+
+To safely assign special values to environment variables (e.g., CC,
+CFLAGS...), give to `configure' the definition as VAR=VALUE.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help print this message
+ -V, --version print the version of autoconf that created configure
+ -q, --quiet, --silent do not print `checking...' messages
+ --cache-file=FILE cache test results in FILE
+ -n, --no-create do not create output files
+
+EOF
+
+ cat <<EOF
+Directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [same as prefix]
+ --bindir=DIR user executables in DIR [EPREFIX/bin]
+ --sbindir=DIR system admin executables in DIR [EPREFIX/sbin]
+ --libexecdir=DIR program executables in DIR [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data in DIR
+ [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data in DIR
+ [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var]
+ --libdir=DIR object code libraries in DIR [EPREFIX/lib]
+ --includedir=DIR C header files in DIR [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include]
+ --infodir=DIR info documentation in DIR [PREFIX/info]
+ --mandir=DIR man documentation in DIR [PREFIX/man]
+ --srcdir=DIR find the sources in DIR [configure dir or ..]
+EOF
+
+ cat <<\EOF
+
+Host type:
+ --build=BUILD configure for building on BUILD [BUILD=HOST]
+ --host=HOST configure for HOST [guessed]
+ --target=TARGET configure for TARGET [TARGET=HOST]
+EOF
+
+ cat <<\EOF
+
+Program names:
+ --program-prefix=PREFIX prepend PREFIX to installed program names
+ --program-suffix=SUFFIX append SUFFIX to installed program names
+ --program-transform-name=PROGRAM run sed PROGRAM on installed program names
+
+Optional Features:
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE=ARG include FEATURE ARG=yes
+ --disable-dependency-tracking Speeds up one-time builds
+ --enable-dependency-tracking Do not reject slow dependency extractors
+ --enable-maintainer-mode enable make rules and dependencies not useful
+ (and sometimes confusing) to the casual installer
+ --enable-assert enable ASSERT checking default=no
+ --enable-alloca use alloca for temp space default=yes
+ --enable-fft enable FFTs for multiplication default=no
+ --enable-mpbsd build Berkley MP compatibility library default=no
+ --enable-mpfr build MPFR default=no
+ --enable-shared=PKGS build shared libraries default=yes
+ --enable-static=PKGS build static libraries default=yes
+ --enable-fast-install=PKGS optimize for fast installation default=yes
+ --disable-libtool-lock avoid locking (might break parallel builds)
+
+Optional Packages:
+ --with-PACKAGE=ARG use PACKAGE ARG=yes
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --with-gnu-ld assume the C compiler uses GNU ld default=no
+ --with-pic try to use only PIC/non-PIC objects default=use both
+EOF
+ exit 0
+fi
+if $ac_init_version; then
+ cat <<\EOF
+Generated automatically using Autoconf version 2.14a.
+Copyright (C) 1992, 93, 94, 95, 96, 98, 99, 2000
+Free Software Foundation, Inc.
+
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+EOF
+ exit 0
+fi
+trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Also quote any args containing shell meta-characters.
+ac_configure_args=
+for ac_arg
+do
+ case "$ac_arg" in
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c) ;;
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+ ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"`
+ ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ *) ac_configure_args="$ac_configure_args $ac_arg" ;;
+ esac
+done
+
+# File descriptor usage:
+# 0 standard input
+# 1 file creation
+# 2 errors and warnings
+# 3 some systems may open it to /dev/tty
+# 4 used on the Kubota Titan
+# 6 checking for... messages and results
+# 5 compiler messages saved in config.log
+if test "$silent" = yes; then
+ exec 6>/dev/null
+else
+ exec 6>&1
+fi
+exec 5>./config.log
+
+echo "\
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by configure version 2.14a, executed with
+ > $0 $ac_configure_args
+" 1>&5
+
+# NLS nuisances.
+# Only set these to C if already set. These must not be set unconditionally
+# because not all systems understand e.g. LANG=C (notably SCO).
+# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'!
+# Non-C LC_CTYPE values break the ctype check.
+if test "${LANG+set}" = set; then LANG=C; export LANG; fi
+if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
+if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi
+if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# A filename unique to this package, relative to the directory that
+# configure is in, which we can look for to find out if srcdir is correct.
+ac_unique_file=
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_prog=$0
+ ac_confdir=`echo "$ac_prog" | sed 's%/[^/][^/]*$%%'`
+ test "x$ac_confdir" = "x$ac_prog" && ac_confdir=.
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "configure: error: cannot find sources in $ac_confdir or .." 1>&2; exit 1; }
+ else
+ { echo "configure: error: cannot find sources in $srcdir" 1>&2; exit 1; }
+ fi
+fi
+srcdir=`echo "$srcdir" | sed 's%\([^/]\)/*$%\1%'`
+
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ echo "loading site script $ac_site_file"
+ . "$ac_site_file"
+ fi
+done
+
+if test -r "$cache_file"; then
+ echo "loading cache $cache_file"
+ test -f "$cache_file" && . $cache_file
+else
+ echo "creating cache $cache_file"
+ >$cache_file
+fi
+
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ac_exeext=
+ac_objext=o
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#include <sys/types.h>
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#else
+# if HAVE_STRINGS_H
+# include <strings.h>
+# endif
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
+ # Stardent Vistra SVR4 grep lacks -e, says Kaveh R. Ghazi.
+ if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
+ ECHO_N= ECHO_C='
+' ECHO_T=' '
+ else
+ ECHO_N=-n ECHO_C= ECHO_T=
+ fi
+else
+ ECHO_N= ECHO_C='\c' ECHO_T=
+fi
+
+ac_aux_dir=
+for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/install-sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f $ac_dir/install.sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f $ac_dir/shtool; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ { echo "configure: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; }
+fi
+ac_config_guess="$SHELL $ac_aux_dir/config.guess"
+ac_config_sub="$SHELL $ac_aux_dir/config.sub"
+ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure.
+
+echo $ECHO_N "checking host system type... $ECHO_C" 1>&6
+echo "configure:636: checking host system type" 1>&5
+if test "x$ac_cv_host" = "x" || (test "x$host" != "xNONE" && test "x$host" != "x$ac_cv_host_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_host_alias=$host
+ case "$ac_cv_host_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ if ac_cv_host_alias=`$ac_config_guess`; then :
+ else { echo "configure: error: cannot guess host type; you must specify one" 1>&2; exit 1; }
+ fi ;; *) ac_cv_host_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_host=`$ac_config_sub $ac_cv_host_alias` || exit 1
+ ac_cv_host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_host" 1>&6
+
+host=$ac_cv_host
+host_alias=$ac_cv_host_alias
+host_cpu=$ac_cv_host_cpu
+host_vendor=$ac_cv_host_vendor
+host_os=$ac_cv_host_os
+
+echo $ECHO_N "checking target system type... $ECHO_C" 1>&6
+echo "configure:672: checking target system type" 1>&5
+if test "x$ac_cv_target" = "x" || (test "x$target" != "xNONE" && test "x$target" != "x$ac_cv_target_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_target_alias=$target
+ case "$ac_cv_target_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ ac_cv_target_alias=$host_alias ;;
+ *) ac_cv_target_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_target=`$ac_config_sub $ac_cv_target_alias` || exit 1
+ ac_cv_target_cpu=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_target_vendor=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_target_os=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_target" 1>&6
+
+target=$ac_cv_target
+target_alias=$ac_cv_target_alias
+target_cpu=$ac_cv_target_cpu
+target_vendor=$ac_cv_target_vendor
+target_os=$ac_cv_target_os
+
+echo $ECHO_N "checking build system type... $ECHO_C" 1>&6
+echo "configure:707: checking build system type" 1>&5
+if test "x$ac_cv_build" = "x" || (test "x$build" != "xNONE" && test "x$build" != "x$ac_cv_build_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_build_alias=$build
+ case "$ac_cv_build_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ ac_cv_build_alias=$host_alias ;;
+ *) ac_cv_build_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || exit 1
+ ac_cv_build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_build" 1>&6
+
+build=$ac_cv_build
+build_alias=$ac_cv_build_alias
+build_cpu=$ac_cv_build_cpu
+build_vendor=$ac_cv_build_vendor
+build_os=$ac_cv_build_os
+
+# Do some error checking and defaulting for the host and target type.
+# The inputs are:
+# configure --host=HOST --target=TARGET --build=BUILD NONOPT
+#
+# The rules are:
+# 1. You are not allowed to specify --host, --target, and nonopt at the
+# same time.
+# 2. Host defaults to nonopt.
+# 3. If nonopt is not specified, then host defaults to the current host,
+# as determined by config.guess.
+# 4. Target and build default to nonopt.
+# 5. If nonopt is not specified, then target and build default to host.
+
+# The aliases save the names the user supplied, while $host etc.
+# will get canonicalized.
+case $host---$target---$nonopt in
+NONE---*---* | *---NONE---* | *---*---NONE) ;;
+*) { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } ;;
+esac
+
+test "$host_alias" != "$target_alias" &&
+ test "$program_prefix$program_suffix$program_transform_name" = \
+ NONENONEs,x,x, &&
+ program_prefix=${target_alias}-
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# ./install, which can be erroneously created by make from ./install.sh.
+echo $ECHO_N "checking for a BSD compatible install... $ECHO_C" 1>&6
+echo "configure:778: checking for a BSD compatible install" 1>&5
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":"
+ for ac_dir in $PATH; do
+ # Account for people who put trailing slashes in PATH elements.
+ case "$ac_dir/" in
+ /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ if test -f $ac_dir/$ac_prog; then
+ if test $ac_prog = install &&
+ grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ ac_cv_path_install="$ac_dir/$ac_prog -c"
+ break 2
+ fi
+ fi
+ done
+ ;;
+ esac
+ done
+ IFS="$ac_save_IFS"
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL="$ac_cv_path_install"
+ else
+ # As a last resort, use the slow shell script. We don't cache a
+ # path for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the path is relative.
+ INSTALL="$ac_install_sh"
+ fi
+fi
+echo "$ECHO_T""$INSTALL" 1>&6
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+echo $ECHO_N "checking whether build environment is sane... $ECHO_C" 1>&6
+echo "configure:835: checking whether build environment is sane" 1>&5
+# Just in case
+sleep 1
+echo timestamp > conftestfile
+# Do `set' in a subshell so we don't clobber the current shell's
+# arguments. Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+ set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null`
+ if test "$*" = "X"; then
+ # -L didn't work.
+ set X `ls -t $srcdir/configure conftestfile`
+ fi
+ if test "$*" != "X $srcdir/configure conftestfile" \
+ && test "$*" != "X conftestfile $srcdir/configure"; then
+
+ # If neither matched, then we have a broken ls. This can happen
+ # if, for instance, CONFIG_SHELL is bash and it inherits a
+ # broken ls alias from the environment. This has actually
+ # happened. Such a system could not be considered "sane".
+ { echo "configure: error: ls -t appears to fail. Make sure there is not a broken
+alias in your environment" 1>&2; exit 1; }
+ fi
+
+ test "$2" = conftestfile
+ )
+then
+ # Ok.
+ :
+else
+ { echo "configure: error: newly created file is older than distributed files!
+Check your system clock" 1>&2; exit 1; }
+fi
+rm -f conftest*
+echo "$ECHO_T""yes" 1>&6
+if test "$program_transform_name" = s,x,x,; then
+ program_transform_name=
+else
+ # Double any \ or $. echo might interpret backslashes.
+ cat <<\EOF >conftestsed
+s,\\,\\\\,g; s,\$,$$,g
+EOF
+ program_transform_name=`echo $program_transform_name | sed -f conftestsed`
+ rm -f conftestsed
+fi
+test "$program_prefix" != NONE &&
+ program_transform_name="s,^,${program_prefix},;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+ program_transform_name="s,\$\$,${program_suffix},;$program_transform_name"
+
+# sed with no file args requires a program.
+test "$program_transform_name" = "" && program_transform_name="s,x,x,"
+
+test x"${MISSING+set}" = xset || \
+ MISSING="\${SHELL} `CDPATH=: && cd $ac_aux_dir && pwd`/missing"
+if eval "$MISSING --run :"; then
+ am_missing_run="$MISSING --run "
+else
+ am_missing_run=
+ am_backtick='`'
+ echo "configure: warning: ${am_backtick}missing' script is too old or missing" 1>&2
+fi
+
+for ac_prog in mawk gawk nawk awk
+do
+# Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:906: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_AWK+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$AWK"; then
+ ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_AWK="$ac_prog"
+ break
+ done
+fi
+fi
+AWK="$ac_cv_prog_AWK"
+if test -n "$AWK"; then
+ echo "$ECHO_T""$AWK" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+test -n "$AWK" && break
+done
+
+echo $ECHO_N "checking whether ${MAKE-make} sets \${MAKE}... $ECHO_C" 1>&6
+echo "configure:939: checking whether ${MAKE-make} sets \${MAKE}" 1>&5
+set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'`
+if eval "test \"\${ac_cv_prog_make_${ac_make}_set+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftestmake <<\EOF
+all:
+ @echo 'ac_maketemp="${MAKE}"'
+EOF
+# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=`
+if test -n "$ac_maketemp"; then
+ eval ac_cv_prog_make_${ac_make}_set=yes
+else
+ eval ac_cv_prog_make_${ac_make}_set=no
+fi
+rm -f conftestmake
+fi
+if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then
+ echo "$ECHO_T""yes" 1>&6
+ SET_MAKE=
+else
+ echo "$ECHO_T""no" 1>&6
+ SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+# Check whether --enable-dependency-tracking or --disable-dependency-tracking was given.
+if test "${enable_dependency_tracking+set}" = set; then
+ enableval="$enable_dependency_tracking"
+
+fi
+if test "x$enable_dependency_tracking" = xno; then
+ AMDEP="#"
+else
+ am_depcomp="$ac_aux_dir/depcomp"
+ if test ! -f "$am_depcomp"; then
+ AMDEP="#"
+ else
+ AMDEP=
+ fi
+fi
+
+if test -z "$AMDEP"; then
+ AMDEPBACKSLASH='\'
+else
+ AMDEPBACKSLASH=
+fi
+
+if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then
+ DEPDIR=.deps
+else
+ DEPDIR=_deps
+fi
+
+PACKAGE=gmp
+
+VERSION=3.1.1
+
+if test "`CDPATH=: && cd $srcdir && pwd`" != "`pwd`" &&
+ test -f $srcdir/config.status; then
+ { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; }
+fi
+cat >>confdefs.h <<EOF
+#define PACKAGE "$PACKAGE"
+EOF
+
+cat >>confdefs.h <<EOF
+#define VERSION "$VERSION"
+EOF
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal"}
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake"}
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+AMTAR=${AMTAR-"${am_missing_run}tar"}
+
+if test -z "$install_sh"; then
+ install_sh="$ac_aux_dir/install-sh"
+ test -f "$install_sh" || install_sh="$ac_aux_dir/install.sh"
+ test -f "$install_sh" || install_sh="${am_missing_run}${ac_auxdir}/install-sh"
+ install_sh="`echo $install_sh | sed -e 's/\${SHELL}//'`"
+fi
+
+echo $ECHO_N "checking whether to enable maintainer-specific portions of Makefiles... $ECHO_C" 1>&6
+echo "configure:1029: checking whether to enable maintainer-specific portions of Makefiles" 1>&5
+ # Check whether --enable-maintainer-mode or --disable-maintainer-mode was given.
+if test "${enable_maintainer_mode+set}" = set; then
+ enableval="$enable_maintainer_mode"
+ USE_MAINTAINER_MODE=$enableval
+else
+ USE_MAINTAINER_MODE=no
+fi
+ echo "$ECHO_T""$USE_MAINTAINER_MODE" 1>&6
+
+if test $USE_MAINTAINER_MODE = yes; then
+ MAINTAINER_MODE_TRUE=
+ MAINTAINER_MODE_FALSE='#'
+else
+ MAINTAINER_MODE_TRUE='#'
+ MAINTAINER_MODE_FALSE=
+fi
+ MAINT=$MAINTAINER_MODE_TRUE
+
+gmp_configm4="config.m4"
+gmp_tmpconfigm4=cnfm4.tmp
+gmp_tmpconfigm4i=cnfm4i.tmp
+gmp_tmpconfigm4p=cnfm4p.tmp
+test -f $gmp_tmpconfigm4 && rm $gmp_tmpconfigm4
+test -f $gmp_tmpconfigm4i && rm $gmp_tmpconfigm4i
+test -f $gmp_tmpconfigm4p && rm $gmp_tmpconfigm4p
+
+# Check whether --enable-assert or --disable-assert was given.
+if test "${enable_assert+set}" = set; then
+ enableval="$enable_assert"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-assert, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_assert=no
+fi
+
+if test "$enable_assert" = "yes"; then
+ cat >>confdefs.h <<\EOF
+#define WANT_ASSERT 1
+EOF
+
+fi
+
+# Check whether --enable-alloca or --disable-alloca was given.
+if test "${enable_alloca+set}" = set; then
+ enableval="$enable_alloca"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-alloca, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_alloca=yes
+fi
+
+if test "$enable_alloca" = "no"; then
+ cat >>confdefs.h <<\EOF
+#define USE_STACK_ALLOC 1
+EOF
+
+fi
+
+# Check whether --enable-fft or --disable-fft was given.
+if test "${enable_fft+set}" = set; then
+ enableval="$enable_fft"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-fft, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_fft=no
+fi
+
+if test "$enable_fft" = "yes"; then
+ cat >>confdefs.h <<\EOF
+#define WANT_FFT 1
+EOF
+
+fi
+
+# Check whether --enable-mpbsd or --disable-mpbsd was given.
+if test "${enable_mpbsd+set}" = set; then
+ enableval="$enable_mpbsd"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-mpbsd, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_mpbsd=no
+fi
+
+if test "$enable_mpbsd" = "yes"; then
+ WANT_MPBSD_TRUE=
+ WANT_MPBSD_FALSE='#'
+else
+ WANT_MPBSD_TRUE='#'
+ WANT_MPBSD_FALSE=
+fi
+
+# Check whether --enable-mpfr or --disable-mpfr was given.
+if test "${enable_mpfr+set}" = set; then
+ enableval="$enable_mpfr"
+ case "${enableval}" in
+yes|no) ;;
+*) { echo "configure: error: bad value ${enableval} for --enable-mpfr, need yes or no" 1>&2; exit 1; } ;;
+esac
+else
+ enable_mpfr=no
+fi
+
+if test "$enable_mpfr" = "yes"; then
+ WANT_MPFR_TRUE=
+ WANT_MPFR_FALSE='#'
+else
+ WANT_MPFR_TRUE='#'
+ WANT_MPFR_FALSE=
+fi
+
+os_64bit="no"
+cclist="gcc cc" # FIXME: Prefer c89 to cc.
+gmp_cflags_gcc="-g -O2"
+gmp_cflags64_gcc="-g -O2"
+gmp_cflags_cc="-g"
+gmp_cflags64_cc="-g"
+
+case "$target" in
+ # Alpha
+ alpha*-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ gmp_cflags_cc="$gmp_cflags_cc -O"
+ ;;
+ alpha*-*-osf*)
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in # compilers don't seem to understand `ev67' and such.
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ # FIXME: We shouldn't fail fatally if none of these work, but that's
+ # how xoptcflags work and we don't have any other mechanism right now.
+ # Why do we need this here and not for alpha*-*-* below?
+ gmp_xoptcflags_gcc="-Wa,-arch,${flavour} -Wa,-m${flavour}"
+ gmp_optcflags_cc="-arch $flavour -tune $flavour"
+ fi
+ ;;
+ alpha*-*-*)
+ cclist="gcc"
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ fi
+ ;;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ # Don't inherit default gmp_cflags_cc value; it comes with -g which
+ # disables all optimization on Cray vector systems
+ gmp_cflags_cc="-O"
+ ;;
+
+ # AMD and Intel x86 configurations
+ i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*)
+ # Rumour has it -O2 used to give worse register allocation than just -O.
+ gmp_cflags_gcc="-g -O -fomit-frame-pointer"
+
+ case "${target}" in
+ i386*-*-*) gmp_optcflags_gcc="-mcpu=i386 -march=i386";;
+ i486*-*-*) gmp_optcflags_gcc="-mcpu=i486 -march=i486";;
+ i586*-*-* | pentium-*-* | pentiummmx-*-*)
+ gmp_optcflags_gcc="-mcpu=pentium -march=pentium";;
+
+ # -march=pentiumpro not used because mpz/powm.c (swox cvs rev 1.4)
+ # tickles a bug in gcc 2.95.2 (believed fixed in 2.96).
+ i686*-*-* | pentiumpro-*-* | pentium[23]-*-*)
+ gmp_optcflags_gcc="-mcpu=pentiumpro";;
+
+ k6*-*-*) gmp_optcflags_gcc="-mcpu=k6 -march=k6";;
+
+ # Athlon instruction costs are close to p6: 3 cycle load latency, 4-6
+ # cycle mul, 40 cycle div, pairable adc, ...
+ # FIXME: Change this when gcc gets something specific for Athlon.
+ # -march=pentiumpro not used, per i686 above.
+ athlon-*-*) gmp_optcflags_gcc="-mcpu=pentiumpro";;
+ esac
+ ;;
+
+ # Sparc
+ ultrasparc*-*-solaris2.[7-9] | sparcv9-*-solaris2.[7-9])
+ os_64bit=yes
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ gmp_cflags64_cc="-xtarget=native -xarch=v9 -xO4"
+ ;;
+ sparc64-*-linux*)
+ # Need to think more about the options passed here. This isn't good for
+ # some sparc64 linux distros, since we end up not optimizing when all the
+ # options below fail.
+ os_64bit=yes
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_gcc="$gmp_cflags_gcc -m32"
+ gmp_xoptflags_gcc="-mcpu=ultrasparc -mvis"
+ ;;
+ ultrasparc*-*-* | sparcv9-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-solaris2.* | microsparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-* | microsparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92"
+ gmp_cflags_cc="-O2" # FIXME: Flag for v8?
+ ;;
+ supersparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4 -DSUPERSPARC"
+ ;;
+ supersparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92 -DSUPERSPARC"
+ gmp_cflags_cc="-O2 -DSUPERSPARC" # FIXME: Flag for v8?
+ ;;
+ *sparc*-*-*)
+ cclist="gcc acc cc"
+ gmp_cflags_acc="-g -O2"
+ gmp_cflags_cc="-g -O2"
+ ;;
+
+ # POWER/PowerPC
+ powerpc64-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -maix64 -mpowerpc64"
+ gmp_cflags_xlc="-g -O2 -q64 -qtune=pwr3"
+ ;;
+ powerpc*-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=ppc -O2"
+ ;;
+ power-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpower"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=pwr -O2"
+ ;;
+ powerpc64*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc64"
+ cat >>confdefs.h <<\EOF
+#define _LONG_LONG_LIMB 1
+EOF
+ ;;
+ powerpc-apple-darwin* | powerpc-apple-macosx*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc -traditional-cpp"
+ ;;
+ powerpc*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ ;;
+
+ # MIPS
+ mips-sgi-irix6.*)
+ os_64bit=yes
+ gmp_cflags64_gcc="-g -O2 -mabi=n32"
+ gmp_cflags64_cc="$gmp_cflags64_cc -O2 -n32"
+ ;;
+
+ # Motorola 68k family
+ m88110*-*-*)
+ gmp_cflags_gcc="-g -O -m88110" ;;
+ m68*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # HP
+ hppa1.0*-*-*)
+ cclist="gcc c89 cc"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+ hppa2.0w*-*-*)
+ cclist="c89 cc"
+ gmp_cflags_c89="+DD64 +O3"
+ gmp_cflags_cc="+DD64 +O3"
+ ;;
+ hppa2.0*-*-*)
+ os_64bit=yes
+ cclist="gcc c89 cc"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -mWHAT -D_LONG_LONG_LIMB"
+ # +O2 to cc triggers bug in mpz/powm.c (1.4)
+ gmp_cflags64_c89="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags64_cc="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+
+ # VAX
+ vax*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # Fujitsu
+ f30[01]-fujitsu-sysv*)
+ cclist="gcc vcc"
+ gmp_cflags_vcc="-g" # FIXME: flags for vcc?
+ ;;
+esac
+
+case "${target}" in
+ *-*-mingw32) gmp_cflags_gcc="$gmp_cflags_gcc -mno-cygwin";;
+esac
+
+echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" 1>&6
+echo "configure:1352: checking for BSD-compatible nm" 1>&5
+if test "${ac_cv_path_NM+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$NM"; then
+ # Let the user override the test.
+ ac_cv_path_NM="$NM"
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/nm || test -f $ac_dir/nm$ac_exeext ; then
+ # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+ # nm: unknown option "B" ignored
+ if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -B"
+ break
+ elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
+ ac_cv_path_NM="$ac_dir/nm -p"
+ break
+ else
+ ac_cv_path_NM=${ac_cv_path_NM="$ac_dir/nm"} # keep the first match, but
+ continue # so that we can try to find one that supports BSD flags
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+ test -z "$ac_cv_path_NM" && ac_cv_path_NM=nm
+fi
+fi
+
+NM="$ac_cv_path_NM"
+echo "$ECHO_T""$NM" 1>&6
+ # nm on 64-bit AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ NM="$NM -X 64"
+ ;;
+esac
+
+# Save CFLAGS given on command line.
+gmp_user_CFLAGS="$CFLAGS"
+
+if test -z "$CC"; then
+ # Find compiler.
+
+if test $host != $build; then
+ ac_tool_prefix=${host_alias}-
+else
+ ac_tool_prefix=
+fi
+
+gmp_cc_list="$cclist"
+gmp_req_64bit_cc="$os_64bit"
+
+CC32=
+CC64=
+for c in $gmp_cc_list; do
+ # Avoid cache hits.
+ unset CC
+ unset ac_cv_prog_CC
+
+# Extract the first word of "${ac_tool_prefix}$c", so it can be a program name with args.
+set dummy ${ac_tool_prefix}$c; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:1418: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_CC="${ac_tool_prefix}$c"
+ break
+ done
+fi
+fi
+CC="$ac_cv_prog_CC"
+if test -n "$CC"; then
+ echo "$ECHO_T""$CC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$ac_cv_prog_CC"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "$c", so it can be a program name with args.
+set dummy $c; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:1452: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_CC="$c"
+ break
+ done
+ test -z "$ac_cv_prog_CC" && ac_cv_prog_CC="$c"
+fi
+fi
+CC="$ac_cv_prog_CC"
+if test -n "$CC"; then
+ echo "$ECHO_T""$CC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ CC="$c"
+ fi
+fi
+
+ if test -n "$CC"; then
+ eval c_flags=\$gmp_cflags_$c
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+ CC="$CC"
+CFLAGS="$c_flags"
+echo $ECHO_N "checking if the C compiler ($CC) works with flags $CFLAGS... $ECHO_C" 1>&6
+echo "configure:1498: checking if the C compiler ($CC) works with flags $CFLAGS" 1>&5
+
+# Simple test for all targets.
+cat >conftest.$ac_ext <<EOF
+
+#line 1503 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1508: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ tmp_works=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ tmp_cross=no
+ else
+ tmp_cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ tmp_works=no
+fi
+rm -fr conftest*
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ cat >conftest.$ac_ext <<EOF
+#line 1528 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+} void *g(); void *f() { return g(); } int bar(){
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:1539: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ tmp_works=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ tmp_works=no
+fi
+rm -f conftest*
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ gmp_prog_cc_works=yes
+else
+ gmp_prog_cc_works=no
+fi
+
+echo "$ECHO_T""$tmp_works" 1>&6
+
+ if test "$gmp_prog_cc_works" != "yes"; then
+ continue
+ fi
+
+ # Save first working compiler, whether 32- or 64-bit capable.
+ if test -z "$CC32"; then
+ CC32="$CC"
+ fi
+ if test "$gmp_req_64bit_cc" = "yes"; then
+ eval c_flags=\$gmp_cflags64_$c
+
+ # Verify that the compiler works in 64-bit mode as well.
+ # /usr/ucb/cc on Solaris 7 can *compile* in 64-bit mode, but not link.
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+ CC="$c"
+CFLAGS="$c_flags"
+echo $ECHO_N "checking if the C compiler ($CC) works with flags $CFLAGS... $ECHO_C" 1>&6
+echo "configure:1583: checking if the C compiler ($CC) works with flags $CFLAGS" 1>&5
+
+# Simple test for all targets.
+cat >conftest.$ac_ext <<EOF
+
+#line 1588 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1593: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ tmp_works=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ tmp_cross=no
+ else
+ tmp_cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ tmp_works=no
+fi
+rm -fr conftest*
+
+# Target specific tests.
+if test "$tmp_works" = "yes"; then
+ case "$target" in
+ *-*-aix*) # Returning a funcptr.
+ cat >conftest.$ac_ext <<EOF
+#line 1613 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+} void *g(); void *f() { return g(); } int bar(){
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:1624: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ tmp_works=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ tmp_works=no
+fi
+rm -f conftest*
+ ;;
+ esac
+fi
+
+if test "$tmp_works" = "yes"; then
+ gmp_prog_cc_works=yes
+else
+ gmp_prog_cc_works=no
+fi
+
+echo "$ECHO_T""$tmp_works" 1>&6
+
+ if test "$gmp_prog_cc_works" = "yes"; then
+
+ gmp_tmp_CC_save="$CC"
+ CC="$c"
+ echo $ECHO_N "checking whether the C compiler ($CC) is 64-bit capable... $ECHO_C" 1>&6
+echo "configure:1651: checking whether the C compiler ($CC) is 64-bit capable" 1>&5
+ if test -z "$NM"; then
+ echo; echo "configure: GMP_CHECK_CC_64BIT: fatal: need nm"
+ exit 1
+ fi
+ gmp_tmp_CFLAGS_save="$CFLAGS"
+ CFLAGS="$c_flags"
+
+ case "$target" in
+ hppa2.0*-*-*)
+ # FIXME: If gcc is installed under another name than "gcc", we will
+ # test the wrong thing.
+ if test "$CC" != "gcc"; then
+ echo >conftest.c
+ gmp_tmp_vs=`$CC $CFLAGS -V -c -o conftest.o conftest.c 2>&1 | grep "^ccom:"`
+ rm conftest*
+ gmp_tmp_v1=`echo $gmp_tmp_vs | sed 's/.* .\.\(.*\)\..*\..* HP C.*/\1/'`
+ gmp_tmp_v2=`echo $gmp_tmp_vs | sed 's/.* .\..*\.\(.*\)\..* HP C.*/\1/'`
+ gmp_tmp_v3=`echo $gmp_tmp_vs | sed 's/.* .\..*\..*\.\(.*\) HP C.*/\1/'`
+ gmp_cv_cc_64bit=no
+ test -n "$gmp_tmp_v1" && test "$gmp_tmp_v1" -ge "10" \
+ && test -n "$gmp_tmp_v2" && test "$gmp_tmp_v2" -ge "32" \
+ && test -n "$gmp_tmp_v3" && test "$gmp_tmp_v3" -ge "30" \
+ && gmp_cv_cc_64bit=yes
+ else # gcc
+ # FIXME: Compile a minimal file and determine if the resulting object
+ # file is an ELF file. If so, gcc can produce 64-bit code.
+ # Do we have file(1) for target?
+ gmp_cv_cc_64bit=no
+ fi
+ ;;
+ mips-sgi-irix6.*)
+ # We use `-n32' to cc and `-mabi=n32' to gcc, resulting in 64-bit
+ # arithmetic but not 64-bit pointers, so the general test for sizeof
+ # (void *) is not valid.
+ # Simply try to compile an empty main. If that succeeds return
+ # true.
+ cat >conftest.$ac_ext <<EOF
+#line 1689 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:1700: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ gmp_cv_cc_64bit=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ gmp_cv_cc_64bit=no
+fi
+rm -f conftest*
+ ;;
+ *-*-*)
+ # Allocate an array of size sizeof (void *) and use nm to determine its
+ # size. We depend on the first declared variable being put at address 0.
+ cat >conftest.c <<EOF
+char arr[sizeof (void *)]={0};
+char post=0;
+EOF
+ gmp_compile="$CC $CFLAGS -c conftest.c 1>&5"
+ if { (eval echo configure:1719: \"$gmp_compile\") 1>&5; (eval $gmp_compile) 2>&5; }; then
+ gmp_tmp_val=`$NM conftest.o | grep post | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ if test "$gmp_tmp_val" = "8"; then
+ gmp_cv_cc_64bit=yes
+ else
+ gmp_cv_cc_64bit=no
+ fi
+ else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ gmp_cv_cc_64bit=no
+ fi
+ rm -f conftest*
+ ;;
+ esac
+
+ CC="$gmp_tmp_CC_save"
+ CFLAGS="$gmp_tmp_CFLAGS_save"
+ echo "$ECHO_T""$gmp_cv_cc_64bit" 1>&6
+
+ if test "$gmp_cv_cc_64bit" = "yes"; then
+ test -z "$CC64" && CC64="$c"
+ test -z "$CFLAGS64" && CFLAGS64="$c_flags"
+ # We have CC64 so we're done.
+ break
+ fi
+ fi
+ else
+ # We have CC32, and we don't need a 64-bit compiler so we're done.
+ break
+ fi
+ fi
+done
+CC="$CC32"
+
+ # If 64-bit OS and we have a 64-bit compiler, use it.
+ if test -n "$os_64bit" && test -n "$CC64"; then
+ CC=$CC64
+ CFLAGS=$CFLAGS64
+ else
+ eval CFLAGS=\$gmp_cflags_$CC
+ fi
+
+ # Try compiler flags that may work with only some compiler versions.
+ # gmp_optcflags: All or nothing.
+ eval optcflags=\$gmp_optcflags_$CC
+ if test -n "$optcflags"; then
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $optcflags"
+ echo $ECHO_N "checking whether $CC accepts $optcflags... $ECHO_C" 1>&6
+echo "configure:1770: checking whether $CC accepts $optcflags" 1>&5
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ cat >conftest.$ac_ext <<EOF
+
+#line 1780 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1785: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ optok=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ cross=no
+ else
+ cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ optok=no
+fi
+rm -fr conftest*
+ if test "$optok" = "yes"; then
+ echo "$ECHO_T""yes" 1>&6
+ else
+ echo "$ECHO_T""no" 1>&6
+ CFLAGS="$CFLAGS_save"
+ fi
+ fi
+ # gmp_xoptcflags: First is best, one has to work.
+ eval xoptcflags=\$gmp_xoptcflags_$CC
+ if test -n "$xoptcflags"; then
+ gmp_found="no"
+ for xopt in $xoptcflags; do
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $xopt"
+ echo $ECHO_N "checking whether $CC accepts $xopt... $ECHO_C" 1>&6
+echo "configure:1814: checking whether $CC accepts $xopt" 1>&5
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ cat >conftest.$ac_ext <<EOF
+
+#line 1824 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1829: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ optok=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ cross=no
+ else
+ cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ optok=no
+fi
+rm -fr conftest*
+ if test "$optok" = "yes"; then
+ echo "$ECHO_T""yes" 1>&6
+ gmp_found="yes"
+ break
+ else
+ echo "$ECHO_T""no" 1>&6
+ CFLAGS="$CFLAGS_save"
+ fi
+ done
+ if test "$gmp_found" = "no"; then
+ echo "$0: fatal: need a compiler that understands one of $xoptcflags"
+ exit 1
+ fi
+ fi
+fi
+
+# Restore CFLAGS given on command line.
+# FIXME: We've run through quite some unnecessary code looking for a
+# nice compiler and working flags for it, just to spoil that with user
+# supplied flags.
+test -n "$gmp_user_CFLAGS" && CFLAGS="$gmp_user_CFLAGS"
+
+# Select chosen compiler.
+
+echo $ECHO_N "checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) works... $ECHO_C" 1>&6
+echo "configure:1868: checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) works" 1>&5
+
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+cat >conftest.$ac_ext <<EOF
+
+#line 1879 "configure"
+#include "confdefs.h"
+
+int main(){return(0);}
+EOF
+if { (eval echo configure:1884: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ ac_cv_prog_cc_works=yes
+ # If we can't run a trivial program, we are probably using a cross compiler.
+ if (./conftest; exit) 2>/dev/null; then
+ ac_cv_prog_cc_cross=no
+ else
+ ac_cv_prog_cc_cross=yes
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ ac_cv_prog_cc_works=no
+fi
+rm -fr conftest*
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+echo "$ECHO_T""$ac_cv_prog_cc_works" 1>&6
+if test $ac_cv_prog_cc_works = no; then
+ { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 77; }
+fi
+echo $ECHO_N "checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) is a cross-compiler... $ECHO_C" 1>&6
+echo "configure:1910: checking whether the C compiler ($CC $CFLAGS $CPPFLAGS $LDFLAGS) is a cross-compiler" 1>&5
+echo "$ECHO_T""$ac_cv_prog_cc_cross" 1>&6
+cross_compiling=$ac_cv_prog_cc_cross
+
+echo $ECHO_N "checking whether we are using GNU C... $ECHO_C" 1>&6
+echo "configure:1915: checking whether we are using GNU C" 1>&5
+if test "${ac_cv_prog_gcc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.c <<EOF
+#ifdef __GNUC__
+ yes;
+#endif
+EOF
+if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:1924: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
+ ac_cv_prog_gcc=yes
+else
+ ac_cv_prog_gcc=no
+fi
+fi
+echo "$ECHO_T""$ac_cv_prog_gcc" 1>&6
+
+if test "$ac_cv_prog_gcc" = "yes"; then
+ GCC=yes
+else
+ GCC=
+fi
+
+# Set CFLAGS if not already set.
+if test -z "$CFLAGS"; then
+ CFLAGS="-g"
+ if test "$GCC" = "yes"; then
+ CFLAGS="$CFLAGS -O2"
+ fi
+fi
+
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ ac_cv_prog_CC="$CC"
+fi
+
+# How to assemble.
+CCAS="$CC -c"
+
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" 1>&6
+echo "configure:1956: checking how to run the C preprocessor" 1>&5
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ # This must be in double quotes, not single quotes, because CPP may get
+ # substituted into the Makefile and "${CC-cc}" will confuse make.
+ CPP="${CC-cc} -E"
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp.
+
+cat >conftest.$ac_ext <<EOF
+#line 1972 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:1978: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ CPP="${CC-cc} -E -traditional-cpp"
+
+cat >conftest.$ac_ext <<EOF
+#line 1990 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:1996: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ CPP="${CC-cc} -nologo -E"
+
+cat >conftest.$ac_ext <<EOF
+#line 2008 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:2014: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ CPP=/lib/cpp
+fi
+rm -f conftest*
+fi
+rm -f conftest*
+fi
+rm -f conftest*
+ ac_cv_prog_CPP="$CPP"
+fi
+ CPP="$ac_cv_prog_CPP"
+else
+ ac_cv_prog_CPP="$CPP"
+fi
+echo "$ECHO_T""$CPP" 1>&6
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# ./install, which can be erroneously created by make from ./install.sh.
+echo $ECHO_N "checking for a BSD compatible install... $ECHO_C" 1>&6
+echo "configure:2050: checking for a BSD compatible install" 1>&5
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":"
+ for ac_dir in $PATH; do
+ # Account for people who put trailing slashes in PATH elements.
+ case "$ac_dir/" in
+ /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ if test -f $ac_dir/$ac_prog; then
+ if test $ac_prog = install &&
+ grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ ac_cv_path_install="$ac_dir/$ac_prog -c"
+ break 2
+ fi
+ fi
+ done
+ ;;
+ esac
+ done
+ IFS="$ac_save_IFS"
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL="$ac_cv_path_install"
+ else
+ # As a last resort, use the slow shell script. We don't cache a
+ # path for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the path is relative.
+ INSTALL="$ac_install_sh"
+ fi
+fi
+echo "$ECHO_T""$INSTALL" 1>&6
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+echo $ECHO_N "checking whether ln -s works... $ECHO_C" 1>&6
+echo "configure:2107: checking whether ln -s works" 1>&5
+if test "${ac_cv_prog_LN_S+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ rm -f conftestdata
+if ln -s X conftestdata 2>/dev/null
+then
+ rm -f conftestdata
+ ac_cv_prog_LN_S="ln -s"
+else
+ ac_cv_prog_LN_S=ln
+fi
+fi
+LN_S="$ac_cv_prog_LN_S"
+if test "$ac_cv_prog_LN_S" = "ln -s"; then
+ echo "$ECHO_T""yes" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+echo $ECHO_N "checking for suitable m4... $ECHO_C" 1>&6
+echo "configure:2128: checking for suitable m4" 1>&5
+if test "${gmp_cv_prog_m4+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$M4"; then
+ gmp_cv_prog_m4="$M4"
+else
+ cat >conftest.m4 <<\EOF
+define(dollarhash,``$#'')dnl
+ifelse(dollarhash(x),1,`define(t1,Y)',
+``bad: $# not supported (SunOS /usr/bin/m4)
+'')dnl
+ifelse(eval(89),89,`define(t2,Y)',
+`bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4)
+')dnl
+ifelse(t1`'t2,YY,`good
+')dnl
+EOF
+ echo "trying m4" 1>&5
+ gmp_tmp_val="`(m4 conftest.m4) 2>&5`"
+ echo "$gmp_tmp_val" 1>&5
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="m4"
+ else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ ac_dummy="$PATH:/usr/5bin"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ echo "trying $ac_dir/m4" 1>&5
+ gmp_tmp_val="`($ac_dir/m4 conftest.m4) 2>&5`"
+ echo "$gmp_tmp_val" 1>&5
+ if test "$gmp_tmp_val" = good; then
+ gmp_cv_prog_m4="$ac_dir/m4"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ if test -z "$gmp_cv_prog_m4"; then
+ { echo "configure: error: No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." 1>&2; exit 1; }
+ fi
+ fi
+ rm -f conftest.m4
+fi
+fi
+echo "$ECHO_T""$gmp_cv_prog_m4" 1>&6
+M4="$gmp_cv_prog_m4"
+
+# Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:2178: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_AR+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$AR"; then
+ ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_AR="ar"
+ break
+ done
+fi
+fi
+AR="$ac_cv_prog_AR"
+if test -n "$AR"; then
+ echo "$ECHO_T""$AR" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+# ar on AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ AR="$AR -X 64"
+ ;;
+esac
+
+if test "$gmp_no_asm_syntax_testing" != "yes"; then
+ echo $ECHO_N "checking how to switch to text section... $ECHO_C" 1>&6
+echo "configure:2216: checking how to switch to text section" 1>&5
+if test "${gmp_cv_check_asm_text+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-aix*)
+
+ gmp_cv_check_asm_text=".csect .text[PR]"
+
+ ;;
+ *-*-hpux*) gmp_cv_check_asm_text=".code" ;;
+ *) gmp_cv_check_asm_text=".text" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_text" 1>&6
+echo "define(<TEXT>, <$gmp_cv_check_asm_text>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking how to switch to data section... $ECHO_C" 1>&6
+echo "configure:2235: checking how to switch to data section" 1>&5
+if test "${gmp_cv_check_asm_data+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-aix*)
+
+ gmp_cv_check_asm_data=".csect .data[RW]"
+
+ ;;
+ *) gmp_cv_check_asm_data=".data" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_data" 1>&6
+echo "define(<DATA>, <$gmp_cv_check_asm_data>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking how to export a symbol... $ECHO_C" 1>&6
+echo "configure:2253: checking how to export a symbol" 1>&5
+if test "${gmp_cv_check_asm_globl+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_globl=".export" ;;
+ *) gmp_cv_check_asm_globl=".globl" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_globl" 1>&6
+echo "define(<GLOBL>, <$gmp_cv_check_asm_globl>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking what assembly label suffix to use... $ECHO_C" 1>&6
+echo "configure:2267: checking what assembly label suffix to use" 1>&5
+if test "${gmp_cv_check_asm_label_suffix+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$target" in
+ *-*-hpux*) gmp_cv_check_asm_label_suffix="" ;;
+ *) gmp_cv_check_asm_label_suffix=":" ;;
+esac
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_label_suffix" 1>&6
+echo "define(<LABEL_SUFFIX>, <\$1$gmp_cv_check_asm_label_suffix>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking how the .type assembly directive should be used... $ECHO_C" 1>&6
+echo "configure:2281: checking how the .type assembly directive should be used" 1>&5
+if test "${gmp_cv_check_asm_type+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+for gmp_tmp_prefix in @ \# %; do
+ echo " .type sym,${gmp_tmp_prefix}function" > conftest.s
+ if { (eval echo configure:2288: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_type=".type \$1,${gmp_tmp_prefix}\$2"
+ break
+ fi
+done
+if test -z "$gmp_cv_check_asm_type"; then
+ gmp_cv_check_asm_type="dnl"
+fi
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_type" 1>&6
+echo "define(<TYPE>, <$gmp_cv_check_asm_type>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking if the .size assembly directive works... $ECHO_C" 1>&6
+echo "configure:2302: checking if the .size assembly directive works" 1>&5
+if test "${gmp_cv_check_asm_size+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+echo ' .size sym,1' > conftest.s
+if { (eval echo configure:2308: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_size=".size \$1,\$2"
+else
+ gmp_cv_check_asm_size="dnl"
+fi
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_size" 1>&6
+echo "define(<SIZE>, <$gmp_cv_check_asm_size>)" >> $gmp_tmpconfigm4
+
+echo $ECHO_N "checking what prefix to use for a local label... $ECHO_C" 1>&6
+echo "configure:2319: checking what prefix to use for a local label" 1>&5
+if test "${gmp_cv_check_asm_lsym_prefix+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$NM"; then
+ echo; echo "GMP_CHECK_ASM_LSYM_PREFIX: fatal: need nm"
+ exit 1
+fi
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+gmp_cv_check_asm_lsym_prefix="L"
+for gmp_tmp_pre in L .L $ L$; do
+ cat > conftest.s <<EOF
+dummy${gmp_cv_check_asm_label_suffix}
+${gmp_tmp_pre}gurkmacka${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if { (eval echo configure:2335: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ $NM conftest.o >/dev/null 2>&1
+ gmp_rc=$?
+ if test "$gmp_rc" != "0"; then
+ echo "configure: $NM failure, using default"
+ break
+ fi
+ if $NM conftest.o | grep gurkmacka >/dev/null; then true; else
+ gmp_cv_check_asm_lsym_prefix="$gmp_tmp_pre"
+ break
+ fi
+ else
+ echo "configure: failed program was:" >&5
+ cat conftest.s >&5
+ # Use default.
+ fi
+done
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_lsym_prefix" 1>&6
+echo "define(<LSYM_PREFIX>, <${gmp_cv_check_asm_lsym_prefix}>)" >> $gmp_tmpconfigm4
+
+echo $ECHO_N "checking how to define a 32-bit word... $ECHO_C" 1>&6
+echo "configure:2359: checking how to [define] a 32-bit word" 1>&5
+if test "${gmp_cv_check_asm_w32+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$NM"; then
+ echo; echo "configure: GMP_CHECK_ASM_W32: fatal: need nm"
+ exit 1
+fi
+
+# FIXME: HPUX puts first symbol at 0x40000000, breaking our assumption
+# that it's at 0x0. We'll have to declare another symbol before the
+# .long/.word and look at the distance between the two symbols. The
+# only problem is that the sed expression(s) barfs (on Solaris, for
+# example) for the symbol with value 0. For now, HPUX uses .word.
+
+case "$target" in
+ *-*-hpux*)
+ gmp_cv_check_asm_w32=".word"
+ ;;
+ *-*-*)
+ ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+ for gmp_tmp_op in .long .word; do
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ $gmp_cv_check_asm_globl foo
+ $gmp_tmp_op 0
+foo${gmp_cv_check_asm_label_suffix}
+ .byte 0
+EOF
+ if { (eval echo configure:2388: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ if test "$gmp_tmp_val" = "4"; then
+ gmp_cv_check_asm_w32="$gmp_tmp_op"
+ break
+ fi
+ fi
+ done
+ ;;
+esac
+
+if test -z "$gmp_cv_check_asm_w32"; then
+ echo; echo "configure: GMP_CHECK_ASM_W32: fatal: do not know how to define a 32-bit word"
+ exit 1
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_w32" 1>&6
+echo "define(<W32>, <$gmp_cv_check_asm_w32>)" >> $gmp_tmpconfigm4
+
+ echo $ECHO_N "checking if symbols are prefixed by underscore... $ECHO_C" 1>&6
+echo "configure:2412: checking if symbols are prefixed by underscore" 1>&5
+if test "${gmp_cv_check_asm_underscore+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2417 "configure"
+#include "confdefs.h"
+int underscore_test() {
+return; }
+EOF
+if { (eval echo configure:2422: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ if grep _underscore_test conftest* >/dev/null; then
+ gmp_cv_check_asm_underscore=yes
+ else
+ gmp_cv_check_asm_underscore=no
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_underscore" 1>&6
+if test "$gmp_cv_check_asm_underscore" = "yes"; then
+
+echo 'define(<GSYM_PREFIX>, <_>)' >> $gmp_tmpconfigm4
+
+ underscore=yes
+else
+
+echo 'define(<GSYM_PREFIX>, <>)' >> $gmp_tmpconfigm4
+
+ underscore=no
+fi
+
+echo $ECHO_N "checking if .align assembly directive is logarithmic... $ECHO_C" 1>&6
+echo "configure:2449: checking if .align assembly directive is logarithmic" 1>&5
+if test "${gmp_cv_check_asm_align_log+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$NM"; then
+ echo; echo "configure: GMP_CHECK_ASM_ALIGN_LOG: fatal: need nm"
+ exit 1
+fi
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_data
+ .align 4
+ $gmp_cv_check_asm_globl foo
+ .byte 1
+ .align 4
+foo$gmp_cv_check_asm_label_suffix
+ .byte 2
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+if { (eval echo configure:2467: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+
+ gmp_tmp_val=`$NM conftest.o | grep foo | sed -e 's;[[][0-9][]]\(.*\);\1;' \
+ -e 's;[^1-9]*\([0-9]*\).*;\1;'`
+ if test "$gmp_tmp_val" = "10" || test "$gmp_tmp_val" = "16"; then
+ gmp_cv_check_asm_align_log=yes
+ else
+ gmp_cv_check_asm_align_log=no
+ fi
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.s >&5
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_align_log" 1>&6
+
+echo "define(<ALIGN_LOGARITHMIC>,<$gmp_cv_check_asm_align_log>)" >> $gmp_tmpconfigm4
+
+if test "$gmp_cv_check_asm_align_log" = "yes"; then
+ asm_align=log
+else
+ asm_align=nolog
+fi
+
+fi
+
+family=generic
+
+case ${target} in
+ arm*-*-*)
+ path="arm"
+ ;;
+ sparcv9*-*-solaris2.[789]* | sparc64*-*-solaris2.[789]* | ultrasparc*-*-solaris2.[789]*)
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparc64-*-linux*)
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparcv8*-*-* | microsparc*-*-*)
+ path="sparc32/v8 sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-*)
+ path="sparc32/v9 sparc32/v8 sparc32"
+ extra_functions="udiv_fp"
+ ;;
+ supersparc*-*-*)
+ path="sparc32/v8/supersparc sparc32/v8 sparc32"
+ extra_functions="udiv"
+ ;;
+ sparc*-*-*) path="sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ hppa7000*-*-*)
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa1.0*-*-*)
+ path="hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa2.0w-*-*)
+ path="pa64w"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ ;;
+ hppa2.0*-*-*)
+ if test -n "$CC64"; then
+ path="pa64"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ # We need to use the system compiler, or actually the system assembler,
+ # since GAS has not been ported to understand the 2.0 instructions.
+ CCAS="$CC64 -c"
+ else
+ # FIXME: path should be "hppa/hppa2_0 hppa/hppa1_1 hppa"
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ fi
+ ;;
+ hppa*-*-*) #assume pa7100
+ path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd";;
+ f30[01]-fujitsu-sysv*)
+ path=fujitsu;;
+ alphaev6*-*-*) path="alpha/ev6 alpha"; extra_functions="invert_limb cntlz";;
+ alphaev5*-*-*) path="alpha/ev5 alpha"; extra_functions="invert_limb cntlz";;
+ alpha*-*-*) path="alpha"; extra_functions="invert_limb cntlz";;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ path="cray"
+ extra_functions="mulww";;
+ am29000*-*-*) path="a29k";;
+ a29k*-*-*) path="a29k";;
+
+ # AMD and Intel x86 configurations
+
+ i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*)
+ gmp_m4postinc="x86/x86-defs.m4"
+ extra_functions="udiv umul"
+ CALLING_CONVENTIONS_OBJS="x86call.o x86check.o"
+
+echo $ECHO_N "checking if the assembler takes cl with shldl... $ECHO_C" 1>&6
+echo "configure:2583: checking if the assembler takes cl with shldl" 1>&5
+if test "${gmp_cv_check_asm_shldl_cl+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ shldl %cl, %eax, %ebx
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+if { (eval echo configure:2592: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_shldl_cl=yes
+else
+ gmp_cv_check_asm_shldl_cl=no
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_shldl_cl" 1>&6
+if test "$gmp_cv_check_asm_shldl_cl" = "yes"; then
+
+echo 'define(<WANT_SHLDL_CL>, <1>)' >> $gmp_tmpconfigm4
+
+else
+
+echo 'define(<WANT_SHLDL_CL>, <0>)' >> $gmp_tmpconfigm4
+
+fi
+
+ echo $ECHO_N "checking if the .align directive accepts an 0x90 fill in .text... $ECHO_C" 1>&6
+echo "configure:2612: checking if the .align directive accepts an 0x90 fill in .text" 1>&5
+if test "${gmp_cv_check_asm_align_fill_0x90+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ .align 4, 0x90
+ .byte 0
+ .align 4, 0x90
+EOF
+gmp_tmp_val="`$CCAS $CFLAGS conftest.s 2>&1`"
+if test $? = 0; then
+ echo "$gmp_tmp_val" 1>&5
+ if echo "$gmp_tmp_val" | grep "Warning: Fill parameter ignored for executable section"; then
+ echo "Supressing this warning by omitting 0x90" 1>&5
+ gmp_cv_check_asm_align_fill_0x90=no
+ else
+ gmp_cv_check_asm_align_fill_0x90=yes
+ fi
+else
+ echo "Non-zero exit code" 1>&5
+ echo "$gmp_tmp_val" 1>&5
+ gmp_cv_check_asm_align_fill_0x90=no
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_align_fill_0x90" 1>&6
+
+echo "define(<ALIGN_FILL_0x90>,<$gmp_cv_check_asm_align_fill_0x90>)" >> $gmp_tmpconfigm4
+
+ # the CPUs below wanting to know about mmx
+ case ${target} in
+ pentiummmx-*-* | pentium[23]-*-* | k6*-*-* | athlon-*-*)
+
+echo $ECHO_N "checking if the assembler knows about MMX instructions... $ECHO_C" 1>&6
+echo "configure:2649: checking if the assembler knows about MMX instructions" 1>&5
+if test "${gmp_cv_check_asm_mmx+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat > conftest.s <<EOF
+ $gmp_cv_check_asm_text
+ por %mm0, %mm0
+EOF
+ac_assemble="$CCAS $CFLAGS conftest.s 1>&5"
+if { (eval echo configure:2658: \"$ac_assemble\") 1>&5; (eval $ac_assemble) 2>&5; }; then
+ gmp_cv_check_asm_mmx=yes
+else
+ gmp_cv_check_asm_mmx=no
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""$gmp_cv_check_asm_mmx" 1>&6
+if test "$gmp_cv_check_asm_mmx" = "yes"; then
+ tmp_mmx=yes
+else
+ echo "configure: warning: +----------------------------------------------------------" 1>&2
+ echo "configure: warning: | WARNING WARNING WARNING" 1>&2
+ echo "configure: warning: | Target CPU has MMX code, but it can't be assembled by" 1>&2
+ echo "configure: warning: | $CCAS $CFLAGS" 1>&2
+ echo "configure: warning: | Non-MMX replacements will be used." 1>&2
+ echo "configure: warning: | This will be an inferior build." 1>&2
+ echo "configure: warning: +----------------------------------------------------------" 1>&2
+ tmp_mmx=no
+fi
+
+ ;;
+ esac
+
+ # default for anything not otherwise mentioned
+ path="x86"
+
+ case ${target} in
+ i[34]86*-*-*)
+ path="x86"
+ ;;
+ k5*-*-*)
+ # don't know what best suits k5
+ path="x86"
+ ;;
+ i586*-*-* | pentium-*-*)
+ path="x86/pentium x86"
+ ;;
+ pentiummmx-*-*)
+ path="x86/pentium x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/pentium/mmx $path"
+ fi
+ ;;
+ i686*-*-* | pentiumpro-*-*)
+ path="x86/p6 x86"
+ ;;
+ pentium2-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ pentium3-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/p3mmx x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ k6[23]*-*-*)
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/k62mmx x86/k6/mmx $path"
+ fi
+ ;;
+ k6*-*-*)
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/mmx $path"
+ fi
+ ;;
+ athlon-*-*)
+ path="x86/k7 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k7/mmx $path"
+ fi
+ ;;
+ esac
+ ;;
+
+ i960*-*-*) path="i960";;
+
+ ia64*-*-*) path="ia64";;
+
+# Motorola 68k configurations. Let m68k mean 68020-68040.
+ m680[234]0*-*-* | m68k*-*-* | \
+ m68*-next-nextstep*) # Nexts are at least '020
+ path="m68k/mc68020 m68k"
+ family=m68k
+ ;;
+ m68000*-*-*)
+ path="m68k"
+ family=m68k
+ ;;
+
+ m88k*-*-* | m88k*-*-*) path="m88k";;
+ m88110*-*-*) path="m88k/mc88110 m88k";;
+ ns32k*-*-*) path="ns32k";;
+
+ pyramid-*-*) path="pyr";;
+
+ ppc601-*-*) path="power powerpc32";;
+ powerpc64*-*-*) path="powerpc64";;
+ powerpc*-*-*) path="powerpc32";;
+ rs6000-*-* | power-*-* | power2-*-*)
+ path="power"
+ extra_functions="udiv_w_sdiv"
+ ;;
+
+ sh-*-*) path="sh";;
+ sh2-*-*) path="sh/sh2 sh";;
+
+ mips[34]*-*-*) path="mips3";;
+ mips*-*-irix6*) path="mips3";;
+ mips*-*-*) path="mips2";;
+
+ vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv";;
+
+ z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv";;
+ z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv";;
+
+ clipper*-*-*) path="clipper";;
+esac
+
+if test -n "$CALLING_CONVENTIONS_OBJS"; then
+ cat >>confdefs.h <<\EOF
+#define HAVE_CALLING_CONVENTIONS 1
+EOF
+
+fi
+
+case ${target} in
+ i[5-8]86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*)
+ # rdtsc is in pentium and up, not in i386 and i486
+ SPEED_CYCLECOUNTER_OBJS=pentium.lo
+ ;;
+ alpha*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=alpha.lo
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-* | sparc64*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=sparcv9.lo
+ ;;
+ hppa2*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa2.lo
+ ;;
+ hppa*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa.lo
+ ;;
+esac
+
+if test -n "$SPEED_CYCLECOUNTER_OBJS"
+then
+ cat >>confdefs.h <<\EOF
+#define HAVE_SPEED_CYCLECOUNTER 1
+EOF
+
+fi
+
+echo $ECHO_N "checking for Cygwin environment... $ECHO_C" 1>&6
+echo "configure:2822: checking for Cygwin environment" 1>&5
+if test "${ac_cv_cygwin+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 2827 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+#ifndef __CYGWIN__
+# define __CYGWIN__ __CYGWIN32__
+#endif
+return __CYGWIN__;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:2841: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_cygwin=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_cygwin=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_cygwin" 1>&6
+CYGWIN=
+test "$ac_cv_cygwin" = yes && CYGWIN=yes
+echo $ECHO_N "checking for mingw32 environment... $ECHO_C" 1>&6
+echo "configure:2856: checking for mingw32 environment" 1>&5
+if test "${ac_cv_mingw32+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 2861 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+return __MINGW32__;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:2872: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_mingw32=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_mingw32=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_mingw32" 1>&6
+MINGW32=
+test "$ac_cv_mingw32" = yes && MINGW32=yes
+echo $ECHO_N "checking for EMX OS/2 environment... $ECHO_C" 1>&6
+echo "configure:2887: checking for EMX OS/2 environment" 1>&5
+if test "${ac_cv_emxos2+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 2892 "configure"
+#include "confdefs.h"
+
+int
+main ()
+{
+return __EMX__;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:2903: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_emxos2=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_emxos2=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_emxos2" 1>&6
+EMXOS2=
+test "$ac_cv_emxos2" = yes && EMXOS2=yes
+
+echo $ECHO_N "checking for executable suffix... $ECHO_C" 1>&6
+echo "configure:2919: checking for executable suffix" 1>&5
+if test "${ac_cv_exeext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test "$CYGWIN" = yes || test "$MINGW32" = yes || test "$EMXOS2" = yes; then
+ ac_cv_exeext=.exe
+else
+ rm -f conftest*
+ echo 'int main () { return 0; }' >conftest.$ac_ext
+ ac_cv_exeext=
+ if { (eval echo configure:2929: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then
+ for ac_file in conftest.*; do
+ case $ac_file in
+ *.c | *.C | *.o | *.obj | *.xcoff) ;;
+ *) ac_cv_exeext=`echo $ac_file | sed -e s/conftest//` ;;
+ esac
+ done
+ else
+ { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; }
+ fi
+ rm -f conftest*
+ test x"${ac_cv_exeext}" = x && ac_cv_exeext=no
+fi
+fi
+
+EXEEXT=""
+test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext}
+echo "$ECHO_T""${ac_cv_exeext}" 1>&6
+ac_exeext=$EXEEXT
+
+echo $ECHO_N "checking for object suffix... $ECHO_C" 1>&6
+echo "configure:2950: checking for object suffix" 1>&5
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ rm -f conftest*
+echo 'int i = 1;' >conftest.$ac_ext
+if { (eval echo configure:2956: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ for ac_file in conftest.*; do
+ case $ac_file in
+ *.c) ;;
+ *) ac_cv_objext=`echo $ac_file | sed -e s/conftest.//` ;;
+ esac
+ done
+else
+ { echo "configure: error: installation or configuration problem; compiler does not work" 1>&2; exit 1; }
+fi
+rm -f conftest*
+fi
+
+echo "$ECHO_T""$ac_cv_objext" 1>&6
+OBJEXT=$ac_cv_objext
+ac_objext=$ac_cv_objext
+
+case "$target" in
+ *-*-aix4.[3-9]*) enable_shared=no ;;
+esac
+# Check whether --enable-shared or --disable-shared was given.
+if test "${enable_shared+set}" = set; then
+ enableval="$enable_shared"
+ p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_shared=yes ;;
+no) enable_shared=no ;;
+*)
+ enable_shared=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_shared=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac
+else
+ enable_shared=yes
+fi
+# Check whether --enable-static or --disable-static was given.
+if test "${enable_static+set}" = set; then
+ enableval="$enable_static"
+ p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_static=yes ;;
+no) enable_static=no ;;
+*)
+ enable_static=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_static=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac
+else
+ enable_static=yes
+fi
+# Check whether --enable-fast-install or --disable-fast-install was given.
+if test "${enable_fast_install+set}" = set; then
+ enableval="$enable_fast_install"
+ p=${PACKAGE-default}
+case "$enableval" in
+yes) enable_fast_install=yes ;;
+no) enable_fast_install=no ;;
+*)
+ enable_fast_install=no
+ # Look at the argument we got. We use all the common list separators.
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:,"
+ for pkg in $enableval; do
+ if test "X$pkg" = "X$p"; then
+ enable_fast_install=yes
+ fi
+ done
+ IFS="$ac_save_ifs"
+ ;;
+esac
+else
+ enable_fast_install=yes
+fi
+
+echo $ECHO_N "checking build system type... $ECHO_C" 1>&6
+echo "configure:3044: checking build system type" 1>&5
+if test "x$ac_cv_build" = "x" || (test "x$build" != "xNONE" && test "x$build" != "x$ac_cv_build_alias"); then
+
+ # Make sure we can run config.sub.
+ if $ac_config_sub sun4 >/dev/null 2>&1; then :; else
+ { echo "configure: error: cannot run $ac_config_sub" 1>&2; exit 1; }
+ fi
+
+ ac_cv_build_alias=$build
+ case "$ac_cv_build_alias" in
+ NONE)
+ case $nonopt in
+ NONE)
+ ac_cv_build_alias=$host_alias ;;
+ *) ac_cv_build_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || exit 1
+ ac_cv_build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ ac_cv_build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ ac_cv_build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+else
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+fi
+
+echo "$ECHO_T""$ac_cv_build" 1>&6
+
+build=$ac_cv_build
+build_alias=$ac_cv_build_alias
+build_cpu=$ac_cv_build_cpu
+build_vendor=$ac_cv_build_vendor
+build_os=$ac_cv_build_os
+
+# Check whether --with-gnu-ld or --without-gnu-ld was given.
+if test "${with_gnu_ld+set}" = set; then
+ withval="$with_gnu_ld"
+ test "$withval" = no || with_gnu_ld=yes
+else
+ with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$ac_cv_prog_gcc" = yes; then
+ # Check if gcc -print-prog-name=ld gives a path.
+ echo $ECHO_N "checking for ld used by GCC... $ECHO_C" 1>&6
+echo "configure:3090: checking for ld used by GCC" 1>&5
+ case $target in
+ *-*-mingw*)
+ # gcc leaves a trailing carriage return which upsets mingw
+ ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+ *)
+ ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+ esac
+ case "$ac_prog" in
+ # Accept absolute paths.
+ [\\/]* | [A-Za-z]:[\\/]*)
+ re_direlt='/[^/][^/]*/\.\./'
+ # Canonicalize the path of ld
+ ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'`
+ while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+ ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"`
+ done
+ test -z "$LD" && LD="$ac_prog"
+ ;;
+ "")
+ # If it fails, then pretend we aren't using GCC.
+ ac_prog=ld
+ ;;
+ *)
+ # If it is relative, then search for the first ld in PATH.
+ with_gnu_ld=unknown
+ ;;
+ esac
+elif test "$with_gnu_ld" = yes; then
+ echo $ECHO_N "checking for GNU ld... $ECHO_C" 1>&6
+echo "configure:3120: checking for GNU ld" 1>&5
+else
+ echo $ECHO_N "checking for non-GNU ld... $ECHO_C" 1>&6
+echo "configure:3123: checking for non-GNU ld" 1>&5
+fi
+if test "${ac_cv_path_LD+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -z "$LD"; then
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}"
+ for ac_dir in $PATH; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+ ac_cv_path_LD="$ac_dir/$ac_prog"
+ # Check to see if the program is GNU ld. I'd rather use --version,
+ # but apparently some GNU ld's only accept -v.
+ # Break only if it was the GNU/non-GNU ld that we prefer.
+ if "$ac_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
+ test "$with_gnu_ld" != no && break
+ else
+ test "$with_gnu_ld" != yes && break
+ fi
+ fi
+ done
+ IFS="$ac_save_ifs"
+else
+ ac_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$ac_cv_path_LD"
+if test -n "$LD"; then
+ echo "$ECHO_T""$LD" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+test -z "$LD" && { echo "configure: error: no acceptable ld found in \$PATH" 1>&2; exit 1; }
+echo $ECHO_N "checking if the linker ($LD) is GNU ld... $ECHO_C" 1>&6
+echo "configure:3158: checking if the linker ($LD) is GNU ld" 1>&5
+if test "${ac_cv_prog_gnu_ld+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ # I'd rather use --version here, but apparently some GNU ld's only accept -v.
+if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
+ ac_cv_prog_gnu_ld=yes
+else
+ ac_cv_prog_gnu_ld=no
+fi
+fi
+echo "$ECHO_T""$ac_cv_prog_gnu_ld" 1>&6
+with_gnu_ld=$ac_cv_prog_gnu_ld
+
+echo $ECHO_N "checking for $LD option to reload object files... $ECHO_C" 1>&6
+echo "configure:3173: checking for $LD option to reload object files" 1>&5
+if test "${lt_cv_ld_reload_flag+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ lt_cv_ld_reload_flag='-r'
+fi
+echo "$ECHO_T""$lt_cv_ld_reload_flag" 1>&6
+reload_flag=$lt_cv_ld_reload_flag
+test -n "$reload_flag" && reload_flag=" $reload_flag"
+
+echo $ECHO_N "checking how to recognise dependant libraries... $ECHO_C" 1>&6
+echo "configure:3184: checking how to recognise dependant libraries" 1>&5
+if test "${lt_cv_deplibs_check_method+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ lt_cv_file_magic_cmd='${MAGIC}'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [regex]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given egrep regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case "$host_os" in
+aix4* | beos*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+bsdi4*)
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ ;;
+
+cygwin* | mingw*)
+ lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+ lt_cv_file_magic_cmd='${OBJDUMP} -f'
+ ;;
+
+freebsd*)
+ case "$version_type" in
+ freebsd-elf*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ esac
+ ;;
+
+gnu*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+irix5* | irix6*)
+ case "$host_os" in
+ irix5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1"
+ ;;
+ *)
+ case "$LD" in
+ *-32|*"-32 ") libmagic=32-bit;;
+ *-n32|*"-n32 ") libmagic=N32;;
+ *-64|*"-64 ") libmagic=64-bit;;
+ *) libmagic=never-match;;
+ esac
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"
+ ;;
+ esac
+ lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*`
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+ case "$host_cpu" in
+ alpha* | i*86 | powerpc* | sparc* )
+ lt_cv_deplibs_check_method=pass_all ;;
+ *)
+ # glibc up to 2.1.1 does not perform some relocations on ARM
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;
+ esac
+ lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so`
+ ;;
+
+osf3* | osf4* | osf5*)
+ # this will be overridden with pass_all, but let us keep it just in case
+ lt_cv_deplibs_check_method='file_magic COFF format alpha shared library'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+sco3.2v5*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+solaris*)
+ lt_cv_deplibs_check_method=pass_all
+ lt_cv_file_magic_test_file=/lib/libc.so
+ ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ case "$host_vendor" in
+ ncr)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ motorola)
+ lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+ ;;
+ esac
+ ;;
+esac
+
+fi
+echo "$ECHO_T""$lt_cv_deplibs_check_method" 1>&6
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+
+if test $host != $build; then
+ ac_tool_prefix=${host_alias}-
+else
+ ac_tool_prefix=
+fi
+
+# Only perform the check for file, if the check method requires it
+case "$deplibs_check_method" in
+file_magic*)
+ if test "$file_magic_cmd" = '${MAGIC}'; then
+
+echo $ECHO_N "checking for ${ac_tool_prefix}file... $ECHO_C" 1>&6
+echo "configure:3308: checking for ${ac_tool_prefix}file" 1>&5
+if test "${lt_cv_path_MAGIC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$MAGIC" in
+ /*)
+ lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path.
+ ;;
+ ?:/*)
+ ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path.
+ ;;
+ *)
+ ac_save_MAGIC="$MAGIC"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ ac_dummy="/usr/bin:$PATH"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/${ac_tool_prefix}file; then
+ lt_cv_path_MAGIC="$ac_dir/${ac_tool_prefix}file"
+ if test -n "$file_magic_test_file"; then
+ case "$deplibs_check_method" in
+ "file_magic "*)
+ file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+ MAGIC="$lt_cv_path_MAGIC"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ egrep "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ MAGIC="$ac_save_MAGIC"
+ ;;
+esac
+fi
+
+MAGIC="$lt_cv_path_MAGIC"
+if test -n "$MAGIC"; then
+ echo "$ECHO_T""$MAGIC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$lt_cv_path_MAGIC"; then
+ if test -n "$ac_tool_prefix"; then
+ echo $ECHO_N "checking for file... $ECHO_C" 1>&6
+echo "configure:3370: checking for file" 1>&5
+if test "${lt_cv_path_MAGIC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ case "$MAGIC" in
+ /*)
+ lt_cv_path_MAGIC="$MAGIC" # Let the user override the test with a path.
+ ;;
+ ?:/*)
+ ac_cv_path_MAGIC="$MAGIC" # Let the user override the test with a dos path.
+ ;;
+ *)
+ ac_save_MAGIC="$MAGIC"
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ ac_dummy="/usr/bin:$PATH"
+ for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/file; then
+ lt_cv_path_MAGIC="$ac_dir/file"
+ if test -n "$file_magic_test_file"; then
+ case "$deplibs_check_method" in
+ "file_magic "*)
+ file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`"
+ MAGIC="$lt_cv_path_MAGIC"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ egrep "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ MAGIC="$ac_save_MAGIC"
+ ;;
+esac
+fi
+
+MAGIC="$lt_cv_path_MAGIC"
+if test -n "$MAGIC"; then
+ echo "$ECHO_T""$MAGIC" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ MAGIC=:
+ fi
+fi
+
+ fi
+ ;;
+esac
+
+case "$target" in
+NONE) lt_target="$host" ;;
+*) lt_target="$target" ;;
+esac
+
+# Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3446: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$RANLIB"; then
+ ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+ break
+ done
+fi
+fi
+RANLIB="$ac_cv_prog_RANLIB"
+if test -n "$RANLIB"; then
+ echo "$ECHO_T""$RANLIB" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$ac_cv_prog_RANLIB"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3480: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$RANLIB"; then
+ ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_RANLIB="ranlib"
+ break
+ done
+ test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":"
+fi
+fi
+RANLIB="$ac_cv_prog_RANLIB"
+if test -n "$RANLIB"; then
+ echo "$ECHO_T""$RANLIB" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ RANLIB=":"
+ fi
+fi
+
+# Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3518: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_STRIP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$STRIP"; then
+ ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+ break
+ done
+fi
+fi
+STRIP="$ac_cv_prog_STRIP"
+if test -n "$STRIP"; then
+ echo "$ECHO_T""$STRIP" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+if test -z "$ac_cv_prog_STRIP"; then
+ if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+echo $ECHO_N "checking for $ac_word... $ECHO_C" 1>&6
+echo "configure:3552: checking for $ac_word" 1>&5
+if test "${ac_cv_prog_STRIP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ if test -n "$STRIP"; then
+ ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+ for ac_path in `IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+ac_dummy="$PATH"
+for ac_dir in $ac_dummy; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ echo "$ac_dir/$ac_word"
+ fi
+done
+IFS="$ac_save_ifs"
+`; do
+ ac_cv_prog_STRIP="strip"
+ break
+ done
+ test -z "$ac_cv_prog_STRIP" && ac_cv_prog_STRIP=":"
+fi
+fi
+STRIP="$ac_cv_prog_STRIP"
+if test -n "$STRIP"; then
+ echo "$ECHO_T""$STRIP" 1>&6
+else
+ echo "$ECHO_T""no" 1>&6
+fi
+
+ else
+ STRIP=":"
+ fi
+fi
+
+# Check for any special flags to pass to ltconfig.
+libtool_flags="--cache-file=$cache_file"
+test "$enable_shared" = no && libtool_flags="$libtool_flags --disable-shared"
+test "$enable_static" = no && libtool_flags="$libtool_flags --disable-static"
+test "$enable_fast_install" = no && libtool_flags="$libtool_flags --disable-fast-install"
+test "$ac_cv_prog_gcc" = yes && libtool_flags="$libtool_flags --with-gcc"
+test "$ac_cv_prog_gnu_ld" = yes && libtool_flags="$libtool_flags --with-gnu-ld"
+
+# Check whether --enable-libtool-lock or --disable-libtool-lock was given.
+if test "${enable_libtool_lock+set}" = set; then
+ enableval="$enable_libtool_lock"
+
+fi
+test "x$enable_libtool_lock" = xno && libtool_flags="$libtool_flags --disable-lock"
+test x"$silent" = xyes && libtool_flags="$libtool_flags --silent"
+
+# Check whether --with-pic or --without-pic was given.
+if test "${with_pic+set}" = set; then
+ withval="$with_pic"
+ pic_mode="$withval"
+else
+ pic_mode=default
+fi
+test x"$pic_mode" = xyes && libtool_flags="$libtool_flags --prefer-pic"
+test x"$pic_mode" = xno && libtool_flags="$libtool_flags --prefer-non-pic"
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case "$lt_target" in
+*-*-irix6*)
+ # Find out which ABI we are using.
+ echo '#line 3618 "configure"' > conftest.$ac_ext
+ if { (eval echo configure:3619: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ case "`/usr/bin/file conftest.o`" in
+ *32-bit*)
+ LD="${LD-ld} -32"
+ ;;
+ *N32*)
+ LD="${LD-ld} -n32"
+ ;;
+ *64-bit*)
+ LD="${LD-ld} -64"
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+
+*-*-sco3.2v5*)
+ # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -belf"
+ echo $ECHO_N "checking whether the C compiler needs -belf... $ECHO_C" 1>&6
+echo "configure:3640: checking whether the C compiler needs -belf" 1>&5
+if test "${lt_cv_cc_needs_belf+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+ cat >conftest.$ac_ext <<EOF
+#line 3653 "configure"
+#include "confdefs.h"
+
+int
+main()
+{
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:3664: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ lt_cv_cc_needs_belf=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ lt_cv_cc_needs_belf=no
+fi
+rm -f conftest*
+
+ ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+cross_compiling=$ac_cv_prog_cc_cross
+
+fi
+echo "$ECHO_T""$lt_cv_cc_needs_belf" 1>&6
+ if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+ # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+ CFLAGS="$SAVE_CFLAGS"
+ fi
+ ;;
+
+esac
+
+# Save cache, so that ltconfig can load it
+cat >confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote substitution
+ # turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ -e "s/'/'\\\\''/g" \
+ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+ ;;
+ esac >>confcache
+if cmp -s $cache_file confcache; then :; else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+# Actually configure libtool. ac_aux_dir is where install-sh is found.
+AR="$AR" CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" \
+MAGIC="$MAGIC" LD="$LD" LDFLAGS="$LDFLAGS" LIBS="$LIBS" \
+LN_S="$LN_S" NM="$NM" RANLIB="$RANLIB" STRIP="$STRIP" \
+AS="$AS" DLLTOOL="$DLLTOOL" OBJDUMP="$OBJDUMP" \
+objext="$OBJEXT" exeext="$EXEEXT" reload_flag="$reload_flag" \
+deplibs_check_method="$deplibs_check_method" file_magic_cmd="$file_magic_cmd" \
+${CONFIG_SHELL-/bin/sh} $ac_aux_dir/ltconfig --no-reexec \
+$libtool_flags --no-verify --build="$build" $ac_aux_dir/ltmain.sh $lt_target \
+|| { echo "configure: error: libtool configure failed" 1>&2; exit 1; }
+
+# Reload cache, that may have been modified by ltconfig
+if test -r "$cache_file"; then
+ echo "loading cache $cache_file"
+ test -f "$cache_file" && . $cache_file
+else
+ echo "creating cache $cache_file"
+ >$cache_file
+fi
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltconfig $ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+# Redirect the config.log output again, so that the ltconfig log is not
+# clobbered by the next message.
+exec 5>>./config.log
+
+echo $ECHO_N "checking whether optarg is declared... $ECHO_C" 1>&6
+echo "configure:3769: checking whether optarg is declared" 1>&5
+if test "${ac_cv_have_decl_optarg+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 3774 "configure"
+#include "confdefs.h"
+$ac_includes_default
+int
+main ()
+{
+#ifndef optarg
+ char *p = (char *) optarg;
+#endif
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:3788: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_have_decl_optarg=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_have_decl_optarg=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_have_decl_optarg" 1>&6
+if test $ac_cv_have_decl_optarg = yes; then
+ cat >>confdefs.h <<EOF
+#define HAVE_DECL_OPTARG 1
+EOF
+
+else
+ cat >>confdefs.h <<EOF
+#define HAVE_DECL_OPTARG 0
+EOF
+
+fi
+
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" 1>&6
+echo "configure:3813: checking for ANSI C header files" 1>&5
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 3819 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:3828: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ ac_cv_header_stdc=yes
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 3846 "configure"
+#include "confdefs.h"
+#include <string.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "memchr" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 3866 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "free" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<EOF
+#line 3888 "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+EOF
+if { (eval echo configure:3913: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+then
+ :
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ ac_cv_header_stdc=no
+fi
+rm -fr conftest*
+
+fi
+
+fi
+fi
+echo "$ECHO_T""$ac_cv_header_stdc" 1>&6
+if test $ac_cv_header_stdc = yes; then
+ cat >>confdefs.h <<\EOF
+#define STDC_HEADERS 1
+EOF
+
+fi
+
+for ac_header in getopt.h unistd.h sys/sysctl.h sys/time.h
+do
+ac_ac_Header=`echo "ac_cv_header_$ac_header" | $ac_tr_sh`
+echo $ECHO_N "checking for $ac_header... $ECHO_C" 1>&6
+echo "configure:3940: checking for $ac_header" 1>&5
+if eval "test \"\${$ac_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 3946 "configure"
+#include "confdefs.h"
+#include <$ac_header>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:3952: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "$ac_ac_Header=yes"
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "$ac_ac_Header=no"
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""`eval echo '${'$ac_ac_Header'}'`" 1>&6
+if test `eval echo '${'$ac_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<EOF
+#define `echo "HAVE_$ac_header" | $ac_tr_cpp` 1
+EOF
+
+fi
+done
+
+echo $ECHO_N "checking for void... $ECHO_C" 1>&6
+echo "configure:3976: checking for void" 1>&5
+if test "${ac_cv_type_void+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 3981 "configure"
+#include "confdefs.h"
+$ac_includes_default
+int
+main ()
+{
+if ((void *) 0)
+ return 0;
+if (sizeof (void))
+ return 0;
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:3995: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ rm -rf conftest*
+ ac_cv_type_void=yes
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_type_void=no
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""$ac_cv_type_void" 1>&6
+if test $ac_cv_type_void = yes; then
+ cat >>confdefs.h <<EOF
+#define HAVE_VOID 1
+EOF
+
+fi
+
+echo $ECHO_N "checking for preprocessor stringizing operator... $ECHO_C" 1>&6
+echo "configure:4015: checking for preprocessor stringizing operator" 1>&5
+if test "${ac_cv_c_stringize+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 4021 "configure"
+#include "confdefs.h"
+
+#define x(y) #y
+
+char *s = x(teststring);
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "#teststring" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_c_stringize=no
+else
+ rm -rf conftest*
+ ac_cv_c_stringize=yes
+fi
+rm -f conftest*
+
+fi
+
+if test "${ac_cv_c_stringize}" = yes; then
+ cat >>confdefs.h <<\EOF
+#define HAVE_STRINGIZE 1
+EOF
+
+fi
+echo "$ECHO_T""${ac_cv_c_stringize}" 1>&6
+
+for ac_func in getopt_long getpagesize popen processor_info strtoul sysconf sysctlbyname
+do
+ac_ac_var=`echo "ac_cv_func_$ac_func" | $ac_tr_sh`
+echo $ECHO_N "checking for $ac_func... $ECHO_C" 1>&6
+echo "configure:4053: checking for $ac_func" 1>&5
+if eval "test \"\${$ac_ac_var+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.$ac_ext <<EOF
+#line 4058 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+char $ac_func();
+char (*f)();
+
+int
+main()
+{
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
+choke me
+#else
+f = $ac_func;
+#endif
+
+ ;
+ return 0;
+}
+EOF
+if { (eval echo configure:4086: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ eval "$ac_ac_var=yes"
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "$ac_ac_var=no"
+fi
+rm -f conftest*
+
+fi
+echo "$ECHO_T""`eval echo '${'$ac_ac_var'}'`" 1>&6
+if test `eval echo '${'$ac_ac_var'}'` = yes; then
+ cat >>confdefs.h <<EOF
+#define `echo "HAVE_$ac_func" | $ac_tr_cpp` 1
+EOF
+
+fi
+done
+
+echo $ECHO_N "checking if ansi2knr should be used... $ECHO_C" 1>&6
+echo "configure:4108: checking if ansi2knr should be used" 1>&5
+if test "${gmp_cv_c_ansi2knr+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+ cat >conftest.c <<EOF
+int main (int argc, char *argv) { return 0; }
+EOF
+if { (eval echo configure:4115: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
+ gmp_cv_c_ansi2knr=no
+else
+ gmp_cv_c_ansi2knr=yes
+fi
+rm -f conftest.*
+
+fi
+echo "$ECHO_T""$gmp_cv_c_ansi2knr" 1>&6
+if test $gmp_cv_c_ansi2knr = no; then
+ U= ANSI2KNR=
+else
+ U=_ ANSI2KNR=./ansi2knr
+ # Ensure some checks needed by ansi2knr itself.
+
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" 1>&6
+echo "configure:4131: checking for ANSI C header files" 1>&5
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 4137 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:4146: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ ac_cv_header_stdc=yes
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 4164 "configure"
+#include "confdefs.h"
+#include <string.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "memchr" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+
+cat >conftest.$ac_ext <<EOF
+#line 4184 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "free" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<EOF
+#line 4206 "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ exit(2);
+ exit (0);
+}
+EOF
+if { (eval echo configure:4231: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null
+then
+ :
+else
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ ac_cv_header_stdc=no
+fi
+rm -fr conftest*
+
+fi
+
+fi
+fi
+echo "$ECHO_T""$ac_cv_header_stdc" 1>&6
+if test $ac_cv_header_stdc = yes; then
+ cat >>confdefs.h <<\EOF
+#define STDC_HEADERS 1
+EOF
+
+fi
+
+ for ac_header in string.h
+do
+ac_ac_Header=`echo "ac_cv_header_$ac_header" | $ac_tr_sh`
+echo $ECHO_N "checking for $ac_header... $ECHO_C" 1>&6
+echo "configure:4258: checking for $ac_header" 1>&5
+if eval "test \"\${$ac_ac_Header+set}\" = set"; then
+ echo $ECHO_N "(cached) $ECHO_C" 1>&6
+else
+
+cat >conftest.$ac_ext <<EOF
+#line 4264 "configure"
+#include "confdefs.h"
+#include <$ac_header>
+
+EOF
+ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+{ (eval echo configure:4270: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "$ac_ac_Header=yes"
+else
+ echo "$ac_err" >&5
+ echo "configure: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "$ac_ac_Header=no"
+fi
+rm -f conftest*
+fi
+echo "$ECHO_T""`eval echo '${'$ac_ac_Header'}'`" 1>&6
+if test `eval echo '${'$ac_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<EOF
+#define `echo "HAVE_$ac_header" | $ac_tr_cpp` 1
+EOF
+
+fi
+done
+
+fi
+
+syntax=
+# For now, we use the old switch for setting syntax.
+# FIXME: Remove when conversion to .asm is completed.
+case "${target}" in
+ m680[234]0*-*-linuxaout* | m68k*-*-linuxaout* | \
+ m68k-next-nextstep* | \
+ m68000*-*-*)
+ syntax=mit
+ ;;
+ m680[234]0*-*-linux* | m68k*-*-linux*)
+ syntax=elf
+ ;;
+ m680[234]0*-*-* | m68k*-*-*)
+ syntax=mit
+ ;;
+esac
+
+# Now build an asm-syntax.h file for targets that include that from the
+# assembly files.
+# FIXME: Remove when conversion to .asm is completed.
+case "${family}-${underscore}-${asm_align}-${syntax}" in
+ m68k-yes-log-mit)
+ echo '#define MIT_SYNTAX' >asm-syntax.h
+ cat $srcdir/mpn/underscore.h >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+ m68k-no-nolog-elf)
+ echo '#define ELF_SYNTAX' >asm-syntax.h
+ echo '#define C_SYMBOL_NAME(name) name' >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+esac
+
+# The pattern here tests for an absolute path the same way as
+# _AC_OUTPUT_FILES in autoconf acgeneral.m4.
+
+echo "dnl CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir" >> $gmp_tmpconfigm4
+
+case "$srcdir" in
+[\\/]* | ?:[\\/]* )
+
+echo "define(<CONFIG_TOP_SRCDIR>,<\`$srcdir'>)" >> $gmp_tmpconfigm4
+ ;;
+*)
+
+echo "define(<CONFIG_TOP_SRCDIR>,<\`../$srcdir'>)" >> $gmp_tmpconfigm4
+ ;;
+esac
+
+echo "include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')" >> $gmp_tmpconfigm4p
+
+# Must be after asm-defs.m4
+
+echo "define_not_for_expansion(\`HAVE_TARGET_CPU_$target_cpu')" >> $gmp_tmpconfigm4p
+
+case "$target" in
+ alpha*-cray-unicos*)
+ gmp_m4postinc="alpha/unicos.m4"
+ ;;
+ alpha*-*-*)
+ gmp_m4postinc="alpha/default.m4"
+ ;;
+ power*-*-*)
+ case "$target" in
+ *-*-mach* | *-*-rhapsody* | *-*-nextstep* | *-*-darwin* | *-*-macosx*)
+ ;; # these use non-conventional assembly syntax.
+ powerpc64-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc64/aix.m4"
+ ;;
+ *-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc32/aix.m4"
+ ;;
+ *)
+ gmp_m4postinc="powerpc32/regmap.m4"
+ ;;
+ esac
+ ;;
+esac
+
+for tmp_f in $gmp_m4postinc; do
+
+echo "include_mpn(\`$tmp_f')" >> $gmp_tmpconfigm4p
+
+done
+
+# Set up `gmp_links'. It's a list of link:file pairs that configure will
+# process to create link -> file.
+gmp_links=
+
+# If the user specified `MPN_PATH', use that instead of the path we've
+# come up with.
+if test -z "$MPN_PATH"; then
+ path="$path generic"
+else
+ path="$MPN_PATH"
+fi
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension. Certain multi-function files with special names
+# can provide some functions too. (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# FIXME: udiv and umul aren't in $gmp_mpn_functions_optional yet since
+# there's some versions of those files which should be checked for bit
+# rot first. Put them in $extra_functions for each target for now,
+# change to standard optionals when all are ready.
+
+# Note: The following lines defining $gmp_mpn_functions_optional
+# and $gmp_mpn_functions are parsed by the "macos/configure"
+# Perl script. So if you change the lines in a major way
+# make sure to run and examine the output from
+#
+# % (cd macos; perl configure)
+
+gmp_mpn_functions_optional="copyi copyd com_n \
+ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n"
+
+gmp_mpn_functions="${extra_functions} inlines add_n sub_n mul_1 addmul_1 \
+ submul_1 lshift rshift diveby3 divrem divrem_1 divrem_2 \
+ mod_1 mod_1_rs pre_mod_1 dump \
+ mul mul_fft mul_n mul_basecase sqr_basecase random \
+ random2 sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \
+ bdivmod gcd_1 gcd gcdext tdiv_qr bz_divrem_n sb_divrem_mn jacbase \
+ $gmp_mpn_functions_optional"
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp="mpn/mp_bases.lo"
+
+# SLPJ trace
+echo "Peering at file structure (takes a while)..." 1>&6
+
+for tmp_fn in ${gmp_mpn_functions} ; do
+# SLPJ trace
+ echo "...$tmp_fn..." 1>&6
+
+# This line was
+# rm -f mpn/${tmp_fn}.[Ssc] mpn/${tmp_fn}.asm
+# but I found that on my NT workstation the command
+# would unpredictably hang. rm wasn't an active process,
+# but absolutely nothing was happening.
+# I *think* that expanding the [Ssc] cures the problem
+# SLPJ May 01
+ rm -f mpn/${tmp_fn}.S mpn/${tmp_fn}.s mpn/${tmp_fn}.c mpn/${tmp_fn}.asm
+
+ echo "...$tmp_fn (done rm)..." 1>&6
+
+ # functions that can be provided by multi-function files
+ tmp_mulfunc=
+ case $tmp_fn in
+ add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+ popcount|hamdist) tmp_mulfunc="popham" ;;
+ and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+ tmp_mulfunc="logops_n" ;;
+ esac
+
+ found=no
+ for tmp_dir in $path; do
+
+# SLPJ trace
+# We get stuck sometimes
+ echo " ...dir $tmp_dir..." 1>&6
+ for tmp_base in $tmp_fn $tmp_mulfunc; do
+
+# SLPJ trace
+# We get stuck sometimes
+ echo " ...base $tmp_base..." 1>&6
+ for tmp_ext in asm S s c; do
+ tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+
+# SLPJ trace
+# We get stuck sometimes
+ echo " ...$tmp_file..." 1>&6
+
+ if test -f $tmp_file; then
+ found=yes
+
+ mpn_objects="$mpn_objects ${tmp_fn}.lo"
+ mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_fn}.lo"
+ gmp_links="$gmp_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext"
+
+ # duplicate AC_DEFINEs are harmless, so it doesn't matter
+ # that multi-function files get grepped here repeatedly
+ gmp_ep="`
+ sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+ sed -n 's/^[ ]*PROLOGUE.*(\(.*\))/\1/p' $tmp_file
+ `"
+ for gmp_tmp in $gmp_ep; do
+ cat >>confdefs.h <<EOF
+#define HAVE_NATIVE_${gmp_tmp} 1
+EOF
+
+ done
+
+ break
+ fi
+ done
+ if test $found = yes; then break ; fi
+ done
+ if test $found = yes; then break ; fi
+ done
+
+ if test $found = no; then
+ for tmp_optional in $gmp_mpn_functions_optional; do
+ if test $tmp_optional = $tmp_fn; then
+ found=yes
+ fi
+ done
+ if test $found = no; then
+ { echo "configure: error: no version of $tmp_fn found in path: $path" 1>&2; exit 1; }
+ fi
+ fi
+done
+
+
+# Create link for gmp-mparam.h.
+
+# SLPJ trace
+echo "Creating link for gmp-mparam.h..." 1>&6
+
+for tmp_dir in $path ; do
+ rm -f gmp-mparam.h
+ if test -f $srcdir/mpn/${tmp_dir}/gmp-mparam.h ; then
+ gmp_links="$gmp_links gmp-mparam.h:mpn/${tmp_dir}/gmp-mparam.h"
+
+ # Copy any KARATSUBA_SQR_THRESHOLD in gmp-mparam.h to config.m4.
+ # Some versions of sqr_basecase.asm use this.
+ tmp_gmp_karatsuba_sqr_threshold="`sed -n 's/^#define KARATSUBA_SQR_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $srcdir/mpn/${tmp_dir}/gmp-mparam.h`"
+ if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+
+echo "define(<KARATSUBA_SQR_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)" >> $gmp_tmpconfigm4
+
+ fi
+
+ break
+ fi
+done
+
+# SLPJ trace
+echo "Digging out links to include in DISTCLEANFILES..." 1>&6
+
+# Dig out the links from `gmp_links' for inclusion in DISTCLEANFILES.
+gmp_srclinks=
+for f in $gmp_links; do
+ gmp_srclinks="$gmp_srclinks `echo $f | sed 's/\(.*\):.*/\1/'`"
+done
+
+echo "creating $gmp_configm4"
+echo "dnl $gmp_configm4. Generated automatically by configure." > $gmp_configm4
+if test -f $gmp_tmpconfigm4; then
+ echo "changequote(<,>)dnl" >> $gmp_configm4
+ echo "ifdef(<__CONFIG_M4_INCLUDED__>,,<" >> $gmp_configm4
+ cat $gmp_tmpconfigm4 >> $gmp_configm4
+ echo ">)" >> $gmp_configm4
+ echo "changequote(\`,')dnl" >> $gmp_configm4
+ rm $gmp_tmpconfigm4
+fi
+echo "ifdef(\`__CONFIG_M4_INCLUDED__',,\`" >> $gmp_configm4
+if test -f $gmp_tmpconfigm4i; then
+ cat $gmp_tmpconfigm4i >> $gmp_configm4
+ rm $gmp_tmpconfigm4i
+fi
+if test -f $gmp_tmpconfigm4p; then
+ cat $gmp_tmpconfigm4p >> $gmp_configm4
+ rm $gmp_tmpconfigm4p
+fi
+echo "')" >> $gmp_configm4
+echo "define(\`__CONFIG_M4_INCLUDED__')" >> $gmp_configm4
+
+trap '' 1 2 15
+cat >confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote substitution
+ # turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ -e "s/'/'\\\\''/g" \
+ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+ ;;
+ esac >>confcache
+if cmp -s $cache_file confcache; then :; else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache >$cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# Any assignment to VPATH causes Sun make to only execute
+# the first set of double-colon rules, so remove it if not needed.
+# If there is a colon in the path, we need to keep it.
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d'
+fi
+
+DEFS=-DHAVE_CONFIG_H
+
+: ${CONFIG_STATUS=./config.status}
+trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15
+echo creating $CONFIG_STATUS
+cat >$CONFIG_STATUS <<EOF
+#! /bin/sh
+# Generated automatically by configure.
+# Run this file to recreate the current configuration.
+# This directory was configured as follows,
+# on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+#
+# $0 $ac_configure_args
+#
+# Compiler output produced by configure, useful for debugging
+# configure, is in ./config.log if it exists.
+
+# Files that config.status was made for.
+config_files="\\
+ Makefile mpn/Makefile mpz/Makefile"
+config_headers="\\
+ config.h:config.in"
+config_links="\\
+ $gmp_links"
+config_commands="\\
+ default-1"
+
+ac_cs_usage="\\
+\\\`$CONFIG_STATUS' instantiates files from templates according to the
+current configuration.
+
+Usage: $CONFIG_STATUS [OPTIONS] FILE...
+
+ --recheck Update $CONFIG_STATUS by reconfiguring in the same conditions
+ --version Print the version of Autoconf and exit
+ --help Display this help and exit
+ --file=FILE[:TEMPLATE]
+ Instantiate the configuration file FILE
+ --header=FILE[:TEMPLATE]
+ Instantiate the configuration header FILE
+
+Configuration files:
+\$config_files
+
+Configuration headers:
+\$config_headers
+
+Configuration links:
+\$config_links
+
+Configuration commands:
+\$config_commands
+
+Report bugs to <bug-autoconf@gnu.org>."
+
+ac_cs_version="\\
+$CONFIG_STATUS generated by autoconf version 2.14a.
+Configured on host `(hostname || uname -n) 2>/dev/null | sed 1q` by
+ `echo "$0 $ac_configure_args" | sed 's/[\\"\`\$]/\\\\&/g'`"
+
+# Root of the tmp file names. Use pid to allow concurrent executions.
+ac_cs_root=cs\$\$
+ac_given_srcdir=$srcdir
+ac_given_INSTALL="$INSTALL"
+
+# If no files are specified by the user, then we need to provide default
+# values. But we need to know if files were specified by the user.
+ac_need_defaults=:
+while test \$# != 0
+do
+ case "\$1" in
+ --*=*)
+ ac_option=\`echo "\$1" | sed -e 's/=.*//'\`
+ ac_optarg=\`echo "\$1" | sed -e 's/[^=]*=//'\`
+ shift
+ set dummy "\$ac_option" "\$ac_optarg" \${1+"\$@"}
+ shift
+ ;;
+ -*);;
+ *) # This is not an option, so the user has probably given explicit
+ # arguments.
+ ac_need_defaults=false;;
+ esac
+
+ case "\$1" in
+
+ # Handling of the options.
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ echo "running \${CONFIG_SHELL-/bin/sh} $0 `echo "$ac_configure_args" | sed 's/[\\"\`\$]/\\\\&/g'` --no-create --no-recursion"
+ exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;;
+ -version | --version | --versio | --versi | --vers | --ver | --ve | --v)
+ echo "\$ac_cs_version"; exit 0 ;;
+ --he | --h)
+ # Conflict between --help and --header
+ echo "$CONFIG_STATUS: ambiguous option: \$ac_option
+Try \\\`$CONFIG_STATUS --help' for more information."; exit 1 ;;
+ -help | --help | --hel )
+ echo "\$ac_cs_usage"; exit 0 ;;
+ --file | --fil | --fi | --f )
+ shift
+ CONFIG_FILES="\$CONFIG_FILES \$1"
+ ac_need_defaults=false;;
+ --header | --heade | --head | --hea )
+ shift
+ CONFIG_HEADERS="\$CONFIG_FILES \$1"
+ ac_need_defaults=false;;
+
+ # Handling of arguments.
+ 'Makefile' ) CONFIG_FILES="\$CONFIG_FILES Makefile" ;;
+ 'mpz/Makefile' ) CONFIG_FILES="\$CONFIG_FILES mpz/Makefile" ;;
+ 'mpn/Makefile' ) CONFIG_FILES="\$CONFIG_FILES mpn/Makefile" ;;
+ '$gmp_links' ) CONFIG_LINKS="\$CONFIG_LINKS $gmp_links" ;;
+ 'default-1' ) CONFIG_COMMANDS="\$CONFIG_COMMANDS default-1" ;;
+ 'config.h' ) CONFIG_HEADERS="\$CONFIG_HEADERS config.h:config.in" ;;
+
+ # This is an error.
+ -*) echo "$CONFIG_STATUS: unrecognized option: \$1
+Try \\\`$CONFIG_STATUS --help' for more information."; exit 1 ;;
+ *) echo "$CONFIG_STATUS: invalid argument: \$1"; exit 1 ;;
+ esac
+ shift
+done
+
+EOF
+
+cat >>$CONFIG_STATUS <<\EOF
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used. Set only those that are not.
+if $ac_need_defaults; then
+ : ${CONFIG_FILES=$config_files}
+ : ${CONFIG_HEADERS=$config_headers}
+ : ${CONFIG_LINKS=$config_links}
+ : ${CONFIG_COMMANDS=$config_commands}
+fi
+
+# Trap to remove the temp files.
+trap 'rm -fr $ac_cs_root*; exit 1' 1 2 15
+
+EOF
+
+cat >>$CONFIG_STATUS <<EOF
+#
+# INIT-COMMANDS section.
+#
+
+EOF
+
+cat >>$CONFIG_STATUS <<EOF
+
+#
+# CONFIG_FILES section.
+#
+
+# No need to generate the scripts if there are no CONFIG_FILES.
+# This happens for instance when ./config.status config.h
+if test -n "\$CONFIG_FILES"; then
+ # Protect against being on the right side of a sed subst in config.status.
+ sed 's/%@/@@/; s/@%/@@/; s/%;t t\$/@;t t/; /@;t t\$/s/[\\\\&%]/\\\\&/g;
+ s/@@/%@/; s/@@/@%/; s/@;t t\$/%;t t/' >\$ac_cs_root.subs <<\\CEOF
+s%@exec_prefix@%$exec_prefix%;t t
+s%@prefix@%$prefix%;t t
+s%@program_transform_name@%$program_transform_name%;t t
+s%@bindir@%$bindir%;t t
+s%@sbindir@%$sbindir%;t t
+s%@libexecdir@%$libexecdir%;t t
+s%@datadir@%$datadir%;t t
+s%@sysconfdir@%$sysconfdir%;t t
+s%@sharedstatedir@%$sharedstatedir%;t t
+s%@localstatedir@%$localstatedir%;t t
+s%@libdir@%$libdir%;t t
+s%@includedir@%$includedir%;t t
+s%@oldincludedir@%$oldincludedir%;t t
+s%@infodir@%$infodir%;t t
+s%@mandir@%$mandir%;t t
+s%@SHELL@%$SHELL%;t t
+s%@ECHO_C@%$ECHO_C%;t t
+s%@ECHO_N@%$ECHO_N%;t t
+s%@ECHO_T@%$ECHO_T%;t t
+s%@CFLAGS@%$CFLAGS%;t t
+s%@CPPFLAGS@%$CPPFLAGS%;t t
+s%@CXXFLAGS@%$CXXFLAGS%;t t
+s%@FFLAGS@%$FFLAGS%;t t
+s%@DEFS@%$DEFS%;t t
+s%@LDFLAGS@%$LDFLAGS%;t t
+s%@LIBS@%$LIBS%;t t
+s%@host@%$host%;t t
+s%@host_alias@%$host_alias%;t t
+s%@host_cpu@%$host_cpu%;t t
+s%@host_vendor@%$host_vendor%;t t
+s%@host_os@%$host_os%;t t
+s%@target@%$target%;t t
+s%@target_alias@%$target_alias%;t t
+s%@target_cpu@%$target_cpu%;t t
+s%@target_vendor@%$target_vendor%;t t
+s%@target_os@%$target_os%;t t
+s%@build@%$build%;t t
+s%@build_alias@%$build_alias%;t t
+s%@build_cpu@%$build_cpu%;t t
+s%@build_vendor@%$build_vendor%;t t
+s%@build_os@%$build_os%;t t
+s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%;t t
+s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%;t t
+s%@INSTALL_DATA@%$INSTALL_DATA%;t t
+s%@PACKAGE@%$PACKAGE%;t t
+s%@VERSION@%$VERSION%;t t
+s%@ACLOCAL@%$ACLOCAL%;t t
+s%@AUTOCONF@%$AUTOCONF%;t t
+s%@AUTOMAKE@%$AUTOMAKE%;t t
+s%@AUTOHEADER@%$AUTOHEADER%;t t
+s%@MAKEINFO@%$MAKEINFO%;t t
+s%@AMTAR@%$AMTAR%;t t
+s%@install_sh@%$install_sh%;t t
+s%@AWK@%$AWK%;t t
+s%@SET_MAKE@%$SET_MAKE%;t t
+s%@AMDEP@%$AMDEP%;t t
+s%@AMDEPBACKSLASH@%$AMDEPBACKSLASH%;t t
+s%@DEPDIR@%$DEPDIR%;t t
+s%@MAINTAINER_MODE_TRUE@%$MAINTAINER_MODE_TRUE%;t t
+s%@MAINTAINER_MODE_FALSE@%$MAINTAINER_MODE_FALSE%;t t
+s%@MAINT@%$MAINT%;t t
+s%@WANT_MPBSD_TRUE@%$WANT_MPBSD_TRUE%;t t
+s%@WANT_MPBSD_FALSE@%$WANT_MPBSD_FALSE%;t t
+s%@WANT_MPFR_TRUE@%$WANT_MPFR_TRUE%;t t
+s%@WANT_MPFR_FALSE@%$WANT_MPFR_FALSE%;t t
+s%@CC@%$CC%;t t
+s%@CCAS@%$CCAS%;t t
+s%@CPP@%$CPP%;t t
+s%@LN_S@%$LN_S%;t t
+s%@M4@%$M4%;t t
+s%@AR@%$AR%;t t
+s%@CALLING_CONVENTIONS_OBJS@%$CALLING_CONVENTIONS_OBJS%;t t
+s%@SPEED_CYCLECOUNTER_OBJS@%$SPEED_CYCLECOUNTER_OBJS%;t t
+s%@EXEEXT@%$EXEEXT%;t t
+s%@OBJEXT@%$OBJEXT%;t t
+s%@RANLIB@%$RANLIB%;t t
+s%@STRIP@%$STRIP%;t t
+s%@LIBTOOL@%$LIBTOOL%;t t
+s%@U@%$U%;t t
+s%@ANSI2KNR@%$ANSI2KNR%;t t
+s%@mpn_objects@%$mpn_objects%;t t
+s%@mpn_objs_in_libgmp@%$mpn_objs_in_libgmp%;t t
+s%@gmp_srclinks@%$gmp_srclinks%;t t
+CEOF
+
+EOF
+
+ cat >>$CONFIG_STATUS <<\EOF
+ # Split the substitutions into bite-sized pieces for seds with
+ # small command number limits, like on Digital OSF/1 and HP-UX.
+ ac_max_sed_lines=48
+ ac_sed_frag=1 # Number of current file.
+ ac_beg=1 # First line for current file.
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
+ ac_more_lines=:
+ ac_sed_cmds=""
+ while $ac_more_lines; do
+ if test $ac_beg -gt 1; then
+ sed "1,${ac_beg}d; ${ac_end}q" $ac_cs_root.subs >$ac_cs_root.sfrag
+ else
+ sed "${ac_end}q" $ac_cs_root.subs >$ac_cs_root.sfrag
+ fi
+ if test ! -s $ac_cs_root.sfrag; then
+ ac_more_lines=false
+ rm -f $ac_cs_root.sfrag
+ else
+ # The purpose of the label and of the branching condition is to
+ # speed up the sed processing (if there are no `@' at all, there
+ # is no need to browse any of the substitutions).
+ # These are the two extra sed commands mentioned above.
+ (echo ':t
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $ac_cs_root.sfrag) >$ac_cs_root.s$ac_sed_frag
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds="sed -f $ac_cs_root.s$ac_sed_frag"
+ else
+ ac_sed_cmds="$ac_sed_cmds | sed -f $ac_cs_root.s$ac_sed_frag"
+ fi
+ ac_sed_frag=`expr $ac_sed_frag + 1`
+ ac_beg=$ac_end
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
+ fi
+ done
+ if test -z "$ac_sed_cmds"; then
+ ac_sed_cmds=cat
+ fi
+fi # test -n "$CONFIG_FILES"
+
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case "$ac_file" in
+ *:*) ac_file_in=`echo "$ac_file" | sed 's%[^:]*:%%'`
+ ac_file=`echo "$ac_file" | sed 's%:.*%%'` ;;
+ *) ac_file_in="${ac_file}.in" ;;
+ esac
+
+ # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories.
+
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dir=`echo "$ac_file" | sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
+ # The file is in a subdirectory.
+ test ! -d "$ac_dir" && mkdir "$ac_dir"
+ ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`"
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_dots=`echo "$ac_dir_suffix" | sed 's%/[^/]*%../%g'`
+ else
+ ac_dir_suffix= ac_dots=
+ fi
+
+ case "$ac_given_srcdir" in
+ .) srcdir=.
+ if test -z "$ac_dots"; then top_srcdir=.
+ else top_srcdir=`echo $ac_dots | sed 's%/$%%'`; fi ;;
+ [\\/]* | ?:[\\/]* )
+ srcdir="$ac_given_srcdir$ac_dir_suffix";
+ top_srcdir=$ac_given_srcdir ;;
+ *) # Relative path.
+ srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix"
+ top_srcdir="$ac_dots$ac_given_srcdir" ;;
+ esac
+
+ case "$ac_given_INSTALL" in
+ [\\/$]* | ?:[\\/]* ) INSTALL="$ac_given_INSTALL" ;;
+ *) INSTALL="$ac_dots$ac_given_INSTALL" ;;
+ esac
+
+ echo creating "$ac_file"
+ rm -f "$ac_file"
+ configure_input="Generated automatically from `echo $ac_file_in |
+ sed 's%.*/%%'` by configure."
+ case "$ac_file" in
+ *[Mm]akefile*) ac_comsub="1i\\
+# $configure_input" ;;
+ *) ac_comsub= ;;
+ esac
+
+ # Don't redirect the output to AC_FILE directly: use `mv' so that updating
+ # is atomic, and doesn't need trapping.
+ ac_file_inputs=`echo "$ac_file_in" |
+ sed -e "s%:% $ac_given_srcdir/%g;s%^%$ac_given_srcdir/%"`
+ for ac_file_input in $ac_file_inputs;
+ do
+ test -f "$ac_file_input" ||
+ { echo "configure: error: cannot find input file \`$ac_file_input'" 1>&2; exit 1; }
+ done
+EOF
+cat >>$CONFIG_STATUS <<EOF
+ sed -e "$ac_comsub
+$ac_vpsub
+$extrasub
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s%@configure_input@%$configure_input%;t t
+s%@srcdir@%$srcdir%;t t
+s%@top_srcdir@%$top_srcdir%;t t
+s%@INSTALL@%$INSTALL%;t t
+" $ac_file_inputs | (eval "$ac_sed_cmds") >$ac_cs_root.out
+ mv $ac_cs_root.out $ac_file
+
+fi; done
+rm -f $ac_cs_root.s*
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+
+#
+# CONFIG_HEADER section.
+#
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='[ ].*$%\1#\2'
+ac_dC=' '
+ac_dD='%;t'
+# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='$%\1#\2define\3'
+ac_uC=' '
+ac_uD='%;t'
+
+for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
+ case "$ac_file" in
+ *:*) ac_file_in=`echo "$ac_file" | sed 's%[^:]*:%%'`
+ ac_file=`echo "$ac_file" | sed 's%:.*%%'` ;;
+ *) ac_file_in="${ac_file}.in" ;;
+ esac
+
+ echo creating $ac_file
+
+ rm -f $ac_cs_root.frag $ac_cs_root.in $ac_cs_root.out
+ ac_file_inputs=`echo "$ac_file_in" |
+ sed -e "s%:% $ac_given_srcdir/%g;s%^%$ac_given_srcdir/%"`
+ for ac_file_input in $ac_file_inputs;
+ do
+ test -f "$ac_file_input" ||
+ { echo "configure: error: cannot find input file \`$ac_file_input'" 1>&2; exit 1; }
+ done
+ # Remove the trailing spaces.
+ sed -e 's/[ ]*$//' $ac_file_inputs >$ac_cs_root.in
+
+EOF
+
+# Transform confdefs.h into two sed scripts, `conftest.defines' and
+# `conftest.undefs', that substitutes the proper values into
+# config.h.in to produce config.h. The first handles `#define'
+# templates, and the second `#undef' templates.
+# And first: Protect against being on the right side of a sed subst in
+# config.status. Protect against being in an unquoted here document
+# in config.status.
+rm -f conftest.defines conftest.undefs
+ac_cs_root=conftest
+cat >$ac_cs_root.hdr <<\EOF
+s/[\\&%]/\\&/g
+s%[\\$`]%\\&%g
+t clear
+: clear
+s%^[ ]*#[ ]*define[ ][ ]*\(\([^ (][^ (]*\)([^)]*)\)[ ]*\(.*\)$%${ac_dA}\2${ac_dB}\1${ac_dC}\3${ac_dD}%gp
+t cleanup
+s%^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp
+: cleanup
+EOF
+# If some macros were called several times there might be several times
+# the same #defines, which is useless. Nevertheless, we may not want to
+# sort them, since we want the *last* AC_DEFINE to be honored.
+uniq confdefs.h | sed -n -f $ac_cs_root.hdr >conftest.defines
+sed -e 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
+rm -f $ac_cs_root.hdr
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >>conftest.undefs <<\EOF
+s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */%
+EOF
+
+# Break up conftest.defines because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
+echo ' if egrep "^[ ]*#[ ]*define" $ac_cs_root.in >/dev/null; then' >>$CONFIG_STATUS
+echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
+echo ' :' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.defines >/dev/null
+do
+ # Write a limited-size here document to $ac_cs_root.frag.
+ echo ' cat >$ac_cs_root.frag <<CEOF' >>$CONFIG_STATUS
+ echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $ac_cs_root.frag $ac_cs_root.in >$ac_cs_root.out
+ rm -f $ac_cs_root.in
+ mv $ac_cs_root.out $ac_cs_root.in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
+ rm -f conftest.defines
+ mv conftest.tail conftest.defines
+done
+rm -f conftest.defines
+echo ' fi # egrep' >>$CONFIG_STATUS
+echo >>$CONFIG_STATUS
+
+# Break up conftest.undefs because some shells have a limit on the size
+# of here documents, and old seds have small limits too (100 cmds).
+echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
+rm -f conftest.tail
+while grep . conftest.undefs >/dev/null
+do
+ # Write a limited-size here document to $ac_cs_root.frag.
+ echo ' cat >$ac_cs_root.frag <<CEOF' >>$CONFIG_STATUS
+ echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
+ echo 'CEOF
+ sed -f $ac_cs_root.frag $ac_cs_root.in >$ac_cs_root.out
+ rm -f $ac_cs_root.in
+ mv $ac_cs_root.out $ac_cs_root.in
+' >>$CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
+ rm -f conftest.undefs
+ mv conftest.tail conftest.undefs
+done
+rm -f conftest.undefs
+
+cat >>$CONFIG_STATUS <<\EOF
+ rm -f $ac_cs_root.frag $ac_cs_root.h
+ echo "/* $ac_file. Generated automatically by configure. */" >$ac_cs_root.h
+ cat $ac_cs_root.in >>$ac_cs_root.h
+ rm -f $ac_cs_root.in
+ if cmp -s $ac_file $ac_cs_root.h 2>/dev/null; then
+ echo "$ac_file is unchanged"
+ rm -f $ac_cs_root.h
+ else
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dir=`echo "$ac_file" | sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
+ # The file is in a subdirectory.
+ test ! -d "$ac_dir" && mkdir "$ac_dir"
+ fi
+ rm -f $ac_file
+ mv $ac_cs_root.h $ac_file
+ fi
+fi; done
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+
+#
+# CONFIG_LINKS section.
+#
+srcdir=$ac_given_srcdir
+
+for ac_file in : $CONFIG_LINKS; do if test "x$ac_file" != x:; then
+ ac_dest=`echo "$ac_file" | sed 's%:.*%%'`
+ ac_source=`echo "$ac_file" | sed 's%[^:]*:%%'`
+
+ echo "copying $srcdir/$ac_source to $ac_dest"
+
+ if test ! -r $srcdir/$ac_source; then
+ { echo "configure: error: $srcdir/$ac_source: File not found" 1>&2; exit 1; }
+ fi
+ rm -f $ac_dest
+
+ # Make relative symlinks.
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dest_dir=`echo $ac_dest | sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dest_dir" != "$ac_dest" && test "$ac_dest_dir" != .; then
+ # The dest file is in a subdirectory.
+ test ! -d "$ac_dest_dir" && mkdir "$ac_dest_dir"
+ ac_dest_dir_suffix="/`echo $ac_dest_dir|sed 's%^\./%%'`"
+ # A "../" for each directory in $ac_dest_dir_suffix.
+ ac_dots=`echo $ac_dest_dir_suffix|sed 's%/[^/]*%../%g'`
+ else
+ ac_dest_dir_suffix= ac_dots=
+ fi
+
+ case "$srcdir" in
+ [\\/$]* | ?:[\\/]* ) ac_rel_source="$srcdir/$ac_source" ;;
+ *) ac_rel_source="$ac_dots$srcdir/$ac_source" ;;
+ esac
+
+ # Note: Dodgy local mods to 'make things work' in an environment (cygwin)
+ # that supports symlinks (through silly hack) using tools that don't
+ # understand them (mingw). The end sometimes justifies the means, son.
+ #
+ # Make a symlink if possible; otherwise try a hard link.
+ #if ln -s $ac_rel_source $ac_dest 2>/dev/null ||
+ # ln $srcdir/$ac_source $ac_dest; then :
+ #
+ # Note: If the -p offends your 'cp', just drop it; no harm done, you'll just
+ # get more recompilations.
+ #
+ if cp -p $srcdir/$ac_source $ac_dest; then :
+ else
+ { echo "configure: error: cannot copy $ac_dest to $srcdir/$ac_source" 1>&2; exit 1; }
+ fi
+fi; done
+EOF
+cat >>$CONFIG_STATUS <<\EOF
+
+#
+# CONFIG_COMMANDS section.
+#
+for ac_file in .. $CONFIG_COMMANDS; do if test "x$ac_file" != x..; then
+ ac_dest=`echo "$ac_file" | sed 's%:.*%%'`
+ ac_source=`echo "$ac_file" | sed 's%[^:]*:%%'`
+
+ case "$ac_dest" in
+ default-1 ) test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h ;;
+ esac
+fi;done
+EOF
+
+cat >>$CONFIG_STATUS <<\EOF
+
+exit 0
+EOF
+chmod +x $CONFIG_STATUS
+rm -fr confdefs* $ac_clean_files
+trap 'exit 1' 1 2 15
+
+test "$no_create" = yes || $SHELL $CONFIG_STATUS || exit 1
diff --git a/rts/gmp/configure.in b/rts/gmp/configure.in
new file mode 100644
index 0000000000..18f610fe29
--- /dev/null
+++ b/rts/gmp/configure.in
@@ -0,0 +1,950 @@
+dnl Process this file with autoconf to produce a configure script.
+
+
+dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+AC_REVISION($Revision: 1.8 $)dnl
+AC_PREREQ(2.14)dnl
+AC_INIT(gmp-impl.h)
+
+dnl Check system.
+AC_CANONICAL_SYSTEM
+
+dnl Automake
+AM_INIT_AUTOMAKE(gmp, GMP_VERSION)
+AM_CONFIG_HEADER(config.h:config.in)
+AM_MAINTAINER_MODE
+
+dnl GMP specific
+GMP_INIT(config.m4)
+
+
+AC_ARG_ENABLE(assert,
+AC_HELP_STRING([--enable-assert],[enable ASSERT checking [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-assert, need yes or no]) ;;
+esac],
+[enable_assert=no])
+
+if test "$enable_assert" = "yes"; then
+ AC_DEFINE(WANT_ASSERT,1,
+ [./configure --enable-assert option, to enable some ASSERT()s])
+fi
+
+
+AC_ARG_ENABLE(alloca,
+AC_HELP_STRING([--enable-alloca],[use alloca for temp space [default=yes]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-alloca, need yes or no]) ;;
+esac],
+[enable_alloca=yes])
+
+if test "$enable_alloca" = "no"; then
+ AC_DEFINE(USE_STACK_ALLOC,1,
+ [./configure --disable-alloca option, to use stack-alloc.c, not alloca])
+fi
+
+
+AC_ARG_ENABLE(fft,
+AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-fft, need yes or no]) ;;
+esac],
+[enable_fft=no])
+
+if test "$enable_fft" = "yes"; then
+ AC_DEFINE(WANT_FFT,1,
+ [./configure --enable-fft option, to enable FFTs for multiplication])
+fi
+
+
+AC_ARG_ENABLE(mpbsd,
+AC_HELP_STRING([--enable-mpbsd],[build Berkley MP compatibility library [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-mpbsd, need yes or no]) ;;
+esac],
+[enable_mpbsd=no])
+AM_CONDITIONAL(WANT_MPBSD, test "$enable_mpbsd" = "yes")
+
+
+AC_ARG_ENABLE(mpfr,
+AC_HELP_STRING([--enable-mpfr],[build MPFR [default=no]]),
+[case "${enableval}" in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value ${enableval} for --enable-mpfr, need yes or no]) ;;
+esac],
+[enable_mpfr=no])
+AM_CONDITIONAL(WANT_MPFR, test "$enable_mpfr" = "yes")
+
+
+dnl Switch on OS and determine what compiler to use.
+dnl
+dnl os_64bit Set to "yes" if OS is 64-bit capable.
+dnl FIXME: Rename to `check_64bit_compiler'!
+dnl cclist List of compilers, best first.
+dnl gmp_cflags_{cc} Flags for compiler named {cc}.
+dnl gmp_cflags64_{cc} Flags for compiler named {cc} for 64-bit code.
+dnl gmp_optcflags_{cc} Optional compiler flags.
+dnl gmp_xoptcflags_{cc} Exclusive optional compiler flags.
+dnl
+os_64bit="no"
+cclist="gcc cc" # FIXME: Prefer c89 to cc.
+gmp_cflags_gcc="-g -O2"
+gmp_cflags64_gcc="-g -O2"
+gmp_cflags_cc="-g"
+gmp_cflags64_cc="-g"
+
+case "$target" in
+ # Alpha
+ alpha*-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ gmp_cflags_cc="$gmp_cflags_cc -O"
+ ;;
+ alpha*-*-osf*)
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in # compilers don't seem to understand `ev67' and such.
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ # FIXME: We shouldn't fail fatally if none of these work, but that's
+ # how xoptcflags work and we don't have any other mechanism right now.
+ # Why do we need this here and not for alpha*-*-* below?
+ gmp_xoptcflags_gcc="-Wa,-arch,${flavour} -Wa,-m${flavour}"
+ gmp_optcflags_cc="-arch $flavour -tune $flavour"
+ fi
+ ;;
+ alpha*-*-*)
+ cclist="gcc"
+ flavour=`echo $target_cpu | sed 's/^alpha//g'`
+ if test -n "$flavour"; then
+ case $flavour in
+ ev6? | ev7*) flavour=ev6;;
+ esac
+ gmp_optcflags_gcc="-mcpu=$flavour"
+ fi
+ ;;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_no_asm_syntax_testing=yes
+ cclist=cc
+ # Don't inherit default gmp_cflags_cc value; it comes with -g which
+ # disables all optimization on Cray vector systems
+ gmp_cflags_cc="-O"
+ ;;
+
+ # AMD and Intel x86 configurations
+ [i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*])
+ # Rumour has it -O2 used to give worse register allocation than just -O.
+ gmp_cflags_gcc="-g -O -fomit-frame-pointer"
+
+ case "${target}" in
+ i386*-*-*) gmp_optcflags_gcc="-mcpu=i386 -march=i386";;
+ i486*-*-*) gmp_optcflags_gcc="-mcpu=i486 -march=i486";;
+ i586*-*-* | pentium-*-* | pentiummmx-*-*)
+ gmp_optcflags_gcc="-mcpu=pentium -march=pentium";;
+
+ # -march=pentiumpro not used because mpz/powm.c (swox cvs rev 1.4)
+ # tickles a bug in gcc 2.95.2 (believed fixed in 2.96).
+ [i686*-*-* | pentiumpro-*-* | pentium[23]-*-*])
+ gmp_optcflags_gcc="-mcpu=pentiumpro";;
+
+ k6*-*-*) gmp_optcflags_gcc="-mcpu=k6 -march=k6";;
+
+ # Athlon instruction costs are close to p6: 3 cycle load latency, 4-6
+ # cycle mul, 40 cycle div, pairable adc, ...
+ # FIXME: Change this when gcc gets something specific for Athlon.
+ # -march=pentiumpro not used, per i686 above.
+ athlon-*-*) gmp_optcflags_gcc="-mcpu=pentiumpro";;
+ esac
+ ;;
+
+ # Sparc
+ [ultrasparc*-*-solaris2.[7-9] | sparcv9-*-solaris2.[7-9]])
+ os_64bit=yes
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ gmp_cflags64_cc="-xtarget=native -xarch=v9 -xO4"
+ ;;
+ sparc64-*-linux*)
+ # Need to think more about the options passed here. This isn't good for
+ # some sparc64 linux distros, since we end up not optimizing when all the
+ # options below fail.
+ os_64bit=yes
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -m64 -mptr64 -Wa,-xarch=v9 -mcpu=v9"
+ gmp_cflags_gcc="$gmp_cflags_gcc -m32"
+ gmp_xoptflags_gcc="-mcpu=ultrasparc -mvis"
+ ;;
+ ultrasparc*-*-* | sparcv9-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -Wa,-xarch=v8plus"
+ gmp_xoptcflags_gcc="-mcpu=v9 -mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-solaris2.* | microsparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4"
+ ;;
+ sparcv8*-*-* | microsparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92"
+ gmp_cflags_cc="-O2" # FIXME: Flag for v8?
+ ;;
+ supersparc*-*-solaris2.*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_cc="-xtarget=native -xarch=v8 -xO4 -DSUPERSPARC"
+ ;;
+ supersparc*-*-*) # SunOS, Linux, *BSD
+ cclist="gcc acc cc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -DSUPERSPARC"
+ gmp_xoptcflags_gcc="-mcpu=v8 -mv8"
+ gmp_cflags_acc="-g -O2 -cg92 -DSUPERSPARC"
+ gmp_cflags_cc="-O2 -DSUPERSPARC" # FIXME: Flag for v8?
+ ;;
+ *sparc*-*-*)
+ cclist="gcc acc cc"
+ gmp_cflags_acc="-g -O2"
+ gmp_cflags_cc="-g -O2"
+ ;;
+
+ # POWER/PowerPC
+ powerpc64-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -maix64 -mpowerpc64"
+ gmp_cflags_xlc="-g -O2 -q64 -qtune=pwr3"
+ ;;
+ powerpc*-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=ppc -O2"
+ ;;
+ power-*-aix*)
+ cclist="gcc xlc"
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpower"
+ gmp_cflags_xlc="$gmp_cflags_cc -qarch=pwr -O2"
+ ;;
+ powerpc64*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc64"
+ AC_DEFINE(_LONG_LONG_LIMB) dnl FIXME: Remove.
+ ;;
+ powerpc-apple-darwin* | powerpc-apple-macosx*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc -traditional-cpp"
+ ;;
+ powerpc*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -mpowerpc"
+ ;;
+
+ # MIPS
+ mips-sgi-irix6.*)
+ os_64bit=yes
+ gmp_cflags64_gcc="-g -O2 -mabi=n32"
+ gmp_cflags64_cc="$gmp_cflags64_cc -O2 -n32"
+ ;;
+
+ # Motorola 68k family
+ m88110*-*-*)
+ gmp_cflags_gcc="-g -O -m88110" dnl FIXME: Use `-O2'?
+ ;;
+ m68*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # HP
+ hppa1.0*-*-*)
+ cclist="gcc c89 cc"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+ hppa2.0w*-*-*)
+ cclist="c89 cc"
+ gmp_cflags_c89="+DD64 +O3"
+ gmp_cflags_cc="+DD64 +O3"
+ ;;
+ hppa2.0*-*-*)
+ os_64bit=yes
+ cclist="gcc c89 cc"
+ gmp_cflags64_gcc="$gmp_cflags64_gcc -mWHAT -D_LONG_LONG_LIMB"
+ # +O2 to cc triggers bug in mpz/powm.c (1.4)
+ gmp_cflags64_c89="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags64_cc="+DA2.0 +e +O3 -D_LONG_LONG_LIMB"
+ gmp_cflags_c89="$gmp_cflags_cc +O2"
+ gmp_cflags_cc="$gmp_cflags_cc +O2"
+ ;;
+
+ # VAX
+ vax*-*-*)
+ gmp_cflags_gcc="$gmp_cflags_gcc -fomit-frame-pointer"
+ ;;
+
+ # Fujitsu
+ [f30[01]-fujitsu-sysv*])
+ cclist="gcc vcc"
+ gmp_cflags_vcc="-g" # FIXME: flags for vcc?
+ ;;
+esac
+
+case "${target}" in
+ *-*-mingw32) gmp_cflags_gcc="$gmp_cflags_gcc -mno-cygwin";;
+esac
+
+dnl Check for programs needed by macros for finding compiler.
+dnl More programs are checked for below, when a compiler is found.
+AC_PROG_NM dnl Macro from Libtool.
+# nm on 64-bit AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ NM="$NM -X 64"
+ ;;
+esac
+
+# Save CFLAGS given on command line.
+gmp_user_CFLAGS="$CFLAGS"
+
+if test -z "$CC"; then
+ # Find compiler.
+ GMP_PROG_CC_FIND($cclist, $os_64bit)
+
+ # If 64-bit OS and we have a 64-bit compiler, use it.
+ if test -n "$os_64bit" && test -n "$CC64"; then
+ CC=$CC64
+ CFLAGS=$CFLAGS64
+ else
+ eval CFLAGS=\$gmp_cflags_$CC
+ fi
+
+ # Try compiler flags that may work with only some compiler versions.
+ # gmp_optcflags: All or nothing.
+ eval optcflags=\$gmp_optcflags_$CC
+ if test -n "$optcflags"; then
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $optcflags"
+ AC_MSG_CHECKING([whether $CC accepts $optcflags])
+ AC_LANG_C
+ AC_TRY_COMPILER([int main(){return(0);}], optok, cross)
+ if test "$optok" = "yes"; then
+ AC_MSG_RESULT([yes])
+ else
+ AC_MSG_RESULT([no])
+ CFLAGS="$CFLAGS_save"
+ fi
+ fi
+ # gmp_xoptcflags: First is best, one has to work.
+ eval xoptcflags=\$gmp_xoptcflags_$CC
+ if test -n "$xoptcflags"; then
+ gmp_found="no"
+ for xopt in $xoptcflags; do
+ CFLAGS_save="$CFLAGS"
+ CFLAGS="$CFLAGS $xopt"
+ AC_MSG_CHECKING([whether $CC accepts $xopt])
+ AC_LANG_C
+ AC_TRY_COMPILER([int main(){return(0);}], optok, cross)
+ if test "$optok" = "yes"; then
+ AC_MSG_RESULT([yes])
+ gmp_found="yes"
+ break
+ else
+ AC_MSG_RESULT([no])
+ CFLAGS="$CFLAGS_save"
+ fi
+ done
+ if test "$gmp_found" = "no"; then
+ echo ["$0: fatal: need a compiler that understands one of $xoptcflags"]
+ exit 1
+ fi
+ fi
+fi
+
+# Restore CFLAGS given on command line.
+# FIXME: We've run through quite some unnecessary code looking for a
+# nice compiler and working flags for it, just to spoil that with user
+# supplied flags.
+test -n "$gmp_user_CFLAGS" && CFLAGS="$gmp_user_CFLAGS"
+
+# Select chosen compiler.
+GMP_PROG_CC_SELECT
+
+# How to assemble.
+CCAS="$CC -c"
+AC_SUBST(CCAS)
+
+dnl Checks for programs.
+dnl --------------------
+AC_PROG_CPP
+AC_PROG_INSTALL
+AC_PROG_LN_S
+GMP_PROG_M4
+AC_CHECK_PROG(AR, ar, ar)
+# ar on AIX needs to know the object file format
+case "$target" in
+ powerpc64*-*-aix*)
+ AR="$AR -X 64"
+ ;;
+esac
+dnl FIXME: Find good ld? /usr/ucb/ld on Solaris won't work.
+
+dnl Checks for assembly syntax.
+if test "$gmp_no_asm_syntax_testing" != "yes"; then
+ GMP_CHECK_ASM_TEXT
+ GMP_CHECK_ASM_DATA
+ GMP_CHECK_ASM_GLOBL
+ GMP_CHECK_ASM_LABEL_SUFFIX
+ GMP_CHECK_ASM_TYPE
+ GMP_CHECK_ASM_SIZE
+ GMP_CHECK_ASM_LSYM_PREFIX
+ GMP_CHECK_ASM_W32
+ GMP_CHECK_ASM_UNDERSCORE(underscore=yes, underscore=no)
+ GMP_CHECK_ASM_ALIGN_LOG(asm_align=log, asm_align=nolog)
+fi
+
+dnl FIXME: Check for FPU and set `floating_point' appropriately.
+
+dnl ========================================
+dnl Configuring mpn.
+dnl ----------------------------------------
+dnl Set the following target specific variables:
+dnl path where to search for source files
+dnl family processor family (Needed for building
+dnl asm-syntax.h for now. FIXME: Remove.)
+dnl extra_functions extra functions
+
+family=generic
+
+case ${target} in
+ arm*-*-*)
+ path="arm"
+ ;;
+ [sparcv9*-*-solaris2.[789]* | sparc64*-*-solaris2.[789]* | ultrasparc*-*-solaris2.[789]*])
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparc64-*-linux*)
+ if test -n "$CC64"
+ then path="sparc64"
+ else path="sparc32/v9 sparc32/v8 sparc32"
+ fi
+ ;;
+ sparcv8*-*-* | microsparc*-*-*)
+ path="sparc32/v8 sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-*)
+ path="sparc32/v9 sparc32/v8 sparc32"
+ extra_functions="udiv_fp"
+ ;;
+ supersparc*-*-*)
+ path="sparc32/v8/supersparc sparc32/v8 sparc32"
+ extra_functions="udiv"
+ ;;
+ sparc*-*-*) path="sparc32"
+ if test x${floating_point} = xno
+ then extra_functions="udiv_nfp"
+ else extra_functions="udiv_fp"
+ fi
+ ;;
+ hppa7000*-*-*)
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa1.0*-*-*)
+ path="hppa"
+ extra_functions="udiv_qrnnd"
+ ;;
+ hppa2.0w-*-*)
+ path="pa64w"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ ;;
+ hppa2.0*-*-*)
+ if test -n "$CC64"; then
+ path="pa64"
+ extra_functions="umul_ppmm udiv_qrnnd"
+ # We need to use the system compiler, or actually the system assembler,
+ # since GAS has not been ported to understand the 2.0 instructions.
+ CCAS="$CC64 -c"
+ else
+ # FIXME: path should be "hppa/hppa2_0 hppa/hppa1_1 hppa"
+ path="hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd"
+ fi
+ ;;
+ hppa*-*-*) #assume pa7100
+ path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa"
+ extra_functions="udiv_qrnnd";;
+ [f30[01]-fujitsu-sysv*])
+ path=fujitsu;;
+ alphaev6*-*-*) path="alpha/ev6 alpha"; extra_functions="invert_limb cntlz";;
+ alphaev5*-*-*) path="alpha/ev5 alpha"; extra_functions="invert_limb cntlz";;
+ alpha*-*-*) path="alpha"; extra_functions="invert_limb cntlz";;
+ # Cray vector machines. This must come after alpha* so that we can
+ # recognize present and future vector processors with a wildcard.
+ *-cray-unicos*)
+ path="cray"
+ extra_functions="mulww";;
+ am29000*-*-*) path="a29k";;
+ a29k*-*-*) path="a29k";;
+
+ # AMD and Intel x86 configurations
+
+ [i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*])
+ gmp_m4postinc="x86/x86-defs.m4"
+ extra_functions="udiv umul"
+ CALLING_CONVENTIONS_OBJS="x86call.o x86check.o"
+
+ GMP_CHECK_ASM_SHLDL_CL(
+ [GMP_DEFINE(WANT_SHLDL_CL,1)],
+ [GMP_DEFINE(WANT_SHLDL_CL,0)])
+ GMP_CHECK_ASM_ALIGN_FILL_0x90
+
+ # the CPUs below wanting to know about mmx
+ case ${target} in
+ [pentiummmx-*-* | pentium[23]-*-* | k6*-*-* | athlon-*-*])
+ GMP_CHECK_ASM_MMX(tmp_mmx=yes, tmp_mmx=no)
+ ;;
+ esac
+
+ # default for anything not otherwise mentioned
+ path="x86"
+
+ case ${target} in
+ [i[34]86*-*-*])
+ path="x86"
+ ;;
+ k5*-*-*)
+ # don't know what best suits k5
+ path="x86"
+ ;;
+ i586*-*-* | pentium-*-*)
+ path="x86/pentium x86"
+ ;;
+ pentiummmx-*-*)
+ path="x86/pentium x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/pentium/mmx $path"
+ fi
+ ;;
+ i686*-*-* | pentiumpro-*-*)
+ path="x86/p6 x86"
+ ;;
+ pentium2-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ pentium3-*-*)
+ path="x86/p6 x86"
+ # The pentium/mmx lshift and rshift are good on p6 and can be used
+ # until there's something specific for p6.
+ if test "$tmp_mmx" = yes; then
+ path="x86/p6/p3mmx x86/p6/mmx x86/pentium/mmx $path"
+ fi
+ ;;
+ [k6[23]*-*-*])
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/k62mmx x86/k6/mmx $path"
+ fi
+ ;;
+ k6*-*-*)
+ path="x86/k6 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k6/mmx $path"
+ fi
+ ;;
+ athlon-*-*)
+ path="x86/k7 x86"
+ if test "$tmp_mmx" = yes; then
+ path="x86/k7/mmx $path"
+ fi
+ ;;
+ esac
+ ;;
+
+
+ i960*-*-*) path="i960";;
+
+ ia64*-*-*) path="ia64";;
+
+# Motorola 68k configurations. Let m68k mean 68020-68040.
+ [m680[234]0*-*-* | m68k*-*-* | \
+ m68*-next-nextstep*]) # Nexts are at least '020
+ path="m68k/mc68020 m68k"
+ family=m68k
+ ;;
+ m68000*-*-*)
+ path="m68k"
+ family=m68k
+ ;;
+
+ m88k*-*-* | m88k*-*-*) path="m88k";;
+ m88110*-*-*) path="m88k/mc88110 m88k";;
+ ns32k*-*-*) path="ns32k";;
+
+ pyramid-*-*) path="pyr";;
+
+ ppc601-*-*) path="power powerpc32";;
+ powerpc64*-*-*) path="powerpc64";;
+ powerpc*-*-*) path="powerpc32";;
+ rs6000-*-* | power-*-* | power2-*-*)
+ path="power"
+ extra_functions="udiv_w_sdiv"
+ ;;
+
+ sh-*-*) path="sh";;
+ sh2-*-*) path="sh/sh2 sh";;
+
+ [mips[34]*-*-*]) path="mips3";;
+ mips*-*-irix6*) path="mips3";;
+ mips*-*-*) path="mips2";;
+
+ vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv";;
+
+ z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv";;
+ z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv";;
+
+ clipper*-*-*) path="clipper";;
+esac
+
+AC_SUBST(CALLING_CONVENTIONS_OBJS)
+if test -n "$CALLING_CONVENTIONS_OBJS"; then
+ AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
+ [Define if mpn/tests has calling conventions checking for the CPU])
+fi
+
+
+case ${target} in
+ [i[5-8]86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-*])
+ # rdtsc is in pentium and up, not in i386 and i486
+ SPEED_CYCLECOUNTER_OBJS=pentium.lo
+ ;;
+ alpha*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=alpha.lo
+ ;;
+ sparcv9*-*-* | ultrasparc*-*-* | sparc64*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=sparcv9.lo
+ ;;
+ hppa2*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa2.lo
+ ;;
+ hppa*-*-*)
+ SPEED_CYCLECOUNTER_OBJS=hppa.lo
+ ;;
+esac
+
+AC_SUBST(SPEED_CYCLECOUNTER_OBJS)
+
+if test -n "$SPEED_CYCLECOUNTER_OBJS"
+then
+ AC_DEFINE(HAVE_SPEED_CYCLECOUNTER, 1,
+ [Define if a speed_cyclecounter exists (for the tune programs)])
+fi
+
+
+dnl Extensions for executable and object files.
+dnl -------------------------------------------
+AC_EXEEXT
+AC_OBJEXT
+
+dnl Use Libtool.
+dnl ------------
+dnl FIXME: Shared libs seem to fail on aix4.3.
+dnl FIXME: Should invoke [AC_DISABLE_SHARED], but m4 recurses to death.
+case "$target" in
+ [*-*-aix4.[3-9]*]) enable_shared=no ;;
+esac
+AC_PROG_LIBTOOL
+
+dnl Checks for libraries.
+dnl ---------------------
+AC_CHECK_DECLS((optarg))
+
+dnl Checks for header files.
+dnl ------------------------
+AC_HEADER_STDC
+AC_CHECK_HEADERS(getopt.h unistd.h sys/sysctl.h sys/time.h)
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+dnl --------------------------------------------------------------
+AC_CHECK_TYPES((void))
+AC_C_STRINGIZE
+
+dnl Checks for library functions.
+dnl -----------------------------
+dnl Most of these are only for the benefit of supplementary programs. The
+dnl library itself doesn't use anything weird.
+dnl AC_FUNC_MEMCMP
+dnl AC_TYPE_SIGNAL
+dnl AC_CHECK_FUNCS(strtol)
+AC_CHECK_FUNCS(getopt_long getpagesize popen processor_info strtoul sysconf sysctlbyname)
+
+dnl Trick automake into thinking we've run AM_C_PROTOTYPES which it wants
+dnl for ansi2knr, and instead use our own test. (It's only a warning
+dnl automake prints, but it's good to suppress it.)
+ifelse(0,1,[
+AM_C_PROTOTYPES
+])
+GMP_C_ANSI2KNR
+
+
+dnl Set `syntax' to one of <blank>, "mit", "elf", "aix", "macho".
+syntax=
+# For now, we use the old switch for setting syntax.
+# FIXME: Remove when conversion to .asm is completed.
+changequote(,)dnl
+case "${target}" in
+ m680[234]0*-*-linuxaout* | m68k*-*-linuxaout* | \
+ m68k-next-nextstep* | \
+ m68000*-*-*)
+ syntax=mit
+ ;;
+ m680[234]0*-*-linux* | m68k*-*-linux*)
+ syntax=elf
+ ;;
+ m680[234]0*-*-* | m68k*-*-*)
+ syntax=mit
+ ;;
+esac
+changequote([,])dnl
+
+dnl ----------------------------------------
+# Now build an asm-syntax.h file for targets that include that from the
+# assembly files.
+# FIXME: Remove when conversion to .asm is completed.
+case "${family}-${underscore}-${asm_align}-${syntax}" in
+ m68k-yes-log-mit)
+ echo '#define MIT_SYNTAX' >asm-syntax.h
+ cat $srcdir/mpn/underscore.h >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+ m68k-no-nolog-elf)
+ echo '#define ELF_SYNTAX' >asm-syntax.h
+ echo '#define C_SYMBOL_NAME(name) name' >>asm-syntax.h
+ echo '#include "'$srcdir'/mpn/m68k/syntax.h"' >>asm-syntax.h;;
+esac
+
+
+# The pattern here tests for an absolute path the same way as
+# _AC_OUTPUT_FILES in autoconf acgeneral.m4.
+GMP_DEFINE_RAW(["dnl CONFIG_TOP_SRCDIR is a path from the mpn builddir to the top srcdir"])
+case "$srcdir" in
+[[\\/]]* | ?:[[\\/]]* )
+ GMP_DEFINE_RAW(["define(<CONFIG_TOP_SRCDIR>,<\`$srcdir'>)"]) ;;
+*) GMP_DEFINE_RAW(["define(<CONFIG_TOP_SRCDIR>,<\`../$srcdir'>)"]) ;;
+esac
+
+GMP_DEFINE_RAW(["include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')"], POST)
+
+# Must be after asm-defs.m4
+GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_TARGET_CPU_$target_cpu')", POST)
+
+
+dnl config.m4 post-includes
+dnl -----------------------
+dnl (Note x86 post include set with $path above.)
+changequote(,)dnl
+case "$target" in
+ alpha*-cray-unicos*)
+ gmp_m4postinc="alpha/unicos.m4"
+ ;;
+ alpha*-*-*)
+ gmp_m4postinc="alpha/default.m4"
+ ;;
+ power*-*-*)
+ case "$target" in
+ *-*-mach* | *-*-rhapsody* | *-*-nextstep* | *-*-darwin* | *-*-macosx*)
+ ;; # these use non-conventional assembly syntax.
+ powerpc64-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc64/aix.m4"
+ ;;
+ *-*-aix*)
+ gmp_m4postinc="powerpc32/regmap.m4 powerpc32/aix.m4"
+ ;;
+ *)
+ gmp_m4postinc="powerpc32/regmap.m4"
+ ;;
+ esac
+ ;;
+esac
+changequote([, ])dnl
+
+for tmp_f in $gmp_m4postinc; do
+ GMP_DEFINE_RAW(["include_mpn(\`$tmp_f')"], POST)
+done
+
+
+# Set up `gmp_links'. It's a list of link:file pairs that configure will
+# process to create link -> file.
+gmp_links=
+
+# If the user specified `MPN_PATH', use that instead of the path we've
+# come up with.
+if test -z "$MPN_PATH"; then
+ path="$path generic"
+else
+ path="$MPN_PATH"
+fi
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension. Certain multi-function files with special names
+# can provide some functions too. (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# FIXME: udiv and umul aren't in $gmp_mpn_functions_optional yet since
+# there's some versions of those files which should be checked for bit
+# rot first. Put them in $extra_functions for each target for now,
+# change to standard optionals when all are ready.
+
+# Note: The following lines defining $gmp_mpn_functions_optional
+# and $gmp_mpn_functions are parsed by the "macos/configure"
+# Perl script. So if you change the lines in a major way
+# make sure to run and examine the output from
+#
+# % (cd macos; perl configure)
+
+gmp_mpn_functions_optional="copyi copyd com_n \
+ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n"
+
+gmp_mpn_functions="${extra_functions} inlines add_n sub_n mul_1 addmul_1 \
+ submul_1 lshift rshift diveby3 divrem divrem_1 divrem_2 \
+ mod_1 mod_1_rs pre_mod_1 dump \
+ mul mul_fft mul_n mul_basecase sqr_basecase random \
+ random2 sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \
+ bdivmod gcd_1 gcd gcdext tdiv_qr bz_divrem_n sb_divrem_mn jacbase \
+ $gmp_mpn_functions_optional"
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp="mpn/mp_bases.lo"
+
+for tmp_fn in ${gmp_mpn_functions} ; do
+ [rm -f mpn/${tmp_fn}.[Ssc] mpn/${tmp_fn}.asm]
+
+ # functions that can be provided by multi-function files
+ tmp_mulfunc=
+ case $tmp_fn in
+ add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+ popcount|hamdist) tmp_mulfunc="popham" ;;
+ and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+ tmp_mulfunc="logops_n" ;;
+ esac
+
+ found=no
+ for tmp_dir in $path; do
+ for tmp_base in $tmp_fn $tmp_mulfunc; do
+ for tmp_ext in asm S s c; do
+ tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+ if test -f $tmp_file; then
+ found=yes
+
+ mpn_objects="$mpn_objects ${tmp_fn}.lo"
+ mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_fn}.lo"
+ gmp_links="$gmp_links mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext"
+
+ # duplicate AC_DEFINEs are harmless, so it doesn't matter
+ # that multi-function files get grepped here repeatedly
+ gmp_ep=["`
+ sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+ sed -n 's/^[ ]*PROLOGUE.*(\(.*\))/\1/p' $tmp_file
+ `"]
+ for gmp_tmp in $gmp_ep; do
+ AC_DEFINE_UNQUOTED(HAVE_NATIVE_${gmp_tmp})
+ done
+
+ break
+ fi
+ done
+ if test $found = yes; then break ; fi
+ done
+ if test $found = yes; then break ; fi
+ done
+
+ if test $found = no; then
+ for tmp_optional in $gmp_mpn_functions_optional; do
+ if test $tmp_optional = $tmp_fn; then
+ found=yes
+ fi
+ done
+ if test $found = no; then
+ AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
+ fi
+ fi
+done
+
+# Create link for gmp-mparam.h.
+for tmp_dir in $path ; do
+ rm -f gmp-mparam.h
+ if test -f $srcdir/mpn/${tmp_dir}/gmp-mparam.h ; then
+ gmp_links="$gmp_links gmp-mparam.h:mpn/${tmp_dir}/gmp-mparam.h"
+
+ # Copy any KARATSUBA_SQR_THRESHOLD in gmp-mparam.h to config.m4.
+ # Some versions of sqr_basecase.asm use this.
+ tmp_gmp_karatsuba_sqr_threshold="`sed -n 's/^#define KARATSUBA_SQR_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $srcdir/mpn/${tmp_dir}/gmp-mparam.h`"
+ if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+ GMP_DEFINE_RAW(["define(<KARATSUBA_SQR_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
+ fi
+
+ break
+ fi
+done
+
+# Dig out the links from `gmp_links' for inclusion in DISTCLEANFILES.
+gmp_srclinks=
+for f in $gmp_links; do
+ gmp_srclinks="$gmp_srclinks `echo $f | sed 's/\(.*\):.*/\1/'`"
+done
+
+AC_SUBST(mpn_objects)
+AC_SUBST(mpn_objs_in_libgmp)
+AC_SUBST(gmp_srclinks)
+
+dnl ----------------------------------------
+dnl Make links.
+AC_CONFIG_LINKS($gmp_links)
+
+dnl Create config.m4.
+GMP_FINISH
+
+dnl Create Makefiles
+dnl FIXME: Upcoming version of autoconf/automake may not like broken lines.
+AC_OUTPUT(Makefile mpz/Makefile mpn/Makefile)
diff --git a/rts/gmp/depcomp b/rts/gmp/depcomp
new file mode 100644
index 0000000000..7906096738
--- /dev/null
+++ b/rts/gmp/depcomp
@@ -0,0 +1,269 @@
+#! /bin/sh
+
+# depcomp - compile a program generating dependencies as side-effects
+# Copyright (C) 1999 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+ echo "depcomp: Variables source, object and depmode must be set" 1>&2
+ exit 1
+fi
+# `libtool' can also be set to `yes' or `no'.
+
+depfile=${depfile-`echo "$object" | sed 's,\([^/]*\)$,.deps/\1,;s/\.\([^.]*\)$/.P\1/'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Some modes work just like other modes, but use different flags. We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write. Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+ # HP compiler uses -M and no extra arg.
+ gccflag=-M
+ depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+ # This is just like dashmstdout with a different argument.
+ dashmflag=-xM
+ depmode=dashmstdout
+fi
+
+case "$depmode" in
+gcc)
+## There are various ways to get dependency output from gcc. Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+## up in a subdir. Having to rename by hand is ugly.
+## (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+## -MM, not -M (despite what the docs say).
+## - Using -M directly means running the compiler twice (even worse
+## than renaming).
+ if test -z "$gccflag"; then
+ gccflag=-MD,
+ fi
+ if "$@" -Wp,"$gccflag$tmpdepfile"; then :
+ else
+ stat=$?
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ sed 's/^[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the `deleted header file' problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header). We avoid this by adding
+## dummy dependencies for each header file. Too bad gcc doesn't do
+## this for us directly.
+ tr ' ' '
+' < "$tmpdepfile" |
+## Some versions of gcc put a space before the `:'. On the theory
+## that the space means something, we add a space to the output as
+## well.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+hp)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+dashmd)
+ # The Java front end to gcc doesn't run cpp, so we can't use the -Wp
+ # trick. Instead we must use -M and then rename the resulting .d
+ # file. This is also the case for older versions of gcc, which
+ # don't implement -Wp.
+ if "$@" -MD; then :
+ else
+ stat=$?
+ rm -f FIXME
+ exit $stat
+ fi
+ FIXME: rewrite the file
+ ;;
+
+sgi)
+ if test "$libtool" = yes; then
+ "$@" "-Wc,-MDupdate,$tmpdepfile"
+ else
+ "$@" -MDupdate "$tmpdepfile"
+ fi
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ stat=$?
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ sed 's/^[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+ tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+#nosideeffect)
+ # This comment above is used by automake to tell side-effect
+ # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the proprocessed file to stdout, regardless of -o,
+ # because we must use -o when running libtool.
+ test -z "$dashmflag" && dashmflag=-M
+ ( IFS=" "
+ case " $* " in
+ *" --mode=compile "*) # this is libtool, let us make it quiet
+ for arg
+ do # cycle over the arguments
+ case "$arg" in
+ "--mode=compile")
+ # insert --quiet before "--mode=compile"
+ set fnord "$@" --quiet
+ shift # fnord
+ ;;
+ esac
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # "$arg"
+ done
+ ;;
+ esac
+ "$@" $dashmflag | sed 's:^[^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
+ ) &
+ proc=$!
+ "$@"
+ stat=$?
+ wait "$proc"
+ if test "$stat" != 0; then exit $stat; fi
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+dashXmstdout)
+ # This case only exists to satisfy depend.m4. It is never actually
+ # run, as this mode is specially recognized in the preamble.
+ exit 1
+ ;;
+
+makedepend)
+ # X makedepend
+ (
+ shift
+ cleared=no
+ for arg in "$@"; do
+ case $cleared in no)
+ set ""; shift
+ cleared=yes
+ esac
+ case "$arg" in
+ -D*|-I*)
+ set fnord "$@" "$arg"; shift;;
+ -*)
+ ;;
+ *)
+ set fnord "$@" "$arg"; shift;;
+ esac
+ done
+ obj_suffix="`echo $object | sed 's/^.*\././'`"
+ touch "$tmpdepfile"
+ ${MAKEDEPEND-makedepend} 2>/dev/null -o"$obj_suffix" -f"$tmpdepfile" "$@"
+ ) &
+ proc=$!
+ "$@"
+ stat=$?
+ wait "$proc"
+ if test "$stat" != 0; then exit $stat; fi
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ tail +3 "$tmpdepfile" | tr ' ' '
+' | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile" "$tmpdepfile".bak
+ ;;
+
+cpp)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the proprocessed file to stdout, regardless of -o,
+ # because we must use -o when running libtool.
+ ( IFS=" "
+ case " $* " in
+ *" --mode=compile "*)
+ for arg
+ do # cycle over the arguments
+ case "$arg" in
+ "--mode=compile")
+ # insert --quiet before "--mode=compile"
+ set fnord "$@" --quiet
+ shift # fnord
+ ;;
+ esac
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # "$arg"
+ done
+ ;;
+ esac
+ "$@" -E |
+ sed -n '/^# [0-9][0-9]* "\([^"]*\)"/ s::'"$object"'\: \1:p' > "$tmpdepfile"
+ ) &
+ proc=$!
+ "$@"
+ stat=$?
+ wait "$proc"
+ if test "$stat" != 0; then exit $stat; fi
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ sed < "$tmpdepfile" -e 's/^[^:]*: //' -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+none)
+ exec "$@"
+ ;;
+
+*)
+ echo "Unknown depmode $depmode" 1>&2
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/rts/gmp/errno.c b/rts/gmp/errno.c
new file mode 100644
index 0000000000..7dd223c19c
--- /dev/null
+++ b/rts/gmp/errno.c
@@ -0,0 +1,26 @@
+/* gmp_errno -- The largest and most complex file in GMP.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Global error flag.  The GMP_ERROR() macro in gmp-impl.h ORs
+   GMP_ERROR_* codes into this (e.g. on division by zero), then forces a
+   runtime fault; it is never reset by the library itself.  */
+int gmp_errno = 0;
diff --git a/rts/gmp/extract-dbl.c b/rts/gmp/extract-dbl.c
new file mode 100644
index 0000000000..2d70d9a3b2
--- /dev/null
+++ b/rts/gmp/extract-dbl.c
@@ -0,0 +1,187 @@
+/* __gmp_extract_double -- convert from double to array of mp_limb_t.
+
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef XDEBUG
+#undef _GMP_IEEE_FLOATS
+#endif
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+/* Extract a non-negative double in d.
+
+   Split D into limbs at RP -- rp[1..0] when BITS_PER_MP_LIMB == 64,
+   rp[2..0] when it is 32 (most significant limb highest) -- and return
+   the exponent measured in limbs.  An exact bit-field extraction is used
+   when _GMP_IEEE_FLOATS; otherwise a portable scale-by-powers-of-two
+   fallback is taken.  */
+
+int
+#if __STDC__
+__gmp_extract_double (mp_ptr rp, double d)
+#else
+__gmp_extract_double (rp, d)
+ mp_ptr rp;
+ double d;
+#endif
+{
+ long exp;
+ unsigned sc;
+ mp_limb_t manh, manl;
+
+ /* BUGS
+
+ 1. Should handle Inf and NaN in IEEE specific code.
+ 2. Handle Inf and NaN also in default code, to avoid hangs.
+ 3. Generalize to handle all BITS_PER_MP_LIMB >= 32.
+ 4. This lits is incomplete and misspelled.
+ */
+
+ /* Zero has no leading 1 bit to normalize on: clear the limbs and
+ return exponent 0. */
+ if (d == 0.0)
+ {
+ rp[0] = 0;
+ rp[1] = 0;
+#if BITS_PER_MP_LIMB == 32
+ rp[2] = 0;
+#endif
+ return 0;
+ }
+
+#if _GMP_IEEE_FLOATS
+ {
+#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
+ /* Work around alpha-specific bug in GCC 2.8.x. */
+ volatile
+#endif
+ union ieee_double_extract x;
+ x.d = d;
+ exp = x.s.exp; /* biased 11-bit exponent field */
+#if BITS_PER_MP_LIMB == 64
+ /* Left-justify the mantissa in a single limb, restoring the implicit
+ leading 1 bit of a normalized IEEE double. */
+ manl = (((mp_limb_t) 1 << 63)
+ | ((mp_limb_t) x.s.manh << 43) | ((mp_limb_t) x.s.manl << 11));
+ if (exp == 0)
+ {
+ /* Denormalized number. Don't try to be clever about this,
+ since it is not an important case to make fast. */
+ exp = 1;
+ do
+ {
+ manl = manl << 1;
+ exp--;
+ }
+ while ((mp_limb_signed_t) manl >= 0);
+ }
+#else
+ /* 32-bit limbs: mantissa spans two limbs, implicit leading 1 bit at
+ the top of manh. */
+ manh = ((mp_limb_t) 1 << 31) | (x.s.manh << 11) | (x.s.manl >> 21);
+ manl = x.s.manl << 11;
+ if (exp == 0)
+ {
+ /* Denormalized number. Don't try to be clever about this,
+ since it is not an important case to make fast. */
+ exp = 1;
+ do
+ {
+ manh = (manh << 1) | (manl >> 31);
+ manl = manl << 1;
+ exp--;
+ }
+ while ((mp_limb_signed_t) manh >= 0);
+ }
+#endif
+ exp -= 1022; /* Remove IEEE bias. */
+ }
+#else
+ {
+ /* Unknown (or known to be non-IEEE) double format. */
+ exp = 0;
+ if (d >= 1.0)
+ {
+ /* A finite d >= 1 always changes when halved; d*0.5 == d means an
+ infinity, which would make the loops below spin forever. */
+ if (d * 0.5 == d)
+ abort ();
+
+ while (d >= 32768.0)
+ {
+ d *= (1.0 / 65536.0);
+ exp += 16;
+ }
+ while (d >= 1.0)
+ {
+ d *= 0.5;
+ exp += 1;
+ }
+ }
+ else if (d < 0.5)
+ {
+ while (d < (1.0 / 65536.0))
+ {
+ d *= 65536.0;
+ exp -= 16;
+ }
+ while (d < 0.5)
+ {
+ d *= 2.0;
+ exp -= 1;
+ }
+ }
+
+ /* Here 0.5 <= d < 1; peel off one limb's worth of bits at a time. */
+ d *= MP_BASE_AS_DOUBLE;
+#if BITS_PER_MP_LIMB == 64
+ manl = d;
+#else
+ manh = d;
+ manl = (d - manh) * MP_BASE_AS_DOUBLE;
+#endif
+ }
+#endif
+
+ sc = (unsigned) exp % BITS_PER_MP_LIMB;
+
+ /* We add something here to get rounding right.  2048 is a multiple of
+ BITS_PER_MP_LIMB for both limb sizes, so the biased division rounds
+ toward minus infinity even when exp is negative. */
+ exp = (exp + 2048) / BITS_PER_MP_LIMB - 2048 / BITS_PER_MP_LIMB + 1;
+
+#if BITS_PER_MP_LIMB == 64
+ if (sc != 0)
+ {
+ rp[1] = manl >> (BITS_PER_MP_LIMB - sc);
+ rp[0] = manl << sc;
+ }
+ else
+ {
+ rp[1] = manl;
+ rp[0] = 0;
+ exp--;
+ }
+#else
+ if (sc != 0)
+ {
+ rp[2] = manh >> (BITS_PER_MP_LIMB - sc);
+ rp[1] = (manl >> (BITS_PER_MP_LIMB - sc)) | (manh << sc);
+ rp[0] = manl << sc;
+ }
+ else
+ {
+ rp[2] = manh;
+ rp[1] = manl;
+ rp[0] = 0;
+ exp--;
+ }
+#endif
+
+ return exp;
+}
diff --git a/rts/gmp/gmp-impl.h b/rts/gmp/gmp-impl.h
new file mode 100644
index 0000000000..3c7ac26e7d
--- /dev/null
+++ b/rts/gmp/gmp-impl.h
@@ -0,0 +1,1072 @@
+/* Include file for internal GNU MP types and definitions.
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+ BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "config.h"
+#include "gmp-mparam.h"
+/* #include "longlong.h" */
+
+/* When using gcc, make sure to use its builtin alloca. */
+#if ! defined (alloca) && defined (__GNUC__)
+#define alloca __builtin_alloca
+#define HAVE_ALLOCA 1
+#endif
+
+/* When using cc, do whatever necessary to allow use of alloca. For many
+ machines, this means including alloca.h. IBM's compilers need a #pragma
+ in "each module that needs to use alloca". */
+#if ! defined (alloca)
+/* We need lots of variants for MIPS, to cover all versions and perversions
+ of OSes for MIPS. */
+#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
+ || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
+ || defined (__alpha) || defined (__sparc) || defined (sparc) \
+ || defined (__ksr__)
+#include <alloca.h>
+#define HAVE_ALLOCA
+#endif
+#if defined (_IBMR2)
+#pragma alloca
+#define HAVE_ALLOCA
+#endif
+#if defined (__DECC)
+#define alloca(x) __ALLOCA(x)
+#define HAVE_ALLOCA
+#endif
+#endif
+
+#if defined (alloca)
+# ifndef HAVE_ALLOCA
+#define HAVE_ALLOCA
+# endif
+#endif
+
+#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
+#include "stack-alloc.h"
+#else
+#define TMP_DECL(m)
+#define TMP_ALLOC(x) alloca(x)
+#define TMP_MARK(m)
+#define TMP_FREE(m)
+#endif
+
+/* Allocating various types. */
+#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
+#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
+#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
+
+
+#if ! defined (__GNUC__) /* FIXME: Test for C++ compilers here,
+ __DECC understands __inline */
+#define inline /* Empty */
+#endif
+
+#define ABS(x) (x >= 0 ? x : -x)
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define MAX(h,i) ((h) > (i) ? (h) : (i))
+#define numberof(x) (sizeof (x) / sizeof ((x)[0]))
+
+/* Field access macros. */
+#define SIZ(x) ((x)->_mp_size)
+#define ABSIZ(x) ABS (SIZ (x))
+#define PTR(x) ((x)->_mp_d)
+#define LIMBS(x) ((x)->_mp_d)
+#define EXP(x) ((x)->_mp_exp)
+#define PREC(x) ((x)->_mp_prec)
+#define ALLOC(x) ((x)->_mp_alloc)
+
+/* Extra casts because shorts are promoted to ints by "~" and "<<". "-1"
+ rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
+ about arithmetic overflow. */
+#define UNSIGNED_TYPE_MAX(type) ((type) ~ (type) 0)
+#define UNSIGNED_TYPE_HIGHBIT(type) ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
+#define SIGNED_TYPE_MIN(type) (((type) -1) << (8*sizeof(type)-1))
+#define SIGNED_TYPE_MAX(type) ((type) ~ SIGNED_TYPE_MIN(type))
+#define SIGNED_TYPE_HIGHBIT(type) SIGNED_TYPE_MIN(type)
+
+#define MP_LIMB_T_MAX UNSIGNED_TYPE_MAX (mp_limb_t)
+#define MP_LIMB_T_HIGHBIT UNSIGNED_TYPE_HIGHBIT (mp_limb_t)
+
+#define MP_SIZE_T_MAX SIGNED_TYPE_MAX (mp_size_t)
+
+#ifndef ULONG_MAX
+#define ULONG_MAX UNSIGNED_TYPE_MAX (unsigned long)
+#endif
+#define ULONG_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned long)
+#define LONG_HIGHBIT SIGNED_TYPE_HIGHBIT (long)
+#ifndef LONG_MAX
+#define LONG_MAX SIGNED_TYPE_MAX (long)
+#endif
+
+#ifndef USHORT_MAX
+#define USHORT_MAX UNSIGNED_TYPE_MAX (unsigned short)
+#endif
+#define USHORT_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned short)
+#define SHORT_HIGHBIT SIGNED_TYPE_HIGHBIT (short)
+#ifndef SHORT_MAX
+#define SHORT_MAX SIGNED_TYPE_MAX (short)
+#endif
+
+
+/* Swap macros. */
+
+#define MP_LIMB_T_SWAP(x, y) \
+ do { \
+ mp_limb_t __mp_limb_t_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_limb_t_swap__tmp; \
+ } while (0)
+#define MP_SIZE_T_SWAP(x, y) \
+ do { \
+ mp_size_t __mp_size_t_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_size_t_swap__tmp; \
+ } while (0)
+
+#define MP_PTR_SWAP(x, y) \
+ do { \
+ mp_ptr __mp_ptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_ptr_swap__tmp; \
+ } while (0)
+#define MP_SRCPTR_SWAP(x, y) \
+ do { \
+ mp_srcptr __mp_srcptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_srcptr_swap__tmp; \
+ } while (0)
+
+#define MPN_PTR_SWAP(xp,xs, yp,ys) \
+ do { \
+ MP_PTR_SWAP (xp, yp); \
+ MP_SIZE_T_SWAP (xs, ys); \
+ } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
+ do { \
+ MP_SRCPTR_SWAP (xp, yp); \
+ MP_SIZE_T_SWAP (xs, ys); \
+ } while(0)
+
+#define MPZ_PTR_SWAP(x, y) \
+ do { \
+ mpz_ptr __mpz_ptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mpz_ptr_swap__tmp; \
+ } while (0)
+#define MPZ_SRCPTR_SWAP(x, y) \
+ do { \
+ mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mpz_srcptr_swap__tmp; \
+ } while (0)
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* FIXME: These are purely internal, so do a search and replace to change
+ them to __gmp forms, rather than using these macros. */
+#define _mp_allocate_func __gmp_allocate_func
+#define _mp_reallocate_func __gmp_reallocate_func
+#define _mp_free_func __gmp_free_func
+#define _mp_default_allocate __gmp_default_allocate
+#define _mp_default_reallocate __gmp_default_reallocate
+#define _mp_default_free __gmp_default_free
+
+extern void * (*_mp_allocate_func) _PROTO ((size_t));
+extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
+extern void (*_mp_free_func) _PROTO ((void *, size_t));
+
+void *_mp_default_allocate _PROTO ((size_t));
+void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
+void _mp_default_free _PROTO ((void *, size_t));
+
+#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
+ ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
+#define _MP_ALLOCATE_FUNC_LIMBS(n) _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)
+
+#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
+#define _MP_FREE_FUNC_LIMBS(p,n) _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
+
+
+#if (__STDC__-0) || defined (__cplusplus)
+
+#else
+
+#define const /* Empty */
+#define signed /* Empty */
+
+#endif
+
+#if defined (__GNUC__) && defined (__i386__)
+#if 0 /* check that these actually improve things */
+#define MPN_COPY_INCR(DST, SRC, N) \
+ __asm__ ("cld\n\trep\n\tmovsl" : : \
+ "D" (DST), "S" (SRC), "c" (N) : \
+ "cx", "di", "si", "memory")
+#define MPN_COPY_DECR(DST, SRC, N) \
+ __asm__ ("std\n\trep\n\tmovsl" : : \
+ "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
+ "cx", "di", "si", "memory")
+#define MPN_NORMALIZE_NOT_ZERO(P, N) \
+ do { \
+ __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \
+ "a" (0), "D" ((P) + (N) - 1), "0" (N) : \
+ "cx", "di"); \
+ (N)++; \
+ } while (0)
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_copyi
+#define mpn_copyi __MPN(copyi)
+void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+/* Remap names of internal mpn functions. */
+#define __clz_tab __MPN(clz_tab)
+#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
+#define mpn_reciprocal __MPN(reciprocal)
+
+#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
+#define mpn_bz_divrem_n __MPN(bz_divrem_n)
+/* #define mpn_tdiv_q __MPN(tdiv_q) */
+
+#define mpn_kara_mul_n __MPN(kara_mul_n)
+void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_kara_sqr_n __MPN(kara_sqr_n)
+void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_toom3_mul_n __MPN(toom3_mul_n)
+void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
+
+#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
+void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+
+#define mpn_fft_best_k __MPN(fft_best_k)
+int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));
+
+#define mpn_mul_fft __MPN(mul_fft)
+void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml,
+ int k));
+
+#define mpn_mul_fft_full __MPN(mul_fft_full)
+void mpn_mul_fft_full _PROTO ((mp_ptr op,
+ mp_srcptr n, mp_size_t nl,
+ mp_srcptr m, mp_size_t ml));
+
+#define mpn_fft_next_size __MPN(fft_next_size)
+mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));
+
+mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
+
+/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed. */
+#ifndef MPN_COPY_INCR
+#if HAVE_NATIVE_mpn_copyi
+#define MPN_COPY_INCR(DST, SRC, NLIMBS) mpn_copyi (DST, SRC, NLIMBS)
+#else
+#define MPN_COPY_INCR(DST, SRC, NLIMBS) \
+ do { \
+ mp_size_t __i; \
+ for (__i = 0; __i < (NLIMBS); __i++) \
+ (DST)[__i] = (SRC)[__i]; \
+ } while (0)
+#endif
+#endif
+
+#if HAVE_NATIVE_mpn_copyd
+#define mpn_copyd __MPN(copyd)
+void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#endif
+
+/* NLIMBS==0 allowed */
+#ifndef MPN_COPY_DECR
+#if HAVE_NATIVE_mpn_copyd
+#define MPN_COPY_DECR(DST, SRC, NLIMBS) mpn_copyd (DST, SRC, NLIMBS)
+#else
+#define MPN_COPY_DECR(DST, SRC, NLIMBS) \
+ do { \
+ mp_size_t __i; \
+ for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
+ (DST)[__i] = (SRC)[__i]; \
+ } while (0)
+#endif
+#endif
+
+/* Define MPN_COPY for vector computers. Since #pragma cannot be in a macro,
+ rely on function inlining. */
+#if defined (_CRAY) || defined (__uxp__)
+/* Copy n limbs from s to d, element by element.  A function (not a macro)
+   so the vectorization pragmas below can take effect at each inlined call
+   site; K&R-style parameter list for the old vendor compilers. */
+static inline void
+_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
+{
+ int i; /* Faster for Cray with plain int */
+#pragma _CRI ivdep /* Cray PVP systems */
+#pragma loop noalias d,s /* Fujitsu VPP systems */
+ for (i = 0; i < n; i++)
+ d[i] = s[i];
+}
+#define MPN_COPY _MPN_COPY
+#endif
+
+#ifndef MPN_COPY
+#define MPN_COPY MPN_COPY_INCR
+#endif
+
+/* Zero NLIMBS *limbs* AT DST. */
+#ifndef MPN_ZERO
+#define MPN_ZERO(DST, NLIMBS) \
+ do { \
+ mp_size_t __i; \
+ for (__i = 0; __i < (NLIMBS); __i++) \
+ (DST)[__i] = 0; \
+ } while (0)
+#endif
+
+#ifndef MPN_NORMALIZE
+#define MPN_NORMALIZE(DST, NLIMBS) \
+ do { \
+ while (NLIMBS > 0) \
+ { \
+ if ((DST)[(NLIMBS) - 1] != 0) \
+ break; \
+ NLIMBS--; \
+ } \
+ } while (0)
+#endif
+#ifndef MPN_NORMALIZE_NOT_ZERO
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
+ do { \
+ while (1) \
+ { \
+ if ((DST)[(NLIMBS) - 1] != 0) \
+ break; \
+ NLIMBS--; \
+ } \
+ } while (0)
+#endif
+
+/* Strip least significant zero limbs from ptr,size by incrementing ptr and
+ decrementing size. The number in ptr,size must be non-zero, ie. size!=0
+ and somewhere a non-zero limb. */
+#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
+ do \
+ { \
+ ASSERT ((size) != 0); \
+ while ((ptr)[0] == 0) \
+ { \
+ (ptr)++; \
+ (size)--; \
+ ASSERT (size >= 0); \
+ } \
+ } \
+ while (0)
+
+/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
+ temporary variable; it will be automatically cleared out at function
+ return. We use __x here to make it possible to accept both mpz_ptr and
+ mpz_t arguments. */
+#define MPZ_TMP_INIT(X, NLIMBS) \
+ do { \
+ mpz_ptr __x = (X); \
+ __x->_mp_alloc = (NLIMBS); \
+ __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
+ } while (0)
+
+/* Realloc for an mpz_t WHAT if it has fewer than NEEDED limbs. */
+#define MPZ_REALLOC(what,needed) \
+ do { \
+ if ((needed) > ALLOC (what)) \
+ _mpz_realloc (what, needed); \
+ } while (0)
+
+/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
+ value which is good on most machines. */
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 32
+#endif
+
+/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
+ value which is good on most machines. */
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 256
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
+#endif
+
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
+#endif
+
+/* First k to use for an FFT modF multiply. A modF FFT is an order
+ log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
+ whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
+#define FFT_FIRST_K 4
+
+/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD (TOOM3_MUL_THRESHOLD * 3)
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD (TOOM3_SQR_THRESHOLD * 3)
+#endif
+
+/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
+ will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
+ NxN->2N multiply and not recursing into itself is an order
+ log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
+ is the first better than toom3. */
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD (FFT_MODF_MUL_THRESHOLD * 10)
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD (FFT_MODF_SQR_THRESHOLD * 10)
+#endif
+
+/* Table of thresholds for successive modF FFT "k"s. The first entry is
+ where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
+ etc. See mpn_fft_best_k(). */
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE \
+ { TOOM3_MUL_THRESHOLD * 4, /* k=5 */ \
+ TOOM3_MUL_THRESHOLD * 8, /* k=6 */ \
+ TOOM3_MUL_THRESHOLD * 16, /* k=7 */ \
+ TOOM3_MUL_THRESHOLD * 32, /* k=8 */ \
+ TOOM3_MUL_THRESHOLD * 96, /* k=9 */ \
+ TOOM3_MUL_THRESHOLD * 288, /* k=10 */ \
+ 0 }
+#endif
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE \
+ { TOOM3_SQR_THRESHOLD * 4, /* k=5 */ \
+ TOOM3_SQR_THRESHOLD * 8, /* k=6 */ \
+ TOOM3_SQR_THRESHOLD * 16, /* k=7 */ \
+ TOOM3_SQR_THRESHOLD * 32, /* k=8 */ \
+ TOOM3_SQR_THRESHOLD * 96, /* k=9 */ \
+ TOOM3_SQR_THRESHOLD * 288, /* k=10 */ \
+ 0 }
+#endif
+
+#ifndef FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS static const
+#endif
+
+#define MPN_FFT_TABLE_SIZE 16
+
+
+/* Return non-zero if xp,xsize and yp,ysize overlap.
+ If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
+ overlap. If both these are false, there's an overlap. */
+#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
+ ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
+
+
+/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
+ ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
+ does it always. Generally assertions are meant for development, but
+ might help when looking for a problem later too.
+
+ ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
+ assertion checking is disabled, the expression is still evaluated. This
+ is meant for use with routines like mpn_add_n() where the return value
+ represents a carry or whatever that shouldn't occur. For example,
+ ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
+
+#ifdef __LINE__
+#define ASSERT_LINE __LINE__
+#else
+#define ASSERT_LINE -1
+#endif
+
+#ifdef __FILE__
+#define ASSERT_FILE __FILE__
+#else
+#define ASSERT_FILE ""
+#endif
+
+int __gmp_assert_fail _PROTO((const char *filename, int linenum,
+ const char *expr));
+
+#if HAVE_STRINGIZE
+#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
+#else
+#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
+#endif
+
+#if HAVE_VOID
+#define CAST_TO_VOID (void)
+#else
+#define CAST_TO_VOID
+#endif
+
+#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))
+
+#if WANT_ASSERT
+#define ASSERT(expr) ASSERT_ALWAYS (expr)
+#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
+
+#else
+#define ASSERT(expr) (CAST_TO_VOID 0)
+#define ASSERT_NOCARRY(expr) (expr)
+#endif
+
+
+#if HAVE_NATIVE_mpn_com_n
+#define mpn_com_n __MPN(com_n)
+void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_com_n(d,s,n) \
+ do \
+ { \
+ mp_ptr __d = (d); \
+ mp_srcptr __s = (s); \
+ mp_size_t __n = (n); \
+ do \
+ *__d++ = ~ *__s++; \
+ while (--__n); \
+ } \
+ while (0)
+#endif
+
+#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op) \
+ do \
+ { \
+ mp_ptr __d = (d); \
+ mp_srcptr __s1 = (s1); \
+ mp_srcptr __s2 = (s2); \
+ mp_size_t __n = (n); \
+ do \
+ *__d++ = dop (*__s1++ op s2op *__s2++); \
+ while (--__n); \
+ } \
+ while (0)
+
+#if HAVE_NATIVE_mpn_and_n
+#define mpn_and_n __MPN(and_n)
+void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
+#endif
+
+#if HAVE_NATIVE_mpn_andn_n
+#define mpn_andn_n __MPN(andn_n)
+void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
+#endif
+
+#if HAVE_NATIVE_mpn_nand_n
+#define mpn_nand_n __MPN(nand_n)
+void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
+#endif
+
+#if HAVE_NATIVE_mpn_ior_n
+#define mpn_ior_n __MPN(ior_n)
+void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
+#endif
+
+#if HAVE_NATIVE_mpn_iorn_n
+#define mpn_iorn_n __MPN(iorn_n)
+void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
+#endif
+
+#if HAVE_NATIVE_mpn_nior_n
+#define mpn_nior_n __MPN(nior_n)
+void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
+#endif
+
+#if HAVE_NATIVE_mpn_xor_n
+#define mpn_xor_n __MPN(xor_n)
+void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
+#endif
+
+#if HAVE_NATIVE_mpn_xnor_n
+#define mpn_xnor_n __MPN(xnor_n)
+void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+#else
+#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
+#endif
+
+/* Structure for conversion between internal binary format and
+ strings in base 2..36. */
+struct bases
+{
+ /* Number of digits in the conversion base that always fits in an mp_limb_t.
+ For example, for base 10 on a machine where a mp_limb_t has 32 bits this
+ is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
+ int chars_per_limb;
+
+ /* log(2)/log(conversion_base) */
+ double chars_per_bit_exactly;
+
+ /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
+ factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
+ i.e. the number of bits used to represent each digit in the base. */
+ mp_limb_t big_base;
+
+ /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
+ fixed-point number. Instead of dividing by big_base an application can
+ choose to multiply by big_base_inverted. */
+ mp_limb_t big_base_inverted;
+};
+
+#define __mp_bases __MPN(mp_bases)
+extern const struct bases __mp_bases[];
+extern mp_size_t __gmp_default_fp_limb_precision;
+
+#if defined (__i386__)
+#define TARGET_REGISTER_STARVED 1
+#else
+#define TARGET_REGISTER_STARVED 0
+#endif
+
+/* Use a library function for invert_limb, if available. */
+#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
+#define mpn_invert_limb __MPN(invert_limb)
+mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
+#define invert_limb(invxl,xl) (invxl = __MPN(invert_limb) (xl))
+#endif
+
+#ifndef invert_limb
+#define invert_limb(invxl,xl) \
+ do { \
+ mp_limb_t dummy; \
+ if (xl << 1 == 0) \
+ invxl = ~(mp_limb_t) 0; \
+ else \
+ udiv_qrnnd (invxl, dummy, -xl, 0, xl); \
+ } while (0)
+#endif
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ If this would yield overflow, DI should be the largest possible number
+ (i.e., only ones). For correct operation, the most significant bit of D
+ has to be set. Put the quotient in Q and the remainder in R. */
+#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
+ do { \
+ mp_limb_t _q, _ql, _r; \
+ mp_limb_t _xh, _xl; \
+ umul_ppmm (_q, _ql, (nh), (di)); \
+ _q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */\
+ umul_ppmm (_xh, _xl, _q, (d)); \
+ sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
+ if (_xh != 0) \
+ { \
+ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
+ _q += 1; \
+ if (_xh != 0) \
+ { \
+ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
+ _q += 1; \
+ } \
+ } \
+ if (_r >= (d)) \
+ { \
+ _r -= (d); \
+ _q += 1; \
+ } \
+ (r) = _r; \
+ (q) = _q; \
+ } while (0)
+/* Like udiv_qrnnd_preinv, but for any value D. DNORM is D shifted left
+ so that its most significant bit is set. LGUP is ceil(log2(D)). */
+/* NOTE (review): the first shift originally read "(l - 1)", but "l" is
+   not a parameter of this macro and would be an undeclared identifier in
+   every expansion; the intended operand is LGUP.  The double shift
+   ">> 1 >> (lgup - 1)" avoids an undefined full-width shift when
+   lgup == BITS_PER_MP_LIMB. */
+#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
+  do { \
+    mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
+    mp_limb_t _xh, _xl; \
+    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) \
+          + (((nl) >> 1) >> ((lgup) - 1)); \
+    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
+    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
+    _nadj = _n10 + (_n1 & (dnorm)); \
+    umul_ppmm (_xh, _xl, di, _n2 - _n1); \
+    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
+    _q1 = ~(_n2 + _xh); \
+    umul_ppmm (_xh, _xl, _q1, d); \
+    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
+    _xh -= (d); \
+    (r) = _xl + ((d) & _xh); \
+    (q) = _xh - _q1; \
+  } while (0)
+/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
+ version to use. */
+#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
+ do { \
+ mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
+ mp_limb_t _xh, _xl; \
+ _n2 = (nh); \
+ _n10 = (nl); \
+ _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
+ _nadj = _n10 + (_n1 & (d)); \
+ umul_ppmm (_xh, _xl, di, _n2 - _n1); \
+ add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
+ _q1 = ~(_n2 + _xh); \
+ umul_ppmm (_xh, _xl, _q1, d); \
+ add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
+ _xh -= (d); \
+ (r) = _xl + ((d) & _xh); \
+ (q) = _xh - _q1; \
+ } while (0)
+
+
+/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
+ 2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
+ "n" must be odd (otherwise such an inverse doesn't exist).
+
+ This is not to be confused with invert_limb(), which is completely
+ different.
+
+ The table lookup gives an inverse with the low 8 bits valid, and each
+ multiply step doubles the number of bits. See Jebelean's exact division
+ paper, end of section 4 (reference in gmp.texi). */
+
+#define modlimb_invert_table __gmp_modlimb_invert_table
+extern const unsigned char modlimb_invert_table[128];
+
+#if BITS_PER_MP_LIMB <= 32
+#define modlimb_invert(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); \
+ __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+#endif
+
+#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
+#define modlimb_invert(inv,n) \
+ do { \
+ mp_limb_t __n = (n); \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); \
+ __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
+ __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
+ ASSERT (__inv * __n == 1); \
+ (inv) = __inv; \
+ } while (0)
+#endif
+
+
+/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
+ between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
+ until then. */
+#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
+/* Define stuff for longlong.h. */
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
+typedef int SItype __attribute__ ((mode (SI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#if defined _LONGLONG || defined _LONG_LONG_LIMB
+typedef long long int DItype;
+typedef unsigned long long int UDItype;
+#else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
+typedef long int DItype;
+typedef unsigned long int UDItype;
+#endif
+#endif
+
+typedef mp_limb_t UWtype;
+typedef unsigned int UHWtype;
+#define W_TYPE_SIZE BITS_PER_MP_LIMB
+
+/* Define ieee_double_extract and _GMP_IEEE_FLOATS. */
+
+#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
+/* Special case for little endian ARM since floats remain in big-endian. */
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned int manh:20;
+ unsigned int exp:11;
+ unsigned int sig:1;
+ unsigned int manl:32;
+ } s;
+ double d;
+};
+#else
+#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
+ || defined (__alpha) \
+ || defined (__clipper__) \
+ || defined (__cris) \
+ || defined (__i386__) \
+ || defined (__i860__) \
+ || defined (__i960__) \
+ || defined (MIPSEL) || defined (_MIPSEL) \
+ || defined (__ns32000__) \
+ || defined (__WINNT) || defined (_WIN32)
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned int manl:32;
+ unsigned int manh:20;
+ unsigned int exp:11;
+ unsigned int sig:1;
+ } s;
+ double d;
+};
+#else /* Need this as an #else since the tests aren't made exclusive. */
+#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
+ || defined (__a29k__) || defined (_AM29K) \
+ || defined (__arm__) \
+ || (defined (__convex__) && defined (_IEEE_FLOAT_)) \
+ || defined (_CRAYMPP) \
+ || defined (__i370__) || defined (__mvs__) \
+ || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
+ || defined(mc68020) \
+ || defined (__m88000__) \
+ || defined (MIPSEB) || defined (_MIPSEB) \
+ || defined (__hppa) || defined (__hppa__) \
+ || defined (__pyr__) \
+ || defined (__ibm032__) \
+ || defined (_IBMR2) || defined (_ARCH_PPC) \
+ || defined (__sh__) \
+ || defined (__sparc) || defined (sparc) \
+ || defined (__we32k__)
+#define _GMP_IEEE_FLOATS 1
+union ieee_double_extract
+{
+ struct
+ {
+ unsigned int sig:1;
+ unsigned int exp:11;
+ unsigned int manh:20;
+ unsigned int manl:32;
+ } s;
+ double d;
+};
+#endif
+#endif
+#endif
+
+/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
+ SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
+ presumably due to treating the mp_limb_t constant as signed rather than
+ unsigned. */
+#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
+#if BITS_PER_MP_LIMB == 64
+#define LIMBS_PER_DOUBLE 2
+#else
+#define LIMBS_PER_DOUBLE 3
+#endif
+
+double __gmp_scale2 _PROTO ((double, int));
+int __gmp_extract_double _PROTO ((mp_ptr, double));
+
+extern int __gmp_junk;
+extern const int __gmp_0;
+#define GMP_ERROR(code) (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
+#define DIVIDE_BY_ZERO GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
+#define SQRT_OF_NEGATIVE GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
+
+#if defined _LONG_LONG_LIMB
+#if defined (__STDC__)
+#define CNST_LIMB(C) C##LL
+#else
+#define CNST_LIMB(C) C/**/LL
+#endif
+#else /* not _LONG_LONG_LIMB */
+#if defined (__STDC__)
+#define CNST_LIMB(C) C##L
+#else
+#define CNST_LIMB(C) C/**/L
+#endif
+#endif /* _LONG_LONG_LIMB */
+
+/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
+#if BITS_PER_MP_LIMB == 32
+#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
+#define PP_INVERTED 0x53E5645CL
+#define PP_MAXPRIME 29
+#define PP_MASK 0x208A28A8L
+#endif
+
+#if BITS_PER_MP_LIMB == 64
+#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
+#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
+#define PP_MAXPRIME 53
+#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
+#endif
+
+
+/* BIT1 means a result value in bit 1 (second least significant bit), with a
+ zero bit representing +1 and a one bit representing -1. Bits other than
+ bit 1 are garbage.
+
+ JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
+ and their speed is important. Expressions are used rather than
+ conditionals to accumulate sign changes, which effectively means XORs
+ instead of conditional JUMPs. */
+
+/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_S0(a) \
+ (((a) == 1) | ((a) == -1))
+
+/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
+#define JACOBI_U0(a) \
+ ((a) == 1)
+
+/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
+ An mpz_t always has at least one limb of allocated space, so the fetch of
+ the low limb is valid. */
+#define JACOBI_Z0(a) \
+ (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))
+
+/* Convert a bit1 to +1 or -1. */
+#define JACOBI_BIT1_TO_PN(result_bit1) \
+ (1 - ((result_bit1) & 2))
+
+/* (2/b), with b unsigned and odd;
+ is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
+ hence obtained from (b>>1)^b */
+#define JACOBI_TWO_U_BIT1(b) \
+ (ASSERT (b & 1), (((b) >> 1) ^ (b)))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U_BIT1(twos, b) \
+ (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
+
+/* (2/b)^twos, with b unsigned and odd */
+#define JACOBI_TWOS_U(twos, b) \
+ (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
+
+/* (a/b) effect due to sign of a: signed/unsigned, b odd;
+ is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
+#define JACOBI_ASGN_SU_BIT1(a, b) \
+ ((((a) < 0) << 1) & (b))
+
+/* (a/b) effect due to sign of b: signed/mpz;
+ is -1 if a and b both negative, +1 otherwise */
+#define JACOBI_BSGN_SZ_BIT1(a, b) \
+ ((((a) < 0) & (SIZ(b) < 0)) << 1)
+
+/* (a/b) effect due to sign of b: mpz/signed */
+#define JACOBI_BSGN_ZS_BIT1(a, b) \
+ JACOBI_BSGN_SZ_BIT1(b, a)
+
+/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
+ Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
+ both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
+ because this is used in a couple of places with only bit 1 of a or b
+ valid. */
+#define JACOBI_RECIP_UU_BIT1(a, b) \
+ ((a) & (b))
+
+
+/* For testing and debugging. */
+#define MPZ_CHECK_FORMAT(z) \
+ (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0), \
+ ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
+#define MPZ_PROVOKE_REALLOC(z) \
+ do { ALLOC(z) = ABSIZ(z); } while (0)
+
+
+#if TUNE_PROGRAM_BUILD
+/* Some extras wanted when recompiling some .c files for use by the tune
+ program. Not part of a normal build. */
+
+extern mp_size_t mul_threshold[];
+extern mp_size_t fft_modf_mul_threshold;
+extern mp_size_t sqr_threshold[];
+extern mp_size_t fft_modf_sqr_threshold;
+extern mp_size_t bz_threshold[];
+extern mp_size_t fib_threshold[];
+extern mp_size_t powm_threshold[];
+extern mp_size_t gcd_accel_threshold[];
+extern mp_size_t gcdext_threshold[];
+
+#undef KARATSUBA_MUL_THRESHOLD
+#undef TOOM3_MUL_THRESHOLD
+#undef FFT_MUL_TABLE
+#undef FFT_MUL_THRESHOLD
+#undef FFT_MODF_MUL_THRESHOLD
+#undef KARATSUBA_SQR_THRESHOLD
+#undef TOOM3_SQR_THRESHOLD
+#undef FFT_SQR_TABLE
+#undef FFT_SQR_THRESHOLD
+#undef FFT_MODF_SQR_THRESHOLD
+#undef BZ_THRESHOLD
+#undef FIB_THRESHOLD
+#undef POWM_THRESHOLD
+#undef GCD_ACCEL_THRESHOLD
+#undef GCDEXT_THRESHOLD
+
+#define KARATSUBA_MUL_THRESHOLD mul_threshold[0]
+#define TOOM3_MUL_THRESHOLD mul_threshold[1]
+#define FFT_MUL_TABLE 0
+#define FFT_MUL_THRESHOLD mul_threshold[2]
+#define FFT_MODF_MUL_THRESHOLD fft_modf_mul_threshold
+#define KARATSUBA_SQR_THRESHOLD sqr_threshold[0]
+#define TOOM3_SQR_THRESHOLD sqr_threshold[1]
+#define FFT_SQR_TABLE 0
+#define FFT_SQR_THRESHOLD sqr_threshold[2]
+#define FFT_MODF_SQR_THRESHOLD fft_modf_sqr_threshold
+#define BZ_THRESHOLD bz_threshold[0]
+#define FIB_THRESHOLD fib_threshold[0]
+#define POWM_THRESHOLD powm_threshold[0]
+#define GCD_ACCEL_THRESHOLD gcd_accel_threshold[0]
+#define GCDEXT_THRESHOLD gcdext_threshold[0]
+
+#define TOOM3_MUL_THRESHOLD_LIMIT 700
+
+#undef FFT_TABLE_ATTRS
+#define FFT_TABLE_ATTRS
+extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
+
+#endif /* TUNE_PROGRAM_BUILD */
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/rts/gmp/gmp.h b/rts/gmp/gmp.h
new file mode 100644
index 0000000000..0f1b9510e9
--- /dev/null
+++ b/rts/gmp/gmp.h
@@ -0,0 +1,1083 @@
+/* gmp.h -- Definitions for GNU multiple precision functions.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifndef __GMP_H__
+
+#ifndef __GNU_MP__ /* to allow inclusion of both gmp.h and mp.h */
+#define __GNU_MP__ 2
+#define __need_size_t
+#include <stddef.h>
+#undef __need_size_t
+
+#ifndef STG_H
+/* Get DLL_IMPORT */
+#include "../../includes/ghcconfig.h"
+#include "../../includes/StgDLL.h"
+#endif
+
+#if defined (__mips) && defined (_ABIN32)
+/* Force the use of 64-bit limbs for all 64-bit MIPS CPUs if ABI permits. */
+#define _LONG_LONG_LIMB
+#endif
+
+#if (__STDC__-0) || defined (__cplusplus)
+#define __gmp_const const
+#define __gmp_signed signed
+#else
+#define __gmp_const
+#define __gmp_signed
+#endif
+
+#if defined (__GNUC__)
+#define __gmp_inline __inline__
+#else
+#define __gmp_inline
+#endif
+
+#ifndef _EXTERN_INLINE
+#ifdef __GNUC__
+#define _EXTERN_INLINE extern __inline__
+#else
+#define _EXTERN_INLINE static
+#endif
+#endif
+
+#ifdef _SHORT_LIMB
+typedef unsigned int mp_limb_t;
+typedef int mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb_t;
+typedef long long int mp_limb_signed_t;
+#else
+typedef unsigned long int mp_limb_t;
+typedef long int mp_limb_signed_t;
+#endif
+#endif
+
+typedef mp_limb_t * mp_ptr;
+typedef __gmp_const mp_limb_t * mp_srcptr;
+#if defined (_CRAY) && ! defined (_CRAYMPP)
+/* plain `int' is much faster (48 bits) */
+typedef int mp_size_t;
+typedef int mp_exp_t;
+#else
+typedef long int mp_size_t;
+typedef long int mp_exp_t;
+#endif
+
+typedef struct
+{
+ int _mp_alloc; /* Number of *limbs* allocated and pointed
+ to by the _mp_d field. */
+ int _mp_size; /* abs(_mp_size) is the number of limbs the
+ last field points to. If _mp_size is
+ negative this is a negative number. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpz_struct;
+#endif /* __GNU_MP__ */
+
+typedef __mpz_struct MP_INT;
+typedef __mpz_struct mpz_t[1];
+
+typedef struct
+{
+ __mpz_struct _mp_num;
+ __mpz_struct _mp_den;
+} __mpq_struct;
+
+typedef __mpq_struct MP_RAT;
+typedef __mpq_struct mpq_t[1];
+
+typedef struct
+{
+ int _mp_prec; /* Max precision, in number of `mp_limb_t's.
+ Set by mpf_init and modified by
+ mpf_set_prec. The area pointed to by the
+ _mp_d field contains `prec' + 1 limbs. */
+ int _mp_size; /* abs(_mp_size) is the number of limbs the
+ last field points to. If _mp_size is
+ negative this is a negative number. */
+ mp_exp_t _mp_exp; /* Exponent, in the base of `mp_limb_t'. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpf_struct;
+
+/* typedef __mpf_struct MP_FLOAT; */
+typedef __mpf_struct mpf_t[1];
+
+/* Available random number generation algorithms. */
+typedef enum
+{
+ GMP_RAND_ALG_DEFAULT = 0,
+ GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT /* Linear congruential. */
+} gmp_randalg_t;
+
+/* Linear congruential data struct. */
+typedef struct {
+ mpz_t a; /* Multiplier. */
+ unsigned long int c; /* Adder. */
+ mpz_t m; /* Modulus (valid only if m2exp == 0). */
+ unsigned long int m2exp; /* If != 0, modulus is 2 ^ m2exp. */
+} __gmp_randata_lc;
+
+/* Random state struct. */
+typedef struct
+{
+ mpz_t seed; /* Current seed. */
+ gmp_randalg_t alg; /* Algorithm used. */
+ union { /* Algorithm specific data. */
+ __gmp_randata_lc *lc; /* Linear congruential. */
+ } algdata;
+} __gmp_randstate_struct;
+typedef __gmp_randstate_struct gmp_randstate_t[1];
+
+/* Types for function declarations in gmp files. */
+/* ??? Should not pollute user name space with these ??? */
+typedef __gmp_const __mpz_struct *mpz_srcptr;
+typedef __mpz_struct *mpz_ptr;
+typedef __gmp_const __mpf_struct *mpf_srcptr;
+typedef __mpf_struct *mpf_ptr;
+typedef __gmp_const __mpq_struct *mpq_srcptr;
+typedef __mpq_struct *mpq_ptr;
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+#ifndef __MPN
+/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */
+#if defined (__STDC__) || defined (__cplusplus)
+#define __MPN(x) __gmpn_##x
+#else
+#define __MPN(x) __gmpn_/**/x
+#endif
+#endif
+
+#if defined (FILE) || defined (H_STDIO) || defined (_H_STDIO) \
+ || defined (_STDIO_H) || defined (_STDIO_H_) || defined (__STDIO_H__) \
+ || defined (_STDIO_INCLUDED) || defined (__dj_include_stdio_h_)
+#define _GMP_H_HAVE_FILE 1
+#endif
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+DLL_IMPORT void mp_set_memory_functions _PROTO ((void *(*) (size_t),
+ void *(*) (void *, size_t, size_t),
+ void (*) (void *, size_t)));
+
+#define mp_bits_per_limb __gmp_bits_per_limb
+DLL_IMPORT extern __gmp_const int mp_bits_per_limb;
+
+#if defined (__cplusplus)
+}
+#endif
+
+
+/**************** Random number routines. ****************/
+
+#define _gmp_rand __gmp_rand
+#define gmp_randinit __gmp_randinit
+#define gmp_randinit_lc __gmp_randinit_lc
+#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp
+#define gmp_randseed __gmp_randseed
+#define gmp_randseed_ui __gmp_randseed_ui
+#define gmp_randclear __gmp_randclear
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+DLL_IMPORT void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));
+DLL_IMPORT void gmp_randinit _PROTO ((gmp_randstate_t, gmp_randalg_t, ...));
+DLL_IMPORT void gmp_randinit_lc _PROTO ((gmp_randstate_t, mpz_t, unsigned long int,
+ mpz_t));
+DLL_IMPORT void gmp_randinit_lc_2exp _PROTO ((gmp_randstate_t, mpz_t, unsigned long int,
+ unsigned long int));
+DLL_IMPORT void gmp_randseed _PROTO ((gmp_randstate_t, mpz_t));
+DLL_IMPORT void gmp_randseed_ui _PROTO ((gmp_randstate_t, unsigned long int));
+DLL_IMPORT void gmp_randclear _PROTO ((gmp_randstate_t));
+
+#if defined (__cplusplus)
+}
+#endif
+
+/**************** Integer (i.e. Z) routines. ****************/
+
+#define _mpz_realloc __gmpz_realloc
+#define mpz_realloc __gmpz_realloc
+#define mpz_abs __gmpz_abs
+#define mpz_add __gmpz_add
+#define mpz_add_ui __gmpz_add_ui
+#define mpz_addmul_ui __gmpz_addmul_ui
+#define mpz_and __gmpz_and
+#define mpz_array_init __gmpz_array_init
+#define mpz_bin_ui __gmpz_bin_ui
+#define mpz_bin_uiui __gmpz_bin_uiui
+#define mpz_cdiv_q __gmpz_cdiv_q
+#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui
+#define mpz_cdiv_qr __gmpz_cdiv_qr
+#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui
+#define mpz_cdiv_r __gmpz_cdiv_r
+#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui
+#define mpz_cdiv_ui __gmpz_cdiv_ui
+#define mpz_clear __gmpz_clear
+#define mpz_clrbit __gmpz_clrbit
+#define mpz_cmp __gmpz_cmp
+#define _mpz_cmp_si __gmpz_cmp_si
+#define _mpz_cmp_ui __gmpz_cmp_ui
+#define mpz_cmpabs __gmpz_cmpabs
+#define mpz_cmpabs_ui __gmpz_cmpabs_ui
+#define mpz_com __gmpz_com
+#define mpz_divexact __gmpz_divexact
+#define mpz_dump __gmpz_dump
+#define mpz_fac_ui __gmpz_fac_ui
+#define mpz_fdiv_q __gmpz_fdiv_q
+#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp
+#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui
+#define mpz_fdiv_qr __gmpz_fdiv_qr
+#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui
+#define mpz_fdiv_r __gmpz_fdiv_r
+#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp
+#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui
+#define mpz_fdiv_ui __gmpz_fdiv_ui
+#define mpz_fib_ui __gmpz_fib_ui
+#define mpz_fits_sint_p __gmpz_fits_sint_p
+#define mpz_fits_slong_p __gmpz_fits_slong_p
+#define mpz_fits_sshort_p __gmpz_fits_sshort_p
+#define mpz_fits_uint_p __gmpz_fits_uint_p
+#define mpz_fits_ulong_p __gmpz_fits_ulong_p
+#define mpz_fits_ushort_p __gmpz_fits_ushort_p
+#define mpz_gcd __gmpz_gcd
+#define mpz_gcd_ui __gmpz_gcd_ui
+#define mpz_gcdext __gmpz_gcdext
+#define mpz_get_d __gmpz_get_d
+#define mpz_get_si __gmpz_get_si
+#define mpz_get_str __gmpz_get_str
+#define mpz_get_ui __gmpz_get_ui
+#define mpz_getlimbn __gmpz_getlimbn
+#define mpz_hamdist __gmpz_hamdist
+#define mpz_init __gmpz_init
+#define mpz_inp_binary __gmpz_inp_binary
+#define mpz_inp_raw __gmpz_inp_raw
+#define mpz_inp_str __gmpz_inp_str
+#define mpz_init_set __gmpz_init_set
+#define mpz_init_set_d __gmpz_init_set_d
+#define mpz_init_set_si __gmpz_init_set_si
+#define mpz_init_set_str __gmpz_init_set_str
+#define mpz_init_set_ui __gmpz_init_set_ui
+#define mpz_invert __gmpz_invert
+#define mpz_ior __gmpz_ior
+#define mpz_jacobi __gmpz_jacobi
+#define mpz_lcm __gmpz_lcm
+#define mpz_legendre __gmpz_legendre
+#define mpz_mod __gmpz_mod
+#define mpz_mul __gmpz_mul
+#define mpz_mul_2exp __gmpz_mul_2exp
+#define mpz_neg __gmpz_neg
+#define mpz_nextprime __gmpz_nextprime
+#define mpz_out_binary __gmpz_out_binary
+#define mpz_out_raw __gmpz_out_raw
+#define mpz_out_str __gmpz_out_str
+#define mpz_perfect_power_p __gmpz_perfect_power_p
+#define mpz_perfect_square_p __gmpz_perfect_square_p
+#define mpz_popcount __gmpz_popcount
+#define mpz_pow_ui __gmpz_pow_ui
+#define mpz_powm __gmpz_powm
+#define mpz_powm_ui __gmpz_powm_ui
+#define mpz_probab_prime_p __gmpz_probab_prime_p
+#define mpz_random __gmpz_random
+#define mpz_random2 __gmpz_random2
+#define mpz_remove __gmpz_remove
+#define mpz_root __gmpz_root
+#define mpz_rrandomb __gmpz_rrandomb
+#define mpz_scan0 __gmpz_scan0
+#define mpz_scan1 __gmpz_scan1
+#define mpz_set __gmpz_set
+#define mpz_set_d __gmpz_set_d
+#define mpz_set_f __gmpz_set_f
+#define mpz_set_q __gmpz_set_q
+#define mpz_set_si __gmpz_set_si
+#define mpz_set_str __gmpz_set_str
+#define mpz_set_ui __gmpz_set_ui
+#define mpz_setbit __gmpz_setbit
+#define mpz_size __gmpz_size
+#define mpz_sizeinbase __gmpz_sizeinbase
+#define mpz_sqrt __gmpz_sqrt
+#define mpz_sqrtrem __gmpz_sqrtrem
+#define mpz_sub __gmpz_sub
+#define mpz_sub_ui __gmpz_sub_ui
+#define mpz_swap __gmpz_swap
+#define mpz_tdiv_ui __gmpz_tdiv_ui
+#define mpz_tdiv_q __gmpz_tdiv_q
+#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp
+#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui
+#define mpz_tdiv_qr __gmpz_tdiv_qr
+#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui
+#define mpz_tdiv_r __gmpz_tdiv_r
+#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp
+#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui
+#define mpz_tstbit __gmpz_tstbit
+#define mpz_ui_pow_ui __gmpz_ui_pow_ui
+#define mpz_urandomb __gmpz_urandomb
+#define mpz_urandomm __gmpz_urandomm
+#define mpz_xor __gmpz_xor
+#define mpz_eor __gmpz_xor
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+DLL_IMPORT void *_mpz_realloc _PROTO ((mpz_ptr, mp_size_t));
+
+DLL_IMPORT void mpz_abs _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_add _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_add_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_addmul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_and _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_array_init _PROTO ((mpz_ptr, mp_size_t, mp_size_t));
+DLL_IMPORT void mpz_bin_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_bin_uiui _PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+DLL_IMPORT void mpz_cdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_cdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_cdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_cdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_cdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_cdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_cdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_clear _PROTO ((mpz_ptr));
+DLL_IMPORT void mpz_clrbit _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT int mpz_cmp _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int _mpz_cmp_si _PROTO ((mpz_srcptr, signed long int));
+DLL_IMPORT int _mpz_cmp_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT int mpz_cmpabs _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_cmpabs_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_com _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_divexact _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_dump _PROTO ((mpz_srcptr));
+DLL_IMPORT void mpz_fac_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT void mpz_fdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_fdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_fdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_fdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_fdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_fdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_fdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_fdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_fdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_fib_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT int mpz_fits_sint_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_slong_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_sshort_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_uint_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_ulong_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_fits_ushort_p _PROTO ((mpz_srcptr));
+DLL_IMPORT void mpz_gcd _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_gcd_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_gcdext _PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT double mpz_get_d _PROTO ((mpz_srcptr));
+/* signed */ long int mpz_get_si _PROTO ((mpz_srcptr));
+DLL_IMPORT char *mpz_get_str _PROTO ((char *, int, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_get_ui _PROTO ((mpz_srcptr));
+DLL_IMPORT mp_limb_t mpz_getlimbn _PROTO ((mpz_srcptr, mp_size_t));
+DLL_IMPORT unsigned long int mpz_hamdist _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_init _PROTO ((mpz_ptr));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpz_inp_binary _PROTO ((mpz_ptr, FILE *));
+DLL_IMPORT size_t mpz_inp_raw _PROTO ((mpz_ptr, FILE *));
+DLL_IMPORT size_t mpz_inp_str _PROTO ((mpz_ptr, FILE *, int));
+#endif
+DLL_IMPORT void mpz_init_set _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_init_set_d _PROTO ((mpz_ptr, double));
+DLL_IMPORT void mpz_init_set_si _PROTO ((mpz_ptr, signed long int));
+DLL_IMPORT int mpz_init_set_str _PROTO ((mpz_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpz_init_set_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT int mpz_invert _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_ior _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_jacobi _PROTO ((mpz_srcptr, mpz_srcptr));
+
+#define mpz_kronecker_si __gmpz_kronecker_si
+DLL_IMPORT int mpz_kronecker_si _PROTO ((mpz_srcptr, long));
+
+#define mpz_kronecker_ui __gmpz_kronecker_ui
+DLL_IMPORT int mpz_kronecker_ui _PROTO ((mpz_srcptr, unsigned long));
+
+#define mpz_si_kronecker __gmpz_si_kronecker
+DLL_IMPORT int mpz_si_kronecker _PROTO ((long, mpz_srcptr));
+
+#define mpz_ui_kronecker __gmpz_ui_kronecker
+DLL_IMPORT int mpz_ui_kronecker _PROTO ((unsigned long, mpz_srcptr));
+
+DLL_IMPORT void mpz_lcm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_legendre _PROTO ((mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_mod _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_mul _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_mul_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+#define mpz_mul_si __gmpz_mul_si
+DLL_IMPORT void mpz_mul_si _PROTO ((mpz_ptr, mpz_srcptr, long int));
+
+#define mpz_mul_ui __gmpz_mul_ui
+DLL_IMPORT void mpz_mul_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+
+DLL_IMPORT void mpz_neg _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_nextprime _PROTO ((mpz_ptr, mpz_srcptr));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpz_out_binary _PROTO ((FILE *, mpz_srcptr));
+DLL_IMPORT size_t mpz_out_raw _PROTO ((FILE *, mpz_srcptr));
+DLL_IMPORT size_t mpz_out_str _PROTO ((FILE *, int, mpz_srcptr));
+#endif
+DLL_IMPORT int mpz_perfect_power_p _PROTO ((mpz_srcptr));
+DLL_IMPORT int mpz_perfect_square_p _PROTO ((mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_popcount _PROTO ((mpz_srcptr));
+DLL_IMPORT void mpz_pow_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_powm _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_powm_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr));
+DLL_IMPORT int mpz_probab_prime_p _PROTO ((mpz_srcptr, int));
+DLL_IMPORT void mpz_random _PROTO ((mpz_ptr, mp_size_t));
+DLL_IMPORT void mpz_random2 _PROTO ((mpz_ptr, mp_size_t));
+DLL_IMPORT unsigned long int mpz_remove _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT int mpz_root _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_rrandomb _PROTO ((mpz_ptr, gmp_randstate_t, unsigned long int));
+DLL_IMPORT unsigned long int mpz_scan0 _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_scan1 _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_set _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_set_d _PROTO ((mpz_ptr, double));
+DLL_IMPORT void mpz_set_f _PROTO ((mpz_ptr, mpf_srcptr));
+DLL_IMPORT void mpz_set_q _PROTO ((mpz_ptr, mpq_srcptr));
+DLL_IMPORT void mpz_set_si _PROTO ((mpz_ptr, signed long int));
+DLL_IMPORT int mpz_set_str _PROTO ((mpz_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpz_set_ui _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT void mpz_setbit _PROTO ((mpz_ptr, unsigned long int));
+DLL_IMPORT size_t mpz_size _PROTO ((mpz_srcptr));
+DLL_IMPORT size_t mpz_sizeinbase _PROTO ((mpz_srcptr, int));
+DLL_IMPORT void mpz_sqrt _PROTO ((mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_sqrtrem _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr));
+DLL_IMPORT void mpz_sub _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_sub_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_swap _PROTO ((mpz_ptr, mpz_ptr));
+DLL_IMPORT void mpz_tdiv_q _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_tdiv_q_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_tdiv_ui _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_tdiv_q_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_tdiv_qr _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT unsigned long int mpz_tdiv_qr_ui _PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_tdiv_r _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+DLL_IMPORT void mpz_tdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpz_tdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+DLL_IMPORT int mpz_tstbit _PROTO ((mpz_srcptr, unsigned long int));
+DLL_IMPORT void mpz_ui_pow_ui _PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+DLL_IMPORT void mpz_urandomb _PROTO ((mpz_t, gmp_randstate_t, unsigned long int));
+DLL_IMPORT void mpz_urandomm _PROTO ((mpz_t, gmp_randstate_t, mpz_t));
+DLL_IMPORT void mpz_xor _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+#if defined (__cplusplus)
+}
+#endif
+
+/**************** Rational (i.e. Q) routines. ****************/
+
+#define mpq_init __gmpq_init
+#define mpq_clear __gmpq_clear
+#define mpq_set __gmpq_set
+#define mpq_set_ui __gmpq_set_ui
+#define mpq_set_si __gmpq_set_si
+#define mpq_set_z __gmpq_set_z
+#define mpq_add __gmpq_add
+#define mpq_sub __gmpq_sub
+#define mpq_mul __gmpq_mul
+#define mpq_div __gmpq_div
+#define mpq_neg __gmpq_neg
+#define mpq_cmp __gmpq_cmp
+#define _mpq_cmp_ui __gmpq_cmp_ui
+#define mpq_equal __gmpq_equal
+#define mpq_inv __gmpq_inv
+#define mpq_set_num __gmpq_set_num
+#define mpq_set_den __gmpq_set_den
+#define mpq_get_num __gmpq_get_num
+#define mpq_get_den __gmpq_get_den
+#define mpq_get_d __gmpq_get_d
+#define mpq_set_d __gmpq_set_d
+#define mpq_canonicalize __gmpq_canonicalize
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+DLL_IMPORT void mpq_init _PROTO ((mpq_ptr));
+DLL_IMPORT void mpq_clear _PROTO ((mpq_ptr));
+DLL_IMPORT void mpq_set _PROTO ((mpq_ptr, mpq_srcptr));
+DLL_IMPORT void mpq_set_ui _PROTO ((mpq_ptr, unsigned long int, unsigned long int));
+DLL_IMPORT void mpq_set_si _PROTO ((mpq_ptr, signed long int, unsigned long int));
+DLL_IMPORT void mpq_set_z _PROTO ((mpq_ptr, mpz_srcptr));
+DLL_IMPORT void mpq_add _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_sub _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_mul _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_div _PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_neg _PROTO ((mpq_ptr, mpq_srcptr));
+DLL_IMPORT int mpq_cmp _PROTO ((mpq_srcptr, mpq_srcptr));
+DLL_IMPORT int _mpq_cmp_ui _PROTO ((mpq_srcptr, unsigned long int, unsigned long int));
+DLL_IMPORT int mpq_equal _PROTO ((mpq_srcptr, mpq_srcptr));
+DLL_IMPORT void mpq_inv _PROTO ((mpq_ptr, mpq_srcptr));
+DLL_IMPORT void mpq_set_num _PROTO ((mpq_ptr, mpz_srcptr));
+DLL_IMPORT void mpq_set_den _PROTO ((mpq_ptr, mpz_srcptr));
+DLL_IMPORT void mpq_get_num _PROTO ((mpz_ptr, mpq_srcptr));
+DLL_IMPORT void mpq_get_den _PROTO ((mpz_ptr, mpq_srcptr));
+DLL_IMPORT double mpq_get_d _PROTO ((mpq_srcptr));
+DLL_IMPORT void mpq_set_d _PROTO ((mpq_ptr, double));
+DLL_IMPORT void mpq_canonicalize _PROTO ((mpq_ptr));
+
+#define mpq_swap __gmpq_swap
+DLL_IMPORT void mpq_swap _PROTO ((mpq_ptr, mpq_ptr));
+
+#ifdef _GMP_H_HAVE_FILE
+#define mpq_out_str __gmpq_out_str
+DLL_IMPORT size_t mpq_out_str _PROTO ((FILE *, int, mpq_srcptr));
+#endif
+
+#if defined (__cplusplus)
+}
+#endif
+
+/**************** Float (i.e. F) routines. ****************/
+
+#define mpf_abs __gmpf_abs
+#define mpf_add __gmpf_add
+#define mpf_add_ui __gmpf_add_ui
+#define mpf_ceil __gmpf_ceil
+#define mpf_clear __gmpf_clear
+#define mpf_cmp __gmpf_cmp
+#define mpf_cmp_si __gmpf_cmp_si
+#define mpf_cmp_ui __gmpf_cmp_ui
+#define mpf_div __gmpf_div
+#define mpf_div_2exp __gmpf_div_2exp
+#define mpf_div_ui __gmpf_div_ui
+#define mpf_dump __gmpf_dump
+#define mpf_floor __gmpf_floor
+#define mpf_eq __gmpf_eq
+#define mpf_get_d __gmpf_get_d
+#define mpf_get_prec __gmpf_get_prec
+#define mpf_get_str __gmpf_get_str
+#define mpf_init __gmpf_init
+#define mpf_init2 __gmpf_init2
+#define mpf_inp_str __gmpf_inp_str
+#define mpf_init_set __gmpf_init_set
+#define mpf_init_set_d __gmpf_init_set_d
+#define mpf_init_set_si __gmpf_init_set_si
+#define mpf_init_set_str __gmpf_init_set_str
+#define mpf_init_set_ui __gmpf_init_set_ui
+#define mpf_mul __gmpf_mul
+#define mpf_mul_2exp __gmpf_mul_2exp
+#define mpf_mul_ui __gmpf_mul_ui
+#define mpf_neg __gmpf_neg
+#define mpf_out_str __gmpf_out_str
+#define mpf_pow_ui __gmpf_pow_ui
+#define mpf_random2 __gmpf_random2
+#define mpf_reldiff __gmpf_reldiff
+#define mpf_set __gmpf_set
+#define mpf_set_d __gmpf_set_d
+#define mpf_set_default_prec __gmpf_set_default_prec
+#define mpf_set_prec __gmpf_set_prec
+#define mpf_set_prec_raw __gmpf_set_prec_raw
+#define mpf_set_q __gmpf_set_q
+#define mpf_set_si __gmpf_set_si
+#define mpf_set_str __gmpf_set_str
+#define mpf_set_ui __gmpf_set_ui
+#define mpf_set_z __gmpf_set_z
+#define mpf_size __gmpf_size
+#define mpf_sqrt __gmpf_sqrt
+#define mpf_sqrt_ui __gmpf_sqrt_ui
+#define mpf_sub __gmpf_sub
+#define mpf_sub_ui __gmpf_sub_ui
+#define mpf_trunc __gmpf_trunc
+#define mpf_ui_div __gmpf_ui_div
+#define mpf_ui_sub __gmpf_ui_sub
+#define mpf_urandomb __gmpf_urandomb
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+DLL_IMPORT void mpf_abs _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_add _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_add_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_ceil _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_clear _PROTO ((mpf_ptr));
+DLL_IMPORT int mpf_cmp _PROTO ((mpf_srcptr, mpf_srcptr));
+DLL_IMPORT int mpf_cmp_si _PROTO ((mpf_srcptr, signed long int));
+DLL_IMPORT int mpf_cmp_ui _PROTO ((mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_div _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_div_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_div_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_dump _PROTO ((mpf_srcptr));
+DLL_IMPORT int mpf_eq _PROTO ((mpf_srcptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_floor _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT double mpf_get_d _PROTO ((mpf_srcptr));
+DLL_IMPORT unsigned long int mpf_get_prec _PROTO ((mpf_srcptr));
+char *mpf_get_str _PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
+DLL_IMPORT void mpf_init _PROTO ((mpf_ptr));
+DLL_IMPORT void mpf_init2 _PROTO ((mpf_ptr, unsigned long int));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpf_inp_str _PROTO ((mpf_ptr, FILE *, int));
+#endif
+DLL_IMPORT void mpf_init_set _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_init_set_d _PROTO ((mpf_ptr, double));
+DLL_IMPORT void mpf_init_set_si _PROTO ((mpf_ptr, signed long int));
+DLL_IMPORT int mpf_init_set_str _PROTO ((mpf_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpf_init_set_ui _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_mul _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_mul_2exp _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_mul_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_neg _PROTO ((mpf_ptr, mpf_srcptr));
+#ifdef _GMP_H_HAVE_FILE
+DLL_IMPORT size_t mpf_out_str _PROTO ((FILE *, int, size_t, mpf_srcptr));
+#endif
+DLL_IMPORT void mpf_pow_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_random2 _PROTO ((mpf_ptr, mp_size_t, mp_exp_t));
+DLL_IMPORT void mpf_reldiff _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_set _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_set_d _PROTO ((mpf_ptr, double));
+DLL_IMPORT void mpf_set_default_prec _PROTO ((unsigned long int));
+DLL_IMPORT void mpf_set_prec _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_set_prec_raw _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_set_q _PROTO ((mpf_ptr, mpq_srcptr));
+DLL_IMPORT void mpf_set_si _PROTO ((mpf_ptr, signed long int));
+DLL_IMPORT int mpf_set_str _PROTO ((mpf_ptr, __gmp_const char *, int));
+DLL_IMPORT void mpf_set_ui _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_set_z _PROTO ((mpf_ptr, mpz_srcptr));
+DLL_IMPORT size_t mpf_size _PROTO ((mpf_srcptr));
+DLL_IMPORT void mpf_sqrt _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_sqrt_ui _PROTO ((mpf_ptr, unsigned long int));
+DLL_IMPORT void mpf_sub _PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+DLL_IMPORT void mpf_sub_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+DLL_IMPORT void mpf_trunc _PROTO ((mpf_ptr, mpf_srcptr));
+DLL_IMPORT void mpf_ui_div _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+DLL_IMPORT void mpf_ui_sub _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+DLL_IMPORT void mpf_urandomb _PROTO ((mpf_t, gmp_randstate_t, unsigned long int));
+
+#define mpf_swap __gmpf_swap
+DLL_IMPORT void mpf_swap _PROTO ((mpf_ptr, mpf_ptr));
+
+#if defined (__cplusplus)
+}
+#endif
+/************ Low level positive-integer (i.e. N) routines. ************/
+
+/* This is ugly, but we need to make user calls reach the prefixed function. */
+#define mpn_add __MPN(add)
+#define mpn_add_1 __MPN(add_1)
+#define mpn_add_n __MPN(add_n)
+#define mpn_add_nc __MPN(add_nc)
+#define mpn_addmul_1 __MPN(addmul_1)
+#define mpn_addsub_n __MPN(addsub_n)
+#define mpn_addsub_nc __MPN(addsub_nc)
+/* #define mpn_and_n __MPN(and_n) */
+/* #define mpn_andn_n __MPN(andn_n) */
+#define mpn_bdivmod __MPN(bdivmod)
+#define mpn_cmp __MPN(cmp)
+/* #define mpn_com_n __MPN(com_n) */
+#define mpn_copyd __MPN(copyd)
+#define mpn_copyi __MPN(copyi)
+#define mpn_divrem __MPN(divrem)
+#define mpn_divrem_1 __MPN(divrem_1)
+#define mpn_divrem_2 __MPN(divrem_2)
+#define mpn_dump __MPN(dump)
+#define mpn_gcd __MPN(gcd)
+#define mpn_gcd_1 __MPN(gcd_1)
+#define mpn_gcdext __MPN(gcdext)
+#define mpn_get_str __MPN(get_str)
+#define mpn_hamdist __MPN(hamdist)
+#define mpn_invert_limb __MPN(invert_limb)
+/* #define mpn_ior_n __MPN(ior_n) */
+/* #define mpn_iorn_n __MPN(iorn_n) */
+/* #define mpn_kara_mul_n __MPN(kara_mul_n) internal */
+/* #define mpn_kara_sqr_n __MPN(kara_sqr_n) internal */
+#define mpn_lshift __MPN(lshift)
+#define mpn_lshiftc __MPN(lshiftc)
+#define mpn_mod_1 __MPN(mod_1)
+#define mpn_mul __MPN(mul)
+#define mpn_mul_1 __MPN(mul_1)
+#define mpn_mul_basecase __MPN(mul_basecase)
+#define mpn_mul_n __MPN(mul_n)
+#define mpn_perfect_square_p __MPN(perfect_square_p)
+#define mpn_popcount __MPN(popcount)
+#define mpn_preinv_mod_1 __MPN(preinv_mod_1)
+/* #define mpn_nand_n __MPN(nand_n) */
+/* #define mpn_nior_n __MPN(nior_n) */
+#define mpn_random __MPN(random)
+#define mpn_random2 __MPN(random2)
+#define mpn_rshift __MPN(rshift)
+#define mpn_rshiftc __MPN(rshiftc)
+#define mpn_scan0 __MPN(scan0)
+#define mpn_scan1 __MPN(scan1)
+#define mpn_set_str __MPN(set_str)
+#define mpn_sqr_basecase __MPN(sqr_basecase)
+#define mpn_sqr_n __MPN(sqr_n)
+#define mpn_sqrtrem __MPN(sqrtrem)
+#define mpn_sub __MPN(sub)
+#define mpn_sub_1 __MPN(sub_1)
+#define mpn_sub_n __MPN(sub_n)
+#define mpn_sub_nc __MPN(sub_nc)
+#define mpn_submul_1 __MPN(submul_1)
+/* #define mpn_toom3_mul_n __MPN(toom3_mul_n) internal */
+/* #define mpn_toom3_sqr_n __MPN(toom3_sqr_n) internal */
+/* #define mpn_xnor_n __MPN(xnor_n) */
+/* #define mpn_xor_n __MPN(xor_n) */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+DLL_IMPORT mp_limb_t mpn_add _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+DLL_IMPORT mp_limb_t mpn_add_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+DLL_IMPORT mp_limb_t mpn_add_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_add_nc _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_addmul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_addmul_1c __MPN(addmul_1c)
+DLL_IMPORT mp_limb_t mpn_addmul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_addsub_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_bdivmod _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, unsigned long int));
+DLL_IMPORT int mpn_cmp _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_divexact_by3(dst, src, size) mpn_divexact_by3c (dst, src, size, 0)
+
+#define mpn_divexact_by3c __MPN(divexact_by3c)
+DLL_IMPORT mp_limb_t mpn_divexact_by3c _PROTO ((mp_ptr dst, mp_srcptr src,
+ mp_size_t size, mp_limb_t carry));
+
+#define mpn_divmod_1(qp,np,nsize,dlimb) mpn_divrem_1 (qp,0,np,nsize,dlimb)
+
+DLL_IMPORT mp_limb_t mpn_divrem _PROTO((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+
+DLL_IMPORT mp_limb_t mpn_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_divrem_1c __MPN(divrem_1c)
+DLL_IMPORT mp_limb_t mpn_divrem_1c _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t,
+ mp_limb_t, mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_divrem_2 _PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+DLL_IMPORT void mpn_dump _PROTO ((mp_srcptr, mp_size_t));
+mp_size_t mpn_gcd _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_gcd_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_size_t mpn_gcdext _PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+DLL_IMPORT size_t mpn_get_str _PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
+DLL_IMPORT unsigned long int mpn_hamdist _PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
+
+#define mpn_jacobi_base __MPN(jacobi_base)
+DLL_IMPORT int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1));
+
+DLL_IMPORT mp_limb_t mpn_lshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+DLL_IMPORT mp_limb_t mpn_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_mod_1c __MPN(mod_1c)
+DLL_IMPORT mp_limb_t mpn_mod_1c _PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_mod_1_rshift __MPN(mod_1_rshift)
+DLL_IMPORT mp_limb_t mpn_mod_1_rshift _PROTO ((mp_srcptr, mp_size_t, unsigned,mp_limb_t));
+
+DLL_IMPORT mp_limb_t mpn_mul _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_mul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_mul_1c __MPN(mul_1c)
+DLL_IMPORT mp_limb_t mpn_mul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+DLL_IMPORT void mpn_mul_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+DLL_IMPORT void mpn_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT int mpn_perfect_square_p _PROTO ((mp_srcptr, mp_size_t));
+DLL_IMPORT unsigned long int mpn_popcount _PROTO ((mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_preinv_mod_1 _PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+DLL_IMPORT void mpn_random _PROTO ((mp_ptr, mp_size_t));
+DLL_IMPORT void mpn_random2 _PROTO ((mp_ptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_rshift _PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+DLL_IMPORT unsigned long int mpn_scan0 _PROTO ((mp_srcptr, unsigned long int));
+DLL_IMPORT unsigned long int mpn_scan1 _PROTO ((mp_srcptr, unsigned long int));
+mp_size_t mpn_set_str _PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int));
+DLL_IMPORT void mpn_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+DLL_IMPORT void mpn_sqr_basecase _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+mp_size_t mpn_sqrtrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_sub _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+DLL_IMPORT mp_limb_t mpn_sub_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+DLL_IMPORT mp_limb_t mpn_sub_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+DLL_IMPORT mp_limb_t mpn_sub_nc _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+DLL_IMPORT mp_limb_t mpn_submul_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+
+#define mpn_submul_1c __MPN(submul_1c)
+DLL_IMPORT mp_limb_t mpn_submul_1c _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+
+#define mpn_tdiv_qr __MPN(tdiv_qr)
+DLL_IMPORT void mpn_tdiv_qr _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* mpn_incr_u(p,incr): add the single limb value "incr" to the limb array at
+   p in place, propagating the carry into higher limbs.  The carry loop is
+   unbounded, so the caller must guarantee the result fits (no carry off the
+   most significant limb), or it walks past the array. */
+#define mpn_incr_u(p,incr) \
+ do { mp_limb_t __x; mp_ptr __p = p; \
+ __x = *__p + incr; \
+ *__p = __x; \
+ if (__x < incr) \
+ while (++(*(++__p)) == 0) \
+ ; \
+ } while (0)
+
+/* mpn_decr_u(p,incr): subtract the single limb value "incr" from the limb
+   array at p in place, propagating the borrow into higher limbs.  As above,
+   the borrow loop is unbounded: the caller must guarantee {p,...} >= incr. */
+#define mpn_decr_u(p,incr) \
+ do { mp_limb_t __x; mp_ptr __p = p; \
+ __x = *__p; \
+ *__p = __x - incr; \
+ if (__x < incr) \
+ while ((*(++__p))-- == 0) \
+ ; \
+ } while (0)
+
+#if defined (__GNUC__) || defined (_FORCE_INLINES)
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_add_1 (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_limb_t s2_limb)
+#else
+mpn_add_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+#endif
+{ /* {res_ptr,s1_size} = {s1_ptr,s1_size} + s2_limb; returns carry (0 or 1).
+     assumes s1_size >= 1 (low limb is read unconditionally) -- TODO confirm */
+ register mp_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb = x + s2_limb;
+ *res_ptr++ = s2_limb;
+ if (s2_limb < x) /* unsigned wrap-around means the low add carried */
+ {
+ while (--s1_size != 0)
+ {
+ x = *s1_ptr++ + 1; /* propagate the carry upward */
+ *res_ptr++ = x;
+ if (x != 0)
+ goto fin; /* carry absorbed; just copy the rest */
+ }
+
+ return 1; /* carry propagated out of the most significant limb */
+ }
+
+ fin:
+ if (res_ptr != s1_ptr) /* not operating in place: copy untouched limbs */
+ {
+ mp_size_t i;
+ for (i = 0; i < s1_size - 1; i++)
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0;
+}
+
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_add (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_srcptr s2_ptr,
+ register mp_size_t s2_size)
+#else
+mpn_add (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_srcptr s2_ptr;
+ register mp_size_t s2_size;
+#endif
+{ /* {res_ptr,s1_size} = {s1_ptr,s1_size} + {s2_ptr,s2_size}; returns carry.
+     The subtraction s1_size - s2_size below is only meaningful when
+     s1_size >= s2_size (standard mpn convention) -- TODO confirm callers */
+ mp_limb_t cy_limb = 0;
+
+ if (s2_size != 0) /* add the overlapping low limbs */
+ cy_limb = mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if (s1_size - s2_size != 0) /* propagate carry through s1's high limbs */
+ cy_limb = mpn_add_1 (res_ptr + s2_size,
+ s1_ptr + s2_size,
+ s1_size - s2_size,
+ cy_limb);
+ return cy_limb;
+}
+
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_sub_1 (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_limb_t s2_limb)
+#else
+mpn_sub_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+#endif
+{ /* {res_ptr,s1_size} = {s1_ptr,s1_size} - s2_limb; returns borrow (0 or 1).
+     assumes s1_size >= 1 (low limb is read unconditionally) -- TODO confirm */
+ register mp_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb = x - s2_limb;
+ *res_ptr++ = s2_limb;
+ if (s2_limb > x) /* unsigned wrap-around means the low subtract borrowed */
+ {
+ while (--s1_size != 0)
+ {
+ x = *s1_ptr++;
+ *res_ptr++ = x - 1; /* propagate the borrow upward */
+ if (x != 0)
+ goto fin; /* borrow absorbed; just copy the rest */
+ }
+
+ return 1; /* borrow propagated out of the most significant limb */
+ }
+
+ fin:
+ if (res_ptr != s1_ptr) /* not operating in place: copy untouched limbs */
+ {
+ mp_size_t i;
+ for (i = 0; i < s1_size - 1; i++)
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0;
+}
+
+_EXTERN_INLINE mp_limb_t
+#if (__STDC__-0) || defined (__cplusplus)
+mpn_sub (register mp_ptr res_ptr,
+ register mp_srcptr s1_ptr,
+ register mp_size_t s1_size,
+ register mp_srcptr s2_ptr,
+ register mp_size_t s2_size)
+#else
+mpn_sub (res_ptr, s1_ptr, s1_size, s2_ptr, s2_size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_size_t s1_size;
+ register mp_srcptr s2_ptr;
+ register mp_size_t s2_size;
+#endif
+{ /* {res_ptr,s1_size} = {s1_ptr,s1_size} - {s2_ptr,s2_size}; returns borrow.
+     The subtraction s1_size - s2_size below is only meaningful when
+     s1_size >= s2_size (standard mpn convention) -- TODO confirm callers */
+ mp_limb_t cy_limb = 0;
+
+ if (s2_size != 0) /* subtract the overlapping low limbs */
+ cy_limb = mpn_sub_n (res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if (s1_size - s2_size != 0) /* propagate borrow through s1's high limbs */
+ cy_limb = mpn_sub_1 (res_ptr + s2_size,
+ s1_ptr + s2_size,
+ s1_size - s2_size,
+ cy_limb);
+ return cy_limb;
+}
+#endif /* __GNUC__ */
+
+/* Allow faster testing for negative, zero, and positive. */
+#define mpz_sgn(Z) ((Z)->_mp_size < 0 ? -1 : (Z)->_mp_size > 0)
+#define mpf_sgn(F) ((F)->_mp_size < 0 ? -1 : (F)->_mp_size > 0)
+#define mpq_sgn(Q) ((Q)->_mp_num._mp_size < 0 ? -1 : (Q)->_mp_num._mp_size > 0)
+
+/* When using GCC, optimize certain common comparisons. */
+#if defined (__GNUC__)
+#define mpz_cmp_ui(Z,UI) \
+ (__builtin_constant_p (UI) && (UI) == 0 \
+ ? mpz_sgn (Z) : _mpz_cmp_ui (Z,UI))
+#define mpz_cmp_si(Z,SI) \
+ (__builtin_constant_p (SI) && (SI) == 0 ? mpz_sgn (Z) \
+ : __builtin_constant_p (SI) && (SI) > 0 \
+ ? _mpz_cmp_ui (Z, (unsigned long int) SI) \
+ : _mpz_cmp_si (Z,SI))
+#define mpq_cmp_ui(Q,NUI,DUI) \
+ (__builtin_constant_p (NUI) && (NUI) == 0 \
+ ? mpq_sgn (Q) : _mpq_cmp_ui (Q,NUI,DUI))
+#else
+#define mpz_cmp_ui(Z,UI) _mpz_cmp_ui (Z,UI)
+#define mpz_cmp_si(Z,UI) _mpz_cmp_si (Z,UI)
+#define mpq_cmp_ui(Q,NUI,DUI) _mpq_cmp_ui (Q,NUI,DUI)
+#endif
+
+
+/* Using "&" rather than "&&" means these can come out branch-free. Every
+ mpz_t has at least one limb allocated, so fetching the low limb is always
+ allowed. */
+#define mpz_odd_p(z) ((int) ((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
+#define mpz_even_p(z) (! mpz_odd_p (z))
+
+
+/* Allow direct user access to numerator and denominator of a mpq_t object. */
+#define mpq_numref(Q) (&((Q)->_mp_num))
+#define mpq_denref(Q) (&((Q)->_mp_den))
+
+
+/* Compatibility with GMP 2 and earlier. */
+#define mpn_divmod(qp,np,nsize,dp,dsize) mpn_divrem (qp,0,np,nsize,dp,dsize)
+
+/* Compatibility with GMP 1. */
+#define mpz_mdiv mpz_fdiv_q
+#define mpz_mdivmod mpz_fdiv_qr
+#define mpz_mmod mpz_fdiv_r
+#define mpz_mdiv_ui mpz_fdiv_q_ui
+#define mpz_mdivmod_ui(q,r,n,d) \
+ ((r == 0) ? mpz_fdiv_q_ui (q,n,d) : mpz_fdiv_qr_ui (q,r,n,d))
+#define mpz_mmod_ui(r,n,d) \
+ ((r == 0) ? mpz_fdiv_ui (n,d) : mpz_fdiv_r_ui (r,n,d))
+
+/* Useful synonyms, but not quite compatible with GMP 1. */
+#define mpz_div mpz_fdiv_q
+#define mpz_divmod mpz_fdiv_qr
+#define mpz_div_ui mpz_fdiv_q_ui
+#define mpz_divmod_ui mpz_fdiv_qr_ui
+#define mpz_mod_ui mpz_fdiv_r_ui
+#define mpz_div_2exp mpz_fdiv_q_2exp
+#define mpz_mod_2exp mpz_fdiv_r_2exp
+
+#define gmp_errno __gmp_errno
+extern int gmp_errno;
+
+enum
+{
+ GMP_ERROR_NONE = 0,
+ GMP_ERROR_UNSUPPORTED_ARGUMENT = 1,
+ GMP_ERROR_DIVISION_BY_ZERO = 2,
+ GMP_ERROR_SQRT_OF_NEGATIVE = 4,
+ GMP_ERROR_INVALID_ARGUMENT = 8,
+ GMP_ERROR_ALLOCATE = 16,
+ GMP_ERROR_BAD_STRING = 32,
+ GMP_ERROR_UNUSED_ERROR
+};
+
+/* Note: major version number is in mp.h too */
+#define __GNU_MP_VERSION 3
+#define __GNU_MP_VERSION_MINOR 1
+#define __GNU_MP_VERSION_PATCHLEVEL 1
+
+#define gmp_version __gmp_version
+extern __gmp_const char *gmp_version;
+
+#define __GMP_H__
+#endif /* __GMP_H__ */
diff --git a/rts/gmp/insert-dbl.c b/rts/gmp/insert-dbl.c
new file mode 100644
index 0000000000..dc88a56f62
--- /dev/null
+++ b/rts/gmp/insert-dbl.c
@@ -0,0 +1,98 @@
+/* __gmp_insert_double -- convert from array of mp_limb_t to double.
+
+Copyright (C) 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef XDEBUG
+#undef _GMP_IEEE_FLOATS
+#endif
+
+#ifndef _GMP_IEEE_FLOATS
+#define _GMP_IEEE_FLOATS 0
+#endif
+
+double
+#if __STDC__
+__gmp_scale2 (double d, int exp)
+#else
+__gmp_scale2 (d, exp)
+ double d;
+ int exp;
+#endif
+{ /* Return d * 2^exp.  NOTE(review): the file header above names
+     __gmp_insert_double, but this file defines __gmp_scale2 -- verify
+     against the upstream GMP 3.1.1 sources. */
+#if _GMP_IEEE_FLOATS
+ {
+#if defined (__alpha) && __GNUC__ == 2 && __GNUC_MINOR__ == 8
+ /* Work around alpha-specific bug in GCC 2.8.x. */
+ volatile
+#endif
+ union ieee_double_extract x; /* layout comes from gmp-impl.h */
+ x.d = d;
+ exp += x.s.exp; /* add exp directly to the biased IEEE exponent field */
+ x.s.exp = exp;
+ if (exp >= 2047) /* exponent overflow: clamp to +-infinity */
+ {
+ /* Return +-infinity */
+ x.s.exp = 2047;
+ x.s.manl = x.s.manh = 0;
+ }
+ else if (exp < 1) /* exponent underflow: build a denormal */
+ {
+ x.s.exp = 1; /* smallest exponent (biased) */
+ /* Divide result by 2 until we have scaled it to the right IEEE
+ denormalized number, but stop if it becomes zero. */
+ while (exp < 1 && x.d != 0)
+ {
+ x.d *= 0.5;
+ exp++;
+ }
+ }
+ return x.d;
+ }
+#else
+ { /* Generic path: compute r = d * factor^|exp| by square-and-multiply,
+      with factor = 2 for positive exp and 1/2 for negative exp. */
+ double factor, r;
+
+ factor = 2.0;
+ if (exp < 0)
+ {
+ factor = 0.5;
+ exp = -exp;
+ }
+ r = d;
+ if (exp != 0)
+ {
+ if ((exp & 1) != 0)
+ r *= factor;
+ exp >>= 1;
+ while (exp != 0)
+ {
+ factor *= factor; /* factor = 2^(2^k) at iteration k */
+ if ((exp & 1) != 0)
+ r *= factor;
+ exp >>= 1;
+ }
+ }
+ return r;
+ }
+#endif
+}
diff --git a/rts/gmp/install-sh b/rts/gmp/install-sh
new file mode 100644
index 0000000000..e9de23842d
--- /dev/null
+++ b/rts/gmp/install-sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5 (mit/util/scripts/install.sh).
+#
+# Copyright 1991 by the Massachusetts Institute of Technology
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of M.I.T. not be used in advertising or
+# publicity pertaining to distribution of the software without specific,
+# written prior permission. M.I.T. makes no representations about the
+# suitability of this software for any purpose. It is provided "as is"
+# without express or implied warranty.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch. It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+transformbasename=""
+# Bug fix: the -t=* option handler below assigns $transformarg, and the
+# rename logic later tests [ x"$transformarg" = x ]; the original line here
+# initialized the unused name "transform_arg", so $transformarg was never
+# cleared and could inherit a stray value from the environment.
+transformarg=""
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""
+
+while [ x"$1" != x ]; do
+ case $1 in
+ -c) instcmd="$cpprog"
+ shift
+ continue;;
+
+ -d) dir_arg=true
+ shift
+ continue;;
+
+ -m) chmodcmd="$chmodprog $2"
+ shift
+ shift
+ continue;;
+
+ -o) chowncmd="$chownprog $2"
+ shift
+ shift
+ continue;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift
+ shift
+ continue;;
+
+ -s) stripcmd="$stripprog"
+ shift
+ continue;;
+
+ -t=*) transformarg=`echo $1 | sed 's/-t=//'`
+ shift
+ continue;;
+
+ -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+ shift
+ continue;;
+
+ *) if [ x"$src" = x ]
+ then
+ src=$1
+ else
+ # this colon is to work around a 386BSD /bin/sh bug
+ :
+ dst=$1
+ fi
+ shift
+ continue;;
+ esac
+done
+
+if [ x"$src" = x ]
+then
+ echo "install: no input file specified"
+ exit 1
+else
+ true
+fi
+
+if [ x"$dir_arg" != x ]; then
+ dst=$src
+ src=""
+
+ if [ -d $dst ]; then
+ instcmd=:
+ chmodcmd=""
+ else
+ instcmd=mkdir
+ fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad
+# if $src (and thus $dsttmp) contains '*'.
+
+ if [ -f $src -o -d $src ]
+ then
+ true
+ else
+ echo "install: $src does not exist"
+ exit 1
+ fi
+
+ if [ x"$dst" = x ]
+ then
+ echo "install: no destination specified"
+ exit 1
+ else
+ true
+ fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+ if [ -d $dst ]
+ then
+ dst="$dst"/`basename $src`
+ else
+ true
+ fi
+fi
+
+## this sed command emulates the dirname command
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+# this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
+IFS="${oIFS}"
+
+pathcomp=''
+
+while [ $# -ne 0 ] ; do
+ pathcomp="${pathcomp}${1}"
+ shift
+
+ if [ ! -d "${pathcomp}" ] ;
+ then
+ $mkdirprog "${pathcomp}"
+ else
+ true
+ fi
+
+ pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+ $doit $instcmd $dst &&
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+ if [ x"$transformarg" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ dstfile=`basename $dst $transformbasename |
+ sed $transformarg`$transformbasename
+ fi
+
+# don't allow the sed command to completely eliminate the filename
+
+ if [ x"$dstfile" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ true
+ fi
+
+# Make a temp file name in the proper directory.
+
+ dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+ $doit $instcmd $src $dsttmp &&
+
+ trap "rm -f ${dsttmp}" 0 &&
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing. If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+ $doit $rmcmd -f $dstdir/$dstfile &&
+ $doit $mvcmd $dsttmp $dstdir/$dstfile
+
+fi &&
+
+
+exit 0
diff --git a/rts/gmp/longlong.h b/rts/gmp/longlong.h
new file mode 100644
index 0000000000..9a12755053
--- /dev/null
+++ b/rts/gmp/longlong.h
@@ -0,0 +1,1347 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this file; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+
+ UWtype -- An unsigned type, default type for operations (typically a "word")
+ UHWtype -- An unsigned type, at least half the size of UWtype.
+ UDWtype -- An unsigned type, at least twice as large as UWtype
+ W_TYPE_SIZE -- size in bits of UWtype
+
+ SItype, USItype -- Signed and unsigned 32 bit types.
+ DItype, UDItype -- Signed and unsigned 64 bit types.
+
+ On a 32 bit machine UWtype should typically be USItype;
+ on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+ that use this file takes place. */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+/* Define auxiliary asm macros.
+
+ 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ word product in HIGH_PROD and LOW_PROD.
+
+ 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ UDWtype product. This is just a variant of umul_ppmm.
+
+ 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ denominator) divides a UDWtype, composed by the UWtype integers
+ HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
+ than DENOMINATOR for correct operation. If the macro additionally
+ requires the most significant bit of DENOMINATOR to be 1, then the
+ pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+ 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ denominator). Like udiv_qrnnd but the numbers are signed. The quotient
+ is rounded towards 0.
+
+ 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ msb to the first non-zero bit in the UWtype X. This is the number of
+ steps X needs to be shifted left to set the msb. Undefined for X == 0,
+ unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+ 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ from the least significant end.
+
+ 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
+ HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
+ (i.e. carry out) is not stored anywhere, and is lost.
+
+ 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+ LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
+ and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
+ and is lost.
+
+ If any of these macros are left undefined for a particular CPU,
+ C macros are used. */
+
+/* The CPUs come in alphabetical order below.
+
+ Please add support for more CPUs here, or improve the current support
+ for the CPUs below! */
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+#if defined (__GNUC__)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("umulh %r1,%2,%0" \
+ : "=r" (ph) \
+ : "%rJ" (m0), "rI" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 18
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { UDItype __di; \
+ __di = __MPN(invert_limb) (d); \
+ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
+ } while (0)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define UDIV_TIME 220
+long __MPN(count_leading_zeros) ();
+#define count_leading_zeros(count, x) \
+ ((count) = __MPN(count_leading_zeros) (x))
+#endif /* LONGLONG_STANDALONE */
+#else /* ! __GNUC__ */
+#include <machine/builtins.h>
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ (ph) = __UMULH (m0, m1); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#endif
+#endif /* __alpha */
+
+#if defined (__hppa) && W_TYPE_SIZE == 64
+/* We put the result pointer parameter last here, since it makes passing
+ of the other parameters more efficient. */
+#ifndef LONGLONG_STANDALONE
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ UDItype __p0; \
+ (wh) = __MPN(umul_ppmm) (u, v, &__p0); \
+ (wl) = __p0; \
+ } while (0)
+extern UDItype __MPN(umul_ppmm) _PROTO ((UDItype, UDItype, UDItype *));
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { UDItype __r; \
+ (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \
+ (r) = __r; \
+ } while (0)
+extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *));
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#endif /* LONGLONG_STANDALONE */
+#endif /* hppa */
+
+#if defined (__ia64) && W_TYPE_SIZE == 64
+#if defined (__GNUC__)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("xma.hu %0 = %1, %2, f0" \
+ : "=e" (ph) \
+ : "e" (m0), "e" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#endif
+#endif
+
+
+#if defined (__GNUC__) && !defined (NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+ understood by gcc1. Use cpp to avoid major code duplication. */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("multiplu %0,%1,%2" \
+ : "=r" (xl) \
+ : "r" (__m0), "r" (__m1)); \
+ __asm__ ("multmu %0,%1,%2" \
+ : "=r" (xh) \
+ : "r" (__m0), "r" (__m1)); \
+ } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("dividu %0,%3,%4" \
+ : "=r" (q), "=q" (r) \
+ : "1" (n1), "r" (n0), "r" (d))
+#define count_leading_zeros(count, x) \
+ __asm__ ("clz %0,%1" \
+ : "=r" (count) \
+ : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
+#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#define smul_ppmm(xh, xl, a, b) \
+ __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
+#define UMUL_TIME 5
+#else
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("%@ Inlined umul_ppmm\n" \
+ "mov %|r0, %2, lsr #16\n" \
+ "mov %|r2, %3, lsr #16\n" \
+ "bic %|r1, %2, %|r0, lsl #16\n" \
+ "bic %|r2, %3, %|r2, lsl #16\n" \
+ "mul %1, %|r1, %|r2\n" \
+ "mul %|r2, %|r0, %|r2\n" \
+ "mul %|r1, %0, %|r1\n" \
+ "mul %0, %|r0, %0\n" \
+ "adds %|r1, %|r2, %|r1\n" \
+ "addcs %0, %0, #65536\n" \
+ "adds %1, %1, %|r1, lsl #16\n" \
+ "adc %0, %0, %|r1, lsr #16" \
+ : "=&r" (xh), "=r" (xl) \
+ : "r" (a), "r" (b) \
+ : "r0", "r1", "r2")
+#define UMUL_TIME 20
+#endif
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+#if defined (__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__x.__ll) \
+ : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define smul_ppmm(w1, w0, u, v) \
+ ({union {DItype __ll; \
+ struct {SItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("mulwx %2,%0" \
+ : "=r" (__x.__ll) \
+ : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
+ __w; })
+#endif /* __clipper__ */
+
+/* Fujitsu vector computers. */
+#if defined (__uxp__) && W_TYPE_SIZE == 32
+#define umul_ppmm(ph, pl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
+ (ph) = __x.__i.__h; \
+ (pl) = __x.__i.__l; \
+ } while (0)
+#define smul_ppmm(ph, pl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
+ (ph) = __x.__i.__h; \
+ (pl) = __x.__i.__l; \
+ } while (0)
+#endif
+
+#if defined (__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add.w %5,%1\n\taddx %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+ __asm__ ("mulx %3,%0,%1" \
+ : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
+ : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("divx %4,%0,%1" \
+ : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
+ : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("bsch/1 %1,%0" \
+ : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
+#endif
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
+ (wh) = __x.__i.__h; \
+ (wl) = __x.__i.__l; \
+ } while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { USItype __r; \
+ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype));
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __tmp; \
+ __asm__ ( \
+ "ldi 2,%0\n" \
+ "extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
+ "extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
+ "ldo 16(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
+ "extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
+ "ldo 8(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
+ "extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
+ "ldo 4(%0),%0 ; Yes. Perform add.\n" \
+ "extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
+ "extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
+ "ldo 2(%0),%0 ; Yes. Perform add.\n" \
+ "extru %1,30,1,%1 ; Extract bit 1.\n" \
+ "sub %0,%1,%0 ; Subtract it.\n" \
+ : "=r" (count), "=r" (__tmp) : "1" (x)); \
+ } while (0)
+#endif /* hppa */
+
+#if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#define smul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mr %0,%3" \
+ : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
+ : "%1" (m0), "r" (m1)); \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __x.__i.__h = n1; __x.__i.__l = n0; \
+ __asm__ ("dr %0,%2" \
+ : "=r" (__x.__ll) \
+ : "0" (__x.__ll), "r" (d)); \
+ (q) = __x.__i.__l; (r) = __x.__i.__h; \
+ } while (0)
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl %5,%1\n\tadcl %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mull %3" \
+ : "=a" (w0), "=d" (w1) \
+ : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divl %4" \
+ : "=a" (q), "=d" (r) \
+ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define count_trailing_zeros(count, x) \
+ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 10
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+#if defined (__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r,h,l,c) \
+ __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \
+ "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("emul %2,%1,%0" \
+ : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \
+ __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __nn; \
+ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+ __asm__ ("ediv %d,%n,%0" \
+ : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
+ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+#if defined (__i960mx) /* what is the proper symbol to test??? */
+#define rshift_rhlc(r,h,l,c) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __nn; \
+ __nn.__i.__h = (h); __nn.__i.__l = (l); \
+ __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+ }
+#endif /* i960mx */
+#endif /* i960 */
+
+#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
+ || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
+ || defined (__mc5307__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "d" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */
+#if defined (__mc68020__) || defined(mc68020) \
+ || defined (__mc68030__) || defined (mc68030) \
+ || defined (__mc68040__) || defined (mc68040) \
+ || defined (__mc68332__) || defined (mc68332) \
+ || defined (__NeXT__)
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulu%.l %3,%1:%0" \
+ : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
+ : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divu%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divs%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
+#else /* for other 68k family members use 16x16->32 multiplication */
+#define umul_ppmm(xh, xl, a, b) \
+ do { USItype __umul_tmp1, __umul_tmp2; \
+ __asm__ ("| Inlined umul_ppmm\n" \
+ "move%.l %5,%3\n" \
+ "move%.l %2,%0\n" \
+ "move%.w %3,%1\n" \
+ "swap %3\n" \
+ "swap %0\n" \
+ "mulu%.w %2,%1\n" \
+ "mulu%.w %3,%0\n" \
+ "mulu%.w %2,%3\n" \
+ "swap %2\n" \
+ "mulu%.w %5,%2\n" \
+ "add%.l %3,%2\n" \
+ "jcc 1f\n" \
+ "add%.l %#0x10000,%0\n" \
+"1: move%.l %2,%3\n" \
+ "clr%.w %2\n" \
+ "swap %2\n" \
+ "swap %3\n" \
+ "clr%.w %3\n" \
+ "add%.l %3,%1\n" \
+ "addx%.l %2,%0\n" \
+ "| End inlined umul_ppmm" \
+ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+ } while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+/* The '020, '030, '040 and '060 have bitfield insns. */
+#if defined (__mc68020__) || defined (mc68020) \
+ || defined (__mc68030__) || defined (mc68030) \
+ || defined (__mc68040__) || defined (mc68040) \
+ || defined (__mc68060__) || defined (mc68060) \
+ || defined (__NeXT__)
+#define count_leading_zeros(count, x) \
+ __asm__ ("bfffo %1{%b2:%b2},%0" \
+ : "=d" ((USItype) (count)) \
+ : "od" ((USItype) (x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+ (wh) = __x.__i.__h; \
+ (wl) = __x.__i.__l; \
+ } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x, __q; \
+ __x.__i.__h = (n1); __x.__i.__l = (n0); \
+ __asm__ ("divu.d %0,%1,%2" \
+ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+ (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
+ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips */
+
+#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \
+ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips */
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__x.__ll) \
+ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__w) \
+ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \
+ __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ __x.__i.__h = (n1); __x.__i.__l = (n0); \
+ __asm__ ("deid %2,%0" \
+ : "=g" (__x.__ll) \
+ : "0" (__x.__ll), "g" ((USItype)(d))); \
+ (r) = __x.__i.__l; (q) = __x.__i.__h; })
+#define count_trailing_zeros(count,x) \
+ do { \
+ __asm__ ("ffsd %2,%0" \
+ : "=r" ((USItype) (count)) \
+ : "0" ((USItype) 0), "r" ((USItype) (x))); \
+ } while (0)
+#endif /* __ns32000__ */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+ vendor compilers. */
+#if (defined (_ARCH_PPC) || defined (_ARCH_PWR) || defined (__powerpc__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else \
+ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
+ } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else \
+ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined (_ARCH_PPC) || defined (__powerpc__)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+ do { \
+ SItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+ __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#define UDIV_TIME 100
+#endif
+#endif /* 32-bit POWER architecture variants. */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+ vendor compilers. */
+#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "%r" (ah), "%r" (al), "rI" (bl));\
+ else \
+ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
+ } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else \
+ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+ do { \
+ DItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define SMUL_TIME 14 /* ??? */
+#define UDIV_TIME 120 /* ??? */
+#endif /* 64-bit PowerPC. */
+
+#if defined (__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addw %5,%1\n\taddwc %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subw %5,%1\n\tsubwb %3,%0" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("movw %1,%R0\n\tuemul %2,%0" \
+ : "=&r" (__x.__ll) \
+ : "g" ((USItype) (u)), "g" ((USItype)(v))); \
+ (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
+#endif /* __pyr__ */
+
+#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("a %1,%5\n\tae %0,%3" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "r" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("s %1,%5\n\tse %0,%3" \
+ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "r" ((USItype)(bl)))
+#define smul_ppmm(ph, pl, m0, m1) \
+ __asm__ ( \
+ "s r2,r2\n" \
+ "mts r10,%2\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "m r2,%3\n" \
+ "cas %0,r2,r0\n" \
+ "mfs r10,%1" \
+ : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
+ : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
+ : "r2"); \
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+ do { \
+ if ((x) >= 0x10000) \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \
+ else \
+ { \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) : "r" ((USItype)(x))); \
+ (count) += 16; \
+ } \
+ } while (0)
+#endif /* RT/ROMP */
+
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
+ : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#if defined (__sparc_v9__) || defined (__sparcv9)
+/* Perhaps we should use floating-point operations here? */
+#if 0
+/* Triggers a bug making mpz/tests/t-gcd.c fail.
+ Perhaps we simply need explicitly zero-extend the inputs? */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
+ "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
+#else
+/* Use v8 umul until above bug is fixed. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#endif
+/* Use a plain v8 divide for v9. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
+#else
+#if defined (__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+ 2) the 'I' flag thinks of the range as a 13 bit signed interval,
+ while we want to match a 13 bit interval, sign extended to 32 bits,
+ but INTERPRETED AS UNSIGNED. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
+#define UDIV_TIME 25
+#else
+#define UDIV_TIME 60 /* SuperSPARC timing */
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide. It also has two additional
+ instructions scan (ffs from high bit) and divscc. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("! Inlined udiv_qrnnd\n" \
+ "wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
+ "tst %%g0\n" \
+ "divscc %3,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%%g1\n" \
+ "divscc %%g1,%4,%0\n" \
+ "rd %%y,%1\n" \
+ "bl,a 1f\n" \
+ "add %1,%4,%1\n" \
+"1: ! End of inline udiv_qrnnd" \
+ : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
+ : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+ __asm__ ("scan %1,0,%0" : "=r" (x) : "r" (count))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+ implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
+ undefined. */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* __sparc_v9__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("! Inlined umul_ppmm\n" \
+ "wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
+ "sra %3,31,%%g2 ! Don't move this insn\n" \
+ "and %2,%%g2,%%g2 ! Don't move this insn\n" \
+ "andcc %%g0,0,%%g1 ! Don't move this insn\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,%3,%%g1\n" \
+ "mulscc %%g1,0,%%g1\n" \
+ "add %%g1,%%g2,%0\n" \
+ "rd %%y,%1" \
+ : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
+ : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39 /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { USItype __r; \
+ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern USItype __MPN(udiv_qrnnd) _PROTO ((USItype *, USItype, USItype, USItype));
+#ifndef UDIV_TIME
+#define UDIV_TIME 140
+#endif
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
+ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), "g" ((USItype)(bl)))
+#define smul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __x; \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("emul %1,%2,$0,%0" \
+ : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ union {DItype __ll; \
+ struct {SItype __l, __h;} __i; \
+ } __x; \
+ __x.__i.__h = n1; __x.__i.__l = n0; \
+ __asm__ ("ediv %3,%2,%0,%1" \
+ : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
+ } while (0)
+#endif /* __vax__ */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
+ : "%0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
+ "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
+ : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
+ "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {long int __ll; \
+ struct {unsigned int __h, __l;} __i; \
+ } __x; \
+ unsigned int __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mult %S0,%H3" \
+ : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
+ : "%1" (m0), "rQR" (m1)); \
+ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
+ (xh) += ((((signed int) __m0 >> 15) & __m1) \
+ + (((signed int) __m1 >> 15) & __m0)); \
+ } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+
+#if !defined (umul_ppmm) && defined (__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+ { \
+ UDWtype __ll = __umulsidi3 (m0, m1); \
+ ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+ pl = (UWtype) __ll; \
+ }
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+ ({UWtype __hi, __lo; \
+ umul_ppmm (__hi, __lo, u, v); \
+ ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+
+/* Note the prototypes are under !define(umul_ppmm) etc too, since the HPPA
+ versions above are different and we don't want to conflict. */
+
+#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm
+#define mpn_umul_ppmm __MPN(umul_ppmm)
+extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ mp_limb_t __umul_ppmm__p0; \
+ (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \
+ (mp_limb_t) (u), (mp_limb_t) (v)); \
+ (wl) = __umul_ppmm__p0; \
+ } while (0)
+#endif
+
+#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd
+#define mpn_udiv_qrnnd __MPN(udiv_qrnnd)
+extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *,
+ mp_limb_t, mp_limb_t, mp_limb_t));
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ mp_limb_t __udiv_qrnnd__r; \
+ (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
+ (mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) d); \
+ (r) = __udiv_qrnnd__r; \
+ } while (0)
+#endif
+
+
+/* If this machine has no inline assembler, use C macros. */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) + (bl); \
+ (sh) = (ah) + (bh) + (__x < (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
+
+/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
+ smul_ppmm. */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __xm0 = (u), __xm1 = (v); \
+ smul_ppmm (__w1, w0, __xm0, __xm1); \
+ (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
+ + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
+ } while (0)
+#endif
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ UWtype __u = (u), __v = (v); \
+ \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ __vh = __ll_highpart (__v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart (__x0);/* this can't give carry */ \
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos. */ \
+ \
+ (w1) = __x3 + __ll_highpart (__x1); \
+ (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
+ } while (0)
+#endif
+
+/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
+ exist in one form or another. */
+#if !defined (smul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __xm0 = (u), __xm1 = (v); \
+ umul_ppmm (__w1, w0, __xm0, __xm1); \
+ (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
+ - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
+ } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+ do { \
+ UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+ __d1 = __ll_highpart (d); \
+ __d0 = __ll_lowpart (d); \
+ \
+ __q1 = (n1) / __d1; \
+ __r1 = (n1) - __q1 * __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart (n0); \
+ if (__r1 < __m) \
+ { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __q0 = __r1 / __d1; \
+ __r0 = __r1 - __q0 * __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
+ if (__r0 < __m) \
+ { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+ } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+ __udiv_w_sdiv (defined in libgcc or elsewhere). */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ do { \
+ UWtype __r; \
+ (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+ (r) = __r; \
+ } while (0)
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+extern
+#if __STDC__
+const
+#endif
+unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+ do { \
+ UWtype __xr = (x); \
+ UWtype __a; \
+ \
+ if (W_TYPE_SIZE <= 32) \
+ { \
+ __a = __xr < ((UWtype) 1 << 2*__BITS4) \
+ ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+ : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\
+ } \
+ else \
+ { \
+ for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+ if (((__xr >> __a) & 0xff) != 0) \
+ break; \
+ } \
+ \
+ (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+ } while (0)
+/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros. The latter might be
+ defined in asm, but if it is not, the C version above is good enough. */
+#define count_trailing_zeros(count, x) \
+ do { \
+ UWtype __ctz_x = (x); \
+ UWtype __ctz_c; \
+ count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
+ (count) = W_TYPE_SIZE - 1 - __ctz_c; \
+ } while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
+
+/* Give defaults for UMUL_TIME and UDIV_TIME. */
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* count_trailing_zeros is often on the slow side, so make that the default */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 15 /* cycles */
+#endif
+
+
diff --git a/rts/gmp/ltconfig b/rts/gmp/ltconfig
new file mode 100644
index 0000000000..6d8cf33e8f
--- /dev/null
+++ b/rts/gmp/ltconfig
@@ -0,0 +1,3109 @@
+#! /bin/sh
+
+# ltconfig - Create a system-specific libtool.
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# A lot of this script is taken from autoconf-2.10.
+
+# Check that we are running under the correct shell.
+SHELL=${CONFIG_SHELL-/bin/sh}
+echo=echo
+if test "X$1" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+elif test "X$1" = X--fallback-echo; then
+ # Avoid inline document here, it may be left over
+ :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+ # Yippee, $echo works!
+ :
+else
+ # Restart under the correct shell.
+ exec "$SHELL" "$0" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+ # used as fallback echo
+ shift
+ cat <<EOF
+$*
+EOF
+ exit 0
+fi
+
+# Find the correct PATH separator. Usually this is `:', but
+# DJGPP uses `;' like DOS.
+if test "X${PATH_SEPARATOR+set}" != Xset; then
+ UNAME=${UNAME-`uname 2>/dev/null`}
+ case X$UNAME in
+ *-DOS) PATH_SEPARATOR=';' ;;
+ *) PATH_SEPARATOR=':' ;;
+ esac
+fi
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+if test "X${echo_test_string+set}" != Xset; then
+ # find a string as large as possible, as long as the shell can cope with it
+ for cmd in 'sed 50q "$0"' 'sed 20q "$0"' 'sed 10q "$0"' 'sed 2q "$0"' 'echo test'; do
+ # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ...
+ if (echo_test_string="`eval $cmd`") 2>/dev/null &&
+ echo_test_string="`eval $cmd`" &&
+ (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null; then
+ break
+ fi
+ done
+fi
+
+if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ :
+else
+ # The Solaris, AIX, and Digital Unix default echo programs unquote
+ # backslashes. This makes it impossible to quote backslashes using
+ # echo "$something" | sed 's/\\/\\\\/g'
+ #
+ # So, first we look for a working echo in the user's PATH.
+
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}"
+ for dir in $PATH /usr/ucb; do
+ if (test -f $dir/echo || test -f $dir/echo$ac_exeext) &&
+ test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ echo="$dir/echo"
+ break
+ fi
+ done
+ IFS="$save_ifs"
+
+ if test "X$echo" = Xecho; then
+ # We didn't find a better echo, so look for alternatives.
+ if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ # This shell has a builtin print -r that does the trick.
+ echo='print -r'
+ elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) &&
+ test "X$CONFIG_SHELL" != X/bin/ksh; then
+ # If we have ksh, try running ltconfig again with it.
+ ORIGINAL_CONFIG_SHELL="${CONFIG_SHELL-/bin/sh}"
+ export ORIGINAL_CONFIG_SHELL
+ CONFIG_SHELL=/bin/ksh
+ export CONFIG_SHELL
+ exec "$CONFIG_SHELL" "$0" --no-reexec ${1+"$@"}
+ else
+ # Try using printf.
+ echo='printf "%s\n"'
+ if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ # Cool, printf works
+ :
+ elif echo_testing_string=`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null` &&
+ test "X$echo_testing_string" = 'X\t' &&
+ echo_testing_string=`("$ORIGINAL_CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ CONFIG_SHELL="$ORIGINAL_CONFIG_SHELL"
+ export CONFIG_SHELL
+ SHELL="$CONFIG_SHELL"
+ export SHELL
+ echo="$CONFIG_SHELL $0 --fallback-echo"
+ elif echo_testing_string=`("$CONFIG_SHELL" "$0" --fallback-echo '\t') 2>/dev/null` &&
+ test "X$echo_testing_string" = 'X\t' &&
+ echo_testing_string=`("$CONFIG_SHELL" "$0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ echo="$CONFIG_SHELL $0 --fallback-echo"
+ else
+ # maybe with a smaller string...
+ prev=:
+
+ for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do
+ if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null; then
+ break
+ fi
+ prev="$cmd"
+ done
+
+ if test "$prev" != 'sed 50q "$0"'; then
+ echo_test_string=`eval $prev`
+ export echo_test_string
+ exec "${ORIGINAL_CONFIG_SHELL}" "$0" ${1+"$@"}
+ else
+ # Oops. We lost completely, so just stick with echo.
+ echo=echo
+ fi
+ fi
+ fi
+ fi
+fi
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e s/^X//'
+sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# The name of this program.
+progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'`
+
+# Constants:
+PROGRAM=ltconfig
+PACKAGE=libtool
+VERSION=1.3c
+TIMESTAMP=" (1.696 2000/03/14 20:22:42)"
+ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
+ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+rm="rm -f"
+
+help="Try \`$progname --help' for more information."
+
+# Global variables:
+default_ofile=libtool
+can_build_shared=yes
+enable_shared=yes
+# All known linkers require a `.a' archive for static linking (except M$VC,
+# which needs '.lib').
+enable_static=yes
+enable_fast_install=yes
+enable_dlopen=unknown
+enable_win32_dll=no
+pic_mode=default
+ltmain=
+silent=
+srcdir=
+ac_config_guess=
+ac_config_sub=
+host=
+build=NONE
+nonopt=NONE
+ofile="$default_ofile"
+verify_host=yes
+with_gcc=no
+with_gnu_ld=no
+need_locks=yes
+ac_ext=c
+libext=a
+cache_file=
+
+old_AR="$AR"
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+old_CPPFLAGS="$CPPFLAGS"
+old_LDFLAGS="$LDFLAGS"
+old_LIBS="$LIBS"
+old_MAGIC="$MAGIC"
+old_LD="$LD"
+old_LN_S="$LN_S"
+old_NM="$NM"
+old_RANLIB="$RANLIB"
+old_STRIP="$STRIP"
+old_AS="$AS"
+old_DLLTOOL="$DLLTOOL"
+old_OBJDUMP="$OBJDUMP"
+old_OBJEXT="$OBJEXT"
+old_EXEEXT="$EXEEXT"
+old_reload_Flag="$reload_flag"
+old_deplibs_check_method="$deplibs_check_method"
+old_file_magic_cmd="$file_magic_cmd"
+
+# Parse the command line options.
+args=
+prev=
+for option
+do
+ case "$option" in
+ -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) optarg= ;;
+ esac
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ eval "$prev=\$option"
+ prev=
+ continue
+ fi
+
+ case "$option" in
+ --help) cat <<EOM
+Usage: $progname [OPTION]... LTMAIN [HOST]
+
+Generate a system-specific libtool script.
+
+ --build configure for building on BUILD [BUILD=HOST]
+ --debug enable verbose shell tracing
+ --disable-shared do not build shared libraries
+ --disable-static do not build static libraries
+ --disable-fast-install do not optimize for fast installation
+ --enable-dlopen enable dlopen support
+ --enable-win32-dll enable building dlls on win32 hosts
+ --help display this help and exit
+ --no-verify do not verify that HOST is a valid host type
+-o, --output=FILE specify the output file [default=$default_ofile]
+ --quiet same as \`--silent'
+ --silent do not print informational messages
+ --srcdir=DIR find \`config.guess' in DIR
+ --version output version information and exit
+ --with-gcc assume that the GNU C compiler will be used
+ --with-gnu-ld assume that the C compiler uses the GNU linker
+ --prefer-pic try to use only PIC objects
+ --prefer-non-pic try to use only non-PIC objects
+ --disable-lock disable file locking
+ --cache-file=FILE configure cache file
+
+LTMAIN is the \`ltmain.sh' shell script fragment or \`ltmain.c' program
+that provides basic libtool functionality.
+
+HOST is the canonical host system name [default=guessed].
+EOM
+ exit 0
+ ;;
+
+ --build) prev=build ;;
+ --build=*) build="$optarg" ;;
+
+ --debug)
+ echo "$progname: enabling shell trace mode"
+ set -x
+ ;;
+
+ --disable-shared) enable_shared=no ;;
+
+ --disable-static) enable_static=no ;;
+
+ --disable-fast-install) enable_fast_install=no ;;
+
+ --enable-dlopen) enable_dlopen=yes ;;
+
+ --enable-win32-dll) enable_win32_dll=yes ;;
+
+ --quiet | --silent) silent=yes ;;
+
+ --srcdir) prev=srcdir ;;
+ --srcdir=*) srcdir="$optarg" ;;
+
+ --no-verify) verify_host=no ;;
+
+ --output | -o) prev=ofile ;;
+ --output=*) ofile="$optarg" ;;
+
+ --version) echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"; exit 0 ;;
+
+ --with-gcc) with_gcc=yes ;;
+ --with-gnu-ld) with_gnu_ld=yes ;;
+
+ --prefer-pic) pic_mode=yes ;;
+ --prefer-non-pic) pic_mode=no ;;
+
+ --disable-lock) need_locks=no ;;
+
+ --cache-file=*) cache_file="$optarg" ;;
+
+ -*)
+ echo "$progname: unrecognized option \`$option'" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ ;;
+
+ *)
+ if test -z "$ltmain"; then
+ ltmain="$option"
+ elif test -z "$host"; then
+# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1
+# if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then
+# echo "$progname: warning \`$option' is not a valid host type" 1>&2
+# fi
+ host="$option"
+ else
+ echo "$progname: too many arguments" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi ;;
+ esac
+done
+
+if test -z "$ltmain"; then
+ echo "$progname: you must specify a LTMAIN file" 1>&2
+ echo "$help" 1>&2
+ exit 1
+fi
+
+if test ! -f "$ltmain"; then
+ echo "$progname: \`$ltmain' does not exist" 1>&2
+ echo "$help" 1>&2
+ exit 1
+fi
+
+# Quote any args containing shell metacharacters.
+ltconfig_args=
+for arg
+do
+ case "$arg" in
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*)
+ ltconfig_args="$ltconfig_args '$arg'" ;;
+ *) ltconfig_args="$ltconfig_args $arg" ;;
+ esac
+done
+
+# A relevant subset of AC_INIT.
+
+# File descriptor usage:
+# 0 standard input
+# 1 file creation
+# 2 errors and warnings
+# 3 some systems may open it to /dev/tty
+# 4 used on the Kubota Titan
+# 5 compiler messages saved in config.log
+# 6 checking for... messages and results
+if test "$silent" = yes; then
+ exec 6>/dev/null
+else
+ exec 6>&1
+fi
+exec 5>>./config.log
+
+# NLS nuisances.
+# Only set LANG and LC_ALL to C if already set.
+# These must not be set unconditionally because not all systems understand
+# e.g. LANG=C (notably SCO).
+if test "X${LC_ALL+set}" = Xset; then LC_ALL=C; export LC_ALL; fi
+if test "X${LANG+set}" = Xset; then LANG=C; export LANG; fi
+
+if test -n "$cache_file" && test -r "$cache_file"; then
+ echo "loading cache $cache_file within ltconfig"
+ . $cache_file
+fi
+
+if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
+ # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
+ if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
+ ac_n= ac_c='
+' ac_t=' '
+ else
+ ac_n=-n ac_c= ac_t=
+ fi
+else
+ ac_n= ac_c='\c' ac_t=
+fi
+
+if test -z "$srcdir"; then
+ # Assume the source directory is the same one as the path to LTMAIN.
+ srcdir=`$echo "X$ltmain" | $Xsed -e 's%/[^/]*$%%'`
+ test "$srcdir" = "$ltmain" && srcdir=.
+fi
+
+trap "$rm conftest*; exit 1" 1 2 15
+if test "$verify_host" = yes; then
+ # Check for config.guess and config.sub.
+ ac_aux_dir=
+ for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/config.guess; then
+ ac_aux_dir=$ac_dir
+ break
+ fi
+ done
+ if test -z "$ac_aux_dir"; then
+ echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi
+ ac_config_guess=$ac_aux_dir/config.guess
+ ac_config_sub=$ac_aux_dir/config.sub
+
+ # Make sure we can run config.sub.
+ if $SHELL $ac_config_sub sun4 >/dev/null 2>&1; then :
+ else
+ echo "$progname: cannot run $ac_config_sub" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi
+
+ echo $ac_n "checking host system type""... $ac_c" 1>&6
+
+ host_alias=$host
+ case "$host_alias" in
+ "")
+ if host_alias=`$SHELL $ac_config_guess`; then :
+ else
+ echo "$progname: cannot guess host type; you must specify one" 1>&2
+ echo "$help" 1>&2
+ exit 1
+ fi ;;
+ esac
+ host=`$SHELL $ac_config_sub $host_alias`
+ echo "$ac_t$host" 1>&6
+
+ # Make sure the host verified.
+ test -z "$host" && exit 1
+
+ # Check for the build system type
+ echo $ac_n "checking build system type... $ac_c" 1>&6
+
+ build_alias=$build
+ case "$build_alias" in
+ NONE)
+ case $nonopt in
+ NONE) build_alias=$host_alias ;;
+ *) build_alias=$nonopt ;;
+ esac ;;
+ esac
+
+ build=`$SHELL $ac_config_sub $build_alias`
+ build_cpu=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+ build_vendor=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+ build_os=`echo $build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+ echo "$ac_t""$build" 1>&6
+
+elif test -z "$host"; then
+ echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2
+ echo "$help" 1>&2
+ exit 1
+else
+ host_alias=$host
+ build_alias=$host_alias
+ build=$host
+fi
+
+if test x"$host" != x"$build"; then
+ ac_tool_prefix=${host_alias}-
+else
+ ac_tool_prefix=
+fi
+
+host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
+host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
+host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
+
+# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
+case "$host_os" in
+linux-gnu*) ;;
+linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
+esac
+
+case "$host_os" in
+aix3*)
+ # AIX sometimes has problems with the GCC collect2 program. For some
+ # reason, if we set the COLLECT_NAMES environment variable, the problems
+ # vanish in a puff of smoke.
+ if test "X${COLLECT_NAMES+set}" != Xset; then
+ COLLECT_NAMES=
+ export COLLECT_NAMES
+ fi
+ ;;
+esac
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR cru $oldlib$oldobjs$old_deplibs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+# Set sane defaults for various variables
+test -z "$AR" && AR=ar
+test -z "$AS" && AS=as
+test -z "$CC" && CC=cc
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+test -z "$MAGIC" && MAGIC=file
+test -z "$LD" && LD=ld
+test -z "$LN_S" && LN_S="ln -s"
+test -z "$NM" && NM=nm
+test -z "$OBJDUMP" && OBJDUMP=objdump
+test -z "$RANLIB" && RANLIB=:
+test -z "$STRIP" && STRIP=:
+test -z "$objext" && objext=o
+
+echo $ac_n "checking for objdir... $ac_c" 1>&6
+rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+ objdir=.libs
+else
+ # MS-DOS does not allow filenames that begin with a dot.
+ objdir=_libs
+fi
+rmdir .libs 2>/dev/null
+echo "$ac_t$objdir" 1>&6
+
+# Allow CC to be a program name with arguments.
+set dummy $CC
+compiler="$2"
+
+# We assume here that the value for ac_cv_prog_cc_pic will not be cached
+# in isolation, and that seeing it set (from the cache) indicates that
+# the associated values are set (in the cache) correctly too.
+echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6
+echo "$progname:563:checking for $compiler option to produce PIC" 1>&5
+if test "X${ac_cv_prog_cc_pic+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_cv_prog_cc_pic=
+ ac_cv_prog_cc_shlib=
+ ac_cv_prog_cc_wl=
+ ac_cv_prog_cc_static=
+ ac_cv_prog_cc_no_builtin=
+ ac_cv_prog_cc_can_build_shared=$can_build_shared
+
+ if test "$with_gcc" = yes; then
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static='-static'
+
+ case "$host_os" in
+ beos* | irix5* | irix6* | osf3* | osf4* | osf5*)
+ # PIC is the default for these OSes.
+ ;;
+ aix*)
+ # Below there is a dirty hack to force normal static linking with -ldl
+ # The problem is because libdl dynamically linked with both libc and
+ # libC (AIX C++ library), which obviously doesn't included in libraries
+ # list by gcc. This cause undefined symbols with -static flags.
+ # This hack allows C programs to be linked with "-static -ldl", but
+ # we not sure about C++ programs.
+ ac_cv_prog_cc_static="$ac_cv_prog_cc_static ${ac_cv_prog_cc_wl}-lC"
+ ;;
+ cygwin* | mingw* | os2*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ ac_cv_prog_cc_pic='-DDLL_EXPORT'
+ ;;
+ amigaos*)
+ # FIXME: we need at least 68020 code to build shared libraries, but
+ # adding the `-m68020' flag to GCC prevents building anything better,
+ # like `-m68040'.
+ ac_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4'
+ ;;
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ ac_cv_prog_cc_pic=-Kconform_pic
+ fi
+ ;;
+ *)
+ ac_cv_prog_cc_pic='-fPIC'
+ ;;
+ esac
+ else
+ # PORTME Check for PIC flags for the system compiler.
+ case "$host_os" in
+ aix3* | aix4*)
+ # All AIX code is PIC.
+ ac_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp'
+ ;;
+
+ hpux9* | hpux10* | hpux11*)
+ # Is there a better ac_cv_prog_cc_static that works with the bundled CC?
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static="${ac_cv_prog_cc_wl}-a ${ac_cv_prog_cc_wl}archive"
+ ac_cv_prog_cc_pic='+Z'
+ ;;
+
+ irix5* | irix6*)
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static='-non_shared'
+ # PIC (with -KPIC) is the default.
+ ;;
+
+ cygwin* | mingw* | os2*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ ac_cv_prog_cc_pic='-DDLL_EXPORT'
+ ;;
+
+ osf3* | osf4* | osf5*)
+ # All OSF/1 code is PIC.
+ ac_cv_prog_cc_wl='-Wl,'
+ ac_cv_prog_cc_static='-non_shared'
+ ;;
+
+ sco3.2v5*)
+ ac_cv_prog_cc_pic='-Kpic'
+ ac_cv_prog_cc_static='-dn'
+ ac_cv_prog_cc_shlib='-belf'
+ ;;
+
+ solaris*)
+ ac_cv_prog_cc_pic='-KPIC'
+ ac_cv_prog_cc_static='-Bstatic'
+ ac_cv_prog_cc_wl='-Wl,'
+ ;;
+
+ sunos4*)
+ ac_cv_prog_cc_pic='-PIC'
+ ac_cv_prog_cc_static='-Bstatic'
+ ac_cv_prog_cc_wl='-Qoption ld '
+ ;;
+
+ sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ ac_cv_prog_cc_pic='-KPIC'
+ ac_cv_prog_cc_static='-Bstatic'
+ ac_cv_prog_cc_wl='-Wl,'
+ ;;
+
+ uts4*)
+ ac_cv_prog_cc_pic='-pic'
+ ac_cv_prog_cc_static='-Bstatic'
+ ;;
+
+ sysv4*MP*)
+ if test -d /usr/nec ;then
+ ac_cv_prog_cc_pic='-Kconform_pic'
+ ac_cv_prog_cc_static='-Bstatic'
+ fi
+ ;;
+
+ *)
+ ac_cv_prog_cc_can_build_shared=no
+ ;;
+ esac
+ fi
+fi
+if test -z "$ac_cv_prog_cc_pic"; then
+ echo "$ac_t"none 1>&6
+else
+ echo "$ac_t""$ac_cv_prog_cc_pic" 1>&6
+
+ # Check to make sure the pic_flag actually works.
+ echo $ac_n "checking if $compiler PIC flag $ac_cv_prog_cc_pic works... $ac_c" 1>&6
+ echo "$progname:693:checking that $compiler PIC flag $ac_cv_prog_cc_pic works." 1>&5
+ if test "X${ac_cv_prog_cc_pic_works+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+ else
+ ac_cv_prog_cc_pic_works=yes
+ $rm conftest*
+ echo "int some_variable = 0;" > conftest.c
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS $ac_cv_prog_cc_pic -DPIC"
+ if { (eval echo $progname:702: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.$objext; then
+ # Append any warnings to the config.log.
+ cat conftest.err 1>&5
+
+ case "$host_os" in
+ hpux9* | hpux10* | hpux11*)
+ # On HP-UX, both CC and GCC only warn that PIC is supported... then
+ # they create non-PIC objects. So, if there were any warnings, we
+ # assume that PIC is not supported.
+ if test -s conftest.err; then
+ ac_cv_prog_cc_pic_works=no
+ ac_cv_prog_cc_can_build_shared=no
+ ac_cv_prog_cc_pic=
+ else
+ ac_cv_prog_cc_pic_works=yes
+ ac_cv_prog_cc_pic=" $ac_cv_prog_cc_pic"
+ fi
+ ;;
+ *)
+ ac_cv_prog_cc_pic_works=yes
+ ac_cv_prog_cc_pic=" $ac_cv_prog_cc_pic"
+ ;;
+ esac
+ else
+ # Append any errors to the config.log.
+ cat conftest.err 1>&5
+ ac_cv_prog_cc_pic_works=no
+ ac_cv_prog_cc_can_build_shared=no
+ ac_cv_prog_cc_pic=
+ fi
+ CFLAGS="$save_CFLAGS"
+ $rm conftest*
+ fi
+ # Belt *and* braces to stop my trousers falling down:
+ if test "X$ac_cv_prog_cc_pic_works" = Xno; then
+ ac_cv_prog_cc_pic=
+ ac_cv_prog_cc_can_build_shared=no
+ fi
+ echo "$ac_t""$ac_cv_prog_cc_pic_works" 1>&6
+fi
+
+# Check for any special shared library compilation flags.
+if test -n "$ac_cv_prog_cc_shlib"; then
+ echo "$progname: warning: \`$CC' requires \`$ac_cv_prog_cc_shlib' to build shared libraries" 1>&2
+ if echo "$old_CC $old_CFLAGS " | egrep -e "[ ]$ac_cv_prog_cc_shlib[ ]" >/dev/null; then :
+ else
+ echo "$progname: add \`$ac_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure" 1>&2
+ ac_cv_prog_cc_can_build_shared=no
+ fi
+fi
+
+echo $ac_n "checking if $compiler static flag $ac_cv_prog_cc_static works... $ac_c" 1>&6
+echo "$progname:754: checking if $compiler static flag $ac_cv_prog_cc_static works" >&5
+if test "X${ac_cv_prog_cc_static_works+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ $rm conftest*
+ echo 'main(){return(0);}' > conftest.c
+ save_LDFLAGS="$LDFLAGS"
+ LDFLAGS="$LDFLAGS $ac_cv_prog_cc_static"
+ if { (eval echo $progname:762: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+ ac_cv_prog_cc_static_works=yes
+ else
+ ac_cv_prog_cc_static_works=no
+ ac_cv_prog_cc_static=
+ fi
+ LDFLAGS="$save_LDFLAGS"
+ $rm conftest*
+fi
+# Belt *and* braces to stop my trousers falling down:
+if test "X$ac_cv_prog_cc_static_works" = Xno; then
+ ac_cv_prog_cc_static=
+fi
+echo "$ac_t""$ac_cv_prog_cc_static_works" 1>&6
+pic_flag="$ac_cv_prog_cc_pic"
+special_shlib_compile_flags="$ac_cv_prog_cc_shlib"
+wl="$ac_cv_prog_cc_wl"
+link_static_flag="$ac_cv_prog_cc_static"
+no_builtin_flag="$ac_cv_prog_cc_no_builtin"
+can_build_shared="$ac_cv_prog_cc_can_build_shared"
+
+# Check to see if options -o and -c are simultaneously supported by compiler
+echo $ac_n "checking if $compiler supports -c -o file.o... $ac_c" 1>&6
+$rm -r conftest 2>/dev/null
+mkdir conftest
+cd conftest
+$rm conftest*
+echo "int some_variable = 0;" > conftest.c
+mkdir out
+# According to Tom Tromey, Ian Lance Taylor reported there are C compilers
+# that will create temporary files in the current directory regardless of
+# the output directory. Thus, making CWD read-only will cause this test
+# to fail, enabling locking or at least warning the user not to do parallel
+# builds.
+chmod -w .
+save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -o out/conftest2.o"
+echo "$progname:799: checking if $compiler supports -c -o file.o" >&5
+if { (eval echo $progname:800: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.o; then
+
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s out/conftest.err; then
+ echo "$ac_t"no 1>&6
+ compiler_c_o=no
+ else
+ echo "$ac_t"yes 1>&6
+ compiler_c_o=yes
+ fi
+else
+ # Append any errors to the config.log.
+ cat out/conftest.err 1>&5
+ compiler_c_o=no
+ echo "$ac_t"no 1>&6
+fi
+CFLAGS="$save_CFLAGS"
+chmod u+w .
+$rm conftest* out/*
+rmdir out
+cd ..
+rmdir conftest
+$rm -r conftest 2>/dev/null
+
+if test x"$compiler_c_o" = x"yes"; then
+ # Check to see if we can write to a .lo
+ echo $ac_n "checking if $compiler supports -c -o file.lo... $ac_c" 1>&6
+ $rm conftest*
+ echo "int some_variable = 0;" > conftest.c
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -c -o conftest.lo"
+ echo "$progname:832: checking if $compiler supports -c -o file.lo" >&5
+if { (eval echo $progname:833: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.lo; then
+
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s conftest.err; then
+ echo "$ac_t"no 1>&6
+ compiler_o_lo=no
+ else
+ echo "$ac_t"yes 1>&6
+ compiler_o_lo=yes
+ fi
+ else
+ # Append any errors to the config.log.
+ cat conftest.err 1>&5
+ compiler_o_lo=no
+ echo "$ac_t"no 1>&6
+ fi
+ CFLAGS="$save_CFLAGS"
+ $rm conftest*
+else
+ compiler_o_lo=no
+fi
+
+# Check to see if we can do hard links to lock some files if needed
+hard_links="nottested"
+if test "$compiler_c_o" = no && test "$need_locks" != no; then
+ # do not overwrite the value of need_locks provided by the user
+ echo $ac_n "checking if we can lock with hard links... $ac_c" 1>&6
+ hard_links=yes
+ $rm conftest*
+ ln conftest.a conftest.b 2>/dev/null && hard_links=no
+ touch conftest.a
+ ln conftest.a conftest.b 2>&5 || hard_links=no
+ ln conftest.a conftest.b 2>/dev/null && hard_links=no
+ echo "$ac_t$hard_links" 1>&6
+ $rm conftest*
+ if test "$hard_links" = no; then
+ echo "*** WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2
+ need_locks=warn
+ fi
+else
+ need_locks=no
+fi
+
+if test "$with_gcc" = yes; then
+ # Check to see if options -fno-rtti -fno-exceptions are supported by compiler
+ echo $ac_n "checking if $compiler supports -fno-rtti -fno-exceptions ... $ac_c" 1>&6
+ $rm conftest*
+ echo "int some_variable = 0;" > conftest.c
+ save_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.c"
+ echo "$progname:884: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
+ if { (eval echo $progname:885: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then
+
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s conftest.err; then
+ echo "$ac_t"no 1>&6
+ compiler_rtti_exceptions=no
+ else
+ echo "$ac_t"yes 1>&6
+ compiler_rtti_exceptions=yes
+ fi
+ else
+ # Append any errors to the config.log.
+ cat conftest.err 1>&5
+ compiler_rtti_exceptions=no
+ echo "$ac_t"no 1>&6
+ fi
+ CFLAGS="$save_CFLAGS"
+ $rm conftest*
+
+ if test "$compiler_rtti_exceptions" = "yes"; then
+ no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions'
+ else
+ no_builtin_flag=' -fno-builtin'
+ fi
+
+fi
+
+# See if the linker supports building shared libraries.
+echo $ac_n "checking whether the linker ($LD) supports shared libraries... $ac_c" 1>&6
+
+allow_undefined_flag=
+no_undefined_flag=
+need_lib_prefix=unknown
+need_version=unknown
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+archive_cmds=
+archive_expsym_cmds=
+old_archive_from_new_cmds=
+old_archive_from_expsyms_cmds=
+striplib=
+old_striplib=
+export_dynamic_flag_spec=
+whole_archive_flag_spec=
+thread_safe_flag_spec=
+hardcode_into_libs=no
+hardcode_libdir_flag_spec=
+hardcode_libdir_separator=
+hardcode_direct=no
+hardcode_minus_L=no
+hardcode_shlibpath_var=unsupported
+runpath_var=
+link_all_deplibs=unknown
+always_export_symbols=no
+export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols'
+# include_expsyms should be a list of space-separated symbols to be *always*
+# included in the symbol list
+include_expsyms=
+# exclude_expsyms can be an egrep regular expression of symbols to exclude
+# it will be wrapped by ` (' and `)$', so one must not match beginning or
+# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+# as well as any symbol that contains `d'.
+exclude_expsyms="_GLOBAL_OFFSET_TABLE_"
+# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+# platforms (ab)use it in PIC code, but their linkers get confused if
+# the symbol is explicitly referenced. Since portable code cannot
+# rely on this symbol name, it's probably fine to never include it in
+# preloaded symbol tables.
+extract_expsyms_cmds=
+
+case "$host_os" in
+cygwin* | mingw*)
+ # FIXME: the MSVC++ port hasn't been tested in a loooong time
+ # When not using gcc, we currently assume that we are using
+ # Microsoft Visual C++.
+ if test "$with_gcc" != yes; then
+ with_gnu_ld=no
+ fi
+ ;;
+
+esac
+
+ld_shlibs=yes
+if test "$with_gnu_ld" = yes; then
+ # If archive_cmds runs LD, not CC, wlarc should be empty
+ wlarc='${wl}'
+
+ # See if GNU ld supports shared libraries.
+ case "$host_os" in
+ aix3* | aix4*)
+ # On AIX, the GNU linker is very broken
+ ld_shlibs=no
+ cat <<EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.9.1, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support. If you
+*** really care for shared libraries, you may want to modify your PATH
+*** so that a non-GNU linker is found, and then restart.
+
+EOF
+ ;;
+
+ amigaos*)
+ archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_minus_L=yes
+
+ # Samuel A. Falvo II <kc5tja@dolphin.openprojects.net> reports
+ # that the semantics of dynamic libraries on AmigaOS, at least up
+ # to version 4, is to share data among multiple programs linked
+ # with the same dynamic library. Since this doesn't match the
+ # behavior of shared libraries on other platforms, we can use
+ # them.
+ ld_shlibs=no
+ ;;
+
+ beos*)
+ if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+ allow_undefined_flag=unsupported
+ # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+ # support --undefined. This deserves some investigation. FIXME
+ archive_cmds='$CC -nostart $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname -o $lib'
+ else
+ ld_shlibs=no
+ fi
+ ;;
+
+ cygwin* | mingw*)
+ # hardcode_libdir_flag_spec is actually meaningless, as there is
+ # no search path for DLLs.
+ hardcode_libdir_flag_spec='-L$libdir'
+ allow_undefined_flag=unsupported
+ always_export_symbols=yes
+
+ extract_expsyms_cmds='test -f $output_objdir/impgen.c || \
+ sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //; p; }" -e d < $0 > $output_objdir/impgen.c~
+ test -f $output_objdir/impgen.exe || (cd $output_objdir && \
+ if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \
+ else $CC -o impgen impgen.c ; fi)~
+ $output_objdir/impgen $dir/$soname > $output_objdir/$soname-def'
+
+ old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib'
+
+ # cygwin and mingw dlls have different entry points and sets of symbols
+ # to exclude.
+ # FIXME: what about values for MSVC?
+ dll_entry=__cygwin_dll_entry@12
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~
+ case "$host_os" in
+ mingw*)
+ # mingw values
+ dll_entry=_DllMainCRTStartup@12
+ dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~
+ ;;
+ esac
+
+ # mingw and cygwin differ, and it's simplest to just exclude the union
+ # of the two symbol sets.
+ dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12
+
+ # recent cygwin and mingw systems supply a stub DllMain which the user
+ # can override, but on older systems we have to supply one (in ltdll.c)
+ if test "x$lt_cv_need_dllmain" = "xyes"; then
+ ltdll_obj='$output_objdir/$soname-ltdll.'"$objext "
+ ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < $0 > $output_objdir/$soname-ltdll.c~
+ test -f $output_objdir/$soname-ltdll.$objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~'
+ else
+ ltdll_obj=
+ ltdll_cmds=
+ fi
+
+ # Extract the symbol export list from an `--export-all' def file,
+ # then regenerate the def file from the symbol export list, so that
+ # the compiled dll only exports the symbol export list.
+ # Be careful not to strip the DATA tag left be newer dlltools.
+ export_symbols_cmds="$ltdll_cmds"'
+ $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~
+ sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//" < $output_objdir/$soname-def > $export_symbols'
+
+ # If DATA tags from a recent dlltool are present, honour them!
+ archive_expsym_cmds='echo EXPORTS > $output_objdir/$soname-def~
+ _lt_hint=1;
+ cat $export_symbols | while read symbol; do
+ set dummy \$symbol;
+ case \$# in
+ 2) echo " \$2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;;
+ *) echo " \$2 @ \$_lt_hint \$3 ; " >> $output_objdir/$soname-def;;
+ esac;
+ _lt_hint=`expr 1 + \$_lt_hint`;
+ done~
+ '"$ltdll_cmds"'
+ $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+ $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~
+ $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags~
+ $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~
+ $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $lib '$ltdll_obj'$libobjs $deplibs $compiler_flags'
+ ;;
+
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+ fi
+ ;;
+
+ solaris* | sysv5*)
+ if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then
+ ld_shlibs=no
+ cat <<EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems. Therefore, libtool
+*** is disabling shared libraries support. We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer. Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+EOF
+ elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+ archive_cmds='$CC -shared $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname -o $lib'
+ archive_expsym_cmds='$CC -shared $libobjs $deplibs $linker_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ ld_shlibs=no
+ fi
+ ;;
+
+ sunos4*)
+ archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ wlarc=
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ *)
+ if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
+ archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ ld_shlibs=no
+ fi
+ ;;
+ esac
+
+ if test "$ld_shlibs" = yes; then
+ runpath_var=LD_RUN_PATH
+ hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir'
+ export_dynamic_flag_spec='${wl}--export-dynamic'
+ case $host_os in
+ cygwin* | mingw*)
+ # dlltool doesn't understand --whole-archive et. al.
+ whole_archive_flag_spec=
+ ;;
+ *)
+ # ancient GNU ld didn't support --whole-archive et. al.
+ if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then
+ whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+ else
+ whole_archive_flag_spec=
+ fi
+ ;;
+ esac
+ fi
+else
+ # PORTME fill in a description of your system's linker (not GNU ld)
+ case "$host_os" in
+ aix3*)
+ allow_undefined_flag=unsupported
+ always_export_symbols=yes
+ archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR cru $lib $output_objdir/$soname'
+ # Note: this linker hardcodes the directories in LIBPATH if there
+ # are no directories specified by -L.
+ hardcode_minus_L=yes
+ if test "$with_gcc" = yes && test -z "$link_static_flag"; then
+ # Neither direct hardcoding nor static linking is supported with a
+ # broken collect2.
+ hardcode_direct=unsupported
+ fi
+ ;;
+
+ aix4*)
+ hardcode_libdir_flag_spec='${wl}-b ${wl}nolibpath ${wl}-b ${wl}libpath:$libdir:/usr/lib:/lib'
+ hardcode_libdir_separator=':'
+ if test "$with_gcc" = yes; then
+ collect2name=`${CC} -print-prog-name=collect2`
+ if test -f "$collect2name" && \
+ strings "$collect2name" | grep resolve_lib_name >/dev/null
+ then
+ # We have reworked collect2
+ hardcode_direct=yes
+ else
+ # We have old collect2
+ hardcode_direct=unsupported
+ # It fails to find uninstalled libraries when the uninstalled
+ # path is not listed in the libpath. Setting hardcode_minus_L
+ # to unsupported forces relinking
+ hardcode_minus_L=yes
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_libdir_separator=
+ fi
+ shared_flag='-shared'
+ else
+ shared_flag='${wl}-bM:SRE'
+ hardcode_direct=yes
+ fi
+ allow_undefined_flag=' ${wl}-berok'
+ archive_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${wl}-bexpall ${wl}-bnoentry${allow_undefined_flag}'
+ archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${wl}-bE:$export_symbols ${wl}-bnoentry${allow_undefined_flag}'
+ case "$host_os" in aix4.[01]|aix4.[01].*)
+ # According to Greg Wooledge, -bexpall is only supported from AIX 4.2 on
+ always_export_symbols=yes ;;
+ esac
+ ;;
+
+ amigaos*)
+ archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR cru $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_minus_L=yes
+ # see comment about different semantics on the GNU ld section
+ ld_shlibs=no
+ ;;
+
+ cygwin* | mingw*)
+ # When not using gcc, we currently assume that we are using
+ # Microsoft Visual C++.
+ # hardcode_libdir_flag_spec is actually meaningless, as there is
+ # no search path for DLLs.
+ hardcode_libdir_flag_spec=' '
+ allow_undefined_flag=unsupported
+ # Tell ltmain to make .lib files, not .a files.
+ libext=lib
+ # FIXME: Setting linknames here is a bad hack.
+ archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames='
+ # The linker will automatically build a .lib file if we build a DLL.
+ old_archive_from_new_cmds='true'
+ # FIXME: Should let the user specify the lib program.
+ old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs'
+ fix_srcfile_path='`cygpath -w $srcfile`'
+ ;;
+
+ freebsd1*)
+ ld_shlibs=no
+ ;;
+
+ # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+ # support. Future versions do this automatically, but an explicit c++rt0.o
+ # does not break anything, and helps significantly (at the cost of a little
+ # extra space).
+ freebsd2.2*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+ freebsd2*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_direct=yes
+ hardcode_minus_L=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+ freebsd*)
+ archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ hpux9* | hpux10* | hpux11*)
+ case "$host_os" in
+ hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;;
+ *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;;
+ esac
+ hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+ hardcode_libdir_separator=:
+ hardcode_direct=yes
+ hardcode_minus_L=yes # Not in the search PATH, but as the default
+ # location of the library.
+ export_dynamic_flag_spec='${wl}-E'
+ ;;
+
+ irix5* | irix6*)
+ if test "$with_gcc" = yes; then
+ archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+ hardcode_libdir_separator=:
+ link_all_deplibs=yes
+ ;;
+
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out
+ else
+ archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF
+ fi
+ hardcode_libdir_flag_spec='${wl}-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ openbsd*)
+ archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_direct=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ os2*)
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_minus_L=yes
+ allow_undefined_flag=unsupported
+ archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+ old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+ ;;
+
+ osf3*)
+ if test "$with_gcc" = yes; then
+ allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+ archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ allow_undefined_flag=' -expect_unresolved \*'
+ archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+ hardcode_libdir_separator=:
+ ;;
+
+ osf4* | osf5*) # as osf3* with the addition of -msym flag
+ if test "$with_gcc" = yes; then
+ allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+ archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ allow_undefined_flag=' -expect_unresolved \*'
+ archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+ hardcode_libdir_separator=:
+ ;;
+
+ sco3.2v5*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_shlibpath_var=no
+ runpath_var=LD_RUN_PATH
+ hardcode_runpath_var=yes
+ ;;
+
+ solaris*)
+ no_undefined_flag=' -z text'
+ # $CC -shared without GNU ld will not create a library from C++
+ # object files and a static libstdc++, better avoid it by now
+ archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+ hardcode_libdir_flag_spec='-R$libdir'
+ hardcode_shlibpath_var=no
+ case "$host_os" in
+ solaris2.[0-5] | solaris2.[0-5].*) ;;
+ *) # Supported since Solaris 2.6 (maybe 2.5.1?)
+ whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;;
+ esac
+ link_all_deplibs=yes
+ ;;
+
+ sunos4*)
+ archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_direct=yes
+ hardcode_minus_L=yes
+ hardcode_shlibpath_var=no
+ ;;
+
+ sysv4)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ runpath_var='LD_RUN_PATH'
+ hardcode_shlibpath_var=no
+ hardcode_direct=no #Motorola manual says yes, but my tests say they lie
+ ;;
+
+ sysv4.3*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_shlibpath_var=no
+ export_dynamic_flag_spec='-Bexport'
+ ;;
+
+ sysv5*)
+ no_undefined_flag=' -z text'
+ # $CC -shared without GNU ld will not create a library from C++
+ # object files and a static libstdc++, better avoid it by now
+ archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+ hardcode_libdir_flag_spec=
+ hardcode_shlibpath_var=no
+ runpath_var='LD_RUN_PATH'
+ ;;
+
+ uts4*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_shlibpath_var=no
+ ;;
+
+ dgux*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_libdir_flag_spec='-L$libdir'
+ hardcode_shlibpath_var=no
+ ;;
+
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_shlibpath_var=no
+ runpath_var=LD_RUN_PATH
+ hardcode_runpath_var=yes
+ ld_shlibs=yes
+ fi
+ ;;
+
+ sysv4.2uw2*)
+ archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+ hardcode_direct=yes
+ hardcode_minus_L=no
+ hardcode_shlibpath_var=no
+ hardcode_runpath_var=yes
+ runpath_var=LD_RUN_PATH
+ ;;
+
+ unixware7*)
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ runpath_var='LD_RUN_PATH'
+ hardcode_shlibpath_var=no
+ ;;
+
+ *)
+ ld_shlibs=no
+ ;;
+ esac
+fi
+echo "$ac_t$ld_shlibs" 1>&6
+test "$ld_shlibs" = no && can_build_shared=no
+
+# Check hardcoding attributes.
+echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" || \
+ test -n "$runpath_var"; then
+
+ # We can hardcode non-existant directories.
+ if test "$hardcode_direct" != no &&
+ # If the only mechanism to avoid hardcoding is shlibpath_var, we
+ # have to relink, otherwise we might link with an installed library
+ # when we should be linking with a yet-to-be-installed one
+ ## test "$hardcode_shlibpath_var" != no &&
+ test "$hardcode_minus_L" != no; then
+ # Linking always hardcodes the temporary library directory.
+ hardcode_action=relink
+ else
+ # We can link without hardcoding, and we can hardcode nonexisting dirs.
+ hardcode_action=immediate
+ fi
+else
+ # We cannot hardcode anything, or else we can only hardcode existing
+ # directories.
+ hardcode_action=unsupported
+fi
+echo "$ac_t$hardcode_action" 1>&6
+
+echo $ac_n "checking whether stripping libraries is possible... $ac_c" 1>&6
+if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then
+ test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+ test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+ echo "${ac_t}yes" 1>&6
+else
+ echo "${ac_t}no" 1>&6
+fi
+
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+# PORTME Fill in your ld.so characteristics
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+
+echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6
+case "$host_os" in
+aix3*)
+ version_type=linux
+ library_names_spec='${libname}${release}.so$versuffix $libname.a'
+ shlibpath_var=LIBPATH
+
+ # AIX has no versioning support, so we append a major version to the name.
+ soname_spec='${libname}${release}.so$major'
+ ;;
+
+aix4*)
+ version_type=linux
+ # AIX has no versioning support, so currently we can not hardcode correct
+ # soname into executable. Probably we can add versioning support to
+ # collect2, so additional links can be useful in future.
+ # We preserve .a as extension for shared libraries though AIX4.2
+ # and later linker supports .so
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.a'
+ shlibpath_var=LIBPATH
+ ;;
+
+amigaos*)
+ library_names_spec='$libname.ixlibrary $libname.a'
+ # Create ${libname}_ixlibrary.a entries in /sys/libs.
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
+ ;;
+
+beos*)
+ library_names_spec='${libname}.so'
+ dynamic_linker="$host_os ld.so"
+ shlibpath_var=LIBRARY_PATH
+ lt_cv_dlopen="load_add_on"
+ lt_cv_dlopen_libs=
+ lt_cv_dlopen_self=yes
+ ;;
+
+bsdi4*)
+ version_type=linux
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+ sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+ export_dynamic_flag_spec=-rdynamic
+ # the default ld.so.conf also contains /usr/contrib/lib and
+ # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+ # libtool to hard-code these into programs
+ ;;
+
+cygwin* | mingw*)
+ version_type=windows
+ need_version=no
+ need_lib_prefix=no
+ if test "$with_gcc" = yes; then
+ library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll'
+ else
+ library_names_spec='${libname}`echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll $libname.lib'
+ fi
+ dynamic_linker='Win32 ld.exe'
+ # FIXME: first we should search . and the directory the executable is in
+ shlibpath_var=PATH
+ lt_cv_dlopen="LoadLibrary"
+ lt_cv_dlopen_libs=
+ ;;
+
+freebsd1*)
+ dynamic_linker=no
+ ;;
+
+freebsd*)
+ objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout`
+ version_type=freebsd-$objformat
+ case "$version_type" in
+ freebsd-elf*)
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
+ need_version=no
+ need_lib_prefix=no
+ ;;
+ freebsd-*)
+ library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix'
+ need_version=yes
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY_PATH
+ case "$host_os" in
+ freebsd2*)
+ shlibpath_overrides_runpath=yes
+ ;;
+ freebsd3.[01]* | freebsdelf3.[01]*)
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+ *) # from 3.2 on
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ ;;
+ esac
+ ;;
+
+gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ hardcode_into_libs=yes
+ ;;
+
+hpux9* | hpux10* | hpux11*)
+ # Give a soname corresponding to the major version so that dld.sl refuses to
+ # link against other versions.
+ dynamic_linker="$host_os dld.sl"
+ version_type=sunos
+ need_lib_prefix=no
+ need_version=no
+ shlibpath_var=SHLIB_PATH
+ shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+ library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl'
+ soname_spec='${libname}${release}.sl$major'
+ # HP-UX runs *really* slowly unless shared libraries are mode 555.
+ postinstall_cmds='chmod 555 $lib'
+ ;;
+
+irix5* | irix6*)
+ version_type=irix
+ need_lib_prefix=no
+ need_version=no
+ soname_spec='${libname}${release}.so.$major'
+ library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major ${libname}${release}.so $libname.so'
+ case "$host_os" in
+ irix5*)
+ libsuff= shlibsuff=
+ ;;
+ *)
+ case "$LD" in # libtool.m4 will add one of these switches to LD
+ *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;;
+ *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;;
+ *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;;
+ *) libsuff= shlibsuff= libmagic=never-match;;
+ esac
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+ sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+ ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
+ dynamic_linker=no
+ ;;
+
+# This must be Linux ELF.
+linux-gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ # This implies no fast_install, which is unacceptable.
+ # Some rework will be needed to allow for fast_install
+ # before this can be enabled.
+ hardcode_into_libs=yes
+
+ if test -f /lib/ld.so.1; then
+ dynamic_linker='GNU ld.so'
+ else
+ # Only the GNU ld.so supports shared libraries on MkLinux.
+ case "$host_cpu" in
+ powerpc*) dynamic_linker=no ;;
+ *) dynamic_linker='Linux ld.so' ;;
+ esac
+ fi
+ ;;
+
+netbsd*)
+ version_type=sunos
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ dynamic_linker='NetBSD (a.out) ld.so'
+ else
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so'
+ soname_spec='${libname}${release}.so$major'
+ dynamic_linker='NetBSD ld.elf_so'
+ fi
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+openbsd*)
+ version_type=sunos
+ if test "$with_gnu_ld" = yes; then
+ need_lib_prefix=no
+ need_version=no
+ fi
+ library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+os2*)
+ libname_spec='$name'
+ need_lib_prefix=no
+ library_names_spec='$libname.dll $libname.a'
+ dynamic_linker='OS/2 ld.exe'
+ shlibpath_var=LIBPATH
+ ;;
+
+osf3* | osf4* | osf5*)
+ version_type=osf
+ need_version=no
+ soname_spec='${libname}${release}.so'
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+ sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+ ;;
+
+sco3.2v5*)
+ version_type=osf
+ soname_spec='${libname}${release}.so$major'
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+solaris*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ # ldd complains unless libraries are executable
+ postinstall_cmds='chmod +x $lib'
+ ;;
+
+sunos4*)
+ version_type=sunos
+ library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix'
+ finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ if test "$with_gnu_ld" = yes; then
+ need_lib_prefix=no
+ fi
+ need_version=yes
+ ;;
+
+sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+ version_type=linux
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ case "$host_vendor" in
+ motorola)
+ need_lib_prefix=no
+ need_version=no
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+ ;;
+ esac
+ ;;
+
+uts4*)
+ version_type=linux
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+dgux*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+ soname_spec='${libname}${release}.so$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+sysv4*MP*)
+ if test -d /usr/nec ;then
+ version_type=linux
+ library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so'
+ soname_spec='$libname.so.$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ fi
+ ;;
+
+*)
+ dynamic_linker=no
+ ;;
+esac
+echo "$ac_t$dynamic_linker" 1>&6
+test "$dynamic_linker" = no && can_build_shared=no
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6
+
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix. What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[BCDEGRST]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Transform the above into a raw symbol and a C symbol.
+symxfrm='\1 \2\3 \3'
+
+# Transform an extracted symbol line into a proper C declaration
+global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'"
+
+# Define system-specific variables.
+case "$host_os" in
+aix*)
+ symcode='[BCDT]'
+ ;;
+cygwin* | mingw*)
+ symcode='[ABCDGISTW]'
+ ;;
+hpux*) # Its linker distinguishes data from code symbols
+ global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^. .* \(.*\)$/extern char \1;/p'"
+ ;;
+irix*)
+ symcode='[BCDEGRST]'
+ ;;
+solaris* | sysv5*)
+ symcode='[BDT]'
+ ;;
+sysv4)
+ symcode='[DFNSTU]'
+ ;;
+esac
+
+# Handle CRLF in mingw too chain
+opt_cr=
+case "$host_os" in
+mingw*)
+ opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+ ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
+ symcode='[ABCDGISTW]'
+fi
+
+# Try without a prefix undercore, then with it.
+for ac_symprfx in "" "_"; do
+
+ # Write the raw and C identifiers.
+global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"
+
+ # Check to see that the pipe works correctly.
+ pipe_works=no
+ $rm conftest*
+ cat > conftest.c <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+ echo "$progname:1867: checking if global_symbol_pipe works" >&5
+ if { (eval echo $progname:1868: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.$objext; then
+ # Now try to grab the symbols.
+ nlist=conftest.nm
+ if { echo "$progname:1871: eval \"$NM conftest.$objext | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.$objext | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then
+
+ # Try sorting and uniquifying the output.
+ if sort "$nlist" | uniq > "$nlist"T; then
+ mv -f "$nlist"T "$nlist"
+ else
+ rm -f "$nlist"T
+ fi
+
+ # Make sure that we snagged all the symbols we need.
+ if egrep ' nm_test_var$' "$nlist" >/dev/null; then
+ if egrep ' nm_test_func$' "$nlist" >/dev/null; then
+ cat <<EOF > conftest.c
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+ # Now generate the symbol file.
+ eval "$global_symbol_to_cdecl"' < "$nlist" >> conftest.c'
+
+ cat <<EOF >> conftest.c
+#if defined (__STDC__) && __STDC__
+# define lt_ptr_t void *
+#else
+# define lt_ptr_t char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+ const char *name;
+ lt_ptr_t address;
+}
+lt_preloaded_symbols[] =
+{
+EOF
+ sed 's/^. \(.*\) \(.*\)$/ {"\2", (lt_ptr_t) \&\2},/' < "$nlist" >> conftest.c
+ cat <<\EOF >> conftest.c
+ {0, (lt_ptr_t) 0}
+};
+
+#ifdef __cplusplus
+}
+#endif
+EOF
+ # Now try linking the two files.
+ mv conftest.$objext conftstm.$objext
+ save_LIBS="$LIBS"
+ save_CFLAGS="$CFLAGS"
+ LIBS="conftstm.$objext"
+ CFLAGS="$CFLAGS$no_builtin_flag"
+ if { (eval echo $progname:1923: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
+ pipe_works=yes
+ else
+ echo "$progname: failed program was:" >&5
+ cat conftest.c >&5
+ fi
+ LIBS="$save_LIBS"
+ else
+ echo "cannot find nm_test_func in $nlist" >&5
+ fi
+ else
+ echo "cannot find nm_test_var in $nlist" >&5
+ fi
+ else
+ echo "cannot run $global_symbol_pipe" >&5
+ fi
+ else
+ echo "$progname: failed program was:" >&5
+ cat conftest.c >&5
+ fi
+ $rm conftest* conftst*
+
+ # Do not use the global_symbol_pipe unless it works.
+ if test "$pipe_works" = yes; then
+ break
+ else
+ global_symbol_pipe=
+ fi
+done
+if test "$pipe_works" = yes; then
+ echo "${ac_t}ok" 1>&6
+else
+ echo "${ac_t}failed" 1>&6
+fi
+
+if test -z "$global_symbol_pipe"; then
+ global_symbol_to_cdecl=
+fi
+
+# Report the final consequences.
+echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6
+
+# Only try to build win32 dlls if AC_LIBTOOL_WIN32_DLL was used in
+# configure.in, otherwise build static only libraries.
+case "$host_os" in
+cygwin* | mingw* | os2*)
+ if test x$can_build_shared = xyes; then
+ test x$enable_win32_dll = xno && can_build_shared=no
+ echo "checking if package supports dlls... $can_build_shared" 1>&6
+ fi
+;;
+esac
+
+echo $ac_n "checking whether to build shared libraries... $ac_c" 1>&6
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case "$host_os" in
+aix3*)
+ test "$enable_shared" = yes && enable_static=no
+ if test -n "$RANLIB"; then
+ archive_cmds="$archive_cmds~\$RANLIB \$lib"
+ postinstall_cmds='$RANLIB $lib'
+ fi
+ ;;
+
+aix4*)
+ test "$enable_shared" = yes && enable_static=no
+ ;;
+esac
+
+echo "$ac_t$enable_shared" 1>&6
+
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+
+echo "checking whether to build static libraries... $enable_static" 1>&6
+
+if test "$hardcode_action" = relink || test "$hardcode_into_libs" = all; then
+ # Fast installation is not supported
+ enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+ test "$enable_shared" = no; then
+ # Fast installation is not necessary
+ enable_fast_install=needless
+fi
+
+# Check whether we must set pic_mode to default
+test -z "$pic_flag" && pic_mode=default
+# On Cygwin there's no "real" PIC flag so we must build both object types
+case "$host_os" in
+cygwin* | mingw* | os2*)
+ pic_mode=default
+ ;;
+esac
+if test $pic_mode = no && test "$deplibs_check_method" != pass_all; then
+ # non-PIC code in shared libraries is not supported
+ pic_mode=default
+fi
+
+if test "x$enable_dlopen" != xyes; then
+ enable_dlopen=unknown
+ enable_dlopen_self=unknown
+ enable_dlopen_self_static=unknown
+else
+if test "X${lt_cv_dlopen+set}" != Xset; then
+ lt_cv_dlopen=no lt_cv_dlopen_libs=
+echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6
+echo "$progname:2032: checking for dlopen in -ldl" >&5
+if test "X${ac_cv_lib_dl_dlopen+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_save_LIBS="$LIBS"
+LIBS="-ldl $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 2039 "ltconfig"
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen();
+
+int main() {
+dlopen()
+; return 0; }
+EOF
+if { (eval echo $progname:2052: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_lib_dl_dlopen=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_lib_dl_dlopen=no
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if test "X$ac_cv_lib_dl_dlopen" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dlopen""... $ac_c" 1>&6
+echo "$progname:2071: checking for dlopen" >&5
+if test "X${ac_cv_func_dlopen+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2076 "ltconfig"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char dlopen(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen();
+
+int main() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_dlopen) || defined (__stub___dlopen)
+choke me
+#else
+dlopen();
+#endif
+
+; return 0; }
+EOF
+if { (eval echo $progname:2101: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_func_dlopen=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_func_dlopen=no
+fi
+rm -f conftest*
+fi
+if test "X$ac_cv_func_dlopen" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="dlopen"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for dld_link in -ldld""... $ac_c" 1>&6
+echo "$progname:2118: checking for dld_link in -ldld" >&5
+if test "X${ac_cv_lib_dld_dld_link+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_save_LIBS="$LIBS"
+LIBS="-ldld $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 2125 "ltconfig"
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dld_link();
+
+int main() {
+dld_link()
+; return 0; }
+EOF
+if { (eval echo $progname:2138: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_lib_dld_dld_link=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_lib_dld_dld_link=no
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if test "X$ac_cv_lib_dld_dld_link" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for shl_load""... $ac_c" 1>&6
+echo "$progname:2157: checking for shl_load" >&5
+if test "X${ac_cv_func_shl_load+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2162 "ltconfig"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char shl_load(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load();
+
+int main() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_shl_load) || defined (__stub___shl_load)
+choke me
+#else
+shl_load();
+#endif
+
+; return 0; }
+EOF
+if { (eval echo $progname:2187: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_func_shl_load=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_func_shl_load=no
+fi
+rm -f conftest*
+fi
+
+if test "X$ac_cv_func_shl_load" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="shl_load"
+else
+ echo "$ac_t""no" 1>&6
+echo $ac_n "checking for shl_load in -ldld""... $ac_c" 1>&6
+echo "$progname:2205: checking for shl_load in -ldld" >&5
+if test "X${ac_cv_lib_dld_shl_load+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ ac_save_LIBS="$LIBS"
+LIBS="-ldld $LIBS"
+cat > conftest.$ac_ext <<EOF
+#line 2212 "ltconfig"
+#include "confdefs.h"
+/* Override any gcc2 internal prototype to avoid an error. */
+/* We use char because int might match the return type of a gcc2
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load();
+
+int main() {
+shl_load()
+; return 0; }
+EOF
+if { (eval echo $progname:2226: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then
+ rm -rf conftest*
+ ac_cv_lib_dld_shl_load=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ ac_cv_lib_dld_shl_load=no
+fi
+rm -f conftest*
+LIBS="$ac_save_LIBS"
+
+fi
+if test "X$ac_cv_lib_dld_shl_load" = Xyes; then
+ echo "$ac_t""yes" 1>&6
+ lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+fi
+
+ if test "x$lt_cv_dlopen" != xno; then
+ enable_dlopen=yes
+ fi
+
+ case "$lt_cv_dlopen" in
+ dlopen)
+for ac_hdr in dlfcn.h; do
+ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'`
+echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
+echo "$progname:2269: checking for $ac_hdr" >&5
+if eval "test \"`echo 'X$''{'ac_cv_header_$ac_safe'+set}'`\" = Xset"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2274 "ltconfig"
+#include <$ac_hdr>
+int fnord = 0;
+int main () { }
+EOF
+ac_try="$ac_compile >/dev/null 2>conftest.out"
+{ (eval echo $progname:2280: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
+ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=yes"
+else
+ echo "$ac_err" >&5
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+else
+ echo "$ac_t""no" 1>&6
+fi
+done
+
+ if test "x$ac_cv_header_dlfcn_h" = xyes; then
+ CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+ fi
+ eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+ LIBS="$lt_cv_dlopen_libs $LIBS"
+
+ echo $ac_n "checking whether a program can dlopen itself""... $ac_c" 1>&6
+echo "$progname:2308: checking whether a program can dlopen itself" >&5
+if test "X${lt_cv_dlopen_self+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ lt_cv_dlopen_self=cross
+ else
+ cat > conftest.c <<EOF
+#line 2316 "ltconfig"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LTDL_GLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LTDL_GLOBAL DL_GLOBAL
+# else
+# define LTDL_GLOBAL 0
+# endif
+#endif
+
+/* We may have to define LTDL_LAZY_OR_NOW in the command line if we
+ find out it does not work in some platform. */
+#ifndef LTDL_LAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LTDL_LAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LTDL_LAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LTDL_LAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LTDL_LAZY_OR_NOW DL_NOW
+# else
+# define LTDL_LAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+fnord() { int i=42;}
+main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW);
+ if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord");
+ if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); }
+
+EOF
+if { (eval echo $progname:2362: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+then
+ lt_cv_dlopen_self=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ lt_cv_dlopen_self=no
+fi
+rm -fr conftest*
+fi
+
+fi
+
+echo "$ac_t""$lt_cv_dlopen_self" 1>&6
+
+ if test "$lt_cv_dlopen_self" = yes; then
+ LDFLAGS="$LDFLAGS $link_static_flag"
+ echo $ac_n "checking whether a statically linked program can dlopen itself""... $ac_c" 1>&6
+echo "$progname:2381: checking whether a statically linked program can dlopen itself" >&5
+if test "X${lt_cv_dlopen_self_static+set}" = Xset; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ lt_cv_dlopen_self_static=cross
+ else
+ cat > conftest.c <<EOF
+#line 2389 "ltconfig"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LTDL_GLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LTDL_GLOBAL DL_GLOBAL
+# else
+# define LTDL_GLOBAL 0
+# endif
+#endif
+
+/* We may have to define LTDL_LAZY_OR_NOW in the command line if we
+ find out it does not work in some platform. */
+#ifndef LTDL_LAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LTDL_LAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LTDL_LAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LTDL_LAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LTDL_LAZY_OR_NOW DL_NOW
+# else
+# define LTDL_LAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+fnord() { int i=42;}
+main() { void *self, *ptr1, *ptr2; self=dlopen(0,LTDL_GLOBAL|LTDL_LAZY_OR_NOW);
+ if(self) { ptr1=dlsym(self,"fnord"); ptr2=dlsym(self,"_fnord");
+ if(ptr1 || ptr2) { dlclose(self); exit(0); } } exit(1); }
+
+EOF
+if { (eval echo $progname:2435: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
+then
+ lt_cv_dlopen_self_static=yes
+else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ rm -fr conftest*
+ lt_cv_dlopen_self_static=no
+fi
+rm -fr conftest*
+fi
+
+fi
+
+echo "$ac_t""$lt_cv_dlopen_self_static" 1>&6
+fi
+ ;;
+ esac
+
+ case "$lt_cv_dlopen_self" in
+ yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+ *) enable_dlopen_self=unknown ;;
+ esac
+
+ case "$lt_cv_dlopen_self_static" in
+ yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+ *) enable_dlopen_self_static=unknown ;;
+ esac
+fi
+
+# Copy echo and quote the copy, instead of the original, because it is
+# used later.
+ltecho="$echo"
+if test "X$ltecho" = "X$CONFIG_SHELL $0 --fallback-echo"; then
+ ltecho="$CONFIG_SHELL \$0 --fallback-echo"
+fi
+LTSHELL="$SHELL"
+
+LTCONFIG_VERSION="$VERSION"
+
+# Only quote variables if we're using ltmain.sh.
+case "$ltmain" in
+*.sh)
+ # Now quote all the things that may contain metacharacters.
+ for var in ltecho old_AR old_CC old_CFLAGS old_CPPFLAGS \
+ old_MAGIC old_LD old_LDFLAGS old_LIBS \
+ old_LN_S old_NM old_RANLIB old_STRIP \
+ old_AS old_DLLTOOL old_OBJDUMP \
+ old_OBJEXT old_EXEEXT old_reload_flag \
+ old_deplibs_check_method old_file_magic_cmd \
+ AR CC LD LN_S NM LTSHELL LTCONFIG_VERSION \
+ reload_flag reload_cmds wl \
+ pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \
+ thread_safe_flag_spec whole_archive_flag_spec libname_spec \
+ library_names_spec soname_spec \
+ RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \
+ old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \
+ postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \
+ old_striplib striplib file_magic_cmd export_symbols_cmds \
+ deplibs_check_method allow_undefined_flag no_undefined_flag \
+ finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \
+ hardcode_libdir_flag_spec hardcode_libdir_separator \
+ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \
+ compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do
+
+ case "$var" in
+ reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \
+ old_postinstall_cmds | old_postuninstall_cmds | \
+ export_symbols_cmds | archive_cmds | archive_expsym_cmds | \
+ extract_expsyms_cmds | old_archive_from_expsyms_cmds | \
+ postinstall_cmds | postuninstall_cmds | \
+ finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec)
+ # Double-quote double-evaled strings.
+ eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ### testsuite: skip nested quoting test
+ ;;
+ *)
+ eval "$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ### testsuite: skip nested quoting test
+ ;;
+ esac
+ done
+
+ case "$ltecho" in
+ *'\$0 --fallback-echo"')
+ ltecho=`$echo "X$ltecho" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'`
+ ;;
+ esac
+
+ trap "$rm \"$ofile\"; exit 1" 1 2 15
+ echo "creating $ofile"
+ $rm "$ofile"
+ cat <<EOF > "$ofile"
+#! $SHELL
+
+# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh.
+#
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="sed -e s/^X//"
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi
+
+### BEGIN LIBTOOL CONFIG
+EOF
+ cfgfile="$ofile"
+ ;;
+
+*)
+ # Double-quote the variables that need it (for aesthetics).
+ for var in old_AR old_CC old_CFLAGS old_CPPFLAGS \
+ old_MAGIC old_LD old_LDFLAGS old_LIBS \
+ old_LN_S old_NM old_RANLIB old_STRIP \
+ old_AS old_DLLTOOL old_OBJDUMP \
+ old_OBJEXT old_EXEEXT old_reload_flag \
+ old_deplibs_check_method old_file_magic_cmd; do
+ eval "$var=\\\"\$var\\\""
+ done
+
+ # Just create a config file.
+ cfgfile="$ofile.cfg"
+ trap "$rm \"$cfgfile\"; exit 1" 1 2 15
+ echo "creating $cfgfile"
+ $rm "$cfgfile"
+ cat <<EOF > "$cfgfile"
+# `$echo "$cfgfile" | sed 's%^.*/%%'` - Libtool configuration file.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+EOF
+ ;;
+esac
+
+cat <<EOF >> "$cfgfile"
+# Libtool was configured as follows, on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+#
+# AR=$old_AR CC=$old_CC CFLAGS=$old_CFLAGS CPPFLAGS=$old_CPPFLAGS \\
+# MAGIC=$old_MAGIC LD=$old_LD LDFLAGS=$old_LDFLAGS LIBS=$old_LIBS \\
+# LN_S=$old_LN_S NM=$old_NM RANLIB=$old_RANLIB STRIP=$old_STRIP \\
+# AS=$old_AS DLLTOOL=$old_DLLTOOL OBJDUMP=$old_OBJDUMP \\
+# objext=$old_OBJEXT exeext=$old_EXEEXT reload_flag=$old_reload_flag \\
+# deplibs_check_method=$old_deplibs_check_method file_magic_cmd=$old_file_magic_cmd \\
+# $0$ltconfig_args
+#
+# Compiler and other test output produced by $progname, useful for
+# debugging $progname, is in ./config.log if it exists.
+# The version of $progname that generated this script.
+LTCONFIG_VERSION=$LTCONFIG_VERSION
+
+# Shell to use when invoking shell scripts.
+SHELL=$LTSHELL
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# The host system.
+host_alias=$host_alias
+host=$host
+
+# An echo program that does not interpret backslashes.
+echo=$ltecho
+
+# The archiver.
+AR=$AR
+
+# The default C compiler.
+CC=$CC
+
+# The linker used to build libraries.
+LD=$LD
+
+# Whether we need hard or soft links.
+LN_S=$LN_S
+
+# A BSD-compatible nm program.
+NM=$NM
+
+# A symbol stripping program
+STRIP=$STRIP
+
+# Used to examine libraries when file_magic_cmd begins "file"
+MAGIC=$MAGIC
+
+# Used on cygwin: DLL creation program.
+DLLTOOL="$DLLTOOL"
+
+# Used on cygwin: object dumper.
+OBJDUMP="$OBJDUMP"
+
+# Used on cygwin: assembler.
+AS="$AS"
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# How to create reloadable object files.
+reload_flag=$reload_flag
+reload_cmds=$reload_cmds
+
+# How to pass a linker flag through the compiler.
+wl=$wl
+
+# Object file suffix (normally "o").
+objext="$objext"
+
+# Old archive suffix (normally "a").
+libext="$libext"
+
+# Executable file suffix (normally "").
+exeext="$exeext"
+
+# Additional compiler flags for building library objects.
+pic_flag=$pic_flag
+pic_mode=$pic_mode
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$compiler_c_o
+
+# Can we write directly to a .lo ?
+compiler_o_lo=$compiler_o_lo
+
+# Must we lock files when doing compilation ?
+need_locks=$need_locks
+
+# Do we need the lib prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$link_static_flag
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$no_builtin_flag
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$whole_archive_flag_spec
+
+# Compiler flag to generate thread-safe objects.
+thread_safe_flag_spec=$thread_safe_flag_spec
+
+# Library versioning type.
+version_type=$version_type
+
+# Format of library name prefix.
+libname_spec=$libname_spec
+
+# List of archive names. First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME.
+library_names_spec=$library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$soname_spec
+
+# Commands used to build and install an old-style archive.
+RANLIB=$RANLIB
+old_archive_cmds=$old_archive_cmds
+old_postinstall_cmds=$old_postinstall_cmds
+old_postuninstall_cmds=$old_postuninstall_cmds
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$old_archive_from_expsyms_cmds
+
+# Commands used to build and install a shared archive.
+archive_cmds=$archive_cmds
+archive_expsym_cmds=$archive_expsym_cmds
+postinstall_cmds=$postinstall_cmds
+postuninstall_cmds=$postuninstall_cmds
+
+# Commands to strip libraries.
+old_striplib=$old_striplib
+striplib=$striplib
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$deplibs_check_method
+
+# Command to use when deplibs_check_method == file_magic.
+file_magic_cmd=$file_magic_cmd
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$allow_undefined_flag
+
+# Flag that forces no undefined symbols.
+no_undefined_flag=$no_undefined_flag
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$finish_cmds
+
+# Same as above, but a single script fragment to be evaled but not shown.
+finish_eval=$finish_eval
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$global_symbol_pipe
+
+# Transform the output of nm in a proper C declaration
+global_symbol_to_cdecl=$global_symbol_to_cdecl
+
+# This is the shared library runtime path variable.
+runpath_var=$runpath_var
+
+# This is the shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec=$hardcode_libdir_flag_spec
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator=$hardcode_libdir_separator
+
+# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
+# the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Compile-time system search path for libraries
+sys_lib_search_path_spec=$sys_lib_search_path_spec
+
+# Run-time system search path for libraries
+sys_lib_dlsearch_path_spec=$sys_lib_dlsearch_path_spec
+
+# Fix the shell variable \$srcfile for the compiler.
+fix_srcfile_path="$fix_srcfile_path"
+
+# Set to yes if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$export_symbols_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$extract_expsyms_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$include_expsyms
+
+EOF
+
+case "$ltmain" in
+*.sh)
+ echo '### END LIBTOOL CONFIG' >> "$ofile"
+ echo >> "$ofile"
+ case "$host_os" in
+ aix3*)
+ cat <<\EOF >> "$ofile"
+
+# AIX sometimes has problems with the GCC collect2 program. For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+ COLLECT_NAMES=
+ export COLLECT_NAMES
+fi
+EOF
+ ;;
+ esac
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ cat <<'EOF' >> "$ofile"
+ # This is a source program that is used to create dlls on Windows
+ # Don't remove nor modify the starting and closing comments
+# /* ltdll.c starts here */
+# #define WIN32_LEAN_AND_MEAN
+# #include <windows.h>
+# #undef WIN32_LEAN_AND_MEAN
+# #include <stdio.h>
+#
+# #ifndef __CYGWIN__
+# # ifdef __CYGWIN32__
+# # define __CYGWIN__ __CYGWIN32__
+# # endif
+# #endif
+#
+# #ifdef __cplusplus
+# extern "C" {
+# #endif
+# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
+# #ifdef __cplusplus
+# }
+# #endif
+#
+# #ifdef __CYGWIN__
+# #include <cygwin/cygwin_dll.h>
+# DECLARE_CYGWIN_DLL( DllMain );
+# #endif
+# HINSTANCE __hDllInstance_base;
+#
+# BOOL APIENTRY
+# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
+# {
+# __hDllInstance_base = hInst;
+# return TRUE;
+# }
+# /* ltdll.c ends here */
+ # This is a source program that is used to create import libraries
+ # on Windows for dlls which lack them. Don't remove nor modify the
+ # starting and closing comments
+# /* impgen.c starts here */
+# /* Copyright (C) 1999-2000 Free Software Foundation, Inc.
+#
+# This file is part of GNU libtool.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# */
+#
+# #include <stdio.h> /* for printf() */
+# #include <unistd.h> /* for open(), lseek(), read() */
+# #include <fcntl.h> /* for O_RDONLY, O_BINARY */
+# #include <string.h> /* for strdup() */
+#
+# /* O_BINARY isn't required (or even defined sometimes) under Unix */
+# #ifndef O_BINARY
+# #define O_BINARY 0
+# #endif
+#
+# static unsigned int
+# pe_get16 (fd, offset)
+# int fd;
+# int offset;
+# {
+# unsigned char b[2];
+# lseek (fd, offset, SEEK_SET);
+# read (fd, b, 2);
+# return b[0] + (b[1]<<8);
+# }
+#
+# static unsigned int
+# pe_get32 (fd, offset)
+# int fd;
+# int offset;
+# {
+# unsigned char b[4];
+# lseek (fd, offset, SEEK_SET);
+# read (fd, b, 4);
+# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# static unsigned int
+# pe_as32 (ptr)
+# void *ptr;
+# {
+# unsigned char *b = ptr;
+# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24);
+# }
+#
+# int
+# main (argc, argv)
+# int argc;
+# char *argv[];
+# {
+# int dll;
+# unsigned long pe_header_offset, opthdr_ofs, num_entries, i;
+# unsigned long export_rva, export_size, nsections, secptr, expptr;
+# unsigned long name_rvas, nexp;
+# unsigned char *expdata, *erva;
+# char *filename, *dll_name;
+#
+# filename = argv[1];
+#
+# dll = open(filename, O_RDONLY|O_BINARY);
+# if (!dll)
+# return 1;
+#
+# dll_name = filename;
+#
+# for (i=0; filename[i]; i++)
+# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':')
+# dll_name = filename + i +1;
+#
+# pe_header_offset = pe_get32 (dll, 0x3c);
+# opthdr_ofs = pe_header_offset + 4 + 20;
+# num_entries = pe_get32 (dll, opthdr_ofs + 92);
+#
+# if (num_entries < 1) /* no exports */
+# return 1;
+#
+# export_rva = pe_get32 (dll, opthdr_ofs + 96);
+# export_size = pe_get32 (dll, opthdr_ofs + 100);
+# nsections = pe_get16 (dll, pe_header_offset + 4 +2);
+# secptr = (pe_header_offset + 4 + 20 +
+# pe_get16 (dll, pe_header_offset + 4 + 16));
+#
+# expptr = 0;
+# for (i = 0; i < nsections; i++)
+# {
+# char sname[8];
+# unsigned long secptr1 = secptr + 40 * i;
+# unsigned long vaddr = pe_get32 (dll, secptr1 + 12);
+# unsigned long vsize = pe_get32 (dll, secptr1 + 16);
+# unsigned long fptr = pe_get32 (dll, secptr1 + 20);
+# lseek(dll, secptr1, SEEK_SET);
+# read(dll, sname, 8);
+# if (vaddr <= export_rva && vaddr+vsize > export_rva)
+# {
+# expptr = fptr + (export_rva - vaddr);
+# if (export_rva + export_size > vaddr + vsize)
+# export_size = vsize - (export_rva - vaddr);
+# break;
+# }
+# }
+#
+# expdata = (unsigned char*)malloc(export_size);
+# lseek (dll, expptr, SEEK_SET);
+# read (dll, expdata, export_size);
+# erva = expdata - export_rva;
+#
+# nexp = pe_as32 (expdata+24);
+# name_rvas = pe_as32 (expdata+32);
+#
+# printf ("EXPORTS\n");
+# for (i = 0; i<nexp; i++)
+# {
+# unsigned long name_rva = pe_as32 (erva+name_rvas+i*4);
+# printf ("\t%s @ %ld ;\n", erva+name_rva, 1+ i);
+# }
+#
+# return 0;
+# }
+# /* impgen.c ends here */
+
+EOF
+ ;;
+ esac
+
+
+ # Append the ltmain.sh script.
+ sed '$q' "$ltmain" >> "$ofile" || (rm -f "$ofile"; exit 1)
+ # We use sed instead of cat because bash on DJGPP gets confused if
+ # if finds mixed CR/LF and LF-only lines. Since sed operates in
+ # text mode, it properly converts lines to CR/LF. This bash problem
+ # is reportedly fixed, but why not run on old versions too?
+
+ chmod +x "$ofile"
+ ;;
+
+*)
+ # Compile the libtool program.
+ echo "FIXME: would compile $ltmain"
+ ;;
+esac
+
+test -n "$cache_file" || exit 0
+
+# AC_CACHE_SAVE
+trap '' 1 2 15
+cat > confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, don't put newlines in cache variables' values.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ case `(ac_space=' '; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ # `set' does not quote correctly, so add quotes (double-quote substitution
+ # turns \\\\ into \\, and sed turns \\ into \).
+ sed -n \
+ -e "s/'/'\\\\''/g" \
+ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p"
+ ;;
+ *)
+ # `set' quotes correctly as required by POSIX, so do not add quotes.
+ sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p'
+ ;;
+ esac >> confcache
+if cmp -s $cache_file confcache; then
+ :
+else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache > $cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+exit 0
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
diff --git a/rts/gmp/ltmain.sh b/rts/gmp/ltmain.sh
new file mode 100644
index 0000000000..d81d89f878
--- /dev/null
+++ b/rts/gmp/ltmain.sh
@@ -0,0 +1,4692 @@
+# ltmain.sh - Provide generalized library-building support services.
+# NOTE: Changing this file will not affect anything until you rerun ltconfig.
+#
+# Copyright (C) 1996-2000 Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Check that we have a working $echo.
+if test "X$1" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+elif test "X$1" = X--fallback-echo; then
+ # Avoid inline document here, it may be left over
+ :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+ # Yippee, $echo works!
+ :
+else
+ # Restart under the correct shell, and then maybe $echo will work.
+ exec $SHELL "$0" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+ # used as fallback echo
+ shift
+ cat <<EOF
+$*
+EOF
+ exit 0
+fi
+
+# The name of this program.
+progname=`$echo "$0" | sed 's%^.*/%%'`
+modename="$progname"
+
+# Constants.
+PROGRAM=ltmain.sh
+PACKAGE=libtool
+VERSION=1.3c
+TIMESTAMP=" (1.696 2000/03/14 20:22:42)"
+
+default_mode=
+help="Try \`$progname --help' for more information."
+magic="%%%MAGIC variable%%%"
+mkdir="mkdir"
+mv="mv -f"
+rm="rm -f"
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e 1s/^X//'
+sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g'
+SP2NL='tr \040 \012'
+NL2SP='tr \015\012 \040\040'
+
+# NLS nuisances.
+# Only set LANG and LC_ALL to C if already set.
+# These must not be set unconditionally because not all systems understand
+# e.g. LANG=C (notably SCO).
+# We save the old values to restore during execute mode.
+if test "${LC_ALL+set}" = set; then
+ save_LC_ALL="$LC_ALL"; LC_ALL=C; export LC_ALL
+fi
+if test "${LANG+set}" = set; then
+ save_LANG="$LANG"; LANG=C; export LANG
+fi
+
+if test "$LTCONFIG_VERSION" != "$VERSION"; then
+ echo "$modename: ltconfig version \`$LTCONFIG_VERSION' does not match $PROGRAM version \`$VERSION'" 1>&2
+ echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit 1
+fi
+
+if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
+ echo "$modename: not configured to build any kind of library" 1>&2
+ echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit 1
+fi
+
+# Global variables.
+mode=$default_mode
+nonopt=
+prev=
+prevopt=
+run=
+show="$echo"
+show_help=
+execute_dlfiles=
+lo2o="s/\\.lo\$/.${objext}/"
+o2lo="s/\\.${objext}\$/.lo/"
+
+# Parse our command line options once, thoroughly.
+while test $# -gt 0
+do
+ arg="$1"
+ shift
+
+ case "$arg" in
+ -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) optarg= ;;
+ esac
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ case "$prev" in
+ execute_dlfiles)
+ eval "$prev=\"\$$prev \$arg\""
+ ;;
+ *)
+ eval "$prev=\$arg"
+ ;;
+ esac
+
+ prev=
+ prevopt=
+ continue
+ fi
+
+ # Have we seen a non-optional argument yet?
+ case "$arg" in
+ --help)
+ show_help=yes
+ ;;
+
+ --version)
+ echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"
+ exit 0
+ ;;
+
+ --config)
+ sed -e '1,/^### BEGIN LIBTOOL CONFIG/d' -e '/^### END LIBTOOL CONFIG/,$d' $0
+ exit 0
+ ;;
+
+ --debug)
+ echo "$progname: enabling shell trace mode"
+ set -x
+ ;;
+
+ --dry-run | -n)
+ run=:
+ ;;
+
+ --features)
+ echo "host: $host"
+ if test "$build_libtool_libs" = yes; then
+ echo "enable shared libraries"
+ else
+ echo "disable shared libraries"
+ fi
+ if test "$build_old_libs" = yes; then
+ echo "enable static libraries"
+ else
+ echo "disable static libraries"
+ fi
+ exit 0
+ ;;
+
+ --finish) mode="finish" ;;
+
+ --mode) prevopt="--mode" prev=mode ;;
+ --mode=*) mode="$optarg" ;;
+
+ --quiet | --silent)
+ show=:
+ ;;
+
+ -dlopen)
+ prevopt="-dlopen"
+ prev=execute_dlfiles
+ ;;
+
+ -*)
+ $echo "$modename: unrecognized option \`$arg'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+
+ *)
+ nonopt="$arg"
+ break
+ ;;
+ esac
+done
+
+if test -n "$prevopt"; then
+ $echo "$modename: option \`$prevopt' requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+fi
+
+if test -z "$show_help"; then
+
+ # Infer the operation mode.
+ if test -z "$mode"; then
+ case "$nonopt" in
+ *cc | *++ | gcc* | *-gcc*)
+ mode=link
+ for arg
+ do
+ case "$arg" in
+ -c)
+ mode=compile
+ break
+ ;;
+ esac
+ done
+ ;;
+ *db | *dbx | *strace | *truss)
+ mode=execute
+ ;;
+ *install*|cp|mv)
+ mode=install
+ ;;
+ *rm)
+ mode=uninstall
+ ;;
+ *)
+ # If we have no mode, but dlfiles were specified, then do execute mode.
+ test -n "$execute_dlfiles" && mode=execute
+
+ # Just use the default operation mode.
+ if test -z "$mode"; then
+ if test -n "$nonopt"; then
+ $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2
+ else
+ $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2
+ fi
+ fi
+ ;;
+ esac
+ fi
+
+ # Only execute mode is allowed to have -dlopen flags.
+ if test -n "$execute_dlfiles" && test "$mode" != execute; then
+ $echo "$modename: unrecognized option \`-dlopen'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Change the help message to a mode-specific one.
+ generic_help="$help"
+ help="Try \`$modename --help --mode=$mode' for more information."
+
+ # These modes are in order of execution frequency so that they run quickly.
+ case "$mode" in
+ # libtool compile mode
+ compile)
+ modename="$modename: compile"
+ # Get the compilation command and the source file.
+ base_compile=
+ prev=
+ lastarg=
+ srcfile="$nonopt"
+ suppress_output=
+
+ user_target=no
+ for arg
+ do
+ case "$prev" in
+ "") ;;
+ xcompiler)
+ # Aesthetically quote the previous argument.
+ prev=
+ lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+
+ case "$arg" in
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+
+ # Add the previous argument to base_compile.
+ if test -z "$base_compile"; then
+ base_compile="$lastarg"
+ else
+ base_compile="$base_compile $lastarg"
+ fi
+ continue
+ ;;
+ esac
+
+ # Accept any command-line options.
+ case "$arg" in
+ -o)
+ if test "$user_target" != "no"; then
+ $echo "$modename: you cannot specify \`-o' more than once" 1>&2
+ exit 1
+ fi
+ user_target=next
+ ;;
+
+ -static)
+ build_old_libs=yes
+ continue
+ ;;
+
+ -Xcompiler)
+ prev=xcompiler
+ continue
+ ;;
+
+ -Wc,*)
+ args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"`
+ lastarg=
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=','
+ for arg in $args; do
+ IFS="$save_ifs"
+
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ lastarg="$lastarg $arg"
+ done
+ IFS="$save_ifs"
+ lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"`
+
+ # Add the arguments to base_compile.
+ if test -z "$base_compile"; then
+ base_compile="$lastarg"
+ else
+ base_compile="$base_compile $lastarg"
+ fi
+ continue
+ ;;
+ esac
+
+ case "$user_target" in
+ next)
+ # The next one is the -o target name
+ user_target=yes
+ continue
+ ;;
+ yes)
+ # We got the output file
+ user_target=set
+ libobj="$arg"
+ continue
+ ;;
+ esac
+
+ # Accept the current argument as the source file.
+ lastarg="$srcfile"
+ srcfile="$arg"
+
+ # Aesthetically quote the previous argument.
+
+ # Backslashify any backslashes, double quotes, and dollar signs.
+ # These are the only characters that are still specially
+ # interpreted inside of double-quoted scrings.
+ lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"`
+
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ case "$lastarg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ lastarg="\"$lastarg\""
+ ;;
+ esac
+
+ # Add the previous argument to base_compile.
+ if test -z "$base_compile"; then
+ base_compile="$lastarg"
+ else
+ base_compile="$base_compile $lastarg"
+ fi
+ done
+
+ case "$user_target" in
+ set)
+ ;;
+ no)
+ # Get the name of the library object.
+ libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'`
+ ;;
+ *)
+ $echo "$modename: you must specify a target with \`-o'" 1>&2
+ exit 1
+ ;;
+ esac
+
+ # Recognize several different file suffixes.
+ # If the user specifies -o file.o, it is replaced with file.lo
+ xform='[cCFSfmso]'
+ case "$libobj" in
+ *.ada) xform=ada ;;
+ *.adb) xform=adb ;;
+ *.ads) xform=ads ;;
+ *.asm) xform=asm ;;
+ *.c++) xform=c++ ;;
+ *.cc) xform=cc ;;
+ *.cpp) xform=cpp ;;
+ *.cxx) xform=cxx ;;
+ *.f90) xform=f90 ;;
+ *.for) xform=for ;;
+ esac
+
+ libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"`
+
+ case "$libobj" in
+ *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;;
+ *)
+ $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2
+ exit 1
+ ;;
+ esac
+
+ if test -z "$base_compile"; then
+ $echo "$modename: you must specify a compilation command" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Delete any leftover library objects.
+ if test "$build_old_libs" = yes; then
+ removelist="$obj $libobj"
+ else
+ removelist="$libobj"
+ fi
+
+ $run $rm $removelist
+ trap "$run $rm $removelist; exit 1" 1 2 15
+
+ # Calculate the filename of the output object if compiler does
+ # not support -o with -c
+ if test "$compiler_c_o" = no; then
+ output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\..*$%%'`.${objext}
+ lockfile="$output_obj.lock"
+ removelist="$removelist $output_obj $lockfile"
+ trap "$run $rm $removelist; exit 1" 1 2 15
+ else
+ need_locks=no
+ lockfile=
+ fi
+
+ # Lock this critical section if it is needed
+ # We use this script file to make the link, it avoids creating a new file
+ if test "$need_locks" = yes; then
+ until ln "$0" "$lockfile" 2>/dev/null; do
+ $show "Waiting for $lockfile to be removed"
+ sleep 2
+ done
+ elif test "$need_locks" = warn; then
+ if test -f "$lockfile"; then
+ echo "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit 1
+ fi
+ echo $srcfile > "$lockfile"
+ fi
+
+ if test -n "$fix_srcfile_path"; then
+ eval srcfile=\"$fix_srcfile_path\"
+ fi
+
+ # Only build a PIC object if we are building libtool libraries.
+ if test "$build_libtool_libs" = yes; then
+ # Without this assignment, base_compile gets emptied.
+ fbsd_hideous_sh_bug=$base_compile
+
+ if test "$pic_mode" != no; then
+ # All platforms use -DPIC, to notify preprocessed assembler code.
+ command="$base_compile $srcfile $pic_flag -DPIC"
+ else
+ # Don't build PIC code
+ command="$base_compile $srcfile"
+ fi
+ if test "$build_old_libs" = yes; then
+ lo_libobj="$libobj"
+ dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$dir" = "X$libobj"; then
+ dir="$objdir"
+ else
+ dir="$dir/$objdir"
+ fi
+ libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'`
+
+ if test -d "$dir"; then
+ $show "$rm $libobj"
+ $run $rm $libobj
+ else
+ $show "$mkdir $dir"
+ $run $mkdir $dir
+ status=$?
+ if test $status -ne 0 && test ! -d $dir; then
+ exit $status
+ fi
+ fi
+ fi
+ if test "$compiler_o_lo" = yes; then
+ output_obj="$libobj"
+ command="$command -o $output_obj"
+ elif test "$compiler_c_o" = yes; then
+ output_obj="$obj"
+ command="$command -o $output_obj"
+ fi
+
+ $run $rm "$output_obj"
+ $show "$command"
+ if $run eval "$command"; then :
+ else
+ test -n "$output_obj" && $run $rm $removelist
+ exit 1
+ fi
+
+ if test "$need_locks" = warn &&
+ test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then
+ echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit 1
+ fi
+
+ # Just move the object if needed, then go on to compile the next one
+ if test x"$output_obj" != x"$libobj"; then
+ $show "$mv $output_obj $libobj"
+ if $run $mv $output_obj $libobj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # If we have no pic_flag, then copy the object into place and finish.
+ if (test -z "$pic_flag" || test "$pic_mode" != default) &&
+ test "$build_old_libs" = yes; then
+ # Rename the .lo from within objdir to obj
+ if test -f $obj; then
+ $show $rm $obj
+ $run $rm $obj
+ fi
+
+ $show "$mv $libobj $obj"
+ if $run $mv $libobj $obj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+
+ xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$obj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"`
+ libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"`
+ # Now arrange that obj and lo_libobj become the same file
+ $show "(cd $xdir && $LN_S $baseobj $libobj)"
+ if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then
+ exit 0
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # Allow error messages only from the first compilation.
+ suppress_output=' >/dev/null 2>&1'
+ fi
+
+ # Only build a position-dependent object if we build old libraries.
+ if test "$build_old_libs" = yes; then
+ if test "$pic_mode" != yes; then
+ # Don't build PIC code
+ command="$base_compile $srcfile"
+ else
+ # All platforms use -DPIC, to notify preprocessed assembler code.
+ command="$base_compile $srcfile $pic_flag -DPIC"
+ fi
+ if test "$compiler_c_o" = yes; then
+ command="$command -o $obj"
+ output_obj="$obj"
+ fi
+
+ # Suppress compiler output if we already did a PIC compilation.
+ command="$command$suppress_output"
+ $run $rm "$output_obj"
+ $show "$command"
+ if $run eval "$command"; then :
+ else
+ $run $rm $removelist
+ exit 1
+ fi
+
+ if test "$need_locks" = warn &&
+ test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then
+ echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) in this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit 1
+ fi
+
+ # Just move the object if needed
+ if test x"$output_obj" != x"$obj"; then
+ $show "$mv $output_obj $obj"
+ if $run $mv $output_obj $obj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # Create an invalid libtool object if no PIC, so that we do not
+ # accidentally link it into a program.
+ if test "$build_libtool_libs" != yes; then
+ $show "echo timestamp > $libobj"
+ $run eval "echo timestamp > \$libobj" || exit $?
+ else
+ # Move the .lo from within objdir
+ $show "$mv $libobj $lo_libobj"
+ if $run $mv $libobj $lo_libobj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+ fi
+
+ # Unlock the critical section if it was locked
+ if test "$need_locks" != no; then
+ $rm "$lockfile"
+ fi
+
+ exit 0
+ ;;
+
+ # libtool link mode
+ link | relink)
+ modename="$modename: link"
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ # It is impossible to link a dll without this setting, and
+ # we shouldn't force the makefile maintainer to figure out
+ # which system we are compiling for in order to pass an extra
+ # flag for every libtool invokation.
+ # allow_undefined=no
+
+ # FIXME: Unfortunately, there are problems with the above when trying
+ # to make a dll which has undefined symbols, in which case not
+ # even a static library is built. For now, we need to specify
+ # -no-undefined on the libtool link line when we can be certain
+ # that all symbols are satisfied, otherwise we get a static library.
+ allow_undefined=yes
+ ;;
+ *)
+ allow_undefined=yes
+ ;;
+ esac
+ libtool_args="$nonopt"
+ compile_command="$nonopt"
+ finalize_command="$nonopt"
+
+ compile_rpath=
+ finalize_rpath=
+ compile_shlibpath=
+ finalize_shlibpath=
+ convenience=
+ old_convenience=
+ deplibs=
+ old_deplibs=
+ compiler_flags=
+ linker_flags=
+ dllsearchpath=
+ lib_search_path=`pwd`
+
+ avoid_version=no
+ dlfiles=
+ dlprefiles=
+ dlself=no
+ export_dynamic=no
+ export_symbols=
+ export_symbols_regex=
+ generated=
+ libobjs=
+ ltlibs=
+ module=no
+ no_install=no
+ objs=
+ prefer_static_libs=no
+ preload=no
+ prev=
+ prevarg=
+ release=
+ rpath=
+ xrpath=
+ perm_rpath=
+ temp_rpath=
+ thread_safe=no
+ vinfo=
+
+ # We need to know -static, to get the right output filenames.
+ for arg
+ do
+ case "$arg" in
+ -all-static | -static)
+ if test "X$arg" = "X-all-static"; then
+ if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
+ $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2
+ fi
+ if test -n "$link_static_flag"; then
+ dlopen_self=$dlopen_self_static
+ fi
+ else
+ if test -z "$pic_flag" && test -n "$link_static_flag"; then
+ dlopen_self=$dlopen_self_static
+ fi
+ fi
+ build_libtool_libs=no
+ build_old_libs=yes
+ prefer_static_libs=yes
+ break
+ ;;
+ esac
+ done
+
+ # See if our shared archives depend on static archives.
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes
+
+ # Go through the arguments, transforming them on the way.
+ while test $# -gt 0; do
+ arg="$1"
+ shift
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test
+ ;;
+ *) qarg=$arg ;;
+ esac
+ libtool_args="$libtool_args $qarg"
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ case "$prev" in
+ output)
+ compile_command="$compile_command @OUTPUT@"
+ finalize_command="$finalize_command @OUTPUT@"
+ ;;
+ esac
+
+ case "$prev" in
+ dlfiles|dlprefiles)
+ if test "$preload" = no; then
+ # Add the symbol object into the linking commands.
+ compile_command="$compile_command @SYMFILE@"
+ finalize_command="$finalize_command @SYMFILE@"
+ preload=yes
+ fi
+ case "$arg" in
+ *.la | *.lo) ;; # We handle these cases below.
+ force)
+ if test "$dlself" = no; then
+ dlself=needless
+ export_dynamic=yes
+ fi
+ prev=
+ continue
+ ;;
+ self)
+ if test "$prev" = dlprefiles; then
+ dlself=yes
+ elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
+ dlself=yes
+ else
+ dlself=needless
+ export_dynamic=yes
+ fi
+ prev=
+ continue
+ ;;
+ *)
+ if test "$prev" = dlfiles; then
+ dlfiles="$dlfiles $arg"
+ else
+ dlprefiles="$dlprefiles $arg"
+ fi
+ prev=
+ continue
+ ;;
+ esac
+ ;;
+ expsyms)
+ export_symbols="$arg"
+ if test ! -f "$arg"; then
+ $echo "$modename: symbol file \`$arg' does not exist"
+ exit 1
+ fi
+ prev=
+ continue
+ ;;
+ expsyms_regex)
+ export_symbols_regex="$arg"
+ prev=
+ continue
+ ;;
+ release)
+ release="-$arg"
+ prev=
+ continue
+ ;;
+ rpath | xrpath)
+ # We need an absolute path.
+ case "$arg" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ $echo "$modename: only absolute run-paths are allowed" 1>&2
+ exit 1
+ ;;
+ esac
+ if test "$prev" = rpath; then
+ case "$rpath " in
+ *" $arg "*) ;;
+ *) rpath="$rpath $arg" ;;
+ esac
+ else
+ case "$xrpath " in
+ *" $arg "*) ;;
+ *) xrpath="$xrpath $arg" ;;
+ esac
+ fi
+ prev=
+ continue
+ ;;
+ xcompiler)
+ compiler_flags="$compiler_flags $qarg"
+ prev=
+ compile_command="$compile_command $qarg"
+ finalize_command="$finalize_command $qarg"
+ continue
+ ;;
+ xlinker)
+ linker_flags="$linker_flags $qarg"
+ compiler_flags="$compiler_flags $wl$qarg"
+ prev=
+ compile_command="$compile_command $wl$qarg"
+ finalize_command="$finalize_command $wl$qarg"
+ continue
+ ;;
+ *)
+ eval "$prev=\"\$arg\""
+ prev=
+ continue
+ ;;
+ esac
+ fi
+
+ prevarg="$arg"
+
+ case "$arg" in
+ -all-static)
+ if test -n "$link_static_flag"; then
+ compile_command="$compile_command $link_static_flag"
+ finalize_command="$finalize_command $link_static_flag"
+ fi
+ continue
+ ;;
+
+ -allow-undefined)
+ # FIXME: remove this flag sometime in the future.
+ $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2
+ continue
+ ;;
+
+ -avoid-version)
+ avoid_version=yes
+ continue
+ ;;
+
+ -dlopen)
+ prev=dlfiles
+ continue
+ ;;
+
+ -dlpreopen)
+ prev=dlprefiles
+ continue
+ ;;
+
+ -export-dynamic)
+ export_dynamic=yes
+ continue
+ ;;
+
+ -export-symbols | -export-symbols-regex)
+ if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+ $echo "$modename: not more than one -exported-symbols argument allowed"
+ exit 1
+ fi
+ if test "X$arg" = "X-export-symbols"; then
+ prev=expsyms
+ else
+ prev=expsyms_regex
+ fi
+ continue
+ ;;
+
+ -L*)
+ dir=`$echo "X$arg" | $Xsed -e 's/^-L//'`
+ # We need an absolute path.
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ absdir=`cd "$dir" && pwd`
+ if test -z "$absdir"; then
+ $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2
+ exit 1
+ fi
+ dir="$absdir"
+ ;;
+ esac
+ case "$deplibs " in
+ *" -L$dir "*) ;;
+ *)
+ deplibs="$deplibs -L$dir"
+ lib_search_path="$lib_search_path $dir"
+ ;;
+ esac
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ case ":$dllsearchpath:" in
+ *":$dir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$dir";;
+ esac
+ ;;
+ esac
+ continue
+ ;;
+
+ -l*)
+ if test "$arg" = "-lc"; then
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
+ # These systems don't actually have c library (as such)
+ continue
+ ;;
+ esac
+ elif test "$arg" = "-lm"; then
+ case "$host" in
+ *-*-cygwin* | *-*-beos*)
+ # These systems don't actually have math library (as such)
+ continue
+ ;;
+ esac
+ fi
+ deplibs="$deplibs $arg"
+ continue
+ ;;
+
+ -module)
+ module=yes
+ continue
+ ;;
+
+ -no-fast-install)
+ fast_install=no
+ continue
+ ;;
+
+ -no-install)
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ # The PATH hackery in wrapper scripts is required on Windows
+ # in order for the loader to find any dlls it needs.
+ $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2
+ $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2
+ fast_install=no
+ ;;
+ *)
+ no_install=yes
+ ;;
+ esac
+ continue
+ ;;
+
+ -no-undefined)
+ allow_undefined=no
+ continue
+ ;;
+
+ -o) prev=output ;;
+
+ -release)
+ prev=release
+ continue
+ ;;
+
+ -rpath)
+ prev=rpath
+ continue
+ ;;
+
+ -R)
+ prev=xrpath
+ continue
+ ;;
+
+ -R*)
+ dir=`$echo "X$arg" | $Xsed -e 's/^-R//'`
+ # We need an absolute path.
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ $echo "$modename: only absolute run-paths are allowed" 1>&2
+ exit 1
+ ;;
+ esac
+ case "$xrpath " in
+ *" $dir "*) ;;
+ *) xrpath="$xrpath $dir" ;;
+ esac
+ continue
+ ;;
+
+ -static)
+ # If we have no pic_flag, then this is the same as -all-static.
+ if test -z "$pic_flag" && test -n "$link_static_flag"; then
+ compile_command="$compile_command $link_static_flag"
+ finalize_command="$finalize_command $link_static_flag"
+ fi
+ continue
+ ;;
+
+ -thread-safe)
+ thread_safe=yes
+ continue
+ ;;
+
+ -version-info)
+ prev=vinfo
+ continue
+ ;;
+
+ -Wc,*)
+ args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'`
+ arg=
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=','
+ for flag in $args; do
+ IFS="$save_ifs"
+ case "$flag" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ flag="\"$flag\""
+ ;;
+ esac
+ arg="$arg $wl$flag"
+ compiler_flags="$compiler_flags $flag"
+ done
+ IFS="$save_ifs"
+ arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+ ;;
+
+ -Wl,*)
+ args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'`
+ arg=
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=','
+ for flag in $args; do
+ IFS="$save_ifs"
+ case "$flag" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ flag="\"$flag\""
+ ;;
+ esac
+ arg="$arg $wl$flag"
+ compiler_flags="$compiler_flags $wl$flag"
+ linker_flags="$linker_flags $flag"
+ done
+ IFS="$save_ifs"
+ arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+ ;;
+
+ -Xcompiler)
+ prev=xcompiler
+ continue
+ ;;
+
+ -Xlinker)
+ prev=xlinker
+ continue
+ ;;
+
+ # Some other compiler flag.
+ -* | +*)
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ ;;
+
+ *.$objext)
+ # A standard object.
+ objs="$objs $arg"
+ ;;
+
+ *.lo)
+ # A library object.
+ if test "$prev" = dlfiles; then
+ # This file was specified with -dlopen.
+ if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+ dlfiles="$dlfiles $arg"
+ prev=
+ continue
+ else
+ # If libtool objects are unsupported, then we need to preload.
+ prev=dlprefiles
+ fi
+ fi
+
+ if test "$prev" = dlprefiles; then
+ # Preload the old-style object.
+ dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"`
+ prev=
+ else
+ libobjs="$libobjs $arg"
+ fi
+ ;;
+
+ *.$libext)
+ # An archive.
+ deplibs="$deplibs $arg"
+ old_deplibs="$old_deplibs $arg"
+ continue
+ ;;
+
+ *.la)
+ # A libtool-controlled library.
+
+ if test "$prev" = dlfiles; then
+ # This library was specified with -dlopen.
+ dlfiles="$dlfiles $arg"
+ prev=
+ elif test "$prev" = dlprefiles; then
+ # The library was specified with -dlpreopen.
+ dlprefiles="$dlprefiles $arg"
+ prev=
+ else
+ deplibs="$deplibs $arg"
+ fi
+ continue
+ ;;
+
+ # Some other compiler argument.
+ *)
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ ;;
+ esac
+
+ # Now actually substitute the argument into the commands.
+ if test -n "$arg"; then
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ fi
+ done
+
+ if test -n "$prev"; then
+ $echo "$modename: the \`$prevarg' option requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
+ eval arg=\"$export_dynamic_flag_spec\"
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ fi
+
+ oldlibs=
+ # calculate the name of the file, without its directory
+ outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'`
+ libobjs_save="$libobjs"
+
+ if test -n "$shlibpath_var"; then
+ # get the directories listed in $shlibpath_var
+ eval shlib_search_path=\`\$echo \"X \${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\`
+ else
+ shlib_search_path=
+ fi
+ eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
+ eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
+ lib_search_path="$lib_search_path $sys_lib_search_path $shlib_search_path"
+
+ output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$output_objdir" = "X$output"; then
+ output_objdir="$objdir"
+ else
+ output_objdir="$output_objdir/$objdir"
+ fi
+ # Create the object directory.
+ if test ! -d $output_objdir; then
+ $show "$mkdir $output_objdir"
+ $run $mkdir $output_objdir
+ status=$?
+ if test $status -ne 0 && test ! -d $output_objdir; then
+ exit $status
+ fi
+ fi
+
+ case "$output" in
+ "")
+ $echo "$modename: you must specify an output file" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+ *.$libext)
+ linkmode=oldlib ;;
+ *.lo | *.$objext)
+ linkmode=obj ;;
+ *.la)
+ linkmode=lib ;;
+ *) # Anything else should be a program.
+ linkmode=prog ;;
+ esac
+
+ specialdeplibs=
+ libs=
+ # Find all interdependent deplibs that
+ # are linked more than once (e.g. -la -lb -la)
+ for deplib in $deplibs; do
+ case "$libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ libs="$libs $deplib"
+ done
+ deplibs=
+ newdependency_libs=
+ uninst_path= # paths that contain uninstalled libtool libraries
+ new_lib_search_path=
+ need_relink=no # whether we're linking any uninstalled libtool libraries
+ case $linkmode in
+ lib)
+ passes="link"
+ for file in $dlfiles $dlprefiles; do
+ case "$file" in
+ *.la) ;;
+ *)
+ $echo "$modename: libraries can \`-dlopen' only libtool libraries" 1>&2
+ exit 1
+ ;;
+ esac
+ done
+ ;;
+ prog)
+ compile_deplibs=
+ finalize_deplibs=
+ alldeplibs=no
+ newdlfiles=
+ newdlprefiles=
+ link_against_libtool_libs=
+ passes="scan dlopen dlpreopen link"
+ ;;
+ *) passes="link"
+ ;;
+ esac
+ for pass in $passes; do
+ if test $linkmode = prog; then
+ case $pass in
+ dlopen) libs="$dlfiles" ;;
+ dlpreopen) libs="$dlprefiles" ;;
+ link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+ esac
+ fi
+ if test $pass = dlopen; then
+ # Collect dlpreopened libraries
+ save_deplibs="$deplibs"
+ deplibs=
+ fi
+ for deplib in $libs; do
+ lib=
+ found=no
+ case "$deplib" in
+ -l*)
+ if test $linkmode != lib && test $linkmode != prog; then
+ $echo "$modename: warning: \`-l' is ignored for archives/objects" 1>&2
+ continue
+ fi
+ name=`$echo "X$deplib" | $Xsed -e 's/^-l//'`
+ for searchdir in $lib_search_path; do
+ # Search the libtool library
+ lib="$searchdir/lib${name}.la"
+ if test -f "$lib"; then
+ found=yes
+ break
+ fi
+ done
+ if test "$found" != yes; then
+ if test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ deplibs="$deplib $deplibs"
+ test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs"
+ fi
+ continue
+ fi
+ ;;
+ -L*)
+ case $linkmode in
+ lib)
+ deplibs="$deplib $deplibs"
+ newdependency_libs="$deplib $newdependency_libs"
+ new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+ ;;
+ prog)
+ if test $pass = scan; then
+ deplibs="$deplib $deplibs"
+ new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+ else
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ fi
+ ;;
+ *)
+ $echo "$modename: warning: \`-L' is ignored for archives/objects" 1>&2
+ ;;
+ esac
+ continue
+ ;;
+ -R*)
+ if test "$linkmode,$pass" = "prog,link"; then
+ dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'`
+ # Make sure the xrpath contains only unique directories.
+ case "$xrpath " in
+ *" $dir "*) ;;
+ *) xrpath="$xrpath $dir" ;;
+ esac
+ fi
+ continue
+ ;;
+ *.la) lib="$deplib" ;;
+ *.$libext)
+ case $linkmode in
+ lib)
+ if test "$deplibs_check_method" != pass_all; then
+ echo
+ echo "*** Warning: This library needs some functionality provided by $deplib."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ else
+ echo
+ echo "*** Warning: Linking the shared library $output against the"
+ echo "*** static library $deplib is not portable!"
+ deplibs="$deplib $deplibs"
+ fi
+ continue
+ ;;
+ prog)
+ if test $pass != link; then
+ deplibs="$deplib $deplibs"
+ else
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ fi
+ continue
+ ;;
+ esac
+ ;;
+ *.lo | *.$objext)
+ if test $linkmode = prog; then
+ if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+ # If there is no dlopen support or we're linking statically,
+ # we need to preload.
+ newdlprefiles="$newdlprefiles $deplib"
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ newdlfiles="$newdlfiles $deplib"
+ fi
+ fi
+ continue
+ ;;
+ %DEPLIBS%)
+ alldeplibs=yes
+ continue
+ ;;
+ esac
+ if test $found = yes || test -f "$lib"; then :
+ else
+ $echo "$modename: cannot find the library \`$lib'" 1>&2
+ exit 1
+ fi
+
+ # Check to see that this really is a libtool archive.
+ if (sed -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+
+ ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$ladir" = "X$lib" && ladir="."
+
+ dlname=
+ dlopen=
+ dlpreopen=
+ libdir=
+ library_names=
+ old_library=
+ # If the library was installed with an old release of libtool,
+ # it will not redefine variable installed.
+ installed=yes
+
+ # Read the .la file
+ case "$lib" in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+
+ if test $linkmode = lib || test "$linkmode,$pass" = "prog,scan"; then
+ test -n "$dlopen" && dlfiles="$dlfiles $dlopen"
+ test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen"
+ fi
+
+ if test $linkmode != lib && test $linkmode != prog; then
+ # only check for convenience libraries
+ if test -z "$old_library"; then
+ $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+ exit 1
+ fi
+ if test -n "$libdir"; then
+ $echo "$modename: \`$lib' is not a convenience library" 1>&2
+ exit 1
+ fi
+ # It is a libtool convenience library, so add in its objects.
+ convenience="$convenience $ladir/$objdir/$old_library"
+ old_convenience="$old_convenience $ladir/$objdir/$old_library"
+ continue
+ fi
+
+ # Get the name of the library we link against.
+ linklib=
+ for l in $old_library $library_names; do
+ linklib="$l"
+ done
+ if test -z "$linklib"; then
+ $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+ exit 1
+ fi
+
+ # This library was specified with -dlopen.
+ if test $pass = dlopen; then
+ if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+ # If there is no dlname, no dlopen support or we're linking statically,
+ # we need to preload.
+ dlprefiles="$dlprefiles $lib"
+ else
+ newdlfiles="$newdlfiles $lib"
+ fi
+ continue
+ fi
+
+ # We need an absolute path.
+ case "$ladir" in
+ [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
+ *)
+ abs_ladir=`cd "$ladir" && pwd`
+ if test -z "$abs_ladir"; then
+ $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2
+ $echo "$modename: passing it literally to the linker, although it might fail" 1>&2
+ abs_ladir="$ladir"
+ fi
+ ;;
+ esac
+ laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+
+ # Find the relevant object directory and library name.
+ if test "X$installed" = Xyes; then
+ if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+ $echo "$modename: warning: library \`$lib' was moved." 1>&2
+ dir="$ladir"
+ absdir="$abs_ladir"
+ libdir="$abs_ladir"
+ else
+ dir="$libdir"
+ absdir="$libdir"
+ fi
+ else
+ dir="$ladir/$objdir"
+ absdir="$abs_ladir/$objdir"
+ # Remove this search path later
+ uninst_path="$uninst_path $abs_ladir"
+ fi
+ name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+
+ # This library was specified with -dlpreopen.
+ if test $pass = dlpreopen; then
+ # Prefer using a static library (so that no silly _DYNAMIC symbols
+ # are required to link).
+ if test -n "$old_library"; then
+ newdlprefiles="$newdlprefiles $dir/$old_library"
+ else
+ newdlprefiles="$newdlprefiles $dir/$linklib"
+ fi
+ fi
+
+ if test $linkmode = prog && test $pass != link; then
+ new_lib_search_path="$new_lib_search_path $ladir"
+ deplibs="$lib $deplibs"
+
+ linkalldeplibs=no
+ if test "$link_all_deplibs" != no || test "$fast_install" != no || \
+ test "$build_libtool_libs" = no || test -z "$library_names"; then
+ linkalldeplibs=yes
+ fi
+
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ case "$deplib" in
+ -L*) new_lib_search_path="$new_lib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test
+ esac
+ # Need to link against all dependency_libs?
+ if test $linkalldeplibs = yes; then
+ deplibs="$deplib $deplibs"
+ else
+ # Need to hardcode shared library paths
+ # or/and link against static libraries
+ newdependency_libs="$deplib $newdependency_libs"
+ fi
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ tmp_libs="$tmp_libs $deplib"
+ done
+ continue
+ fi
+
+ if test -z "$libdir"; then
+ # It is a libtool convenience library, so add in its objects.
+ convenience="$convenience $dir/$old_library"
+ old_convenience="$old_convenience $dir/$old_library"
+ if test $linkmode = lib; then
+ deplibs="$dir/$old_library $deplibs"
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ newdependency_libs="$deplib $newdependency_libs"
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ tmp_libs="$tmp_libs $deplib"
+ done
+ elif test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$dir/$old_library $compile_deplibs"
+ finalize_deplibs="$dir/$old_library $finalize_deplibs"
+ fi
+ continue
+ fi
+
+ if test "$linkmode,$pass" = "prog,link"; then
+ if test -n "$library_names" &&
+ { test "$hardcode_into_libs" != all || test "$alldeplibs" != yes; } &&
+ { test "$prefer_static_libs" = no || test -z "$old_library"; }; then
+ # We need to hardcode the library path
+ if test -n "$shlibpath_var"; then
+ # Make sure the rpath contains only unique directories.
+ case "$temp_rpath " in
+ *" $dir "*) ;;
+ *" $absdir "*) ;;
+ *) temp_rpath="$temp_rpath $dir" ;;
+ esac
+ fi
+
+ # Hardcode the library path.
+ # Skip directories that are in the system default run-time
+ # search path.
+ case " $sys_lib_dlsearch_path " in
+ *" $absdir "*) ;;
+ *)
+ case "$compile_rpath " in
+ *" $absdir "*) ;;
+ *) compile_rpath="$compile_rpath $absdir"
+ esac
+ ;;
+ esac
+
+ case " $sys_lib_dlsearch_path " in
+ *" $libdir "*) ;;
+ *)
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir"
+ esac
+ ;;
+ esac
+ fi
+
+ if test "$alldeplibs" = yes &&
+ { test "$deplibs_check_method" = pass_all ||
+ { test "$build_libtool_libs" = yes &&
+ test -n "$library_names"; }; }; then
+ # Do we only need to link against static libraries?
+ continue
+ fi
+ fi
+
+ link_static=no # Whether this library is linked statically
+ if test -n "$library_names" &&
+ { test "$prefer_static_libs" = no || test -z "$old_library"; }; then
+ link_against_libtool_libs="$link_against_libtool_libs $lib"
+ test "X$installed" = xno && need_relink=yes
+ # This is a shared library
+ if test $linkmode = lib && test "$hardcode_into_libs" = all; then
+ # Hardcode the library path.
+ # Skip directories that are in the system default run-time
+ # search path.
+ case " $sys_lib_dlsearch_path " in
+ *" $absdir "*) ;;
+ *)
+ case "$compile_rpath " in
+ *" $absdir "*) ;;
+ *) compile_rpath="$compile_rpath $absdir"
+ esac
+ ;;
+ esac
+ case " $sys_lib_dlsearch_path " in
+ *" $libdir "*) ;;
+ *)
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir"
+ esac
+ ;;
+ esac
+ fi
+
+ if test -n "$old_archive_from_expsyms_cmds"; then
+ # figure out the soname
+ set dummy $library_names
+ realname="$2"
+ shift; shift
+ libname=`eval \\$echo \"$libname_spec\"`
+ if test -n "$soname_spec"; then
+ eval soname=\"$soname_spec\"
+ else
+ soname="$realname"
+ fi
+
+ # Make a new name for the extract_expsyms_cmds to use
+ newlib="libimp-`echo $soname | sed 's/^lib//;s/\.dll$//'`.a"
+
+ # If the library has no export list, then create one now
+ if test -f "$output_objdir/$soname-def"; then :
+ else
+ $show "extracting exported symbol list from \`$soname'"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ eval cmds=\"$extract_expsyms_cmds\"
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ # Create $newlib
+ if test -f "$output_objdir/$newlib"; then :; else
+ $show "generating import library for \`$soname'"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ eval cmds=\"$old_archive_from_expsyms_cmds\"
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+ # make sure the library variables are pointing to the new library
+ dir=$output_objdir
+ linklib=$newlib
+ fi
+
+ if test $linkmode = prog || test "$mode" != relink; then
+ add_shlibpath=
+ add_dir=
+ add=
+ lib_linked=yes
+ case "$hardcode_action" in
+ immediate | unsupported)
+ if test "$hardcode_direct" = no; then
+ add="$dir/$linklib"
+ elif test "$hardcode_minus_L" = no; then
+ case "$host" in
+ *-*-sunos*) add_shlibpath="$dir" ;;
+ esac
+ add_dir="-L$dir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = no; then
+ add_shlibpath="$dir"
+ add="-l$name"
+ else
+ lib_linked=no
+ fi
+ ;;
+ relink)
+ if test "$hardcode_direct" = yes; then
+ add="$dir/$linklib"
+ elif test "$hardcode_minus_L" = yes; then
+ add_dir="-L$dir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = yes; then
+ add_shlibpath="$dir"
+ add="-l$name"
+ else
+ lib_linked=no
+ fi
+ ;;
+ *) lib_linked=no ;;
+ esac
+
+ if test "$lib_linked" != yes; then
+ $echo "$modename: configuration error: unsupported hardcode properties"
+ exit 1
+ fi
+
+ if test -n "$add_shlibpath"; then
+ case ":$compile_shlibpath:" in
+ *":$add_shlibpath:"*) ;;
+ *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;;
+ esac
+ fi
+ if test $linkmode = prog; then
+ test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
+ test -n "$add" && compile_deplibs="$add $compile_deplibs"
+ else
+ test -n "$add_dir" && deplibs="$add_dir $deplibs"
+ test -n "$add" && deplibs="$add $deplibs"
+ if test "$hardcode_direct" != yes && \
+ test "$hardcode_minus_L" != yes && \
+ test "$hardcode_shlibpath_var" = yes; then
+ case ":$finalize_shlibpath:" in
+ *":$libdir:"*) ;;
+ *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+ esac
+ fi
+ fi
+ fi
+
+	  if test $linkmode = prog || test "$mode" = relink; then
+	    add_shlibpath=
+	    add_dir=
+	    add=
+	    # Finalize command for both is simple: just hardcode it.
+	    if test "$hardcode_direct" = yes; then
+	      add="$libdir/$linklib"
+	    elif test "$hardcode_minus_L" = yes; then
+	      add_dir="-L$libdir"
+	      add="-l$name"
+	    elif test "$hardcode_shlibpath_var" = yes; then
+	      case ":$finalize_shlibpath:" in
+	      *":$libdir:"*) ;;
+	      *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+	      esac
+	      add="-l$name"
+	    else
+	      # We cannot seem to hardcode it, guess we'll fake it.
+	      add_dir="-L$libdir"
+	      add="-l$name"
+	    fi
+
+	    if test $linkmode = prog; then
+	      test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+	      test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+	    else
+	      test -n "$add_dir" && deplibs="$add_dir $deplibs"
+	      test -n "$add" && deplibs="$add $deplibs"
+	    fi
+	  fi
+ elif test $linkmode = prog; then
+ # Here we assume that one of hardcode_direct or hardcode_minus_L
+ # is not unsupported. This is valid on all known static and
+ # shared platforms.
+ if test "$hardcode_direct" != unsupported; then
+ test -n "$old_library" && linklib="$old_library"
+ compile_deplibs="$dir/$linklib $compile_deplibs"
+ finalize_deplibs="$dir/$linklib $finalize_deplibs"
+ else
+ compile_deplibs="-l$name -L$dir $compile_deplibs"
+ finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+ fi
+ elif test "$build_libtool_libs" = yes; then
+ # Not a shared library
+ if test "$deplibs_check_method" != pass_all; then
+ # We're trying link a shared library against a static one
+ # but the system doesn't support it.
+ # Just print a warning and add the library to dependency_libs so
+ # that the program can be linked against the static library.
+ echo
+ echo "*** Warning: This library needs some functionality provided by $lib."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ else
+ convenience="$convenience $dir/$old_library"
+ old_convenience="$old_convenience $dir/$old_library"
+ deplibs="$dir/$old_library $deplibs"
+ link_static=yes
+ fi
+ fi
+
+ if test $linkmode = lib; then
+ if test -n "$dependency_libs" &&
+ { test "$hardcode_into_libs" = no || test $build_old_libs = yes ||
+ test $link_static = yes; }; then
+ # Extract -R from dependency_libs
+ temp_deplibs=
+ for libdir in $dependency_libs; do
+ case "$libdir" in
+ -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'`
+ case " $xrpath " in
+ *" $temp_xrpath "*) ;;
+ *) xrpath="$xrpath $temp_xrpath";;
+ esac;;
+ *) temp_deplibs="$temp_deplibs $libdir";;
+ esac
+ done
+ dependency_libs="$temp_deplibs"
+ fi
+
+ new_lib_search_path="$new_lib_search_path $absdir"
+ # Link against this library
+ test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+ # ... and its dependency_libs
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ newdependency_libs="$deplib $newdependency_libs"
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ tmp_libs="$tmp_libs $deplib"
+ done
+
+ if test $link_all_deplibs != no; then
+ # Add the search paths of all dependency libraries
+ for deplib in $dependency_libs; do
+ case "$deplib" in
+ -L*) path="$deplib" ;;
+ *.la)
+ dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$deplib" && dir="."
+ # We need an absolute path.
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
+ *)
+ absdir=`cd "$dir" && pwd`
+ if test -z "$absdir"; then
+ $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2
+ absdir="$dir"
+ fi
+ ;;
+ esac
+ if grep "^installed=no" $deplib > /dev/null; then
+ path="-L$absdir/$objdir"
+ else
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ if test "$absdir" != "$libdir"; then
+ $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2
+ fi
+ path="-L$absdir"
+ fi
+ ;;
+ *) continue ;;
+ esac
+ case " $deplibs " in
+ *" $path "*) ;;
+ *) deplibs="$deplibs $path" ;;
+ esac
+ done
+ fi
+ fi
+ done
+ dependency_libs="$newdependency_libs"
+ if test $pass = dlpreopen; then
+ # Link the dlpreopened libraries before other libraries
+ deplibs="$deplibs $save_deplibs"
+ elif test $pass != dlopen; then
+ # Make sure lib_search_path contains only unique directories.
+ lib_search_path=
+ for dir in $new_lib_search_path; do
+ case "$lib_search_path " in
+ *" $dir "*) ;;
+ *) lib_search_path="$lib_search_path $dir" ;;
+ esac
+ done
+ lib_search_path="$lib_search_path $sys_lib_search_path"
+
+ if test "$linkmode,$pass" != "prog,link"; then
+ vars="deplibs"
+ else
+ vars="compile_deplibs finalize_deplibs"
+ fi
+ for var in $vars dependency_libs; do
+ # Make sure that $var contains only unique libraries
+ # and add them in reverse order
+ eval tmp_libs=\"\$$var\"
+ new_libs=
+ for deplib in $tmp_libs; do
+ case "$deplib" in
+ -L*) new_libs="$deplib $new_libs" ;;
+ *)
+ case " $specialdeplibs " in
+ *" $deplib "*) new_libs="$deplib $new_libs" ;;
+ *)
+ case " $new_libs " in
+ *" $deplib "*) ;;
+ *) new_libs="$deplib $new_libs" ;;
+ esac
+ ;;
+ esac
+ ;;
+ esac
+ done
+ tmp_libs=
+ for deplib in $new_libs; do
+ case "$deplib" in
+ -L*)
+ case " $tmp_libs " in
+ *" $deplib "*) ;;
+ *) tmp_libs="$tmp_libs $deplib" ;;
+ esac
+ ;;
+ *) tmp_libs="$tmp_libs $deplib" ;;
+ esac
+ done
+ eval $var=\"$tmp_libs\"
+ done
+ fi
+ done
+ if test $linkmode = prog; then
+ dlfiles="$newdlfiles"
+ dlprefiles="$newdlprefiles"
+ fi
+
+ case $linkmode in
+ oldlib)
+ if test -n "$deplibs"; then
+ $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2
+ fi
+
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$rpath"; then
+ $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$xrpath"; then
+ $echo "$modename: warning: \`-R' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+ $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2
+ fi
+
+ # Now set the variables for building old libraries.
+ build_libtool_libs=no
+ oldlibs="$output"
+ objs="$objs$old_deplibs"
+ ;;
+
+ lib)
+ # Make sure we only generate libraries of the form `libNAME.la'.
+ case "$outputname" in
+ lib*)
+ name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+ eval libname=\"$libname_spec\"
+ ;;
+ *)
+ if test "$module" = no; then
+ $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+ if test "$need_lib_prefix" != no; then
+ # Add the "lib" prefix for modules if required
+ name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+ eval libname=\"$libname_spec\"
+ else
+ libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+ fi
+ ;;
+ esac
+
+ if test -n "$objs"; then
+ if test "$deplibs_check_method" != pass_all; then
+ $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1
+ exit 1
+ else
+ echo
+ echo "*** Warning: Linking the shared library $output against the non-libtool"
+ echo "*** objects $objs is not portable!"
+ libobjs="$libobjs $objs"
+ fi
+ fi
+
+ if test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2
+ fi
+
+ set dummy $rpath
+ if test $# -gt 2; then
+ $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2
+ fi
+ install_libdir="$2"
+
+ oldlibs=
+ if test -z "$rpath"; then
+ if test "$build_libtool_libs" = yes; then
+ # Building a libtool convenience library.
+ libext=al
+ oldlibs="$output_objdir/$libname.$libext $oldlibs"
+ build_libtool_libs=convenience
+ build_old_libs=yes
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2
+ fi
+ else
+
+ # Parse the version information argument.
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS=':'
+ set dummy $vinfo 0 0 0
+ IFS="$save_ifs"
+
+ if test -n "$8"; then
+ $echo "$modename: too many parameters to \`-version-info'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ current="$2"
+ revision="$3"
+ age="$4"
+
+ # Check that each of the things are valid numbers.
+ case "$current" in
+ 0 | [1-9] | [1-9][0-9]*) ;;
+ *)
+ $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ ;;
+ esac
+
+ case "$revision" in
+ 0 | [1-9] | [1-9][0-9]*) ;;
+ *)
+ $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ ;;
+ esac
+
+ case "$age" in
+ 0 | [1-9] | [1-9][0-9]*) ;;
+ *)
+ $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ ;;
+ esac
+
+ if test $age -gt $current; then
+ $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit 1
+ fi
+
+ # Calculate the version variables.
+ major=
+ versuffix=
+ verstring=
+ case "$version_type" in
+ none) ;;
+
+ irix)
+ major=`expr $current - $age + 1`
+ versuffix="$major.$revision"
+ verstring="sgi$major.$revision"
+
+ # Add in all the interfaces that we are compatible with.
+ loop=$revision
+ while test $loop != 0; do
+ iface=`expr $revision - $loop`
+ loop=`expr $loop - 1`
+ verstring="sgi$major.$iface:$verstring"
+ done
+ ;;
+
+ linux)
+ major=.`expr $current - $age`
+ versuffix="$major.$age.$revision"
+ ;;
+
+ osf)
+ major=`expr $current - $age`
+ versuffix=".$current.$age.$revision"
+ verstring="$current.$age.$revision"
+
+ # Add in all the interfaces that we are compatible with.
+ loop=$age
+ while test $loop != 0; do
+ iface=`expr $current - $loop`
+ loop=`expr $loop - 1`
+ verstring="$verstring:${iface}.0"
+ done
+
+ # Make executables depend on our current version.
+ verstring="$verstring:${current}.0"
+ ;;
+
+ sunos)
+ major=".$current"
+ versuffix=".$current.$revision"
+ ;;
+
+ freebsd-aout)
+ major=".$current"
+ versuffix=".$current.$revision";
+ ;;
+
+ freebsd-elf)
+ major=".$current"
+ versuffix=".$current";
+ ;;
+
+ windows)
+ # Like Linux, but with '-' rather than '.', since we only
+ # want one extension on Windows 95.
+ major=`expr $current - $age`
+ versuffix="-$major-$age-$revision"
+ ;;
+
+ *)
+ $echo "$modename: unknown library version type \`$version_type'" 1>&2
+ echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit 1
+ ;;
+ esac
+
+ # Clear the version info if we defaulted, and they specified a release.
+ if test -z "$vinfo" && test -n "$release"; then
+ major=
+ verstring="0.0"
+ if test "$need_version" = no; then
+ versuffix=
+ else
+ versuffix=".0.0"
+ fi
+ fi
+
+ # Remove version info from name if versioning should be avoided
+ if test "$avoid_version" = yes && test "$need_version" = no; then
+ major=
+ versuffix=
+ verstring=""
+ fi
+
+ # Check to see if the archive will have undefined symbols.
+ if test "$allow_undefined" = yes; then
+ if test "$allow_undefined_flag" = unsupported; then
+ $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2
+ build_libtool_libs=no
+ build_old_libs=yes
+ fi
+ else
+ # Don't allow undefined symbols.
+ allow_undefined_flag="$no_undefined_flag"
+ fi
+ fi
+
+ if test "$mode" != relink; then
+ # Remove our outputs.
+ $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*"
+ $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*
+ fi
+
+ # Now set the variables for building old libraries.
+ if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
+ oldlibs="$oldlibs $output_objdir/$libname.$libext"
+
+ # Transform .lo files to .o files.
+ oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP`
+ fi
+
+ # Eliminate all temporary directories.
+ for path in $uninst_path; do
+ lib_search_path=`echo "$lib_search_path " | sed -e 's% $path % %g'`
+ deplibs=`echo "$deplibs " | sed -e 's% -L$path % %g'`
+ dependency_libs=`echo "$dependency_libs " | sed -e 's% -L$path % %g'`
+ done
+
+ if test -n "$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ temp_xrpath=
+ for libdir in $xrpath; do
+ temp_xrpath="$temp_xrpath -R$libdir"
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ if test "$hardcode_into_libs" = no || test $build_old_libs = yes; then
+ dependency_libs="$temp_xrpath $dependency_libs"
+ fi
+ fi
+
+ # Make sure dlfiles contains only unique files that won't be dlpreopened
+ old_dlfiles="$dlfiles"
+ dlfiles=
+ for lib in $old_dlfiles; do
+ case " $dlprefiles $dlfiles " in
+ *" $lib "*) ;;
+ *) dlfiles="$dlfiles $lib" ;;
+ esac
+ done
+
+ # Make sure dlprefiles contains only unique files
+ old_dlprefiles="$dlprefiles"
+ dlprefiles=
+ for lib in $old_dlprefiles; do
+ case "$dlprefiles " in
+ *" $lib "*) ;;
+ *) dlprefiles="$dlprefiles $lib" ;;
+ esac
+ done
+
+ if test "$build_libtool_libs" = yes; then
+ if test -n "$rpath"; then
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
+ # these systems don't actually have a c library (as such)!
+ ;;
+ *)
+ # Add libc to deplibs on all other systems.
+ deplibs="$deplibs -lc"
+ ;;
+ esac
+ fi
+
+ # Transform deplibs into only deplibs that can be linked in shared.
+ name_save=$name
+ libname_save=$libname
+ release_save=$release
+ versuffix_save=$versuffix
+ major_save=$major
+ # I'm not sure if I'm treating the release correctly. I think
+ # release should show up in the -l (ie -lgmp5) so we don't want to
+ # add it in twice. Is that correct?
+ release=""
+ versuffix=""
+ major=""
+ newdeplibs=
+ droppeddeps=no
+ case "$deplibs_check_method" in
+ pass_all)
+ # Don't check for shared/static. Everything works.
+ # This might be a little naive. We might want to check
+ # whether the library exists or not. But this is on
+ # osf3 & osf4 and I'm not really sure... Just
+ # implementing what was already the behaviour.
+ newdeplibs=$deplibs
+ ;;
+ test_compile)
+ # This code stresses the "libraries are programs" paradigm to its
+ # limits. Maybe even breaks it. We compile a program, linking it
+ # against the deplibs as a proxy for the library. Then we can check
+ # whether they linked in statically or dynamically with ldd.
+ $rm conftest.c
+ cat > conftest.c <<EOF
+ int main() { return 0; }
+EOF
+ $rm conftest
+ $CC -o conftest conftest.c $deplibs
+ if test $? -eq 0 ; then
+ ldd_output=`ldd conftest`
+ for i in $deplibs; do
+ name="`expr $i : '-l\(.*\)'`"
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ deplib_matches=`eval \\$echo \"$library_names_spec\"`
+ set dummy $deplib_matches
+ deplib_match=$2
+ if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+ newdeplibs="$newdeplibs $i"
+ else
+ droppeddeps=yes
+ echo
+ echo "*** Warning: This library needs some functionality provided by $i."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ fi
+ else
+ newdeplibs="$newdeplibs $i"
+ fi
+ done
+ else
+	      # Error occurred in the first compile.  Let's try to salvage the situation:
+	      # Compile a separate program for each library.
+ for i in $deplibs; do
+ name="`expr $i : '-l\(.*\)'`"
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" ; then
+ $rm conftest
+ $CC -o conftest conftest.c $i
+ # Did it work?
+ if test $? -eq 0 ; then
+ ldd_output=`ldd conftest`
+ libname=`eval \\$echo \"$libname_spec\"`
+ deplib_matches=`eval \\$echo \"$library_names_spec\"`
+ set dummy $deplib_matches
+ deplib_match=$2
+ if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+ newdeplibs="$newdeplibs $i"
+ else
+ droppeddeps=yes
+ echo
+ echo "*** Warning: This library needs some functionality provided by $i."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ fi
+ else
+ droppeddeps=yes
+ echo
+ echo "*** Warning! Library $i is needed by this library but I was not able to"
+ echo "*** make it link in! You will probably need to install it or some"
+ echo "*** library that it depends on before this library will be fully"
+ echo "*** functional. Installing it before continuing would be even better."
+ fi
+ else
+ newdeplibs="$newdeplibs $i"
+ fi
+ done
+ fi
+ ;;
+ file_magic*)
+ set dummy $deplibs_check_method
+ file_magic_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+ for a_deplib in $deplibs; do
+ name="`expr $a_deplib : '-l\(.*\)'`"
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ for i in $lib_search_path; do
+ potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+ for potent_lib in $potential_libs; do
+ # Follow soft links.
+ if ls -lLd "$potent_lib" 2>/dev/null \
+ | grep " -> " >/dev/null; then
+ continue
+ fi
+ # The statement above tries to avoid entering an
+ # endless loop below, in case of cyclic links.
+ # We might still enter an endless loop, since a link
+ # loop can be closed while we follow links,
+ # but so what?
+ potlib="$potent_lib"
+ while test -h "$potlib" 2>/dev/null; do
+ potliblink=`ls -ld $potlib | sed 's/.* -> //'`
+ case "$potliblink" in
+ [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
+ *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";;
+ esac
+ done
+ if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \
+ | sed 10q \
+ | egrep "$file_magic_regex" > /dev/null; then
+ newdeplibs="$newdeplibs $a_deplib"
+ a_deplib=""
+ break 2
+ fi
+ done
+ done
+ if test -n "$a_deplib" ; then
+ droppeddeps=yes
+ echo
+ echo "*** Warning: This library needs some functionality provided by $a_deplib."
+ echo "*** I have the capability to make that library automatically link in when"
+ echo "*** you link to this library. But I can only do this if you have a"
+ echo "*** shared version of the library, which you do not appear to have."
+ fi
+ else
+ # Add a -L argument.
+ newdeplibs="$newdeplibs $a_deplib"
+ fi
+ done # Gone through all deplibs.
+ ;;
+ none | unknown | *)
+ newdeplibs=""
+ if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \
+ -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' |
+ grep . >/dev/null; then
+ echo
+ if test "X$deplibs_check_method" = "Xnone"; then
+ echo "*** Warning: inter-library dependencies are not supported in this platform."
+ else
+ echo "*** Warning: inter-library dependencies are not known to be supported."
+ fi
+ echo "*** All declared inter-library dependencies are being dropped."
+ droppeddeps=yes
+ fi
+ ;;
+ esac
+ versuffix=$versuffix_save
+ major=$major_save
+ release=$release_save
+ libname=$libname_save
+ name=$name_save
+
+ if test "$droppeddeps" = yes; then
+ if test "$module" = yes; then
+ echo
+ echo "*** Warning: libtool could not satisfy all declared inter-library"
+ echo "*** dependencies of module $libname. Therefore, libtool will create"
+ echo "*** a static module, that should work as long as the dlopening"
+ echo "*** application is linked with the -dlopen flag."
+ if test -z "$global_symbol_pipe"; then
+ echo
+ echo "*** However, this would only work if libtool was able to extract symbol"
+ echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+ echo "*** not find such a program. So, this module is probably useless."
+ echo "*** \`nm' from GNU binutils and a full rebuild may help."
+ fi
+ if test "$build_old_libs" = no; then
+ oldlibs="$output_objdir/$libname.$libext"
+ build_libtool_libs=module
+ build_old_libs=yes
+ else
+ build_libtool_libs=no
+ fi
+ else
+ echo "*** The inter-library dependencies that have been dropped here will be"
+ echo "*** automatically added whenever a program is linked with this library"
+ echo "*** or is declared to -dlopen it."
+ fi
+ fi
+ # Done checking deplibs!
+ deplibs=$newdeplibs
+ fi
+
+ # All the library-specific variables (install_libdir is set above).
+ library_names=
+ old_library=
+ dlname=
+
+ # Test again, we may have decided not to build it any more
+ if test "$build_libtool_libs" = yes; then
+ if test "$hardcode_into_libs" != no; then
+ # Hardcode the library paths
+ hardcode_libdirs=
+ dep_rpath=
+ rpath="$finalize_rpath"
+ test "$mode" != relink && rpath="$compile_rpath$rpath"
+ for libdir in $rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ dep_rpath="$dep_rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$perm_rpath " in
+ *" $libdir "*) ;;
+ *) perm_rpath="$perm_rpath $libdir" ;;
+ esac
+ fi
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval dep_rpath=\"$hardcode_libdir_flag_spec\"
+ fi
+ if test -n "$runpath_var" && test -n "$perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
+ fi
+ test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
+ fi
+
+ shlibpath="$finalize_shlibpath"
+ test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
+ if test -n "$shlibpath"; then
+ eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
+ fi
+
+ # Get the real and link names of the library.
+ eval library_names=\"$library_names_spec\"
+ set dummy $library_names
+ realname="$2"
+ shift; shift
+
+ if test -n "$soname_spec"; then
+ eval soname=\"$soname_spec\"
+ else
+ soname="$realname"
+ fi
+
+ lib="$output_objdir/$realname"
+ for link
+ do
+ linknames="$linknames $link"
+ done
+
+ # Ensure that we have .o objects for linkers which dislike .lo
+ # (e.g. aix) in case we are running --disable-static
+ for obj in $libobjs; do
+ xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$obj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'`
+ oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"`
+ if test ! -f $xdir/$oldobj; then
+ $show "(cd $xdir && ${LN_S} $baseobj $oldobj)"
+ $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $?
+ fi
+ done
+
+ # Use standard objects if they are pic
+ test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+
+ # Prepare the list of exported symbols
+ if test -z "$export_symbols"; then
+ if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
+ $show "generating symbol list for \`$libname.la'"
+ export_symbols="$output_objdir/$libname.exp"
+ $run $rm $export_symbols
+ eval cmds=\"$export_symbols_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ if test -n "$export_symbols_regex"; then
+ $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\""
+ $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+ $show "$mv \"${export_symbols}T\" \"$export_symbols\""
+ $run eval '$mv "${export_symbols}T" "$export_symbols"'
+ fi
+ fi
+ fi
+
+ if test -n "$export_symbols" && test -n "$include_expsyms"; then
+ $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"'
+ fi
+
+ if test -n "$convenience"; then
+ if test -n "$whole_archive_flag_spec"; then
+ eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+ else
+ gentop="$output_objdir/${outputname}x"
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "mkdir $gentop"
+ $run mkdir "$gentop"
+ status=$?
+ if test $status -ne 0 && test ! -d "$gentop"; then
+ exit $status
+ fi
+ generated="$generated $gentop"
+
+ for xlib in $convenience; do
+ # Extract the objects.
+ case "$xlib" in
+ [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+ *) xabs=`pwd`"/$xlib" ;;
+ esac
+ xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+ xdir="$gentop/$xlib"
+
+ $show "${rm}r $xdir"
+ $run ${rm}r "$xdir"
+ $show "mkdir $xdir"
+ $run mkdir "$xdir"
+ status=$?
+ if test $status -ne 0 && test ! -d "$xdir"; then
+ exit $status
+ fi
+ $show "(cd $xdir && $AR x $xabs)"
+ $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+ libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP`
+ done
+ fi
+ fi
+
+ if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
+ eval flag=\"$thread_safe_flag_spec\"
+ linker_flags="$linker_flags $flag"
+ fi
+
+ # Make a backup of the uninstalled library when relinking
+ if test "$mode" = relink && test "$hardcode_into_libs" = all; then
+ $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $?
+ fi
+
+ # Do each of the archive commands.
+ if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+ eval cmds=\"$archive_expsym_cmds\"
+ else
+ eval cmds=\"$archive_cmds\"
+ fi
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+
+ # Restore the uninstalled library and exit
+ if test "$mode" = relink && test "$hardcode_into_libs" = all; then
+ $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $?
+ exit 0
+ fi
+
+ # Create links to the real library.
+ for linkname in $linknames; do
+ if test "$realname" != "$linkname"; then
+ $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)"
+ $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $?
+ fi
+ done
+
+ # If -module or -export-dynamic was specified, set the dlname.
+ if test "$module" = yes || test "$export_dynamic" = yes; then
+ # On all known operating systems, these are identical.
+ dlname="$soname"
+ fi
+ fi
+ ;;
+
+ obj)
+ if test -n "$deplibs"; then
+ $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2
+ fi
+
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$rpath"; then
+ $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$xrpath"; then
+ $echo "$modename: warning: \`-R' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for objects" 1>&2
+ fi
+
+ case "$output" in
+ *.lo)
+ if test -n "$objs$old_deplibs"; then
+ $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2
+ exit 1
+ fi
+ libobj="$output"
+ obj=`$echo "X$output" | $Xsed -e "$lo2o"`
+ ;;
+ *)
+ libobj=
+ obj="$output"
+ ;;
+ esac
+
+ # Delete the old objects.
+ $run $rm $obj $libobj
+
+ # Objects from convenience libraries. This assumes
+ # single-version convenience libraries. Whenever we create
+      # different ones for PIC/non-PIC, then we'll have to duplicate
+ # the extraction.
+ reload_conv_objs=
+ gentop=
+ # reload_cmds runs $LD directly, so let us get rid of
+ # -Wl from whole_archive_flag_spec
+ wl=
+
+ if test -n "$convenience"; then
+ if test -n "$whole_archive_flag_spec"; then
+ eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\"
+ else
+ gentop="$output_objdir/${obj}x"
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "mkdir $gentop"
+ $run mkdir "$gentop"
+ status=$?
+ if test $status -ne 0 && test ! -d "$gentop"; then
+ exit $status
+ fi
+ generated="$generated $gentop"
+
+ for xlib in $convenience; do
+ # Extract the objects.
+ case "$xlib" in
+ [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+ *) xabs=`pwd`"/$xlib" ;;
+ esac
+ xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+ xdir="$gentop/$xlib"
+
+ $show "${rm}r $xdir"
+ $run ${rm}r "$xdir"
+ $show "mkdir $xdir"
+ $run mkdir "$xdir"
+ status=$?
+ if test $status -ne 0 && test ! -d "$xdir"; then
+ exit $status
+ fi
+ $show "(cd $xdir && $AR x $xabs)"
+ $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+ reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP`
+ done
+ fi
+ fi
+
+ # Create the old-style object.
+ reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
+
+ output="$obj"
+ eval cmds=\"$reload_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+
+ # Exit if we aren't doing a library object file.
+ if test -z "$libobj"; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit 0
+ fi
+
+ if test "$build_libtool_libs" != yes; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ # Create an invalid libtool object if no PIC, so that we don't
+ # accidentally link it into a program.
+ $show "echo timestamp > $libobj"
+ $run eval "echo timestamp > $libobj" || exit $?
+ exit 0
+ fi
+
+ if test -n "$pic_flag" || test "$pic_mode" != default; then
+ # Only do commands if we really have different PIC objects.
+ reload_objs="$libobjs $reload_conv_objs"
+ output="$libobj"
+ eval cmds=\"$reload_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ else
+ # Just create a symlink.
+ $show $rm $libobj
+ $run $rm $libobj
+ xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$libobj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'`
+ oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"`
+ $show "(cd $xdir && $LN_S $oldobj $baseobj)"
+ $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $?
+ fi
+
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit 0
+ ;;
+
+ prog)
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for programs" 1>&2
+ fi
+
+ if test "$preload" = yes; then
+ if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown &&
+ test "$dlopen_self_static" = unknown; then
+ $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support."
+ fi
+ fi
+
+ compile_command="$compile_command $compile_deplibs"
+ finalize_command="$finalize_command $finalize_deplibs"
+
+ if test -n "$rpath$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ for libdir in $rpath $xrpath; do
+ # This is the magic to use -rpath.
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ fi
+
+ # Now hardcode the library paths
+ rpath=
+ hardcode_libdirs=
+ for libdir in $compile_rpath $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ rpath="$rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$perm_rpath " in
+ *" $libdir "*) ;;
+ *) perm_rpath="$perm_rpath $libdir" ;;
+ esac
+ fi
+ case "$host" in
+ *-*-cygwin* | *-*-mingw* | *-*-os2*)
+ case ":$dllsearchpath:" in
+ *":$libdir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$libdir";;
+ esac
+ ;;
+ esac
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval rpath=\" $hardcode_libdir_flag_spec\"
+ fi
+ compile_rpath="$rpath"
+
+ rpath=
+ hardcode_libdirs=
+ for libdir in $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ rpath="$rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$finalize_perm_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;;
+ esac
+ fi
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval rpath=\" $hardcode_libdir_flag_spec\"
+ fi
+ finalize_rpath="$rpath"
+
+ if test -n "$libobjs" && test "$build_old_libs" = yes; then
+ # Transform all the library objects into standard objects.
+ compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ fi
+
+ dlsyms=
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ if test -n "$NM" && test -n "$global_symbol_pipe"; then
+ dlsyms="${outputname}S.c"
+ else
+ $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
+ fi
+ fi
+
+ if test -n "$dlsyms"; then
+ case "$dlsyms" in
+ "") ;;
+ *.c)
+ # Discover the nlist of each of the dlfiles.
+ nlist="$output_objdir/${outputname}.nm"
+
+ $show "$rm $nlist ${nlist}S ${nlist}T"
+ $run $rm "$nlist" "${nlist}S" "${nlist}T"
+
+ # Parse the name list into a source file.
+ $show "creating $output_objdir/$dlsyms"
+
+ test -z "$run" && $echo > "$output_objdir/$dlsyms" "\
+/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */
+/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */
+
+#ifdef __cplusplus
+extern \"C\" {
+#endif
+
+/* Prevent the only kind of declaration conflicts we can make. */
+#define lt_preloaded_symbols some_other_symbol
+
+/* External symbol declarations for the compiler. */\
+"
+
+ if test "$dlself" = yes; then
+ $show "generating symbol list for \`$output'"
+
+ test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist"
+
+ # Add our own program objects to the symbol list.
+ progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ for arg in $progfiles; do
+ $show "extracting global C symbols from \`$arg'"
+ $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+ done
+
+ if test -n "$exclude_expsyms"; then
+ $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+ $run eval '$mv "$nlist"T "$nlist"'
+ fi
+
+ if test -n "$export_symbols_regex"; then
+ $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+ $run eval '$mv "$nlist"T "$nlist"'
+ fi
+
+ # Prepare the list of exported symbols
+ if test -z "$export_symbols"; then
+ export_symbols="$output_objdir/$output.exp"
+ $run $rm $export_symbols
+ $run eval "sed -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+ else
+ $run eval "sed -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"'
+ $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T'
+ $run eval 'mv "$nlist"T "$nlist"'
+ fi
+ fi
+
+ for arg in $dlprefiles; do
+ $show "extracting global C symbols from \`$arg'"
+ name=`echo "$arg" | sed -e 's%^.*/%%'`
+ $run eval 'echo ": $name " >> "$nlist"'
+ $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+ done
+
+ if test -z "$run"; then
+ # Make sure we have at least an empty file.
+ test -f "$nlist" || : > "$nlist"
+
+ if test -n "$exclude_expsyms"; then
+ egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+ $mv "$nlist"T "$nlist"
+ fi
+
+ # Try sorting and uniquifying the output.
+ if grep -v "^: " < "$nlist" | sort +2 | uniq > "$nlist"S; then
+ :
+ else
+ grep -v "^: " < "$nlist" > "$nlist"S
+ fi
+
+ if test -f "$nlist"S; then
+ eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"'
+ else
+ echo '/* NONE */' >> "$output_objdir/$dlsyms"
+ fi
+
+ $echo >> "$output_objdir/$dlsyms" "\
+
+#undef lt_preloaded_symbols
+
+#if defined (__STDC__) && __STDC__
+# define lt_ptr_t void *
+#else
+# define lt_ptr_t char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+ const char *name;
+ lt_ptr_t address;
+}
+lt_preloaded_symbols[] =
+{\
+"
+
+ sed -n -e 's/^: \([^ ]*\) $/ {\"\1\", (lt_ptr_t) 0},/p' \
+ -e 's/^. \([^ ]*\) \([^ ]*\)$/ {"\2", (lt_ptr_t) \&\2},/p' \
+ < "$nlist" >> "$output_objdir/$dlsyms"
+
+ $echo >> "$output_objdir/$dlsyms" "\
+ {0, (lt_ptr_t) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+ return lt_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif\
+"
+ fi
+
+ pic_flag_for_symtable=
+ case "$host" in
+ # compiling the symbol table file with pic_flag works around
+ # a FreeBSD bug that causes programs to crash when -lm is
+ # linked before any other PIC object. But we must not use
+ # pic_flag when linking with -static. The problem exists in
+ # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+ *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+ case "$compile_command " in
+ *" -static "*) ;;
+ *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";;
+ esac;;
+ *-*-hpux*)
+ case "$compile_command " in
+ *" -static "*) ;;
+ *) pic_flag_for_symtable=" $pic_flag -DPIC";;
+ esac
+ esac
+
+ # Now compile the dynamic symbol file.
+ $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")"
+ $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $?
+
+ # Clean up the generated files.
+ $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T"
+ $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T"
+
+ # Transform the symbol file into the correct name.
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ ;;
+ *)
+ $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
+ exit 1
+ ;;
+ esac
+ else
+ # We keep going just in case the user didn't refer to
+ # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
+ # really was required.
+
+ # Nullify the symbol file.
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
+ fi
+
+ if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then
+ # Replace the output file specification.
+ compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+ link_command="$compile_command$compile_rpath"
+
+ # We have no uninstalled library dependencies, so finalize right now.
+ $show "$link_command"
+ $run eval "$link_command"
+ status=$?
+
+ # Delete the generated files.
+ if test -n "$dlsyms"; then
+ $show "$rm $output_objdir/${outputname}S.${objext}"
+ $run $rm "$output_objdir/${outputname}S.${objext}"
+ fi
+
+ exit $status
+ fi
+
+ if test -n "$shlibpath_var"; then
+ # We should set the shlibpath_var
+ rpath=
+ for dir in $temp_rpath; do
+ case "$dir" in
+ [\\/]* | [A-Za-z]:[\\/]*)
+ # Absolute path.
+ rpath="$rpath$dir:"
+ ;;
+ *)
+ # Relative path: add a thisdir entry.
+ rpath="$rpath\$thisdir/$dir:"
+ ;;
+ esac
+ done
+ temp_rpath="$rpath"
+ fi
+
+ if test -n "$compile_shlibpath$finalize_shlibpath"; then
+ compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
+ fi
+ if test -n "$finalize_shlibpath"; then
+ finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+ fi
+
+ compile_var=
+ finalize_var=
+ if test -n "$runpath_var"; then
+ if test -n "$perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
+ fi
+ if test -n "$finalize_perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $finalize_perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
+ fi
+ fi
+
+ if test "$no_install" = yes; then
+ # We don't need to create a wrapper script.
+ link_command="$compile_var$compile_command$compile_rpath"
+ # Replace the output file specification.
+ link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+ # Delete the old output file.
+ $run $rm $output
+ # Link the executable and exit
+ $show "$link_command"
+ $run eval "$link_command" || exit $?
+ exit 0
+ fi
+
+ if test "$hardcode_action" = relink || test "$hardcode_into_libs" = all; then
+ # Fast installation is not supported
+ link_command="$compile_var$compile_command$compile_rpath"
+ relink_command="$finalize_var$finalize_command$finalize_rpath"
+
+ $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2
+ $echo "$modename: \`$output' will be relinked during installation" 1>&2
+ else
+ if test "$fast_install" != no; then
+ link_command="$finalize_var$compile_command$finalize_rpath"
+ if test "$fast_install" = yes; then
+ relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'`
+ else
+ # fast_install is set to needless
+ relink_command=
+ fi
+ else
+ link_command="$compile_var$compile_command$compile_rpath"
+ relink_command="$finalize_var$finalize_command$finalize_rpath"
+ fi
+ fi
+
+ # Replace the output file specification.
+ link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
+
+ # Delete the old output files.
+ $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname
+
+ $show "$link_command"
+ $run eval "$link_command" || exit $?
+
+ # Now create the wrapper script.
+ $show "creating $output"
+
+ # Quote the relink command for shipping.
+ if test -n "$relink_command"; then
+ relink_command="cd `pwd`; $relink_command"
+ relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+ fi
+
+ # Quote $echo for shipping.
+ if test "X$echo" = "X$SHELL $0 --fallback-echo"; then
+ case "$0" in
+ [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";;
+ *) qecho="$SHELL `pwd`/$0 --fallback-echo";;
+ esac
+ qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"`
+ else
+ qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
+ fi
+
+ # Only actually do things if our run command is non-null.
+ if test -z "$run"; then
+ # win32 will think the script is a binary if it has
+ # a .exe suffix, so we strip it off here.
+ case $output in
+ *.exe) output=`echo $output|sed 's,.exe$,,'` ;;
+ esac
+ $rm $output
+ trap "$rm $output; exit 1" 1 2 15
+
+ $echo > $output "\
+#! $SHELL
+
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# The $output program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e 1s/^X//'
+sed_quote_subst='$sed_quote_subst'
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi
+
+relink_command=\"$relink_command\"
+
+# This environment variable determines our operation mode.
+if test \"\$libtool_install_magic\" = \"$magic\"; then
+ # install mode needs the following variable:
+ link_against_libtool_libs='$link_against_libtool_libs'
+else
+ # When we are sourced in execute mode, \$file and \$echo are already set.
+ if test \"\$libtool_execute_magic\" != \"$magic\"; then
+ echo=\"$qecho\"
+ file=\"\$0\"
+ # Make sure echo works.
+ if test \"X\$1\" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+ elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then
+ # Yippee, \$echo works!
+ :
+ else
+ # Restart under the correct shell, and then maybe \$echo will work.
+ exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"}
+ fi
+ fi\
+"
+ $echo >> $output "\
+
+ # Find the directory that this script lives in.
+ thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\`
+ test \"x\$thisdir\" = \"x\$file\" && thisdir=.
+
+ # Follow symbolic links until we get to the real thisdir.
+ file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\`
+ while test -n \"\$file\"; do
+ destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\`
+
+ # If there was a directory component, then change thisdir.
+ if test \"x\$destdir\" != \"x\$file\"; then
+ case \"\$destdir\" in
+ [\\/]* | [A-Za-z]:[\\/]*) thisdir=\"\$destdir\" ;;
+ *) thisdir=\"\$thisdir/\$destdir\" ;;
+ esac
+ fi
+
+ file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\`
+ file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\`
+ done
+
+ # Try to get the absolute directory name.
+ absdir=\`cd \"\$thisdir\" && pwd\`
+ test -n \"\$absdir\" && thisdir=\"\$absdir\"
+"
+
+ if test "$fast_install" = yes; then
+ echo >> $output "\
+ program=lt-'$outputname'
+ progdir=\"\$thisdir/$objdir\"
+
+ if test ! -f \"\$progdir/\$program\" || \\
+ { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | sed 1q\`; \\
+ test \"X\$file\" != \"X\$progdir/\$program\"; }; then
+
+ file=\"\$\$-\$program\"
+
+ if test ! -d \"\$progdir\"; then
+ $mkdir \"\$progdir\"
+ else
+ $rm \"\$progdir/\$file\"
+ fi"
+
+ echo >> $output "\
+
+ # relink executable if necessary
+ if test -n \"\$relink_command\"; then
+ if (eval \$relink_command); then :
+ else
+ $rm \"\$progdir/\$file\"
+ exit 1
+ fi
+ fi
+
+ $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
+ { $rm \"\$progdir/\$program\";
+ $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; }
+ $rm \"\$progdir/\$file\"
+ fi"
+ else
+ echo >> $output "\
+ program='$outputname'
+ progdir=\"\$thisdir/$objdir\"
+"
+ fi
+
+ echo >> $output "\
+
+ if test -f \"\$progdir/\$program\"; then"
+
+ # Export our shlibpath_var if we have one.
+ if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+ $echo >> $output "\
+ # Add our own library path to $shlibpath_var
+ $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
+
+ # Some systems cannot cope with colon-terminated $shlibpath_var
+ # The second colon is a workaround for a bug in BeOS R4 sed
+ $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\`
+
+ export $shlibpath_var
+"
+ fi
+
+ # fixup the dll searchpath if we need to.
+ if test -n "$dllsearchpath"; then
+ $echo >> $output "\
+ # Add the dll search path components to the executable PATH
+ PATH=$dllsearchpath:\$PATH
+"
+ fi
+
+ $echo >> $output "\
+ if test \"\$libtool_execute_magic\" != \"$magic\"; then
+ # Run the actual program with our arguments.
+"
+ case $host in
+ *-*-cygwin* | *-*-mingw | *-*-os2*)
+ # win32 systems need to use the prog path for dll
+ # lookup to work
+ $echo >> $output "\
+ exec \$progdir\\\\\$program \${1+\"\$@\"}
+"
+ ;;
+ *)
+ $echo >> $output "\
+ # Export the path to the program.
+ PATH=\"\$progdir:\$PATH\"
+ export PATH
+
+ exec \$program \${1+\"\$@\"}
+"
+ ;;
+ esac
+ $echo >> $output "\
+ \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\"
+ exit 1
+ fi
+ else
+ # The program doesn't exist.
+ \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2
+ \$echo \"This script is just a wrapper for \$program.\" 1>&2
+ echo \"See the $PACKAGE documentation for more information.\" 1>&2
+ exit 1
+ fi
+fi\
+"
+ chmod +x $output
+ fi
+ exit 0
+ ;;
+ esac
+
+ # See if we need to build an old-fashioned archive.
+ for oldlib in $oldlibs; do
+
+ if test "$build_libtool_libs" = convenience; then
+ oldobjs="$libobjs_save"
+ addlibs="$convenience"
+ build_libtool_libs=no
+ else
+ if test "$build_libtool_libs" = module; then
+ oldobjs="$libobjs_save"
+ build_libtool_libs=no
+ else
+ oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`
+ fi
+ addlibs="$old_convenience"
+ fi
+
+ if test -n "$addlibs"; then
+ gentop="$output_objdir/${outputname}x"
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "mkdir $gentop"
+ $run mkdir "$gentop"
+ status=$?
+ if test $status -ne 0 && test ! -d "$gentop"; then
+ exit $status
+ fi
+ generated="$generated $gentop"
+
+ # Add in members from convenience archives.
+ for xlib in $addlibs; do
+ # Extract the objects.
+ case "$xlib" in
+ [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;;
+ *) xabs=`pwd`"/$xlib" ;;
+ esac
+ xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'`
+ xdir="$gentop/$xlib"
+
+ $show "${rm}r $xdir"
+ $run ${rm}r "$xdir"
+ $show "mkdir $xdir"
+ $run mkdir "$xdir"
+ status=$?
+ if test $status -ne 0 && test ! -d "$xdir"; then
+ exit $status
+ fi
+ $show "(cd $xdir && $AR x $xabs)"
+ $run eval "(cd \$xdir && $AR x \$xabs)" || exit $?
+
+ oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP`
+ done
+ fi
+
+ # Do each command in the archive commands.
+ if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
+ eval cmds=\"$old_archive_from_new_cmds\"
+ else
+ # Ensure that we have .o objects in place in case we decided
+ # not to build a shared library, and have fallen back to building
+ # static libs even though --disable-static was passed!
+ for oldobj in $oldobjs; do
+ if test ! -f $oldobj; then
+ xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$oldobj"; then
+ xdir="."
+ else
+ xdir="$xdir"
+ fi
+ baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'`
+ obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"`
+ $show "(cd $xdir && ${LN_S} $obj $baseobj)"
+ $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $?
+ fi
+ done
+
+ eval cmds=\"$old_archive_cmds\"
+ fi
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ done
+
+ if test -n "$generated"; then
+ $show "${rm}r$generated"
+ $run ${rm}r$generated
+ fi
+
+ # Now create the libtool archive.
+ case "$output" in
+ *.la)
+ old_library=
+ test "$build_old_libs" = yes && old_library="$libname.$libext"
+ $show "creating $output"
+
+ # Quote the link command for shipping.
+ relink_command="cd `pwd`; $SHELL $0 --mode=relink $libtool_args"
+ relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+
+ # Only create the output if not a dry run.
+ if test -z "$run"; then
+ for installed in no yes; do
+ if test "$installed" = yes; then
+ if test -z "$install_libdir"; then
+ break
+ fi
+ output="$output_objdir/$outputname"i
+ # Replace all uninstalled libtool libraries with the installed ones
+ newdependency_libs=
+ for deplib in $dependency_libs; do
+ case "$deplib" in
+ *.la)
+ name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ newdependency_libs="$newdependency_libs $libdir/$name"
+ ;;
+ *) newdependency_libs="$newdependency_libs $deplib" ;;
+ esac
+ done
+ dependency_libs="$newdependency_libs"
+ newdlfiles=
+ for lib in $dlfiles; do
+ name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ newdlfiles="$newdlfiles $libdir/$name"
+ done
+ dlfiles="$newdlfiles"
+ newdlprefiles=
+ for lib in $dlprefiles; do
+ name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`sed -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit 1
+ fi
+ newdlprefiles="$newdlprefiles $libdir/$name"
+ done
+ dlprefiles="$newdlprefiles"
+ fi
+ $rm $output
+ $echo > $output "\
+# $outputname - a libtool library file
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# The name that we can dlopen(3).
+dlname='$dlname'
+
+# Names of this library.
+library_names='$library_names'
+
+# The name of the static archive.
+old_library='$old_library'
+
+# Libraries that this one depends upon.
+dependency_libs='$dependency_libs'
+
+# Version information for $libname.
+current=$current
+age=$age
+revision=$revision
+
+# Is this an already installed library?
+installed=$installed
+
+# Files to dlopen/dlpreopen
+dlopen='$dlfiles'
+dlpreopen='$dlprefiles'
+
+# Directory that this library needs to be installed in:
+libdir='$install_libdir'"
+ if test "$installed" = no; then
+ $echo >> $output "\
+relink_command=\"$relink_command\""
+ fi
+ done
+ fi
+
+ # Do a symbolic link so that the libtool archive can be found in
+ # LD_LIBRARY_PATH before the program is installed.
+ $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)"
+ $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $?
+ ;;
+ esac
+ exit 0
+ ;;
+
+ # libtool install mode
+ install)
+ modename="$modename: install"
+
+ # There may be an optional sh(1) argument at the beginning of
+ # install_prog (especially on Windows NT).
+ if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh; then
+ # Aesthetically quote it.
+ arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*)
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$arg "
+ arg="$1"
+ shift
+ else
+ install_prog=
+ arg="$nonopt"
+ fi
+
+ # The real first argument should be the name of the installation program.
+ # Aesthetically quote it.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*)
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$install_prog$arg"
+
+ # We need to accept at least all the BSD install flags.
+ dest=
+ files=
+ opts=
+ prev=
+ install_type=
+ isdir=no
+ stripme=
+ for arg
+ do
+ if test -n "$dest"; then
+ files="$files $dest"
+ dest="$arg"
+ continue
+ fi
+
+ case "$arg" in
+ -d) isdir=yes ;;
+ -f) prev="-f" ;;
+ -g) prev="-g" ;;
+ -m) prev="-m" ;;
+ -o) prev="-o" ;;
+ -s)
+ stripme=" -s"
+ continue
+ ;;
+ -*) ;;
+
+ *)
+ # If the previous option needed an argument, then skip it.
+ if test -n "$prev"; then
+ prev=
+ else
+ dest="$arg"
+ continue
+ fi
+ ;;
+ esac
+
+ # Aesthetically quote the argument.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case "$arg" in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*)
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$install_prog $arg"
+ done
+
+ if test -z "$install_prog"; then
+ $echo "$modename: you must specify an install program" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ if test -n "$prev"; then
+ $echo "$modename: the \`$prev' option requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ if test -z "$files"; then
+ if test -z "$dest"; then
+ $echo "$modename: no file or destination specified" 1>&2
+ else
+ $echo "$modename: you must specify a destination" 1>&2
+ fi
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Strip any trailing slash from the destination.
+ dest=`$echo "X$dest" | $Xsed -e 's%/$%%'`
+
+ # Check to see that the destination is a directory.
+ test -d "$dest" && isdir=yes
+ if test "$isdir" = yes; then
+ destdir="$dest"
+ destname=
+ else
+ destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$destdir" = "X$dest" && destdir=.
+ destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'`
+
+ # Not a directory, so check to see that there is only one file specified.
+ set dummy $files
+ if test $# -gt 2; then
+ $echo "$modename: \`$dest' is not a directory" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+ fi
+ case "$destdir" in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ for file in $files; do
+ case "$file" in
+ *.lo) ;;
+ *)
+ $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+ esac
+ done
+ ;;
+ esac
+
+ # This variable tells wrapper scripts just to set variables rather
+ # than running their programs.
+ libtool_install_magic="$magic"
+
+ staticlibs=
+ future_libdirs=
+ current_libdirs=
+ for file in $files; do
+
+ # Do each installation.
+ case "$file" in
+ *.$libext)
+ # Do the static libraries later.
+ staticlibs="$staticlibs $file"
+ ;;
+
+ *.la)
+ # Check to see that this really is a libtool archive.
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ library_names=
+ old_library=
+ relink_command=
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Add the libdir to current_libdirs if it is the destination.
+ if test "X$destdir" = "X$libdir"; then
+ case "$current_libdirs " in
+ *" $libdir "*) ;;
+ *) current_libdirs="$current_libdirs $libdir" ;;
+ esac
+ else
+ # Note the libdir as a future libdir.
+ case "$future_libdirs " in
+ *" $libdir "*) ;;
+ *) future_libdirs="$future_libdirs $libdir" ;;
+ esac
+ fi
+
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/
+ test "X$dir" = "X$file/" && dir=
+ dir="$dir$objdir"
+
+ if test "$hardcode_into_libs" = all; then
+ if test -z "$relink_command"; then
+ $echo "$modename: invalid libtool pseudo library \`$file'" 1>&2
+ exit 1
+ fi
+ $echo "$modename: warning: relinking \`$file'" 1>&2
+ $show "$relink_command"
+ if $run eval "$relink_command"; then :
+ else
+ $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+ continue
+ fi
+ fi
+
+ # See the names of the shared library.
+ set dummy $library_names
+ if test -n "$2"; then
+ realname="$2"
+ shift
+ shift
+
+ srcname="$realname"
+ test "$hardcode_into_libs" = all && srcname="$realname"T
+
+ # Install the shared library and build the symlinks.
+ $show "$install_prog $dir/$srcname $destdir/$realname"
+ $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $?
+ if test -n "$stripme" && test -n "$striplib"; then
+ $show "$striplib $destdir/$realname"
+ $run eval "$striplib $destdir/$realname" || exit $?
+ fi
+
+ if test $# -gt 0; then
+ # Delete the old symlinks, and create new ones.
+ for linkname
+ do
+ if test "$linkname" != "$realname"; then
+ $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)"
+ $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)"
+ fi
+ done
+ fi
+
+ # Do each command in the postinstall commands.
+ lib="$destdir/$realname"
+ eval cmds=\"$postinstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ # Install the pseudo-library for information purposes.
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ instname="$dir/$name"i
+ $show "$install_prog $instname $destdir/$name"
+ $run eval "$install_prog $instname $destdir/$name" || exit $?
+
+ # Maybe install the static library, too.
+ test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
+ ;;
+
+ *.lo)
+ # Install (i.e. copy) a libtool object.
+
+ # Figure out destination file name, if it wasn't already specified.
+ if test -n "$destname"; then
+ destfile="$destdir/$destname"
+ else
+ destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ destfile="$destdir/$destfile"
+ fi
+
+ # Deduce the name of the destination old-style object file.
+ case "$destfile" in
+ *.lo)
+ staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"`
+ ;;
+ *.$objext)
+ staticdest="$destfile"
+ destfile=
+ ;;
+ *)
+ $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+ esac
+
+ # Install the libtool object if requested.
+ if test -n "$destfile"; then
+ $show "$install_prog $file $destfile"
+ $run eval "$install_prog $file $destfile" || exit $?
+ fi
+
+ # Install the old object if enabled.
+ if test "$build_old_libs" = yes; then
+ # Deduce the name of the old-style object file.
+ staticobj=`$echo "X$file" | $Xsed -e "$lo2o"`
+
+ $show "$install_prog $staticobj $staticdest"
+ $run eval "$install_prog \$staticobj \$staticdest" || exit $?
+ fi
+ exit 0
+ ;;
+
+ *)
+ # Figure out destination file name, if it wasn't already specified.
+ if test -n "$destname"; then
+ destfile="$destdir/$destname"
+ else
+ destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ destfile="$destdir/$destfile"
+ fi
+
+ # Do a test to see if this is really a libtool program.
+ if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ link_against_libtool_libs=
+ relink_command=
+
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Check the variables that should have been set.
+ if test -z "$link_against_libtool_libs"; then
+ $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2
+ exit 1
+ fi
+
+ finalize=yes
+ for lib in $link_against_libtool_libs; do
+ # Check to see that each library is installed.
+ libdir=
+ if test -f "$lib"; then
+ # If there is no directory component, then add one.
+ case "$lib" in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+ fi
+ libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test
+ if test -n "$libdir" && test ! -f "$libfile"; then
+ $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2
+ finalize=no
+ fi
+ done
+
+ relink_command=
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ outputname=
+ if test "$fast_install" = no && test -n "$relink_command"; then
+ if test "$finalize" = yes && test -z "$run"; then
+ tmpdir="/tmp"
+ test -n "$TMPDIR" && tmpdir="$TMPDIR"
+ tmpdir="$tmpdir/libtool-$$"
+ if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then :
+ else
+ $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2
+ continue
+ fi
+ outputname="$tmpdir/$file"
+ # Replace the output file specification.
+ relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'`
+
+ $show "$relink_command"
+ if $run eval "$relink_command"; then :
+ else
+ $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+ ${rm}r "$tmpdir"
+ continue
+ fi
+ file="$outputname"
+ else
+ $echo "$modename: warning: cannot relink \`$file'" 1>&2
+ fi
+ else
+ # Install the binary that we compiled earlier.
+ file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"`
+ fi
+ fi
+
+ $show "$install_prog$stripme $file $destfile"
+ $run eval "$install_prog\$stripme \$file \$destfile" || exit $?
+ test -n "$outputname" && ${rm}r "$tmpdir"
+ ;;
+ esac
+ done
+
+ for file in $staticlibs; do
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+
+ # Set up the ranlib parameters.
+ oldlib="$destdir/$name"
+
+ $show "$install_prog $file $oldlib"
+ $run eval "$install_prog \$file \$oldlib" || exit $?
+
+ if test -n "$stripme" && test -n "$striplib"; then
+ $show "$old_striplib $oldlib"
+ $run eval "$old_striplib $oldlib" || exit $?
+ fi
+
+ # Do each command in the postinstall commands.
+ eval cmds=\"$old_postinstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ done
+
+ if test -n "$future_libdirs"; then
+ $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2
+ fi
+
+ if test -n "$current_libdirs"; then
+ # Maybe just do a dry run.
+ test -n "$run" && current_libdirs=" -n$current_libdirs"
+ exec $SHELL $0 --finish$current_libdirs
+ exit 1
+ fi
+
+ exit 0
+ ;;
+
+ # libtool finish mode
+ finish)
+ modename="$modename: finish"
+ libdirs="$nonopt"
+ admincmds=
+
+ if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+ for dir
+ do
+ libdirs="$libdirs $dir"
+ done
+
+ for libdir in $libdirs; do
+ if test -n "$finish_cmds"; then
+ # Do each command in the finish commands.
+ eval cmds=\"$finish_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || admincmds="$admincmds
+ $cmd"
+ done
+ IFS="$save_ifs"
+ fi
+ if test -n "$finish_eval"; then
+ # Do the single finish_eval.
+ eval cmds=\"$finish_eval\"
+ $run eval "$cmds" || admincmds="$admincmds
+ $cmds"
+ fi
+ done
+ fi
+
+ # Exit here if they wanted silent mode.
+ test "$show" = : && exit 0
+
+ echo "----------------------------------------------------------------------"
+ echo "Libraries have been installed in:"
+ for libdir in $libdirs; do
+ echo " $libdir"
+ done
+ echo
+ echo "If you ever happen to want to link against installed libraries"
+ echo "in a given directory, LIBDIR, you must either use libtool, and"
+ echo "specify the full pathname of the library, or use \`-LLIBDIR'"
+ echo "flag during linking and do at least one of the following:"
+ if test -n "$shlibpath_var"; then
+ echo " - add LIBDIR to the \`$shlibpath_var' environment variable"
+ echo " during execution"
+ fi
+ if test -n "$runpath_var"; then
+ echo " - add LIBDIR to the \`$runpath_var' environment variable"
+ echo " during linking"
+ fi
+ if test -n "$hardcode_libdir_flag_spec"; then
+ libdir=LIBDIR
+ eval flag=\"$hardcode_libdir_flag_spec\"
+
+ echo " - use the \`$flag' linker flag"
+ fi
+ if test -n "$admincmds"; then
+ echo " - have your system administrator run these commands:$admincmds"
+ fi
+ if test -f /etc/ld.so.conf; then
+ echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
+ fi
+ echo
+ echo "See any operating system documentation about shared libraries for"
+ echo "more information, such as the ld(1) and ld.so(8) manual pages."
+ echo "----------------------------------------------------------------------"
+ exit 0
+ ;;
+
+ # libtool execute mode
+ execute)
+ modename="$modename: execute"
+
+ # The first argument is the command name.
+ cmd="$nonopt"
+ if test -z "$cmd"; then
+ $echo "$modename: you must specify a COMMAND" 1>&2
+ $echo "$help"
+ exit 1
+ fi
+
+ # Handle -dlopen flags immediately.
+ for file in $execute_dlfiles; do
+ if test ! -f "$file"; then
+ $echo "$modename: \`$file' is not a file" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ dir=
+ case "$file" in
+ *.la)
+ # Check to see that this really is a libtool archive.
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ # Read the libtool library.
+ dlname=
+ library_names=
+
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Skip this library if it cannot be dlopened.
+ if test -z "$dlname"; then
+ # Warn if it was a shared library.
+ test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'"
+ continue
+ fi
+
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$file" && dir=.
+
+ if test -f "$dir/$objdir/$dlname"; then
+ dir="$dir/$objdir"
+ else
+ $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2
+ exit 1
+ fi
+ ;;
+
+ *.lo)
+ # Just add the directory containing the .lo file.
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$file" && dir=.
+ ;;
+
+ *)
+ $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2
+ continue
+ ;;
+ esac
+
+ # Get the absolute pathname.
+ absdir=`cd "$dir" && pwd`
+ test -n "$absdir" && dir="$absdir"
+
+ # Now add the directory to shlibpath_var.
+ if eval "test -z \"\$$shlibpath_var\""; then
+ eval "$shlibpath_var=\"\$dir\""
+ else
+ eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
+ fi
+ done
+
+ # This variable tells wrapper scripts just to set shlibpath_var
+ # rather than running their programs.
+ libtool_execute_magic="$magic"
+
+ # Check if any of the arguments is a wrapper script.
+ args=
+ for file
+ do
+ case "$file" in
+ -*) ;;
+ *)
+ # Do a test to see if this is really a libtool program.
+ if (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ # If there is no directory component, then add one.
+ case "$file" in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Transform arg to wrapped name.
+ file="$progdir/$program"
+ fi
+ ;;
+ esac
+ # Quote arguments (to preserve shell metacharacters).
+ file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"`
+ args="$args \"$file\""
+ done
+
+ if test -z "$run"; then
+ if test -n "$shlibpath_var"; then
+ # Export the shlibpath_var.
+ eval "export $shlibpath_var"
+ fi
+
+ # Restore saved enviroment variables
+ if test "${save_LC_ALL+set}" = set; then
+ LC_ALL="$save_LC_ALL"; export LC_ALL
+ fi
+ if test "${save_LANG+set}" = set; then
+ LANG="$save_LANG"; export LANG
+ fi
+
+ # Now actually exec the command.
+ eval "exec \$cmd$args"
+
+ $echo "$modename: cannot exec \$cmd$args"
+ exit 1
+ else
+ # Display what would be done.
+ if test -n "$shlibpath_var"; then
+ eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\""
+ $echo "export $shlibpath_var"
+ fi
+ $echo "$cmd$args"
+ exit 0
+ fi
+ ;;
+
+ # libtool clean and uninstall mode
+ clean | uninstall)
+ modename="$modename: $mode"
+ rm="$nonopt"
+ files=
+
+ # This variable tells wrapper scripts just to set variables rather
+ # than running their programs.
+ libtool_install_magic="$magic"
+
+ for arg
+ do
+ case "$arg" in
+ -*) rm="$rm $arg" ;;
+ *) files="$files $arg" ;;
+ esac
+ done
+
+ if test -z "$rm"; then
+ $echo "$modename: you must specify an RM program" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ fi
+
+ for file in $files; do
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$dir" = "X$file"; then
+ dir=.
+ objdir="$objdir"
+ else
+ objdir="$dir/$objdir"
+ fi
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ test $mode = uninstall && objdir="$dir"
+
+ rmfiles="$file"
+
+ case "$name" in
+ *.la)
+ # Possibly a libtool archive, so verify it.
+ if (sed -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ . $dir/$name
+
+ # Delete the libtool libraries and symlinks.
+ for n in $library_names; do
+ rmfiles="$rmfiles $objdir/$n"
+ done
+ test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library"
+ test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i"
+
+ if test $mode = uninstall; then
+ if test -n "$library_names"; then
+ # Do each command in the postuninstall commands.
+ eval cmds=\"$postuninstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd"
+ done
+ IFS="$save_ifs"
+ fi
+
+ if test -n "$old_library"; then
+ # Do each command in the old_postuninstall commands.
+ eval cmds=\"$old_postuninstall_cmds\"
+ IFS="${IFS= }"; save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd"
+ done
+ IFS="$save_ifs"
+ fi
+ # FIXME: should reinstall the best remaining shared library.
+ fi
+ fi
+ ;;
+
+ *.lo)
+ if test "$build_old_libs" = yes; then
+ oldobj=`$echo "X$name" | $Xsed -e "$lo2o"`
+ rmfiles="$rmfiles $dir/$oldobj"
+ fi
+ ;;
+
+ *)
+ # Do a test to see if this is a libtool program.
+ if test $mode = clean &&
+ (sed -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ relink_command=
+ . $dir/$file
+
+ rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}"
+ if test "$fast_install" = yes && test -n "$relink_command"; then
+ rmfiles="$rmfiles $objdir/lt-$name"
+ fi
+ fi
+ ;;
+ esac
+ $show "$rm $rmfiles"
+ $run $rm $rmfiles
+ done
+ exit 0
+ ;;
+
+ "")
+ $echo "$modename: you must specify a MODE" 1>&2
+ $echo "$generic_help" 1>&2
+ exit 1
+ ;;
+ esac
+
+ $echo "$modename: invalid operation mode \`$mode'" 1>&2
+ $echo "$generic_help" 1>&2
+ exit 1
+fi # test -z "$show_help"
+
+# We need to display help for each of the modes.
+case "$mode" in
+"") $echo \
+"Usage: $modename [OPTION]... [MODE-ARG]...
+
+Provide generalized library-building support services.
+
+ --config show all configuration variables
+ --debug enable verbose shell tracing
+-n, --dry-run display commands without modifying any files
+ --features display basic configuration information and exit
+ --finish same as \`--mode=finish'
+ --help display this help message and exit
+ --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS]
+ --quiet same as \`--silent'
+ --silent don't print informational messages
+ --version print version information
+
+MODE must be one of the following:
+
+ clean remove files from the build directory
+ compile compile a source file into a libtool object
+ execute automatically set library path, then run a program
+ finish complete the installation of libtool libraries
+ install install libraries or executables
+ link create a library or an executable
+ uninstall remove libraries from an installed directory
+
+MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for
+a more detailed description of MODE."
+ exit 0
+ ;;
+
+clean)
+ $echo \
+"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+ ;;
+
+compile)
+ $echo \
+"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
+
+Compile a source file into a libtool library object.
+
+This mode accepts the following additional options:
+
+ -o OUTPUT-FILE set the output file name to OUTPUT-FILE
+ -static always build a \`.o' file suitable for static linking
+
+COMPILE-COMMAND is a command to be used in creating a \`standard' object file
+from the given SOURCEFILE.
+
+The output file name is determined by removing the directory component from
+SOURCEFILE, then substituting the C source code suffix \`.c' with the
+library object suffix, \`.lo'."
+ ;;
+
+execute)
+ $echo \
+"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]...
+
+Automatically set library path, then run a program.
+
+This mode accepts the following additional options:
+
+ -dlopen FILE add the directory containing FILE to the library path
+
+This mode sets the library path environment variable according to \`-dlopen'
+flags.
+
+If any of the ARGS are libtool executable wrappers, then they are translated
+into their corresponding uninstalled binary, and any of their required library
+directories are added to the library path.
+
+Then, COMMAND is executed, with ARGS as arguments."
+ ;;
+
+finish)
+ $echo \
+"Usage: $modename [OPTION]... --mode=finish [LIBDIR]...
+
+Complete the installation of libtool libraries.
+
+Each LIBDIR is a directory that contains libtool libraries.
+
+The commands that this mode executes may require superuser privileges. Use
+the \`--dry-run' option if you just want to see what would be executed."
+ ;;
+
+install)
+ $echo \
+"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND...
+
+Install executables or libraries.
+
+INSTALL-COMMAND is the installation command. The first component should be
+either the \`install' or \`cp' program.
+
+The rest of the components are interpreted as arguments to that command (only
+BSD-compatible install options are recognized)."
+ ;;
+
+link)
+ $echo \
+"Usage: $modename [OPTION]... --mode=link LINK-COMMAND...
+
+Link object files or libraries together to form another library, or to
+create an executable program.
+
+LINK-COMMAND is a command using the C compiler that you would use to create
+a program from several object files.
+
+The following components of LINK-COMMAND are treated specially:
+
+ -all-static do not do any dynamic linking at all
+ -avoid-version do not add a version suffix if possible
+ -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime
+ -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols
+ -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+ -export-symbols SYMFILE
+ try to export only the symbols listed in SYMFILE
+ -export-symbols-regex REGEX
+ try to export only the symbols matching REGEX
+ -LLIBDIR search LIBDIR for required installed libraries
+ -lNAME OUTPUT-FILE requires the installed library libNAME
+ -module build a library that can dlopened
+ -no-fast-install disable the fast-install mode
+ -no-install link a not-installable executable
+ -no-undefined declare that a library does not refer to external symbols
+ -o OUTPUT-FILE create OUTPUT-FILE from the specified objects
+ -release RELEASE specify package release information
+ -rpath LIBDIR the created library will eventually be installed in LIBDIR
+ -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries
+ -static do not do any dynamic linking of libtool libraries
+ -version-info CURRENT[:REVISION[:AGE]]
+ specify library version info [each variable defaults to 0]
+
+All other options (arguments beginning with \`-') are ignored.
+
+Every other argument is treated as a filename. Files ending in \`.la' are
+treated as uninstalled libtool libraries, other files are standard or library
+object files.
+
+If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
+only library objects (\`.lo' files) may be specified, and \`-rpath' is
+required, except when creating a convenience library.
+
+If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
+using \`ar' and \`ranlib', or on Windows using \`lib'.
+
+If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
+is created, otherwise an executable program is created."
+ ;;
+
+uninstall)
+ $echo \
+"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
+
+Remove libraries from an installation directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, all the files associated with it are deleted.
+Otherwise, only FILE itself is deleted using RM."
+ ;;
+
+*)
+ $echo "$modename: invalid operation mode \`$mode'" 1>&2
+ $echo "$help" 1>&2
+ exit 1
+ ;;
+esac
+
+echo
+$echo "Try \`$modename --help' for more information about other modes."
+
+exit 0
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
diff --git a/rts/gmp/mdate-sh b/rts/gmp/mdate-sh
new file mode 100644
index 0000000000..37171f21fb
--- /dev/null
+++ b/rts/gmp/mdate-sh
@@ -0,0 +1,92 @@
+#!/bin/sh
+# Get modification time of a file or directory and pretty-print it.
+# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+# written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, June 1995
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Prevent date giving response in another language.
+LANG=C
+export LANG
+LC_ALL=C
+export LC_ALL
+LC_TIME=C
+export LC_TIME
+
+# Get the extended ls output of the file or directory.
+# On HPUX /bin/sh, "set" interprets "-rw-r--r--" as options, so the "x" below.
+if ls -L /dev/null 1>/dev/null 2>&1; then
+ set - x`ls -L -l -d $1`
+else
+ set - x`ls -l -d $1`
+fi
+# The month is at least the fourth argument
+# (3 shifts here, the next inside the loop).
+shift
+shift
+shift
+
+# Find the month. Next argument is day, followed by the year or time.
+month=
+until test $month
+do
+ shift
+ case $1 in
+ Jan) month=January; nummonth=1;;
+ Feb) month=February; nummonth=2;;
+ Mar) month=March; nummonth=3;;
+ Apr) month=April; nummonth=4;;
+ May) month=May; nummonth=5;;
+ Jun) month=June; nummonth=6;;
+ Jul) month=July; nummonth=7;;
+ Aug) month=August; nummonth=8;;
+ Sep) month=September; nummonth=9;;
+ Oct) month=October; nummonth=10;;
+ Nov) month=November; nummonth=11;;
+ Dec) month=December; nummonth=12;;
+ esac
+done
+
+day=$2
+
+# Here we have to deal with the problem that the ls output gives either
+# the time of day or the year.
+case $3 in
+ *:*) set `date`; eval year=\$$#
+ case $2 in
+ Jan) nummonthtod=1;;
+ Feb) nummonthtod=2;;
+ Mar) nummonthtod=3;;
+ Apr) nummonthtod=4;;
+ May) nummonthtod=5;;
+ Jun) nummonthtod=6;;
+ Jul) nummonthtod=7;;
+ Aug) nummonthtod=8;;
+ Sep) nummonthtod=9;;
+ Oct) nummonthtod=10;;
+ Nov) nummonthtod=11;;
+ Dec) nummonthtod=12;;
+ esac
+ # For the first six months of the year the time notation can also
+ # be used for files modified in the last year.
+ if (expr $nummonth \> $nummonthtod) > /dev/null;
+ then
+ year=`expr $year - 1`
+ fi;;
+ *) year=$3;;
+esac
+
+# The result.
+echo $day $month $year
diff --git a/rts/gmp/memory.c b/rts/gmp/memory.c
new file mode 100644
index 0000000000..9df440ce22
--- /dev/null
+++ b/rts/gmp/memory.c
@@ -0,0 +1,160 @@
+/* Memory allocation routines.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <stdlib.h> /* for malloc, realloc, free */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef __NeXT__
+#define static
+#endif
+
+
+void * (*_mp_allocate_func) _PROTO ((size_t)) = _mp_default_allocate;
+void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t))
+ = _mp_default_reallocate;
+void (*_mp_free_func) _PROTO ((void *, size_t)) = _mp_default_free;
+
+
+/* Default allocation functions. In case of failure to allocate/reallocate
+ an error message is written to stderr and the program aborts. */
+
+void *
+#if __STDC__
+_mp_default_allocate (size_t size)
+#else
+_mp_default_allocate (size)
+ size_t size;
+#endif
+{
+ void *ret;
+#ifdef DEBUG
+ size_t req_size = size;
+ size += 2 * BYTES_PER_MP_LIMB;
+#endif
+ ret = malloc (size);
+ if (ret == 0)
+ {
+ perror ("cannot allocate in gmp");
+ abort ();
+ }
+
+#ifdef DEBUG
+ {
+ mp_ptr p = ret;
+ p++;
+ p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;
+ if (req_size % BYTES_PER_MP_LIMB == 0)
+ p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);
+ ret = p;
+ }
+#endif
+ return ret;
+}
+
+void *
+#if __STDC__
+_mp_default_reallocate (void *oldptr, size_t old_size, size_t new_size)
+#else
+_mp_default_reallocate (oldptr, old_size, new_size)
+ void *oldptr;
+ size_t old_size;
+ size_t new_size;
+#endif
+{
+ void *ret;
+
+#ifdef DEBUG
+ size_t req_size = new_size;
+
+ if (old_size != 0)
+ {
+ mp_ptr p = oldptr;
+ if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)
+ {
+ fprintf (stderr, "gmp: (realloc) data clobbered before allocation block\n");
+ abort ();
+ }
+ if (old_size % BYTES_PER_MP_LIMB == 0)
+ if (p[old_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+ {
+ fprintf (stderr, "gmp: (realloc) data clobbered after allocation block\n");
+ abort ();
+ }
+ oldptr = p - 1;
+ }
+
+ new_size += 2 * BYTES_PER_MP_LIMB;
+#endif
+
+ ret = realloc (oldptr, new_size);
+ if (ret == 0)
+ {
+ perror ("cannot allocate in gmp");
+ abort ();
+ }
+
+#ifdef DEBUG
+ {
+ mp_ptr p = ret;
+ p++;
+ p[-1] = (0xdeadbeef << 31) + 0xdeafdeed;
+ if (req_size % BYTES_PER_MP_LIMB == 0)
+ p[req_size / BYTES_PER_MP_LIMB] = ~((0xdeadbeef << 31) + 0xdeafdeed);
+ ret = p;
+ }
+#endif
+ return ret;
+}
+
+void
+#if __STDC__
+_mp_default_free (void *blk_ptr, size_t blk_size)
+#else
+_mp_default_free (blk_ptr, blk_size)
+ void *blk_ptr;
+ size_t blk_size;
+#endif
+{
+#ifdef DEBUG
+ {
+ mp_ptr p = blk_ptr;
+ if (blk_size != 0)
+ {
+ if (p[-1] != (0xdeadbeef << 31) + 0xdeafdeed)
+ {
+ fprintf (stderr, "gmp: (free) data clobbered before allocation block\n");
+ abort ();
+ }
+ if (blk_size % BYTES_PER_MP_LIMB == 0)
+ if (p[blk_size / BYTES_PER_MP_LIMB] != ~((0xdeadbeef << 31) + 0xdeafdeed))
+ {
+ fprintf (stderr, "gmp: (free) data clobbered after allocation block\n");
+ abort ();
+ }
+ }
+ blk_ptr = p - 1;
+ }
+#endif
+ free (blk_ptr);
+}
diff --git a/rts/gmp/missing b/rts/gmp/missing
new file mode 100644
index 0000000000..c60e9d772f
--- /dev/null
+++ b/rts/gmp/missing
@@ -0,0 +1,244 @@
+#! /bin/sh
+# Common stub for a few missing GNU programs while installing.
+# Copyright (C) 1996, 1997, 1999 Free Software Foundation, Inc.
+# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+if test $# -eq 0; then
+ echo 1>&2 "Try \`$0 --help' for more information"
+ exit 1
+fi
+
+run=:
+
+case "$1" in
+--run)
+ # Try to run requested program, and just exit if it succeeds.
+ run=
+ shift
+ "$@" && exit 0
+ ;;
+esac
+
+# If it does not exist, or fails to run (possibly an outdated version),
+# try to emulate it.
+case "$1" in
+
+ -h|--h|--he|--hel|--help)
+ echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
+error status if there is no known handling for PROGRAM.
+
+Options:
+ -h, --help display this help and exit
+ -v, --version output version information and exit
+ --run try to run the given command, and emulate it if it fails
+
+Supported PROGRAM values:
+ aclocal touch file \`aclocal.m4'
+ autoconf touch file \`configure'
+ autoheader touch file \`config.h.in'
+ automake touch all \`Makefile.in' files
+ bison create \`y.tab.[ch]', if possible, from existing .[ch]
+ flex create \`lex.yy.c', if possible, from existing .c
+ lex create \`lex.yy.c', if possible, from existing .c
+ makeinfo touch the output file
+ tar try tar, gnutar, gtar, then tar without non-portable flags
+ yacc create \`y.tab.[ch]', if possible, from existing .[ch]"
+ ;;
+
+ -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+ echo "missing 0.2 - GNU automake"
+ ;;
+
+ -*)
+ echo 1>&2 "$0: Unknown \`$1' option"
+ echo 1>&2 "Try \`$0 --help' for more information"
+ exit 1
+ ;;
+
+ aclocal)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`acinclude.m4' or \`configure.in'. You might want
+ to install the \`Automake' and \`Perl' packages. Grab them from
+ any GNU archive site."
+ touch aclocal.m4
+ ;;
+
+ autoconf)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`configure.in'. You might want to install the
+ \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
+ archive site."
+ touch configure
+ ;;
+
+ autoheader)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`acconfig.h' or \`configure.in'. You might want
+ to install the \`Autoconf' and \`GNU m4' packages. Grab them
+ from any GNU archive site."
+ files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' configure.in`
+ test -z "$files" && files="config.h"
+ touch_files=
+ for f in $files; do
+ case "$f" in
+ *:*) touch_files="$touch_files "`echo "$f" |
+ sed -e 's/^[^:]*://' -e 's/:.*//'`;;
+ *) touch_files="$touch_files $f.in";;
+ esac
+ done
+ touch $touch_files
+ ;;
+
+ automake)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified \`Makefile.am', \`acinclude.m4' or \`configure.in'.
+ You might want to install the \`Automake' and \`Perl' packages.
+ Grab them from any GNU archive site."
+ find . -type f -name Makefile.am -print |
+ sed 's/\.am$/.in/' |
+ while read f; do touch "$f"; done
+ ;;
+
+ bison|yacc)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified a \`.y' file. You may need the \`Bison' package
+ in order for those modifications to take effect. You can get
+ \`Bison' from any GNU archive site."
+ rm -f y.tab.c y.tab.h
+ if [ $# -ne 1 ]; then
+ eval LASTARG="\${$#}"
+ case "$LASTARG" in
+ *.y)
+ SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
+ if [ -f "$SRCFILE" ]; then
+ cp "$SRCFILE" y.tab.c
+ fi
+ SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
+ if [ -f "$SRCFILE" ]; then
+ cp "$SRCFILE" y.tab.h
+ fi
+ ;;
+ esac
+ fi
+ if [ ! -f y.tab.h ]; then
+ echo >y.tab.h
+ fi
+ if [ ! -f y.tab.c ]; then
+ echo 'main() { return 0; }' >y.tab.c
+ fi
+ ;;
+
+ lex|flex)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified a \`.l' file. You may need the \`Flex' package
+ in order for those modifications to take effect. You can get
+ \`Flex' from any GNU archive site."
+ rm -f lex.yy.c
+ if [ $# -ne 1 ]; then
+ eval LASTARG="\${$#}"
+ case "$LASTARG" in
+ *.l)
+ SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
+ if [ -f "$SRCFILE" ]; then
+ cp "$SRCFILE" lex.yy.c
+ fi
+ ;;
+ esac
+ fi
+ if [ ! -f lex.yy.c ]; then
+ echo 'main() { return 0; }' >lex.yy.c
+ fi
+ ;;
+
+ makeinfo)
+ echo 1>&2 "\
+WARNING: \`$1' is missing on your system. You should only need it if
+ you modified a \`.texi' or \`.texinfo' file, or any other file
+ indirectly affecting the aspect of the manual. The spurious
+ call might also be the consequence of using a buggy \`make' (AIX,
+ DU, IRIX). You might want to install the \`Texinfo' package or
+ the \`GNU make' package. Grab either from any GNU archive site."
+ file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
+ if test -z "$file"; then
+ file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
+ file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file`
+ fi
+ touch $file
+ ;;
+
+ tar)
+ shift
+ if test -n "$run"; then
+ echo 1>&2 "ERROR: \`tar' requires --run"
+ exit 1
+ fi
+
+ # We have already tried tar in the generic part.
+ # Look for gnutar/gtar before invocation to avoid ugly error
+ # messages.
+ if (gnutar --version > /dev/null 2>&1); then
+ gnutar ${1+"$@"} && exit 0
+ fi
+ if (gtar --version > /dev/null 2>&1); then
+ gtar ${1+"$@"} && exit 0
+ fi
+ firstarg="$1"
+ if shift; then
+ case "$firstarg" in
+ *o*)
+ firstarg=`echo "$firstarg" | sed s/o//`
+ tar "$firstarg" ${1+"$@"} && exit 0
+ ;;
+ esac
+ case "$firstarg" in
+ *h*)
+ firstarg=`echo "$firstarg" | sed s/h//`
+ tar "$firstarg" ${1+"$@"} && exit 0
+ ;;
+ esac
+ fi
+
+ echo 1>&2 "\
+WARNING: I can't seem to be able to run \`tar' with the given arguments.
+ You may want to install GNU tar or Free paxutils, or check the
+ command line arguments."
+ exit 1
+ ;;
+
+ *)
+ echo 1>&2 "\
+WARNING: \`$1' is needed, and you do not seem to have it handy on your
+ system. You might have modified some files without having the
+ proper tools for further handling them. Check the \`README' file,
+ it often tells you about the needed prerequirements for installing
+ this package. You may also peek at any GNU archive site, in case
+ some other package would contain this missing \`$1' program."
+ exit 1
+ ;;
+esac
+
+exit 0
diff --git a/rts/gmp/mkinstalldirs b/rts/gmp/mkinstalldirs
new file mode 100644
index 0000000000..5e17cd39fb
--- /dev/null
+++ b/rts/gmp/mkinstalldirs
@@ -0,0 +1,38 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+# Author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Public domain
+
+errstatus=0
+
+for file
+do
+ set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+ shift
+
+ pathcomp=
+ for d
+ do
+ pathcomp="$pathcomp$d"
+ case "$pathcomp" in
+ -* ) pathcomp=./$pathcomp ;;
+ esac
+
+ if test ! -d "$pathcomp"; then
+ echo "mkdir $pathcomp"
+
+ mkdir "$pathcomp" || lasterr=$?
+
+ if test ! -d "$pathcomp"; then
+ errstatus=$lasterr
+ fi
+ fi
+
+ pathcomp="$pathcomp/"
+ done
+done
+
+exit $errstatus
+
+# mkinstalldirs ends here
diff --git a/rts/gmp/mp.h b/rts/gmp/mp.h
new file mode 100644
index 0000000000..ffab4cba82
--- /dev/null
+++ b/rts/gmp/mp.h
@@ -0,0 +1,124 @@
+/* mp.h -- Definitions for Berkeley compatible multiple precision functions.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifndef __MP_H__
+
+#ifndef __GNU_MP__ /* to allow inclusion of both gmp.h and mp.h */
+#define __GNU_MP__ 3
+#define __need_size_t
+#include <stddef.h>
+#undef __need_size_t
+
+#if defined (__STDC__) || defined (__cplusplus)
+#define __gmp_const const
+#else
+#define __gmp_const
+#endif
+
+#if defined (__GNUC__)
+#define __gmp_inline __inline__
+#else
+#define __gmp_inline
+#endif
+
+#ifndef _EXTERN_INLINE
+#ifdef __GNUC__
+#define _EXTERN_INLINE extern __inline__
+#else
+#define _EXTERN_INLINE static
+#endif
+#endif
+
+#ifdef _SHORT_LIMB
+typedef unsigned int mp_limb_t;
+typedef int mp_limb_signed_t;
+#else
+#ifdef _LONG_LONG_LIMB
+typedef unsigned long long int mp_limb_t;
+typedef long long int mp_limb_signed_t;
+#else
+typedef unsigned long int mp_limb_t;
+typedef long int mp_limb_signed_t;
+#endif
+#endif
+
+typedef mp_limb_t * mp_ptr;
+typedef __gmp_const mp_limb_t * mp_srcptr;
+typedef int mp_size_t;
+typedef long int mp_exp_t;
+
+typedef struct
+{
+ int _mp_alloc; /* Number of *limbs* allocated and pointed
+ to by the D field. */
+ int _mp_size; /* abs(SIZE) is the number of limbs
+ the last field points to. If SIZE
+ is negative this is a negative
+ number. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpz_struct;
+#endif /* __GNU_MP__ */
+
+/* User-visible types. */
+typedef __mpz_struct MINT;
+
+
+#ifndef _PROTO
+#if (__STDC__-0) || defined (__cplusplus)
+#define _PROTO(x) x
+#else
+#define _PROTO(x) ()
+#endif
+#endif
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define mp_set_memory_functions __gmp_set_memory_functions
+void mp_set_memory_functions _PROTO ((void *(*) (size_t),
+ void *(*) (void *, size_t, size_t),
+ void (*) (void *, size_t)));
+MINT *itom _PROTO ((signed short int));
+MINT *xtom _PROTO ((const char *));
+void move _PROTO ((const MINT *, MINT *));
+void madd _PROTO ((const MINT *, const MINT *, MINT *));
+void msub _PROTO ((const MINT *, const MINT *, MINT *));
+void mult _PROTO ((const MINT *, const MINT *, MINT *));
+void mdiv _PROTO ((const MINT *, const MINT *, MINT *, MINT *));
+void sdiv _PROTO ((const MINT *, signed short int, MINT *, signed short int *));
+void msqrt _PROTO ((const MINT *, MINT *, MINT *));
+void pow _PROTO ((const MINT *, const MINT *, const MINT *, MINT *));
+void rpow _PROTO ((const MINT *, signed short int, MINT *));
+void gcd _PROTO ((const MINT *, const MINT *, MINT *));
+int mcmp _PROTO ((const MINT *, const MINT *));
+void min _PROTO ((MINT *));
+void mout _PROTO ((const MINT *));
+char *mtox _PROTO ((const MINT *));
+void mfree _PROTO ((MINT *));
+
+#if defined (__cplusplus)
+}
+#endif
+
+#define __MP_H__
+#endif /* __MP_H__ */
diff --git a/rts/gmp/mp_bpl.c b/rts/gmp/mp_bpl.c
new file mode 100644
index 0000000000..df8b03e5ab
--- /dev/null
+++ b/rts/gmp/mp_bpl.c
@@ -0,0 +1,27 @@
+/*
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const int mp_bits_per_limb = BITS_PER_MP_LIMB;
+const int __gmp_0 = 0;
+int __gmp_junk;
diff --git a/rts/gmp/mp_clz_tab.c b/rts/gmp/mp_clz_tab.c
new file mode 100644
index 0000000000..1bbd1d6a66
--- /dev/null
+++ b/rts/gmp/mp_clz_tab.c
@@ -0,0 +1,36 @@
+/* __clz_tab -- support for longlong.h
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const
+unsigned char __clz_tab[] =
+{
+ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
diff --git a/rts/gmp/mp_minv_tab.c b/rts/gmp/mp_minv_tab.c
new file mode 100644
index 0000000000..4afff85cfc
--- /dev/null
+++ b/rts/gmp/mp_minv_tab.c
@@ -0,0 +1,50 @@
+/* A table of data supporting modlimb_invert().
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND MAY CHANGE
+ INCOMPATIBLY OR DISAPPEAR IN A FUTURE GNU MP RELEASE. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* modlimb_invert_table[i] is the multiplicative inverse of 2*i+1 mod 256,
+ ie. (modlimb_invert_table[i] * (2*i+1)) % 256 == 1 */
+
+const unsigned char modlimb_invert_table[128] = {
+ 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
+ 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
+ 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
+ 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
+ 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
+ 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
+ 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
+ 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
+ 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
+ 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
+ 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
+ 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
+ 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
+ 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
+ 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
+ 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
+};
diff --git a/rts/gmp/mp_set_fns.c b/rts/gmp/mp_set_fns.c
new file mode 100644
index 0000000000..55d4d9d6e4
--- /dev/null
+++ b/rts/gmp/mp_set_fns.c
@@ -0,0 +1,48 @@
+/* mp_set_memory_functions -- Set the allocate, reallocate, and free functions
+ for use by the mp package.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+ void *(*realloc_func) (void *, size_t, size_t),
+ void (*free_func) (void *, size_t))
+#else
+mp_set_memory_functions (alloc_func, realloc_func, free_func)
+ void *(*alloc_func) ();
+ void *(*realloc_func) ();
+ void (*free_func) ();
+#endif
+{
+ if (alloc_func == 0)
+ alloc_func = _mp_default_allocate;
+ if (realloc_func == 0)
+ realloc_func = _mp_default_reallocate;
+ if (free_func == 0)
+ free_func = _mp_default_free;
+
+ _mp_allocate_func = alloc_func;
+ _mp_reallocate_func = realloc_func;
+ _mp_free_func = free_func;
+}
diff --git a/rts/gmp/mpn/Makefile.am b/rts/gmp/mpn/Makefile.am
new file mode 100644
index 0000000000..1c49ccda25
--- /dev/null
+++ b/rts/gmp/mpn/Makefile.am
@@ -0,0 +1,94 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+SUBDIRS = tests
+
+CPP = @CPP@
+
+# -DOPERATION_$* tells multi-function files which function to produce.
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+GENERIC_SOURCES = mp_bases.c
+OFILES = @mpn_objects@
+
+noinst_LTLIBRARIES = libmpn.la
+libmpn_la_SOURCES = $(GENERIC_SOURCES)
+libmpn_la_LIBADD = $(OFILES)
+libmpn_la_DEPENDENCIES = $(OFILES)
+
+TARG_DIST = a29k alpha arm clipper cray generic hppa i960 lisp m68k m88k \
+ mips2 mips3 ns32k pa64 pa64w power powerpc32 powerpc64 pyr sh sparc32 \
+ sparc64 thumb vax x86 z8000 z8000x
+
+EXTRA_DIST = underscore.h asm-defs.m4 $(TARG_DIST)
+
+# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP?
+COMPILE_FLAGS = \
+ $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+
+SUFFIXES = .s .S .asm
+
+# *.s are not preprocessed at all.
+.s.o:
+ $(CCAS) $(COMPILE_FLAGS) $<
+.s.obj:
+ $(CCAS) $(COMPILE_FLAGS) `cygpath -w $<`
+.s.lo:
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) $<
+
+# *.S are preprocessed with CPP.
+.S.o:
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.S.obj:
+ $(CPP) $(COMPILE_FLAGS) `cygpath -w $<` | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+
+# We have to rebuild the static object file without passing -DPIC to
+# preprocessor. The overhead cost is one extra assembly pass. FIXME:
+# Teach libtool how to assemble with a preprocessor pass (CPP or m4).
+
+.S.lo:
+ $(CPP) $(COMPILE_FLAGS) -DPIC $< | grep -v '^#' >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
+
+# *.asm files are preprocessed with m4.
+.asm.o:
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.obj:
+ $(M4) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.lo:
+ $(M4) -DPIC -DOPERATION_$* $< >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
diff --git a/rts/gmp/mpn/Makefile.in b/rts/gmp/mpn/Makefile.in
new file mode 100644
index 0000000000..59ee958c92
--- /dev/null
+++ b/rts/gmp/mpn/Makefile.in
@@ -0,0 +1,472 @@
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+
+@SET_MAKE@
+build_alias = @build_alias@
+build_triplet = @build@
+host_alias = @host_alias@
+host_triplet = @host@
+target_alias = @target_alias@
+target_triplet = @target@
+AMDEP = @AMDEP@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+EXEEXT = @EXEEXT@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+gmp_srclinks = @gmp_srclinks@
+install_sh = @install_sh@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+SUBDIRS =
+
+CPP = @CPP@
+
+# -DOPERATION_$* tells multi-function files which function to produce.
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+GENERIC_SOURCES = mp_bases.c
+OFILES = @mpn_objects@
+
+noinst_LTLIBRARIES = libmpn.la
+libmpn_la_SOURCES = $(GENERIC_SOURCES)
+libmpn_la_LIBADD = $(OFILES)
+libmpn_la_DEPENDENCIES = $(OFILES)
+
+TARG_DIST = a29k alpha arm clipper cray generic hppa i960 lisp m68k m88k \
+ mips2 mips3 ns32k pa64 pa64w power powerpc32 powerpc64 pyr sh sparc32 \
+ sparc64 thumb vax x86 z8000 z8000x
+
+
+EXTRA_DIST = underscore.h asm-defs.m4 $(TARG_DIST)
+
+# COMPILE minus CC. FIXME: Really pass *_CFLAGS to CPP?
+COMPILE_FLAGS = \
+ $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+
+
+SUFFIXES = .s .S .asm
+subdir = mpn
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = ../config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I..
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+libmpn_la_LDFLAGS =
+am_libmpn_la_OBJECTS = mp_bases.lo
+libmpn_la_OBJECTS = $(am_libmpn_la_OBJECTS)
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CFLAGS = @CFLAGS@
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libmpn_la_SOURCES)
+DIST_COMMON = README Makefile.am Makefile.in
+
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+GZIP_ENV = --best
+depcomp =
+SOURCES = $(libmpn_la_SOURCES)
+OBJECTS = $(am_libmpn_la_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .S .asm .c .lo .o .obj .s
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu mpn/Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+mostlyclean-noinstLTLIBRARIES:
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+
+distclean-noinstLTLIBRARIES:
+
+maintainer-clean-noinstLTLIBRARIES:
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+ -rm -f *.$(OBJEXT)
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+
+maintainer-clean-libtool:
+
+libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES)
+ $(LINK) $(libmpn_la_LDFLAGS) $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
+.c.o:
+ $(COMPILE) -c $<
+.c.obj:
+ $(COMPILE) -c `cygpath -w $<`
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+
+all-recursive install-data-recursive install-exec-recursive \
+installdirs-recursive install-recursive uninstall-recursive \
+check-recursive installcheck-recursive info-recursive dvi-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \
+ rev="$$subdir $$rev"; \
+ if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \
+ done; \
+ test "$$dot_seen" = "no" && rev=". $$rev"; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \
+ || etags $(ETAGS_ARGS) $$tags $$unique $(LISP)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+
+distdir: $(DISTFILES)
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pR $$d/$$file $(distdir); \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+ for subdir in $(SUBDIRS); do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(top_distdir) distdir=../$(distdir)/$$subdir distdir) \
+ || exit 1; \
+ fi; \
+ done
+info-am:
+info: info-recursive
+dvi-am:
+dvi: dvi-recursive
+check-am: all-am
+check: check-recursive
+installcheck-am:
+installcheck: installcheck-recursive
+install-exec-am:
+install-exec: install-exec-recursive
+
+install-data-am:
+install-data: install-data-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-recursive
+uninstall-am:
+uninstall: uninstall-recursive
+all-am: Makefile $(LTLIBRARIES)
+all-redirect: all-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
+installdirs: installdirs-recursive
+installdirs-am:
+
+
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+
+maintainer-clean-generic:
+ -rm -f Makefile.in
+mostlyclean-am: mostlyclean-noinstLTLIBRARIES mostlyclean-compile \
+ mostlyclean-libtool mostlyclean-tags \
+ mostlyclean-generic
+
+mostlyclean: mostlyclean-recursive
+
+clean-am: clean-noinstLTLIBRARIES clean-compile clean-libtool \
+ clean-tags clean-generic mostlyclean-am
+
+clean: clean-recursive
+
+distclean-am: distclean-noinstLTLIBRARIES distclean-compile \
+ distclean-libtool distclean-tags distclean-generic \
+ clean-am
+ -rm -f libtool
+
+distclean: distclean-recursive
+
+maintainer-clean-am: maintainer-clean-noinstLTLIBRARIES \
+ maintainer-clean-compile maintainer-clean-libtool \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-recursive
+
+.PHONY: mostlyclean-noinstLTLIBRARIES distclean-noinstLTLIBRARIES \
+clean-noinstLTLIBRARIES maintainer-clean-noinstLTLIBRARIES \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile mostlyclean-libtool distclean-libtool \
+clean-libtool maintainer-clean-libtool install-recursive \
+uninstall-recursive install-data-recursive uninstall-data-recursive \
+install-exec-recursive uninstall-exec-recursive installdirs-recursive \
+uninstalldirs-recursive all-recursive check-recursive \
+installcheck-recursive info-recursive dvi-recursive \
+mostlyclean-recursive distclean-recursive clean-recursive \
+maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
+distclean-tags clean-tags maintainer-clean-tags distdir info-am info \
+dvi-am dvi check check-am installcheck-am installcheck install-exec-am \
+install-exec install-data-am install-data install-am install \
+uninstall-am uninstall all-redirect all-am all install-strip \
+installdirs-am installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+# *.s are not preprocessed at all.
+.s.o:
+ $(CCAS) $(COMPILE_FLAGS) $<
+.s.obj:
+ $(CCAS) $(COMPILE_FLAGS) `cygpath -w $<`
+.s.lo:
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) $<
+
+# *.S are preprocessed with CPP.
+.S.o:
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.S.obj:
+ $(CPP) $(COMPILE_FLAGS) `cygpath -w $<` | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+
+# We have to rebuild the static object file without passing -DPIC to
+# preprocessor. The overhead cost is one extra assembly pass. FIXME:
+# Teach libtool how to assemble with a preprocessor pass (CPP or m4).
+
+.S.lo:
+ $(CPP) $(COMPILE_FLAGS) -DPIC $< | grep -v '^#' >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(CPP) $(COMPILE_FLAGS) $< | grep -v '^#' >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
+
+# *.asm files are preprocessed with m4.
+.asm.o:
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.obj:
+ $(M4) -DOPERATION_$* `cygpath -w $<` >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ rm -f tmp-$*.s
+.asm.lo:
+ $(M4) -DPIC -DOPERATION_$* $< >tmp-$*.s
+ $(LIBTOOL) --mode=compile $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
+ $(M4) -DOPERATION_$* $< >tmp-$*.s
+ $(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $*.o
+ rm -f tmp-$*.s
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rts/gmp/mpn/README b/rts/gmp/mpn/README
new file mode 100644
index 0000000000..7453c9d03e
--- /dev/null
+++ b/rts/gmp/mpn/README
@@ -0,0 +1,13 @@
+This directory contains all code for the mpn layer of GMP.
+
+Most subdirectories contain machine-dependent code, written in assembly or C.
+The `generic' subdirectory contains default code, used when there is no
+machine-dependent replacement for a particular machine.
+
+There is one subdirectory for each ISA family. Note that e.g., 32-bit SPARC
+and 64-bit SPARC are very different ISA's, and thus cannot share any code.
+
+A particular compile will only use code from one subdirectory, and the
+`generic' subdirectory. The ISA-specific subdirectories contain hierarchies of
+directories for various architecture variants and implementations; the
+top-most level contains code that runs correctly on all variants.
diff --git a/rts/gmp/mpn/a29k/add_n.s b/rts/gmp/mpn/a29k/add_n.s
new file mode 100644
index 0000000000..e3ee6dfa60
--- /dev/null
+++ b/rts/gmp/mpn/a29k/add_n.s
@@ -0,0 +1,120 @@
+; 29000 __gmpn_add -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that don't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers. gr116 is used for this purpose. Note that
+; gr116==0 means that carry should be set.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_add_n
+ .word 0x60000
+___gmpn_add_n:
+ srl gr117,lr5,3
+ sub gr118,gr117,1
+ jmpt gr118,Ltail
+ constn gr116,-1 ; init cy reg
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+ mtsrim cr,(8-1)
+ loadm 0,0,gr104,lr4
+ add lr4,lr4,32
+
+ subr gr116,gr116,0 ; restore carry
+ addc gr96,gr96,gr104
+ addc gr97,gr97,gr105
+ addc gr98,gr98,gr106
+ addc gr99,gr99,gr107
+ addc gr100,gr100,gr108
+ addc gr101,gr101,gr109
+ addc gr102,gr102,gr110
+ addc gr103,gr103,gr111
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr96,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+ and lr5,lr5,(8-1)
+Ltail: sub gr118,lr5,1 ; count for CR
+ jmpt gr118,Lend
+ sub gr117,lr5,2 ; count for jmpfdec
+
+ mtsr cr,gr118
+ loadm 0,0,gr96,lr3
+ mtsr cr,gr118
+ loadm 0,0,gr104,lr4
+
+ subr gr116,gr116,0 ; restore carry
+
+ jmpfdec gr117,L1
+ addc gr96,gr96,gr104
+ jmp Lstore
+ mtsr cr,gr118
+L1: jmpfdec gr117,L2
+ addc gr97,gr97,gr105
+ jmp Lstore
+ mtsr cr,gr118
+L2: jmpfdec gr117,L3
+ addc gr98,gr98,gr106
+ jmp Lstore
+ mtsr cr,gr118
+L3: jmpfdec gr117,L4
+ addc gr99,gr99,gr107
+ jmp Lstore
+ mtsr cr,gr118
+L4: jmpfdec gr117,L5
+ addc gr100,gr100,gr108
+ jmp Lstore
+ mtsr cr,gr118
+L5: jmpfdec gr117,L6
+ addc gr101,gr101,gr109
+ jmp Lstore
+ mtsr cr,gr118
+L6: addc gr102,gr102,gr110
+
+Lstore: storem 0,0,gr96,lr2
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+Lend: jmpi lr0
+ add gr96,gr116,1
diff --git a/rts/gmp/mpn/a29k/addmul_1.s b/rts/gmp/mpn/a29k/addmul_1.s
new file mode 100644
index 0000000000..f51b6d7af6
--- /dev/null
+++ b/rts/gmp/mpn/a29k/addmul_1.s
@@ -0,0 +1,113 @@
+; 29000 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; add the product to a second limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size lr4
+; s2_limb lr5
+
+ .cputype 29050
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_addmul_1
+ .word 0x60000
+___gmpn_addmul_1:
+ sub lr4,lr4,8
+ jmpt lr4,Ltail
+ const gr120,0 ; init cylimb reg
+
+ srl gr117,lr4,3 ; divide by 8
+ sub gr117,gr117,1 ; count for jmpfdec
+
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+
+ multiplu gr104,gr96,lr5
+ multmu gr96,gr96,lr5
+ multiplu gr105,gr97,lr5
+ multmu gr97,gr97,lr5
+ multiplu gr106,gr98,lr5
+ multmu gr98,gr98,lr5
+ multiplu gr107,gr99,lr5
+ multmu gr99,gr99,lr5
+ multiplu gr108,gr100,lr5
+ multmu gr100,gr100,lr5
+ multiplu gr109,gr101,lr5
+ multmu gr101,gr101,lr5
+ multiplu gr110,gr102,lr5
+ multmu gr102,gr102,lr5
+ multiplu gr111,gr103,lr5
+ multmu gr103,gr103,lr5
+
+ add gr104,gr104,gr120
+ addc gr105,gr105,gr96
+ addc gr106,gr106,gr97
+ addc gr107,gr107,gr98
+ addc gr108,gr108,gr99
+ addc gr109,gr109,gr100
+ addc gr110,gr110,gr101
+ addc gr111,gr111,gr102
+ addc gr120,gr103,0
+
+ mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr2
+
+ add gr104,gr96,gr104
+ addc gr105,gr97,gr105
+ addc gr106,gr98,gr106
+ addc gr107,gr99,gr107
+ addc gr108,gr100,gr108
+ addc gr109,gr101,gr109
+ addc gr110,gr102,gr110
+ addc gr111,gr103,gr111
+ addc gr120,gr120,0
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr104,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+Ltail: and lr4,lr4,(8-1)
+ sub gr118,lr4,1 ; count for CR
+ jmpt gr118,Lend
+ sub lr4,lr4,2
+ sub lr2,lr2,4 ; offset res_ptr by one limb
+
+Loop2: load 0,0,gr116,lr3
+ add lr3,lr3,4
+ multiplu gr117,gr116,lr5
+ multmu gr118,gr116,lr5
+ add lr2,lr2,4
+ load 0,0,gr119,lr2
+ add gr117,gr117,gr120
+ addc gr118,gr118,0
+ add gr117,gr117,gr119
+ store 0,0,gr117,lr2
+ jmpfdec lr4,Loop2
+ addc gr120,gr118,0
+
+Lend: jmpi lr0
+ or gr96,gr120,0 ; copy
diff --git a/rts/gmp/mpn/a29k/lshift.s b/rts/gmp/mpn/a29k/lshift.s
new file mode 100644
index 0000000000..93e1917127
--- /dev/null
+++ b/rts/gmp/mpn/a29k/lshift.s
@@ -0,0 +1,93 @@
+; 29000 __gmpn_lshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size	lr4
+; cnt	lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_lshift
+ .word 0x60000
+___gmpn_lshift:
+ sll gr116,lr4,2
+ add lr3,gr116,lr3
+ add lr2,gr116,lr2
+ sub lr3,lr3,4
+ load 0,0,gr119,lr3
+
+ subr gr116,lr5,32
+ srl gr96,gr119,gr116 ; return value
+ sub lr4,lr4,1 ; actual loop count is SIZE - 1
+
+ srl gr117,lr4,3 ; chunk count = (actual count) / 8
+ cpeq gr118,gr117,0
+ jmpt gr118,Ltail
+ mtsr fc,lr5
+
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: sub lr3,lr3,32
+ mtsrim cr,(8-1)
+ loadm 0,0,gr100,lr3
+
+ extract gr109,gr119,gr107
+ extract gr108,gr107,gr106
+ extract gr107,gr106,gr105
+ extract gr106,gr105,gr104
+ extract gr105,gr104,gr103
+ extract gr104,gr103,gr102
+ extract gr103,gr102,gr101
+ extract gr102,gr101,gr100
+
+ sub lr2,lr2,32
+ mtsrim cr,(8-1)
+ storem 0,0,gr102,lr2
+ jmpfdec gr117,Loop
+ or gr119,gr100,0
+
+; Code for the last up-to-7 limbs.
+
+ and lr4,lr4,(8-1)
+Ltail: cpeq gr118,lr4,0
+ jmpt gr118,Lend
+ sub lr4,lr4,2 ; count for jmpfdec
+
+Loop2: sub lr3,lr3,4
+ load 0,0,gr116,lr3
+ extract gr117,gr119,gr116
+ sub lr2,lr2,4
+ store 0,0,gr117,lr2
+ jmpfdec lr4,Loop2
+ or gr119,gr116,0
+
+Lend: extract gr117,gr119,0
+ sub lr2,lr2,4
+ jmpi lr0
+ store 0,0,gr117,lr2
diff --git a/rts/gmp/mpn/a29k/mul_1.s b/rts/gmp/mpn/a29k/mul_1.s
new file mode 100644
index 0000000000..6bcf7ce0cf
--- /dev/null
+++ b/rts/gmp/mpn/a29k/mul_1.s
@@ -0,0 +1,97 @@
+; 29000 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size lr4
+; s2_limb lr5
+
+ .cputype 29050
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_mul_1
+ .word 0x60000
+___gmpn_mul_1:
+ sub lr4,lr4,8
+ jmpt lr4,Ltail
+ const gr120,0 ; init cylimb reg
+
+ srl gr117,lr4,3 ; divide by 8
+ sub gr117,gr117,1 ; count for jmpfdec
+
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+
+ multiplu gr104,gr96,lr5
+ multmu gr96,gr96,lr5
+ multiplu gr105,gr97,lr5
+ multmu gr97,gr97,lr5
+ multiplu gr106,gr98,lr5
+ multmu gr98,gr98,lr5
+ multiplu gr107,gr99,lr5
+ multmu gr99,gr99,lr5
+ multiplu gr108,gr100,lr5
+ multmu gr100,gr100,lr5
+ multiplu gr109,gr101,lr5
+ multmu gr101,gr101,lr5
+ multiplu gr110,gr102,lr5
+ multmu gr102,gr102,lr5
+ multiplu gr111,gr103,lr5
+ multmu gr103,gr103,lr5
+
+ add gr104,gr104,gr120
+ addc gr105,gr105,gr96
+ addc gr106,gr106,gr97
+ addc gr107,gr107,gr98
+ addc gr108,gr108,gr99
+ addc gr109,gr109,gr100
+ addc gr110,gr110,gr101
+ addc gr111,gr111,gr102
+ addc gr120,gr103,0
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr104,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+Ltail: and lr4,lr4,(8-1)
+ sub gr118,lr4,1 ; count for CR
+ jmpt gr118,Lend
+ sub lr4,lr4,2
+ sub lr2,lr2,4 ; offset res_ptr by one limb
+
+Loop2: load 0,0,gr116,lr3
+ add lr3,lr3,4
+ multiplu gr117,gr116,lr5
+ multmu gr118,gr116,lr5
+ add lr2,lr2,4
+ add gr117,gr117,gr120
+ store 0,0,gr117,lr2
+ jmpfdec lr4,Loop2
+ addc gr120,gr118,0
+
+Lend: jmpi lr0
+ or gr96,gr120,0 ; copy
diff --git a/rts/gmp/mpn/a29k/rshift.s b/rts/gmp/mpn/a29k/rshift.s
new file mode 100644
index 0000000000..ea163bff2b
--- /dev/null
+++ b/rts/gmp/mpn/a29k/rshift.s
@@ -0,0 +1,89 @@
+; 29000 __gmpn_rshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_rshift
+ .word 0x60000
+___gmpn_rshift:
+ load 0,0,gr119,lr3
+ add lr3,lr3,4
+
+ subr gr116,lr5,32
+ sll gr96,gr119,gr116 ; return value
+ sub lr4,lr4,1 ; actual loop count is SIZE - 1
+
+ srl gr117,lr4,3 ; chunk count = (actual count) / 8
+ cpeq gr118,gr117,0
+ jmpt gr118,Ltail
+ mtsr fc,gr116
+
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr100,lr3
+ add lr3,lr3,32
+
+ extract gr98,gr100,gr119
+ extract gr99,gr101,gr100
+ extract gr100,gr102,gr101
+ extract gr101,gr103,gr102
+ extract gr102,gr104,gr103
+ extract gr103,gr105,gr104
+ extract gr104,gr106,gr105
+ extract gr105,gr107,gr106
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr98,lr2
+ add lr2,lr2,32
+ jmpfdec gr117,Loop
+ or gr119,gr107,0
+
+; Code for the last up-to-7 limbs.
+
+ and lr4,lr4,(8-1)
+Ltail: cpeq gr118,lr4,0
+ jmpt gr118,Lend
+ sub lr4,lr4,2 ; count for jmpfdec
+
+Loop2: load 0,0,gr100,lr3
+ add lr3,lr3,4
+ extract gr117,gr100,gr119
+ store 0,0,gr117,lr2
+ add lr2,lr2,4
+ jmpfdec lr4,Loop2
+ or gr119,gr100,0
+
+Lend: srl gr117,gr119,lr5
+ jmpi lr0
+ store 0,0,gr117,lr2
diff --git a/rts/gmp/mpn/a29k/sub_n.s b/rts/gmp/mpn/a29k/sub_n.s
new file mode 100644
index 0000000000..c6b64c5bee
--- /dev/null
+++ b/rts/gmp/mpn/a29k/sub_n.s
@@ -0,0 +1,120 @@
+; 29000 __gmpn_sub -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; s2_ptr lr4
+; size lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that don't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers. gr116 is used for this purpose. Note that
+; gr116==0 means that carry should be set.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_sub_n
+ .word 0x60000
+___gmpn_sub_n:
+ srl gr117,lr5,3
+ sub gr118,gr117,1
+ jmpt gr118,Ltail
+ constn gr116,-1 ; init cy reg
+ sub gr117,gr117,2 ; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+ mtsrim cr,(8-1)
+ loadm 0,0,gr104,lr4
+ add lr4,lr4,32
+
+ subr gr116,gr116,0 ; restore carry
+ subc gr96,gr96,gr104
+ subc gr97,gr97,gr105
+ subc gr98,gr98,gr106
+ subc gr99,gr99,gr107
+ subc gr100,gr100,gr108
+ subc gr101,gr101,gr109
+ subc gr102,gr102,gr110
+ subc gr103,gr103,gr111
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr96,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+ and lr5,lr5,(8-1)
+Ltail: sub gr118,lr5,1 ; count for CR
+ jmpt gr118,Lend
+ sub gr117,lr5,2 ; count for jmpfdec
+
+ mtsr cr,gr118
+ loadm 0,0,gr96,lr3
+ mtsr cr,gr118
+ loadm 0,0,gr104,lr4
+
+ subr gr116,gr116,0 ; restore carry
+
+ jmpfdec gr117,L1
+ subc gr96,gr96,gr104
+ jmp Lstore
+ mtsr cr,gr118
+L1: jmpfdec gr117,L2
+ subc gr97,gr97,gr105
+ jmp Lstore
+ mtsr cr,gr118
+L2: jmpfdec gr117,L3
+ subc gr98,gr98,gr106
+ jmp Lstore
+ mtsr cr,gr118
+L3: jmpfdec gr117,L4
+ subc gr99,gr99,gr107
+ jmp Lstore
+ mtsr cr,gr118
+L4: jmpfdec gr117,L5
+ subc gr100,gr100,gr108
+ jmp Lstore
+ mtsr cr,gr118
+L5: jmpfdec gr117,L6
+ subc gr101,gr101,gr109
+ jmp Lstore
+ mtsr cr,gr118
+L6: subc gr102,gr102,gr110
+
+Lstore: storem 0,0,gr96,lr2
+ subc gr116,gr116,gr116 ; gr116 = not(cy)
+
+Lend: jmpi lr0
+ add gr96,gr116,1
diff --git a/rts/gmp/mpn/a29k/submul_1.s b/rts/gmp/mpn/a29k/submul_1.s
new file mode 100644
index 0000000000..ef97d8d4e5
--- /dev/null
+++ b/rts/gmp/mpn/a29k/submul_1.s
@@ -0,0 +1,116 @@
+; 29000 __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+; subtract the product from a second limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr lr2
+; s1_ptr lr3
+; size lr4
+; s2_limb lr5
+
+ .cputype 29050
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___gmpn_submul_1
+ .word 0x60000
+___gmpn_submul_1:
+ sub lr4,lr4,8
+ jmpt lr4,Ltail
+ const gr120,0 ; init cylimb reg
+
+ srl gr117,lr4,3 ; divide by 8
+ sub gr117,gr117,1 ; count for jmpfdec
+
+Loop: mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr3
+ add lr3,lr3,32
+
+ multiplu gr104,gr96,lr5
+ multmu gr96,gr96,lr5
+ multiplu gr105,gr97,lr5
+ multmu gr97,gr97,lr5
+ multiplu gr106,gr98,lr5
+ multmu gr98,gr98,lr5
+ multiplu gr107,gr99,lr5
+ multmu gr99,gr99,lr5
+ multiplu gr108,gr100,lr5
+ multmu gr100,gr100,lr5
+ multiplu gr109,gr101,lr5
+ multmu gr101,gr101,lr5
+ multiplu gr110,gr102,lr5
+ multmu gr102,gr102,lr5
+ multiplu gr111,gr103,lr5
+ multmu gr103,gr103,lr5
+
+ add gr104,gr104,gr120
+ addc gr105,gr105,gr96
+ addc gr106,gr106,gr97
+ addc gr107,gr107,gr98
+ addc gr108,gr108,gr99
+ addc gr109,gr109,gr100
+ addc gr110,gr110,gr101
+ addc gr111,gr111,gr102
+ addc gr120,gr103,0
+
+ mtsrim cr,(8-1)
+ loadm 0,0,gr96,lr2
+
+ sub gr96,gr96,gr104
+ subc gr97,gr97,gr105
+ subc gr98,gr98,gr106
+ subc gr99,gr99,gr107
+ subc gr100,gr100,gr108
+ subc gr101,gr101,gr109
+ subc gr102,gr102,gr110
+ subc gr103,gr103,gr111
+
+ add gr104,gr103,gr111 ; invert carry from previous sub
+ addc gr120,gr120,0
+
+ mtsrim cr,(8-1)
+ storem 0,0,gr96,lr2
+ jmpfdec gr117,Loop
+ add lr2,lr2,32
+
+Ltail: and lr4,lr4,(8-1)
+ sub gr118,lr4,1 ; count for CR
+ jmpt gr118,Lend
+ sub lr4,lr4,2
+ sub lr2,lr2,4 ; offset res_ptr by one limb
+
+Loop2: load 0,0,gr116,lr3
+ add lr3,lr3,4
+ multiplu gr117,gr116,lr5
+ multmu gr118,gr116,lr5
+ add lr2,lr2,4
+ load 0,0,gr119,lr2
+ add gr117,gr117,gr120
+ addc gr118,gr118,0
+ sub gr119,gr119,gr117
+ add gr104,gr119,gr117 ; invert carry from previous sub
+ store 0,0,gr119,lr2
+ jmpfdec lr4,Loop2
+ addc gr120,gr118,0
+
+Lend: jmpi lr0
+ or gr96,gr120,0 ; copy
diff --git a/rts/gmp/mpn/a29k/udiv.s b/rts/gmp/mpn/a29k/udiv.s
new file mode 100644
index 0000000000..fdd53a9a88
--- /dev/null
+++ b/rts/gmp/mpn/a29k/udiv.s
@@ -0,0 +1,30 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___udiv_qrnnd
+ .word 0x60000
+___udiv_qrnnd:
+ mtsr q,lr3
+ dividu gr96,lr4,lr5
+ mfsr gr116,q
+ jmpi lr0
+ store 0,0,gr116,lr2
diff --git a/rts/gmp/mpn/a29k/umul.s b/rts/gmp/mpn/a29k/umul.s
new file mode 100644
index 0000000000..7741981167
--- /dev/null
+++ b/rts/gmp/mpn/a29k/umul.s
@@ -0,0 +1,29 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+ .sect .lit,lit
+ .text
+ .align 4
+ .global ___umul_ppmm
+ .word 0x50000
+___umul_ppmm:
+ multiplu gr116,lr3,lr4
+ multmu gr96,lr3,lr4
+ jmpi lr0
+ store 0,0,gr116,lr2
diff --git a/rts/gmp/mpn/alpha/README b/rts/gmp/mpn/alpha/README
new file mode 100644
index 0000000000..744260c7c5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/README
@@ -0,0 +1,224 @@
+This directory contains mpn functions optimized for DEC Alpha processors.
+
+ALPHA ASSEMBLY RULES AND REGULATIONS
+
+The `.prologue N' pseudo op marks the end of instruction that needs
+special handling by unwinding. It also says whether $27 is really
+needed for computing the gp. The `.mask M' pseudo op says which
+registers are saved on the stack, and at what offset in the frame.
+
+Cray code is very very different...
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+EV4
+
+1. This chip has very limited store bandwidth. The on-chip L1 cache is
+   write-through, and a cache line is transferred from the store buffer to
+   the off-chip L2 in as much as 15 cycles on most systems.  This delay hurts
+ mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift.
+
+2. Pairing is possible between memory instructions and integer arithmetic
+ instructions.
+
+3. mulq and umulh are documented to have a latency of 23 cycles, but 2 of
+ these cycles are pipelined. Thus, multiply instructions can be issued at
+ a rate of one each 21st cycle.
+
+EV5
+
+1. The memory bandwidth of this chip seems excellent, both for loads and
+ stores. Even when the working set is larger than the on-chip L1 and L2
+   caches, the performance remains almost unaffected.
+
+2. mulq has a latency of 12 cycles and an issue rate of 1 each 8th cycle.
+ umulh has a measured latency of 14 cycles and an issue rate of 1 each
+ 10th cycle. But the exact timing is somewhat confusing.
+
+3. mpn_add_n. With 4-fold unrolling, we need 37 instructions, whereof 12
+ are memory operations. This will take at least
+ ceil(37/2) [dual issue] + 1 [taken branch] = 19 cycles
+ We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data
+ cache cycles, which should be completely hidden in the 19 issue cycles.
+ The computation is inherently serial, with these dependencies:
+
+ ldq ldq
+ \ /\
+ (or) addq |
+ |\ / \ |
+ | addq cmpult
+ \ | |
+ cmpult |
+ \ /
+ or
+
+ I.e., 3 operations are needed between carry-in and carry-out, making 12
+ cycles the absolute minimum for the 4 limbs. We could replace the `or'
+   with a cmoveq/cmovne, which could issue one cycle earlier than the `or',
+   but that might waste a cycle on EV4.  The total depth remains unaffected,
+ since cmov has a latency of 2 cycles.
+
+ addq
+ / \
+ addq cmpult
+ | \
+ cmpult -> cmovne
+
+Montgomery has a slightly different way of computing carry that requires one
+less instruction, but has depth 4 (instead of the current 3). Since the
+code is currently instruction issue bound, Montgomery's idea should save us
+1/2 cycle per limb, or bring us down to a total of 17 cycles or 4.25
+cycles/limb. Unfortunately, this method will not be good for the EV6.
+
+EV6
+
+Here we have a really parallel pipeline, capable of issuing up to 4 integer
+instructions per cycle. One integer multiply instruction can issue each
+cycle. To get optimal speed, we need to pretend we are vectorizing the code,
+i.e., minimize the iterative dependencies.
+
+There are two dependencies to watch out for. 1) Address arithmetic
+dependencies, and 2) carry propagation dependencies.
+
+We can avoid serializing due to address arithmetic by unrolling the loop, so
+that addresses don't depend heavily on an index variable. Avoiding
+serializing because of carry propagation is trickier; the ultimate performance
+of the code will be determined of the number of latency cycles it takes from
+accepting carry-in to a vector point until we can generate carry-out.
+
+Most integer instructions can execute in either the L0, U0, L1, or U1
+pipelines. Shifts only execute in U0 and U1, and multiply only in U1.
+
+CMOV instructions split into two internal instructions, CMOV1 and CMOV2, but
+they execute efficiently.  But CMOV splits the mapping process (see pg 2-26 in
+cmpwrgd.pdf), suggesting the CMOV should always be placed as the last
+instruction of an aligned 4 instruction block (?).
+
+Perhaps the most important issue is the latency between the L0/U0 and L1/U1
+clusters; a result obtained on either cluster has an extra cycle of latency
+for consumers in the opposite cluster. Because of the dynamic nature of the
+implementation, it is hard to predict where an instruction will execute.
+
+The shift loops need (per limb):
+ 1 load (Lx pipes)
+ 1 store (Lx pipes)
+ 2 shift (Ux pipes)
+ 1 iaddlog (Lx pipes, Ux pipes)
+Obviously, since the pipes are very equally loaded, we should get 4 insn/cycle, or 1.25 cycles/limb.
+
+For mpn_add_n, we currently have
+ 2 load (Lx pipes)
+ 1 store (Lx pipes)
+ 5 iaddlog (Lx pipes, Ux pipes)
+
+Again, we have a perfect balance and will be limited by carry propagation
+delays, currently three cycles.  The superoptimizer indicates that there
+might be sequences that--using a final cmov--have a carry propagation delay
+of just two. Montgomery's subtraction sequence could perhaps be used, by
+complementing some operands. All in all, we should get down to 2 cycles
+without much problems.
+
+For mpn_mul_1, we could do, just like for mpn_add_n:
+ not newlo,notnewlo
+ addq cylimb,newlo,newlo || cmpult cylimb,notnewlo,cyout
+ addq cyout,newhi,cylimb
+and get 2-cycle carry propagation. The instructions needed will be
+ 1 ld (Lx pipes)
+ 1 st (Lx pipes)
+ 2 mul (U1 pipe)
+ 4 iaddlog (Lx pipes, Ux pipes)
+issue1: addq not mul ld
+issue2: cmpult addq mul st
+Conclusion: no cluster delays and 2-cycle carry delays will give us 2 cycles/limb!
+
+Last, we have mpn_addmul_1. Almost certainly, we will get down to 3
+cycles/limb, which would be absolutely awesome.
+
+Old, perhaps obsolete addmul_1 dependency diagram (needs 175 columns wide screen):
+
+ i
+ s
+ s i
+ u n
+ e s
+ d t
+ r
+ i u
+l n c
+i s t
+v t i
+e r o
+ u n
+v c
+a t t
+l i y
+u o p
+e n e
+s s s
+ issue
+ in
+ cycle
+ -1 ldq
+ / \
+ 0 | \
+ | \
+ 1 | |
+ | |
+ 2 | | ldq
+ | | / \
+ 3 | mulq | \
+ | \ | \
+ 4 umulh \ | |
+ | | | |
+ 5 | | | | ldq
+ | | | | / \
+ 4calm 6 | | ldq | mulq | \
+ | | / | \ | \
+ 4casm 7 | | / umulh \ | |
+6 | || | | | |
+ 3aal 8 | || | | | | ldq
+7 | || | | | | / \
+ 4calm 9 | || | | ldq | mulq | \
+9 | || | | / | \ | \
+ 4casm 10 | || | | / umulh \ | |
+9 | || | || | | | |
+ 3aal 11 | addq | || | | | | ldq
+9 | // \ | || | | | | / \
+ 4calm 12 \ cmpult addq<-cy | || | | ldq | mulq | \
+13 \ / // \ | || | | / | \ | \
+ 4casm 13 addq cmpult stq | || | | / umulh \ | |
+11 \ / | || | || | | | |
+ 3aal 14 addq | addq | || | | | | ldq
+10 \ | // \ | || | | | | / \
+ 4calm 15 cy ----> \ cmpult addq<-cy | || | | ldq | mulq | \
+13 \ / // \ | || | | / | \ | \
+ 4casm 16 addq cmpult stq | || | | / umulh \ | |
+11 \ / | || | || | | | |
+ 3aal 17 addq | addq | || | | | |
+10 \ | // \ | || | | | |
+ 4calm 18 cy ----> \ cmpult addq<-cy | || | | ldq | mulq
+13 \ / // \ | || | | / | \
+ 4casm 19 addq cmpult stq | || | | / umulh \
+11 \ / | || | || | |
+ 3aal 20 addq | addq | || | |
+10 \ | // \ | || | |
+ 4calm 21 cy ----> \ cmpult addq<-cy | || | | ldq
+ \ / // \ | || | | /
+ 22 addq cmpult stq | || | | /
+ \ / | || | ||
+ 23 addq | addq | ||
+ \ | // \ | ||
+ 24 cy ----> \ cmpult addq<-cy | ||
+ \ / // \ | ||
+ 25 addq cmpult stq | ||
+ \ / | ||
+ 26 addq | addq
+ \ | // \
+ 27 cy ----> \ cmpult addq<-cy
+ \ / // \
+ 28 addq cmpult stq
+ \ /
+As many as 6 consecutive points will be under execution simultaneously, or if we addq
+schedule loads even further away, maybe 7 or 8. But the number of live quantities \
+is reasonable, and can easily be satisfied. cy ---->
diff --git a/rts/gmp/mpn/alpha/add_n.asm b/rts/gmp/mpn/alpha/add_n.asm
new file mode 100644
index 0000000000..08d6a9f7b8
--- /dev/null
+++ b/rts/gmp/mpn/alpha/add_n.asm
@@ -0,0 +1,114 @@
+dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ ldq r3,0(r17)
+ ldq r4,0(r18)
+
+ subq r19,1,r19
+ and r19,4-1,r2 C number of limbs in first loop
+ bis r31,r31,r0
+ beq r2,$L0 C if multiple of 4 limbs, skip first loop
+
+ subq r19,r2,r19
+
+$Loop0: subq r2,1,r2
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ addq r17,8,r17
+ addq r18,8,r18
+ bis r5,r5,r3
+ bis r6,r6,r4
+ addq r16,8,r16
+ bne r2,$Loop0
+
+$L0: beq r19,$Lend
+
+ ALIGN(8)
+$Loop: subq r19,4,r19
+
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ ldq r3,16(r17)
+ addq r6,r0,r6
+ ldq r4,16(r18)
+ cmpult r6,r0,r1
+ addq r5,r6,r6
+ cmpult r6,r5,r0
+ stq r6,8(r16)
+ bis r0,r1,r0
+
+ ldq r5,24(r17)
+ addq r4,r0,r4
+ ldq r6,24(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,16(r16)
+ bis r0,r1,r0
+
+ ldq r3,32(r17)
+ addq r6,r0,r6
+ ldq r4,32(r18)
+ cmpult r6,r0,r1
+ addq r5,r6,r6
+ cmpult r6,r5,r0
+ stq r6,24(r16)
+ bis r0,r1,r0
+
+ addq r17,32,r17
+ addq r18,32,r18
+ addq r16,32,r16
+ bne r19,$Loop
+
+$Lend: addq r4,r0,r4
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/addmul_1.asm b/rts/gmp/mpn/alpha/addmul_1.asm
new file mode 100644
index 0000000000..4ea900be6b
--- /dev/null
+++ b/rts/gmp/mpn/alpha/addmul_1.asm
@@ -0,0 +1,87 @@
+dnl Alpha __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ umulh r2,r19,r0 C r0 = prod_high
+ beq r18,$Lend1 C jump if size was == 1
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ addq r5,r3,r3
+ cmpult r3,r5,r4
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ beq r18,$Lend2 C jump if size was == 2
+
+ ALIGN(8)
+$Loop: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ subq r18,1,r18 C size--
+ umulh r2,r19,r4 C r4 = cy_limb
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5,r3,r3
+ cmpult r3,r5,r5
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ addq r5,r0,r0 C combine carries
+ bne r18,$Loop
+
+$Lend2: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ umulh r2,r19,r4 C r4 = cy_limb
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5,r3,r3
+ cmpult r3,r5,r5
+ stq r3,0(r16)
+ addq r5,r0,r0 C combine carries
+ addq r4,r0,r0 C cy_limb = prod_high + cy
+ ret r31,(r26),1
+$Lend1: addq r5,r3,r3
+ cmpult r3,r5,r5
+ stq r3,0(r16)
+ addq r0,r5,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_addmul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/cntlz.asm b/rts/gmp/mpn/alpha/cntlz.asm
new file mode 100644
index 0000000000..febb3b70d9
--- /dev/null
+++ b/rts/gmp/mpn/alpha/cntlz.asm
@@ -0,0 +1,68 @@
+dnl Alpha auxiliary for longlong.h's count_leading_zeros
+
+dnl Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl DISCUSSION:
+
+dnl Other methods have been tried, and using a 128-entry table actually trims
+dnl about 10% of the execution time (on a 21164) when the table is in the L1
+dnl cache. But under non-benchmarking conditions, the table will hardly be in
+dnl the L1 cache. Tricky bit-fiddling methods with multiplies and magic tables
+dnl are also possible, but they require many more instructions than the current
+dnl code. (But for count_trailing_zeros, such tricks are beneficial.)
+dnl Finally, converting to floating-point and extracting the exponent is much
+dnl slower.
+
+ASM_START()
+PROLOGUE(MPN(count_leading_zeros))
+ bis r31,63,r0 C initialize partial result count
+
+ srl r16,32,r1 C shift down 32 steps -> r1
+ cmovne r1,r1,r16 C select r1 if non-zero
+ cmovne r1,31,r0 C if r1 is nonzero choose smaller count
+
+ srl r16,16,r1 C shift down 16 steps -> r1
+ subq r0,16,r2 C generate new partial result count
+ cmovne r1,r1,r16 C choose new r1 if non-zero
+ cmovne r1,r2,r0 C choose new count if r1 was non-zero
+
+ srl r16,8,r1
+ subq r0,8,r2
+ cmovne r1,r1,r16
+ cmovne r1,r2,r0
+
+ srl r16,4,r1
+ subq r0,4,r2
+ cmovne r1,r1,r16
+ cmovne r1,r2,r0
+
+ srl r16,2,r1
+ subq r0,2,r2
+ cmovne r1,r1,r16
+ cmovne r1,r2,r0
+
+ srl r16,1,r1 C extract bit 1
+ subq r0,r1,r0 C subtract it from partial result
+
+ ret r31,(r26),1
+EPILOGUE(MPN(count_leading_zeros))
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/default.m4 b/rts/gmp/mpn/alpha/default.m4
new file mode 100644
index 0000000000..5f4c48dc73
--- /dev/null
+++ b/rts/gmp/mpn/alpha/default.m4
@@ -0,0 +1,77 @@
+divert(-1)
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+define(`ASM_START',
+ `
+ .set noreorder
+ .set noat')
+
+define(`X',`0x$1')
+define(`FLOAT64',
+ `
+ .align 3
+$1: .t_floating $2')
+
+define(`PROLOGUE',
+ `
+ .text
+ .align 3
+ .globl $1
+ .ent $1
+$1:
+ .frame r30,0,r26
+ .prologue 0')
+
+define(`PROLOGUE_GP',
+ `
+ .text
+ .align 3
+ .globl $1
+ .ent $1
+$1:
+ ldgp r29,0(r27)
+ .frame r30,0,r26
+ .prologue 1')
+
+define(`EPILOGUE',
+ `
+ .end $1')
+
+dnl Map register names r0, r1, etc, to `$0', `$1', etc.
+dnl This is needed on all systems but Unicos
+forloop(i,0,31,
+`define(`r'i,``$''i)'
+)
+forloop(i,0,31,
+`define(`f'i,``$f''i)'
+)
+
+define(`DATASTART',
+ `dnl
+ DATA
+$1:')
+define(`DATAEND',`dnl')
+
+define(`ASM_END',`dnl')
+
+divert
diff --git a/rts/gmp/mpn/alpha/ev5/add_n.asm b/rts/gmp/mpn/alpha/ev5/add_n.asm
new file mode 100644
index 0000000000..716d6404ae
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/add_n.asm
@@ -0,0 +1,143 @@
+dnl Alpha EV5 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ bis r31,r31,r25 C clear cy
+ subq r19,4,r19 C decr loop cnt
+ blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ ldq r1,8(r18)
+ ldq r5,8(r17)
+ addq r17,32,r17 C update s1_ptr
+ ldq r2,16(r18)
+ addq r0,r4,r20 C 1st main add
+ ldq r3,24(r18)
+ subq r19,4,r19 C decr loop cnt
+ ldq r6,-16(r17)
+ cmpult r20,r0,r25 C compute cy from last add
+ ldq r7,-8(r17)
+ addq r1,r5,r28 C 2nd main add
+ addq r18,32,r18 C update s2_ptr
+ addq r28,r25,r21 C 2nd carry add
+ cmpult r28,r5,r8 C compute cy from last add
+ blt r19,$Lend1 C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+ ALIGN(16)
+$Loop: cmpult r21,r28,r25 C compute cy from last add
+ ldq r0,0(r18)
+ bis r8,r25,r25 C combine cy from the two adds
+ ldq r1,8(r18)
+ addq r2,r6,r28 C 3rd main add
+ ldq r4,0(r17)
+ addq r28,r25,r22 C 3rd carry add
+ ldq r5,8(r17)
+ cmpult r28,r6,r8 C compute cy from last add
+ cmpult r22,r28,r25 C compute cy from last add
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two adds
+ stq r21,8(r16)
+ addq r3,r7,r28 C 4th main add
+ addq r28,r25,r23 C 4th carry add
+ cmpult r28,r7,r8 C compute cy from last add
+ cmpult r23,r28,r25 C compute cy from last add
+ addq r17,32,r17 C update s1_ptr
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,32,r16 C update res_ptr
+ addq r0,r4,r28 C 1st main add
+ ldq r2,16(r18)
+ addq r25,r28,r20 C 1st carry add
+ ldq r3,24(r18)
+ cmpult r28,r4,r8 C compute cy from last add
+ ldq r6,-16(r17)
+ cmpult r20,r28,r25 C compute cy from last add
+ ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two adds
+ subq r19,4,r19 C decr loop cnt
+ stq r22,-16(r16)
+ addq r1,r5,r28 C 2nd main add
+ stq r23,-8(r16)
+ addq r25,r28,r21 C 2nd carry add
+ addq r18,32,r18 C update s2_ptr
+ cmpult r28,r5,r8 C compute cy from last add
+ bge r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1: cmpult r21,r28,r25 C compute cy from last add
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r2,r6,r28 C 3rd main add
+ addq r28,r25,r22 C 3rd carry add
+ cmpult r28,r6,r8 C compute cy from last add
+ cmpult r22,r28,r25 C compute cy from last add
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two adds
+ stq r21,8(r16)
+ addq r3,r7,r28 C 4th main add
+ addq r28,r25,r23 C 4th carry add
+ cmpult r28,r7,r8 C compute cy from last add
+ cmpult r23,r28,r25 C compute cy from last add
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,32,r16 C update res_ptr
+ stq r22,-16(r16)
+ stq r23,-8(r16)
+$Lend2: addq r19,4,r19 C restore loop cnt
+ beq r19,$Lret
+C Start software pipeline for 2nd loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ subq r19,1,r19
+ beq r19,$Lend0
+C 2nd loop handles remaining 1-3 limbs
+ ALIGN(16)
+$Loop0: addq r0,r4,r28 C main add
+ ldq r0,8(r18)
+ cmpult r28,r4,r8 C compute cy from last add
+ ldq r4,8(r17)
+ addq r28,r25,r20 C carry add
+ addq r18,8,r18
+ addq r17,8,r17
+ stq r20,0(r16)
+ cmpult r20,r28,r25 C compute cy from last add
+ subq r19,1,r19 C decr loop cnt
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,8,r16
+ bne r19,$Loop0
+$Lend0: addq r0,r4,r28 C main add
+ addq r28,r25,r20 C carry add
+ cmpult r28,r4,r8 C compute cy from last add
+ cmpult r20,r28,r25 C compute cy from last add
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two adds
+
+$Lret: bis r25,r31,r0 C return cy
+ ret r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/lshift.asm b/rts/gmp/mpn/alpha/ev5/lshift.asm
new file mode 100644
index 0000000000..cb181dda66
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/lshift.asm
@@ -0,0 +1,169 @@
+dnl Alpha EV5 __gmpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ s8addq r18,r17,r17 C make r17 point at end of s1
+ ldq r4,-8(r17) C load first limb
+ subq r31,r19,r20
+ s8addq r18,r16,r16 C make r16 point at end of RES
+ subq r18,1,r18
+ and r18,4-1,r28 C number of limbs in first loop
+ srl r4,r20,r0 C compute function result
+
+ beq r28,$L0
+ subq r18,r28,r18
+
+ ALIGN(8)
+$Loop0: ldq r3,-16(r17)
+ subq r16,8,r16
+ sll r4,r19,r5
+ subq r17,8,r17
+ subq r28,1,r28
+ srl r3,r20,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,0(r16)
+ bne r28,$Loop0
+
+$L0: sll r4,r19,r24
+ beq r18,$Lend
+C warm up phase 1
+ ldq r1,-16(r17)
+ subq r18,4,r18
+ ldq r2,-24(r17)
+ ldq r3,-32(r17)
+ ldq r4,-40(r17)
+ beq r18,$Lend1
+C warm up phase 2
+ srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ ldq r1,-48(r17)
+ sll r2,r19,r22
+ ldq r2,-56(r17)
+ srl r3,r20,r5
+ bis r7,r24,r7
+ sll r3,r19,r23
+ bis r8,r21,r8
+ srl r4,r20,r6
+ ldq r3,-64(r17)
+ sll r4,r19,r24
+ ldq r4,-72(r17)
+ subq r18,4,r18
+ beq r18,$Lend2
+ ALIGN(16)
+C main loop
+$Loop: stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+
+ srl r1,r20,r7
+ subq r18,4,r18
+ sll r1,r19,r21
+ unop C ldq r31,-96(r17)
+
+ srl r2,r20,r8
+ ldq r1,-80(r17)
+ sll r2,r19,r22
+ ldq r2,-88(r17)
+
+ stq r5,-24(r16)
+ bis r7,r24,r7
+ stq r6,-32(r16)
+ bis r8,r21,r8
+
+ srl r3,r20,r5
+ unop C ldq r31,-96(r17)
+ sll r3,r19,r23
+ subq r16,32,r16
+
+ srl r4,r20,r6
+ ldq r3,-96(r17)
+ sll r4,r19,r24
+ ldq r4,-104(r17)
+
+ subq r17,32,r17
+ bne r18,$Loop
+C cool down phase 2/1
+$Lend2: stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+ srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ sll r2,r19,r22
+ stq r5,-24(r16)
+ bis r7,r24,r7
+ stq r6,-32(r16)
+ bis r8,r21,r8
+ srl r3,r20,r5
+ sll r3,r19,r23
+ srl r4,r20,r6
+ sll r4,r19,r24
+C cool down phase 2/2
+ stq r7,-40(r16)
+ bis r5,r22,r5
+ stq r8,-48(r16)
+ bis r6,r23,r6
+ stq r5,-56(r16)
+ stq r6,-64(r16)
+C cool down phase 2/3
+ stq r24,-72(r16)
+ ret r31,(r26),1
+
+C cool down phase 1/1
+$Lend1: srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ sll r2,r19,r22
+ srl r3,r20,r5
+ bis r7,r24,r7
+ sll r3,r19,r23
+ bis r8,r21,r8
+ srl r4,r20,r6
+ sll r4,r19,r24
+C cool down phase 1/2
+ stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+ stq r5,-24(r16)
+ stq r6,-32(r16)
+ stq r24,-40(r16)
+ ret r31,(r26),1
+
+$Lend: stq r24,-8(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/rshift.asm b/rts/gmp/mpn/alpha/ev5/rshift.asm
new file mode 100644
index 0000000000..9940d83fad
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/rshift.asm
@@ -0,0 +1,167 @@
+dnl Alpha EV5 __gmpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 3.25 cycles/limb on the EV5.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ldq r4,0(r17) C load first limb
+ subq r31,r19,r20
+ subq r18,1,r18
+ and r18,4-1,r28 C number of limbs in first loop
+ sll r4,r20,r0 C compute function result
+
+ beq r28,$L0
+ subq r18,r28,r18
+
+ ALIGN(8)
+$Loop0: ldq r3,8(r17)
+ addq r16,8,r16
+ srl r4,r19,r5
+ addq r17,8,r17
+ subq r28,1,r28
+ sll r3,r20,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,-8(r16)
+ bne r28,$Loop0
+
+$L0: srl r4,r19,r24
+ beq r18,$Lend
+C warm up phase 1
+ ldq r1,8(r17)
+ subq r18,4,r18
+ ldq r2,16(r17)
+ ldq r3,24(r17)
+ ldq r4,32(r17)
+ beq r18,$Lend1
+C warm up phase 2
+ sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ ldq r1,40(r17)
+ srl r2,r19,r22
+ ldq r2,48(r17)
+ sll r3,r20,r5
+ bis r7,r24,r7
+ srl r3,r19,r23
+ bis r8,r21,r8
+ sll r4,r20,r6
+ ldq r3,56(r17)
+ srl r4,r19,r24
+ ldq r4,64(r17)
+ subq r18,4,r18
+ beq r18,$Lend2
+ ALIGN(16)
+C main loop
+$Loop: stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+
+ sll r1,r20,r7
+ subq r18,4,r18
+ srl r1,r19,r21
+ unop C ldq r31,-96(r17)
+
+ sll r2,r20,r8
+ ldq r1,72(r17)
+ srl r2,r19,r22
+ ldq r2,80(r17)
+
+ stq r5,16(r16)
+ bis r7,r24,r7
+ stq r6,24(r16)
+ bis r8,r21,r8
+
+ sll r3,r20,r5
+ unop C ldq r31,-96(r17)
+ srl r3,r19,r23
+ addq r16,32,r16
+
+ sll r4,r20,r6
+ ldq r3,88(r17)
+ srl r4,r19,r24
+ ldq r4,96(r17)
+
+ addq r17,32,r17
+ bne r18,$Loop
+C cool down phase 2/1
+$Lend2: stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+ sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ srl r2,r19,r22
+ stq r5,16(r16)
+ bis r7,r24,r7
+ stq r6,24(r16)
+ bis r8,r21,r8
+ sll r3,r20,r5
+ srl r3,r19,r23
+ sll r4,r20,r6
+ srl r4,r19,r24
+C cool down phase 2/2
+ stq r7,32(r16)
+ bis r5,r22,r5
+ stq r8,40(r16)
+ bis r6,r23,r6
+ stq r5,48(r16)
+ stq r6,56(r16)
+C cool down phase 2/3
+ stq r24,64(r16)
+ ret r31,(r26),1
+
+C cool down phase 1/1
+$Lend1: sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ srl r2,r19,r22
+ sll r3,r20,r5
+ bis r7,r24,r7
+ srl r3,r19,r23
+ bis r8,r21,r8
+ sll r4,r20,r6
+ srl r4,r19,r24
+C cool down phase 1/2
+ stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+ stq r5,16(r16)
+ stq r6,24(r16)
+ stq r24,32(r16)
+ ret r31,(r26),1
+
+$Lend: stq r24,0(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev5/sub_n.asm b/rts/gmp/mpn/alpha/ev5/sub_n.asm
new file mode 100644
index 0000000000..5248a2aa38
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev5/sub_n.asm
@@ -0,0 +1,143 @@
+dnl Alpha EV5 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl and store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ bis r31,r31,r25 C clear cy
+ subq r19,4,r19 C decr loop cnt
+ blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ ldq r1,8(r18)
+ ldq r5,8(r17)
+ addq r17,32,r17 C update s1_ptr
+ ldq r2,16(r18)
+ subq r4,r0,r20 C 1st main subtract
+ ldq r3,24(r18)
+ subq r19,4,r19 C decr loop cnt
+ ldq r6,-16(r17)
+ cmpult r4,r0,r25 C compute cy from last subtract
+ ldq r7,-8(r17)
+ subq r5,r1,r28 C 2nd main subtract
+ addq r18,32,r18 C update s2_ptr
+ subq r28,r25,r21 C 2nd carry subtract
+ cmpult r5,r1,r8 C compute cy from last subtract
+ blt r19,$Lend1 C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+ ALIGN(16)
+$Loop: cmpult r28,r25,r25 C compute cy from last subtract
+ ldq r0,0(r18)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ ldq r1,8(r18)
+ subq r6,r2,r28 C 3rd main subtract
+ ldq r4,0(r17)
+ subq r28,r25,r22 C 3rd carry subtract
+ ldq r5,8(r17)
+ cmpult r6,r2,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ stq r21,8(r16)
+ subq r7,r3,r28 C 4th main subtract
+ subq r28,r25,r23 C 4th carry subtract
+ cmpult r7,r3,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ addq r17,32,r17 C update s1_ptr
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,32,r16 C update res_ptr
+ subq r4,r0,r28 C 1st main subtract
+ ldq r2,16(r18)
+ subq r28,r25,r20 C 1st carry subtract
+ ldq r3,24(r18)
+ cmpult r4,r0,r8 C compute cy from last subtract
+ ldq r6,-16(r17)
+ cmpult r28,r25,r25 C compute cy from last subtract
+ ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r19,4,r19 C decr loop cnt
+ stq r22,-16(r16)
+ subq r5,r1,r28 C 2nd main subtract
+ stq r23,-8(r16)
+ subq r28,r25,r21 C 2nd carry subtract
+ addq r18,32,r18 C update s2_ptr
+ cmpult r5,r1,r8 C compute cy from last subtract
+ bge r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1: cmpult r28,r25,r25 C compute cy from last subtract
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r6,r2,r28 C cy add
+ subq r28,r25,r22 C 3rd main subtract
+ cmpult r6,r2,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ stq r21,8(r16)
+ subq r7,r3,r28 C cy add
+ subq r28,r25,r23 C 4th main subtract
+ cmpult r7,r3,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,32,r16 C update res_ptr
+ stq r22,-16(r16)
+ stq r23,-8(r16)
+$Lend2: addq r19,4,r19 C restore loop cnt
+ beq r19,$Lret
+C Start software pipeline for 2nd loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ subq r19,1,r19
+ beq r19,$Lend0
+C 2nd loop handles remaining 1-3 limbs
+ ALIGN(16)
+$Loop0: subq r4,r0,r28 C main subtract
+ cmpult r4,r0,r8 C compute cy from last subtract
+ ldq r0,8(r18)
+ ldq r4,8(r17)
+ subq r28,r25,r20 C carry subtract
+ addq r18,8,r18
+ addq r17,8,r17
+ stq r20,0(r16)
+ cmpult r28,r25,r25 C compute cy from last subtract
+ subq r19,1,r19 C decr loop cnt
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,8,r16
+ bne r19,$Loop0
+$Lend0: subq r4,r0,r28 C main subtract
+ subq r28,r25,r20 C carry subtract
+ cmpult r4,r0,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+
+$Lret: bis r25,r31,r0 C return cy
+ ret r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev6/addmul_1.asm b/rts/gmp/mpn/alpha/ev6/addmul_1.asm
new file mode 100644
index 0000000000..2f588626a5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev6/addmul_1.asm
@@ -0,0 +1,474 @@
+dnl Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and
+dnl exactly 3.625 cycles/limb on EV6...
+
+dnl This code was written in close cooperation with ev6 pipeline expert
+dnl Steve Root (root@toober.hlo.dec.com). Any errors are tege's fault, though.
+dnl
+dnl Register usages for unrolled loop:
+dnl 0-3 mul's
+dnl 4-7 acc's
+dnl 8-15 mul results
+dnl 20,21 carry's
+dnl 22,23 save for stores
+
+dnl Sustains 8 mul-adds in 29 cycles in the unrolled inner loop.
+
+dnl The stores can issue a cycle late so we have paired no-op's to 'catch'
+dnl them, so that further disturbance to the schedule is damped.
+
+dnl We couldn't pair the loads, because the entangled schedule of the
+dnl carry's has to happen on one side {0} of the machine. Note, the total
+dnl use of U0, and the total use of L0 (after attending to the stores).
+dnl which is part of the reason why....
+
+dnl This is a great schedule for the d_cache, a poor schedule for the
+dnl b_cache. The lockup on U0 means that any stall can't be recovered
+dnl from. Consider a ldq in L1. Say that load gets stalled because it
+dnl collides with a fill from the b_cache. On the next cycle, this load
+dnl gets priority. It first looks at L0, and goes there. The instruction
+dnl we intended for L0 gets to look at L1, which is NOT where we want
+dnl it. It either stalls 1, because it can't go in L0, or goes there, and
+dnl causes a further instruction to stall.
+
+dnl So for b_cache, we're likely going to want to put one or more cycles
+dnl back into the code! And, of course, put in prefetches. For the
+dnl accumulator, lds, intent to modify. For the multiplier, you might
+dnl want ldq, evict next, if you're not wanting to use it again soon. Use
+dnl 256 ahead of present pointer value. At a place where we have an mt
+dnl followed by a bookkeeping, put the bookkeeping in upper, and the
+dnl prefetch into lower.
+
+dnl Note, the usage of physical registers per cycle is smoothed off, as
+dnl much as possible.
+
+dnl Note, the ldq's and stq's are at the end of the quadpacks. Note, we'd
+dnl like not to have a ldq or stq to precede a conditional branch in a
+dnl quadpack. The conditional branch moves the retire pointer one cycle
+dnl later.
+
+dnl Optimization notes:
+dnl Callee-saves regs: r9 r10 r11 r12 r13 r14 r15 r26 ?r27?
+dnl Reserved regs: r29 r30 r31
+dnl Free caller-saves regs in unrolled code: r24 r25 r28
+dnl We should swap some of the callee-saves regs for some of the free
+dnl caller-saves regs, saving some overhead cycles.
+dnl Most importantly, we should write fast code for the 0-7 case.
+dnl The code we use there are for the 21164, and runs at 7 cycles/limb
+dnl on the 21264. Should not be hard, if we write specialized code for
+dnl 1-7 limbs (the one for 0 limbs should be straightforward). We then just
+dnl need a jump table indexed by the low 3 bits of the count argument.
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ cmpult r18, 8, r1
+ beq r1, $Large
+
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r18, 1, r18 C size--
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ umulh r2, r19, r0 C r0 = prod_high
+ beq r18, $Lend0b C jump if size was == 1
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r18, 1, r18 C size--
+ addq r5, r3, r3
+ cmpult r3, r5, r4
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ beq r18, $Lend0a C jump if size was == 2
+
+ ALIGN(8)
+$Loop0: mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ subq r18, 1, r18 C size--
+ umulh r2, r19, r4 C r4 = cy_limb
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r5, r0, r0 C combine carries
+ bne r18, $Loop0
+$Lend0a:
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ umulh r2, r19, r4 C r4 = cy_limb
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r5, r0, r0 C combine carries
+ addq r4, r0, r0 C cy_limb = prod_high + cy
+ ret r31, (r26), 1
+$Lend0b:
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r0, r5, r0
+ ret r31, (r26), 1
+
+$Large:
+ lda $30, -240($30)
+ stq $9, 8($30)
+ stq $10, 16($30)
+ stq $11, 24($30)
+ stq $12, 32($30)
+ stq $13, 40($30)
+ stq $14, 48($30)
+ stq $15, 56($30)
+
+ and r18, 7, r20 C count for the first loop, 0-7
+ srl r18, 3, r18 C count for unrolled loop
+ bis r31, r31, r0
+ beq r20, $Lunroll
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r20, 1, r20 C size--
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ umulh r2, r19, r0 C r0 = prod_high
+ beq r20, $Lend1b C jump if size was == 1
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ subq r20, 1, r20 C size--
+ addq r5, r3, r3
+ cmpult r3, r5, r4
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ beq r20, $Lend1a C jump if size was == 2
+
+ ALIGN(8)
+$Loop1: mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ subq r20, 1, r20 C size--
+ umulh r2, r19, r4 C r4 = cy_limb
+ ldq r2, 0(r17) C r2 = s1_limb
+ addq r17, 8, r17 C s1_ptr++
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r5, r0, r0 C combine carries
+ bne r20, $Loop1
+
+$Lend1a:
+ mulq r2, r19, r3 C r3 = prod_low
+ ldq r5, 0(r16) C r5 = *res_ptr
+ addq r4, r0, r0 C cy_limb = cy_limb + 'cy'
+ umulh r2, r19, r4 C r4 = cy_limb
+ addq r3, r0, r3 C r3 = cy_limb + prod_low
+ cmpult r3, r0, r0 C r0 = carry from (cy_limb + prod_low)
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r5, r0, r0 C combine carries
+ addq r4, r0, r0 C cy_limb = prod_high + cy
+ br r31, $Lunroll
+$Lend1b:
+ addq r5, r3, r3
+ cmpult r3, r5, r5
+ stq r3, 0(r16)
+ addq r16, 8, r16 C res_ptr++
+ addq r0, r5, r0
+
+$Lunroll:
+ lda r17, -16(r17) C L1 bookkeeping
+ lda r16, -16(r16) C L1 bookkeeping
+ bis r0, r31, r12
+
+C ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____
+
+ ldq r2, 16(r17) C L1
+ ldq r3, 24(r17) C L1
+ lda r18, -1(r18) C L1 bookkeeping
+ ldq r6, 16(r16) C L1
+ ldq r7, 24(r16) C L1
+ ldq r0, 32(r17) C L1
+ mulq r19, r2, r13 C U1
+ ldq r1, 40(r17) C L1
+ umulh r19, r2, r14 C U1
+ mulq r19, r3, r15 C U1
+ lda r17, 64(r17) C L1 bookkeeping
+ ldq r4, 32(r16) C L1
+ ldq r5, 40(r16) C L1
+ umulh r19, r3, r8 C U1
+ ldq r2, -16(r17) C L1
+ mulq r19, r0, r9 C U1
+ ldq r3, -8(r17) C L1
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ mulq r19, r1, r11 C U1
+ cmpult r6, r13, r20 C L0 lo add => carry
+ lda r16, 64(r16) C L1 bookkeeping
+ addq r6, r12, r22 C U0 hi add => answer
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ ldq r6, -16(r16) C L1
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ ldq r7, -8(r16) C L1
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ ldq r0, 0(r17) C L1
+ mulq r19, r2, r13 C U1
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ ldq r1, 8(r17) C L1
+ umulh r19, r2, r14 C U1
+ addq r4, r9, r4 C L0 lo + acc
+ stq r22, -48(r16) C L0
+ stq r23, -40(r16) C L1
+ mulq r19, r3, r15 C U1
+ addq r8, r21, r8 C U0 hi mul + carry
+ cmpult r4, r9, r20 C L0 lo add => carry
+ addq r4, r8, r22 C U0 hi add => answer
+ ble r18, $Lend C U1 bookkeeping
+
+C ____ MAIN UNROLLED LOOP ____
+ ALIGN(16)
+$Loop:
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ ldq r4, 0(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ ldq r5, 8(r16) C L1
+
+ umulh r19, r3, r8 C U1
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ ldq r2, 16(r17) C L1
+
+ mulq r19, r0, r9 C U1
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ ldq r3, 24(r17) C L1
+
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ stq r22, -32(r16) C L0
+ stq r23, -24(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r1, r11 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r12, r21, r12 C U0 hi mul + carry
+
+ cmpult r6, r13, r20 C L0 lo add => carry
+ bis r31, r31, r31 C U1 mt
+ lda r18, -1(r18) C L1 bookkeeping
+ addq r6, r12, r22 C U0 hi add => answer
+
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ ldq r6, 16(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ ldq r7, 24(r16) C L1
+
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ ldq r0, 32(r17) C L1
+
+ mulq r19, r2, r13 C U1
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ ldq r1, 40(r17) C L1
+
+ umulh r19, r2, r14 C U1
+ addq r4, r9, r4 C U0 lo + acc
+ stq r22, -16(r16) C L0
+ stq r23, -8(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r3, r15 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r8, r21, r8 C L0 hi mul + carry
+
+ cmpult r4, r9, r20 C L0 lo add => carry
+ bis r31, r31, r31 C U1 mt
+ lda r17, 64(r17) C L1 bookkeeping
+ addq r4, r8, r22 C U0 hi add => answer
+
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ ldq r4, 32(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ ldq r5, 40(r16) C L1
+
+ umulh r19, r3, r8 C U1
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ ldq r2, -16(r17) C L1
+
+ mulq r19, r0, r9 C U1
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ ldq r3, -8(r17) C L1
+
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ stq r22, 0(r16) C L0
+ stq r23, 8(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r1, r11 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r12, r21, r12 C U0 hi mul + carry
+
+ cmpult r6, r13, r20 C L0 lo add => carry
+ bis r31, r31, r31 C U1 mt
+ lda r16, 64(r16) C L1 bookkeeping
+ addq r6, r12, r22 C U0 hi add => answer
+
+ bis r31, r31, r31 C U1 mt
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ ldq r6, -16(r16) C L1
+
+ bis r31, r31, r31 C U1 mt
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ ldq r7, -8(r16) C L1
+
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ ldq r0, 0(r17) C L1
+
+ mulq r19, r2, r13 C U1
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ ldq r1, 8(r17) C L1
+
+ umulh r19, r2, r14 C U1
+ addq r4, r9, r4 C L0 lo + acc
+ stq r22, -48(r16) C L0
+ stq r23, -40(r16) C L1
+
+ bis r31, r31, r31 C L0 st slosh
+ mulq r19, r3, r15 C U1
+ bis r31, r31, r31 C L1 st slosh
+ addq r8, r21, r8 C U0 hi mul + carry
+
+ cmpult r4, r9, r20 C L0 lo add => carry
+ addq r4, r8, r22 C U0 hi add => answer
+ bis r31, r31, r31 C L1 mt
+ bgt r18, $Loop C U1 bookkeeping
+
+C ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____
+$Lend:
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ ldq r4, 0(r16) C L1
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ ldq r5, 8(r16) C L1
+ umulh r19, r3, r8 C U1
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ mulq r19, r0, r9 C U1
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ umulh r19, r0, r10 C U1
+ addq r6, r13, r6 C L0 lo + acc
+ stq r22, -32(r16) C L0
+ stq r23, -24(r16) C L1
+ mulq r19, r1, r11 C U1
+ addq r12, r21, r12 C U0 hi mul + carry
+ cmpult r6, r13, r20 C L0 lo add => carry
+ addq r6, r12, r22 C U0 hi add => answer
+ cmpult r22, r12, r21 C L0 hi add => carry
+ addq r14, r20, r14 C U0 hi mul + carry
+ addq r7, r15, r23 C L0 lo + acc
+ addq r14, r21, r14 C U0 hi mul + carry
+ umulh r19, r1, r12 C U1
+ cmpult r23, r15, r20 C L0 lo add => carry
+ addq r23, r14, r23 C U0 hi add => answer
+ cmpult r23, r14, r21 C L0 hi add => carry
+ addq r8, r20, r8 C U0 hi mul + carry
+ addq r4, r9, r4 C U0 lo + acc
+ stq r22, -16(r16) C L0
+ stq r23, -8(r16) C L1
+ bis r31, r31, r31 C L0 st slosh
+ addq r8, r21, r8 C L0 hi mul + carry
+ cmpult r4, r9, r20 C L0 lo add => carry
+ addq r4, r8, r22 C U0 hi add => answer
+ cmpult r22, r8, r21 C L0 hi add => carry
+ addq r10, r20, r10 C U0 hi mul + carry
+ addq r5, r11, r23 C L0 lo + acc
+ addq r10, r21, r10 C L0 hi mul + carry
+ cmpult r23, r11, r20 C L0 lo add => carry
+ addq r23, r10, r23 C U0 hi add => answer
+ cmpult r23, r10, r21 C L0 hi add => carry
+ addq r12, r20, r12 C U0 hi mul + carry
+ stq r22, 0(r16) C L0
+ stq r23, 8(r16) C L1
+ addq r12, r21, r0 C U0 hi mul + carry
+
+ ldq $9, 8($30)
+ ldq $10, 16($30)
+ ldq $11, 24($30)
+ ldq $12, 32($30)
+ ldq $13, 40($30)
+ ldq $14, 48($30)
+ ldq $15, 56($30)
+ lda $30, 240($30)
+ ret r31, (r26), 1
+EPILOGUE(mpn_addmul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/ev6/gmp-mparam.h b/rts/gmp/mpn/alpha/ev6/gmp-mparam.h
new file mode 100644
index 0000000000..7ea20577f8
--- /dev/null
+++ b/rts/gmp/mpn/alpha/ev6/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Generated by tuneup.c, 2000-08-02. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 47
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 70
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 94
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 101
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 33
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 70
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 29
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 33
+#endif
diff --git a/rts/gmp/mpn/alpha/gmp-mparam.h b/rts/gmp/mpn/alpha/gmp-mparam.h
new file mode 100644
index 0000000000..054ff2fe5f
--- /dev/null
+++ b/rts/gmp/mpn/alpha/gmp-mparam.h
@@ -0,0 +1,64 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the 21164 family. The 21264 will require
+ different values, since it has such quick multiplication. */
+/* Generated by tuneup.c, 2000-07-19. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 22
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 53
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 31
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 47
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 64
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 98
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 17
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 4
+#endif
diff --git a/rts/gmp/mpn/alpha/invert_limb.asm b/rts/gmp/mpn/alpha/invert_limb.asm
new file mode 100644
index 0000000000..a921b32b3f
--- /dev/null
+++ b/rts/gmp/mpn/alpha/invert_limb.asm
@@ -0,0 +1,345 @@
+dnl Alpha mpn_invert_limb -- Invert a normalized limb.
+
+dnl Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+dnl
+dnl This is based on sophie:/gmp-stuff/dbg-inv-limb.c.
+dnl The ideas are due to Peter L. Montgomery
+dnl
+dnl The table below uses 4096 bytes. The file mentioned above has an
+dnl alternative function that doesn't require the table, but it runs 50%
+dnl slower than this.
+
+include(`../config.m4')
+
+ASM_START()
+
+FLOAT64($C36,9223372036854775808.0) C 2^63
+
+PROLOGUE_GP(mpn_invert_limb)
+ lda r30,-16(r30)
+ addq r16,r16,r1
+ bne r1,$73
+ lda r0,-1
+ br r31,$Lend
+$73:
+ srl r16,1,r1
+ stq r1,0(r30)
+ ldt f11,0(r30)
+ cvtqt f11,f1
+ lda r1,$C36
+ ldt f10,0(r1)
+ divt f10,f1,f10
+ lda r2,$invtab-4096
+ srl r16,52,r1
+ addq r1,r1,r1
+ addq r1,r2,r1
+ bic r1,6,r2
+ ldq r2,0(r2)
+ bic r1,1,r1
+ extwl r2,r1,r2
+ sll r2,48,r0
+ umulh r16,r0,r1
+ addq r16,r1,r3
+ stq r3,0(r30)
+ ldt f11,0(r30)
+ cvtqt f11,f1
+ mult f1,f10,f1
+ cvttqc f1,f1
+ stt f1,0(r30)
+ ldq r4,0(r30)
+ subq r0,r4,r0
+ umulh r16,r0,r1
+ mulq r16,r0,r2
+ addq r16,r1,r3
+ bge r3,$Loop2
+$Loop1: addq r2,r16,r2
+ cmpult r2,r16,r1
+ addq r3,r1,r3
+ addq r0,1,r0
+ blt r3,$Loop1
+$Loop2: cmpult r2,r16,r1
+ subq r0,1,r0
+ subq r3,r1,r3
+ subq r2,r16,r2
+ bge r3,$Loop2
+$Lend:
+ lda r30,16(r30)
+ ret r31,(r26),1
+EPILOGUE(mpn_invert_limb)
+DATASTART(`$invtab',4)
+ .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
+ .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
+ .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
+ .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
+ .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
+ .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
+ .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
+ .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
+ .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
+ .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
+ .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
+ .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
+ .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
+ .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
+ .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
+ .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
+ .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
+ .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
+ .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
+ .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
+ .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
+ .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
+ .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
+ .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
+ .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
+ .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
+ .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
+ .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
+ .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
+ .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
+ .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
+ .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
+ .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
+ .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
+ .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
+ .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
+ .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
+ .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
+ .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
+ .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
+ .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
+ .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
+ .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
+ .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
+ .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
+ .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
+ .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
+ .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
+ .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
+ .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
+ .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
+ .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
+ .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
+ .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
+ .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
+ .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
+ .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
+ .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
+ .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
+ .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
+ .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
+ .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
+ .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
+ .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
+ .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
+ .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
+ .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
+ .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
+ .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
+ .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
+ .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
+ .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
+ .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
+ .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
+ .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
+ .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
+ .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
+ .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
+ .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
+ .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
+ .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
+ .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
+ .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
+ .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
+ .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
+ .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
+ .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
+ .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
+ .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
+ .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
+ .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
+ .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
+ .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
+ .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
+ .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
+ .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
+ .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
+ .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
+ .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
+ .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
+ .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
+ .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
+ .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
+ .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
+ .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
+ .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
+ .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
+ .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
+ .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
+ .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
+ .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
+ .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
+ .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
+ .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
+ .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
+ .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
+ .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
+ .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
+ .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
+ .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
+ .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
+ .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
+ .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
+ .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
+ .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
+ .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
+ .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
+ .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
+ .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
+ .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
+ .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
+ .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
+ .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
+ .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
+ .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
+ .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
+ .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
+ .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
+ .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
+ .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
+ .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
+ .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
+ .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
+ .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
+ .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
+ .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
+ .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
+ .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
+ .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
+ .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
+ .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
+ .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
+ .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
+ .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
+ .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
+ .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
+ .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
+ .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
+ .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
+ .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
+ .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
+ .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
+ .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
+ .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
+ .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
+ .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
+ .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
+ .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
+ .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
+ .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
+ .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
+ .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
+ .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
+ .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
+ .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
+ .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
+ .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
+ .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
+ .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
+ .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
+ .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
+ .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
+ .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
+ .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
+ .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
+ .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
+ .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
+ .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
+ .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
+ .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
+ .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
+ .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
+ .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
+ .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
+ .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
+ .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
+ .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
+ .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
+ .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
+ .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
+ .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
+ .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
+ .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
+ .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
+ .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
+ .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
+ .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
+ .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
+ .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
+ .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
+ .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
+ .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
+ .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
+ .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
+ .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
+ .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
+ .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
+ .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
+ .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
+ .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
+ .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
+ .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
+ .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
+ .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
+ .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
+ .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
+ .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
+ .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
+ .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
+ .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
+ .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
+ .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
+ .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
+ .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
+ .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
+ .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
+ .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
+ .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
+ .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
+ .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
+ .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
+ .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
+ .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
+ .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
+ .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
+ .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
+ .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
+ .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
+ .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
+ .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
+ .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
+ .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
+ .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
+ .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
+ .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
+ .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
+DATAEND()
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/lshift.asm b/rts/gmp/mpn/alpha/lshift.asm
new file mode 100644
index 0000000000..87c46f6fe7
--- /dev/null
+++ b/rts/gmp/mpn/alpha/lshift.asm
@@ -0,0 +1,104 @@
+dnl Alpha mpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+dnl it would take 4 cycles/limb. It should be possible to get down to 3
+dnl cycles/limb since both ldq and stq can be paired with the other used
+dnl instructions. But there are many restrictions in the 21064 pipeline that
+dnl makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+dnl 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+dnl 2. Only aligned instruction pairs can be paired.
+dnl 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ s8addq r18,r17,r17 C make r17 point at end of s1
+ ldq r4,-8(r17) C load first limb
+ subq r17,8,r17
+ subq r31,r19,r7
+ s8addq r18,r16,r16 C make r16 point at end of RES
+ subq r18,1,r18
+ and r18,4-1,r20 C number of limbs in first loop
+ srl r4,r7,r0 C compute function result
+
+ beq r20,$L0
+ subq r18,r20,r18
+
+ ALIGN(8)
+$Loop0:
+ ldq r3,-8(r17)
+ subq r16,8,r16
+ subq r17,8,r17
+ subq r20,1,r20
+ sll r4,r19,r5
+ srl r3,r7,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,0(r16)
+ bne r20,$Loop0
+
+$L0: beq r18,$Lend
+
+ ALIGN(8)
+$Loop: ldq r3,-8(r17)
+ subq r16,32,r16
+ subq r18,4,r18
+ sll r4,r19,r5
+ srl r3,r7,r6
+
+ ldq r4,-16(r17)
+ sll r3,r19,r1
+ bis r5,r6,r8
+ stq r8,24(r16)
+ srl r4,r7,r2
+
+ ldq r3,-24(r17)
+ sll r4,r19,r5
+ bis r1,r2,r8
+ stq r8,16(r16)
+ srl r3,r7,r6
+
+ ldq r4,-32(r17)
+ sll r3,r19,r1
+ bis r5,r6,r8
+ stq r8,8(r16)
+ srl r4,r7,r2
+
+ subq r17,32,r17
+ bis r1,r2,r8
+ stq r8,0(r16)
+
+ bgt r18,$Loop
+
+$Lend: sll r4,r19,r8
+ stq r8,-8(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/mul_1.asm b/rts/gmp/mpn/alpha/mul_1.asm
new file mode 100644
index 0000000000..46b8df34f5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/mul_1.asm
@@ -0,0 +1,71 @@
+dnl Alpha __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ ldq r2,0(r17) C r2 = s1_limb
+ subq r18,1,r18 C size--
+ mulq r2,r19,r3 C r3 = prod_low
+ bic r31,r31,r4 C clear cy_limb
+ umulh r2,r19,r0 C r0 = prod_high
+ beq r18,$Lend1 C jump if size was == 1
+ ldq r2,8(r17) C r2 = s1_limb
+ subq r18,1,r18 C size--
+ stq r3,0(r16)
+ beq r18,$Lend2 C jump if size was == 2
+
+ ALIGN(8)
+$Loop: mulq r2,r19,r3 C r3 = prod_low
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ subq r18,1,r18 C size--
+ umulh r2,r19,r4 C r4 = cy_limb
+ ldq r2,16(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ stq r3,8(r16)
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ addq r16,8,r16 C res_ptr++
+ bne r18,$Loop
+
+$Lend2: mulq r2,r19,r3 C r3 = prod_low
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ umulh r2,r19,r4 C r4 = cy_limb
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ stq r3,8(r16)
+ addq r4,r0,r0 C cy_limb = prod_high + cy
+ ret r31,(r26),1
+$Lend1: stq r3,0(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_mul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/rshift.asm b/rts/gmp/mpn/alpha/rshift.asm
new file mode 100644
index 0000000000..aa25eda54e
--- /dev/null
+++ b/rts/gmp/mpn/alpha/rshift.asm
@@ -0,0 +1,102 @@
+dnl Alpha mpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl cnt r19
+
+dnl This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+dnl it would take 4 cycles/limb. It should be possible to get down to 3
+dnl cycles/limb since both ldq and stq can be paired with the other used
+dnl instructions. But there are many restrictions in the 21064 pipeline that
+dnl makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+dnl 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+dnl 2. Only aligned instruction pairs can be paired.
+dnl 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ldq r4,0(r17) C load first limb
+ addq r17,8,r17
+ subq r31,r19,r7
+ subq r18,1,r18
+ and r18,4-1,r20 C number of limbs in first loop
+ sll r4,r7,r0 C compute function result
+
+ beq r20,$L0
+ subq r18,r20,r18
+
+ ALIGN(8)
+$Loop0:
+ ldq r3,0(r17)
+ addq r16,8,r16
+ addq r17,8,r17
+ subq r20,1,r20
+ srl r4,r19,r5
+ sll r3,r7,r6
+ bis r3,r3,r4
+ bis r5,r6,r8
+ stq r8,-8(r16)
+ bne r20,$Loop0
+
+$L0: beq r18,$Lend
+
+ ALIGN(8)
+$Loop: ldq r3,0(r17)
+ addq r16,32,r16
+ subq r18,4,r18
+ srl r4,r19,r5
+ sll r3,r7,r6
+
+ ldq r4,8(r17)
+ srl r3,r19,r1
+ bis r5,r6,r8
+ stq r8,-32(r16)
+ sll r4,r7,r2
+
+ ldq r3,16(r17)
+ srl r4,r19,r5
+ bis r1,r2,r8
+ stq r8,-24(r16)
+ sll r3,r7,r6
+
+ ldq r4,24(r17)
+ srl r3,r19,r1
+ bis r5,r6,r8
+ stq r8,-16(r16)
+ sll r4,r7,r2
+
+ addq r17,32,r17
+ bis r1,r2,r8
+ stq r8,-8(r16)
+
+ bgt r18,$Loop
+
+$Lend: srl r4,r19,r8
+ stq r8,0(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/sub_n.asm b/rts/gmp/mpn/alpha/sub_n.asm
new file mode 100644
index 0000000000..718f657141
--- /dev/null
+++ b/rts/gmp/mpn/alpha/sub_n.asm
@@ -0,0 +1,114 @@
+dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ ldq r3,0(r17)
+ ldq r4,0(r18)
+
+ subq r19,1,r19
+ and r19,4-1,r2 C number of limbs in first loop
+ bis r31,r31,r0
+ beq r2,$L0 C if multiple of 4 limbs, skip first loop
+
+ subq r19,r2,r19
+
+$Loop0: subq r2,1,r2
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ addq r17,8,r17
+ addq r18,8,r18
+ bis r5,r5,r3
+ bis r6,r6,r4
+ addq r16,8,r16
+ bne r2,$Loop0
+
+$L0: beq r19,$Lend
+
+ ALIGN(8)
+$Loop: subq r19,4,r19
+
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ ldq r3,16(r17)
+ addq r6,r0,r6
+ ldq r4,16(r18)
+ cmpult r6,r0,r1
+ subq r5,r6,r6
+ cmpult r5,r6,r0
+ stq r6,8(r16)
+ bis r0,r1,r0
+
+ ldq r5,24(r17)
+ addq r4,r0,r4
+ ldq r6,24(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,16(r16)
+ bis r0,r1,r0
+
+ ldq r3,32(r17)
+ addq r6,r0,r6
+ ldq r4,32(r18)
+ cmpult r6,r0,r1
+ subq r5,r6,r6
+ cmpult r5,r6,r0
+ stq r6,24(r16)
+ bis r0,r1,r0
+
+ addq r17,32,r17
+ addq r18,32,r18
+ addq r16,32,r16
+ bne r19,$Loop
+
+$Lend: addq r4,r0,r4
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/submul_1.asm b/rts/gmp/mpn/alpha/submul_1.asm
new file mode 100644
index 0000000000..caec1a720b
--- /dev/null
+++ b/rts/gmp/mpn/alpha/submul_1.asm
@@ -0,0 +1,87 @@
+dnl Alpha __gmpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl size r18
+dnl s2_limb r19
+
+dnl This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and 7
+dnl cycles/limb on EV6.
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ umulh r2,r19,r0 C r0 = prod_high
+ beq r18,$Lend1 C jump if size was == 1
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ subq r18,1,r18 C size--
+ subq r5,r3,r3
+ cmpult r5,r3,r4
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ beq r18,$Lend2 C jump if size was == 2
+
+ ALIGN(8)
+$Loop: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ subq r18,1,r18 C size--
+ umulh r2,r19,r4 C r4 = cy_limb
+ ldq r2,0(r17) C r2 = s1_limb
+ addq r17,8,r17 C s1_ptr++
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ subq r5,r3,r3
+ cmpult r5,r3,r5
+ stq r3,0(r16)
+ addq r16,8,r16 C res_ptr++
+ addq r5,r0,r0 C combine carries
+ bne r18,$Loop
+
+$Lend2: mulq r2,r19,r3 C r3 = prod_low
+ ldq r5,0(r16) C r5 = *res_ptr
+ addq r4,r0,r0 C cy_limb = cy_limb + 'cy'
+ umulh r2,r19,r4 C r4 = cy_limb
+ addq r3,r0,r3 C r3 = cy_limb + prod_low
+ cmpult r3,r0,r0 C r0 = carry from (cy_limb + prod_low)
+ subq r5,r3,r3
+ cmpult r5,r3,r5
+ stq r3,0(r16)
+ addq r5,r0,r0 C combine carries
+ addq r4,r0,r0 C cy_limb = prod_high + cy
+ ret r31,(r26),1
+$Lend1: subq r5,r3,r3
+ cmpult r5,r3,r5
+ stq r3,0(r16)
+ addq r0,r5,r0
+ ret r31,(r26),1
+EPILOGUE(mpn_submul_1)
+ASM_END()
diff --git a/rts/gmp/mpn/alpha/udiv_qrnnd.S b/rts/gmp/mpn/alpha/udiv_qrnnd.S
new file mode 100644
index 0000000000..53814bbcb0
--- /dev/null
+++ b/rts/gmp/mpn/alpha/udiv_qrnnd.S
@@ -0,0 +1,151 @@
+ # Alpha 21064 __udiv_qrnnd
+
+ # Copyright (C) 1992, 1994, 1995, 1997, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __gmpn_udiv_qrnnd
+ .ent __gmpn_udiv_qrnnd
+__gmpn_udiv_qrnnd:
+ .frame $30,0,$26,0
+ .prologue 0
+#define cnt $2
+#define tmp $3
+#define rem_ptr $16
+#define n1 $17
+#define n0 $18
+#define d $19
+#define qb $20
+
+ ldiq cnt,16
+ blt d,.Largedivisor
+
+.Loop1: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop1
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.Largedivisor:
+ and n0,1,$4
+
+ srl n0,1,n0
+ sll n1,63,tmp
+ or tmp,n0,n0
+ srl n1,1,n1
+
+ and d,1,$6
+ srl d,1,$5
+ addq $5,$6,$5
+
+.Loop2: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop2
+
+ addq n1,n1,n1
+ addq $4,n1,n1
+ bne $6,.LOdd
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.LOdd:
+ /* q' in n0. r' in n1 */
+ addq n1,n0,n1
+ cmpult n1,n0,tmp # tmp := carry from addq
+ beq tmp,.LLp6
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp6: cmpult n1,d,tmp
+ bne tmp,.LLp7
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp7:
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+ .end __gmpn_udiv_qrnnd
diff --git a/rts/gmp/mpn/alpha/umul.asm b/rts/gmp/mpn/alpha/umul.asm
new file mode 100644
index 0000000000..44428ed5f5
--- /dev/null
+++ b/rts/gmp/mpn/alpha/umul.asm
@@ -0,0 +1,39 @@
+dnl Currently unused.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+ .set noreorder
+ .set volatile
+ .set noat
+
+.text
+ .align 3
+ .globl __umul_ppmm
+ .ent __umul_ppmm
+__umul_ppmm:
+__umul_ppmm..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ mulq $17,$18,$1
+ umulh $17,$18,$0
+ stq $1,0($16)
+ ret $31,($26),1
+ .end __umul_ppmm
diff --git a/rts/gmp/mpn/alpha/unicos.m4 b/rts/gmp/mpn/alpha/unicos.m4
new file mode 100644
index 0000000000..7ff26c090c
--- /dev/null
+++ b/rts/gmp/mpn/alpha/unicos.m4
@@ -0,0 +1,63 @@
+divert(-1)
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+define(`ASM_START',
+ `.ident dummy')
+
+define(`X',`^X$1')
+define(`FLOAT64',
+ `dnl
+ .psect $1@crud,data
+$1: .t_floating $2
+ .endp')
+
+define(`PROLOGUE',
+ `dnl
+ .stack 192 ; What does this mean? Only Cray knows.
+ .psect $1@code,code,cache
+$1::')
+define(`PROLOGUE_GP', `PROLOGUE($1)')
+
+define(`EPILOGUE',
+ `dnl
+ .endp')
+
+define(`DATASTART',
+ `dnl
+ .psect $1@crud,data
+$1:')
+define(`DATAEND',
+ `dnl
+ .endp')
+
+define(`ASM_END',
+ `dnl
+ .end')
+
+define(`unop',`bis r31,r31,r31') ; Unicos assembler lacks unop
+define(`cvttqc',`cvttq/c')
+
+define(`ALIGN',`') ; Unicos assembler seems to align using garbage
+
+divert
+
diff --git a/rts/gmp/mpn/arm/add_n.S b/rts/gmp/mpn/arm/add_n.S
new file mode 100644
index 0000000000..fb3f8f703b
--- /dev/null
+++ b/rts/gmp/mpn/arm/add_n.S
@@ -0,0 +1,77 @@
+@ ARM mpn_add_n -- Add two limb vectors of the same length > 0 and store sum in
+@ a third limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define s r0
+#define a r1
+#define b r2
+#define n r3
+
+#define sl r10
+#define fp r11
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_add_n
+ .type __gmpn_add_n,%function
+__gmpn_add_n:
+ stmfd sp!, { r8, r9, lr }
+ movs n, n, lsr #1
+ bcc skip1
+ ldr ip, [a], #4
+ ldr lr, [b], #4
+ adds ip, ip, lr
+ str ip, [s], #4
+skip1:
+ tst n, #1
+ beq skip2
+ ldmia a!, { r8, r9 }
+ ldmia b!, { ip, lr }
+ adcs r8, r8, ip
+ adcs r9, r9, lr
+ stmia s!, { r8, r9 }
+skip2:
+ bics n, n, #1
+ beq return
+ stmfd sp!, { r4, r5, r6, r7 }
+add_n_loop:
+ ldmia a!, { r4, r5, r6, r7 }
+ ldmia b!, { r8, r9, ip, lr }
+ adcs r4, r4, r8
+ ldr r8, [s] /* Bring stuff into cache. */
+ adcs r5, r5, r9
+ adcs r6, r6, ip
+ adcs r7, r7, lr
+ stmia s!, { r4, r5, r6, r7 }
+ sub n, n, #2
+ teq n, #0
+ bne add_n_loop
+ ldmfd sp!, { r4, r5, r6, r7 }
+return:
+ adc r0, n, #0
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_add_n, end - __gmpn_add_n
diff --git a/rts/gmp/mpn/arm/addmul_1.S b/rts/gmp/mpn/arm/addmul_1.S
new file mode 100644
index 0000000000..396fff77a3
--- /dev/null
+++ b/rts/gmp/mpn/arm/addmul_1.S
@@ -0,0 +1,89 @@
+@ ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result to a
+@ second limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define p r0
+#define a r1
+#define n r2
+#define w r3
+
+#define z r11
+
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_addmul_1
+ .type __gmpn_addmul_1,%function
+__gmpn_addmul_1:
+ stmfd sp!, { r8-r11, lr }
+ mov z, #0
+ mov ip, #0
+ movs n, n, lsr #1
+ bcc skip1
+ ldr lr, [a], #4
+ ldr r9, [p]
+ umlal r9, ip, w, lr
+ str r9, [p], #4
+skip1:
+ movs n, n, lsr #1
+ bcc skip2
+ ldmia p, { r9, r10 }
+ adds r8, ip, r9
+ adc r9, z, #0
+ ldmia a!, { ip, lr }
+ umlal r8, r9, w, ip
+ adds r9, r9, r10
+ adc ip, z, #0
+ umlal r9, ip, w, lr
+ stmia p!, { r8, r9 }
+skip2:
+ teq n, #0
+ beq return
+ stmfd sp!, { r4-r7 }
+addmul_loop:
+ ldmia p, { r5, r6, r7, r8 }
+ adds r4, ip, r5
+ adc r5, z, #0
+ ldmia a!, { r9, r10, ip, lr }
+ umlal r4, r5, w, r9
+ adds r5, r5, r6
+ adc r6, z, #0
+ umlal r5, r6, w, r10
+ adds r6, r6, r7
+ adc r7, z, #0
+ umlal r6, r7, w, ip
+ adds r7, r7, r8
+ adc ip, z, #0
+ umlal r7, ip, w, lr
+ subs n, n, #1
+ stmia p!, { r4, r5, r6, r7 }
+ bne addmul_loop
+ ldmfd sp!, { r4-r7 }
+return:
+ mov r0, ip
+ ldmfd sp!, { r8-r11, pc }
+end:
+ .size __gmpn_addmul_1, end - __gmpn_addmul_1
diff --git a/rts/gmp/mpn/arm/gmp-mparam.h b/rts/gmp/mpn/arm/gmp-mparam.h
new file mode 100644
index 0000000000..a35b0c7b66
--- /dev/null
+++ b/rts/gmp/mpn/arm/gmp-mparam.h
@@ -0,0 +1,34 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 21
+#endif
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 48
+#endif
diff --git a/rts/gmp/mpn/arm/mul_1.S b/rts/gmp/mpn/arm/mul_1.S
new file mode 100644
index 0000000000..bae526a0f0
--- /dev/null
+++ b/rts/gmp/mpn/arm/mul_1.S
@@ -0,0 +1,81 @@
+@ ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+@ in a second limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define p r0
+#define a r1
+#define n r2
+#define w r3
+
+#define sl r10
+#define fp r11
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_mul_1
+ .type __gmpn_mul_1,%function
+__gmpn_mul_1:
+ stmfd sp!, { r8, r9, lr }
+ ands ip, n, #1
+ beq skip1
+ ldr lr, [a], #4
+ umull r9, ip, w, lr
+ str r9, [p], #4
+skip1:
+ tst n, #2
+ beq skip2
+ mov r8, ip
+ ldmia a!, { ip, lr }
+ mov r9, #0
+ umlal r8, r9, w, ip
+ mov ip, #0
+ umlal r9, ip, w, lr
+ stmia p!, { r8, r9 }
+skip2:
+ bics n, n, #3
+ beq return
+ stmfd sp!, { r6, r7 }
+mul_1_loop:
+ mov r6, ip
+ ldmia a!, { r8, r9, ip, lr }
+ ldr r7, [p] /* Bring stuff into cache. */
+ mov r7, #0
+ umlal r6, r7, w, r8
+ mov r8, #0
+ umlal r7, r8, w, r9
+ mov r9, #0
+ umlal r8, r9, w, ip
+ mov ip, #0
+ umlal r9, ip, w, lr
+ subs n, n, #4
+ stmia p!, { r6, r7, r8, r9 }
+ bne mul_1_loop
+ ldmfd sp!, { r6, r7 }
+return:
+ mov r0, ip
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_mul_1, end - __gmpn_mul_1
diff --git a/rts/gmp/mpn/arm/sub_n.S b/rts/gmp/mpn/arm/sub_n.S
new file mode 100644
index 0000000000..856505fe21
--- /dev/null
+++ b/rts/gmp/mpn/arm/sub_n.S
@@ -0,0 +1,79 @@
+@ ARM mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+@ difference in a third limb vector.
+@ Contributed by Robert Harley.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+#define d r0
+#define a r1
+#define b r2
+#define n r3
+
+#define sl r10
+#define fp r11
+#define ip r12
+#define sp r13
+#define lr r14
+#define pc r15
+
+.text
+ .align 0
+ .global __gmpn_sub_n
+ .type __gmpn_sub_n,%function
+__gmpn_sub_n:
+ stmfd sp!, { r8, r9, lr }
+ subs ip, ip, ip
+ tst n, #1
+ beq skip1
+ ldr ip, [a], #4
+ ldr lr, [b], #4
+ subs ip, ip, lr
+ str ip, [d], #4
+skip1:
+ tst n, #2
+ beq skip2
+ ldmia a!, { r8, r9 }
+ ldmia b!, { ip, lr }
+ sbcs r8, r8, ip
+ sbcs r9, r9, lr
+ stmia d!, { r8, r9 }
+skip2:
+ bics n, n, #3
+ beq return
+ stmfd sp!, { r4, r5, r6, r7 }
+sub_n_loop:
+ ldmia a!, { r4, r5, r6, r7 }
+ ldmia b!, { r8, r9, ip, lr }
+ sbcs r4, r4, r8
+ ldr r8, [d] /* Bring stuff into cache. */
+ sbcs r5, r5, r9
+ sbcs r6, r6, ip
+ sbcs r7, r7, lr
+ stmia d!, { r4, r5, r6, r7 }
+ sub n, n, #4
+ teq n, #0
+ bne sub_n_loop
+ ldmfd sp!, { r4, r5, r6, r7 }
+return:
+ sbc r0, r0, r0
+ and r0, r0, #1
+ ldmfd sp!, { r8, r9, pc }
+end:
+ .size __gmpn_sub_n, end - __gmpn_sub_n
diff --git a/rts/gmp/mpn/asm-defs.m4 b/rts/gmp/mpn/asm-defs.m4
new file mode 100644
index 0000000000..aa2024138b
--- /dev/null
+++ b/rts/gmp/mpn/asm-defs.m4
@@ -0,0 +1,1182 @@
+divert(-1)
+dnl
+dnl m4 macros for gmp assembly code, shared by all CPUs.
+dnl
+dnl These macros are designed for use with any m4 and have been used on
+dnl GNU, FreeBSD, OpenBSD and SysV.
+dnl
+dnl GNU m4 and OpenBSD 2.7 m4 will give filenames and line numbers in error
+dnl messages.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Macros:
+dnl
+dnl Most new m4 specific macros have an "m4_" prefix to emphasise they're
+dnl m4 expansions. But new defining things like deflit() and defreg() are
+dnl named like the builtin define(), and forloop() is named following the
+dnl GNU m4 example on which it's based.
+dnl
+dnl GNU m4 with the -P option uses "m4_" as a prefix for builtins, but that
+dnl option isn't going to be used, so there's no conflict or confusion.
+dnl
+dnl
+dnl Comments in output:
+dnl
+dnl The m4 comment delimiters are left at # and \n, the normal assembler
+dnl commenting for most CPUs. m4 passes comment text through without
+dnl expanding macros in it, which is generally a good thing since it stops
+dnl unexpected expansions and possible resultant errors.
+dnl
+dnl But note that when a quoted string is being read, a # isn't special, so
+dnl apostrophes in comments in quoted strings must be avoided or they'll be
+dnl interpreted as a closing quote mark. But when the quoted text is
+dnl re-read # will still act like a normal comment, supressing macro
+dnl expansion.
+dnl
+dnl For example,
+dnl
+dnl # apostrophes in comments that're outside quotes are ok
+dnl # and using macro names like PROLOGUE is ok too
+dnl ...
+dnl ifdef(`PIC',`
+dnl # but apostrophes aren't ok inside quotes
+dnl # ^--wrong
+dnl ...
+dnl # though macro names like PROLOGUE are still ok
+dnl ...
+dnl ')
+dnl
+dnl If macro expansion in a comment is wanted, use `#' in the .asm (ie. a
+dnl quoted hash symbol), which will turn into # in the .s but get
+dnl expansions done on that line. This can make the .s more readable to
+dnl humans, but it won't make a blind bit of difference to the assembler.
+dnl
+dnl All the above applies, mutatis mutandis, when changecom() is used to
+dnl select @ ! ; or whatever other commenting.
+dnl
+dnl
+dnl Variations in m4 affecting gmp:
+dnl
+dnl $# - When a macro is called as "foo" with no brackets, BSD m4 sets $#
+dnl to 1, whereas GNU or SysV m4 set it to 0. In all cases though
+dnl "foo()" sets $# to 1. This is worked around in various places.
+dnl
+dnl len() - When "len()" is given an empty argument, BSD m4 evaluates to
+dnl nothing, whereas GNU, SysV, and the new OpenBSD, evaluate to 0.
+dnl See m4_length() below which works around this.
+dnl
+dnl translit() - GNU m4 accepts character ranges like A-Z, and the new
+dnl OpenBSD m4 does under option -g, but basic BSD and SysV don't.
+dnl
+dnl popdef() - in BSD and SysV m4 popdef() takes multiple arguments and
+dnl pops each, but GNU m4 only takes one argument.
+dnl
+dnl push back - BSD m4 has some limits on the amount of text that can be
+dnl pushed back. The limit is reasonably big and so long as macros
+dnl don't gratuitously duplicate big arguments it isn't a problem.
+dnl Normally an error message is given, but sometimes it just hangs.
+dnl
+dnl eval() &,|,^ - GNU and SysV m4 have bitwise operators &,|,^ available,
+dnl but BSD m4 doesn't (contrary to what the man page suggests) and
+dnl instead ^ is exponentiation.
+dnl
+dnl eval() ?: - The C ternary operator "?:" is available in BSD m4, but not
+dnl in SysV or GNU m4 (as of GNU m4 1.4 and betas of 1.5).
+dnl
+dnl eval() -2^31 - BSD m4 has a bug where an eval() resulting in -2^31
+dnl (ie. -2147483648) gives "-(". Using -2147483648 within an
+dnl expression is ok, it just can't be a final result. "-(" will of
+dnl course upset parsing, with all sorts of strange effects.
+dnl
+dnl eval() <<,>> - SysV m4 doesn't support shift operators in eval() (on
+dnl SunOS 5.7 /usr/xpg4/m4 has them but /usr/ccs/m4 doesn't). See
+dnl m4_lshift() and m4_rshift() below for workarounds.
+dnl
+dnl m4wrap() - in BSD m4, m4wrap() replaces any previous m4wrap() string,
+dnl in SysV m4 it appends to it, and in GNU m4 it prepends. See
+dnl m4wrap_prepend() below which brings uniformity to this.
+dnl
+dnl __file__,__line__ - GNU m4 and OpenBSD 2.7 m4 provide these, and
+dnl they're used here to make error messages more informative. GNU m4
+dnl gives an unhelpful "NONE 0" in an m4wrap(), but that's worked
+dnl around.
+dnl
+dnl __file__ quoting - OpenBSD m4, unlike GNU m4, doesn't quote the
+dnl filename in __file__, so care should be taken that no macro has
+dnl the same name as a file, or an unwanted expansion will occur when
+dnl printing an error or warning.
+dnl
+dnl OpenBSD 2.6 m4 - this m4 rejects decimal constants containing an 8 or 9
+dnl in eval(), making it pretty much unusable. This bug is confined
+dnl to version 2.6 (it's not in 2.5, and has been fixed in 2.7).
+dnl
+dnl SunOS /usr/bin/m4 - this m4 lacks a number of desired features,
+dnl including $# and $@, defn(), m4exit(), m4wrap(), pushdef(),
+dnl popdef(). /usr/5bin/m4 is a SysV style m4 which should always be
+dnl available, and "configure" will reject /usr/bin/m4 in favour of
+dnl /usr/5bin/m4 (if necessary).
+dnl
+dnl The sparc code actually has modest m4 requirements currently and
+dnl could manage with /usr/bin/m4, but there's no reason to put our
+dnl macros through contortions when /usr/5bin/m4 is available or GNU
+dnl m4 can be installed.
+
+
+ifdef(`__ASM_DEFS_M4_INCLUDED__',
+`m4_error(`asm-defs.m4 already included, dont include it twice
+')m4exit(1)')
+define(`__ASM_DEFS_M4_INCLUDED__')
+
+
+dnl Detect and give a message about the unsuitable OpenBSD 2.6 m4.
+
+ifelse(eval(89),89,,
+`errprint(
+`This m4 doesnt accept 8 and/or 9 in constants in eval(), making it unusable.
+This is probably OpenBSD 2.6 m4 (September 1999). Upgrade to OpenBSD 2.7,
+or get a bug fix from the CVS (expr.c rev 1.9), or get GNU m4. Dont forget
+to configure with M4=/wherever/m4 if you install one of these in a directory
+not in $PATH.
+')m4exit(1)')
+
+
+dnl Detect and give a message about the unsuitable SunOS /usr/bin/m4.
+dnl
+dnl Unfortunately this test doesn't work when m4 is run in the normal way
+dnl from mpn/Makefile with "m4 -DOPERATION_foo foo.asm", since the bad m4
+dnl takes "-" in "-D..." to mean read stdin, so it will look like it just
+dnl hangs. But running "m4 asm-defs.m4" to try it out will work.
+dnl
+dnl We'd like to abort immediately on finding a problem, but unfortunately
+dnl the bad m4 doesn't have an m4exit(), nor does an invalid eval() kill
+dnl it. Unexpanded $#'s in some m4_assert_numargs() later on will comment
+dnl out some closing parentheses and kill it with "m4: arg stack overflow".
+
+define(m4_dollarhash_works_test,``$#'')
+ifelse(m4_dollarhash_works_test(x),1,,
+`errprint(
+`This m4 doesnt support $# and cant be used for GMP asm processing.
+If this is on SunOS, ./configure should choose /usr/5bin/m4 if you have that
+or can get it, otherwise install GNU m4. Dont forget to configure with
+M4=/wherever/m4 if you install in a directory not in $PATH.
+')')
+undefine(`m4_dollarhash_works_test')
+
+
+dnl --------------------------------------------------------------------------
+dnl Basic error handling things.
+
+
+dnl Usage: m4_dollarhash_1_if_noparen_p
+dnl
+dnl Expand to 1 if a call "foo" gives $# set to 1 (as opposed to 0 like GNU
+dnl and SysV m4 give).
+
+define(m4_dollarhash_1_if_noparen_test,`$#')
+define(m4_dollarhash_1_if_noparen_p,
+eval(m4_dollarhash_1_if_noparen_test==1))
+undefine(`m4_dollarhash_1_if_noparen_test')
+
+
+dnl Usage: m4wrap_prepend(string)
+dnl
+dnl Prepend the given string to what will be expanded under m4wrap at the
+dnl end of input.
+dnl
+dnl This macro exists to work around variations in m4wrap() behaviour in
+dnl the various m4s (notes at the start of this file). Don't use m4wrap()
+dnl directly since it will interfere with this scheme.
+
+define(m4wrap_prepend,
+m4_assert_numargs(1)
+`define(`m4wrap_string',`$1'defn(`m4wrap_string'))')
+
+m4wrap(`m4wrap_string')
+define(m4wrap_string,`')
+
+
+dnl Usage: m4_file_and_line
+dnl
+dnl Expand to the current file and line number, if the GNU m4 extensions
+dnl __file__ and __line__ are available.
+dnl
+dnl In GNU m4 1.4 at the end of input when m4wrap text is expanded,
+dnl __file__ is NONE and __line__ is 0, which is not a helpful thing to
+dnl print. If m4_file_seen() has been called to note the last file seen,
+dnl then that file at a big line number is used, otherwise "end of input"
+dnl is used (although "end of input" won't parse as an error message).
+
+define(m4_file_and_line,
+`ifdef(`__file__',
+`ifelse(__file__`'__line__,`NONE0',
+`ifdef(`m4_file_seen_last',`m4_file_seen_last: 999999: ',`end of input: ')',
+`__file__: __line__: ')')')
+
+
+dnl Usage: m4_errprint_commas(arg,...)
+dnl
+dnl The same as errprint(), but commas are printed between arguments
+dnl instead of spaces.
+
+define(m4_errprint_commas,
+`errprint(`$1')dnl
+ifelse(eval($#>1),1,`errprint(`,')m4_errprint_commas(shift($@))')')
+
+
+dnl Usage: m4_error(args...)
+dnl m4_warning(args...)
+dnl
+dnl Print an error message, using m4_errprint_commas, prefixed with the
+dnl current filename and line number (if available). m4_error sets up to
+dnl give an error exit at the end of processing, m4_warning just prints.
+dnl These macros are the recommended way to print errors.
+dnl
+dnl The arguments here should be quoted in the usual way to prevent them
+dnl being expanded when the macro call is read. (m4_error takes care not
+dnl to do any further expansion.)
+dnl
+dnl For example,
+dnl
+dnl m4_error(`some error message
+dnl ')
+dnl
+dnl which prints
+dnl
+dnl foo.asm:123: some error message
+dnl
+dnl or if __file__ and __line__ aren't available
+dnl
+dnl some error message
+dnl
+dnl The "file:line:" format is a basic style, used by gcc and GNU m4, so
+dnl emacs and other editors will recognise it in their normal error message
+dnl parsing.
+
+define(m4_warning,
+`m4_errprint_commas(m4_file_and_line`'$@)')
+
+define(m4_error,
+`define(`m4_error_occurred',1)m4_warning($@)')
+
+define(`m4_error_occurred',0)
+
+dnl This m4wrap_prepend() is first, so it'll be executed last.
+m4wrap_prepend(
+`ifelse(m4_error_occurred,1,
+`m4_error(`Errors occurred during m4 processing
+')m4exit(1)')')
+
+
+dnl Usage: m4_assert_numargs(num)
+dnl
+dnl Put this unquoted on a line on its own at the start of a macro
+dnl definition to add some code to check that num many arguments get passed
+dnl to the macro. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_numargs(2)
+dnl `something `$1' and `$2' blah blah')
+dnl
+dnl Then a call like foo(one,two,three) will provoke an error like
+dnl
+dnl file:10: foo expected 2 arguments, got 3 arguments
+dnl
+dnl Here are some calls and how many arguments they're interpreted as passing.
+dnl
+dnl foo(abc,def) 2
+dnl foo(xyz) 1
+dnl foo() 0
+dnl foo -1
+dnl
+dnl The -1 for no parentheses at all means a macro that's meant to be used
+dnl that way can be checked with m4_assert_numargs(-1). For example,
+dnl
+dnl define(SPECIAL_SUFFIX,
+dnl m4_assert_numargs(-1)
+dnl `ifdef(`FOO',`_foo',`_bar')')
+dnl
+dnl But as an alternative see also deflit() below where parenthesized
+dnl expressions following a macro are passed through to the output.
+dnl
+dnl Note that in BSD m4 there's no way to differentiate calls "foo" and
+dnl "foo()", so in BSD m4 the distinction between the two isn't enforced.
+dnl (In GNU and SysV m4 it can be checked, and is.)
+
+
+dnl m4_assert_numargs is able to check its own arguments by calling
+dnl assert_numargs_internal directly.
+dnl
+dnl m4_doublequote($`'0) expands to ``$0'', whereas ``$`'0'' would expand
+dnl to `$`'0' and do the wrong thing, and likewise for $1. The same is
+dnl done in other assert macros.
+dnl
+dnl $`#' leaves $# in the new macro being defined, and stops # being
+dnl interpreted as a comment character.
+dnl
+dnl `dnl ' means an explicit dnl isn't necessary when m4_assert_numargs is
+dnl used. The space means that if there is a dnl it'll still work.
+
+dnl Usage: m4_doublequote(x) expands to ``x''
+define(m4_doublequote,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))``$1''')
+
+define(m4_assert_numargs,
+`m4_assert_numargs_internal(`$0',1,$#,len(`$1'))dnl
+`m4_assert_numargs_internal'(m4_doublequote($`'0),$1,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl Called: m4_assert_numargs_internal(`macroname',wantargs,$#,len(`$1'))
+define(m4_assert_numargs_internal,
+`m4_assert_numargs_internal_check(`$1',`$2',m4_numargs_count(`$3',`$4'))')
+
+dnl Called: m4_assert_numargs_internal_check(`macroname',wantargs,gotargs)
+dnl
+dnl If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl should be -1. If wantargs is -1 but gotargs is 0 and the two can't be
+dnl distinguished then it's allowed to pass.
+dnl
+define(m4_assert_numargs_internal_check,
+`ifelse(eval($2 == $3
+ || ($2==-1 && $3==0 && m4_dollarhash_1_if_noparen_p)),0,
+`m4_error(`$1 expected 'm4_Narguments(`$2')`, got 'm4_Narguments(`$3')
+)')')
+
+dnl Called: m4_numargs_count($#,len(`$1'))
+dnl If $#==0 then -1 args, if $#==1 but len(`$1')==0 then 0 args, otherwise
+dnl $# args.
+define(m4_numargs_count,
+`ifelse($1,0, -1,
+`ifelse(eval($1==1 && $2-0==0),1, 0, $1)')')
+
+dnl Usage: m4_Narguments(N)
+dnl "$1 argument" or "$1 arguments" with the plural according to $1.
+define(m4_Narguments,
+`$1 argument`'ifelse(`$1',1,,s)')
+
+
+dnl --------------------------------------------------------------------------
+dnl Additional error checking things.
+
+
+dnl Usage: m4_file_seen()
+dnl
+dnl Record __file__ for the benefit of m4_file_and_line in m4wrap text.
+dnl The basic __file__ macro comes out quoted, like `foo.asm', and
+dnl m4_file_seen_last is defined like that too.
+dnl
+dnl This only needs to be used with something that could generate an error
+dnl message in m4wrap text. The x86 PROLOGUE is the only such at the
+dnl moment (at end of input its m4wrap checks for missing EPILOGUE). A few
+dnl include()s can easily trick this scheme, but you'd expect an EPILOGUE
+dnl in the same file as the PROLOGUE.
+
+define(m4_file_seen,
+m4_assert_numargs(0)
+`ifelse(__file__,`NONE',,
+`define(`m4_file_seen_last',m4_doublequote(__file__))')')
+
+
+dnl Usage: m4_assert_onearg()
+dnl
+dnl Put this, unquoted, at the start of a macro definition to add some code
+dnl to check that one argument is passed to the macro, but with that
+dnl argument allowed to be empty. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_onearg()
+dnl `blah blah $1 blah blah')
+dnl
+dnl Calls "foo(xyz)" or "foo()" are accepted. A call "foo(xyz,abc)" fails.
+dnl A call "foo" fails too, but BSD m4 can't detect this case (GNU and SysV
+dnl m4 can).
+
+define(m4_assert_onearg,
+m4_assert_numargs(0)
+`m4_assert_onearg_internal'(m4_doublequote($`'0),$`#')`dnl ')
+
+dnl Called: m4_assert_onearg(`macroname',$#)
+define(m4_assert_onearg_internal,
+`ifelse($2,1,,
+`m4_error(`$1 expected 1 argument, got 'm4_Narguments(`$2')
+)')')
+
+
+dnl Usage: m4_assert_numargs_range(low,high)
+dnl
+dnl Put this, unquoted, at the start of a macro definition to add some code
+dnl to check that between low and high many arguments get passed to the
+dnl macro. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_numargs_range(3,5)
+dnl `mandatory $1 $2 $3 optional $4 $5 end')
+dnl
+dnl See m4_assert_numargs() for more info.
+
+define(m4_assert_numargs_range,
+m4_assert_numargs(2)
+``m4_assert_numargs_range_internal'(m4_doublequote($`'0),$1,$2,$`#',`len'(m4_doublequote($`'1)))`dnl '')
+
+dnl Called: m4_assert_numargs_range_internal(`name',low,high,$#,len(`$1'))
+define(m4_assert_numargs_range_internal,
+m4_assert_numargs(5)
+`m4_assert_numargs_range_check(`$1',`$2',`$3',m4_numargs_count(`$4',`$5'))')
+
+dnl Called: m4_assert_numargs_range_check(`name',low,high,gotargs)
+dnl
+dnl If m4_dollarhash_1_if_noparen_p (BSD m4) then gotargs can be 0 when it
+dnl should be -1. To ensure a `high' of -1 works, a fudge is applied to
+dnl gotargs if it's 0 and the 0 and -1 cases can't be distinguished.
+dnl
+define(m4_assert_numargs_range_check,
+m4_assert_numargs(4)
+`ifelse(eval($2 <= $4 &&
+ ($4 - ($4==0 && m4_dollarhash_1_if_noparen_p) <= $3)),0,
+`m4_error(`$1 expected $2 to $3 arguments, got 'm4_Narguments(`$4')
+)')')
+
+
+dnl Usage: m4_assert_defined(symbol)
+dnl
+dnl Put this unquoted on a line of its own at the start of a macro
+dnl definition to add some code to check that the given symbol is defined
+dnl when the macro is used. For example,
+dnl
+dnl define(foo,
+dnl m4_assert_defined(`FOO_PREFIX')
+dnl `FOO_PREFIX whatever')
+dnl
+dnl This is a convenient way to check that the user or ./configure or
+dnl whatever has defined the things needed by a macro, as opposed to
+dnl silently generating garbage.
+
+define(m4_assert_defined,
+m4_assert_numargs(1)
+``m4_assert_defined_internal'(m4_doublequote($`'0),``$1'')`dnl '')
+
+dnl Called: m4_assert_defined_internal(`macroname',`define_required')
+define(m4_assert_defined_internal,
+m4_assert_numargs(2)
+`ifdef(`$2',,
+`m4_error(`$1 needs $2 defined
+')')')
+
+
+dnl Usage: m4_not_for_expansion(`SYMBOL')
+dnl define_not_for_expansion(`SYMBOL')
+dnl
+dnl m4_not_for_expansion turns SYMBOL, if defined, into something which
+dnl will give an error if expanded. For example,
+dnl
+dnl m4_not_for_expansion(`PIC')
+dnl
+dnl define_not_for_expansion is the same, but always makes a definition.
+dnl
+dnl These are for symbols that should be tested with ifdef(`FOO',...)
+dnl rather than be expanded as such. They guard against accidentally
+dnl omitting the quotes, as in ifdef(FOO,...). Note though that they only
+dnl catches this when FOO is defined, so be sure to test code both with and
+dnl without each definition.
+
+define(m4_not_for_expansion,
+m4_assert_numargs(1)
+`ifdef(`$1',`define_not_for_expansion(`$1')')')
+
+define(define_not_for_expansion,
+m4_assert_numargs(1)
+`ifelse(defn(`$1'),,,
+`m4_error(``$1' has a non-empty value, maybe it shouldnt be munged with m4_not_for_expansion()
+')')dnl
+define(`$1',`m4_not_for_expansion_internal(`$1')')')
+
+define(m4_not_for_expansion_internal,
+`m4_error(``$1' is not meant to be expanded, perhaps you mean `ifdef(`$1',...)'
+')')
+
+
+dnl --------------------------------------------------------------------------
+dnl Various generic m4 things.
+
+
+dnl Usage: m4_ifdef_anyof_p(`symbol',...)
+dnl
+dnl Expand to 1 if any of the symbols in the argument list are defined, or
+dnl to 0 if not.
+
+define(m4_ifdef_anyof_p,
+`ifelse(eval($#<=1 && m4_length(`$1')==0),1, 0,
+`ifdef(`$1', 1,
+`m4_ifdef_anyof_p(shift($@))')')')
+
+
+dnl Usage: m4_length(string)
+dnl
+dnl Determine the length of a string. This is the same as len(), but
+dnl always expands to a number, working around the BSD len() which
+dnl evaluates to nothing given an empty argument.
+
+define(m4_length,
+m4_assert_onearg()
+`eval(len(`$1')-0)')
+
+
+dnl Usage: m4_stringequal_p(x,y)
+dnl
+dnl Expand to 1 or 0 according as strings x and y are equal or not.
+
+define(m4_stringequal_p,
+`ifelse(`$1',`$2',1,0)')
+
+
+dnl Usage: m4_incr_or_decr(n,last)
+dnl
+dnl Do an incr(n) or decr(n), whichever is in the direction of "last".
+dnl Both n and last must be numbers of course.
+
+define(m4_incr_or_decr,
+m4_assert_numargs(2)
+`ifelse(eval($1<$2),1,incr($1),decr($1))')
+
+
+dnl Usage: forloop(i, first, last, statement)
+dnl
+dnl Based on GNU m4 examples/forloop.m4, but extended.
+dnl
+dnl statement is expanded repeatedly, with i successively defined as
+dnl
+dnl first, first+1, ..., last-1, last
+dnl
+dnl Or if first > last, then it's
+dnl
+dnl first, first-1, ..., last+1, last
+dnl
+dnl If first == last, then one expansion is done.
+dnl
+dnl A pushdef/popdef of i is done to preserve any previous definition (or
+dnl lack of definition). first and last are eval()ed and so can be
+dnl expressions.
+dnl
+dnl forloop_first is defined to 1 on the first iteration, 0 on the rest.
+dnl forloop_last is defined to 1 on the last iteration, 0 on the others.
+dnl Nested forloops are allowed, in which case forloop_first and
+dnl forloop_last apply to the innermost loop that's open.
+dnl
+dnl A simple example,
+dnl
+dnl forloop(i, 1, 2*2+1, `dnl
+dnl iteration number i ... ifelse(forloop_first,1,FIRST)
+dnl ')
+
+
+dnl "i" and "statement" are carefully quoted, but "first" and "last" are
+dnl just plain numbers once eval()ed.
+
+define(`forloop',
+m4_assert_numargs(4)
+`pushdef(`$1',eval(`$2'))dnl
+pushdef(`forloop_first',1)dnl
+pushdef(`forloop_last',0)dnl
+forloop_internal(`$1',eval(`$3'),`$4')`'dnl
+popdef(`forloop_first')dnl
+popdef(`forloop_last')dnl
+popdef(`$1')')
+
+dnl Called: forloop_internal(`var',last,statement)
+define(`forloop_internal',
+m4_assert_numargs(3)
+`ifelse($1,$2,
+`define(`forloop_last',1)$3',
+`$3`'dnl
+define(`forloop_first',0)dnl
+define(`$1',m4_incr_or_decr($1,$2))dnl
+forloop_internal(`$1',$2,`$3')')')
+
+
+dnl Usage: m4_toupper(x)
+dnl m4_tolower(x)
+dnl
+dnl Convert the argument string to upper or lower case, respectively.
+dnl Only one argument accepted.
+dnl
+dnl BSD m4 doesn't take ranges like a-z in translit(), so the full alphabet
+dnl is written out.
+
+define(m4_alphabet_lower, `abcdefghijklmnopqrstuvwxyz')
+define(m4_alphabet_upper, `ABCDEFGHIJKLMNOPQRSTUVWXYZ')
+
+define(m4_toupper,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_lower, m4_alphabet_upper)')
+
+define(m4_tolower,
+m4_assert_onearg()
+`translit(`$1', m4_alphabet_upper, m4_alphabet_lower)')
+
+
+dnl Usage: m4_empty_if_zero(x)
+dnl
+dnl Evaluate to x, or to nothing if x is 0. x is eval()ed and so can be an
+dnl expression.
+dnl
+dnl This is useful for x86 addressing mode displacements since forms like
+dnl (%ebx) are one byte shorter than 0(%ebx). A macro `foo' for use as
+dnl foo(%ebx) could be defined with the following so it'll be empty if the
+dnl expression comes out zero.
+dnl
+dnl deflit(`foo', `m4_empty_if_zero(a+b*4-c)')
+dnl
+dnl Naturally this shouldn't be done if, say, a computed jump depends on
+dnl the code being a particular size.
+
+define(m4_empty_if_zero,
+m4_assert_onearg()
+`ifelse(eval($1),0,,eval($1))')
+
+
+dnl Usage: m4_log2(x)
+dnl
+dnl Calculate a logarithm to base 2.
+dnl x must be an integral power of 2, between 2**0 and 2**30.
+dnl x is eval()ed, so it can be an expression.
+dnl An error results if x is invalid.
+dnl
+dnl 2**31 isn't supported, because an unsigned 2147483648 is out of range
+dnl of a 32-bit signed int. Also, the bug in BSD m4 where an eval()
+dnl resulting in 2147483648 (or -2147483648 as the case may be) gives `-('
+dnl means tests like eval(1<<31==(x)) would be necessary, but that then
+dnl gives an unattractive explosion of eval() error messages if x isn't
+dnl numeric.
+
+define(m4_log2,
+m4_assert_numargs(1)
+`m4_log2_internal(0,1,eval(`$1'))')
+
+dnl Called: m4_log2_internal(n,2**n,target)
+define(m4_log2_internal,
+m4_assert_numargs(3)
+`ifelse($2,$3,$1,
+`ifelse($1,30,
+`m4_error(`m4_log2() argument too big or not a power of two: $3
+')',
+`m4_log2_internal(incr($1),eval(2*$2),$3)')')')
+
+
+dnl Usage: m4_div2_towards_zero
+dnl
+dnl m4 division is probably whatever a C signed division is, and C doesn't
+dnl specify what rounding gets used on negatives, so this expression forces
+dnl a rounding towards zero.
+
+define(m4_div2_towards_zero,
+m4_assert_numargs(1)
+`eval((($1) + ((($1)<0) & ($1))) / 2)')
+
+
+dnl Usage: m4_lshift(n,count)
+dnl m4_rshift(n,count)
+dnl
+dnl Calculate n shifted left or right by count many bits. Both n and count
+dnl are eval()ed and so can be expressions.
+dnl
+dnl Negative counts are allowed and mean a shift in the opposite direction.
+dnl Negative n is allowed and right shifts will be arithmetic (meaning
+dnl divide by 2**count, rounding towards zero, also meaning the sign bit is
+dnl duplicated).
+dnl
+dnl Use these macros instead of << and >> in eval() since the basic ccs
+dnl SysV m4 doesn't have those operators.
+
+define(m4_rshift,
+m4_assert_numargs(2)
+`m4_lshift(`$1',-(`$2'))')
+
+define(m4_lshift,
+m4_assert_numargs(2)
+`m4_lshift_internal(eval(`$1'),eval(`$2'))')
+
+define(m4_lshift_internal,
+m4_assert_numargs(2)
+`ifelse(eval($2-0==0),1,$1,
+`ifelse(eval($2>0),1,
+`m4_lshift_internal(eval($1*2),decr($2))',
+`m4_lshift_internal(m4_div2_towards_zero($1),incr($2))')')')
+
+
+dnl Usage: deflit(name,value)
+dnl
+dnl Like define(), but "name" expands like a literal, rather than taking
+dnl arguments. For example "name(%eax)" expands to "value(%eax)".
+dnl
+dnl Limitations:
+dnl
+dnl $ characters in the value part must have quotes to stop them looking
+dnl like macro parameters. For example, deflit(reg,`123+$`'4+567'). See
+dnl defreg() below for handling simple register definitions like $7 etc.
+dnl
+dnl "name()" is turned into "name", unfortunately. In GNU and SysV m4 an
+dnl error is generated when this happens, but in BSD m4 it will happen
+dnl silently. The problem is that in BSD m4 $# is 1 in both "name" or
+dnl "name()", so there's no way to differentiate them. Because we want
+dnl plain "name" to turn into plain "value", we end up with "name()"
+dnl turning into plain "value" too.
+dnl
+dnl "name(foo)" will lose any whitespace after commas in "foo", for example
+dnl "disp(%eax, %ecx)" would become "128(%eax,%ecx)".
+dnl
+dnl These parentheses oddities shouldn't matter in assembler text, but if
+dnl they do the suggested workaround is to write "name ()" or "name (foo)"
+dnl to stop the parentheses looking like a macro argument list. If a space
+dnl isn't acceptable in the output, then write "name`'()" or "name`'(foo)".
+dnl The `' is stripped when read, but again stops the parentheses looking
+dnl like parameters.
+
+dnl Quoting for deflit_emptyargcheck is similar to m4_assert_numargs. The
+dnl stuff in the ifelse gives a $#, $1 and $@ evaluated in the new macro
+dnl created, not in deflit.
+define(deflit,
+m4_assert_numargs(2)
+`define(`$1',
+`deflit_emptyargcheck'(``$1'',$`#',m4_doublequote($`'1))`dnl
+$2`'dnl
+ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')
+
+dnl Called: deflit_emptyargcheck(macroname,$#,`$1')
+define(deflit_emptyargcheck,
+`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)
+')')')
+
+
+dnl Usage: m4_assert(`expr')
+dnl
+dnl Test a compile-time requirement with an m4 expression. The expression
+dnl should be quoted, and will be eval()ed and expected to be non-zero.
+dnl For example,
+dnl
+dnl m4_assert(`FOO*2+6 < 14')
+
+define(m4_assert,
+m4_assert_numargs(1)
+`ifelse(eval($1),1,,
+`m4_error(`assertion failed: $1
+')')')
+
+
+dnl --------------------------------------------------------------------------
+dnl Various assembler things, not specific to any particular CPU.
+dnl
+
+
+dnl Usage: include_mpn(`filename')
+dnl
+dnl Like include(), but adds a path to the mpn source directory. For
+dnl example,
+dnl
+dnl include_mpn(`sparc64/addmul_1h.asm')
+
+define(include_mpn,
+m4_assert_numargs(1)
+m4_assert_defined(`CONFIG_TOP_SRCDIR')
+`include(CONFIG_TOP_SRCDIR`/mpn/$1')')
+
+
+dnl Usage: C comment ...
+dnl
+dnl "C" works like a FORTRAN-style comment character. This can be used for
+dnl comments to the right of assembly instructions, where just dnl would
+dnl remove the linefeed, and concatenate adjacent lines.
+dnl
+dnl "C" and/or "dnl" are useful when an assembler doesn't support comments,
+dnl or where different assemblers for a particular CPU have different
+dnl comment styles. The intermediate ".s" files will end up with no
+dnl comments, just code.
+dnl
+dnl Using "C" is not intended to cause offence to anyone who doesn't like
+dnl FORTRAN; but if that happens it's an unexpected bonus.
+
+define(C, `
+dnl')
+
+
+dnl Various possible defines passed from the Makefile that are to be tested
+dnl with ifdef() rather than be expanded.
+
+m4_not_for_expansion(`PIC')
+
+dnl aors_n
+m4_not_for_expansion(`OPERATION_add_n')
+m4_not_for_expansion(`OPERATION_sub_n')
+
+dnl aorsmul_n
+m4_not_for_expansion(`OPERATION_addmul_1')
+m4_not_for_expansion(`OPERATION_submul_1')
+
+dnl logops_n
+m4_not_for_expansion(`OPERATION_and_n')
+m4_not_for_expansion(`OPERATION_andn_n')
+m4_not_for_expansion(`OPERATION_nand_n')
+m4_not_for_expansion(`OPERATION_ior_n')
+m4_not_for_expansion(`OPERATION_iorn_n')
+m4_not_for_expansion(`OPERATION_nior_n')
+m4_not_for_expansion(`OPERATION_xor_n')
+m4_not_for_expansion(`OPERATION_xnor_n')
+
+dnl popham
+m4_not_for_expansion(`OPERATION_popcount')
+m4_not_for_expansion(`OPERATION_hamdist')
+
+
+dnl Usage: m4_config_gmp_mparam(`symbol')
+dnl
+dnl Check that `symbol' is defined. If it isn't, issue an error and
+dnl terminate immediately. The error message explains that the symbol
+dnl should be in config.m4, copied from gmp-mparam.h.
+dnl
+dnl Processing is terminated immediately since missing something like
+dnl KARATSUBA_SQR_THRESHOLD can lead to infinite loops with endless error
+dnl messages.
+
+define(m4_config_gmp_mparam,
+m4_assert_numargs(1)
+`ifdef(`$1',,
+`m4_error(`$1 is not defined.
+ "configure" should have extracted this from gmp-mparam.h and put it
+ in config.m4, but somehow this has failed.
+')m4exit(1)')')
+
+
+dnl Usage: defreg(name,reg)
+dnl
+dnl Give a name to a $ style register. For example,
+dnl
+dnl defreg(foo,$12)
+dnl
+dnl defreg() inserts an extra pair of quotes after the $ so that it's not
+dnl interpreted as an m4 macro parameter, ie. foo is actually $`'12. m4
+dnl strips those quotes when foo is expanded.
+dnl
+dnl deflit() is used to make the new definition, so it will expand
+dnl literally even if followed by parentheses ie. foo(99) will become
+dnl $12(99). (But there's nowhere that would be used is there?)
+dnl
+dnl When making further definitions from existing defreg() macros, remember
+dnl to use defreg() again to protect the $ in the new definitions too. For
+dnl example,
+dnl
+dnl defreg(a0,$4)
+dnl defreg(a1,$5)
+dnl ...
+dnl
+dnl defreg(PARAM_DST,a0)
+dnl
+dnl This is only because a0 is expanding at the time the PARAM_DST
+dnl definition is made, leaving a literal $4 that must be re-quoted. On
+dnl the other hand in something like the following ra is only expanded when
+dnl ret is used and its $`'31 protection will have its desired effect at
+dnl that time.
+dnl
+dnl defreg(ra,$31)
+dnl ...
+dnl define(ret,`j ra')
+dnl
+dnl Note that only $n forms are meant to be used here, and something like
+dnl 128($30) doesn't get protected and will come out wrong.
+
+define(defreg,
+m4_assert_numargs(2)
+`deflit(`$1',
+substr(`$2',0,1)``''substr(`$2',1))')
+
+
+dnl Usage: m4_instruction_wrapper(num)
+dnl
+dnl Put this, unquoted, on a line on its own, at the start of a macro
+dnl that's a wrapper around an assembler instruction. It adds code to give
+dnl a descriptive error message if the macro is invoked without arguments.
+dnl
+dnl For example, suppose jmp needs to be wrapped,
+dnl
+dnl define(jmp,
+dnl m4_instruction_wrapper()
+dnl m4_assert_numargs(1)
+dnl `.byte 0x42
+dnl .long $1
+dnl nop')
+dnl
+dnl The point of m4_instruction_wrapper is to get a better error message
+dnl than m4_assert_numargs would give if jmp is accidentally used as plain
+dnl "jmp foo" instead of the intended "jmp( foo)". "jmp()" with no
+dnl argument also provokes the error message.
+dnl
+dnl m4_instruction_wrapper should only be used with wrapped instructions
+dnl that take arguments, since obviously something meant to be used as
+dnl plain "ret", say, doesn't want to give an error when used that way.
+
+define(m4_instruction_wrapper,
+m4_assert_numargs(0)
+``m4_instruction_wrapper_internal'(m4_doublequote($`'0),dnl
+m4_doublequote(ifdef(`__file__',__file__,`the m4 sources')),dnl
+$`#',m4_doublequote($`'1))`dnl'')
+
+dnl Called: m4_instruction_wrapper_internal($0,`filename',$#,$1)
+define(m4_instruction_wrapper_internal,
+`ifelse(eval($3<=1 && m4_length(`$4')==0),1,
+`m4_error(`$1 is a macro replacing that instruction and needs arguments, see $2 for details
+')')')
+
+
+dnl Usage: UNROLL_LOG2, UNROLL_MASK, UNROLL_BYTES
+dnl CHUNK_LOG2, CHUNK_MASK, CHUNK_BYTES
+dnl
+dnl When code supports a variable amount of loop unrolling, the convention
+dnl is to define UNROLL_COUNT to the number of limbs processed per loop.
+dnl When testing code this can be varied to see how much the loop overhead
+dnl is costing. For example,
+dnl
+dnl deflit(UNROLL_COUNT, 32)
+dnl
+dnl If the forloop() generating the unrolled loop has a pattern processing
+dnl more than one limb, the convention is to express this with CHUNK_COUNT.
+dnl For example,
+dnl
+dnl deflit(CHUNK_COUNT, 2)
+dnl
+dnl The LOG2, MASK and BYTES definitions below are derived from these COUNT
+dnl definitions. If COUNT is redefined, the LOG2, MASK and BYTES follow
+dnl the new definition automatically.
+dnl
+dnl LOG2 is the log base 2 of COUNT. MASK is COUNT-1, which can be used as
+dnl a bit mask. BYTES is BYTES_PER_MP_LIMB*COUNT, the number of bytes
+dnl processed in each unrolled loop.
+dnl
+dnl BYTES_PER_MP_LIMB is defined in a CPU specific m4 include file. It
+dnl exists only so the BYTES definitions here can be common to all CPUs.
+dnl In the actual code for a given CPU, an explicit 4 or 8 may as well be
+dnl used because the code is only for a particular CPU, it doesn't need to
+dnl be general.
+dnl
+dnl Note that none of these macros do anything except give conventional
+dnl names to commonly used things. You still have to write your own
+dnl expressions for a forloop() and the resulting address displacements.
+dnl Something like the following would be typical for 4 bytes per limb.
+dnl
+dnl forloop(`i',0,UNROLL_COUNT-1,`
+dnl deflit(`disp',eval(i*4))
+dnl ...
+dnl ')
+dnl
+dnl Or when using CHUNK_COUNT,
+dnl
+dnl forloop(`i',0,UNROLL_COUNT/CHUNK_COUNT-1,`
+dnl deflit(`disp0',eval(i*CHUNK_COUNT*4))
+dnl deflit(`disp1',eval(disp0+4))
+dnl ...
+dnl ')
+dnl
+dnl Clearly `i' can be run starting from 1, or from high to low or whatever
+dnl best suits.
+
+deflit(UNROLL_LOG2,
+m4_assert_defined(`UNROLL_COUNT')
+`m4_log2(UNROLL_COUNT)')
+
+deflit(UNROLL_MASK,
+m4_assert_defined(`UNROLL_COUNT')
+`eval(UNROLL_COUNT-1)')
+
+deflit(UNROLL_BYTES,
+m4_assert_defined(`UNROLL_COUNT')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(UNROLL_COUNT * BYTES_PER_MP_LIMB)')
+
+deflit(CHUNK_LOG2,
+m4_assert_defined(`CHUNK_COUNT')
+`m4_log2(CHUNK_COUNT)')
+
+deflit(CHUNK_MASK,
+m4_assert_defined(`CHUNK_COUNT')
+`eval(CHUNK_COUNT-1)')
+
+deflit(CHUNK_BYTES,
+m4_assert_defined(`CHUNK_COUNT')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(CHUNK_COUNT * BYTES_PER_MP_LIMB)')
+
+
+dnl Usage: MPN(name)
+dnl
+dnl Add MPN_PREFIX to a name.
+dnl MPN_PREFIX defaults to "__gmpn_" if not defined.
+
+ifdef(`MPN_PREFIX',,
+`define(`MPN_PREFIX',`__gmpn_')')
+
+define(MPN,
+m4_assert_numargs(1)
+`MPN_PREFIX`'$1')
+
+
+dnl Usage: mpn_add_n, etc
+dnl
+dnl Convenience definitions using MPN(), like the #defines in gmp.h. Each
+dnl function that might be implemented in assembler is here.
+
+define(define_mpn,
+m4_assert_numargs(1)
+`define(`mpn_$1',`MPN(`$1')')')
+
+define_mpn(add)
+define_mpn(add_1)
+define_mpn(add_n)
+define_mpn(add_nc)
+define_mpn(addmul_1)
+define_mpn(addmul_1c)
+define_mpn(addsub_n)
+define_mpn(addsub_nc)
+define_mpn(and_n)
+define_mpn(andn_n)
+define_mpn(bdivmod)
+define_mpn(cmp)
+define_mpn(com_n)
+define_mpn(copyd)
+define_mpn(copyi)
+define_mpn(divexact_by3c)
+define_mpn(divrem)
+define_mpn(divrem_1)
+define_mpn(divrem_1c)
+define_mpn(divrem_2)
+define_mpn(divrem_classic)
+define_mpn(divrem_newton)
+define_mpn(dump)
+define_mpn(gcd)
+define_mpn(gcd_1)
+define_mpn(gcdext)
+define_mpn(get_str)
+define_mpn(hamdist)
+define_mpn(invert_limb)
+define_mpn(ior_n)
+define_mpn(iorn_n)
+define_mpn(kara_mul_n)
+define_mpn(kara_sqr_n)
+define_mpn(lshift)
+define_mpn(lshiftc)
+define_mpn(mod_1)
+define_mpn(mod_1c)
+define_mpn(mul)
+define_mpn(mul_1)
+define_mpn(mul_1c)
+define_mpn(mul_basecase)
+define_mpn(mul_n)
+define_mpn(perfect_square_p)
+define_mpn(popcount)
+define_mpn(preinv_mod_1)
+define_mpn(nand_n)
+define_mpn(nior_n)
+define_mpn(random)
+define_mpn(random2)
+define_mpn(rshift)
+define_mpn(rshiftc)
+define_mpn(scan0)
+define_mpn(scan1)
+define_mpn(set_str)
+define_mpn(sqr_basecase)
+define_mpn(sub_n)
+define_mpn(sqrtrem)
+define_mpn(sub)
+define_mpn(sub_1)
+define_mpn(sub_n)
+define_mpn(sub_nc)
+define_mpn(submul_1)
+define_mpn(submul_1c)
+define_mpn(toom3_mul_n)
+define_mpn(toom3_sqr_n)
+define_mpn(umul_ppmm)
+define_mpn(udiv_qrnnd)
+define_mpn(xnor_n)
+define_mpn(xor_n)
+
+dnl Usage: ASM_START
+dnl
+dnl Emitted once at the top of each generated .s file. Empty for this
+dnl CPU family -- no file-level directives are needed.
+
+define(`ASM_START',
+	`')
+
+dnl Usage: PROLOGUE(name) ... EPILOGUE(name)
+dnl
+dnl Bracket an assembler function. PROLOGUE switches to the text section,
+dnl aligns, exports the GSYM_PREFIX-decorated symbol, marks it as a
+dnl function, and emits its label. EPILOGUE emits the matching SIZE
+dnl directive (symbol size = here minus the label).
+
+define(`PROLOGUE',
+	`
+	TEXT
+	ALIGN(4)
+	GLOBL	GSYM_PREFIX`$1'
+	TYPE(GSYM_PREFIX`$1',`function')
+GSYM_PREFIX`$1':')
+
+define(`EPILOGUE',
+	`
+	SIZE(GSYM_PREFIX`$1',.-GSYM_PREFIX`$1')')
+
+dnl Usage: L(name)
+dnl
+dnl Emit a local-label name, decorated with LSYM_PREFIX.
+dnl LSYM_PREFIX might be L$, so defn() must be used to quote it or the L
+dnl will expand as the L macro, an infinite recursion.
+define(`L',`defn(`LSYM_PREFIX')$1')
+
+dnl Usage: INT32(label,value) and INT64(label,hi,lo)
+dnl
+dnl Emit an aligned, labelled 32-bit constant, or a 64-bit constant as
+dnl two W32 words. NOTE(review): the word order of the two W32s in INT64
+dnl (argument $2 first, then $3) is endian-convention dependent --
+dnl confirm against the per-CPU m4 include that defines W32.
+
+define(`INT32',
+	`
+	ALIGN(4)
+$1:
+	W32	$2
+	')
+
+define(`INT64',
+	`
+	ALIGN(8)
+$1:
+	W32	$2
+	W32	$3
+	')
+
+
+dnl Usage: ALIGN(bytes)
+dnl
+dnl Emit a ".align" directive.  The alignment is specified in bytes, and
+dnl will normally need to be a power of 2.  The actual ".align" generated
+dnl is either bytes or logarithmic according to what ./configure detects.
+dnl
+dnl ALIGN_FILL_0x90, if defined and equal to "yes", means a ", 0x90" should
+dnl be appended (this is for x86).
+
+define(ALIGN,
+m4_assert_numargs(1)
+m4_assert_defined(`ALIGN_LOGARITHMIC')
+`.align	ifelse(ALIGN_LOGARITHMIC,yes,`m4_log2($1)',`eval($1)')dnl
+ifelse(ALIGN_FILL_0x90,yes,`, 0x90')')
+
+
+dnl Usage: MULFUNC_PROLOGUE(function function...)
+dnl
+dnl A dummy macro which is grepped for by ./configure to know what
+dnl functions a multi-function file is providing.  Use this if there aren't
+dnl explicit PROLOGUE()s for each possible function.
+dnl
+dnl Multiple MULFUNC_PROLOGUEs can be used, or just one with the function
+dnl names separated by spaces.
+
+define(`MULFUNC_PROLOGUE',
+m4_assert_numargs(1)
+`')
+
+
+divert`'dnl
diff --git a/rts/gmp/mpn/clipper/add_n.s b/rts/gmp/mpn/clipper/add_n.s
new file mode 100644
index 0000000000..538a1caed0
--- /dev/null
+++ b/rts/gmp/mpn/clipper/add_n.s
@@ -0,0 +1,48 @@
+; Clipper __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+.text
+	.align 16
+.globl ___gmpn_add_n
+___gmpn_add_n:
+; Entry: res_ptr and s1_ptr appear to arrive in r0/r1, with the remaining
+; arguments on the stack -- the two loadw's below pull what are presumably
+; s2_ptr and size into r2/r3. NOTE(review): confirm against the Clipper
+; calling convention; only the register usage visible here is certain.
+	subq	$8,sp		; make room to preserve r6
+	storw	r6,(sp)		; callee-saved r6 is used as the carry register
+	loadw	12(sp),r2
+	loadw	16(sp),r3
+	loadq	$0,r6		; clear carry-save register
+
+; Main loop: one limb (4 bytes) per iteration. The carry flag is parked
+; in r6 between iterations via the addwc/subwc pair below.
+.Loop:	loadw	(r1),r4
+	loadw	(r2),r5
+	addwc	r6,r6		; restore carry from r6
+	addwc	r5,r4		; limb add with carry-in
+	storw	r4,(r0)
+	subwc	r6,r6		; save carry in r6
+	addq	$4,r0		; advance all three limb pointers
+	addq	$4,r1
+	addq	$4,r2
+	subq	$1,r3		; count down remaining limbs
+	brne	.Loop
+
+	negw	r6,r0		; return value: carry-out as 0/1
+	loadw	(sp),r6		; restore saved r6
+	addq	$8,sp
+	ret	sp
diff --git a/rts/gmp/mpn/clipper/mul_1.s b/rts/gmp/mpn/clipper/mul_1.s
new file mode 100644
index 0000000000..c0c756488c
--- /dev/null
+++ b/rts/gmp/mpn/clipper/mul_1.s
@@ -0,0 +1,47 @@
+; Clipper __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+.text
+	.align 16
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+; Entry: res_ptr/s1_ptr appear to arrive in r0/r1; the loadw's below pull
+; what are presumably size and the multiplier limb into r2/r3.
+; NOTE(review): confirm argument layout against the Clipper ABI.
+	subq	$8,sp		; make room to preserve r6
+	storw	r6,(sp)		; callee-saved r6 holds the running carry limb
+	loadw	12(sp),r2
+	loadw	16(sp),r3
+	loadq	$0,r6		; clear carry limb
+
+; Main loop: one limb per iteration. mulwux presumably leaves the
+; double-width product split across r4 (low) and r5 (high) --
+; NOTE(review): verify mulwux's result registers in the Clipper manual.
+.Loop:	loadw	(r1),r4
+	mulwux	r3,r4
+	addw	r6,r4		; add old carry limb into low product limb
+	loadq	$0,r6
+	addwc	r5,r6		; propagate cy into high product limb
+	storw	r4,(r0)
+	addq	$4,r0		; advance source and destination pointers
+	addq	$4,r1
+	subq	$1,r2		; count down remaining limbs
+	brne	.Loop
+
+	movw	r6,r0		; return the final carry limb
+	loadw	0(sp),r6	; restore saved r6
+	addq	$8,sp
+	ret	sp
diff --git a/rts/gmp/mpn/clipper/sub_n.s b/rts/gmp/mpn/clipper/sub_n.s
new file mode 100644
index 0000000000..44d8797289
--- /dev/null
+++ b/rts/gmp/mpn/clipper/sub_n.s
@@ -0,0 +1,48 @@
+; Clipper __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+.text
+	.align 16
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+; Mirror image of ___gmpn_add_n in this directory, with subwc in place of
+; addwc for the limb operation. Entry: res_ptr/s1_ptr appear to arrive in
+; r0/r1; r2/r3 below are presumably s2_ptr and size -- NOTE(review):
+; confirm against the Clipper calling convention.
+	subq	$8,sp		; make room to preserve r6
+	storw	r6,(sp)		; callee-saved r6 is used as the borrow register
+	loadw	12(sp),r2
+	loadw	16(sp),r3
+	loadq	$0,r6		; clear carry-save register
+
+; Main loop: one limb (4 bytes) per iteration; borrow parked in r6.
+.Loop:	loadw	(r1),r4
+	loadw	(r2),r5
+	addwc	r6,r6		; restore carry from r6
+	subwc	r5,r4		; limb subtract with borrow-in
+	storw	r4,(r0)
+	subwc	r6,r6		; save carry in r6
+	addq	$4,r0		; advance all three limb pointers
+	addq	$4,r1
+	addq	$4,r2
+	subq	$1,r3		; count down remaining limbs
+	brne	.Loop
+
+	negw	r6,r0		; return value: borrow-out as 0/1
+	loadw	(sp),r6		; restore saved r6
+	addq	$8,sp
+	ret	sp
diff --git a/rts/gmp/mpn/cray/README b/rts/gmp/mpn/cray/README
new file mode 100644
index 0000000000..8195c67e21
--- /dev/null
+++ b/rts/gmp/mpn/cray/README
@@ -0,0 +1,14 @@
+The (poorly optimized) code in this directory was originally written for a
+j90 system, but finished on a c90. It should work on all Cray vector
+computers. For the T3E and T3D systems, the `alpha' subdirectory at the
+same level as the directory containing this file, is much better.
+
+* `+' seems to be faster than `|' when combining carries.
+
+* It is possible that the best multiply performance would be achived by
+ storing only 24 bits per element, and using lazy carry propagation. Before
+ calling i24mult, full carry propagation would be needed.
+
+* Supply tasking versions of the C loops.
+
+
diff --git a/rts/gmp/mpn/cray/add_n.c b/rts/gmp/mpn/cray/add_n.c
new file mode 100644
index 0000000000..1fdb394993
--- /dev/null
+++ b/rts/gmp/mpn/cray/add_n.c
@@ -0,0 +1,96 @@
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
+ For Cray vector processors.
+
+ Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Add the n-limb vectors a[] and b[] into c[], returning the carry out
+   (0 or 1).  Strategy for Cray PVP: compute each limb sum plus its
+   incoming single-place carry in one vectorizable pass; if any carry
+   would need to ripple further than one place (recorded in
+   more_carries), redo the whole thing with the scalar ripple loop.  */
+mp_limb_t
+mpn_add_n (c, a, b, n)
+     mp_ptr c;
+     mp_srcptr a, b;
+     mp_size_t n;
+{
+  mp_size_t i;
+  mp_size_t nm1 = n - 1;
+  int more_carries = 0;		/* count of sums where the carry ripples */
+  int carry_out;
+
+  /* For small operands the non-vector code is faster.  */
+  if (n < 16)
+    goto sequential;
+
+  /* The vector pass reads a[] and b[] after c[] has been partly written,
+     so an overlapping destination must be copied aside first, then the
+     call retried on the non-overlapping copies.  */
+  if (a == c || b == c)
+    {
+      TMP_DECL (marker);
+      TMP_MARK (marker);
+      if (c == a)
+	{
+	  /* allocate temp space for a */
+	  mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+	  MPN_COPY (ax, a, n);
+	  a = (mp_srcptr) ax;
+	}
+      if (c == b)
+	{
+	  /* allocate temp space for b */
+	  mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+	  MPN_COPY (bx, b, n);
+	  b = (mp_srcptr) bx;
+	}
+      carry_out = mpn_add_n (c, a, b, n);
+      TMP_FREE (marker);
+      return carry_out;
+    }
+
+  /* Carry out of the most significant limb sum.  */
+  carry_out = a[nm1] + b[nm1] < a[nm1];
+
+#pragma _CRI ivdep			/* Cray PVP systems */
+  for (i = nm1; i > 0; i--)
+    {
+      int cy_in;
+      /* cy_in is the carry generated one place below (position i-1).  */
+      cy_in = a[i - 1] + b[i - 1] < a[i - 1];
+      c[i] = a[i] + b[i] + cy_in;
+      /* If adding cy_in itself wrapped, the carry must ripple further;
+	 flag it so the scalar fallback below recomputes everything.  */
+      more_carries += c[i] < cy_in;
+    }
+  c[0] = a[0] + b[0];		/* least significant limb has no carry-in */
+
+  if (more_carries)
+    {
+      /* This won't vectorize, but we should come here rarely.  */
+      int cy;
+    sequential:
+      cy = 0;
+      for (i = 0; i < n; i++)
+	{
+	  mp_limb_t ai, ci, t;
+	  ai = a[i];
+	  t = b[i] + cy;	/* add previous carry into one addend */
+	  cy = t < cy;		/* carry from that addition */
+	  ci = ai + t;
+	  cy += ci < ai;	/* combine carry from the main addition */
+	  c[i] = ci;
+	}
+      carry_out = cy;
+    }
+
+  return carry_out;
+}
diff --git a/rts/gmp/mpn/cray/addmul_1.c b/rts/gmp/mpn/cray/addmul_1.c
new file mode 100644
index 0000000000..031b4e8e8d
--- /dev/null
+++ b/rts/gmp/mpn/cray/addmul_1.c
@@ -0,0 +1,46 @@
+/* mpn_addmul_1 for Cray PVP.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* rp[0..n-1] += up[0..n-1] * limb; return the carry limb.
+   GMPN_MULWW (the Fortran/CAL helper in this directory) produces the
+   high and low product limbs as two separate vectors p1[] and p0[];
+   the result is then assembled with two vectorized mpn_add_n calls:
+   first rp += p0, then the shifted-by-one-limb high parts p1.  */
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_ptr p0, p1, tp;
+  mp_limb_t cy_limb;
+  TMP_DECL (marker);
+  TMP_MARK (marker);
+
+  /* p1 = high product limbs, p0 = low product limbs, tp = scratch sum.  */
+  p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+  p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+  tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+
+  GMPN_MULWW (p1, p0, up, &n, &limb);	/* note: args passed by reference */
+  cy_limb = mpn_add_n (tp, rp, p0, n);	/* tp = rp + low limbs */
+  rp[0] = tp[0];			/* low limb gets no high-part term */
+  cy_limb += mpn_add_n (rp + 1, tp + 1, p1, n - 1);  /* add p1 shifted up */
+  cy_limb += p1[n - 1];			/* top high limb goes into the carry */
+
+  TMP_FREE (marker);
+  return cy_limb;
+}
diff --git a/rts/gmp/mpn/cray/gmp-mparam.h b/rts/gmp/mpn/cray/gmp-mparam.h
new file mode 100644
index 0000000000..14f7b8e05b
--- /dev/null
+++ b/rts/gmp/mpn/cray/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 64
+#define BITS_PER_SHORTINT 32
+#define BITS_PER_CHAR 8
diff --git a/rts/gmp/mpn/cray/mul_1.c b/rts/gmp/mpn/cray/mul_1.c
new file mode 100644
index 0000000000..0c8750b4ac
--- /dev/null
+++ b/rts/gmp/mpn/cray/mul_1.c
@@ -0,0 +1,44 @@
+/* mpn_mul_1 for Cray PVP.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* rp[0..n-1] = up[0..n-1] * limb; return the carry limb.
+   Same scheme as mpn_addmul_1 in this directory: GMPN_MULWW computes
+   high (p1[]) and low (p0[]) product limbs as separate vectors, which
+   are then combined with one vectorized mpn_add_n.  */
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_ptr p0, p1;
+  mp_limb_t cy_limb;
+  TMP_DECL (marker);
+  TMP_MARK (marker);
+
+  /* p1 = high product limbs, p0 = low product limbs.  */
+  p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+  p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+
+  GMPN_MULWW (p1, p0, up, &n, &limb);	/* note: args passed by reference */
+  rp[0] = p0[0];			/* low limb gets no high-part term */
+  cy_limb = mpn_add_n (rp + 1, p0 + 1, p1, n - 1);  /* add p1 shifted up */
+  cy_limb += p1[n - 1];			/* top high limb goes into the carry */
+
+  TMP_FREE (marker);
+  return cy_limb;
+}
diff --git a/rts/gmp/mpn/cray/mulww.f b/rts/gmp/mpn/cray/mulww.f
new file mode 100644
index 0000000000..99507c1e44
--- /dev/null
+++ b/rts/gmp/mpn/cray/mulww.f
@@ -0,0 +1,54 @@
+c Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+c Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+c This file is part of the GNU MP Library.
+
+c The GNU MP Library is free software; you can redistribute it and/or
+c modify it under the terms of the GNU Lesser General Public License as
+c published by the Free Software Foundation; either version 2.1 of the
+c License, or (at your option) any later version.
+
+c The GNU MP Library is distributed in the hope that it will be useful,
+c but WITHOUT ANY WARRANTY; without even the implied warranty of
+c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+c Lesser General Public License for more details.
+
+c You should have received a copy of the GNU Lesser General Public
+c License along with the GNU MP Library; see the file COPYING.LIB. If
+c not, write to the Free Software Foundation, Inc., 59 Temple Place -
+c Suite 330, Boston, MA 02111-1307, USA.
+
+c p1[] = hi(a[]*s); the upper limbs of each product
+c p0[] = low(a[]*s); the corresponding lower limbs
+c n is number of limbs in the vectors
+
+c For each i: (p1(i),p0(i)) = a(i) * s as a double-width product.
+c Each 64-bit operand is split into three 22-bit pieces (4194303 =
+c 2**22-1), pre-shifted left 24 so the i24mult 24x24-bit multiply
+c hardware can be used; the partial products t0..t4 are then
+c recombined with shifts, with c collecting the inter-piece carries.
+c NOTE(review): the exact shift counts in the recombination assume the
+c pre-shift-by-24 layout above -- see mulww.s for the compiled form.
+	subroutine gmpn_mulww(p1,p0,a,n,s)
+	integer*8 p1(0:*),p0(0:*),a(0:*),s
+	integer n
+
+	integer*8 a0,a1,a2,s0,s1,s2,c
+	integer*8 ai,t0,t1,t2,t3,t4
+
+c Split the invariant multiplier s once, outside the loop.
+	s0 = shiftl(and(s,4194303),24)
+	s1 = shiftl(and(shiftr(s,22),4194303),24)
+	s2 = shiftl(and(shiftr(s,44),4194303),24)
+
+	do i = 0,n-1
+	   ai = a(i)
+	   a0 = shiftl(and(ai,4194303),24)
+	   a1 = shiftl(and(shiftr(ai,22),4194303),24)
+	   a2 = shiftl(and(shiftr(ai,44),4194303),24)
+
+c Partial products by weight: t_k collects the pieces of weight 2**(22k).
+	   t0 = i24mult(a0,s0)
+	   t1 = i24mult(a0,s1)+i24mult(a1,s0)
+	   t2 = i24mult(a0,s2)+i24mult(a1,s1)+i24mult(a2,s0)
+	   t3 = i24mult(a1,s2)+i24mult(a2,s1)
+	   t4 = i24mult(a2,s2)
+
+	   p0(i)=shiftl(t2,44)+shiftl(t1,22)+t0
+c c = carries out of the low word from the overlapping partial products.
+	   c=shiftr(shiftr(t0,22)+and(t1,4398046511103)+
+     $	     shiftl(and(t2,1048575),22),42)
+	   p1(i)=shiftl(t4,24)+shiftl(t3,2)+shiftr(t2,20)+shiftr(t1,42)+c
+	end do
+	end
diff --git a/rts/gmp/mpn/cray/mulww.s b/rts/gmp/mpn/cray/mulww.s
new file mode 100644
index 0000000000..890cdcf94d
--- /dev/null
+++ b/rts/gmp/mpn/cray/mulww.s
@@ -0,0 +1,245 @@
+* Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+* Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+* This file is generated from mulww.f in this same directory.
+
+* This file is part of the GNU MP Library.
+
+* The GNU MP Library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 2.1 of the
+* License, or (at your option) any later version.
+
+* The GNU MP Library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+
+* You should have received a copy of the GNU Lesser General Public
+* License along with the GNU MP Library; see the file COPYING.LIB. If
+* not, write to the Free Software Foundation, Inc., 59 Temple Place -
+* Suite 330, Boston, MA 02111-1307, USA.
+
+ IDENT GMPN_MULWW
+**********************************************
+* Assemble with Cal Version 2.0 *
+* *
+* Generated by CFT77 6.0.4.19 *
+* on 06/27/00 at 04:34:13 *
+* *
+**********************************************
+* ALLOW UNDERSCORES IN IDENTIFIERS
+ EDIT OFF
+ FORMAT NEW
+@DATA SECTION DATA,CM
+@DATA = W.*
+ CON O'0000000000040000000000
+ CON O'0435152404713723252514 ;GMPN_MUL 1
+ CON O'0535270000000000000000 ;WW 1
+ CON O'0000000000000001200012 ;trbk tbl 1
+ VWD 32/0,32/P.GMPN_MULWW ;trbk tbl 1
+ CON O'0014003000000000001416 ;trbk tbl 1
+ CON O'0000000000000000000011 ;trbk tbl 1
+ CON O'0000000000000000000215 ;trbk tbl 1
+ BSSZ 1 ;trbk tbl 1
+@CODE SECTION CODE
+@CODE = P.*
+L3 = P.* ; 1
+ A0 A6 ;arg base 1
+ A5 6 ;num Darg 1
+ B03,A5 0,A0 ;load DAs 1
+ A0 A1+A2 ; 1
+ A5 1 ;num Ts 1
+ 0,A0 T00,A5 ; 1
+ B02 A2 ;new base 1
+ B66 A3 ;stk top 1
+ B01 A6 ;arg base 1
+ A7 P.L4 ;ofrn rtn 1
+ B00 A7 ;return 1
+ A6 @DATA ; 1
+ J $STKOFEN ;$STKOFEN 1
+GMPN_MULWW = P.* ; 1
+ A0 @DATA+3 ;(trbk) 1
+ B77 A0 ;(trbk) 1
+ A1 13 ;num Bs 1
+ A0 B66 ;stk top 1
+ A2 B66 ;stk tmp 1
+ A4 B67 ;stk limt 1
+ 0,A0 B77,A1 ; 1
+ A7 782 ;stk size 1
+ A3 A2+A7 ; 1
+ A0 A4-A3 ; 1
+ JAM L3 ;overflow 1
+ A0 A6 ;arg base 1
+ A5 6 ;num Darg 1
+ B03,A5 0,A0 ;load DAs 1
+ A0 A1+A2 ; 1
+ A5 1 ;num Ts 1
+ 0,A0 T00,A5 ; 1
+ B02 A2 ;new base 1
+ B66 A3 ;new top 1
+ B01 A6 ;arg base 1
+L4 = P.* ;ofrn rtn 1
+ A7 B07 ;regs 14
+ S7 0,A7 ; 14
+ A6 B10 ;regs 9
+ S6 0,A6 ; 9
+ S5 1 ; 14
+ S4 <22 ; 9
+ S7 S7-S5 ; 14
+ S5 #S7 ; 14
+ T00 S6 ;regs 10
+ S6 S6>22 ; 10
+ S7 T00 ;regs 11
+ S7 S7>44 ; 11
+ S3 T00 ;regs 9
+ S3 S3&S4 ; 9
+ S6 S6&S4 ; 10
+ S7 S7&S4 ; 11
+ S3 S3<24 ; 9
+ S6 S6<24 ; 10
+ S7 S7<24 ; 11
+ S0 S5 ;regs 14
+ S4 S5 ;regs 14
+ S1 S6 ;regs 14
+ S2 S3 ;regs 14
+ S3 S7 ;regs 14
+ JSP L5 ; 14
+L6 = P.* ; 14
+ S7 -S4 ; 14
+ A2 S7 ;regs 14
+ VL A2 ;regs 14
+ A3 B06 ;s_bt_sp 14
+ A5 B05 ;s_bt_sp 14
+ A4 B04 ;s_bt_sp 14
+ A1 VL ; 14
+ A2 S4 ;regs 14
+L7 = P.* ; 14
+ A0 A3 ;regs 15
+ VL A1 ;regs 15
+ V7 ,A0,1 ; 15
+ B11 A5 ;s_bt_sp 15
+ A7 22 ; 17
+ B12 A4 ;s_bt_sp 17
+ V6 V7>A7 ; 17
+ B13 A3 ;s_bt_sp 17
+ S7 <22 ; 17
+ A3 B02 ;s_bt_sp 17
+ V5 S7&V6 ; 17
+ A6 24 ; 17
+ V4 V5<A6 ; 17
+ V3 S1*FV4 ; 22
+ V2 S7&V7 ; 16
+ V1 V2<A6 ; 16
+ V0 S3*FV1 ; 22
+ V6 V0+V3 ; 22
+ A5 44 ; 18
+ V5 V7>A5 ; 18
+ V2 S1*FV1 ; 21
+ V3 S7&V5 ; 18
+ A0 14 ; 34
+ B77 A0 ;regs 34
+ A4 B77 ;regs 34
+ A0 A4+A3 ; 34
+ ,A0,1 V2 ;v_ld_str 34
+ V0 V3<A6 ; 18
+ V7 S2*FV1 ; 20
+ A4 142 ; 34
+ A0 A4+A3 ; 34
+ ,A0,1 V7 ;v_ld_str 34
+ V5 V7>A7 ; 28
+ V2 S2*FV0 ; 22
+ V3 V6+V2 ; 22
+ S7 <20 ; 28
+ V1 S7&V3 ; 28
+ A4 270 ; 34
+ A0 A4+A3 ; 34
+ ,A0,1 V0 ;v_ld_str 34
+ A4 14 ; 34
+ A0 A4+A3 ; 34
+ V7 ,A0,1 ;v_ld_str 34
+ V6 V1<A7 ; 28
+ V2 S2*FV4 ; 21
+ V0 V7+V2 ; 21
+ S7 <42 ; 28
+ V1 S7&V0 ; 28
+ A4 398 ; 34
+ A0 A4+A3 ; 34
+ ,A0,1 V0 ;v_ld_str 34
+ V7 S3*FV4 ; 23
+ V2 V5+V1 ; 28
+ V0 V3<A5 ; 26
+ A5 526 ; 34
+ A0 A5+A3 ; 34
+ ,A0,1 V0 ;v_ld_str 34
+ A5 270 ; 34
+ A0 A5+A3 ; 34
+ V4 ,A0,1 ;v_ld_str 34
+ V5 V2+V6 ; 28
+ A5 20 ; 32
+ V1 V3>A5 ; 32
+ V0 S1*FV4 ; 23
+ A5 654 ; 34
+ A0 A5+A3 ; 34
+ ,A0,1 V1 ;v_ld_str 34
+ V6 V7+V0 ; 23
+ A5 2 ; 32
+ V2 V6<A5 ; 32
+ V3 S3*FV4 ; 24
+ A5 142 ; 34
+ A0 A5+A3 ; 34
+ V1 ,A0,1 ;v_ld_str 34
+ A5 526 ; 34
+ A0 A5+A3 ; 34
+ V7 ,A0,1 ;v_ld_str 34
+ V0 V1+V7 ; 26
+ V6 V3<A6 ; 32
+ V4 V6+V2 ; 32
+ A6 42 ; 28
+ V7 V5>A6 ; 28
+ A5 654 ; 34
+ CPW ;cmr_vrsp 34
+ A0 A5+A3 ; 34
+ V1 ,A0,1 ;v_ld_str 34
+ A5 398 ; 34
+ A0 A5+A3 ; 34
+ V3 ,A0,1 ;v_ld_str 34
+ V6 V4+V1 ; 32
+ V2 V3>A6 ; 32
+ V5 V6+V2 ; 32
+ A6 B12 ;s_bt_sp 32
+ V4 V3<A7 ; 26
+ A7 B13 ;regs 34
+ A3 A7+A1 ; 34
+ A7 B11 ;regs 34
+ A5 A7+A1 ; 34
+ A4 A6+A1 ; 34
+ A7 A2+A1 ; 34
+ A0 A2+A1 ; 34
+ A2 128 ; 34
+ B13 A0 ;s_bt_sp 34
+ V1 V0+V4 ; 26
+ A0 B11 ;regs 31
+ ,A0,1 V1 ; 31
+ V6 V5+V7 ; 33
+ A0 A6 ;regs 33
+ ,A0,1 V6 ; 33
+ A0 B13 ;regs 34
+ A1 A2 ;regs 34
+ A2 A7 ;regs 34
+ JAN L7 ; 34
+L8 = P.* ; 34
+L5 = P.* ; 34
+ S1 0 ; 35
+ A0 B02 ; 35
+ A2 B02 ; 35
+ A1 13 ;num Bs 35
+ B66 A0 ; 35
+ B77,A1 0,A0 ; 35
+ A0 A2+A1 ; 35
+ A1 1 ;num Ts 35
+ T00,A1 0,A0 ; 35
+ J B00 ; 35
+ EXT $STKOFEN:p
+ ENTRY GMPN_MULWW
+ END
diff --git a/rts/gmp/mpn/cray/sub_n.c b/rts/gmp/mpn/cray/sub_n.c
new file mode 100644
index 0000000000..902e07a727
--- /dev/null
+++ b/rts/gmp/mpn/cray/sub_n.c
@@ -0,0 +1,97 @@
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+ For Cray vector processors.
+
+ Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+ This file is part of the GNU MP Library.
+
+ The GNU MP Library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or (at your
+ option) any later version.
+
+ The GNU MP Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Subtract the n-limb vector b[] from a[] into c[], returning the
+   borrow out (0 or 1).  Mirror of mpn_add_n in this directory: one
+   vectorizable pass handles single-place borrows; if a borrow would
+   ripple further (recorded in more_carries), the whole operation is
+   redone with the scalar ripple loop.  */
+mp_limb_t
+mpn_sub_n (c, a, b, n)
+     mp_ptr c;
+     mp_srcptr a, b;
+     mp_size_t n;
+{
+  mp_size_t i;
+  mp_size_t nm1 = n - 1;
+  int more_carries = 0;		/* count of limbs where the borrow ripples */
+  int carry_out;
+
+  /* For small operands the non-vector code is faster.  */
+  if (n < 16)
+    goto sequential;
+
+  /* The vector pass reads a[] and b[] after c[] has been partly written,
+     so an overlapping destination must be copied aside first, then the
+     call retried on the non-overlapping copies.  */
+  if (a == c || b == c)
+    {
+      TMP_DECL (marker);
+      TMP_MARK (marker);
+      if (c == a)
+	{
+	  /* allocate temp space for a */
+	  mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+	  MPN_COPY (ax, a, n);
+	  a = (mp_srcptr) ax;
+	}
+      if (c == b)
+	{
+	  /* allocate temp space for b */
+	  mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+	  MPN_COPY (bx, b, n);
+	  b = (mp_srcptr) bx;
+	}
+      carry_out = mpn_sub_n (c, a, b, n);
+      TMP_FREE (marker);
+      return carry_out;
+    }
+
+  /* Borrow out of the most significant limb difference.  */
+  carry_out = a[nm1] < b[nm1];
+
+#pragma _CRI ivdep			/* Cray PVP systems */
+  for (i = nm1; i > 0; i--)
+    {
+      int cy_in; mp_limb_t t;
+      /* cy_in is the borrow generated one place below (position i-1).  */
+      cy_in = a[i - 1] < b[i - 1];
+      t = a[i] - b[i];
+      /* If subtracting cy_in would underflow t, the borrow must ripple
+	 further; flag it so the scalar fallback recomputes everything.  */
+      more_carries += t < cy_in;
+      c[i] = t - cy_in;
+    }
+  c[0] = a[0] - b[0];		/* least significant limb has no borrow-in */
+
+  if (more_carries)
+    {
+      /* This won't vectorize, but we should come here rarely.  */
+      int cy;
+    sequential:
+      cy = 0;
+      for (i = 0; i < n; i++)
+	{
+	  mp_limb_t ai, ci, t;
+	  ai = a[i];
+	  t = b[i] + cy;	/* fold previous borrow into the subtrahend */
+	  cy = t < cy;		/* borrow from that addition */
+	  ci = ai - t;
+	  cy += ci > ai;	/* combine borrow from the main subtraction */
+	  c[i] = ci;
+	}
+      carry_out = cy;
+    }
+
+  return carry_out;
+}
diff --git a/rts/gmp/mpn/cray/submul_1.c b/rts/gmp/mpn/cray/submul_1.c
new file mode 100644
index 0000000000..4d2fb13c62
--- /dev/null
+++ b/rts/gmp/mpn/cray/submul_1.c
@@ -0,0 +1,46 @@
+/* mpn_submul_1 for Cray PVP.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* rp[0..n-1] -= up[0..n-1] * limb; return the borrow limb.
+   Mirror of mpn_addmul_1 in this directory: GMPN_MULWW produces the
+   high (p1[]) and low (p0[]) product limbs as separate vectors, which
+   are subtracted from rp with two vectorized mpn_sub_n calls.  */
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
+{
+  mp_ptr p0, p1, tp;
+  mp_limb_t cy_limb;
+  TMP_DECL (marker);
+  TMP_MARK (marker);
+
+  /* p1 = high product limbs, p0 = low product limbs, tp = scratch.  */
+  p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+  p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+  tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+
+  GMPN_MULWW (p1, p0, up, &n, &limb);	/* note: args passed by reference */
+  cy_limb = mpn_sub_n (tp, rp, p0, n);	/* tp = rp - low limbs */
+  rp[0] = tp[0];			/* low limb gets no high-part term */
+  cy_limb += mpn_sub_n (rp + 1, tp + 1, p1, n - 1);  /* subtract p1 shifted */
+  cy_limb += p1[n - 1];			/* top high limb goes into the borrow */
+
+  TMP_FREE (marker);
+  return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/add_n.c b/rts/gmp/mpn/generic/add_n.c
new file mode 100644
index 0000000000..5fcb7e4835
--- /dev/null
+++ b/rts/gmp/mpn/generic/add_n.c
@@ -0,0 +1,62 @@
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_srcptr s2_ptr;
+ mp_size_t size;
+#endif
+{
+ register mp_limb_t x, y, cy;
+ register mp_size_t j;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do
+ {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to one addend */
+ cy = (y < cy); /* get out carry from that addition */
+ y = x + y; /* add other addend */
+ cy = (y < x) + cy; /* get out carry from that add, combine */
+ res_ptr[j] = y;
+ }
+ while (++j != 0);
+
+ return cy;
+}
diff --git a/rts/gmp/mpn/generic/addmul_1.c b/rts/gmp/mpn/generic/addmul_1.c
new file mode 100644
index 0000000000..746ae31307
--- /dev/null
+++ b/rts/gmp/mpn/generic/addmul_1.c
@@ -0,0 +1,65 @@
+/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+ by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
+ limb vector pointed to by RES_PTR. Return the most significant limb of
+ the product, adjusted for carry-out from the addition.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t cy_limb;
+ register mp_size_t j;
+ register mp_limb_t prod_high, prod_low;
+ register mp_limb_t x;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do
+ {
+ umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x + prod_low;
+ cy_limb += (prod_low < x);
+ res_ptr[j] = prod_low;
+ }
+ while (++j != 0);
+
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/addsub_n.c b/rts/gmp/mpn/generic/addsub_n.c
new file mode 100644
index 0000000000..c9bab3ef60
--- /dev/null
+++ b/rts/gmp/mpn/generic/addsub_n.c
@@ -0,0 +1,167 @@
+/* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
+
+
+/* mpn_addsub_n.
+ r1[] = s1[] + s2[]
+ r2[] = s1[] - s2[]
+ All operands have n limbs.
+ In-place operations allowed. */
+mp_limb_t
+#if __STDC__
+mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+#else
+mpn_addsub_n (r1p, r2p, s1p, s2p, n)
+ mp_ptr r1p, r2p;
+ mp_srcptr s1p, s2p;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t acyn, acyo; /* carry for add */
+ mp_limb_t scyn, scyo; /* carry for subtract */
+ mp_size_t off; /* offset in operands */
+ mp_size_t this_n; /* size of current chunk */
+
+ /* We alternatingly add and subtract in chunks that fit into the (L1)
+ cache. Since the chunks are several hundred limbs, the function call
+ overhead is insignificant, but we get much better locality. */
+
+  /* We have three variants of the inner loop; the proper loop is chosen
+     depending on whether r1 or r2 is the same operand as s1 or s2.  */
+
+ if (r1p != s1p && r1p != s2p)
+ {
+ /* r1 is not identical to either input operand. We can therefore write
+ to r1 directly, without using temporary storage. */
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
+ acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+ }
+ }
+ else if (r2p != s1p && r2p != s2p)
+ {
+ /* r2 is not identical to either input operand. We can therefore write
+ to r2 directly, without using temporary storage. */
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
+ acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+ }
+ }
+ else
+ {
+ /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2=s2 or vice versa)
+ Need temporary storage. */
+ mp_limb_t tp[PART_SIZE];
+ acyo = 0;
+ scyo = 0;
+ for (off = 0; off < n; off += PART_SIZE)
+ {
+ this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
+ acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+ acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+ acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
+ scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+ scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+ scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+ MPN_COPY (r1p + off, tp, this_n);
+ }
+ }
+
+ return 2 * acyo + scyo;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)
+{
+ mp_ptr r1p, r2p, s1p, s2p;
+ double t;
+ mp_size_t n;
+
+ n = strtol (argv[1], 0, 0);
+
+ r1p = malloc (n * BYTES_PER_MP_LIMB);
+ r2p = malloc (n * BYTES_PER_MP_LIMB);
+ s1p = malloc (n * BYTES_PER_MP_LIMB);
+ s2p = malloc (n * BYTES_PER_MP_LIMB);
+ TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+ printf (" separate add and sub: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
+ printf ("combined addsub separate variables: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
+ printf (" combined addsub r1 overlap: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
+ printf (" combined addsub r2 overlap: %.3f\n", t);
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
+ printf (" combined addsub in-place: %.3f\n", t);
+
+ return 0;
+}
+#endif
diff --git a/rts/gmp/mpn/generic/bdivmod.c b/rts/gmp/mpn/generic/bdivmod.c
new file mode 100644
index 0000000000..c4bcb414e6
--- /dev/null
+++ b/rts/gmp/mpn/generic/bdivmod.c
@@ -0,0 +1,120 @@
+/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
+
+ Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
+ returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
+
+ Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the
+ low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
+ the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
+
+ Preconditions:
+ 1. V is odd.
+ 2. usize * BITS_PER_MP_LIMB >= d.
+ 3. If Q and U overlap, qp <= up.
+
+ Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+   301314194-2, and was done while I was a visiting researcher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ References:
+ T. Jebelean, An algorithm for exact division, Journal of Symbolic
+ Computation, v. 15, 1993, pp. 169-180.
+
+ K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
+ Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+#if __STDC__
+mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
+ mp_srcptr vp, mp_size_t vsize, unsigned long int d)
+#else
+mpn_bdivmod (qp, up, usize, vp, vsize, d)
+ mp_ptr qp;
+ mp_ptr up;
+ mp_size_t usize;
+ mp_srcptr vp;
+ mp_size_t vsize;
+ unsigned long int d;
+#endif
+{
+ mp_limb_t v_inv;
+
+ /* 1/V mod 2^BITS_PER_MP_LIMB. */
+ modlimb_invert (v_inv, vp[0]);
+
+ /* Fast code for two cases previously used by the accel part of mpn_gcd.
+ (Could probably remove this now it's inlined there.) */
+ if (usize == 2 && vsize == 2 &&
+ (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB))
+ {
+ mp_limb_t hi, lo;
+ mp_limb_t q = up[0] * v_inv;
+ umul_ppmm (hi, lo, q, vp[0]);
+ up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q;
+ if (d == 2*BITS_PER_MP_LIMB)
+ q = up[1] * v_inv, up[1] = 0, qp[1] = q;
+ return 0;
+ }
+
+ /* Main loop. */
+ while (d >= BITS_PER_MP_LIMB)
+ {
+ mp_limb_t q = up[0] * v_inv;
+ mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+ if (usize > vsize)
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ d -= BITS_PER_MP_LIMB;
+ up += 1, usize -= 1;
+ *qp++ = q;
+ }
+
+ if (d)
+ {
+ mp_limb_t b;
+ mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
+ if (q <= 1)
+ {
+ if (q == 0)
+ return 0;
+ else
+ b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
+ }
+ else
+ b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+
+ if (usize > vsize)
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ return q;
+ }
+
+ return 0;
+}
diff --git a/rts/gmp/mpn/generic/bz_divrem_n.c b/rts/gmp/mpn/generic/bz_divrem_n.c
new file mode 100644
index 0000000000..d234b22af5
--- /dev/null
+++ b/rts/gmp/mpn/generic/bz_divrem_n.c
@@ -0,0 +1,153 @@
+/* mpn_bz_divrem_n and auxiliary routines.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
+ Technical report MPI-I-98-1-022, october 1998.
+ http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
+*/
+
+static mp_limb_t mpn_bz_div_3_halves_by_2
+ _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n));
+
+
+/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
+ div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
+ i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
+#endif
+
+#if 0
+static
+unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+{
+ /* This might be useful: */
+ if (qxn != 0)
+ {
+ mp_limb_t c;
+ mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp + qxn - nn, np, nn);
+ MPN_ZERO (tp, qxn);
+ c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
+ /* Maybe copy proper part of tp to np? Documentation is unclear about
+ the returned np value when qxn != 0 */
+ return c;
+ }
+}
+#endif
+
+
+/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
+ by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
+ Returns most significant limb of the quotient, which is 0 or 1.
+ Requires that the most significant bit of the divisor is set. */
+
+mp_limb_t
+#if __STDC__
+mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
+#else
+mpn_bz_divrem_n (qp, np, dp, n)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t qhl, cc;
+
+ if (n % 2 != 0)
+ {
+ qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
+ cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]);
+ cc = mpn_sub_1 (np + n, np + n, 1, cc);
+ if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
+ while (cc)
+ {
+ qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1);
+ cc -= mpn_add_n (np + 1, np + 1, dp, n);
+ }
+ qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
+ mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
+ }
+ else
+ {
+ mp_size_t n2 = n/2;
+ qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2);
+ qhl += mpn_add_1 (qp + n2, qp + n2, n2,
+ mpn_bz_div_3_halves_by_2 (qp, np, dp, n2));
+ }
+ return qhl;
+}
+
+
+/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
+ the remainder in (np, 2n) */
+
+static mp_limb_t
+#if __STDC__
+mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
+#else
+mpn_bz_div_3_halves_by_2 (qp, np, dp, n)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+#endif
+{
+ mp_size_t twon = n + n;
+ mp_limb_t qhl, cc;
+ mp_ptr tmp;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ if (n < BZ_THRESHOLD)
+ qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
+ else
+ qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n);
+ tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB);
+ mpn_mul_n (tmp, qp, dp, n);
+ cc = mpn_sub_n (np, np, tmp, twon);
+ TMP_FREE (marker);
+ if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n);
+ while (cc)
+ {
+ qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
+ cc -= mpn_add_n (np, np, dp, twon);
+ }
+ return qhl;
+}
diff --git a/rts/gmp/mpn/generic/cmp.c b/rts/gmp/mpn/generic/cmp.c
new file mode 100644
index 0000000000..8e9792f54e
--- /dev/null
+++ b/rts/gmp/mpn/generic/cmp.c
@@ -0,0 +1,56 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ There are no restrictions on the relative sizes of
+ the two arguments.
+ Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */
+
+int
+#if __STDC__
+mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
+#else
+mpn_cmp (op1_ptr, op2_ptr, size)
+ mp_srcptr op1_ptr;
+ mp_srcptr op2_ptr;
+ mp_size_t size;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t op1_word, op2_word;
+
+ for (i = size - 1; i >= 0; i--)
+ {
+ op1_word = op1_ptr[i];
+ op2_word = op2_ptr[i];
+ if (op1_word != op2_word)
+ goto diff;
+ }
+ return 0;
+ diff:
+  /* This can *not* be simplified to
+	op1_word - op2_word
+     since that expression might give signed overflow.  */
+ return (op1_word > op2_word) ? 1 : -1;
+}
diff --git a/rts/gmp/mpn/generic/diveby3.c b/rts/gmp/mpn/generic/diveby3.c
new file mode 100644
index 0000000000..a2fb552bfa
--- /dev/null
+++ b/rts/gmp/mpn/generic/diveby3.c
@@ -0,0 +1,77 @@
+/* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
+ 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
+#define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1)
+
+
+/* The "c += ..."s are adding the high limb of 3*l to c. That high limb
+ will be 0, 1 or 2. Doing two separate "+="s seems to turn out better
+ code on gcc (as of 2.95.2 at least).
+
+ When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a
+ limb value of either 0xFF...FF or 0xFF...FE and the multiply by INVERSE_3
+ gives 0x55...55 or 0xAA...AA respectively, producing a further borrow of
+ only 0 or 1 respectively. Hence the carry out of each stage and for the
+ return value is always only 0, 1 or 2. */
+
+mp_limb_t
+#if __STDC__
+mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c)
+#else
+mpn_divexact_by3c (dst, src, size, c)
+ mp_ptr dst;
+ mp_srcptr src;
+ mp_size_t size;
+ mp_limb_t c;
+#endif
+{
+ mp_size_t i;
+
+ ASSERT (size >= 1);
+
+ i = 0;
+ do
+ {
+ mp_limb_t l, s;
+
+ s = src[i];
+ l = s - c;
+ c = (l > s);
+
+ l *= INVERSE_3;
+ dst[i] = l;
+
+ c += (l > MP_LIMB_T_MAX/3);
+ c += (l > (MP_LIMB_T_MAX/3)*2);
+ }
+ while (++i < size);
+
+ return c;
+}
diff --git a/rts/gmp/mpn/generic/divrem.c b/rts/gmp/mpn/generic/divrem.c
new file mode 100644
index 0000000000..30673e76d9
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem.c
@@ -0,0 +1,101 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+ quotient. This is now just a middle layer for calling the new
+ internal mpn_tdiv_qr.
+
+Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+#if __STDC__
+mpn_divrem (mp_ptr qp, mp_size_t qxn,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+#else
+mpn_divrem (qp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+#endif
+{
+ if (dn == 1)
+ {
+ mp_limb_t ret;
+ mp_ptr q2p;
+ mp_size_t qn;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
+
+ np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
+ qn = nn + qxn - 1;
+ MPN_COPY (qp, q2p, qn);
+ ret = q2p[qn];
+
+ TMP_FREE (marker);
+ return ret;
+ }
+ else if (dn == 2)
+ {
+ return mpn_divrem_2 (qp, qxn, np, nn, dp);
+ }
+ else
+ {
+ mp_ptr rp, q2p;
+ mp_limb_t qhl;
+ mp_size_t qn;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ if (qxn != 0)
+ {
+ mp_ptr n2p;
+ n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
+ MPN_ZERO (n2p, qxn);
+ MPN_COPY (n2p + qxn, np, nn);
+ q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
+ rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
+ MPN_COPY (np, rp, dn);
+ qn = nn - dn + qxn;
+ MPN_COPY (qp, q2p, qn);
+ qhl = q2p[qn];
+ }
+ else
+ {
+ q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
+ rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
+ MPN_COPY (np, rp, dn); /* overwrite np area with remainder */
+ qn = nn - dn;
+ MPN_COPY (qp, q2p, qn);
+ qhl = q2p[qn];
+ }
+ TMP_FREE (marker);
+ return qhl;
+ }
+}
diff --git a/rts/gmp/mpn/generic/divrem_1.c b/rts/gmp/mpn/generic/divrem_1.c
new file mode 100644
index 0000000000..e93f241c9d
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem_1.c
@@ -0,0 +1,248 @@
+/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+ QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+
+/* __gmpn_divmod_1_internal(quot_ptr,dividend_ptr,dividend_size,divisor_limb)
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+ QUOT_PTR and DIVIDEND_PTR might point to the same limb. */
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+static mp_limb_t
+#if __STDC__
+__gmpn_divmod_1_internal (mp_ptr quot_ptr,
+ mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb_t divisor_limb)
+#else
+__gmpn_divmod_1_internal (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+ mp_ptr quot_ptr;
+ mp_srcptr dividend_ptr;
+ mp_size_t dividend_size;
+ mp_limb_t divisor_limb;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t n1, n0, r;
+ int dummy;
+
+ /* ??? Should this be handled at all? Rely on callers? */
+ if (dividend_size == 0)
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ dividend is large, pre-invert the divisor, and use
+ only multiplications in the inner loop. */
+
+ /* This test should be read:
+ Does it ever help to use udiv_qrnnd_preinv?
+ && Does what we save compensate for the inversion overhead? */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if (normalization_steps != 0)
+ {
+ mp_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+ invert_limb (divisor_limb_inverted, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ if (r == 0
+ && divisor_limb > ((n1 << normalization_steps)
+ | (dividend_ptr[dividend_size - 2] >> ...)))
+ ...one division less... */
+
+ for (i = dividend_size - 2; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+ divisor_limb, divisor_limb_inverted);
+ n1 = n0;
+ }
+ udiv_qrnnd_preinv (quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ }
+ else
+ {
+ mp_limb_t divisor_limb_inverted;
+
+ invert_limb (divisor_limb_inverted, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ {
+ quot_ptr[i] = 0;
+ i--;
+ }
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (quot_ptr[i], r, r,
+ n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+ }
+ }
+ else
+ {
+ if (UDIV_NEEDS_NORMALIZATION)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if (normalization_steps != 0)
+ {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ if (r == 0
+ && divisor_limb > ((n1 << normalization_steps)
+ | (dividend_ptr[dividend_size - 2] >> ...)))
+ ...one division less... */
+
+ for (i = dividend_size - 2; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd (quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ it, or because DIVISOR_LIMB is already normalized. */
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ {
+ quot_ptr[i] = 0;
+ i--;
+ }
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
+ }
+ return r;
+ }
+}
+
+
+
+mp_limb_t
+#if __STDC__
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+ mp_srcptr np, mp_size_t nn,
+ mp_limb_t d)
+#else
+mpn_divrem_1 (qp, qxn, np, nn, d)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_srcptr np;
+ mp_size_t nn;
+ mp_limb_t d;
+#endif
+{
+ mp_limb_t rlimb;
+ mp_size_t i;
+
+ /* Develop integer part of quotient. */
+ rlimb = __gmpn_divmod_1_internal (qp + qxn, np, nn, d);
+
+ /* Develop fraction part of quotient. This is not as fast as it should;
+ the preinvert stuff from __gmpn_divmod_1_internal ought to be used here
+ too. */
+ if (UDIV_NEEDS_NORMALIZATION)
+ {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, d);
+ if (normalization_steps != 0)
+ {
+ d <<= normalization_steps;
+ rlimb <<= normalization_steps;
+
+ for (i = qxn - 1; i >= 0; i--)
+ udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
+
+ return rlimb >> normalization_steps;
+ }
+ else
+ /* fall out */
+ ;
+ }
+
+ for (i = qxn - 1; i >= 0; i--)
+ udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
+
+ return rlimb;
+}
diff --git a/rts/gmp/mpn/generic/divrem_2.c b/rts/gmp/mpn/generic/divrem_2.c
new file mode 100644
index 0000000000..0bc31ae2e7
--- /dev/null
+++ b/rts/gmp/mpn/generic/divrem_2.c
@@ -0,0 +1,151 @@
+/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
+ quotient. The divisor is two limbs.
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright (C) 1993, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide num (NP/NSIZE) by den (DP/2) and write
+ the NSIZE-2 least significant quotient limbs at QP
+ and the 2 long remainder at NP. If QEXTRA_LIMBS is
+ non-zero, generate that many fraction bits and append them after the
+ other quotient limbs.
+ Return the most significant limb of the quotient, this is always 0 or 1.
+
+ Preconditions:
+ 0. NSIZE >= 2.
+ 1. The most significant bit of the divisor must be set.
+ 2. QP must either not overlap with the input operands at all, or
+ QP + 2 >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.
+ 3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. */
+
+/* Divide {np,nsize} by the two-limb divisor {dp,2} (which must be
+   normalized, i.e. have its most significant bit set -- see the
+   preconditions above).  The quotient limbs are written to qp (plus qxn
+   fraction limbs), the 2-limb remainder is left at np, and the return
+   value is the most significant quotient limb, always 0 or 1.  */
+mp_limb_t
+#if __STDC__
+mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
+ mp_ptr np, mp_size_t nsize,
+ mp_srcptr dp)
+#else
+mpn_divrem_2 (qp, qxn, np, nsize, dp)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nsize;
+ mp_srcptr dp;
+#endif
+{
+ mp_limb_t most_significant_q_limb = 0;
+ mp_size_t i;
+ mp_limb_t n1, n0, n2;
+ mp_limb_t d1, d0;
+ mp_limb_t d1inv;
+ int have_preinv;
+
+ /* Point np at the two most significant dividend limbs. */
+ np += nsize - 2;
+ d1 = dp[1];
+ d0 = dp[0];
+ n1 = np[1];
+ n0 = np[0];
+
+ /* If the top two dividend limbs are >= the divisor, the leading
+ quotient limb is 1; reduce once up front. */
+ if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+ {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ most_significant_q_limb = 1;
+ }
+
+ /* If multiplication is much faster than division, preinvert the most
+ significant divisor limb before entering the loop. */
+ if (UDIV_TIME > 2 * UMUL_TIME + 6)
+ {
+ have_preinv = 0;
+ if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - 2) > UDIV_TIME)
+ {
+ invert_limb (d1inv, d1);
+ have_preinv = 1;
+ }
+ }
+
+ for (i = qxn + nsize - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ mp_limb_t r;
+
+ /* Integer-part steps consume a real dividend limb; fraction-part
+ steps (i < qxn) use an appended zero limb. */
+ if (i >= qxn)
+ np--;
+ else
+ np[0] = 0;
+
+ if (n1 == d1)
+ {
+ /* Q should be either 111..111 or 111..110. Need special treatment
+ of this rare case as normal division would give overflow. */
+ q = ~(mp_limb_t) 0;
+
+ r = n0 + d1;
+ if (r < d1) /* Carry in the addition? */
+ {
+ add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+ qp[i] = q;
+ continue;
+ }
+ n1 = d0 - (d0 != 0);
+ n0 = -d0;
+ }
+ else
+ {
+ if (UDIV_TIME > 2 * UMUL_TIME + 6 && have_preinv)
+ udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
+ else
+ udiv_qrnnd (q, r, n1, n0, d1);
+ umul_ppmm (n1, n0, d0, q);
+ }
+
+ n2 = np[0];
+
+ q_test:
+ if (n1 > r || (n1 == r && n0 > n2))
+ {
+ /* The estimated Q was too large. */
+ q--;
+
+ sub_ddmmss (n1, n0, n1, n0, 0, d0);
+ r += d1;
+ if (r >= d1) /* If not carry, test Q again. */
+ goto q_test;
+ }
+
+ qp[i] = q;
+ sub_ddmmss (n1, n0, r, n2, n1, n0);
+ }
+ /* Store the 2-limb remainder back at the (adjusted) np. */
+ np[1] = n1;
+ np[0] = n0;
+
+ return most_significant_q_limb;
+}
diff --git a/rts/gmp/mpn/generic/dump.c b/rts/gmp/mpn/generic/dump.c
new file mode 100644
index 0000000000..66f375c74b
--- /dev/null
+++ b/rts/gmp/mpn/generic/dump.c
@@ -0,0 +1,76 @@
+/* THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS NOT SAFE TO
+ CALL THIS FUNCTION DIRECTLY. IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+ FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Debug helper: print the limb vector {ptr,size} to stdout as uppercase
+   hex, most significant limb first, followed by a newline.  When a limb
+   is wider than a long, each limb is printed as two half-limb pieces so
+   only "unsigned long" printf conversions are needed.  */
+void
+#if __STDC__
+mpn_dump (mp_srcptr ptr, mp_size_t size)
+#else
+mpn_dump (ptr, size)
+ mp_srcptr ptr;
+ mp_size_t size;
+#endif
+{
+ /* Strip high zero limbs so leading zeros aren't printed. */
+ MPN_NORMALIZE (ptr, size);
+
+ if (size == 0)
+ printf ("0\n");
+ else
+ {
+ size--;
+ if (BYTES_PER_MP_LIMB > sizeof (long))
+ {
+ /* Most significant limb: no zero-padding on the top half. */
+ if ((ptr[size] >> BITS_PER_MP_LIMB/2) != 0)
+ {
+ printf ("%lX",
+ (unsigned long) (ptr[size] >> BITS_PER_MP_LIMB/2));
+ printf ("%0*lX", (int) (BYTES_PER_MP_LIMB),
+ (unsigned long) ptr[size]);
+ }
+ else
+ printf ("%lX", (unsigned long) ptr[size]);
+ }
+ else
+ printf ("%lX", ptr[size]);
+
+ /* Remaining limbs are zero-padded to full width. */
+ while (size)
+ {
+ size--;
+ if (BYTES_PER_MP_LIMB > sizeof (long))
+ {
+ printf ("%0*lX", (int) (BYTES_PER_MP_LIMB),
+ (unsigned long) (ptr[size] >> BITS_PER_MP_LIMB/2));
+ printf ("%0*lX", (int) (BYTES_PER_MP_LIMB),
+ (unsigned long) ptr[size]);
+ }
+ else
+ printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]);
+ }
+ printf ("\n");
+ }
+}
diff --git a/rts/gmp/mpn/generic/gcd.c b/rts/gmp/mpn/generic/gcd.c
new file mode 100644
index 0000000000..059e219a06
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcd.c
@@ -0,0 +1,414 @@
+/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Integer greatest common divisor of two unsigned integers, using
+ the accelerated algorithm (see reference below).
+
+ mp_size_t mpn_gcd (up, usize, vp, vsize).
+
+ Preconditions [U = (up, usize) and V = (vp, vsize)]:
+
+ 1. V is odd.
+ 2. numbits(U) >= numbits(V).
+
+ Both U and V are destroyed by the operation. The result is left at vp,
+ and its size is returned.
+
+ Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+ 301314194-2, and was done while I was a visiting researcher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ Refer to
+ K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
+ Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
+ algorithm is used, otherwise the binary algorithm is used. This may be
+ adjusted for different architectures. */
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 5
+#endif
+
+/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
+ algorithm reduces using the bmod operation. Otherwise, the k-ary reduction
+ is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */
+enum
+ {
+ BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
+ };
+
+
+/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
+ Both U and V must be odd. */
+static __gmp_inline mp_size_t
+#if __STDC__
+gcd_2 (mp_ptr vp, mp_srcptr up)
+#else
+gcd_2 (vp, up)
+ mp_ptr vp;
+ mp_srcptr up;
+#endif
+{
+ mp_limb_t u0, u1, v0, v1;
+ mp_size_t vsize;
+
+ u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
+
+ /* Binary GCD on two double-limb values: subtract the smaller from the
+ larger and strip trailing zero bits (both operands are odd, so the
+ difference is even and count_trailing_zeros gets r >= 1). */
+ while (u1 != v1 && u0 != v0)
+ {
+ unsigned long int r;
+ if (u1 > v1)
+ {
+ u1 -= v1 + (u0 < v0), u0 -= v0;
+ count_trailing_zeros (r, u0);
+ u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
+ u1 >>= r;
+ }
+ else /* u1 < v1. */
+ {
+ v1 -= u1 + (v0 < u0), v0 -= u0;
+ count_trailing_zeros (r, v0);
+ v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
+ v1 >>= r;
+ }
+ }
+
+ vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
+
+ /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
+ if (u1 == v1 && u0 == v0)
+ return vsize;
+
+ /* The loop exited with exactly one of the limb pairs equal; the other
+ pair's difference fits in a single limb. */
+ v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
+ vp[0] = mpn_gcd_1 (vp, vsize, v0);
+
+ return 1;
+}
+
+/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
+ 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
+ In the reference article, D was computed along with N, but it is better to
+ compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
+ the result as a twos' complement signed integer.
+
+ Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference
+ article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
+ 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
+ precision. If N2 > N1 initially, the first iteration of the while loop
+ will swap them. In all other situations, N1 >= N2 is maintained. */
+
+static
+#if ! defined (__i386__)
+__gmp_inline /* don't inline this for the x86 */
+#endif
+mp_limb_t
+#if __STDC__
+find_a (mp_srcptr cp)
+#else
+find_a (cp)
+ mp_srcptr cp;
+#endif
+{
+ unsigned long int leading_zero_bits = 0;
+
+ mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */
+ mp_limb_t n1_h = cp[1];
+
+ mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */
+ mp_limb_t n2_h = ~n1_h;
+
+ /* Main loop. */
+ while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */
+ {
+ /* N1 <-- N1 % N2, done by double-limb shift-and-subtract division
+ since N2 may be much smaller than N1. */
+ if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
+ {
+ unsigned long int i;
+ /* Align N2's top bit with N1's known top bit, then subtract
+ while shifting N2 back down one bit at a time. */
+ count_leading_zeros (i, n2_h);
+ i -= leading_zero_bits, leading_zero_bits += i;
+ n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
+ do
+ {
+ if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+ n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+ n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
+ i -= 1;
+ }
+ while (i);
+ }
+ if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+ n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+
+ /* Swap so N1 >= N2 is re-established for the next round. */
+ MP_LIMB_T_SWAP (n1_h, n2_h);
+ MP_LIMB_T_SWAP (n1_l, n2_l);
+ }
+
+ return n2_l;
+}
+
+/* Compute GCD (U, V) per the preconditions in the file header (V odd,
+   numbits(U) >= numbits(V)); both operands are clobbered, the result is
+   left at vp (copied to gp if distinct) and its size returned.  Uses
+   Weber's accelerated algorithm (bmod / k-ary reductions) above
+   GCD_ACCEL_THRESHOLD, finishing with binary GCD.  */
+mp_size_t
+#if __STDC__
+mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (gp, up, usize, vp, vsize)
+ mp_ptr gp;
+ mp_ptr up;
+ mp_size_t usize;
+ mp_ptr vp;
+ mp_size_t vsize;
+#endif
+{
+ mp_ptr orig_vp = vp;
+ mp_size_t orig_vsize = vsize;
+ int binary_gcd_ctr; /* Number of times binary gcd will execute. */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
+ Two EXTRA limbs for U and V are required for kary reduction. */
+ if (vsize >= GCD_ACCEL_THRESHOLD)
+ {
+ unsigned long int vbitsize, d;
+ mp_ptr orig_up = up;
+ mp_size_t orig_usize = usize;
+ mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
+
+ MPN_COPY (anchor_up, orig_up, usize);
+ up = anchor_up;
+
+ /* d = numbits(U) - numbits(V) + 1. */
+ count_leading_zeros (d, up[usize-1]);
+ d = usize * BITS_PER_MP_LIMB - d;
+ count_leading_zeros (vbitsize, vp[vsize-1]);
+ vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+ d = d - vbitsize + 1;
+
+ /* Use bmod reduction to quickly discover whether V divides U. */
+ up[usize++] = 0; /* Insert leading zero. */
+ mpn_bdivmod (up, up, usize, vp, vsize, d);
+
+ /* Now skip U/V mod 2^d and any low zero limbs. */
+ d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+ while (usize != 0 && up[0] == 0)
+ up++, usize--;
+
+ if (usize == 0) /* GCD == ORIG_V. */
+ goto done;
+
+ vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
+ MPN_COPY (vp, orig_vp, vsize);
+
+ do /* Main loop. */
+ {
+ /* mpn_com_n can't be used here because anchor_up and up may
+ partially overlap */
+ if (up[usize-1] & MP_LIMB_T_HIGHBIT) /* U < 0; take twos' compl. */
+ {
+ mp_size_t i;
+ anchor_up[0] = -up[0];
+ for (i = 1; i < usize; i++)
+ anchor_up[i] = ~up[i];
+ up = anchor_up;
+ }
+
+ MPN_NORMALIZE_NOT_ZERO (up, usize);
+
+ if ((up[0] & 1) == 0) /* Result even; remove twos. */
+ {
+ unsigned int r;
+ count_trailing_zeros (r, up[0]);
+ mpn_rshift (anchor_up, up, usize, r);
+ usize -= (anchor_up[usize-1] == 0);
+ }
+ else if (anchor_up != up)
+ MPN_COPY_INCR (anchor_up, up, usize);
+
+ MPN_PTR_SWAP (anchor_up,usize, vp,vsize);
+ up = anchor_up;
+
+ if (vsize <= 2) /* Kary can't handle < 2 limbs and */
+ break; /* isn't efficient for == 2 limbs. */
+
+ d = vbitsize;
+ count_leading_zeros (vbitsize, vp[vsize-1]);
+ vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+ d = d - vbitsize + 1;
+
+ if (d > BMOD_THRESHOLD) /* Bmod reduction. */
+ {
+ up[usize++] = 0;
+ mpn_bdivmod (up, up, usize, vp, vsize, d);
+ d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+ }
+ else /* Kary reduction. */
+ {
+ mp_limb_t bp[2], cp[2];
+
+ /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */
+ {
+ mp_limb_t u_inv, hi, lo;
+ modlimb_invert (u_inv, up[0]);
+ cp[0] = vp[0] * u_inv;
+ umul_ppmm (hi, lo, cp[0], up[0]);
+ cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv;
+ }
+
+ /* U <-- find_a (C) * U. */
+ up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
+ usize++;
+
+ /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
+ bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
+ bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2
+
+ Like V/U above, but simplified because only the low bit of
+ bp[1] is wanted. */
+ {
+ mp_limb_t v_inv, hi, lo;
+ modlimb_invert (v_inv, vp[0]);
+ bp[0] = up[0] * v_inv;
+ umul_ppmm (hi, lo, bp[0], vp[0]);
+ bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1;
+ }
+
+ up[usize++] = 0;
+ if (bp[1]) /* B < 0: U <-- U + (-B) * V. */
+ {
+ mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
+ mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
+ }
+ else /* B >= 0: U <-- U - B * V. */
+ {
+ mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ }
+
+ up += 2, usize -= 2; /* At least two low limbs are zero. */
+ }
+
+ /* Must remove low zero limbs before complementing. */
+ while (usize != 0 && up[0] == 0)
+ up++, usize--;
+ }
+ while (usize);
+
+ /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */
+ up = orig_up, usize = orig_usize;
+ binary_gcd_ctr = 2;
+ }
+ else
+ binary_gcd_ctr = 1;
+
+ /* Finish up with the binary algorithm. Executes once or twice. */
+ for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
+ {
+ if (usize > 2) /* First make U close to V in size. */
+ {
+ unsigned long int vbitsize, d;
+ count_leading_zeros (d, up[usize-1]);
+ d = usize * BITS_PER_MP_LIMB - d;
+ count_leading_zeros (vbitsize, vp[vsize-1]);
+ vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+ d = d - vbitsize - 1;
+ if (d != -(unsigned long int)1 && d > 2)
+ {
+ mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */
+ d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
+ }
+ }
+
+ /* Start binary GCD. */
+ do
+ {
+ mp_size_t zeros;
+
+ /* Make sure U is odd. */
+ MPN_NORMALIZE (up, usize);
+ while (up[0] == 0)
+ up += 1, usize -= 1;
+ if ((up[0] & 1) == 0)
+ {
+ unsigned int r;
+ count_trailing_zeros (r, up[0]);
+ mpn_rshift (up, up, usize, r);
+ usize -= (up[usize-1] == 0);
+ }
+
+ /* Keep usize >= vsize. */
+ if (usize < vsize)
+ MPN_PTR_SWAP (up, usize, vp, vsize);
+
+ if (usize <= 2) /* Double precision. */
+ {
+ if (vsize == 1)
+ vp[0] = mpn_gcd_1 (up, usize, vp[0]);
+ else
+ vsize = gcd_2 (vp, up);
+ break; /* Binary GCD done. */
+ }
+
+ /* Count number of low zero limbs of U - V. */
+ for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
+ continue;
+
+ /* If U < V, swap U and V; in any case, subtract V from U. */
+ if (zeros == vsize) /* Subtract done. */
+ up += zeros, usize -= zeros;
+ else if (usize == vsize)
+ {
+ mp_size_t size = vsize;
+ do
+ size--;
+ while (up[size] == vp[size]);
+ if (up[size] < vp[size]) /* usize == vsize. */
+ MP_PTR_SWAP (up, vp);
+ up += zeros, usize = size + 1 - zeros;
+ mpn_sub_n (up, up, vp + zeros, usize);
+ }
+ else
+ {
+ mp_size_t size = vsize - zeros;
+ up += zeros, usize -= zeros;
+ if (mpn_sub_n (up, up, vp + zeros, size))
+ {
+ while (up[size] == 0) /* Propagate borrow. */
+ up[size++] = -(mp_limb_t)1;
+ up[size] -= 1;
+ }
+ }
+ }
+ while (usize); /* End binary GCD. */
+ }
+
+done:
+ if (vp != gp)
+ MPN_COPY (gp, vp, vsize);
+ TMP_FREE (marker);
+ return vsize;
+}
diff --git a/rts/gmp/mpn/generic/gcd_1.c b/rts/gmp/mpn/generic/gcd_1.c
new file mode 100644
index 0000000000..1832636636
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcd_1.c
@@ -0,0 +1,77 @@
+/* mpn_gcd_1 --
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0. It would be tough to make it work for
+ V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t. */
+
+/* GCD of the multi-limb value {up,size} and the single limb vlimb.
+   Reduces U modulo V first when U has more than one limb, then runs the
+   binary (subtract-and-shift) algorithm on single limbs.  Per the note
+   above, neither operand may be zero.  */
+mp_limb_t
+#if __STDC__
+mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
+#else
+mpn_gcd_1 (up, size, vlimb)
+ mp_srcptr up;
+ mp_size_t size;
+ mp_limb_t vlimb;
+#endif
+{
+ mp_limb_t ulimb;
+ unsigned long int u_low_zero_bits, v_low_zero_bits;
+
+ if (size > 1)
+ {
+ ulimb = mpn_mod_1 (up, size, vlimb);
+ if (ulimb == 0)
+ return vlimb;
+ }
+ else
+ ulimb = up[0];
+
+ /* Need to eliminate low zero bits. */
+ count_trailing_zeros (u_low_zero_bits, ulimb);
+ ulimb >>= u_low_zero_bits;
+
+ count_trailing_zeros (v_low_zero_bits, vlimb);
+ vlimb >>= v_low_zero_bits;
+
+ /* Binary GCD on the now-odd values: subtract smaller from larger and
+ strip the trailing zeros reintroduced by the subtraction. */
+ while (ulimb != vlimb)
+ {
+ if (ulimb > vlimb)
+ {
+ ulimb -= vlimb;
+ do
+ ulimb >>= 1;
+ while ((ulimb & 1) == 0);
+ }
+ else /* vlimb > ulimb. */
+ {
+ vlimb -= ulimb;
+ do
+ vlimb >>= 1;
+ while ((vlimb & 1) == 0);
+ }
+ }
+
+ /* Restore the common factor of two removed above. */
+ return ulimb << MIN (u_low_zero_bits, v_low_zero_bits);
+}
diff --git a/rts/gmp/mpn/generic/gcdext.c b/rts/gmp/mpn/generic/gcdext.c
new file mode 100644
index 0000000000..fe22d779a6
--- /dev/null
+++ b/rts/gmp/mpn/generic/gcdext.c
@@ -0,0 +1,700 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright (C) 1996, 1998, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 17
+#endif
+
+#ifndef EXTEND
+#define EXTEND 1
+#endif
+
+#if STAT
+int arr[BITS_PER_MP_LIMB];
+#endif
+
+
+/* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE)
+
+ Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the
+ greatest common divisor at GP (unless it is 0), and the first cofactor at
+ SP. Write the size of the cofactor through the pointer SSIZE. Return the
+ size of the value at GP. Note that SP might be a negative number; this is
+ denoted by storing the negative of the size through SSIZE.
+
+ {UP,USIZE} and {VP,VSIZE} are both clobbered.
+
+ The space allocation for all four areas needs to be USIZE+1.
+
+ Preconditions: 1) U >= V.
+ 2) V > 0. */
+
+/* We use Lehmer's algorithm. The idea is to extract the most significant
+ bits of the operands, and compute the continued fraction for them. We then
+ apply the gathered cofactors to the full operands.
+
+ Idea 1: After we have performed a full division, don't shift operands back,
+ but instead account for the extra factors-of-2 thus introduced.
+ Idea 2: Simple generalization to use divide-and-conquer would give us an
+ algorithm that runs faster than O(n^2).
+ Idea 3: The input numbers need less space as the computation progresses,
+ while the s0 and s1 variables need more space. To save memory, we
+ could make them share space, and have the latter variables grow
+ into the former.
+ Idea 4: We should not do double-limb arithmetic from the start. Instead,
+ do things in single-limb arithmetic until the quotients differ,
+ and then switch to double-limb arithmetic. */
+
+
+/* Division optimized for small quotients. If the quotient is more than one limb,
+ store 1 in *qh and return 0. */
+static mp_limb_t
+#if __STDC__
+div2 (mp_limb_t *qh, mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0)
+#else
+div2 (qh, n1, n0, d1, d0)
+ mp_limb_t *qh;
+ mp_limb_t n1;
+ mp_limb_t n0;
+ mp_limb_t d1;
+ mp_limb_t d0;
+#endif
+{
+ /* d1 == 0 means N/D cannot fit one limb; signal via *qh (see the
+ comment above the function). */
+ if (d1 == 0)
+ {
+ *qh = 1;
+ return 0;
+ }
+
+ if ((mp_limb_signed_t) n1 < 0)
+ {
+ mp_limb_t q;
+ int cnt;
+ /* N's top bit is set: align D by shifting until its top bit is
+ set too, then do shift-and-subtract division. */
+ for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++)
+ {
+ d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ q <<= 1;
+ if (n1 > d1 || (n1 == d1 && n0 >= d0))
+ {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ q |= 1;
+ }
+ d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
+ d1 = d1 >> 1;
+ cnt--;
+ }
+
+ *qh = 0;
+ return q;
+ }
+ else
+ {
+ mp_limb_t q;
+ int cnt;
+ /* Shift D up until it exceeds N, then divide while shifting it
+ back down; note the shift happens before the compare here,
+ unlike the branch above. */
+ for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++)
+ {
+ d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
+ d1 = d1 >> 1;
+ q <<= 1;
+ if (n1 > d1 || (n1 == d1 && n0 >= d0))
+ {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ q |= 1;
+ }
+ cnt--;
+ }
+
+ *qh = 0;
+ return q;
+ }
+}
+
+/* Extended GCD by Lehmer's algorithm (see the block comment above):
+   repeatedly extract the top limbs (or top two limbs, above
+   GCDEXT_THRESHOLD) of U and V, run the continued-fraction recurrence on
+   them to get the 2x2 cofactor matrix (A,B;C,D), and apply that matrix
+   to the full operands and (when EXTEND) to the cofactor accumulators
+   s0/s1.  `sign` tracks the alternating sign of the first cofactor.  */
+mp_size_t
+#if EXTEND
+#if __STDC__
+mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
+ mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcdext (gp, s0p, s0size, up, size, vp, vsize)
+ mp_ptr gp;
+ mp_ptr s0p;
+ mp_size_t *s0size;
+ mp_ptr up;
+ mp_size_t size;
+ mp_ptr vp;
+ mp_size_t vsize;
+#endif
+#else
+#if __STDC__
+mpn_gcd (mp_ptr gp,
+ mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (gp, up, size, vp, vsize)
+ mp_ptr gp;
+ mp_ptr up;
+ mp_size_t size;
+ mp_ptr vp;
+ mp_size_t vsize;
+#endif
+#endif
+{
+ mp_limb_t A, B, C, D;
+ int cnt;
+ mp_ptr tp, wp;
+#if RECORD
+ mp_limb_t max = 0;
+#endif
+#if EXTEND
+ mp_ptr s1p;
+ mp_ptr orig_s0p = s0p;
+ mp_size_t ssize;
+ int sign = 1;
+#endif
+ int use_double_flag;
+ TMP_DECL (mark);
+
+ TMP_MARK (mark);
+
+ use_double_flag = (size >= GCDEXT_THRESHOLD);
+
+ tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+ wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+#if EXTEND
+ s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+
+ MPN_ZERO (s0p, size);
+ MPN_ZERO (s1p, size);
+
+ /* Cofactor accumulators start as (1, 0). */
+ s0p[0] = 1;
+ s1p[0] = 0;
+ ssize = 1;
+#endif
+
+ if (size > vsize)
+ {
+ /* Normalize V (and shift up U the same amount). */
+ count_leading_zeros (cnt, vp[vsize - 1]);
+ if (cnt != 0)
+ {
+ mp_limb_t cy;
+ mpn_lshift (vp, vp, vsize, cnt);
+ cy = mpn_lshift (up, up, size, cnt);
+ up[size] = cy;
+ size += cy != 0;
+ }
+
+ mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+ /* This is really what it boils down to in this case... */
+ s0p[0] = 0;
+ s1p[0] = 1;
+ sign = -sign;
+#endif
+ size = vsize;
+ if (cnt != 0)
+ {
+ mpn_rshift (up, up, size, cnt);
+ mpn_rshift (vp, vp, size, cnt);
+ }
+ MP_PTR_SWAP (up, vp);
+ }
+
+ for (;;)
+ {
+ mp_limb_t asign;
+ /* Figure out exact size of V. */
+ vsize = size;
+ MPN_NORMALIZE (vp, vsize);
+ if (vsize <= 1)
+ break;
+
+ if (use_double_flag)
+ {
+ mp_limb_t uh, vh, ul, vl;
+ /* Let UH,UL be the most significant limbs of U, and let VH,VL be
+ the corresponding bits from V. */
+ uh = up[size - 1];
+ vh = vp[size - 1];
+ ul = up[size - 2];
+ vl = vp[size - 2];
+ count_leading_zeros (cnt, uh);
+ if (cnt != 0)
+ {
+ uh = (uh << cnt) | (ul >> (BITS_PER_MP_LIMB - cnt));
+ vh = (vh << cnt) | (vl >> (BITS_PER_MP_LIMB - cnt));
+ vl <<= cnt;
+ ul <<= cnt;
+ if (size >= 3)
+ {
+ ul |= (up[size - 3] >> (BITS_PER_MP_LIMB - cnt));
+ vl |= (vp[size - 3] >> (BITS_PER_MP_LIMB - cnt));
+ }
+ }
+
+ /* Identity matrix: cofactors start fresh for this round. */
+ A = 1;
+ B = 0;
+ C = 0;
+ D = 1;
+
+ asign = 0;
+ for (;;)
+ {
+ mp_limb_t T;
+ mp_limb_t qh, q1, q2;
+ mp_limb_t nh, nl, dh, dl;
+ mp_limb_t t1, t0;
+ mp_limb_t Th, Tl;
+
+ /* Two continued-fraction steps per iteration; the quotient is
+ accepted only if both bounding quotients q1 and q2 agree
+ (Lehmer's condition). */
+ sub_ddmmss (dh, dl, vh, vl, 0, C);
+ if ((dl | dh) == 0)
+ break;
+ add_ssaaaa (nh, nl, uh, ul, 0, A);
+ q1 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ add_ssaaaa (dh, dl, vh, vl, 0, D);
+ if ((dl | dh) == 0)
+ break;
+ sub_ddmmss (nh, nl, uh, ul, 0, B);
+ q2 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ if (q1 != q2)
+ break;
+
+ asign = ~asign;
+
+ T = A + q1 * C;
+ A = C;
+ C = T;
+ T = B + q1 * D;
+ B = D;
+ D = T;
+ umul_ppmm (t1, t0, q1, vl);
+ t1 += q1 * vh;
+ sub_ddmmss (Th, Tl, uh, ul, t1, t0);
+ uh = vh, ul = vl;
+ vh = Th, vl = Tl;
+
+ add_ssaaaa (dh, dl, vh, vl, 0, C);
+ sub_ddmmss (nh, nl, uh, ul, 0, A);
+ q1 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ sub_ddmmss (dh, dl, vh, vl, 0, D);
+ if ((dl | dh) == 0)
+ break;
+ add_ssaaaa (nh, nl, uh, ul, 0, B);
+ q2 = div2 (&qh, nh, nl, dh, dl);
+ if (qh != 0)
+ break; /* could handle this */
+
+ if (q1 != q2)
+ break;
+
+ asign = ~asign;
+
+ T = A + q1 * C;
+ A = C;
+ C = T;
+ T = B + q1 * D;
+ B = D;
+ D = T;
+ umul_ppmm (t1, t0, q1, vl);
+ t1 += q1 * vh;
+ sub_ddmmss (Th, Tl, uh, ul, t1, t0);
+ uh = vh, ul = vl;
+ vh = Th, vl = Tl;
+ }
+#if EXTEND
+ if (asign)
+ sign = -sign;
+#endif
+ }
+ else /* Same, but using single-limb calculations. */
+ {
+ mp_limb_t uh, vh;
+ /* Make UH be the most significant limb of U, and make VH be
+ corresponding bits from V. */
+ uh = up[size - 1];
+ vh = vp[size - 1];
+ count_leading_zeros (cnt, uh);
+ if (cnt != 0)
+ {
+ uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+ vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+ }
+
+ A = 1;
+ B = 0;
+ C = 0;
+ D = 1;
+
+ asign = 0;
+ for (;;)
+ {
+ mp_limb_t q, T;
+ if (vh - C == 0 || vh + D == 0)
+ break;
+
+ q = (uh + A) / (vh - C);
+ if (q != (uh - B) / (vh + D))
+ break;
+
+ asign = ~asign;
+
+ T = A + q * C;
+ A = C;
+ C = T;
+ T = B + q * D;
+ B = D;
+ D = T;
+ T = uh - q * vh;
+ uh = vh;
+ vh = T;
+
+ if (vh - D == 0)
+ break;
+
+ q = (uh - A) / (vh + C);
+ if (q != (uh + B) / (vh - D))
+ break;
+
+ asign = ~asign;
+
+ T = A + q * C;
+ A = C;
+ C = T;
+ T = B + q * D;
+ B = D;
+ D = T;
+ T = uh - q * vh;
+ uh = vh;
+ vh = T;
+ }
+#if EXTEND
+ if (asign)
+ sign = -sign;
+#endif
+ }
+
+#if RECORD
+ max = MAX (A, max); max = MAX (B, max);
+ max = MAX (C, max); max = MAX (D, max);
+#endif
+
+ if (B == 0)
+ {
+ mp_limb_t qh;
+ mp_size_t i;
+ /* This is quite rare. I.e., optimize something else! */
+
+ /* Normalize V (and shift up U the same amount). */
+ count_leading_zeros (cnt, vp[vsize - 1]);
+ if (cnt != 0)
+ {
+ mp_limb_t cy;
+ mpn_lshift (vp, vp, vsize, cnt);
+ cy = mpn_lshift (up, up, size, cnt);
+ up[size] = cy;
+ size += cy != 0;
+ }
+
+ qh = mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+ /* Fold the full quotient into the cofactors:
+ t = s0 + q * s1, then rotate (s0, s1) <- (s1, t). */
+ MPN_COPY (tp, s0p, ssize);
+ {
+ mp_size_t qsize;
+
+ qsize = size - vsize; /* size of stored quotient from division */
+ if (ssize < qsize)
+ {
+ MPN_ZERO (tp + ssize, qsize - ssize);
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < ssize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, up + vsize, qsize, s1p[i]);
+ tp[qsize + i] = cy;
+ }
+ if (qh != 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
+ if (cy != 0)
+ abort ();
+ }
+ }
+ else
+ {
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < qsize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
+ tp[ssize + i] = cy;
+ }
+ if (qh != 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
+ if (cy != 0)
+ {
+ tp[qsize + ssize] = cy;
+ s1p[qsize + ssize] = 0;
+ ssize++;
+ }
+ }
+ }
+ ssize += qsize;
+ ssize -= tp[ssize - 1] == 0;
+ }
+
+ sign = -sign;
+ MP_PTR_SWAP (s0p, s1p);
+ MP_PTR_SWAP (s1p, tp);
+#endif
+ size = vsize;
+ if (cnt != 0)
+ {
+ mpn_rshift (up, up, size, cnt);
+ mpn_rshift (vp, vp, size, cnt);
+ }
+ MP_PTR_SWAP (up, vp);
+ }
+ else
+ {
+#if EXTEND
+ mp_size_t tsize, wsize;
+#endif
+ /* T = U*A + V*B
+ W = U*C + V*D
+ U = T
+ V = W */
+
+#if STAT
+ { mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x);
+ arr[BITS_PER_MP_LIMB - cnt]++; }
+#endif
+ if (A == 0)
+ {
+ /* B == 1 and C == 1 (D is arbitrary) */
+ mp_limb_t cy;
+ MPN_COPY (tp, vp, size);
+ MPN_COPY (wp, up, size);
+ mpn_submul_1 (wp, vp, size, D);
+ MP_PTR_SWAP (tp, up);
+ MP_PTR_SWAP (wp, vp);
+#if EXTEND
+ MPN_COPY (tp, s1p, ssize);
+ tsize = ssize;
+ tp[ssize] = 0; /* must zero since wp might spill below */
+ MPN_COPY (wp, s0p, ssize);
+ cy = mpn_addmul_1 (wp, s1p, ssize, D);
+ wp[ssize] = cy;
+ wsize = ssize + (cy != 0);
+ MP_PTR_SWAP (tp, s0p);
+ MP_PTR_SWAP (wp, s1p);
+ ssize = MAX (wsize, tsize);
+#endif
+ }
+ else
+ {
+ /* asign picks which operand of each difference is larger so
+ all mpn results stay non-negative. */
+ if (asign)
+ {
+ mp_limb_t cy;
+ mpn_mul_1 (tp, vp, size, B);
+ mpn_submul_1 (tp, up, size, A);
+ mpn_mul_1 (wp, up, size, C);
+ mpn_submul_1 (wp, vp, size, D);
+ MP_PTR_SWAP (tp, up);
+ MP_PTR_SWAP (wp, vp);
+#if EXTEND
+ cy = mpn_mul_1 (tp, s1p, ssize, B);
+ cy += mpn_addmul_1 (tp, s0p, ssize, A);
+ tp[ssize] = cy;
+ tsize = ssize + (cy != 0);
+ cy = mpn_mul_1 (wp, s0p, ssize, C);
+ cy += mpn_addmul_1 (wp, s1p, ssize, D);
+ wp[ssize] = cy;
+ wsize = ssize + (cy != 0);
+ MP_PTR_SWAP (tp, s0p);
+ MP_PTR_SWAP (wp, s1p);
+ ssize = MAX (wsize, tsize);
+#endif
+ }
+ else
+ {
+ mp_limb_t cy;
+ mpn_mul_1 (tp, up, size, A);
+ mpn_submul_1 (tp, vp, size, B);
+ mpn_mul_1 (wp, vp, size, D);
+ mpn_submul_1 (wp, up, size, C);
+ MP_PTR_SWAP (tp, up);
+ MP_PTR_SWAP (wp, vp);
+#if EXTEND
+ cy = mpn_mul_1 (tp, s0p, ssize, A);
+ cy += mpn_addmul_1 (tp, s1p, ssize, B);
+ tp[ssize] = cy;
+ tsize = ssize + (cy != 0);
+ cy = mpn_mul_1 (wp, s1p, ssize, D);
+ cy += mpn_addmul_1 (wp, s0p, ssize, C);
+ wp[ssize] = cy;
+ wsize = ssize + (cy != 0);
+ MP_PTR_SWAP (tp, s0p);
+ MP_PTR_SWAP (wp, s1p);
+ ssize = MAX (wsize, tsize);
+#endif
+ }
+ }
+
+ size -= up[size - 1] == 0;
+ }
+ }
+
+#if RECORD
+ printf ("max: %lx\n", max);
+#endif
+
+#if STAT
+ {int i; for (i = 0; i < BITS_PER_MP_LIMB; i++) printf ("%d:%d\n", i, arr[i]);}
+#endif
+
+ if (vsize == 0)
+ {
+ /* V reached zero at a multi-limb size: U holds the GCD. */
+ if (gp != up && gp != 0)
+ MPN_COPY (gp, up, size);
+#if EXTEND
+ MPN_NORMALIZE (s0p, ssize);
+ if (orig_s0p != s0p)
+ MPN_COPY (orig_s0p, s0p, ssize);
+ *s0size = sign >= 0 ? ssize : -ssize;
+#endif
+ TMP_FREE (mark);
+ return size;
+ }
+ else
+ {
+ /* V is a single limb: finish with a limb-sized Euclid loop,
+ still updating the cofactors when EXTEND. */
+ mp_limb_t vl, ul, t;
+#if EXTEND
+ mp_size_t qsize, i;
+#endif
+ vl = vp[0];
+#if EXTEND
+ t = mpn_divmod_1 (wp, up, size, vl);
+
+ MPN_COPY (tp, s0p, ssize);
+
+ qsize = size - (wp[size - 1] == 0); /* size of quotient from division */
+ if (ssize < qsize)
+ {
+ MPN_ZERO (tp + ssize, qsize - ssize);
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < ssize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]);
+ tp[qsize + i] = cy;
+ }
+ }
+ else
+ {
+ MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
+ for (i = 0; i < qsize; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
+ tp[ssize + i] = cy;
+ }
+ }
+ ssize += qsize;
+ ssize -= tp[ssize - 1] == 0;
+
+ sign = -sign;
+ MP_PTR_SWAP (s0p, s1p);
+ MP_PTR_SWAP (s1p, tp);
+#else
+ t = mpn_mod_1 (up, size, vl);
+#endif
+ ul = vl;
+ vl = t;
+ while (vl != 0)
+ {
+ mp_limb_t t;
+#if EXTEND
+ mp_limb_t q;
+ q = ul / vl;
+ t = ul - q * vl;
+
+ MPN_COPY (tp, s0p, ssize);
+
+ MPN_ZERO (s1p + ssize, 1); /* zero s1 too */
+
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp, s1p, ssize, q);
+ tp[ssize] = cy;
+ }
+
+ ssize += 1;
+ ssize -= tp[ssize - 1] == 0;
+
+ sign = -sign;
+ MP_PTR_SWAP (s0p, s1p);
+ MP_PTR_SWAP (s1p, tp);
+#else
+ t = ul % vl;
+#endif
+ ul = vl;
+ vl = t;
+ }
+ if (gp != 0)
+ gp[0] = ul;
+#if EXTEND
+ MPN_NORMALIZE (s0p, ssize);
+ if (orig_s0p != s0p)
+ MPN_COPY (orig_s0p, s0p, ssize);
+ *s0size = sign >= 0 ? ssize : -ssize;
+#endif
+ TMP_FREE (mark);
+ return 1;
+ }
+}
diff --git a/rts/gmp/mpn/generic/get_str.c b/rts/gmp/mpn/generic/get_str.c
new file mode 100644
index 0000000000..a713b61825
--- /dev/null
+++ b/rts/gmp/mpn/generic/get_str.c
@@ -0,0 +1,216 @@
+/* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR
+ to a printable string in STR in base BASE.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Convert the limb vector pointed to by MPTR and MSIZE long to a
+ char array, using base BASE for the result array. Store the
+ result in the character array STR. STR must point to an array with
+ space for the largest possible number represented by a MSIZE long
+ limb vector + 1 extra character.
+
+ The result is NOT in Ascii, to convert it to printable format, add
+ '0' or 'A' depending on the base and range.
+
+ Return the number of digits in the result string.
+ This may include some leading zeros.
+
+ The limb vector pointed to by MPTR is clobbered. */
+
+size_t
+#if __STDC__
+mpn_get_str (unsigned char *str, int base, mp_ptr mptr, mp_size_t msize)
+#else
+mpn_get_str (str, base, mptr, msize)
+     unsigned char *str;
+     int base;
+     mp_ptr mptr;
+     mp_size_t msize;
+#endif
+{
+  mp_limb_t big_base;
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+  int normalization_steps;
+#endif
+#if UDIV_TIME > 2 * UMUL_TIME
+  mp_limb_t big_base_inverted;
+#endif
+  unsigned int dig_per_u;
+  mp_size_t out_len;
+  register unsigned char *s;
+
+  big_base = __mp_bases[base].big_base;
+
+  s = str;
+
+  /* Special case zero, as the code below doesn't handle it. */
+  if (msize == 0)
+    {
+      s[0] = 0;
+      return 1;
+    }
+
+  if ((base & (base - 1)) == 0)
+    {
+      /* The base is a power of 2.  Make conversion from most
+	 significant side.  Each digit is simply a fixed-width bit field
+	 of the input, so no division is needed.  */
+      mp_limb_t n1, n0;
+      register int bits_per_digit = big_base;
+      register int x;
+      register int bit_pos;
+      register int i;
+
+      n1 = mptr[msize - 1];
+      count_leading_zeros (x, n1);
+
+      /* BIT_POS should be R when input ends in least sign. nibble,
+	 R + bits_per_digit * n when input ends in n:th least significant
+	 nibble. */
+
+      {
+	int bits;
+
+	/* Round the total significant bit count up to a whole number of
+	   digits, then express the starting position relative to the top
+	   limb.  */
+	bits = BITS_PER_MP_LIMB * msize - x;
+	x = bits % bits_per_digit;
+	if (x != 0)
+	  bits += bits_per_digit - x;
+	bit_pos = bits - (msize - 1) * BITS_PER_MP_LIMB;
+      }
+
+      /* Fast loop for bit output.  */
+      i = msize - 1;
+      for (;;)
+	{
+	  bit_pos -= bits_per_digit;
+	  while (bit_pos >= 0)
+	    {
+	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+	      bit_pos -= bits_per_digit;
+	    }
+	  i--;
+	  if (i < 0)
+	    break;
+	  /* This digit straddles a limb boundary: take the low bits of the
+	     current limb and the high bits of the next.  */
+	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+	  n1 = mptr[i];
+	  bit_pos += BITS_PER_MP_LIMB;
+	  *s++ = n0 | (n1 >> bit_pos);
+	}
+
+      *s = 0;
+
+      return s - str;
+    }
+  else
+    {
+      /* General case.  The base is not a power of 2.  Make conversion
+	 from least significant end.  */
+
+      /* If udiv_qrnnd only handles divisors with the most significant bit
+	 set, prepare BIG_BASE for being a divisor by shifting it to the
+	 left exactly enough to set the most significant bit. */
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+      count_leading_zeros (normalization_steps, big_base);
+      big_base <<= normalization_steps;
+#if UDIV_TIME > 2 * UMUL_TIME
+      /* Get the fixed-point approximation to 1/(BIG_BASE << NORMALIZATION_STEPS). */
+      big_base_inverted = __mp_bases[base].big_base_inverted;
+#endif
+#endif
+
+      /* Digits come out least significant first, so start at the far end
+	 of the output area and store them backwards.  */
+      dig_per_u = __mp_bases[base].chars_per_limb;
+      out_len = ((size_t) msize * BITS_PER_MP_LIMB
+		 * __mp_bases[base].chars_per_bit_exactly) + 1;
+      s += out_len;
+
+      while (msize != 0)
+	{
+	  int i;
+	  mp_limb_t n0, n1;
+
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+	  /* If we shifted BIG_BASE above, shift the dividend too, to get
+	     the right quotient.  We need to do this every loop,
+	     since the intermediate quotients are OK, but the quotient from
+	     one turn in the loop is going to be the dividend in the
+	     next turn, and the dividend needs to be up-shifted. */
+	  if (normalization_steps != 0)
+	    {
+	      n0 = mpn_lshift (mptr, mptr, msize, normalization_steps);
+
+	      /* If the shifting gave a carry out limb, store it and
+		 increase the length. */
+	      if (n0 != 0)
+		{
+		  mptr[msize] = n0;
+		  msize++;
+		}
+	    }
+#endif
+
+	  /* Divide the number at TP with BIG_BASE to get a quotient and a
+	     remainder.  The remainder is our new digit in base BIG_BASE. */
+	  i = msize - 1;
+	  n1 = mptr[i];
+
+	  /* If the top limb can't serve as the initial partial remainder
+	     (it is >= the divisor), start from 0 and let the loop divide
+	     it; otherwise consume it here and shrink the number.  */
+	  if (n1 >= big_base)
+	    n1 = 0;
+	  else
+	    {
+	      msize--;
+	      i--;
+	    }
+
+	  for (; i >= 0; i--)
+	    {
+	      n0 = mptr[i];
+#if UDIV_TIME > 2 * UMUL_TIME
+	      udiv_qrnnd_preinv (mptr[i], n1, n1, n0, big_base, big_base_inverted);
+#else
+	      udiv_qrnnd (mptr[i], n1, n1, n0, big_base);
+#endif
+	    }
+
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+	  /* If we shifted above (at previous UDIV_NEEDS_NORMALIZATION tests)
+	     the remainder will be up-shifted here.  Compensate. */
+	  n1 >>= normalization_steps;
+#endif
+
+	  /* Convert N1 from BIG_BASE to a string of digits in BASE
+	     using single precision operations. */
+	  for (i = dig_per_u - 1; i >= 0; i--)
+	    {
+	      *--s = n1 % base;
+	      n1 /= base;
+	      if (n1 == 0 && msize == 0)
+		break;
+	    }
+	}
+
+      /* Zero-fill any remaining slack so exactly OUT_LEN digits (possibly
+	 with leading zeros, as documented above) are returned.  */
+      while (s != str)
+	*--s = 0;
+      return out_len;
+    }
+}
diff --git a/rts/gmp/mpn/generic/gmp-mparam.h b/rts/gmp/mpn/generic/gmp-mparam.h
new file mode 100644
index 0000000000..14bcaece83
--- /dev/null
+++ b/rts/gmp/mpn/generic/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Generic fallback parameters describing a plain 32-bit machine.
+   NOTE(review): presumably CPU-specific gmp-mparam.h files shadow this
+   generic one via the build's include path — confirm against the
+   configure machinery.  */
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/rts/gmp/mpn/generic/hamdist.c b/rts/gmp/mpn/generic/hamdist.c
new file mode 100644
index 0000000000..35c10e8450
--- /dev/null
+++ b/rts/gmp/mpn/generic/hamdist.c
@@ -0,0 +1,94 @@
+/* mpn_hamdist --
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined __GNUC__
+/* No processor claiming to be SPARC v9 compliant seem to
+ implement the POPC instruction. Disable pattern for now. */
+#if 0 && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
+#define popc_limb(a) \
+ ({ \
+ DItype __res; \
+ asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \
+ __res; \
+ })
+#endif
+#endif
+
+#ifndef popc_limb
+
+/* Cool population count of a mp_limb_t.
+   You have to figure out how this works, I won't tell you! */
+
+/* (It is the classic SWAR popcount: each step sums adjacent bit fields in
+   parallel, doubling the field width, until one field holds the count of
+   set bits for the whole limb.)  */
+
+static inline unsigned int
+#if __STDC__
+popc_limb (mp_limb_t x)
+#else
+popc_limb (x)
+     mp_limb_t x;
+#endif
+{
+#if BITS_PER_MP_LIMB == 64
+  /* We have to go into some trouble to define these constants.
+     (For mp_limb_t being `long long'.) */
+  mp_limb_t cnst;
+  cnst = 0xaaaaaaaaL | ((mp_limb_t) 0xaaaaaaaaL << BITS_PER_MP_LIMB/2);
+  x -= (x & cnst) >> 1;                 /* 2-bit fields hold their popcount */
+  cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2);
+  x = ((x & ~cnst) >> 2) + (x & cnst);  /* fold into 4-bit fields */
+  cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2);
+  x = ((x >> 4) + x) & cnst;            /* 8-bit fields */
+  x = ((x >> 8) + x);
+  x = ((x >> 16) + x);
+  x = ((x >> 32) + x) & 0xff;           /* final sum fits in one byte */
+#endif
+#if BITS_PER_MP_LIMB == 32
+  x -= (x & 0xaaaaaaaa) >> 1;           /* 2-bit fields hold their popcount */
+  x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L);  /* 4-bit fields */
+  x = ((x >> 4) + x) & 0x0f0f0f0fL;     /* 8-bit fields */
+  x = ((x >> 8) + x);
+  x = ((x >> 16) + x) & 0xff;           /* final sum fits in one byte */
+#endif
+  return x;
+}
+#endif
+
+/* Return the Hamming distance between {up,size} and {vp,size}: the total
+   number of bit positions in which the two operands differ, i.e. the
+   population count of the limb-wise XOR.  */
+unsigned long int
+#if __STDC__
+mpn_hamdist (mp_srcptr up, mp_srcptr vp, mp_size_t size)
+#else
+mpn_hamdist (up, vp, size)
+     register mp_srcptr up;
+     register mp_srcptr vp;
+     register mp_size_t size;
+#endif
+{
+  unsigned long int hamdist;
+  mp_size_t i;
+
+  hamdist = 0;
+  for (i = 0; i < size; i++)
+    hamdist += popc_limb (up[i] ^ vp[i]);
+
+  return hamdist;
+}
diff --git a/rts/gmp/mpn/generic/inlines.c b/rts/gmp/mpn/generic/inlines.c
new file mode 100644
index 0000000000..9487e58cf2
--- /dev/null
+++ b/rts/gmp/mpn/generic/inlines.c
@@ -0,0 +1,24 @@
+/*
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+/* NOTE(review): defining _FORCE_INLINES with an empty _EXTERN_INLINE before
+   including gmp.h presumably makes this translation unit emit out-of-line
+   definitions of gmp.h's inline functions, for callers/compilers that do
+   not inline them — confirm against gmp.h's use of these macros.  */
+#define _FORCE_INLINES
+#define _EXTERN_INLINE /* empty */
+#include "gmp.h"
diff --git a/rts/gmp/mpn/generic/jacbase.c b/rts/gmp/mpn/generic/jacbase.c
new file mode 100644
index 0000000000..dd437f1ac1
--- /dev/null
+++ b/rts/gmp/mpn/generic/jacbase.c
@@ -0,0 +1,136 @@
+/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
+
+ THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
+ INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+#if COUNT_TRAILING_ZEROS_TIME <= 7
+/* If count_trailing_zeros is fast, use it.
+   K7 at 7 cycles and P6 at 2 are good here.  K6 at 12-27 and P5 at 18-42
+   are not.  The default 15 in longlong.h is meant to mean not good here. */
+
+/* Strip all factors of 2 from "a" in one step, folding the accumulated
+   (2/b) factors into result_bit1.  */
+#define PROCESS_TWOS_ANY                                \
+  {                                                     \
+    mp_limb_t twos;                                     \
+    count_trailing_zeros (twos, a);                     \
+    result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);        \
+    a >>= twos;                                         \
+  }
+
+#define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY
+
+#else
+/* Use a loop instead.  With "a" uniformly distributed there will usually be
+   only a few trailing zeros.
+
+   Unfortunately the branch for the while loop here will be on a 50/50
+   chance of a 1 or 0, which is bad for branch prediction. */
+
+/* Variant for when "a" is known even: strips one factor of 2 per
+   iteration, applying the (2/b) sign each time.  */
+#define PROCESS_TWOS_EVEN               \
+  {                                     \
+    int two;                            \
+    two = JACOBI_TWO_U_BIT1 (b);        \
+    do                                  \
+      {                                 \
+        a >>= 1;                        \
+        result_bit1 ^= two;             \
+        ASSERT (a != 0);                \
+      }                                 \
+    while ((a & 1) == 0);               \
+  }
+
+#define PROCESS_TWOS_ANY                \
+  if ((a & 1) == 0)                     \
+    PROCESS_TWOS_EVEN;
+
+#endif
+
+
+/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
+   with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
+
+   The initial result_bit1 is taken as a parameter for the convenience of
+   mpz_kronecker_zi_ui() et al.  The sign changes both here and in those
+   routines accumulate nicely in bit 1, see the JACOBI macros.
+
+   The return value here is the normal +1, 0, or -1.  Note that +1 and -1
+   have bit 1 in the "BIT1" sense, which could be useful if the caller is
+   accumulating it into some extended calculation.
+
+   Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
+   possible, but a couple of tests suggest it's not a significant speedup,
+   and may even be a slowdown, so what's here is good enough for now.
+
+   Future: The code doesn't demand a<=b actually, so maybe this could be
+   relaxed.  All the places this is used currently call with a<=b though. */
+
+int
+#if __STDC__
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
+#else
+mpn_jacobi_base (a, b, result_bit1)
+     mp_limb_t a;
+     mp_limb_t b;
+     int result_bit1;
+#endif
+{
+  ASSERT (b & 1);  /* b odd */
+  ASSERT (b != 1);
+  ASSERT (a <= b);
+
+  if (a == 0)
+    return 0;
+
+  PROCESS_TWOS_ANY;
+  if (a == 1)
+    goto done;
+
+  /* Binary-GCD style reduction: subtract, strip twos, and apply quadratic
+     reciprocity (JACOBI_RECIP_UU_BIT1) whenever a and b are swapped.  */
+  for (;;)
+    {
+      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
+      MP_LIMB_T_SWAP (a, b);
+
+      do
+	{
+	  /* working on (a/b), a,b odd, a>=b */
+	  ASSERT (a & 1);
+	  ASSERT (b & 1);
+	  ASSERT (a >= b);
+
+	  /* a == b here means gcd > 1, hence symbol 0.  */
+	  if ((a -= b) == 0)
+	    return 0;
+
+	  PROCESS_TWOS_EVEN;
+	  if (a == 1)
+	    goto done;
+	}
+      while (a >= b);
+    }
+
+ done:
+  return JACOBI_BIT1_TO_PN (result_bit1);
+}
diff --git a/rts/gmp/mpn/generic/lshift.c b/rts/gmp/mpn/generic/lshift.c
new file mode 100644
index 0000000000..0b58389658
--- /dev/null
+++ b/rts/gmp/mpn/generic/lshift.c
@@ -0,0 +1,87 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ and store the USIZE least significant digits of the result at WP.
+ Return the bits shifted out from the most significant digit.
+
+ Argument constraints:
+ 1. 0 < CNT < BITS_PER_MP_LIMB
+ 2. If the result is to be written over the input, WP must be >= UP.
+*/
+
+mp_limb_t
+#if __STDC__
+mpn_lshift (register mp_ptr wp,
+	    register mp_srcptr up, mp_size_t usize,
+	    register unsigned int cnt)
+#else
+mpn_lshift (wp, up, usize, cnt)
+     register mp_ptr wp;
+     register mp_srcptr up;
+     mp_size_t usize;
+     register unsigned int cnt;
+#endif
+{
+  register mp_limb_t high_limb, low_limb;
+  register unsigned sh_1, sh_2;
+  register mp_size_t i;
+  mp_limb_t retval;
+
+#ifdef DEBUG
+  if (usize == 0 || cnt == 0)
+    abort ();
+#endif
+
+  sh_1 = cnt;
+#if 0
+  /* Disabled cnt==0 handling; the documented precondition is 0 < cnt.  */
+  if (sh_1 == 0)
+    {
+      if (wp != up)
+	{
+	  /* Copy from high end to low end, to allow specified input/output
+	     overlapping. */
+	  for (i = usize - 1; i >= 0; i--)
+	    wp[i] = up[i];
+	}
+      return 0;
+    }
+#endif
+
+  /* Offset wp by one so wp[i] receives the high part of up[i] combined
+     with the low part of up[i+1].  sh_2 is the complementary right-shift
+     count; note 0 < cnt < BITS_PER_MP_LIMB keeps both shifts defined.  */
+  wp += 1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
+  /* Work downward from the most significant limb so that overlapping
+     operands with wp >= up (documented constraint 2) are handled.  */
+  i = usize - 1;
+  low_limb = up[i];
+  retval = low_limb >> sh_2;  /* bits shifted out the top, returned */
+  high_limb = low_limb;
+  while (--i >= 0)
+    {
+      low_limb = up[i];
+      wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+      high_limb = low_limb;
+    }
+  /* i is -1 here, so wp[i] is the original wp[0]: the least significant
+     output limb gets zeros shifted in from below.  */
+  wp[i] = high_limb << sh_1;
+
+  return retval;
+}
diff --git a/rts/gmp/mpn/generic/mod_1.c b/rts/gmp/mpn/generic/mod_1.c
new file mode 100644
index 0000000000..168ec9df49
--- /dev/null
+++ b/rts/gmp/mpn/generic/mod_1.c
@@ -0,0 +1,175 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ Return the single-limb remainder.
+ There are no constraints on the value of the divisor.
+
+Copyright (C) 1991, 1993, 1994, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+mp_limb_t
+#if __STDC__
+mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+	   mp_limb_t divisor_limb)
+#else
+mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
+     mp_srcptr dividend_ptr;
+     mp_size_t dividend_size;
+     mp_limb_t divisor_limb;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t n1, n0, r;
+  int dummy;
+
+  /* Botch: Should this be handled at all?  Rely on callers? */
+  if (dividend_size == 0)
+    return 0;
+
+  /* If multiplication is much faster than division, and the
+     dividend is large, pre-invert the divisor, and use
+     only multiplications in the inner loop. */
+
+  /* This test should be read:
+       Does it ever help to use udiv_qrnnd_preinv?
+	 && Does what we save compensate for the inversion overhead? */
+  if (UDIV_TIME > (2 * UMUL_TIME + 6)
+      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+    {
+      int normalization_steps;
+
+      count_leading_zeros (normalization_steps, divisor_limb);
+      if (normalization_steps != 0)
+	{
+	  /* Divisor not normalized: shift divisor (and conceptually the
+	     dividend) left so the divisor's top bit is set, then shift the
+	     final remainder back down.  */
+	  mp_limb_t divisor_limb_inverted;
+
+	  divisor_limb <<= normalization_steps;
+	  invert_limb (divisor_limb_inverted, divisor_limb);
+
+	  n1 = dividend_ptr[dividend_size - 1];
+	  r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+	  /* Possible optimization:
+	     if (r == 0
+	     && divisor_limb > ((n1 << normalization_steps)
+			     | (dividend_ptr[dividend_size - 2] >> ...)))
+	     ...one division less... */
+
+	  /* Each step divides the two-limb window formed from the shifted
+	     adjacent dividend limbs; only the remainder r is kept.  */
+	  for (i = dividend_size - 2; i >= 0; i--)
+	    {
+	      n0 = dividend_ptr[i];
+	      udiv_qrnnd_preinv (dummy, r, r,
+				 ((n1 << normalization_steps)
+				  | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+				 divisor_limb, divisor_limb_inverted);
+	      n1 = n0;
+	    }
+	  udiv_qrnnd_preinv (dummy, r, r,
+			     n1 << normalization_steps,
+			     divisor_limb, divisor_limb_inverted);
+	  return r >> normalization_steps;
+	}
+      else
+	{
+	  mp_limb_t divisor_limb_inverted;
+
+	  invert_limb (divisor_limb_inverted, divisor_limb);
+
+	  i = dividend_size - 1;
+	  r = dividend_ptr[i];
+
+	  /* If the top limb is >= the divisor it can't serve directly as
+	     the initial remainder; start from 0 and let the loop's first
+	     division absorb it.  Otherwise consume it here.  */
+	  if (r >= divisor_limb)
+	    r = 0;
+	  else
+	    i--;
+
+	  for (; i >= 0; i--)
+	    {
+	      n0 = dividend_ptr[i];
+	      udiv_qrnnd_preinv (dummy, r, r,
+				 n0, divisor_limb, divisor_limb_inverted);
+	    }
+	  return r;
+	}
+    }
+  else
+    {
+      if (UDIV_NEEDS_NORMALIZATION)
+	{
+	  int normalization_steps;
+
+	  count_leading_zeros (normalization_steps, divisor_limb);
+	  if (normalization_steps != 0)
+	    {
+	      divisor_limb <<= normalization_steps;
+
+	      n1 = dividend_ptr[dividend_size - 1];
+	      r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+	      /* Possible optimization:
+		 if (r == 0
+		 && divisor_limb > ((n1 << normalization_steps)
+				 | (dividend_ptr[dividend_size - 2] >> ...)))
+		 ...one division less... */
+
+	      for (i = dividend_size - 2; i >= 0; i--)
+		{
+		  n0 = dividend_ptr[i];
+		  udiv_qrnnd (dummy, r, r,
+			      ((n1 << normalization_steps)
+			       | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+			      divisor_limb);
+		  n1 = n0;
+		}
+	      udiv_qrnnd (dummy, r, r,
+			  n1 << normalization_steps,
+			  divisor_limb);
+	      return r >> normalization_steps;
+	    }
+	}
+      /* No normalization needed, either because udiv_qrnnd doesn't require
+	 it, or because DIVISOR_LIMB is already normalized. */
+
+      i = dividend_size - 1;
+      r = dividend_ptr[i];
+
+      /* Same top-limb trick as in the pre-inverted branch above.  */
+      if (r >= divisor_limb)
+	r = 0;
+      else
+	i--;
+
+      for (; i >= 0; i--)
+	{
+	  n0 = dividend_ptr[i];
+	  udiv_qrnnd (dummy, r, r, n0, divisor_limb);
+	}
+      return r;
+    }
+}
diff --git a/rts/gmp/mpn/generic/mod_1_rs.c b/rts/gmp/mpn/generic/mod_1_rs.c
new file mode 100644
index 0000000000..62aaa94b92
--- /dev/null
+++ b/rts/gmp/mpn/generic/mod_1_rs.c
@@ -0,0 +1,111 @@
+/* mpn_mod_1_rshift -- mpn remainder under hypothetical right shift.
+
+ THE FUNCTION IN THIS FILE IS FOR INTERNAL USE AND HAS A MUTABLE
+ INTERFACE. IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+ IT'S ALMOST GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* When testing on a CPU with UDIV_NEEDS_NORMALIZATION equal to 0, it can be
+ changed to 1 temporarily to test the code under that case too. */
+#if 0
+#undef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 1
+#endif
+
+
+/* Calculate the remainder "(ptr,size >> shift) % divisor". Note ptr,size
+ is unchanged, the shift is only for its effect on the remainder.
+ The shift doesn't even need to be considered until the last limb.
+
+ This function has the normal size!=0 restriction, unlike the basic
+ mpn_mod_1. */
+
+mp_limb_t
+#if __STDC__
+mpn_mod_1_rshift (mp_srcptr ptr, mp_size_t size, unsigned shift,
+		  mp_limb_t divisor)
+#else
+mpn_mod_1_rshift (ptr, size, shift, divisor)
+     mp_srcptr ptr;
+     mp_size_t size;
+     unsigned shift;
+     mp_limb_t divisor;
+#endif
+{
+  mp_limb_t quot, rem;
+
+  ASSERT (shift >= 1);
+  ASSERT (shift < BITS_PER_MP_LIMB);
+  ASSERT (size >= 1);
+
+  if (size == 1)
+    return (ptr[0] >> shift) % divisor;
+
+#if UDIV_NEEDS_NORMALIZATION
+  {
+    int norm;
+    int delta;
+
+    count_leading_zeros (norm, divisor);
+    divisor <<= norm;
+
+    /* delta is the part of the hypothetical right shift remaining after
+       the divisor's normalizing left shift has been accounted for.  */
+    delta = shift - norm;
+    if (delta == 0)
+      return mpn_mod_1 (ptr, size, divisor) >> norm;
+
+    if (delta > 0)
+      {
+	/* Reduce limbs 1..size-1 first, then fold in the surviving high
+	   bits of ptr[0] with one extra division step.  */
+	rem = mpn_mod_1 (ptr+1, size-1, divisor);
+	udiv_qrnnd (quot, rem,
+		    rem >> delta,
+		    (rem << (BITS_PER_MP_LIMB-delta)) | (ptr[0] >> delta),
+		    divisor);
+	return rem >> norm;
+      }
+    else
+      {
+	/* delta < 0: the whole operand is reduced, then shifted up by
+	   -delta via one division step.  */
+	rem = mpn_mod_1 (ptr, size, divisor);
+	udiv_qrnnd (quot, rem,
+		    rem >> (BITS_PER_MP_LIMB+delta),
+		    rem << -delta,
+		    divisor);
+	return rem >> norm;
+      }
+  }
+
+#else /* !UDIV_NEEDS_NORMALIZATION */
+
+  /* No normalization: reduce limbs 1..size-1, then one division step
+     folds in ptr[0]'s bits above the shift point.  */
+  rem = mpn_mod_1 (ptr+1, size-1, divisor);
+  udiv_qrnnd (quot, rem,
+	      rem >> shift,
+	      (rem << (BITS_PER_MP_LIMB-shift)) | (ptr[0] >> shift),
+	      divisor);
+  return rem;
+
+#endif
+}
diff --git a/rts/gmp/mpn/generic/mul.c b/rts/gmp/mpn/generic/mul.c
new file mode 100644
index 0000000000..cecfa19ca1
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul.c
@@ -0,0 +1,190 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+ THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul)
+ ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH
+ THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
+ (pointed to by VP, with VN limbs), and store the result at PRODP. The
+ result is UN + VN limbs. Return the most significant limb of the result.
+
+ NOTE: The space pointed to by PRODP is overwritten before finished with U
+ and V, so overlap is an error.
+
+ Argument constraints:
+ 1. UN >= VN.
+ 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
+ the multiplier and the multiplicand. */
+
+/* Square the natural number {up,un}, storing the 2*un-limb result at
+   PRODP.  Chooses the algorithm by operand size: schoolbook
+   (mpn_sqr_basecase), Karatsuba (mpn_kara_sqr_n), Toom-3
+   (mpn_toom3_sqr_n), and — when WANT_FFT is compiled in — an FFT multiply
+   (mpn_mul_fft_full).  PRODP must not overlap UP.  */
+void
+#if __STDC__
+mpn_sqr_n (mp_ptr prodp,
+	   mp_srcptr up, mp_size_t un)
+#else
+mpn_sqr_n (prodp, up, un)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_size_t un;
+#endif
+{
+  if (un < KARATSUBA_SQR_THRESHOLD)
+    { /* plain schoolbook multiplication */
+      if (un == 0)
+	return;
+      mpn_sqr_basecase (prodp, up, un);
+    }
+  else if (un < TOOM3_SQR_THRESHOLD)
+    { /* karatsuba multiplication */
+      mp_ptr tspace;
+      TMP_DECL (marker);
+      TMP_MARK (marker);
+      /* Scratch space for the recursion, sized per the Karatsuba/Toom
+	 requirements.  */
+      tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
+      mpn_kara_sqr_n (prodp, up, un, tspace);
+      TMP_FREE (marker);
+    }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+  else if (un < FFT_SQR_THRESHOLD)
+#else
+  else
+#endif
+    { /* toom3 multiplication */
+      mp_ptr tspace;
+      TMP_DECL (marker);
+      TMP_MARK (marker);
+      tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
+      mpn_toom3_sqr_n (prodp, up, un, tspace);
+      TMP_FREE (marker);
+    }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+  else
+    {
+      /* schoenhage multiplication */
+      mpn_mul_fft_full (prodp, up, un, up, un);
+    }
+#endif
+}
+
+mp_limb_t
+#if __STDC__
+mpn_mul (mp_ptr prodp,
+	 mp_srcptr up, mp_size_t un,
+	 mp_srcptr vp, mp_size_t vn)
+#else
+mpn_mul (prodp, up, un, vp, vn)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_size_t un;
+     mp_srcptr vp;
+     mp_size_t vn;
+#endif
+{
+  mp_size_t l;
+  mp_limb_t c;
+
+  /* Identical operands: squaring has faster special-case code.  */
+  if (up == vp && un == vn)
+    {
+      mpn_sqr_n (prodp, up, un);
+      return prodp[2 * un - 1];
+    }
+
+  if (vn < KARATSUBA_MUL_THRESHOLD)
+    { /* long multiplication */
+      mpn_mul_basecase (prodp, up, un, vp, vn);
+      return prodp[un + vn - 1];
+    }
+
+  /* Multiply the low vn limbs of u by v in place, then handle the rest of
+     u in blocks of (at most) vn limbs, accumulating each partial product
+     into the result.  */
+  mpn_mul_n (prodp, up, vp, vn);
+  if (un != vn)
+    { mp_limb_t t;
+      mp_ptr ws;
+      TMP_DECL (marker);
+      TMP_MARK (marker);
+
+      prodp += vn;
+      l = vn;                     /* limbs of prodp still carrying data */
+      up += vn;
+      un -= vn;
+
+      if (un < vn)
+	{
+	  /* Swap u's and v's. */
+	  MPN_SRCPTR_SWAP (up,un, vp,vn);
+	}
+
+      ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ? vn : un) + vn)
+			       * BYTES_PER_MP_LIMB);
+
+      /* t accumulates the carry out of the region summed so far.  */
+      t = 0;
+      while (vn >= KARATSUBA_MUL_THRESHOLD)
+	{
+	  mpn_mul_n (ws, up, vp, vn);
+	  if (l <= 2*vn)
+	    {
+	      t += mpn_add_n (prodp, prodp, ws, l);
+	      if (l != 2*vn)
+		{
+		  t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);
+		  l = 2*vn;
+		}
+	    }
+	  else
+	    {
+	      c = mpn_add_n (prodp, prodp, ws, 2*vn);
+	      t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);
+	    }
+	  prodp += vn;
+	  l -= vn;
+	  up += vn;
+	  un -= vn;
+	  if (un < vn)
+	    {
+	      /* Swap u's and v's. */
+	      MPN_SRCPTR_SWAP (up,un, vp,vn);
+	    }
+	}
+
+      /* Final block, too small for Karatsuba: use the basecase.  */
+      if (vn)
+	{
+	  mpn_mul_basecase (ws, up, un, vp, vn);
+	  if (l <= un + vn)
+	    {
+	      t += mpn_add_n (prodp, prodp, ws, l);
+	      if (l != un + vn)
+		t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);
+	    }
+	  else
+	    {
+	      c = mpn_add_n (prodp, prodp, ws, un + vn);
+	      t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);
+	    }
+	}
+
+      TMP_FREE (marker);
+    }
+  return prodp[un + vn - 1];
+}
diff --git a/rts/gmp/mpn/generic/mul_1.c b/rts/gmp/mpn/generic/mul_1.c
new file mode 100644
index 0000000000..1c36b5fb1f
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_1.c
@@ -0,0 +1,59 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and
+ store the product in a second limb vector.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Multiply {s1_ptr,s1_size} by the single limb s2_limb, store the low
+   s1_size limbs of the product at res_ptr, and return the most
+   significant (carry-out) limb.  res_ptr may equal s1_ptr.
+   NOTE: this is the K&R-only variant; s1_size must be >= 1 since the
+   do/while body executes at least once.  */
+mp_limb_t
+mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+
+  /* The loop counter and index J goes from -S1_SIZE to -1.  This way
+     the loop becomes faster. */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices. */
+  s1_ptr -= j;
+  res_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      /* Add the previous carry into the low product limb; if that wraps,
+	 (prod_low < cy_limb) contributes the ripple into the new carry. */
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/mul_basecase.c b/rts/gmp/mpn/generic/mul_basecase.c
new file mode 100644
index 0000000000..00c06aa5c4
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_basecase.c
@@ -0,0 +1,87 @@
+/* mpn_mul_basecase -- Internal routine to multiply two natural numbers
+ of length m and n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Handle simple cases with traditional multiplication.
+
+   This is the most critical code of multiplication.  All multiplies rely
+   on this, both small and huge.  Small ones arrive here immediately, huge
+   ones arrive here as this is the base case for Karatsuba's recursive
+   algorithm.  */
+
+void
+#if __STDC__
+mpn_mul_basecase (mp_ptr prodp,
+		  mp_srcptr up, mp_size_t usize,
+		  mp_srcptr vp, mp_size_t vsize)
+#else
+mpn_mul_basecase (prodp, up, usize, vp, vsize)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_size_t usize;
+     mp_srcptr vp;
+     mp_size_t vsize;
+#endif
+{
+  /* Multiply U by the low limb (or limb pair) of V first: those products
+     can be stored directly into PROD rather than added, which also avoids
+     an explicit zeroing loop.  */
+#if HAVE_NATIVE_mpn_mul_2
+  if (vsize < 2)
+    {
+      prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
+      return;
+    }
+  prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp[0], vp[1]);
+  prodp += 2;
+  vp += 2;
+  vsize -= 2;
+#else
+  prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
+  prodp += 1;
+  vp += 1;
+  vsize -= 1;
+#endif
+
+  /* Accumulate the remaining partial products into PROD, one limb (or
+     limb pair) of V per iteration.  */
+#if HAVE_NATIVE_mpn_addmul_2
+  for (; vsize >= 2; vsize -= 2)
+    {
+      prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]);
+      prodp += 2;
+      vp += 2;
+    }
+  if (vsize != 0)
+    prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
+#else
+  for (; vsize != 0; vsize -= 1)
+    {
+      prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
+      prodp += 1;
+      vp += 1;
+    }
+#endif
+}
diff --git a/rts/gmp/mpn/generic/mul_fft.c b/rts/gmp/mpn/generic/mul_fft.c
new file mode 100644
index 0000000000..00fd6d72de
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_fft.c
@@ -0,0 +1,772 @@
+/* An implementation in GMP of Scho"nhage's fast multiplication algorithm
+ modulo 2^N+1, by Paul Zimmermann, INRIA Lorraine, February 1998.
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND THE FUNCTIONS HAVE
+ MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED
+ INTERFACES. IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN
+ A FUTURE GNU MP RELEASE.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+/* References:
+
+ Schnelle Multiplikation grosser Zahlen, by Arnold Scho"nhage and Volker
+ Strassen, Computing 7, p. 281-292, 1971.
+
+ Asymptotically fast algorithms for the numerical multiplication
+ and division of polynomials with complex coefficients, by Arnold Scho"nhage,
+ Computer Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982.
+
+ Tapes versus Pointers, a study in implementing fast algorithms,
+ by Arnold Scho"nhage, Bulletin of the EATCS, 30, p. 23-32, 1986.
+
+ See also http://www.loria.fr/~zimmerma/bignum
+
+
+ Future:
+
+ K==2 isn't needed in the current uses of this code and the bits specific
+ for that could be dropped.
+
+ It might be possible to avoid a small number of MPN_COPYs by using a
+ rotating temporary or two.
+
+ Multiplications of unequal sized operands can be done with this code, but
+ it needs a tighter test for identifying squaring (same sizes as well as
+ same pointers). */
+
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Change this to "#define TRACE(x) x" for some traces. */
+#define TRACE(x)
+
+
+
+/* Size thresholds consulted by mpn_fft_best_k to choose the FFT size
+   parameter k; row 0 is for multiplication, row 1 for squaring.  */
+FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] = {
+  FFT_MUL_TABLE,
+  FFT_SQR_TABLE
+};
+
+
+static void mpn_mul_fft_internal
+_PROTO ((mp_limb_t *op, mp_srcptr n, mp_srcptr m, mp_size_t pl,
+ int k, int K,
+ mp_limb_t **Ap, mp_limb_t **Bp,
+ mp_limb_t *A, mp_limb_t *B,
+ mp_size_t nprime, mp_size_t l, mp_size_t Mp, int **_fft_l,
+ mp_limb_t *T, int rec));
+
+
+/* Find the best k to use for a mod 2^(n*BITS_PER_MP_LIMB)+1 FFT.
+   sqr==0 if for a multiply, sqr==1 for a square.  */
+int
+#if __STDC__
+mpn_fft_best_k (mp_size_t n, int sqr)
+#else
+mpn_fft_best_k (n, sqr)
+     mp_size_t n;
+     int sqr;
+#endif
+{
+  int i;
+
+  /* Scan the tuning table; the first threshold exceeding n fixes k.  */
+  i = 0;
+  while (mpn_fft_table[sqr][i] != 0)
+    {
+      if (n < mpn_fft_table[sqr][i])
+        return i + FFT_FIRST_K;
+      i++;
+    }
+
+  /* Past the end of the table: treat 4*last entry as one further entry.  */
+  if (i == 0 || n < 4 * mpn_fft_table[sqr][i - 1])
+    return i + FFT_FIRST_K;
+  return i + FFT_FIRST_K + 1;
+}
+
+
+/* Return the smallest possible number of limbs >= pl for an FFT of size
+   2^k: pl is rounded up so that 2^k evenly divides the bit count and each
+   of the 2^k pieces is a whole number of limbs.  */
+
+mp_size_t
+#if __STDC__
+mpn_fft_next_size (mp_size_t pl, int k)
+#else
+mpn_fft_next_size (pl, k)
+     mp_size_t pl;
+     int k;
+#endif
+{
+  mp_size_t N, M;
+  int K;
+
+  N = pl * BITS_PER_MP_LIMB;
+  K = 1 << k;
+  /* Round the total bit count up to a multiple of K.  */
+  if (N % K != 0)
+    N = (N / K + 1) * K;
+  /* Each of the K pieces must be limb-aligned as well.  */
+  M = N / K;
+  if (M % BITS_PER_MP_LIMB != 0)
+    N = (M / BITS_PER_MP_LIMB + 1) * BITS_PER_MP_LIMB * K;
+  return N / BITS_PER_MP_LIMB;
+}
+
+
+/* Fill l[0..k] with index permutation tables for the FFT butterflies;
+   table i has 2^i entries and is built from table i-1.  */
+static void
+#if __STDC__
+mpn_fft_initl (int **l, int k)
+#else
+mpn_fft_initl (l, k)
+     int **l;
+     int k;
+#endif
+{
+  int i, j, K;
+
+  l[0][0] = 0;
+  /* Each table doubles the previous one: the first half holds the doubled
+     entries, the second half the doubled entries plus one.  */
+  for (i = 1, K = 2; i <= k; i++, K *= 2)
+    {
+      for (j = 0; j < K / 2; j++)
+        {
+          l[i][j] = 2 * l[i - 1][j];
+          l[i][K / 2 + j] = 1 + l[i][j];
+        }
+    }
+}
+
+
+/* a <- -a mod 2^(n*BITS_PER_MP_LIMB)+1 */
+static void
+#if __STDC__
+mpn_fft_neg_modF (mp_limb_t *ap, mp_size_t n)
+#else
+mpn_fft_neg_modF (ap, n)
+     mp_limb_t *ap;
+     mp_size_t n;
+#endif
+{
+  mp_limb_t cy;
+
+  /* Complement the n low limbs, clear the top limb, then add back
+     ap[n]+2 to complete the negation modulo 2^N+1.  */
+  cy = ap[n] + 2;
+  mpn_com_n (ap, ap, n);
+  ap[n] = 0;
+  mpn_incr_u (ap, cy);
+}
+
+
+/* a <- a*2^e mod 2^(n*BITS_PER_MP_LIMB)+1 */
+/* ap holds n+1 limbs; tp is n+1 limbs of scratch.  e may exceed
+   n*BITS_PER_MP_LIMB; the final negation handles the sign flip from
+   full wrap-arounds (2^(n*BITS_PER_MP_LIMB) == -1 here).  */
+static void
+#if __STDC__
+mpn_fft_mul_2exp_modF(mp_limb_t *ap, int e, mp_size_t n, mp_limb_t *tp)
+#else
+mpn_fft_mul_2exp_modF(ap, e, n, tp)
+     mp_limb_t *ap;
+     int e;
+     mp_size_t n;
+     mp_limb_t *tp;
+#endif
+{
+  int d, sh, i; mp_limb_t cc;
+
+  d = e%(n*BITS_PER_MP_LIMB); /* 2^e = (+/-) 2^d */
+  sh = d % BITS_PER_MP_LIMB;
+  /* First do the sub-limb part of the shift into the scratch area.  */
+  if (sh) mpn_lshift(tp, ap, n+1, sh); /* no carry here */
+  else MPN_COPY(tp, ap, n+1);
+  d /= BITS_PER_MP_LIMB; /* now shift of d limbs to the left */
+  if (d) {
+    /* ap[d..n-1] = tp[0..n-d-1], ap[0..d-1] = -tp[n-d..n-1] */
+    /* mpn_xor would be more efficient here */
+    for (i=d-1;i>=0;i--) ap[i] = ~tp[n-d+i];
+    cc = 1-mpn_add_1(ap, ap, d, 1);
+    if (cc) cc=mpn_sub_1(ap+d, tp, n-d, 1);
+    else MPN_COPY(ap+d, tp, n-d);
+    /* Fold in the top limb of the shifted value, propagating the
+       borrow/carry so the result stays reduced mod 2^N+1.  */
+    if (cc+=mpn_sub_1(ap+d, ap+d, n-d, tp[n]))
+      ap[n]=mpn_add_1(ap, ap, n, cc);
+    else ap[n]=0;
+  }
+  else if ((ap[n]=mpn_sub_1(ap, tp, n, tp[n]))) {
+    ap[n]=mpn_add_1(ap, ap, n, 1);
+  }
+  /* An odd number of full wrap-arounds negates the result.  */
+  if ((e/(n*BITS_PER_MP_LIMB))%2) mpn_fft_neg_modF(ap, n);
+}
+
+
+/* a <- a+b mod 2^(n*BITS_PER_MP_LIMB)+1 */
+static void
+#if __STDC__
+mpn_fft_add_modF (mp_limb_t *ap, mp_limb_t *bp, int n)
+#else
+mpn_fft_add_modF (ap, bp, n)
+     mp_limb_t *ap,*bp;
+     int n;
+#endif
+{
+  mp_limb_t cy;
+
+  /* Add the full (n+1)-limb values; cy collects the top-limb sum plus
+     the carry out of the low limbs.  */
+  cy = ap[n] + bp[n] + mpn_add_n (ap, ap, bp, n);
+  if (cy > 1)
+    {
+      /* Wrap the overflow around: subtract 2^N+1.  */
+      cy -= 1 + mpn_sub_1 (ap, ap, n, 1);
+    }
+  ap[n] = cy;
+}
+
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+   N=n*BITS_PER_MP_LIMB
+   2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1
+   Squaring variant: transforms a single coefficient array in place.
+   tp is n+1 limbs of scratch.  */
+
+static void
+#if __STDC__
+mpn_fft_fft_sqr (mp_limb_t **Ap, mp_size_t K, int **ll,
+                 mp_size_t omega, mp_size_t n, mp_size_t inc, mp_limb_t *tp)
+#else
+mpn_fft_fft_sqr(Ap,K,ll,omega,n,inc,tp)
+mp_limb_t **Ap,*tp;
+mp_size_t K,omega,n,inc;
+int **ll;
+#endif
+{
+  if (K==2) {
+    /* Size-2 butterfly: (A[0],A[inc]) <- (A[0]+A[inc], A[0]-A[inc]).
+       NOTE: the `if' condition is inside the #ifdef; the statement after
+       #endif is the shared borrow fix-up body for both variants.  */
+#ifdef ADDSUB
+    if (mpn_addsub_n(Ap[0], Ap[inc], Ap[0], Ap[inc], n+1) & 1)
+#else
+    MPN_COPY(tp, Ap[0], n+1);
+    mpn_add_n(Ap[0], Ap[0], Ap[inc],n+1);
+    if (mpn_sub_n(Ap[inc], tp, Ap[inc],n+1))
+#endif
+      Ap[inc][n] = mpn_add_1(Ap[inc], Ap[inc], n, 1);
+  }
+  else {
+    int j, inc2=2*inc;
+    int *lk = *ll;
+    mp_limb_t *tmp;
+    TMP_DECL(marker);
+
+    TMP_MARK(marker);
+    tmp = TMP_ALLOC_LIMBS (n+1);
+    /* Recurse on the even- and odd-indexed halves with doubled stride
+       and doubled root exponent.  */
+    mpn_fft_fft_sqr(Ap, K/2,ll-1,2*omega,n,inc2, tp);
+    mpn_fft_fft_sqr(Ap+inc, K/2,ll-1,2*omega,n,inc2, tp);
+    /* A[2*j*inc]   <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+       A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+    for (j=0;j<K/2;j++,lk+=2,Ap+=2*inc) {
+      MPN_COPY(tp, Ap[inc], n+1);
+      mpn_fft_mul_2exp_modF(Ap[inc], lk[1]*omega, n, tmp);
+      mpn_fft_add_modF(Ap[inc], Ap[0], n);
+      mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp);
+      mpn_fft_add_modF(Ap[0], tp, n);
+    }
+    TMP_FREE(marker);
+  }
+}
+
+
+/* input: A[0] ... A[inc*(K-1)] are residues mod 2^N+1 where
+   N=n*BITS_PER_MP_LIMB
+   2^omega is a primitive root mod 2^N+1
+   output: A[inc*l[k][i]] <- \sum (2^omega)^(ij) A[inc*j] mod 2^N+1
+   Two-operand variant: applies the same transform to both the A and B
+   coefficient arrays in one pass.  tp is n+1 limbs of scratch.  */
+
+static void
+#if __STDC__
+mpn_fft_fft (mp_limb_t **Ap, mp_limb_t **Bp, mp_size_t K, int **ll,
+             mp_size_t omega, mp_size_t n, mp_size_t inc, mp_limb_t *tp)
+#else
+mpn_fft_fft(Ap,Bp,K,ll,omega,n,inc,tp)
+  mp_limb_t **Ap,**Bp,*tp;
+  mp_size_t K,omega,n,inc;
+  int **ll;
+#endif
+{
+  if (K==2) {
+    /* Size-2 butterflies on A and then B.  NOTE: in each pair the `if'
+       condition is inside the #ifdef and the statement after #endif is
+       the shared borrow fix-up body for both variants.  */
+#ifdef ADDSUB
+    if (mpn_addsub_n(Ap[0], Ap[inc], Ap[0], Ap[inc], n+1) & 1)
+#else
+    MPN_COPY(tp, Ap[0], n+1);
+    mpn_add_n(Ap[0], Ap[0], Ap[inc],n+1);
+    if (mpn_sub_n(Ap[inc], tp, Ap[inc],n+1))
+#endif
+      Ap[inc][n] = mpn_add_1(Ap[inc], Ap[inc], n, 1);
+#ifdef ADDSUB
+    if (mpn_addsub_n(Bp[0], Bp[inc], Bp[0], Bp[inc], n+1) & 1)
+#else
+    MPN_COPY(tp, Bp[0], n+1);
+    mpn_add_n(Bp[0], Bp[0], Bp[inc],n+1);
+    if (mpn_sub_n(Bp[inc], tp, Bp[inc],n+1))
+#endif
+      Bp[inc][n] = mpn_add_1(Bp[inc], Bp[inc], n, 1);
+  }
+  else {
+    int j, inc2=2*inc;
+    int *lk=*ll;
+    mp_limb_t *tmp;
+    TMP_DECL(marker);
+
+    TMP_MARK(marker);
+    tmp = TMP_ALLOC_LIMBS (n+1);
+    /* Recurse on even- and odd-indexed halves of both arrays.  */
+    mpn_fft_fft(Ap, Bp, K/2,ll-1,2*omega,n,inc2, tp);
+    mpn_fft_fft(Ap+inc, Bp+inc, K/2,ll-1,2*omega,n,inc2, tp);
+    /* A[2*j*inc]   <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
+       A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
+    for (j=0;j<K/2;j++,lk+=2,Ap+=2*inc,Bp+=2*inc) {
+      MPN_COPY(tp, Ap[inc], n+1);
+      mpn_fft_mul_2exp_modF(Ap[inc], lk[1]*omega, n, tmp);
+      mpn_fft_add_modF(Ap[inc], Ap[0], n);
+      mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp);
+      mpn_fft_add_modF(Ap[0], tp, n);
+      MPN_COPY(tp, Bp[inc], n+1);
+      mpn_fft_mul_2exp_modF(Bp[inc], lk[1]*omega, n, tmp);
+      mpn_fft_add_modF(Bp[inc], Bp[0], n);
+      mpn_fft_mul_2exp_modF(tp,lk[0]*omega, n, tmp);
+      mpn_fft_add_modF(Bp[0], tp, n);
+    }
+    TMP_FREE(marker);
+  }
+}
+
+
+/* a[i] <- a[i]*b[i] mod 2^(n*BITS_PER_MP_LIMB)+1 for 0 <= i < K */
+/* Each a[i]/b[i] is n+1 limbs.  When ap == bp the products are squares.
+   Large n recurses into mpn_mul_fft_internal; small n uses a plain
+   product followed by an explicit reduction mod 2^N+1.  */
+static void
+#if __STDC__
+mpn_fft_mul_modF_K (mp_limb_t **ap, mp_limb_t **bp, mp_size_t n, int K)
+#else
+mpn_fft_mul_modF_K(ap, bp, n, K)
+     mp_limb_t **ap, **bp;
+     mp_size_t n;
+     int K;
+#endif
+{
+  int i;
+  int sqr = (ap == bp);
+  TMP_DECL(marker);
+
+  TMP_MARK(marker);
+
+  if (n >= (sqr ? FFT_MODF_SQR_THRESHOLD : FFT_MODF_MUL_THRESHOLD)) {
+    /* Recursive case: set up a smaller FFT (size 2^k) and use it for
+       each of the K pointwise products.  */
+    int k, K2,nprime2,Nprime2,M2,maxLK,l,Mp2;
+    int **_fft_l;
+    mp_limb_t **Ap,**Bp,*A,*B,*T;
+
+    k = mpn_fft_best_k (n, sqr);
+    K2 = 1<<k;
+    maxLK = (K2>BITS_PER_MP_LIMB) ? K2 : BITS_PER_MP_LIMB;
+    M2 = n*BITS_PER_MP_LIMB/K2;
+    l = n/K2;
+    Nprime2 = ((2*M2+k+2+maxLK)/maxLK)*maxLK; /* ceil((2*M2+k+3)/maxLK)*maxLK*/
+    nprime2 = Nprime2/BITS_PER_MP_LIMB;
+    Mp2 = Nprime2/K2;
+
+    Ap = TMP_ALLOC_MP_PTRS (K2);
+    Bp = TMP_ALLOC_MP_PTRS (K2);
+    A = TMP_ALLOC_LIMBS (2*K2*(nprime2+1));
+    T = TMP_ALLOC_LIMBS (nprime2+1);
+    B = A + K2*(nprime2+1);
+    _fft_l = TMP_ALLOC_TYPE (k+1, int*);
+    for (i=0;i<=k;i++)
+      _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+    mpn_fft_initl(_fft_l, k);
+
+    TRACE (printf("recurse: %dx%d limbs -> %d times %dx%d (%1.2f)\n", n,
+                  n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
+
+    for (i=0;i<K;i++,ap++,bp++)
+      mpn_mul_fft_internal(*ap, *ap, *bp, n, k, K2, Ap, Bp, A, B, nprime2,
+                           l, Mp2, _fft_l, T, 1);
+  }
+  else {
+    /* Base case: full product into 2n limbs of scratch, then reduce
+       mod 2^N+1, taking the top limbs a[n]/b[n] into account.  */
+    mp_limb_t *a, *b, cc, *tp, *tpn; int n2=2*n;
+    tp = TMP_ALLOC_LIMBS (n2);
+    tpn = tp+n;
+    TRACE (printf (" mpn_mul_n %d of %d limbs\n", K, n));
+    for (i=0;i<K;i++) {
+      a = *ap++; b=*bp++;
+      if (sqr)
+        mpn_sqr_n(tp, a, n);
+      else
+        mpn_mul_n(tp, b, a, n);
+      if (a[n]) cc=mpn_add_n(tpn, tpn, b, n); else cc=0;
+      if (b[n]) cc += mpn_add_n(tpn, tpn, a, n) + a[n];
+      if (cc) {
+        cc = mpn_add_1(tp, tp, n2, cc);
+        ASSERT_NOCARRY (mpn_add_1(tp, tp, n2, cc));
+      }
+      /* a <- low - high mod 2^N+1, with wrap-around on borrow.  */
+      a[n] = mpn_sub_n(a, tp, tpn, n) && mpn_add_1(a, a, n, 1);
+    }
+  }
+  TMP_FREE(marker);
+}
+
+
+/* input: A^[l[k][0]] A^[l[k][1]] ... A^[l[k][K-1]]
+   output: K*A[0] K*A[K-1] ... K*A[1]
+   Inverse transform; the factor K is divided out later by
+   mpn_fft_div_2exp_modF.  tp is n+1 limbs of scratch.  */
+
+static void
+#if __STDC__
+mpn_fft_fftinv (mp_limb_t **Ap, int K, mp_size_t omega, mp_size_t n,
+                mp_limb_t *tp)
+#else
+mpn_fft_fftinv(Ap,K,omega,n,tp)
+     mp_limb_t **Ap, *tp;
+     int K;
+     mp_size_t omega, n;
+#endif
+{
+  if (K==2) {
+    /* Size-2 butterfly.  NOTE: the `if' condition is inside the #ifdef;
+       the statement after #endif is the shared borrow fix-up body.  */
+#ifdef ADDSUB
+    if (mpn_addsub_n(Ap[0], Ap[1], Ap[0], Ap[1], n+1) & 1)
+#else
+    MPN_COPY(tp, Ap[0], n+1);
+    mpn_add_n(Ap[0], Ap[0], Ap[1], n+1);
+    if (mpn_sub_n(Ap[1], tp, Ap[1], n+1))
+#endif
+      Ap[1][n] = mpn_add_1(Ap[1], Ap[1], n, 1);
+  }
+  else {
+    int j, K2=K/2; mp_limb_t **Bp=Ap+K2, *tmp;
+    TMP_DECL(marker);
+
+    TMP_MARK(marker);
+    tmp = TMP_ALLOC_LIMBS (n+1);
+    mpn_fft_fftinv(Ap, K2, 2*omega, n, tp);
+    mpn_fft_fftinv(Bp, K2, 2*omega, n, tp);
+    /* A[j]     <- A[j] + omega^j A[j+K/2]
+       A[j+K/2] <- A[j] + omega^(j+K/2) A[j+K/2] */
+    for (j=0;j<K2;j++,Ap++,Bp++) {
+      MPN_COPY(tp, Bp[0], n+1);
+      mpn_fft_mul_2exp_modF(Bp[0], (j+K2)*omega, n, tmp);
+      mpn_fft_add_modF(Bp[0], Ap[0], n);
+      mpn_fft_mul_2exp_modF(tp, j*omega, n, tmp);
+      mpn_fft_add_modF(Ap[0], tp, n);
+    }
+    TMP_FREE(marker);
+  }
+}
+
+
+/* A <- A/2^k mod 2^(n*BITS_PER_MP_LIMB)+1 */
+/* Division by 2^k is done as multiplication by its modular inverse,
+   which is a power of two; tp is n+1 limbs of scratch.  */
+static void
+#if __STDC__
+mpn_fft_div_2exp_modF (mp_limb_t *ap, int k, mp_size_t n, mp_limb_t *tp)
+#else
+mpn_fft_div_2exp_modF(ap,k,n,tp)
+     mp_limb_t *ap,*tp;
+     int k;
+     mp_size_t n;
+#endif
+{
+  int i;
+
+  i = 2*n*BITS_PER_MP_LIMB;
+  i = (i-k) % i;
+  mpn_fft_mul_2exp_modF(ap,i,n,tp);
+  /* 1/2^k = 2^(2nL-k) mod 2^(n*BITS_PER_MP_LIMB)+1 */
+  /* normalize so that A < 2^(n*BITS_PER_MP_LIMB)+1 */
+  if (ap[n]==1) {
+    /* Top limb set: if any low limb is non-zero the value is above the
+       modulus, so subtract 2^N+1 (clear top, decrement low part).  */
+    for (i=0;i<n && ap[i]==0;i++);
+    if (i<n) {
+      ap[n]=0;
+      mpn_sub_1(ap, ap, n, 1);
+    }
+  }
+}
+
+
+/* R <- A mod 2^(n*BITS_PER_MP_LIMB)+1, n<=an<=3*n */
+/* rp gets n+1 limbs.  The reduction uses 2^(n*BITS_PER_MP_LIMB) == -1:
+   limbs an..2n-1 are subtracted and limbs 2n.. are added back.  */
+static void
+#if __STDC__
+mpn_fft_norm_modF(mp_limb_t *rp, mp_limb_t *ap, mp_size_t n, mp_size_t an)
+#else
+mpn_fft_norm_modF(rp, ap, n, an)
+     mp_limb_t *rp;
+     mp_limb_t *ap;
+     mp_size_t n;
+     mp_size_t an;
+#endif
+{
+  mp_size_t l;
+
+  if (an>2*n) {
+    l = n;
+    /* rp <- low n limbs plus the an-2n limbs above 2n.  */
+    rp[n] = mpn_add_1(rp+an-2*n, ap+an-2*n, 3*n-an,
+                      mpn_add_n(rp,ap,ap+2*n,an-2*n));
+  }
+  else {
+    l = an-n;
+    MPN_COPY(rp, ap, n);
+    rp[n]=0;
+  }
+  /* Subtract the middle l limbs; on borrow wrap around mod 2^N+1.  */
+  if (mpn_sub_n(rp,rp,ap+n,l)) {
+    if (mpn_sub_1(rp+l,rp+l,n+1-l,1))
+      rp[n]=mpn_add_1(rp,rp,n,1);
+  }
+}
+
+
+/* Core of the FFT multiply: op <- n*m mod 2^(pl*BITS_PER_MP_LIMB)+1,
+   using a length-K (= 2^k) transform with l-limb input pieces and
+   (nprime+1)-limb transform elements.  Ap/Bp/A/B/T are caller-supplied
+   work areas; when rec is non-zero the inputs are first decomposed into
+   the Ap[i]/Bp[i] pieces here (the top-level caller decomposes itself).  */
+static void
+#if __STDC__
+mpn_mul_fft_internal(mp_limb_t *op, mp_srcptr n, mp_srcptr m, mp_size_t pl,
+                     int k, int K,
+                     mp_limb_t **Ap, mp_limb_t **Bp,
+                     mp_limb_t *A, mp_limb_t *B,
+                     mp_size_t nprime, mp_size_t l, mp_size_t Mp,
+                     int **_fft_l,
+                     mp_limb_t *T, int rec)
+#else
+mpn_mul_fft_internal(op,n,m,pl,k,K,Ap,Bp,A,B,nprime,l,Mp,_fft_l,T,rec)
+     mp_limb_t *op;
+     mp_srcptr n, m;
+     mp_limb_t **Ap,**Bp,*A,*B,*T;
+     mp_size_t pl,nprime;
+     int **_fft_l;
+     int k,K,l,Mp,rec;
+#endif
+{
+  int i, sqr, pla, lo, sh, j;
+  mp_limb_t *p;
+
+  sqr = (n==m);
+
+  TRACE (printf ("pl=%d k=%d K=%d np=%d l=%d Mp=%d rec=%d sqr=%d\n",
+                 pl,k,K,nprime,l,Mp,rec,sqr));
+
+  /* decomposition of inputs into arrays Ap[i] and Bp[i] */
+  if (rec) for (i=0;i<K;i++) {
+    Ap[i] = A+i*(nprime+1); Bp[i] = B+i*(nprime+1);
+    /* store the next M bits of n into A[i] */
+    /* supposes that M is a multiple of BITS_PER_MP_LIMB */
+    MPN_COPY(Ap[i], n, l); n+=l; MPN_ZERO(Ap[i]+l, nprime+1-l);
+    /* set most significant bits of n and m (important in recursive calls) */
+    if (i==K-1) Ap[i][l]=n[0];
+    mpn_fft_mul_2exp_modF(Ap[i], i*Mp, nprime, T);
+    if (!sqr) {
+      MPN_COPY(Bp[i], m, l); m+=l; MPN_ZERO(Bp[i]+l, nprime+1-l);
+      if (i==K-1) Bp[i][l]=m[0];
+      mpn_fft_mul_2exp_modF(Bp[i], i*Mp, nprime, T);
+    }
+  }
+
+  /* direct fft's */
+  if (sqr) mpn_fft_fft_sqr(Ap,K,_fft_l+k,2*Mp,nprime,1, T);
+  else mpn_fft_fft(Ap,Bp,K,_fft_l+k,2*Mp,nprime,1, T);
+
+  /* term to term multiplications */
+  mpn_fft_mul_modF_K(Ap, (sqr) ? Ap : Bp, nprime, K);
+
+  /* inverse fft's */
+  mpn_fft_fftinv(Ap, K, 2*Mp, nprime, T);
+
+  /* division of terms after inverse fft */
+  for (i=0;i<K;i++) mpn_fft_div_2exp_modF(Ap[i],k+((K-i)%K)*Mp,nprime, T);
+
+  /* addition of terms in result p */
+  MPN_ZERO(T,nprime+1);
+  pla = l*(K-1)+nprime+1; /* number of required limbs for p */
+  p = B; /* B has K*(n'+1) limbs, which is >= pla, i.e. enough */
+  MPN_ZERO(p, pla);
+  /* NOTE: from here on `sqr' is reused as the (signed) carry
+     accumulator, no longer the squaring flag.  */
+  sqr=0; /* will accumulate the (signed) carry at p[pla] */
+  for (i=K-1,lo=l*i+nprime,sh=l*i;i>=0;i--,lo-=l,sh-=l) {
+    mp_ptr n = p+sh;
+    j = (K-i)%K;
+    if (mpn_add_n(n,n,Ap[j],nprime+1))
+      sqr += mpn_add_1(n+nprime+1,n+nprime+1,pla-sh-nprime-1,1);
+    T[2*l]=i+1; /* T = (i+1)*2^(2*M) */
+    if (mpn_cmp(Ap[j],T,nprime+1)>0) { /* subtract 2^N'+1 */
+      sqr -= mpn_sub_1(n,n,pla-sh,1);
+      sqr -= mpn_sub_1(p+lo,p+lo,pla-lo,1);
+    }
+  }
+  /* Resolve the accumulated carry (-1, 0 or +1) into p.  */
+  if (sqr==-1) {
+    if ((sqr=mpn_add_1(p+pla-pl,p+pla-pl,pl,1))) {
+      /* p[pla-pl]...p[pla-1] are all zero */
+      mpn_sub_1(p+pla-pl-1,p+pla-pl-1,pl+1,1);
+      mpn_sub_1(p+pla-1,p+pla-1,1,1);
+    }
+  }
+  else if (sqr==1) {
+    if (pla>=2*pl)
+      while ((sqr=mpn_add_1(p+pla-2*pl,p+pla-2*pl,2*pl,sqr)));
+    else {
+      sqr = mpn_sub_1(p+pla-pl,p+pla-pl,pl,sqr);
+      ASSERT (sqr == 0);
+    }
+  }
+  else
+    ASSERT (sqr == 0);
+
+  /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
+     < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
+     < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
+  mpn_fft_norm_modF(op,p,pl,pla);
+}
+
+
+/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*BITS_PER_MP_LIMB
+   n and m have respectively nl and ml limbs
+   op must have space for pl+1 limbs
+   One must have pl = mpn_fft_next_size(pl, k).
+*/
+
+void
+#if __STDC__
+mpn_mul_fft (mp_ptr op, mp_size_t pl,
+             mp_srcptr n, mp_size_t nl,
+             mp_srcptr m, mp_size_t ml,
+             int k)
+#else
+mpn_mul_fft (op, pl, n, nl, m, ml, k)
+     mp_ptr op;
+     mp_size_t pl;
+     mp_srcptr n;
+     mp_size_t nl;
+     mp_srcptr m;
+     mp_size_t ml;
+     int k;
+#endif
+{
+  int K,maxLK,i,j;
+  mp_size_t N,Nprime,nprime,M,Mp,l;
+  mp_limb_t **Ap,**Bp,*A,*T,*B;
+  int **_fft_l;
+  int sqr = (n==m && nl==ml);
+  TMP_DECL(marker);
+
+  TRACE (printf ("\nmpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d\n",
+                 pl, nl, ml, k));
+  ASSERT_ALWAYS (mpn_fft_next_size(pl, k) == pl);
+
+  TMP_MARK(marker);
+  N = pl*BITS_PER_MP_LIMB;
+  _fft_l = TMP_ALLOC_TYPE (k+1, int*);
+  for (i=0;i<=k;i++)
+    _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+  mpn_fft_initl(_fft_l, k);
+  K = 1<<k;
+  M = N/K; /* N = 2^k M */
+  l = M/BITS_PER_MP_LIMB;
+  maxLK = (K>BITS_PER_MP_LIMB) ? K : BITS_PER_MP_LIMB;
+
+  /* Element size N' for the pointwise products: large enough to hold a
+     2M-bit product plus carries, rounded to a usable FFT/limb size.  */
+  Nprime = ((2*M+k+2+maxLK)/maxLK)*maxLK; /* ceil((2*M+k+3)/maxLK)*maxLK; */
+  nprime = Nprime/BITS_PER_MP_LIMB;
+  TRACE (printf ("N=%d K=%d, M=%d, l=%d, maxLK=%d, Np=%d, np=%d\n",
+                 N, K, M, l, maxLK, Nprime, nprime));
+  if (nprime >= (sqr ? FFT_MODF_SQR_THRESHOLD : FFT_MODF_MUL_THRESHOLD)) {
+    /* The pointwise products will recurse: round N' up so the recursive
+       level's constraints are met as well.  */
+    maxLK = (1<<mpn_fft_best_k(nprime,n==m))*BITS_PER_MP_LIMB;
+    if (Nprime % maxLK) {
+      Nprime=((Nprime/maxLK)+1)*maxLK;
+      nprime = Nprime/BITS_PER_MP_LIMB;
+    }
+    TRACE (printf ("new maxLK=%d, Np=%d, np=%d\n", maxLK, Nprime, nprime));
+  }
+
+  T = TMP_ALLOC_LIMBS (nprime+1);
+  Mp = Nprime/K;
+
+  TRACE (printf("%dx%d limbs -> %d times %dx%d limbs (%1.2f)\n",
+                pl,pl,K,nprime,nprime,2.0*(double)N/Nprime/K);
+         printf(" temp space %ld\n", 2*K*(nprime+1)));
+
+  A = _MP_ALLOCATE_FUNC_LIMBS (2*K*(nprime+1));
+  B = A+K*(nprime+1);
+  Ap = TMP_ALLOC_MP_PTRS (K);
+  Bp = TMP_ALLOC_MP_PTRS (K);
+  /* special decomposition for main call */
+  for (i=0;i<K;i++) {
+    Ap[i] = A+i*(nprime+1); Bp[i] = B+i*(nprime+1);
+    /* store the next M bits of n into A[i] */
+    /* supposes that M is a multiple of BITS_PER_MP_LIMB */
+    if (nl>0) {
+      j = (nl>=l) ? l : nl; /* limbs to store in Ap[i] */
+      MPN_COPY(Ap[i], n, j); n+=l; MPN_ZERO(Ap[i]+j, nprime+1-j);
+      mpn_fft_mul_2exp_modF(Ap[i], i*Mp, nprime, T);
+    }
+    else MPN_ZERO(Ap[i], nprime+1);
+    nl -= l;
+    if (n!=m) {
+      if (ml>0) {
+        j = (ml>=l) ? l : ml; /* limbs to store in Bp[i] */
+        MPN_COPY(Bp[i], m, j); m+=l; MPN_ZERO(Bp[i]+j, nprime+1-j);
+        mpn_fft_mul_2exp_modF(Bp[i], i*Mp, nprime, T);
+      }
+      else MPN_ZERO(Bp[i], nprime+1);
+    }
+    ml -= l;
+  }
+  /* rec==0: the decomposition above replaces the one done internally.  */
+  mpn_mul_fft_internal(op,n,m,pl,k,K,Ap,Bp,A,B,nprime,l,Mp,_fft_l,T,0);
+  TMP_FREE(marker);
+  _MP_FREE_FUNC_LIMBS (A, 2*K*(nprime+1));
+}
+
+
+#if WANT_ASSERT
+/* Return non-zero iff the n limbs at p are all zero.
+   Compiled only for assertion checking.  */
+static int
+#if __STDC__
+mpn_zero_p (mp_ptr p, mp_size_t n)
+#else
+mpn_zero_p (p, n)
+     mp_ptr p;
+     mp_size_t n;
+#endif
+{
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    if (p[i] != 0)
+      return 0;
+
+  return 1;
+}
+#endif
+
+
+/* Multiply {n,nl}*{m,ml} and write the result to {op,nl+ml}.
+
+   The operands are padded up to a length suitable for mpn_mul_fft and
+   the significant part of the product is copied back out.
+
+   FIXME: Duplicating the result like this is wasteful, do something better
+   perhaps at the norm_modF stage above. */
+
+void
+#if __STDC__
+mpn_mul_fft_full (mp_ptr op,
+		  mp_srcptr n, mp_size_t nl,
+		  mp_srcptr m, mp_size_t ml)
+#else
+mpn_mul_fft_full (op, n, nl, m, ml)
+     mp_ptr op;
+     mp_srcptr n;
+     mp_size_t nl;
+     mp_srcptr m;
+     mp_size_t ml;
+#endif
+{
+  mp_ptr pad_op;
+  mp_size_t pl;
+  int k;
+  int sqr = (n == m && nl == ml);
+
+  /* Choose the FFT size parameter and the padded product length.  */
+  k = mpn_fft_best_k (nl + ml, sqr);
+  pl = mpn_fft_next_size (nl + ml, k);
+
+  TRACE (printf ("mpn_mul_fft_full nl=%ld ml=%ld -> pl=%ld k=%d\n",
+                 nl, ml, pl, k));
+
+  /* Since pl >= nl+ml, the product mod 2^(pl*BITS_PER_MP_LIMB)+1 is the
+     full product, unreduced; the limbs above nl+ml must come out zero.  */
+  pad_op = _MP_ALLOCATE_FUNC_LIMBS (pl + 1);
+  mpn_mul_fft (pad_op, pl, n, nl, m, ml, k);
+
+  ASSERT (mpn_zero_p (pad_op + nl + ml, pl + 1 - (nl + ml)));
+  MPN_COPY (op, pad_op, nl + ml);
+
+  _MP_FREE_FUNC_LIMBS (pad_op, pl + 1);
+}
diff --git a/rts/gmp/mpn/generic/mul_n.c b/rts/gmp/mpn/generic/mul_n.c
new file mode 100644
index 0000000000..b7563be2d3
--- /dev/null
+++ b/rts/gmp/mpn/generic/mul_n.c
@@ -0,0 +1,1343 @@
+/* mpn_mul_n and helper function -- Multiply/square natural numbers.
+
+ THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n)
+ ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH
+ THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
+ 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
+#define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1)
+
+#if !defined (__alpha) && !defined (__mips)
+/* For all other machines, we want to call mpn functions for the compund
+ operations instead of open-coding them. */
+#define USE_MORE_MPN
+#endif
+
+/*== Function declarations =================================================*/
+
+static void evaluate3 _PROTO ((mp_ptr, mp_ptr, mp_ptr,
+ mp_ptr, mp_ptr, mp_ptr,
+ mp_srcptr, mp_srcptr, mp_srcptr,
+ mp_size_t, mp_size_t));
+static void interpolate3 _PROTO ((mp_srcptr,
+ mp_ptr, mp_ptr, mp_ptr,
+ mp_srcptr,
+ mp_ptr, mp_ptr, mp_ptr,
+ mp_size_t, mp_size_t));
+static mp_limb_t add2Times _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+
+
+/*-- mpn_kara_mul_n ---------------------------------------------------------------*/
+
+/* Multiplies using 3 half-sized mults and so on recursively.
+ * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
+ * No overlap of p[...] with a[...] or b[...].
+ * ws is workspace.
+ */
+
+/* Karatsuba multiply: p[0..2n-1] <- a[0..n-1]*b[0..n-1].  The absolute
+   half-differences |a_hi-a_lo| and |b_hi-b_lo| are formed first (their
+   combined sign tracked in `sign'), the three half-sized products are
+   computed (recursively or via mpn_mul_basecase), and the results are
+   interpolated with explicit carry propagation.  */
+void
+#if __STDC__
+mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
+#else
+mpn_kara_mul_n(p, a, b, n, ws)
+     mp_ptr p;
+     mp_srcptr a;
+     mp_srcptr b;
+     mp_size_t n;
+     mp_ptr ws;
+#endif
+{
+  mp_limb_t i, sign, w, w0, w1;
+  mp_size_t n2;
+  mp_srcptr x, y;
+
+  n2 = n >> 1;
+  ASSERT (n2 > 0);
+
+  if (n & 1)
+    {
+      /* Odd length. */
+      mp_size_t n1, n3, nm1;
+
+      n3 = n - n2;
+
+      /* p[0..n2] <- |a_lo - a_hi| with the high limb handled apart.  */
+      sign = 0;
+      w = a[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p, a, a + n3, n2);
+      else
+	{
+	  /* Equal top limb: compare from the top down to find which
+	     half is larger, so the subtraction cannot borrow.  */
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = a[i];
+	      w1 = a[n3+i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = a + n3;
+	      y = a;
+	      sign = 1;
+	    }
+	  else
+	    {
+	      x = a;
+	      y = a + n3;
+	    }
+	  mpn_sub_n (p, x, y, n2);
+	}
+      p[n2] = w;
+
+      /* Same for b, folding its sign into `sign'.  */
+      w = b[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p + n3, b, b + n3, n2);
+      else
+	{
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = b[i];
+	      w1 = b[n3+i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = b + n3;
+	      y = b;
+	      sign ^= 1;
+	    }
+	  else
+	    {
+	      x = b;
+	      y = b + n3;
+	    }
+	  mpn_sub_n (p + n3, x, y, n2);
+	}
+      p[n] = w;
+
+      /* The three products: middle one into ws, outer two into p.  */
+      n1 = n + 1;
+      if (n2 < KARATSUBA_MUL_THRESHOLD)
+	{
+	  if (n3 < KARATSUBA_MUL_THRESHOLD)
+	    {
+	      mpn_mul_basecase (ws, p, n3, p + n3, n3);
+	      mpn_mul_basecase (p, a, n3, b, n3);
+	    }
+	  else
+	    {
+	      mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+	      mpn_kara_mul_n (p, a, b, n3, ws + n1);
+	    }
+	  mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
+	}
+      else
+	{
+	  mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+	  mpn_kara_mul_n (p, a, b, n3, ws + n1);
+	  mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
+	}
+
+      /* Interpolation with carry propagation by hand.  */
+      if (sign)
+	mpn_add_n (ws, p, ws, n1);
+      else
+	mpn_sub_n (ws, p, ws, n1);
+
+      nm1 = n - 1;
+      if (mpn_add_n (ws, p + n1, ws, nm1))
+	{
+	  mp_limb_t x = ws[nm1] + 1;
+	  ws[nm1] = x;
+	  if (x == 0)
+	    ++ws[n];
+	}
+      if (mpn_add_n (p + n3, p + n3, ws, n1))
+	{
+	  mp_limb_t x;
+	  i = n1 + n3;
+	  do
+	    {
+	      x = p[i] + 1;
+	      p[i] = x;
+	      ++i;
+	    } while (x == 0);
+	}
+    }
+  else
+    {
+      /* Even length. */
+      mp_limb_t t;
+
+      /* |a_lo - a_hi|: compare from the top to pick the larger half.  */
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = a[i];
+	  w1 = a[n2+i];
+	}
+      while (w0 == w1 && i != 0);
+      sign = 0;
+      if (w0 < w1)
+	{
+	  x = a + n2;
+	  y = a;
+	  sign = 1;
+	}
+      else
+	{
+	  x = a;
+	  y = a + n2;
+	}
+      mpn_sub_n (p, x, y, n2);
+
+      /* |b_lo - b_hi| likewise.  */
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = b[i];
+	  w1 = b[n2+i];
+	}
+      while (w0 == w1 && i != 0);
+      if (w0 < w1)
+	{
+	  x = b + n2;
+	  y = b;
+	  sign ^= 1;
+	}
+      else
+	{
+	  x = b;
+	  y = b + n2;
+	}
+      mpn_sub_n (p + n2, x, y, n2);
+
+      /* Pointwise products. */
+      if (n2 < KARATSUBA_MUL_THRESHOLD)
+	{
+	  mpn_mul_basecase (ws, p, n2, p + n2, n2);
+	  mpn_mul_basecase (p, a, n2, b, n2);
+	  mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
+	}
+      else
+	{
+	  mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
+	  mpn_kara_mul_n (p, a, b, n2, ws + n);
+	  mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
+	}
+
+      /* Interpolate. */
+      if (sign)
+	w = mpn_add_n (ws, p, ws, n);
+      else
+	w = -mpn_sub_n (ws, p, ws, n);
+      w += mpn_add_n (ws, p + n, ws, n);
+      w += mpn_add_n (p + n2, p + n2, ws, n);
+      /* TO DO: could put "if (w) { ... }" here.
+       * Less work but badly predicted branch.
+       * No measurable difference in speed on Alpha.
+       */
+      i = n + n2;
+      t = p[i] + w;
+      p[i] = t;
+      if (t < w)
+	{
+	  do
+	    {
+	      ++i;
+	      w = p[i] + 1;
+	      p[i] = w;
+	    }
+	  while (w == 0);
+	}
+    }
+}
+
+/* Karatsuba square: p[0..2n-1] <- a[0..n-1]^2.  Same structure as
+   mpn_kara_mul_n with b == a; the `sign' bookkeeping is kept although
+   the middle term of a square is always non-negative.  */
+void
+#if __STDC__
+mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
+#else
+mpn_kara_sqr_n (p, a, n, ws)
+     mp_ptr p;
+     mp_srcptr a;
+     mp_size_t n;
+     mp_ptr ws;
+#endif
+{
+  mp_limb_t i, sign, w, w0, w1;
+  mp_size_t n2;
+  mp_srcptr x, y;
+
+  n2 = n >> 1;
+  ASSERT (n2 > 0);
+
+  if (n & 1)
+    {
+      /* Odd length. */
+      mp_size_t n1, n3, nm1;
+
+      n3 = n - n2;
+
+      /* p[0..n2] <- |a_lo - a_hi|, high limb handled apart.  */
+      sign = 0;
+      w = a[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p, a, a + n3, n2);
+      else
+	{
+	  /* Equal top limb: compare downwards to avoid a borrow.  */
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = a[i];
+	      w1 = a[n3+i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = a + n3;
+	      y = a;
+	      sign = 1;
+	    }
+	  else
+	    {
+	      x = a;
+	      y = a + n3;
+	    }
+	  mpn_sub_n (p, x, y, n2);
+	}
+      p[n2] = w;
+
+      /* Second copy of the same difference at p+n3.  */
+      w = a[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p + n3, a, a + n3, n2);
+      else
+	{
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = a[i];
+	      w1 = a[n3+i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = a + n3;
+	      y = a;
+	      sign ^= 1;
+	    }
+	  else
+	    {
+	      x = a;
+	      y = a + n3;
+	    }
+	  mpn_sub_n (p + n3, x, y, n2);
+	}
+      p[n] = w;
+
+      /* The three squares: middle one into ws, outer two into p.  */
+      n1 = n + 1;
+      if (n2 < KARATSUBA_SQR_THRESHOLD)
+	{
+	  if (n3 < KARATSUBA_SQR_THRESHOLD)
+	    {
+	      mpn_sqr_basecase (ws, p, n3);
+	      mpn_sqr_basecase (p, a, n3);
+	    }
+	  else
+	    {
+	      mpn_kara_sqr_n (ws, p, n3, ws + n1);
+	      mpn_kara_sqr_n (p, a, n3, ws + n1);
+	    }
+	  mpn_sqr_basecase (p + n1, a + n3, n2);
+	}
+      else
+	{
+	  mpn_kara_sqr_n (ws, p, n3, ws + n1);
+	  mpn_kara_sqr_n (p, a, n3, ws + n1);
+	  mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1);
+	}
+
+      /* Interpolation with manual carry propagation.  */
+      if (sign)
+	mpn_add_n (ws, p, ws, n1);
+      else
+	mpn_sub_n (ws, p, ws, n1);
+
+      nm1 = n - 1;
+      if (mpn_add_n (ws, p + n1, ws, nm1))
+	{
+	  mp_limb_t x = ws[nm1] + 1;
+	  ws[nm1] = x;
+	  if (x == 0)
+	    ++ws[n];
+	}
+      if (mpn_add_n (p + n3, p + n3, ws, n1))
+	{
+	  mp_limb_t x;
+	  i = n1 + n3;
+	  do
+	    {
+	      x = p[i] + 1;
+	      p[i] = x;
+	      ++i;
+	    } while (x == 0);
+	}
+    }
+  else
+    {
+      /* Even length. */
+      mp_limb_t t;
+
+      /* |a_lo - a_hi|: compare from the top to pick the larger half.  */
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = a[i];
+	  w1 = a[n2+i];
+	}
+      while (w0 == w1 && i != 0);
+      sign = 0;
+      if (w0 < w1)
+	{
+	  x = a + n2;
+	  y = a;
+	  sign = 1;
+	}
+      else
+	{
+	  x = a;
+	  y = a + n2;
+	}
+      mpn_sub_n (p, x, y, n2);
+
+      /* Second copy of the same difference at p+n2.  */
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = a[i];
+	  w1 = a[n2+i];
+	}
+      while (w0 == w1 && i != 0);
+      if (w0 < w1)
+	{
+	  x = a + n2;
+	  y = a;
+	  sign ^= 1;
+	}
+      else
+	{
+	  x = a;
+	  y = a + n2;
+	}
+      mpn_sub_n (p + n2, x, y, n2);
+
+      /* Pointwise products. */
+      if (n2 < KARATSUBA_SQR_THRESHOLD)
+	{
+	  mpn_sqr_basecase (ws, p, n2);
+	  mpn_sqr_basecase (p, a, n2);
+	  mpn_sqr_basecase (p + n, a + n2, n2);
+	}
+      else
+	{
+	  mpn_kara_sqr_n (ws, p, n2, ws + n);
+	  mpn_kara_sqr_n (p, a, n2, ws + n);
+	  mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
+	}
+
+      /* Interpolate. */
+      if (sign)
+	w = mpn_add_n (ws, p, ws, n);
+      else
+	w = -mpn_sub_n (ws, p, ws, n);
+      w += mpn_add_n (ws, p + n, ws, n);
+      w += mpn_add_n (p + n2, p + n2, ws, n);
+      /* TO DO: could put "if (w) { ... }" here.
+       * Less work but badly predicted branch.
+       * No measurable difference in speed on Alpha.
+       */
+      i = n + n2;
+      t = p[i] + w;
+      p[i] = t;
+      if (t < w)
+	{
+	  do
+	    {
+	      ++i;
+	      w = p[i] + 1;
+	      p[i] = w;
+	    }
+	  while (w == 0);
+	}
+    }
+}
+
+/*-- add2Times -------------------------------------------------------------*/
+
+/* z[] = x[] + 2 * y[]
+ Note that z and x might point to the same vectors. */
+#ifdef USE_MORE_MPN
+/* mpn-primitive variant: t = 2*y via a 1-bit left shift into a temporary,
+   then z = x + t.  c accumulates the carry out of the top limb. */
+static inline mp_limb_t
+#if __STDC__
+add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
+#else
+add2Times (z, x, y, n)
+ mp_ptr z;
+ mp_srcptr x;
+ mp_srcptr y;
+ mp_size_t n;
+#endif
+{
+ mp_ptr t;
+ mp_limb_t c;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ c = mpn_lshift (t, y, n, 1);
+ c += mpn_add_n (z, x, t, n);
+ TMP_FREE (marker);
+ return c;
+}
+#else
+
+/* Fused single-pass variant: per limb, fold in the incoming carry c,
+   add 2*y[i] (capturing the bit shifted out of the top of y[i]), and
+   record the outgoing carry.  The unsigned comparisons "sum < addend"
+   detect wraparound; the returned carry can be 0, 1 or 2. */
+static mp_limb_t
+#if __STDC__
+add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
+#else
+add2Times (z, x, y, n)
+ mp_ptr z;
+ mp_srcptr x;
+ mp_srcptr y;
+ mp_size_t n;
+#endif
+{
+ mp_limb_t c, v, w;
+
+ ASSERT (n > 0);
+ v = *x; w = *y;
+ c = w >> (BITS_PER_MP_LIMB - 1);
+ w <<= 1;
+ v += w;
+ c += v < w;
+ *z = v;
+ ++x; ++y; ++z;
+ while (--n)
+ {
+ v = *x;
+ w = *y;
+ v += c;
+ c = v < c;
+ c += w >> (BITS_PER_MP_LIMB - 1);
+ w <<= 1;
+ v += w;
+ c += v < w;
+ *z = v;
+ ++x; ++y; ++z;
+ }
+
+ return c;
+}
+#endif
+
+/*-- evaluate3 -------------------------------------------------------------*/
+
+/* Evaluates:
+ * ph := 4*A+2*B+C
+ * p1 := A+B+C
+ * p2 := A+2*B+4*C
+ * where:
+ * ph[], p1[], p2[], A[] and B[] all have length len,
+ * C[] has length len2 with len-len2 = 0, 1 or 2.
+ * Returns top words (overflow) at pth, pt1 and pt2 respectively.
+ */
+#ifdef USE_MORE_MPN
+/* mpn-primitive variant: builds each evaluation with shift/add calls;
+   c, d, e collect the top-word overflow returned through pth/pt1/pt2. */
+static void
+#if __STDC__
+evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
+ mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len, mp_size_t len2)
+#else
+evaluate3 (ph, p1, p2, pth, pt1, pt2,
+ A, B, C, len, len2)
+ mp_ptr ph;
+ mp_ptr p1;
+ mp_ptr p2;
+ mp_ptr pth;
+ mp_ptr pt1;
+ mp_ptr pt2;
+ mp_srcptr A;
+ mp_srcptr B;
+ mp_srcptr C;
+ mp_size_t len;
+ mp_size_t len2;
+#endif
+{
+ mp_limb_t c, d, e;
+
+ ASSERT (len - len2 <= 2);
+
+ /* p1 temporarily holds 2*B; e is the bit shifted out of its top. */
+ e = mpn_lshift (p1, B, len, 1);
+
+ /* ph := 4*A + 2*B + C. */
+ c = mpn_lshift (ph, A, len, 2);
+ c += e + mpn_add_n (ph, ph, p1, len);
+ d = mpn_add_n (ph, ph, C, len2);
+ if (len2 == len) c += d; else c += mpn_add_1 (ph + len2, ph + len2, len-len2, d);
+ ASSERT (c < 7);
+ *pth = c;
+
+ /* p2 := A + 2*B + 4*C (C is the short operand; pad with zero limbs). */
+ c = mpn_lshift (p2, C, len2, 2);
+#if 1
+ if (len2 != len) { p2[len-1] = 0; p2[len2] = c; c = 0; }
+ c += e + mpn_add_n (p2, p2, p1, len);
+#else
+ d = mpn_add_n (p2, p2, p1, len2);
+ c += d;
+ if (len2 != len) c = mpn_add_1 (p2+len2, p1+len2, len-len2, c);
+ c += e;
+#endif
+ c += mpn_add_n (p2, p2, A, len);
+ ASSERT (c < 7);
+ *pt2 = c;
+
+ /* p1 := A + B + C (overwrites the 2*B scratch last, once it is done). */
+ c = mpn_add_n (p1, A, B, len);
+ d = mpn_add_n (p1, p1, C, len2);
+ if (len2 == len) c += d;
+ else c += mpn_add_1 (p1+len2, p1+len2, len-len2, d);
+ ASSERT (c < 3);
+ *pt1 = c;
+
+}
+
+#else
+
+/* Single-pass variant: walks the operands once, computing all three
+   evaluations per limb and carrying th/t1/t2 across limbs by hand. */
+static void
+#if __STDC__
+evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
+ mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls)
+#else
+evaluate3 (ph, p1, p2, pth, pt1, pt2,
+ A, B, C, l, ls)
+ mp_ptr ph;
+ mp_ptr p1;
+ mp_ptr p2;
+ mp_ptr pth;
+ mp_ptr pt1;
+ mp_ptr pt2;
+ mp_srcptr A;
+ mp_srcptr B;
+ mp_srcptr C;
+ mp_size_t l;
+ mp_size_t ls;
+#endif
+{
+ mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2;
+
+ ASSERT (l - ls <= 2);
+
+ th = t1 = t2 = 0;
+ for (i = 0; i < l; ++i)
+ {
+ a = *A;
+ b = *B;
+ c = i < ls ? *C : 0; /* C is shorter; treat missing limbs as 0. */
+
+ /* TO DO: choose one of the following alternatives. */
+#if 0
+ t = a << 2;
+ vh = th + t;
+ th = vh < t;
+ th += a >> (BITS_PER_MP_LIMB - 2);
+ t = b << 1;
+ vh += t;
+ th += vh < t;
+ th += b >> (BITS_PER_MP_LIMB - 1);
+ vh += c;
+ th += vh < c;
+#else
+ vh = th + c;
+ th = vh < c;
+ t = b << 1;
+ vh += t;
+ th += vh < t;
+ th += b >> (BITS_PER_MP_LIMB - 1);
+ t = a << 2;
+ vh += t;
+ th += vh < t;
+ th += a >> (BITS_PER_MP_LIMB - 2);
+#endif
+
+ v1 = t1 + a;
+ t1 = v1 < a;
+ v1 += b;
+ t1 += v1 < b;
+ v1 += c;
+ t1 += v1 < c;
+
+ v2 = t2 + a;
+ t2 = v2 < a;
+ t = b << 1;
+ v2 += t;
+ t2 += v2 < t;
+ t2 += b >> (BITS_PER_MP_LIMB - 1);
+ t = c << 2;
+ v2 += t;
+ t2 += v2 < t;
+ t2 += c >> (BITS_PER_MP_LIMB - 2);
+
+ *ph = vh;
+ *p1 = v1;
+ *p2 = v2;
+
+ ++A; ++B; ++C;
+ ++ph; ++p1; ++p2;
+ }
+
+ ASSERT (th < 7);
+ ASSERT (t1 < 3);
+ ASSERT (t2 < 7);
+
+ *pth = th;
+ *pt1 = t1;
+ *pt2 = t2;
+}
+#endif
+
+
+/*-- interpolate3 ----------------------------------------------------------*/
+
+/* Interpolates B, C, D (in-place) from:
+ * 16*A+8*B+4*C+2*D+E
+ * A+B+C+D+E
+ * A+2*B+4*C+8*D+16*E
+ * where:
+ * A[], B[], C[] and D[] all have length l,
+ * E[] has length ls with l-ls = 0, 2 or 4.
+ *
+ * Reads top words (from earlier overflow) from ptb, ptc and ptd,
+ * and returns new top words there.
+ */
+
+#ifdef USE_MORE_MPN
+/* mpn-primitive variant.  tb/tc/td hold the (signed, two's-complement)
+   top words of b, c, d throughout; statement order is significant, as
+   several steps reuse ws and the operand vectors in place. */
+static void
+#if __STDC__
+interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
+ mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t len, mp_size_t len2)
+#else
+interpolate3 (A, B, C, D, E,
+ ptb, ptc, ptd, len, len2)
+ mp_srcptr A;
+ mp_ptr B;
+ mp_ptr C;
+ mp_ptr D;
+ mp_srcptr E;
+ mp_ptr ptb;
+ mp_ptr ptc;
+ mp_ptr ptd;
+ mp_size_t len;
+ mp_size_t len2;
+#endif
+{
+ mp_ptr ws;
+ mp_limb_t t, tb,tc,td;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ ASSERT (len - len2 == 0 || len - len2 == 2 || len - len2 == 4);
+
+ /* Let x1, x2, x3 be the values to interpolate. We have:
+ * b = 16*a + 8*x1 + 4*x2 + 2*x3 + e
+ * c = a + x1 + x2 + x3 + e
+ * d = a + 2*x1 + 4*x2 + 8*x3 + 16*e
+ */
+
+ ws = (mp_ptr) TMP_ALLOC (len * BYTES_PER_MP_LIMB);
+
+ tb = *ptb; tc = *ptc; td = *ptd;
+
+
+ /* b := b - 16*a - e
+ * c := c - a - e
+ * d := d - a - 16*e
+ */
+
+ t = mpn_lshift (ws, A, len, 4);
+ tb -= t + mpn_sub_n (B, B, ws, len);
+ t = mpn_sub_n (B, B, E, len2);
+ if (len2 == len) tb -= t;
+ else tb -= mpn_sub_1 (B+len2, B+len2, len-len2, t);
+
+ tc -= mpn_sub_n (C, C, A, len);
+ t = mpn_sub_n (C, C, E, len2);
+ if (len2 == len) tc -= t;
+ else tc -= mpn_sub_1 (C+len2, C+len2, len-len2, t);
+
+ t = mpn_lshift (ws, E, len2, 4);
+ t += mpn_add_n (ws, ws, A, len2);
+#if 1
+ if (len2 != len) t = mpn_add_1 (ws+len2, A+len2, len-len2, t);
+ td -= t + mpn_sub_n (D, D, ws, len);
+#else
+ t += mpn_sub_n (D, D, ws, len2);
+ if (len2 != len) {
+ t = mpn_sub_1 (D+len2, D+len2, len-len2, t);
+ t += mpn_sub_n (D+len2, D+len2, A+len2, len-len2);
+ } /* end if/else */
+ td -= t;
+#endif
+
+
+ /* b, d := b + d, b - d */
+
+#ifdef HAVE_MPN_ADD_SUB_N
+ /* #error TO DO ... */
+#else
+ t = tb + td + mpn_add_n (ws, B, D, len);
+ td = tb - td - mpn_sub_n (D, B, D, len);
+ tb = t;
+ MPN_COPY (B, ws, len);
+#endif
+
+ /* b := b-8*c */
+ t = 8 * tc + mpn_lshift (ws, C, len, 3);
+ tb -= t + mpn_sub_n (B, B, ws, len);
+
+ /* c := 2*c - b */
+ tc = 2 * tc + mpn_lshift (C, C, len, 1);
+ tc -= tb + mpn_sub_n (C, C, B, len);
+
+ /* d := d/3 (exact; INVERSE_3 corrects the returned top word) */
+ td = (td - mpn_divexact_by3 (D, D, len)) * INVERSE_3;
+
+ /* b, d := b + d, b - d */
+#ifdef HAVE_MPN_ADD_SUB_N
+ /* #error TO DO ... */
+#else
+ t = tb + td + mpn_add_n (ws, B, D, len);
+ td = tb - td - mpn_sub_n (D, B, D, len);
+ tb = t;
+ MPN_COPY (B, ws, len);
+#endif
+
+ /* Now:
+ * b = 4*x1
+ * c = 2*x2
+ * d = 4*x3
+ */
+
+ /* Divide out the factors of 2; the asserts check exactness. */
+ ASSERT(!(*B & 3));
+ mpn_rshift (B, B, len, 2);
+ B[len-1] |= tb<<(BITS_PER_MP_LIMB-2);
+ ASSERT((long)tb >= 0);
+ tb >>= 2;
+
+ ASSERT(!(*C & 1));
+ mpn_rshift (C, C, len, 1);
+ C[len-1] |= tc<<(BITS_PER_MP_LIMB-1);
+ ASSERT((long)tc >= 0);
+ tc >>= 1;
+
+ ASSERT(!(*D & 3));
+ mpn_rshift (D, D, len, 2);
+ D[len-1] |= td<<(BITS_PER_MP_LIMB-2);
+ ASSERT((long)td >= 0);
+ td >>= 2;
+
+#if WANT_ASSERT
+ ASSERT (tb < 2);
+ if (len == len2)
+ {
+ ASSERT (tc < 3);
+ ASSERT (td < 2);
+ }
+ else
+ {
+ ASSERT (tc < 2);
+ ASSERT (!td);
+ }
+#endif
+
+ *ptb = tb;
+ *ptc = tc;
+ *ptd = td;
+
+ TMP_FREE (marker);
+}
+
+#else
+
+/* Single-pass variant: interpolates limb by limb.  sb/sc/sd carry
+   signed spill between limbs (the "period" comments record how many
+   limbs a borrow can propagate -- NOTE(review): taken from the original
+   authors' analysis, not re-derived here).  ob/oc/od buffer each limb's
+   low bits until the next limb's high bits are known. */
+static void
+#if __STDC__
+interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
+ mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t l, mp_size_t ls)
+#else
+interpolate3 (A, B, C, D, E,
+ ptb, ptc, ptd, l, ls)
+ mp_srcptr A;
+ mp_ptr B;
+ mp_ptr C;
+ mp_ptr D;
+ mp_srcptr E;
+ mp_ptr ptb;
+ mp_ptr ptc;
+ mp_ptr ptd;
+ mp_size_t l;
+ mp_size_t ls;
+#endif
+{
+ mp_limb_t a,b,c,d,e,t, i, sb,sc,sd, ob,oc,od;
+ const mp_limb_t maskOffHalf = (~(mp_limb_t) 0) << (BITS_PER_MP_LIMB >> 1);
+
+#if WANT_ASSERT
+ t = l - ls;
+ ASSERT (t == 0 || t == 2 || t == 4);
+#endif
+
+ sb = sc = sd = 0;
+ for (i = 0; i < l; ++i)
+ {
+ mp_limb_t tb, tc, td, tt;
+
+ a = *A;
+ b = *B;
+ c = *C;
+ d = *D;
+ e = i < ls ? *E : 0; /* E is shorter; missing limbs are 0. */
+
+ /* Let x1, x2, x3 be the values to interpolate. We have:
+ * b = 16*a + 8*x1 + 4*x2 + 2*x3 + e
+ * c = a + x1 + x2 + x3 + e
+ * d = a + 2*x1 + 4*x2 + 8*x3 + 16*e
+ */
+
+ /* b := b - 16*a - e
+ * c := c - a - e
+ * d := d - a - 16*e
+ */
+ t = a << 4;
+ tb = -(a >> (BITS_PER_MP_LIMB - 4)) - (b < t);
+ b -= t;
+ tb -= b < e;
+ b -= e;
+ tc = -(c < a);
+ c -= a;
+ tc -= c < e;
+ c -= e;
+ td = -(d < a);
+ d -= a;
+ t = e << 4;
+ td = td - (e >> (BITS_PER_MP_LIMB - 4)) - (d < t);
+ d -= t;
+
+ /* b, d := b + d, b - d */
+ t = b + d;
+ tt = tb + td + (t < b);
+ td = tb - td - (b < d);
+ d = b - d;
+ b = t;
+ tb = tt;
+
+ /* b := b-8*c */
+ t = c << 3;
+ tb = tb - (tc << 3) - (c >> (BITS_PER_MP_LIMB - 3)) - (b < t);
+ b -= t;
+
+ /* c := 2*c - b */
+ t = c << 1;
+ tc = (tc << 1) + (c >> (BITS_PER_MP_LIMB - 1)) - tb - (t < b);
+ c = t - b;
+
+ /* d := d/3 (multiply by the modular inverse of 3) */
+ d *= INVERSE_3;
+ td = td - (d >> (BITS_PER_MP_LIMB - 1)) - (d*3 < d);
+ td *= INVERSE_3;
+
+ /* b, d := b + d, b - d */
+ t = b + d;
+ tt = tb + td + (t < b);
+ td = tb - td - (b < d);
+ d = b - d;
+ b = t;
+ tb = tt;
+
+ /* Now:
+ * b = 4*x1
+ * c = 2*x2
+ * d = 4*x3
+ */
+
+ /* sb has period 2. */
+ b += sb;
+ tb += b < sb;
+ sb &= maskOffHalf;
+ sb |= sb >> (BITS_PER_MP_LIMB >> 1);
+ sb += tb;
+
+ /* sc has period 1. */
+ c += sc;
+ tc += c < sc;
+ /* TO DO: choose one of the following alternatives. */
+#if 1
+ sc = (mp_limb_t)((long)sc >> (BITS_PER_MP_LIMB - 1));
+ sc += tc;
+#else
+ sc = tc - ((long)sc < 0L);
+#endif
+
+ /* sd has period 2. */
+ d += sd;
+ td += d < sd;
+ sd &= maskOffHalf;
+ sd |= sd >> (BITS_PER_MP_LIMB >> 1);
+ sd += td;
+
+ if (i != 0)
+ {
+ B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
+ C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
+ D[-1] = od | d << (BITS_PER_MP_LIMB - 2);
+ }
+ ob = b >> 2;
+ oc = c >> 1;
+ od = d >> 2;
+
+ ++A; ++B; ++C; ++D; ++E;
+ }
+
+ /* Handle top words. */
+ b = *ptb;
+ c = *ptc;
+ d = *ptd;
+
+ t = b + d;
+ d = b - d;
+ b = t;
+ b -= c << 3;
+ c = (c << 1) - b;
+ d *= INVERSE_3;
+ t = b + d;
+ d = b - d;
+ b = t;
+
+ b += sb;
+ c += sc;
+ d += sd;
+
+ B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
+ C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
+ D[-1] = od | d << (BITS_PER_MP_LIMB - 2);
+
+ b >>= 2;
+ c >>= 1;
+ d >>= 2;
+
+#if WANT_ASSERT
+ ASSERT (b < 2);
+ if (l == ls)
+ {
+ ASSERT (c < 3);
+ ASSERT (d < 2);
+ }
+ else
+ {
+ ASSERT (c < 2);
+ ASSERT (!d);
+ }
+#endif
+
+ *ptb = b;
+ *ptc = c;
+ *ptd = d;
+}
+#endif
+
+
+/*-- mpn_toom3_mul_n --------------------------------------------------------------*/
+
+/* Multiplies using 5 mults of one third size and so on recursively.
+ * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
+ * No overlap of p[...] with a[...] or b[...].
+ * ws is workspace.
+ */
+
+/* TO DO: If TOOM3_MUL_THRESHOLD is much bigger than KARATSUBA_MUL_THRESHOLD then the
+ * recursion in mpn_toom3_mul_n() will always bottom out with mpn_kara_mul_n()
+ * because the "n < KARATSUBA_MUL_THRESHOLD" test here will always be false.
+ */
+
+#define TOOM3_MUL_REC(p, a, b, n, ws) \
+ do { \
+ if (n < KARATSUBA_MUL_THRESHOLD) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else if (n < TOOM3_MUL_THRESHOLD) \
+ mpn_kara_mul_n (p, a, b, n, ws); \
+ else \
+ mpn_toom3_mul_n (p, a, b, n, ws); \
+ } while (0)
+
+/* Toom-3 multiplication: p[0..2n-1] := a[0..n-1] * b[0..n-1] using five
+   pointwise multiplies of roughly n/3 limbs.  p must not overlap a or b;
+   ws is caller-supplied workspace (sized by mpn_mul_n). */
+void
+#if __STDC__
+mpn_toom3_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
+#else
+mpn_toom3_mul_n (p, a, b, n, ws)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_srcptr b;
+ mp_size_t n;
+ mp_ptr ws;
+#endif
+{
+ mp_limb_t cB,cC,cD, dB,dC,dD, tB,tC,tD;
+ mp_limb_t *A,*B,*C,*D,*E, *W;
+ mp_size_t l,l2,l3,l4,l5,ls;
+
+ /* Break n words into chunks of size l, l and ls.
+ * n = 3*k => l = k, ls = k
+ * n = 3*k+1 => l = k+1, ls = k-1
+ * n = 3*k+2 => l = k+1, ls = k
+ */
+ {
+ mp_limb_t m;
+
+ ASSERT (n >= TOOM3_MUL_THRESHOLD);
+ l = ls = n / 3;
+ m = n - l * 3;
+ if (m != 0)
+ ++l;
+ if (m == 1)
+ --ls;
+
+ l2 = l * 2;
+ l3 = l * 3;
+ l4 = l * 4;
+ l5 = l * 5;
+ /* Coefficient buffers A..E alternate between p and ws; W is the
+    recursion workspace. */
+ A = p;
+ B = ws;
+ C = p + l2;
+ D = ws + l2;
+ E = p + l4;
+ W = ws + l4;
+ }
+
+ /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/
+ evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
+ evaluate3 (A + l, B + l, C + l, &dB, &dC, &dD, b, b + l, b + l2, l, ls);
+
+ /** Second stage: pointwise multiplies.  cX/dX are the evaluations'
+     top words; their cross terms are folded in with addmul_1. **/
+ TOOM3_MUL_REC(D, C, C + l, l, W);
+ tD = cD*dD;
+ if (cD) tD += mpn_addmul_1 (D + l, C + l, l, cD);
+ if (dD) tD += mpn_addmul_1 (D + l, C, l, dD);
+ ASSERT (tD < 49);
+ TOOM3_MUL_REC(C, B, B + l, l, W);
+ tC = cC*dC;
+ /* TO DO: choose one of the following alternatives. */
+#if 0
+ if (cC) tC += mpn_addmul_1 (C + l, B + l, l, cC);
+ if (dC) tC += mpn_addmul_1 (C + l, B, l, dC);
+#else
+ if (cC)
+ {
+ if (cC == 1) tC += mpn_add_n (C + l, C + l, B + l, l);
+ else tC += add2Times (C + l, C + l, B + l, l);
+ }
+ if (dC)
+ {
+ if (dC == 1) tC += mpn_add_n (C + l, C + l, B, l);
+ else tC += add2Times (C + l, C + l, B, l);
+ }
+#endif
+ ASSERT (tC < 9);
+ TOOM3_MUL_REC(B, A, A + l, l, W);
+ tB = cB*dB;
+ if (cB) tB += mpn_addmul_1 (B + l, A + l, l, cB);
+ if (dB) tB += mpn_addmul_1 (B + l, A, l, dB);
+ ASSERT (tB < 49);
+ TOOM3_MUL_REC(A, a, b, l, W);
+ TOOM3_MUL_REC(E, a + l2, b + l2, ls, W);
+
+ /** Third stage: interpolation. **/
+ interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);
+
+ /** Final stage: add up the coefficients.
+     (Removed unused locals i, x, y that shadowed nothing and were
+     never referenced.) **/
+ {
+ tB += mpn_add_n (p + l, p + l, B, l2);
+ tD += mpn_add_n (p + l3, p + l3, D, l2);
+ mpn_incr_u (p + l3, tB);
+ mpn_incr_u (p + l4, tC);
+ mpn_incr_u (p + l5, tD);
+ }
+}
+
+/*-- mpn_toom3_sqr_n --------------------------------------------------------------*/
+
+/* Like previous function but for squaring */
+
+#define TOOM3_SQR_REC(p, a, n, ws) \
+ do { \
+ if (n < KARATSUBA_SQR_THRESHOLD) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (n < TOOM3_SQR_THRESHOLD) \
+ mpn_kara_sqr_n (p, a, n, ws); \
+ else \
+ mpn_toom3_sqr_n (p, a, n, ws); \
+ } while (0)
+
+/* Toom-3 squaring: p[0..2n-1] := a[0..n-1]^2.  Same structure as
+   mpn_toom3_mul_n but with one evaluation and squared pointwise
+   products.  p must not overlap a; ws is caller-supplied workspace. */
+void
+#if __STDC__
+mpn_toom3_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
+#else
+mpn_toom3_sqr_n (p, a, n, ws)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_size_t n;
+ mp_ptr ws;
+#endif
+{
+ mp_limb_t cB,cC,cD, tB,tC,tD;
+ mp_limb_t *A,*B,*C,*D,*E, *W;
+ mp_size_t l,l2,l3,l4,l5,ls;
+
+ /* Break n words into chunks of size l, l and ls.
+ * n = 3*k => l = k, ls = k
+ * n = 3*k+1 => l = k+1, ls = k-1
+ * n = 3*k+2 => l = k+1, ls = k
+ */
+ {
+ mp_limb_t m;
+
+ /* Fixed: previously asserted against TOOM3_MUL_THRESHOLD, a
+    copy/paste from mpn_toom3_mul_n; callers (see TOOM3_SQR_REC)
+    gate this routine on the SQR threshold. */
+ ASSERT (n >= TOOM3_SQR_THRESHOLD);
+ l = ls = n / 3;
+ m = n - l * 3;
+ if (m != 0)
+ ++l;
+ if (m == 1)
+ --ls;
+
+ l2 = l * 2;
+ l3 = l * 3;
+ l4 = l * 4;
+ l5 = l * 5;
+ A = p;
+ B = ws;
+ C = p + l2;
+ D = ws + l2;
+ E = p + l4;
+ W = ws + l4;
+ }
+
+ /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/
+ evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
+
+ /** Second stage: pointwise multiplies (squares). **/
+ TOOM3_SQR_REC(D, C, l, W);
+ tD = cD*cD;
+ if (cD) tD += mpn_addmul_1 (D + l, C, l, 2*cD);
+ ASSERT (tD < 49);
+ TOOM3_SQR_REC(C, B, l, W);
+ tC = cC*cC;
+ /* TO DO: choose one of the following alternatives. */
+#if 0
+ if (cC) tC += mpn_addmul_1 (C + l, B, l, 2*cC);
+#else
+ if (cC >= 1)
+ {
+ tC += add2Times (C + l, C + l, B, l);
+ if (cC == 2)
+ tC += add2Times (C + l, C + l, B, l);
+ }
+#endif
+ ASSERT (tC < 9);
+ TOOM3_SQR_REC(B, A, l, W);
+ tB = cB*cB;
+ if (cB) tB += mpn_addmul_1 (B + l, A, l, 2*cB);
+ ASSERT (tB < 49);
+ TOOM3_SQR_REC(A, a, l, W);
+ TOOM3_SQR_REC(E, a + l2, ls, W);
+
+ /** Third stage: interpolation. **/
+ interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);
+
+ /** Final stage: add up the coefficients.
+     (Removed unused locals i, x, y.) **/
+ {
+ tB += mpn_add_n (p + l, p + l, B, l2);
+ tD += mpn_add_n (p + l3, p + l3, D, l2);
+ mpn_incr_u (p + l3, tB);
+ mpn_incr_u (p + l4, tC);
+ mpn_incr_u (p + l5, tD);
+ }
+}
+
+/* Multiply two equal-length operands, dispatching on size:
+   basecase -> Karatsuba -> Toom-3 -> FFT (FFT arm compiled only when
+   WANT_FFT or TUNE_PROGRAM_BUILD). */
+void
+#if __STDC__
+mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
+#else
+mpn_mul_n (p, a, b, n)
+ mp_ptr p;
+ mp_srcptr a;
+ mp_srcptr b;
+ mp_size_t n;
+#endif
+{
+ if (n < KARATSUBA_MUL_THRESHOLD)
+ mpn_mul_basecase (p, a, n, b, n);
+ else if (n < TOOM3_MUL_THRESHOLD)
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+#if TUNE_PROGRAM_BUILD
+ mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD_LIMIT-1) + 2 * BITS_PER_MP_LIMB];
+#else
+ mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD-1) + 2 * BITS_PER_MP_LIMB];
+#endif
+ mpn_kara_mul_n (p, a, b, n, ws);
+ }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else if (n < FFT_MUL_THRESHOLD)
+#else
+ else
+#endif
+ {
+ /* Use workspace of unknown size in heap, as stack space may
+ * be limited. Since n is at least TOOM3_MUL_THRESHOLD, the
+ * multiplication will take much longer than malloc()/free(). */
+ mp_limb_t wsLen, *ws;
+ wsLen = 2 * n + 3 * BITS_PER_MP_LIMB;
+ ws = (mp_ptr) (*_mp_allocate_func) ((size_t) wsLen * sizeof (mp_limb_t));
+ mpn_toom3_mul_n (p, a, b, n, ws);
+ (*_mp_free_func) (ws, (size_t) wsLen * sizeof (mp_limb_t));
+ }
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else
+ {
+ mpn_mul_fft_full (p, a, n, b, n);
+ }
+#endif
+}
diff --git a/rts/gmp/mpn/generic/perfsqr.c b/rts/gmp/mpn/generic/perfsqr.c
new file mode 100644
index 0000000000..42ee3405d7
--- /dev/null
+++ b/rts/gmp/mpn/generic/perfsqr.c
@@ -0,0 +1,123 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+ zero otherwise.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue
+ modulo 0x100. */
+/* Lookup table backing the O(1) pre-test in mpn_perfect_square_p: a
+   perfect square's low byte must be a quadratic residue mod 256. */
+static unsigned char const sq_res_0x100[0x100] =
+{
+ 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+ 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+};
+
+/* Return non-zero iff (up, usize) is a perfect square.  Three filters:
+   an O(1) table lookup on the low byte, an optional O(n) residue test
+   modulo the prime product PP, and finally an exact square root. */
+int
+#if __STDC__
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+#else
+mpn_perfect_square_p (up, usize)
+ mp_srcptr up;
+ mp_size_t usize;
+#endif
+{
+ mp_limb_t rem;
+ mp_ptr root_ptr;
+ int res;
+ TMP_DECL (marker);
+
+ /* The first test excludes 55/64 (85.9%) of the perfect square candidates
+ in O(1) time. */
+ if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0)
+ return 0;
+
+#if defined (PP)
+ /* The second test excludes 30652543/30808063 (99.5%) of the remaining
+ perfect square candidates in O(n) time. */
+
+ /* Firstly, compute REM = A mod PP. */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6))
+ rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED);
+ else
+ rem = mpn_mod_1 (up, usize, (mp_limb_t) PP);
+
+ /* Now decide if REM is a quadratic residue modulo the factors in PP. */
+
+ /* If A is just a few limbs, computing the square root does not take long
+ time, so things might run faster if we limit this loop according to the
+ size of A. */
+
+ /* Each mask below encodes the set of quadratic residues modulo one
+    prime p; bit (rem % p) being clear proves A is not a square. */
+#if BITS_PER_MP_LIMB == 64
+ if (((CNST_LIMB(0x12DD703303AED3) >> rem % 53) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x4351B2753DF) >> rem % 47) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x35883A3EE53) >> rem % 43) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x1B382B50737) >> rem % 41) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x165E211E9B) >> rem % 37) & 1) == 0)
+ return 0;
+ if (((CNST_LIMB(0x121D47B7) >> rem % 31) & 1) == 0)
+ return 0;
+#endif
+ if (((0x13D122F3L >> rem % 29) & 1) == 0)
+ return 0;
+ if (((0x5335FL >> rem % 23) & 1) == 0)
+ return 0;
+ if (((0x30AF3L >> rem % 19) & 1) == 0)
+ return 0;
+ if (((0x1A317L >> rem % 17) & 1) == 0)
+ return 0;
+ if (((0x161BL >> rem % 13) & 1) == 0)
+ return 0;
+ if (((0x23BL >> rem % 11) & 1) == 0)
+ return 0;
+ if (((0x017L >> rem % 7) & 1) == 0)
+ return 0;
+ if (((0x13L >> rem % 5) & 1) == 0)
+ return 0;
+ if (((0x3L >> rem % 3) & 1) == 0)
+ return 0;
+#endif
+
+ TMP_MARK (marker);
+
+ /* For the third and last test, we finally compute the square root,
+ to make sure we've really got a perfect square. */
+ root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);
+
+ /* Iff mpn_sqrtrem returns zero, the square is perfect. */
+ res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+ TMP_FREE (marker);
+ return res;
+}
diff --git a/rts/gmp/mpn/generic/popcount.c b/rts/gmp/mpn/generic/popcount.c
new file mode 100644
index 0000000000..387be9536d
--- /dev/null
+++ b/rts/gmp/mpn/generic/popcount.c
@@ -0,0 +1,93 @@
+/* popcount.c
+
+Copyright (C) 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined __GNUC__
+/* No processor claiming to be SPARC v9 compliant seem to
+ implement the POPC instruction. Disable pattern for now. */
+#if 0 && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
+#define popc_limb(a) \
+ ({ \
+ DItype __res; \
+ asm ("popc %1,%0" : "=r" (__res) : "rI" (a)); \
+ __res; \
+ })
+#endif
+#endif
+
+#ifndef popc_limb
+
+/* Cool population count of a mp_limb_t.
+ You have to figure out how this works, I won't tell you! */
+/* (It is the classic SWAR bit count: fold 2-bit pair sums, then 4-bit
+   nibble sums masked to avoid inter-field carries, then accumulate the
+   byte counts with shifted adds; the final mask keeps the low byte.) */
+static inline unsigned int
+#if __STDC__
+popc_limb (mp_limb_t x)
+#else
+popc_limb (x)
+ mp_limb_t x;
+#endif
+{
+#if BITS_PER_MP_LIMB == 64
+ /* We have to go into some trouble to define these constants.
+ (For mp_limb_t being `long long'.) */
+ mp_limb_t cnst;
+ cnst = 0xaaaaaaaaL | ((mp_limb_t) 0xaaaaaaaaL << BITS_PER_MP_LIMB/2);
+ x -= (x & cnst) >> 1;
+ cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2);
+ x = ((x & ~cnst) >> 2) + (x & cnst);
+ cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2);
+ x = ((x >> 4) + x) & cnst;
+ x = ((x >> 8) + x);
+ x = ((x >> 16) + x);
+ x = ((x >> 32) + x) & 0xff;
+#endif
+#if BITS_PER_MP_LIMB == 32
+ x -= (x & 0xaaaaaaaa) >> 1;
+ x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L);
+ x = ((x >> 4) + x) & 0x0f0f0f0fL;
+ x = ((x >> 8) + x);
+ x = ((x >> 16) + x) & 0xff;
+#endif
+ return x;
+}
+#endif
+
+/* Count the set bits across all SIZE limbs at P. */
+unsigned long int
+#if __STDC__
+mpn_popcount (register mp_srcptr p, register mp_size_t size)
+#else
+mpn_popcount (p, size)
+ register mp_srcptr p;
+ register mp_size_t size;
+#endif
+{
+ unsigned long int total;
+ mp_size_t remaining;
+
+ total = 0;
+ for (remaining = size; remaining > 0; remaining--)
+ total += popc_limb (*p++);
+
+ return total;
+}
diff --git a/rts/gmp/mpn/generic/pre_mod_1.c b/rts/gmp/mpn/generic/pre_mod_1.c
new file mode 100644
index 0000000000..27179683b3
--- /dev/null
+++ b/rts/gmp/mpn/generic/pre_mod_1.c
@@ -0,0 +1,69 @@
+/* mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb,
+ divisor_limb_inverted) --
+ Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by the normalized DIVISOR_LIMB.
+ DIVISOR_LIMB_INVERTED should be 2^(2*BITS_PER_MP_LIMB) / DIVISOR_LIMB +
+ - 2^BITS_PER_MP_LIMB.
+ Return the single-limb remainder.
+
+Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* Remainder of (dividend_ptr, dividend_size) modulo divisor_limb, using
+   the precomputed inverse (see the file header for its definition; the
+   divisor is assumed normalized -- high bit set -- per that contract). */
+mp_limb_t
+#if __STDC__
+mpn_preinv_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb_t divisor_limb, mp_limb_t divisor_limb_inverted)
+#else
+mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, divisor_limb_inverted)
+ mp_srcptr dividend_ptr;
+ mp_size_t dividend_size;
+ mp_limb_t divisor_limb;
+ mp_limb_t divisor_limb_inverted;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t n0, r;
+ int dummy;
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ /* Seed r: if the top limb is already >= the divisor it must go through
+    the reduction loop (with r = 0); otherwise it becomes the initial
+    partial remainder and the loop starts one limb lower. */
+ if (r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for (; i >= 0; i--)
+ {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd_preinv (dummy, r, r, n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+}
diff --git a/rts/gmp/mpn/generic/random.c b/rts/gmp/mpn/generic/random.c
new file mode 100644
index 0000000000..dea4e20e56
--- /dev/null
+++ b/rts/gmp/mpn/generic/random.c
@@ -0,0 +1,43 @@
+/* mpn_random -- Generate random numbers.
+
+Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "urandom.h"
+
+/* Fill (res_ptr, size) with random limbs, guaranteeing that the most
+   significant limb ends up non-zero. */
+void
+#if __STDC__
+mpn_random (mp_ptr res_ptr, mp_size_t size)
+#else
+mpn_random (res_ptr, size)
+ mp_ptr res_ptr;
+ mp_size_t size;
+#endif
+{
+ mp_size_t j;
+ mp_limb_t top;
+
+ for (j = 0; j < size; j++)
+ res_ptr[j] = urandom ();
+
+ /* Redraw the top limb until it is non-zero. */
+ top = res_ptr[size - 1];
+ while (top == 0)
+ top = urandom ();
+ res_ptr[size - 1] = top;
+}
diff --git a/rts/gmp/mpn/generic/random2.c b/rts/gmp/mpn/generic/random2.c
new file mode 100644
index 0000000000..86682f81fa
--- /dev/null
+++ b/rts/gmp/mpn/generic/random2.c
@@ -0,0 +1,105 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+ of ones and zeroes. Suitable for border testing.
+
+Copyright (C) 1992, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined (__hpux) || defined (__alpha) || defined (__svr4__) || defined (__SVR4)
+/* HPUX lacks random(). DEC OSF/1 1.2 random() returns a double. */
+long mrand48 ();
+static inline long
+random ()
+{
+ return mrand48 ();
+}
+#elif defined(_WIN32) && !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+/* MS CRT supplies just the poxy rand(), with an upper bound of 0x7fff.
+   Glue three 15-bit draws together.  The previous expression used
+   "rand () << 32", which is undefined behaviour when int is 32 bits
+   (C99 6.5.7p3) and contributed nothing on this platform; 15/30-bit
+   shifts cover every bit of a 32-bit unsigned long. */
+static inline unsigned long
+random ()
+{
+ return (unsigned long) rand () ^ ((unsigned long) rand () << 15)
+ ^ ((unsigned long) rand () << 30);
+}
+
+#else
+long random ();
+#endif
+
+/* It's a bit tricky to get this right, so please test the code well
+ if you hack with it. Some early versions of the function produced
+ random numbers with the leading limb == 0, and some versions never
+ made the most significant bit set. */
+
+void
+#if __STDC__
+mpn_random2 (mp_ptr res_ptr, mp_size_t size)
+#else
+mpn_random2 (res_ptr, size)
+ mp_ptr res_ptr;
+ mp_size_t size;
+#endif
+{
+ int n_bits;
+ int bit_pos;
+ mp_size_t limb_pos;
+ unsigned int ran;
+ mp_limb_t limb;
+
+ limb = 0;
+
+ /* Start off in a random bit position in the most significant limb. */
+ bit_pos = random () & (BITS_PER_MP_LIMB - 1);
+
+ /* Least significant bit of RAN chooses string of ones/string of zeroes.
+ Make most significant limb be non-zero by setting bit 0 of RAN. */
+ ran = random () | 1;
+
+ /* Walk from the top limb downward, emitting alternating runs of ones
+    and zeroes whose lengths come from RAN.  LIMB accumulates the limb
+    under construction; it is stored (and limb_pos decremented) each
+    time a run crosses a limb boundary. */
+ for (limb_pos = size - 1; limb_pos >= 0; )
+ {
+ n_bits = (ran >> 1) % BITS_PER_MP_LIMB + 1;
+ if ((ran & 1) != 0)
+ {
+ /* Generate a string of ones. */
+ if (n_bits >= bit_pos)
+ {
+ res_ptr[limb_pos--] = limb | ((((mp_limb_t) 2) << bit_pos) - 1);
+ bit_pos += BITS_PER_MP_LIMB;
+ limb = (~(mp_limb_t) 0) << (bit_pos - n_bits);
+ }
+ else
+ {
+ limb |= ((((mp_limb_t) 1) << n_bits) - 1) << (bit_pos - n_bits + 1);
+ }
+ }
+ else
+ {
+ /* Generate a string of zeroes. */
+ if (n_bits >= bit_pos)
+ {
+ res_ptr[limb_pos--] = limb;
+ limb = 0;
+ bit_pos += BITS_PER_MP_LIMB;
+ }
+ }
+ bit_pos -= n_bits;
+ ran = random ();
+ }
+}
diff --git a/rts/gmp/mpn/generic/rshift.c b/rts/gmp/mpn/generic/rshift.c
new file mode 100644
index 0000000000..59caf73529
--- /dev/null
+++ b/rts/gmp/mpn/generic/rshift.c
@@ -0,0 +1,88 @@
+/* mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ and store the USIZE least significant limbs of the result at WP.
+ The bits shifted out to the right are returned.
+
+ Argument constraints:
+ 1. 0 < CNT < BITS_PER_MP_LIMB
+ 2. If the result is to be written over the input, WP must be <= UP.
+*/
+
+mp_limb_t
+#if __STDC__
+mpn_rshift (register mp_ptr wp,
+ register mp_srcptr up, mp_size_t usize,
+ register unsigned int cnt)
+#else
+mpn_rshift (wp, up, usize, cnt)
+ register mp_ptr wp;
+ register mp_srcptr up;
+ mp_size_t usize;
+ register unsigned int cnt;
+#endif
+{
+ register mp_limb_t high_limb, low_limb;
+ register unsigned sh_1, sh_2;
+ register mp_size_t i;
+ mp_limb_t retval;
+
+#ifdef DEBUG
+ if (usize == 0 || cnt == 0)
+ abort ();
+#endif
+
+ sh_1 = cnt;
+
+ /* Dead code: a CNT == 0 copy path, disabled because the argument
+ constraints above guarantee 0 < CNT < BITS_PER_MP_LIMB. */
+#if 0
+ if (sh_1 == 0)
+ {
+ if (wp != up)
+ {
+ /* Copy from low end to high end, to allow specified input/output
+ overlapping. */
+ for (i = 0; i < usize; i++)
+ wp[i] = up[i];
+ }
+ return usize;
+ }
+#endif
+
+ /* Offset WP so that the store wp[i] below targets the limb one
+ position lower than up[i]. */
+ wp -= 1;
+ sh_2 = BITS_PER_MP_LIMB - sh_1;
+ high_limb = up[0];
+ /* The CNT bits shifted out of the low end, returned left-justified
+ in a limb. */
+ retval = high_limb << sh_2;
+ low_limb = high_limb;
+
+ /* Each result limb combines the high bits of up[i-1] with the low
+ bits of up[i]. */
+ for (i = 1; i < usize; i++)
+ {
+ high_limb = up[i];
+ wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+ low_limb = high_limb;
+ }
+ wp[i] = low_limb >> sh_1;
+
+ return retval;
+}
diff --git a/rts/gmp/mpn/generic/sb_divrem_mn.c b/rts/gmp/mpn/generic/sb_divrem_mn.c
new file mode 100644
index 0000000000..a269e34f5f
--- /dev/null
+++ b/rts/gmp/mpn/generic/sb_divrem_mn.c
@@ -0,0 +1,201 @@
+/* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
+ quotient.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ the NSIZE-DSIZE least significant quotient limbs at QP
+ and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
+ non-zero, generate that many fraction bits and append them after the
+ other quotient limbs.
+ Return the most significant limb of the quotient, this is always 0 or 1.
+
+ Preconditions:
+ 0. NSIZE >= DSIZE.
+ 1. The most significant bit of the divisor must be set.
+ 2. QP must either not overlap with the input operands at all, or
+ QP + DSIZE >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.)
+ 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ 4. DSIZE >= 2. */
+
+
+#define PREINVERT_VIABLE \
+ (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
+
+mp_limb_t
+#if __STDC__
+mpn_sb_divrem_mn (mp_ptr qp,
+ mp_ptr np, mp_size_t nsize,
+ mp_srcptr dp, mp_size_t dsize)
+#else
+mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_size_t nsize;
+ mp_srcptr dp;
+ mp_size_t dsize;
+#endif
+{
+ mp_limb_t most_significant_q_limb = 0;
+ mp_size_t i;
+ mp_limb_t dx, d1, n0;
+ mp_limb_t dxinv;
+ int have_preinv;
+
+ /* NOTE(review): the file-header preconditions say DSIZE >= 2, but this
+ assertion requires DSIZE > 2 -- confirm which is the intended
+ contract. */
+ ASSERT_ALWAYS (dsize > 2);
+
+ /* Work on the high DSIZE limbs of the numerator first. */
+ np += nsize - dsize;
+ dx = dp[dsize - 1]; /* most significant divisor limb (normalized) */
+ d1 = dp[dsize - 2]; /* second most significant divisor limb */
+ n0 = np[dsize - 1];
+
+ /* If the high part of the numerator is >= the divisor, subtract the
+ divisor once and record a leading quotient limb of 1. */
+ if (n0 >= dx)
+ {
+ if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
+ {
+ mpn_sub_n (np, np, dp, dsize);
+ most_significant_q_limb = 1;
+ }
+ }
+
+ /* If multiplication is much faster than division, preinvert the
+ most significant divisor limb before entering the loop. */
+ /* HAVE_PREINV is only read below under PREINVERT_VIABLE && have_preinv;
+ PREINVERT_VIABLE is a compile-time constant, so the variable is never
+ used uninitialized. */
+ if (PREINVERT_VIABLE)
+ {
+ have_preinv = 0;
+ if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
+ {
+ invert_limb (dxinv, dx);
+ have_preinv = 1;
+ }
+ }
+
+ /* Schoolbook loop: one quotient limb per iteration, most significant
+ first. */
+ for (i = nsize - dsize - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ mp_limb_t nx;
+ mp_limb_t cy_limb;
+
+ nx = np[dsize - 1];
+ np--;
+
+ /* Numerator's high limb equals the divisor's high limb: the
+ quotient digit is (at most) the maximum limb value. */
+ if (nx == dx)
+ {
+ /* This might over-estimate q, but it's probably not worth
+ the extra code here to find out. */
+ q = ~(mp_limb_t) 0;
+
+#if 1
+ cy_limb = mpn_submul_1 (np, dp, dsize, q);
+#else
+ /* This should be faster on many machines */
+ cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
+ cy = mpn_add_n (np, np, dp, dsize);
+ np[dsize] += cy;
+#endif
+
+ /* Correct a one-off over-estimate by adding the divisor back. */
+ if (nx != cy_limb)
+ {
+ mpn_add_n (np, np, dp, dsize);
+ q--;
+ }
+
+ qp[i] = q;
+ }
+ else
+ {
+ mp_limb_t rx, r1, r0, p1, p0;
+
+ /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
+ usage when np[dsize-1] is used in an asm statement like
+ umul_ppmm in udiv_qrnnd_preinv. The symptom is seg faults due
+ to registers being clobbered. gcc 2.95 i386 doesn't have the
+ problem. */
+ {
+ mp_limb_t workaround = np[dsize - 1];
+ if (PREINVERT_VIABLE && have_preinv)
+ udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
+ else
+ udiv_qrnnd (q, r1, nx, workaround, dx);
+ }
+ /* Refine q using the second divisor limb (3x2 division step). */
+ umul_ppmm (p1, p0, d1, q);
+
+ r0 = np[dsize - 2];
+ rx = 0;
+ if (r1 < p1 || (r1 == p1 && r0 < p0))
+ {
+ p1 -= p0 < d1;
+ p0 -= d1;
+ q--;
+ r1 += dx;
+ rx = r1 < dx;
+ }
+
+ p1 += r0 < p0; /* cannot carry! */
+ rx -= r1 < p1; /* may become 11..1 if q is still too large */
+ r1 -= p1;
+ r0 -= p0;
+
+ /* Subtract q times the remaining divisor limbs from the
+ numerator. */
+ cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
+
+ {
+ mp_limb_t cy1, cy2;
+ cy1 = r0 < cy_limb;
+ r0 -= cy_limb;
+ cy2 = r1 < cy1;
+ r1 -= cy1;
+ np[dsize - 1] = r1;
+ np[dsize - 2] = r0;
+ /* Final fix-up: if the borrow doesn't match rx, q was one too
+ large; add the divisor back. */
+ if (cy2 != rx)
+ {
+ mpn_add_n (np, np, dp, dsize);
+ q--;
+ }
+ }
+ qp[i] = q;
+ }
+ }
+
+ /* ______ ______ ______
+ |__rx__|__r1__|__r0__| partial remainder
+ ______ ______
+ - |__p1__|__p0__| partial product to subtract
+ ______ ______
+ - |______|cylimb|
+
+ rx is -1, 0 or 1. If rx=1, then q is correct (it should match
+ carry out). If rx=-1 then q is too large. If rx=0, then q might
+ be too large, but it is most likely correct.
+ */
+
+ return most_significant_q_limb;
+}
diff --git a/rts/gmp/mpn/generic/scan0.c b/rts/gmp/mpn/generic/scan0.c
new file mode 100644
index 0000000000..96f05ce854
--- /dev/null
+++ b/rts/gmp/mpn/generic/scan0.c
@@ -0,0 +1,62 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Design issues:
+ 1. What if starting_bit is not within U? Caller's problem?
+ 2. Bit index should be 'unsigned'?
+
+ Argument constraints:
+ 1. U must sooner or later have a limb with a clear bit.
+ */
+
+unsigned long int
+#if __STDC__
+mpn_scan0 (register mp_srcptr up,
+ register unsigned long int starting_bit)
+#else
+mpn_scan0 (up, starting_bit)
+ register mp_srcptr up;
+ register unsigned long int starting_bit;
+#endif
+{
+ mp_size_t starting_word;
+ mp_limb_t alimb;
+ int cnt;
+ mp_srcptr p;
+
+ /* Start at the word implied by STARTING_BIT. */
+ starting_word = starting_bit / BITS_PER_MP_LIMB;
+ p = up + starting_word;
+ /* Complement the limb so that scanning for a clear bit in U becomes
+ scanning for a set bit in the complement. */
+ alimb = ~*p++;
+
+ /* Mask off any bits before STARTING_BIT in the first limb. */
+ alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB);
+
+ while (alimb == 0)
+ alimb = ~*p++;
+
+ /* alimb & -alimb isolates the least significant set bit; its leading
+ zero count converts to the bit index within the limb. P has advanced
+ one past the limb holding the hit, hence the -1 adjustment. */
+ count_leading_zeros (cnt, alimb & -alimb);
+ return (p - up) * BITS_PER_MP_LIMB - 1 - cnt;
+}
diff --git a/rts/gmp/mpn/generic/scan1.c b/rts/gmp/mpn/generic/scan1.c
new file mode 100644
index 0000000000..98e2e0dcc0
--- /dev/null
+++ b/rts/gmp/mpn/generic/scan1.c
@@ -0,0 +1,62 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Design issues:
+ 1. What if starting_bit is not within U? Caller's problem?
+ 2. Bit index should be 'unsigned'?
+
+ Argument constraints:
+ 1. U must sooner or later have a limb != 0.
+ */
+
+unsigned long int
+#if __STDC__
+mpn_scan1 (register mp_srcptr up,
+ register unsigned long int starting_bit)
+#else
+mpn_scan1 (up, starting_bit)
+ register mp_srcptr up;
+ register unsigned long int starting_bit;
+#endif
+{
+ mp_size_t starting_word;
+ mp_limb_t alimb;
+ int cnt;
+ mp_srcptr p;
+
+ /* Start at the word implied by STARTING_BIT. */
+ starting_word = starting_bit / BITS_PER_MP_LIMB;
+ p = up + starting_word;
+ alimb = *p++;
+
+ /* Mask off any bits before STARTING_BIT in the first limb. */
+ alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB);
+
+ while (alimb == 0)
+ alimb = *p++;
+
+ /* alimb & -alimb isolates the least significant set bit; its leading
+ zero count converts to the bit index within the limb. P has advanced
+ one past the limb holding the hit, hence the -1 adjustment. */
+ count_leading_zeros (cnt, alimb & -alimb);
+ return (p - up) * BITS_PER_MP_LIMB - 1 - cnt;
+}
diff --git a/rts/gmp/mpn/generic/set_str.c b/rts/gmp/mpn/generic/set_str.c
new file mode 100644
index 0000000000..e6ccc92154
--- /dev/null
+++ b/rts/gmp/mpn/generic/set_str.c
@@ -0,0 +1,159 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base)
+ -- Convert a STR_LEN long base BASE byte string pointed to by STR to a
+ limb vector pointed to by RES_PTR. Return the number of limbs in
+ RES_PTR.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_size_t
+#if __STDC__
+mpn_set_str (mp_ptr xp, const unsigned char *str, size_t str_len, int base)
+#else
+mpn_set_str (xp, str, str_len, base)
+ mp_ptr xp;
+ const unsigned char *str;
+ size_t str_len;
+ int base;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t big_base;
+ int indigits_per_limb;
+ mp_limb_t res_digit;
+
+ big_base = __mp_bases[base].big_base;
+ indigits_per_limb = __mp_bases[base].chars_per_limb;
+
+/* size = str_len / indigits_per_limb + 1; */
+
+ size = 0;
+
+ if ((base & (base - 1)) == 0)
+ {
+ /* The base is a power of 2. Read the input string from
+ least to most significant character/digit. */
+
+ const unsigned char *s;
+ int next_bitpos;
+ /* Presumably, for power-of-2 bases the __mp_bases table stores the
+ number of bits per digit in big_base -- that is how it is used
+ here. */
+ int bits_per_indigit = big_base;
+
+ res_digit = 0;
+ next_bitpos = 0;
+
+ /* Pack digits into limbs, flushing a limb whenever it fills up. */
+ for (s = str + str_len - 1; s >= str; s--)
+ {
+ int inp_digit = *s;
+
+ res_digit |= (mp_limb_t) inp_digit << next_bitpos;
+ next_bitpos += bits_per_indigit;
+ if (next_bitpos >= BITS_PER_MP_LIMB)
+ {
+ xp[size++] = res_digit;
+ next_bitpos -= BITS_PER_MP_LIMB;
+ /* Carry the digit bits that did not fit into the next limb. */
+ res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+ }
+ }
+
+ if (res_digit != 0)
+ xp[size++] = res_digit;
+ }
+ else
+ {
+ /* General case. The base is not a power of 2. */
+
+ size_t i;
+ int j;
+ mp_limb_t cy_limb;
+
+ /* Process full groups of INDIGITS_PER_LIMB characters: convert each
+ group to a limb-sized digit, then fold it in with
+ x = x * big_base + digit. */
+ for (i = indigits_per_limb; i < str_len; i += indigits_per_limb)
+ {
+ res_digit = *str++;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = 1; j < indigits_per_limb; j++)
+ res_digit = res_digit * 10 + *str++;
+ }
+ else
+ {
+ for (j = 1; j < indigits_per_limb; j++)
+ res_digit = res_digit * base + *str++;
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ xp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+ cy_limb = mpn_mul_1 (xp, xp, size, big_base);
+ cy_limb += mpn_add_1 (xp, xp, size, res_digit);
+ if (cy_limb != 0)
+ xp[size++] = cy_limb;
+ }
+ }
+
+ /* Handle the final, possibly partial, group of characters. BIG_BASE
+ is rebuilt as base**(number of remaining digits). */
+ big_base = base;
+ res_digit = *str++;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = 1; j < str_len - (i - indigits_per_limb); j++)
+ {
+ res_digit = res_digit * 10 + *str++;
+ big_base *= 10;
+ }
+ }
+ else
+ {
+ for (j = 1; j < str_len - (i - indigits_per_limb); j++)
+ {
+ res_digit = res_digit * base + *str++;
+ big_base *= base;
+ }
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ xp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+ cy_limb = mpn_mul_1 (xp, xp, size, big_base);
+ cy_limb += mpn_add_1 (xp, xp, size, res_digit);
+ if (cy_limb != 0)
+ xp[size++] = cy_limb;
+ }
+ }
+
+ return size;
+}
diff --git a/rts/gmp/mpn/generic/sqr_basecase.c b/rts/gmp/mpn/generic/sqr_basecase.c
new file mode 100644
index 0000000000..760258a3e0
--- /dev/null
+++ b/rts/gmp/mpn/generic/sqr_basecase.c
@@ -0,0 +1,83 @@
+/* mpn_sqr_basecase -- Internal routine to square two natural numbers
+ of length m and n.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n)
+#else
+mpn_sqr_basecase (prodp, up, n)
+ mp_ptr prodp;
+ mp_srcptr up;
+ mp_size_t n;
+#endif
+{
+ mp_size_t i;
+
+ /* Square of the low limb goes into prodp[0..1]. */
+ {
+ /* N.B.! We need the superfluous indirection through argh to work around
+ a reloader bug in GCC 2.7.*. */
+ mp_limb_t x;
+ mp_limb_t argh;
+ x = up[0];
+ umul_ppmm (argh, prodp[0], x, x);
+ prodp[1] = argh;
+ }
+ if (n > 1)
+ {
+ mp_limb_t tarr[2 * KARATSUBA_SQR_THRESHOLD];
+ mp_ptr tp = tarr;
+ mp_limb_t cy;
+
+ /* must fit 2*n limbs in tarr */
+ ASSERT (n <= KARATSUBA_SQR_THRESHOLD);
+
+ /* Accumulate the off-diagonal products u[i]*u[j], i < j, in TP. */
+ cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
+ tp[n - 1] = cy;
+ for (i = 2; i < n; i++)
+ {
+ mp_limb_t cy;
+ cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
+ tp[n + i - 2] = cy;
+ }
+ /* Squares of the remaining limbs go on the diagonal of PRODP. */
+ for (i = 1; i < n; i++)
+ {
+ mp_limb_t x;
+ x = up[i];
+ umul_ppmm (prodp[2 * i + 1], prodp[2 * i], x, x);
+ }
+ /* Double the off-diagonal sum (each cross product occurs twice in a
+ square) and add it into the diagonal terms. */
+ {
+ mp_limb_t cy;
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+ cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2);
+ prodp[2 * n - 1] += cy;
+ }
+ }
+}
diff --git a/rts/gmp/mpn/generic/sqrtrem.c b/rts/gmp/mpn/generic/sqrtrem.c
new file mode 100644
index 0000000000..ee3b5144dd
--- /dev/null
+++ b/rts/gmp/mpn/generic/sqrtrem.c
@@ -0,0 +1,509 @@
+/* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
+
+ Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR.
+ Write the remainder at REM_PTR, if REM_PTR != NULL.
+ Return the size of the remainder.
+ (The size of the root is always half of the size of the operand.)
+
+ OP_PTR and ROOT_PTR may not point to the same object.
+ OP_PTR and REM_PTR may point to the same object.
+
+ If REM_PTR is NULL, only the root is computed and the return value of
+ the function is 0 if OP is a perfect square, and *any* non-zero number
+ otherwise.
+
+Copyright (C) 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* This code is just correct if "unsigned char" has at least 8 bits. It
+ doesn't help to use CHAR_BIT from limits.h, as the real problem is
+ the static arrays. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Square root algorithm:
+
+ 1. Shift OP (the input) to the left an even number of bits s.t. there
+ are an even number of words and either (or both) of the most
+ significant bits are set. This way, sqrt(OP) has exactly half as
+ many words as OP, and has its most significant bit set.
+
+ 2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables.
+ This approximation is used for the first single-precision
+ iterations of Newton's method, yielding a full-word approximation
+ to sqrt(OP).
+
+ 3. Perform multiple-precision Newton iteration until we have the
+ exact result. Only about half of the input operand is used in
+ this calculation, as the square root is perfectly determinable
+ from just the higher half of a number. */
+
+/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */
+#if defined __GNUC__ && ! defined __SOFT_FLOAT
+
+#if defined (__sparc__) && BITS_PER_MP_LIMB == 32
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if defined (__HAVE_68881__)
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if defined (__hppa) && BITS_PER_MP_LIMB == 32
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if defined (_ARCH_PWR2) && BITS_PER_MP_LIMB == 32
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a)); \
+ __sqrt_res; \
+ })
+#endif
+
+#if 0
+#if defined (__i386__) || defined (__i486__)
+#define SQRT(a) \
+ ({ \
+ double __sqrt_res; \
+ asm ("fsqrt" : "=t" (__sqrt_res) : "0" (a)); \
+ __sqrt_res; \
+ })
+#endif
+#endif
+
+#endif
+
+#ifndef SQRT
+
+/* Tables for initial approximation of the square root. These are
+ indexed with bits 1-8 of the operand for which the square root is
+ calculated, where bit 0 is the most significant non-zero bit. I.e.
+ the most significant one-bit is not used, since that per definition
+ is one. Likewise, the tables don't return the highest bit of the
+ result. That bit must be inserted by or:ing the returned value with
+ 0x100. This way, we get a 9-bit approximation from 8-bit tables! */
+
+/* Table to be used for operands with an even total number of bits.
+ (Exactly as in the decimal system there are similarities between the
+ square root of numbers with the same initial digits and an even
+ difference in the total number of digits. Consider the square root
+ of 1, 10, 100, 1000, ...) */
+static const unsigned char even_approx_tab[256] =
+{
+ 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
+ 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
+ 0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79,
+ 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84,
+ 0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89,
+ 0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f,
+ 0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94,
+ 0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99,
+ 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e,
+ 0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3,
+ 0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7,
+ 0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac,
+ 0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1,
+ 0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6,
+ 0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba,
+ 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf,
+ 0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3,
+ 0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8,
+ 0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc,
+ 0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1,
+ 0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5,
+ 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda,
+ 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde,
+ 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2,
+ 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6,
+ 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb,
+ 0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef,
+ 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3,
+ 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7,
+ 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb,
+ 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff,
+};
+
+/* Table to be used for operands with an odd total number of bits.
+ (Further comments before previous table.) */
+static const unsigned char odd_approx_tab[256] =
+{
+ 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
+ 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
+ 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b,
+ 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f,
+ 0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12,
+ 0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16,
+ 0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a,
+ 0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d,
+ 0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21,
+ 0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24,
+ 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28,
+ 0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b,
+ 0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f,
+ 0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32,
+ 0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35,
+ 0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39,
+ 0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c,
+ 0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f,
+ 0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42,
+ 0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45,
+ 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49,
+ 0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c,
+ 0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f,
+ 0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52,
+ 0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55,
+ 0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58,
+ 0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b,
+ 0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e,
+ 0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61,
+ 0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63,
+ 0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66,
+ 0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69,
+};
+#endif
+
+
+mp_size_t
+#if __STDC__
+mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size)
+#else
+mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
+ mp_ptr root_ptr;
+ mp_ptr rem_ptr;
+ mp_srcptr op_ptr;
+ mp_size_t op_size;
+#endif
+{
+ /* R (root result) */
+ mp_ptr rp; /* Pointer to least significant word */
+ mp_size_t rsize; /* The size in words */
+
+ /* T (OP shifted to the left a.k.a. normalized) */
+ mp_ptr tp; /* Pointer to least significant word */
+ mp_size_t tsize; /* The size in words */
+ mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */
+ mp_limb_t t_high0, t_high1; /* The two most significant words */
+
+ /* TT (temporary for numerator/remainder) */
+ mp_ptr ttp; /* Pointer to least significant word */
+
+ /* X (temporary for quotient in main loop) */
+ mp_ptr xp; /* Pointer to least significant word */
+ mp_size_t xsize; /* The size in words */
+
+ unsigned cnt;
+ mp_limb_t initial_approx; /* Initially made approximation */
+ mp_size_t tsizes[BITS_PER_MP_LIMB]; /* Successive calculation precisions */
+ mp_size_t tmp;
+ mp_size_t i;
+
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+
+ /* If OP is zero, both results are zero. */
+ if (op_size == 0)
+ return 0;
+
+ /* CNT counts how far OP must be shifted up; BITS_PER_MP_LIMB is added
+ when a padding limb is needed to make the limb count even. */
+ count_leading_zeros (cnt, op_ptr[op_size - 1]);
+ tsize = op_size;
+ if ((tsize & 1) != 0)
+ {
+ cnt += BITS_PER_MP_LIMB;
+ tsize++;
+ }
+
+ rsize = tsize / 2;
+ rp = root_ptr;
+
+ TMP_MARK (marker);
+
+ /* Shift OP an even number of bits into T, such that either the most or
+ the second most significant bit is set, and such that the number of
+ words in T becomes even. This way, the number of words in R=sqrt(OP)
+ is exactly half as many as in OP, and the most significant bit of R
+ is set.
+
+ Also, the initial approximation is simplified by this up-shifted OP.
+
+ Finally, the Newtonian iteration which is the main part of this
+ program performs division by R. The fast division routine expects
+ the divisor to be "normalized" in exactly the sense of having the
+ most significant bit set. */
+
+ tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+ /* (cnt & ~1) rounds the shift down to an even number of bits. */
+ if ((cnt & ~1) % BITS_PER_MP_LIMB != 0)
+ t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size,
+ (cnt & ~1) % BITS_PER_MP_LIMB);
+ else
+ MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size);
+
+ if (cnt >= BITS_PER_MP_LIMB)
+ tp[0] = 0;
+
+ /* NOTE(review): this overwrites the mpn_lshift result assigned to
+ t_high0 above; that earlier assignment is redundant but harmless. */
+ t_high0 = tp[tsize - 1];
+ t_high1 = tp[tsize - 2]; /* Never stray. TSIZE is >= 2. */
+
+/* Is there a fast sqrt instruction defined for this machine? */
+#ifdef SQRT
+ {
+ initial_approx = SQRT (t_high0 * MP_BASE_AS_DOUBLE + t_high1);
+ /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
+ become incorrect due to overflow in the conversion from double to
+ mp_limb_t above. It will typically be zero in that case, but might be
+ a small number on some machines. The most significant bit of
+ INITIAL_APPROX should be set, so that bit is a good overflow
+ indication. */
+ if ((mp_limb_signed_t) initial_approx >= 0)
+ initial_approx = ~(mp_limb_t)0;
+ }
+#else
+ /* Get a 9 bit approximation from the tables. The tables expect to
+ be indexed with the 8 high bits right below the highest bit.
+ Also, the highest result bit is not returned by the tables, and
+ must be or:ed into the result. The scheme gives 9 bits of start
+ approximation with just 256-entry 8 bit tables. */
+
+ if ((cnt & 1) == 0)
+ {
+ /* The most significant bit of t_high0 is set. */
+ initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
+ initial_approx &= 0xff;
+ initial_approx = even_approx_tab[initial_approx];
+ }
+ else
+ {
+ /* The most significant bit of t_high0 is unset,
+ the second most significant is set. */
+ initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
+ initial_approx &= 0xff;
+ initial_approx = odd_approx_tab[initial_approx];
+ }
+ initial_approx |= 0x100;
+ initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
+
+ /* Perform small precision Newtonian iterations to get a full word
+ approximation. For small operands, these iterations will do the
+ entire job. */
+ if (t_high0 == ~(mp_limb_t)0)
+ initial_approx = t_high0;
+ else
+ {
+ mp_limb_t quot;
+
+ if (t_high0 >= initial_approx)
+ initial_approx = t_high0 + 1;
+
+ /* First get about 18 bits with pure C arithmetics. */
+ quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2;
+ initial_approx = (initial_approx + quot) / 2;
+ initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+
+ /* Now get a full word by one (or for > 36 bit machines) several
+ iterations. */
+ for (i = 18; i < BITS_PER_MP_LIMB; i <<= 1)
+ {
+ mp_limb_t ignored_remainder;
+
+ udiv_qrnnd (quot, ignored_remainder,
+ t_high0, t_high1, initial_approx);
+ initial_approx = (initial_approx + quot) / 2;
+ initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+ }
+ }
+#endif
+
+ rp[0] = initial_approx;
+ rsize = 1;
+
+#ifdef SQRT_DEBUG
+ printf ("\n\nT = ");
+ mpn_dump (tp, tsize);
+#endif
+
+ if (tsize > 2)
+ {
+ /* Determine the successive precisions to use in the iteration. We
+ minimize the precisions, beginning with the highest (i.e. last
+ iteration) to the lowest (i.e. first iteration). */
+
+ xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+ ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+ t_end_ptr = tp + tsize;
+
+ tmp = tsize / 2;
+ for (i = 0;; i++)
+ {
+ tsize = (tmp + 1) / 2;
+ if (tmp == tsize)
+ break;
+ tsizes[i] = tsize + tmp;
+ tmp = tsize;
+ }
+
+ /* Main Newton iteration loop. For big arguments, most of the
+ time is spent here. */
+
+ /* It is possible to do a great optimization here. The successive
+ divisors in the mpn_divmod call below have more and more leading
+ words equal to its predecessor. Therefore the beginning of
+ each division will repeat the same work as did the last
+ division. If we could guarantee that the leading words of two
+ consecutive divisors are the same (i.e. in this case, a later
+ divisor has just more digits at the end) it would be a simple
+ matter of just using the old remainder of the last division in
+ a subsequent division, to take care of this optimization. This
+ idea would surely make a difference even for small arguments. */
+
+ /* Loop invariants:
+
+ R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1.
+ X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X.
+ R <= shiftdown_to_same_size(X). */
+
+ while (--i >= 0)
+ {
+ mp_limb_t cy;
+#ifdef SQRT_DEBUG
+ mp_limb_t old_least_sign_r = rp[0];
+ mp_size_t old_rsize = rsize;
+
+ printf ("R = ");
+ mpn_dump (rp, rsize);
+#endif
+ tsize = tsizes[i];
+
+ /* Need to copy the numerator into temporary space, as
+ mpn_divmod overwrites its numerator argument with the
+ remainder (which we currently ignore). */
+ MPN_COPY (ttp, t_end_ptr - tsize, tsize);
+ cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
+ xsize = tsize - rsize;
+
+#ifdef SQRT_DEBUG
+ printf ("X =%d ", cy);
+ mpn_dump (xp, xsize);
+#endif
+
+ /* Add X and R with the most significant limbs aligned,
+ temporarily ignoring at least one limb at the low end of X. */
+ tmp = xsize - rsize;
+ cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize);
+
+ /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get
+ intermediate roots that'd need an extra bit. We don't want to
+ handle that since it would make the subsequent divisor
+ non-normalized, so round such roots down to be only ones in the
+ current precision. */
+ if (cy == 2)
+ {
+ mp_size_t j;
+ for (j = xsize; j >= 0; j--)
+ xp[j] = ~(mp_limb_t)0;
+ }
+
+ /* Divide X by 2 and put the result in R. This is the new
+ approximation. Shift in the carry from the addition. */
+ mpn_rshift (rp, xp, xsize, 1);
+ rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+ rsize = xsize;
+#ifdef SQRT_DEBUG
+ if (old_least_sign_r != rp[rsize - old_rsize])
+ printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
+ i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
+ 2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]);
+#endif
+ }
+ }
+
+#ifdef SQRT_DEBUG
+ printf ("(final) R = ");
+ mpn_dump (rp, rsize);
+#endif
+
+ /* We computed the square root of OP * 2**(2*floor(cnt/2)).
+ This has resulted in R being 2**floor(cnt/2) too large.
+ Shift it down here to fix that. */
+ if (cnt / 2 != 0)
+ {
+ mpn_rshift (rp, rp, rsize, cnt/2);
+ rsize -= rp[rsize - 1] == 0;
+ }
+
+ /* Calculate the remainder. */
+ mpn_mul_n (tp, rp, rp, rsize);
+ tsize = rsize + rsize;
+ tsize -= tp[tsize - 1] == 0;
+ if (op_size < tsize
+ || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0))
+ {
+ /* R is too large. Decrement it. */
+
+ /* These operations can't overflow. */
+ cy_limb = mpn_sub_n (tp, tp, rp, rsize);
+ cy_limb += mpn_sub_n (tp, tp, rp, rsize);
+ mpn_decr_u (tp + rsize, cy_limb);
+ mpn_incr_u (tp, (mp_limb_t) 1);
+
+ mpn_decr_u (rp, (mp_limb_t) 1);
+
+#ifdef SQRT_DEBUG
+ printf ("(adjusted) R = ");
+ mpn_dump (rp, rsize);
+#endif
+ }
+
+ if (rem_ptr != NULL)
+ {
+ cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize);
+ MPN_NORMALIZE (rem_ptr, op_size);
+ TMP_FREE (marker);
+ return op_size;
+ }
+ else
+ {
+ /* No remainder wanted: just report whether OP was a perfect
+ square (result 0) or not (any non-zero value). */
+ int res;
+ res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size);
+ TMP_FREE (marker);
+ return res;
+ }
+}
diff --git a/rts/gmp/mpn/generic/sub_n.c b/rts/gmp/mpn/generic/sub_n.c
new file mode 100644
index 0000000000..4f2f06099c
--- /dev/null
+++ b/rts/gmp/mpn/generic/sub_n.c
@@ -0,0 +1,62 @@
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ register mp_srcptr s2_ptr;
+ mp_size_t size;
+#endif
+{
+ register mp_limb_t x, y, cy;
+ register mp_size_t j;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do
+ {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to subtrahend */
+ cy = (y < cy); /* get out carry from that addition */
+ y = x - y; /* main subtract */
+ cy = (y > x) + cy; /* get out carry from the subtract, combine */
+ res_ptr[j] = y;
+ }
+ while (++j != 0);
+
+ return cy;
+}
diff --git a/rts/gmp/mpn/generic/submul_1.c b/rts/gmp/mpn/generic/submul_1.c
new file mode 100644
index 0000000000..c7c08ee4af
--- /dev/null
+++ b/rts/gmp/mpn/generic/submul_1.c
@@ -0,0 +1,65 @@
+/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+ by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
+ from the limb vector pointed to by RES_PTR. Return the most significant
+ limb of the product, adjusted for carry-out from the subtraction.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+ register mp_ptr res_ptr;
+ register mp_srcptr s1_ptr;
+ mp_size_t s1_size;
+ register mp_limb_t s2_limb;
+{
+ register mp_limb_t cy_limb;
+ register mp_size_t j;
+ register mp_limb_t prod_high, prod_low;
+ register mp_limb_t x;
+
+ /* The loop counter and index J goes from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do
+ {
+ umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x - prod_low;
+ cy_limb += (prod_low > x);
+ res_ptr[j] = prod_low;
+ }
+ while (++j != 0);
+
+ return cy_limb;
+}
diff --git a/rts/gmp/mpn/generic/tdiv_qr.c b/rts/gmp/mpn/generic/tdiv_qr.c
new file mode 100644
index 0000000000..b748b5d810
--- /dev/null
+++ b/rts/gmp/mpn/generic/tdiv_qr.c
@@ -0,0 +1,401 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+ write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
+ qxn is non-zero, generate that many fraction limbs and append them after the
+ other quotient limbs, and update the remainder accordningly. The input
+ operands are unaffected.
+
+ Preconditions:
+ 1. The most significant limb of of the divisor must be non-zero.
+ 2. No argument overlap is permitted. (??? relax this ???)
+ 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
+
+ The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+ complexity of multiplication.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
+#endif
+
+/* Extract the middle limb from ((h,,l) << cnt) */
+#define SHL(h,l,cnt) \
+ ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
+
+void
+#if __STDC__
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+ mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+#else
+mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_ptr rp;
+ mp_size_t qxn;
+ mp_srcptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+#endif
+{
+ /* FIXME:
+ 1. qxn
+ 2. pass allocated storage in additional parameter?
+ */
+ if (qxn != 0)
+ abort ();
+
+ switch (dn)
+ {
+ case 0:
+ DIVIDE_BY_ZERO;
+
+ case 1:
+ {
+ rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
+ return;
+ }
+
+ case 2:
+ {
+ int cnt;
+ mp_ptr n2p, d2p;
+ mp_limb_t qhl, cy;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ count_leading_zeros (cnt, dp[dn - 1]);
+ if (cnt != 0)
+ {
+ d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+ if (cy == 0)
+ qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
+ }
+ else
+ {
+ d2p = (mp_ptr) dp;
+ n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np, nn);
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
+ }
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ TMP_FREE (marker);
+ return;
+ }
+
+ default:
+ {
+ int adjust;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
+ if (nn + adjust >= 2 * dn)
+ {
+ mp_ptr n2p, d2p;
+ mp_limb_t cy;
+ int cnt;
+ count_leading_zeros (cnt, dp[dn - 1]);
+
+ qp[nn - dn] = 0; /* zero high quotient limb */
+ if (cnt != 0) /* normalize divisor if needed */
+ {
+ d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ nn += adjust;
+ }
+ else
+ {
+ d2p = (mp_ptr) dp;
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np, nn);
+ n2p[nn] = 0;
+ nn += adjust;
+ }
+
+ if (dn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ else if (dn < BZ_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
+ else
+ {
+ /* Perform 2*dn / dn limb divisions as long as the limbs
+ in np last. */
+ mp_ptr q2p = qp + nn - 2 * dn;
+ n2p += nn - 2 * dn;
+ mpn_bz_divrem_n (q2p, n2p, d2p, dn);
+ nn -= dn;
+ while (nn >= 2 * dn)
+ {
+ mp_limb_t c;
+ q2p -= dn; n2p -= dn;
+ c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
+ ASSERT_ALWAYS (c == 0);
+ nn -= dn;
+ }
+
+ if (nn != dn)
+ {
+ n2p -= nn - dn;
+ /* In theory, we could fall out to the cute code below
+ since we now have exactly the situation that code
+ is designed to handle. We botch this badly and call
+ the basic mpn_sb_divrem_mn! */
+ if (dn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ else
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
+ }
+ }
+
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ TMP_FREE (marker);
+ return;
+ }
+
+ /* When we come here, the numerator/partial remainder is less
+ than twice the size of the denominator. */
+
+ {
+ /* Problem:
+
+ Divide a numerator N with nn limbs by a denominator D with dn
+ limbs forming a quotient of nn-dn+1 limbs. When qn is small
+ compared to dn, conventional division algorithms perform poorly.
+ We want an algorithm that has an expected running time that is
+ dependent only on qn. It is assumed that the most significant
+ limb of the numerator is smaller than the most significant limb
+ of the denominator.
+
+ Algorithm (very informally stated):
+
+ 1) Divide the 2 x qn most significant limbs from the numerator
+ by the qn most significant limbs from the denominator. Call
+ the result qest. This is either the correct quotient, but
+ might be 1 or 2 too large. Compute the remainder from the
+ division. (This step is implemented by a mpn_divrem call.)
+
+ 2) Is the most significant limb from the remainder < p, where p
+ is the product of the most significant limb from the quotient
+ and the next(d). (Next(d) denotes the next ignored limb from
+ the denominator.) If it is, decrement qest, and adjust the
+ remainder accordingly.
+
+ 3) Is the remainder >= qest? If it is, qest is the desired
+ quotient. The algorithm terminates.
+
+ 4) Subtract qest x next(d) from the remainder. If there is
+ borrow out, decrement qest, and adjust the remainder
+ accordingly.
+
+ 5) Skip one word from the denominator (i.e., let next(d) denote
+ the next less significant limb. */
+
+ mp_size_t qn;
+ mp_ptr n2p, d2p;
+ mp_ptr tp;
+ mp_limb_t cy;
+ mp_size_t in, rn;
+ mp_limb_t quotient_too_large;
+ int cnt;
+
+ qn = nn - dn;
+ qp[qn] = 0; /* zero high quotient limb */
+ qn += adjust; /* qn cannot become bigger */
+
+ if (qn == 0)
+ {
+ MPN_COPY (rp, np, dn);
+ TMP_FREE (marker);
+ return;
+ }
+
+ in = dn - qn; /* (at least partially) ignored # of limbs in ops */
+ /* Normalize denominator by shifting it to the left such that its
+ most significant bit is set. Then shift the numerator the same
+ amount, to mathematically preserve quotient. */
+ count_leading_zeros (cnt, dp[dn - 1]);
+ if (cnt != 0)
+ {
+ d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
+
+ mpn_lshift (d2p, dp + in, qn, cnt);
+ d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
+
+ n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+ if (adjust)
+ {
+ n2p[2 * qn] = cy;
+ n2p++;
+ }
+ else
+ {
+ n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
+ }
+ }
+ else
+ {
+ d2p = (mp_ptr) dp + in;
+
+ n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+ if (adjust)
+ {
+ n2p[2 * qn] = 0;
+ n2p++;
+ }
+ }
+
+ /* Get an approximate quotient using the extracted operands. */
+ if (qn == 1)
+ {
+ mp_limb_t q0, r0;
+ mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
+ /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
+ temps here. This doesn't hurt code quality on any machines
+ so we do it unconditionally. */
+ gcc272bug_n1 = n2p[1];
+ gcc272bug_n0 = n2p[0];
+ gcc272bug_d0 = d2p[0];
+ udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
+ n2p[0] = r0;
+ qp[0] = q0;
+ }
+ else if (qn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
+ else if (qn < BZ_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
+ else
+ mpn_bz_divrem_n (qp, n2p, d2p, qn);
+
+ rn = qn;
+ /* Multiply the first ignored divisor limb by the most significant
+ quotient limb. If that product is > the partial remainder's
+ most significant limb, we know the quotient is too large. This
+ test quickly catches most cases where the quotient is too large;
+ it catches all cases where the quotient is 2 too large. */
+ {
+ mp_limb_t dl, x;
+ mp_limb_t h, l;
+
+ if (in - 2 < 0)
+ dl = 0;
+ else
+ dl = dp[in - 2];
+
+ x = SHL (dp[in - 1], dl, cnt);
+ umul_ppmm (h, l, x, qp[qn - 1]);
+
+ if (n2p[qn - 1] < h)
+ {
+ mp_limb_t cy;
+
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ cy = mpn_add_n (n2p, n2p, d2p, qn);
+ if (cy)
+ {
+ /* The partial remainder is safely large. */
+ n2p[qn] = cy;
+ ++rn;
+ }
+ }
+ }
+
+ quotient_too_large = 0;
+ if (cnt != 0)
+ {
+ mp_limb_t cy1, cy2;
+
+ /* Append partially used numerator limb to partial remainder. */
+ cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
+ n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
+
+ /* Update partial remainder with partially used divisor limb. */
+ cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
+ if (qn != rn)
+ {
+ if (n2p[qn] < cy2)
+ abort ();
+ n2p[qn] -= cy2;
+ }
+ else
+ {
+ n2p[qn] = cy1 - cy2;
+
+ quotient_too_large = (cy1 < cy2);
+ ++rn;
+ }
+ --in;
+ }
+ /* True: partial remainder now is neutral, i.e., it is not shifted up. */
+
+ tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
+
+ if (in < qn)
+ {
+ if (in == 0)
+ {
+ MPN_COPY (rp, n2p, rn);
+ if (rn != dn)
+ abort ();
+ goto foo;
+ }
+ mpn_mul (tp, qp, qn, dp, in);
+ }
+ else
+ mpn_mul (tp, dp, in, qp, qn);
+
+ cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+ MPN_COPY (rp + in, n2p, dn - in);
+ quotient_too_large |= cy;
+ cy = mpn_sub_n (rp, np, tp, in);
+ cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+ quotient_too_large |= cy;
+ foo:
+ if (quotient_too_large)
+ {
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ mpn_add_n (rp, rp, dp, dn);
+ }
+ }
+ TMP_FREE (marker);
+ return;
+ }
+ }
+}
diff --git a/rts/gmp/mpn/generic/udiv_w_sdiv.c b/rts/gmp/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 0000000000..061cce86e1
--- /dev/null
+++ b/rts/gmp/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,131 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+ division.
+
+ Contributed by Peter L. Montgomery.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY SAFE
+ TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE
+ GNU MP RELEASE.
+
+
+Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (rp, a1, a0, d)
+ mp_limb_t *rp, a1, a0, d;
+{
+ mp_limb_t q, r;
+ mp_limb_t c0, c1, b1;
+
+ if ((mp_limb_signed_t) d >= 0)
+ {
+ if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
+ {
+ /* dividend, divisor, and quotient are nonnegative */
+ sdiv_qrnnd (q, r, a1, a0, d);
+ }
+ else
+ {
+ /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+ sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
+ /* Divide (c1*2^32 + c0) by d */
+ sdiv_qrnnd (q, r, c1, c0, d);
+ /* Add 2^31 to quotient */
+ q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+ }
+ }
+ else
+ {
+ b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */
+ c1 = a1 >> 1; /* A/2 */
+ c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
+
+ if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */
+ {
+ sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+ r = 2*r + (a0 & 1); /* Remainder from A/(2*b1) */
+ if ((d & 1) != 0)
+ {
+ if (r >= q)
+ r = r - q;
+ else if (q - r <= d)
+ {
+ r = r - q + d;
+ q--;
+ }
+ else
+ {
+ r = r - q + 2*d;
+ q -= 2;
+ }
+ }
+ }
+ else if (c1 < b1) /* So 2^31 <= (A/2)/b1 < 2^32 */
+ {
+ c1 = (b1 - 1) - c1;
+ c0 = ~c0; /* logical NOT */
+
+ sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+ q = ~q; /* (A/2)/b1 */
+ r = (b1 - 1) - r;
+
+ r = 2*r + (a0 & 1); /* A/(2*b1) */
+
+ if ((d & 1) != 0)
+ {
+ if (r >= q)
+ r = r - q;
+ else if (q - r <= d)
+ {
+ r = r - q + d;
+ q--;
+ }
+ else
+ {
+ r = r - q + 2*d;
+ q -= 2;
+ }
+ }
+ }
+ else /* Implies c1 = b1 */
+ { /* Hence a1 = d - 1 = 2*b1 - 1 */
+ if (a0 >= -d)
+ {
+ q = -1;
+ r = a0 + d;
+ }
+ else
+ {
+ q = -2;
+ r = a0 + 2*d;
+ }
+ }
+ }
+
+ *rp = r;
+ return q;
+}
diff --git a/rts/gmp/mpn/hppa/README b/rts/gmp/mpn/hppa/README
new file mode 100644
index 0000000000..97e7abe011
--- /dev/null
+++ b/rts/gmp/mpn/hppa/README
@@ -0,0 +1,91 @@
+This directory contains mpn functions for various HP PA-RISC chips. Code
+that runs faster on the PA7100 and later implementations, is in the pa7100
+directory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+ Load and Store timing
+
+On the PA7000 no memory instructions can issue the two cycles after a store.
+For the PA7100, this is reduced to one cycle.
+
+The PA7100 has a lookup-free cache, so it helps to schedule loads and the
+dependent instruction really far from each other.
+
+STATUS
+
+1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
+ instructions below (but some sw pipelining is needed to avoid the
+ xmpyu-fstds delay):
+
+ fldds s1_ptr
+
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+
+ addc
+ stws res_ptr
+ addc
+ stws res_ptr
+
+ addib Loop
+
+2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
+ (asymptotically) on the PA7100, using the instructions below. With proper
+ sw pipelining and the unrolling level below, the speed becomes 8
+ cycles/limb.
+
+ fldds s1_ptr
+ fldds s1_ptr
+
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+ xmpyu
+ fstds N(%r30)
+
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ ldws N(%r30)
+ addc
+ addc
+ addc
+ addc
+ addc %r0,%r0,cy-limb
+
+ ldws res_ptr
+ ldws res_ptr
+ ldws res_ptr
+ ldws res_ptr
+ add
+ stws res_ptr
+ addc
+ stws res_ptr
+ addc
+ stws res_ptr
+ addc
+ stws res_ptr
+
+ addib
+
+3. For the PA8000 we have to stick to using 32-bit limbs before compiler
+ support emerges. But we want to use 64-bit operations whenever possible,
+ in particular for loads and stores. It is possible to handle mpn_add_n
+ efficiently by rotating (when s1/s2 are aligned), masking+bit field
+ inserting when (they are not). The speed should double compared to the
+ code used today.
diff --git a/rts/gmp/mpn/hppa/add_n.s b/rts/gmp/mpn/hppa/add_n.s
new file mode 100644
index 0000000000..c53b2f71b3
--- /dev/null
+++ b/rts/gmp/mpn/hppa/add_n.s
@@ -0,0 +1,58 @@
+; HP-PA __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; One might want to unroll this as for other processors, but it turns
+; out that the data cache contention after a store makes such
+; unrolling useless. We can't come under 5 cycles/limb anyway.
+
+ .code
+ .export __gmpn_add_n
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,= -1,%r23,L$end ; check for (SIZE == 1)
+ add %r20,%r19,%r28 ; add first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,<> -1,%r23,L$loop
+ addc %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r0,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/gmp-mparam.h b/rts/gmp/mpn/hppa/gmp-mparam.h
new file mode 100644
index 0000000000..98b6d9ce3c
--- /dev/null
+++ b/rts/gmp/mpn/hppa/gmp-mparam.h
@@ -0,0 +1,63 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the PA7100 using GCC. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 30
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 172
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 59
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 185
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 96
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 122
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 18
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 33
+#endif
diff --git a/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s b/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s
new file mode 100644
index 0000000000..c7d218f922
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/addmul_1.s
@@ -0,0 +1,102 @@
+; HP-PA-1.1 __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r26
+; s1_ptr r25
+; size r24
+; s2_limb r23
+
+; This runs at 11 cycles/limb on a PA7000. With the used instructions, it
+; can not become faster due to data cache contention after a store. On the
+; PA7100 it runs at 10 cycles/limb, and that can not be improved either,
+; since only the xmpyu does not need the integer pipeline, so the only
+; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
+; on the PA7100.
+
+; There are some ideas described in mul_1.s that applies to this code too.
+
+ .code
+ .export __gmpn_addmul_1
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) ; move s2_limb ...
+ addib,= -1,%r24,L$just_one_limb
+ fldws -16(%r30),%fr4 ; ... into fr4
+ add %r0,%r0,%r0 ; clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 ; least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L$end
+ ldw -12(%r30),%r1
+
+; Main loop
+L$loop ldws 0(%r26),%r29
+ fldws,ma 4(%r25),%fr5
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addc %r0,%r28,%r28
+ addib,<> -1,%r24,L$loop
+ ldw -12(%r30),%r1
+
+L$end ldw 0(%r26),%r29
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ ldws 0(%r26),%r29
+ addc %r0,%r28,%r28
+ add %r29,%r19,%r19
+ stws,ma %r19,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+L$just_one_limb
+ xmpyu %fr4,%fr5,%fr6
+ ldw 0(%r26),%r29
+ fstds %fr6,-16(%r30)
+ ldw -12(%r30),%r1
+ ldw -16(%r30),%r28
+ add %r29,%r1,%r19
+ stw %r19,0(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/mul_1.s b/rts/gmp/mpn/hppa/hppa1_1/mul_1.s
new file mode 100644
index 0000000000..4512fddec9
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/mul_1.s
@@ -0,0 +1,98 @@
+; HP-PA-1.1 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r26
+; s1_ptr r25
+; size r24
+; s2_limb r23
+
+; This runs at 9 cycles/limb on a PA7000. With the used instructions, it can
+; not become faster due to data cache contention after a store. On the
+; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
+; only the xmpyu does not need the integer pipeline, so the only dual-issue
+; we will get are addc+xmpyu. Unrolling would not help either CPU.
+
+; We could use fldds to read two limbs at a time from the S1 array, and that
+; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
+; PA7100, respectively. We don't do that since it does not seem worth the
+; (alignment) troubles...
+
+; At least the PA7100 is rumored to be able to deal with cache-misses
+; without stalling instruction issue. If this is true, and the cache is
+; actually also lockup-free, we should use a deeper software pipeline, and
+; load from S1 very early! (The loads and stores to -12(sp) will surely be
+; in the cache.)
+
+ .code
+ .export __gmpn_mul_1
+__gmpn_mul_1
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) ; move s2_limb ...
+ addib,= -1,%r24,L$just_one_limb
+ fldws -16(%r30),%fr4 ; ... into fr4
+ add %r0,%r0,%r0 ; clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 ; least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L$end
+ ldw -12(%r30),%r1
+
+; Main loop
+L$loop fldws,ma 4(%r25),%fr5
+ stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addib,<> -1,%r24,L$loop
+ ldw -12(%r30),%r1
+
+L$end stws,ma %r19,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ stws,ma %r19,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+L$just_one_limb
+ xmpyu %fr4,%fr5,%fr6
+ fstds %fr6,-16(%r30)
+ ldw -16(%r30),%r28
+ ldo -64(%r30),%r30
+ bv 0(%r2)
+ fstws %fr6R,0(%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s
new file mode 100644
index 0000000000..4f4be08b37
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/add_n.s
@@ -0,0 +1,75 @@
+; HP-PA __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+; This is optimized for the PA7100, where is runs at 4.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+ .code
+ .export __gmpn_add_n
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,<= -5,%r23,L$rest
+ add %r20,%r19,%r28 ; add first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addc %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -4,%r23,L$loop
+ addc %r20,%r19,%r28
+
+L$rest addib,= 4,%r23,L$end
+ nop
+L$eloop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -1,%r23,L$eloop
+ addc %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r0,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S b/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S
new file mode 100644
index 0000000000..04db06822e
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/addmul_1.S
@@ -0,0 +1,189 @@
+; HP-PA 7100/7200 __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define res_ptr %r26
+#define s1_ptr %r25
+#define size %r24
+#define s2_limb %r23
+
+#define cylimb %r28
+#define s0 %r19
+#define s1 %r20
+#define s2 %r3
+#define s3 %r4
+#define lo0 %r21
+#define lo1 %r5
+#define lo2 %r6
+#define lo3 %r7
+#define hi0 %r22
+#define hi1 %r23 /* safe to reuse */
+#define hi2 %r29
+#define hi3 %r1
+
+ .code
+ .export __gmpn_addmul_1
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+
+ ldo 128(%r30),%r30
+ stws s2_limb,-16(%r30)
+ add %r0,%r0,cylimb ; clear cy and cylimb
+ addib,< -4,size,L$few_limbs
+ fldws -16(%r30),%fr31R
+
+ ldo -112(%r30),%r31
+ stw %r3,-96(%r30)
+ stw %r4,-92(%r30)
+ stw %r5,-88(%r30)
+ stw %r6,-84(%r30)
+ stw %r7,-80(%r30)
+
+ bb,>=,n s1_ptr,29,L$0
+
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r31)
+ ldws -16(%r31),cylimb
+ ldws -12(%r31),lo0
+ add s0,lo0,s0
+ addib,< -1,size,L$few_limbs
+ stws,ma s0,4(res_ptr)
+
+; start software pipeline ----------------------------------------------------
+L$0 fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ xmpyu %fr4L,%fr31R,%fr5
+ xmpyu %fr4R,%fr31R,%fr6
+ xmpyu %fr8L,%fr31R,%fr9
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ fstds %fr6,-8(%r31)
+ fstds %fr9,0(%r31)
+ fstds %fr10,8(%r31)
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ addc lo1,hi0,lo1
+ addc lo2,hi1,lo2
+ addc lo3,hi2,lo3
+
+ addib,< -4,size,L$end
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; main loop ------------------------------------------------------------------
+L$loop fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ ldws 0(res_ptr),s0
+ xmpyu %fr4L,%fr31R,%fr5
+ ldws 4(res_ptr),s1
+ xmpyu %fr4R,%fr31R,%fr6
+ ldws 8(res_ptr),s2
+ xmpyu %fr8L,%fr31R,%fr9
+ ldws 12(res_ptr),s3
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ add s0,lo0,s0
+ fstds %fr6,-8(%r31)
+ addc s1,lo1,s1
+ fstds %fr9,0(%r31)
+ addc s2,lo2,s2
+ fstds %fr10,8(%r31)
+ addc s3,lo3,s3
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ stws,ma s0,4(res_ptr)
+ addc lo1,hi0,lo1
+ stws,ma s1,4(res_ptr)
+ addc lo2,hi1,lo2
+ stws,ma s2,4(res_ptr)
+ addc lo3,hi2,lo3
+ stws,ma s3,4(res_ptr)
+
+ addib,>= -4,size,L$loop
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; finish software pipeline ---------------------------------------------------
+L$end ldws 0(res_ptr),s0
+ ldws 4(res_ptr),s1
+ ldws 8(res_ptr),s2
+ ldws 12(res_ptr),s3
+
+ add s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ addc s1,lo1,s1
+ stws,ma s1,4(res_ptr)
+ addc s2,lo2,s2
+ stws,ma s2,4(res_ptr)
+ addc s3,lo3,s3
+ stws,ma s3,4(res_ptr)
+
+; restore callee-saves registers ---------------------------------------------
+ ldw -96(%r30),%r3
+ ldw -92(%r30),%r4
+ ldw -88(%r30),%r5
+ ldw -84(%r30),%r6
+ ldw -80(%r30),%r7
+
+L$few_limbs
+ addib,=,n 4,size,L$ret
+L$loop2 fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r30)
+ ldws -16(%r30),hi0
+ ldws -12(%r30),lo0
+ addc lo0,cylimb,lo0
+ addc %r0,hi0,cylimb
+ add s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ addib,<> -1,size,L$loop2
+ nop
+
+L$ret addc %r0,cylimb,cylimb
+ bv 0(%r2)
+ ldo -128(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s
new file mode 100644
index 0000000000..31669b1a55
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/lshift.s
@@ -0,0 +1,83 @@
+; HP-PA __gmpn_lshift --
+; This is optimized for the PA7100, where it runs at 3.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_lshift
+__gmpn_lshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ sh2add %r24,%r25,%r25
+ sh2add %r24,%r26,%r26
+ ldws,mb -4(0,%r25),%r22
+ subi 32,%r23,%r1
+ mtsar %r1
+ addib,= -1,%r24,L$0004
+ vshd %r0,%r22,%r28 ; compute carry out limb
+ ldws,mb -4(0,%r25),%r29
+ addib,<= -5,%r24,L$rest
+ vshd %r22,%r29,%r20
+
+L$loop ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,> -4,%r24,L$loop
+ vshd %r22,%r29,%r20
+
+L$rest addib,= 4,%r24,L$end1
+ nop
+L$eloop ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ addib,<= -1,%r24,L$end2
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,> -1,%r24,L$eloop
+ vshd %r22,%r29,%r20
+
+L$end1 stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+L$end2 stws,mb %r20,-4(0,%r26)
+L$0004 vshd %r22,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s
new file mode 100644
index 0000000000..d32b10b4b1
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/rshift.s
@@ -0,0 +1,80 @@
+; HP-PA __gmpn_rshift --
+; This is optimized for the PA7100, where it runs at 3.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_rshift
+__gmpn_rshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L$0004
+ vshd %r22,%r0,%r28 ; compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,<= -5,%r24,L$rest
+ vshd %r29,%r22,%r20
+
+L$loop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -4,%r24,L$loop
+ vshd %r29,%r22,%r20
+
+L$rest addib,= 4,%r24,L$end1
+ nop
+L$eloop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,<= -1,%r24,L$end2
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -1,%r24,L$eloop
+ vshd %r29,%r22,%r20
+
+L$end1 stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+L$end2 stws,ma %r20,4(0,%r26)
+L$0004 vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s b/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s
new file mode 100644
index 0000000000..0eec41c4b3
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/sub_n.s
@@ -0,0 +1,76 @@
+; HP-PA __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+; This is optimized for the PA7100, where it runs at 4.25 cycles/limb
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+ .code
+ .export __gmpn_sub_n
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,<= -5,%r23,L$rest
+ sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ subb %r20,%r19,%r28
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -4,%r23,L$loop
+ subb %r20,%r19,%r28
+
+L$rest addib,= 4,%r23,L$end
+ nop
+L$eloop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,> -1,%r23,L$eloop
+ subb %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ addc %r0,%r0,%r28
+ bv 0(%r2)
+ subi 1,%r28,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S b/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S
new file mode 100644
index 0000000000..0fba21dcef
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/pa7100/submul_1.S
@@ -0,0 +1,195 @@
+; HP-PA 7100/7200 __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define res_ptr %r26
+#define s1_ptr %r25
+#define size %r24
+#define s2_limb %r23
+
+#define cylimb %r28
+#define s0 %r19
+#define s1 %r20
+#define s2 %r3
+#define s3 %r4
+#define lo0 %r21
+#define lo1 %r5
+#define lo2 %r6
+#define lo3 %r7
+#define hi0 %r22
+#define hi1 %r23 /* safe to reuse */
+#define hi2 %r29
+#define hi3 %r1
+
+ .code
+ .export __gmpn_submul_1
+__gmpn_submul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+
+ ldo 128(%r30),%r30
+ stws s2_limb,-16(%r30)
+ add %r0,%r0,cylimb ; clear cy and cylimb
+ addib,< -4,size,L$few_limbs
+ fldws -16(%r30),%fr31R
+
+ ldo -112(%r30),%r31
+ stw %r3,-96(%r30)
+ stw %r4,-92(%r30)
+ stw %r5,-88(%r30)
+ stw %r6,-84(%r30)
+ stw %r7,-80(%r30)
+
+ bb,>=,n s1_ptr,29,L$0
+
+ fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r31)
+ ldws -16(%r31),cylimb
+ ldws -12(%r31),lo0
+ sub s0,lo0,s0
+ add s0,lo0,%r0 ; invert cy
+ addib,< -1,size,L$few_limbs
+ stws,ma s0,4(res_ptr)
+
+; start software pipeline ----------------------------------------------------
+L$0 fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ xmpyu %fr4L,%fr31R,%fr5
+ xmpyu %fr4R,%fr31R,%fr6
+ xmpyu %fr8L,%fr31R,%fr9
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ fstds %fr6,-8(%r31)
+ fstds %fr9,0(%r31)
+ fstds %fr10,8(%r31)
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ addc lo1,hi0,lo1
+ addc lo2,hi1,lo2
+ addc lo3,hi2,lo3
+
+ addib,< -4,size,L$end
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; main loop ------------------------------------------------------------------
+L$loop fldds,ma 8(s1_ptr),%fr4
+ fldds,ma 8(s1_ptr),%fr8
+
+ ldws 0(res_ptr),s0
+ xmpyu %fr4L,%fr31R,%fr5
+ ldws 4(res_ptr),s1
+ xmpyu %fr4R,%fr31R,%fr6
+ ldws 8(res_ptr),s2
+ xmpyu %fr8L,%fr31R,%fr9
+ ldws 12(res_ptr),s3
+ xmpyu %fr8R,%fr31R,%fr10
+
+ fstds %fr5,-16(%r31)
+ sub s0,lo0,s0
+ fstds %fr6,-8(%r31)
+ subb s1,lo1,s1
+ fstds %fr9,0(%r31)
+ subb s2,lo2,s2
+ fstds %fr10,8(%r31)
+ subb s3,lo3,s3
+ subb %r0,%r0,lo0 ; these two insns ...
+ add lo0,lo0,%r0 ; ... just invert cy
+
+ ldws -16(%r31),hi0
+ ldws -12(%r31),lo0
+ ldws -8(%r31),hi1
+ ldws -4(%r31),lo1
+ ldws 0(%r31),hi2
+ ldws 4(%r31),lo2
+ ldws 8(%r31),hi3
+ ldws 12(%r31),lo3
+
+ addc lo0,cylimb,lo0
+ stws,ma s0,4(res_ptr)
+ addc lo1,hi0,lo1
+ stws,ma s1,4(res_ptr)
+ addc lo2,hi1,lo2
+ stws,ma s2,4(res_ptr)
+ addc lo3,hi2,lo3
+ stws,ma s3,4(res_ptr)
+
+ addib,>= -4,size,L$loop
+ addc %r0,hi3,cylimb ; propagate carry into cylimb
+; finish software pipeline ---------------------------------------------------
+L$end ldws 0(res_ptr),s0
+ ldws 4(res_ptr),s1
+ ldws 8(res_ptr),s2
+ ldws 12(res_ptr),s3
+
+ sub s0,lo0,s0
+ stws,ma s0,4(res_ptr)
+ subb s1,lo1,s1
+ stws,ma s1,4(res_ptr)
+ subb s2,lo2,s2
+ stws,ma s2,4(res_ptr)
+ subb s3,lo3,s3
+ stws,ma s3,4(res_ptr)
+ subb %r0,%r0,lo0 ; these two insns ...
+ add lo0,lo0,%r0 ; ... invert cy
+
+; restore callee-saves registers ---------------------------------------------
+ ldw -96(%r30),%r3
+ ldw -92(%r30),%r4
+ ldw -88(%r30),%r5
+ ldw -84(%r30),%r6
+ ldw -80(%r30),%r7
+
+L$few_limbs
+ addib,=,n 4,size,L$ret
+L$loop2 fldws,ma 4(s1_ptr),%fr4
+ ldws 0(res_ptr),s0
+ xmpyu %fr4,%fr31R,%fr5
+ fstds %fr5,-16(%r30)
+ ldws -16(%r30),hi0
+ ldws -12(%r30),lo0
+ addc lo0,cylimb,lo0
+ addc %r0,hi0,cylimb
+ sub s0,lo0,s0
+ add s0,lo0,%r0 ; invert cy
+ stws,ma s0,4(res_ptr)
+ addib,<> -1,size,L$loop2
+ nop
+
+L$ret addc %r0,cylimb,cylimb
+ bv 0(%r2)
+ ldo -128(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/submul_1.s b/rts/gmp/mpn/hppa/hppa1_1/submul_1.s
new file mode 100644
index 0000000000..20a5b5ce0a
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/submul_1.s
@@ -0,0 +1,111 @@
+; HP-PA-1.1 __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r26
+; s1_ptr r25
+; size r24
+; s2_limb r23
+
+; This runs at 12 cycles/limb on a PA7000. With the used instructions, it
+; can not become faster due to data cache contention after a store. On the
+; PA7100 it runs at 11 cycles/limb, and that can not be improved either,
+; since only the xmpyu does not need the integer pipeline, so the only
+; dual-issue we will get are addc+xmpyu. Unrolling could gain a cycle/limb
+; on the PA7100.
+
+; There are some ideas described in mul_1.s that apply to this code too.
+
+; It seems possible to make this run as fast as __gmpn_addmul_1, if we use
+; sub,>>= %r29,%r19,%r22
+; addi 1,%r28,%r28
+; but that requires reworking the hairy software pipeline...
+
+ .code
+ .export __gmpn_submul_1
+__gmpn_submul_1
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ fldws,ma 4(%r25),%fr5
+ stw %r23,-16(%r30) ; move s2_limb ...
+ addib,= -1,%r24,L$just_one_limb
+ fldws -16(%r30),%fr4 ; ... into fr4
+ add %r0,%r0,%r0 ; clear carry
+ xmpyu %fr4,%fr5,%fr6
+ fldws,ma 4(%r25),%fr7
+ fstds %fr6,-16(%r30)
+ xmpyu %fr4,%fr7,%fr8
+ ldw -12(%r30),%r19 ; least significant limb in product
+ ldw -16(%r30),%r28
+
+ fstds %fr8,-16(%r30)
+ addib,= -1,%r24,L$end
+ ldw -12(%r30),%r1
+
+; Main loop
+L$loop ldws 0(%r26),%r29
+ fldws,ma 4(%r25),%fr5
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r28,%r1,%r19
+ xmpyu %fr4,%fr5,%fr6
+ ldw -16(%r30),%r28
+ fstds %fr6,-16(%r30)
+ addc %r0,%r28,%r28
+ addib,<> -1,%r24,L$loop
+ ldw -12(%r30),%r1
+
+L$end ldw 0(%r26),%r29
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r28,%r1,%r19
+ ldw -16(%r30),%r28
+ ldws 0(%r26),%r29
+ addc %r0,%r28,%r28
+ sub %r29,%r19,%r22
+ add %r22,%r19,%r0
+ stws,ma %r22,4(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+L$just_one_limb
+ xmpyu %fr4,%fr5,%fr6
+ ldw 0(%r26),%r29
+ fstds %fr6,-16(%r30)
+ ldw -12(%r30),%r1
+ ldw -16(%r30),%r28
+ sub %r29,%r1,%r22
+ add %r22,%r1,%r0
+ stw %r22,0(%r26)
+ addc %r0,%r28,%r28
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S b/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S
new file mode 100644
index 0000000000..b83d6f4dd2
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/udiv_qrnnd.S
@@ -0,0 +1,80 @@
+; HP-PA __udiv_qrnnd division support, used from longlong.h.
+; This version runs fast on PA 7000 and later.
+
+; Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; rem_ptr gr26
+; n1 gr25
+; n0 gr24
+; d gr23
+
+ .code
+L$0000 .word 0x43f00000 ; 2^64
+ .word 0x0
+ .export __gmpn_udiv_qrnnd
+__gmpn_udiv_qrnnd
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+ ldo 64(%r30),%r30
+
+ stws %r25,-16(0,%r30) ; n_hi
+ stws %r24,-12(0,%r30) ; n_lo
+#ifdef PIC
+ addil LT%L$0000,%r19
+ ldo RT%L$0000(%r1),%r19
+#else
+ ldil L%L$0000,%r19
+ ldo R%L$0000(%r19),%r19
+#endif
+ fldds -16(0,%r30),%fr5
+ stws %r23,-12(0,%r30)
+ comib,<= 0,%r25,L$1
+ fcnvxf,dbl,dbl %fr5,%fr5
+ fldds 0(0,%r19),%fr4
+ fadd,dbl %fr4,%fr5,%fr5
+L$1
+ fcpy,sgl %fr0,%fr6L
+ fldws -12(0,%r30),%fr6R
+ fcnvxf,dbl,dbl %fr6,%fr4
+
+ fdiv,dbl %fr5,%fr4,%fr5
+
+ fcnvfx,dbl,dbl %fr5,%fr4
+ fstws %fr4R,-16(%r30)
+ xmpyu %fr4R,%fr6R,%fr6
+ ldws -16(%r30),%r28
+ fstds %fr6,-16(0,%r30)
+ ldws -12(0,%r30),%r21
+ ldws -16(0,%r30),%r20
+ sub %r24,%r21,%r22
+ subb %r25,%r20,%r19
+ comib,= 0,%r19,L$2
+ ldo -64(%r30),%r30
+
+ add %r22,%r23,%r22
+ ldo -1(%r28),%r28
+L$2 bv 0(%r2)
+ stws %r22,0(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa1_1/umul.s b/rts/gmp/mpn/hppa/hppa1_1/umul.s
new file mode 100644
index 0000000000..1f1300ac9b
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa1_1/umul.s
@@ -0,0 +1,42 @@
+; Copyright (C) 1999 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+ .code
+ .export __umul_ppmm
+ .align 4
+__umul_ppmm
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldo 64(%r30),%r30
+ stw %r25,-16(0,%r30)
+ fldws -16(0,%r30),%fr22R
+ stw %r24,-16(0,%r30)
+ fldws -16(0,%r30),%fr22L
+ xmpyu %fr22R,%fr22L,%fr22
+ fstds %fr22,-16(0,%r30)
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ stw %r29,0(0,%r26)
+ bv 0(%r2)
+ ldo -64(%r30),%r30
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa2_0/add_n.s b/rts/gmp/mpn/hppa/hppa2_0/add_n.s
new file mode 100644
index 0000000000..6e97278a39
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa2_0/add_n.s
@@ -0,0 +1,88 @@
+; HP-PA 2.0 32-bit __gmpn_add_n -- Add two limb vectors of the same length > 0
+; and store sum in a third limb vector.
+
+; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .code
+ .export __gmpn_add_n
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ sub %r0,%r23,%r22
+ zdep %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ zdep %r22,29,3,%r22 ; r22 = 4 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldw 0(%r25),%r20
+ ldw 0(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,0(%r26)
+L$7 ldw 4(%r25),%r21
+ ldw 4(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,4(%r26)
+L$6 ldw 8(%r25),%r20
+ ldw 8(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,8(%r26)
+L$5 ldw 12(%r25),%r21
+ ldw 12(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,12(%r26)
+L$4 ldw 16(%r25),%r20
+ ldw 16(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,16(%r26)
+L$3 ldw 20(%r25),%r21
+ ldw 20(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,20(%r26)
+L$2 ldw 24(%r25),%r20
+ ldw 24(%r24),%r31
+ addc %r20,%r31,%r20
+ stw %r20,24(%r26)
+L$1 ldw 28(%r25),%r21
+ ldo 32(%r25),%r25
+ ldw 28(%r24),%r19
+ addc %r21,%r19,%r21
+ stw %r21,28(%r26)
+ ldo 32(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 32(%r26),%r26
+
+ bv (%r2)
+ .exit
+ addc %r0,%r0,%r28
+ .procend
diff --git a/rts/gmp/mpn/hppa/hppa2_0/sub_n.s b/rts/gmp/mpn/hppa/hppa2_0/sub_n.s
new file mode 100644
index 0000000000..7d9b50fc27
--- /dev/null
+++ b/rts/gmp/mpn/hppa/hppa2_0/sub_n.s
@@ -0,0 +1,88 @@
+; HP-PA 2.0 32-bit __gmpn_sub_n -- Subtract two limb vectors of the same
+; length > 0 and store difference in a third limb vector.
+
+; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .code
+ .export __gmpn_sub_n
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ sub %r0,%r23,%r22
+ zdep %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ zdep %r22,29,3,%r22 ; r22 = 4 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry
+
+L$loop ldw 0(%r25),%r20
+ ldw 0(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,0(%r26)
+L$7 ldw 4(%r25),%r21
+ ldw 4(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,4(%r26)
+L$6 ldw 8(%r25),%r20
+ ldw 8(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,8(%r26)
+L$5 ldw 12(%r25),%r21
+ ldw 12(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,12(%r26)
+L$4 ldw 16(%r25),%r20
+ ldw 16(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,16(%r26)
+L$3 ldw 20(%r25),%r21
+ ldw 20(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,20(%r26)
+L$2 ldw 24(%r25),%r20
+ ldw 24(%r24),%r31
+ subb %r20,%r31,%r20
+ stw %r20,24(%r26)
+L$1 ldw 28(%r25),%r21
+ ldo 32(%r25),%r25
+ ldw 28(%r24),%r19
+ subb %r21,%r19,%r21
+ stw %r21,28(%r26)
+ ldo 32(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 32(%r26),%r26
+
+ addc %r0,%r0,%r28
+ bv (%r2)
+ .exit
+ subi 1,%r28,%r28
+ .procend
diff --git a/rts/gmp/mpn/hppa/lshift.s b/rts/gmp/mpn/hppa/lshift.s
new file mode 100644
index 0000000000..f5a2daad60
--- /dev/null
+++ b/rts/gmp/mpn/hppa/lshift.s
@@ -0,0 +1,66 @@
+; HP-PA __gmpn_lshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_lshift
+__gmpn_lshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ sh2add %r24,%r25,%r25
+ sh2add %r24,%r26,%r26
+ ldws,mb -4(0,%r25),%r22
+ subi 32,%r23,%r1
+ mtsar %r1
+ addib,= -1,%r24,L$0004
+ vshd %r0,%r22,%r28 ; compute carry out limb
+ ldws,mb -4(0,%r25),%r29
+ addib,= -1,%r24,L$0002
+ vshd %r22,%r29,%r20
+
+L$loop ldws,mb -4(0,%r25),%r22
+ stws,mb %r20,-4(0,%r26)
+ addib,= -1,%r24,L$0003
+ vshd %r29,%r22,%r20
+ ldws,mb -4(0,%r25),%r29
+ stws,mb %r20,-4(0,%r26)
+ addib,<> -1,%r24,L$loop
+ vshd %r22,%r29,%r20
+
+L$0002 stws,mb %r20,-4(0,%r26)
+ vshd %r29,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+L$0003 stws,mb %r20,-4(0,%r26)
+L$0004 vshd %r22,%r0,%r20
+ bv 0(%r2)
+ stw %r20,-4(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/rshift.s b/rts/gmp/mpn/hppa/rshift.s
new file mode 100644
index 0000000000..e05e2f10b5
--- /dev/null
+++ b/rts/gmp/mpn/hppa/rshift.s
@@ -0,0 +1,63 @@
+; HP-PA __gmpn_rshift --
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s_ptr gr25
+; size gr24
+; cnt gr23
+
+ .code
+ .export __gmpn_rshift
+__gmpn_rshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L$0004
+ vshd %r22,%r0,%r28 ; compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,= -1,%r24,L$0002
+ vshd %r29,%r22,%r20
+
+L$loop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,= -1,%r24,L$0003
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,<> -1,%r24,L$loop
+ vshd %r29,%r22,%r20
+
+L$0002 stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+L$0003 stws,ma %r20,4(0,%r26)
+L$0004 vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/sub_n.s b/rts/gmp/mpn/hppa/sub_n.s
new file mode 100644
index 0000000000..8f770ad1ad
--- /dev/null
+++ b/rts/gmp/mpn/hppa/sub_n.s
@@ -0,0 +1,59 @@
+; HP-PA __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; One might want to unroll this as for other processors, but it turns
+; out that the data cache contention after a store makes such
+; unrolling useless. We can't come under 5 cycles/limb anyway.
+
+ .code
+ .export __gmpn_sub_n
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,= -1,%r23,L$end ; check for (SIZE == 1)
+ sub %r20,%r19,%r28 ; subtract first limbs ignoring cy
+
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,<> -1,%r23,L$loop
+ subb %r20,%r19,%r28
+
+L$end stws %r28,0(0,%r26)
+ addc %r0,%r0,%r28
+ bv 0(%r2)
+ subi 1,%r28,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/hppa/udiv_qrnnd.s b/rts/gmp/mpn/hppa/udiv_qrnnd.s
new file mode 100644
index 0000000000..9aa3b8a830
--- /dev/null
+++ b/rts/gmp/mpn/hppa/udiv_qrnnd.s
@@ -0,0 +1,286 @@
+; HP-PA __udiv_qrnnd division support, used from longlong.h.
+; This version runs fast on pre-PA7000 CPUs.
+
+; Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; rem_ptr gr26
+; n1 gr25
+; n0 gr24
+; d gr23
+
+; The code size is a bit excessive. We could merge the last two ds;addc
+; sequences by simply moving the "bb,< Odd" instruction down. The only
+; trouble is the FFFFFFFF code that would need some hacking.
+
+ .code
+ .export __gmpn_udiv_qrnnd
+__gmpn_udiv_qrnnd
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ comb,< %r23,0,L$largedivisor
+ sub %r0,%r23,%r1 ; clear cy as side-effect
+ ds %r0,%r1,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r28
+ ds %r25,%r23,%r25
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r23,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r28,%r28,%r28
+
+L$largedivisor
+ extru %r24,31,1,%r19 ; r19 = n0 & 1
+ bb,< %r23,31,L$odd
+ extru %r23,30,31,%r22 ; r22 = d >> 1
+ shd %r25,%r24,1,%r24 ; r24 = new n0
+ extru %r25,30,31,%r25 ; r25 = new n1
+ sub %r0,%r22,%r21
+ ds %r0,%r21,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r22,%r25
+ sh1addl %r25,%r19,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r24,%r24,%r28
+
+L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
+ shd %r25,%r24,1,%r24 ; r24 = new n0
+ extru %r25,30,31,%r25 ; r25 = new n1
+ sub %r0,%r22,%r21
+ ds %r0,%r21,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r28
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r22,%r25
+ sh1addl %r25,%r19,%r25
+; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
+ add,nuv %r28,%r25,%r25
+ addl %r25,%r1,%r25
+ addc %r0,%r28,%r28
+ sub,<< %r25,%r23,%r0
+ addl %r25,%r1,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r28,%r28
+
+; This is just a special case of the code above.
+; We come here when d == 0xFFFFFFFF
+L$FF.. add,uv %r25,%r24,%r24
+ sub,<< %r24,%r23,%r0
+ ldo 1(%r24),%r24
+ stws %r24,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r25,%r28
+
+ .exit
+ .procend
diff --git a/rts/gmp/mpn/i960/README b/rts/gmp/mpn/i960/README
new file mode 100644
index 0000000000..d68a0a83eb
--- /dev/null
+++ b/rts/gmp/mpn/i960/README
@@ -0,0 +1,9 @@
+This directory contains mpn functions for Intel i960 processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The code in this directory is not well optimized.
+
+STATUS
+
+The code in this directory has not been tested.
diff --git a/rts/gmp/mpn/i960/add_n.s b/rts/gmp/mpn/i960/add_n.s
new file mode 100644
index 0000000000..387317a397
--- /dev/null
+++ b/rts/gmp/mpn/i960/add_n.s
@@ -0,0 +1,43 @@
+# I960 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_add_n
+___gmpn_add_n:
+ mov 0,g6 # clear carry-save register
+ cmpo 1,0 # clear cy
+
+Loop: subo 1,g3,g3 # update loop counter
+ ld (g1),g5 # load from s1_ptr
+ addo 4,g1,g1 # s1_ptr++
+ ld (g2),g4 # load from s2_ptr
+ addo 4,g2,g2 # s2_ptr++
+ cmpo g6,1 # restore cy from g6, relies on cy being 0
+ addc g4,g5,g4 # main add
+ subc 0,0,g6 # save cy in g6
+ st g4,(g0) # store result to res_ptr
+ addo 4,g0,g0 # res_ptr++
+ cmpobne 0,g3,Loop # when branch is taken, clears C bit
+
+ mov g6,g0
+ ret
diff --git a/rts/gmp/mpn/i960/addmul_1.s b/rts/gmp/mpn/i960/addmul_1.s
new file mode 100644
index 0000000000..7df1418356
--- /dev/null
+++ b/rts/gmp/mpn/i960/addmul_1.s
@@ -0,0 +1,48 @@
+# I960 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+	.globl	___gmpn_addmul_1
+___gmpn_addmul_1:
+ subo g2,0,g2
+ shlo 2,g2,g4
+ subo g4,g1,g1
+ subo g4,g0,g13
+ mov 0,g0
+
+ cmpo 1,0 # clear C bit on AC.cc
+
+Loop: ld (g1)[g2*4],g5
+ emul g3,g5,g6
+ ld (g13)[g2*4],g5
+
+ addc g0,g6,g6 # relies on that C bit is clear
+ addc 0,g7,g7
+ addc g5,g6,g6 # relies on that C bit is clear
+ st g6,(g13)[g2*4]
+ addc 0,g7,g0
+
+ addo g2,1,g2
+ cmpobne 0,g2,Loop # when branch is taken, clears C bit
+
+ ret
diff --git a/rts/gmp/mpn/i960/mul_1.s b/rts/gmp/mpn/i960/mul_1.s
new file mode 100644
index 0000000000..5c0c985aa5
--- /dev/null
+++ b/rts/gmp/mpn/i960/mul_1.s
@@ -0,0 +1,45 @@
+# I960 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_mul_1
+___gmpn_mul_1:
+ subo g2,0,g2
+ shlo 2,g2,g4
+ subo g4,g1,g1
+ subo g4,g0,g13
+ mov 0,g0
+
+ cmpo 1,0 # clear C bit on AC.cc
+
+Loop: ld (g1)[g2*4],g5
+ emul g3,g5,g6
+
+ addc g0,g6,g6 # relies on that C bit is clear
+ st g6,(g13)[g2*4]
+ addc 0,g7,g0
+
+ addo g2,1,g2
+ cmpobne 0,g2,Loop # when branch is taken, clears C bit
+
+ ret
diff --git a/rts/gmp/mpn/i960/sub_n.s b/rts/gmp/mpn/i960/sub_n.s
new file mode 100644
index 0000000000..2db2d46aad
--- /dev/null
+++ b/rts/gmp/mpn/i960/sub_n.s
@@ -0,0 +1,43 @@
+# I960 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 4
+ .globl ___gmpn_sub_n
+___gmpn_sub_n:
+ mov 1,g6 # set carry-save register
+ cmpo 1,0 # clear cy
+
+Loop: subo 1,g3,g3 # update loop counter
+ ld (g1),g5 # load from s1_ptr
+ addo 4,g1,g1 # s1_ptr++
+ ld (g2),g4 # load from s2_ptr
+ addo 4,g2,g2 # s2_ptr++
+ cmpo g6,1 # restore cy from g6, relies on cy being 0
+ subc g4,g5,g4 # main subtract
+ subc 0,0,g6 # save cy in g6
+ st g4,(g0) # store result to res_ptr
+ addo 4,g0,g0 # res_ptr++
+ cmpobne 0,g3,Loop # when branch is taken, cy will be 0
+
+ mov g6,g0
+ ret
diff --git a/rts/gmp/mpn/lisp/gmpasm-mode.el b/rts/gmp/mpn/lisp/gmpasm-mode.el
new file mode 100644
index 0000000000..5d9da7fa1f
--- /dev/null
+++ b/rts/gmp/mpn/lisp/gmpasm-mode.el
@@ -0,0 +1,351 @@
+;;; gmpasm-mode.el -- GNU MP asm and m4 editing mode.
+
+
+;; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+;;
+;; This file is part of the GNU MP Library.
+;;
+;; The GNU MP Library is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU Lesser General Public License as published by
+;; the Free Software Foundation; either version 2.1 of the License, or (at your
+;; option) any later version.
+;;
+;; The GNU MP Library is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU Lesser General Public License
+;; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+;; MA 02111-1307, USA.
+
+
+;;; Commentary:
+;;
+;; gmpasm-mode is an editing mode for m4 processed assembler code and m4
+;; macro files in GMP. It's similar to m4-mode, but has a number of
+;; settings better suited to GMP.
+;;
+;;
+;; Install
+;; -------
+;;
+;; To make M-x gmpasm-mode available, put gmpasm-mode.el somewhere in the
+;; load-path and the following in .emacs
+;;
+;; (autoload 'gmpasm-mode "gmpasm-mode" nil t)
+;;
+;; To use gmpasm-mode automatically on all .asm and .m4 files, put the
+;; following in .emacs
+;;
+;; (add-to-list 'auto-mode-alist '("\\.asm\\'" . gmpasm-mode))
+;; (add-to-list 'auto-mode-alist '("\\.m4\\'" . gmpasm-mode))
+;;
+;; To have gmpasm-mode only on gmp files, try instead something like the
+;; following, which uses it only in a directory starting with "gmp", or a
+;; sub-directory of such.
+;;
+;; (add-to-list 'auto-mode-alist
+;; '("/gmp.*/.*\\.\\(asm\\|m4\\)\\'" . gmpasm-mode))
+;;
+;; Byte compiling will slightly speed up loading. If you want a docstring
+;; in the autoload you can use M-x update-file-autoloads if you set it up
+;; right.
+;;
+;;
+;; Emacsen
+;; -------
+;;
+;; FSF Emacs 20.x - gmpasm-mode is designed for this.
+;; XEmacs 20.x - seems to work.
+;;
+;; FSF Emacs 19.x - should work if replacements for some 20.x-isms are
+;; available. comment-region with "C" won't really do the right thing
+;; though.
+
+
+;;; Code:
+
+(defgroup gmpasm nil
+ "GNU MP m4 and asm editing."
+ :prefix "gmpasm-"
+ :group 'languages)
+
+(defcustom gmpasm-mode-hook nil
+ "*Hook called by `gmpasm-mode'."
+ :type 'hook
+ :group 'gmpasm)
+
+(defcustom gmpasm-comment-start-regexp "[#;!@C]"
+ "*Regexp matching possible comment styles.
+See `gmpasm-mode' docstring for how this is used."
+ :type 'regexp
+ :group 'gmpasm)
+
+
+(defun gmpasm-add-to-list-second (list-var element)
+ "(gmpasm-add-to-list-second LIST-VAR ELEMENT)
+
+Add ELEMENT to LIST-VAR as the second element in the list, if it isn't
+already in the list. If LIST-VAR is nil, then ELEMENT is just added as the
+sole element in the list.
+
+This is like `add-to-list', but it puts the new value second in the list.
+
+The first cons cell is copied rather than changed in-place, so references to
+the list elsewhere won't be affected."
+
+ (if (member element (symbol-value list-var))
+ (symbol-value list-var)
+ (set list-var
+ (if (symbol-value list-var)
+ (cons (car (symbol-value list-var))
+ (cons element
+ (cdr (symbol-value list-var))))
+ (list element)))))
+
+
+(defun gmpasm-delete-from-list (list-var element)
+ "(gmpasm-delete-from-list LIST-VAR ELEMENT)
+
+Delete ELEMENT from LIST-VAR, using `delete'.
+This is like `add-to-list', but the element is deleted from the list.
+The list is copied rather than changed in-place, so references to it elsewhere
+won't be affected."
+
+ (set list-var (delete element (copy-sequence (symbol-value list-var)))))
+
+
+(defvar gmpasm-mode-map
+ (let ((map (make-sparse-keymap)))
+
+ ;; assembler and dnl commenting
+ (define-key map "\C-c\C-c" 'comment-region)
+ (define-key map "\C-c\C-d" 'gmpasm-comment-region-dnl)
+
+ ;; kill an M-x compile, since it's not hard to put m4 into an infinite
+ ;; loop
+ (define-key map "\C-c\C-k" 'kill-compilation)
+
+ map)
+ "Keymap for `gmpasm-mode'.")
+
+
+(defvar gmpasm-mode-syntax-table
+ (let ((table (make-syntax-table)))
+ ;; underscore left as a symbol char, like C mode
+
+ ;; m4 quotes
+ (modify-syntax-entry ?` "('" table)
+ (modify-syntax-entry ?' ")`" table)
+
+ table)
+ "Syntax table used in `gmpasm-mode'.
+
+m4 ignores quote marks in # comments at the top level, but inside quotes #
+isn't special and all quotes are active. There seems no easy way to express
+this in the syntax table, so nothing is done for comments. Usually this is
+best, since it picks up invalid apostrophes in comments inside quotes.")
+
+
+(defvar gmpasm-font-lock-keywords
+ (eval-when-compile
+ (list
+ (cons
+ (concat
+ "\\b"
+ (regexp-opt
+ '("deflit" "defreg" "defframe" "defframe_pushl"
+ "define_not_for_expansion"
+ "ASM_START" "ASM_END" "PROLOGUE" "EPILOGUE"
+ "forloop"
+ "TEXT" "DATA" "ALIGN" "W32"
+ "builtin" "changecom" "changequote" "changeword" "debugfile"
+ "debugmode" "decr" "define" "defn" "divert" "divnum" "dumpdef"
+ "errprint" "esyscmd" "eval" "__file__" "format" "gnu" "ifdef"
+ "ifelse" "include" "incr" "index" "indir" "len" "__line__"
+ "m4exit" "m4wrap" "maketemp" "patsubst" "popdef" "pushdef"
+ "regexp" "shift" "sinclude" "substr" "syscmd" "sysval"
+ "traceoff" "traceon" "translit" "undefine" "undivert" "unix")
+ t)
+ "\\b") 'font-lock-keyword-face)))
+
+ "`font-lock-keywords' for `gmpasm-mode'.
+
+The keywords are m4 builtins and some of the GMP macros used in asm files.
+L and LF don't look good fontified, so they're omitted.
+
+The right assembler comment regexp is added dynamically buffer-local (with
+dnl too).")
+
+
+;; Initialized if gmpasm-mode finds filladapt loaded.
+(defvar gmpasm-filladapt-token-table nil
+ "Filladapt token table used in `gmpasm-mode'.")
+(defvar gmpasm-filladapt-token-match-table nil
+ "Filladapt token match table used in `gmpasm-mode'.")
+(defvar gmpasm-filladapt-token-conversion-table nil
+ "Filladapt token conversion table used in `gmpasm-mode'.")
+
+
+;;;###autoload
+(defun gmpasm-mode ()
+ "A major mode for editing GNU MP asm and m4 files.
+
+\\{gmpasm-mode-map}
+`comment-start' and `comment-end' are set buffer-local to assembler
+commenting appropriate for the CPU by looking for something matching
+`gmpasm-comment-start-regexp' at the start of a line, or \"#\" is used if
+there's no match (if \"#\" isn't what you want, type in a desired comment
+and do \\[gmpasm-mode] to reinitialize).
+
+`adaptive-fill-regexp' is set buffer-local to the standard regexp with
+`comment-start' and dnl added. If filladapt.el has been loaded it similarly
+gets `comment-start' and dnl added as buffer-local fill prefixes.
+
+Font locking has the m4 builtins, some of the GMP macros, m4 dnl commenting,
+and assembler commenting (based on the `comment-start' determined).
+
+Note that `gmpasm-comment-start-regexp' is only matched as a whole word, so
+the `C' in it is only matched as a whole word, not on something that happens
+to start with `C'. Also it's only the particular `comment-start' determined
+that's added for filling etc, not the whole `gmpasm-comment-start-regexp'.
+
+`gmpasm-mode-hook' is run after initializations are complete.
+"
+
+ (interactive)
+ (kill-all-local-variables)
+ (setq major-mode 'gmpasm-mode
+ mode-name "gmpasm")
+ (use-local-map gmpasm-mode-map)
+ (set-syntax-table gmpasm-mode-syntax-table)
+ (setq fill-column 76)
+
+ ;; Short instructions might fit with 32, but anything with labels or
+ ;; expressions soon needs the comments pushed out to column 40.
+ (setq comment-column 40)
+
+ ;; Don't want to find out the hard way which dumb assemblers don't like a
+ ;; missing final newline.
+ (set (make-local-variable 'require-final-newline) t)
+
+ ;; The first match of gmpasm-comment-start-regexp at the start of a line
+ ;; determines comment-start, or "#" if no match.
+ (set (make-local-variable 'comment-start)
+ (save-excursion
+ (goto-char (point-min))
+ (if (re-search-forward
+ (concat "^\\(" gmpasm-comment-start-regexp "\\)\\(\\s-\\|$\\)")
+ nil t)
+ (match-string 1)
+ "#")))
+ (set (make-local-variable 'comment-end) "")
+
+ ;; If comment-start ends in an alphanumeric then \b is used to match it
+ ;; only as a separate word. The test is for an alphanumeric rather than
+ ;; \w since we might try # or ! as \w characters but without wanting \b.
+ (let ((comment-regexp
+ (concat (regexp-quote comment-start)
+ (if (string-match "[a-zA-Z0-9]\\'" comment-start) "\\b"))))
+
+ ;; Whitespace is required before a comment-start so m4 $# doesn't match
+ ;; when comment-start is "#".
+ ;; Only spaces or tabs match after, so newline isn't included in the
+ ;; font lock below.
+ (set (make-local-variable 'comment-start-skip)
+ (concat "\\(^\\|\\s-\\)" comment-regexp "[ \t]*"))
+
+ ;; Comment fontification based on comment-start, matching through to the
+ ;; end of the line.
+ (add-to-list (make-local-variable 'gmpasm-font-lock-keywords)
+ (cons (concat
+ "\\(\\bdnl\\b\\|" comment-start-skip "\\).*$")
+ 'font-lock-comment-face))
+
+ (set (make-local-variable 'font-lock-defaults)
+ '(gmpasm-font-lock-keywords
+ t ; no syntactic fontification (of strings etc)
+ nil ; no case-fold
+ ((?_ . "w")) ; _ part of a word while fontifying
+ ))
+
+ ;; Paragraphs are separated by blank lines, or lines with only dnl or
+ ;; comment-start.
+ (set (make-local-variable 'paragraph-separate)
+ (concat "[ \t\f]*\\(\\(" comment-regexp "\\|dnl\\)[ \t]*\\)*$"))
+ (set (make-local-variable 'paragraph-start)
+ (concat "\f\\|" paragraph-separate))
+
+ ;; Adaptive fill gets dnl and comment-start as comment style prefixes on
+ ;; top of the standard regexp (which has # and ; already actually).
+ (set (make-local-variable 'adaptive-fill-regexp)
+ (concat "[ \t]*\\(\\("
+ comment-regexp
+ "\\|dnl\\|[-|#;>*]+\\|(?[0-9]+[.)]\\)[ \t]*\\)*"))
+ (set (make-local-variable 'adaptive-fill-first-line-regexp)
+ "\\`\\([ \t]*dnl\\)?[ \t]*\\'")
+
+ (when (fboundp 'filladapt-mode)
+ (when (not gmpasm-filladapt-token-table)
+ (setq gmpasm-filladapt-token-table
+ filladapt-token-table)
+ (setq gmpasm-filladapt-token-match-table
+ filladapt-token-match-table)
+ (setq gmpasm-filladapt-token-conversion-table
+ filladapt-token-conversion-table)
+
+ ;; Numbered bullet points like "2.1" get matched at the start of a
+ ;; line when it's really something like "2.1 cycles/limb", so delete
+ ;; this from the list. The regexp for "1.", "2." etc is left
+ ;; though.
+ (gmpasm-delete-from-list 'gmpasm-filladapt-token-table
+ '("[0-9]+\\(\\.[0-9]+\\)+[ \t]"
+ bullet))
+
+ ;; "%" as a comment prefix interferes with x86 register names
+ ;; like %eax, so delete this.
+ (gmpasm-delete-from-list 'gmpasm-filladapt-token-table
+ '("%+" postscript-comment))
+
+ (add-to-list 'gmpasm-filladapt-token-match-table
+ '(gmpasm-comment gmpasm-comment))
+ (add-to-list 'gmpasm-filladapt-token-conversion-table
+ '(gmpasm-comment . exact))
+ )
+
+ (set (make-local-variable 'filladapt-token-table)
+ gmpasm-filladapt-token-table)
+ (set (make-local-variable 'filladapt-token-match-table)
+ gmpasm-filladapt-token-match-table)
+ (set (make-local-variable 'filladapt-token-conversion-table)
+ gmpasm-filladapt-token-conversion-table)
+
+ ;; Add dnl and comment-start as fill prefixes.
+ ;; Comments in filladapt.el say filladapt-token-table must begin
+ ;; with ("^" beginning-of-line), so put our addition second.
+ (gmpasm-add-to-list-second 'filladapt-token-table
+ (list (concat "dnl[ \t]\\|" comment-regexp)
+ 'gmpasm-comment))
+ ))
+
+ (run-hooks 'gmpasm-mode-hook))
+
+
+(defun gmpasm-comment-region-dnl (beg end &optional arg)
+  "(gmpasm-comment-region-dnl BEG END &optional ARG)
+
+Comment or uncomment each line in the region using `dnl'.
+With \\[universal-argument] prefix arg, uncomment each line in region.
+This is `comment-region', but using \"dnl\"."
+
+ (interactive "r\nP")
+ (let ((comment-start "dnl")
+ (comment-end ""))
+ (comment-region beg end arg)))
+
+
+(provide 'gmpasm-mode)
+
+;;; gmpasm-mode.el ends here
diff --git a/rts/gmp/mpn/m68k/add_n.S b/rts/gmp/mpn/m68k/add_n.S
new file mode 100644
index 0000000000..9e1d89d64f
--- /dev/null
+++ b/rts/gmp/mpn/m68k/add_n.S
@@ -0,0 +1,79 @@
+/* mc68020 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+ sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr	(sp + 12)
+ size		(sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_add_n)
+
+C_SYMBOL_NAME(__gmpn_add_n:)
+PROLOG(__gmpn_add_n)
+/* Save used registers on the stack. */
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,12),R(a2)
+ movel MEM_DISP(sp,16),R(a0)
+ movel MEM_DISP(sp,20),R(a1)
+ movel MEM_DISP(sp,24),R(d2)
+
+ eorw #1,R(d2)
+ lsrl #1,R(d2)
+ bcc L(L1)
+ subql #1,R(d2) /* clears cy as side effect */
+
+L(Loop:)
+ movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ addxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+L(L1:) movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ addxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+
+ dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
+ subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
+ subl #0x10000,R(d2)
+ bcs L(L2)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(Loop)
+
+L(L2:)
+ negl R(d0)
+
+/* Restore used registers from stack frame. */
+ movel MEM_POSTINC(sp),R(a2)
+ movel MEM_POSTINC(sp),R(d2)
+
+ rts
+EPILOG(__gmpn_add_n)
diff --git a/rts/gmp/mpn/m68k/lshift.S b/rts/gmp/mpn/m68k/lshift.S
new file mode 100644
index 0000000000..a539d5d42e
--- /dev/null
+++ b/rts/gmp/mpn/m68k/lshift.S
@@ -0,0 +1,150 @@
+/* mc68020 __gmpn_lshift -- Shift left a low-level natural-number integer.
+
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ s_size (sp + 16)
+ cnt (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_lshift)
+
+C_SYMBOL_NAME(__gmpn_lshift:)
+PROLOG(__gmpn_lshift)
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Lnormal)
+ cmpl R(s_ptr),R(res_ptr)
+ bls L(Lspecial) /* jump if s_ptr >= res_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(s_ptr,d0,l),R(a2)
+#endif
+ cmpl R(res_ptr),R(a2)
+ bls L(Lspecial) /* jump if res_ptr >= s_ptr + s_size */
+
+L(Lnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+ movel MEM_PREDEC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsrl R(d5),R(d0) /* compute carry limb */
+
+ lsll R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Lend)
+ lsrl #1,R(s_size)
+ bcs L(L1)
+ subql #1,R(s_size)
+
+L(Loop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsrl R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_PREDEC(res_ptr)
+ lsll R(cnt),R(d2)
+L(L1:)
+ movel MEM_PREDEC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsrl R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+ lsll R(cnt),R(d1)
+
+ dbf R(s_size),L(Loop)
+ subl #0x10000,R(s_size)
+ bcc L(Loop)
+
+L(Lend:)
+ movel R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from least significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LL1)
+ subql #1,R(s_size)
+
+L(LLoop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ addxl R(d2),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+L(LL1:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ addxl R(d2),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+
+ dbf R(s_size),L(LLoop)
+ addxl R(d0),R(d0) /* save cy in lsb */
+ subl #0x10000,R(s_size)
+ bcs L(LLend)
+ lsrl #1,R(d0) /* restore cy */
+ bra L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(__gmpn_lshift)
diff --git a/rts/gmp/mpn/m68k/mc68020/addmul_1.S b/rts/gmp/mpn/m68k/mc68020/addmul_1.S
new file mode 100644
index 0000000000..6638115d71
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/addmul_1.S
@@ -0,0 +1,83 @@
+/* mc68020 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+ the result to a second limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s1_size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_addmul_1)
+
+C_SYMBOL_NAME(__gmpn_addmul_1:)
+PROLOG(__gmpn_addmul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d5),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,20),R(res_ptr)
+ movel MEM_DISP(sp,24),R(s1_ptr)
+ movel MEM_DISP(sp,28),R(s1_size)
+ movel MEM_DISP(sp,32),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ clrl R(d5)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ addxl R(d5),R(d1)
+ addl R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ addxl R(d5),R(d0)
+ addl R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ addxl R(d5),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d5)
+
+ rts
+EPILOG(__gmpn_addmul_1)
diff --git a/rts/gmp/mpn/m68k/mc68020/mul_1.S b/rts/gmp/mpn/m68k/mc68020/mul_1.S
new file mode 100644
index 0000000000..fdd4c39d70
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/mul_1.S
@@ -0,0 +1,90 @@
+/* mc68020 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+ the result in a second limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s1_size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_mul_1)
+
+C_SYMBOL_NAME(__gmpn_mul_1:)
+PROLOG(__gmpn_mul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d4),MEM_PREDEC(sp)
+#if 0
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(d3),MEM_PREDEC(sp)
+ movel R(d4),MEM_PREDEC(sp)
+#endif
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,16),R(res_ptr)
+ movel MEM_DISP(sp,20),R(s1_ptr)
+ movel MEM_DISP(sp,24),R(s1_size)
+ movel MEM_DISP(sp,28),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ movel R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ movel R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ clrl R(d3)
+ addxl R(d3),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d4)
+#if 0
+ movel MEM_POSTINC(sp),R(d4)
+ movel MEM_POSTINC(sp),R(d3)
+ movel MEM_POSTINC(sp),R(d2)
+#endif
+ rts
+EPILOG(__gmpn_mul_1)
diff --git a/rts/gmp/mpn/m68k/mc68020/submul_1.S b/rts/gmp/mpn/m68k/mc68020/submul_1.S
new file mode 100644
index 0000000000..3c36b70166
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/submul_1.S
@@ -0,0 +1,83 @@
+/* mc68020 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+ the result from a second limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s1_size (sp + 12)
+ s2_limb (sp + 16)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_submul_1)
+
+C_SYMBOL_NAME(__gmpn_submul_1:)
+PROLOG(__gmpn_submul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d5),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,20),R(res_ptr)
+ movel MEM_DISP(sp,24),R(s1_ptr)
+ movel MEM_DISP(sp,28),R(s1_size)
+ movel MEM_DISP(sp,32),R(s2_limb)
+
+ eorw #1,R(s1_size)
+ clrl R(d1)
+ clrl R(d5)
+ lsrl #1,R(s1_size)
+ bcc L(L1)
+ subql #1,R(s1_size)
+ subl R(d0),R(d0) /* (d0,cy) <= (0,0) */
+
+L(Loop:)
+ movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d1):R(d3)
+ addxl R(d0),R(d3)
+ addxl R(d5),R(d1)
+ subl R(d3),MEM_POSTINC(res_ptr)
+L(L1:) movel MEM_POSTINC(s1_ptr),R(d3)
+ mulul R(s2_limb),R(d0):R(d3)
+ addxl R(d1),R(d3)
+ addxl R(d5),R(d0)
+ subl R(d3),MEM_POSTINC(res_ptr)
+
+ dbf R(s1_size),L(Loop)
+ addxl R(d5),R(d0)
+ subl #0x10000,R(s1_size)
+ bcc L(Loop)
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d5)
+
+ rts
+EPILOG(__gmpn_submul_1)
diff --git a/rts/gmp/mpn/m68k/mc68020/udiv.S b/rts/gmp/mpn/m68k/mc68020/udiv.S
new file mode 100644
index 0000000000..d00cf13558
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/udiv.S
@@ -0,0 +1,31 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+.text
+ .even
+.globl ___udiv_qrnnd
+___udiv_qrnnd:
+ movel sp@(4),a0
+ movel sp@(8),d1
+ movel sp@(12),d0
+ divul sp@(16),d1:d0
+ movel d1,a0@
+ rts
diff --git a/rts/gmp/mpn/m68k/mc68020/umul.S b/rts/gmp/mpn/m68k/mc68020/umul.S
new file mode 100644
index 0000000000..a34ae6c543
--- /dev/null
+++ b/rts/gmp/mpn/m68k/mc68020/umul.S
@@ -0,0 +1,31 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+.text
+ .even
+.globl ___umul_ppmm
+___umul_ppmm:
+ movel sp@(4),a0
+ movel sp@(8),d1
+ movel sp@(12),d0
+ mulul d0,d0:d1
+ movel d1,a0@
+ rts
diff --git a/rts/gmp/mpn/m68k/rshift.S b/rts/gmp/mpn/m68k/rshift.S
new file mode 100644
index 0000000000..b47a48e52a
--- /dev/null
+++ b/rts/gmp/mpn/m68k/rshift.S
@@ -0,0 +1,149 @@
+/* mc68020 __gmpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s_ptr (sp + 8)
+ s_size (sp + 16)
+ cnt (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_rshift)
+
+C_SYMBOL_NAME(__gmpn_rshift:)
+PROLOG(__gmpn_rshift)
+/* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Lnormal)
+ cmpl R(res_ptr),R(s_ptr)
+ bls L(Lspecial) /* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+ cmpl R(s_ptr),R(a2)
+ bls L(Lspecial) /* jump if s_ptr >= res_ptr + s_size */
+
+L(Lnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsll R(d5),R(d0) /* compute carry limb */
+
+ lsrl R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Lend)
+ lsrl #1,R(s_size)
+ bcs L(L1)
+ subql #1,R(s_size)
+
+L(Loop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d2)
+L(L1:)
+ movel MEM_POSTINC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d1)
+
+ dbf R(s_size),L(Loop)
+ subl #0x10000,R(s_size)
+ bcc L(Loop)
+
+L(Lend:)
+ movel R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from most significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Lspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LL1)
+ subql #1,R(s_size)
+
+L(LLoop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+L(LL1:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+
+ dbf R(s_size),L(LLoop)
+ roxrl #1,R(d0) /* save cy in msb */
+ subl #0x10000,R(s_size)
+ bcs L(LLend)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(__gmpn_rshift)
diff --git a/rts/gmp/mpn/m68k/sub_n.S b/rts/gmp/mpn/m68k/sub_n.S
new file mode 100644
index 0000000000..ce45b24db5
--- /dev/null
+++ b/rts/gmp/mpn/m68k/sub_n.S
@@ -0,0 +1,79 @@
+/* mc68020 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ INPUT PARAMETERS
+ res_ptr (sp + 4)
+ s1_ptr (sp + 8)
+ s2_ptr (sp + 16)
+ size (sp + 12)
+*/
+
+#include "asm-syntax.h"
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(__gmpn_sub_n)
+
+C_SYMBOL_NAME(__gmpn_sub_n:)
+PROLOG(__gmpn_sub_n)
+/* Save used registers on the stack. */
+ movel R(d2),MEM_PREDEC(sp)
+ movel R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers. Better use movem? */
+ movel MEM_DISP(sp,12),R(a2)
+ movel MEM_DISP(sp,16),R(a0)
+ movel MEM_DISP(sp,20),R(a1)
+ movel MEM_DISP(sp,24),R(d2)
+
+ eorw #1,R(d2)
+ lsrl #1,R(d2)
+ bcc L(L1)
+ subql #1,R(d2) /* clears cy as side effect */
+
+L(Loop:)
+ movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ subxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+L(L1:) movel MEM_POSTINC(a0),R(d0)
+ movel MEM_POSTINC(a1),R(d1)
+ subxl R(d1),R(d0)
+ movel R(d0),MEM_POSTINC(a2)
+
+ dbf R(d2),L(Loop) /* loop until 16 lsb of %4 == -1 */
+ subxl R(d0),R(d0) /* d0 <= -cy; save cy as 0 or -1 in d0 */
+ subl #0x10000,R(d2)
+ bcs L(L2)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(Loop)
+
+L(L2:)
+ negl R(d0)
+
+/* Restore used registers from stack frame. */
+ movel MEM_POSTINC(sp),R(a2)
+ movel MEM_POSTINC(sp),R(d2)
+
+ rts
+EPILOG(__gmpn_sub_n)
diff --git a/rts/gmp/mpn/m68k/syntax.h b/rts/gmp/mpn/m68k/syntax.h
new file mode 100644
index 0000000000..9eec279c06
--- /dev/null
+++ b/rts/gmp/mpn/m68k/syntax.h
@@ -0,0 +1,177 @@
+/* asm.h -- Definitions for 68k syntax variations.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#undef ALIGN
+
+#ifdef MIT_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)base@
+#define MEM_DISP(base,displacement)base@(displacement)
+#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale)
+#define MEM_PREDEC(memory_base)memory_base@-
+#define MEM_POSTINC(memory_base)memory_base@+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#define moveql moveq
+/* Use variable sized opcodes. */
+#define bcc jcc
+#define bcs jcs
+#define bls jls
+#define beq jeq
+#define bne jne
+#define bra jra
+#endif
+
+#ifdef SONY_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#endif
+
+#ifdef MOTOROLA_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT
+#define ALIGN
+#define GLOBL XDEF
+#define lea LEA
+#define movel MOVE.L
+#define moveml MOVEM.L
+#define moveql MOVEQ.L
+#define cmpl CMP.L
+#define orl OR.L
+#define clrl CLR.L
+#define eorw EOR.W
+#define lsrl LSR.L
+#define lsll LSL.L
+#define roxrl ROXR.L
+#define roxll ROXL.L
+#define addl ADD.L
+#define addxl ADDX.L
+#define addql ADDQ.L
+#define subl SUB.L
+#define subxl SUBX.L
+#define subql SUBQ.L
+#define negl NEG.L
+#define mulul MULU.L
+#define bcc BCC
+#define bcs BCS
+#define bls BLS
+#define beq BEQ
+#define bne BNE
+#define bra BRA
+#define dbf DBF
+#define rts RTS
+#define d0 D0
+#define d1 D1
+#define d2 D2
+#define d3 D3
+#define d4 D4
+#define d5 D5
+#define d6 D6
+#define d7 D7
+#define a0 A0
+#define a1 A1
+#define a2 A2
+#define a3 A3
+#define a4 A4
+#define a5 A5
+#define a6 A6
+#define a7 A7
+#define sp SP
+#endif
+
+#ifdef ELF_SYNTAX
+#define PROLOG(name) .type name,@function
+#define EPILOG(name) .size name,.-name
+#define MEM(base)(R(base))
+#define MEM_DISP(base,displacement)(displacement,R(base))
+#define MEM_PREDEC(memory_base)-(R(memory_base))
+#define MEM_POSTINC(memory_base)(R(memory_base))+
+#ifdef __STDC__
+#define R_(r)%##r
+#define R(r)R_(r)
+#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix))
+#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix)
+#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale))
+#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale)
+#define L(label) .##label
+#else
+#define R(r)%/**/r
+#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale)
+#define L(label) ./**/label
+#endif
+#define TEXT .text
+#define ALIGN .align 2
+#define GLOBL .globl
+#define bcc jbcc
+#define bcs jbcs
+#define bls jbls
+#define beq jbeq
+#define bne jbne
+#define bra jbra
+#endif
+
+#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX)
+#define movel move.l
+#define moveml movem.l
+#define moveql moveq.l
+#define cmpl cmp.l
+#define orl or.l
+#define clrl clr.l
+#define eorw eor.w
+#define lsrl lsr.l
+#define lsll lsl.l
+#define roxrl roxr.l
+#define roxll roxl.l
+#define addl add.l
+#define addxl addx.l
+#define addql addq.l
+#define subl sub.l
+#define subxl subx.l
+#define subql subq.l
+#define negl neg.l
+#define mulul mulu.l
+#endif
diff --git a/rts/gmp/mpn/m88k/add_n.s b/rts/gmp/mpn/m88k/add_n.s
new file mode 100644
index 0000000000..0b776c618a
--- /dev/null
+++ b/rts/gmp/mpn/m88k/add_n.s
@@ -0,0 +1,104 @@
+; mc88100 __gmpn_add -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; s2_ptr r4
+; size r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention. As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+ text
+ align 16
+ global ___gmpn_add_n
+___gmpn_add_n:
+ ld r6,r3,0 ; read first limb from s1_ptr
+ extu r10,r5,3
+ ld r7,r4,0 ; read first limb from s2_ptr
+
+ subu.co r5,r0,r5 ; (clear carry as side effect)
+ mak r5,r5,3<4>
+ bcnd eq0,r5,Lzero
+
+ or r12,r0,lo16(Lbase)
+ or.u r12,r12,hi16(Lbase)
+ addu r12,r12,r5 ; r12 is address for entering in loop
+
+ extu r5,r5,2 ; divide by 4
+ subu r2,r2,r5 ; adjust res_ptr
+ subu r3,r3,r5 ; adjust s1_ptr
+ subu r4,r4,r5 ; adjust s2_ptr
+
+ or r8,r6,r0
+
+ jmp.n r12
+ or r9,r7,r0
+
+Loop: addu r3,r3,32
+ st r8,r2,28
+ addu r4,r4,32
+ ld r6,r3,0
+ addu r2,r2,32
+ ld r7,r4,0
+Lzero: subu r10,r10,1 ; add 0 + 8r limbs (adj loop cnt)
+Lbase: ld r8,r3,4
+ addu.cio r6,r6,r7
+ ld r9,r4,4
+ st r6,r2,0
+ ld r6,r3,8 ; add 7 + 8r limbs
+ addu.cio r8,r8,r9
+ ld r7,r4,8
+ st r8,r2,4
+ ld r8,r3,12 ; add 6 + 8r limbs
+ addu.cio r6,r6,r7
+ ld r9,r4,12
+ st r6,r2,8
+ ld r6,r3,16 ; add 5 + 8r limbs
+ addu.cio r8,r8,r9
+ ld r7,r4,16
+ st r8,r2,12
+ ld r8,r3,20 ; add 4 + 8r limbs
+ addu.cio r6,r6,r7
+ ld r9,r4,20
+ st r6,r2,16
+ ld r6,r3,24 ; add 3 + 8r limbs
+ addu.cio r8,r8,r9
+ ld r7,r4,24
+ st r8,r2,20
+ ld r8,r3,28 ; add 2 + 8r limbs
+ addu.cio r6,r6,r7
+ ld r9,r4,28
+ st r6,r2,24
+ bcnd.n ne0,r10,Loop ; add 1 + 8r limbs
+ addu.cio r8,r8,r9
+
+ st r8,r2,28 ; store most significant limb
+
+ jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/m88k/mc88110/add_n.S b/rts/gmp/mpn/m88k/mc88110/add_n.S
new file mode 100644
index 0000000000..843a50dded
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/add_n.S
@@ -0,0 +1,200 @@
+; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size r5
+
+#include "sysdep.h"
+
+ text
+ align 16
+ global C_SYMBOL_NAME(__gmpn_add_n)
+C_SYMBOL_NAME(__gmpn_add_n):
+ addu.co r0,r0,r0 ; clear cy flag
+ xor r12,s2_ptr,res_ptr
+ bb1 2,r12,L1
+; ** V1a **
+L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned?
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ addu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s1_ptr,0
+ ld r12,s1_ptr,4
+ ld.d r8,s2_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1: subu size,size,8
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,16
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,20
+ ld.d r8,s2_ptr,16
+ st.d r6,res_ptr,8
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,24
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,28
+ ld.d r8,s2_ptr,24
+ st.d r6,res_ptr,16
+ addu.cio r6,r10,r8
+ ld r10,s1_ptr,32
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,36
+ addu s1_ptr,s1_ptr,32
+ ld.d r8,s2_ptr,32
+ addu s2_ptr,s2_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1
+
+Lfin1: addu size,size,8-2
+ bcnd lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: addu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ addu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1
+Lend1: addu.cio r6,r10,r8
+ addu.cio r7,r12,r9
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1
+/* Add last limb */
+ ld r10,s1_ptr,8
+ ld r8,s2_ptr,8
+ addu.cio r6,r10,r8
+ st r6,res_ptr,8
+
+Lret1: jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+
+L1: xor r12,s1_ptr,res_ptr
+ bb1 2,r12,L2
+; ** V1b **
+ or r12,r0,s2_ptr
+ or s2_ptr,r0,s1_ptr
+ or s1_ptr,r0,r12
+ br L0
+
+; ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp r12,size,1
+ bb1 eq,r12,Ljone
+ bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ addu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+
+L_v2: subu size,size,8
+ bcnd lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop2: subu size,size,8
+ ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ addu.cio r8,r8,r6
+ st r8,res_ptr,0
+ addu.cio r9,r9,r7
+ st r9,res_ptr,4
+ ld.d r8,s1_ptr,8
+ ld.d r6,s2_ptr,8
+ addu.cio r8,r8,r6
+ st r8,res_ptr,8
+ addu.cio r9,r9,r7
+ st r9,res_ptr,12
+ ld.d r8,s1_ptr,16
+ ld.d r6,s2_ptr,16
+ addu.cio r8,r8,r6
+ st r8,res_ptr,16
+ addu.cio r9,r9,r7
+ st r9,res_ptr,20
+ ld.d r8,s1_ptr,24
+ ld.d r6,s2_ptr,24
+ addu.cio r8,r8,r6
+ st r8,res_ptr,24
+ addu.cio r9,r9,r7
+ st r9,res_ptr,28
+ addu s1_ptr,s1_ptr,32
+ addu s2_ptr,s2_ptr,32
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop2
+
+Lfin2: addu size,size,8-2
+ bcnd lt0,size,Lend2
+Loope2: ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ addu.cio r8,r8,r6
+ st r8,res_ptr,0
+ addu.cio r9,r9,r7
+ st r9,res_ptr,4
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope2
+Lend2: bb0 0,size,Lret2
+/* Add last limb */
+Ljone: ld r10,s1_ptr,0
+ ld r8,s2_ptr,0
+ addu.cio r6,r10,r8
+ st r6,res_ptr,0
+
+Lret2: jmp.n r1
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/m88k/mc88110/addmul_1.s b/rts/gmp/mpn/m88k/mc88110/addmul_1.s
new file mode 100644
index 0000000000..7d97c87c79
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/addmul_1.s
@@ -0,0 +1,61 @@
+; mc88110 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+ text
+ align 16
+ global ___gmpn_addmul_1
+___gmpn_addmul_1:
+ lda r3,r3[r4]
+ lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
+ subu r4,r0,r4
+ addu.co r2,r0,r0 ; r2 = cy = 0
+
+ ld r6,r3[r4]
+ addu r4,r4,1
+ subu r8,r8,4
+ bcnd.n eq0,r4,Lend
+ mulu.d r10,r6,r5
+
+Loop: ld r7,r8[r4]
+ ld r6,r3[r4]
+ addu.cio r9,r11,r2
+ addu.ci r2,r10,r0
+ addu.co r9,r9,r7
+ st r9,r8[r4]
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd ne0,r4,Loop
+
+Lend: ld r7,r8,0
+ addu.cio r9,r11,r2
+ addu.ci r2,r10,r0
+ addu.co r9,r9,r7
+ st r9,r8,0
+ jmp.n r1
+ addu.ci r2,r2,r0
diff --git a/rts/gmp/mpn/m88k/mc88110/mul_1.s b/rts/gmp/mpn/m88k/mc88110/mul_1.s
new file mode 100644
index 0000000000..b8483afa91
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/mul_1.s
@@ -0,0 +1,59 @@
+; mc88110 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+ text
+ align 16
+ global ___gmpn_mul_1
+___gmpn_mul_1:
+ ; Make S1_PTR and RES_PTR point at the end of their blocks
+ ; and negate SIZE.
+ lda r3,r3[r4]
+ lda r8,r2[r4] ; RES_PTR in r8 since r2 is retval
+ subu r4,r0,r4
+
+ addu.co r2,r0,r0 ; r2 = cy = 0
+
+ ld r6,r3[r4]
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd.n eq0,r4,Lend
+ subu r8,r8,8
+
+Loop: ld r6,r3[r4]
+ addu.cio r9,r11,r2
+ or r2,r10,r0 ; could be avoided if unrolled
+ addu r4,r4,1
+ mulu.d r10,r6,r5
+ bcnd.n ne0,r4,Loop
+ st r9,r8[r4]
+
+Lend: addu.cio r9,r11,r2
+ st r9,r8,4
+ jmp.n r1
+ addu.ci r2,r10,r0
diff --git a/rts/gmp/mpn/m88k/mc88110/sub_n.S b/rts/gmp/mpn/m88k/mc88110/sub_n.S
new file mode 100644
index 0000000000..715a3faf25
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mc88110/sub_n.S
@@ -0,0 +1,276 @@
+; mc88110 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr r2
+#define s1_ptr r3
+#define s2_ptr r4
+#define size r5
+
+#include "sysdep.h"
+
+ text
+ align 16
+ global C_SYMBOL_NAME(__gmpn_sub_n)
+C_SYMBOL_NAME(__gmpn_sub_n):
+ subu.co r0,r0,r0 ; set cy flag
+ xor r12,s2_ptr,res_ptr
+ bb1 2,r12,L1
+; ** V1a **
+L0: bb0 2,res_ptr,L_v1 ; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ subu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s1_ptr,0
+ ld r12,s1_ptr,4
+ ld.d r8,s2_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1: subu size,size,8
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,16
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,20
+ ld.d r8,s2_ptr,16
+ st.d r6,res_ptr,8
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,24
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,28
+ ld.d r8,s2_ptr,24
+ st.d r6,res_ptr,16
+ subu.cio r6,r10,r8
+ ld r10,s1_ptr,32
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,36
+ addu s1_ptr,s1_ptr,32
+ ld.d r8,s2_ptr,32
+ addu s2_ptr,s2_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1
+
+Lfin1: addu size,size,8-2
+ bcnd lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1: subu.cio r6,r10,r8
+ ld r10,s1_ptr,8
+ subu.cio r7,r12,r9
+ ld r12,s1_ptr,12
+ ld.d r8,s2_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1
+Lend1: subu.cio r6,r10,r8
+ subu.cio r7,r12,r9
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1
+/* Add last limb */
+ ld r10,s1_ptr,8
+ ld r8,s2_ptr,8
+ subu.cio r6,r10,r8
+ st r6,res_ptr,8
+
+Lret1: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
+
+L1: xor r12,s1_ptr,res_ptr
+ bb1 2,r12,L2
+; ** V1b **
+ bb0 2,res_ptr,L_v1b ; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s1_ptr */
+ ld r10,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ ld r8,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ subu size,size,1
+ subu.co r6,r8,r10
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+L_v1b: cmp r12,size,2
+ bb1 lt,r12,Lend2
+
+ ld r10,s2_ptr,0
+ ld r12,s2_ptr,4
+ ld.d r8,s1_ptr,0
+ subu size,size,10
+ bcnd lt0,size,Lfin1b
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop1b: subu size,size,8
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,8
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,12
+ ld.d r8,s1_ptr,8
+ st.d r6,res_ptr,0
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,16
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,20
+ ld.d r8,s1_ptr,16
+ st.d r6,res_ptr,8
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,24
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,28
+ ld.d r8,s1_ptr,24
+ st.d r6,res_ptr,16
+ subu.cio r6,r8,r10
+ ld r10,s2_ptr,32
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,36
+ addu s2_ptr,s2_ptr,32
+ ld.d r8,s1_ptr,32
+ addu s1_ptr,s1_ptr,32
+ st.d r6,res_ptr,24
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop1b
+
+Lfin1b: addu size,size,8-2
+ bcnd lt0,size,Lend1b
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subu.cio r6,r8,r10
+ ld r10,s2_ptr,8
+ subu.cio r7,r9,r12
+ ld r12,s2_ptr,12
+ ld.d r8,s1_ptr,8
+ st.d r6,res_ptr,0
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope1b
+Lend1b: subu.cio r6,r8,r10
+ subu.cio r7,r9,r12
+ st.d r6,res_ptr,0
+
+ bb0 0,size,Lret1b
+/* Add last limb */
+ ld r10,s2_ptr,8
+ ld r8,s1_ptr,8
+ subu.cio r6,r8,r10
+ st r6,res_ptr,8
+
+Lret1b: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
+
+; ** V2 **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+ alignment of s2_ptr and res_ptr differ. Since there are only two ways
+ things can be aligned (that we care about) we now know that the alignment
+ of s1_ptr and s2_ptr are the same. */
+
+L2: cmp r12,size,1
+ bb1 eq,r12,Ljone
+ bb0 2,s1_ptr,L_v2 ; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+ ld r10,s1_ptr,0
+ addu s1_ptr,s1_ptr,4
+ ld r8,s2_ptr,0
+ addu s2_ptr,s2_ptr,4
+ subu size,size,1
+ subu.co r6,r10,r8
+ st r6,res_ptr,0
+ addu res_ptr,res_ptr,4
+
+L_v2: subu size,size,8
+ bcnd lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+ align 8
+Loop2: subu size,size,8
+ ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ subu.cio r8,r8,r6
+ st r8,res_ptr,0
+ subu.cio r9,r9,r7
+ st r9,res_ptr,4
+ ld.d r8,s1_ptr,8
+ ld.d r6,s2_ptr,8
+ subu.cio r8,r8,r6
+ st r8,res_ptr,8
+ subu.cio r9,r9,r7
+ st r9,res_ptr,12
+ ld.d r8,s1_ptr,16
+ ld.d r6,s2_ptr,16
+ subu.cio r8,r8,r6
+ st r8,res_ptr,16
+ subu.cio r9,r9,r7
+ st r9,res_ptr,20
+ ld.d r8,s1_ptr,24
+ ld.d r6,s2_ptr,24
+ subu.cio r8,r8,r6
+ st r8,res_ptr,24
+ subu.cio r9,r9,r7
+ st r9,res_ptr,28
+ addu s1_ptr,s1_ptr,32
+ addu s2_ptr,s2_ptr,32
+ addu res_ptr,res_ptr,32
+ bcnd ge0,size,Loop2
+
+Lfin2: addu size,size,8-2
+ bcnd lt0,size,Lend2
+Loope2: ld.d r8,s1_ptr,0
+ ld.d r6,s2_ptr,0
+ subu.cio r8,r8,r6
+ st r8,res_ptr,0
+ subu.cio r9,r9,r7
+ st r9,res_ptr,4
+ subu size,size,2
+ addu s1_ptr,s1_ptr,8
+ addu s2_ptr,s2_ptr,8
+ addu res_ptr,res_ptr,8
+ bcnd ge0,size,Loope2
+Lend2: bb0 0,size,Lret2
+/* Add last limb */
+Ljone: ld r10,s1_ptr,0
+ ld r8,s2_ptr,0
+ subu.cio r6,r10,r8
+ st r6,res_ptr,0
+
+Lret2: addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
diff --git a/rts/gmp/mpn/m88k/mul_1.s b/rts/gmp/mpn/m88k/mul_1.s
new file mode 100644
index 0000000000..06370837ef
--- /dev/null
+++ b/rts/gmp/mpn/m88k/mul_1.s
@@ -0,0 +1,127 @@
+; mc88100 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; size r4
+; s2_limb r5
+
+; Common overhead is about 11 cycles/invocation.
+
+; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb. (The
+; pipeline stalls 2 cycles due to WB contention.)
+
+; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb. (The
+; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
+
+; To enhance speed:
+; 1. Unroll main loop 4-8 times.
+; 2. Schedule code to avoid WB contention. It might be tempting to move the
+; ld instruction in the loops down to save 2 cycles (less WB contention),
+; but that looses because the ultimate value will be read from outside
+; the allocated space. But if we handle the ultimate multiplication in
+; the tail, we can do this.
+; 3. Make the multiplication with less instructions. I think the code for
+; (S2_LIMB >= 0x10000) is not minimal.
+; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
+; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
+; cycles/limb. (Assuming infinite unrolling.)
+
+ text
+ align 16
+ global ___gmpn_mul_1
+___gmpn_mul_1:
+
+ ; Make S1_PTR and RES_PTR point at the end of their blocks
+ ; and negate SIZE.
+ lda r3,r3[r4]
+ lda r6,r2[r4] ; RES_PTR in r6 since r2 is retval
+ subu r4,r0,r4
+
+ addu.co r2,r0,r0 ; r2 = cy = 0
+ ld r9,r3[r4]
+ mask r7,r5,0xffff ; r7 = lo(S2_LIMB)
+ extu r8,r5,16 ; r8 = hi(S2_LIMB)
+ bcnd.n eq0,r8,Lsmall ; jump if (hi(S2_LIMB) == 0)
+ subu r6,r6,4
+
+; General code for any value of S2_LIMB.
+
+ ; Make a stack frame and save r25 and r26
+ subu r31,r31,16
+ st.d r25,r31,8
+
+ ; Enter the loop in the middle
+ br.n L1
+ addu r4,r4,1
+
+Loop: ld r9,r3[r4]
+ st r26,r6[r4]
+; bcnd ne0,r0,0 ; bubble
+ addu r4,r4,1
+L1: mul r26,r9,r5 ; low word of product mul_1 WB ld
+ mask r12,r9,0xffff ; r12 = lo(s1_limb) mask_1
+ mul r11,r12,r7 ; r11 = prod_0 mul_2 WB mask_1
+ mul r10,r12,r8 ; r10 = prod_1a mul_3
+ extu r13,r9,16 ; r13 = hi(s1_limb) extu_1 WB mul_1
+ mul r12,r13,r7 ; r12 = prod_1b mul_4 WB extu_1
+ mul r25,r13,r8 ; r25 = prod_2 mul_5 WB mul_2
+ extu r11,r11,16 ; r11 = hi(prod_0) extu_2 WB mul_3
+ addu r10,r10,r11 ; addu_1 WB extu_2
+; bcnd ne0,r0,0 ; bubble WB addu_1
+ addu.co r10,r10,r12 ; WB mul_4
+ mask.u r10,r10,0xffff ; move the 16 most significant bits...
+ addu.ci r10,r10,r0 ; ...to the low half of the word...
+ rot r10,r10,16 ; ...and put carry in pos 16.
+ addu.co r26,r26,r2 ; add old carry limb
+ bcnd.n ne0,r4,Loop
+ addu.ci r2,r25,r10 ; compute new carry limb
+
+ st r26,r6[r4]
+ ld.d r25,r31,8
+ jmp.n r1
+ addu r31,r31,16
+
+; Fast code for S2_LIMB < 0x10000
+Lsmall:
+ ; Enter the loop in the middle
+ br.n SL1
+ addu r4,r4,1
+
+SLoop: ld r9,r3[r4] ;
+ st r8,r6[r4] ;
+ addu r4,r4,1 ;
+SL1: mul r8,r9,r5 ; low word of product
+ mask r12,r9,0xffff ; r12 = lo(s1_limb)
+ extu r13,r9,16 ; r13 = hi(s1_limb)
+ mul r11,r12,r7 ; r11 = prod_0
+ mul r12,r13,r7 ; r12 = prod_1b
+ addu.cio r8,r8,r2 ; add old carry limb
+ extu r10,r11,16 ; r11 = hi(prod_0)
+ addu r10,r10,r12 ;
+ bcnd.n ne0,r4,SLoop
+ extu r2,r10,16 ; r2 = new carry limb
+
+ jmp.n r1
+ st r8,r6[r4]
diff --git a/rts/gmp/mpn/m88k/sub_n.s b/rts/gmp/mpn/m88k/sub_n.s
new file mode 100644
index 0000000000..2fd345a135
--- /dev/null
+++ b/rts/gmp/mpn/m88k/sub_n.s
@@ -0,0 +1,106 @@
+; mc88100 __gmpn_sub -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr r2
+; s1_ptr r3
+; s2_ptr r4
+; size r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention. As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+ text
+ align 16
+ global ___gmpn_sub_n
+___gmpn_sub_n:
+ ld r6,r3,0 ; read first limb from s1_ptr
+ extu r10,r5,3
+ ld r7,r4,0 ; read first limb from s2_ptr
+
+ subu r5,r0,r5
+ mak r5,r5,3<4>
+ bcnd.n eq0,r5,Lzero
+ subu.co r0,r0,r0 ; initialize carry
+
+ or r12,r0,lo16(Lbase)
+ or.u r12,r12,hi16(Lbase)
+ addu r12,r12,r5 ; r12 is address for entering in loop
+
+ extu r5,r5,2 ; divide by 4
+ subu r2,r2,r5 ; adjust res_ptr
+ subu r3,r3,r5 ; adjust s1_ptr
+ subu r4,r4,r5 ; adjust s2_ptr
+
+ or r8,r6,r0
+
+ jmp.n r12
+ or r9,r7,r0
+
+Loop: addu r3,r3,32
+ st r8,r2,28
+ addu r4,r4,32
+ ld r6,r3,0
+ addu r2,r2,32
+ ld r7,r4,0
+Lzero: subu r10,r10,1 ; subtract 0 + 8r limbs (adj loop cnt)
+Lbase: ld r8,r3,4
+ subu.cio r6,r6,r7
+ ld r9,r4,4
+ st r6,r2,0
+ ld r6,r3,8 ; subtract 7 + 8r limbs
+ subu.cio r8,r8,r9
+ ld r7,r4,8
+ st r8,r2,4
+ ld r8,r3,12 ; subtract 6 + 8r limbs
+ subu.cio r6,r6,r7
+ ld r9,r4,12
+ st r6,r2,8
+ ld r6,r3,16 ; subtract 5 + 8r limbs
+ subu.cio r8,r8,r9
+ ld r7,r4,16
+ st r8,r2,12
+ ld r8,r3,20 ; subtract 4 + 8r limbs
+ subu.cio r6,r6,r7
+ ld r9,r4,20
+ st r6,r2,16
+ ld r6,r3,24 ; subtract 3 + 8r limbs
+ subu.cio r8,r8,r9
+ ld r7,r4,24
+ st r8,r2,20
+ ld r8,r3,28 ; subtract 2 + 8r limbs
+ subu.cio r6,r6,r7
+ ld r9,r4,28
+ st r6,r2,24
+ bcnd.n ne0,r10,Loop ; subtract 1 + 8r limbs
+ subu.cio r8,r8,r9
+
+ st r8,r2,28 ; store most significant limb
+
+ addu.ci r2,r0,r0 ; return carry-out from most sign. limb
+ jmp.n r1
+ xor r2,r2,1
diff --git a/rts/gmp/mpn/mips2/add_n.s b/rts/gmp/mpn/mips2/add_n.s
new file mode 100644
index 0000000000..5c3c7fc8a1
--- /dev/null
+++ b/rts/gmp/mpn/mips2/add_n.s
@@ -0,0 +1,120 @@
+ # MIPS2 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_add_n
+ .ent __gmpn_add_n
+__gmpn_add_n:
+ .set noreorder
+ .set nomacro
+
+ lw $10,0($5)
+ lw $11,0($6)
+
+ addiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ subu $7,$7,$9
+
+.Loop0: addiu $9,$9,-1
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,0($4)
+ or $2,$2,$8
+
+ addiu $5,$5,4
+ addiu $6,$6,4
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ addiu $4,$4,4
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: addiu $7,$7,-4
+
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,0($4)
+ or $2,$2,$8
+
+ lw $10,8($5)
+ addu $13,$13,$2
+ lw $11,8($6)
+ sltu $8,$13,$2
+ addu $13,$12,$13
+ sltu $2,$13,$12
+ sw $13,4($4)
+ or $2,$2,$8
+
+ lw $12,12($5)
+ addu $11,$11,$2
+ lw $13,12($6)
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,8($4)
+ or $2,$2,$8
+
+ lw $10,16($5)
+ addu $13,$13,$2
+ lw $11,16($6)
+ sltu $8,$13,$2
+ addu $13,$12,$13
+ sltu $2,$13,$12
+ sw $13,12($4)
+ or $2,$2,$8
+
+ addiu $5,$5,16
+ addiu $6,$6,16
+
+ bne $7,$0,.Loop
+ addiu $4,$4,16
+
+.Lend: addu $11,$11,$2
+ sltu $8,$11,$2
+ addu $11,$10,$11
+ sltu $2,$11,$10
+ sw $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_add_n
diff --git a/rts/gmp/mpn/mips2/addmul_1.s b/rts/gmp/mpn/mips2/addmul_1.s
new file mode 100644
index 0000000000..1e5037751b
--- /dev/null
+++ b/rts/gmp/mpn/mips2/addmul_1.s
@@ -0,0 +1,97 @@
+ # MIPS __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+ # add the product to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_addmul_1
+ .ent __gmpn_addmul_1
+__gmpn_addmul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ lw $8,0($5)
+
+ # warm up phase 1
+ addiu $5,$5,4
+ multu $8,$7
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC1
+ lw $8,0($5) # load new s1 limb as early as possible
+
+Loop: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addiu $5,$5,4
+ addu $3,$3,$2 # add old carry limb to low product limb
+ multu $8,$7
+ lw $8,0($5) # load new s1 limb as early as possible
+ addiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ addu $3,$10,$3
+ sltu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ bne $6,$0,Loop
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ multu $8,$7
+ addu $3,$10,$3
+ sltu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ addu $3,$10,$3
+ sltu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ j $31
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_addmul_1
diff --git a/rts/gmp/mpn/mips2/lshift.s b/rts/gmp/mpn/mips2/lshift.s
new file mode 100644
index 0000000000..2ca3a3c800
--- /dev/null
+++ b/rts/gmp/mpn/mips2/lshift.s
@@ -0,0 +1,95 @@
+ # MIPS2 __gmpn_lshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_lshift
+ .ent __gmpn_lshift
+__gmpn_lshift:
+ .set noreorder
+ .set nomacro
+
+ sll $2,$6,2
+ addu $5,$5,$2 # make r5 point at end of src
+ lw $10,-4($5) # load first limb
+ subu $13,$0,$7
+ addu $4,$4,$2 # make r4 point at end of res
+ addiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ srl $2,$10,$13 # compute function result
+
+ subu $6,$6,$9
+
+.Loop0: lw $3,-8($5)
+ addiu $4,$4,-4
+ addiu $5,$5,-4
+ addiu $9,$9,-1
+ sll $11,$10,$7
+ srl $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sw $8,0($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: lw $3,-8($5)
+ addiu $4,$4,-16
+ addiu $6,$6,-4
+ sll $11,$10,$7
+ srl $12,$3,$13
+
+ lw $10,-12($5)
+ sll $14,$3,$7
+ or $8,$11,$12
+ sw $8,12($4)
+ srl $9,$10,$13
+
+ lw $3,-16($5)
+ sll $11,$10,$7
+ or $8,$14,$9
+ sw $8,8($4)
+ srl $12,$3,$13
+
+ lw $10,-20($5)
+ sll $14,$3,$7
+ or $8,$11,$12
+ sw $8,4($4)
+ srl $9,$10,$13
+
+ addiu $5,$5,-16
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sw $8,0($4)
+
+.Lend: sll $8,$10,$7
+ j $31
+ sw $8,-4($4)
+ .end __gmpn_lshift
diff --git a/rts/gmp/mpn/mips2/mul_1.s b/rts/gmp/mpn/mips2/mul_1.s
new file mode 100644
index 0000000000..ea8aa26809
--- /dev/null
+++ b/rts/gmp/mpn/mips2/mul_1.s
@@ -0,0 +1,85 @@
+ # MIPS __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+ # store the product in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_mul_1
+ .ent __gmpn_mul_1
+__gmpn_mul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ lw $8,0($5)
+
+ # warm up phase 1
+ addiu $5,$5,4
+ multu $8,$7
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC1
+ lw $8,0($5) # load new s1 limb as early as possible
+
+Loop: mflo $10
+ mfhi $9
+ addiu $5,$5,4
+ addu $10,$10,$2 # add old carry limb to low product limb
+ multu $8,$7
+ lw $8,0($5) # load new s1 limb as early as possible
+ addiu $6,$6,-1 # decrement loop counter
+ sltu $2,$10,$2 # carry from previous addition -> $2
+ sw $10,0($4)
+ addiu $4,$4,4
+ bne $6,$0,Loop
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: mflo $10
+ mfhi $9
+ addu $10,$10,$2
+ sltu $2,$10,$2
+ multu $8,$7
+ sw $10,0($4)
+ addiu $4,$4,4
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: mflo $10
+ mfhi $9
+ addu $10,$10,$2
+ sltu $2,$10,$2
+ sw $10,0($4)
+ j $31
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_mul_1
diff --git a/rts/gmp/mpn/mips2/rshift.s b/rts/gmp/mpn/mips2/rshift.s
new file mode 100644
index 0000000000..37c8f39cb4
--- /dev/null
+++ b/rts/gmp/mpn/mips2/rshift.s
@@ -0,0 +1,92 @@
+ # MIPS2 __gmpn_rshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_rshift
+ .ent __gmpn_rshift
+__gmpn_rshift:
+ .set noreorder
+ .set nomacro
+
+ lw $10,0($5) # load first limb
+ subu $13,$0,$7
+ addiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ sll $2,$10,$13 # compute function result
+
+ subu $6,$6,$9
+
+.Loop0: lw $3,4($5)
+ addiu $4,$4,4
+ addiu $5,$5,4
+ addiu $9,$9,-1
+ srl $11,$10,$7
+ sll $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sw $8,-4($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: lw $3,4($5)
+ addiu $4,$4,16
+ addiu $6,$6,-4
+ srl $11,$10,$7
+ sll $12,$3,$13
+
+ lw $10,8($5)
+ srl $14,$3,$7
+ or $8,$11,$12
+ sw $8,-16($4)
+ sll $9,$10,$13
+
+ lw $3,12($5)
+ srl $11,$10,$7
+ or $8,$14,$9
+ sw $8,-12($4)
+ sll $12,$3,$13
+
+ lw $10,16($5)
+ srl $14,$3,$7
+ or $8,$11,$12
+ sw $8,-8($4)
+ sll $9,$10,$13
+
+ addiu $5,$5,16
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sw $8,-4($4)
+
+.Lend: srl $8,$10,$7
+ j $31
+ sw $8,0($4)
+ .end __gmpn_rshift
diff --git a/rts/gmp/mpn/mips2/sub_n.s b/rts/gmp/mpn/mips2/sub_n.s
new file mode 100644
index 0000000000..51d34f3ac3
--- /dev/null
+++ b/rts/gmp/mpn/mips2/sub_n.s
@@ -0,0 +1,120 @@
+ # MIPS2 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_sub_n
+ .ent __gmpn_sub_n
+__gmpn_sub_n:
+ .set noreorder
+ .set nomacro
+
+ lw $10,0($5)
+ lw $11,0($6)
+
+ addiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ subu $7,$7,$9
+
+.Loop0: addiu $9,$9,-1
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,0($4)
+ or $2,$2,$8
+
+ addiu $5,$5,4
+ addiu $6,$6,4
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ addiu $4,$4,4
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: addiu $7,$7,-4
+
+ lw $12,4($5)
+ addu $11,$11,$2
+ lw $13,4($6)
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,0($4)
+ or $2,$2,$8
+
+ lw $10,8($5)
+ addu $13,$13,$2
+ lw $11,8($6)
+ sltu $8,$13,$2
+ subu $13,$12,$13
+ sltu $2,$12,$13
+ sw $13,4($4)
+ or $2,$2,$8
+
+ lw $12,12($5)
+ addu $11,$11,$2
+ lw $13,12($6)
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,8($4)
+ or $2,$2,$8
+
+ lw $10,16($5)
+ addu $13,$13,$2
+ lw $11,16($6)
+ sltu $8,$13,$2
+ subu $13,$12,$13
+ sltu $2,$12,$13
+ sw $13,12($4)
+ or $2,$2,$8
+
+ addiu $5,$5,16
+ addiu $6,$6,16
+
+ bne $7,$0,.Loop
+ addiu $4,$4,16
+
+.Lend: addu $11,$11,$2
+ sltu $8,$11,$2
+ subu $11,$10,$11
+ sltu $2,$10,$11
+ sw $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_sub_n
diff --git a/rts/gmp/mpn/mips2/submul_1.s b/rts/gmp/mpn/mips2/submul_1.s
new file mode 100644
index 0000000000..495dea3ba2
--- /dev/null
+++ b/rts/gmp/mpn/mips2/submul_1.s
@@ -0,0 +1,97 @@
+ # MIPS __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+ # subtract the product from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_submul_1
+ .ent __gmpn_submul_1
+__gmpn_submul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ lw $8,0($5)
+
+ # warm up phase 1
+ addiu $5,$5,4
+ multu $8,$7
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ addiu $6,$6,-1
+ beq $6,$0,$LC1
+ lw $8,0($5) # load new s1 limb as early as possible
+
+Loop: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addiu $5,$5,4
+ addu $3,$3,$2 # add old carry limb to low product limb
+ multu $8,$7
+ lw $8,0($5) # load new s1 limb as early as possible
+ addiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ subu $3,$10,$3
+ sgtu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ bne $6,$0,Loop
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ multu $8,$7
+ subu $3,$10,$3
+ sgtu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ addiu $4,$4,4
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: lw $10,0($4)
+ mflo $3
+ mfhi $9
+ addu $3,$3,$2
+ sltu $2,$3,$2
+ subu $3,$10,$3
+ sgtu $10,$3,$10
+ addu $2,$2,$10
+ sw $3,0($4)
+ j $31
+ addu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_submul_1
diff --git a/rts/gmp/mpn/mips2/umul.s b/rts/gmp/mpn/mips2/umul.s
new file mode 100644
index 0000000000..40e847614c
--- /dev/null
+++ b/rts/gmp/mpn/mips2/umul.s
@@ -0,0 +1,30 @@
+ # Copyright (C) 1999 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+ .text
+ .align 2
+ .globl __umul_ppmm
+ .ent __umul_ppmm
+__umul_ppmm:
+ multu $5,$6
+ mflo $3
+ mfhi $2
+ sw $3,0($4)
+ j $31
+ .end __umul_ppmm
diff --git a/rts/gmp/mpn/mips3/README b/rts/gmp/mpn/mips3/README
new file mode 100644
index 0000000000..e94b2c7460
--- /dev/null
+++ b/rts/gmp/mpn/mips3/README
@@ -0,0 +1,23 @@
+This directory contains mpn functions optimized for MIPS3. Example of
+processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. On the R4000 and R4400, branches, both the plain and the "likely" ones,
+ take 3 cycles to execute. (The fastest possible loop will take 4 cycles,
+ because of the delay insn.)
+
+ On the R4600, branches takes a single cycle
+
+ On the R8000, branches often take no noticable cycles, as they are
+ executed in a separate function unit..
+
+2. The R4000 and R4400 have a load latency of 4 cycles.
+
+3. On the R4000 and R4400, multiplies take a data-dependent number of
+ cycles, contrary to the SGI documentation. There seem to be 3 or 4
+ possible latencies.
+
+STATUS
+
+Good...
diff --git a/rts/gmp/mpn/mips3/add_n.s b/rts/gmp/mpn/mips3/add_n.s
new file mode 100644
index 0000000000..adad0beaef
--- /dev/null
+++ b/rts/gmp/mpn/mips3/add_n.s
@@ -0,0 +1,120 @@
+ # MIPS3 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_add_n
+ .ent __gmpn_add_n
+__gmpn_add_n:
+ .set noreorder
+ .set nomacro
+
+ ld $10,0($5)
+ ld $11,0($6)
+
+ daddiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ dsubu $7,$7,$9
+
+.Loop0: daddiu $9,$9,-1
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,0($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,8
+ daddiu $6,$6,8
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ daddiu $4,$4,8
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: daddiu $7,$7,-4
+
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,0($4)
+ or $2,$2,$8
+
+ ld $10,16($5)
+ daddu $13,$13,$2
+ ld $11,16($6)
+ sltu $8,$13,$2
+ daddu $13,$12,$13
+ sltu $2,$13,$12
+ sd $13,8($4)
+ or $2,$2,$8
+
+ ld $12,24($5)
+ daddu $11,$11,$2
+ ld $13,24($6)
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,16($4)
+ or $2,$2,$8
+
+ ld $10,32($5)
+ daddu $13,$13,$2
+ ld $11,32($6)
+ sltu $8,$13,$2
+ daddu $13,$12,$13
+ sltu $2,$13,$12
+ sd $13,24($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,32
+ daddiu $6,$6,32
+
+ bne $7,$0,.Loop
+ daddiu $4,$4,32
+
+.Lend: daddu $11,$11,$2
+ sltu $8,$11,$2
+ daddu $11,$10,$11
+ sltu $2,$11,$10
+ sd $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_add_n
diff --git a/rts/gmp/mpn/mips3/addmul_1.s b/rts/gmp/mpn/mips3/addmul_1.s
new file mode 100644
index 0000000000..d390e2298e
--- /dev/null
+++ b/rts/gmp/mpn/mips3/addmul_1.s
@@ -0,0 +1,97 @@
+ # MIPS3 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+ # add the product to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_addmul_1
+ .ent __gmpn_addmul_1
+__gmpn_addmul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ ld $8,0($5)
+
+ # warm up phase 1
+ daddiu $5,$5,8
+ dmultu $8,$7
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC1
+ ld $8,0($5) # load new s1 limb as early as possible
+
+Loop: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddiu $5,$5,8
+ daddu $3,$3,$2 # add old carry limb to low product limb
+ dmultu $8,$7
+ ld $8,0($5) # load new s1 limb as early as possible
+ daddiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ daddu $3,$10,$3
+ sltu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ bne $6,$0,Loop
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ dmultu $8,$7
+ daddu $3,$10,$3
+ sltu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ daddu $3,$10,$3
+ sltu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ j $31
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_addmul_1
diff --git a/rts/gmp/mpn/mips3/gmp-mparam.h b/rts/gmp/mpn/mips3/gmp-mparam.h
new file mode 100644
index 0000000000..656e90c7b0
--- /dev/null
+++ b/rts/gmp/mpn/mips3/gmp-mparam.h
@@ -0,0 +1,58 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the R10000 usign the system cc. */
+/* Generated by tuneup.c, 2000-07-25. */
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 16
+#endif
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 32
+#endif
+
+/* Supressed the TOOM3 values as they looked absolutely crazy
+ (698 and 21 respectively) */
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 58
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 54
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 82
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 159
+#endif
diff --git a/rts/gmp/mpn/mips3/lshift.s b/rts/gmp/mpn/mips3/lshift.s
new file mode 100644
index 0000000000..372606fddf
--- /dev/null
+++ b/rts/gmp/mpn/mips3/lshift.s
@@ -0,0 +1,95 @@
+ # MIPS3 __gmpn_lshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_lshift
+ .ent __gmpn_lshift
+__gmpn_lshift:
+ .set noreorder
+ .set nomacro
+
+ dsll $2,$6,3
+ daddu $5,$5,$2 # make r5 point at end of src
+ ld $10,-8($5) # load first limb
+ dsubu $13,$0,$7
+ daddu $4,$4,$2 # make r4 point at end of res
+ daddiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ dsrl $2,$10,$13 # compute function result
+
+ dsubu $6,$6,$9
+
+.Loop0: ld $3,-16($5)
+ daddiu $4,$4,-8
+ daddiu $5,$5,-8
+ daddiu $9,$9,-1
+ dsll $11,$10,$7
+ dsrl $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sd $8,0($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: ld $3,-16($5)
+ daddiu $4,$4,-32
+ daddiu $6,$6,-4
+ dsll $11,$10,$7
+ dsrl $12,$3,$13
+
+ ld $10,-24($5)
+ dsll $14,$3,$7
+ or $8,$11,$12
+ sd $8,24($4)
+ dsrl $9,$10,$13
+
+ ld $3,-32($5)
+ dsll $11,$10,$7
+ or $8,$14,$9
+ sd $8,16($4)
+ dsrl $12,$3,$13
+
+ ld $10,-40($5)
+ dsll $14,$3,$7
+ or $8,$11,$12
+ sd $8,8($4)
+ dsrl $9,$10,$13
+
+ daddiu $5,$5,-32
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sd $8,0($4)
+
+.Lend: dsll $8,$10,$7
+ j $31
+ sd $8,-8($4)
+ .end __gmpn_lshift
diff --git a/rts/gmp/mpn/mips3/mul_1.s b/rts/gmp/mpn/mips3/mul_1.s
new file mode 100644
index 0000000000..6659e2b4eb
--- /dev/null
+++ b/rts/gmp/mpn/mips3/mul_1.s
@@ -0,0 +1,85 @@
+ # MIPS3 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+ # store the product in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_mul_1
+ .ent __gmpn_mul_1
+__gmpn_mul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ ld $8,0($5)
+
+ # warm up phase 1
+ daddiu $5,$5,8
+ dmultu $8,$7
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC1
+ ld $8,0($5) # load new s1 limb as early as possible
+
+Loop: mflo $10
+ mfhi $9
+ daddiu $5,$5,8
+ daddu $10,$10,$2 # add old carry limb to low product limb
+ dmultu $8,$7
+ ld $8,0($5) # load new s1 limb as early as possible
+ daddiu $6,$6,-1 # decrement loop counter
+ sltu $2,$10,$2 # carry from previous addition -> $2
+ sd $10,0($4)
+ daddiu $4,$4,8
+ bne $6,$0,Loop
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: mflo $10
+ mfhi $9
+ daddu $10,$10,$2
+ sltu $2,$10,$2
+ dmultu $8,$7
+ sd $10,0($4)
+ daddiu $4,$4,8
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: mflo $10
+ mfhi $9
+ daddu $10,$10,$2
+ sltu $2,$10,$2
+ sd $10,0($4)
+ j $31
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_mul_1
diff --git a/rts/gmp/mpn/mips3/rshift.s b/rts/gmp/mpn/mips3/rshift.s
new file mode 100644
index 0000000000..59c7fd3492
--- /dev/null
+++ b/rts/gmp/mpn/mips3/rshift.s
@@ -0,0 +1,92 @@
+ # MIPS3 __gmpn_rshift --
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # src_ptr $5
+ # size $6
+ # cnt $7
+
+ .text
+ .align 2
+ .globl __gmpn_rshift
+ .ent __gmpn_rshift
+__gmpn_rshift:
+ .set noreorder
+ .set nomacro
+
+ ld $10,0($5) # load first limb
+ dsubu $13,$0,$7
+ daddiu $6,$6,-1
+ and $9,$6,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ dsll $2,$10,$13 # compute function result
+
+ dsubu $6,$6,$9
+
+.Loop0: ld $3,8($5)
+ daddiu $4,$4,8
+ daddiu $5,$5,8
+ daddiu $9,$9,-1
+ dsrl $11,$10,$7
+ dsll $12,$3,$13
+ move $10,$3
+ or $8,$11,$12
+ bne $9,$0,.Loop0
+ sd $8,-8($4)
+
+.L0: beq $6,$0,.Lend
+ nop
+
+.Loop: ld $3,8($5)
+ daddiu $4,$4,32
+ daddiu $6,$6,-4
+ dsrl $11,$10,$7
+ dsll $12,$3,$13
+
+ ld $10,16($5)
+ dsrl $14,$3,$7
+ or $8,$11,$12
+ sd $8,-32($4)
+ dsll $9,$10,$13
+
+ ld $3,24($5)
+ dsrl $11,$10,$7
+ or $8,$14,$9
+ sd $8,-24($4)
+ dsll $12,$3,$13
+
+ ld $10,32($5)
+ dsrl $14,$3,$7
+ or $8,$11,$12
+ sd $8,-16($4)
+ dsll $9,$10,$13
+
+ daddiu $5,$5,32
+ or $8,$14,$9
+ bgtz $6,.Loop
+ sd $8,-8($4)
+
+.Lend: dsrl $8,$10,$7
+ j $31
+ sd $8,0($4)
+ .end __gmpn_rshift
diff --git a/rts/gmp/mpn/mips3/sub_n.s b/rts/gmp/mpn/mips3/sub_n.s
new file mode 100644
index 0000000000..c57c824b04
--- /dev/null
+++ b/rts/gmp/mpn/mips3/sub_n.s
@@ -0,0 +1,120 @@
+ # MIPS3 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # s2_ptr $6
+ # size $7
+
+ .text
+ .align 2
+ .globl __gmpn_sub_n
+ .ent __gmpn_sub_n
+__gmpn_sub_n:
+ .set noreorder
+ .set nomacro
+
+ ld $10,0($5)
+ ld $11,0($6)
+
+ daddiu $7,$7,-1
+ and $9,$7,4-1 # number of limbs in first loop
+ beq $9,$0,.L0 # if multiple of 4 limbs, skip first loop
+ move $2,$0
+
+ dsubu $7,$7,$9
+
+.Loop0: daddiu $9,$9,-1
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,0($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,8
+ daddiu $6,$6,8
+ move $10,$12
+ move $11,$13
+ bne $9,$0,.Loop0
+ daddiu $4,$4,8
+
+.L0: beq $7,$0,.Lend
+ nop
+
+.Loop: daddiu $7,$7,-4
+
+ ld $12,8($5)
+ daddu $11,$11,$2
+ ld $13,8($6)
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,0($4)
+ or $2,$2,$8
+
+ ld $10,16($5)
+ daddu $13,$13,$2
+ ld $11,16($6)
+ sltu $8,$13,$2
+ dsubu $13,$12,$13
+ sltu $2,$12,$13
+ sd $13,8($4)
+ or $2,$2,$8
+
+ ld $12,24($5)
+ daddu $11,$11,$2
+ ld $13,24($6)
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,16($4)
+ or $2,$2,$8
+
+ ld $10,32($5)
+ daddu $13,$13,$2
+ ld $11,32($6)
+ sltu $8,$13,$2
+ dsubu $13,$12,$13
+ sltu $2,$12,$13
+ sd $13,24($4)
+ or $2,$2,$8
+
+ daddiu $5,$5,32
+ daddiu $6,$6,32
+
+ bne $7,$0,.Loop
+ daddiu $4,$4,32
+
+.Lend: daddu $11,$11,$2
+ sltu $8,$11,$2
+ dsubu $11,$10,$11
+ sltu $2,$10,$11
+ sd $11,0($4)
+ j $31
+ or $2,$2,$8
+
+ .end __gmpn_sub_n
diff --git a/rts/gmp/mpn/mips3/submul_1.s b/rts/gmp/mpn/mips3/submul_1.s
new file mode 100644
index 0000000000..531f9705a6
--- /dev/null
+++ b/rts/gmp/mpn/mips3/submul_1.s
@@ -0,0 +1,97 @@
+ # MIPS3 __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+ # subtract the product from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $4
+ # s1_ptr $5
+ # size $6
+ # s2_limb $7
+
+ .text
+ .align 4
+ .globl __gmpn_submul_1
+ .ent __gmpn_submul_1
+__gmpn_submul_1:
+ .set noreorder
+ .set nomacro
+
+ # warm up phase 0
+ ld $8,0($5)
+
+ # warm up phase 1
+ daddiu $5,$5,8
+ dmultu $8,$7
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC0
+ move $2,$0 # zero cy2
+
+ daddiu $6,$6,-1
+ beq $6,$0,$LC1
+ ld $8,0($5) # load new s1 limb as early as possible
+
+Loop: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddiu $5,$5,8
+ daddu $3,$3,$2 # add old carry limb to low product limb
+ dmultu $8,$7
+ ld $8,0($5) # load new s1 limb as early as possible
+ daddiu $6,$6,-1 # decrement loop counter
+ sltu $2,$3,$2 # carry from previous addition -> $2
+ dsubu $3,$10,$3
+ sgtu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ bne $6,$0,Loop
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ dmultu $8,$7
+ dsubu $3,$10,$3
+ sgtu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ daddiu $4,$4,8
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0: ld $10,0($4)
+ mflo $3
+ mfhi $9
+ daddu $3,$3,$2
+ sltu $2,$3,$2
+ dsubu $3,$10,$3
+ sgtu $10,$3,$10
+ daddu $2,$2,$10
+ sd $3,0($4)
+ j $31
+ daddu $2,$9,$2 # add high product limb and carry from addition
+
+ .end __gmpn_submul_1
diff --git a/rts/gmp/mpn/mp_bases.c b/rts/gmp/mpn/mp_bases.c
new file mode 100644
index 0000000000..011c328c80
--- /dev/null
+++ b/rts/gmp/mpn/mp_bases.c
@@ -0,0 +1,550 @@
+/* __mp_bases -- Structure for conversion between internal binary
+ format and strings in base 2..255. The fields are explained in
+ gmp-impl.h.
+
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if BITS_PER_MP_LIMB == 32
+const struct bases __mp_bases[256] =
+{
+ /* 0 */ {0, 0.0, 0, 0},
+ /* 1 */ {0, 1e38, 0, 0},
+ /* 2 */ {32, 1.0000000000000000, 0x1, 0x0},
+ /* 3 */ {20, 0.6309297535714575, 0xcfd41b91, 0x3b563c24},
+ /* 4 */ {16, 0.5000000000000000, 0x2, 0x0},
+ /* 5 */ {13, 0.4306765580733931, 0x48c27395, 0xc25c2684},
+ /* 6 */ {12, 0.3868528072345416, 0x81bf1000, 0xf91bd1b6},
+ /* 7 */ {11, 0.3562071871080222, 0x75db9c97, 0x1607a2cb},
+ /* 8 */ {10, 0.3333333333333334, 0x3, 0x0},
+ /* 9 */ {10, 0.3154648767857287, 0xcfd41b91, 0x3b563c24},
+ /* 10 */ {9, 0.3010299956639811, 0x3b9aca00, 0x12e0be82},
+ /* 11 */ {9, 0.2890648263178878, 0x8c8b6d2b, 0xd24cde04},
+ /* 12 */ {8, 0.2789429456511298, 0x19a10000, 0x3fa39ab5},
+ /* 13 */ {8, 0.2702381544273197, 0x309f1021, 0x50f8ac5f},
+ /* 14 */ {8, 0.2626495350371936, 0x57f6c100, 0x74843b1e},
+ /* 15 */ {8, 0.2559580248098155, 0x98c29b81, 0xad0326c2},
+ /* 16 */ {8, 0.2500000000000000, 0x4, 0x0},
+ /* 17 */ {7, 0.2446505421182260, 0x18754571, 0x4ef0b6bd},
+ /* 18 */ {7, 0.2398124665681315, 0x247dbc80, 0xc0fc48a1},
+ /* 19 */ {7, 0.2354089133666382, 0x3547667b, 0x33838942},
+ /* 20 */ {7, 0.2313782131597592, 0x4c4b4000, 0xad7f29ab},
+ /* 21 */ {7, 0.2276702486969530, 0x6b5a6e1d, 0x313c3d15},
+ /* 22 */ {7, 0.2242438242175754, 0x94ace180, 0xb8cca9e0},
+ /* 23 */ {7, 0.2210647294575037, 0xcaf18367, 0x42ed6de9},
+ /* 24 */ {6, 0.2181042919855316, 0xb640000, 0x67980e0b},
+ /* 25 */ {6, 0.2153382790366965, 0xe8d4a51, 0x19799812},
+ /* 26 */ {6, 0.2127460535533632, 0x1269ae40, 0xbce85396},
+ /* 27 */ {6, 0.2103099178571525, 0x17179149, 0x62c103a9},
+ /* 28 */ {6, 0.2080145976765095, 0x1cb91000, 0x1d353d43},
+ /* 29 */ {6, 0.2058468324604344, 0x23744899, 0xce1decea},
+ /* 30 */ {6, 0.2037950470905062, 0x2b73a840, 0x790fc511},
+ /* 31 */ {6, 0.2018490865820999, 0x34e63b41, 0x35b865a0},
+ /* 32 */ {6, 0.2000000000000000, 0x5, 0x0},
+ /* 33 */ {6, 0.1982398631705605, 0x4cfa3cc1, 0xa9aed1b3},
+ /* 34 */ {6, 0.1965616322328226, 0x5c13d840, 0x63dfc229},
+ /* 35 */ {6, 0.1949590218937863, 0x6d91b519, 0x2b0fee30},
+ /* 36 */ {6, 0.1934264036172708, 0x81bf1000, 0xf91bd1b6},
+ /* 37 */ {6, 0.1919587200065601, 0x98ede0c9, 0xac89c3a9},
+ /* 38 */ {6, 0.1905514124267734, 0xb3773e40, 0x6d2c32fe},
+ /* 39 */ {6, 0.1892003595168700, 0xd1bbc4d1, 0x387907c9},
+ /* 40 */ {6, 0.1879018247091076, 0xf4240000, 0xc6f7a0b},
+ /* 41 */ {5, 0.1866524112389434, 0x6e7d349, 0x28928154},
+ /* 42 */ {5, 0.1854490234153689, 0x7ca30a0, 0x6e8629d},
+ /* 43 */ {5, 0.1842888331487062, 0x8c32bbb, 0xd373dca0},
+ /* 44 */ {5, 0.1831692509136336, 0x9d46c00, 0xa0b17895},
+ /* 45 */ {5, 0.1820879004699383, 0xaffacfd, 0x746811a5},
+ /* 46 */ {5, 0.1810425967800402, 0xc46bee0, 0x4da6500f},
+ /* 47 */ {5, 0.1800313266566926, 0xdab86ef, 0x2ba23582},
+ /* 48 */ {5, 0.1790522317510414, 0xf300000, 0xdb20a88},
+ /* 49 */ {5, 0.1781035935540111, 0x10d63af1, 0xe68d5ce4},
+ /* 50 */ {5, 0.1771838201355579, 0x12a05f20, 0xb7cdfd9d},
+ /* 51 */ {5, 0.1762914343888821, 0x1490aae3, 0x8e583933},
+ /* 52 */ {5, 0.1754250635819545, 0x16a97400, 0x697cc3ea},
+ /* 53 */ {5, 0.1745834300480449, 0x18ed2825, 0x48a5ca6c},
+ /* 54 */ {5, 0.1737653428714400, 0x1b5e4d60, 0x2b52db16},
+ /* 55 */ {5, 0.1729696904450771, 0x1dff8297, 0x111586a6},
+ /* 56 */ {5, 0.1721954337940981, 0x20d38000, 0xf31d2b36},
+ /* 57 */ {5, 0.1714416005739134, 0x23dd1799, 0xc8d76d19},
+ /* 58 */ {5, 0.1707072796637201, 0x271f35a0, 0xa2cb1eb4},
+ /* 59 */ {5, 0.1699916162869140, 0x2a9ce10b, 0x807c3ec3},
+ /* 60 */ {5, 0.1692938075987814, 0x2e593c00, 0x617ec8bf},
+ /* 61 */ {5, 0.1686130986895011, 0x3257844d, 0x45746cbe},
+ /* 62 */ {5, 0.1679487789570419, 0x369b13e0, 0x2c0aa273},
+ /* 63 */ {5, 0.1673001788101741, 0x3b27613f, 0x14f90805},
+ /* 64 */ {5, 0.1666666666666667, 0x6, 0x0},
+ /* 65 */ {5, 0.1660476462159378, 0x4528a141, 0xd9cf0829},
+ /* 66 */ {5, 0.1654425539190583, 0x4aa51420, 0xb6fc4841},
+ /* 67 */ {5, 0.1648508567221604, 0x50794633, 0x973054cb},
+ /* 68 */ {5, 0.1642720499620502, 0x56a94400, 0x7a1dbe4b},
+ /* 69 */ {5, 0.1637056554452156, 0x5d393975, 0x5f7fcd7f},
+ /* 70 */ {5, 0.1631512196835108, 0x642d7260, 0x47196c84},
+ /* 71 */ {5, 0.1626083122716341, 0x6b8a5ae7, 0x30b43635},
+ /* 72 */ {5, 0.1620765243931223, 0x73548000, 0x1c1fa5f6},
+ /* 73 */ {5, 0.1615554674429964, 0x7b908fe9, 0x930634a},
+ /* 74 */ {5, 0.1610447717564445, 0x84435aa0, 0xef7f4a3c},
+ /* 75 */ {5, 0.1605440854340214, 0x8d71d25b, 0xcf5552d2},
+ /* 76 */ {5, 0.1600530732548213, 0x97210c00, 0xb1a47c8e},
+ /* 77 */ {5, 0.1595714156699382, 0xa1563f9d, 0x9634b43e},
+ /* 78 */ {5, 0.1590988078692941, 0xac16c8e0, 0x7cd3817d},
+ /* 79 */ {5, 0.1586349589155960, 0xb768278f, 0x65536761},
+ /* 80 */ {5, 0.1581795909397823, 0xc3500000, 0x4f8b588e},
+ /* 81 */ {5, 0.1577324383928644, 0xcfd41b91, 0x3b563c24},
+ /* 82 */ {5, 0.1572932473495469, 0xdcfa6920, 0x28928154},
+ /* 83 */ {5, 0.1568617748594410, 0xeac8fd83, 0x1721bfb0},
+ /* 84 */ {5, 0.1564377883420716, 0xf9461400, 0x6e8629d},
+ /* 85 */ {4, 0.1560210650222250, 0x31c84b1, 0x491cc17c},
+ /* 86 */ {4, 0.1556113914024940, 0x342ab10, 0x3a11d83b},
+ /* 87 */ {4, 0.1552085627701551, 0x36a2c21, 0x2be074cd},
+ /* 88 */ {4, 0.1548123827357682, 0x3931000, 0x1e7a02e7},
+ /* 89 */ {4, 0.1544226628011101, 0x3bd5ee1, 0x11d10edd},
+ /* 90 */ {4, 0.1540392219542636, 0x3e92110, 0x5d92c68},
+ /* 91 */ {4, 0.1536618862898642, 0x4165ef1, 0xf50dbfb2},
+ /* 92 */ {4, 0.1532904886526781, 0x4452100, 0xdf9f1316},
+ /* 93 */ {4, 0.1529248683028321, 0x4756fd1, 0xcb52a684},
+ /* 94 */ {4, 0.1525648706011593, 0x4a75410, 0xb8163e97},
+ /* 95 */ {4, 0.1522103467132434, 0x4dad681, 0xa5d8f269},
+ /* 96 */ {4, 0.1518611533308632, 0x5100000, 0x948b0fcd},
+ /* 97 */ {4, 0.1515171524096389, 0x546d981, 0x841e0215},
+ /* 98 */ {4, 0.1511782109217764, 0x57f6c10, 0x74843b1e},
+ /* 99 */ {4, 0.1508442006228941, 0x5b9c0d1, 0x65b11e6e},
+ /* 100 */ {4, 0.1505149978319906, 0x5f5e100, 0x5798ee23},
+ /* 101 */ {4, 0.1501904832236879, 0x633d5f1, 0x4a30b99b},
+ /* 102 */ {4, 0.1498705416319474, 0x673a910, 0x3d6e4d94},
+ /* 103 */ {4, 0.1495550618645152, 0x6b563e1, 0x314825b0},
+ /* 104 */ {4, 0.1492439365274121, 0x6f91000, 0x25b55f2e},
+ /* 105 */ {4, 0.1489370618588283, 0x73eb721, 0x1aadaccb},
+ /* 106 */ {4, 0.1486343375718350, 0x7866310, 0x10294ba2},
+ /* 107 */ {4, 0.1483356667053617, 0x7d01db1, 0x620f8f6},
+ /* 108 */ {4, 0.1480409554829326, 0x81bf100, 0xf91bd1b6},
+ /* 109 */ {4, 0.1477501131786861, 0x869e711, 0xe6d37b2a},
+ /* 110 */ {4, 0.1474630519902391, 0x8ba0a10, 0xd55cff6e},
+ /* 111 */ {4, 0.1471796869179852, 0x90c6441, 0xc4ad2db2},
+ /* 112 */ {4, 0.1468999356504447, 0x9610000, 0xb4b985cf},
+ /* 113 */ {4, 0.1466237184553111, 0x9b7e7c1, 0xa5782bef},
+ /* 114 */ {4, 0.1463509580758620, 0xa112610, 0x96dfdd2a},
+ /* 115 */ {4, 0.1460815796324244, 0xa6cc591, 0x88e7e509},
+ /* 116 */ {4, 0.1458155105286054, 0xacad100, 0x7b8813d3},
+ /* 117 */ {4, 0.1455526803620167, 0xb2b5331, 0x6eb8b595},
+ /* 118 */ {4, 0.1452930208392428, 0xb8e5710, 0x627289db},
+ /* 119 */ {4, 0.1450364656948130, 0xbf3e7a1, 0x56aebc07},
+ /* 120 */ {4, 0.1447829506139581, 0xc5c1000, 0x4b66dc33},
+ /* 121 */ {4, 0.1445324131589439, 0xcc6db61, 0x4094d8a3},
+ /* 122 */ {4, 0.1442847926987864, 0xd345510, 0x3632f7a5},
+ /* 123 */ {4, 0.1440400303421672, 0xda48871, 0x2c3bd1f0},
+ /* 124 */ {4, 0.1437980688733775, 0xe178100, 0x22aa4d5f},
+ /* 125 */ {4, 0.1435588526911310, 0xe8d4a51, 0x19799812},
+ /* 126 */ {4, 0.1433223277500932, 0xf05f010, 0x10a523e5},
+ /* 127 */ {4, 0.1430884415049874, 0xf817e01, 0x828a237},
+ /* 128 */ {4, 0.1428571428571428, 0x7, 0x0},
+ /* 129 */ {4, 0.1426283821033600, 0x10818201, 0xf04ec452},
+ /* 130 */ {4, 0.1424021108869747, 0x11061010, 0xe136444a},
+ /* 131 */ {4, 0.1421782821510107, 0x118db651, 0xd2af9589},
+ /* 132 */ {4, 0.1419568500933153, 0x12188100, 0xc4b42a83},
+ /* 133 */ {4, 0.1417377701235801, 0x12a67c71, 0xb73dccf5},
+ /* 134 */ {4, 0.1415209988221527, 0x1337b510, 0xaa4698c5},
+ /* 135 */ {4, 0.1413064939005528, 0x13cc3761, 0x9dc8f729},
+ /* 136 */ {4, 0.1410942141636095, 0x14641000, 0x91bf9a30},
+ /* 137 */ {4, 0.1408841194731412, 0x14ff4ba1, 0x86257887},
+ /* 138 */ {4, 0.1406761707131039, 0x159df710, 0x7af5c98c},
+ /* 139 */ {4, 0.1404703297561400, 0x16401f31, 0x702c01a0},
+ /* 140 */ {4, 0.1402665594314587, 0x16e5d100, 0x65c3ceb1},
+ /* 141 */ {4, 0.1400648234939879, 0x178f1991, 0x5bb91502},
+ /* 142 */ {4, 0.1398650865947379, 0x183c0610, 0x5207ec23},
+ /* 143 */ {4, 0.1396673142523192, 0x18eca3c1, 0x48ac9c19},
+ /* 144 */ {4, 0.1394714728255649, 0x19a10000, 0x3fa39ab5},
+ /* 145 */ {4, 0.1392775294872041, 0x1a592841, 0x36e98912},
+ /* 146 */ {4, 0.1390854521985406, 0x1b152a10, 0x2e7b3140},
+ /* 147 */ {4, 0.1388952096850913, 0x1bd51311, 0x2655840b},
+ /* 148 */ {4, 0.1387067714131417, 0x1c98f100, 0x1e7596ea},
+ /* 149 */ {4, 0.1385201075671774, 0x1d60d1b1, 0x16d8a20d},
+ /* 150 */ {4, 0.1383351890281539, 0x1e2cc310, 0xf7bfe87},
+ /* 151 */ {4, 0.1381519873525671, 0x1efcd321, 0x85d2492},
+ /* 152 */ {4, 0.1379704747522905, 0x1fd11000, 0x179a9f4},
+ /* 153 */ {4, 0.1377906240751463, 0x20a987e1, 0xf59e80eb},
+ /* 154 */ {4, 0.1376124087861776, 0x21864910, 0xe8b768db},
+ /* 155 */ {4, 0.1374358029495937, 0x226761f1, 0xdc39d6d5},
+ /* 156 */ {4, 0.1372607812113589, 0x234ce100, 0xd021c5d1},
+ /* 157 */ {4, 0.1370873187823978, 0x2436d4d1, 0xc46b5e37},
+ /* 158 */ {4, 0.1369153914223921, 0x25254c10, 0xb912f39c},
+ /* 159 */ {4, 0.1367449754241439, 0x26185581, 0xae150294},
+ /* 160 */ {4, 0.1365760475984821, 0x27100000, 0xa36e2eb1},
+ /* 161 */ {4, 0.1364085852596902, 0x280c5a81, 0x991b4094},
+ /* 162 */ {4, 0.1362425662114337, 0x290d7410, 0x8f19241e},
+ /* 163 */ {4, 0.1360779687331669, 0x2a135bd1, 0x8564e6b7},
+ /* 164 */ {4, 0.1359147715670014, 0x2b1e2100, 0x7bfbb5b4},
+ /* 165 */ {4, 0.1357529539050150, 0x2c2dd2f1, 0x72dadcc8},
+ /* 166 */ {4, 0.1355924953769863, 0x2d428110, 0x69ffc498},
+ /* 167 */ {4, 0.1354333760385373, 0x2e5c3ae1, 0x6167f154},
+ /* 168 */ {4, 0.1352755763596663, 0x2f7b1000, 0x5911016e},
+ /* 169 */ {4, 0.1351190772136599, 0x309f1021, 0x50f8ac5f},
+ /* 170 */ {4, 0.1349638598663645, 0x31c84b10, 0x491cc17c},
+ /* 171 */ {4, 0.1348099059658079, 0x32f6d0b1, 0x417b26d8},
+ /* 172 */ {4, 0.1346571975321549, 0x342ab100, 0x3a11d83b},
+ /* 173 */ {4, 0.1345057169479844, 0x3563fc11, 0x32dee622},
+ /* 174 */ {4, 0.1343554469488779, 0x36a2c210, 0x2be074cd},
+ /* 175 */ {4, 0.1342063706143054, 0x37e71341, 0x2514bb58},
+ /* 176 */ {4, 0.1340584713587980, 0x39310000, 0x1e7a02e7},
+ /* 177 */ {4, 0.1339117329233981, 0x3a8098c1, 0x180ea5d0},
+ /* 178 */ {4, 0.1337661393673756, 0x3bd5ee10, 0x11d10edd},
+ /* 179 */ {4, 0.1336216750601996, 0x3d311091, 0xbbfb88e},
+ /* 180 */ {4, 0.1334783246737591, 0x3e921100, 0x5d92c68},
+ /* 181 */ {4, 0.1333360731748201, 0x3ff90031, 0x1c024c},
+ /* 182 */ {4, 0.1331949058177136, 0x4165ef10, 0xf50dbfb2},
+ /* 183 */ {4, 0.1330548081372441, 0x42d8eea1, 0xea30efa3},
+ /* 184 */ {4, 0.1329157659418126, 0x44521000, 0xdf9f1316},
+ /* 185 */ {4, 0.1327777653067443, 0x45d16461, 0xd555c0c9},
+ /* 186 */ {4, 0.1326407925678156, 0x4756fd10, 0xcb52a684},
+ /* 187 */ {4, 0.1325048343149731, 0x48e2eb71, 0xc193881f},
+ /* 188 */ {4, 0.1323698773862368, 0x4a754100, 0xb8163e97},
+ /* 189 */ {4, 0.1322359088617821, 0x4c0e0f51, 0xaed8b724},
+ /* 190 */ {4, 0.1321029160581950, 0x4dad6810, 0xa5d8f269},
+ /* 191 */ {4, 0.1319708865228925, 0x4f535d01, 0x9d15039d},
+ /* 192 */ {4, 0.1318398080287045, 0x51000000, 0x948b0fcd},
+ /* 193 */ {4, 0.1317096685686114, 0x52b36301, 0x8c394d1d},
+ /* 194 */ {4, 0.1315804563506306, 0x546d9810, 0x841e0215},
+ /* 195 */ {4, 0.1314521597928493, 0x562eb151, 0x7c3784f8},
+ /* 196 */ {4, 0.1313247675185968, 0x57f6c100, 0x74843b1e},
+ /* 197 */ {4, 0.1311982683517524, 0x59c5d971, 0x6d02985d},
+ /* 198 */ {4, 0.1310726513121843, 0x5b9c0d10, 0x65b11e6e},
+ /* 199 */ {4, 0.1309479056113158, 0x5d796e61, 0x5e8e5c64},
+ /* 200 */ {4, 0.1308240206478128, 0x5f5e1000, 0x5798ee23},
+ /* 201 */ {4, 0.1307009860033912, 0x614a04a1, 0x50cf7bde},
+ /* 202 */ {4, 0.1305787914387386, 0x633d5f10, 0x4a30b99b},
+ /* 203 */ {4, 0.1304574268895465, 0x65383231, 0x43bb66bd},
+ /* 204 */ {4, 0.1303368824626505, 0x673a9100, 0x3d6e4d94},
+ /* 205 */ {4, 0.1302171484322746, 0x69448e91, 0x374842ee},
+ /* 206 */ {4, 0.1300982152363760, 0x6b563e10, 0x314825b0},
+ /* 207 */ {4, 0.1299800734730872, 0x6d6fb2c1, 0x2b6cde75},
+ /* 208 */ {4, 0.1298627138972530, 0x6f910000, 0x25b55f2e},
+ /* 209 */ {4, 0.1297461274170591, 0x71ba3941, 0x2020a2c5},
+ /* 210 */ {4, 0.1296303050907487, 0x73eb7210, 0x1aadaccb},
+ /* 211 */ {4, 0.1295152381234257, 0x7624be11, 0x155b891f},
+ /* 212 */ {4, 0.1294009178639407, 0x78663100, 0x10294ba2},
+ /* 213 */ {4, 0.1292873358018581, 0x7aafdeb1, 0xb160fe9},
+ /* 214 */ {4, 0.1291744835645007, 0x7d01db10, 0x620f8f6},
+ /* 215 */ {4, 0.1290623529140715, 0x7f5c3a21, 0x14930ef},
+ /* 216 */ {4, 0.1289509357448472, 0x81bf1000, 0xf91bd1b6},
+ /* 217 */ {4, 0.1288402240804449, 0x842a70e1, 0xefdcb0c7},
+ /* 218 */ {4, 0.1287302100711567, 0x869e7110, 0xe6d37b2a},
+ /* 219 */ {4, 0.1286208859913518, 0x891b24f1, 0xddfeb94a},
+ /* 220 */ {4, 0.1285122442369443, 0x8ba0a100, 0xd55cff6e},
+ /* 221 */ {4, 0.1284042773229231, 0x8e2ef9d1, 0xcceced50},
+ /* 222 */ {4, 0.1282969778809442, 0x90c64410, 0xc4ad2db2},
+ /* 223 */ {4, 0.1281903386569819, 0x93669481, 0xbc9c75f9},
+ /* 224 */ {4, 0.1280843525090381, 0x96100000, 0xb4b985cf},
+ /* 225 */ {4, 0.1279790124049077, 0x98c29b81, 0xad0326c2},
+ /* 226 */ {4, 0.1278743114199984, 0x9b7e7c10, 0xa5782bef},
+ /* 227 */ {4, 0.1277702427352035, 0x9e43b6d1, 0x9e1771a9},
+ /* 228 */ {4, 0.1276667996348261, 0xa1126100, 0x96dfdd2a},
+ /* 229 */ {4, 0.1275639755045533, 0xa3ea8ff1, 0x8fd05c41},
+ /* 230 */ {4, 0.1274617638294791, 0xa6cc5910, 0x88e7e509},
+ /* 231 */ {4, 0.1273601581921741, 0xa9b7d1e1, 0x8225759d},
+ /* 232 */ {4, 0.1272591522708010, 0xacad1000, 0x7b8813d3},
+ /* 233 */ {4, 0.1271587398372755, 0xafac2921, 0x750eccf9},
+ /* 234 */ {4, 0.1270589147554692, 0xb2b53310, 0x6eb8b595},
+ /* 235 */ {4, 0.1269596709794558, 0xb5c843b1, 0x6884e923},
+ /* 236 */ {4, 0.1268610025517973, 0xb8e57100, 0x627289db},
+ /* 237 */ {4, 0.1267629036018709, 0xbc0cd111, 0x5c80c07b},
+ /* 238 */ {4, 0.1266653683442337, 0xbf3e7a10, 0x56aebc07},
+ /* 239 */ {4, 0.1265683910770258, 0xc27a8241, 0x50fbb19b},
+ /* 240 */ {4, 0.1264719661804097, 0xc5c10000, 0x4b66dc33},
+ /* 241 */ {4, 0.1263760881150453, 0xc91209c1, 0x45ef7c7c},
+ /* 242 */ {4, 0.1262807514205999, 0xcc6db610, 0x4094d8a3},
+ /* 243 */ {4, 0.1261859507142915, 0xcfd41b91, 0x3b563c24},
+ /* 244 */ {4, 0.1260916806894653, 0xd3455100, 0x3632f7a5},
+ /* 245 */ {4, 0.1259979361142023, 0xd6c16d31, 0x312a60c3},
+ /* 246 */ {4, 0.1259047118299582, 0xda488710, 0x2c3bd1f0},
+ /* 247 */ {4, 0.1258120027502338, 0xdddab5a1, 0x2766aa45},
+ /* 248 */ {4, 0.1257198038592741, 0xe1781000, 0x22aa4d5f},
+ /* 249 */ {4, 0.1256281102107963, 0xe520ad61, 0x1e06233c},
+ /* 250 */ {4, 0.1255369169267456, 0xe8d4a510, 0x19799812},
+ /* 251 */ {4, 0.1254462191960791, 0xec940e71, 0x15041c33},
+ /* 252 */ {4, 0.1253560122735751, 0xf05f0100, 0x10a523e5},
+ /* 253 */ {4, 0.1252662914786691, 0xf4359451, 0xc5c2749},
+ /* 254 */ {4, 0.1251770521943144, 0xf817e010, 0x828a237},
+ /* 255 */ {4, 0.1250882898658681, 0xfc05fc01, 0x40a1423},
+};
+#endif
+#if BITS_PER_MP_LIMB == 64
+const struct bases __mp_bases[256] =
+{
+ /* 0 */ {0, 0.0, 0, 0},
+ /* 1 */ {0, 1e38, 0, 0},
+ /* 2 */ {64, 1.0000000000000000, CNST_LIMB(0x1), CNST_LIMB(0x0)},
+ /* 3 */ {40, 0.6309297535714574, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 4 */ {32, 0.5000000000000000, CNST_LIMB(0x2), CNST_LIMB(0x0)},
+ /* 5 */ {27, 0.4306765580733931, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
+ /* 6 */ {24, 0.3868528072345416, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+ /* 7 */ {22, 0.3562071871080222, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
+ /* 8 */ {21, 0.3333333333333334, CNST_LIMB(0x3), CNST_LIMB(0x0)},
+ /* 9 */ {20, 0.3154648767857287, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 10 */ {19, 0.3010299956639811, CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a)},
+ /* 11 */ {18, 0.2890648263178878, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
+ /* 12 */ {17, 0.2789429456511298, CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f)},
+ /* 13 */ {17, 0.2702381544273197, CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3)},
+ /* 14 */ {16, 0.2626495350371936, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
+ /* 15 */ {16, 0.2559580248098155, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
+ /* 16 */ {16, 0.2500000000000000, CNST_LIMB(0x4), CNST_LIMB(0x0)},
+ /* 17 */ {15, 0.2446505421182260, CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323)},
+ /* 18 */ {15, 0.2398124665681315, CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c)},
+ /* 19 */ {15, 0.2354089133666382, CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec)},
+ /* 20 */ {14, 0.2313782131597592, CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e)},
+ /* 21 */ {14, 0.2276702486969530, CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239)},
+ /* 22 */ {14, 0.2242438242175754, CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08)},
+ /* 23 */ {14, 0.2210647294575037, CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38)},
+ /* 24 */ {13, 0.2181042919855316, CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1)},
+ /* 25 */ {13, 0.2153382790366965, CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5)},
+ /* 26 */ {13, 0.2127460535533632, CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9)},
+ /* 27 */ {13, 0.2103099178571525, CNST_LIMB(0x383d9170b85ff80b), CNST_LIMB(0x2351ffcaa9c7c4ae)},
+ /* 28 */ {13, 0.2080145976765095, CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636)},
+ /* 29 */ {13, 0.2058468324604344, CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d)},
+ /* 30 */ {13, 0.2037950470905062, CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6)},
+ /* 31 */ {12, 0.2018490865820999, CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5)},
+ /* 32 */ {12, 0.2000000000000000, CNST_LIMB(0x5), CNST_LIMB(0x0)},
+ /* 33 */ {12, 0.1982398631705605, CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f)},
+ /* 34 */ {12, 0.1965616322328226, CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec)},
+ /* 35 */ {12, 0.1949590218937863, CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509)},
+ /* 36 */ {12, 0.1934264036172708, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+ /* 37 */ {12, 0.1919587200065601, CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f)},
+ /* 38 */ {12, 0.1905514124267734, CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544)},
+ /* 39 */ {12, 0.1892003595168700, CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c)},
+ /* 40 */ {12, 0.1879018247091076, CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197)},
+ /* 41 */ {11, 0.1866524112389434, CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae)},
+ /* 42 */ {11, 0.1854490234153689, CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7)},
+ /* 43 */ {11, 0.1842888331487062, CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94)},
+ /* 44 */ {11, 0.1831692509136336, CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a)},
+ /* 45 */ {11, 0.1820879004699383, CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46)},
+ /* 46 */ {11, 0.1810425967800402, CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083)},
+ /* 47 */ {11, 0.1800313266566926, CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988)},
+ /* 48 */ {11, 0.1790522317510413, CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167)},
+ /* 49 */ {11, 0.1781035935540111, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
+ /* 50 */ {11, 0.1771838201355579, CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962)},
+ /* 51 */ {11, 0.1762914343888821, CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e)},
+ /* 52 */ {11, 0.1754250635819545, CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d)},
+ /* 53 */ {11, 0.1745834300480449, CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25)},
+ /* 54 */ {11, 0.1737653428714400, CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a)},
+ /* 55 */ {11, 0.1729696904450771, CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d)},
+ /* 56 */ {11, 0.1721954337940981, CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1)},
+ /* 57 */ {10, 0.1714416005739134, CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1)},
+ /* 58 */ {10, 0.1707072796637201, CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c)},
+ /* 59 */ {10, 0.1699916162869140, CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492)},
+ /* 60 */ {10, 0.1692938075987814, CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5)},
+ /* 61 */ {10, 0.1686130986895011, CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416)},
+ /* 62 */ {10, 0.1679487789570419, CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5)},
+ /* 63 */ {10, 0.1673001788101741, CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641)},
+ /* 64 */ {10, 0.1666666666666667, CNST_LIMB(0x6), CNST_LIMB(0x0)},
+ /* 65 */ {10, 0.1660476462159378, CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1)},
+ /* 66 */ {10, 0.1654425539190583, CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33)},
+ /* 67 */ {10, 0.1648508567221604, CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0)},
+ /* 68 */ {10, 0.1642720499620502, CNST_LIMB(0x1d56299ada100000), CNST_LIMB(0x173decb64d1d4409)},
+ /* 69 */ {10, 0.1637056554452156, CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f)},
+ /* 70 */ {10, 0.1631512196835108, CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62)},
+ /* 71 */ {10, 0.1626083122716341, CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2)},
+ /* 72 */ {10, 0.1620765243931223, CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2)},
+ /* 73 */ {10, 0.1615554674429964, CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b)},
+ /* 74 */ {10, 0.1610447717564444, CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5)},
+ /* 75 */ {10, 0.1605440854340214, CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e)},
+ /* 76 */ {10, 0.1600530732548213, CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd)},
+ /* 77 */ {10, 0.1595714156699382, CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec)},
+ /* 78 */ {10, 0.1590988078692941, CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505)},
+ /* 79 */ {10, 0.1586349589155960, CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac)},
+ /* 80 */ {10, 0.1581795909397823, CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d)},
+ /* 81 */ {10, 0.1577324383928644, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 82 */ {10, 0.1572932473495469, CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f)},
+ /* 83 */ {10, 0.1568617748594410, CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47)},
+ /* 84 */ {10, 0.1564377883420716, CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a)},
+ /* 85 */ {9, 0.1560210650222250, CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4)},
+ /* 86 */ {9, 0.1556113914024940, CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e)},
+ /* 87 */ {9, 0.1552085627701551, CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89)},
+ /* 88 */ {9, 0.1548123827357682, CNST_LIMB(0x4645b6958000000), CNST_LIMB(0xd24cde0463108cfa)},
+ /* 89 */ {9, 0.1544226628011101, CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383)},
+ /* 90 */ {9, 0.1540392219542636, CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10)},
+ /* 91 */ {9, 0.1536618862898642, CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43)},
+ /* 92 */ {9, 0.1532904886526781, CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447)},
+ /* 93 */ {9, 0.1529248683028321, CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0)},
+ /* 94 */ {9, 0.1525648706011593, CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4)},
+ /* 95 */ {9, 0.1522103467132434, CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f)},
+ /* 96 */ {9, 0.1518611533308632, CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794)},
+ /* 97 */ {9, 0.1515171524096389, CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc)},
+ /* 98 */ {9, 0.1511782109217764, CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3)},
+ /* 99 */ {9, 0.1508442006228941, CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518)},
+ /* 100 */ {9, 0.1505149978319906, CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e)},
+ /* 101 */ {9, 0.1501904832236879, CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f)},
+ /* 102 */ {9, 0.1498705416319474, CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1)},
+ /* 103 */ {9, 0.1495550618645152, CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328)},
+ /* 104 */ {9, 0.1492439365274121, CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c)},
+ /* 105 */ {9, 0.1489370618588283, CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179)},
+ /* 106 */ {9, 0.1486343375718350, CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf)},
+ /* 107 */ {9, 0.1483356667053617, CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32)},
+ /* 108 */ {9, 0.1480409554829326, CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188)},
+ /* 109 */ {9, 0.1477501131786861, CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c)},
+ /* 110 */ {9, 0.1474630519902391, CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f)},
+ /* 111 */ {9, 0.1471796869179852, CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0)},
+ /* 112 */ {9, 0.1468999356504447, CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8)},
+ /* 113 */ {9, 0.1466237184553111, CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82)},
+ /* 114 */ {9, 0.1463509580758620, CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a)},
+ /* 115 */ {9, 0.1460815796324244, CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5)},
+ /* 116 */ {9, 0.1458155105286054, CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610)},
+ /* 117 */ {9, 0.1455526803620167, CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c)},
+ /* 118 */ {9, 0.1452930208392428, CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977)},
+ /* 119 */ {9, 0.1450364656948130, CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e)},
+ /* 120 */ {9, 0.1447829506139581, CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5)},
+ /* 121 */ {9, 0.1445324131589439, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
+ /* 122 */ {9, 0.1442847926987864, CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5)},
+ /* 123 */ {9, 0.1440400303421672, CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b)},
+ /* 124 */ {9, 0.1437980688733775, CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10)},
+ /* 125 */ {9, 0.1435588526911310, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
+ /* 126 */ {9, 0.1433223277500932, CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8)},
+ /* 127 */ {9, 0.1430884415049874, CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1)},
+ /* 128 */ {9, 0.1428571428571428, CNST_LIMB(0x7), CNST_LIMB(0x0)},
+ /* 129 */ {9, 0.1426283821033600, CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d)},
+ /* 130 */ {9, 0.1424021108869747, CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0)},
+ /* 131 */ {9, 0.1421782821510107, CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085)},
+ /* 132 */ {9, 0.1419568500933153, CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d)},
+ /* 133 */ {9, 0.1417377701235801, CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25)},
+ /* 134 */ {9, 0.1415209988221527, CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d)},
+ /* 135 */ {9, 0.1413064939005528, CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914)},
+ /* 136 */ {9, 0.1410942141636095, CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849)},
+ /* 137 */ {9, 0.1408841194731412, CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9)},
+ /* 138 */ {9, 0.1406761707131039, CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0)},
+ /* 139 */ {8, 0.1404703297561400, CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf)},
+ /* 140 */ {8, 0.1402665594314587, CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05)},
+ /* 141 */ {8, 0.1400648234939879, CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19)},
+ /* 142 */ {8, 0.1398650865947379, CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe)},
+ /* 143 */ {8, 0.1396673142523192, CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7)},
+ /* 144 */ {8, 0.1394714728255649, CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757)},
+ /* 145 */ {8, 0.1392775294872041, CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae)},
+ /* 146 */ {8, 0.1390854521985406, CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40)},
+ /* 147 */ {8, 0.1388952096850913, CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f)},
+ /* 148 */ {8, 0.1387067714131417, CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d)},
+ /* 149 */ {8, 0.1385201075671774, CNST_LIMB(0x35f14200a827c61), CNST_LIMB(0x2fbb374806ec05f1)},
+ /* 150 */ {8, 0.1383351890281539, CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe)},
+ /* 151 */ {8, 0.1381519873525671, CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e)},
+ /* 152 */ {8, 0.1379704747522905, CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc)},
+ /* 153 */ {8, 0.1377906240751463, CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910)},
+ /* 154 */ {8, 0.1376124087861776, CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614)},
+ /* 155 */ {8, 0.1374358029495937, CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2)},
+ /* 156 */ {8, 0.1372607812113589, CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d)},
+ /* 157 */ {8, 0.1370873187823978, CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782)},
+ /* 158 */ {8, 0.1369153914223921, CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b)},
+ /* 159 */ {8, 0.1367449754241439, CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd)},
+ /* 160 */ {8, 0.1365760475984821, CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9)},
+ /* 161 */ {8, 0.1364085852596902, CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753)},
+ /* 162 */ {8, 0.1362425662114337, CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7)},
+ /* 163 */ {8, 0.1360779687331669, CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb)},
+ /* 164 */ {8, 0.1359147715670014, CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac)},
+ /* 165 */ {8, 0.1357529539050150, CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e)},
+ /* 166 */ {8, 0.1355924953769863, CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004)},
+ /* 167 */ {8, 0.1354333760385373, CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8)},
+ /* 168 */ {8, 0.1352755763596663, CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5)},
+ /* 169 */ {8, 0.1351190772136599, CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe)},
+ /* 170 */ {8, 0.1349638598663645, CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532)},
+ /* 171 */ {8, 0.1348099059658079, CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02)},
+ /* 172 */ {8, 0.1346571975321549, CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317)},
+ /* 173 */ {8, 0.1345057169479844, CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c)},
+ /* 174 */ {8, 0.1343554469488779, CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e)},
+ /* 175 */ {8, 0.1342063706143054, CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286)},
+ /* 176 */ {8, 0.1340584713587980, CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb)},
+ /* 177 */ {8, 0.1339117329233981, CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b)},
+ /* 178 */ {8, 0.1337661393673756, CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1)},
+ /* 179 */ {8, 0.1336216750601996, CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef)},
+ /* 180 */ {8, 0.1334783246737591, CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3)},
+ /* 181 */ {8, 0.1333360731748201, CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb)},
+ /* 182 */ {8, 0.1331949058177136, CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8)},
+ /* 183 */ {8, 0.1330548081372441, CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005)},
+ /* 184 */ {8, 0.1329157659418126, CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926)},
+ /* 185 */ {8, 0.1327777653067443, CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f)},
+ /* 186 */ {8, 0.1326407925678156, CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6)},
+ /* 187 */ {8, 0.1325048343149731, CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497)},
+ /* 188 */ {8, 0.1323698773862368, CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67)},
+ /* 189 */ {8, 0.1322359088617821, CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b)},
+ /* 190 */ {8, 0.1321029160581950, CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56)},
+ /* 191 */ {8, 0.1319708865228925, CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0)},
+ /* 192 */ {8, 0.1318398080287045, CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf)},
+ /* 193 */ {8, 0.1317096685686114, CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a)},
+ /* 194 */ {8, 0.1315804563506306, CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962)},
+ /* 195 */ {8, 0.1314521597928493, CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e)},
+ /* 196 */ {8, 0.1313247675185968, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
+ /* 197 */ {8, 0.1311982683517524, CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca)},
+ /* 198 */ {8, 0.1310726513121843, CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2)},
+ /* 199 */ {8, 0.1309479056113158, CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9)},
+ /* 200 */ {8, 0.1308240206478128, CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6)},
+ /* 201 */ {8, 0.1307009860033912, CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22)},
+ /* 202 */ {8, 0.1305787914387386, CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20)},
+ /* 203 */ {8, 0.1304574268895465, CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf)},
+ /* 204 */ {8, 0.1303368824626505, CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9)},
+ /* 205 */ {8, 0.1302171484322746, CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff)},
+ /* 206 */ {8, 0.1300982152363760, CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a)},
+ /* 207 */ {8, 0.1299800734730872, CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd)},
+ /* 208 */ {8, 0.1298627138972530, CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985)},
+ /* 209 */ {8, 0.1297461274170591, CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7)},
+ /* 210 */ {8, 0.1296303050907487, CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175)},
+ /* 211 */ {8, 0.1295152381234257, CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e)},
+ /* 212 */ {8, 0.1294009178639407, CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef)},
+ /* 213 */ {8, 0.1292873358018581, CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9)},
+ /* 214 */ {8, 0.1291744835645007, CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc)},
+ /* 215 */ {8, 0.1290623529140715, CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b)},
+ /* 216 */ {8, 0.1289509357448472, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
+ /* 217 */ {8, 0.1288402240804449, CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2)},
+ /* 218 */ {8, 0.1287302100711567, CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049)},
+ /* 219 */ {8, 0.1286208859913518, CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada)},
+ /* 220 */ {8, 0.1285122442369443, CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933)},
+ /* 221 */ {8, 0.1284042773229231, CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d)},
+ /* 222 */ {8, 0.1282969778809442, CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b)},
+ /* 223 */ {8, 0.1281903386569819, CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4)},
+ /* 224 */ {8, 0.1280843525090381, CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053)},
+ /* 225 */ {8, 0.1279790124049077, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
+ /* 226 */ {8, 0.1278743114199984, CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9)},
+ /* 227 */ {8, 0.1277702427352035, CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e)},
+ /* 228 */ {8, 0.1276667996348261, CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b)},
+ /* 229 */ {8, 0.1275639755045533, CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1)},
+ /* 230 */ {8, 0.1274617638294791, CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762)},
+ /* 231 */ {8, 0.1273601581921741, CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555)},
+ /* 232 */ {8, 0.1272591522708010, CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe)},
+ /* 233 */ {8, 0.1271587398372755, CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f)},
+ /* 234 */ {8, 0.1270589147554692, CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3)},
+ /* 235 */ {8, 0.1269596709794558, CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c)},
+ /* 236 */ {8, 0.1268610025517973, CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773)},
+ /* 237 */ {8, 0.1267629036018709, CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5)},
+ /* 238 */ {8, 0.1266653683442337, CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f)},
+ /* 239 */ {8, 0.1265683910770258, CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c)},
+ /* 240 */ {8, 0.1264719661804097, CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529)},
+ /* 241 */ {8, 0.1263760881150453, CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1)},
+ /* 242 */ {8, 0.1262807514205999, CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2)},
+ /* 243 */ {8, 0.1261859507142915, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
+ /* 244 */ {8, 0.1260916806894653, CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6)},
+ /* 245 */ {8, 0.1259979361142023, CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2)},
+ /* 246 */ {8, 0.1259047118299582, CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47)},
+ /* 247 */ {8, 0.1258120027502338, CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016)},
+ /* 248 */ {8, 0.1257198038592741, CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18)},
+ /* 249 */ {8, 0.1256281102107963, CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d)},
+ /* 250 */ {8, 0.1255369169267456, CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5)},
+ /* 251 */ {8, 0.1254462191960791, CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3)},
+ /* 252 */ {8, 0.1253560122735751, CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d)},
+ /* 253 */ {8, 0.1252662914786691, CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac)},
+ /* 254 */ {8, 0.1251770521943144, CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93)},
+ /* 255 */ {8, 0.1250882898658681, CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f)},
+};
+#endif
diff --git a/rts/gmp/mpn/ns32k/add_n.s b/rts/gmp/mpn/ns32k/add_n.s
new file mode 100644
index 0000000000..bd063d07d9
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/add_n.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ save [r3,r4,r5]
+ negd 28(sp),r3
+ movd r3,r0
+ lshd 2,r0
+ movd 24(sp),r4
+ subd r0,r4 # r4 -> to end of S2
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r2
+ subd r0,r2 # r2 -> to end of RES
+ subd r0,r0 # cy = 0
+
+Loop: movd r5[r3:d],r0
+ addcd r4[r3:d],r0
+ movd r0,r2[r3:d]
+ acbd 1,r3,Loop
+
+ scsd r0 # r0 = cy.
+ restore [r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/addmul_1.s b/rts/gmp/mpn/ns32k/addmul_1.s
new file mode 100644
index 0000000000..df0dcdd4af
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/addmul_1.s
@@ -0,0 +1,48 @@
+# ns32000 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+ save [r3,r4,r5,r6,r7]
+ negd 24(sp),r4
+ movd r4,r0
+ lshd 2,r0
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r6
+ subd r0,r6 # r6 -> to end of RES
+ subd r0,r0 # r0 = 0, cy = 0
+ movd 28(sp),r7 # r7 = s2_limb
+
+Loop: movd r5[r4:d],r2
+ meid r7,r2 # r2 = low_prod, r3 = high_prod
+ addcd r0,r2 # r2 = low_prod + cy_limb
+ movd r3,r0 # r0 = new cy_limb
+ addcd 0,r0
+ addd r2,r6[r4:d]
+ acbd 1,r4,Loop
+
+ addcd 0,r0
+ restore [r7,r6,r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/mul_1.s b/rts/gmp/mpn/ns32k/mul_1.s
new file mode 100644
index 0000000000..0a77efba29
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/mul_1.s
@@ -0,0 +1,47 @@
+# ns32000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ save [r3,r4,r5,r6,r7]
+ negd 24(sp),r4
+ movd r4,r0
+ lshd 2,r0
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r6
+ subd r0,r6 # r6 -> to end of RES
+ subd r0,r0 # r0 = 0, cy = 0
+ movd 28(sp),r7 # r7 = s2_limb
+
+Loop: movd r5[r4:d],r2
+ meid r7,r2 # r2 = low_prod, r3 = high_prod
+ addcd r0,r2 # r2 = low_prod + cy_limb
+ movd r3,r0 # r0 = new cy_limb
+ movd r2,r6[r4:d]
+ acbd 1,r4,Loop
+
+ addcd 0,r0
+ restore [r7,r6,r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/sub_n.s b/rts/gmp/mpn/ns32k/sub_n.s
new file mode 100644
index 0000000000..cd89f4fd3f
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/sub_n.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ save [r3,r4,r5]
+ negd 28(sp),r3
+ movd r3,r0
+ lshd 2,r0
+ movd 24(sp),r4
+ subd r0,r4 # r4 -> to end of S2
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r2
+ subd r0,r2 # r2 -> to end of RES
+ subd r0,r0 # cy = 0
+
+Loop: movd r5[r3:d],r0
+ subcd r4[r3:d],r0
+ movd r0,r2[r3:d]
+ acbd 1,r3,Loop
+
+ scsd r0 # r0 = cy.
+ restore [r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/ns32k/submul_1.s b/rts/gmp/mpn/ns32k/submul_1.s
new file mode 100644
index 0000000000..f811aedcf1
--- /dev/null
+++ b/rts/gmp/mpn/ns32k/submul_1.s
@@ -0,0 +1,48 @@
+# ns32000 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+ .align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+ save [r3,r4,r5,r6,r7]
+ negd 24(sp),r4
+ movd r4,r0
+ lshd 2,r0
+ movd 20(sp),r5
+ subd r0,r5 # r5 -> to end of S1
+ movd 16(sp),r6
+ subd r0,r6 # r6 -> to end of RES
+ subd r0,r0 # r0 = 0, cy = 0
+ movd 28(sp),r7 # r7 = s2_limb
+
+Loop: movd r5[r4:d],r2
+ meid r7,r2 # r2 = low_prod, r3 = high_prod
+ addcd r0,r2 # r2 = low_prod + cy_limb
+ movd r3,r0 # r0 = new cy_limb
+ addcd 0,r0
+ subd r2,r6[r4:d]
+ acbd 1,r4,Loop
+
+ addcd 0,r0
+ restore [r7,r6,r5,r4,r3]
+ ret 0
diff --git a/rts/gmp/mpn/pa64/README b/rts/gmp/mpn/pa64/README
new file mode 100644
index 0000000000..8d2976dabc
--- /dev/null
+++ b/rts/gmp/mpn/pa64/README
@@ -0,0 +1,38 @@
+This directory contains mpn functions for 64-bit PA-RISC 2.0.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The PA8000 has a multi-issue pipeline with large buffers for instructions
+awaiting pending results. Therefore, no latency scheduling is necessary
+(and might actually be harmful).
+
+Two 64-bit loads can be completed per cycle. One 64-bit store can be
+completed per cycle. A store cannot complete in the same cycle as a load.
+
+STATUS
+
+* mpn_lshift, mpn_rshift, mpn_add_n, mpn_sub_n are all well-tuned and run at
+ the peak cache bandwidth; 1.5 cycles/limb for shifting and 2.0 cycles/limb
+ for add/subtract.
+
+* The multiplication functions run at 11 cycles/limb. The cache bandwidth
+ allows 7.5 cycles/limb. Perhaps it would be possible, using unrolling or
+ better scheduling, to get closer to the cache bandwidth limit.
+
+* xaddmul_1.S contains a quicker method for forming the 128 bit product. It
+ uses some fewer operations, and keep the carry flag live across the loop
+ boundary. But it seems hard to make it run more than 1/4 cycle faster
+ than the old code. Perhaps we really ought to unroll this loop be 2x?
+ 2x should suffice since register latency schedling is never needed,
+ but the unrolling would hide the store-load latency. Here is a sketch:
+
+ 1. A multiply and store 64-bit products
+ 2. B sum 64-bit products 128-bit product
+ 3. B load 64-bit products to integer registers
+ 4. B multiply and store 64-bit products
+ 5. A sum 64-bit products 128-bit product
+ 6. A load 64-bit products to integer registers
+ 7. goto 1
+
+ In practice, adjacent groups (1 and 2, 2 and 3, etc) will be interleaved
+ for better instruction mix.
diff --git a/rts/gmp/mpn/pa64/add_n.s b/rts/gmp/mpn/pa64/add_n.s
new file mode 100644
index 0000000000..22ff19c184
--- /dev/null
+++ b/rts/gmp/mpn/pa64/add_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+; store sum in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_add_n,entry
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ bve (%r2)
+ .exit
+ ldi 0,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/addmul_1.S b/rts/gmp/mpn/pa64/addmul_1.S
new file mode 100644
index 0000000000..b1885b432c
--- /dev/null
+++ b/rts/gmp/mpn/pa64/addmul_1.S
@@ -0,0 +1,167 @@
+; HP-PA 2.0 64-bit __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0n
+ .code
+ .export __gmpn_addmul_1,entry
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ fldd -56(%r30),%fr5 ; s2limb passed on stack
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m0
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,rlimb,rlimb
+ add,dc t2,hi,cylimb
+ add t4,rlimb,t3
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ extrd,u cylimb,31,32,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64/gmp-mparam.h b/rts/gmp/mpn/pa64/gmp-mparam.h
new file mode 100644
index 0000000000..847735b987
--- /dev/null
+++ b/rts/gmp/mpn/pa64/gmp-mparam.h
@@ -0,0 +1,65 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values were measured in a PA8000 using the system compiler version
+ A.10.32.30. Presumably the PA8200 and PA8500 have the same timing
+ characteristic, but GCC might give somewhat different results. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 16
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 105
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 40
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 116
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 72
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 94
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 50
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 46
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 1
+#endif
diff --git a/rts/gmp/mpn/pa64/lshift.s b/rts/gmp/mpn/pa64/lshift.s
new file mode 100644
index 0000000000..994bc1c4d6
--- /dev/null
+++ b/rts/gmp/mpn/pa64/lshift.s
@@ -0,0 +1,103 @@
+; HP-PA 2.0 __gmpn_lshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_lshift,entry
+__gmpn_lshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ shladd %r24,3,%r25,%r25
+ shladd %r24,3,%r26,%r26
+ subi 64,%r23,%r23
+ mtsar %r23
+ ldd -8(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r0,%r21,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ add %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ add %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd -16(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-8(%r26)
+L$7 ldd -24(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-16(%r26)
+L$6 ldd -32(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-24(%r26)
+L$5 ldd -40(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-32(%r26)
+L$4 ldd -48(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-40(%r26)
+L$3 ldd -56(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-48(%r26)
+L$2 ldd -64(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-56(%r26)
+L$1 ldd -72(%r25),%r21
+ ldo -64(%r25),%r25
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-64(%r26)
+ addib,> -8,%r24,L$loop
+ ldo -64(%r26),%r26
+
+L$end shrpd %r21,%r0,%sar,%r21
+ std %r21,-8(%r26)
+ bve (%r2)
+ .exit
+ extrd,u %r29,31,32,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/mul_1.S b/rts/gmp/mpn/pa64/mul_1.S
new file mode 100644
index 0000000000..ab310c1264
--- /dev/null
+++ b/rts/gmp/mpn/pa64/mul_1.S
@@ -0,0 +1,158 @@
+; HP-PA 2.0 64-bit __gmpn_mul_1 -- Multiply a limb vector with a limb and
+; store the result in a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0n
+ .code
+ .export __gmpn_mul_1,entry
+__gmpn_mul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ fldd -56(%r30),%fr5 ; s2limb passed on stack
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t3
+ add,dc t2,hi,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ extrd,u cylimb,31,32,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64/rshift.s b/rts/gmp/mpn/pa64/rshift.s
new file mode 100644
index 0000000000..f0730e2a91
--- /dev/null
+++ b/rts/gmp/mpn/pa64/rshift.s
@@ -0,0 +1,100 @@
+; HP-PA 2.0 __gmpn_rshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_rshift,entry
+__gmpn_rshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ mtsar %r23
+ ldd 0(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r21,%r0,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ sub %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd 8(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,0(%r26)
+L$7 ldd 16(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,8(%r26)
+L$6 ldd 24(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,16(%r26)
+L$5 ldd 32(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,24(%r26)
+L$4 ldd 40(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,32(%r26)
+L$3 ldd 48(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,40(%r26)
+L$2 ldd 56(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,48(%r26)
+L$1 ldd 64(%r25),%r21
+ ldo 64(%r25),%r25
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,56(%r26)
+ addib,> -8,%r24,L$loop
+ ldo 64(%r26),%r26
+
+L$end shrpd %r0,%r21,%sar,%r21
+ std %r21,0(%r26)
+ bve (%r2)
+ .exit
+ extrd,u %r29,31,32,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/sub_n.s b/rts/gmp/mpn/pa64/sub_n.s
new file mode 100644
index 0000000000..dda1f54b34
--- /dev/null
+++ b/rts/gmp/mpn/pa64/sub_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+; and store difference in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __gmpn_sub_n,entry
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ subi 1,%r29,%r29
+ bve (%r2)
+ .exit
+ ldi 0,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64/submul_1.S b/rts/gmp/mpn/pa64/submul_1.S
new file mode 100644
index 0000000000..27666b99df
--- /dev/null
+++ b/rts/gmp/mpn/pa64/submul_1.S
@@ -0,0 +1,170 @@
+; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb -56(%r30)
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publically documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0n
+ .code
+ .export __gmpn_submul_1,entry
+__gmpn_submul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ fldd -56(%r30),%fr5 ; s2limb passed on stack
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t4
+ add,dc t2,hi,cylimb
+ sub rlimb,t4,t3
+ add t4,t3,%r0
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ extrd,u cylimb,31,32,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64/udiv_qrnnd.c b/rts/gmp/mpn/pa64/udiv_qrnnd.c
new file mode 100644
index 0000000000..1c9fe084db
--- /dev/null
+++ b/rts/gmp/mpn/pa64/udiv_qrnnd.c
@@ -0,0 +1,111 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define TWO64 18446744073709551616.0
+
+mp_limb_t
+#if __STDC__
+__MPN(udiv_qrnnd) (mp_limb_t n1, mp_limb_t n0, mp_limb_t d, mp_limb_t *r)
+#else
+__MPN(udiv_qrnnd) (n1, n0, d, r)
+ mp_limb_t n1;
+ mp_limb_t n0;
+ mp_limb_t d;
+ mp_limb_t *r;
+#endif
+{
+ mp_limb_t q1, q2, q;
+ mp_limb_t p1, p0;
+ double di, dq;
+
+ di = 1.0 / d;
+
+ /* Generate upper 53 bits of quotient. Be careful here; the `double'
+ quotient may be rounded to 2^64 which we cannot safely convert back
+ to a 64-bit integer. */
+ dq = (TWO64 * (double) n1 + (double) n0) * di;
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800LL;
+ else
+ q1 = (mp_limb_t) dq;
+
+ /* Multiply back in order to compare the product to the dividend. */
+ umul_ppmm (p1, p0, q1, d);
+
+ /* Was the 53-bit quotient greater than our sought quotient? Test the
+ sign of the partial remainder to find out. */
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ /* 53-bit quotient too large. Partial remainder is negative.
+ Compute the absolute value of the remainder in n1,,n0. */
+ n1 = p1 - (n1 + (p0 < n0));
+ n0 = p0 - n0;
+
+ /* Now use the partial remainder as new dividend to compute more bits of
+ quotient. This is an adjustment for the one we got previously. */
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 - q2;
+ if (n1 < p1 || (n1 == p1 && n0 <= p0))
+ {
+ n0 = p0 - n0;
+ }
+ else
+ {
+ n0 = p0 - n0;
+ n0 += d;
+ q--;
+ }
+ }
+ else
+ {
+ n1 = n1 - (p1 + (n0 < p0));
+ n0 = n0 - p0;
+
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 + q2;
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ n0 = n0 - p0;
+ n0 += d;
+ q--;
+ }
+ else
+ {
+ n0 = n0 - p0;
+ if (n0 >= d)
+ {
+ n0 -= d;
+ q++;
+ }
+ }
+ }
+
+ *r = n0;
+ return q;
+}
diff --git a/rts/gmp/mpn/pa64/umul_ppmm.S b/rts/gmp/mpn/pa64/umul_ppmm.S
new file mode 100644
index 0000000000..ceff2d752f
--- /dev/null
+++ b/rts/gmp/mpn/pa64/umul_ppmm.S
@@ -0,0 +1,74 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+#define p0 %r28
+#define p1 %r29
+#define t32 %r19
+#define t0 %r20
+#define t1 %r21
+#define x %r22
+#define m0 %r23
+#define m1 %r24
+ .level 2.0n
+ .code
+ .export __gmpn_umul_ppmm,entry
+__gmpn_umul_ppmm
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ ldo 128(%r30),%r30
+ depd %r25,31,32,%r26
+ std %r26,-64(%r30)
+ depd %r23,31,32,%r24
+ std %r24,-56(%r30)
+
+ ldw -180(%r30),%r31
+
+ fldd -64(%r30),%fr4
+ fldd -56(%r30),%fr5
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+
+ depdi,z 1,31,1,t32 ; t32 = 2^32
+
+ ldd -128(%r30),p0 ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),p1 ; hi = high 64 bit of product
+
+ add,l,*nuv m0,m1,x ; x = m1+m0
+ add,l t32,p1,p1 ; propagate carry to mid of p1
+ depd,z x,31,32,t0 ; lo32(m1+m0)
+ add t0,p0,p0
+ extrd,u x,31,32,t1 ; hi32(m1+m0)
+ add,dc t1,p1,p1
+
+ std p0,0(%r31) ; store low half of product
+ extrd,u p1,31,32,%r28 ; return high half of product
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64w/README b/rts/gmp/mpn/pa64w/README
new file mode 100644
index 0000000000..cf590a7b98
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/README
@@ -0,0 +1,2 @@
+This directory contains mpn functions for 64-bit PA-RISC 2.0
+using 64-bit pointers (2.0W).
diff --git a/rts/gmp/mpn/pa64w/add_n.s b/rts/gmp/mpn/pa64w/add_n.s
new file mode 100644
index 0000000000..1bb9e8fbc7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/add_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
+; store sum in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0w
+ .code
+ .export __gmpn_add_n,entry
+__gmpn_add_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ bve (%r2)
+ .exit
+ copy %r29,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64w/addmul_1.S b/rts/gmp/mpn/pa64w/addmul_1.S
new file mode 100644
index 0000000000..4799f90fc5
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/addmul_1.S
@@ -0,0 +1,168 @@
+; HP-PA 2.0 64-bit __gmpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0w
+ .code
+ .export __gmpn_addmul_1,entry
+__gmpn_addmul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ std s2limb,-56(%r30)
+ fldd -56(%r30),%fr5
+ ldo 128(%r30),%r30
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30)
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5
+
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2
+ nop
+L$loop
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr
+L$end2
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,rlimb,rlimb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ add t4,rlimb,t3
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,rlimb,rlimb
+ add,dc t2,hi,cylimb
+ add t4,rlimb,t3
+ add,dc %r0,cylimb,cylimb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ copy cylimb,%r28
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30
+ .procend
diff --git a/rts/gmp/mpn/pa64w/gmp-mparam.h b/rts/gmp/mpn/pa64w/gmp-mparam.h
new file mode 100644
index 0000000000..ee5a0a3ab7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/gmp-mparam.h
@@ -0,0 +1,65 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values were measured on a PA8500 using the system compiler version
+ A.11.01.02. Presumably the PA8000 and PA8200 have the same timing
+ characteristics, but GCC might give somewhat different results. */
+/* Generated by tuneup.c, 2000-07-25. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 18
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 105
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 46
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 83
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 58
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 134
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 56
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 26
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 1
+#endif
diff --git a/rts/gmp/mpn/pa64w/lshift.s b/rts/gmp/mpn/pa64w/lshift.s
new file mode 100644
index 0000000000..84f925a105
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/lshift.s
@@ -0,0 +1,103 @@
+; HP-PA 2.0 __gmpn_lshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0w
+ .code
+ .export __gmpn_lshift,entry
+__gmpn_lshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ shladd %r24,3,%r25,%r25
+ shladd %r24,3,%r26,%r26
+ subi 64,%r23,%r23
+ mtsar %r23
+ ldd -8(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r0,%r21,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ add %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ add %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd -16(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-8(%r26)
+L$7 ldd -24(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-16(%r26)
+L$6 ldd -32(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-24(%r26)
+L$5 ldd -40(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-32(%r26)
+L$4 ldd -48(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-40(%r26)
+L$3 ldd -56(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-48(%r26)
+L$2 ldd -64(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-56(%r26)
+L$1 ldd -72(%r25),%r21
+ ldo -64(%r25),%r25
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-64(%r26)
+ addib,> -8,%r24,L$loop
+ ldo -64(%r26),%r26
+
+L$end shrpd %r21,%r0,%sar,%r21
+ std %r21,-8(%r26)
+ bve (%r2)
+ .exit
+ copy %r29,%r28
+ .procend
diff --git a/rts/gmp/mpn/pa64w/mul_1.S b/rts/gmp/mpn/pa64w/mul_1.S
new file mode 100644
index 0000000000..48f13fbd1b
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/mul_1.S
@@ -0,0 +1,159 @@
+; HP-PA 2.0 64-bit __gmpn_mul_1 -- Multiply a limb vector with a limb and
+; store the result in a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0w ; PA-RISC 2.0, wide (64-bit) mode
+ .code
+ .export __gmpn_mul_1,entry
+__gmpn_mul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ std s2limb,-56(%r30) ; move s2limb into the FPU via memory
+ fldd -56(%r30),%fr5
+ ldo 128(%r30),%r30 ; allocate stack frame
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30) ; save callee-saved registers
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5 ; r5 = 2^32, used to propagate mid carry
+
+ fldd 0(sptr),%fr4 ; load first source limb
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6 ; four 32x32->64 partial products
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1 ; size == 1?
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2 ; size == 2?
+ nop
+L$loop ; software-pipelined: multiply limb i+1 while summing limb i
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3 ; t3 = result limb; sets cy
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb ; next carry limb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr ; advance res_ptr (delay slot)
+L$end2 ; wind down: sum for the 2:nd last limb
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t3 ; t3 = result limb; sets cy
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb ; next carry limb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1 ; wind down: sum for the last limb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t3 ; t3 = result limb; sets cy
+ add,dc t2,hi,cylimb ; final carry limb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3 ; restore callee-saved registers
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ copy cylimb,%r28 ; return value = carry limb
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30 ; deallocate stack frame (delay slot)
+ .procend
diff --git a/rts/gmp/mpn/pa64w/rshift.s b/rts/gmp/mpn/pa64w/rshift.s
new file mode 100644
index 0000000000..2517cb1f87
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/rshift.s
@@ -0,0 +1,100 @@
+; HP-PA 2.0 __gmpn_rshift --
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0w ; PA-RISC 2.0, wide (64-bit) mode
+ .code
+ .export __gmpn_rshift,entry
+__gmpn_rshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ mtsar %r23 ; load shift amount register with cnt
+ ldd 0(%r25),%r21 ; load least significant limb
+ addib,= -1,%r24,L$end ; single-limb operand: skip loop
+ shrpd %r21,%r0,%sar,%r29 ; compute carry out limb (delay slot)
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22 ; r22 = -size
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ sub %r26,%r22,%r26 ; offset res_ptr (delay slot)
+ b L$0 ; jump table: entry i enters the loop at size%8 == i
+ nop
+ b L$1
+ copy %r21,%r20 ; delay slot: move limb to the register L$1 expects
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop ; 8-way unrolled: one shrpd+std per limb
+L$0 ldd 8(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21 ; result limb from adjacent source limb pair
+ std %r21,0(%r26)
+L$7 ldd 16(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,8(%r26)
+L$6 ldd 24(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,16(%r26)
+L$5 ldd 32(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,24(%r26)
+L$4 ldd 40(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,32(%r26)
+L$3 ldd 48(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,40(%r26)
+L$2 ldd 56(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,48(%r26)
+L$1 ldd 64(%r25),%r21
+ ldo 64(%r25),%r25 ; advance s1_ptr by 8 limbs
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,56(%r26)
+ addib,> -8,%r24,L$loop ; loop while limbs remain
+ ldo 64(%r26),%r26 ; advance res_ptr (delay slot)
+
+L$end shrpd %r0,%r21,%sar,%r21 ; final (most significant) result limb
+ std %r21,0(%r26)
+ bve (%r2) ; return
+ .exit
+ copy %r29,%r28 ; return value = carry out limb (delay slot)
+ .procend
diff --git a/rts/gmp/mpn/pa64w/sub_n.s b/rts/gmp/mpn/pa64w/sub_n.s
new file mode 100644
index 0000000000..ad01e24aa7
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/sub_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
+; and store difference in a third limb vector.
+
+; Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0w ; PA-RISC 2.0, wide (64-bit) mode
+ .code
+ .export __gmpn_sub_n,entry
+__gmpn_sub_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22 ; r22 = -n
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7), jump-table offset
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry (delay slot)
+
+L$loop ldd 0(%r25),%r20 ; 8-way unrolled subtract-with-borrow
+ ldd 0(%r24),%r31
+ sub,db %r20,%r31,%r20 ; subtract with borrow from previous limb
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25 ; advance s1_ptr by 8 limbs
+ ldd 56(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24 ; advance s2_ptr
+ addib,> -8,%r23,L$loop ; loop while limbs remain
+ ldo 64(%r26),%r26 ; advance res_ptr (delay slot)
+
+ add,dc %r0,%r0,%r29 ; r29 = final carry flag
+ subi 1,%r29,%r29 ; borrow = 1 - carry
+ bve (%r2) ; return
+ .exit
+ copy %r29,%r28 ; return value = borrow (delay slot)
+ .procend
diff --git a/rts/gmp/mpn/pa64w/submul_1.S b/rts/gmp/mpn/pa64w/submul_1.S
new file mode 100644
index 0000000000..294f6239b2
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/submul_1.S
@@ -0,0 +1,171 @@
+; HP-PA 2.0 64-bit __gmpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define rptr %r26
+#define sptr %r25
+#define size %r24
+#define s2limb %r23
+
+; This runs at 11 cycles/limb on a PA8000. It might be possible to make
+; it faster, but the PA8000 pipeline is not publicly documented and it
+; is very complex to reverse engineer
+
+#define t1 %r19
+#define rlimb %r20
+#define hi %r21
+#define lo %r22
+#define m0 %r28
+#define m1 %r3
+#define cylimb %r29
+#define t3 %r4
+#define t2 %r6
+#define t5 %r23
+#define t4 %r31
+ .level 2.0w ; PA-RISC 2.0, wide (64-bit) mode
+ .code
+ .export __gmpn_submul_1,entry
+__gmpn_submul_1
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ std s2limb,-56(%r30) ; move s2limb into the FPU via memory
+ fldd -56(%r30),%fr5
+ ldo 128(%r30),%r30 ; allocate stack frame
+ add %r0,%r0,cylimb ; clear cy and cylimb
+
+ std %r3,-96(%r30) ; save callee-saved registers
+ std %r4,-88(%r30)
+ std %r5,-80(%r30)
+ std %r6,-72(%r30)
+ depdi,z 1,31,1,%r5 ; r5 = 2^32, used to propagate mid carry
+
+ fldd 0(sptr),%fr4 ; load first source limb
+ ldo 8(sptr),sptr
+
+ xmpyu %fr5R,%fr4R,%fr6 ; four 32x32->64 partial products
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ addib,= -1,%r24,L$end1 ; size == 1?
+ nop
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ addib,= -1,%r24,L$end2 ; size == 2?
+ nop
+L$loop ; software-pipelined: multiply limb i+1 while summing limb i
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb ; limb to subtract from
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4 ; t4 = low product limb + carry limb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb ; next carry limb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3 ; t3 = rlimb - t4
+ add t4,t3,%r0 ; set cy iff the subtraction borrowed
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb ; fold borrow into carry limb
+ fldd 0(sptr),%fr4
+ ldo 8(sptr),sptr
+ std t3,0(rptr)
+ addib,<> -1,%r24,L$loop
+ ldo 8(rptr),rptr ; advance res_ptr (delay slot)
+L$end2 ; wind down: handle 2:nd last limb
+ xmpyu %fr5R,%fr4R,%fr6
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ ldd -128(%r30),lo ; lo = low 64 bit of product
+ add cylimb,t4,t4 ; t4 = low product limb + carry limb
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ add,dc t2,hi,cylimb ; next carry limb
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ sub rlimb,t4,t3 ; t3 = rlimb - t4
+ add t4,t3,%r0 ; set cy iff the subtraction borrowed
+ ldd -104(%r30),hi ; hi = high 64 bit of product
+ add,dc %r0,cylimb,cylimb ; fold borrow into carry limb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+L$end1 ; wind down: handle last limb
+ ldd 0(rptr),rlimb
+ extrd,u lo,31,32,t1 ; t1 = hi32(lo)
+ extrd,u lo,63,32,t4 ; t4 = lo32(lo)
+ add,l m0,t1,t1 ; t1 += m0
+ add,l,*nuv m1,t1,t1 ; t1 += m1
+ add,l %r5,hi,hi ; propagate carry
+ extrd,u t1,31,32,t2 ; t2 = hi32(t1)
+ depd,z t1,31,32,t5 ; t5 = lo32(t1)
+ add,l t5,t4,t4 ; t4 += lo32(t1)
+ add cylimb,t4,t4 ; t4 = low product limb + carry limb
+ add,dc t2,hi,cylimb ; next carry limb
+ sub rlimb,t4,t3 ; t3 = rlimb - t4
+ add t4,t3,%r0 ; set cy iff the subtraction borrowed
+ add,dc %r0,cylimb,cylimb ; fold borrow into carry limb
+ std t3,0(rptr)
+ ldo 8(rptr),rptr
+
+ ldd -96(%r30),%r3 ; restore callee-saved registers
+ ldd -88(%r30),%r4
+ ldd -80(%r30),%r5
+ ldd -72(%r30),%r6
+
+ copy cylimb,%r28 ; return value = carry limb
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30 ; deallocate stack frame (delay slot)
+ .procend
diff --git a/rts/gmp/mpn/pa64w/udiv_qrnnd.c b/rts/gmp/mpn/pa64w/udiv_qrnnd.c
new file mode 100644
index 0000000000..1852913000
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/udiv_qrnnd.c
@@ -0,0 +1,117 @@
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define TWO64 18446744073709551616.0
+#define TWO63 9223372036854775808.0
+
+mp_limb_t
+#if __STDC__
+__MPN(udiv_qrnnd) (mp_limb_t n1, mp_limb_t n0, mp_limb_t d, mp_limb_t *r)
+#else
+__MPN(udiv_qrnnd) (n1, n0, d, r)
+ mp_limb_t n1;
+ mp_limb_t n0;
+ mp_limb_t d;
+ mp_limb_t *r;
+#endif
+{ /* Divide n1,,n0 by d; return the quotient, store the remainder in *r.
+     NOTE(review): presumably requires n1 < d, as for other udiv_qrnnd
+     implementations -- confirm against callers.  */
+ mp_limb_t q1, q2, q;
+ mp_limb_t p1, p0;
+ double di, dq;
+
+ di = 1.0 / d; /* approximate reciprocal of the divisor */
+
+ /* Generate upper 53 bits of quotient. Be careful here; the `double'
+ quotient may be rounded to 2^64 which we cannot safely convert back
+ to a 64-bit integer. */
+ dq = (TWO64 * (double) n1 + (double) n0) * di;
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800L; /* 2^64 - 2^11, a safe 53-bit value below 2^64 */
+#ifndef __GNUC__
+ /* Work around HP compiler bug. */
+ else if (dq > TWO63)
+ q1 = (mp_limb_t) (dq - TWO63) + 0x8000000000000000L;
+#endif
+ else
+ q1 = (mp_limb_t) dq;
+
+ /* Multiply back in order to compare the product to the dividend. */
+ umul_ppmm (p1, p0, q1, d);
+
+ /* Was the 53-bit quotient greater than our sought quotient? Test the
+ sign of the partial remainder to find out. */
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ /* 53-bit quotient too large. Partial remainder is negative.
+ Compute the absolute value of the remainder in n1,,n0. */
+ n1 = p1 - (n1 + (p0 < n0)); /* high limb, with borrow from low limb */
+ n0 = p0 - n0;
+
+ /* Now use the partial remainder as new dividend to compute more bits of
+ quotient. This is an adjustment for the one we got previously. */
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 - q2; /* we overshot, so the adjustment is subtracted */
+ if (n1 < p1 || (n1 == p1 && n0 <= p0))
+ {
+ n0 = p0 - n0; /* remainder (sign was flipped above) */
+ }
+ else
+ {
+ n0 = p0 - n0;
+ n0 += d; /* final correction: one quotient step too many */
+ q--;
+ }
+ }
+ else
+ {
+ n1 = n1 - (p1 + (n0 < p0)); /* positive partial remainder in n1,,n0 */
+ n0 = n0 - p0;
+
+ q2 = (mp_limb_t) ((TWO64 * (double) n1 + (double) n0) * di);
+ umul_ppmm (p1, p0, q2, d);
+
+ q = q1 + q2; /* we undershot, so the adjustment is added */
+ if (n1 < p1 || (n1 == p1 && n0 < p0))
+ {
+ n0 = n0 - p0;
+ n0 += d; /* final correction: one quotient step too many */
+ q--;
+ }
+ else
+ {
+ n0 = n0 - p0;
+ if (n0 >= d) /* final correction: one quotient step too few */
+ {
+ n0 -= d;
+ q++;
+ }
+ }
+ }
+
+ *r = n0; /* remainder */
+ return q; /* quotient */
+}
diff --git a/rts/gmp/mpn/pa64w/umul_ppmm.S b/rts/gmp/mpn/pa64w/umul_ppmm.S
new file mode 100644
index 0000000000..d9fb92be8c
--- /dev/null
+++ b/rts/gmp/mpn/pa64w/umul_ppmm.S
@@ -0,0 +1,72 @@
+; Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2.1 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+#define p0 %r28
+#define p1 %r29
+#define t32 %r19
+#define t0 %r20
+#define t1 %r21
+#define x %r22
+#define m0 %r23
+#define m1 %r24
+ .level 2.0w ; PA-RISC 2.0, wide (64-bit) mode
+ .code
+ .export __gmpn_umul_ppmm,entry
+__gmpn_umul_ppmm
+ .proc
+ .callinfo frame=128,no_calls
+ .entry
+ ldo 128(%r30),%r30 ; allocate stack frame
+ std %r26,-64(%r30) ; move multiplicands into the FPU via memory
+ std %r25,-56(%r30)
+
+ copy %r24,%r31 ; r31 = pointer for the low half of the result
+
+ fldd -64(%r30),%fr4
+ fldd -56(%r30),%fr5
+
+ xmpyu %fr5R,%fr4R,%fr6 ; four 32x32->64 partial products
+ fstd %fr6,-128(%r30)
+ xmpyu %fr5R,%fr4L,%fr7
+ fstd %fr7,-120(%r30)
+ xmpyu %fr5L,%fr4R,%fr8
+ fstd %fr8,-112(%r30)
+ xmpyu %fr5L,%fr4L,%fr9
+ fstd %fr9,-104(%r30)
+
+ depdi,z 1,31,1,t32 ; t32 = 2^32
+
+ ldd -128(%r30),p0 ; lo = low 64 bit of product
+ ldd -120(%r30),m0 ; m0 = mid0 64 bit of product
+ ldd -112(%r30),m1 ; m1 = mid1 64 bit of product
+ ldd -104(%r30),p1 ; hi = high 64 bit of product
+
+ add,l,*nuv m0,m1,x ; x = m1+m0; nullify next insn if no carry out
+ add,l t32,p1,p1 ; propagate carry to mid of p1
+ depd,z x,31,32,t0 ; lo32(m1+m0)
+ add t0,p0,p0 ; sets cy
+ extrd,u x,31,32,t1 ; hi32(m1+m0)
+ add,dc t1,p1,p1 ; add carry into high half
+
+ std p0,0(%r31) ; store low half of product
+ copy p1,%r28 ; return high half of product
+ bve (%r2)
+ .exit
+ ldo -128(%r30),%r30 ; deallocate stack frame (delay slot)
+ .procend
diff --git a/rts/gmp/mpn/power/add_n.s b/rts/gmp/mpn/power/add_n.s
new file mode 100644
index 0000000000..0f9f48f1cc
--- /dev/null
+++ b/rts/gmp/mpn/power/add_n.s
@@ -0,0 +1,79 @@
+# IBM POWER __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+# Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+ .toc
+ .globl __gmpn_add_n
+ .globl .__gmpn_add_n
+ .csect __gmpn_add_n[DS]
+__gmpn_add_n:
+ .long .__gmpn_add_n, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_add_n:
+ andil. 10,6,1 # odd or even number of limbs?
+ l 8,0(4) # load least significant s1 limb
+ l 0,0(5) # load least significant s2 limb
+ cal 3,-4(3) # offset res_ptr, it's updated before it's used
+ sri 10,6,1 # count for unrolled loop
+ a 7,0,8 # add least significant limbs, set cy
+ mtctr 10 # copy count into CTR
+ beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs. Add the first limbs separately.
+ cmpi 1,10,0 # is count for unrolled loop zero?
+ bc 4,6,L1 # bne cr1,L1 (misassembled by gas)
+ st 7,4(3) # size == 1: store the single sum limb
+ aze 3,10 # use the fact that r10 is zero...
+ br # ...to return just the carry
+
+# We added least significant limbs. Now reload the next limbs to enter loop.
+L1: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ stu 7,4(3)
+ ae 7,0,8 # add limbs, set cy
+Leven: lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ bdz Lend # If done, skip loop
+
+Loop: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ ae 11,9,10 # add previous limbs with cy, set cy
+ stu 7,4(3) # store result limb
+ lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ ae 7,0,8 # add previous limbs with cy, set cy
+ stu 11,4(3) # store result limb
+ bdn Loop # decrement CTR and loop back
+
+Lend: ae 11,9,10 # add limbs with cy, set cy
+ st 7,4(3) # store 2:nd last result limb
+ st 11,8(3) # store last result limb
+ lil 3,0 # load cy into ...
+ aze 3,3 # ... return value register
+ br
diff --git a/rts/gmp/mpn/power/addmul_1.s b/rts/gmp/mpn/power/addmul_1.s
new file mode 100644
index 0000000000..8ecc651579
--- /dev/null
+++ b/rts/gmp/mpn/power/addmul_1.s
@@ -0,0 +1,122 @@
+# IBM POWER __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+# The POWER architecture has no unsigned 32x32->64 bit multiplication
+# instruction. To obtain that operation, we have to use the 32x32->64 signed
+# multiplication instruction, and add the appropriate compensation to the high
+# limb of the result. We add the multiplicand if the multiplier has its most
+# significant bit set, and we add the multiplier if the multiplicand has its
+# most significant bit set. We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work). Since the POWER architecture has a branch unit we
+# can branch in zero cycles, so that's how we perform the additions.
+
+ .toc
+ .globl __gmpn_addmul_1
+ .globl .__gmpn_addmul_1
+ .csect __gmpn_addmul_1[DS]
+__gmpn_addmul_1:
+ .long .__gmpn_addmul_1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_addmul_1:
+
+ cal 3,-4(3) # offset res_ptr; updated before each use
+ l 0,0(4) # load first s1 limb
+ cmpi 0,6,0 # s2_limb negative as a signed number?
+ mtctr 5 # limb count -> CTR
+ mul 9,0,6 # signed 32x32: high part -> r9, low part -> MQ
+ srai 7,0,31 # r7 = all-ones if s1 limb has MSB set
+ and 7,7,6 # compensation: s2_limb if s1 limb MSB set
+ mfmq 8 # fetch low product limb from MQ
+ cax 9,9,7 # add compensation into high limb
+ l 7,4(3) # load res limb
+ a 8,8,7 # add res_limb
+ blt Lneg # separate loop when s2_limb is negative
+Lpos: bdz Lend
+
+Lploop: lu 0,4(4) # 2-way unrolled; r9/r10 alternate as cy_limb
+ stu 8,4(3)
+ cmpi 0,0,0 # test sign of new s1 limb
+ mul 10,0,6
+ mfmq 0
+ ae 8,0,9 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ aze 10,10 # propagate cy to new cy_limb
+ a 8,8,7 # add res_limb
+ bge Lp0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Lp0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 0
+ ae 8,0,10 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ aze 9,9 # propagate cy to new cy_limb
+ a 8,8,7 # add res_limb
+ bge Lp1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Lp1: bdn Lploop
+
+ b Lend
+
+Lneg: cax 9,9,0 # adjust high limb for negative s2_limb
+ bdz Lend
+Lnloop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 7
+ ae 8,7,9 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ ae 10,10,0 # propagate cy and add s1 limb (s2_limb negative)
+ a 8,8,7 # add res_limb
+ bge Ln0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Ln0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 7
+ ae 8,7,10 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ ae 9,9,0 # propagate cy and add s1 limb (s2_limb negative)
+ a 8,8,7 # add res_limb
+ bge Ln1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Ln1: bdn Lnloop
+ b Lend
+
+Lend0: cal 9,0(10) # move cy_limb from r10 into r9
+Lend: st 8,4(3) # store final result limb
+ aze 3,9 # return value = cy_limb + cy
+ br
diff --git a/rts/gmp/mpn/power/lshift.s b/rts/gmp/mpn/power/lshift.s
new file mode 100644
index 0000000000..ab71fb7727
--- /dev/null
+++ b/rts/gmp/mpn/power/lshift.s
@@ -0,0 +1,56 @@
+# IBM POWER __gmpn_lshift --
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s_ptr r4
+# size r5
+# cnt r6
+
+ .toc
+ .globl __gmpn_lshift
+ .globl .__gmpn_lshift
+ .csect __gmpn_lshift[DS]
+__gmpn_lshift:
+ .long .__gmpn_lshift, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_lshift:
+ sli 0,5,2 # r0 = 4*size
+ cax 9,3,0 # res_ptr points past end; we work downwards
+ cax 4,4,0 # s_ptr points past end
+ sfi 8,6,32 # r8 = 32 - cnt
+ mtctr 5 # put limb count in CTR loop register
+ lu 0,-4(4) # read most significant limb
+ sre 3,0,8 # compute carry out limb, and init MQ register
+ bdz Lend2 # if just one limb, skip loop
+ lu 0,-4(4) # read 2:nd most significant limb
+ sreq 7,0,8 # compute most significant limb of result
+ bdz Lend # if just two limb, skip loop
+Loop: lu 0,-4(4) # load next lower limb
+ stu 7,-4(9) # store previous result during read latency
+ sreq 7,0,8 # compute result limb
+ bdn Loop # loop back until CTR is zero
+Lend: stu 7,-4(9) # store 2:nd least significant limb
+Lend2: sle 7,0,6 # compute least significant limb
+ st 7,-4(9) # store it
+ br # return carry out limb in r3
diff --git a/rts/gmp/mpn/power/mul_1.s b/rts/gmp/mpn/power/mul_1.s
new file mode 100644
index 0000000000..4e08ade583
--- /dev/null
+++ b/rts/gmp/mpn/power/mul_1.s
@@ -0,0 +1,109 @@
+# IBM POWER __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+# The POWER architecture has no unsigned 32x32->64 bit multiplication
+# instruction. To obtain that operation, we have to use the 32x32->64 signed
+# multiplication instruction, and add the appropriate compensation to the high
+# limb of the result. We add the multiplicand if the multiplier has its most
+# significant bit set, and we add the multiplier if the multiplicand has its
+# most significant bit set. We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work). Since the POWER architecture has a branch unit we
+# can branch in zero cycles, so that's how we perform the additions.
+
+ .toc
+ .globl __gmpn_mul_1
+ .globl .__gmpn_mul_1
+ .csect __gmpn_mul_1[DS]
+__gmpn_mul_1:
+ .long .__gmpn_mul_1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_mul_1:
+
+ cal 3,-4(3) # offset res_ptr; updated before each use
+ l 0,0(4) # load first s1 limb
+ cmpi 0,6,0 # s2_limb negative as a signed number?
+ mtctr 5 # limb count -> CTR
+ mul 9,0,6 # signed 32x32: high part -> r9, low part -> MQ
+ srai 7,0,31 # r7 = all-ones if s1 limb has MSB set
+ and 7,7,6 # compensation: s2_limb if s1 limb MSB set
+ mfmq 8 # fetch low product limb from MQ
+ ai 0,0,0 # reset carry
+ cax 9,9,7 # add compensation into high limb
+ blt Lneg # separate loop when s2_limb is negative
+Lpos: bdz Lend
+Lploop: lu 0,4(4) # 2-way unrolled; r9/r10 alternate as cy_limb
+ stu 8,4(3)
+ cmpi 0,0,0 # test sign of new s1 limb
+ mul 10,0,6
+ mfmq 0
+ ae 8,0,9 # low limb + old_cy_limb + old cy
+ bge Lp0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Lp0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 0
+ ae 8,0,10 # low limb + old_cy_limb + old cy
+ bge Lp1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Lp1: bdn Lploop
+ b Lend
+
+Lneg: cax 9,9,0 # adjust high limb for negative s2_limb
+ bdz Lend
+Lnloop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ cax 10,10,0 # adjust high limb for negative s2_limb
+ mfmq 0
+ ae 8,0,9 # low limb + old_cy_limb + old cy
+ bge Ln0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Ln0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ cax 9,9,0 # adjust high limb for negative s2_limb
+ mfmq 0
+ ae 8,0,10 # low limb + old_cy_limb + old cy
+ bge Ln1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Ln1: bdn Lnloop
+ b Lend
+
+Lend0: cal 9,0(10) # move cy_limb from r10 into r9
+Lend: st 8,4(3) # store final result limb
+ aze 3,9 # return value = cy_limb + cy
+ br
diff --git a/rts/gmp/mpn/power/rshift.s b/rts/gmp/mpn/power/rshift.s
new file mode 100644
index 0000000000..65b3945f8a
--- /dev/null
+++ b/rts/gmp/mpn/power/rshift.s
@@ -0,0 +1,54 @@
+# IBM POWER __gmpn_rshift --
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s_ptr r4
+# size r5
+# cnt r6
+
+ .toc
+ .globl __gmpn_rshift
+ .globl .__gmpn_rshift
+ .csect __gmpn_rshift[DS]
+__gmpn_rshift:
+ .long .__gmpn_rshift, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_rshift:
+ sfi 8,6,32
+ mtctr 5 # put limb count in CTR loop register
+ l 0,0(4) # read least significant limb
+ ai 9,3,-4 # adjust res_ptr since it's offset in the stu:s
+ sle 3,0,8 # compute carry limb, and init MQ register
+ bdz Lend2 # if just one limb, skip loop
+ lu 0,4(4) # read 2:nd least significant limb
+ sleq 7,0,8 # compute least significant limb of result
+ bdz Lend # if just two limb, skip loop
+Loop: lu 0,4(4) # load next higher limb
+ stu 7,4(9) # store previous result during read latency
+ sleq 7,0,8 # compute result limb
+ bdn Loop # loop back until CTR is zero
+Lend: stu 7,4(9) # store 2:nd most significant limb
+Lend2: sre 7,0,6 # compute most significant limb
+	st	7,4(9)		# store it
+ br
diff --git a/rts/gmp/mpn/power/sdiv.s b/rts/gmp/mpn/power/sdiv.s
new file mode 100644
index 0000000000..81da622fbc
--- /dev/null
+++ b/rts/gmp/mpn/power/sdiv.s
@@ -0,0 +1,34 @@
+# Copyright (C) 1999 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+ .toc
+ .globl __sdiv_qrnnd
+ .globl .__sdiv_qrnnd
+ .csect __sdiv_qrnnd[DS]
+__sdiv_qrnnd:
+ .long .__sdiv_qrnnd, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__sdiv_qrnnd:
+ mtmq 5
+ div 0,4,6
+ mfmq 9
+ st 9,0(3)
+ mr 3,0
+ br
diff --git a/rts/gmp/mpn/power/sub_n.s b/rts/gmp/mpn/power/sub_n.s
new file mode 100644
index 0000000000..aa09cf5bc1
--- /dev/null
+++ b/rts/gmp/mpn/power/sub_n.s
@@ -0,0 +1,80 @@
+# IBM POWER __gmpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software Foundation,
+# Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+ .toc
+ .globl __gmpn_sub_n
+ .globl .__gmpn_sub_n
+ .csect __gmpn_sub_n[DS]
+__gmpn_sub_n:
+ .long .__gmpn_sub_n, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_sub_n:
+ andil. 10,6,1 # odd or even number of limbs?
+ l 8,0(4) # load least significant s1 limb
+ l 0,0(5) # load least significant s2 limb
+ cal 3,-4(3) # offset res_ptr, it's updated before it's used
+ sri 10,6,1 # count for unrolled loop
+ sf 7,0,8 # subtract least significant limbs, set cy
+ mtctr 10 # copy count into CTR
+ beq 0,Leven # branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs. Add the first limbs separately.
+ cmpi 1,10,0 # is count for unrolled loop zero?
+ bc 4,6,L1 # bne cr1,L1 (misassembled by gas)
+ st 7,4(3)
+ sfe 3,0,0 # load !cy into ...
+ sfi 3,3,0 # ... return value register
+ br # return
+
+# We added least significant limbs. Now reload the next limbs to enter loop.
+L1: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ stu 7,4(3)
+ sfe 7,0,8 # subtract limbs, set cy
+Leven: lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ bdz Lend # If done, skip loop
+
+Loop: lu 8,4(4) # load s1 limb and update s1_ptr
+ lu 0,4(5) # load s2 limb and update s2_ptr
+ sfe 11,10,9 # subtract previous limbs with cy, set cy
+ stu 7,4(3) #
+ lu 9,4(4) # load s1 limb and update s1_ptr
+ lu 10,4(5) # load s2 limb and update s2_ptr
+ sfe 7,0,8 # subtract previous limbs with cy, set cy
+ stu 11,4(3) #
+ bdn Loop # decrement CTR and loop back
+
+Lend: sfe 11,10,9 # subtract limbs with cy, set cy
+ st 7,4(3) #
+ st 11,8(3) #
+ sfe 3,0,0 # load !cy into ...
+ sfi 3,3,0 # ... return value register
+ br
diff --git a/rts/gmp/mpn/power/submul_1.s b/rts/gmp/mpn/power/submul_1.s
new file mode 100644
index 0000000000..bc01b7c95d
--- /dev/null
+++ b/rts/gmp/mpn/power/submul_1.s
@@ -0,0 +1,127 @@
+# IBM POWER __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+# The POWER architecture has no unsigned 32x32->64 bit multiplication
+# instruction. To obtain that operation, we have to use the 32x32->64 signed
+# multiplication instruction, and add the appropriate compensation to the high
+# limb of the result. We add the multiplicand if the multiplier has its most
+# significant bit set, and we add the multiplier if the multiplicand has its
+# most significant bit set. We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work). Since the POWER architecture has a branch unit we
+# can branch in zero cycles, so that's how we perform the additions.
+
+ .toc
+ .globl __gmpn_submul_1
+ .globl .__gmpn_submul_1
+ .csect __gmpn_submul_1[DS]
+__gmpn_submul_1:
+ .long .__gmpn_submul_1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__gmpn_submul_1:
+
+ cal 3,-4(3)
+ l 0,0(4)
+ cmpi 0,6,0
+ mtctr 5
+ mul 9,0,6
+ srai 7,0,31
+ and 7,7,6
+ mfmq 11
+ cax 9,9,7
+ l 7,4(3)
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ blt Lneg
+Lpos: bdz Lend
+
+Lploop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 0
+ ae 11,0,9 # low limb + old_cy_limb + old cy
+ l 7,4(3)
+ aze 10,10 # propagate cy to new cy_limb
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Lp0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Lp0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 0
+ ae 11,0,10
+ l 7,4(3)
+ aze 9,9
+ sf 8,11,7
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Lp1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Lp1: bdn Lploop
+
+ b Lend
+
+Lneg: cax 9,9,0
+ bdz Lend
+Lnloop: lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 10,0,6
+ mfmq 7
+ ae 11,7,9
+ l 7,4(3)
+ ae 10,10,0 # propagate cy to new cy_limb
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Ln0
+ cax 10,10,6 # adjust high limb for negative limb from s1
+Ln0: bdz Lend0
+ lu 0,4(4)
+ stu 8,4(3)
+ cmpi 0,0,0
+ mul 9,0,6
+ mfmq 7
+ ae 11,7,10
+ l 7,4(3)
+ ae 9,9,0 # propagate cy to new cy_limb
+ sf 8,11,7 # add res_limb
+ a 11,8,11 # invert cy (r11 is junk)
+ bge Ln1
+ cax 9,9,6 # adjust high limb for negative limb from s1
+Ln1: bdn Lnloop
+ b Lend
+
+Lend0: cal 9,0(10)
+Lend: st 8,4(3)
+ aze 3,9
+ br
diff --git a/rts/gmp/mpn/power/umul.s b/rts/gmp/mpn/power/umul.s
new file mode 100644
index 0000000000..8c77496380
--- /dev/null
+++ b/rts/gmp/mpn/power/umul.s
@@ -0,0 +1,38 @@
+# Copyright (C) 1999 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+ .toc
+ .globl __umul_ppmm
+ .globl .__umul_ppmm
+ .csect __umul_ppmm[DS]
+__umul_ppmm:
+ .long .__umul_ppmm, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.__umul_ppmm:
+ mul 9,4,5
+ srai 0,4,31
+ and 0,0,5
+ srai 5,5,31
+ and 5,5,4
+ cax 0,0,5
+ mfmq 11
+ st 11,0(3)
+ cax 3,9,0
+ br
diff --git a/rts/gmp/mpn/powerpc32/add_n.asm b/rts/gmp/mpn/powerpc32/add_n.asm
new file mode 100644
index 0000000000..81ed04b162
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/add_n.asm
@@ -0,0 +1,61 @@
+dnl PowerPC-32 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ mtctr r6 C copy size into CTR
+ addic r0,r0,0 C clear cy
+ lwz r8,0(r4) C load least significant s1 limb
+ lwz r0,0(r5) C load least significant s2 limb
+ addi r3,r3,-4 C offset res_ptr, it's updated before it's used
+ bdz .Lend C If done, skip loop
+.Loop: lwz r9,4(r4) C load s1 limb
+ lwz r10,4(r5) C load s2 limb
+ adde r7,r0,r8 C add limbs with cy, set cy
+ stw r7,4(r3) C store result limb
+ bdz .Lexit C decrement CTR and exit if done
+ lwzu r8,8(r4) C load s1 limb and update s1_ptr
+ lwzu r0,8(r5) C load s2 limb and update s2_ptr
+ adde r7,r10,r9 C add limbs with cy, set cy
+ stwu r7,8(r3) C store result limb and update res_ptr
+ bdnz .Loop C decrement CTR and loop back
+
+.Lend: adde r7,r0,r8
+ stw r7,4(r3) C store ultimate result limb
+ li r3,0 C load cy into ...
+ addze r3,r3 C ... return value register
+ blr
+.Lexit: adde r7,r10,r9
+ stw r7,8(r3)
+ li r3,0 C load cy into ...
+ addze r3,r3 C ... return value register
+ blr
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/powerpc32/addmul_1.asm b/rts/gmp/mpn/powerpc32/addmul_1.asm
new file mode 100644
index 0000000000..3ef75b1532
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/addmul_1.asm
@@ -0,0 +1,124 @@
+dnl PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603
+dnl or PPC750 since I don't have access to any such machines.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ cmpi cr0,r5,9 C more than 9 limbs?
+ bgt cr0,.Lbig C branch if more than 9 limbs
+
+ mtctr r5
+ lwz r0,0(r4)
+ mullw r7,r0,r6
+ mulhwu r10,r0,r6
+ lwz r9,0(r3)
+ addc r8,r7,r9
+ addi r3,r3,-4
+ bdz .Lend
+.Lloop:
+ lwzu r0,4(r4)
+ stwu r8,4(r3)
+ mullw r8,r0,r6
+ adde r7,r8,r10
+ mulhwu r10,r0,r6
+ lwz r9,4(r3)
+ addze r10,r10
+ addc r8,r7,r9
+ bdnz .Lloop
+.Lend: stw r8,4(r3)
+ addze r3,r10
+ blr
+
+.Lbig: stmw r30,-32(r1)
+ addi r5,r5,-1
+ srwi r0,r5,2
+ mtctr r0
+
+ lwz r7,0(r4)
+ mullw r8,r7,r6
+ mulhwu r0,r7,r6
+ lwz r7,0(r3)
+ addc r8,r8,r7
+ stw r8,0(r3)
+
+.LloopU:
+ lwz r7,4(r4)
+ lwz r12,8(r4)
+ lwz r30,12(r4)
+ lwzu r31,16(r4)
+ mullw r8,r7,r6
+ mullw r9,r12,r6
+ mullw r10,r30,r6
+ mullw r11,r31,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ adde r9,r9,r0
+ mulhwu r0,r12,r6
+ lwz r12,8(r3)
+ adde r10,r10,r0
+ mulhwu r0,r30,r6
+ lwz r30,12(r3)
+ adde r11,r11,r0
+ mulhwu r0,r31,r6
+ lwz r31,16(r3)
+ addze r0,r0 C new cy_limb
+ addc r8,r8,r7
+ stw r8,4(r3)
+ adde r9,r9,r12
+ stw r9,8(r3)
+ adde r10,r10,r30
+ stw r10,12(r3)
+ adde r11,r11,r31
+ stwu r11,16(r3)
+ bdnz .LloopU
+
+ andi. r31,r5,3
+ mtctr r31
+ beq cr0,.Lendx
+
+.LloopE:
+ lwzu r7,4(r4)
+ mullw r8,r7,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ addze r0,r0 C new cy_limb
+ addc r8,r8,r7
+ stwu r8,4(r3)
+ bdnz .LloopE
+.Lendx:
+ addze r3,r0
+ lmw r30,-32(r1)
+ blr
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/powerpc32/aix.m4 b/rts/gmp/mpn/powerpc32/aix.m4
new file mode 100644
index 0000000000..2bd8425817
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/aix.m4
@@ -0,0 +1,39 @@
+divert(-1)
+dnl m4 macros for AIX 32-bit assembly.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+define(`ASM_START',
+ `.toc')
+
+define(`PROLOGUE',
+ `
+ .globl $1
+ .globl .$1
+ .csect $1[DS],2
+$1:
+ .long .$1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.$1:')
+
+define(`EPILOGUE', `')
+
+divert
diff --git a/rts/gmp/mpn/powerpc32/gmp-mparam.h b/rts/gmp/mpn/powerpc32/gmp-mparam.h
new file mode 100644
index 0000000000..b283185789
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/gmp-mparam.h
@@ -0,0 +1,66 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* These values are for the 604. Presumably, these should be considerably
+ different for the 603 and 750 that have much slower multiply
+ instructions. */
+
+/* Generated by tuneup.c, 2000-05-26. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 26 /* tuneup says 20 */
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 228
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 46 /* tuneup says 44 */
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 262
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 52
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 86
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 23
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 7
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 53
+#endif
diff --git a/rts/gmp/mpn/powerpc32/lshift.asm b/rts/gmp/mpn/powerpc32/lshift.asm
new file mode 100644
index 0000000000..73a85430ab
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/lshift.asm
@@ -0,0 +1,145 @@
+dnl PowerPC-32 mpn_lshift -- Shift a number left.
+
+dnl Copyright (C) 1995, 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ cmpi cr0,r5,12 C more than 12 limbs?
+ slwi r0,r5,2
+ add r4,r4,r0 C make r4 point at end of s1
+ add r7,r3,r0 C make r7 point at end of res
+ bgt .LBIG C branch if more than 12 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8,r6,32
+ lwzu r11,-4(r4) C load first s1 limb
+ srw r3,r11,r8 C compute function return value
+ bdz .Lend1
+
+.Loop: lwzu r10,-4(r4)
+ slw r9,r11,r6
+ srw r12,r10,r8
+ or r9,r9,r12
+ stwu r9,-4(r7)
+ bdz .Lend2
+ lwzu r11,-4(r4)
+ slw r9,r10,r6
+ srw r12,r11,r8
+ or r9,r9,r12
+ stwu r9,-4(r7)
+ bdnz .Loop
+
+.Lend1: slw r0,r11,r6
+ stw r0,-4(r7)
+ blr
+.Lend2: slw r0,r10,r6
+ stw r0,-4(r7)
+ blr
+
+.LBIG:
+ stmw r24,-32(r1) C save registers we are supposed to preserve
+ lwzu r9,-4(r4)
+ subfic r8,r6,32
+ srw r3,r9,r8 C compute function return value
+ slw r0,r9,r6
+ addi r5,r5,-1
+
+ andi. r10,r5,3 C count for spill loop
+ beq .Le
+ mtctr r10
+ lwzu r28,-4(r4)
+ bdz .Lxe0
+
+.Loop0: slw r12,r28,r6
+ srw r24,r28,r8
+ lwzu r28,-4(r4)
+ or r24,r0,r24
+ stwu r24,-4(r7)
+ mr r0,r12
+ bdnz .Loop0 C taken at most once!
+
+.Lxe0: slw r12,r28,r6
+ srw r24,r28,r8
+ or r24,r0,r24
+ stwu r24,-4(r7)
+ mr r0,r12
+
+.Le: srwi r5,r5,2 C count for unrolled loop
+ addi r5,r5,-1
+ mtctr r5
+ lwz r28,-4(r4)
+ lwz r29,-8(r4)
+ lwz r30,-12(r4)
+ lwzu r31,-16(r4)
+
+.LoopU: slw r9,r28,r6
+ srw r24,r28,r8
+ lwz r28,-4(r4)
+ slw r10,r29,r6
+ srw r25,r29,r8
+ lwz r29,-8(r4)
+ slw r11,r30,r6
+ srw r26,r30,r8
+ lwz r30,-12(r4)
+ slw r12,r31,r6
+ srw r27,r31,r8
+ lwzu r31,-16(r4)
+ or r24,r0,r24
+ stw r24,-4(r7)
+ or r25,r9,r25
+ stw r25,-8(r7)
+ or r26,r10,r26
+ stw r26,-12(r7)
+ or r27,r11,r27
+ stwu r27,-16(r7)
+ mr r0,r12
+ bdnz .LoopU
+
+ slw r9,r28,r6
+ srw r24,r28,r8
+ slw r10,r29,r6
+ srw r25,r29,r8
+ slw r11,r30,r6
+ srw r26,r30,r8
+ slw r12,r31,r6
+ srw r27,r31,r8
+ or r24,r0,r24
+ stw r24,-4(r7)
+ or r25,r9,r25
+ stw r25,-8(r7)
+ or r26,r10,r26
+ stw r26,-12(r7)
+ or r27,r11,r27
+ stwu r27,-16(r7)
+ mr r0,r12
+
+ stw r0,-4(r7)
+ lmw r24,-32(r1) C restore registers
+ blr
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/powerpc32/mul_1.asm b/rts/gmp/mpn/powerpc32/mul_1.asm
new file mode 100644
index 0000000000..ec878b54d5
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/mul_1.asm
@@ -0,0 +1,86 @@
+dnl PowerPC-32 mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl This is optimized for the PPC604 but it runs decently even on PPC601. It
+dnl has not been tested on a PPC603 since I don't have access to any such
+dnl machines.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ mtctr r5
+ addi r3,r3,-4 C adjust res_ptr, it's offset before it's used
+ li r12,0 C clear upper product reg
+ addic r0,r0,0 C clear cy
+C Start software pipeline
+ lwz r8,0(r4)
+ bdz .Lend3
+ stmw r30,-8(r1) C save registers we are supposed to preserve
+ lwzu r9,4(r4)
+ mullw r11,r8,r6
+ mulhwu r0,r8,r6
+ bdz .Lend1
+C Software pipelined main loop
+.Loop: lwz r8,4(r4)
+ mullw r10,r9,r6
+ adde r30,r11,r12
+ mulhwu r12,r9,r6
+ stw r30,4(r3)
+ bdz .Lend2
+ lwzu r9,8(r4)
+ mullw r11,r8,r6
+ adde r31,r10,r0
+ mulhwu r0,r8,r6
+ stwu r31,8(r3)
+ bdnz .Loop
+C Finish software pipeline
+.Lend1: mullw r10,r9,r6
+ adde r30,r11,r12
+ mulhwu r12,r9,r6
+ stw r30,4(r3)
+ adde r31,r10,r0
+ stwu r31,8(r3)
+ addze r3,r12
+ lmw r30,-8(r1) C restore registers from stack
+ blr
+.Lend2: mullw r11,r8,r6
+ adde r31,r10,r0
+ mulhwu r0,r8,r6
+ stwu r31,8(r3)
+ adde r30,r11,r12
+ stw r30,4(r3)
+ addze r3,r0
+ lmw r30,-8(r1) C restore registers from stack
+ blr
+.Lend3: mullw r11,r8,r6
+ stw r11,4(r3)
+ mulhwu r3,r8,r6
+ blr
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/powerpc32/regmap.m4 b/rts/gmp/mpn/powerpc32/regmap.m4
new file mode 100644
index 0000000000..978f18902a
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/regmap.m4
@@ -0,0 +1,34 @@
+divert(-1)
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Map register names r0, r1, etc, to just `0', `1', etc.
+dnl This is needed on all systems but NeXT, Rhapsody, and MacOS-X
+forloop(i,0,31,
+`define(`r'i,i)'
+)
+
+dnl Likewise for cr0, cr1, etc.
+forloop(i,0,7,
+`define(`cr'i,i)'
+)
+
+divert
diff --git a/rts/gmp/mpn/powerpc32/rshift.asm b/rts/gmp/mpn/powerpc32/rshift.asm
new file mode 100644
index 0000000000..a09ba04938
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/rshift.asm
@@ -0,0 +1,60 @@
+dnl PowerPC-32 mpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ mtctr r5 C copy size into CTR
+ addi r7,r3,-4 C move adjusted res_ptr to free return reg
+ subfic r8,r6,32
+ lwz r11,0(r4) C load first s1 limb
+ slw r3,r11,r8 C compute function return value
+ bdz .Lend1
+
+.Loop: lwzu r10,4(r4)
+ srw r9,r11,r6
+ slw r12,r10,r8
+ or r9,r9,r12
+ stwu r9,4(r7)
+ bdz .Lend2
+ lwzu r11,4(r4)
+ srw r9,r10,r6
+ slw r12,r11,r8
+ or r9,r9,r12
+ stwu r9,4(r7)
+ bdnz .Loop
+
+.Lend1: srw r0,r11,r6
+ stw r0,4(r7)
+ blr
+
+.Lend2: srw r0,r10,r6
+ stw r0,4(r7)
+ blr
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/powerpc32/sub_n.asm b/rts/gmp/mpn/powerpc32/sub_n.asm
new file mode 100644
index 0000000000..b04b4192ef
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/sub_n.asm
@@ -0,0 +1,61 @@
+dnl PowerPC-32 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl and store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl s2_ptr r5
+dnl size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ mtctr r6 C copy size into CTR
+ addic r0,r6,-1 C set cy
+ lwz r8,0(r4) C load least significant s1 limb
+ lwz r0,0(r5) C load least significant s2 limb
+ addi r3,r3,-4 C offset res_ptr, it's updated before it's used
+ bdz .Lend C If done, skip loop
+.Loop: lwz r9,4(r4) C load s1 limb
+ lwz r10,4(r5) C load s2 limb
+ subfe r7,r0,r8 C subtract limbs with cy, set cy
+ stw r7,4(r3) C store result limb
+ bdz .Lexit C decrement CTR and exit if done
+ lwzu r8,8(r4) C load s1 limb and update s1_ptr
+ lwzu r0,8(r5) C load s2 limb and update s2_ptr
+ subfe r7,r10,r9 C subtract limbs with cy, set cy
+ stwu r7,8(r3) C store result limb and update res_ptr
+ bdnz .Loop C decrement CTR and loop back
+
+.Lend: subfe r7,r0,r8
+ stw r7,4(r3) C store ultimate result limb
+ subfe r3,r0,r0 C load !cy into ...
+ subfic r3,r3,0 C ... return value register
+ blr
+.Lexit: subfe r7,r10,r9
+ stw r7,8(r3)
+ subfe r3,r0,r0 C load !cy into ...
+ subfic r3,r3,0 C ... return value register
+ blr
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/powerpc32/submul_1.asm b/rts/gmp/mpn/powerpc32/submul_1.asm
new file mode 100644
index 0000000000..a129e9f9ea
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/submul_1.asm
@@ -0,0 +1,130 @@
+dnl PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright (C) 1995, 1997, 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+dnl INPUT PARAMETERS
+dnl res_ptr r3
+dnl s1_ptr r4
+dnl size r5
+dnl s2_limb r6
+
+dnl This is optimized for the PPC604. It has not been tested on PPC601, PPC603
+dnl or PPC750 since I don't have access to any such machines.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ cmpi cr0,r5,9 C more than 9 limbs?
+ bgt cr0,.Lbig C branch if more than 9 limbs
+
+ mtctr r5
+ lwz r0,0(r4)
+ mullw r7,r0,r6
+ mulhwu r10,r0,r6
+ lwz r9,0(r3)
+ subfc r8,r7,r9
+ addc r7,r7,r8 C invert cy (r7 is junk)
+ addi r3,r3,-4
+ bdz .Lend
+.Lloop:
+ lwzu r0,4(r4)
+ stwu r8,4(r3)
+ mullw r8,r0,r6
+ adde r7,r8,r10
+ mulhwu r10,r0,r6
+ lwz r9,4(r3)
+ addze r10,r10
+ subfc r8,r7,r9
+ addc r7,r7,r8 C invert cy (r7 is junk)
+ bdnz .Lloop
+.Lend: stw r8,4(r3)
+ addze r3,r10
+ blr
+
+.Lbig: stmw r30,-32(r1)
+ addi r5,r5,-1
+ srwi r0,r5,2
+ mtctr r0
+
+ lwz r7,0(r4)
+ mullw r8,r7,r6
+ mulhwu r0,r7,r6
+ lwz r7,0(r3)
+ subfc r7,r8,r7
+ addc r8,r8,r7
+ stw r7,0(r3)
+
+.LloopU:
+ lwz r7,4(r4)
+ lwz r12,8(r4)
+ lwz r30,12(r4)
+ lwzu r31,16(r4)
+ mullw r8,r7,r6
+ mullw r9,r12,r6
+ mullw r10,r30,r6
+ mullw r11,r31,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ adde r9,r9,r0
+ mulhwu r0,r12,r6
+ lwz r12,8(r3)
+ adde r10,r10,r0
+ mulhwu r0,r30,r6
+ lwz r30,12(r3)
+ adde r11,r11,r0
+ mulhwu r0,r31,r6
+ lwz r31,16(r3)
+ addze r0,r0 C new cy_limb
+ subfc r7,r8,r7
+ stw r7,4(r3)
+ subfe r12,r9,r12
+ stw r12,8(r3)
+ subfe r30,r10,r30
+ stw r30,12(r3)
+ subfe r31,r11,r31
+ stwu r31,16(r3)
+ subfe r11,r11,r11 C invert ...
+ addic r11,r11,1 C ... carry
+ bdnz .LloopU
+
+ andi. r31,r5,3
+ mtctr r31
+ beq cr0,.Lendx
+
+.LloopE:
+ lwzu r7,4(r4)
+ mullw r8,r7,r6
+ adde r8,r8,r0 C add cy_limb
+ mulhwu r0,r7,r6
+ lwz r7,4(r3)
+ addze r0,r0 C new cy_limb
+ subfc r7,r8,r7
+ addc r8,r8,r7
+ stwu r7,4(r3)
+ bdnz .LloopE
+.Lendx:
+ addze r3,r0
+ lmw r30,-32(r1)
+ blr
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/powerpc32/umul.asm b/rts/gmp/mpn/powerpc32/umul.asm
new file mode 100644
index 0000000000..eeaa0a4dc8
--- /dev/null
+++ b/rts/gmp/mpn/powerpc32/umul.asm
@@ -0,0 +1,32 @@
+dnl PowerPC-32 umul_ppmm -- support for longlong.h
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 2.1 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ mullw 0,4,5
+ mulhwu 9,4,5
+ stw 0,0(3)
+ mr 3,9
+ blr
+EPILOGUE(mpn_umul_ppmm)
diff --git a/rts/gmp/mpn/powerpc64/README b/rts/gmp/mpn/powerpc64/README
new file mode 100644
index 0000000000..c779276917
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/README
@@ -0,0 +1,36 @@
+PPC630 (aka Power3) pipeline information:
+
+Decoding is 4-way and issue is 8-way with some out-of-order capability.
+LS1 - ld/st unit 1
+LS2 - ld/st unit 2
+FXU1 - integer unit 1, handles any simple integer instructions
+FXU2 - integer unit 2, handles any simple integer instructions
+FXU3 - integer unit 3, handles integer multiply and divide
+FPU1 - floating-point unit 1
+FPU2 - floating-point unit 2
+
+Memory: Any two memory operations can issue, but memory subsystem
+ can sustain just one store per cycle.
+Simple integer: 2 operations (such as add, rl*)
+Integer multiply: 1 operation every 9th cycle worst case; exact timing depends
+ on 2nd operand most significant bit position (10 bits per
+ cycle). Multiply unit is not pipelined, only one multiply
+ operation in progress is allowed.
+Integer divide: ?
+Floating-point: Any plain 2 arithmetic instructions (such as fmul, fadd, fmadd)
+ Latency = 4.
+Floating-point divide:
+ ?
+Floating-point square root:
+ ?
+
+Best possible times for the main loops:
+shift: 1.5 cycles limited by integer unit contention.
+ With 63 special loops, one for each shift count, we could
+ reduce the needed integer instructions to 2, which would
+ reduce the best possible time to 1 cycle.
+add/sub: 1.5 cycles, limited by ld/st unit contention.
+mul: 18 cycles (average) unless floating-point operations are used,
+ but that would only help for multiplies of perhaps 10 and more
+ limbs.
+addmul/submul:Same situation as for mul.
diff --git a/rts/gmp/mpn/powerpc64/add_n.asm b/rts/gmp/mpn/powerpc64/add_n.asm
new file mode 100644
index 0000000000..c3325376dc
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/add_n.asm
@@ -0,0 +1,61 @@
+# PowerPC-64 mpn_add_n -- Add two limb vectors of the same length > 0 and
+# store sum in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ mtctr r6 # copy size into CTR
+ addic r0,r0,0 # clear cy
+ ld r8,0(r4) # load least significant s1 limb
+ ld r0,0(r5) # load least significant s2 limb
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ bdz .Lend # If done, skip loop
+.Loop: ld r9,8(r4) # load s1 limb
+ ld r10,8(r5) # load s2 limb
+ adde r7,r0,r8 # add limbs with cy, set cy
+ std r7,8(r3) # store result limb
+ bdz .Lexit # decrement CTR and exit if done
+ ldu r8,16(r4) # load s1 limb and update s1_ptr
+ ldu r0,16(r5) # load s2 limb and update s2_ptr
+ adde r7,r10,r9 # add limbs with cy, set cy
+ stdu r7,16(r3) # store result limb and update res_ptr
+ bdnz .Loop # decrement CTR and loop back
+
+.Lend: adde r7,r0,r8
+ std r7,8(r3) # store ultimate result limb
+ li r3,0 # load cy into ...
+ addze r3,r3 # ... return value register
+ blr
+.Lexit: adde r7,r10,r9
+ std r7,16(r3)
+ li r3,0 # load cy into ...
+ addze r3,r3 # ... return value register
+ blr
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/powerpc64/addmul_1.asm b/rts/gmp/mpn/powerpc64/addmul_1.asm
new file mode 100644
index 0000000000..81774482fe
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addmul_1.asm
@@ -0,0 +1,52 @@
+# PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ mtctr 5
+ li 9,0 # cy_limb = 0
+ addic 0,0,0
+ cal 3,-8(3)
+ cal 4,-8(4)
+.Loop:
+ ldu 0,8(4)
+ ld 10,8(3)
+ mulld 7,0,6
+ adde 7,7,9
+ mulhdu 9,0,6
+ addze 9,9
+ addc 7,7,10
+ stdu 7,8(3)
+ bdnz .Loop
+
+ addze 3,9
+ blr
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/powerpc64/addsub_n.asm b/rts/gmp/mpn/powerpc64/addsub_n.asm
new file mode 100644
index 0000000000..4ed40d71ae
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/addsub_n.asm
@@ -0,0 +1,107 @@
+# PowerPC-64 mpn_addsub_n -- Simultaneous add and sub.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`asm-syntax.m4')
+
+define(SAVE_BORROW_RESTORE_CARRY,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+define(SAVE_CARRY_RESTORE_BORROW,
+ `sldi $1,$1,63
+ adde $1,$1,$1')
+
+# 19991117
+
+# This is just crafted for testing some ideas, and verifying that we can make
+# it run fast. It runs at 2.55 cycles/limb on the 630, which is very good.
+# We should play a little with the schedule. No time has been spent on that.
+
+# To finish this, the loop warm up and cool down code needs to be written,
+# and the result need to be tested. Also, the proper calling sequence should
+# be used.
+
+# r1p r2p s1p s2p n
+# Use reg r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12
+
+ASM_START()
+PROLOGUE(mpn_addsub_n)
+ std r14,-64(1)
+ std r15,-56(1)
+ std r16,-48(1)
+ std r17,-40(1)
+ std r18,-32(1)
+ std r19,-24(1)
+
+ srdi r7,r7,2
+ mtctr r7 # copy size into CTR
+ addic r0,r0,0 # clear cy
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+	addi	r4,r4,-8		# offset 2nd res_ptr (r2p), it's updated before it's used
+
+.Loop:
+ adde r12,r8,r9
+ std r12,8(r3)
+ adde r12,r10,r11
+ std r12,16(r3)
+
+ SAVE_CARRY_RESTORE_BORROW(r0)
+
+ subfe r12,r8,r9
+ std r12,8(r4)
+ ld r8,8(r5) # s1 L 1
+ ld r9,8(r6) # s2 L 1
+ subfe r12,r10,r11
+ std r12,16(r4)
+ ld r10,16(r5) # s1 L 2
+ ld r11,16(r6) # s2 L 2
+# pair -------------------------
+ subfe r12,r14,r15
+ std r12,24(r4)
+ subfe r12,r16,r17
+ stdu r12,32(r4)
+
+ SAVE_BORROW_RESTORE_CARRY(r0)
+
+ adde r12,r14,r15
+ std r12,24(r3)
+ ld r14,24(r5) # s1 L 3
+ ld r15,24(r6) # s2 L 3
+ adde r12,r16,r17
+ stdu r12,32(r3)
+ ldu r16,32(r5) # s1 L 4
+ ldu r17,32(r6) # s2 L 4
+ bdnz .Loop
+
+ ld r14,-64(1)
+ ld r15,-56(1)
+ ld r16,-48(1)
+ ld r17,-40(1)
+ ld r18,-32(1)
+ ld r19,-24(1)
+ blr
+EPILOGUE(mpn_addsub_n)
diff --git a/rts/gmp/mpn/powerpc64/aix.m4 b/rts/gmp/mpn/powerpc64/aix.m4
new file mode 100644
index 0000000000..aee9f1f97a
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/aix.m4
@@ -0,0 +1,40 @@
+divert(-1)
+dnl m4 macros for AIX 64-bit assembly.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+define(`ASM_START',
+ `.machine "ppc64"
+ .toc')
+
+define(`PROLOGUE',
+ `
+ .globl $1
+ .globl .$1
+ .csect $1[DS],3
+$1:
+ .llong .$1, TOC[tc0], 0
+ .csect .text[PR]
+ .align 2
+.$1:')
+
+define(`EPILOGUE', `')
+
+divert
diff --git a/rts/gmp/mpn/powerpc64/copyd.asm b/rts/gmp/mpn/powerpc64/copyd.asm
new file mode 100644
index 0000000000..d06e8c25fd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/copyd.asm
@@ -0,0 +1,45 @@
+# PowerPC-64 mpn_copyd -- Copy a limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr r3
+# sptr r4
+# n r5
+
+include(`../config.m4')
+
+# Unrolling this analogous to sparc64/copyi.s doesn't help for any
+# operand sizes.
+
+ASM_START()
+PROLOGUE(mpn_copyd)
+ cmpdi cr0,r5,0
+ mtctr r5
+ sldi r5,r5,3
+ add r4,r4,r5
+ add r3,r3,r5
+ beq cr0,.Lend
+.Loop: ldu r0,-8(r4)
+ stdu r0,-8(r3)
+ bdnz .Loop
+.Lend: blr
+EPILOGUE(mpn_copyd)
diff --git a/rts/gmp/mpn/powerpc64/copyi.asm b/rts/gmp/mpn/powerpc64/copyi.asm
new file mode 100644
index 0000000000..a1bedc4c5b
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/copyi.asm
@@ -0,0 +1,44 @@
+# PowerPC-64 mpn_copyi -- Copy a limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr r3
+# sptr r4
+# n r5
+
+include(`../config.m4')
+
+# Unrolling this analogous to sparc64/copyi.s doesn't help for any
+# operand sizes.
+
+ASM_START()
+PROLOGUE(mpn_copyi)
+ cmpdi cr0,r5,0
+ mtctr r5
+ addi r4,r4,-8
+ addi r3,r3,-8
+ beq cr0,.Lend
+.Loop: ldu r0,8(r4)
+ stdu r0,8(r3)
+ bdnz .Loop
+.Lend: blr
+EPILOGUE(mpn_copyi)
diff --git a/rts/gmp/mpn/powerpc64/gmp-mparam.h b/rts/gmp/mpn/powerpc64/gmp-mparam.h
new file mode 100644
index 0000000000..6fefb960cd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/gmp-mparam.h
@@ -0,0 +1,62 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Generated by tuneup.c, 2000-07-16. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 10
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 57
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 16
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 89
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 28
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 216
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 14
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 6
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 163
+#endif
diff --git a/rts/gmp/mpn/powerpc64/lshift.asm b/rts/gmp/mpn/powerpc64/lshift.asm
new file mode 100644
index 0000000000..cef3a81fdd
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/lshift.asm
@@ -0,0 +1,159 @@
+# PowerPC-64 mpn_lshift -- Shift a number left.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ cmpdi cr0,r5,20 # more than 20 limbs?
+ sldi r0,r5,3
+ add r4,r4,r0 # make r4 point at end of s1
+ add r7,r3,r0 # make r7 point at end of res
+	bgt	.LBIG			# branch if more than 20 limbs
+
+ mtctr r5 # copy size into CTR
+ subfic r8,r6,64
+ ldu r11,-8(r4) # load first s1 limb
+ srd r3,r11,r8 # compute function return value
+ bdz .Lend1
+
+.Loop: ldu r10,-8(r4)
+ sld r9,r11,r6
+ srd r12,r10,r8
+ or r9,r9,r12
+ stdu r9,-8(r7)
+ bdz .Lend2
+ ldu r11,-8(r4)
+ sld r9,r10,r6
+ srd r12,r11,r8
+ or r9,r9,r12
+ stdu r9,-8(r7)
+ bdnz .Loop
+
+.Lend1: sld r0,r11,r6
+ std r0,-8(r7)
+ blr
+.Lend2: sld r0,r10,r6
+ std r0,-8(r7)
+ blr
+
+.LBIG:
+ std r24,-64(1)
+ std r25,-56(1)
+ std r26,-48(1)
+ std r27,-40(1)
+ std r28,-32(1)
+ std r29,-24(1)
+ std r30,-16(1)
+ std r31,-8(1)
+ ldu r9,-8(r4)
+ subfic r8,r6,64
+ srd r3,r9,r8 # compute function return value
+ sld r0,r9,r6
+ addi r5,r5,-1
+
+ andi. r10,r5,3 # count for spill loop
+ beq .Le
+ mtctr r10
+ ldu r28,-8(r4)
+ bdz .Lxe0
+
+.Loop0: sld r12,r28,r6
+ srd r24,r28,r8
+ ldu r28,-8(r4)
+ or r24,r0,r24
+ stdu r24,-8(r7)
+ mr r0,r12
+ bdnz .Loop0 # taken at most once!
+
+.Lxe0: sld r12,r28,r6
+ srd r24,r28,r8
+ or r24,r0,r24
+ stdu r24,-8(r7)
+ mr r0,r12
+
+.Le: srdi r5,r5,2 # count for unrolled loop
+ addi r5,r5,-1
+ mtctr r5
+ ld r28,-8(r4)
+ ld r29,-16(r4)
+ ld r30,-24(r4)
+ ldu r31,-32(r4)
+
+.LoopU: sld r9,r28,r6
+ srd r24,r28,r8
+ ld r28,-8(r4)
+ sld r10,r29,r6
+ srd r25,r29,r8
+ ld r29,-16(r4)
+ sld r11,r30,r6
+ srd r26,r30,r8
+ ld r30,-24(r4)
+ sld r12,r31,r6
+ srd r27,r31,r8
+ ldu r31,-32(r4)
+ or r24,r0,r24
+ std r24,-8(r7)
+ or r25,r9,r25
+ std r25,-16(r7)
+ or r26,r10,r26
+ std r26,-24(r7)
+ or r27,r11,r27
+ stdu r27,-32(r7)
+ mr r0,r12
+ bdnz .LoopU
+
+ sld r9,r28,r6
+ srd r24,r28,r8
+ sld r10,r29,r6
+ srd r25,r29,r8
+ sld r11,r30,r6
+ srd r26,r30,r8
+ sld r12,r31,r6
+ srd r27,r31,r8
+ or r24,r0,r24
+ std r24,-8(r7)
+ or r25,r9,r25
+ std r25,-16(r7)
+ or r26,r10,r26
+ std r26,-24(r7)
+ or r27,r11,r27
+ stdu r27,-32(r7)
+ mr r0,r12
+
+ std r0,-8(r7)
+ ld r24,-64(1)
+ ld r25,-56(1)
+ ld r26,-48(1)
+ ld r27,-40(1)
+ ld r28,-32(1)
+ ld r29,-24(1)
+ ld r30,-16(1)
+ ld r31,-8(1)
+ blr
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/powerpc64/mul_1.asm b/rts/gmp/mpn/powerpc64/mul_1.asm
new file mode 100644
index 0000000000..47597283ff
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/mul_1.asm
@@ -0,0 +1,49 @@
+# PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ mtctr 5
+ li 9,0 # cy_limb = 0
+ addic 0,0,0
+ cal 3,-8(3)
+ cal 4,-8(4)
+.Loop:
+ ldu 0,8(4)
+ mulld 7,0,6
+ adde 7,7,9
+ mulhdu 9,0,6
+ stdu 7,8(3)
+ bdnz .Loop
+
+ addze 3,9
+ blr
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/powerpc64/rshift.asm b/rts/gmp/mpn/powerpc64/rshift.asm
new file mode 100644
index 0000000000..88272c7fa9
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/rshift.asm
@@ -0,0 +1,60 @@
+# PowerPC-64 mpn_rshift -- Shift a number right.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# cnt r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ mtctr r5 # copy size into CTR
+ addi r7,r3,-8 # move adjusted res_ptr to free return reg
+ subfic r8,r6,64
+ ld r11,0(r4) # load first s1 limb
+ sld r3,r11,r8 # compute function return value
+ bdz .Lend1
+
+.Loop: ldu r10,8(r4)
+ srd r9,r11,r6
+ sld r12,r10,r8
+ or r9,r9,r12
+ stdu r9,8(r7)
+ bdz .Lend2
+ ldu r11,8(r4)
+ srd r9,r10,r6
+ sld r12,r11,r8
+ or r9,r9,r12
+ stdu r9,8(r7)
+ bdnz .Loop
+
+.Lend1: srd r0,r11,r6
+ std r0,8(r7)
+ blr
+
+.Lend2: srd r0,r10,r6
+ std r0,8(r7)
+ blr
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/powerpc64/sub_n.asm b/rts/gmp/mpn/powerpc64/sub_n.asm
new file mode 100644
index 0000000000..4de3de69c7
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/sub_n.asm
@@ -0,0 +1,61 @@
+# PowerPC-64 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+# and store difference in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# s2_ptr r5
+# size r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ mtctr r6 # copy size into CTR
+ addic r0,r6,-1 # set cy
+ ld r8,0(r4) # load least significant s1 limb
+ ld r0,0(r5) # load least significant s2 limb
+ addi r3,r3,-8 # offset res_ptr, it's updated before it's used
+ bdz .Lend # If done, skip loop
+.Loop: ld r9,8(r4) # load s1 limb
+ ld r10,8(r5) # load s2 limb
+ subfe r7,r0,r8 # subtract limbs with cy, set cy
+ std r7,8(r3) # store result limb
+ bdz .Lexit # decrement CTR and exit if done
+ ldu r8,16(r4) # load s1 limb and update s1_ptr
+ ldu r0,16(r5) # load s2 limb and update s2_ptr
+ subfe r7,r10,r9 # subtract limbs with cy, set cy
+ stdu r7,16(r3) # store result limb and update res_ptr
+ bdnz .Loop # decrement CTR and loop back
+
+.Lend: subfe r7,r0,r8
+ std r7,8(r3) # store ultimate result limb
+ subfe r3,r0,r0 # load !cy into ...
+ subfic r3,r3,0 # ... return value register
+ blr
+.Lexit: subfe r7,r10,r9
+ std r7,16(r3)
+ subfe r3,r0,r0 # load !cy into ...
+ subfic r3,r3,0 # ... return value register
+ blr
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/powerpc64/submul_1.asm b/rts/gmp/mpn/powerpc64/submul_1.asm
new file mode 100644
index 0000000000..17f6369a38
--- /dev/null
+++ b/rts/gmp/mpn/powerpc64/submul_1.asm
@@ -0,0 +1,54 @@
+# PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr r3
+# s1_ptr r4
+# size r5
+# s2_limb r6
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ mtctr 5
+ li 9,0 # cy_limb = 0
+ addic 0,0,0
+ cal 3,-8(3)
+ cal 4,-8(4)
+.Loop:
+ ldu 0,8(4)
+ ld 10,8(3)
+ mulld 7,0,6
+ adde 7,7,9
+ mulhdu 9,0,6
+ addze 9,9
+ subfc 7,7,10
+ stdu 7,8(3)
+ subfe 11,11,11 # invert ...
+ addic 11,11,1 # ... carry
+ bdnz .Loop
+
+ addze 3,9
+ blr
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/pyr/add_n.s b/rts/gmp/mpn/pyr/add_n.s
new file mode 100644
index 0000000000..e1fc535846
--- /dev/null
+++ b/rts/gmp/mpn/pyr/add_n.s
@@ -0,0 +1,76 @@
+# Pyramid __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ movw $-1,tr0 # representation for carry clear
+
+ movw pr3,tr2
+ andw $3,tr2
+ beq Lend0
+ subw tr2,pr3
+
+Loop0: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ addwc (pr2),tr1
+ movw tr1,(pr0)
+
+ subwb tr0,tr0
+ addw $4,pr0
+ addw $4,pr1
+ addw $4,pr2
+ addw $-1,tr2
+ bne Loop0
+
+ mtstw pr3,pr3
+ beq Lend
+Lend0:
+Loop: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ addwc (pr2),tr1
+ movw tr1,(pr0)
+
+ movw 4(pr1),tr1
+ addwc 4(pr2),tr1
+ movw tr1,4(pr0)
+
+ movw 8(pr1),tr1
+ addwc 8(pr2),tr1
+ movw tr1,8(pr0)
+
+ movw 12(pr1),tr1
+ addwc 12(pr2),tr1
+ movw tr1,12(pr0)
+
+ subwb tr0,tr0
+ addw $16,pr0
+ addw $16,pr1
+ addw $16,pr2
+ addw $-4,pr3
+ bne Loop
+Lend:
+ mnegw tr0,pr0
+ ret
diff --git a/rts/gmp/mpn/pyr/addmul_1.s b/rts/gmp/mpn/pyr/addmul_1.s
new file mode 100644
index 0000000000..65c3f8f008
--- /dev/null
+++ b/rts/gmp/mpn/pyr/addmul_1.s
@@ -0,0 +1,45 @@
+# Pyramid __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+ mova (pr0)[pr2*4],pr0
+ mova (pr1)[pr2*4],pr1
+ mnegw pr2,pr2
+ movw $0,tr3
+
+Loop: movw (pr1)[pr2*4],tr1
+ uemul pr3,tr0
+ addw tr3,tr1
+ movw $0,tr3
+ addwc tr0,tr3
+ movw (pr0)[pr2*0x4],tr0
+ addw tr0,tr1
+ addwc $0,tr3
+ movw tr1,(pr0)[pr2*4]
+ addw $1,pr2
+ bne Loop
+
+ movw tr3,pr0
+ ret
diff --git a/rts/gmp/mpn/pyr/mul_1.s b/rts/gmp/mpn/pyr/mul_1.s
new file mode 100644
index 0000000000..1272297c42
--- /dev/null
+++ b/rts/gmp/mpn/pyr/mul_1.s
@@ -0,0 +1,42 @@
+# Pyramid __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ mova (pr0)[pr2*4],pr0
+ mova (pr1)[pr2*4],pr1
+ mnegw pr2,pr2
+ movw $0,tr3
+
+Loop: movw (pr1)[pr2*4],tr1
+ uemul pr3,tr0
+ addw tr3,tr1
+ movw $0,tr3
+ addwc tr0,tr3
+ movw tr1,(pr0)[pr2*4]
+ addw $1,pr2
+ bne Loop
+
+ movw tr3,pr0
+ ret
diff --git a/rts/gmp/mpn/pyr/sub_n.s b/rts/gmp/mpn/pyr/sub_n.s
new file mode 100644
index 0000000000..1fd2eb0f17
--- /dev/null
+++ b/rts/gmp/mpn/pyr/sub_n.s
@@ -0,0 +1,76 @@
+# Pyramid __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+.text
+ .align 2
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ movw $-1,tr0 # representation for carry clear
+
+ movw pr3,tr2
+ andw $3,tr2
+ beq Lend0
+ subw tr2,pr3
+
+Loop0: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ subwb (pr2),tr1
+ movw tr1,(pr0)
+
+ subwb tr0,tr0
+ addw $4,pr0
+ addw $4,pr1
+ addw $4,pr2
+ addw $-1,tr2
+ bne Loop0
+
+ mtstw pr3,pr3
+ beq Lend
+Lend0:
+Loop: rsubw $0,tr0 # restore carry bit from carry-save register
+
+ movw (pr1),tr1
+ subwb (pr2),tr1
+ movw tr1,(pr0)
+
+ movw 4(pr1),tr1
+ subwb 4(pr2),tr1
+ movw tr1,4(pr0)
+
+ movw 8(pr1),tr1
+ subwb 8(pr2),tr1
+ movw tr1,8(pr0)
+
+ movw 12(pr1),tr1
+ subwb 12(pr2),tr1
+ movw tr1,12(pr0)
+
+ subwb tr0,tr0
+ addw $16,pr0
+ addw $16,pr1
+ addw $16,pr2
+ addw $-4,pr3
+ bne Loop
+Lend:
+ mnegw tr0,pr0
+ ret
diff --git a/rts/gmp/mpn/sh/add_n.s b/rts/gmp/mpn/sh/add_n.s
new file mode 100644
index 0000000000..df388b31a3
--- /dev/null
+++ b/rts/gmp/mpn/sh/add_n.s
@@ -0,0 +1,47 @@
+! SH __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! s2_ptr r6
+! size r7
+
+ .text
+ .align 2
+ .global ___gmpn_add_n
+___gmpn_add_n:
+ mov #0,r3 ! clear cy save reg
+
+Loop: mov.l @r5+,r1
+ mov.l @r6+,r2
+ shlr r3 ! restore cy
+ addc r2,r1
+ movt r3 ! save cy
+ mov.l r1,@r4
+ dt r7
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r3,r0 ! return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/sh/sh2/addmul_1.s b/rts/gmp/mpn/sh/sh2/addmul_1.s
new file mode 100644
index 0000000000..f34a7f0503
--- /dev/null
+++ b/rts/gmp/mpn/sh/sh2/addmul_1.s
@@ -0,0 +1,53 @@
+! SH2 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! size r6
+! s2_limb r7
+
+ .text
+ .align 1
+ .global ___gmpn_addmul_1
+___gmpn_addmul_1:
+ mov #0,r2 ! cy_limb = 0
+ mov #0,r0 ! Keep r0 = 0 for entire loop
+ clrt
+
+Loop: mov.l @r5+,r3
+ dmulu.l r3,r7
+ sts macl,r1
+ addc r2,r1 ! lo_prod += old cy_limb
+ sts mach,r2 ! new cy_limb = hi_prod
+ mov.l @r4,r3
+ addc r0,r2 ! cy_limb += T, T = 0
+ addc r3,r1
+ addc r0,r2 ! cy_limb += T, T = 0
+ dt r6
+ mov.l r1,@r4
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r2,r0
diff --git a/rts/gmp/mpn/sh/sh2/mul_1.s b/rts/gmp/mpn/sh/sh2/mul_1.s
new file mode 100644
index 0000000000..2a117a3175
--- /dev/null
+++ b/rts/gmp/mpn/sh/sh2/mul_1.s
@@ -0,0 +1,50 @@
+! SH2 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! size r6
+! s2_limb r7
+
+ .text
+ .align 1
+ .global ___gmpn_mul_1
+___gmpn_mul_1:
+ mov #0,r2 ! cy_limb = 0
+ mov #0,r0 ! Keep r0 = 0 for entire loop
+ clrt
+
+Loop: mov.l @r5+,r3
+ dmulu.l r3,r7
+ sts macl,r1
+ addc r2,r1
+ sts mach,r2
+ addc r0,r2 ! propagate carry to cy_limb (dt clobbers T)
+ dt r6
+ mov.l r1,@r4
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r2,r0
diff --git a/rts/gmp/mpn/sh/sh2/submul_1.s b/rts/gmp/mpn/sh/sh2/submul_1.s
new file mode 100644
index 0000000000..eb9a27dde3
--- /dev/null
+++ b/rts/gmp/mpn/sh/sh2/submul_1.s
@@ -0,0 +1,53 @@
+! SH2 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! size r6
+! s2_limb r7
+
+ .text
+ .align 1
+ .global ___gmpn_submul_1
+___gmpn_submul_1:
+ mov #0,r2 ! cy_limb = 0
+ mov #0,r0 ! Keep r0 = 0 for entire loop
+ clrt
+
+Loop: mov.l @r5+,r3
+ dmulu.l r3,r7
+ sts macl,r1
+ addc r2,r1 ! lo_prod += old cy_limb
+ sts mach,r2 ! new cy_limb = hi_prod
+ mov.l @r4,r3
+ addc r0,r2 ! cy_limb += T, T = 0
+ subc r3,r1
+ addc r0,r2 ! cy_limb += T, T = 0
+ dt r6
+ mov.l r1,@r4
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r2,r0
diff --git a/rts/gmp/mpn/sh/sub_n.s b/rts/gmp/mpn/sh/sub_n.s
new file mode 100644
index 0000000000..5f818c95a8
--- /dev/null
+++ b/rts/gmp/mpn/sh/sub_n.s
@@ -0,0 +1,47 @@
+! SH __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+! difference in a third limb vector.
+
+! Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r4
+! s1_ptr r5
+! s2_ptr r6
+! size r7
+
+ .text
+ .align 2
+ .global ___gmpn_sub_n
+___gmpn_sub_n:
+ mov #0,r3 ! clear cy save reg
+
+Loop: mov.l @r5+,r1
+ mov.l @r6+,r2
+ shlr r3 ! restore cy
+ subc r2,r1
+ movt r3 ! save cy
+ mov.l r1,@r4
+ dt r7
+ bf.s Loop
+ add #4,r4
+
+ rts
+ mov r3,r0 ! return carry-out from most sign. limb
diff --git a/rts/gmp/mpn/sparc32/README b/rts/gmp/mpn/sparc32/README
new file mode 100644
index 0000000000..7c19df7bc4
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/README
@@ -0,0 +1,36 @@
+This directory contains mpn functions for various SPARC chips. Code that
+runs only on version 8 SPARC implementations, is in the v8 subdirectory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+ Load and Store timing
+
+On most early SPARC implementations, the ST instruction takes multiple
+cycles, while a STD takes just a single cycle more than an ST.  For the CPUs
+in SPARCstation I and II, the times are 3 and 4 cycles, respectively.
+Therefore, combining two ST instructions into a STD when possible is a
+significant optimization.
+
+Later SPARC implementations have single cycle ST.
+
+For SuperSPARC, we can perform just one memory instruction per cycle, even
+if up to two integer instructions can be executed in its pipeline. For
+programs that perform so many memory operations that there are not enough
+non-memory operations to issue in parallel with all memory operations, using
+LDD and STD when possible helps.
+
+STATUS
+
+1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5
+ cycles/limb asymptotically. We could optimize speed for special counts
+ by using ADDXCC.
+
+2. On a SuperSPARC, mpn_add_n and mpn_sub_n run at 2.5 cycles/limb, or 2
+   cycles/limb asymptotically.
+
+3. mpn_mul_1 runs at what is believed to be optimal speed.
+
+4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a
+   cycle by avoiding one of the add instructions.  See a29k/addmul_1.
+
+The speed of the code for other SPARC implementations is uncertain.
diff --git a/rts/gmp/mpn/sparc32/add_n.asm b/rts/gmp/mpn/sparc32/add_n.asm
new file mode 100644
index 0000000000..5f1d00c0e0
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/add_n.asm
@@ -0,0 +1,236 @@
+dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
+dnl sum in a third limb vector.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) C branch if alignment differs
+ nop
+C ** V1a **
+L(0): andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc n,-10,n
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1):
+ addcc n,8-2,n
+ blt L(end1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+ addxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ addxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1):
+ addxcc %g4,%g2,%o4
+ addxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+C ** V1b **
+ mov s2_ptr,%g1
+ mov s1_ptr,s2_ptr
+ b L(0)
+ mov %g1,s1_ptr
+
+C ** V2 **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ. Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2): cmp n,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
+ be L(v2) C if no, branch
+ nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ addcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ blt L(fin2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin2):
+ addcc n,8-2,n
+ blt L(end2)
+ subcc %g0,%o4,%g0 C restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ addxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ addxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 C restore cy
+L(end2):
+ andcc n,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ addxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/sparc32/addmul_1.asm b/rts/gmp/mpn/sparc32/addmul_1.asm
new file mode 100644
index 0000000000..80c94e4251
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/addmul_1.asm
@@ -0,0 +1,146 @@
+dnl SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the
+dnl result to a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ addcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ addcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ addcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/sparc32/lshift.asm b/rts/gmp/mpn/sparc32/lshift.asm
new file mode 100644
index 0000000000..529733ac2d
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/lshift.asm
@@ -0,0 +1,97 @@
+dnl SPARC mpn_lshift -- Shift a number left.
+dnl
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr %o0
+C src_ptr %o1
+C size %o2
+C cnt %o3
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ sll %o2,2,%g1
+ add %o1,%g1,%o1 C make %o1 point at end of src
+ ld [%o1-4],%g2 C load first limb
+ sub %g0,%o3,%o5 C negate shift count
+ add %o0,%g1,%o0 C make %o0 point at end of res
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 C number of limbs in first loop
+ srl %g2,%o5,%g1 C compute function result
+ be L(0) C if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 C adjust count for main loop
+
+L(loop0):
+ ld [%o1-8],%g3
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ addcc %g4,-1,%g4
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0+0]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1-8],%g3
+ add %o0,-16,%o0
+ addcc %o2,-4,%o2
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+
+ ld [%o1-12],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+12]
+ srl %g2,%o5,%g1
+
+ ld [%o1-16],%g3
+ sll %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0+8]
+ srl %g3,%o5,%g1
+
+ ld [%o1-20],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+4]
+ srl %g2,%o5,%g1
+
+ add %o1,-16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0+0]
+
+L(end): sll %g2,%o3,%g2
+ st %g2,[%o0-4]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/sparc32/mul_1.asm b/rts/gmp/mpn/sparc32/mul_1.asm
new file mode 100644
index 0000000000..e5fedeabaa
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/mul_1.asm
@@ -0,0 +1,137 @@
+dnl SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne,a L(loop0)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g1,[%o4+%o2]
+
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2 C g2 = S1_LIMB iff S2_LIMB < 0, else 0
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne,a L(loop)
+ ld [%o1+%o2],%o5
+
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/sparc32/rshift.asm b/rts/gmp/mpn/sparc32/rshift.asm
new file mode 100644
index 0000000000..9187dbaa6f
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/rshift.asm
@@ -0,0 +1,93 @@
+dnl SPARC mpn_rshift -- Shift a number right.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr %o0
+C src_ptr %o1
+C size %o2
+C cnt %o3
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ld [%o1],%g2 C load first limb
+ sub %g0,%o3,%o5 C negate shift count
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 C number of limbs in first loop
+ sll %g2,%o5,%g1 C compute function result
+ be L(0) C if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 C adjust count for main loop
+
+L(loop0):
+ ld [%o1+4],%g3
+ add %o0,4,%o0
+ add %o1,4,%o1
+ addcc %g4,-1,%g4
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne L(loop0)
+ st %o4,[%o0-4]
+
+L(0): tst %o2
+ be L(end)
+ nop
+
+L(loop):
+ ld [%o1+4],%g3
+ add %o0,16,%o0
+ addcc %o2,-4,%o2
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+
+ ld [%o1+8],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-16]
+ sll %g2,%o5,%g1
+
+ ld [%o1+12],%g3
+ srl %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0-12]
+ sll %g3,%o5,%g1
+
+ ld [%o1+16],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-8]
+ sll %g2,%o5,%g1
+
+ add %o1,16,%o1
+ or %g4,%g1,%g4
+ bne L(loop)
+ st %g4,[%o0-4]
+
+L(end): srl %g2,%o3,%g2
+ st %g2,[%o0-0]
+ retl
+ ld [%sp+80],%o0
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/sparc32/sub_n.asm b/rts/gmp/mpn/sparc32/sub_n.asm
new file mode 100644
index 0000000000..071909a1b6
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/sub_n.asm
@@ -0,0 +1,326 @@
+dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(res_ptr,%o0)
+define(s1_ptr,%o1)
+define(s2_ptr,%o2)
+define(n,%o3)
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ xor s2_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(1) C branch if alignment differs
+ nop
+C ** V1a **
+ andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s1_ptr+0],%g4
+ addcc n,-10,n
+ ld [s1_ptr+4],%g1
+ ldd [s2_ptr+0],%g2
+ blt L(fin1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+16],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+20],%g1
+ ldd [s2_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+24],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+28],%g1
+ ldd [s2_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+32],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+36],%g1
+ ldd [s2_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1):
+ addcc n,8-2,n
+ blt L(end1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1):
+ subxcc %g4,%g2,%o4
+ ld [s1_ptr+8],%g4
+ subxcc %g1,%g3,%o5
+ ld [s1_ptr+12],%g1
+ ldd [s2_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1):
+ subxcc %g4,%g2,%o4
+ subxcc %g1,%g3,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s1_ptr+8],%g4
+ ld [s2_ptr+8],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+L(1): xor s1_ptr,res_ptr,%g1
+ andcc %g1,4,%g0
+ bne L(2)
+ nop
+C ** V1b **
+ andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
+ be L(v1b) C if no, branch
+ nop
+C Add least significant limb separately to align res_ptr and s1_ptr
+ ld [s2_ptr],%g4
+ add s2_ptr,4,s2_ptr
+ ld [s1_ptr],%g2
+ add s1_ptr,4,s1_ptr
+ add n,-1,n
+ subcc %g2,%g4,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+L(v1b): addx %g0,%g0,%o4 C save cy in register
+ cmp n,2 C if n < 2 ...
+ bl L(end2) C ... branch to tail code
+ subcc %g0,%o4,%g0 C restore cy
+
+ ld [s2_ptr+0],%g4
+ addcc n,-10,n
+ ld [s2_ptr+4],%g1
+ ldd [s1_ptr+0],%g2
+ blt L(fin1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+16],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+20],%g1
+ ldd [s1_ptr+16],%g2
+ std %o4,[res_ptr+8]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+24],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+28],%g1
+ ldd [s1_ptr+24],%g2
+ std %o4,[res_ptr+16]
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+32],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+36],%g1
+ ldd [s1_ptr+32],%g2
+ std %o4,[res_ptr+24]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop1b)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin1b):
+ addcc n,8-2,n
+ blt L(end1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 2 limbs until less than 2 limbs remain
+L(loope1b):
+ subxcc %g2,%g4,%o4
+ ld [s2_ptr+8],%g4
+ subxcc %g3,%g1,%o5
+ ld [s2_ptr+12],%g1
+ ldd [s1_ptr+8],%g2
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope1b)
+ subcc %g0,%o4,%g0 C restore cy
+L(end1b):
+ subxcc %g2,%g4,%o4
+ subxcc %g3,%g1,%o5
+ std %o4,[res_ptr+0]
+ addx %g0,%g0,%o4 C save cy in register
+
+ andcc n,1,%g0
+ be L(ret1b)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+ ld [s2_ptr+8],%g4
+ ld [s1_ptr+8],%g2
+ subxcc %g2,%g4,%o4
+ st %o4,[res_ptr+8]
+
+L(ret1b):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+
+C ** V2 **
+C If we come here, the alignment of s1_ptr and res_ptr as well as the
+C alignment of s2_ptr and res_ptr differ. Since there are only two ways
+C things can be aligned (that we care about) we now know that the alignment
+C of s1_ptr and s2_ptr are the same.
+
+L(2): cmp n,1
+ be L(jone)
+ nop
+ andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
+ be L(v2) C if no, branch
+ nop
+C Add least significant limb separately to align s1_ptr and s2_ptr
+ ld [s1_ptr],%g4
+ add s1_ptr,4,s1_ptr
+ ld [s2_ptr],%g2
+ add s2_ptr,4,s2_ptr
+ add n,-1,n
+ subcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+ add res_ptr,4,res_ptr
+
+L(v2): addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ blt L(fin2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add blocks of 8 limbs until less than 8 limbs remain
+L(loop2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ ldd [s1_ptr+8],%g2
+ ldd [s2_ptr+8],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+8]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+12]
+ ldd [s1_ptr+16],%g2
+ ldd [s2_ptr+16],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+16]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+20]
+ ldd [s1_ptr+24],%g2
+ ldd [s2_ptr+24],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+24]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+28]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-8,n
+ add s1_ptr,32,s1_ptr
+ add s2_ptr,32,s2_ptr
+ add res_ptr,32,res_ptr
+ bge L(loop2)
+ subcc %g0,%o4,%g0 C restore cy
+
+L(fin2):
+ addcc n,8-2,n
+ blt L(end2)
+ subcc %g0,%o4,%g0 C restore cy
+L(loope2):
+ ldd [s1_ptr+0],%g2
+ ldd [s2_ptr+0],%o4
+ subxcc %g2,%o4,%g2
+ st %g2,[res_ptr+0]
+ subxcc %g3,%o5,%g3
+ st %g3,[res_ptr+4]
+ addx %g0,%g0,%o4 C save cy in register
+ addcc n,-2,n
+ add s1_ptr,8,s1_ptr
+ add s2_ptr,8,s2_ptr
+ add res_ptr,8,res_ptr
+ bge L(loope2)
+ subcc %g0,%o4,%g0 C restore cy
+L(end2):
+ andcc n,1,%g0
+ be L(ret2)
+ subcc %g0,%o4,%g0 C restore cy
+C Add last limb
+L(jone):
+ ld [s1_ptr],%g4
+ ld [s2_ptr],%g2
+ subxcc %g4,%g2,%o4
+ st %o4,[res_ptr]
+
+L(ret2):
+ retl
+ addx %g0,%g0,%o0 C return carry-out from most sign. limb
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/sparc32/submul_1.asm b/rts/gmp/mpn/sparc32/submul_1.asm
new file mode 100644
index 0000000000..12abd844ce
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/submul_1.asm
@@ -0,0 +1,146 @@
+dnl SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl the result from a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ C Make S1_PTR and RES_PTR point at the end of their blocks
+ C and put (- 4 x SIZE) in index/loop counter.
+ sll %o2,2,%o2
+ add %o0,%o2,%o4 C RES_PTR in o4 since o0 is retval
+ add %o1,%o2,%o1
+ sub %g0,%o2,%o2
+
+ cmp %o3,0xfff
+ bgu L(large)
+ nop
+
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ b L(0)
+ add %o4,-4,%o4
+L(loop0):
+ subcc %o5,%g1,%g1
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g1,[%o4+%o2]
+L(0): wr %g0,%o3,%y
+ sra %o5,31,%g2
+ and %o3,%g2,%g2
+ andcc %g1,0,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,%o5,%g1
+ mulscc %g1,0,%g1
+ sra %g1,20,%g4
+ sll %g1,12,%g1
+ rd %y,%g3
+ srl %g3,20,%g3
+ or %g1,%g3,%g1
+
+ addcc %g1,%o0,%g1
+ addx %g2,%g4,%o0 C add sign-compensation and cy to hi limb
+ addcc %o2,4,%o2 C loop counter
+ bne L(loop0)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g1,%g1
+ addx %o0,%g0,%o0
+ retl
+ st %g1,[%o4+%o2]
+
+L(large):
+ ld [%o1+%o2],%o5
+ mov 0,%o0
+ sra %o3,31,%g4 C g4 = mask of ones iff S2_LIMB < 0
+ b L(1)
+ add %o4,-4,%o4
+L(loop):
+ subcc %o5,%g3,%g3
+ ld [%o1+%o2],%o5
+ addx %o0,%g0,%o0
+ st %g3,[%o4+%o2]
+L(1): wr %g0,%o5,%y
+ and %o5,%g4,%g2
+ andcc %g0,%g0,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%o3,%g1
+ mulscc %g1,%g0,%g1
+ rd %y,%g3
+ addcc %g3,%o0,%g3
+ addx %g2,%g1,%o0
+ addcc %o2,4,%o2
+ bne L(loop)
+ ld [%o4+%o2],%o5
+
+ subcc %o5,%g3,%g3
+ addx %o0,%g0,%o0
+ retl
+ st %g3,[%o4+%o2]
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/sparc32/udiv_fp.asm b/rts/gmp/mpn/sparc32/udiv_fp.asm
new file mode 100644
index 0000000000..e340e147d2
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/udiv_fp.asm
@@ -0,0 +1,158 @@
+dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl This is for v7 CPUs with a floating-point unit.
+
+dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+
+ifdef(`PIC',
+` TEXT
+L(getpc):
+ retl
+ nop')
+
+ TEXT
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc): call L(getpc) C put address of this insn in %o7
+ ldd [%o7+L(C0)-L(pc)],%f8',
+` sethi %hi(L(C0)),%o7
+ ldd [%o7+%lo(L(C0))],%f8')
+
+ fitod %f10,%f4
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+
+ifdef(`PIC',
+` ldd [%o7+L(C1)-L(pc)],%f4',
+` sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4')
+
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ wr %g0,%i4,%y
+ sra %i3,31,%g2
+ and %i4,%g2,%g2
+ andcc %g0,0,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,%i3,%g1
+ mulscc %g1,0,%g1
+ add %g1,%g2,%i0
+ rd %y,%g3
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/rts/gmp/mpn/sparc32/udiv_nfp.asm b/rts/gmp/mpn/sparc32/udiv_nfp.asm
new file mode 100644
index 0000000000..ae19f4c6e9
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/udiv_nfp.asm
@@ -0,0 +1,193 @@
+dnl SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+dnl This is for v7 CPUs without a floating-point unit.
+
+dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr o0
+C n1 o1
+C n0 o2
+C d o3
+
+ASM_START()
+PROLOGUE(mpn_udiv_qrnnd)
+ tst %o3
+ bneg L(largedivisor)
+ mov 8,%g1
+
+ b L(p1)
+ addxcc %o2,%o2,%o2
+
+L(plop):
+ bcc L(n1)
+ addxcc %o2,%o2,%o2
+L(p1): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n2)
+ addxcc %o2,%o2,%o2
+L(p2): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n3)
+ addxcc %o2,%o2,%o2
+L(p3): addx %o1,%o1,%o1
+ subcc %o1,%o3,%o4
+ bcc L(n4)
+ addxcc %o2,%o2,%o2
+L(p4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(plop)
+ subcc %o1,%o3,%o4
+ bcc L(n5)
+ addxcc %o2,%o2,%o2
+L(p5): st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(nlop):
+ bcc L(p1)
+ addxcc %o2,%o2,%o2
+L(n1): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p2)
+ addxcc %o2,%o2,%o2
+L(n2): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p3)
+ addxcc %o2,%o2,%o2
+L(n3): addx %o4,%o4,%o4
+ subcc %o4,%o3,%o1
+ bcc L(p4)
+ addxcc %o2,%o2,%o2
+L(n4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(nlop)
+ subcc %o4,%o3,%o1
+ bcc L(p5)
+ addxcc %o2,%o2,%o2
+L(n5): st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(largedivisor):
+ and %o2,1,%o5 C %o5 = n0 & 1
+
+ srl %o2,1,%o2
+ sll %o1,31,%g2
+ or %g2,%o2,%o2 C %o2 = lo(n1n0 >> 1)
+ srl %o1,1,%o1 C %o1 = hi(n1n0 >> 1)
+
+ and %o3,1,%g2
+ srl %o3,1,%g3 C %g3 = floor(d / 2)
+ add %g3,%g2,%g3 C %g3 = ceil(d / 2)
+
+ b L(Lp1)
+ addxcc %o2,%o2,%o2
+
+L(Lplop):
+ bcc L(Ln1)
+ addxcc %o2,%o2,%o2
+L(Lp1): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln2)
+ addxcc %o2,%o2,%o2
+L(Lp2): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln3)
+ addxcc %o2,%o2,%o2
+L(Lp3): addx %o1,%o1,%o1
+ subcc %o1,%g3,%o4
+ bcc L(Ln4)
+ addxcc %o2,%o2,%o2
+L(Lp4): addx %o1,%o1,%o1
+ addcc %g1,-1,%g1
+ bne L(Lplop)
+ subcc %o1,%g3,%o4
+ bcc L(Ln5)
+ addxcc %o2,%o2,%o2
+L(Lp5): add %o1,%o1,%o1 C << 1
+ tst %g2
+ bne L(oddp)
+ add %o5,%o1,%o1
+ st %o1,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(Lnlop):
+ bcc L(Lp1)
+ addxcc %o2,%o2,%o2
+L(Ln1): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp2)
+ addxcc %o2,%o2,%o2
+L(Ln2): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp3)
+ addxcc %o2,%o2,%o2
+L(Ln3): addx %o4,%o4,%o4
+ subcc %o4,%g3,%o1
+ bcc L(Lp4)
+ addxcc %o2,%o2,%o2
+L(Ln4): addx %o4,%o4,%o4
+ addcc %g1,-1,%g1
+ bne L(Lnlop)
+ subcc %o4,%g3,%o1
+ bcc L(Lp5)
+ addxcc %o2,%o2,%o2
+L(Ln5): add %o4,%o4,%o4 C << 1
+ tst %g2
+ bne L(oddn)
+ add %o5,%o4,%o4
+ st %o4,[%o0]
+ retl
+ xnor %g0,%o2,%o0
+
+L(oddp):
+ xnor %g0,%o2,%o2
+ C q' in %o2. r' in %o1
+ addcc %o1,%o2,%o1
+ bcc L(Lp6)
+ addx %o2,0,%o2
+ sub %o1,%o3,%o1
+L(Lp6): subcc %o1,%o3,%g0
+ bcs L(Lp7)
+ subx %o2,-1,%o2
+ sub %o1,%o3,%o1
+L(Lp7): st %o1,[%o0]
+ retl
+ mov %o2,%o0
+
+L(oddn):
+ xnor %g0,%o2,%o2
+ C q' in %o2. r' in %o4
+ addcc %o4,%o2,%o4
+ bcc L(Ln6)
+ addx %o2,0,%o2
+ sub %o4,%o3,%o4
+L(Ln6): subcc %o4,%o3,%g0
+ bcs L(Ln7)
+ subx %o2,-1,%o2
+ sub %o4,%o3,%o4
+L(Ln7): st %o4,[%o0]
+ retl
+ mov %o2,%o0
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/rts/gmp/mpn/sparc32/umul.asm b/rts/gmp/mpn/sparc32/umul.asm
new file mode 100644
index 0000000000..efa56851d6
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/umul.asm
@@ -0,0 +1,68 @@
+dnl SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ wr %g0,%o1,%y
+ sra %o2,31,%g2 C Don't move this insn
+ and %o1,%g2,%g2 C Don't move this insn
+ andcc %g0,0,%g1 C Don't move this insn
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,%o2,%g1
+ mulscc %g1,0,%g1
+ rd %y,%g3
+ st %g3,[%o0]
+ retl
+ add %g1,%g2,%o0
+EPILOGUE(mpn_umul_ppmm)
diff --git a/rts/gmp/mpn/sparc32/v8/addmul_1.asm b/rts/gmp/mpn/sparc32/v8/addmul_1.asm
new file mode 100644
index 0000000000..da44644b51
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/addmul_1.asm
@@ -0,0 +1,122 @@
+dnl SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+ orcc %g0,%g0,%g2
+ ld [%o1+0],%o4 C 1
+
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+ifdef(`PIC',
+` mov %o7,%g4 C Save return address register
+0: call 1f
+ add %o7,L(1)-0b,%g3
+1: mov %g4,%o7 C Restore return address register
+',
+` sethi %hi(L(1)),%g3
+ or %g3,%lo(L(1)),%g3
+')
+ jmp %g3+%g1
+ nop
+L(1):
+L(L00): add %o0,-4,%o0
+ b L(loop00) C 4, 8, 12, ...
+ add %o1,-4,%o1
+ nop
+L(L01): b L(loop01) C 1, 5, 9, ...
+ nop
+ nop
+ nop
+L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
+ b L(loop10)
+ add %o1,4,%o1
+ nop
+L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+ b L(loop11)
+ add %o1,-8,%o1
+ nop
+
+L(loop):
+ addcc %g3,%g2,%g3 C 1
+ ld [%o1+4],%o4 C 2
+ rd %y,%g2 C 1
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] C 1
+L(loop00):
+ umul %o4,%o3,%g3 C 2
+ ld [%o0+4],%g1 C 2
+ addxcc %g3,%g2,%g3 C 2
+ ld [%o1+8],%o4 C 3
+ rd %y,%g2 C 2
+ addx %g0,%g2,%g2
+ nop
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+4] C 2
+L(loop11):
+ umul %o4,%o3,%g3 C 3
+ addxcc %g3,%g2,%g3 C 3
+ ld [%o1+12],%o4 C 4
+ rd %y,%g2 C 3
+ add %o1,16,%o1
+ addx %g0,%g2,%g2
+ ld [%o0+8],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+8] C 3
+L(loop10):
+ umul %o4,%o3,%g3 C 4
+ addxcc %g3,%g2,%g3 C 4
+ ld [%o1+0],%o4 C 1
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+ ld [%o0+12],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+12] C 4
+ add %o0,16,%o0
+ addx %g0,%g2,%g2
+L(loop01):
+ addcc %o2,-4,%o2
+ bg L(loop)
+ umul %o4,%o3,%g3 C 1
+
+ addcc %g3,%g2,%g3 C 4
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+ ld [%o0+0],%g1 C 2
+ addcc %g1,%g3,%g3
+ st %g3,[%o0+0] C 4
+ addx %g0,%g2,%o0
+
+ retl
+ nop
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/sparc32/v8/mul_1.asm b/rts/gmp/mpn/sparc32/v8/mul_1.asm
new file mode 100644
index 0000000000..801247553a
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/mul_1.asm
@@ -0,0 +1,103 @@
+dnl SPARC v8 mpn_mul_1 -- Multiply a limb vector with a single limb and
+dnl store the product in a second limb vector.
+
+dnl Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+ sll %o2,4,%g1
+ and %g1,(4-1)<<4,%g1
+ifdef(`PIC',
+` mov %o7,%g4 C Save return address register
+0: call 1f
+ add %o7,L(1)-0b,%g3
+1: mov %g4,%o7 C Restore return address register
+',
+` sethi %hi(L(1)),%g3
+ or %g3,%lo(L(1)),%g3
+')
+ jmp %g3+%g1
+ ld [%o1+0],%o4 C 1
+L(1):
+L(L00): add %o0,-4,%o0
+ add %o1,-4,%o1
+ b L(loop00) C 4, 8, 12, ...
+ orcc %g0,%g0,%g2
+L(L01): b L(loop01) C 1, 5, 9, ...
+ orcc %g0,%g0,%g2
+ nop
+ nop
+L(L10): add %o0,-12,%o0 C 2, 6, 10, ...
+ add %o1,4,%o1
+ b L(loop10)
+ orcc %g0,%g0,%g2
+ nop
+L(L11): add %o0,-8,%o0 C 3, 7, 11, ...
+ add %o1,-8,%o1
+ b L(loop11)
+ orcc %g0,%g0,%g2
+
+L(loop):
+ addcc %g3,%g2,%g3 C 1
+ ld [%o1+4],%o4 C 2
+ st %g3,[%o0+0] C 1
+ rd %y,%g2 C 1
+L(loop00):
+ umul %o4,%o3,%g3 C 2
+ addxcc %g3,%g2,%g3 C 2
+ ld [%o1+8],%o4 C 3
+ st %g3,[%o0+4] C 2
+ rd %y,%g2 C 2
+L(loop11):
+ umul %o4,%o3,%g3 C 3
+ addxcc %g3,%g2,%g3 C 3
+ ld [%o1+12],%o4 C 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] C 3
+ rd %y,%g2 C 3
+L(loop10):
+ umul %o4,%o3,%g3 C 4
+ addxcc %g3,%g2,%g3 C 4
+ ld [%o1+0],%o4 C 1
+ st %g3,[%o0+12] C 4
+ add %o0,16,%o0
+ rd %y,%g2 C 4
+ addx %g0,%g2,%g2
+L(loop01):
+ addcc %o2,-4,%o2
+ bg L(loop)
+ umul %o4,%o3,%g3 C 1
+
+ addcc %g3,%g2,%g3 C 4
+ st %g3,[%o0+0] C 4
+ rd %y,%g2 C 4
+
+ retl
+ addx %g0,%g2,%o0
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/sparc32/v8/submul_1.asm b/rts/gmp/mpn/sparc32/v8/submul_1.asm
new file mode 100644
index 0000000000..9ed132f4c1
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/submul_1.asm
@@ -0,0 +1,58 @@
+dnl SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr o0
+C s1_ptr o1
+C size o2
+C s2_limb o3
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+ sub %g0,%o2,%o2 C negate ...
+ sll %o2,2,%o2 C ... and scale size
+ sub %o1,%o2,%o1 C o1 is offset s1_ptr
+ sub %o0,%o2,%g1 C g1 is offset res_ptr
+
+ mov 0,%o0 C clear cy_limb
+
+L(loop):
+ ld [%o1+%o2],%o4
+ ld [%g1+%o2],%g2
+ umul %o4,%o3,%o5
+ rd %y,%g3
+ addcc %o5,%o0,%o5
+ addx %g3,0,%o0
+ subcc %g2,%o5,%g2
+ addx %o0,0,%o0
+ st %g2,[%g1+%o2]
+
+ addcc %o2,4,%o2
+ bne L(loop)
+ nop
+
+ retl
+ nop
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm
new file mode 100644
index 0000000000..0d5e8d415d
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/supersparc/udiv.asm
@@ -0,0 +1,122 @@
+dnl SuperSPARC mpn_udiv_qrnnd division support, used from longlong.h.
+dnl This is for SuperSPARC only, to compensate for its semi-functional
+dnl udiv instruction.
+
+dnl Copyright (C) 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C rem_ptr i0
+C n1 i1
+C n0 i2
+C d i3
+
+ASM_START()
+
+ifdef(`PIC',
+` TEXT
+L(getpc):
+ retl
+ nop')
+
+ TEXT
+ ALIGN(8)
+L(C0): .double 0r4294967296
+L(C1): .double 0r2147483648
+
+PROLOGUE(mpn_udiv_qrnnd)
+ save %sp,-104,%sp
+ st %i1,[%fp-8]
+ ld [%fp-8],%f10
+
+ifdef(`PIC',
+`L(pc): call L(getpc) C put address of this insn in %o7
+ ldd [%o7+L(C0)-L(pc)],%f8',
+` sethi %hi(L(C0)),%o7
+ ldd [%o7+%lo(L(C0))],%f8')
+
+ fitod %f10,%f4
+ cmp %i1,0
+ bge L(248)
+ mov %i0,%i5
+ faddd %f4,%f8,%f4
+L(248):
+ st %i2,[%fp-8]
+ ld [%fp-8],%f10
+ fmuld %f4,%f8,%f6
+ cmp %i2,0
+ bge L(249)
+ fitod %f10,%f2
+ faddd %f2,%f8,%f2
+L(249):
+ st %i3,[%fp-8]
+ faddd %f6,%f2,%f2
+ ld [%fp-8],%f10
+ cmp %i3,0
+ bge L(250)
+ fitod %f10,%f4
+ faddd %f4,%f8,%f4
+L(250):
+ fdivd %f2,%f4,%f2
+
+ifdef(`PIC',
+` ldd [%o7+L(C1)-L(pc)],%f4',
+` sethi %hi(L(C1)),%o7
+ ldd [%o7+%lo(L(C1))],%f4')
+
+ fcmped %f2,%f4
+ nop
+ fbge,a L(251)
+ fsubd %f2,%f4,%f2
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ b L(252)
+ ld [%fp-8],%i4
+L(251):
+ fdtoi %f2,%f2
+ st %f2,[%fp-8]
+ ld [%fp-8],%i4
+ sethi %hi(-2147483648),%g2
+ xor %i4,%g2,%i4
+L(252):
+ umul %i3,%i4,%g3
+ rd %y,%i0
+ subcc %i2,%g3,%o7
+ subxcc %i1,%i0,%g0
+ be L(253)
+ cmp %o7,%i3
+
+ add %i4,-1,%i0
+ add %o7,%i3,%o7
+ st %o7,[%i5]
+ ret
+ restore
+L(253):
+ blu L(246)
+ mov %i4,%i0
+ add %i4,1,%i0
+ sub %o7,%i3,%o7
+L(246):
+ st %o7,[%i5]
+ ret
+ restore
+EPILOGUE(mpn_udiv_qrnnd)
diff --git a/rts/gmp/mpn/sparc32/v8/umul.asm b/rts/gmp/mpn/sparc32/v8/umul.asm
new file mode 100644
index 0000000000..ae8f692a0a
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v8/umul.asm
@@ -0,0 +1,31 @@
+dnl SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc.
+
+dnl Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_umul_ppmm)
+ umul %o1,%o2,%g2
+ st %g2,[%o0]
+ retl
+ rd %y,%o0
+EPILOGUE(mpn_umul_ppmm)
diff --git a/rts/gmp/mpn/sparc32/v9/README b/rts/gmp/mpn/sparc32/v9/README
new file mode 100644
index 0000000000..9b39713271
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/README
@@ -0,0 +1,4 @@
+Code for SPARC processors implementing version 9 of the SPARC architecture.
+This code is for systems that don't preserve the full 64-bit contents of
+integer registers at context switch. For other systems (such as Solaris 7 or
+later) use the code in ../../sparc64.
diff --git a/rts/gmp/mpn/sparc32/v9/addmul_1.asm b/rts/gmp/mpn/sparc32/v9/addmul_1.asm
new file mode 100644
index 0000000000..c1762cc41f
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/addmul_1.asm
@@ -0,0 +1,288 @@
+dnl SPARC v9 32-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+PROLOGUE(mpn_addmul_1)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10')
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+C BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 C res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+L(ret): add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ st %g4,[%i0-4]
+
+ ret
+ restore %g0,%g3,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mpn_addmul_1)
diff --git a/rts/gmp/mpn/sparc32/v9/gmp-mparam.h b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h
new file mode 100644
index 0000000000..f946b900f0
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/gmp-mparam.h
@@ -0,0 +1,69 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+/* These values are for UltraSPARC I, II, and IIi. It is bogus that
+ this file lives in v9, but that will do for now. */
+
+/* Variations in addmul_1 speed make the multiply and square thresholds
+ doubtful. TOOM3_SQR_THRESHOLD had to be estimated here. */
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 30
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 200
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 59
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 500
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 107
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 146
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 29
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 3
+#endif
diff --git a/rts/gmp/mpn/sparc32/v9/mul_1.asm b/rts/gmp/mpn/sparc32/v9/mul_1.asm
new file mode 100644
index 0000000000..f8f0fdd8c2
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/mul_1.asm
@@ -0,0 +1,267 @@
+dnl SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and
+dnl store the result in a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+PROLOGUE(mpn_mul_1)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10')
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+C BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 C res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+L(ret): add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ st %g4,[%i0-4]
+
+ ret
+ restore %g0,%g3,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mpn_mul_1)
diff --git a/rts/gmp/mpn/sparc32/v9/submul_1.asm b/rts/gmp/mpn/sparc32/v9/submul_1.asm
new file mode 100644
index 0000000000..6195ea88ea
--- /dev/null
+++ b/rts/gmp/mpn/sparc32/v9/submul_1.asm
@@ -0,0 +1,291 @@
+dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+PROLOGUE(mpn_submul_1)
+ save %sp,-256,%sp
+
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hi(L(noll)),%g1
+ ld [%g1+%lo(L(noll))],%f10')
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-16]
+ ld [%fp-16],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1],%f11
+ subcc %i2,1,%i2
+ be,pn %icc,L(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end2)
+ std %f12,[%fp-16]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,L(end3)
+ std %f12,[%fp-32]
+
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,L(end4)
+ std %f12,[%fp-16]
+
+ b,a L(loopm)
+
+ .align 16
+C BEGIN LOOP
+L(loop):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ addx %g3,0,%g3
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ subcc %i2,1,%i2
+ be,pn %icc,L(loope)
+ add %i0,4,%i0 C res_ptr++
+L(loopm):
+ fxtod %f10,%f2
+ ld [%i1],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ addx %g3,0,%g3
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ subcc %i2,1,%i2
+ bne,pt %icc,L(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ b,a L(xxx)
+L(loope):
+L(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ subcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-32],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ b,a L(yyy)
+
+L(end3):
+ fxtod %f10,%f2
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-16],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+L(xxx): fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-40]
+ fdtox %f4,%f12
+ std %f12,[%fp-32]
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-40],%g2 C p16
+ ldx [%fp-32],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+ b,a L(ret)
+
+L(end1):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-24]
+ fdtox %f4,%f12
+ std %f12,[%fp-16]
+
+ ld [%i0],%g5
+ ldx [%fp-24],%g2 C p16
+ ldx [%fp-16],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+L(ret): add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ srlx %g4,32,%g3
+ st %l2,[%i0-4]
+
+ addx %g3,%g0,%g3
+ ret
+ restore %g0,%g3,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mpn_submul_1)
diff --git a/rts/gmp/mpn/sparc64/README b/rts/gmp/mpn/sparc64/README
new file mode 100644
index 0000000000..6923a133f3
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/README
@@ -0,0 +1,48 @@
+This directory contains mpn functions for 64-bit V9 SPARC
+
+RELEVANT OPTIMIZATION ISSUES
+
+The Ultra I/II pipeline executes up to two simple integer arithmetic operations
+per cycle. The 64-bit integer multiply instruction mulx takes from 5 cycles to
+35 cycles, depending on the position of the most significant bit of the 1st
+source operand. It cannot overlap with other instructions. For our use of
+mulx, it will take from 5 to 20 cycles.
+
+Integer conditional move instructions cannot dual-issue with other integer
+instructions. No conditional move can issue 1-5 cycles after a load. (Or
+something such bizzare.)
+
+Integer branches can issue with two integer arithmetic instructions. Likewise
+for integer loads. Four instructions may issue (arith, arith, ld/st, branch)
+but only if the branch is last.
+
+(The V9 architecture manual recommends that the 2nd operand of a multiply
+instruction be the smaller one. For UltraSPARC, they got things backwards and
+optimize for the wrong operand! Really helpful, given that multiply
+is incredibly slow on these CPUs!)
+
+STATUS
+
+There is new code in ~/prec/gmp-remote/sparc64. Not tested or completed, but
+the pipelines are worked out. Here are the timings:
+
+* lshift, rshift: The code is well-optimized and runs at 2.0 cycles/limb.
+
+* add_n, sub_n: add3.s currently runs at 6 cycles/limb. We use a bizarre
+ scheme of compares and branches (with some nops and fnops to align things)
+ and carefully stay away from the instructions intended for this application
+ (i.e., movcs and movcc).
+
+ Using movcc/movcs, even with deep unrolling, seems to get down to 7
+ cycles/limb.
+
+ The most promising approach is to split operands in 32-bit pieces using
+  srlx, then use two addccc, and finally combine the results with sllx+or.
+ The result could run at 5 cycles/limb, I think. It might be possible to
+ do without unrolling, or with minimal unrolling.
+
+* addmul_1/submul_1: Should optimize for when scalar operand < 2^32.
+* addmul_1/submul_1: Since mulx is horrendously slow on UltraSPARC I/II,
+ Karatsuba's method should save up to 16 cycles (i.e. > 20%).
+* mul_1 (and possibly the other multiply functions): Handle carry in the
+ same tricky way as add_n,sub_n.
diff --git a/rts/gmp/mpn/sparc64/add_n.asm b/rts/gmp/mpn/sparc64/add_n.asm
new file mode 100644
index 0000000000..72b3895a5b
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/add_n.asm
@@ -0,0 +1,172 @@
+! SPARC v9 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_add_n)
+
+! 12 mem ops >= 12 cycles
+! 8 shift insn >= 8 cycles
+! 8 addccc, executing alone, +8 cycles
+! Unrolling not mandatory...perhaps 2-way is best?
+! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
+! All in all, it runs at 5 cycles/limb
+
+ save %sp,-160,%sp
+
+ addcc %g0,%g0,%g0
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(there)
+ nop
+
+ ldx [%i1+0],%l0
+ ldx [%i2+0],%l4
+ ldx [%i1+8],%l1
+ ldx [%i2+8],%l5
+ ldx [%i1+16],%l2
+ ldx [%i2+16],%l6
+ ldx [%i1+24],%l3
+ ldx [%i2+24],%l7
+ add %i1,32,%i1
+ add %i2,32,%i2
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(skip)
+ nop
+ b L(loop1) ! jump instead of executing many NOPs
+ nop
+ ALIGN(32)
+!--------- Start main loop ---------
+L(loop1):
+ addccc %l0,%l4,%g1
+!-
+ srlx %l0,32,%o0
+ ldx [%i1+0],%l0
+!-
+ srlx %l4,32,%o4
+ ldx [%i2+0],%l4
+!-
+ addccc %o0,%o4,%g0
+!-
+ addccc %l1,%l5,%g2
+!-
+ srlx %l1,32,%o1
+ ldx [%i1+8],%l1
+!-
+ srlx %l5,32,%o5
+ ldx [%i2+8],%l5
+!-
+ addccc %o1,%o5,%g0
+!-
+ addccc %l2,%l6,%g3
+!-
+ srlx %l2,32,%o2
+ ldx [%i1+16],%l2
+!-
+ srlx %l6,32,%g5 ! asymmetry
+ ldx [%i2+16],%l6
+!-
+ addccc %o2,%g5,%g0
+!-
+ addccc %l3,%l7,%g4
+!-
+ srlx %l3,32,%o3
+ ldx [%i1+24],%l3
+ add %i1,32,%i1
+!-
+ srlx %l7,32,%o7
+ ldx [%i2+24],%l7
+ add %i2,32,%i2
+!-
+ addccc %o3,%o7,%g0
+!-
+ stx %g1,[%i0+0]
+!-
+ stx %g2,[%i0+8]
+!-
+ stx %g3,[%i0+16]
+ add %i3,-4,%i3
+!-
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+ brgez,pt %i3,L(loop1)
+ nop
+!--------- End main loop ---------
+L(skip):
+ addccc %l0,%l4,%g1
+ srlx %l0,32,%o0
+ srlx %l4,32,%o4
+ addccc %o0,%o4,%g0
+ addccc %l1,%l5,%g2
+ srlx %l1,32,%o1
+ srlx %l5,32,%o5
+ addccc %o1,%o5,%g0
+ addccc %l2,%l6,%g3
+ srlx %l2,32,%o2
+ srlx %l6,32,%g5 ! asymmetry
+ addccc %o2,%g5,%g0
+ addccc %l3,%l7,%g4
+ srlx %l3,32,%o3
+ srlx %l7,32,%o7
+ addccc %o3,%o7,%g0
+ stx %g1,[%i0+0]
+ stx %g2,[%i0+8]
+ stx %g3,[%i0+16]
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+L(there):
+ add %i3,4,%i3
+ brz,pt %i3,L(end)
+ nop
+
+L(loop2):
+ ldx [%i1+0],%l0
+ add %i1,8,%i1
+ ldx [%i2+0],%l4
+ add %i2,8,%i2
+ srlx %l0,32,%g2
+ srlx %l4,32,%g3
+ addccc %l0,%l4,%g1
+ addccc %g2,%g3,%g0
+ stx %g1,[%i0+0]
+ add %i0,8,%i0
+ add %i3,-1,%i3
+ brgz,pt %i3,L(loop2)
+ nop
+
+L(end): addc %g0,%g0,%i0
+ ret
+ restore
+EPILOGUE(mpn_add_n)
diff --git a/rts/gmp/mpn/sparc64/addmul1h.asm b/rts/gmp/mpn/sparc64/addmul1h.asm
new file mode 100644
index 0000000000..96cb5f7369
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/addmul1h.asm
@@ -0,0 +1,203 @@
+dnl SPARC 64-bit addmull/addmulu -- Helper for mpn_addmul_1 and mpn_mul_1.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+ifdef(`LOWPART',
+`addmull:',
+`addmulu:')
+ save %sp,-256,%sp
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1+4],%f11
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,E(end2)
+ std %f12,[%fp-17]
+
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end3)
+ std %f12,[%fp-33]
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,E(end4)
+ std %f12,[%fp-17]
+
+ b,a E(loop)
+ nop C nop is cheap to nullify
+
+ ALIGN(16)
+C BEGIN LOOP
+E(loop):
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ sub %i2,2,%i2
+ add %i0,4,%i0 C res_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ std %f12,[%fp-17]
+ brnz,pt %i2,E(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+E(loope):
+E(end4):
+ fxtod %f10,%f2
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ b,a E(yyy)
+
+E(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g5,%g1,%g1 C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ifdef(`LOWPART',
+` ld [%i0+DHI],%g5')
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ ldx [%fp-33],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ifdef(`LOWPART',
+` add %g5,%g1,%g1') C add *res_ptr to p0 (ADD2)
+ add %g3,%g1,%g4 C p += cy
+ifdef(`LOWPART',
+` st %g4,[%i0-4+DHI]
+ srlx %g4,32,%g4')
+
+ ret
+ restore %g0,%g4,%o0 C sideeffect: put cy in retreg
+ifdef(`LOWPART',
+`EPILOGUE(addmull)',
+`EPILOGUE(addmulu)')
diff --git a/rts/gmp/mpn/sparc64/addmul_1.asm b/rts/gmp/mpn/sparc64/addmul_1.asm
new file mode 100644
index 0000000000..c3f04cea6a
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/addmul_1.asm
@@ -0,0 +1,114 @@
+dnl SPARC 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
+dnl add the result to a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+PROLOGUE(mpn_addmul_1)
+ save %sp,-256,%sp
+
+C We store 0.0 in f10 and keep it invariant across the two
+C function calls below. Note that this is not ABI conformant,
+C but since the functions are local, that's acceptable.
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hh(L(noll)),%g2
+ sethi %lm(L(noll)),%g1
+ or %g2,%hm(L(noll)),%g2
+ or %g1,%lo(L(noll)),%g1
+ sllx %g2,32,%g2
+ ld [%g1+%g2],%f10')
+
+ sub %i1,%i0,%g1
+ srlx %g1,3,%g1
+ cmp %g1,%i2
+ bcc,pt %xcc,L(nooverlap)
+ nop
+
+ sllx %i2,3,%g2 C compute stack allocation byte count
+ add %g2,15,%o0
+ and %o0,-16,%o0
+ sub %sp,%o0,%sp
+ add %sp,2223,%o0
+
+ mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp
+ call mpn_copyi
+ mov %i2,%o2 C copy n to mpn_copyi's count parameter
+
+ add %sp,2223,%i1
+
+L(nooverlap):
+C First multiply-add with low 32 bits of s2_limb
+ mov %i0,%o0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call addmull
+ srl %i3,0,%o3
+
+	mov	%o0,%l0		C keep carry-out from addmull
+
+C Now multiply-add with high 32 bits of s2_limb, unless it is zero.
+ srlx %i3,32,%o3
+ brz,a,pn %o3,L(small)
+ mov %o0,%i0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call addmulu
+ add %i0,4,%o0
+
+ add %l0,%o0,%i0
+L(small):
+ ret
+ restore %g0,%g0,%g0
+EPILOGUE(mpn_addmul_1)
+
+C Put a zero in the text segment to allow us to get the address
+C quickly when compiling for PIC
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+define(`LO',`(+4)')
+define(`HI',`(-4)')
+
+define(`DLO',`(+4)')
+define(`DHI',`(-4)')
+define(`LOWPART')
+define(`E',`L(l.$1)')
+include_mpn(`sparc64/addmul1h.asm')
+
+define(`DLO',`(-4)')
+define(`DHI',`(+4)')
+undefine(`LOWPART')
+define(`E',`L(u.$1)')
+include_mpn(`sparc64/addmul1h.asm')
diff --git a/rts/gmp/mpn/sparc64/copyi.asm b/rts/gmp/mpn/sparc64/copyi.asm
new file mode 100644
index 0000000000..d9957e3c90
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/copyi.asm
@@ -0,0 +1,79 @@
+! SPARC v9 __gmpn_copy -- Copy a limb vector.
+
+! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rptr %o0
+! sptr %o1
+! n %o2
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_copyi)
+ add %o2,-8,%o2
+ brlz,pn %o2,L(skip)
+ nop
+ b,a L(loop1)
+ nop
+
+ ALIGN(16)
+L(loop1):
+ ldx [%o1+0],%g1
+ ldx [%o1+8],%g2
+ ldx [%o1+16],%g3
+ ldx [%o1+24],%g4
+ ldx [%o1+32],%g5
+ ldx [%o1+40],%o3
+ ldx [%o1+48],%o4
+ ldx [%o1+56],%o5
+ add %o1,64,%o1
+ stx %g1,[%o0+0]
+ stx %g2,[%o0+8]
+ stx %g3,[%o0+16]
+ stx %g4,[%o0+24]
+ stx %g5,[%o0+32]
+ stx %o3,[%o0+40]
+ stx %o4,[%o0+48]
+ stx %o5,[%o0+56]
+ add %o2,-8,%o2
+ brgez,pt %o2,L(loop1)
+ add %o0,64,%o0
+
+L(skip):
+ add %o2,8,%o2
+ brz,pt %o2,L(end)
+ nop
+
+L(loop2):
+ ldx [%o1],%g1
+ add %o1,8,%o1
+ add %o2,-1,%o2
+ stx %g1,[%o0]
+ add %o0,8,%o0
+ brgz,pt %o2,L(loop2)
+ nop
+
+L(end): retl
+ nop
+EPILOGUE(mpn_copyi)
diff --git a/rts/gmp/mpn/sparc64/gmp-mparam.h b/rts/gmp/mpn/sparc64/gmp-mparam.h
new file mode 100644
index 0000000000..74f61661c1
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/gmp-mparam.h
@@ -0,0 +1,88 @@
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+/* Tell the toom3 multiply implementation to call low-level mpn
+ functions instead of open-coding operations in C. */
+#define USE_MORE_MPN 1
+
+
+/* Run on sun workshop cc. */
+/* Generated by tuneup.c, 2000-07-30. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 12
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 95
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 33
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 125
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 27
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 107
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 12
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 199
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 304, 608, 1344, 2304, 7168, 20480, 49152, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 320
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 1664
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 304, 608, 1344, 2816, 7168, 20480, 49152, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 320
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 1664
+#endif
diff --git a/rts/gmp/mpn/sparc64/lshift.asm b/rts/gmp/mpn/sparc64/lshift.asm
new file mode 100644
index 0000000000..2d2edc50a7
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/lshift.asm
@@ -0,0 +1,97 @@
+! SPARC v9 __gmpn_lshift --
+
+! Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_lshift)
+ sllx %o2,3,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ldx [%o1-8],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ srlx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop
+ mov %g1,%g5
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ldx [%o1-16],%g3
+ add %o0,-8,%o0
+ add %o1,-8,%o1
+ add %g4,-1,%g4
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,L(loop0)
+ stx %o4,[%o0+0]
+
+L(0): brz,pn %o2,L(end)
+ nop
+
+L(loop1):
+ ldx [%o1-16],%g3
+ add %o0,-32,%o0
+ add %o2,-4,%o2
+ sllx %g2,%o3,%o4
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-24],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+24]
+ srlx %g2,%o5,%g1
+
+ ldx [%o1-32],%g3
+ sllx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0+16]
+ srlx %g3,%o5,%g1
+
+ ldx [%o1-40],%g2
+ sllx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0+8]
+ srlx %g2,%o5,%g1
+
+ add %o1,-32,%o1
+ or %g4,%g1,%g4
+ brnz,pt %o2,L(loop1)
+ stx %g4,[%o0+0]
+
+L(end): sllx %g2,%o3,%g2
+ stx %g2,[%o0-8]
+ retl
+ mov %g5,%o0
+EPILOGUE(mpn_lshift)
diff --git a/rts/gmp/mpn/sparc64/mul_1.asm b/rts/gmp/mpn/sparc64/mul_1.asm
new file mode 100644
index 0000000000..f2f2821d51
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/mul_1.asm
@@ -0,0 +1,113 @@
+dnl SPARC 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and
+dnl store the result to a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+PROLOGUE(mpn_mul_1)
+ save %sp,-256,%sp
+
+C We store 0.0 in f10 and keep it invariant across the two
+C function calls below. Note that this is not ABI conformant,
+C but since the functions are local, that's acceptable.
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hh(L(noll)),%g2
+ sethi %lm(L(noll)),%g1
+ or %g2,%hm(L(noll)),%g2
+ or %g1,%lo(L(noll)),%g1
+ sllx %g2,32,%g2
+ ld [%g1+%g2],%f10')
+
+ sub %i1,%i0,%g1
+ srlx %g1,3,%g1
+ cmp %g1,%i2
+ bcc,pt %xcc,L(nooverlap)
+ nop
+
+ sllx %i2,3,%g2 C compute stack allocation byte count
+ add %g2,15,%o0
+ and %o0,-16,%o0
+ sub %sp,%o0,%sp
+ add %sp,2223,%o0
+
+ mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp
+ call mpn_copyi
+ mov %i2,%o2 C copy n to mpn_copyi's count parameter
+
+ add %sp,2223,%i1
+
+L(nooverlap):
+C First multiply-add with low 32 bits of s2_limb
+ mov %i0,%o0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call mull
+ srl %i3,0,%o3
+
+	mov	%o0,%l0		C keep carry-out from mull
+
+C Now multiply-add with high 32 bits of s2_limb, unless it is zero.
+ srlx %i3,32,%o3
+ brz,a,pn %o3,L(small)
+ mov %o0,%i0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call addmulu
+ add %i0,4,%o0
+
+ add %l0,%o0,%i0
+L(small):
+ ret
+ restore %g0,%g0,%g0
+EPILOGUE(mpn_mul_1)
+
+C Put a zero in the text segment to allow us to get the address
+C quickly when compiling for PIC
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+define(`LO',`(+4)')
+define(`HI',`(-4)')
+
+define(`DLO',`(+4)')
+define(`DHI',`(-4)')
+define(`E',`L($1)')
+include_mpn(`sparc64/mul_1h.asm')
+
+define(`DLO',`(-4)')
+define(`DHI',`(+4)')
+undefine(`LOWPART')
+define(`E',`L(u.$1)')
+include_mpn(`sparc64/addmul1h.asm')
diff --git a/rts/gmp/mpn/sparc64/mul_1h.asm b/rts/gmp/mpn/sparc64/mul_1h.asm
new file mode 100644
index 0000000000..5078c01c3f
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/mul_1h.asm
@@ -0,0 +1,183 @@
+dnl SPARC 64-bit mull -- Helper for mpn_mul_1.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+mull:
+ save %sp,-256,%sp
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1+4],%f11
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,E(end2)
+ std %f12,[%fp-17]
+
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end3)
+ std %f12,[%fp-33]
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,E(end4)
+ std %f12,[%fp-17]
+
+ b,a E(loop)
+ nop C nop is cheap to nullify
+
+ ALIGN(16)
+C BEGIN LOOP
+E(loop):
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ sub %i2,2,%i2
+ add %i0,4,%i0 C res_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ std %f12,[%fp-17]
+ brnz,pt %i2,E(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+E(loope):
+E(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DHI]
+ b,a E(yyy)
+
+E(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ ldx [%fp-33],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %g4,[%i0-4+DLO]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ st %g4,[%i0-4+DHI]
+ srlx %g4,32,%g4
+
+ ret
+ restore %g0,%g4,%o0 C sideeffect: put cy in retreg
+EPILOGUE(mull)
diff --git a/rts/gmp/mpn/sparc64/rshift.asm b/rts/gmp/mpn/sparc64/rshift.asm
new file mode 100644
index 0000000000..baf7920efb
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/rshift.asm
@@ -0,0 +1,94 @@
+! SPARC v9 __gmpn_rshift --
+
+! Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_rshift)
+ ldx [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ and %o2,4-1,%g4 ! number of limbs in first loop
+ sllx %g2,%o5,%g1 ! compute function result
+ brz,pn %g4,L(0) ! if multiple of 4 limbs, skip first loop
+ mov %g1,%g5
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+L(loop0):
+ ldx [%o1+8],%g3
+ add %o0,8,%o0
+ add %o1,8,%o1
+ add %g4,-1,%g4
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ brnz,pt %g4,L(loop0)
+ stx %o4,[%o0-8]
+
+L(0): brz,pn %o2,L(end)
+ nop
+
+L(loop1):
+ ldx [%o1+8],%g3
+ add %o0,32,%o0
+ add %o2,-4,%o2
+ srlx %g2,%o3,%o4
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+16],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-32]
+ sllx %g2,%o5,%g1
+
+ ldx [%o1+24],%g3
+ srlx %g2,%o3,%o4
+ or %g4,%g1,%g4
+ stx %g4,[%o0-24]
+ sllx %g3,%o5,%g1
+
+ ldx [%o1+32],%g2
+ srlx %g3,%o3,%g4
+ or %o4,%g1,%o4
+ stx %o4,[%o0-16]
+ sllx %g2,%o5,%g1
+
+ add %o1,32,%o1
+ or %g4,%g1,%g4
+ brnz %o2,L(loop1)
+ stx %g4,[%o0-8]
+
+L(end): srlx %g2,%o3,%g2
+ stx %g2,[%o0-0]
+ retl
+ mov %g5,%o0
+EPILOGUE(mpn_rshift)
diff --git a/rts/gmp/mpn/sparc64/sub_n.asm b/rts/gmp/mpn/sparc64/sub_n.asm
new file mode 100644
index 0000000000..61547138e0
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/sub_n.asm
@@ -0,0 +1,172 @@
+! SPARC v9 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! s1_ptr %o1
+! s2_ptr %o2
+! size %o3
+
+include(`../config.m4')
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+PROLOGUE(mpn_sub_n)
+
+! 12 mem ops >= 12 cycles
+! 8 shift insn >= 8 cycles
+! 8 addccc, executing alone, +8 cycles
+! Unrolling not mandatory...perhaps 2-way is best?
+! Put one ldx/stx and one s?lx per issue tuple, fill with pointer arith and loop ctl
+! All in all, it runs at 5 cycles/limb
+
+ save %sp,-160,%sp
+
+ addcc %g0,%g0,%g0
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(there)
+ nop
+
+ ldx [%i1+0],%l0
+ ldx [%i2+0],%l4
+ ldx [%i1+8],%l1
+ ldx [%i2+8],%l5
+ ldx [%i1+16],%l2
+ ldx [%i2+16],%l6
+ ldx [%i1+24],%l3
+ ldx [%i2+24],%l7
+ add %i1,32,%i1
+ add %i2,32,%i2
+
+ add %i3,-4,%i3
+ brlz,pn %i3,L(skip)
+ nop
+ b L(loop1) ! jump instead of executing many NOPs
+ nop
+ ALIGN(32)
+!--------- Start main loop ---------
+L(loop1):
+ subccc %l0,%l4,%g1
+!-
+ srlx %l0,32,%o0
+ ldx [%i1+0],%l0
+!-
+ srlx %l4,32,%o4
+ ldx [%i2+0],%l4
+!-
+ subccc %o0,%o4,%g0
+!-
+ subccc %l1,%l5,%g2
+!-
+ srlx %l1,32,%o1
+ ldx [%i1+8],%l1
+!-
+ srlx %l5,32,%o5
+ ldx [%i2+8],%l5
+!-
+ subccc %o1,%o5,%g0
+!-
+ subccc %l2,%l6,%g3
+!-
+ srlx %l2,32,%o2
+ ldx [%i1+16],%l2
+!-
+ srlx %l6,32,%g5 ! asymmetry
+ ldx [%i2+16],%l6
+!-
+ subccc %o2,%g5,%g0
+!-
+ subccc %l3,%l7,%g4
+!-
+ srlx %l3,32,%o3
+ ldx [%i1+24],%l3
+ add %i1,32,%i1
+!-
+ srlx %l7,32,%o7
+ ldx [%i2+24],%l7
+ add %i2,32,%i2
+!-
+ subccc %o3,%o7,%g0
+!-
+ stx %g1,[%i0+0]
+!-
+ stx %g2,[%i0+8]
+!-
+ stx %g3,[%i0+16]
+ add %i3,-4,%i3
+!-
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+ brgez,pt %i3,L(loop1)
+ nop
+!--------- End main loop ---------
+L(skip):
+ subccc %l0,%l4,%g1
+ srlx %l0,32,%o0
+ srlx %l4,32,%o4
+ subccc %o0,%o4,%g0
+ subccc %l1,%l5,%g2
+ srlx %l1,32,%o1
+ srlx %l5,32,%o5
+ subccc %o1,%o5,%g0
+ subccc %l2,%l6,%g3
+ srlx %l2,32,%o2
+ srlx %l6,32,%g5 ! asymmetry
+ subccc %o2,%g5,%g0
+ subccc %l3,%l7,%g4
+ srlx %l3,32,%o3
+ srlx %l7,32,%o7
+ subccc %o3,%o7,%g0
+ stx %g1,[%i0+0]
+ stx %g2,[%i0+8]
+ stx %g3,[%i0+16]
+ stx %g4,[%i0+24]
+ add %i0,32,%i0
+
+L(there):
+ add %i3,4,%i3
+ brz,pt %i3,L(end)
+ nop
+
+L(loop2):
+ ldx [%i1+0],%l0
+ add %i1,8,%i1
+ ldx [%i2+0],%l4
+ add %i2,8,%i2
+ srlx %l0,32,%g2
+ srlx %l4,32,%g3
+ subccc %l0,%l4,%g1
+ subccc %g2,%g3,%g0
+ stx %g1,[%i0+0]
+ add %i0,8,%i0
+ add %i3,-1,%i3
+ brgz,pt %i3,L(loop2)
+ nop
+
+L(end): addc %g0,%g0,%i0
+ ret
+ restore
+EPILOGUE(mpn_sub_n)
diff --git a/rts/gmp/mpn/sparc64/submul1h.asm b/rts/gmp/mpn/sparc64/submul1h.asm
new file mode 100644
index 0000000000..7f51ba59c6
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/submul1h.asm
@@ -0,0 +1,204 @@
+dnl SPARC 64-bit submull/submulu -- Helper for mpn_submul_1 and mpn_mul_1.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+ifdef(`LOWPART',
+`submull:',
+`submulu:')
+ save %sp,-256,%sp
+
+ sethi %hi(0xffff0000),%o0
+ andn %i3,%o0,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f6
+
+ srl %i3,16,%o0
+ st %o0,[%fp-17]
+ ld [%fp-17],%f11
+ fxtod %f10,%f8
+
+ mov 0,%g3 C cy = 0
+
+ ld [%i1+4],%f11
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end1)
+ add %i1,4,%i1 C s1_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+ be,pn %icc,E(end2)
+ std %f12,[%fp-17]
+
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ subcc %i2,1,%i2
+dnl be,pn %icc,E(end3)
+ std %f12,[%fp-33]
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ add %i0,4,%i0 C res_ptr++
+ subcc %i2,1,%i2
+ be,pn %icc,E(end4)
+ std %f12,[%fp-17]
+
+ b,a E(loop)
+ nop C nop is cheap to nullify
+
+ ALIGN(16)
+C BEGIN LOOP
+E(loop):
+ fxtod %f10,%f2
+ ld [%i1+4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ sub %i2,2,%i2
+ add %i0,4,%i0 C res_ptr++
+
+ fxtod %f10,%f2
+ ld [%i1-4],%f11
+ add %i1,4,%i1 C s1_ptr++
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-17],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DHI]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-25]
+ fdtox %f4,%f12
+ std %f12,[%fp-17]
+ brnz,pt %i2,E(loop)
+ add %i0,4,%i0 C res_ptr++
+C END LOOP
+E(loope):
+E(end4):
+ fxtod %f10,%f2
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DHI],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ fmuld %f2,%f8,%f16
+ ldx [%fp-33],%g1 C p0
+ fmuld %f2,%f6,%f4
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DLO]
+ fdtox %f16,%f14
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ ld [%i0+DLO],%g5
+ srlx %g4,32,%g3
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DHI]
+ b,a E(yyy)
+
+E(end2):
+ fxtod %f10,%f2
+ fmuld %f2,%f8,%f16
+ fmuld %f2,%f6,%f4
+ fdtox %f16,%f14
+ std %f14,[%fp-41]
+ fdtox %f4,%f12
+ std %f12,[%fp-33]
+ ld [%i0+DLO],%g5
+ ldx [%fp-25],%g2 C p16
+ ldx [%fp-17],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ subxcc %g5,%g4,%l2 C add *res_ptr to p0 (ADD2)
+ifdef(`LOWPART',
+` ld [%i0+DHI],%g5')
+ srlx %g4,32,%g3
+ ldx [%fp-41],%g2 C p16
+ ldx [%fp-33],%g1 C p0
+ sllx %g2,16,%g2 C align p16
+ st %l2,[%i0-4+DLO]
+ add %g2,%g1,%g1 C add p16 to p0 (ADD1)
+ add %i0,4,%i0 C res_ptr++
+
+ add %g3,%g1,%g4 C p += cy
+ifdef(`LOWPART',
+` subxcc %g5,%g4,%l2') C add *res_ptr to p0 (ADD2)
+ifdef(`LOWPART',
+` st %l2,[%i0-4+DHI]
+ srlx %g4,32,%g4')
+
+ addx %g4,0,%g4
+ ret
+ restore %g0,%g4,%o0 C sideeffect: put cy in retreg
+ifdef(`LOWPART',
+`EPILOGUE(submull)',
+`EPILOGUE(submulu)')
diff --git a/rts/gmp/mpn/sparc64/submul_1.asm b/rts/gmp/mpn/sparc64/submul_1.asm
new file mode 100644
index 0000000000..7c6af0a98b
--- /dev/null
+++ b/rts/gmp/mpn/sparc64/submul_1.asm
@@ -0,0 +1,114 @@
+dnl SPARC 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C res_ptr i0
+C s1_ptr i1
+C size i2
+C s2_limb i3
+
+ASM_START()
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+PROLOGUE(mpn_submul_1)
+ save %sp,-256,%sp
+
+C We store 0.0 in f10 and keep it invariant across the two
+C function calls below. Note that this is not ABI conformant,
+C but since the functions are local, that's acceptable.
+ifdef(`PIC',
+`L(pc): rd %pc,%o7
+ ld [%o7+L(noll)-L(pc)],%f10',
+` sethi %hh(L(noll)),%g2
+ sethi %lm(L(noll)),%g1
+ or %g2,%hm(L(noll)),%g2
+ or %g1,%lo(L(noll)),%g1
+ sllx %g2,32,%g2
+ ld [%g1+%g2],%f10')
+
+ sub %i1,%i0,%g1
+ srlx %g1,3,%g1
+ cmp %g1,%i2
+ bcc,pt %xcc,L(nooverlap)
+ nop
+
+ sllx %i2,3,%g2 C compute stack allocation byte count
+ add %g2,15,%o0
+ and %o0,-16,%o0
+ sub %sp,%o0,%sp
+ add %sp,2223,%o0
+
+ mov %i1,%o1 C copy s1_ptr to mpn_copyi's srcp
+ call mpn_copyi
+ mov %i2,%o2 C copy n to mpn_copyi's count parameter
+
+ add %sp,2223,%i1
+
+L(nooverlap):
+C First multiply-add with low 32 bits of s2_limb
+ mov %i0,%o0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call submull
+ srl %i3,0,%o3
+
+	mov	%o0,%l0			C keep carry-out from submull
+
+C Now multiply-add with high 32 bits of s2_limb, unless it is zero.
+ srlx %i3,32,%o3
+ brz,a,pn %o3,L(small)
+ mov %o0,%i0
+ mov %i1,%o1
+ add %i2,%i2,%o2
+ call submulu
+ add %i0,4,%o0
+
+ add %l0,%o0,%i0
+L(small):
+ ret
+ restore %g0,%g0,%g0
+EPILOGUE(mpn_submul_1)
+
+C Put a zero in the text segment to allow us to get the address
+C quickly when compiling for PIC
+ TEXT
+ ALIGN(4)
+L(noll):
+ .word 0
+
+define(`LO',`(+4)')
+define(`HI',`(-4)')
+
+define(`DLO',`(+4)')
+define(`DHI',`(-4)')
+define(`LOWPART')
+define(`E',`L(l.$1)')
+include_mpn(`sparc64/submul1h.asm')
+
+define(`DLO',`(-4)')
+define(`DHI',`(+4)')
+undefine(`LOWPART')
+define(`E',`L(u.$1)')
+include_mpn(`sparc64/submul1h.asm')
diff --git a/rts/gmp/mpn/thumb/add_n.s b/rts/gmp/mpn/thumb/add_n.s
new file mode 100644
index 0000000000..c1eeb6ca87
--- /dev/null
+++ b/rts/gmp/mpn/thumb/add_n.s
@@ -0,0 +1,50 @@
+@ ARM/Thumb __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+@ sum in a third limb vector.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+
+@ INPUT PARAMETERS
+@ RES_ptr r0
+@ S1_ptr r1
+@ S2_ptr r2
+@ SIZE r3
+
+@ NOT TESTED CODE
+
+ .text
+ .thumb
+ .align 0
+ .global ___gmpn_add_n
+___gmpn_add_n:
+ push {r4, r5, r6, lr}
+ mov r6, #1 @ init carry save register
+
+Loop: sub r6, #1 @ restore carry (set iff r6 was 0)
+ ldmia r1!, {r4} @ load next limb from S1
+ ldmia r2!, {r5} @ load next limb from S2
+ adc r4, r5
+ stmia r0!, {r4} @ store result limb to RES
+ sbc r6, r6 @ save negated carry
+ sub r3, #1
+	bge	Loop			@ loop back while remaining count > 0
+
+ mov r0, r6
+ pop {r4, r5, r6, pc}
diff --git a/rts/gmp/mpn/thumb/sub_n.s b/rts/gmp/mpn/thumb/sub_n.s
new file mode 100644
index 0000000000..53c292375f
--- /dev/null
+++ b/rts/gmp/mpn/thumb/sub_n.s
@@ -0,0 +1,50 @@
+@ ARM/Thumb __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+@ store difference in a third limb vector.
+
+@ Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 2.1 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+@ MA 02111-1307, USA.
+
+
+@ INPUT PARAMETERS
+@ RES_ptr r0
+@ S1_ptr r1
+@ S2_ptr r2
+@ SIZE r3
+
+@ NOT TESTED CODE
+
+ .text
+ .thumb
+ .align 0
+ .global ___gmpn_sub_n
+___gmpn_sub_n:
+ push {r4, r5, r6, lr}
+ mov r6, #1 @ init carry save register
+
+Loop: sub r6, #1 @ restore carry (set iff r6 was 0)
+ ldmia r1!, {r4} @ load next limb from S1
+ ldmia r2!, {r5} @ load next limb from S2
+ sbc r4, r5
+ stmia r0!, {r4} @ store result limb to RES
+ sbc r6, r6 @ save negated carry
+ sub r3, #1
+	bge	Loop			@ loop back while remaining count > 0
+
+ mov r0, r6
+ pop {r4, r5, r6, pc}
diff --git a/rts/gmp/mpn/underscore.h b/rts/gmp/mpn/underscore.h
new file mode 100644
index 0000000000..240dae0f63
--- /dev/null
+++ b/rts/gmp/mpn/underscore.h
@@ -0,0 +1,26 @@
+/*
+Copyright (C) 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#if __STDC__
+#define C_SYMBOL_NAME(name) _##name
+#else
+#define C_SYMBOL_NAME(name) _/**/name
+#endif
diff --git a/rts/gmp/mpn/vax/add_n.s b/rts/gmp/mpn/vax/add_n.s
new file mode 100644
index 0000000000..cf4060f521
--- /dev/null
+++ b/rts/gmp/mpn/vax/add_n.s
@@ -0,0 +1,61 @@
+# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# s2_ptr (sp + 12)
+# size (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+ .word 0x0
+ movl 16(ap),r0
+ movl 12(ap),r1
+ movl 8(ap),r2
+ movl 4(ap),r3
+ mnegl r0,r5
+ addl2 $3,r0
+ ashl $-2,r0,r0 # unroll loop count
+ bicl2 $-4,r5 # mask out low 2 bits
+ movaq (r5)[r5],r5 # 9x
+ jmp Loop(r5)
+
+Loop: movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ adwc (r1)+,r4
+ movl r4,(r3)+
+ sobgtr r0,Loop
+
+ adwc r0,r0
+ ret
diff --git a/rts/gmp/mpn/vax/addmul_1.s b/rts/gmp/mpn/vax/addmul_1.s
new file mode 100644
index 0000000000..379061dcb7
--- /dev/null
+++ b/rts/gmp/mpn/vax/addmul_1.s
@@ -0,0 +1,126 @@
+# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# size (sp + 12)
+# s2_limb (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+ .word 0xfc0
+ movl 12(ap),r4
+ movl 8(ap),r8
+ movl 4(ap),r9
+ movl 16(ap),r6
+ jlss s2_big
+
+ clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L1
+ clrl r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl (r8)+,r1
+ jlss L1n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc $0,r3
+ addl2 r2,(r9)+
+ adwc $0,r3
+L1: movl (r8)+,r1
+ jlss L1n1
+L1p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc $0,r11
+ addl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+L1n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ addl2 r2,(r9)+
+ adwc $0,r3
+ movl (r8)+,r1
+ jgeq L1p1
+L1n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ addl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+
+s2_big: clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L2
+ clrl r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl (r8)+,r1
+ jlss L2n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r1,r3
+ addl2 r2,(r9)+
+ adwc $0,r3
+L2: movl (r8)+,r1
+ jlss L2n1
+L2p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r1,r11
+ addl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
+
+L2n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ addl2 r2,(r9)+
+ adwc r1,r3
+ movl (r8)+,r1
+ jgeq L2p1
+L2n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ addl2 r10,(r9)+
+ adwc r1,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
diff --git a/rts/gmp/mpn/vax/lshift.s b/rts/gmp/mpn/vax/lshift.s
new file mode 100644
index 0000000000..fd311a9782
--- /dev/null
+++ b/rts/gmp/mpn/vax/lshift.s
@@ -0,0 +1,58 @@
+# VAX __gmpn_lshift -- left shift.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr (sp + 4)
+# sptr (sp + 8)
+# size (sp + 12)
+# cnt (sp + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers
+
+.text
+ .align 1
+.globl ___gmpn_lshift
+___gmpn_lshift:
+ .word 0x1c0
+ movl 4(ap),r7
+ movl 8(ap),r6
+ movl 12(ap),r1
+ movl 16(ap),r8
+
+ moval (r6)[r1],r6
+ moval (r7)[r1],r7
+ clrl r3
+ movl -(r6),r2
+ ashq r8,r2,r4
+ movl r5,r0
+ movl r2,r3
+ decl r1
+ jeql Lend
+
+Loop: movl -(r6),r2
+ ashq r8,r2,r4
+ movl r5,-(r7)
+ movl r2,r3
+ jsobgtr r1,Loop
+
+Lend: movl r4,-4(r7)
+ ret
diff --git a/rts/gmp/mpn/vax/mul_1.s b/rts/gmp/mpn/vax/mul_1.s
new file mode 100644
index 0000000000..708e8ca6ca
--- /dev/null
+++ b/rts/gmp/mpn/vax/mul_1.s
@@ -0,0 +1,123 @@
+# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# size (sp + 12)
+# s2_limb (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+ .word 0xfc0
+ movl 12(ap),r4
+ movl 8(ap),r8
+ movl 4(ap),r9
+ movl 16(ap),r6
+ jlss s2_big
+
+# One might want to combine the addl2 and the store below, but that
+# is actually just slower according to my timing tests. (VAX 3600)
+
+ clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L1
+ clrl r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl (r8)+,r1
+ jlss L1n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc $0,r3
+ movl r2,(r9)+
+L1: movl (r8)+,r1
+ jlss L1n1
+L1p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc $0,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+L1n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ movl r2,(r9)+
+ movl (r8)+,r1
+ jgeq L1p1
+L1n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+
+s2_big: clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L2
+ clrl r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl (r8)+,r1
+ jlss L2n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r1,r3
+ movl r2,(r9)+
+L2: movl (r8)+,r1
+ jlss L2n1
+L2p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r1,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
+
+L2n0: emul r1,r6,$0,r2
+ addl2 r1,r3
+ addl2 r11,r2
+ adwc r6,r3
+ movl r2,(r9)+
+ movl (r8)+,r1
+ jgeq L2p1
+L2n1: emul r1,r6,$0,r10
+ addl2 r1,r11
+ addl2 r3,r10
+ adwc r6,r11
+ movl r10,(r9)+
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
diff --git a/rts/gmp/mpn/vax/rshift.s b/rts/gmp/mpn/vax/rshift.s
new file mode 100644
index 0000000000..515813208d
--- /dev/null
+++ b/rts/gmp/mpn/vax/rshift.s
@@ -0,0 +1,56 @@
+# VAX __gmpn_rshift -- right shift.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# rptr (sp + 4)
+# sptr (sp + 8)
+# size (sp + 12)
+# cnt (sp + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers
+
+.text
+ .align 1
+.globl ___gmpn_rshift
+___gmpn_rshift:
+ .word 0x1c0
+ movl 4(ap),r7
+ movl 8(ap),r6
+ movl 12(ap),r1
+ movl 16(ap),r8
+
+ movl (r6)+,r2
+ subl3 r8,$32,r8
+ ashl r8,r2,r0
+ decl r1
+ jeql Lend
+
+Loop: movl (r6)+,r3
+ ashq r8,r2,r4
+ movl r5,(r7)+
+ movl r3,r2
+ jsobgtr r1,Loop
+
+Lend: clrl r3
+ ashq r8,r2,r4
+ movl r5,(r7)
+ ret
diff --git a/rts/gmp/mpn/vax/sub_n.s b/rts/gmp/mpn/vax/sub_n.s
new file mode 100644
index 0000000000..eff4b1c044
--- /dev/null
+++ b/rts/gmp/mpn/vax/sub_n.s
@@ -0,0 +1,61 @@
+# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+# difference in a third limb vector.
+
+# Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# s2_ptr (sp + 12)
+# size (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+ .word 0x0
+ movl 16(ap),r0
+ movl 12(ap),r1
+ movl 8(ap),r2
+ movl 4(ap),r3
+ mnegl r0,r5
+ addl2 $3,r0
+ ashl $-2,r0,r0 # unroll loop count
+ bicl2 $-4,r5 # mask out low 2 bits
+ movaq (r5)[r5],r5 # 9x
+ jmp Loop(r5)
+
+Loop: movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ movl (r2)+,r4
+ sbwc (r1)+,r4
+ movl r4,(r3)+
+ sobgtr r0,Loop
+
+ adwc r0,r0
+ ret
diff --git a/rts/gmp/mpn/vax/submul_1.s b/rts/gmp/mpn/vax/submul_1.s
new file mode 100644
index 0000000000..be42286935
--- /dev/null
+++ b/rts/gmp/mpn/vax/submul_1.s
@@ -0,0 +1,126 @@
+# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr (sp + 4)
+# s1_ptr (sp + 8)
+# size (sp + 12)
+# s2_limb (sp + 16)
+
+.text
+ .align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+ .word 0xfc0
+ movl 12(ap),r4
+ movl 8(ap),r8
+ movl 4(ap),r9
+ movl 16(ap),r6
+ jlss s2_big
+
+ clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L1
+ clrl r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1: movl (r8)+,r1
+ jlss L1n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc $0,r3
+ subl2 r2,(r9)+
+ adwc $0,r3
+L1: movl (r8)+,r1
+ jlss L1n1
+L1p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc $0,r11
+ subl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+L1n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ subl2 r2,(r9)+
+ adwc $0,r3
+ movl (r8)+,r1
+ jgeq L1p1
+L1n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ subl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop1
+ movl r11,r0
+ ret
+
+
+s2_big: clrl r3
+ incl r4
+ ashl $-1,r4,r7
+ jlbc r4,L2
+ clrl r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2: movl (r8)+,r1
+ jlss L2n0
+ emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r1,r3
+ subl2 r2,(r9)+
+ adwc $0,r3
+L2: movl (r8)+,r1
+ jlss L2n1
+L2p1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r1,r11
+ subl2 r10,(r9)+
+ adwc $0,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
+
+L2n0: emul r1,r6,$0,r2
+ addl2 r11,r2
+ adwc r6,r3
+ subl2 r2,(r9)+
+ adwc r1,r3
+ movl (r8)+,r1
+ jgeq L2p1
+L2n1: emul r1,r6,$0,r10
+ addl2 r3,r10
+ adwc r6,r11
+ subl2 r10,(r9)+
+ adwc r1,r11
+
+ sobgtr r7,Loop2
+ movl r11,r0
+ ret
diff --git a/rts/gmp/mpn/x86/README b/rts/gmp/mpn/x86/README
new file mode 100644
index 0000000000..3507548b8c
--- /dev/null
+++ b/rts/gmp/mpn/x86/README
@@ -0,0 +1,40 @@
+
+ X86 MPN SUBROUTINES
+
+
+This directory contains mpn functions for various 80x86 chips.
+
+
+CODE ORGANIZATION
+
+ x86 i386, i486, generic
+ x86/pentium Intel Pentium (P5, P54)
+ x86/pentium/mmx Intel Pentium with MMX (P55)
+ x86/p6 Intel Pentium Pro
+ x86/p6/mmx Intel Pentium II, III
+ x86/p6/p3mmx Intel Pentium III
+ x86/k6 AMD K6, K6-2, K6-3
+ x86/k6/mmx
+ x86/k6/k62mmx AMD K6-2
+ x86/k7 AMD Athlon
+ x86/k7/mmx
+
+
+The x86 directory is also the main support for P6 at the moment, and
+is something of a blended style, meant to be reasonable on all x86s.
+
+
+
+STATUS
+
+The code is well-optimized for AMD and Intel chips, but not so well
+optimized for Cyrix chips.
+
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+For implementations with slow double shift instructions (SHLD and
+SHRD), it might be better to mimic their operation with SHL+SHR+OR.
+(M2 is likely to benefit from that, but not Pentium due to its slow
+plain SHL and SHR.)
diff --git a/rts/gmp/mpn/x86/README.family b/rts/gmp/mpn/x86/README.family
new file mode 100644
index 0000000000..3bc73f58b0
--- /dev/null
+++ b/rts/gmp/mpn/x86/README.family
@@ -0,0 +1,333 @@
+
+ X86 CPU FAMILY MPN SUBROUTINES
+
+
+This file has some notes on things common to all the x86 family code.
+
+
+
+ASM FILES
+
+The x86 .asm files are BSD style x86 assembler code, first put through m4
+for macro processing. The generic mpn/asm-defs.m4 is used, together with
+mpn/x86/x86-defs.m4. Detailed notes are in those files.
+
+The code is meant for use with GNU "gas" or a system "as". There's no
+support for assemblers that demand Intel style, and with gas freely
+available and easy to use that shouldn't be a problem.
+
+
+
+STACK FRAME
+
+m4 macros are used to define the parameters passed on the stack, and these
+act like comments on what the stack frame looks like too. For example,
+mpn_mul_1() has the following.
+
+ defframe(PARAM_MULTIPLIER, 16)
+ defframe(PARAM_SIZE, 12)
+ defframe(PARAM_SRC, 8)
+ defframe(PARAM_DST, 4)
+
+Here PARAM_MULTIPLIER gets defined as `FRAME+16(%esp)', and the others
+similarly. The return address is at offset 0, but there's not normally any
+need to access that.
+
+FRAME is redefined as necessary through the code so it's the number of bytes
+pushed on the stack, and hence the offsets in the parameter macros stay
+correct. At the start of a routine FRAME should be zero.
+
+ deflit(`FRAME',0)
+ ...
+ deflit(`FRAME',4)
+ ...
+ deflit(`FRAME',8)
+ ...
+
+Helper macros FRAME_pushl(), FRAME_popl(), FRAME_addl_esp() and
+FRAME_subl_esp() exist to adjust FRAME for the effect of those instructions,
+and can be used instead of explicit definitions if preferred.
+defframe_pushl() is a combination FRAME_pushl() and defframe().
+
+There's generally some slackness in redefining FRAME. If new values aren't
+going to get used, then the redefinitions are omitted to keep from
+cluttering up the code. This happens for instance at the end of a routine,
+where there might be just four register pops and then a ret, so FRAME isn't
+getting used.
+
+Local variables and saved registers can be similarly defined, with negative
+offsets representing stack space below the initial stack pointer. For
+example,
+
+ defframe(SAVE_ESI, -4)
+ defframe(SAVE_EDI, -8)
+ defframe(VAR_COUNTER,-12)
+
+ deflit(STACK_SPACE, 12)
+
+Here STACK_SPACE gets used in a "subl $STACK_SPACE, %esp" to allocate the
+space, and that instruction must be followed by a redefinition of FRAME
+(setting it equal to STACK_SPACE) to reflect the change in %esp.
+
+Definitions for pushed registers are only put in when they're going to be
+used. If registers are just saved and restored with pushes and pops then
+definitions aren't made.
+
+
+
+ASSEMBLER EXPRESSIONS
+
+Only addition and subtraction seem to be universally available, certainly
+that's all the Solaris 8 "as" seems to accept. If expressions are wanted
+then m4 eval() should be used.
+
+In particular note that a "/" anywhere in a line starts a comment in Solaris
+"as", and in some configurations of gas too.
+
+ addl $32/2, %eax <-- wrong
+
+ addl $eval(32/2), %eax <-- right
+
+Binutils gas/config/tc-i386.c has a choice between "/" being a comment
+anywhere in a line, or only at the start. FreeBSD patches 2.9.1 to select
+the latter, and as of 2.9.5 it's the default for GNU/Linux too.
+
+
+
+ASSEMBLER COMMENTS
+
+Solaris "as" doesn't support "#" commenting, using /* */ instead,
+unfortunately. For that reason "C" commenting is used (see asm-defs.m4) and
+the intermediate ".s" files have no comments.
+
+
+
+ZERO DISPLACEMENTS
+
+In a couple of places addressing modes like 0(%ebx) with a byte-sized zero
+displacement are wanted, rather than (%ebx) with no displacement. These are
+either for computed jumps or to get desirable code alignment. Explicit
+.byte sequences are used to ensure the assembler doesn't turn 0(%ebx) into
+(%ebx). The Zdisp() macro in x86-defs.m4 is used for this.
+
+Current gas 2.9.5 or recent 2.9.1 leave 0(%ebx) as written, but old gas
+1.92.3 changes it. In general changing would be the sort of "optimization"
+an assembler might perform, hence explicit ".byte"s are used where
+necessary.
+
+
+
+SHLD/SHRD INSTRUCTIONS
+
+The %cl count forms of double shift instructions like "shldl %cl,%eax,%ebx"
+must be written "shldl %eax,%ebx" for some assemblers. gas takes either,
+Solaris "as" doesn't allow %cl, gcc generates %cl for gas and NeXT (which is
+gas), and omits %cl elsewhere.
+
+For GMP an autoconf test is used to determine whether %cl should be used and
+the macros shldl, shrdl, shldw and shrdw in mpn/x86/x86-defs.m4 then pass
+through or omit %cl as necessary. See comments with those macros for usage.
+
+
+
+DIRECTION FLAG
+
+The x86 calling conventions say that the direction flag should be clear at
+function entry and exit. (See iBCS2 and SVR4 ABI books, references below.)
+
+Although this has been so since the year dot, it's not absolutely clear
+whether it's universally respected. Since it's better to be safe than
+sorry, gmp follows glibc and does a "cld" if it depends on the direction
+flag being clear. This happens only in a few places.
+
+
+
+POSITION INDEPENDENT CODE
+
+Defining the symbol PIC in m4 processing selects position independent code.
+This mainly affects computed jumps, and these are implemented in a
+self-contained fashion (without using the global offset table). The few
+calls from assembly code to global functions use the normal procedure
+linkage table.
+
+PIC is necessary for ELF shared libraries because they can be mapped into
+different processes at different virtual addresses. Text relocations in
+shared libraries are allowed, but that presumably means a page with such a
+relocation isn't shared. The use of the PLT for PIC adds a fixed cost to
+every function call, which is small but might be noticeable when working with
+small operands.
+
+Calls from one library function to another don't need to go through the PLT,
+since of course the call instruction uses a displacement, not an absolute
+address, and the relative locations of object files are known when libgmp.so
+is created. "ld -Bsymbolic" (or "gcc -Wl,-Bsymbolic") will resolve calls
+this way, so that there's no jump through the PLT, but of course leaving
+setups of the GOT address in %ebx that may be unnecessary.
+
+The %ebx setup could be avoided in assembly if a separate option controlled
+PIC for calls as opposed to computed jumps etc. But there's only ever
+likely to be a handful of calls out of assembler, and getting the same
+optimization for C intra-library calls would be more important. There seems
+no easy way to tell gcc that certain functions can be called non-PIC, and
+unfortunately many gmp functions use the global memory allocation variables,
+so they need the GOT anyway. Object files with no global data references
+and only intra-library calls could go into the library as non-PIC under
+-Bsymbolic. Integrating this into libtool and automake is left as an
+exercise for the reader.
+
+
+
+SIMPLE LOOPS
+
+The overheads in setting up for an unrolled loop can mean that at small
+sizes a simple loop is faster. Making small sizes go fast is important,
+even if it adds a cycle or two to bigger sizes. To this end various
+routines choose between a simple loop and an unrolled loop according to
+operand size. The path to the simple loop, or to special case code for
+small sizes, is always as fast as possible.
+
+Adding a simple loop requires a conditional jump to choose between the
+simple and unrolled code. The size of a branch misprediction penalty
+affects whether a simple loop is worthwhile.
+
+The convention is for an m4 definition UNROLL_THRESHOLD to set the crossover
+point, with sizes < UNROLL_THRESHOLD using the simple loop, sizes >=
+UNROLL_THRESHOLD using the unrolled loop. If position independent code adds
+a couple of cycles to an unrolled loop setup, the threshold will vary with
+PIC or non-PIC. Something like the following is typical.
+
+ ifdef(`PIC',`
+ deflit(UNROLL_THRESHOLD, 10)
+ ',`
+ deflit(UNROLL_THRESHOLD, 8)
+ ')
+
+There's no automated way to determine the threshold. Setting it to a small
+value and then to a big value makes it possible to measure the simple and
+unrolled loops each over a range of sizes, from which the crossover point
+can be determined. Alternately, just adjust the threshold up or down until
+there are no more speedups.
+
+
+
+UNROLLED LOOP CODING
+
+The x86 addressing modes allow a byte displacement of -128 to +127, making
+it possible to access 256 bytes, which is 64 limbs, without adjusting
+pointer registers within the loop. Dword sized displacements can be used
+too, but they increase code size, and unrolling to 64 ought to be enough.
+
+When unrolling to the full 64 limbs/loop, the limb at the top of the loop
+will have a displacement of -128, so pointers have to have a corresponding
++128 added before entering the loop. When unrolling to 32 limbs/loop
+displacements 0 to 127 can be used with 0 at the top of the loop and no
+adjustment needed to the pointers.
+
+Where 64 limbs/loop is supported, the +128 adjustment is done only when 64
+limbs/loop is selected. Usually the gain in speed using 64 instead of 32 or
+16 is small, so support for 64 limbs/loop is generally only for comparison.
+
+
+
+COMPUTED JUMPS
+
+When working from least significant limb to most significant limb (most
+routines) the computed jump and pointer calculations in preparation for an
+unrolled loop are as follows.
+
+ S = operand size in limbs
+ N = number of limbs per loop (UNROLL_COUNT)
+ L = log2 of unrolling (UNROLL_LOG2)
+ M = mask for unrolling (UNROLL_MASK)
+ C = code bytes per limb in the loop
+ B = bytes per limb (4 for x86)
+
+ computed jump (-S & M) * C + entrypoint
+ subtract from pointers (-S & M) * B
+ initial loop counter (S-1) >> L
+ displacements 0 to B*(N-1)
+
+The loop counter is decremented at the end of each loop, and the looping
+stops when the decrement takes the counter to -1. The displacements are for
+the addressing accessing each limb, eg. a load with "movl disp(%ebx), %eax".
+
+Usually the multiply by "C" can be handled without an imul, using instead an
+leal, or a shift and subtract.
+
+When working from most significant to least significant limb (eg. mpn_lshift
+and mpn_copyd), the calculations change as follows.
+
+ add to pointers (-S & M) * B
+ displacements 0 to -B*(N-1)
+
+
+
+OLD GAS 1.92.3
+
+This version comes with FreeBSD 2.2.8 and has a couple of gremlins that
+affect gmp code.
+
+Firstly, an expression involving two forward references to labels comes out
+as zero. For example,
+
+ addl $bar-foo, %eax
+ foo:
+ nop
+ bar:
+
+This should lead to "addl $1, %eax", but it comes out as "addl $0, %eax".
+When only one forward reference is involved, it works correctly, as for
+example,
+
+ foo:
+ addl $bar-foo, %eax
+ nop
+ bar:
+
+Secondly, an expression involving two labels can't be used as the
+displacement for an leal. For example,
+
+ foo:
+ nop
+ bar:
+ leal bar-foo(%eax,%ebx,8), %ecx
+
+A slightly cryptic error is given, "Unimplemented segment type 0 in
+parse_operand". When only one label is used it's ok, and the label can be a
+forward reference too, as for example,
+
+ leal foo(%eax,%ebx,8), %ecx
+ nop
+ foo:
+
+These problems only affect PIC computed jump calculations. The workarounds
+are just to do an leal without a displacement and then an addl, and to make
+sure the code is placed so that there's at most one forward reference in the
+addl.
+
+
+
+REFERENCES
+
+"Intel Architecture Software Developer's Manual", volumes 1 to 3, 1999,
+order numbers 243190, 243191 and 243192. Available on-line,
+
+ ftp://download.intel.com/design/PentiumII/manuals/243190.htm
+ ftp://download.intel.com/design/PentiumII/manuals/243191.htm
+ ftp://download.intel.com/design/PentiumII/manuals/243192.htm
+
+"Intel386 Family Binary Compatibility Specification 2", Intel Corporation,
+published by McGraw-Hill, 1991, ISBN 0-07-031219-2.
+
+"System V Application Binary Interface", Unix System Laboratories Inc, 1992,
+published by Prentice Hall, ISBN 0-13-880410-9. And the "Intel386 Processor
+Supplement", AT&T, 1991, ISBN 0-13-877689-X. (These have details of ELF
+shared library PIC coding.)
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/addsub_n.S b/rts/gmp/mpn/x86/addsub_n.S
new file mode 100644
index 0000000000..fe6f648f53
--- /dev/null
+++ b/rts/gmp/mpn/x86/addsub_n.S
@@ -0,0 +1,174 @@
+/* Currently not working and not used. */
+
+/*
+Copyright (C) 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+
+#define SAVE_BORROW_RESTORE_CARRY(r) adcl r,r; shll $31,r
+#define SAVE_CARRY_RESTORE_BORROW(r) adcl r,r
+
+ .globl mpn_addsub_n_0
+ .globl mpn_addsub_n_1
+
+/* Cute i386/i486/p6 addsub loop for the "full overlap" case r1==s2,r2==s1.
+ We let subtraction and addition alternate in being two limbs
+ ahead of the other, thereby avoiding some SAVE_RESTORE. */
+// r1 = r2 + r1 edi = esi + edi
+// r2 = r2 - r1 esi = esi - edi
+// s1 s2
+// r2 r1
+// eax,ebx,ecx,edx,esi,edi,ebp
+mpn_addsub_n_0:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 36(%esp),%ebp /* size */
+
+ shrl $2,%ebp
+ xorl %edx,%edx
+ .align 4
+Loop0: // L=load E=execute S=store
+ movl (%esi),%ebx // sub 0 L
+ movl 4(%esi),%ecx // sub 1 L
+ sbbl (%edi),%ebx // sub 0 LE
+ sbbl 4(%edi),%ecx // sub 1 LE
+// SAVE_BORROW_RESTORE_CARRY(%edx)
+ movl (%esi),%eax // add 0 L
+ adcl %eax,(%edi) // add 0 LES
+ movl 4(%esi),%eax // add 1 L
+ adcl %eax,4(%edi) // add 1 LES
+ movl %ebx,(%esi) // sub 0 S
+ movl %ecx,4(%esi) // sub 1 S
+ movl 8(%esi),%ebx // add 2 L
+ adcl 8(%edi),%ebx // add 2 LE
+ movl 12(%esi),%ecx // add 3 L
+ adcl 12(%edi),%ecx // add 3 LE
+// SAVE_CARRY_RESTORE_BORROW(%edx)
+ movl 8(%edi),%eax // sub 2 L
+ sbbl %eax,8(%esi) // sub 2 LES
+ movl 12(%edi),%eax // sub 3 L
+ sbbl %eax,12(%esi) // sub 3 LES
+ movl %ebx,8(%edi) // add 2 S
+ movl %ecx,12(%edi) // add 3 S
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ebp
+ jnz Loop0
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* Cute i386/i486/p6 addsub loop for the "full overlap" case r1==s1,r2==s2.
+ We let subtraction and addition alternate in being two limbs
+ ahead of the other, thereby avoiding some SAVE_RESTORE. */
+// r1 = r1 + r2 edi = edi + esi
+// r2 = r1 - r2 esi = edi - esi
+// s2 s1
+// r2 r1
+// eax,ebx,ecx,edx,esi,edi,ebp
+mpn_addsub_n_1:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 36(%esp),%ebp /* size */
+
+ shrl $2,%ebp
+ xorl %edx,%edx
+ .align 4
+Loop1: // L=load E=execute S=store
+ movl (%edi),%ebx // sub 0 L
+ sbbl (%esi),%ebx // sub 0 LE
+ movl 4(%edi),%ecx // sub 1 L
+ sbbl 4(%esi),%ecx // sub 1 LE
+// SAVE_BORROW_RESTORE_CARRY(%edx)
+ movl (%esi),%eax // add 0 L
+ adcl %eax,(%edi) // add 0 LES
+ movl 4(%esi),%eax // add 1 L
+ adcl %eax,4(%edi) // add 1 LES
+ movl %ebx,(%esi) // sub 0 S
+ movl %ecx,4(%esi) // sub 1 S
+ movl 8(%esi),%ebx // add 2 L
+ adcl 8(%edi),%ebx // add 2 LE
+ movl 12(%esi),%ecx // add 3 L
+ adcl 12(%edi),%ecx // add 3 LE
+// SAVE_CARRY_RESTORE_BORROW(%edx)
+ movl 8(%edi),%eax // sub 2 L
+ sbbl 8(%esi),%eax // sub 2 LES
+ movl %eax,8(%esi) // sub 2 S
+ movl 12(%edi),%eax // sub 3 L
+ sbbl 12(%esi),%eax // sub 3 LE
+ movl %eax,12(%esi) // sub 3 S
+ movl %ebx,8(%edi) // add 2 S
+ movl %ecx,12(%edi) // add 3 S
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ebp
+ jnz Loop1
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+ .globl mpn_copy
+mpn_copy:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* size */
+
+ shrl $2,%ebp
+ .align 4
+Loop2:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl %eax,(%edi)
+ movl %ebx,4(%edi)
+ movl 8(%esi),%eax
+ movl 12(%esi),%ebx
+ movl %eax,8(%edi)
+ movl %ebx,12(%edi)
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ebp
+ jnz Loop2
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
diff --git a/rts/gmp/mpn/x86/aors_n.asm b/rts/gmp/mpn/x86/aors_n.asm
new file mode 100644
index 0000000000..18ef816b4d
--- /dev/null
+++ b/rts/gmp/mpn/x86/aors_n.asm
@@ -0,0 +1,187 @@
+dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_add_n',`
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(M4_function_nc)
+deflit(`FRAME',0)
+
+ pushl %edi FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC1,%esi
+ movl PARAM_SRC2,%edx
+ movl PARAM_SIZE,%ecx
+
+ movl %ecx,%eax
+ shrl $3,%ecx C compute count for unrolled loop
+ negl %eax
+ andl $7,%eax C get index where to start loop
+ jz LF(M4_function_n,oopgo) C necessary special case for 0
+ incl %ecx C adjust loop count
+ shll $2,%eax C adjustment for pointers...
+ subl %eax,%edi C ... since they are offset ...
+ subl %eax,%esi C ... by a constant when we ...
+ subl %eax,%edx C ... enter the loop
+ shrl $2,%eax C restore previous value
+
+ifdef(`PIC',`
+ C Calculate start address in loop for PIC. Due to limitations in
+ C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
+ call L(0a)
+L(0a): leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $LF(M4_function_n,oop)-L(0a)-3,%eax
+ addl $4,%esp
+',`
+ C Calculate start address in loop for non-PIC.
+ leal LF(M4_function_n,oop)-3(%eax,%eax,8),%eax
+')
+
+ C These lines initialize carry from the 5th parameter. Should be
+ C possible to simplify.
+ pushl %ebp FRAME_pushl()
+ movl PARAM_CARRY,%ebp
+ shrl $1,%ebp C shift bit 0 into carry
+ popl %ebp FRAME_popl()
+
+ jmp *%eax C jump into loop
+
+EPILOGUE()
+
+
+ ALIGN(8)
+PROLOGUE(M4_function_n)
+deflit(`FRAME',0)
+
+ pushl %edi FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC1,%esi
+ movl PARAM_SRC2,%edx
+ movl PARAM_SIZE,%ecx
+
+ movl %ecx,%eax
+ shrl $3,%ecx C compute count for unrolled loop
+ negl %eax
+ andl $7,%eax C get index where to start loop
+ jz L(oop) C necessary special case for 0
+ incl %ecx C adjust loop count
+ shll $2,%eax C adjustment for pointers...
+ subl %eax,%edi C ... since they are offset ...
+ subl %eax,%esi C ... by a constant when we ...
+ subl %eax,%edx C ... enter the loop
+ shrl $2,%eax C restore previous value
+
+ifdef(`PIC',`
+ C Calculate start address in loop for PIC. Due to limitations in
+ C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
+ call L(0b)
+L(0b): leal (%eax,%eax,8),%eax
+ addl (%esp),%eax
+ addl $L(oop)-L(0b)-3,%eax
+ addl $4,%esp
+',`
+ C Calculate start address in loop for non-PIC.
+ leal L(oop)-3(%eax,%eax,8),%eax
+')
+ jmp *%eax C jump into loop
+
+L(oopgo):
+ pushl %ebp FRAME_pushl()
+ movl PARAM_CARRY,%ebp
+ shrl $1,%ebp C shift bit 0 into carry
+ popl %ebp FRAME_popl()
+
+ ALIGN(8)
+L(oop): movl (%esi),%eax
+ M4_inst (%edx),%eax
+ movl %eax,(%edi)
+ movl 4(%esi),%eax
+ M4_inst 4(%edx),%eax
+ movl %eax,4(%edi)
+ movl 8(%esi),%eax
+ M4_inst 8(%edx),%eax
+ movl %eax,8(%edi)
+ movl 12(%esi),%eax
+ M4_inst 12(%edx),%eax
+ movl %eax,12(%edi)
+ movl 16(%esi),%eax
+ M4_inst 16(%edx),%eax
+ movl %eax,16(%edi)
+ movl 20(%esi),%eax
+ M4_inst 20(%edx),%eax
+ movl %eax,20(%edi)
+ movl 24(%esi),%eax
+ M4_inst 24(%edx),%eax
+ movl %eax,24(%edi)
+ movl 28(%esi),%eax
+ M4_inst 28(%edx),%eax
+ movl %eax,28(%edi)
+ leal 32(%edi),%edi
+ leal 32(%esi),%esi
+ leal 32(%edx),%edx
+ decl %ecx
+ jnz L(oop)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/aorsmul_1.asm b/rts/gmp/mpn/x86/aorsmul_1.asm
new file mode 100644
index 0000000000..f32ad83989
--- /dev/null
+++ b/rts/gmp/mpn/x86/aorsmul_1.asm
@@ -0,0 +1,134 @@
+dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
+dnl limb and add the result to a second limb vector.
+
+
+dnl Copyright (C) 1992, 1994, 1997, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_addmul_1',`
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+
+',`ifdef(`OPERATION_submul_1',`
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+
+define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
+define(PARAM_SIZE, `FRAME+12(%esp)')
+define(PARAM_SRC, `FRAME+8(%esp)')
+define(PARAM_DST, `FRAME+4(%esp)')
+
+ TEXT
+ ALIGN(8)
+
+PROLOGUE(M4_function_1)
+deflit(`FRAME',0)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ecx
+
+ xorl %ebx,%ebx
+ andl $3,%ecx
+ jz L(end0)
+
+L(oop0):
+ movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ leal 4(%esi),%esi
+ addl %ebx,%eax
+ movl $0,%ebx
+ adcl %ebx,%edx
+ M4_inst %eax,(%edi)
+ adcl %edx,%ebx C propagate carry into cylimb
+
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oop0)
+
+L(end0):
+ movl PARAM_SIZE,%ecx
+ shrl $2,%ecx
+ jz L(end)
+
+ ALIGN(8)
+L(oop): movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ addl %eax,%ebx
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 4(%esi),%eax
+ mull PARAM_MULTIPLIER
+ M4_inst %ebx,(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl 8(%esi),%eax
+ mull PARAM_MULTIPLIER
+ M4_inst %ebp,4(%edi)
+ adcl %eax,%ebx C new lo + cylimb
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 12(%esi),%eax
+ mull PARAM_MULTIPLIER
+ M4_inst %ebx,8(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ M4_inst %ebp,12(%edi)
+ adcl $0,%ebx C propagate carry into cylimb
+
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ecx
+ jnz L(oop)
+
+L(end): movl %ebx,%eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/copyd.asm b/rts/gmp/mpn/x86/copyd.asm
new file mode 100644
index 0000000000..439640e836
--- /dev/null
+++ b/rts/gmp/mpn/x86/copyd.asm
@@ -0,0 +1,80 @@
+dnl x86 mpn_copyd -- copy limb vector, decrementing.
+dnl
+dnl Future: On P6 an MMX loop should be able to go faster than this code.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from high to low addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+C
+C P5 - 1.0 cycles/limb.
+C
+C P6 - 2.4 cycles/limb, approx 40 cycles startup.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_copyd)
+ C eax saved esi
+ C ebx
+ C ecx counter
+ C edx saved edi
+ C esi src
+ C edi dst
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ movl PARAM_DST, %edi
+ leal -4(%esi,%ecx,4), %esi
+
+ leal -4(%edi,%ecx,4), %edi
+
+ std
+
+ rep
+ movsl
+
+ cld
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/copyi.asm b/rts/gmp/mpn/x86/copyi.asm
new file mode 100644
index 0000000000..5bc4e36689
--- /dev/null
+++ b/rts/gmp/mpn/x86/copyi.asm
@@ -0,0 +1,79 @@
+dnl x86 mpn_copyi -- copy limb vector, incrementing.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, working from low to high addresses.
+C
+C The code here is very generic and can be expected to be reasonable on all
+C the x86 family.
+C
+C P5 - 1.0 cycles/limb.
+C
+C P6 - 0.75 cycles/limb. An MMX based copy was tried, but was found to be
+C slower than a rep movs in all cases. The fastest MMX found was 0.8
+C cycles/limb (when fully aligned). A rep movs seems to have a startup
+C time of about 15 cycles, but doing something special for small sizes
+C could lead to a branch misprediction that would destroy any saving.
+C For now a plain rep movs seems ok for P6.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+ C eax saved esi
+ C ebx
+ C ecx counter
+ C edx saved edi
+ C esi src
+ C edi dst
+ C ebp
+
+PROLOGUE(mpn_copyi)
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ movl PARAM_DST, %edi
+
+ cld C better safe than sorry, see mpn/x86/README.family
+
+ rep
+ movsl
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/diveby3.asm b/rts/gmp/mpn/x86/diveby3.asm
new file mode 100644
index 0000000000..df879da9e1
--- /dev/null
+++ b/rts/gmp/mpn/x86/diveby3.asm
@@ -0,0 +1,115 @@
+dnl x86 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl The following all have their own optimized versions of this routine,
+dnl but for reference the code here runs as follows.
+dnl
+dnl cycles/limb
+dnl P54 18.0
+dnl P55 17.0
+dnl P6 14.5
+dnl K6 14.0
+dnl K7 10.0
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+dnl ceil(b/3) and ceil(b*2/3) where b=2^32
+deflit(ONE_THIRD_CEIL, 0x55555556)
+deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SRC, %ecx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_SIZE, %ebp
+ pushl %edi FRAME_pushl()
+
+ movl PARAM_DST, %edi
+ pushl %esi FRAME_pushl()
+
+ movl $INVERSE_3, %esi
+ pushl %ebx FRAME_pushl()
+
+ leal (%ecx,%ebp,4), %ecx
+ movl PARAM_CARRY, %ebx
+
+ leal (%edi,%ebp,4), %edi
+ negl %ebp
+
+
+ ALIGN(8)
+L(top):
+ C eax scratch, low product
+ C ebx carry limb (0 to 3)
+ C ecx &src[size]
+ C edx scratch, high product
+ C esi multiplier
+ C edi &dst[size]
+ C ebp counter, limbs, negative
+
+ movl (%ecx,%ebp,4), %eax
+
+ subl %ebx, %eax
+
+ setc %bl
+
+ imull %esi
+
+ cmpl $ONE_THIRD_CEIL, %eax
+ movl %eax, (%edi,%ebp,4)
+
+ sbbl $-1, %ebx C +1 if eax>=ceil(b/3)
+ cmpl $TWO_THIRDS_CEIL, %eax
+
+ sbbl $-1, %ebx C +1 if eax>=ceil(b*2/3)
+ incl %ebp
+
+ jnz L(top)
+
+
+ movl %ebx, %eax
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/divrem_1.asm b/rts/gmp/mpn/x86/divrem_1.asm
new file mode 100644
index 0000000000..12f14676d6
--- /dev/null
+++ b/rts/gmp/mpn/x86/divrem_1.asm
@@ -0,0 +1,232 @@
+dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl cycles/limb
+dnl K6 20
+dnl P5 44
+dnl P6 39
+dnl 486 approx 43 maybe
+dnl
+dnl
+dnl The following have their own optimized divrem_1 implementations, but
+dnl for reference the code here runs as follows.
+dnl
+dnl cycles/limb
+dnl P6MMX 39
+dnl K7 42
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C
+C Divide src,size by divisor and store the quotient in dst+xsize,size.
+C Extend the division to fractional quotient limbs in dst,xsize. Return the
+C remainder. Either or both xsize and size can be 0.
+C
+C mpn_divrem_1c takes a carry parameter which is an initial high limb,
+C effectively one extra limb at the top of src,size. Must have
+C carry<divisor.
+C
+C
+C Essentially the code is the same as the division based part of
+C mpn/generic/divrem_1.c, but has the following advantages.
+C
+C - If gcc isn't being used then divrem_1.c will get the generic C
+C udiv_qrnnd() and be rather slow.
+C
+C - On K6, using the loop instruction is a 10% speedup, but gcc doesn't
+C generate that instruction (as of gcc 2.95.2 at least).
+C
+C A test is done to see if the high limb is less than the divisor, and if so
+C one less div is done. A div is between 20 and 40 cycles on the various
+C x86s, so assuming high<divisor about half the time, then this test saves
+C half that amount. The branch misprediction penalty on each chip is less
+C than half a div.
+C
+C
+C K6: Back-to-back div instructions run at 20 cycles, the same as the loop
+C here, so it seems there's nothing to gain by rearranging the loop.
+C Pairing the mov and loop instructions was found to gain nothing. (The
+C same is true of the mpn/x86/mod_1.asm loop.)
+C
+C With a "decl/jnz" rather than a "loop" this code runs at 22 cycles.
+C The loop_or_decljnz macro is an easy way to get a 10% speedup.
+C
+C The fast K6 multiply might be thought to suit a multiply-by-inverse,
+C but that algorithm has been found to suffer from the relatively poor
+C carry handling on K6 and too many auxiliary instructions. The
+C fractional part however could be done at about 13 c/l.
+C
+C P5: Moving the load down to pair with the store might save 1 cycle, but
+C that doesn't seem worth bothering with, since it'd be only a 2.2%
+C saving.
+C
+C Again here the auxiliary instructions hinder a multiply-by-inverse,
+C though there might be a 10-15% speedup available
+
+
+defframe(PARAM_CARRY, 24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC, 12)
+defframe(PARAM_XSIZE, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %edi FRAME_pushl()
+
+ movl PARAM_SRC, %edi
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_DST, %ebx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_XSIZE, %ebp
+ orl %ecx, %ecx
+
+ movl PARAM_CARRY, %edx
+ jz LF(mpn_divrem_1,fraction)
+
+ leal -4(%ebx,%ebp,4), %ebx C dst one limb below integer part
+ jmp LF(mpn_divrem_1,integer_top)
+
+EPILOGUE()
+
+
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %edi FRAME_pushl()
+
+ movl PARAM_SRC, %edi
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ orl %ecx,%ecx
+
+ jz L(size_zero)
+ pushl %ebx FRAME_pushl()
+
+ movl -4(%edi,%ecx,4), %eax C src high limb
+ xorl %edx, %edx
+
+ movl PARAM_DST, %ebx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_XSIZE, %ebp
+ cmpl %esi, %eax
+
+ leal -4(%ebx,%ebp,4), %ebx C dst one limb below integer part
+ jae L(integer_entry)
+
+
+ C high<divisor, so high of dst is zero, and avoid one div
+
+ movl %edx, (%ebx,%ecx,4)
+ decl %ecx
+
+ movl %eax, %edx
+ jz L(fraction)
+
+
+L(integer_top):
+ C eax scratch (quotient)
+ C ebx dst+4*xsize-4
+ C ecx counter
+ C edx scratch (remainder)
+ C esi divisor
+ C edi src
+ C ebp xsize
+
+ movl -4(%edi,%ecx,4), %eax
+L(integer_entry):
+
+ divl %esi
+
+ movl %eax, (%ebx,%ecx,4)
+ loop_or_decljnz L(integer_top)
+
+
+L(fraction):
+ orl %ebp, %ecx
+ jz L(done)
+
+ movl PARAM_DST, %ebx
+
+
+L(fraction_top):
+ C eax scratch (quotient)
+ C ebx dst
+ C ecx counter
+ C edx scratch (remainder)
+ C esi divisor
+ C edi
+ C ebp
+
+ xorl %eax, %eax
+
+ divl %esi
+
+ movl %eax, -4(%ebx,%ecx,4)
+ loop_or_decljnz L(fraction_top)
+
+
+L(done):
+ popl %ebp
+ movl %edx, %eax
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
+L(size_zero):
+deflit(`FRAME',8)
+ movl PARAM_XSIZE, %ecx
+ xorl %eax, %eax
+
+ movl PARAM_DST, %edi
+
+ cld C better safe than sorry, see mpn/x86/README.family
+
+ rep
+ stosl
+
+ popl %esi
+ popl %edi
+ ret
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/README b/rts/gmp/mpn/x86/k6/README
new file mode 100644
index 0000000000..3ad96c8b89
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/README
@@ -0,0 +1,237 @@
+
+ AMD K6 MPN SUBROUTINES
+
+
+
+This directory contains code optimized for AMD K6 CPUs, meaning K6, K6-2 and
+K6-3.
+
+The mmx and k62mmx subdirectories have routines using MMX instructions. All
+K6s have MMX, the separate directories are just so that ./configure can omit
+them if the assembler doesn't support MMX.
+
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache, are as follows.
+
+ cycles/limb
+
+ mpn_add_n/sub_n 3.25 normal, 2.75 in-place
+
+ mpn_mul_1 6.25
+ mpn_add/submul_1 7.65-8.4 (varying with data values)
+
+ mpn_mul_basecase 9.25 cycles/crossproduct (approx)
+ mpn_sqr_basecase 4.7 cycles/crossproduct (approx)
+ or 9.2 cycles/triangleproduct (approx)
+
+ mpn_divrem_1 20.0
+ mpn_mod_1 20.0
+ mpn_divexact_by3 11.0
+
+ mpn_l/rshift 3.0
+
+ mpn_copyi/copyd 1.0
+
+ mpn_com_n 1.5-1.85 \
+ mpn_and/andn/ior/xor_n 1.5-1.75 | varying with
+ mpn_iorn/xnor_n 2.0-2.25 | data alignment
+ mpn_nand/nior_n 2.0-2.25 /
+
+ mpn_popcount 12.5
+ mpn_hamdist 13.0
+
+
+K6-2 and K6-3 have dual-issue MMX and get the following improvements.
+
+ mpn_l/rshift 1.75
+
+ mpn_copyi/copyd 0.56 or 1.0 \
+ |
+ mpn_com_n 1.0-1.2 | varying with
+ mpn_and/andn/ior/xor_n 1.2-1.5 | data alignment
+ mpn_iorn/xnor_n 1.5-2.0 |
+ mpn_nand/nior_n 1.75-2.0 /
+
+ mpn_popcount 9.0
+ mpn_hamdist 11.5
+
+
+Prefetching of sources hasn't yet given any joy. With the 3DNow "prefetch"
+instruction, code seems to run slower, and with just "mov" loads it doesn't
+seem faster. Results so far are inconsistent. The K6 does a hardware
+prefetch of the second cache line in a sector, so the penalty for not
+prefetching in software is reduced.
+
+
+
+
+NOTES
+
+All K6 family chips have MMX, but only K6-2 and K6-3 have 3DNow.
+
+Plain K6 executes MMX instructions only in the X pipe, but K6-2 and K6-3 can
+execute them in both X and Y (and together).
+
+Branch misprediction penalty is 1 to 4 cycles (Optimization Manual
+chapter 6 table 12).
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+Store queue is 7 entries of 64 bits each.
+
+Floating point multiplications can be done in parallel with integer
+multiplications, but there doesn't seem to be any way to make use of this.
+
+
+
+OPTIMIZATIONS
+
+Unrolled loops are used to reduce looping overhead. The unrolling is
+configurable up to 32 limbs/loop for most routines, up to 64 for some.
+
+Sometimes computed jumps into the unrolling are used to handle sizes not a
+multiple of the unrolling. An attractive feature of this is that times
+smoothly increase with operand size, but an indirect jump is about 6 cycles
+and the setups about another 6, so it depends on how much the unrolled code
+is faster than a simple loop as to whether a computed jump ought to be used.
+
+Position independent code is implemented using a call to get eip for
+computed jumps and a ret is always done, rather than an addl $4,%esp or a
+popl, so the CPU return address branch prediction stack stays synchronised
+with the actual stack in memory. Such a call however still costs 4 to 7
+cycles.
+
+Branch prediction, in absence of any history, will guess forward jumps are
+not taken and backward jumps are taken. Where possible it's arranged that
+the less likely or less important case is under a taken forward jump.
+
+
+
+MMX
+
+Putting emms or femms as late as possible in a routine seems to be fastest.
+Perhaps an emms or femms stalls until all outstanding MMX instructions have
+completed, so putting it later gives them a chance to complete on their own,
+in parallel with other operations (like register popping).
+
+The Optimization Manual chapter 5 recommends using a femms on K6-2 and K6-3
+at the start of a routine, in case it's been preceded by x87 floating point
+operations. This isn't done because in gmp programs it's expected that x87
+floating point won't be much used and that chances are an mpn routine won't
+have been preceded by any x87 code.
+
+
+
+CODING
+
+Instructions in general code are shown paired if they can decode and execute
+together, meaning two short decode instructions with the second not
+depending on the first, only the first using the shifter, no more than one
+load, and no more than one store.
+
+K6 does some out of order execution so the pairings aren't essential, they
+just show what slots might be available. When decoding is the limiting
+factor things can be scheduled that might not execute until later.
+
+
+
+NOTES
+
+Code alignment
+
+- if an opcode/modrm or 0Fh/opcode/modrm crosses a cache line boundary,
+ short decode is inhibited. The cross.pl script detects this.
+
+- loops and branch targets should be aligned to 16 bytes, or ensure at least
+ 2 instructions before a 32 byte boundary. This makes use of the 16 byte
+ cache in the BTB.
+
+Addressing modes
+
+- (%esi) degrades decoding from short to vector. 0(%esi) doesn't have this
+ problem, and can be used as an equivalent, or easier is just to use a
+ different register, like %ebx.
+
+- K6 and pre-CXT core K6-2 have the following problem. (K6-2 CXT and K6-3
+ have it fixed, these being cpuid function 1 signatures 0x588 to 0x58F).
+
+ If more than 3 bytes are needed to determine instruction length then
+ decoding degrades from direct to long, or from long to vector. This
+ happens with forms like "0F opcode mod/rm" with mod/rm=00-xxx-100 since
+ with mod=00 the sib determines whether there's a displacement.
+
+ This affects all MMX and 3DNow instructions, and others with an 0F prefix
+ like movzbl. The modes affected are anything with an index and no
+ displacement, or an index but no base, and this includes (%esp) which is
+ really (,%esp,1).
+
+ The cross.pl script detects problem cases. The workaround is to always
+ use a displacement, and to do this with Zdisp if it's zero so the
+ assembler doesn't discard it.
+
+ See Optimization Manual rev D page 67 and 3DNow Porting Guide rev B pages
+ 13-14 and 36-37.
+
+Calls
+
+- indirect jumps and calls are not branch predicted, they measure about 6
+ cycles.
+
+Various
+
+- adcl 2 cycles of decode, maybe 2 cycles executing in the X pipe
+- bsf 12-27 cycles
+- emms 5 cycles
+- femms 3 cycles
+- jecxz 2 cycles taken, 13 not taken (optimization manual says 7 not taken)
+- divl 20 cycles back-to-back
+- imull 2 decode, 2 execute
+- mull 2 decode, 3 execute (optimization manual decoding sample)
+- prefetch 2 cycles
+- rcll/rcrl implicit by one bit: 2 cycles
+ immediate or %cl count: 11 + 2 per bit for dword
+ 13 + 4 per bit for byte
+- setCC 2 cycles
+- xchgl %eax,reg 1.5 cycles, back-to-back (strange)
+ reg,reg 2 cycles, back-to-back
+
+
+
+
+REFERENCES
+
+"AMD-K6 Processor Code Optimization Application Note", AMD publication
+number 21924, revision D amendment 0, January 2000. This describes K6-2 and
+K6-3. Available on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21924.pdf
+
+"AMD-K6 MMX Enhanced Processor x86 Code Optimization Application Note", AMD
+publication number 21828, revision A amendment 0, August 1997. This is an
+older edition of the above document, describing plain K6. Available
+on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21828.pdf
+
+"3DNow Technology Manual", AMD publication number 21928F/0-August 1999.
+This describes the femms and prefetch instructions, but nothing else from
+3DNow has been used. Available on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21928.pdf
+
+"3DNow Instruction Porting Guide", AMD publication number 22621, revision B,
+August 1999. This has some notes on general K6 optimizations as well as
+3DNow. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22621.pdf
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/k6/aors_n.asm b/rts/gmp/mpn/x86/k6/aors_n.asm
new file mode 100644
index 0000000000..31b05ada51
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/aors_n.asm
@@ -0,0 +1,329 @@
+dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
+dnl
+dnl K6: normal 3.25 cycles/limb, in-place 2.75 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_add_n', `
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+ define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+ define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size. The return value is the carry bit from the top of the result
+C (1 or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation. Note values other than 1 or 0 here will lead to garbage
+C results.
+C
+C Instruction decoding limits a normal dst=src1+src2 operation to 3 c/l, and
+C an in-place dst+=src to 2.5 c/l. The unrolled loops have 1 cycle/loop of
+C loop control, which with 4 limbs/loop means an extra 0.25 c/l.
+
+define(PARAM_CARRY, `FRAME+20(%esp)')
+define(PARAM_SIZE, `FRAME+16(%esp)')
+define(PARAM_SRC2, `FRAME+12(%esp)')
+define(PARAM_SRC1, `FRAME+8(%esp)')
+define(PARAM_DST, `FRAME+4(%esp)')
+deflit(`FRAME',0)
+
+dnl minimum 5 because the unrolled code can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function_nc)
+ movl PARAM_CARRY, %eax
+ jmp LF(M4_function_n,start)
+EPILOGUE()
+
+
+PROLOGUE(M4_function_n)
+ xorl %eax, %eax
+L(start):
+ movl PARAM_SIZE, %ecx
+ pushl %ebx
+FRAME_pushl()
+
+ movl PARAM_SRC1, %ebx
+ pushl %edi
+FRAME_pushl()
+
+ movl PARAM_SRC2, %edx
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_DST, %edi
+ jae L(unroll)
+
+
+ shrl %eax C initial carry flag
+
+ C offset 0x21 here, close enough to aligned
+L(simple):
+ C eax scratch
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+ C
+ C The store to (%edi) could be done with a stosl; it'd be smaller
+ C code, but there's no speed gain and a cld would have to be added
+ C (per mpn/x86/README.family).
+
+ movl (%ebx), %eax
+ leal 4(%ebx), %ebx
+
+ M4_inst (%edx), %eax
+
+ movl %eax, (%edi)
+ leal 4(%edi), %edi
+
+ leal 4(%edx), %edx
+ loop L(simple)
+
+
+ movl $0, %eax
+ popl %edi
+
+ setc %al
+
+ popl %ebx
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(unroll):
+ C eax carry
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+
+ cmpl %edi, %ebx
+ pushl %esi
+
+ je L(inplace)
+
+ifdef(`OPERATION_add_n',`
+ cmpl %edi, %edx
+
+ je L(inplace_reverse)
+')
+
+ movl %ecx, %esi
+
+ andl $-4, %ecx
+ andl $3, %esi
+
+ leal (%ebx,%ecx,4), %ebx
+ leal (%edx,%ecx,4), %edx
+ leal (%edi,%ecx,4), %edi
+
+ negl %ecx
+ shrl %eax
+
+ ALIGN(32)
+L(normal_top):
+ C eax counter, qwords, negative
+ C ebx src1
+ C ecx scratch
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+
+ movl (%ebx,%ecx,4), %eax
+ leal 5(%ecx), %ecx
+ M4_inst -20(%edx,%ecx,4), %eax
+ movl %eax, -20(%edi,%ecx,4)
+
+ movl 4-20(%ebx,%ecx,4), %eax
+ M4_inst 4-20(%edx,%ecx,4), %eax
+ movl %eax, 4-20(%edi,%ecx,4)
+
+ movl 8-20(%ebx,%ecx,4), %eax
+ M4_inst 8-20(%edx,%ecx,4), %eax
+ movl %eax, 8-20(%edi,%ecx,4)
+
+ movl 12-20(%ebx,%ecx,4), %eax
+ M4_inst 12-20(%edx,%ecx,4), %eax
+ movl %eax, 12-20(%edi,%ecx,4)
+
+ loop L(normal_top)
+
+
+ decl %esi
+ jz L(normal_finish_one)
+ js L(normal_done)
+
+ C two or three more limbs
+
+ movl (%ebx), %eax
+ M4_inst (%edx), %eax
+ movl %eax, (%edi)
+
+ movl 4(%ebx), %eax
+ M4_inst 4(%edx), %eax
+ decl %esi
+ movl %eax, 4(%edi)
+
+ jz L(normal_done)
+ movl $2, %ecx
+
+L(normal_finish_one):
+ movl (%ebx,%ecx,4), %eax
+ M4_inst (%edx,%ecx,4), %eax
+ movl %eax, (%edi,%ecx,4)
+
+L(normal_done):
+ popl %esi
+ popl %edi
+
+ movl $0, %eax
+ popl %ebx
+
+ setc %al
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+
+ifdef(`OPERATION_add_n',`
+L(inplace_reverse):
+ C dst==src2
+
+ movl %ebx, %edx
+')
+
+L(inplace):
+ C eax initial carry
+ C ebx
+ C ecx size
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ leal -1(%ecx), %esi
+ decl %ecx
+
+ andl $-4, %ecx
+ andl $3, %esi
+
+ movl (%edx), %ebx C src low limb
+ leal (%edx,%ecx,4), %edx
+
+ leal (%edi,%ecx,4), %edi
+ negl %ecx
+
+ shrl %eax
+
+
+ ALIGN(32)
+L(inplace_top):
+ C eax
+ C ebx next src limb
+ C ecx size
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ M4_inst %ebx, (%edi,%ecx,4)
+
+ movl 4(%edx,%ecx,4), %eax
+ leal 5(%ecx), %ecx
+
+ M4_inst %eax, 4-20(%edi,%ecx,4)
+
+ movl 8-20(%edx,%ecx,4), %eax
+ movl 12-20(%edx,%ecx,4), %ebx
+
+ M4_inst %eax, 8-20(%edi,%ecx,4)
+ M4_inst %ebx, 12-20(%edi,%ecx,4)
+
+ movl 16-20(%edx,%ecx,4), %ebx
+ loop L(inplace_top)
+
+
+ C now %esi is 0 to 3 representing respectively 1 to 4 limbs more
+
+ M4_inst %ebx, (%edi)
+
+ decl %esi
+ jz L(inplace_finish_one)
+ js L(inplace_done)
+
+ C two or three more limbs
+
+ movl 4(%edx), %eax
+ movl 8(%edx), %ebx
+ M4_inst %eax, 4(%edi)
+ M4_inst %ebx, 8(%edi)
+
+ decl %esi
+ movl $2, %ecx
+
+ jz L(normal_done)
+
+L(inplace_finish_one):
+ movl 4(%edx,%ecx,4), %eax
+ M4_inst %eax, 4(%edi,%ecx,4)
+
+L(inplace_done):
+ popl %esi
+ popl %edi
+
+ movl $0, %eax
+ popl %ebx
+
+ setc %al
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/aorsmul_1.asm b/rts/gmp/mpn/x86/k6/aorsmul_1.asm
new file mode 100644
index 0000000000..da4120fe2f
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/aorsmul_1.asm
@@ -0,0 +1,372 @@
+dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl
+dnl K6: 7.65 to 8.5 cycles/limb (at 16 limbs/loop and depending on the data),
+dnl PIC adds about 6 cycles at the start.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6: large multipliers small multipliers
+dnl UNROLL_COUNT cycles/limb cycles/limb
+dnl 4 9.5 7.78
+dnl 8 9.0 7.78
+dnl 16 8.4 7.65
+dnl 32 8.4 8.2
+dnl
+dnl Maximum possible unrolling with the current code is 32.
+dnl
+dnl Unrolling to 16 limbs/loop makes the unrolled loop fit exactly in a 256
+dnl byte block, which might explain the good speed at that unrolling.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1', `
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+ define(M4_function_1c, mpn_addmul_1c)
+ define(M4_description, add it to)
+ define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1', `
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+ define(M4_function_1c, mpn_submul_1c)
+ define(M4_description, subtract it from)
+ define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+C
+C The jadcl0()s in the unrolled loop makes the speed data dependent. Small
+C multipliers (most significant few bits clear) result in few carry bits and
+C speeds up to 7.65 cycles/limb are attained. Large multipliers (most
+C significant few bits set) make the carry bits 50/50 and lead to something
+C more like 8.4 c/l. (With adcl's both of these would be 9.3 c/l.)
+C
+C It's important that the gains for jadcl0 on small multipliers don't come
+C at the cost of slowing down other data. Tests on uniformly distributed
+C random data, designed to confound branch prediction, show about a 7%
+C speed-up using jadcl0 over adcl (8.93 versus 9.57 cycles/limb, with all
+C overheads included).
+C
+C In the simple loop, jadcl0() measures slower than adcl (11.9-14.7 versus
+C 11.0 cycles/limb), and hence isn't used.
+C
+C In the simple loop, note that running ecx from negative to zero and using
+C it as an index in the two movs wouldn't help. It would save one
+C instruction (2*addl+loop becoming incl+jnz), but there's nothing unpaired
+C that would be collapsed by this.
+C
+C
+C jadcl0
+C ------
+C
+C jadcl0() being faster than adcl $0 seems to be an artifact of two things,
+C firstly the instruction decoding and secondly the fact that there's a
+C carry bit for the jadcl0 only on average about 1/4 of the time.
+C
+C The code in the unrolled loop decodes something like the following.
+C
+C decode cycles
+C mull %ebp 2
+C M4_inst %esi, disp(%edi) 1
+C adcl %eax, %ecx 2
+C movl %edx, %esi \ 1
+C jnc 1f /
+C incl %esi \ 1
+C 1: movl disp(%ebx), %eax /
+C ---
+C 7
+C
+C In a back-to-back style test this measures 7 with the jnc not taken, or 8
+C with it taken (both when correctly predicted). This is opposite to the
+C measurements showing small multipliers running faster than large ones.
+C Watch this space for more info ...
+C
+C It's not clear how much branch misprediction might be costing. The K6
+C doco says it will be 1 to 4 cycles, but presumably it's near the low end
+C of that range to get the measured results.
+C
+C
+C In the code the two carries are more or less the preceding mul product and
+C the calculation is roughly
+C
+C x*y + u*b+v
+C
+C where b=2^32 is the size of a limb, x*y is the two carry limbs, and u and
+C v are the two limbs it's added to (being the low of the next mul, and a
+C limb from the destination).
+C
+C To get a carry requires x*y+u*b+v >= b^2, which is u*b+v >= b^2-x*y, and
+C there are b^2-(b^2-x*y) = x*y many such values, giving a probability of
+C x*y/b^2. If x, y, u and v are random and uniformly distributed between 0
+C and b-1, then the total probability can be summed over x and y,
+C
+C 1 b-1 b-1 x*y 1 b*(b-1) b*(b-1)
+C --- * sum sum --- = --- * ------- * ------- = 1/4
+C b^2 x=0 y=1 b^2 b^4 2 2
+C
+C Actually it's a very tiny bit less than 1/4 of course. If y is fixed,
+C then the probability is 1/2*y/b thus varying linearly between 0 and 1/2.
+
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 9)
+',`
+deflit(UNROLL_THRESHOLD, 6)
+')
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function_1c)
+ pushl %esi
+deflit(`FRAME',4)
+ movl PARAM_CARRY, %esi
+ jmp LF(M4_function_1,start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_function_1)
+ push %esi
+deflit(`FRAME',4)
+ xorl %esi, %esi C initial carry
+
+L(start_nc):
+ movl PARAM_SIZE, %ecx
+ pushl %ebx
+deflit(`FRAME',8)
+
+ movl PARAM_SRC, %ebx
+ pushl %edi
+deflit(`FRAME',12)
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_DST, %edi
+
+ pushl %ebp
+deflit(`FRAME',16)
+ jae L(unroll)
+
+
+ C simple loop
+
+ movl PARAM_MULTIPLIER, %ebp
+
+L(simple):
+ C eax scratch
+ C ebx src
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi dst
+ C ebp multiplier
+
+ movl (%ebx), %eax
+ addl $4, %ebx
+
+ mull %ebp
+
+ addl $4, %edi
+ addl %esi, %eax
+
+ adcl $0, %edx
+
+ M4_inst %eax, -4(%edi)
+
+ adcl $0, %edx
+
+ movl %edx, %esi
+ loop L(simple)
+
+
+ popl %ebp
+ popl %edi
+
+ popl %ebx
+ movl %esi, %eax
+
+ popl %esi
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+C The unrolled loop uses a "two carry limbs" scheme. At the top of the loop
+C the carries are ecx=lo, esi=hi, then they swap for each limb processed.
+C For the computed jump an odd size means they start one way around, an even
+C size the other.
+C
+C VAR_JUMP holds the computed jump temporarily because there's not enough
+C registers at the point of doing the mul for the initial two carry limbs.
+C
+C The add/adc for the initial carry in %esi is necessary only for the
+C mpn_addmul/submul_1c entry points. Duplicating the startup code to
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C idea.
+
+dnl overlapping with parameters already fetched
+define(VAR_COUNTER, `PARAM_SIZE')
+define(VAR_JUMP, `PARAM_DST')
+
+L(unroll):
+ C eax
+ C ebx src
+ C ecx size
+ C edx
+ C esi initial carry
+ C edi dst
+ C ebp
+
+ movl %ecx, %edx
+ decl %ecx
+
+ subl $2, %edx
+ negl %ecx
+
+ shrl $UNROLL_LOG2, %edx
+ andl $UNROLL_MASK, %ecx
+
+ movl %edx, VAR_COUNTER
+ movl %ecx, %edx
+
+ shll $4, %edx
+ negl %ecx
+
+ C 15 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%edx,%ecx,1), %edx
+')
+ movl (%ebx), %eax C src low limb
+
+ movl PARAM_MULTIPLIER, %ebp
+ movl %edx, VAR_JUMP
+
+ mull %ebp
+
+ addl %esi, %eax C initial carry (from _1c)
+ jadcl0( %edx)
+
+
+ leal 4(%ebx,%ecx,4), %ebx
+ movl %edx, %esi C high carry
+
+ movl VAR_JUMP, %edx
+ leal (%edi,%ecx,4), %edi
+
+ testl $1, %ecx
+ movl %eax, %ecx C low carry
+
+ jz L(noswap)
+ movl %esi, %ecx C high,low carry other way around
+
+ movl %eax, %esi
+L(noswap):
+
+ jmp *%edx
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%edx,%ecx,1), %edx
+ addl $L(entry)-L(here), %edx
+ addl (%esp), %edx
+ ret
+')
+
+
+C -----------------------------------------------------------
+ ALIGN(32)
+L(top):
+deflit(`FRAME',16)
+ C eax scratch
+ C ebx src
+ C ecx carry lo
+ C edx scratch
+ C esi carry hi
+ C edi dst
+ C ebp multiplier
+ C
+ C 15 code bytes per limb
+
+ leal UNROLL_BYTES(%edi), %edi
+
+L(entry):
+forloop(`i', 0, UNROLL_COUNT/2-1, `
+ deflit(`disp0', eval(2*i*4))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%ebx), %eax)
+ mull %ebp
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+ adcl %eax, %esi
+ movl %edx, %ecx
+ jadcl0( %ecx)
+
+ movl disp1(%ebx), %eax
+ mull %ebp
+ M4_inst %esi, disp1(%edi)
+ adcl %eax, %ecx
+ movl %edx, %esi
+ jadcl0( %esi)
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%ebx), %ebx
+
+ jns L(top)
+
+
+ popl %ebp
+ M4_inst %ecx, UNROLL_BYTES(%edi)
+
+ popl %edi
+ movl %esi, %eax
+
+ popl %ebx
+ jadcl0( %eax)
+
+ popl %esi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/cross.pl b/rts/gmp/mpn/x86/k6/cross.pl
new file mode 100644
index 0000000000..21734f3e52
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/cross.pl
@@ -0,0 +1,141 @@
+#! /usr/bin/perl
+
+# Copyright (C) 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# Usage: cross.pl [filename.o]...
+#
+# Produce an annotated disassembly of the given object files, indicating
+# certain code alignment and addressing mode problems afflicting K6 chips.
+# "ZZ" is used on all annotations, so this can be searched for.
+#
+# With no arguments, all .o files corresponding to .asm files are processed.
+# This is good in the mpn object directory of a k6*-*-* build.
+#
+# As far as fixing problems goes, any cache line crossing problems in loops
+# get attention, but as a rule it's too tedious to rearrange code or slip in
+# nops to fix every problem in setup or finishup code.
+#
+# Bugs:
+#
+# Instructions without mod/rm bytes or which are already vector decoded are
+# unaffected by cache line boundary crossing, but not all of these have yet
+# been put in as exceptions. All that occur in practice in GMP are present
+# though.
+#
+# There's no messages for using the vector decoded addressing mode (%esi),
+# but that mode is easy to avoid when coding.
+
+use strict;
+
+sub disassemble {
+ my ($file) = @_;
+ my ($addr,$b1,$b2,$b3, $prefix,$opcode,$modrm);
+
+ open (IN, "objdump -Srfh $file |")
+ || die "Cannot open pipe from objdump\n";
+ while (<IN>) {
+ print;
+
+ if (/^[ \t]*[0-9]+[ \t]+\.text[ \t]/ && /2\*\*([0-9]+)$/) {
+ if ($1 < 5) {
+ print "ZZ need at least 2**5 for predictable cache line crossing\n";
+ }
+ }
+
+ if (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
+ ($addr,$b1,$b2,$b3) = ($1,$2,$3,$4);
+
+ } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)[ \t]+([0-9a-f]+)/) {
+ ($addr,$b1,$b2,$b3) = ($1,$2,$3,'');
+
+ } elsif (/^[ \t]*([0-9a-f]*):[ \t]*([0-9a-f]+)/) {
+ ($addr,$b1,$b2,$b3) = ($1,$2,'','');
+
+ } else {
+ next;
+ }
+
+ if ($b1 =~ /0f/) {
+ $prefix = $b1;
+ $opcode = $b2;
+ $modrm = $b3;
+ } else {
+ $prefix = '';
+ $opcode = $b1;
+ $modrm = $b2;
+ }
+
+ # modrm of the form 00-xxx-100 with an 0F prefix is the problem case
+ # for K6 and pre-CXT K6-2
+ if ($prefix =~ /0f/
+ && $opcode !~ /^8/ # jcond disp32
+ && $modrm =~ /^[0-3][4c]/) {
+ print "ZZ ($file) >3 bytes to determine instruction length\n";
+ }
+
+ # with just an opcode, starting 1f mod 20h
+ if ($addr =~ /[13579bdf]f$/
+ && $prefix !~ /0f/
+ && $opcode !~ /1[012345]/ # adc
+ && $opcode !~ /1[89abcd]/ # sbb
+ && $opcode !~ /68/ # push $imm32
+ && $opcode !~ /^7/ # jcond disp8
+ && $opcode !~ /a[89]/ # test+imm
+ && $opcode !~ /a[a-f]/ # stos/lods/scas
+ && $opcode !~ /b8/ # movl $imm32,%eax
+ && $opcode !~ /e[0123]/ # loop/loopz/loopnz/jcxz
+ && $opcode !~ /e[b9]/ # jmp disp8/disp32
+ && $opcode !~ /f[89abcd]/ # clc,stc,cli,sti,cld,std
+ && !($opcode =~ /f[67]/ # grp 1
+ && $modrm =~ /^[2367abef]/) # mul, imul, div, idiv
+ && $modrm !~ /^$/) {
+ print "ZZ ($file) opcode/modrm cross 32-byte boundary\n";
+ }
+
+ # with an 0F prefix, anything starting at 1f mod 20h
+ if ($addr =~ /[13579bdf][f]$/
+ && $prefix =~ /0f/) {
+ print "ZZ ($file) prefix/opcode cross 32-byte boundary\n";
+ }
+
+ # with an 0F prefix, anything with mod/rm starting at 1e mod 20h
+ if ($addr =~ /[13579bdf][e]$/
+ && $prefix =~ /0f/
+ && $opcode !~ /^8/ # jcond disp32
+ && $modrm !~ /^$/) {
+ print "ZZ ($file) prefix/opcode/modrm cross 32-byte boundary\n";
+ }
+ }
+ close IN || die "Error from objdump (or objdump not available)\n";
+}
+
+
+my @files;
+if ($#ARGV >= 0) {
+ @files = @ARGV;
+} else {
+ @files = glob "*.asm";
+ map {s/.asm/.o/} @files;
+}
+
+foreach (@files) {
+ disassemble($_);
+}
diff --git a/rts/gmp/mpn/x86/k6/diveby3.asm b/rts/gmp/mpn/x86/k6/diveby3.asm
new file mode 100644
index 0000000000..ffb97bc380
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/diveby3.asm
@@ -0,0 +1,110 @@
+dnl AMD K6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl K6: 11.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+C
+C Using %esi in (%esi,%ecx,4) or 0(%esi,%ecx,4) addressing modes doesn't
+C lead to vector decoding, unlike plain (%esi) does.
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %esi defframe_pushl(SAVE_ESI)
+
+ movl PARAM_SRC, %esi
+ pushl %edi defframe_pushl(SAVE_EDI)
+
+ movl PARAM_DST, %edi
+ pushl %ebx defframe_pushl(SAVE_EBX)
+
+ movl PARAM_CARRY, %ebx
+ leal (%esi,%ecx,4), %esi
+
+ pushl $3 defframe_pushl(VAR_THREE)
+ leal (%edi,%ecx,4), %edi
+
+ negl %ecx
+
+
+ C Need 32 alignment for claimed speed, to avoid the movl store
+ C opcode/modrm crossing a cache line boundary
+
+ ALIGN(32)
+L(top):
+ C eax scratch, low product
+ C ebx carry limb (0 to 3)
+ C ecx counter, limbs, negative
+ C edx scratch, high product
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp
+ C
+ C The 0(%esi,%ecx,4) form pads so the finishup "movl %ebx, %eax"
+ C doesn't cross a 32 byte boundary, saving a couple of cycles
+ C (that's a fixed couple, not per loop).
+
+Zdisp( movl, 0,(%esi,%ecx,4), %eax)
+ subl %ebx, %eax
+
+ setc %bl
+
+ imull $INVERSE_3, %eax
+
+ movl %eax, (%edi,%ecx,4)
+ addl $2, %ecx
+
+ mull VAR_THREE
+
+ addl %edx, %ebx
+ loop L(top)
+
+
+ movl SAVE_ESI, %esi
+ movl %ebx, %eax
+
+ movl SAVE_EBX, %ebx
+
+ movl SAVE_EDI, %edi
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/gmp-mparam.h b/rts/gmp/mpn/x86/k6/gmp-mparam.h
new file mode 100644
index 0000000000..77f3948d77
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/gmp-mparam.h
@@ -0,0 +1,97 @@
+/* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 3 /* cycles */
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME 20 /* cycles */
+#endif
+
+/* bsfl takes 12-27 cycles; use an average value for uniformly random operands */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 14 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-04. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 18
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 130
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 34
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 116
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 68
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 98
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 13
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 67
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 472
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 4352
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 528, 1184, 2176, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 544
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 4352
+#endif
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm b/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm
new file mode 100644
index 0000000000..20a33e6ccf
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/copyd.asm
@@ -0,0 +1,179 @@
+dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing.
+dnl
+dnl K6-2: 0.56 or 1.0 cycles/limb (at 32 limbs/loop), depending on data
+dnl alignment.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6-2 aligned:
+dnl UNROLL_COUNT cycles/limb
+dnl 8 0.75
+dnl 16 0.625
+dnl 32 0.5625
+dnl 64 0.53
+dnl Maximum possible with the current code is 64; the minimum is 2.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size, processing limbs from high to low addresses.
+C
+C The comments in copyi.asm apply here too.
+
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_copyd)
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ std
+
+ movl PARAM_DST, %edi
+ cmpl $UNROLL_COUNT, %ecx
+
+ leal -4(%esi,%ecx,4), %esi
+
+ leal -4(%edi,%ecx,4), %edi
+ ja L(unroll)
+
+L(simple):
+ rep
+ movsl
+
+ cld
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+
+L(unroll):
+ C if src and dst are different alignments mod8, then use rep movs
+ C if src and dst are both 4mod8 then process one limb to get 0mod8
+
+ pushl %ebx
+ leal (%esi,%edi), %ebx
+
+ testb $4, %bl
+ popl %ebx
+
+ jnz L(simple)
+ testl $4, %esi
+
+ leal -UNROLL_COUNT(%ecx), %ecx
+ jnz L(already_aligned)
+
+ movsl
+
+ decl %ecx
+L(already_aligned):
+
+
+ifelse(UNROLL_BYTES,256,`
+ subl $128, %esi
+ subl $128, %edi
+')
+
+ C offset 0x3D here, but gets full speed without further alignment
+L(top):
+ C eax saved esi
+ C ebx
+ C ecx counter, limbs
+ C edx saved edi
+ C esi src, incrementing
+ C edi dst, incrementing
+ C ebp
+ C
+ C `disp' is never 0, so don't need to force 0(%esi).
+
+deflit(CHUNK_COUNT, 2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp', eval(-4-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,+128)))
+ movq disp(%esi), %mm0
+ movq %mm0, disp(%edi)
+')
+
+ leal -UNROLL_BYTES(%esi), %esi
+ subl $UNROLL_COUNT, %ecx
+
+ leal -UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+ C now %ecx is -UNROLL_COUNT to -1 representing respectively 0 to
+ C UNROLL_COUNT-1 limbs remaining
+
+ testb $eval(UNROLL_COUNT/2), %cl
+
+ leal UNROLL_COUNT(%ecx), %ecx
+ jz L(not_half)
+
+
+ C at an unroll count of 32 this block of code is 16 cycles faster than
+ C the rep movs, less 3 or 4 to test whether to do it
+
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT/2-1, `
+ deflit(`disp', eval(-4-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,+128)))
+ movq disp(%esi), %mm0
+ movq %mm0, disp(%edi)
+')
+
+ subl $eval(UNROLL_BYTES/2), %esi
+ subl $eval(UNROLL_BYTES/2), %edi
+
+ subl $eval(UNROLL_COUNT/2), %ecx
+L(not_half):
+
+
+ifelse(UNROLL_BYTES,256,`
+ addl $128, %esi
+ addl $128, %edi
+')
+
+ rep
+ movsl
+
+ cld
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm b/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm
new file mode 100644
index 0000000000..215d805f2e
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/copyi.asm
@@ -0,0 +1,196 @@
+dnl AMD K6-2 mpn_copyi -- copy limb vector, incrementing.
+dnl
+dnl K6-2: 0.56 or 1.0 cycles/limb (at 32 limbs/loop), depending on data
+dnl alignment.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6-2 aligned:
+dnl UNROLL_COUNT cycles/limb
+dnl 8 0.75
+dnl 16 0.625
+dnl 32 0.5625
+dnl 64 0.53
+dnl Maximum possible with the current code is 64; the minimum is 2.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The MMX loop is faster than a rep movs when src and dst are both 0mod8.
+C With one 0mod8 and one 4mod8 it's 1.056 c/l and the rep movs at 1.0 c/l is
+C used instead.
+C
+C mod8
+C src dst
+C 0 0 both aligned, use mmx
+C 0 4 unaligned, use rep movs
+C 4 0 unaligned, use rep movs
+C 4 4 do one movs, then both aligned, use mmx
+C
+C The MMX code on aligned data is 0.5 c/l, plus loop overhead of 2
+C cycles/loop, which is 0.0625 c/l at 32 limbs/loop.
+C
+C A pattern of two movq loads and two movq stores (or four and four) was
+C tried, but found to be the same speed as just one of each.
+C
+C Note that this code only suits K6-2 and K6-3. Plain K6 does only one mmx
+C instruction per cycle, so "movq"s are no faster than the simple 1 c/l rep
+C movs.
+C
+C Enhancement:
+C
+C Addressing modes like disp(%esi,%ecx,4) aren't currently used. They'd
+C make it possible to avoid incrementing %esi and %edi in the loop and hence
+C get loop overhead down to 1 cycle. Care would be needed to avoid bad
+C cache line crossings since the "movq"s would then be 5 code bytes rather
+C than 4.
+
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_copyi)
+ movl PARAM_SIZE, %ecx
+ movl %esi, %eax
+
+ movl PARAM_SRC, %esi
+ movl %edi, %edx
+
+ cld
+
+ movl PARAM_DST, %edi
+ cmpl $UNROLL_COUNT, %ecx
+
+ ja L(unroll)
+
+L(simple):
+ rep
+ movsl
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ ret
+
+
+L(unroll):
+ C if src and dst are different alignments mod8, then use rep movs
+ C if src and dst are both 4mod8 then process one limb to get 0mod8
+
+ pushl %ebx
+ leal (%esi,%edi), %ebx
+
+ testb $4, %bl
+ popl %ebx
+
+ jnz L(simple)
+ testl $4, %esi
+
+ leal -UNROLL_COUNT(%ecx), %ecx
+ jz L(already_aligned)
+
+ decl %ecx
+
+ movsl
+L(already_aligned):
+
+
+ifelse(UNROLL_BYTES,256,`
+ addl $128, %esi
+ addl $128, %edi
+')
+
+ C this is offset 0x34, no alignment needed
+L(top):
+ C eax saved esi
+ C ebx
+ C ecx counter, limbs
+ C edx saved edi
+ C esi src, incrementing
+ C edi dst, incrementing
+ C ebp
+ C
+ C Zdisp gets 0(%esi) left that way to avoid vector decode, and with
+ C 0(%edi) keeps code aligned to 16 byte boundaries.
+
+deflit(CHUNK_COUNT, 2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+Zdisp( movq, disp,(%esi), %mm0)
+Zdisp( movq, %mm0, disp,(%edi))
+')
+
+ addl $UNROLL_BYTES, %esi
+ subl $UNROLL_COUNT, %ecx
+
+ leal UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+ C now %ecx is -UNROLL_COUNT to -1 representing respectively 0 to
+ C UNROLL_COUNT-1 limbs remaining
+
+ testb $eval(UNROLL_COUNT/2), %cl
+
+ leal UNROLL_COUNT(%ecx), %ecx
+ jz L(not_half)
+
+ C at an unroll count of 32 this block of code is 16 cycles faster than
+ C the rep movs, less 3 or 4 to test whether to do it
+
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT/2-1, `
+ deflit(`disp', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ movq disp(%esi), %mm0
+ movq %mm0, disp(%edi)
+')
+ addl $eval(UNROLL_BYTES/2), %esi
+ addl $eval(UNROLL_BYTES/2), %edi
+
+ subl $eval(UNROLL_COUNT/2), %ecx
+L(not_half):
+
+
+ifelse(UNROLL_BYTES,256,`
+ subl $128, %esi
+ subl $128, %edi
+')
+
+ rep
+ movsl
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm b/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm
new file mode 100644
index 0000000000..f6d54f97a8
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/lshift.asm
@@ -0,0 +1,286 @@
+dnl AMD K6-2 mpn_lshift -- mpn left shift.
+dnl
+dnl K6-2: 1.75 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl used after src has been fetched
+define(VAR_RETVAL,`PARAM_SRC')
+
+dnl minimum 9, because unrolled loop can't handle less
+deflit(UNROLL_THRESHOLD, 9)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shldl( %cl, %edx, %eax) C return value
+
+ shll %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16) C avoid offset 0x1f
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx,%eax,4), %edx C src high limb
+ negl %ecx
+
+ movd PARAM_SHIFT, %mm6
+ addl $32, %ecx C 32-shift
+
+ shrl %cl, %edx
+ cmpl $UNROLL_THRESHOLD-1, %eax
+
+ movl %edx, VAR_RETVAL
+ jae L(unroll)
+
+
+ movd %ecx, %mm7
+ movl %eax, %ecx
+
+ movl PARAM_DST, %eax
+
+L(simple):
+ C eax dst
+ C ebx src
+ C ecx counter, size-1 to 1
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%ebx,%ecx,4), %mm0
+
+ psrlq %mm7, %mm0
+
+Zdisp( movd, %mm0, 0,(%eax,%ecx,4))
+ loop L(simple)
+
+
+ movd (%ebx), %mm0
+ popl %ebx
+
+ psllq %mm6, %mm0
+
+ movd %mm0, (%eax)
+ movl %edx, %eax
+
+ femms
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx src
+ C ecx 32-shift
+ C edx retval (but instead VAR_RETVAL is used)
+ C
+ C mm6 shift
+
+ addl $32, %ecx
+ movl PARAM_DST, %edx
+
+ movd %ecx, %mm7
+ subl $7, %eax C size-8
+
+ leal (%edx,%eax,4), %ecx C alignment of dst
+
+ movq 32-8(%ebx,%eax,4), %mm2 C src high qword
+ testb $4, %cl
+
+ jz L(dst_aligned)
+ psllq %mm6, %mm2
+
+ psrlq $32, %mm2
+ decl %eax
+
+ movd %mm2, 32(%edx,%eax,4) C dst high limb
+ movq 32-8(%ebx,%eax,4), %mm2 C new src high qword
+L(dst_aligned):
+
+ movq 32-16(%ebx,%eax,4), %mm0 C src second highest qword
+
+
+ C This loop is the important bit, the rest is just support for it.
+ C Four src limbs are held at the start, and four more will be read.
+ C Four dst limbs will be written. This schedule seems necessary for
+ C full speed.
+ C
+ C The use of size-8 lets the loop stop when %eax goes negative and
+ C leaves -4 to -1 which can be tested with test $1 and $2.
+
+L(top):
+ C eax counter, size-8 step by -4 until <0
+ C ebx src
+ C ecx
+ C edx dst
+ C
+ C mm0 src next qword
+ C mm1 scratch
+ C mm2 src prev qword
+ C mm6 shift
+ C mm7 64-shift
+
+ psllq %mm6, %mm2
+ subl $4, %eax
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm0, %mm2
+ movq 24(%ebx,%eax,4), %mm0
+
+ psllq %mm6, %mm1
+ movq %mm2, 40(%edx,%eax,4)
+
+ movq %mm0, %mm2
+ psrlq %mm7, %mm0
+
+ por %mm0, %mm1
+ movq 16(%ebx,%eax,4), %mm0
+
+ movq %mm1, 32(%edx,%eax,4)
+ jnc L(top)
+
+
+ C Now have four limbs in mm2 (prev) and mm0 (next), plus eax mod 4.
+ C
+ C 8(%ebx) is the next source, and 24(%edx) is the next destination.
+ C %eax is between -4 and -1, representing respectively 0 to 3 extra
+ C limbs that must be read.
+
+
+ testl $2, %eax C testl to avoid bad cache line crossing
+ jz L(finish_nottwo)
+
+ C Two more limbs: lshift mm2, OR it with rshifted mm0, mm0 becomes
+ C new mm2 and a new mm0 is loaded.
+
+ psllq %mm6, %mm2
+ movq %mm0, %mm1
+
+ psrlq %mm7, %mm0
+ subl $2, %eax
+
+ por %mm0, %mm2
+ movq 16(%ebx,%eax,4), %mm0
+
+ movq %mm2, 32(%edx,%eax,4)
+ movq %mm1, %mm2
+L(finish_nottwo):
+
+
+ C lshift mm2, OR with rshifted mm0, mm1 becomes lshifted mm0
+
+ testb $1, %al
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm0, %mm2
+ psllq %mm6, %mm1
+
+ movq %mm2, 24(%edx,%eax,4)
+ jz L(finish_even)
+
+
+ C Size is odd, so mm1 and one extra limb to process.
+
+ movd (%ebx), %mm0 C src[0]
+ popl %ebx
+deflit(`FRAME',0)
+
+ movq %mm0, %mm2
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ psllq %mm6, %mm2
+ por %mm0, %mm1
+
+ movq %mm1, 4(%edx) C dst[1,2]
+ movd %mm2, (%edx) C dst[0]
+
+ movl VAR_RETVAL, %eax
+
+ femms
+ ret
+
+
+ nop C avoid bad cache line crossing
+L(finish_even):
+deflit(`FRAME',4)
+ C Size is even, so only mm1 left to process.
+
+ movq %mm1, (%edx) C dst[0,1]
+ movl VAR_RETVAL, %eax
+
+ popl %ebx
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm b/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm
new file mode 100644
index 0000000000..8a8c144241
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/k62mmx/rshift.asm
@@ -0,0 +1,285 @@
+dnl AMD K6-2 mpn_rshift -- mpn right shift.
+dnl
+dnl K6-2: 1.75 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl Minimum 9, because the unrolled loop can't handle less.
+dnl
+deflit(UNROLL_THRESHOLD, 9)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shrdl( %cl, %edx, %eax) C return value
+
+ shrl %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16) C avoid offset 0x1f
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx), %edx C src low limb
+ negl %ecx
+
+ addl $32, %ecx
+ movd PARAM_SHIFT, %mm6
+
+ shll %cl, %edx
+ cmpl $UNROLL_THRESHOLD-1, %eax
+
+ jae L(unroll)
+
+
+ C eax size-1
+ C ebx src
+ C ecx 32-shift
+ C edx retval
+ C
+ C mm6 shift
+
+ movl PARAM_DST, %ecx
+ leal (%ebx,%eax,4), %ebx
+
+ leal -4(%ecx,%eax,4), %ecx
+ negl %eax
+
+ C This loop runs at about 3 cycles/limb, limited by instruction
+ C decoding, and this is despite every second access being unaligned.
+
+L(simple):
+ C eax counter, -(size-1) to -1
+ C ebx &src[size-1]
+ C ecx &dst[size-1]
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+
+Zdisp( movq, 0,(%ebx,%eax,4), %mm0)
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+Zdisp( movd, %mm0, 0,(%ecx,%eax,4))
+ jnz L(simple)
+
+
+ movq %mm0, (%ecx)
+ movl %edx, %eax
+
+ popl %ebx
+
+ femms
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx src
+ C ecx 32-shift
+ C edx retval
+ C
+ C mm6 shift
+
+ addl $32, %ecx
+ subl $7, %eax C size-8
+
+ movd %ecx, %mm7
+ movl PARAM_DST, %ecx
+
+ movq (%ebx), %mm2 C src low qword
+ leal (%ebx,%eax,4), %ebx C src end - 32
+
+ testb $4, %cl
+ leal (%ecx,%eax,4), %ecx C dst end - 32
+
+ notl %eax C -(size-7)
+ jz L(dst_aligned)
+
+ psrlq %mm6, %mm2
+ incl %eax
+
+Zdisp( movd, %mm2, 0,(%ecx,%eax,4)) C dst low limb
+ movq 4(%ebx,%eax,4), %mm2 C new src low qword
+L(dst_aligned):
+
+ movq 12(%ebx,%eax,4), %mm0 C src second lowest qword
+ nop C avoid bad cache line crossing
+
+
+ C This loop is the important bit, the rest is just support for it.
+ C Four src limbs are held at the start, and four more will be read.
+ C Four dst limbs will be written. This schedule seems necessary for
+ C full speed.
+ C
+ C The use of -(size-7) lets the loop stop when %eax becomes >= 0,
+ C and leaves 0 to 3 which can be tested with test $1 and $2.
+
+L(top):
+ C eax counter, -(size-7) step by +4 until >=0
+ C ebx src end - 32
+ C ecx dst end - 32
+ C edx retval
+ C
+ C mm0 src next qword
+ C mm1 scratch
+ C mm2 src prev qword
+ C mm6 shift
+ C mm7 64-shift
+
+ psrlq %mm6, %mm2
+ addl $4, %eax
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ por %mm0, %mm2
+ movq 4(%ebx,%eax,4), %mm0
+
+ psrlq %mm6, %mm1
+ movq %mm2, -12(%ecx,%eax,4)
+
+ movq %mm0, %mm2
+ psllq %mm7, %mm0
+
+ por %mm0, %mm1
+ movq 12(%ebx,%eax,4), %mm0
+
+ movq %mm1, -4(%ecx,%eax,4)
+ ja L(top) C jump if no carry and not zero
+
+
+
+ C Now have the four limbs in mm2 (low) and mm0 (high), and %eax is 0
+ C to 3 representing respectively 3 to 0 further limbs.
+
+ testl $2, %eax C testl to avoid bad cache line crossings
+ jnz L(finish_nottwo)
+
+ C Two or three extra limbs: rshift mm2, OR it with lshifted mm0, mm0
+ C becomes new mm2 and a new mm0 is loaded.
+
+ psrlq %mm6, %mm2
+ movq %mm0, %mm1
+
+ psllq %mm7, %mm0
+ addl $2, %eax
+
+ por %mm0, %mm2
+ movq 12(%ebx,%eax,4), %mm0
+
+ movq %mm2, -4(%ecx,%eax,4)
+ movq %mm1, %mm2
+L(finish_nottwo):
+
+
+ testb $1, %al
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ por %mm0, %mm2
+ psrlq %mm6, %mm1
+
+ movq %mm2, 4(%ecx,%eax,4)
+ jnz L(finish_even)
+
+
+ C one further extra limb to process
+
+ movd 32-4(%ebx), %mm0 C src[size-1], most significant limb
+ popl %ebx
+
+ movq %mm0, %mm2
+ psllq %mm7, %mm0
+
+ por %mm0, %mm1
+ psrlq %mm6, %mm2
+
+ movq %mm1, 32-12(%ecx) C dst[size-3,size-2]
+ movd %mm2, 32-4(%ecx) C dst[size-1]
+
+ movl %edx, %eax C retval
+
+ femms
+ ret
+
+
+ nop C avoid bad cache line crossing
+L(finish_even):
+ C no further extra limbs
+
+ movq %mm1, 32-8(%ecx) C dst[size-2,size-1]
+ movl %edx, %eax C retval
+
+ popl %ebx
+
+ femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/com_n.asm b/rts/gmp/mpn/x86/k6/mmx/com_n.asm
new file mode 100644
index 0000000000..8915080f0f
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/com_n.asm
@@ -0,0 +1,91 @@
+dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement.
+dnl
+dnl alignment dst/src, A=0mod8, N=4mod8
+dnl A/A A/N N/A N/N
+dnl K6-2 1.0 1.18 1.18 1.18 cycles/limb
+dnl K6 1.5 1.85 1.75 1.85
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Take the bitwise ones-complement of src,size and write it to dst,size.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_com_n)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %edx
+ shrl %ecx
+ jnz L(two_or_more)
+
+ movl (%eax), %eax
+ notl %eax
+ movl %eax, (%edx)
+ ret
+
+
+L(two_or_more):
+ pushl %ebx
+FRAME_pushl()
+ movl %ecx, %ebx
+
+ pcmpeqd %mm7, %mm7 C all ones
+
+
+ ALIGN(16)
+L(top):
+ C eax src
+ C ebx floor(size/2)
+ C ecx counter
+ C edx dst
+ C esi
+ C edi
+ C ebp
+
+ movq -8(%eax,%ecx,8), %mm0
+ pxor %mm7, %mm0
+ movq %mm0, -8(%edx,%ecx,8)
+ loop L(top)
+
+
+ jnc L(no_extra)
+ movl (%eax,%ebx,8), %eax
+ notl %eax
+ movl %eax, (%edx,%ebx,8)
+L(no_extra):
+
+ popl %ebx
+ emms_or_femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/logops_n.asm b/rts/gmp/mpn/x86/k6/mmx/logops_n.asm
new file mode 100644
index 0000000000..46cb3b7ea5
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/logops_n.asm
@@ -0,0 +1,212 @@
+dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
+dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
+dnl
+dnl alignment dst/src1/src2, A=0mod8, N=4mod8
+dnl A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
+dnl
+dnl K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor
+dnl K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor
+dnl K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior
+dnl
+dnl K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor
+dnl K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor
+dnl K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl M4_p and M4_i are the MMX and integer instructions
+dnl M4_*_neg_dst means whether to negate the final result before writing
+dnl M4_*_neg_src2 means whether to negate the src2 values before using them
+
+define(M4_choose_op,
+m4_assert_numargs(7)
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_operation', `$1')
+define(`M4_p', `$2')
+define(`M4_p_neg_dst', `$3')
+define(`M4_p_neg_src2',`$4')
+define(`M4_i', `$5')
+define(`M4_i_neg_dst', `$6')
+define(`M4_i_neg_src2',`$7')
+')')
+
+dnl xnor is done in "iorn" style because it's a touch faster than "nior"
+dnl style (the two are equivalent for xor).
+
+M4_choose_op( and_n, pand,0,0, andl,0,0)
+M4_choose_op( andn_n, pandn,0,0, andl,0,1)
+M4_choose_op( nand_n, pand,1,0, andl,1,0)
+M4_choose_op( ior_n, por,0,0, orl,0,0)
+M4_choose_op( iorn_n, por,0,1, orl,0,1)
+M4_choose_op( nior_n, por,1,0, orl,1,0)
+M4_choose_op( xor_n, pxor,0,0, xorl,0,0)
+M4_choose_op( xnor_n, pxor,0,1, xorl,0,1)
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+
+C void M4_function (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C
+C Do src1,size M4_operation src2,size, storing the result in dst,size.
+C
+C Unaligned movq loads and stores are a bit slower than aligned ones. The
+C test at the start of the routine checks the alignment of src1 and if
+C necessary processes one limb separately at the low end to make it aligned.
+C
+C The raw speeds without this alignment switch are as follows.
+C
+C alignment dst/src1/src2, A=0mod8, N=4mod8
+C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
+C
+C K6 1.5 2.0 1.5 2.0 and,andn,ior,xor
+C K6 1.75 2.2 2.0 2.28 iorn,xnor
+C K6 2.0 2.25 2.35 2.28 nand,nior
+C
+C
+C Future:
+C
+C K6 can do one 64-bit load per cycle so each of these routines should be
+C able to approach 1.0 c/l, if aligned. The basic and/andn/ior/xor might be
+C able to get 1.0 with just a 4 limb loop, being 3 instructions per 2 limbs.
+C The others are 4 instructions per 2 limbs, and so can only approach 1.0
+C because there's nowhere to hide some loop control.
+
+defframe(PARAM_SIZE,16)
+defframe(PARAM_SRC2,12)
+defframe(PARAM_SRC1,8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+PROLOGUE(M4_function)
+ movl PARAM_SIZE, %ecx
+ pushl %ebx
+ FRAME_pushl()
+ movl PARAM_SRC1, %eax
+ movl PARAM_SRC2, %ebx
+ cmpl $1, %ecx
+ movl PARAM_DST, %edx
+ ja L(two_or_more)
+
+
+ movl (%ebx), %ecx
+ popl %ebx
+ifelse(M4_i_neg_src2,1,`notl %ecx')
+ M4_i (%eax), %ecx
+ifelse(M4_i_neg_dst,1,` notl %ecx')
+ movl %ecx, (%edx)
+
+ ret
+
+
+L(two_or_more):
+ C eax src1
+ C ebx src2
+ C ecx size
+ C edx dst
+ C esi
+ C edi
+ C ebp
+ C
+ C carry bit is low of size
+
+ pushl %esi
+ FRAME_pushl()
+ testl $4, %eax
+ jz L(alignment_ok)
+
+ movl (%ebx), %esi
+ addl $4, %ebx
+ifelse(M4_i_neg_src2,1,`notl %esi')
+ M4_i (%eax), %esi
+ addl $4, %eax
+ifelse(M4_i_neg_dst,1,` notl %esi')
+ movl %esi, (%edx)
+ addl $4, %edx
+ decl %ecx
+
+L(alignment_ok):
+ movl %ecx, %esi
+ shrl %ecx
+ jnz L(still_two_or_more)
+
+ movl (%ebx), %ecx
+ popl %esi
+ifelse(M4_i_neg_src2,1,`notl %ecx')
+ M4_i (%eax), %ecx
+ifelse(M4_i_neg_dst,1,` notl %ecx')
+ popl %ebx
+ movl %ecx, (%edx)
+ ret
+
+
+L(still_two_or_more):
+ifelse(eval(M4_p_neg_src2 || M4_p_neg_dst),1,`
+ pcmpeqd %mm7, %mm7 C all ones
+')
+
+ ALIGN(16)
+L(top):
+ C eax src1
+ C ebx src2
+ C ecx counter
+ C edx dst
+ C esi
+ C edi
+ C ebp
+ C
+ C carry bit is low of size
+
+ movq -8(%ebx,%ecx,8), %mm0
+ifelse(M4_p_neg_src2,1,`pxor %mm7, %mm0')
+ M4_p -8(%eax,%ecx,8), %mm0
+ifelse(M4_p_neg_dst,1,` pxor %mm7, %mm0')
+ movq %mm0, -8(%edx,%ecx,8)
+
+ loop L(top)
+
+
+ jnc L(no_extra)
+
+ movl -4(%ebx,%esi,4), %ebx
+ifelse(M4_i_neg_src2,1,`notl %ebx')
+ M4_i -4(%eax,%esi,4), %ebx
+ifelse(M4_i_neg_dst,1,` notl %ebx')
+ movl %ebx, -4(%edx,%esi,4)
+L(no_extra):
+
+ popl %esi
+ popl %ebx
+ emms_or_femms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/lshift.asm b/rts/gmp/mpn/x86/k6/mmx/lshift.asm
new file mode 100644
index 0000000000..f1dc83db46
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/lshift.asm
@@ -0,0 +1,122 @@
+dnl AMD K6 mpn_lshift -- mpn left shift.
+dnl
+dnl K6: 3.0 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx
+C instructions. This is despite every second fetch being unaligned.
+
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shldl( %cl, %edx, %eax) C return value
+
+ shll %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+ ALIGN(16) C avoid offset 0x1f
+ nop C avoid bad cache line crossing
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx,%eax,4), %edx C src high limb
+ negl %ecx
+
+ movd PARAM_SHIFT, %mm6
+ addl $32, %ecx C 32-shift
+
+ shrl %cl, %edx
+
+ movd %ecx, %mm7
+ movl PARAM_DST, %ecx
+
+L(top):
+ C eax counter, size-1 to 1
+ C ebx src
+ C ecx dst
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%ebx,%eax,4), %mm0
+ decl %eax
+
+ psrlq %mm7, %mm0
+
+ movd %mm0, 4(%ecx,%eax,4)
+ jnz L(top)
+
+
+ movd (%ebx), %mm0
+ popl %ebx
+
+ psllq %mm6, %mm0
+ movl %edx, %eax
+
+ movd %mm0, (%ecx)
+
+ emms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/popham.asm b/rts/gmp/mpn/x86/k6/mmx/popham.asm
new file mode 100644
index 0000000000..2c619252bb
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/popham.asm
@@ -0,0 +1,238 @@
+dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and
+dnl hamming distance.
+dnl
+dnl popcount hamdist
+dnl K6-2: 9.0 11.5 cycles/limb
+dnl K6: 12.5 13.0
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C The code here isn't optimal, but it's already a 2x speedup over the plain
+C integer mpn/generic/popcount.c,hamdist.c.
+
+
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist
+')m4exit(1)')')
+
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+
+HAM(`
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+ifdef(`PIC',,`
+ dnl non-PIC
+
+ DATA
+ ALIGN(8)
+
+define(LS,
+m4_assert_numargs(1)
+`LF(M4_function,`$1')')
+
+LS(rodata_AAAAAAAAAAAAAAAA):
+ .long 0xAAAAAAAA
+ .long 0xAAAAAAAA
+
+LS(rodata_3333333333333333):
+ .long 0x33333333
+ .long 0x33333333
+
+LS(rodata_0F0F0F0F0F0F0F0F):
+ .long 0x0F0F0F0F
+ .long 0x0F0F0F0F
+
+LS(rodata_000000FF000000FF):
+ .long 0x000000FF
+ .long 0x000000FF
+')
+
+ .text
+ ALIGN(32)
+
+POP(`ifdef(`PIC', `
+ C avoid shrl crossing a 32-byte boundary
+ nop')')
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ orl %ecx, %ecx
+ jz L(zero)
+
+ifdef(`PIC',`
+ movl $0xAAAAAAAA, %eax
+ movl $0x33333333, %edx
+
+ movd %eax, %mm7
+ movd %edx, %mm6
+
+ movl $0x0F0F0F0F, %eax
+ movl $0x000000FF, %edx
+
+ punpckldq %mm7, %mm7
+ punpckldq %mm6, %mm6
+
+ movd %eax, %mm5
+ movd %edx, %mm4
+
+ punpckldq %mm5, %mm5
+ punpckldq %mm4, %mm4
+',`
+
+ movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7
+ movq LS(rodata_3333333333333333), %mm6
+ movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5
+ movq LS(rodata_000000FF000000FF), %mm4
+')
+
+define(REG_AAAAAAAAAAAAAAAA, %mm7)
+define(REG_3333333333333333, %mm6)
+define(REG_0F0F0F0F0F0F0F0F, %mm5)
+define(REG_000000FF000000FF, %mm4)
+
+
+ movl PARAM_SRC, %eax
+HAM(` movl PARAM_SRC2, %edx')
+
+ pxor %mm2, %mm2 C total
+
+ shrl %ecx
+ jnc L(top)
+
+Zdisp( movd, 0,(%eax,%ecx,8), %mm1)
+
+HAM(`
+Zdisp( movd, 0,(%edx,%ecx,8), %mm0)
+ pxor %mm0, %mm1
+')
+
+ incl %ecx
+ jmp L(loaded)
+
+
+ ALIGN(16)
+POP(` nop C alignment to avoid crossing 32-byte boundaries')
+
+L(top):
+ C eax src
+ C ebx
+ C ecx counter, qwords, decrementing
+ C edx [hamdist] src2
+ C
+ C mm0 (scratch)
+ C mm1 (scratch)
+ C mm2 total (low dword)
+ C mm3
+ C mm4 \
+ C mm5 | special constants
+ C mm6 |
+ C mm7 /
+
+ movq -8(%eax,%ecx,8), %mm1
+HAM(` pxor -8(%edx,%ecx,8), %mm1')
+
+L(loaded):
+ movq %mm1, %mm0
+ pand REG_AAAAAAAAAAAAAAAA, %mm1
+
+ psrlq $1, %mm1
+HAM(` nop C code alignment')
+
+ psubd %mm1, %mm0 C bit pairs
+HAM(` nop C code alignment')
+
+
+ movq %mm0, %mm1
+ psrlq $2, %mm0
+
+ pand REG_3333333333333333, %mm0
+ pand REG_3333333333333333, %mm1
+
+ paddd %mm1, %mm0 C nibbles
+
+
+ movq %mm0, %mm1
+ psrlq $4, %mm0
+
+ pand REG_0F0F0F0F0F0F0F0F, %mm0
+ pand REG_0F0F0F0F0F0F0F0F, %mm1
+
+ paddd %mm1, %mm0 C bytes
+
+ movq %mm0, %mm1
+ psrlq $8, %mm0
+
+
+ paddb %mm1, %mm0 C words
+
+
+ movq %mm0, %mm1
+ psrlq $16, %mm0
+
+ paddd %mm1, %mm0 C dwords
+
+ pand REG_000000FF000000FF, %mm0
+
+ paddd %mm0, %mm2 C low to total
+ psrlq $32, %mm0
+
+ paddd %mm0, %mm2 C high to total
+ loop L(top)
+
+
+
+ movd %mm2, %eax
+ emms_or_femms
+ ret
+
+L(zero):
+ movl $0, %eax
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mmx/rshift.asm b/rts/gmp/mpn/x86/k6/mmx/rshift.asm
new file mode 100644
index 0000000000..cc5948f26c
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mmx/rshift.asm
@@ -0,0 +1,122 @@
+dnl AMD K6 mpn_rshift -- mpn right shift.
+dnl
+dnl K6: 3.0 cycles/limb
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The loop runs at 3 cycles/limb, limited by decoding and by having 3 mmx
+C instructions. This is despite every second fetch being unaligned.
+
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+ C The 1 limb case can be done without the push %ebx, but it's then
+ C still the same speed. The push is left as a free helping hand for
+ C the two_or_more code.
+
+ movl PARAM_SIZE, %eax
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ decl %eax
+
+ movl PARAM_SHIFT, %ecx
+ jnz L(two_or_more)
+
+ movl (%ebx), %edx C src limb
+ movl PARAM_DST, %ebx
+
+ shrdl( %cl, %edx, %eax) C return value
+
+ shrl %cl, %edx
+
+ movl %edx, (%ebx) C dst limb
+ popl %ebx
+
+ ret
+
+
+ ALIGN(16) C avoid offset 0x1f
+L(two_or_more):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx
+
+ movl (%ebx), %edx C src low limb
+ negl %ecx
+
+ addl $32, %ecx C 32-shift
+ movd PARAM_SHIFT, %mm6
+
+ shll %cl, %edx C retval
+ movl PARAM_DST, %ecx
+
+ leal (%ebx,%eax,4), %ebx
+
+ leal -4(%ecx,%eax,4), %ecx
+ negl %eax
+
+
+L(simple):
+ C eax counter (negative)
+ C ebx &src[size-1]
+ C ecx &dst[size-1]
+ C edx retval
+ C
+ C mm0 scratch
+ C mm6 shift
+
+Zdisp( movq, 0,(%ebx,%eax,4), %mm0)
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+Zdisp( movd, %mm0, 0,(%ecx,%eax,4))
+ jnz L(simple)
+
+
+ movq %mm0, (%ecx)
+ movl %edx, %eax
+
+ popl %ebx
+
+ emms
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mul_1.asm b/rts/gmp/mpn/x86/k6/mul_1.asm
new file mode 100644
index 0000000000..c2220fe4ca
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mul_1.asm
@@ -0,0 +1,272 @@
+dnl AMD K6 mpn_mul_1 -- mpn by limb multiply.
+dnl
+dnl K6: 6.25 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier, mp_limb_t carry);
+C
+C Multiply src,size by mult and store the result in dst,size.
+C Return the carry limb from the top of the result.
+C
+C mpn_mul_1c() accepts an initial carry for the calculation, it's added into
+C the low limb of the result.
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl minimum 5 because the unrolled code can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_mul_1c)
+ pushl %esi
+deflit(`FRAME',4)
+ movl PARAM_CARRY, %esi
+ jmp LF(mpn_mul_1,start_nc)
+EPILOGUE()
+
+
+PROLOGUE(mpn_mul_1)
+ push %esi
+deflit(`FRAME',4)
+ xorl %esi, %esi C initial carry
+
+L(start_nc):
+ mov PARAM_SIZE, %ecx
+ push %ebx
+FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ push %edi
+FRAME_pushl()
+
+ movl PARAM_DST, %edi
+ pushl %ebp
+FRAME_pushl()
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_MULTIPLIER, %ebp
+
+ jae L(unroll)
+
+
+ C code offset 0x22 here, close enough to aligned
+L(simple):
+ C eax scratch
+ C ebx src
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi dst
+ C ebp multiplier
+ C
+ C this loop 8 cycles/limb
+
+ movl (%ebx), %eax
+ addl $4, %ebx
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi)
+ addl $4, %edi
+
+ loop L(simple)
+
+
+ popl %ebp
+
+ popl %edi
+ popl %ebx
+
+ movl %esi, %eax
+ popl %esi
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C The code for each limb is 6 cycles, with instruction decoding being the
+C limiting factor. At 4 limbs/loop and 1 cycle/loop of overhead it's 6.25
+C cycles/limb in total.
+C
+C The secret ingredient to get 6.25 is to start the loop with the mul and
+C have the load/store pair at the end. Rotating the load/store to the top
+C is an 0.5 c/l slowdown. (Some address generation effect probably.)
+C
+C The whole unrolled loop fits nicely in exactly 80 bytes.
+
+
+ ALIGN(16) C already aligned to 16 here actually
+L(unroll):
+ movl (%ebx), %eax
+ leal -16(%ebx,%ecx,4), %ebx
+
+ leal -16(%edi,%ecx,4), %edi
+ subl $4, %ecx
+
+ negl %ecx
+
+
+ ALIGN(16) C one byte nop for this alignment
+L(top):
+ C eax scratch
+ C ebx &src[size-4]
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi &dst[size-4]
+ C ebp multiplier
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi,%ecx,4)
+ movl 4(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 4(%edi,%ecx,4)
+ movl 8(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 8(%edi,%ecx,4)
+ movl 12(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 12(%edi,%ecx,4)
+ movl 16(%ebx,%ecx,4), %eax
+
+
+ addl $4, %ecx
+ js L(top)
+
+
+
+ C eax next src limb
+ C ebx &src[size-4]
+ C ecx 0 to 3 representing respectively 4 to 1 further limbs
+ C edx
+ C esi carry
+ C edi &dst[size-4]
+
+ testb $2, %cl
+ jnz L(finish_not_two)
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi,%ecx,4)
+ movl 4(%ebx,%ecx,4), %eax
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 4(%edi,%ecx,4)
+ movl 8(%ebx,%ecx,4), %eax
+
+ addl $2, %ecx
+L(finish_not_two):
+
+
+ testb $1, %cl
+ jnz L(finish_not_one)
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, 8(%edi)
+ movl 12(%ebx), %eax
+L(finish_not_one):
+
+
+ mull %ebp
+
+ addl %esi, %eax
+ popl %ebp
+
+ adcl $0, %edx
+
+ movl %eax, 12(%edi)
+ popl %edi
+
+ popl %ebx
+ movl %edx, %eax
+
+ popl %esi
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/mul_basecase.asm b/rts/gmp/mpn/x86/k6/mul_basecase.asm
new file mode 100644
index 0000000000..1f5a3a4b4b
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/mul_basecase.asm
@@ -0,0 +1,600 @@
+dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers.
+dnl
+dnl K6: approx 9.0 cycles per cross product on 30x30 limbs (with 16 limbs/loop
+dnl unrolling).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K6: UNROLL_COUNT cycles/product (approx)
+dnl 8 9.75
+dnl 16 9.3
+dnl 32 9.3
+dnl Maximum possible with the current code is 32.
+dnl
+dnl With 16 the inner unrolled loop fits exactly in a 256 byte block, which
+dnl might explain it's good performance.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+C
+C Calculate xp,xsize multiplied by yp,ysize, storing the result in
+C wp,xsize+ysize.
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() entry code only
+C once. The saving is about 10-20% on typical sizes coming from the
+C Karatsuba multiply code.
+C
+C Future:
+C
+C The unrolled loop could be shared by mpn_addmul_1, with some extra stack
+C setups and maybe 2 or 3 wasted cycles at the end. Code saving would be
+C 256 bytes.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 8)
+',`
+deflit(UNROLL_THRESHOLD, 8)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_XSIZE, %ecx
+ movl PARAM_YP, %eax
+
+ movl PARAM_XP, %edx
+ movl (%eax), %eax C yp low limb
+
+ cmpl $2, %ecx
+ ja L(xsize_more_than_two_limbs)
+ je L(two_by_something)
+
+
+ C one limb by one limb
+
+ movl (%edx), %edx C xp low limb
+ movl PARAM_WP, %ecx
+
+ mull %edx
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+ decl PARAM_YSIZE
+ pushl %ebx
+deflit(`FRAME',4)
+
+ movl PARAM_WP, %ebx
+ pushl %esi
+deflit(`FRAME',8)
+
+ movl %eax, %ecx C yp low limb
+ movl (%edx), %eax C xp low limb
+
+ movl %edx, %esi C xp
+ jnz L(two_by_two)
+
+
+ C two limbs by one limb
+
+ mull %ecx
+
+ movl %eax, (%ebx)
+ movl 4(%esi), %eax
+
+ movl %edx, %esi C carry
+
+ mull %ecx
+
+ addl %eax, %esi
+ movl %esi, 4(%ebx)
+
+ adcl $0, %edx
+
+ movl %edx, 8(%ebx)
+ popl %esi
+
+ popl %ebx
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(two_by_two):
+ C eax xp low limb
+ C ebx wp
+ C ecx yp low limb
+ C edx
+ C esi xp
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ mull %ecx C xp[0] * yp[0]
+
+ push %edi
+deflit(`FRAME',12)
+ movl %eax, (%ebx)
+
+ movl 4(%esi), %eax
+ movl %edx, %edi C carry, for wp[1]
+
+ mull %ecx C xp[1] * yp[0]
+
+ addl %eax, %edi
+ movl PARAM_YP, %ecx
+
+ adcl $0, %edx
+
+ movl %edi, 4(%ebx)
+ movl 4(%ecx), %ecx C yp[1]
+
+ movl 4(%esi), %eax C xp[1]
+ movl %edx, %edi C carry, for wp[2]
+
+ mull %ecx C xp[1] * yp[1]
+
+ addl %eax, %edi
+
+ adcl $0, %edx
+
+ movl (%esi), %eax C xp[0]
+ movl %edx, %esi C carry, for wp[3]
+
+ mull %ecx C xp[0] * yp[1]
+
+ addl %eax, 4(%ebx)
+ adcl %edx, %edi
+ adcl $0, %esi
+
+ movl %edi, 8(%ebx)
+ popl %edi
+
+ movl %esi, 12(%ebx)
+ popl %esi
+
+ popl %ebx
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(xsize_more_than_two_limbs):
+
+C The first limb of yp is processed with a simple mpn_mul_1 style loop
+C inline. Unrolling this doesn't seem worthwhile since it's only run once
+C (whereas the addmul below is run ysize-1 many times). A call to the
+C actual mpn_mul_1 will be slowed down by the call and parameter pushing and
+C popping, and doesn't seem likely to be worthwhile on the typical 10-20
+C limb operations the Karatsuba code calls here with.
+
+ C eax yp[0]
+ C ebx
+ C ecx xsize
+ C edx xp
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',0)
+
+ pushl %edi defframe_pushl(SAVE_EDI)
+ pushl %ebp defframe_pushl(SAVE_EBP)
+
+ movl PARAM_WP, %edi
+ pushl %esi defframe_pushl(SAVE_ESI)
+
+ movl %eax, %ebp
+ pushl %ebx defframe_pushl(SAVE_EBX)
+
+ leal (%edx,%ecx,4), %ebx C xp end
+ xorl %esi, %esi
+
+ leal (%edi,%ecx,4), %edi C wp end of mul1
+ negl %ecx
+
+
+L(mul1):
+ C eax scratch
+ C ebx xp end
+ C ecx counter, negative
+ C edx scratch
+ C esi carry
+ C edi wp end of mul1
+ C ebp multiplier
+
+ movl (%ebx,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi,%ecx,4)
+ incl %ecx
+
+ jnz L(mul1)
+
+
+ movl PARAM_YSIZE, %edx
+ movl %esi, (%edi) C final carry
+
+ movl PARAM_XSIZE, %ecx
+ decl %edx
+
+ jnz L(ysize_more_than_one_limb)
+
+ popl %ebx
+ popl %esi
+ popl %ebp
+ popl %edi
+ ret
+
+
+L(ysize_more_than_one_limb):
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_YP, %eax
+
+ jae L(unroll)
+
+
+C -----------------------------------------------------------------------------
+C Simple addmul loop.
+C
+C Using ebx and edi pointing at the ends of their respective locations saves
+C a couple of instructions in the outer loop. The inner loop is still 11
+C cycles, the same as the simple loop in aorsmul_1.asm.
+
+ C eax yp
+ C ebx xp end
+ C ecx xsize
+ C edx ysize-1
+ C esi
+ C edi wp end of mul1
+ C ebp
+
+ movl 4(%eax), %ebp C multiplier
+ negl %ecx
+
+ movl %ecx, PARAM_XSIZE C -xsize
+ xorl %esi, %esi C initial carry
+
+ leal 4(%eax,%edx,4), %eax C yp end
+ negl %edx
+
+ movl %eax, PARAM_YP
+ movl %edx, PARAM_YSIZE
+
+ jmp L(simple_outer_entry)
+
+
+ C aligning here saves a couple of cycles
+ ALIGN(16)
+L(simple_outer_top):
+ C edx ysize counter, negative
+
+ movl PARAM_YP, %eax C yp end
+ xorl %esi, %esi C carry
+
+ movl PARAM_XSIZE, %ecx C -xsize
+ movl %edx, PARAM_YSIZE
+
+ movl (%eax,%edx,4), %ebp C yp limb multiplier
+L(simple_outer_entry):
+ addl $4, %edi
+
+
+L(simple_inner):
+ C eax scratch
+ C ebx xp end
+ C ecx counter, negative
+ C edx scratch
+ C esi carry
+ C edi wp end of this addmul
+ C ebp multiplier
+
+ movl (%ebx,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl $0, %edx
+ addl %eax, (%edi,%ecx,4)
+ adcl %edx, %esi
+
+ incl %ecx
+ jnz L(simple_inner)
+
+
+ movl PARAM_YSIZE, %edx
+ movl %esi, (%edi)
+
+ incl %edx
+ jnz L(simple_outer_top)
+
+
+ popl %ebx
+ popl %esi
+ popl %ebp
+ popl %edi
+ ret
+
+
+C -----------------------------------------------------------------------------
+C Unrolled loop.
+C
+C The unrolled inner loop is the same as in aorsmul_1.asm, see that code for
+C some comments.
+C
+C VAR_COUNTER is for the inner loop, running from VAR_COUNTER_INIT down to
+C 0, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C PARAM_XP and PARAM_WP get offset appropriately for where the unrolled loop
+C is entered.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop. This can't just be fetched through the xp
+C pointer because of the offset applied to it.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C PARAM_WP is similarly offset so that the PARAM_YSIZE counter can be added
+C to give the starting point in the destination for each unrolled loop (this
+C point is one limb upwards for each limb of yp processed).
+C
+C Having PARAM_YSIZE count negative to zero means it's not necessary to
+C store new values of PARAM_YP and PARAM_WP on each loop. Those values on
+C the stack remain constant and on each loop an leal adjusts them with the
+C PARAM_YSIZE counter value.
+
+
+defframe(VAR_COUNTER, -20)
+defframe(VAR_COUNTER_INIT, -24)
+defframe(VAR_JMP, -28)
+defframe(VAR_XP_LOW, -32)
+deflit(VAR_STACK_SPACE, 16)
+
+dnl For some strange reason using (%esp) instead of 0(%esp) is a touch
+dnl slower in this code, hence the defframe empty-if-zero feature is
+dnl disabled.
+dnl
+dnl If VAR_COUNTER is at (%esp), the effect is worse. In this case the
+dnl unrolled loop is 255 instead of 256 bytes, but quite how this affects
+dnl anything isn't clear.
+dnl
+define(`defframe_empty_if_zero_disabled',1)
+
+L(unroll):
+ C eax yp (not used)
+ C ebx xp end (not used)
+ C ecx xsize
+ C edx ysize-1
+ C esi
+ C edi wp end of mul1 (not used)
+ C ebp
+deflit(`FRAME', 16)
+
+ leal -2(%ecx), %ebp C one limb processed at start,
+ decl %ecx C and ebp is one less
+
+ shrl $UNROLL_LOG2, %ebp
+ negl %ecx
+
+ subl $VAR_STACK_SPACE, %esp
+deflit(`FRAME', 16+VAR_STACK_SPACE)
+ andl $UNROLL_MASK, %ecx
+
+ movl %ecx, %esi
+ shll $4, %ecx
+
+ movl %ebp, VAR_COUNTER_INIT
+ negl %esi
+
+ C 15 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(unroll_here):
+',`
+ leal L(unroll_entry) (%ecx,%esi,1), %ecx
+')
+
+ movl PARAM_XP, %ebx
+ movl %ebp, VAR_COUNTER
+
+ movl PARAM_WP, %edi
+ movl %ecx, VAR_JMP
+
+ movl (%ebx), %eax
+ leal 4(%edi,%esi,4), %edi C wp adjust for unrolling and mul1
+
+ leal (%ebx,%esi,4), %ebx C xp adjust for unrolling
+
+ movl %eax, VAR_XP_LOW
+
+ movl %ebx, PARAM_XP
+ movl PARAM_YP, %ebx
+
+ leal (%edi,%edx,4), %ecx C wp adjust for ysize indexing
+ movl 4(%ebx), %ebp C multiplier (yp second limb)
+
+ leal 4(%ebx,%edx,4), %ebx C yp adjust for ysize indexing
+
+ movl %ecx, PARAM_WP
+
+ leal 1(%esi), %ecx C adjust parity for decl %ecx above
+
+ movl %ebx, PARAM_YP
+ negl %edx
+
+ movl %edx, PARAM_YSIZE
+ jmp L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%ecx,%esi,1), %ecx
+ addl $L(unroll_entry)-L(unroll_here), %ecx
+ addl (%esp), %ecx
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ C Aligning here saves a couple of cycles per loop. Using 32 doesn't
+ C cost any extra space, since the inner unrolled loop below is
+ C aligned to 32.
+ ALIGN(32)
+L(unroll_outer_top):
+ C edx ysize
+
+ movl PARAM_YP, %eax
+ movl %edx, PARAM_YSIZE C incremented ysize counter
+
+ movl PARAM_WP, %edi
+
+ movl VAR_COUNTER_INIT, %ebx
+ movl (%eax,%edx,4), %ebp C next multiplier
+
+ movl PARAM_XSIZE, %ecx
+ leal (%edi,%edx,4), %edi C adjust wp for where we are in yp
+
+ movl VAR_XP_LOW, %eax
+ movl %ebx, VAR_COUNTER
+
+L(unroll_outer_entry):
+ mull %ebp
+
+ C using testb is a tiny bit faster than testl
+ testb $1, %cl
+
+ movl %eax, %ecx C low carry
+ movl VAR_JMP, %eax
+
+ movl %edx, %esi C high carry
+ movl PARAM_XP, %ebx
+
+ jnz L(unroll_noswap)
+ movl %ecx, %esi C high,low carry other way around
+
+ movl %edx, %ecx
+L(unroll_noswap):
+
+ jmp *%eax
+
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(unroll_top):
+ C eax scratch
+ C ebx xp
+ C ecx carry low
+ C edx scratch
+ C esi carry high
+ C edi wp
+ C ebp multiplier
+ C VAR_COUNTER loop counter
+ C
+ C 15 code bytes each limb
+
+ leal UNROLL_BYTES(%edi), %edi
+
+L(unroll_entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4))
+ deflit(`disp1', eval(disp0 + 4))
+ deflit(`disp2', eval(disp1 + 4))
+
+ movl disp1(%ebx), %eax
+ mull %ebp
+Zdisp( addl, %ecx, disp0,(%edi))
+ adcl %eax, %esi
+ movl %edx, %ecx
+ jadcl0( %ecx)
+
+ movl disp2(%ebx), %eax
+ mull %ebp
+ addl %esi, disp1(%edi)
+ adcl %eax, %ecx
+ movl %edx, %esi
+ jadcl0( %esi)
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%ebx), %ebx
+
+ jns L(unroll_top)
+
+
+ movl PARAM_YSIZE, %edx
+ addl %ecx, UNROLL_BYTES(%edi)
+
+ adcl $0, %esi
+
+ incl %edx
+ movl %esi, UNROLL_BYTES+4(%edi)
+
+ jnz L(unroll_outer_top)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+
+ addl $FRAME, %esp
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k6/sqr_basecase.asm b/rts/gmp/mpn/x86/k6/sqr_basecase.asm
new file mode 100644
index 0000000000..70d49b3e57
--- /dev/null
+++ b/rts/gmp/mpn/x86/k6/sqr_basecase.asm
@@ -0,0 +1,672 @@
+dnl AMD K6 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl K6: approx 4.7 cycles per cross product, or 9.2 cycles per triangular
+dnl product (measured on the speed difference between 17 and 33 limbs,
+dnl which is roughly the Karatsuba recursing range).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl KARATSUBA_SQR_THRESHOLD_MAX is the maximum KARATSUBA_SQR_THRESHOLD this
+dnl code supports. This value is used only by the tune program to know
+dnl what it can go up to. (An attempt to compile with a bigger value will
+dnl trigger some m4_assert()s in the code, making the build fail.)
+dnl
+dnl The value is determined by requiring the displacements in the unrolled
+dnl addmul to fit in single bytes. This means a maximum UNROLL_COUNT of
+dnl 63, giving a maximum KARATSUBA_SQR_THRESHOLD of 66.
+
+deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66)
+
+
+dnl Allow a value from the tune program to override config.m4.
+
+ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',
+`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')
+
+
+dnl UNROLL_COUNT is the number of code chunks in the unrolled addmul. The
+dnl number required is determined by KARATSUBA_SQR_THRESHOLD, since
+dnl mpn_sqr_basecase only needs to handle sizes < KARATSUBA_SQR_THRESHOLD.
+dnl
+dnl The first addmul is the biggest, and this takes the second least
+dnl significant limb and multiplies it by the third least significant and
+dnl up. Hence for a maximum operand size of KARATSUBA_SQR_THRESHOLD-1
+dnl limbs, UNROLL_COUNT needs to be KARATSUBA_SQR_THRESHOLD-3.
+
+m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')
+deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is essentially the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the given size
+C is small.
+C
+C The code size might look a bit excessive, but not all of it is executed
+C and so won't fill up the code cache. The 1x1, 2x2 and 3x3 special cases
+C clearly apply only to those sizes; mid sizes like 10x10 only need part of
+C the unrolled addmul; and big sizes like 35x35 that do need all of it will
+C at least be getting value for money, because 35x35 spends something like
+C 5780 cycles here.
+C
+C Different values of UNROLL_COUNT give slightly different speeds, between
+C 9.0 and 9.2 c/tri-prod measured on the difference between 17 and 33 limbs.
+C This isn't a big difference, but it's presumably some alignment effect
+C which if understood could give a simple speedup.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl PARAM_SRC, %eax
+
+ cmpl $2, %ecx
+ je L(two_limbs)
+
+ movl PARAM_DST, %edx
+ ja L(three_or_more)
+
+
+C -----------------------------------------------------------------------------
+C one limb only
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+
+ movl (%eax), %eax
+ movl %edx, %ecx
+
+ mull %eax
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+
+ pushl %ebx
+ movl %eax, %ebx C src
+deflit(`FRAME',4)
+
+ movl (%ebx), %eax
+ movl PARAM_DST, %ecx
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx)
+ movl 4(%ebx), %eax
+
+ movl %edx, 4(%ecx)
+
+ mull %eax C src[1]^2
+
+ movl %eax, 8(%ecx)
+ movl (%ebx), %eax
+
+ movl %edx, 12(%ecx)
+ movl 4(%ebx), %edx
+
+ mull %edx C src[0]*src[1]
+
+ addl %eax, 4(%ecx)
+
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+
+ popl %ebx
+ addl %eax, 4(%ecx)
+
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(three_or_more):
+deflit(`FRAME',0)
+ cmpl $4, %ecx
+ jae L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+ C eax src
+ C ecx size
+ C edx dst
+
+ pushl %ebx
+ movl %eax, %ebx C src
+
+ movl (%ebx), %eax
+ movl %edx, %ecx C dst
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl 4(%ebx), %eax
+
+ movl %edx, 4(%ecx)
+ pushl %esi
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl 8(%ebx), %eax
+
+ movl %edx, 12(%ecx)
+ pushl %edi
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl (%ebx), %eax
+
+ movl %edx, 20(%ecx)
+ movl 4(%ebx), %edx
+
+ mull %edx C src[0] * src[1]
+
+ movl %eax, %esi
+ movl (%ebx), %eax
+
+ movl %edx, %edi
+ movl 8(%ebx), %edx
+
+ pushl %ebp
+ xorl %ebp, %ebp
+
+ mull %edx C src[0] * src[2]
+
+ addl %eax, %edi
+ movl 4(%ebx), %eax
+
+ adcl %edx, %ebp
+
+ movl 8(%ebx), %edx
+
+ mull %edx C src[1] * src[2]
+
+ addl %eax, %ebp
+
+ adcl $0, %edx
+
+
+ C eax will be dst[5]
+ C ebx
+ C ecx dst
+ C edx dst[4]
+ C esi dst[1]
+ C edi dst[2]
+ C ebp dst[3]
+
+ xorl %eax, %eax
+ addl %esi, %esi
+ adcl %edi, %edi
+ adcl %ebp, %ebp
+ adcl %edx, %edx
+ adcl $0, %eax
+
+ addl %esi, 4(%ecx)
+ adcl %edi, 8(%ecx)
+ adcl %ebp, 12(%ecx)
+
+ popl %ebp
+ popl %edi
+
+ adcl %edx, 16(%ecx)
+
+ popl %esi
+ popl %ebx
+
+ adcl %eax, 20(%ecx)
+ ASSERT(nc)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+defframe(VAR_COUNTER,-20)
+defframe(VAR_JMP, -24)
+deflit(STACK_SPACE, 24)
+
+ ALIGN(16)
+L(four_or_more):
+
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+ C esi
+ C edi
+ C ebp
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+C
+C A test was done calling mpn_mul_1 here to get the benefit of its unrolled
+C loop, but this was only a tiny speedup; at 35 limbs it took 24 cycles off
+C a 5780 cycle operation, which is not surprising since the loop here is 8
+C c/l and mpn_mul_1 is 6.25 c/l.
+
+ subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
+
+ movl %edi, SAVE_EDI
+ leal 4(%edx), %edi
+
+ movl %ebx, SAVE_EBX
+ leal 4(%eax), %ebx
+
+ movl %esi, SAVE_ESI
+ xorl %esi, %esi
+
+ movl %ebp, SAVE_EBP
+
+ C eax
+ C ebx src+4
+ C ecx size
+ C edx
+ C esi
+ C edi dst+4
+ C ebp
+
+ movl (%eax), %ebp C multiplier
+ leal -1(%ecx), %ecx C size-1, and pad to a 16 byte boundary
+
+
+ ALIGN(16)
+L(mul_1):
+ C eax scratch
+ C ebx src ptr
+ C ecx counter
+ C edx scratch
+ C esi carry
+ C edi dst ptr
+ C ebp multiplier
+
+ movl (%ebx), %eax
+ addl $4, %ebx
+
+ mull %ebp
+
+ addl %esi, %eax
+ movl $0, %esi
+
+ adcl %edx, %esi
+
+ movl %eax, (%edi)
+ addl $4, %edi
+
+ loop L(mul_1)
+
+
+C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two addmuls, which are the bottom right corner of the product
+C triangle, are left to the end. These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1]. If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as mpn_addmul_1(), see that routine for some
+C comments.
+C
+C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+C
+C K6 doesn't do any branch prediction on indirect jumps, which is good
+C actually because it's a different target each time. The unrolled addmul
+C is about 3 cycles/limb faster than a simple loop, so the 6 cycle cost of
+C the indirect jump is quickly recovered.
+
+
+dnl This value is also implicitly encoded in a shift and add.
+dnl
+deflit(CODE_BYTES_PER_LIMB, 15)
+
+dnl With the unmodified &src[size] and &dst[size] pointers, the
+dnl displacements in the unrolled code fit in a byte for UNROLL_COUNT
+dnl values up to 31. Above that an offset must be added to them.
+dnl
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>31),1,
+eval((UNROLL_COUNT-31)*4),
+0))
+
+ C eax
+ C ebx &src[size]
+ C ecx
+ C edx
+ C esi carry
+ C edi &dst[size]
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, (%edi)
+
+ subl $4, %ecx
+ jz L(corner)
+
+ movl %ecx, %edx
+ifelse(OFFSET,0,,
+` subl $OFFSET, %ebx')
+
+ shll $4, %ecx
+ifelse(OFFSET,0,,
+` subl $OFFSET, %edi')
+
+ negl %ecx
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+ negl %edx
+
+
+ C The calculated jump mustn't be before the start of the available
+ C code. This is the limitation UNROLL_COUNT puts on the src operand
+ C size, but checked here using the jump address directly.
+ C
+ ASSERT(ae,`
+ movl_text_address( L(unroll_inner_start), %eax)
+ cmpl %eax, %ecx
+ ')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll_outer_top):
+ C eax
+ C ebx &src[size], constant
+ C ecx VAR_JMP
+ C edx VAR_COUNTER, limbs, negative
+ C esi high limb to store
+ C edi dst ptr, high of last addmul
+ C ebp
+
+ movl -12+OFFSET(%ebx,%edx,4), %ebp C multiplier
+ movl %edx, VAR_COUNTER
+
+ movl -8+OFFSET(%ebx,%edx,4), %eax C first limb of multiplicand
+
+ mull %ebp
+
+ testb $1, %cl
+
+ movl %edx, %esi C high carry
+ movl %ecx, %edx C jump
+
+ movl %eax, %ecx C low carry
+ leal CODE_BYTES_PER_LIMB(%edx), %edx
+
+ movl %edx, VAR_JMP
+ leal 4(%edi), %edi
+
+ C A branch-free version of this using some xors was found to be a
+ C touch slower than just a conditional jump, despite the jump
+ C switching between taken and not taken on every loop.
+
+ifelse(eval(UNROLL_COUNT%2),0,
+ jz,jnz) L(unroll_noswap)
+ movl %esi, %eax C high,low carry other way around
+
+ movl %ecx, %esi
+ movl %eax, %ecx
+L(unroll_noswap):
+
+ jmp *%edx
+
+
+ C Must be on an even address here so the low bit of the jump address
+ C will indicate which way around ecx/esi should start.
+ C
+ C An attempt was made at padding here to get the end of the unrolled
+ C code to come out on a good alignment, to save padding before
+ C L(corner). This worked, but turned out to run slower than just an
+ C ALIGN(2). The reason for this is not clear, it might be related
+ C to the different speeds on different UNROLL_COUNTs noted above.
+
+ ALIGN(2)
+
+L(unroll_inner_start):
+ C eax scratch
+ C ebx src
+ C ecx carry low
+ C edx scratch
+ C esi carry high
+ C edi dst
+ C ebp multiplier
+ C
+ C 15 code bytes each limb
+ C ecx/esi swapped on each chunk
+
+forloop(`i', UNROLL_COUNT, 1, `
+ deflit(`disp_src', eval(-i*4 + OFFSET))
+ deflit(`disp_dst', eval(disp_src - 4))
+
+ m4_assert(`disp_src>=-128 && disp_src<128')
+ m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl, disp_src,(%ebx), %eax)
+ mull %ebp
+Zdisp( addl, %esi, disp_dst,(%edi))
+ adcl %eax, %ecx
+ movl %edx, %esi
+ jadcl0( %esi)
+',`
+ dnl this one comes out last
+Zdisp( movl, disp_src,(%ebx), %eax)
+ mull %ebp
+Zdisp( addl, %ecx, disp_dst,(%edi))
+ adcl %eax, %esi
+ movl %edx, %ecx
+ jadcl0( %ecx)
+')
+')
+L(unroll_inner_end):
+
+ addl %esi, -4+OFFSET(%edi)
+
+ movl VAR_COUNTER, %edx
+ jadcl0( %ecx)
+
+ movl %ecx, m4_empty_if_zero(OFFSET)(%edi)
+ movl VAR_JMP, %ecx
+
+ incl %edx
+ jnz L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+ addl $OFFSET, %ebx
+ addl $OFFSET, %edi
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(corner):
+ C ebx &src[size]
+ C edi &dst[2*size-5]
+
+ movl -12(%ebx), %ebp
+
+ movl -8(%ebx), %eax
+ movl %eax, %ecx
+
+ mull %ebp
+
+ addl %eax, -4(%edi)
+ adcl $0, %edx
+
+ movl -4(%ebx), %eax
+ movl %edx, %esi
+ movl %eax, %ebx
+
+ mull %ebp
+
+ addl %esi, %eax
+ adcl $0, %edx
+
+ addl %eax, (%edi)
+ adcl $0, %edx
+
+ movl %edx, %esi
+ movl %ebx, %eax
+
+ mull %ecx
+
+ addl %esi, %eax
+ movl %eax, 4(%edi)
+
+ adcl $0, %edx
+
+ movl %edx, 8(%edi)
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1].
+C The loop measures about 6 cycles/iteration, though it looks like it should
+C decode in 5.
+
+L(lshift_start):
+ movl PARAM_SIZE, %ecx
+
+ movl PARAM_DST, %edi
+ subl $1, %ecx C size-1 and clear carry
+
+ movl PARAM_SRC, %ebx
+ movl %ecx, %edx
+
+ xorl %eax, %eax C ready for adcl
+
+
+ ALIGN(16)
+L(lshift):
+ C eax
+ C ebx src (for later use)
+ C ecx counter, decrementing
+ C edx size-1 (for later use)
+ C esi
+ C edi dst, incrementing
+ C ebp
+
+ rcll 4(%edi)
+ rcll 8(%edi)
+ leal 8(%edi), %edi
+ loop L(lshift)
+
+
+ adcl %eax, %eax
+
+ movl %eax, 4(%edi) C dst most significant limb
+ movl (%ebx), %eax C src[0]
+
+ leal 4(%ebx,%edx,4), %ebx C &src[size]
+ subl %edx, %ecx C -(size-1)
+
+
+C -----------------------------------------------------------------------------
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+
+ mull %eax
+
+ movl %eax, (%edi,%ecx,8) C dst[0]
+
+
+ ALIGN(16)
+L(diag):
+ C eax scratch
+ C ebx &src[size]
+ C ecx counter, negative
+ C edx carry
+ C esi scratch
+ C edi dst[2*size-2]
+ C ebp
+
+ movl (%ebx,%ecx,4), %eax
+ movl %edx, %esi
+
+ mull %eax
+
+ addl %esi, 4(%edi,%ecx,8)
+ adcl %eax, 8(%edi,%ecx,8)
+ adcl $0, %edx
+
+ incl %ecx
+ jnz L(diag)
+
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ addl %edx, 4(%edi) C dst most significant limb
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ addl (%esp), %ecx
+ addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
+ addl %edx, %ecx
+ ret
+')
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/README b/rts/gmp/mpn/x86/k7/README
new file mode 100644
index 0000000000..c34315c401
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/README
@@ -0,0 +1,145 @@
+
+ AMD K7 MPN SUBROUTINES
+
+
+This directory contains code optimized for the AMD Athlon CPU.
+
+The mmx subdirectory has routines using MMX instructions. All Athlons have
+MMX, the separate directory is just so that configure can omit it if the
+assembler doesn't support MMX.
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache.
+
+ cycles/limb
+ mpn_add/sub_n 1.6
+
+ mpn_copyi 0.75 or 1.0 \ varying with data alignment
+ mpn_copyd 0.75 or 1.0 /
+
+ mpn_divrem_1 17.0 integer part, 15.0 fractional part
+ mpn_mod_1 17.0
+ mpn_divexact_by3 8.0
+
+ mpn_l/rshift 1.2
+
+ mpn_mul_1 3.4
+ mpn_addmul/submul_1 3.9
+
+ mpn_mul_basecase 4.42 cycles/crossproduct (approx)
+
+ mpn_popcount 5.0
+ mpn_hamdist 6.0
+
+Prefetching of sources hasn't yet been tried.
+
+
+
+NOTES
+
+cmov, MMX, 3DNow and some extensions to MMX and 3DNow are available.
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+
+Floating point multiplications can be done in parallel with integer
+multiplications, but there doesn't seem to be any way to make use of this.
+
+Unsigned "mul"s can be issued every 3 cycles. This suggests 3 is a limit on
+the speed of the multiplication routines. The documentation shows mul
+executing in IEU0 (or maybe in IEU0 and IEU1 together), so it might be that,
+to get near 3 cycles code has to be arranged so that nothing else is issued
+to IEU0. A busy IEU0 could explain why some code takes 4 cycles and other
+apparently equivalent code takes 5.
+
+
+
+OPTIMIZATIONS
+
+Unrolled loops are used to reduce looping overhead. The unrolling is
+configurable up to 32 limbs/loop for most routines and up to 64 for some.
+The K7 has 64k L1 code cache so quite big unrolling is allowable.
+
+Computed jumps into the unrolling are used to handle sizes not a multiple of
+the unrolling. An attractive feature of this is that times increase
+smoothly with operand size, but it may be that some routines should just
+have simple loops to finish up, especially when PIC adds between 2 and 16
+cycles to get %eip.
+
+Position independent code is implemented using a call to get %eip for the
+computed jumps and a ret is always done, rather than an addl $4,%esp or a
+popl, so the CPU return address branch prediction stack stays synchronised
+with the actual stack in memory.
+
+Branch prediction, in absence of any history, will guess forward jumps are
+not taken and backward jumps are taken. Where possible it's arranged that
+the less likely or less important case is under a taken forward jump.
+
+
+
+CODING
+
+Instructions in general code have been shown grouped if they can execute
+together, which means up to three direct-path instructions which have no
+successive dependencies. K7 always decodes three and has out-of-order
+execution, but the groupings show what slots might be available and what
+dependency chains exist.
+
+When there's vector-path instructions an effort is made to get triplets of
+direct-path instructions in between them, even if there's dependencies,
+since this maximizes decoding throughput and might save a cycle or two if
+decoding is the limiting factor.
+
+
+
+INSTRUCTIONS
+
+adcl direct
+divl 39 cycles back-to-back
+lodsl,etc vector
+loop 1 cycle vector (decl/jnz opens up one decode slot)
+movd reg vector
+movd mem direct
+mull issue every 3 cycles, latency 4 cycles low word, 6 cycles high word
+popl vector (use movl for more than one pop)
+pushl direct, will pair with a load
+shrdl %cl vector, 3 cycles, seems to be 3 decode too
+xorl r,r false read dependency recognised
+
+
+
+REFERENCES
+
+"AMD Athlon Processor X86 Code Optimization Guide", AMD publication number
+22007, revision E, November 1999. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22007.pdf
+
+"3DNow Technology Manual", AMD publication number 21928F/0-August 1999.
+This describes the femms and prefetch instructions. Available on-line,
+
+ http://www.amd.com/K6/k6docs/pdf/21928.pdf
+
+"AMD Extensions to the 3DNow and MMX Instruction Sets Manual", AMD
+publication number 22466, revision B, August 1999. This describes
+instructions added in the Athlon processor, such as pswapd and the extra
+prefetch forms. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22466.pdf
+
+"3DNow Instruction Porting Guide", AMD publication number 22621, revision B,
+August 1999. This has some notes on general Athlon optimizations as well as
+3DNow. Available on-line,
+
+ http://www.amd.com/products/cpg/athlon/techdocs/pdf/22621.pdf
+
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/k7/aors_n.asm b/rts/gmp/mpn/x86/k7/aors_n.asm
new file mode 100644
index 0000000000..85fa9d3036
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/aors_n.asm
@@ -0,0 +1,250 @@
+dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract.
+dnl
+dnl K7: 1.64 cycles/limb (at 16 limb/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 8 1.9
+dnl 16 1.64
+dnl 32 1.7
+dnl 64 2.0
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_add_n', `
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+ define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+ define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size. The return value is the carry bit from the top of the result (1
+C or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation. Note values other than 1 or 0 here will lead to garbage
+C results.
+C
+C This code runs at 1.64 cycles/limb, which is probably the best possible
+C with plain integer operations. Each limb is 2 loads and 1 store, and in
+C one cycle the K7 can do two loads, or a load and a store, leading to 1.5
+C c/l.
+
+dnl Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 8)
+',`
+deflit(UNROLL_THRESHOLD, 8)
+')
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBP, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+defframe(SAVE_EDI, -16)
+deflit(STACK_SPACE, 16)
+
+ .text
+ ALIGN(32)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function_nc)
+ movl PARAM_CARRY, %eax
+ jmp LF(M4_function_n,start)
+EPILOGUE()
+
+PROLOGUE(M4_function_n)
+
+ xorl %eax, %eax C carry
+L(start):
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %edi, SAVE_EDI
+ movl %ebx, SAVE_EBX
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_SRC2, %edx
+ movl PARAM_SRC1, %ebx
+ jae L(unroll)
+
+ movl PARAM_DST, %edi
+ leal (%ebx,%ecx,4), %ebx
+ leal (%edx,%ecx,4), %edx
+
+ leal (%edi,%ecx,4), %edi
+ negl %ecx
+ shrl %eax
+
+ C This loop in in a single 16 byte code block already, so no
+ C alignment necessary.
+L(simple):
+ C eax scratch
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi
+ C edi dst
+ C ebp
+
+ movl (%ebx,%ecx,4), %eax
+ M4_inst (%edx,%ecx,4), %eax
+ movl %eax, (%edi,%ecx,4)
+ incl %ecx
+ jnz L(simple)
+
+ movl $0, %eax
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBX, %ebx
+ setc %al
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ C This is at 0x55, close enough to aligned.
+L(unroll):
+deflit(`FRAME',STACK_SPACE)
+ movl %ebp, SAVE_EBP
+ andl $-2, %ecx C size low bit masked out
+ andl $1, PARAM_SIZE C size low bit kept
+
+ movl %ecx, %edi
+ decl %ecx
+ movl PARAM_DST, %ebp
+
+ shrl $UNROLL_LOG2, %ecx
+ negl %edi
+ movl %esi, SAVE_ESI
+
+ andl $UNROLL_MASK, %edi
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%edi,%edi,8), %esi C 9 bytes per
+')
+ negl %edi
+ shrl %eax
+
+ leal ifelse(UNROLL_BYTES,256,128) (%ebx,%edi,4), %ebx
+ leal ifelse(UNROLL_BYTES,256,128) (%edx,%edi,4), %edx
+ leal ifelse(UNROLL_BYTES,256,128) (%ebp,%edi,4), %edi
+
+ jmp *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%edi,%edi,8), %esi
+ addl $L(entry)-L(here), %esi
+ addl (%esp), %esi
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(top):
+ C eax zero
+ C ebx src1
+ C ecx counter
+ C edx src2
+ C esi scratch (was computed jump)
+ C edi dst
+ C ebp scratch
+
+ leal UNROLL_BYTES(%edx), %edx
+
+L(entry):
+deflit(CHUNK_COUNT, 2)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%ebx), %esi)
+ movl disp1(%ebx), %ebp
+Zdisp( M4_inst,disp0,(%edx), %esi)
+Zdisp( movl, %esi, disp0,(%edi))
+ M4_inst disp1(%edx), %ebp
+ movl %ebp, disp1(%edi)
+')
+
+ decl %ecx
+ leal UNROLL_BYTES(%ebx), %ebx
+ leal UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+ mov PARAM_SIZE, %esi
+ movl SAVE_EBP, %ebp
+ movl $0, %eax
+
+ decl %esi
+ js L(even)
+
+ movl (%ebx), %ecx
+ M4_inst UNROLL_BYTES(%edx), %ecx
+ movl %ecx, (%edi)
+L(even):
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+ setc %al
+
+ movl SAVE_ESI, %esi
+ addl $STACK_SPACE, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/aorsmul_1.asm b/rts/gmp/mpn/x86/k7/aorsmul_1.asm
new file mode 100644
index 0000000000..9f9c3daaf4
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/aorsmul_1.asm
@@ -0,0 +1,364 @@
+dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl
+dnl K7: 3.9 cycles/limb.
+dnl
+dnl Future: It should be possible to avoid the separate mul after the
+dnl unrolled loop by moving the movl/adcl to the top.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 4 4.42
+dnl 8 4.16
+dnl 16 3.9
+dnl 32 3.9
+dnl 64 3.87
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1',`
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+ define(M4_function_1c, mpn_addmul_1c)
+ define(M4_description, add it to)
+ define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1',`
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+ define(M4_function_1c, mpn_submul_1c)
+ define(M4_description, subtract it from)
+ define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 9)
+',`
+deflit(UNROLL_THRESHOLD, 6)
+')
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(SAVE_SIZE, 16)
+
+ .text
+ ALIGN(32)
+PROLOGUE(M4_function_1)
+ movl PARAM_SIZE, %edx
+ movl PARAM_SRC, %eax
+ xorl %ecx, %ecx
+
+ decl %edx
+ jnz LF(M4_function_1c,start_1)
+
+ movl (%eax), %eax
+ movl PARAM_DST, %ecx
+
+ mull PARAM_MULTIPLIER
+
+ M4_inst %eax, (%ecx)
+ adcl $0, %edx
+ movl %edx, %eax
+
+ ret
+EPILOGUE()
+
+ ALIGN(16)
+PROLOGUE(M4_function_1c)
+ movl PARAM_SIZE, %edx
+ movl PARAM_SRC, %eax
+
+ decl %edx
+ jnz L(more_than_one_limb)
+
+ movl (%eax), %eax
+ movl PARAM_DST, %ecx
+
+ mull PARAM_MULTIPLIER
+
+ addl PARAM_CARRY, %eax
+
+ adcl $0, %edx
+ M4_inst %eax, (%ecx)
+
+ adcl $0, %edx
+ movl %edx, %eax
+
+ ret
+
+
+ C offset 0x44 so close enough to aligned
+L(more_than_one_limb):
+ movl PARAM_CARRY, %ecx
+L(start_1):
+ C eax src
+ C ecx initial carry
+ C edx size-1
+ subl $SAVE_SIZE, %esp
+deflit(`FRAME',16)
+
+ movl %ebx, SAVE_EBX
+ movl %esi, SAVE_ESI
+ movl %edx, %ebx C size-1
+
+ movl PARAM_SRC, %esi
+ movl %ebp, SAVE_EBP
+ cmpl $UNROLL_THRESHOLD, %edx
+
+ movl PARAM_MULTIPLIER, %ebp
+ movl %edi, SAVE_EDI
+
+ movl (%esi), %eax C src low limb
+ movl PARAM_DST, %edi
+ ja L(unroll)
+
+
+ C simple loop
+
+ leal 4(%esi,%ebx,4), %esi C point one limb past last
+ leal (%edi,%ebx,4), %edi C point at last limb
+ negl %ebx
+
+ C The movl to load the next source limb is done well ahead of the
+ C mul. This is necessary for full speed, and leads to one limb
+ C handled separately at the end.
+
+L(simple):
+ C eax src limb
+ C ebx loop counter
+ C ecx carry limb
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ mull %ebp
+
+ addl %eax, %ecx
+ adcl $0, %edx
+
+ M4_inst %ecx, (%edi,%ebx,4)
+ movl (%esi,%ebx,4), %eax
+ adcl $0, %edx
+
+ incl %ebx
+ movl %edx, %ecx
+ jnz L(simple)
+
+
+ mull %ebp
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+
+ addl %eax, %ecx
+ adcl $0, %edx
+
+ M4_inst %ecx, (%edi)
+ adcl $0, %edx
+ movl SAVE_EDI, %edi
+
+ addl $SAVE_SIZE, %esp
+ movl %edx, %eax
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax src low limb
+ C ebx size-1
+ C ecx carry
+ C edx size-1
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+dnl overlapping with parameters no longer needed
+define(VAR_COUNTER,`PARAM_SIZE')
+define(VAR_JUMP, `PARAM_MULTIPLIER')
+
+ subl $2, %ebx C (size-2)-1
+ decl %edx C size-2
+
+ shrl $UNROLL_LOG2, %ebx
+ negl %edx
+
+ movl %ebx, VAR_COUNTER
+ andl $UNROLL_MASK, %edx
+
+ movl %edx, %ebx
+ shll $4, %edx
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%edx,%ebx,1), %edx
+')
+ negl %ebx
+ movl %edx, VAR_JUMP
+
+ mull %ebp
+
+ addl %eax, %ecx C initial carry, becomes low carry
+ adcl $0, %edx
+ testb $1, %bl
+
+ movl 4(%esi), %eax C src second limb
+ leal ifelse(UNROLL_BYTES,256,128+) 8(%esi,%ebx,4), %esi
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%ebx,4), %edi
+
+ movl %edx, %ebx C high carry
+ cmovnz( %ecx, %ebx) C high,low carry other way around
+ cmovnz( %edx, %ecx)
+
+ jmp *VAR_JUMP
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%edx,%ebx,1), %edx
+ addl $L(entry)-L(here), %edx
+ addl (%esp), %edx
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+C This code uses a "two carry limbs" scheme. At the top of the loop the
+C carries are ebx=lo, ecx=hi, then they swap for each limb processed. For
+C the computed jump an odd size means they start one way around, an even
+C size the other. Either way one limb is handled separately at the start of
+C the loop.
+C
+C The positioning of the movl to load the next source limb is important.
+C Moving it after the adcl with a view to avoiding a separate mul at the end
+C of the loop slows the code down.
+
+ ALIGN(32)
+L(top):
+ C eax src limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src+8
+ C edi dst
+ C ebp multiplier
+ C
+ C VAR_COUNTER loop counter
+ C
+ C 17 bytes each limb
+
+L(entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+ mull %ebp
+
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+ movl $0, %ecx
+
+ adcl %eax, %ebx
+
+Zdisp( movl, disp0,(%esi), %eax)
+ adcl %edx, %ecx
+
+
+ mull %ebp
+
+ M4_inst %ebx, disp1(%edi)
+ movl $0, %ebx
+
+ adcl %eax, %ecx
+
+ movl disp1(%esi), %eax
+ adcl %edx, %ebx
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%esi), %esi
+ leal UNROLL_BYTES(%edi), %edi
+
+ jns L(top)
+
+
+ C eax src limb
+ C ebx carry high
+ C ecx carry low
+ C edx
+ C esi
+ C edi dst (points at second last limb)
+ C ebp multiplier
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 4))
+
+ mull %ebp
+
+ M4_inst %ecx, disp0(%edi)
+ movl SAVE_EBP, %ebp
+
+ adcl %ebx, %eax
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ adcl $0, %edx
+ M4_inst %eax, disp1(%edi)
+ movl SAVE_EDI, %edi
+
+ adcl $0, %edx
+ addl $SAVE_SIZE, %esp
+
+ movl %edx, %eax
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/diveby3.asm b/rts/gmp/mpn/x86/k7/diveby3.asm
new file mode 100644
index 0000000000..57684958a5
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/diveby3.asm
@@ -0,0 +1,131 @@
+dnl AMD K7 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl K7: 8.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+C
+C Divide src,size exactly by 3, store the quotient at dst,size, and return
+C the carry-out (borrow) limb.  Each quotient limb is src_limb * INVERSE_3
+C mod 2^32; the compares against ONE_THIRD_CEIL and TWO_THIRDS_FLOOR below
+C recover the borrow to propagate into the next limb.
+C NOTE(review): this is the usual multiply-by-inverse exact-division-by-3
+C scheme -- confirm against mpn/generic/diveby3.c.
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+dnl ceil(b/3) and floor(b*2/3) where b=2^32
+deflit(ONE_THIRD_CEIL, 0x55555556)
+deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+ movl PARAM_SRC, %ecx
+ pushl %ebx defframe_pushl(SAVE_EBX)
+
+ movl PARAM_CARRY, %ebx
+ pushl %ebp defframe_pushl(SAVE_EBP)
+
+ movl PARAM_SIZE, %ebp
+ pushl %edi defframe_pushl(SAVE_EDI)
+
+ movl (%ecx), %eax C src low limb
+ pushl %esi defframe_pushl(SAVE_ESI)
+
+ movl PARAM_DST, %edi
+ movl $TWO_THIRDS_FLOOR, %esi
+ leal -4(%ecx,%ebp,4), %ecx C &src[size-1]
+
+ subl %ebx, %eax C apply carry-in to the low limb
+
+ setc %bl C borrow from the carry-in subtraction
+ decl %ebp
+ jz L(last)
+
+ leal (%edi,%ebp,4), %edi C &dst[size-1]
+ negl %ebp
+
+
+ ALIGN(16)
+L(top):
+ C eax src limb, carry subtracted
+ C ebx carry limb (0 or 1)
+ C ecx &src[size-1]
+ C edx scratch
+ C esi TWO_THIRDS_FLOOR
+ C edi &dst[size-1]
+ C ebp counter, limbs, negative
+
+ imull $INVERSE_3, %eax, %edx
+
+ movl 4(%ecx,%ebp,4), %eax C next src limb
+ cmpl $ONE_THIRD_CEIL, %edx
+
+ sbbl $-1, %ebx C +1 if result>=ceil(b/3)
+ cmpl %edx, %esi
+
+ sbbl %ebx, %eax C and further 1 if result>=ceil(b*2/3)
+ movl %edx, (%edi,%ebp,4)
+ incl %ebp
+
+ setc %bl C new carry
+ jnz L(top)
+
+
+
+L(last):
+ C eax src limb, carry subtracted
+ C ebx carry limb (0 or 1)
+ C ecx &src[size-1]
+ C edx scratch
+ C esi multiplier
+ C edi &dst[size-1]
+ C ebp
+
+ imull $INVERSE_3, %eax
+
+ cmpl $ONE_THIRD_CEIL, %eax
+ movl %eax, (%edi)
+ movl SAVE_EBP, %ebp
+
+ sbbl $-1, %ebx C +1 if eax>=ceil(b/3)
+ cmpl %eax, %esi
+ movl $0, %eax
+
+ adcl %ebx, %eax C further +1 if eax>=ceil(b*2/3)
+ movl SAVE_EDI, %edi
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/gmp-mparam.h b/rts/gmp/mpn/x86/k7/gmp-mparam.h
new file mode 100644
index 0000000000..c3bba0afc4
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/gmp-mparam.h
@@ -0,0 +1,100 @@
+/* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+/* the low limb is ready after 4 cycles, but normally it's the high limb
+ which is of interest, and that comes out after 6 cycles */
+#ifndef UMUL_TIME
+#define UMUL_TIME 6 /* cycles */
+#endif
+
+/* AMD doco says 40, but it measures 39 back-to-back */
+#ifndef UDIV_TIME
+#define UDIV_TIME 39 /* cycles */
+#endif
+
+/* using bsf */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 7 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+/* Algorithm crossover points measured on K7.  Presumably operand sizes in
+ limbs, as consumed by the generic mpn routines -- confirm against
+ gmp-impl.h.  The #ifndef guards let a build override each value. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 26
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 177
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 52
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 173
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 76
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 114
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 34
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 5
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 54
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 720, 1440, 2944, 7680, 18432, 57344, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 736
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 6912
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 784, 1696, 3200, 7680, 18432, 57344, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 800
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 8448
+#endif
diff --git a/rts/gmp/mpn/x86/k7/mmx/copyd.asm b/rts/gmp/mpn/x86/k7/mmx/copyd.asm
new file mode 100644
index 0000000000..33214daa1f
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/copyd.asm
@@ -0,0 +1,136 @@
+dnl AMD K7 mpn_copyd -- copy limb vector, decrementing.
+dnl
+dnl alignment dst/src, A=0mod8 N=4mod8
+dnl A/A A/N N/A N/N
+dnl K7 0.75 1.0 1.0 0.75
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy the size limbs at src to dst, processing from the highest limb
+C downwards.  NOTE(review): a decrementing copy is conventionally safe for
+C overlap with dst > src, but no overlap guarantee is stated here --
+C confirm what callers expect.
+C
+C The various comments in mpn/x86/k7/copyi.asm apply here too.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl parameter space reused
+define(SAVE_EBX,`PARAM_SIZE')
+define(SAVE_ESI,`PARAM_SRC')
+
+dnl minimum 5 since the unrolled code can't handle less than 5
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_copyd)
+
+ movl PARAM_SIZE, %ecx
+ movl %ebx, SAVE_EBX
+
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %edx
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ jae L(unroll)
+
+ orl %ecx, %ecx
+ jz L(simple_done)
+
+L(simple):
+ C eax src
+ C ebx scratch
+ C ecx counter
+ C edx dst
+ C
+ C this loop is 2 cycles/limb
+
+ movl -4(%eax,%ecx,4), %ebx
+ movl %ebx, -4(%edx,%ecx,4)
+ decl %ecx
+ jnz L(simple)
+
+L(simple_done):
+ movl SAVE_EBX, %ebx
+ ret
+
+
+L(unroll):
+ movl %esi, SAVE_ESI
+ leal (%eax,%ecx,4), %ebx
+ leal (%edx,%ecx,4), %esi
+
+ C bit 2 of (src_end AND dst_end) is set only when both end addresses
+ C are 4mod8 (limb pointers being 4-byte aligned), in which case one
+ C odd limb below brings the main loop to 8-byte alignment
+ andl %esi, %ebx
+ movl SAVE_ESI, %esi
+ subl $4, %ecx C size-4
+
+ testl $4, %ebx C testl to pad code closer to 16 bytes for L(top)
+ jz L(aligned)
+
+ C both src and dst unaligned, process one limb to align them
+ movl 12(%eax,%ecx,4), %ebx
+ movl %ebx, 12(%edx,%ecx,4)
+ decl %ecx
+L(aligned):
+
+
+ ALIGN(16)
+L(top):
+ C eax src
+ C ebx
+ C ecx counter, limbs
+ C edx dst
+
+ movq 8(%eax,%ecx,4), %mm0
+ movq (%eax,%ecx,4), %mm1
+ subl $4, %ecx
+ movq %mm0, 16+8(%edx,%ecx,4)
+ movq %mm1, 16(%edx,%ecx,4)
+ jns L(top)
+
+
+ C now %ecx is -4 to -1 representing respectively 0 to 3 limbs remaining
+
+ testb $2, %cl
+ jz L(finish_not_two)
+
+ movq 8(%eax,%ecx,4), %mm0
+ movq %mm0, 8(%edx,%ecx,4)
+L(finish_not_two):
+
+ testb $1, %cl
+ jz L(done)
+
+ movl (%eax), %ebx
+ movl %ebx, (%edx)
+
+L(done):
+ movl SAVE_EBX, %ebx
+ emms C leave MMX state clean for FPU code
+ ret
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/copyi.asm b/rts/gmp/mpn/x86/k7/mmx/copyi.asm
new file mode 100644
index 0000000000..b234a1628c
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/copyi.asm
@@ -0,0 +1,147 @@
+dnl AMD K7 mpn_copyi -- copy limb vector, incrementing.
+dnl
+dnl alignment dst/src, A=0mod8 N=4mod8
+dnl A/A A/N N/A N/N
+dnl K7 0.75 1.0 1.0 0.75
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Copy src,size to dst,size.
+C
+C This code at 0.75 or 1.0 c/l is always faster than a plain rep movsl at
+C 1.33 c/l.
+C
+C The K7 can do two loads, or two stores, or a load and a store, in one
+C cycle, so if those are 64-bit operations then 0.5 c/l should be possible,
+C however nothing under 0.7 c/l is known.
+C
+C If both source and destination are unaligned then one limb is processed at
+C the start to make them aligned and so get 0.75 c/l, whereas if they'd been
+C used unaligned it would be 1.5 c/l.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl parameter space reused
+define(SAVE_EBX,`PARAM_SIZE')
+
+dnl minimum 5 since the unrolled code can't handle less than 5
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_copyi)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl %ebx, SAVE_EBX
+
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %edx
+
+ cmpl $UNROLL_THRESHOLD, %ecx
+ jae L(unroll)
+
+ orl %ecx, %ecx
+ jz L(simple_done)
+
+L(simple):
+ C eax src, incrementing
+ C ebx scratch
+ C ecx counter
+ C edx dst, incrementing
+ C
+ C this loop is 2 cycles/limb
+
+ movl (%eax), %ebx
+ movl %ebx, (%edx)
+ decl %ecx
+ leal 4(%eax), %eax
+ leal 4(%edx), %edx
+ jnz L(simple)
+
+L(simple_done):
+ movl SAVE_EBX, %ebx
+ ret
+
+
+L(unroll):
+ movl %eax, %ebx
+ leal -12(%eax,%ecx,4), %eax C src end - 12
+ subl $3, %ecx C size-3
+
+ C bit 2 of (src AND dst) is set only when both are 4mod8 (limb
+ C pointers being 4-byte aligned), so one odd limb aligns the loop
+ andl %edx, %ebx
+ leal (%edx,%ecx,4), %edx C dst end - 12
+ negl %ecx
+
+ testl $4, %ebx C testl to pad code closer to 16 bytes for L(top)
+ jz L(aligned)
+
+ C both src and dst unaligned, process one limb to align them
+ movl (%eax,%ecx,4), %ebx
+ movl %ebx, (%edx,%ecx,4)
+ incl %ecx
+L(aligned):
+
+
+ ALIGN(16)
+L(top):
+ C eax src end - 12
+ C ebx
+ C ecx counter, negative, limbs
+ C edx dst end - 12
+
+ movq (%eax,%ecx,4), %mm0
+ movq 8(%eax,%ecx,4), %mm1
+ addl $4, %ecx
+ movq %mm0, -16(%edx,%ecx,4)
+ movq %mm1, -16+8(%edx,%ecx,4)
+ ja L(top) C jump no carry and not zero
+
+
+ C now %ecx is 0 to 3 representing respectively 3 to 0 limbs remaining
+ C (the branch senses below are inverted relative to copyd because this
+ C counter runs upwards)
+
+ testb $2, %cl
+ jnz L(finish_not_two)
+
+ movq (%eax,%ecx,4), %mm0
+ movq %mm0, (%edx,%ecx,4)
+L(finish_not_two):
+
+ testb $1, %cl
+ jnz L(done)
+
+ movl 8(%eax), %ebx
+ movl %ebx, 8(%edx)
+
+L(done):
+ movl SAVE_EBX, %ebx
+ emms C leave MMX state clean for FPU code
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm b/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm
new file mode 100644
index 0000000000..483ad6a9a1
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/divrem_1.asm
@@ -0,0 +1,718 @@
+dnl AMD K7 mpn_divrem_1 -- mpn by limb division.
+dnl
+dnl K7: 17.0 cycles/limb integer part, 15.0 cycles/limb fraction part.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor, mp_limb_t carry);
+C
+C The method and nomenclature follow part 8 of "Division by Invariant
+C Integers using Multiplication" by Granlund and Montgomery, reference in
+C gmp.texi.
+C
+C The "and"s shown in the paper are done here with "cmov"s. "m" is written
+C for m', and "d" for d_norm, which won't cause any confusion since it's
+C only the normalized divisor that's of any use in the code. "b" is written
+C for 2^N, the size of a limb, N being 32 here.
+C
+C mpn_divrem_1 avoids one division if the src high limb is less than the
+C divisor. mpn_divrem_1c doesn't check for a zero carry, since in normal
+C circumstances that will be a very rare event.
+C
+C There's a small bias towards expecting xsize==0, by having code for
+C xsize==0 in a straight line and xsize!=0 under forward jumps.
+
+
+dnl MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl inverse method is used, rather than plain "divl"s. Minimum value 1.
+dnl
+dnl The inverse takes about 50 cycles to calculate, but after that the
+dnl multiply is 17 c/l versus division at 42 c/l.
+dnl
+dnl At 3 limbs the mul is a touch faster than div on the integer part, and
+dnl even more so on the fractional part.
+
+deflit(MUL_THRESHOLD, 3)
+
+
+defframe(PARAM_CARRY, 24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC, 12)
+defframe(PARAM_XSIZE, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC, -28)
+defframe(VAR_DST, -32)
+defframe(VAR_DST_STOP,-36)
+
+deflit(STACK_SPACE, 36)
+
+ .text
+ ALIGN(32)
+
+C mpn_divrem_1c: same as mpn_divrem_1 but with an explicit carry limb,
+C loaded into %edx here before falling into the common code at start_1c.
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ leal -4(%edi,%ebx,4), %edi C &dst[xsize-1]
+ jmp LF(mpn_divrem_1,start_1c) C tail-enter with carry in %edx
+
+EPILOGUE()
+
+
+C mpn_divrem_1: divide src,size by divisor, also developing xsize further
+C "fraction" quotient limbs (as if src had xsize zero limbs appended
+C below).  The integer quotient goes to dst[xsize..xsize+size-1], the
+C fraction limbs to dst[0..xsize-1], and the remainder is returned.
+C
+ C offset 0x31, close enough to aligned
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ orl %ecx, %ecx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+ leal -4(%edi,%ebx,4), %edi C &dst[xsize-1]
+
+ jz L(no_skip_div)
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C one less div if high<divisor
+ jnb L(no_skip_div)
+
+ movl $0, (%edi,%ecx,4) C dst high limb
+ decl %ecx C size-1
+ movl %eax, %edx C src high limb as initial carry
+L(no_skip_div):
+
+
+L(start_1c):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ leal (%ebx,%ecx), %eax C size+xsize
+ cmpl $MUL_THRESHOLD, %eax
+ jae L(mul_by_inverse)
+
+
+C With MUL_THRESHOLD set to 3, the simple loops here only do 0 to 2 limbs.
+C It'd be possible to write them out without the looping, but no speedup
+C would be expected.
+C
+C Using PARAM_DIVISOR instead of %ebp measures 1 cycle/loop faster on the
+C integer part, but curiously not on the fractional part, where %ebp is a
+C (fixed) couple of cycles faster.
+
+ orl %ecx, %ecx
+ jz L(divide_no_integer)
+
+L(divide_integer):
+ C eax scratch (quotient)
+ C ebx xsize
+ C ecx counter
+ C edx scratch (remainder)
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl PARAM_DIVISOR
+
+ movl %eax, (%edi,%ecx,4)
+ decl %ecx
+ jnz L(divide_integer)
+
+
+L(divide_no_integer):
+ movl PARAM_DST, %edi
+ orl %ebx, %ebx
+ jnz L(divide_fraction)
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl SAVE_EDI, %edi
+ movl %edx, %eax C remainder is the return value
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+L(divide_fraction):
+ C eax scratch (quotient)
+ C ebx counter
+ C ecx
+ C edx scratch (remainder)
+ C esi
+ C edi dst
+ C ebp divisor
+
+ movl $0, %eax C fraction limbs are zero below the remainder
+
+ divl %ebp
+
+ movl %eax, -4(%edi,%ebx,4)
+ decl %ebx
+ jnz L(divide_fraction)
+
+ jmp L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ bsrl %ebp, %eax C 31-l
+
+ leal 12(%edi), %ebx
+ leal 4(%edi,%ecx,4), %edi C &dst[xsize+size]
+
+ movl %edi, VAR_DST
+ movl %ebx, VAR_DST_STOP
+
+ movl %ecx, %ebx C size
+ movl $31, %ecx
+
+ movl %edx, %edi C carry
+ movl $-1, %edx
+
+ C
+
+ xorl %eax, %ecx C l
+ incl %eax C 32-l
+
+ shll %cl, %ebp C d normalized
+ movl %ecx, VAR_NORM
+
+ movd %eax, %mm7
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 giving edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ orl %ebx, %ebx C size
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ jz L(start_zero)
+ movl %eax, VAR_SRC
+ cmpl $1, %ebx
+
+ movl 8(%eax), %esi C src high limb
+ jz L(start_one)
+
+L(start_two_or_more):
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ cmpl $2, %ebx
+ je L(integer_two_left)
+ jmp L(integer_top)
+
+
+L(start_one):
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shll %cl, %esi C n10 = high << l
+ movl %eax, VAR_SRC
+ jmp L(integer_one_left)
+
+
+L(start_zero):
+ shll %cl, %edi C n2 = carry << l
+ movl $0, %esi C n10 = 0
+
+ C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then
+ C must have xsize!=0
+ jmp L(fraction_some)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The multiply by inverse loop is 17 cycles, and relies on some out-of-order
+C execution. The instruction scheduling is important, with various
+C apparently equivalent forms running 1 to 5 cycles slower.
+C
+C A lower bound for the time would seem to be 16 cycles, based on the
+C following successive dependencies.
+C
+C cycles
+C n2+n1 1
+C mul 6
+C q1+1 1
+C mul 6
+C sub 1
+C addback 1
+C ---
+C 16
+C
+C This chain is what the loop has already, but 16 cycles isn't achieved.
+C K7 has enough decode, and probably enough execute (depending maybe on what
+C a mul actually consumes), but nothing running under 17 has been found.
+C
+C In theory n2+n1 could be done in the sub and addback stages (by
+C calculating both n2 and n2+n1 there), but lack of registers makes this an
+C unlikely proposition.
+C
+C The jz in the loop keeps the q1+1 stage to 1 cycle. Handling an overflow
+C from q1+1 with an "sbbl $0, %ebx" would add a cycle to the dependent
+C chain, and nothing better than 18 cycles has been found when using it.
+C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will
+C be an extremely rare event.
+C
+C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but
+C if some special data is coming out with this always, the q1_ff special
+C case actually runs at 15 c/l. 0x2FFF...FFFD divided by 3 is a good way to
+C induce the q1_ff case, for speed measurements or testing. Note that
+C 0xFFF...FFF divided by 1 or 2 doesn't induce it.
+C
+C The instruction groupings and empty comments show the cycles for a naive
+C in-order view of the code (conveniently ignoring the load latency on
+C VAR_INVERSE). This shows some of where the time is going, but is nonsense
+C to the extent that out-of-order execution rearranges it. In this case
+C there's 19 cycles shown, but it executes at 17.
+
+ ALIGN(16)
+L(integer_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl VAR_SRC, %ecx
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next limb and the one below it
+ subl $4, %ecx
+
+ movl %ecx, VAR_SRC
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+ movl VAR_DST, %ecx
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+
+ leal -4(%ecx), %ecx
+
+ C
+
+ subl %eax, %esi
+ movl VAR_DST_STOP, %eax
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+ cmpl %eax, %ecx
+
+ movl %ebx, (%ecx)
+ movl %ecx, VAR_DST
+ jne L(integer_top)
+
+
+L(integer_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C
+C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
+C q1_ff special case. This makes the code a bit smaller and simpler, and
+C costs only 1 cycle (each).
+
+L(integer_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl PARAM_SRC, %ecx
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd (%ecx), %mm0 C src low limb
+
+ movl VAR_DST_STOP, %ecx
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+
+ movl %ebx, -4(%ecx)
+
+
+C -----------------------------------------------------------------------------
+L(integer_one_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx dst
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ movl VAR_DST_STOP, %ecx
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1)
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx C q1 if q1+1 overflowed
+
+ mull %ebx
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+
+ movl %ebx, -8(%ecx)
+ subl $8, %ecx
+
+
+
+L(integer_none):
+ cmpl $0, PARAM_XSIZE
+ jne L(fraction_some)
+
+ movl %edi, %eax C remainder, still shifted left by l
+L(fraction_done):
+ movl VAR_NORM, %ecx
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EDI, %edi
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBX, %ebx
+ addl $STACK_SPACE, %esp
+
+ shrl %cl, %eax C undo the normalization shift
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx
+ C edx
+ C esi n10
+ C edi n2
+ C ebp divisor
+
+ movl VAR_DST, %ecx
+ movl VAR_DST_STOP, %edx
+ subl $4, %ecx
+
+ psrlq %mm7, %mm0
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ movl %ecx, VAR_DST
+
+ movd %mm0, %esi C next n10
+
+ movl $-1, (%ecx)
+ cmpl %ecx, %edx
+ jne L(integer_top)
+
+ jmp L(integer_loop_done)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C Being the fractional part, the "source" limbs are all zero, meaning
+C n10=0, n1=0, and hence nadj=0, leading to many instructions eliminated.
+C
+C The loop runs at 15 cycles. The dependent chain is the same as the
+C general case above, but without the n2+n1 stage (due to n1==0), so 15
+C would seem to be the lower bound.
+C
+C A not entirely obvious simplification is that q1+1 never overflows a limb,
+C and so there's no need for the sbbl $0 or jz q1_ff from the general case.
+C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always.
+C rnd() means rounding down to a multiple of d.
+C
+C m*n2 + b*n2 <= m*(d-1) + b*(d-1)
+C = m*d + b*d - m - b
+C = floor((b(b-d)-1)/d)*d + b*d - m - b
+C = rnd(b(b-d)-1) + b*d - m - b
+C = rnd(b(b-d)-1 + b*d) - m - b
+C = rnd(b*b-1) - m - b
+C <= (b-2)*b
+C
+C Unchanged from the general case is that the final quotient limb q can be
+C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from
+C equation 8.4 of the paper which simplifies as follows when n1==0 and
+C n0==0.
+C
+C n-q1*d = (n2*k+q0*d)/b <= d + (d*d-2d)/b
+C
+C As before, the instruction groupings and empty comments show a naive
+C in-order view of the code, which is made a nonsense by out of order
+C execution. There's 17 cycles shown, but it executes at 15.
+C
+C Rotating the store q and remainder->n2 instructions up to the top of the
+C loop gets the run time down from 16 to 15.
+
+ ALIGN(16)
+L(fraction_some):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi carry
+ C ebp divisor
+
+ movl PARAM_DST, %esi
+ movl VAR_DST_STOP, %ecx
+ movl %edi, %eax
+
+ subl $8, %ecx
+
+ jmp L(fraction_entry)
+
+
+ ALIGN(16)
+L(fraction_top):
+ C eax n2 carry, then scratch
+ C ebx scratch (nadj, q1)
+ C ecx dst, decrementing
+ C edx scratch
+ C esi dst stop point
+ C edi (will be n2)
+ C ebp divisor
+
+ movl %ebx, (%ecx) C previous q
+ movl %eax, %edi C remainder->n2
+
+L(fraction_entry):
+ mull VAR_INVERSE C m*n2
+
+ movl %ebp, %eax C d
+ subl $4, %ecx C dst
+ leal 1(%edi), %ebx
+
+ C
+
+ C
+
+ C
+
+ C
+
+ addl %edx, %ebx C 1 + high(n2<<32 + m*n2) = q1+1
+
+ mull %ebx C (q1+1)*d
+
+ C
+
+ C
+
+ C
+
+ negl %eax C low of n - (q1+1)*d
+
+ C
+
+ sbbl %edx, %edi C high of n - (q1+1)*d, caring only about carry
+ leal (%ebp,%eax), %edx
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ sbbl $0, %ebx C q
+ cmpl %esi, %ecx
+
+ jne L(fraction_top)
+
+
+ movl %ebx, (%ecx)
+ jmp L(fraction_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/lshift.asm b/rts/gmp/mpn/x86/k7/mmx/lshift.asm
new file mode 100644
index 0000000000..4d17c881ec
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/lshift.asm
@@ -0,0 +1,472 @@
+dnl AMD K7 mpn_lshift -- mpn left shift.
+dnl
+dnl K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 4 1.51
+dnl 8 1.26
+dnl 16 1.21
+dnl 32 1.2
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size left by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the right. The bits shifted out at the left are
+C the return value.
+C
+C The comments in mpn_rshift apply here too.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 10)
+',`
+deflit(UNROLL_THRESHOLD, 10)
+')
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EDI, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+deflit(SAVE_SIZE, 12)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_lshift)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_SRC, %edx
+ subl $SAVE_SIZE, %esp
+deflit(`FRAME',SAVE_SIZE)
+
+ movl PARAM_SHIFT, %ecx
+ movl %edi, SAVE_EDI
+
+ movl PARAM_DST, %edi
+ decl %eax
+ jnz L(more_than_one_limb)
+
+ movl (%edx), %edx
+
+ shldl( %cl, %edx, %eax) C eax was decremented to zero
+
+ shll %cl, %edx
+
+ movl %edx, (%edi)
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(more_than_one_limb):
+ C eax size-1
+ C ebx
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ movd PARAM_SHIFT, %mm6
+ movd (%edx,%eax,4), %mm5 C src high limb
+ cmp $UNROLL_THRESHOLD-1, %eax
+
+ jae L(unroll)
+ negl %ecx
+ movd (%edx), %mm4 C src low limb
+
+ addl $32, %ecx
+
+ movd %ecx, %mm7
+
+L(simple_top):
+ C eax loop counter, limbs
+ C ebx
+ C ecx
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+ C
+ C mm0 scratch
+ C mm4 src low limb
+ C mm5 src high limb
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%edx,%eax,4), %mm0
+ decl %eax
+
+ psrlq %mm7, %mm0
+
+ movd %mm0, 4(%edi,%eax,4)
+ jnz L(simple_top)
+
+
+ psllq %mm6, %mm5
+ psllq %mm6, %mm4
+
+ psrlq $32, %mm5
+ movd %mm4, (%edi) C dst low limb
+
+ movd %mm5, %eax C return value
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx (saved)
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+ C
+ C mm5 src high limb, for return value
+ C mm6 lshift
+
+ movl %esi, SAVE_ESI
+ movl %ebx, SAVE_EBX
+ leal -4(%edx,%eax,4), %edx C &src[size-2]
+
+ testb $4, %dl
+ movq (%edx), %mm1 C src high qword
+
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process high limb (marked xxx) separately to
+ C make it so
+ C
+ C source -4(edx,%eax,4)
+ C |
+ C +-------+-------+-------+--
+ C | xxx |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+ C
+ C dest -4(edi,%eax,4)
+ C |
+ C +-------+-------+--
+ C | xxx | |
+ C +-------+-------+--
+
+ psllq %mm6, %mm1
+ subl $4, %edx
+ movl %eax, PARAM_SIZE C size-1
+
+ psrlq $32, %mm1
+ decl %eax C size-2 is new size-1
+
+ movd %mm1, 4(%edi,%eax,4)
+ movq (%edx), %mm1 C new src high qword
+L(start_src_aligned):
+
+
+ leal -4(%edi,%eax,4), %edi C &dst[size-2]
+ psllq %mm6, %mm5
+
+ testl $4, %edi
+ psrlq $32, %mm5 C return value
+
+ jz L(start_dst_aligned)
+
+
+ C dst isn't aligned, subtract 4 bytes to make it so, and pretend the
+ C shift is 32 bits extra. High limb of dst (marked xxx) handled
+ C here separately.
+ C
+ C source %edx
+ C +-------+-------+--
+ C | mm1 |
+ C +-------+-------+--
+ C 0mod8 4mod8
+ C
+ C dest %edi
+ C +-------+-------+-------+--
+ C | xxx |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+
+ movq %mm1, %mm0
+ psllq %mm6, %mm1
+ addl $32, %ecx C shift+32
+
+ psrlq $32, %mm1
+
+ movd %mm1, 4(%edi)
+ movq %mm0, %mm1
+ subl $4, %edi
+
+ movd %ecx, %mm6 C new lshift
+L(start_dst_aligned):
+
+ decl %eax C size-2, two last limbs handled at end
+ movq %mm1, %mm2 C copy of src high qword
+ negl %ecx
+
+ andl $-2, %eax C round size down to even
+ addl $64, %ecx
+
+ movl %eax, %ebx
+ negl %eax
+
+ andl $UNROLL_MASK, %eax
+ decl %ebx
+
+ shll %eax
+
+ movd %ecx, %mm7 C rshift = 64-lshift
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%eax,%eax,4), %esi
+')
+ shrl $UNROLL_LOG2, %ebx C loop counter
+
+ leal ifelse(UNROLL_BYTES,256,128) -8(%edx,%eax,2), %edx
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi
+ movl PARAM_SIZE, %eax C for use at end
+ jmp *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%eax,%eax,4), %esi
+ addl $L(entry)-L(here), %esi
+ addl (%esp), %esi
+
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(top):
+ C eax size (for use at end)
+ C ebx loop counter
+ C ecx rshift
+ C edx src
+ C esi computed jump
+ C edi dst
+ C ebp
+ C
+ C mm0 scratch
+ C mm1 \ carry (alternating, mm2 first)
+ C mm2 /
+ C mm6 lshift
+ C mm7 rshift
+ C
+ C 10 code bytes/limb
+ C
+ C The two chunks differ in whether mm1 or mm2 hold the carry.
+ C The computed jump puts the initial carry in both mm1 and mm2.
+
+L(entry):
+deflit(CHUNK_COUNT, 4)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(-i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 - 8))
+
+ movq disp0(%edx), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm2, %mm0
+ movq %mm0, disp0(%edi)
+
+
+ movq disp1(%edx), %mm0
+ psllq %mm6, %mm1
+
+ movq %mm0, %mm2
+ psrlq %mm7, %mm0
+
+ por %mm1, %mm0
+ movq %mm0, disp1(%edi)
+')
+
+ subl $UNROLL_BYTES, %edx
+ subl $UNROLL_BYTES, %edi
+ decl %ebx
+
+ jns L(top)
+
+
+
+define(`disp', `m4_empty_if_zero(eval($1 ifelse(UNROLL_BYTES,256,-128)))')
+
+L(end):
+ testb $1, %al
+ movl SAVE_EBX, %ebx
+ psllq %mm6, %mm2 C wanted left shifted in all cases below
+
+ movd %mm5, %eax
+
+ movl SAVE_ESI, %esi
+ jz L(end_even)
+
+
+L(end_odd):
+
+ C Size odd, destination was aligned.
+ C
+ C source edx+8 edx+4
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edi
+ C --+---------------+---------------+-------+
+ C | written | | |
+ C --+---------------+---------------+-------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size odd, destination was unaligned.
+ C
+ C source edx+8 edx+4
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edi
+ C --+---------------+---------------+
+ C | written | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword at (%edi), and in the aligned case
+ C there's an extra limb of dst to be formed from that extra src limb
+ C left shifted.
+
+ movd disp(4) (%edx), %mm0
+ testb $32, %cl
+
+ movq %mm0, %mm1
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+ psllq %mm6, %mm1
+
+ por %mm2, %mm0
+
+ movq %mm0, disp(0) (%edi)
+ jz L(end_odd_unaligned)
+ movd %mm1, disp(-4) (%edi)
+L(end_odd_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+L(end_even):
+
+ C Size even, destination was aligned.
+ C
+ C source edx+8
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edi
+ C --+---------------+---------------+
+ C | written | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size even, destination was unaligned.
+ C
+ C source edx+8
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edi+4
+ C --+---------------+-------+
+ C | written | |
+ C --+---------------+-------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C The movq for the aligned case overwrites the movd for the
+ C unaligned case.
+
+ movq %mm2, %mm0
+ psrlq $32, %mm2
+
+ testb $32, %cl
+ movd %mm2, disp(4) (%edi)
+
+ jz L(end_even_unaligned)
+ movq %mm0, disp(0) (%edi)
+L(end_even_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/mod_1.asm b/rts/gmp/mpn/x86/k7/mmx/mod_1.asm
new file mode 100644
index 0000000000..545ca56ddf
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/mod_1.asm
@@ -0,0 +1,457 @@
+dnl AMD K7 mpn_mod_1 -- mpn by limb remainder.
+dnl
+dnl K7: 17.0 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C The code here is the same as mpn_divrem_1, but with the quotient
+C discarded. See mpn/x86/k7/mmx/divrem_1.asm for some comments.
+
+
+dnl MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+dnl
+dnl The inverse takes about 50 cycles to calculate, but after that the
+dnl multiply is 17 c/l versus division at 41 c/l.
+dnl
+dnl Using mul or div is about the same speed at 3 limbs, so the threshold
+dnl is set to 4 to get the smaller div code used at 3.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC_STOP,-28)
+
+deflit(STACK_SPACE, 28)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ jmp LF(mpn_mod_1,start_1c)
+
+EPILOGUE()
+
+
+ ALIGN(32)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C carry flag if high<divisor
+
+ cmovc( %eax, %edx) C src high limb as initial carry
+ sbbl $0, %ecx C size-1 to skip one div
+ jz L(divide_done)
+
+
+ ALIGN(16)
+L(start_1c):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ cmpl $MUL_THRESHOLD, %ecx
+ jae L(mul_by_inverse)
+
+
+
+C With a MUL_THRESHOLD of 4, this "loop" only ever does 1 to 3 iterations,
+C but it's already fast and compact, and there's nothing to gain by
+C expanding it out.
+C
+C Using PARAM_DIVISOR in the divl is a couple of cycles faster than %ebp.
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl PARAM_DIVISOR
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ movl %edx, %eax
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ bsrl %ebp, %eax C 31-l
+
+ movl %ebx, SAVE_EBX
+ leal -4(%esi), %ebx
+
+ movl %ebx, VAR_SRC_STOP
+ movl %edi, SAVE_EDI
+
+ movl %ecx, %ebx C size
+ movl $31, %ecx
+
+ movl %edx, %edi C carry
+ movl $-1, %edx
+
+ C
+
+ xorl %eax, %ecx C l
+ incl %eax C 32-l
+
+ shll %cl, %ebp C d normalized
+ movl %ecx, VAR_NORM
+
+ movd %eax, %mm7
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ C
+
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl 8(%eax), %esi C src high limb
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ movl %eax, %ecx C &src[size-3]
+
+
+ifelse(MUL_THRESHOLD,2,`
+ cmpl $2, %ebx
+ je L(inverse_two_left)
+')
+
+
+C The dependent chain here is the same as in mpn_divrem_1, but a few
+C instructions are saved by not needing to store the quotient limbs.
+C Unfortunately this doesn't get the code down to the theoretical 16 c/l.
+C
+C There's four dummy instructions in the loop, all of which are necessary
+C for the claimed 17 c/l. It's a 1 to 3 cycle slowdown if any are removed,
+C or changed from load to store or vice versa. They're not completely
+C random, since they correspond to what mpn_divrem_1 has, but there's no
+C obvious reason why they're necessary. Presumably they induce something
+C good in the out of order execution, perhaps through some load/store
+C ordering and/or decoding effects.
+C
+C The q1==0xFFFFFFFF case is handled here the same as in mpn_divrem_1. On
+C on special data that comes out as q1==0xFFFFFFFF always, the loop runs at
+C about 13.5 c/l.
+
+ ALIGN(32)
+L(inverse_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx src pointer, decrementing
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl PARAM_SIZE, %ebx C dummy
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next src limb and the one below it
+ subl $4, %ecx
+
+ movl %ecx, PARAM_SIZE C dummy
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+ nop C dummy
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+ leal 0(%ecx), %ecx C dummy
+
+ C
+
+ C
+
+ subl %eax, %esi
+ movl VAR_SRC_STOP, %eax
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ cmpl %eax, %ecx
+ jne L(inverse_top)
+
+
+L(inverse_loop_done):
+
+
+C -----------------------------------------------------------------------------
+
+L(inverse_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx &src[-1]
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src dword)
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd 4(%ecx), %mm0 C src low limb
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+
+
+C One limb left
+
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movl VAR_NORM, %ecx C for final denorm
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ movl SAVE_EBX, %ebx
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ movl %esi, %eax C remainder
+ movl SAVE_ESI, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ leal (%ebp,%eax), %edx
+ movl SAVE_EBP, %ebp
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ movl SAVE_EDI, %edi
+
+ shrl %cl, %eax C denorm remainder
+ addl $STACK_SPACE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx src pointer
+ C edx
+ C esi n10
+ C edi (n2)
+ C ebp divisor
+
+ movl VAR_SRC_STOP, %edx
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ psrlq %mm7, %mm0
+
+ movd %mm0, %esi C next n10
+
+ cmpl %ecx, %edx
+ jne L(inverse_top)
+ jmp L(inverse_loop_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/popham.asm b/rts/gmp/mpn/x86/k7/mmx/popham.asm
new file mode 100644
index 0000000000..fa7c8c04a5
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/popham.asm
@@ -0,0 +1,239 @@
+dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming
+dnl distance.
+dnl
+dnl K7: popcount 5.0 cycles/limb, hamdist 6.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl Only recent versions of gas know psadbw, in particular gas 2.9.1 on
+dnl FreeBSD 3.3 and 3.4 doesn't recognise it.
+
+define(psadbw_mm4_mm0,
+`ifelse(m4_ifdef_anyof_p(`HAVE_TARGET_CPU_athlon',
+ `HAVE_TARGET_CPU_pentium3'),1,
+ `.byte 0x0f,0xf6,0xc4 C psadbw %mm4, %mm0',
+
+`m4_warning(`warning, using simulated and only partly functional psadbw, use for testing only
+') C this works enough for the sum of bytes done below, making it
+ C possible to test on an older cpu
+ leal -8(%esp), %esp
+ movq %mm4, (%esp)
+ movq %mm0, %mm4
+forloop(i,1,7,
+` psrlq $ 8, %mm4
+ paddb %mm4, %mm0
+')
+ pushl $ 0
+ pushl $ 0xFF
+ pand (%esp), %mm0
+ movq 8(%esp), %mm4
+ leal 16(%esp), %esp
+')')
+
+
+C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
+C unsigned long mpn_hamdist (mp_srcptr src, mp_srcptr src2, mp_size_t size);
+C
+C The code here is almost certainly not optimal, but is already a 3x speedup
+C over the generic C code. The main improvement would be to interleave
+C processing of two qwords in the loop so as to fully exploit the available
+C execution units, possibly leading to 3.25 c/l (13 cycles for 4 limbs).
+C
+C The loop is based on the example "Efficient 64-bit population count using
+C MMX instructions" in the Athlon Optimization Guide, AMD document 22007,
+C page 158 of rev E (reference in mpn/x86/k7/README).
+
+ifdef(`OPERATION_popcount',,
+`ifdef(`OPERATION_hamdist',,
+`m4_error(`Need OPERATION_popcount or OPERATION_hamdist defined
+')')')
+
+define(HAM,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_hamdist',`$1')')
+
+define(POP,
+m4_assert_numargs(1)
+`ifdef(`OPERATION_popcount',`$1')')
+
+HAM(`
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_hamdist)
+')
+POP(`
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+define(M4_function,mpn_popcount)
+')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+
+
+ifdef(`PIC',,`
+ dnl non-PIC
+
+ DATA
+ ALIGN(8)
+
+define(LS,
+m4_assert_numargs(1)
+`LF(M4_function,`$1')')
+
+LS(rodata_AAAAAAAAAAAAAAAA):
+ .long 0xAAAAAAAA
+ .long 0xAAAAAAAA
+
+LS(rodata_3333333333333333):
+ .long 0x33333333
+ .long 0x33333333
+
+LS(rodata_0F0F0F0F0F0F0F0F):
+ .long 0x0F0F0F0F
+ .long 0x0F0F0F0F
+')
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ orl %ecx, %ecx
+ jz L(zero)
+
+ifdef(`PIC',`
+ movl $0xAAAAAAAA, %eax
+ movl $0x33333333, %edx
+
+ movd %eax, %mm7
+ movd %edx, %mm6
+
+ movl $0x0F0F0F0F, %eax
+
+ punpckldq %mm7, %mm7
+ punpckldq %mm6, %mm6
+
+ movd %eax, %mm5
+ movd %edx, %mm4
+
+ punpckldq %mm5, %mm5
+
+',`
+ movq LS(rodata_AAAAAAAAAAAAAAAA), %mm7
+ movq LS(rodata_3333333333333333), %mm6
+ movq LS(rodata_0F0F0F0F0F0F0F0F), %mm5
+')
+ pxor %mm4, %mm4
+
+define(REG_AAAAAAAAAAAAAAAA,%mm7)
+define(REG_3333333333333333,%mm6)
+define(REG_0F0F0F0F0F0F0F0F,%mm5)
+define(REG_0000000000000000,%mm4)
+
+
+ movl PARAM_SRC, %eax
+HAM(` movl PARAM_SRC2, %edx')
+
+ pxor %mm2, %mm2 C total
+
+ shrl %ecx
+ jnc L(top)
+
+ movd (%eax,%ecx,8), %mm1
+
+HAM(` movd 0(%edx,%ecx,8), %mm0
+ pxor %mm0, %mm1
+')
+ orl %ecx, %ecx
+ jmp L(loaded)
+
+
+ ALIGN(16)
+L(top):
+ C eax src
+ C ebx
+ C ecx counter, qwords, decrementing
+ C edx [hamdist] src2
+ C
+ C mm0 (scratch)
+ C mm1 (scratch)
+ C mm2 total (low dword)
+ C mm3
+ C mm4 \
+ C mm5 | special constants
+ C mm6 |
+ C mm7 /
+
+ movq -8(%eax,%ecx,8), %mm1
+
+HAM(` pxor -8(%edx,%ecx,8), %mm1')
+ decl %ecx
+
+L(loaded):
+ movq %mm1, %mm0
+ pand REG_AAAAAAAAAAAAAAAA, %mm1
+
+ psrlq $1, %mm1
+
+ psubd %mm1, %mm0 C bit pairs
+
+
+ movq %mm0, %mm1
+ psrlq $2, %mm0
+
+ pand REG_3333333333333333, %mm0
+ pand REG_3333333333333333, %mm1
+
+ paddd %mm1, %mm0 C nibbles
+
+
+ movq %mm0, %mm1
+ psrlq $4, %mm0
+
+ pand REG_0F0F0F0F0F0F0F0F, %mm0
+ pand REG_0F0F0F0F0F0F0F0F, %mm1
+
+ paddd %mm1, %mm0 C bytes
+
+
+ psadbw_mm4_mm0
+
+ paddd %mm0, %mm2 C add to total
+ jnz L(top)
+
+
+ movd %mm2, %eax
+ emms
+ ret
+
+
+L(zero):
+ movl $0, %eax
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mmx/rshift.asm b/rts/gmp/mpn/x86/k7/mmx/rshift.asm
new file mode 100644
index 0000000000..abb546cd5b
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mmx/rshift.asm
@@ -0,0 +1,471 @@
+dnl AMD K7 mpn_rshift -- mpn right shift.
+dnl
+dnl K7: 1.21 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 4 1.51
+dnl 8 1.26
+dnl 16 1.21
+dnl 32 1.2
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size right by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the left. The bits shifted out at the right are
+C the return value.
+C
+C This code uses 64-bit MMX operations, which makes it possible to handle
+C two limbs at a time, for a theoretical 1.0 cycles/limb. Plain integer
+C code, on the other hand, suffers from shrd being a vector path decode and
+C running at 3 cycles back-to-back.
+C
+C Full speed depends on source and destination being aligned, and some hairy
+C setups and finish-ups are done to arrange this for the loop.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 10)
+',`
+deflit(UNROLL_THRESHOLD, 10)
+')
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EDI, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EBX, -12)
+deflit(SAVE_SIZE, 12)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(mpn_rshift)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_SRC, %edx
+ subl $SAVE_SIZE, %esp
+deflit(`FRAME',SAVE_SIZE)
+
+ movl PARAM_SHIFT, %ecx
+ movl %edi, SAVE_EDI
+
+ movl PARAM_DST, %edi
+ decl %eax
+ jnz L(more_than_one_limb)
+
+ movl (%edx), %edx C src limb
+
+ shrdl( %cl, %edx, %eax) C eax was decremented to zero
+
+ shrl %cl, %edx
+
+ movl %edx, (%edi) C dst limb
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(more_than_one_limb):
+ C eax size-1
+ C ebx
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+
+ movd PARAM_SHIFT, %mm6 C rshift
+ movd (%edx), %mm5 C src low limb
+ cmp $UNROLL_THRESHOLD-1, %eax
+
+ jae L(unroll)
+ leal (%edx,%eax,4), %edx C &src[size-1]
+ leal -4(%edi,%eax,4), %edi C &dst[size-2]
+
+ movd (%edx), %mm4 C src high limb
+ negl %eax
+
+
+L(simple_top):
+ C eax loop counter, limbs, negative
+ C ebx
+ C ecx shift
+	C esi
+ C edx &src[size-1]
+ C edi &dst[size-2]
+ C ebp
+ C
+ C mm0 scratch
+ C mm4 src high limb
+ C mm5 src low limb
+ C mm6 shift
+
+ movq (%edx,%eax,4), %mm0
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+ movd %mm0, (%edi,%eax,4)
+ jnz L(simple_top)
+
+
+ psllq $32, %mm5
+ psrlq %mm6, %mm4
+
+ psrlq %mm6, %mm5
+ movd %mm4, 4(%edi) C dst high limb
+
+ movd %mm5, %eax C return value
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll):
+ C eax size-1
+ C ebx
+ C ecx shift
+ C edx src
+ C esi
+ C edi dst
+ C ebp
+ C
+ C mm5 src low limb
+ C mm6 rshift
+
+ testb $4, %dl
+ movl %esi, SAVE_ESI
+ movl %ebx, SAVE_EBX
+
+ psllq $32, %mm5
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process low limb separately (marked xxx) and
+ C step src and dst by one limb, making src aligned.
+ C
+ C source edx
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+ C
+ C dest edi
+ C --+-------+-------+
+ C | | xxx |
+ C --+-------+-------+
+
+ movq (%edx), %mm0 C src low two limbs
+ addl $4, %edx
+ movl %eax, PARAM_SIZE C size-1
+
+ addl $4, %edi
+ decl %eax C size-2 is new size-1
+
+ psrlq %mm6, %mm0
+ movl %edi, PARAM_DST C new dst
+
+ movd %mm0, -4(%edi)
+L(start_src_aligned):
+
+
+ movq (%edx), %mm1 C src low two limbs
+ decl %eax C size-2, two last limbs handled at end
+ testl $4, %edi
+
+ psrlq %mm6, %mm5
+ jz L(start_dst_aligned)
+
+
+ C dst isn't aligned, add 4 to make it so, and pretend the shift is
+ C 32 bits extra. Low limb of dst (marked xxx) handled here separately.
+ C
+ C source edx
+ C --+-------+-------+
+ C | mm1 |
+ C --+-------+-------+
+ C 4mod8 0mod8
+ C
+ C dest edi
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+
+ movq %mm1, %mm0
+ psrlq %mm6, %mm1
+ addl $32, %ecx C shift+32
+
+ movd %mm1, (%edi)
+ movq %mm0, %mm1
+ addl $4, %edi C new dst
+
+ movd %ecx, %mm6
+L(start_dst_aligned):
+
+
+ movq %mm1, %mm2 C copy of src low two limbs
+ negl %ecx
+ andl $-2, %eax C round size down to even
+
+ movl %eax, %ebx
+ negl %eax
+ addl $64, %ecx
+
+ andl $UNROLL_MASK, %eax
+ decl %ebx
+
+ shll %eax
+
+ movd %ecx, %mm7 C lshift = 64-rshift
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(entry) (%eax,%eax,4), %esi
+ negl %eax
+')
+ shrl $UNROLL_LOG2, %ebx C loop counter
+
+ leal ifelse(UNROLL_BYTES,256,128+) 8(%edx,%eax,2), %edx
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%eax,2), %edi
+ movl PARAM_SIZE, %eax C for use at end
+
+ jmp *%esi
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%eax,%eax,4), %esi
+ addl $L(entry)-L(here), %esi
+ addl (%esp), %esi
+ negl %eax
+
+ ret
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(64)
+L(top):
+ C eax size, for use at end
+ C ebx loop counter
+ C ecx lshift
+ C edx src
+ C esi was computed jump
+ C edi dst
+ C ebp
+ C
+ C mm0 scratch
+ C mm1 \ carry (alternating)
+ C mm2 /
+ C mm6 rshift
+ C mm7 lshift
+ C
+ C 10 code bytes/limb
+ C
+ C The two chunks differ in whether mm1 or mm2 hold the carry.
+ C The computed jump puts the initial carry in both mm1 and mm2.
+
+L(entry):
+deflit(CHUNK_COUNT, 4)
+forloop(i, 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 8))
+
+ movq disp0(%edx), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ por %mm2, %mm0
+ movq %mm0, disp0(%edi)
+
+
+ movq disp1(%edx), %mm0
+ psrlq %mm6, %mm1
+
+ movq %mm0, %mm2
+ psllq %mm7, %mm0
+
+ por %mm1, %mm0
+ movq %mm0, disp1(%edi)
+')
+
+ addl $UNROLL_BYTES, %edx
+ addl $UNROLL_BYTES, %edi
+ decl %ebx
+
+ jns L(top)
+
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 8))
+
+ testb $1, %al
+ psrlq %mm6, %mm2 C wanted rshifted in all cases below
+ movl SAVE_ESI, %esi
+
+ movd %mm5, %eax C return value
+
+ movl SAVE_EBX, %ebx
+ jz L(end_even)
+
+
+ C Size odd, destination was aligned.
+ C
+ C source
+ C edx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edi
+ C +-------+---------------+---------------+--
+ C | | | written |
+ C +-------+---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size odd, destination was unaligned.
+ C
+ C source
+ C edx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edi
+ C +---------------+---------------+--
+ C | | written |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword to store, and in the aligned case there's
+ C a further extra limb of dst to be formed.
+
+
+ movd disp0(%edx), %mm0
+ movq %mm0, %mm1
+
+ psllq %mm7, %mm0
+ testb $32, %cl
+
+ por %mm2, %mm0
+ psrlq %mm6, %mm1
+
+ movq %mm0, disp0(%edi)
+ jz L(finish_odd_unaligned)
+
+ movd %mm1, disp1(%edi)
+L(finish_odd_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+
+L(end_even):
+
+ C Size even, destination was aligned.
+ C
+ C source
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edi
+ C +---------------+---------------+--
+ C | | mm3 |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C Size even, destination was unaligned.
+ C
+ C source
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edi
+ C +-------+---------------+--
+ C | | mm3 |
+ C +-------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = 64-(shift+32)
+
+
+ C The movd for the unaligned case is the same data as the movq for
+ C the aligned case, it's just a choice between whether one or two
+ C limbs should be written.
+
+
+ testb $32, %cl
+ movd %mm2, disp0(%edi)
+
+ jz L(end_even_unaligned)
+
+ movq %mm2, disp0(%edi)
+L(end_even_unaligned):
+
+ movl SAVE_EDI, %edi
+ addl $SAVE_SIZE, %esp
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mul_1.asm b/rts/gmp/mpn/x86/k7/mul_1.asm
new file mode 100644
index 0000000000..07f7085b10
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mul_1.asm
@@ -0,0 +1,265 @@
+dnl AMD K7 mpn_mul_1 -- mpn by limb multiply.
+dnl
+dnl K7: 3.4 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7: UNROLL_COUNT cycles/limb
+dnl 8 3.9
+dnl 16 3.4
+dnl 32 3.4
+dnl 64 3.35
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier, mp_limb_t carry);
+C
+C Multiply src,size by mult and store the result in dst,size.
+C Return the carry limb from the top of the result.
+C
+C mpn_mul_1c() accepts an initial carry for the calculation, it's added into
+C the low limb of the destination.
+C
+C Variations on the unrolled loop have been tried, with the current
+C registers or with the counter on the stack to free up ecx. The current
+C code is the fastest found.
+C
+C An interesting effect is that removing the stores "movl %ebx, disp0(%edi)"
+C from the unrolled loop actually slows it down to 5.0 cycles/limb. Code
+C with this change can be tested on sizes of the form UNROLL_COUNT*n+1
+C without having to change the computed jump. There's obviously something
+C fishy going on, perhaps with what execution units the mul needs.
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBP, -4)
+defframe(SAVE_EDI, -8)
+defframe(SAVE_ESI, -12)
+defframe(SAVE_EBX, -16)
+deflit(STACK_SPACE, 16)
+
+dnl Must have UNROLL_THRESHOLD >= 2, since the unrolled loop can't handle 1.
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 7)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_mul_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ jmp LF(mpn_mul_1,start_nc)
+EPILOGUE()
+
+
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+ xorl %edx, %edx C initial carry
+L(start_nc):
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME', STACK_SPACE)
+
+ movl %edi, SAVE_EDI
+ movl %ebx, SAVE_EBX
+ movl %edx, %ebx
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_DST, %edi
+ movl %ebp, SAVE_EBP
+ jae L(unroll)
+
+ leal (%esi,%ecx,4), %esi
+ leal (%edi,%ecx,4), %edi
+ negl %ecx
+
+ movl PARAM_MULTIPLIER, %ebp
+
+L(simple):
+ C eax scratch
+ C ebx carry
+ C ecx counter (negative)
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl %eax, (%edi,%ecx,4)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ incl %ecx
+ jnz L(simple)
+
+ movl %ebx, %eax
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C The mov to load the next source limb is done well ahead of the mul, this
+C is necessary for full speed. It leads to one limb handled separately
+C after the loop.
+C
+C When unrolling to 32 or more, an offset of +4 is used on the src pointer,
+C to avoid having an 0x80 displacement in the code for the last limb in the
+C unrolled loop. This is for a fair comparison between 16 and 32 unrolling.
+
+ifelse(eval(UNROLL_COUNT >= 32),1,`
+deflit(SRC_OFFSET,4)
+',`
+deflit(SRC_OFFSET,)
+')
+
+ C this is offset 0x62, so close enough to aligned
+L(unroll):
+ C eax
+ C ebx initial carry
+ C ecx size
+ C edx
+ C esi src
+ C edi dst
+ C ebp
+deflit(`FRAME', STACK_SPACE)
+
+ leal -1(%ecx), %edx C one limb handled at end
+ leal -2(%ecx), %ecx C and ecx is one less than edx
+ movl %ebp, SAVE_EBP
+
+ negl %edx
+ shrl $UNROLL_LOG2, %ecx C unrolled loop counter
+ movl (%esi), %eax C src low limb
+
+ andl $UNROLL_MASK, %edx
+ movl PARAM_DST, %edi
+
+ movl %edx, %ebp
+ shll $4, %edx
+
+ C 17 code bytes per limb
+ifdef(`PIC',`
+ call L(add_eip_to_edx)
+L(here):
+',`
+ leal L(entry) (%edx,%ebp), %edx
+')
+ negl %ebp
+
+ leal ifelse(UNROLL_BYTES,256,128+) SRC_OFFSET(%esi,%ebp,4), %esi
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%ebp,4), %edi
+ movl PARAM_MULTIPLIER, %ebp
+
+ jmp *%edx
+
+
+ifdef(`PIC',`
+L(add_eip_to_edx):
+ C See README.family about old gas bugs
+ leal (%edx,%ebp), %edx
+ addl $L(entry)-L(here), %edx
+ addl (%esp), %edx
+ ret
+')
+
+
+C ----------------------------------------------------------------------------
+ ALIGN(32)
+L(top):
+ C eax next src limb
+ C ebx carry
+ C ecx counter
+ C edx scratch
+ C esi src+4
+ C edi dst
+ C ebp multiplier
+ C
+ C 17 code bytes per limb processed
+
+L(entry):
+forloop(i, 0, UNROLL_COUNT-1, `
+ deflit(`disp_dst', eval(i*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp_src', eval(disp_dst + 4-(SRC_OFFSET-0)))
+
+ mull %ebp
+
+ addl %eax, %ebx
+Zdisp( movl, disp_src,(%esi), %eax)
+Zdisp( movl, %ebx, disp_dst,(%edi))
+
+ movl $0, %ebx
+ adcl %edx, %ebx
+')
+
+ decl %ecx
+
+ leal UNROLL_BYTES(%esi), %esi
+ leal UNROLL_BYTES(%edi), %edi
+ jns L(top)
+
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+
+ mull %ebp
+
+ addl %eax, %ebx
+ movl $0, %eax
+ movl SAVE_ESI, %esi
+
+ movl %ebx, disp0(%edi)
+ movl SAVE_EBX, %ebx
+ movl SAVE_EDI, %edi
+
+ adcl %edx, %eax
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/mul_basecase.asm b/rts/gmp/mpn/x86/k7/mul_basecase.asm
new file mode 100644
index 0000000000..c4be62e633
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/mul_basecase.asm
@@ -0,0 +1,593 @@
+dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers.
+dnl
+dnl K7: approx 4.42 cycles per cross product at around 20x20 limbs (16
+dnl limbs/loop unrolling).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl K7 UNROLL_COUNT cycles/product (at around 20x20)
+dnl 8 4.67
+dnl 16 4.59
+dnl 32 4.42
+dnl Maximum possible with the current code is 32.
+dnl
+dnl At 32 the typical 13-26 limb sizes from the karatsuba code will get
+dnl done with a straight run through a block of code, no inner loop. Using
+dnl 32 gives 1k of code, but the k7 has a 64k L1 code cache.
+
+deflit(UNROLL_COUNT, 32)
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+C
+C Calculate xp,xsize multiplied by yp,ysize, storing the result in
+C wp,xsize+ysize.
+C
+C This routine is essentially the same as mpn/generic/mul_basecase.c, but
+C it's faster because it does most of the mpn_addmul_1() startup
+C calculations only once. The saving is 15-25% on typical sizes coming from
+C the Karatsuba multiply code.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_XSIZE, %ecx
+ movl PARAM_YP, %eax
+
+ movl PARAM_XP, %edx
+ movl (%eax), %eax C yp low limb
+
+ cmpl $2, %ecx
+ ja L(xsize_more_than_two)
+ je L(two_by_something)
+
+
+ C one limb by one limb
+
+ mull (%edx)
+
+ movl PARAM_WP, %ecx
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(two_by_something):
+deflit(`FRAME',0)
+ decl PARAM_YSIZE
+ pushl %ebx defframe_pushl(`SAVE_EBX')
+ movl %eax, %ecx C yp low limb
+
+ movl PARAM_WP, %ebx
+ pushl %esi defframe_pushl(`SAVE_ESI')
+ movl %edx, %esi C xp
+
+ movl (%edx), %eax C xp low limb
+ jnz L(two_by_two)
+
+
+ C two limbs by one limb
+
+ mull %ecx
+
+ movl %eax, (%ebx)
+ movl 4(%esi), %eax
+ movl %edx, %esi C carry
+
+ mull %ecx
+
+ addl %eax, %esi
+
+ movl %esi, 4(%ebx)
+ movl SAVE_ESI, %esi
+
+ adcl $0, %edx
+
+ movl %edx, 8(%ebx)
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+C Could load yp earlier into another register.
+
+ ALIGN(16)
+L(two_by_two):
+ C eax xp low limb
+ C ebx wp
+ C ecx yp low limb
+ C edx
+ C esi xp
+ C edi
+ C ebp
+
+dnl FRAME carries on from previous
+
+ mull %ecx C xp[0] * yp[0]
+
+ push %edi defframe_pushl(`SAVE_EDI')
+ movl %edx, %edi C carry, for wp[1]
+
+ movl %eax, (%ebx)
+ movl 4(%esi), %eax
+
+ mull %ecx C xp[1] * yp[0]
+
+ addl %eax, %edi
+ movl PARAM_YP, %ecx
+
+ adcl $0, %edx
+ movl 4(%ecx), %ecx C yp[1]
+ movl %edi, 4(%ebx)
+
+ movl 4(%esi), %eax C xp[1]
+ movl %edx, %edi C carry, for wp[2]
+
+ mull %ecx C xp[1] * yp[1]
+
+ addl %eax, %edi
+
+ adcl $0, %edx
+ movl (%esi), %eax C xp[0]
+
+ movl %edx, %esi C carry, for wp[3]
+
+ mull %ecx C xp[0] * yp[1]
+
+ addl %eax, 4(%ebx)
+ adcl %edx, %edi
+ movl %edi, 8(%ebx)
+
+ adcl $0, %esi
+ movl SAVE_EDI, %edi
+ movl %esi, 12(%ebx)
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(xsize_more_than_two):
+
+C The first limb of yp is processed with a simple mpn_mul_1 style loop
+C inline. Unrolling this doesn't seem worthwhile since it's only run once
+C (whereas the addmul below is run ysize-1 many times). A call to the
+C actual mpn_mul_1 will be slowed down by the call and parameter pushing and
+C popping, and doesn't seem likely to be worthwhile on the typical 13-26
+C limb operations the Karatsuba code calls here with.
+
+ C eax yp[0]
+ C ebx
+ C ecx xsize
+ C edx xp
+ C esi
+ C edi
+ C ebp
+
+dnl FRAME doesn't carry on from previous, no pushes yet here
+defframe(`SAVE_EBX',-4)
+defframe(`SAVE_ESI',-8)
+defframe(`SAVE_EDI',-12)
+defframe(`SAVE_EBP',-16)
+deflit(`FRAME',0)
+
+ subl $16, %esp
+deflit(`FRAME',16)
+
+ movl %edi, SAVE_EDI
+ movl PARAM_WP, %edi
+
+ movl %ebx, SAVE_EBX
+ movl %ebp, SAVE_EBP
+ movl %eax, %ebp
+
+ movl %esi, SAVE_ESI
+ xorl %ebx, %ebx
+ leal (%edx,%ecx,4), %esi C xp end
+
+ leal (%edi,%ecx,4), %edi C wp end of mul1
+ negl %ecx
+
+
+L(mul1):
+ C eax scratch
+ C ebx carry
+ C ecx counter, negative
+ C edx scratch
+ C esi xp end
+ C edi wp end of mul1
+ C ebp multiplier
+
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl %eax, (%edi,%ecx,4)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ incl %ecx
+ jnz L(mul1)
+
+
+ movl PARAM_YSIZE, %edx
+ movl PARAM_XSIZE, %ecx
+
+ movl %ebx, (%edi) C final carry
+ decl %edx
+
+ jnz L(ysize_more_than_one)
+
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+
+ movl SAVE_EBP, %ebp
+ movl SAVE_ESI, %esi
+ addl $FRAME, %esp
+
+ ret
+
+
+L(ysize_more_than_one):
+ cmpl $UNROLL_THRESHOLD, %ecx
+ movl PARAM_YP, %eax
+
+ jae L(unroll)
+
+
+C -----------------------------------------------------------------------------
+ C simple addmul looping
+ C
+ C eax yp
+ C ebx
+ C ecx xsize
+ C edx ysize-1
+ C esi xp end
+ C edi wp end of mul1
+ C ebp
+
+ leal 4(%eax,%edx,4), %ebp C yp end
+ negl %ecx
+ negl %edx
+
+ movl (%esi,%ecx,4), %eax C xp low limb
+ movl %edx, PARAM_YSIZE C -(ysize-1)
+ incl %ecx
+
+ xorl %ebx, %ebx C initial carry
+ movl %ecx, PARAM_XSIZE C -(xsize-1)
+ movl %ebp, PARAM_YP
+
+ movl (%ebp,%edx,4), %ebp C yp second lowest limb - multiplier
+ jmp L(simple_outer_entry)
+
+
+ C this is offset 0x121 so close enough to aligned
+L(simple_outer_top):
+ C ebp ysize counter, negative
+
+ movl PARAM_YP, %edx
+ movl PARAM_XSIZE, %ecx C -(xsize-1)
+ xorl %ebx, %ebx C carry
+
+ movl %ebp, PARAM_YSIZE
+ addl $4, %edi C next position in wp
+
+ movl (%edx,%ebp,4), %ebp C yp limb - multiplier
+ movl -4(%esi,%ecx,4), %eax C xp low limb
+
+
+L(simple_outer_entry):
+
+L(simple_inner):
+ C eax xp limb
+ C ebx carry limb
+ C ecx loop counter (negative)
+ C edx scratch
+ C esi xp end
+ C edi wp end
+ C ebp multiplier
+
+ mull %ebp
+
+ addl %eax, %ebx
+ adcl $0, %edx
+
+ addl %ebx, (%edi,%ecx,4)
+ movl (%esi,%ecx,4), %eax
+ adcl $0, %edx
+
+ incl %ecx
+ movl %edx, %ebx
+ jnz L(simple_inner)
+
+
+ mull %ebp
+
+ movl PARAM_YSIZE, %ebp
+ addl %eax, %ebx
+
+ adcl $0, %edx
+ addl %ebx, (%edi)
+
+ adcl $0, %edx
+ incl %ebp
+
+ movl %edx, 4(%edi)
+ jnz L(simple_outer_top)
+
+
+ movl SAVE_EBX, %ebx
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+C
+C The unrolled loop is the same as in mpn_addmul_1(), see that code for some
+C comments.
+C
+C VAR_ADJUST is the negative of how many limbs the leals in the inner loop
+C increment xp and wp. This is used to adjust back xp and wp, and rshifted
+C to given an initial VAR_COUNTER at the top of the outer loop.
+C
+C VAR_COUNTER is for the unrolled loop, running from VAR_ADJUST/UNROLL_COUNT
+C up to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled loop.
+C
+C VAR_XP_LOW is the least significant limb of xp, which is needed at the
+C start of the unrolled loop.
+C
+C PARAM_YSIZE is the outer loop counter, going from -(ysize-1) up to -1,
+C inclusive.
+C
+C PARAM_YP is offset appropriately so that the PARAM_YSIZE counter can be
+C added to give the location of the next limb of yp, which is the multiplier
+C in the unrolled loop.
+C
+C The trick with VAR_ADJUST means it's only necessary to do one fetch in the
+C outer loop to take care of xp, wp and the inner loop counter.
+
+defframe(VAR_COUNTER, -20)
+defframe(VAR_ADJUST, -24)
+defframe(VAR_JMP, -28)
+defframe(VAR_XP_LOW, -32)
+deflit(VAR_EXTRA_SPACE, 16)
+
+
+L(unroll):
+ C eax yp
+ C ebx
+ C ecx xsize
+ C edx ysize-1
+ C esi xp end
+ C edi wp end of mul1
+ C ebp
+
+ movl PARAM_XP, %esi
+ movl 4(%eax), %ebp C multiplier (yp second limb)
+ leal 4(%eax,%edx,4), %eax C yp adjust for ysize indexing
+
+ movl PARAM_WP, %edi
+ movl %eax, PARAM_YP
+ negl %edx
+
+ movl %edx, PARAM_YSIZE
+ leal UNROLL_COUNT-2(%ecx), %ebx C (xsize-1)+UNROLL_COUNT-1
+ decl %ecx C xsize-1
+
+ movl (%esi), %eax C xp low limb
+ andl $-UNROLL_MASK-1, %ebx
+ negl %ecx
+
+ subl $VAR_EXTRA_SPACE, %esp
+deflit(`FRAME',16+VAR_EXTRA_SPACE)
+ negl %ebx
+ andl $UNROLL_MASK, %ecx
+
+ movl %ebx, VAR_ADJUST
+ movl %ecx, %edx
+ shll $4, %ecx
+
+ sarl $UNROLL_LOG2, %ebx
+
+ C 17 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(unroll_here):
+',`
+ leal L(unroll_entry) (%ecx,%edx,1), %ecx
+')
+ negl %edx
+
+ movl %eax, VAR_XP_LOW
+ movl %ecx, VAR_JMP
+ leal 4(%edi,%edx,4), %edi C wp and xp, adjust for unrolling,
+ leal 4(%esi,%edx,4), %esi C and start at second limb
+ jmp L(unroll_outer_entry)
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ C See README.family about old gas bugs
+ leal (%ecx,%edx,1), %ecx
+ addl $L(unroll_entry)-L(unroll_here), %ecx
+ addl (%esp), %ecx
+ ret
+')
+
+
+C --------------------------------------------------------------------------
+ ALIGN(32)
+L(unroll_outer_top):
+ C ebp ysize counter, negative
+
+ movl VAR_ADJUST, %ebx
+ movl PARAM_YP, %edx
+
+ movl VAR_XP_LOW, %eax
+ movl %ebp, PARAM_YSIZE C store incremented ysize counter
+
+ leal 4(%edi,%ebx,4), %edi
+ leal (%esi,%ebx,4), %esi
+ sarl $UNROLL_LOG2, %ebx
+
+ movl (%edx,%ebp,4), %ebp C yp next multiplier
+ movl VAR_JMP, %ecx
+
+L(unroll_outer_entry):
+ mull %ebp
+
+ testb $1, %cl C and clear carry bit
+ movl %ebx, VAR_COUNTER
+ movl $0, %ebx
+
+ movl $0, %ecx
+ cmovz( %eax, %ecx) C eax into low carry, zero into high carry limb
+ cmovnz( %eax, %ebx)
+
+ C Extra fetch of VAR_JMP is bad, but registers are tight
+ jmp *VAR_JMP
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(32)
+L(unroll_top):
+ C eax xp limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi xp+8
+ C edi wp
+ C ebp yp multiplier limb
+ C
+ C VAR_COUNTER loop counter, negative
+ C
+ C 17 bytes each limb
+
+L(unroll_entry):
+
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*CHUNK_COUNT*4 ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%esi), %eax)
+ adcl %edx, %ebx
+
+ mull %ebp
+
+Zdisp( addl, %ecx, disp0,(%edi))
+ movl $0, %ecx
+
+ adcl %eax, %ebx
+
+
+ movl disp1(%esi), %eax
+ adcl %edx, %ecx
+
+ mull %ebp
+
+ addl %ebx, disp1(%edi)
+ movl $0, %ebx
+
+ adcl %eax, %ecx
+')
+
+
+ incl VAR_COUNTER
+ leal UNROLL_BYTES(%esi), %esi
+ leal UNROLL_BYTES(%edi), %edi
+
+ jnz L(unroll_top)
+
+
+ C eax
+ C ebx zero
+ C ecx low
+ C edx high
+ C esi
+ C edi wp, pointing at second last limb)
+ C ebp
+ C
+ C carry flag to be added to high
+
+deflit(`disp0', ifelse(UNROLL_BYTES,256,-128))
+deflit(`disp1', eval(disp0-0 + 4))
+
+ movl PARAM_YSIZE, %ebp
+ adcl $0, %edx
+ addl %ecx, disp0(%edi)
+
+ adcl $0, %edx
+ incl %ebp
+
+ movl %edx, disp1(%edi)
+ jnz L(unroll_outer_top)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/k7/sqr_basecase.asm b/rts/gmp/mpn/x86/k7/sqr_basecase.asm
new file mode 100644
index 0000000000..84861ea66b
--- /dev/null
+++ b/rts/gmp/mpn/x86/k7/sqr_basecase.asm
@@ -0,0 +1,627 @@
+dnl AMD K7 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl K7: approx 2.3 cycles/crossproduct, or 4.55 cycles/triangular product
+dnl (measured on the speed difference between 25 and 50 limbs, which is
+dnl roughly the Karatsuba recursing range).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
+dnl some comments.
+
+deflit(KARATSUBA_SQR_THRESHOLD_MAX, 66)
+
+ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',
+`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')
+
+m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')
+deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C With a KARATSUBA_SQR_THRESHOLD around 50 this code is about 1500 bytes,
+C which is quite a bit, but is considered good value since squares big
+C enough to use most of the code will be spending quite a few cycles in it.
+
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl PARAM_SRC, %eax
+ cmpl $2, %ecx
+
+ movl PARAM_DST, %edx
+ je L(two_limbs)
+ ja L(three_or_more)
+
+
+C------------------------------------------------------------------------------
+C one limb only
+ C eax src
+ C ecx size
+ C edx dst
+
+ movl (%eax), %eax
+ movl %edx, %ecx
+
+ mull %eax
+
+ movl %edx, 4(%ecx)
+ movl %eax, (%ecx)
+ ret
+
+
+C------------------------------------------------------------------------------
+C
+C Using the read/modify/write "add"s seems to be faster than saving and
+C restoring registers. Perhaps the loads for the first set hide under the
+C mul latency and the second gets store to load forwarding.
+
+ ALIGN(16)
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+deflit(`FRAME',0)
+
+ pushl %ebx FRAME_pushl()
+ movl %eax, %ebx C src
+ movl (%eax), %eax
+
+ movl %edx, %ecx C dst
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx) C dst[0]
+ movl 4(%ebx), %eax
+
+ movl %edx, 4(%ecx) C dst[1]
+
+ mull %eax C src[1]^2
+
+ movl %eax, 8(%ecx) C dst[2]
+ movl (%ebx), %eax
+
+ movl %edx, 12(%ecx) C dst[3]
+
+ mull 4(%ebx) C src[0]*src[1]
+
+ popl %ebx
+
+ addl %eax, 4(%ecx)
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+ ASSERT(nc)
+
+ addl %eax, 4(%ecx)
+ adcl %edx, 8(%ecx)
+ adcl $0, 12(%ecx)
+ ASSERT(nc)
+
+ ret
+
+
+C------------------------------------------------------------------------------
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(STACK_SPACE, 16)
+
+L(three_or_more):
+ subl $STACK_SPACE, %esp
+ cmpl $4, %ecx
+ jae L(four_or_more)
+deflit(`FRAME',STACK_SPACE)
+
+
+C------------------------------------------------------------------------------
+C Three limbs
+C
+C Writing out the loads and stores separately at the end of this code comes
+C out about 10 cycles faster than using adcls to memory.
+
+ C eax src
+ C ecx size
+ C edx dst
+
+ movl %ebx, SAVE_EBX
+ movl %eax, %ebx C src
+ movl (%eax), %eax
+
+ movl %edx, %ecx C dst
+ movl %esi, SAVE_ESI
+ movl %edi, SAVE_EDI
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl 4(%ebx), %eax
+ movl %edx, 4(%ecx)
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl 8(%ebx), %eax
+ movl %edx, 12(%ecx)
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl (%ebx), %eax
+ movl %edx, 20(%ecx)
+
+ mull 4(%ebx) C src[0] * src[1]
+
+ movl %eax, %esi
+ movl (%ebx), %eax
+ movl %edx, %edi
+
+ mull 8(%ebx) C src[0] * src[2]
+
+ addl %eax, %edi
+ movl %ebp, SAVE_EBP
+ movl $0, %ebp
+
+ movl 4(%ebx), %eax
+ adcl %edx, %ebp
+
+ mull 8(%ebx) C src[1] * src[2]
+
+ xorl %ebx, %ebx
+ addl %eax, %ebp
+
+ adcl $0, %edx
+
+ C eax
+ C ebx zero, will be dst[5]
+ C ecx dst
+ C edx dst[4]
+ C esi dst[1]
+ C edi dst[2]
+ C ebp dst[3]
+
+ adcl $0, %edx
+ addl %esi, %esi
+
+ adcl %edi, %edi
+ movl 4(%ecx), %eax
+
+ adcl %ebp, %ebp
+
+ adcl %edx, %edx
+
+ adcl $0, %ebx
+ addl %eax, %esi
+ movl 8(%ecx), %eax
+
+ adcl %eax, %edi
+ movl 12(%ecx), %eax
+ movl %esi, 4(%ecx)
+
+ adcl %eax, %ebp
+ movl 16(%ecx), %eax
+ movl %edi, 8(%ecx)
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EDI, %edi
+
+ adcl %eax, %edx
+ movl 20(%ecx), %eax
+ movl %ebp, 12(%ecx)
+
+ adcl %ebx, %eax
+ ASSERT(nc)
+ movl SAVE_EBX, %ebx
+ movl SAVE_EBP, %ebp
+
+ movl %edx, 16(%ecx)
+ movl %eax, 20(%ecx)
+ addl $FRAME, %esp
+
+ ret
+
+
+C------------------------------------------------------------------------------
+L(four_or_more):
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+C Further products are added in rather than stored.
+
+ C eax src
+ C ebx
+ C ecx size
+ C edx dst
+ C esi
+ C edi
+ C ebp
+
+defframe(`VAR_COUNTER',-20)
+defframe(`VAR_JMP', -24)
+deflit(EXTRA_STACK_SPACE, 8)
+
+ movl %ebx, SAVE_EBX
+ movl %edi, SAVE_EDI
+ leal (%edx,%ecx,4), %edi C &dst[size]
+
+ movl %esi, SAVE_ESI
+ movl %ebp, SAVE_EBP
+ leal (%eax,%ecx,4), %esi C &src[size]
+
+ movl (%eax), %ebp C multiplier
+ movl $0, %ebx
+ decl %ecx
+
+ negl %ecx
+ subl $EXTRA_STACK_SPACE, %esp
+FRAME_subl_esp(EXTRA_STACK_SPACE)
+
+L(mul_1):
+ C eax scratch
+ C ebx carry
+ C ecx counter
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp multiplier
+
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %ebx, %eax
+ movl %eax, (%edi,%ecx,4)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ incl %ecx
+ jnz L(mul_1)
+
+
+C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two products, which are the bottom right corner of the product
+C triangle, are left to the end. These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1]. If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as in mpn_addmul_1, see that routine for
+C some comments.
+C
+C VAR_COUNTER is the outer loop, running from -size+4 to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+C
+C K7 does branch prediction on indirect jumps, which is bad since it's a
+C different target each time. There seems no way to avoid this.
+
+dnl This value also hard coded in some shifts and adds
+deflit(CODE_BYTES_PER_LIMB, 17)
+
+dnl With the unmodified &src[size] and &dst[size] pointers, the
+dnl displacements in the unrolled code fit in a byte for UNROLL_COUNT
+dnl values up to 31, but above that an offset must be added to them.
+
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>31),1,
+eval((UNROLL_COUNT-31)*4),
+0))
+
+dnl Because the last chunk of code is generated differently, a label placed
+dnl at the end doesn't work. Instead calculate the implied end using the
+dnl start and how many chunks of code there are.
+
+deflit(UNROLL_INNER_END,
+`L(unroll_inner_start)+eval(UNROLL_COUNT*CODE_BYTES_PER_LIMB)')
+
+ C eax
+ C ebx carry
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+ movl %ebx, (%edi)
+
+ subl $4, %ecx
+ jz L(corner)
+
+ negl %ecx
+ifelse(OFFSET,0,,`subl $OFFSET, %edi')
+ifelse(OFFSET,0,,`subl $OFFSET, %esi')
+
+ movl %ecx, %edx
+ shll $4, %ecx
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+
+
+ C The calculated jump mustn't come out to before the start of the
+ C code available. This is the limit UNROLL_COUNT puts on the src
+ C operand size, but checked here directly using the jump address.
+ ASSERT(ae,
+ `movl_text_address(L(unroll_inner_start), %eax)
+ cmpl %eax, %ecx')
+
+
+C------------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll_outer_top):
+ C eax
+ C ebx high limb to store
+ C ecx VAR_JMP
+ C edx VAR_COUNTER, limbs, negative
+ C esi &src[size], constant
+ C edi dst ptr, high of last addmul
+ C ebp
+
+ movl -12+OFFSET(%esi,%edx,4), %ebp C next multiplier
+ movl -8+OFFSET(%esi,%edx,4), %eax C first of multiplicand
+
+ movl %edx, VAR_COUNTER
+
+ mull %ebp
+
+define(cmovX,`ifelse(eval(UNROLL_COUNT%2),0,`cmovz($@)',`cmovnz($@)')')
+
+ testb $1, %cl
+ movl %edx, %ebx C high carry
+ movl %ecx, %edx C jump
+
+ movl %eax, %ecx C low carry
+ cmovX( %ebx, %ecx) C high carry reverse
+ cmovX( %eax, %ebx) C low carry reverse
+
+ leal CODE_BYTES_PER_LIMB(%edx), %eax
+ xorl %edx, %edx
+ leal 4(%edi), %edi
+
+ movl %eax, VAR_JMP
+
+ jmp *%eax
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ addl (%esp), %ecx
+ addl $UNROLL_INNER_END-eval(2*CODE_BYTES_PER_LIMB)-L(here), %ecx
+ addl %edx, %ecx
+ ret
+')
+
+
+ C Must be an even address to preserve the significance of the low
+ C bit of the jump address indicating which way around ecx/ebx should
+ C start.
+ ALIGN(2)
+
+L(unroll_inner_start):
+ C eax next limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+forloop(`i', UNROLL_COUNT, 1, `
+ deflit(`disp_src', eval(-i*4 + OFFSET))
+ deflit(`disp_dst', eval(disp_src - 4))
+
+ m4_assert(`disp_src>=-128 && disp_src<128')
+ m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl, disp_src,(%esi), %eax)
+ adcl %edx, %ebx
+
+ mull %ebp
+
+Zdisp( addl, %ecx, disp_dst,(%edi))
+ movl $0, %ecx
+
+ adcl %eax, %ebx
+
+',`
+ dnl this bit comes out last
+Zdisp( movl, disp_src,(%esi), %eax)
+ adcl %edx, %ecx
+
+ mull %ebp
+
+dnl Zdisp( addl %ebx, disp_src,(%edi))
+ addl %ebx, disp_dst(%edi)
+ifelse(forloop_last,0,
+` movl $0, %ebx')
+
+ adcl %eax, %ecx
+')
+')
+
+ C eax next limb
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ adcl $0, %edx
+ addl %ecx, -4+OFFSET(%edi)
+ movl VAR_JMP, %ecx
+
+ adcl $0, %edx
+
+ movl %edx, m4_empty_if_zero(OFFSET) (%edi)
+ movl VAR_COUNTER, %edx
+
+ incl %edx
+ jnz L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+ addl $OFFSET, %esi
+ addl $OFFSET, %edi
+')
+
+
+C------------------------------------------------------------------------------
+L(corner):
+ C esi &src[size]
+ C edi &dst[2*size-5]
+
+ movl -12(%esi), %ebp
+ movl -8(%esi), %eax
+ movl %eax, %ecx
+
+ mull %ebp
+
+ addl %eax, -4(%edi)
+ movl -4(%esi), %eax
+
+ adcl $0, %edx
+ movl %edx, %ebx
+ movl %eax, %esi
+
+ mull %ebp
+
+ addl %ebx, %eax
+
+ adcl $0, %edx
+ addl %eax, (%edi)
+ movl %esi, %eax
+
+ adcl $0, %edx
+ movl %edx, %ebx
+
+ mull %ecx
+
+ addl %ebx, %eax
+ movl %eax, 4(%edi)
+
+ adcl $0, %edx
+ movl %edx, 8(%edi)
+
+
+
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+L(lshift_start):
+ movl PARAM_SIZE, %eax
+ movl PARAM_DST, %edi
+ xorl %ecx, %ecx C clear carry
+
+ leal (%edi,%eax,8), %edi
+ notl %eax C -size-1, preserve carry
+
+ leal 2(%eax), %eax C -(size-1)
+
+L(lshift):
+ C eax counter, negative
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi dst, pointing just after last limb
+ C ebp
+
+ rcll -4(%edi,%eax,8)
+ rcll (%edi,%eax,8)
+ incl %eax
+ jnz L(lshift)
+
+ setc %al
+
+ movl PARAM_SRC, %esi
+ movl %eax, -4(%edi) C dst most significant limb
+
+ movl PARAM_SIZE, %ecx
+
+
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+ movl (%esi), %eax C src[0]
+
+ mull %eax
+
+ leal (%esi,%ecx,4), %esi C src point just after last limb
+ negl %ecx
+
+ movl %eax, (%edi,%ecx,8) C dst[0]
+ incl %ecx
+
+L(diag):
+ C eax scratch
+ C ebx scratch
+ C ecx counter, negative
+ C edx carry
+ C esi src just after last limb
+ C edi dst just after last limb
+ C ebp
+
+ movl (%esi,%ecx,4), %eax
+ movl %edx, %ebx
+
+ mull %eax
+
+ addl %ebx, -4(%edi,%ecx,8)
+ adcl %eax, (%edi,%ecx,8)
+ adcl $0, %edx
+
+ incl %ecx
+ jnz L(diag)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+
+ addl %edx, -4(%edi) C dst most significant limb
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/lshift.asm b/rts/gmp/mpn/x86/lshift.asm
new file mode 100644
index 0000000000..4735335cbe
--- /dev/null
+++ b/rts/gmp/mpn/x86/lshift.asm
@@ -0,0 +1,90 @@
+dnl x86 mpn_lshift -- mpn left shift.
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+deflit(`FRAME',12)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%edx
+ movl PARAM_SHIFT,%ecx
+
+ subl $4,%esi C adjust src
+
+ movl (%esi,%edx,4),%ebx C read most significant limb
+ xorl %eax,%eax
+ shldl( %cl, %ebx, %eax) C compute carry limb
+ decl %edx
+ jz L(end)
+ pushl %eax C push carry limb onto stack
+ testb $1,%dl
+ jnz L(1) C enter loop in the middle
+ movl %ebx,%eax
+
+ ALIGN(8)
+L(oop): movl (%esi,%edx,4),%ebx C load next lower limb
+ shldl( %cl, %ebx, %eax) C compute result limb
+ movl %eax,(%edi,%edx,4) C store it
+ decl %edx
+L(1): movl (%esi,%edx,4),%eax
+ shldl( %cl, %eax, %ebx)
+ movl %ebx,(%edi,%edx,4)
+ decl %edx
+ jnz L(oop)
+
+ shll %cl,%eax C compute least significant limb
+ movl %eax,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+L(end): shll %cl,%ebx C compute least significant limb
+ movl %ebx,(%edi) C store it
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/mod_1.asm b/rts/gmp/mpn/x86/mod_1.asm
new file mode 100644
index 0000000000..3908161b3e
--- /dev/null
+++ b/rts/gmp/mpn/x86/mod_1.asm
@@ -0,0 +1,141 @@
+dnl x86 mpn_mod_1 -- mpn by limb remainder.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl cycles/limb
+dnl K6 20
+dnl P5 44
+dnl P6 39
+dnl 486 approx 42 maybe
+dnl
+dnl The following have their own optimized mod_1 implementations, but for
+dnl reference the code here runs as follows.
+dnl
+dnl P6MMX 39
+dnl K7 41
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C Divide src,size by divisor and return the remainder. The quotient is
+C discarded.
+C
+C See mpn/x86/divrem_1.asm for some comments.
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ orl %ecx, %ecx
+
+ movl PARAM_CARRY, %edx
+ jnz LF(mpn_mod_1,top)
+
+ popl %esi
+ movl %edx, %eax
+
+ popl %ebx
+
+ ret
+
+EPILOGUE()
+
+
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ pushl %esi FRAME_pushl()
+
+ orl %ecx, %ecx
+ jz L(done_zero)
+
+ movl PARAM_DIVISOR, %esi
+ movl -4(%ebx,%ecx,4), %eax C src high limb
+
+ cmpl %esi, %eax
+
+ sbbl %edx, %edx C -1 if high<divisor
+
+ addl %edx, %ecx C skip one division if high<divisor
+ jz L(done_eax)
+
+ andl %eax, %edx C carry if high<divisor
+
+
+L(top):
+ C eax scratch (quotient)
+ C ebx src
+ C ecx counter
+ C edx carry (remainder)
+ C esi divisor
+ C edi
+ C ebp
+
+ movl -4(%ebx,%ecx,4), %eax
+
+ divl %esi
+
+ loop_or_decljnz L(top)
+
+
+ movl %edx, %eax
+L(done_eax):
+ popl %esi
+
+ popl %ebx
+
+ ret
+
+
+L(done_zero):
+ popl %esi
+ xorl %eax, %eax
+
+ popl %ebx
+
+ ret
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/mul_1.asm b/rts/gmp/mpn/x86/mul_1.asm
new file mode 100644
index 0000000000..8817f291bc
--- /dev/null
+++ b/rts/gmp/mpn/x86/mul_1.asm
@@ -0,0 +1,130 @@
+dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
+dnl with a limb and store the result in a second limb vector.
+dnl
+dnl cycles/limb
+dnl P6: 5.5
+dnl
+dnl The following CPUs have their own optimized code, but for reference the
+dnl code here runs as follows.
+dnl
+dnl cycles/limb
+dnl P5: 12.5
+dnl K6: 10.5
+dnl K7: 4.5
+
+
+dnl Copyright (C) 1992, 1994, 1997, 1998, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_mul_1)
+deflit(`FRAME',0)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ecx
+
+ xorl %ebx,%ebx
+ andl $3,%ecx
+ jz L(end0)
+
+L(oop0):
+ movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ leal 4(%esi),%esi
+ addl %ebx,%eax
+ movl $0,%ebx
+ adcl %ebx,%edx
+ movl %eax,(%edi)
+ movl %edx,%ebx C propagate carry into cylimb
+
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oop0)
+
+L(end0):
+ movl PARAM_SIZE,%ecx
+ shrl $2,%ecx
+ jz L(end)
+
+
+ ALIGN(8)
+L(oop): movl (%esi),%eax
+ mull PARAM_MULTIPLIER
+ addl %eax,%ebx
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 4(%esi),%eax
+ mull PARAM_MULTIPLIER
+ movl %ebx,(%edi)
+ addl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl 8(%esi),%eax
+ mull PARAM_MULTIPLIER
+ movl %ebp,4(%edi)
+ addl %eax,%ebx C new lo + cylimb
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 12(%esi),%eax
+ mull PARAM_MULTIPLIER
+ movl %ebx,8(%edi)
+ addl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl %ebp,12(%edi)
+
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ecx
+ jnz L(oop)
+
+L(end): movl %ebx,%eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/mul_basecase.asm b/rts/gmp/mpn/x86/mul_basecase.asm
new file mode 100644
index 0000000000..3a9b73895b
--- /dev/null
+++ b/rts/gmp/mpn/x86/mul_basecase.asm
@@ -0,0 +1,209 @@
+dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
+dnl in a third limb vector.
+
+
+dnl Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+C
+C This was written in haste since the Pentium optimized code that was used
+C for all x86 machines was slow for the Pentium II. This code would benefit
+C from some cleanup.
+C
+C To shave off some percentage of the run-time, one should make 4 variants
+C of the Louter loop, for the four different outcomes of un mod 4. That
+C would avoid Loop0 altogether. Code expansion would be > 4-fold for that
+C part of the function, but since it is not very large, that would be
+C acceptable.
+C
+C The mul loop (at L(oopM)) might need some tweaking. Its current speed is
+C unknown.
+
+defframe(PARAM_YSIZE,20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE,12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+defframe(VAR_MULTIPLIER, -4)
+defframe(VAR_COUNTER, -8)
+deflit(VAR_STACK_SPACE, 8)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_mul_basecase)
+deflit(`FRAME',0)
+
+ subl $VAR_STACK_SPACE,%esp
+ pushl %esi
+ pushl %ebp
+ pushl %edi
+deflit(`FRAME',eval(VAR_STACK_SPACE+12))
+
+ movl PARAM_XP,%esi
+ movl PARAM_WP,%edi
+ movl PARAM_YP,%ebp
+
+ movl (%esi),%eax C load xp[0]
+ mull (%ebp) C multiply by yp[0]
+ movl %eax,(%edi) C store to wp[0]
+ movl PARAM_XSIZE,%ecx C xsize
+ decl %ecx C If xsize = 1, ysize = 1 too
+ jz L(done)
+
+ pushl %ebx
+FRAME_pushl()
+ movl %edx,%ebx
+
+ leal 4(%esi),%esi
+ leal 4(%edi),%edi
+
+L(oopM):
+ movl (%esi),%eax C load next limb at xp[j]
+ leal 4(%esi),%esi
+ mull (%ebp)
+ addl %ebx,%eax
+ movl %edx,%ebx
+ adcl $0,%ebx
+ movl %eax,(%edi)
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oopM)
+
+ movl %ebx,(%edi) C most significant limb of product
+ addl $4,%edi C increment wp
+ movl PARAM_XSIZE,%eax
+ shll $2,%eax
+ subl %eax,%edi
+ subl %eax,%esi
+
+ movl PARAM_YSIZE,%eax C ysize
+ decl %eax
+ jz L(skip)
+ movl %eax,VAR_COUNTER C set index i to ysize
+
+L(outer):
+ movl PARAM_YP,%ebp C yp
+ addl $4,%ebp C make ebp point to next v limb
+ movl %ebp,PARAM_YP
+ movl (%ebp),%eax C copy y limb ...
+ movl %eax,VAR_MULTIPLIER C ... to stack slot
+ movl PARAM_XSIZE,%ecx
+
+ xorl %ebx,%ebx
+ andl $3,%ecx
+ jz L(end0)
+
+L(oop0):
+ movl (%esi),%eax
+ mull VAR_MULTIPLIER
+ leal 4(%esi),%esi
+ addl %ebx,%eax
+ movl $0,%ebx
+ adcl %ebx,%edx
+ addl %eax,(%edi)
+ adcl %edx,%ebx C propagate carry into cylimb
+
+ leal 4(%edi),%edi
+ decl %ecx
+ jnz L(oop0)
+
+L(end0):
+ movl PARAM_XSIZE,%ecx
+ shrl $2,%ecx
+ jz L(endX)
+
+ ALIGN(8)
+L(oopX):
+ movl (%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %eax,%ebx
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 4(%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %ebx,(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ movl 8(%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %ebp,4(%edi)
+ adcl %eax,%ebx C new lo + cylimb
+ movl $0,%ebp
+ adcl %edx,%ebp
+
+ movl 12(%esi),%eax
+ mull VAR_MULTIPLIER
+ addl %ebx,8(%edi)
+ adcl %eax,%ebp C new lo + cylimb
+ movl $0,%ebx
+ adcl %edx,%ebx
+
+ addl %ebp,12(%edi)
+ adcl $0,%ebx C propagate carry into cylimb
+
+ leal 16(%esi),%esi
+ leal 16(%edi),%edi
+ decl %ecx
+ jnz L(oopX)
+
+L(endX):
+ movl %ebx,(%edi)
+ addl $4,%edi
+
+ C we incremented wp and xp in the loop above; compensate
+ movl PARAM_XSIZE,%eax
+ shll $2,%eax
+ subl %eax,%edi
+ subl %eax,%esi
+
+ movl VAR_COUNTER,%eax
+ decl %eax
+ movl %eax,VAR_COUNTER
+ jnz L(outer)
+
+L(skip):
+ popl %ebx
+ popl %edi
+ popl %ebp
+ popl %esi
+ addl $8,%esp
+ ret
+
+L(done):
+ movl %edx,4(%edi) C store to wp[1]
+ popl %edi
+ popl %ebp
+ popl %esi
+ addl $8,%esp
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/README b/rts/gmp/mpn/x86/p6/README
new file mode 100644
index 0000000000..7dbc905a0d
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/README
@@ -0,0 +1,95 @@
+
+ INTEL P6 MPN SUBROUTINES
+
+
+
+This directory contains code optimized for Intel P6 class CPUs, meaning
+PentiumPro, Pentium II and Pentium III. The mmx and p3mmx subdirectories
+have routines using MMX instructions.
+
+
+
+STATUS
+
+Times for the loops, with all code and data in L1 cache, are as follows.
+Some of these might be able to be improved.
+
+ cycles/limb
+
+ mpn_add_n/sub_n 3.7
+
+ mpn_copyi 0.75
+ mpn_copyd 2.4
+
+ mpn_divrem_1 39.0
+ mpn_mod_1 39.0
+ mpn_divexact_by3 8.5
+
+ mpn_mul_1 5.5
+ mpn_addmul/submul_1 6.35
+
+ mpn_l/rshift 2.5
+
+ mpn_mul_basecase 8.2 cycles/crossproduct (approx)
+ mpn_sqr_basecase 4.0 cycles/crossproduct (approx)
+ or 7.75 cycles/triangleproduct (approx)
+
+Pentium II and III have MMX and get the following improvements.
+
+ mpn_divrem_1 25.0 integer part, 17.5 fractional part
+ mpn_mod_1 24.0
+
+ mpn_l/rshift 1.75
+
+
+
+
+NOTES
+
+Write-allocate L1 data cache means prefetching of destinations is unnecessary.
+
+Mispredicted branches have a penalty of between 9 and 15 cycles, and even up
+to 26 cycles depending how far speculative execution has gone. The 9 cycle
+minimum penalty comes from the issue pipeline being 9 stages.
+
+A copy with rep movs seems to copy 16 bytes at a time, since speeds for 4,
+5, 6 or 7 limb operations are all the same. The 0.75 cycles/limb would be 3
+cycles per 16 byte block.
+
+
+
+
+CODING
+
+Instructions in general code have been shown grouped if they can execute
+together, which means up to three instructions with no successive
+dependencies, and with only the first being a multiple micro-op.
+
+P6 has out-of-order execution, so the groupings are really only showing
+dependent paths where some shuffling might allow some latencies to be
+hidden.
+
+
+
+
+REFERENCES
+
+"Intel Architecture Optimization Reference Manual", 1999, revision 001 dated
+02/99, order number 245127 (order number 730795-001 is in the document too).
+Available on-line:
+
+ http://download.intel.com/design/PentiumII/manuals/245127.htm
+
+"Intel Architecture Optimization Manual", 1997, order number 242816. This
+is an older document mostly about P5 and not as good as the above.
+Available on-line:
+
+ http://download.intel.com/design/PentiumII/manuals/242816.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/p6/aorsmul_1.asm b/rts/gmp/mpn/x86/p6/aorsmul_1.asm
new file mode 100644
index 0000000000..feb364ec0b
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/aorsmul_1.asm
@@ -0,0 +1,300 @@
+dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
+dnl
+dnl P6: 6.35 cycles/limb (at 16 limbs/loop).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl P6 UNROLL_COUNT cycles/limb
+dnl 8 6.7
+dnl 16 6.35
+dnl 32 6.3
+dnl 64 6.3
+dnl Maximum possible with the current code is 64.
+
+deflit(UNROLL_COUNT, 16)
+
+
+ifdef(`OPERATION_addmul_1', `
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+ define(M4_function_1c, mpn_addmul_1c)
+ define(M4_description, add it to)
+ define(M4_desc_retval, carry)
+',`ifdef(`OPERATION_submul_1', `
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+ define(M4_function_1c, mpn_submul_1c)
+ define(M4_description, subtract it from)
+ define(M4_desc_retval, borrow)
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t M4_function_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C Calculate src,size multiplied by mult and M4_description dst,size.
+C Return the M4_desc_retval limb from the top of the result.
+C
+C This code is pretty much the same as the K6 code. The unrolled loop is
+C the same, but there's just a few scheduling tweaks in the setups and the
+C simple loop.
+C
+C A number of variations have been tried for the unrolled loop, with one or
+C two carries, and with loads scheduled earlier, but nothing faster than 6
+C cycles/limb has been found.
+
+ifdef(`PIC',`
+deflit(UNROLL_THRESHOLD, 5)
+',`
+deflit(UNROLL_THRESHOLD, 5)
+')
+
+defframe(PARAM_CARRY, 20)
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+
+PROLOGUE(M4_function_1c)
+ pushl %ebx
+deflit(`FRAME',4)
+ movl PARAM_CARRY, %ebx
+ jmp LF(M4_function_1,start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_function_1)
+ push %ebx
+deflit(`FRAME',4)
+ xorl %ebx, %ebx C initial carry
+
+L(start_nc):
+ movl PARAM_SIZE, %ecx
+ pushl %esi
+deflit(`FRAME',8)
+
+ movl PARAM_SRC, %esi
+ pushl %edi
+deflit(`FRAME',12)
+
+ movl PARAM_DST, %edi
+ pushl %ebp
+deflit(`FRAME',16)
+ cmpl $UNROLL_THRESHOLD, %ecx
+
+ movl PARAM_MULTIPLIER, %ebp
+ jae L(unroll)
+
+
+ C simple loop
+ C this is offset 0x22, so close enough to aligned
+L(simple):
+ C eax scratch
+ C ebx carry
+ C ecx counter
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+
+ movl (%esi), %eax
+ addl $4, %edi
+
+ mull %ebp
+
+ addl %ebx, %eax
+ adcl $0, %edx
+
+ M4_inst %eax, -4(%edi)
+ movl %edx, %ebx
+
+ adcl $0, %ebx
+ decl %ecx
+
+ leal 4(%esi), %esi
+ jnz L(simple)
+
+
+ popl %ebp
+ popl %edi
+
+ popl %esi
+ movl %ebx, %eax
+
+ popl %ebx
+ ret
+
+
+
+C------------------------------------------------------------------------------
+C VAR_JUMP holds the computed jump temporarily because there's not enough
+C registers when doing the mul for the initial two carry limbs.
+C
+C The add/adc for the initial carry in %ebx is necessary only for the
+C mpn_add/submul_1c entry points. Duplicating the startup code to
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C idea.
+
+dnl overlapping with parameters already fetched
+define(VAR_COUNTER,`PARAM_SIZE')
+define(VAR_JUMP, `PARAM_DST')
+
+ C this is offset 0x43, so close enough to aligned
+L(unroll):
+ C eax
+ C ebx initial carry
+ C ecx size
+ C edx
+ C esi src
+ C edi dst
+ C ebp
+
+ movl %ecx, %edx
+ decl %ecx
+
+ subl $2, %edx
+ negl %ecx
+
+ shrl $UNROLL_LOG2, %edx
+ andl $UNROLL_MASK, %ecx
+
+ movl %edx, VAR_COUNTER
+ movl %ecx, %edx
+
+ C 15 code bytes per limb
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ shll $4, %edx
+ negl %ecx
+
+ leal L(entry) (%edx,%ecx,1), %edx
+')
+ movl (%esi), %eax C src low limb
+
+ movl %edx, VAR_JUMP
+ leal ifelse(UNROLL_BYTES,256,128+) 4(%esi,%ecx,4), %esi
+
+ mull %ebp
+
+ addl %ebx, %eax C initial carry (from _1c)
+ adcl $0, %edx
+
+ movl %edx, %ebx C high carry
+ leal ifelse(UNROLL_BYTES,256,128) (%edi,%ecx,4), %edi
+
+ movl VAR_JUMP, %edx
+ testl $1, %ecx
+ movl %eax, %ecx C low carry
+
+ cmovnz( %ebx, %ecx) C high,low carry other way around
+ cmovnz( %eax, %ebx)
+
+ jmp *%edx
+
+
+ifdef(`PIC',`
+L(pic_calc):
+ shll $4, %edx
+ negl %ecx
+
+ C See README.family about old gas bugs
+ leal (%edx,%ecx,1), %edx
+ addl $L(entry)-L(here), %edx
+
+ addl (%esp), %edx
+
+ ret
+')
+
+
+C -----------------------------------------------------------
+ ALIGN(32)
+L(top):
+deflit(`FRAME',16)
+ C eax scratch
+ C ebx carry hi
+ C ecx carry lo
+ C edx scratch
+ C esi src
+ C edi dst
+ C ebp multiplier
+ C
+ C VAR_COUNTER loop counter
+ C
+ C 15 code bytes per limb
+
+ addl $UNROLL_BYTES, %edi
+
+L(entry):
+deflit(CHUNK_COUNT,2)
+forloop(`i', 0, UNROLL_COUNT/CHUNK_COUNT-1, `
+ deflit(`disp0', eval(i*4*CHUNK_COUNT ifelse(UNROLL_BYTES,256,-128)))
+ deflit(`disp1', eval(disp0 + 4))
+
+Zdisp( movl, disp0,(%esi), %eax)
+ mull %ebp
+Zdisp( M4_inst,%ecx, disp0,(%edi))
+ adcl %eax, %ebx
+ movl %edx, %ecx
+ adcl $0, %ecx
+
+ movl disp1(%esi), %eax
+ mull %ebp
+ M4_inst %ebx, disp1(%edi)
+ adcl %eax, %ecx
+ movl %edx, %ebx
+ adcl $0, %ebx
+')
+
+ decl VAR_COUNTER
+ leal UNROLL_BYTES(%esi), %esi
+
+ jns L(top)
+
+
+deflit(`disp0', eval(UNROLL_BYTES ifelse(UNROLL_BYTES,256,-128)))
+
+ M4_inst %ecx, disp0(%edi)
+ movl %ebx, %eax
+
+ popl %ebp
+ popl %edi
+
+ popl %esi
+ popl %ebx
+ adcl $0, %eax
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/diveby3.asm b/rts/gmp/mpn/x86/p6/diveby3.asm
new file mode 100644
index 0000000000..a77703ea89
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/diveby3.asm
@@ -0,0 +1,37 @@
+dnl Intel P6 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl P6: 8.5 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl The P5 code runs well on P6, in fact better than anything else found so
+dnl far. An imul is 4 cycles, meaning the two cmp/sbbl pairs on the
+dnl dependent path are taking 4.5 cycles.
+dnl
+dnl The destination cache line prefetching is unnecessary on P6, but
+dnl removing it is a 2 cycle slowdown (approx), so it must be inducing
+dnl something good in the out of order execution.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_divexact_by3c)
+include_mpn(`x86/pentium/diveby3.asm')
diff --git a/rts/gmp/mpn/x86/p6/gmp-mparam.h b/rts/gmp/mpn/x86/p6/gmp-mparam.h
new file mode 100644
index 0000000000..d7bfb6d60c
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/gmp-mparam.h
@@ -0,0 +1,96 @@
+/* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 5 /* cycles */
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 39 /* cycles */
+#endif
+
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 2 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 23
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 139
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 52
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 166
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 116
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 66
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 20
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 54
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 592, 1440, 2688, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 608
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 5888
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 656, 1504, 2944, 6656, 18432, 57344, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 672
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 5888
+#endif
diff --git a/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm b/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm
new file mode 100644
index 0000000000..f1b011b623
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/mmx/divrem_1.asm
@@ -0,0 +1,677 @@
+dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division.
+dnl
+dnl P6MMX: 25.0 cycles/limb integer part, 17.5 cycles/limb fraction part.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divrem_1 (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C mp_limb_t mpn_divrem_1c (mp_ptr dst, mp_size_t xsize,
+C mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor, mp_limb_t carry);
+C
+C This code is a lightly reworked version of mpn/x86/k7/mmx/divrem_1.asm,
+C see that file for some comments. It's likely what's here can be improved.
+
+
+dnl MUL_THRESHOLD is the value of xsize+size at which the multiply by
+dnl inverse method is used, rather than plain "divl"s. Minimum value 1.
+dnl
+dnl The different speeds of the integer and fraction parts means that using
+dnl xsize+size isn't quite right. The threshold wants to be a bit higher
+dnl for the integer part and a bit lower for the fraction part. (Or what's
+dnl really wanted is to speed up the integer part!)
+dnl
+dnl The threshold is set to make the integer part right. At 4 limbs the
+dnl div and mul are about the same there, but on the fractional part the
+dnl mul is much faster.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_CARRY, 24)
+defframe(PARAM_DIVISOR,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC, 12)
+defframe(PARAM_XSIZE, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC, -28)
+defframe(VAR_DST, -32)
+defframe(VAR_DST_STOP,-36)
+
+deflit(STACK_SPACE, 36)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_divrem_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ leal -4(%edi,%ebx,4), %edi
+ jmp LF(mpn_divrem_1,start_1c)
+
+EPILOGUE()
+
+
+ C offset 0x31, close enough to aligned
+PROLOGUE(mpn_divrem_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_XSIZE, %ebx
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ orl %ecx, %ecx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ leal -4(%edi,%ebx,4), %edi C &dst[xsize-1]
+ jz L(no_skip_div)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+ cmpl %ebp, %eax C one less div if high<divisor
+ jnb L(no_skip_div)
+
+ movl $0, (%edi,%ecx,4) C dst high limb
+ decl %ecx C size-1
+ movl %eax, %edx C src high limb as initial carry
+L(no_skip_div):
+
+
+L(start_1c):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ leal (%ebx,%ecx), %eax C size+xsize
+ cmpl $MUL_THRESHOLD, %eax
+ jae L(mul_by_inverse)
+
+ orl %ecx, %ecx
+ jz L(divide_no_integer)
+
+L(divide_integer):
+ C eax scratch (quotient)
+ C ebx xsize
+ C ecx counter
+ C edx scratch (remainder)
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl %ebp
+
+ movl %eax, (%edi,%ecx,4)
+ decl %ecx
+ jnz L(divide_integer)
+
+
+L(divide_no_integer):
+ movl PARAM_DST, %edi
+ orl %ebx, %ebx
+ jnz L(divide_fraction)
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBX, %ebx
+ movl %edx, %eax
+
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+L(divide_fraction):
+ C eax scratch (quotient)
+ C ebx counter
+ C ecx
+ C edx scratch (remainder)
+ C esi
+ C edi dst
+ C ebp divisor
+
+ movl $0, %eax
+
+ divl %ebp
+
+ movl %eax, -4(%edi,%ebx,4)
+ decl %ebx
+ jnz L(divide_fraction)
+
+ jmp L(divide_done)
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx xsize
+ C ecx size
+ C edx carry
+ C esi src
+ C edi &dst[xsize-1]
+ C ebp divisor
+
+ leal 12(%edi), %ebx
+
+ movl %ebx, VAR_DST_STOP
+ leal 4(%edi,%ecx,4), %edi C &dst[xsize+size]
+
+ movl %edi, VAR_DST
+ movl %ecx, %ebx C size
+
+ bsrl %ebp, %ecx C 31-l
+ movl %edx, %edi C carry
+
+ leal 1(%ecx), %eax C 32-l
+ xorl $31, %ecx C l
+
+ movl %ecx, VAR_NORM
+ movl $-1, %edx
+
+ shll %cl, %ebp C d normalized
+ movd %eax, %mm7
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 giving edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ movl %eax, VAR_INVERSE
+ orl %ebx, %ebx C size
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl %eax, VAR_SRC
+ jz L(start_zero)
+
+ movl 8(%eax), %esi C src high limb
+ cmpl $1, %ebx
+ jz L(start_one)
+
+L(start_two_or_more):
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ cmpl $2, %ebx
+ je L(integer_two_left)
+ jmp L(integer_top)
+
+
+L(start_one):
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shll %cl, %esi C n10 = high << l
+ jmp L(integer_one_left)
+
+
+L(start_zero):
+ shll %cl, %edi C n2 = carry << l
+ movl $0, %esi C n10 = 0
+
+ C we're here because xsize+size>=MUL_THRESHOLD, so with size==0 then
+ C must have xsize!=0
+ jmp L(fraction_some)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C This loop runs at about 25 cycles, which is probably sub-optimal, and
+C certainly more than the dependent chain would suggest. A better loop, or
+C a better rough analysis of what's possible, would be welcomed.
+C
+C In the current implementation, the following successively dependent
+C micro-ops seem to exist.
+C
+C uops
+C n2+n1 1 (addl)
+C mul 5
+C q1+1 3 (addl/adcl)
+C mul 5
+C sub 3 (subl/sbbl)
+C addback 2 (cmov)
+C ---
+C 19
+C
+C Lack of registers hinders explicit scheduling and it might be that the
+C normal out of order execution isn't able to hide enough under the mul
+C latencies.
+C
+C Using sarl/negl to pick out n1 for the n2+n1 stage is a touch faster than
+C cmov (and takes one uop off the dependent chain). A sarl/andl/addl
+C combination was tried for the addback (despite the fact it would lengthen
+C the dependent chain) but found to be no faster.
+
+
+ ALIGN(16)
+L(integer_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp d
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+ movl VAR_SRC, %ecx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+ movq (%ecx), %mm0 C next src limb and the one below it
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ subl $4, %ecx
+
+ movl %ecx, VAR_SRC
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ movl %ebp, %eax C d
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+
+ mull %ebx C (q1+1)*d
+
+ movl VAR_DST, %ecx
+ psrlq %mm7, %mm0
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+ movl VAR_DST_STOP, %eax
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+
+ sbbl $0, %ebx C q
+ subl $4, %ecx
+
+ movl %ebx, (%ecx)
+ cmpl %eax, %ecx
+
+ movl %ecx, VAR_DST
+ jne L(integer_top)
+
+
+L(integer_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C
+C Here, and in integer_one_left below, an sbbl $0 is used rather than a jz
+C q1_ff special case. This makes the code a bit smaller and simpler, and
+C costs only 2 cycles (each).
+
+L(integer_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (src, dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+ movl PARAM_SRC, %ecx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd (%ecx), %mm0 C src low limb
+
+ movl VAR_DST_STOP, %ecx
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+
+ sbbl $0, %ebx C q
+
+ movl %ebx, -4(%ecx)
+
+
+C -----------------------------------------------------------------------------
+L(integer_one_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx scratch (dst)
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+ movl VAR_DST_STOP, %ecx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx C q1 if q1+1 overflowed
+
+ mull %ebx
+
+ C
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+ movl PARAM_XSIZE, %eax
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+
+ sbbl $0, %ebx C q
+
+ movl %ebx, -8(%ecx)
+ subl $8, %ecx
+
+
+
+ orl %eax, %eax C xsize
+ jnz L(fraction_some)
+
+ movl %edi, %eax
+L(fraction_done):
+ movl VAR_NORM, %ecx
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EDI, %edi
+
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBX, %ebx
+ addl $STACK_SPACE, %esp
+
+ shrl %cl, %eax
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx
+ C edx
+ C esi n10
+ C edi n2
+ C ebp divisor
+
+ movl VAR_DST, %ecx
+ movl VAR_DST_STOP, %edx
+ subl $4, %ecx
+
+ movl %ecx, VAR_DST
+ psrlq %mm7, %mm0
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+
+ movl $-1, (%ecx)
+ movd %mm0, %esi C next n10
+
+ cmpl %ecx, %edx
+ jne L(integer_top)
+
+ jmp L(integer_loop_done)
+
+
+
+C -----------------------------------------------------------------------------
+C
+C In the current implementation, the following successively dependent
+C micro-ops seem to exist.
+C
+C uops
+C mul 5
+C q1+1 1 (addl)
+C mul 5
+C sub 3 (negl/sbbl)
+C addback 2 (cmov)
+C ---
+C 16
+C
+C The loop in fact runs at about 17.5 cycles. Using a sarl/andl/addl for
+C the addback was found to be a touch slower.
+
+
+ ALIGN(16)
+L(fraction_some):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi carry
+ C ebp divisor
+
+ movl PARAM_DST, %esi
+ movl VAR_DST_STOP, %ecx
+ movl %edi, %eax
+
+ subl $8, %ecx
+
+
+ ALIGN(16)
+L(fraction_top):
+ C eax n2, then scratch
+ C ebx scratch (nadj, q1)
+ C ecx dst, decrementing
+ C edx scratch
+ C esi dst stop point
+ C edi n2
+ C ebp divisor
+
+ mull VAR_INVERSE C m*n2
+
+ movl %ebp, %eax C d
+ subl $4, %ecx C dst
+ leal 1(%edi), %ebx
+
+ C
+
+ C
+
+ C
+
+ addl %edx, %ebx C 1 + high(n2<<32 + m*n2) = q1+1
+
+ mull %ebx C (q1+1)*d
+
+ C
+
+ C
+
+ C
+
+ C
+
+ negl %eax C low of n - (q1+1)*d
+
+ sbbl %edx, %edi C high of n - (q1+1)*d, caring only about carry
+ leal (%ebp,%eax), %edx
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+
+ sbbl $0, %ebx C q
+ movl %eax, %edi C remainder->n2
+ cmpl %esi, %ecx
+
+ movl %ebx, (%ecx) C previous q
+ jne L(fraction_top)
+
+
+ jmp L(fraction_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/mmx/mod_1.asm b/rts/gmp/mpn/x86/p6/mmx/mod_1.asm
new file mode 100644
index 0000000000..e7d8d94d33
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/mmx/mod_1.asm
@@ -0,0 +1,444 @@
+dnl Intel Pentium-II mpn_mod_1 -- mpn by limb remainder.
+dnl
+dnl P6MMX: 24.0 cycles/limb.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C The code here very similar to mpn_divrem_1, but with the quotient
+C discarded. What's here probably isn't optimal.
+C
+C See mpn/x86/p6/mmx/divrem_1.asm and mpn/x86/k7/mmx/mod_1.asm for some
+C comments.
+
+
+dnl MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC_STOP,-28)
+
+deflit(STACK_SPACE, 28)
+
+ .text
+ ALIGN(16)
+
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ jmp LF(mpn_mod_1,start_1c)
+
+EPILOGUE()
+
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl $0, %edx C initial carry (if can't skip a div)
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C carry flag if high<divisor
+
+ cmovc( %eax, %edx) C src high limb as initial carry
+ sbbl $0, %ecx C size-1 to skip one div
+ jz L(divide_done)
+
+
+ ALIGN(16)
+L(start_1c):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ cmpl $MUL_THRESHOLD, %ecx
+ jae L(mul_by_inverse)
+
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl %ebp
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl %edx, %eax
+
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ movl %ebx, SAVE_EBX
+ leal -4(%esi), %ebx
+
+ movl %ebx, VAR_SRC_STOP
+ movl %ecx, %ebx C size
+
+ movl %edi, SAVE_EDI
+ movl %edx, %edi C carry
+
+ bsrl %ebp, %ecx C 31-l
+ movl $-1, %edx
+
+ leal 1(%ecx), %eax C 32-l
+ xorl $31, %ecx C l
+
+ movl %ecx, VAR_NORM
+ shll %cl, %ebp C d normalized
+
+ movd %eax, %mm7
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ C
+
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl 8(%eax), %esi C src high limb
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ movl %eax, %ecx C &src[size-3]
+
+
+ifelse(MUL_THRESHOLD,2,`
+ cmpl $2, %ebx
+ je L(inverse_two_left)
+')
+
+
+C The dependent chain here is the same as in mpn_divrem_1, but a few
+C instructions are saved by not needing to store the quotient limbs. This
+C gets it down to 24 c/l, which is still a bit away from a theoretical 19
+C c/l.
+
+ ALIGN(16)
+L(inverse_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx src pointer, decrementing
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next src limb and the one below it
+ subl $4, %ecx
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+ movl VAR_SRC_STOP, %ebx
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+ cmpl %ebx, %ecx
+
+ jne L(inverse_top)
+
+
+L(inverse_loop_done):
+
+
+C -----------------------------------------------------------------------------
+
+L(inverse_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx &src[-1]
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src dword)
+ C mm7 rshift
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd 4(%ecx), %mm0 C src low limb
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+ movl %ebp, %eax C d
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ movd %mm0, %esi
+
+
+C One limb left
+
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ movl %esi, %eax
+ movl %ebp, %ebx
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movl VAR_NORM, %ecx C for final denorm
+
+ C
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2<<32 + m*(n2+n1))
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+ movl %ebp, %eax C d
+
+ mull %ebx C (q1+1)*d
+
+ movl SAVE_EBX, %ebx
+
+ C
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ leal (%ebp,%esi), %edx
+ movl SAVE_EBP, %ebp
+
+ movl %esi, %eax C remainder
+ movl SAVE_ESI, %esi
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ movl SAVE_EDI, %edi
+
+ shrl %cl, %eax C denorm remainder
+ addl $STACK_SPACE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx src pointer
+ C edx
+ C esi n10
+ C edi (n2)
+ C ebp divisor
+
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ movl VAR_SRC_STOP, %edx
+ psrlq %mm7, %mm0
+
+ movd %mm0, %esi C next n10
+ cmpl %ecx, %edx
+ jne L(inverse_top)
+
+ jmp L(inverse_loop_done)
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/p6/mmx/popham.asm b/rts/gmp/mpn/x86/p6/mmx/popham.asm
new file mode 100644
index 0000000000..50f9a11218
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/mmx/popham.asm
@@ -0,0 +1,31 @@
+dnl Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and
+dnl hamming distance.
+dnl
+dnl P6MMX: popcount 11 cycles/limb (approx), hamdist 11.5 cycles/limb
+dnl (approx)
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/rts/gmp/mpn/x86/p6/p3mmx/popham.asm b/rts/gmp/mpn/x86/p6/p3mmx/popham.asm
new file mode 100644
index 0000000000..e63fbf334b
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/p3mmx/popham.asm
@@ -0,0 +1,30 @@
+dnl Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and
+dnl hamming distance.
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Haven't actually measured it, but the K7 code with the psadbw should be
+dnl good on P-III.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/rts/gmp/mpn/x86/p6/sqr_basecase.asm b/rts/gmp/mpn/x86/p6/sqr_basecase.asm
new file mode 100644
index 0000000000..174c78406a
--- /dev/null
+++ b/rts/gmp/mpn/x86/p6/sqr_basecase.asm
@@ -0,0 +1,641 @@
+dnl Intel P6 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl P6: approx 4.0 cycles per cross product, or 7.75 cycles per triangular
+dnl product (measured on the speed difference between 20 and 40 limbs,
+dnl which is the Karatsuba recursing range).
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for
+dnl a description. The only difference here is that UNROLL_COUNT can go up
+dnl to 64 (not 63) making KARATSUBA_SQR_THRESHOLD_MAX 67.
+
+deflit(KARATSUBA_SQR_THRESHOLD_MAX, 67)
+
+ifdef(`KARATSUBA_SQR_THRESHOLD_OVERRIDE',
+`define(`KARATSUBA_SQR_THRESHOLD',KARATSUBA_SQR_THRESHOLD_OVERRIDE)')
+
+m4_config_gmp_mparam(`KARATSUBA_SQR_THRESHOLD')
+deflit(UNROLL_COUNT, eval(KARATSUBA_SQR_THRESHOLD-3))
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the given size
+C is small.
+C
+C The code size might look a bit excessive, but not all of it is executed so
+C it won't all get into the code cache. The 1x1, 2x2 and 3x3 special cases
+C clearly apply only to those sizes; mid sizes like 10x10 only need part of
+C the unrolled addmul; and big sizes like 40x40 that do use the full
+C unrolling will at least be making good use of it, because 40x40 will take
+C something like 7000 cycles.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(32)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %edx
+
+ movl PARAM_SRC, %eax
+
+ cmpl $2, %edx
+ movl PARAM_DST, %ecx
+ je L(two_limbs)
+
+ movl (%eax), %eax
+ ja L(three_or_more)
+
+
+C -----------------------------------------------------------------------------
+C one limb only
+ C eax src limb
+ C ebx
+ C ecx dst
+ C edx
+
+ mull %eax
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx dst
+ C edx
+
+defframe(SAVE_ESI, -4)
+defframe(SAVE_EBX, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(`STACK_SPACE',16)
+
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl %eax, %esi
+ movl (%eax), %eax
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx) C dst[0]
+ movl 4(%esi), %eax
+
+ movl %ebx, SAVE_EBX
+ movl %edx, %ebx C dst[1]
+
+ mull %eax C src[1]^2
+
+ movl %edi, SAVE_EDI
+ movl %eax, %edi C dst[2]
+ movl (%esi), %eax
+
+ movl %ebp, SAVE_EBP
+ movl %edx, %ebp C dst[3]
+
+ mull 4(%esi) C src[0]*src[1]
+
+ addl %eax, %ebx
+ movl SAVE_ESI, %esi
+
+ adcl %edx, %edi
+
+ adcl $0, %ebp
+ addl %ebx, %eax
+ movl SAVE_EBX, %ebx
+
+ adcl %edi, %edx
+ movl SAVE_EDI, %edi
+
+ adcl $0, %ebp
+
+ movl %eax, 4(%ecx)
+
+ movl %ebp, 12(%ecx)
+ movl SAVE_EBP, %ebp
+
+ movl %edx, 8(%ecx)
+ addl $FRAME, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(three_or_more):
+ C eax src low limb
+ C ebx
+ C ecx dst
+ C edx size
+deflit(`FRAME',0)
+
+ pushl %esi defframe_pushl(`SAVE_ESI')
+ cmpl $4, %edx
+
+ movl PARAM_SRC, %esi
+ jae L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+
+ C eax src low limb
+ C ebx
+ C ecx dst
+ C edx
+ C esi src
+ C edi
+ C ebp
+
+ pushl %ebp defframe_pushl(`SAVE_EBP')
+ pushl %edi defframe_pushl(`SAVE_EDI')
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ movl 4(%esi), %eax
+ xorl %ebp, %ebp
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl %edx, 12(%ecx)
+ movl 8(%esi), %eax
+
+ pushl %ebx defframe_pushl(`SAVE_EBX')
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl %edx, 20(%ecx)
+
+ movl (%esi), %eax
+
+ mull 4(%esi) C src[0] * src[1]
+
+ movl %eax, %ebx
+ movl %edx, %edi
+
+ movl (%esi), %eax
+
+ mull 8(%esi) C src[0] * src[2]
+
+ addl %eax, %edi
+ movl %edx, %ebp
+
+ adcl $0, %ebp
+ movl 4(%esi), %eax
+
+ mull 8(%esi) C src[1] * src[2]
+
+ xorl %esi, %esi
+ addl %eax, %ebp
+
+ C eax
+ C ebx dst[1]
+ C ecx dst
+ C edx dst[4]
+ C esi zero, will be dst[5]
+ C edi dst[2]
+ C ebp dst[3]
+
+ adcl $0, %edx
+ addl %ebx, %ebx
+
+ adcl %edi, %edi
+
+ adcl %ebp, %ebp
+
+ adcl %edx, %edx
+ movl 4(%ecx), %eax
+
+ adcl $0, %esi
+ addl %ebx, %eax
+
+ movl %eax, 4(%ecx)
+ movl 8(%ecx), %eax
+
+ adcl %edi, %eax
+ movl 12(%ecx), %ebx
+
+ adcl %ebp, %ebx
+ movl 16(%ecx), %edi
+
+ movl %eax, 8(%ecx)
+ movl SAVE_EBP, %ebp
+
+ movl %ebx, 12(%ecx)
+ movl SAVE_EBX, %ebx
+
+ adcl %edx, %edi
+ movl 20(%ecx), %eax
+
+ movl %edi, 16(%ecx)
+ movl SAVE_EDI, %edi
+
+ adcl %esi, %eax C no carry out of this
+ movl SAVE_ESI, %esi
+
+ movl %eax, 20(%ecx)
+ addl $FRAME, %esp
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+defframe(VAR_COUNTER,-20)
+defframe(VAR_JMP, -24)
+deflit(`STACK_SPACE',24)
+
+L(four_or_more):
+ C eax src low limb
+ C ebx
+ C ecx
+ C edx size
+ C esi src
+ C edi
+ C ebp
+deflit(`FRAME',4) dnl %esi already pushed
+
+C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+ subl $STACK_SPACE-FRAME, %esp
+deflit(`FRAME',STACK_SPACE)
+ movl $1, %ecx
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebx, SAVE_EBX
+ subl %edx, %ecx C -(size-1)
+
+ movl %ebp, SAVE_EBP
+ movl $0, %ebx C initial carry
+
+ leal (%esi,%edx,4), %esi C &src[size]
+ movl %eax, %ebp C multiplier
+
+ leal -4(%edi,%edx,4), %edi C &dst[size-1]
+
+
+C This loop runs at just over 6 c/l.
+
+L(mul_1):
+ C eax scratch
+ C ebx carry
+ C ecx counter, limbs, negative, -(size-1) to -1
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size-1]
+ C ebp multiplier
+
+ movl %ebp, %eax
+
+ mull (%esi,%ecx,4)
+
+ addl %ebx, %eax
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+ movl %eax, 4(%edi,%ecx,4)
+
+ incl %ecx
+ jnz L(mul_1)
+
+
+ movl %ebx, 4(%edi)
+
+
+C Addmul src[n]*src[n+1..size-1] at dst[2*n-1...], for each n=1..size-2.
+C
+C The last two addmuls, which are the bottom right corner of the product
+C triangle, are left to the end. These are src[size-3]*src[size-2,size-1]
+C and src[size-2]*src[size-1]. If size is 4 then it's only these corner
+C cases that need to be done.
+C
+C The unrolled code is the same as mpn_addmul_1(), see that routine for some
+C comments.
+C
+C VAR_COUNTER is the outer loop, running from -(size-4) to -1, inclusive.
+C
+C VAR_JMP is the computed jump into the unrolled code, stepped by one code
+C chunk each outer loop.
+
+dnl This is also hard-coded in the address calculation below.
+deflit(CODE_BYTES_PER_LIMB, 15)
+
+dnl With &src[size] and &dst[size-1] pointers, the displacements in the
+dnl unrolled code fit in a byte for UNROLL_COUNT values up to 32, but above
+dnl that an offset must be added to them.
+deflit(OFFSET,
+ifelse(eval(UNROLL_COUNT>32),1,
+eval((UNROLL_COUNT-32)*4),
+0))
+
+ C eax
+ C ebx carry
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[size-1]
+ C ebp
+
+ movl PARAM_SIZE, %ecx
+
+ subl $4, %ecx
+ jz L(corner)
+
+ movl %ecx, %edx
+ negl %ecx
+
+ shll $4, %ecx
+ifelse(OFFSET,0,,`subl $OFFSET, %esi')
+
+ifdef(`PIC',`
+ call L(pic_calc)
+L(here):
+',`
+ leal L(unroll_inner_end)-eval(2*CODE_BYTES_PER_LIMB)(%ecx,%edx), %ecx
+')
+ negl %edx
+
+ifelse(OFFSET,0,,`subl $OFFSET, %edi')
+
+ C The calculated jump mustn't be before the start of the available
+ C code. This is the limit that UNROLL_COUNT puts on the src operand
+ C size, but checked here using the jump address directly.
+
+ ASSERT(ae,
+ `movl_text_address( L(unroll_inner_start), %eax)
+ cmpl %eax, %ecx')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(unroll_outer_top):
+ C eax
+ C ebx high limb to store
+ C ecx VAR_JMP
+ C edx VAR_COUNTER, limbs, negative
+ C esi &src[size], constant
+ C edi dst ptr, second highest limb of last addmul
+ C ebp
+
+ movl -12+OFFSET(%esi,%edx,4), %ebp C multiplier
+ movl %edx, VAR_COUNTER
+
+ movl -8+OFFSET(%esi,%edx,4), %eax C first limb of multiplicand
+
+ mull %ebp
+
+define(cmovX,`ifelse(eval(UNROLL_COUNT%2),1,`cmovz($@)',`cmovnz($@)')')
+
+ testb $1, %cl
+
+ movl %edx, %ebx C high carry
+ leal 4(%edi), %edi
+
+ movl %ecx, %edx C jump
+
+ movl %eax, %ecx C low carry
+ leal CODE_BYTES_PER_LIMB(%edx), %edx
+
+ cmovX( %ebx, %ecx) C high carry reverse
+ cmovX( %eax, %ebx) C low carry reverse
+ movl %edx, VAR_JMP
+ jmp *%edx
+
+
+ C Must be on an even address here so the low bit of the jump address
+ C will indicate which way around ecx/ebx should start.
+
+ ALIGN(2)
+
+L(unroll_inner_start):
+ C eax scratch
+ C ebx carry high
+ C ecx carry low
+ C edx scratch
+ C esi src pointer
+ C edi dst pointer
+ C ebp multiplier
+ C
+ C 15 code bytes each limb
+ C ecx/ebx reversed on each chunk
+
+forloop(`i', UNROLL_COUNT, 1, `
+ deflit(`disp_src', eval(-i*4 + OFFSET))
+ deflit(`disp_dst', eval(disp_src))
+
+ m4_assert(`disp_src>=-128 && disp_src<128')
+ m4_assert(`disp_dst>=-128 && disp_dst<128')
+
+ifelse(eval(i%2),0,`
+Zdisp( movl, disp_src,(%esi), %eax)
+ mull %ebp
+Zdisp( addl, %ebx, disp_dst,(%edi))
+ adcl %eax, %ecx
+ movl %edx, %ebx
+ adcl $0, %ebx
+',`
+ dnl this one comes out last
+Zdisp( movl, disp_src,(%esi), %eax)
+ mull %ebp
+Zdisp( addl, %ecx, disp_dst,(%edi))
+ adcl %eax, %ebx
+ movl %edx, %ecx
+ adcl $0, %ecx
+')
+')
+L(unroll_inner_end):
+
+ addl %ebx, m4_empty_if_zero(OFFSET)(%edi)
+
+ movl VAR_COUNTER, %edx
+ adcl $0, %ecx
+
+ movl %ecx, m4_empty_if_zero(OFFSET+4)(%edi)
+ movl VAR_JMP, %ecx
+
+ incl %edx
+ jnz L(unroll_outer_top)
+
+
+ifelse(OFFSET,0,,`
+ addl $OFFSET, %esi
+ addl $OFFSET, %edi
+')
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(16)
+L(corner):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[2*size-5]
+ C ebp
+
+ movl -12(%esi), %eax
+
+ mull -8(%esi)
+
+ addl %eax, (%edi)
+ movl -12(%esi), %eax
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+
+ mull -4(%esi)
+
+ addl %eax, %ebx
+ movl -8(%esi), %eax
+
+ adcl $0, %edx
+
+ addl %ebx, 4(%edi)
+ movl $0, %ebx
+
+ adcl %edx, %ebx
+
+ mull -4(%esi)
+
+ movl PARAM_SIZE, %ecx
+ addl %ebx, %eax
+
+ adcl $0, %edx
+
+ movl %eax, 8(%edi)
+
+ movl %edx, 12(%edi)
+ movl PARAM_DST, %edi
+
+
+C Left shift of dst[1..2*size-2], the bit shifted out becomes dst[2*size-1].
+
+ subl $1, %ecx C size-1
+ xorl %eax, %eax C ready for final adcl, and clear carry
+
+ movl %ecx, %edx
+ movl PARAM_SRC, %esi
+
+
+L(lshift):
+ C eax
+ C ebx
+ C ecx counter, size-1 to 1
+ C edx size-1 (for later use)
+ C esi src (for later use)
+ C edi dst, incrementing
+ C ebp
+
+ rcll 4(%edi)
+ rcll 8(%edi)
+
+ leal 8(%edi), %edi
+ decl %ecx
+ jnz L(lshift)
+
+
+ adcl %eax, %eax
+
+ movl %eax, 4(%edi) C dst most significant limb
+ movl (%esi), %eax C src[0]
+
+ leal 4(%esi,%edx,4), %esi C &src[size]
+ subl %edx, %ecx C -(size-1)
+
+
+C Now add in the squares on the diagonal, src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+
+ mull %eax
+
+ movl %eax, (%edi,%ecx,8) C dst[0]
+
+
+L(diag):
+ C eax scratch
+ C ebx scratch
+ C ecx counter, negative
+ C edx carry
+ C esi &src[size]
+ C edi dst[2*size-2]
+ C ebp
+
+ movl (%esi,%ecx,4), %eax
+ movl %edx, %ebx
+
+ mull %eax
+
+ addl %ebx, 4(%edi,%ecx,8)
+ adcl %eax, 8(%edi,%ecx,8)
+ adcl $0, %edx
+
+ incl %ecx
+ jnz L(diag)
+
+
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+
+ addl %edx, 4(%edi) C dst most significant limb
+
+ movl SAVE_EDI, %edi
+ movl SAVE_EBP, %ebp
+ addl $FRAME, %esp
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+ifdef(`PIC',`
+L(pic_calc):
+ addl (%esp), %ecx
+ addl $L(unroll_inner_end)-L(here)-eval(2*CODE_BYTES_PER_LIMB), %ecx
+ addl %edx, %ecx
+ ret
+')
+
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/README b/rts/gmp/mpn/x86/pentium/README
new file mode 100644
index 0000000000..3b9ec8ac6f
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/README
@@ -0,0 +1,77 @@
+
+ INTEL PENTIUM P5 MPN SUBROUTINES
+
+
+This directory contains mpn functions optimized for Intel Pentium (P5,P54)
+processors. The mmx subdirectory has code for Pentium with MMX (P55).
+
+
+STATUS
+
+ cycles/limb
+
+ mpn_add_n/sub_n 2.375
+
+ mpn_copyi/copyd 1.0
+
+ mpn_divrem_1 44.0
+ mpn_mod_1 44.0
+ mpn_divexact_by3 15.0
+
+ mpn_l/rshift 5.375 normal (6.0 on P54)
+ 1.875 special shift by 1 bit
+
+ mpn_mul_1 13.0
+ mpn_add/submul_1 14.0
+
+ mpn_mul_basecase 14.2 cycles/crossproduct (approx)
+
+ mpn_sqr_basecase 8 cycles/crossproduct (approx)
+ or 15.5 cycles/triangleproduct (approx)
+
+Pentium MMX gets the following improvements
+
+ mpn_l/rshift 1.75
+
+
+1. mpn_lshift and mpn_rshift run at about 6 cycles/limb on P5 and P54, but the
+documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
+or 5 cycles/limb asymptotically. The P55 runs them at the expected speed.
+
+2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop
+overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
+
+3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
+should. Intel documentation says a mul instruction is 10 cycles, but it
+measures 9 and the routines using it run with it as 9.
+
+
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. Pentium doesn't allocate cache lines on writes, unlike most other modern
+processors. Since the functions in the mpn class do array writes, we have to
+handle allocating the destination cache lines by reading a word from it in the
+loops, to achieve the best performance.
+
+2. Pairing of memory operations requires that the two issued operations refer
+to different cache banks. The simplest way to ensure this is to read/write
+two words from the same object. If we make operations on different objects,
+they might or might not be to the same cache bank.
+
+
+
+REFERENCES
+
+"Intel Architecture Optimization Manual", 1997, order number 242816. This
+is mostly about P5, the parts about P6 aren't relevant. Available on-line:
+
+ http://download.intel.com/design/PentiumII/manuals/242816.htm
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/rts/gmp/mpn/x86/pentium/aors_n.asm b/rts/gmp/mpn/x86/pentium/aors_n.asm
new file mode 100644
index 0000000000..a61082a456
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/aors_n.asm
@@ -0,0 +1,196 @@
+dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+dnl
+dnl P5: 2.375 cycles/limb
+
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_add_n',`
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+
+',`ifdef(`OPERATION_sub_n',`
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(M4_function_nc)
+
+	pushl %edi		C save callee-saved registers
+	pushl %esi
+	pushl %ebx
+	pushl %ebp
+deflit(`FRAME',16)
+
+	movl PARAM_DST,%edi
+	movl PARAM_SRC1,%esi
+	movl PARAM_SRC2,%ebp
+	movl PARAM_SIZE,%ecx
+
+	movl (%ebp),%ebx	C src2 low limb
+
+	decl %ecx		C size-1
+	movl %ecx,%edx
+	shrl $3,%ecx		C (size-1)/8 unrolled chunks
+	andl $7,%edx		C (size-1)%8 leftover limbs
+	testl %ecx,%ecx		C zero carry flag
+	jz L(endgo)
+
+	pushl %edx
+FRAME_pushl()
+	movl PARAM_CARRY,%eax
+	shrl $1,%eax		C shift bit 0 into carry
+	jmp LF(M4_function_n,oop)	C share main unrolled loop
+
+L(endgo):
+deflit(`FRAME',16)
+	movl PARAM_CARRY,%eax
+	shrl $1,%eax		C shift bit 0 into carry
+	jmp LF(M4_function_n,end)	C share leftover-limb code
+
+EPILOGUE()
+
+
+	ALIGN(8)
+PROLOGUE(M4_function_n)
+
+	pushl %edi		C save callee-saved registers
+	pushl %esi
+	pushl %ebx
+	pushl %ebp
+deflit(`FRAME',16)
+
+	movl PARAM_DST,%edi
+	movl PARAM_SRC1,%esi
+	movl PARAM_SRC2,%ebp
+	movl PARAM_SIZE,%ecx
+
+	movl (%ebp),%ebx	C src2 low limb
+
+	decl %ecx		C size-1
+	movl %ecx,%edx
+	shrl $3,%ecx		C (size-1)/8 unrolled chunks
+	andl $7,%edx		C (size-1)%8 leftover limbs
+	testl %ecx,%ecx		C zero carry flag
+	jz L(end)
+	pushl %edx
+FRAME_pushl()
+
+	ALIGN(8)
+L(oop):	movl 28(%edi),%eax	C fetch destination cache line
+	leal 32(%edi),%edi
+
+L(1):	movl (%esi),%eax
+	movl 4(%esi),%edx
+	M4_inst %ebx,%eax	C add/sub src2 limb, propagating carry
+	movl 4(%ebp),%ebx
+	M4_inst %ebx,%edx
+	movl 8(%ebp),%ebx
+	movl %eax,-32(%edi)
+	movl %edx,-28(%edi)
+
+L(2):	movl 8(%esi),%eax
+	movl 12(%esi),%edx
+	M4_inst %ebx,%eax
+	movl 12(%ebp),%ebx
+	M4_inst %ebx,%edx
+	movl 16(%ebp),%ebx
+	movl %eax,-24(%edi)
+	movl %edx,-20(%edi)
+
+L(3):	movl 16(%esi),%eax
+	movl 20(%esi),%edx
+	M4_inst %ebx,%eax
+	movl 20(%ebp),%ebx
+	M4_inst %ebx,%edx
+	movl 24(%ebp),%ebx
+	movl %eax,-16(%edi)
+	movl %edx,-12(%edi)
+
+L(4):	movl 24(%esi),%eax
+	movl 28(%esi),%edx
+	M4_inst %ebx,%eax
+	movl 28(%ebp),%ebx
+	M4_inst %ebx,%edx
+	movl 32(%ebp),%ebx
+	movl %eax,-8(%edi)
+	movl %edx,-4(%edi)
+
+	leal 32(%esi),%esi	C leal preserves the carry flag
+	leal 32(%ebp),%ebp
+	decl %ecx
+	jnz L(oop)
+
+	popl %edx
+FRAME_popl()
+L(end):
+	decl %edx		C test %edx w/o clobbering carry
+	js L(end2)
+	incl %edx
+L(oop2):
+	leal 4(%edi),%edi
+	movl (%esi),%eax
+	M4_inst %ebx,%eax
+	movl 4(%ebp),%ebx
+	movl %eax,-4(%edi)
+	leal 4(%esi),%esi	C leal preserves the carry flag
+	leal 4(%ebp),%ebp
+	decl %edx
+	jnz L(oop2)
+L(end2):
+	movl (%esi),%eax	C final limb
+	M4_inst %ebx,%eax
+	movl %eax,(%edi)
+
+	sbbl %eax,%eax		C -1 if final carry/borrow, else 0
+	negl %eax		C return carry, 0 or 1
+
+	popl %ebp
+	popl %ebx
+	popl %esi
+	popl %edi
+	ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/aorsmul_1.asm b/rts/gmp/mpn/x86/pentium/aorsmul_1.asm
new file mode 100644
index 0000000000..147b55610f
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/aorsmul_1.asm
@@ -0,0 +1,99 @@
+dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication.
+dnl
+dnl P5: 14.0 cycles/limb
+
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+ifdef(`OPERATION_addmul_1', `
+ define(M4_inst, addl)
+ define(M4_function_1, mpn_addmul_1)
+
+',`ifdef(`OPERATION_submul_1', `
+ define(M4_inst, subl)
+ define(M4_function_1, mpn_submul_1)
+
+',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
+')')')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+
+C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(M4_function_1)
+
+	pushl %edi		C save callee-saved registers
+	pushl %esi
+	pushl %ebx
+	pushl %ebp
+deflit(`FRAME',16)
+
+	movl PARAM_DST, %edi
+	movl PARAM_SRC, %esi
+	movl PARAM_SIZE, %ecx
+	movl PARAM_MULTIPLIER, %ebp
+
+	leal (%edi,%ecx,4), %edi	C &dst[size]
+	leal (%esi,%ecx,4), %esi	C &src[size]
+	negl %ecx			C -size, counts up towards zero
+	xorl %ebx, %ebx			C carry limb 0, clears carry flag
+	ALIGN(8)
+
+L(oop):	adcl $0, %ebx		C fold carry flag from M4_inst into carry limb
+	movl (%esi,%ecx,4), %eax
+
+	mull %ebp		C edx:eax = src limb * multiplier
+
+	addl %ebx, %eax		C add carry limb to low product
+	movl (%edi,%ecx,4), %ebx	C dst limb
+
+	adcl $0, %edx
+	M4_inst %eax, %ebx	C add to or subtract from dst limb
+
+	movl %ebx, (%edi,%ecx,4)
+	incl %ecx
+
+	movl %edx, %ebx		C high product becomes next carry limb
+	jnz L(oop)
+
+	adcl $0, %ebx		C final carry flag from M4_inst
+	movl %ebx, %eax		C return carry limb
+	popl %ebp
+	popl %ebx
+	popl %esi
+	popl %edi
+	ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/diveby3.asm b/rts/gmp/mpn/x86/pentium/diveby3.asm
new file mode 100644
index 0000000000..dbac81642f
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/diveby3.asm
@@ -0,0 +1,183 @@
+dnl Intel P5 mpn_divexact_by3 -- mpn division by 3, expecting no remainder.
+dnl
+dnl P5: 15.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+
+defframe(PARAM_CARRY,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl multiplicative inverse of 3, modulo 2^32
+deflit(INVERSE_3, 0xAAAAAAAB)
+
+dnl ceil(b/3), ceil(b*2/3) and floor(b*2/3) where b=2^32
+deflit(ONE_THIRD_CEIL, 0x55555556)
+deflit(TWO_THIRDS_CEIL, 0xAAAAAAAB)
+deflit(TWO_THIRDS_FLOOR, 0xAAAAAAAA)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_divexact_by3c)
+deflit(`FRAME',0)
+
+	movl PARAM_SRC, %ecx
+	movl PARAM_SIZE, %edx
+
+	decl %edx		C size-1
+	jnz L(two_or_more)
+
+	movl (%ecx), %edx	C src low (only) limb
+	movl PARAM_CARRY, %eax	C risk of cache bank clash here
+
+	movl PARAM_DST, %ecx
+	subl %eax, %edx		C subtract carry-in
+
+	sbbl %eax, %eax		C 0 or -1
+
+	imull $INVERSE_3, %edx, %edx	C exact division via inverse mod 2^32
+
+	negl %eax		C 0 or 1
+	cmpl $ONE_THIRD_CEIL, %edx
+
+	sbbl $-1, %eax		C +1 if edx>=ceil(b/3)
+	cmpl $TWO_THIRDS_CEIL, %edx
+
+	sbbl $-1, %eax		C +1 if edx>=ceil(b*2/3)
+	movl %edx, (%ecx)
+
+	ret
+
+
+L(two_or_more):
+	C eax
+	C ebx
+	C ecx	src
+	C edx	size-1
+	C esi
+	C edi
+	C ebp
+
+	pushl %ebx	FRAME_pushl()
+	pushl %esi	FRAME_pushl()
+
+	pushl %edi	FRAME_pushl()
+	pushl %ebp	FRAME_pushl()
+
+	movl PARAM_DST, %edi
+	movl PARAM_CARRY, %esi
+
+	movl (%ecx), %eax	C src low limb
+	xorl %ebx, %ebx
+
+	sub %esi, %eax		C subtract carry-in
+	movl $TWO_THIRDS_FLOOR, %esi
+
+	leal (%ecx,%edx,4), %ecx	C &src[size-1]
+	leal (%edi,%edx,4), %edi	C &dst[size-1]
+
+	adcl $0, %ebx		C carry, 0 or 1
+	negl %edx		C -(size-1)
+
+
+C The loop needs a source limb ready at the top, which leads to one limb
+C handled separately at the end, and the special case above for size==1.
+C There doesn't seem to be any scheduling that would keep the speed but move
+C the source load and carry subtract up to the top.
+C
+C The destination cache line prefetching adds 1 cycle to the loop but is
+C considered worthwhile. The slowdown is a factor of 1.07, but will prevent
+C repeated write-throughs if the destination isn't in L1. A version using
+C an outer loop to prefetch only every 8 limbs (a cache line) proved to be
+C no faster, due to unavoidable branch mispredictions in the inner loop.
+C
+C setc is 2 cycles on P54, so an adcl is used instead. If the movl $0,%ebx
+C could be avoided then the src limb fetch could pair up and save a cycle.
+C This would probably mean going to a two limb loop with the carry limb
+C alternately positive or negative, since an sbbl %ebx,%ebx will leave a
+C value which is in the opposite sense to the preceding sbbl/adcl %ebx,%eax.
+C
+C A register is used for TWO_THIRDS_FLOOR because a cmp can't be done as
+C "cmpl %edx, $n" with the immediate as the second operand.
+C
+C The "4" source displacement is in the loop rather than the setup because
+C this gets L(top) aligned to 8 bytes at no cost.
+
+	ALIGN(8)
+L(top):
+	C eax	source limb, carry subtracted
+	C ebx	carry (0 or 1)
+	C ecx	&src[size-1]
+	C edx	counter, limbs, negative
+	C esi	TWO_THIRDS_FLOOR
+	C edi	&dst[size-1]
+	C ebp	scratch (result limb)
+
+	imull $INVERSE_3, %eax, %ebp	C quotient limb via inverse mod 2^32
+
+	cmpl $ONE_THIRD_CEIL, %ebp
+	movl (%edi,%edx,4), %eax	C dst cache line prefetch
+
+	sbbl $-1, %ebx		C +1 if ebp>=ceil(b/3)
+	cmpl %ebp, %esi
+
+	movl 4(%ecx,%edx,4), %eax	C next src limb
+
+	sbbl %ebx, %eax		C and further -1 if ebp>=ceil(b*2/3)
+	movl $0, %ebx
+
+	adcl $0, %ebx		C new carry
+	movl %ebp, (%edi,%edx,4)
+
+	incl %edx
+	jnz L(top)
+
+
+
+	imull $INVERSE_3, %eax, %edx	C last quotient limb
+
+	cmpl $ONE_THIRD_CEIL, %edx
+	movl %edx, (%edi)
+
+	sbbl $-1, %ebx		C +1 if edx>=ceil(b/3)
+	cmpl $TWO_THIRDS_CEIL, %edx
+
+	sbbl $-1, %ebx		C +1 if edx>=ceil(b*2/3)
+	popl %ebp
+
+	movl %ebx, %eax		C return carry limb
+	popl %edi
+
+	popl %esi
+	popl %ebx
+
+	ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/gmp-mparam.h b/rts/gmp/mpn/x86/pentium/gmp-mparam.h
new file mode 100644
index 0000000000..d3ed3d73ce
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/gmp-mparam.h
@@ -0,0 +1,97 @@
+/* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 9 /* cycles */
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 41 /* cycles */
+#endif
+
+/* bsf takes 18-42 cycles; use an average for uniform random numbers */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 20 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 14
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 179
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 22
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 153
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 46
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 110
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 13
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 4
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 25
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 496, 928, 1920, 4608, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 512
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 3840
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 496, 1184, 1920, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 512
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 3840
+#endif
diff --git a/rts/gmp/mpn/x86/pentium/lshift.asm b/rts/gmp/mpn/x86/pentium/lshift.asm
new file mode 100644
index 0000000000..e1e35d4c57
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/lshift.asm
@@ -0,0 +1,236 @@
+dnl Intel Pentium mpn_lshift -- mpn left shift.
+dnl
+dnl cycles/limb
+dnl P5,P54: 6.0
+dnl P55: 5.375
+
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
+C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_lshift)
+
+	pushl %edi		C save callee-saved registers
+	pushl %esi
+	pushl %ebx
+	pushl %ebp
+deflit(`FRAME',16)
+
+	movl PARAM_DST,%edi
+	movl PARAM_SRC,%esi
+	movl PARAM_SIZE,%ebp
+	movl PARAM_SHIFT,%ecx
+
+C We can use faster code for shift-by-1 under certain conditions.
+	cmp $1,%ecx
+	jne L(normal)
+	leal 4(%esi),%eax
+	cmpl %edi,%eax
+	jnc L(special)		C jump if s_ptr + 1 >= res_ptr
+	leal (%esi,%ebp,4),%eax
+	cmpl %eax,%edi
+	jnc L(special)		C jump if res_ptr >= s_ptr + size
+
+L(normal):
+	leal -4(%edi,%ebp,4),%edi	C &dst[size-1]
+	leal -4(%esi,%ebp,4),%esi	C &src[size-1]
+
+	movl (%esi),%edx
+	subl $4,%esi
+	xorl %eax,%eax
+	shldl(	%cl, %edx, %eax)	C compute carry limb
+	pushl %eax			C push carry limb onto stack
+
+	decl %ebp		C size-1
+	pushl %ebp
+	shrl $3,%ebp		C (size-1)/8 unrolled chunks
+	jz L(end)
+
+	movl (%edi),%eax	C fetch destination cache line
+
+	ALIGN(4)
+L(oop):	movl -28(%edi),%eax	C fetch destination cache line
+	movl %edx,%ebx
+
+	movl (%esi),%eax
+	movl -4(%esi),%edx
+	shldl(	%cl, %eax, %ebx)
+	shldl(	%cl, %edx, %eax)
+	movl %ebx,(%edi)
+	movl %eax,-4(%edi)
+
+	movl -8(%esi),%ebx
+	movl -12(%esi),%eax
+	shldl(	%cl, %ebx, %edx)
+	shldl(	%cl, %eax, %ebx)
+	movl %edx,-8(%edi)
+	movl %ebx,-12(%edi)
+
+	movl -16(%esi),%edx
+	movl -20(%esi),%ebx
+	shldl(	%cl, %edx, %eax)
+	shldl(	%cl, %ebx, %edx)
+	movl %eax,-16(%edi)
+	movl %edx,-20(%edi)
+
+	movl -24(%esi),%eax
+	movl -28(%esi),%edx
+	shldl(	%cl, %eax, %ebx)
+	shldl(	%cl, %edx, %eax)
+	movl %ebx,-24(%edi)
+	movl %eax,-28(%edi)
+
+	subl $32,%esi
+	subl $32,%edi
+	decl %ebp
+	jnz L(oop)
+
+L(end):	popl %ebp
+	andl $7,%ebp		C (size-1)%8 leftover limbs
+	jz L(end2)
+L(oop2):
+	movl (%esi),%eax
+	shldl( %cl,%eax,%edx)
+	movl %edx,(%edi)
+	movl %eax,%edx
+	subl $4,%esi
+	subl $4,%edi
+	decl %ebp
+	jnz L(oop2)
+
+L(end2):
+	shll %cl,%edx		C compute least significant limb
+	movl %edx,(%edi)	C store it
+
+	popl %eax		C pop carry limb
+
+	popl %ebp
+	popl %ebx
+	popl %esi
+	popl %edi
+	ret
+
+
+C We loop from least significant end of the arrays, which is only
+C permissible if the source and destination don't overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(special):
+	movl (%esi),%edx
+	addl $4,%esi
+
+	decl %ebp		C size-1
+	pushl %ebp
+	shrl $3,%ebp		C (size-1)/8 unrolled chunks
+
+	addl %edx,%edx		C shift low limb left by 1, carry out in CF
+	incl %ebp
+	decl %ebp		C test %ebp w/o clobbering carry
+	jz L(Lend)
+
+	movl (%edi),%eax	C fetch destination cache line
+
+	ALIGN(4)
+L(Loop):
+	movl 28(%edi),%eax	C fetch destination cache line
+	movl %edx,%ebx
+
+	movl (%esi),%eax
+	movl 4(%esi),%edx
+	adcl %eax,%eax		C shift-by-1 via add-with-carry
+	movl %ebx,(%edi)
+	adcl %edx,%edx
+	movl %eax,4(%edi)
+
+	movl 8(%esi),%ebx
+	movl 12(%esi),%eax
+	adcl %ebx,%ebx
+	movl %edx,8(%edi)
+	adcl %eax,%eax
+	movl %ebx,12(%edi)
+
+	movl 16(%esi),%edx
+	movl 20(%esi),%ebx
+	adcl %edx,%edx
+	movl %eax,16(%edi)
+	adcl %ebx,%ebx
+	movl %edx,20(%edi)
+
+	movl 24(%esi),%eax
+	movl 28(%esi),%edx
+	adcl %eax,%eax
+	movl %ebx,24(%edi)
+	adcl %edx,%edx
+	movl %eax,28(%edi)
+
+	leal 32(%esi),%esi	C use leal not to clobber carry
+	leal 32(%edi),%edi
+	decl %ebp
+	jnz L(Loop)
+
+L(Lend):
+	popl %ebp
+	sbbl %eax,%eax		C save carry in %eax
+	andl $7,%ebp		C (size-1)%8 leftover limbs
+	jz L(Lend2)
+	addl %eax,%eax		C restore carry from eax
+L(Loop2):
+	movl %edx,%ebx
+	movl (%esi),%edx
+	adcl %edx,%edx
+	movl %ebx,(%edi)
+
+	leal 4(%esi),%esi	C use leal not to clobber carry
+	leal 4(%edi),%edi
+	decl %ebp
+	jnz L(Loop2)
+
+	jmp L(L1)
+L(Lend2):
+	addl %eax,%eax		C restore carry from eax
+L(L1):	movl %edx,(%edi)	C store last limb
+
+	sbbl %eax,%eax		C -1 if carry out, else 0
+	negl %eax		C return carry bit, 0 or 1
+
+	popl %ebp
+	popl %ebx
+	popl %esi
+	popl %edi
+	ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h b/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
new file mode 100644
index 0000000000..2379077d0c
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
@@ -0,0 +1,97 @@
+/* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 9 /* cycles */
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 41 /* cycles */
+#endif
+
+/* bsf takes 18-42 cycles; use an average for uniform random numbers */
+#ifndef COUNT_TRAILING_ZEROS_TIME
+#define COUNT_TRAILING_ZEROS_TIME 20 /* cycles */
+#endif
+
+
+/* Generated by tuneup.c, 2000-07-06. */
+
+#ifndef KARATSUBA_MUL_THRESHOLD
+#define KARATSUBA_MUL_THRESHOLD 14
+#endif
+#ifndef TOOM3_MUL_THRESHOLD
+#define TOOM3_MUL_THRESHOLD 99
+#endif
+
+#ifndef KARATSUBA_SQR_THRESHOLD
+#define KARATSUBA_SQR_THRESHOLD 22
+#endif
+#ifndef TOOM3_SQR_THRESHOLD
+#define TOOM3_SQR_THRESHOLD 89
+#endif
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD 40
+#endif
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 98
+#endif
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD 13
+#endif
+
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD 5
+#endif
+#ifndef GCDEXT_THRESHOLD
+#define GCDEXT_THRESHOLD 25
+#endif
+
+#ifndef FFT_MUL_TABLE
+#define FFT_MUL_TABLE { 496, 1056, 1920, 4608, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_MUL_THRESHOLD
+#define FFT_MODF_MUL_THRESHOLD 512
+#endif
+#ifndef FFT_MUL_THRESHOLD
+#define FFT_MUL_THRESHOLD 3840
+#endif
+
+#ifndef FFT_SQR_TABLE
+#define FFT_SQR_TABLE { 496, 1184, 2176, 5632, 14336, 40960, 0 }
+#endif
+#ifndef FFT_MODF_SQR_THRESHOLD
+#define FFT_MODF_SQR_THRESHOLD 512
+#endif
+#ifndef FFT_SQR_THRESHOLD
+#define FFT_SQR_THRESHOLD 4352
+#endif
diff --git a/rts/gmp/mpn/x86/pentium/mmx/lshift.asm b/rts/gmp/mpn/x86/pentium/mmx/lshift.asm
new file mode 100644
index 0000000000..2225438658
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/lshift.asm
@@ -0,0 +1,455 @@
+dnl Intel P5 mpn_lshift -- mpn left shift.
+dnl
+dnl P5: 1.75 cycles/limb.
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size left by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the right. Return the bits shifted out at the
+C left.
+C
+C The comments in mpn_rshift apply here too.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl minimum 5, because the unrolled loop can't handle less
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_lshift)
+
+ pushl %ebx
+ pushl %edi
+deflit(`FRAME',8)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_DST, %edx
+
+ movl PARAM_SRC, %ebx
+ movl PARAM_SHIFT, %ecx
+
+ cmp $UNROLL_THRESHOLD, %eax
+ jae L(unroll)
+
+ movl -4(%ebx,%eax,4), %edi C src high limb
+ decl %eax
+
+ jnz L(simple)
+
+ shldl( %cl, %edi, %eax) C eax was decremented to zero
+
+ shll %cl, %edi
+
+ movl %edi, (%edx) C dst low limb
+ popl %edi C risk of data cache bank clash
+
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+L(simple):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd (%ebx,%eax,4), %mm5 C src high limb
+
+ movd %ecx, %mm6 C lshift
+ negl %ecx
+
+ psllq %mm6, %mm5
+ addl $32, %ecx
+
+ movd %ecx, %mm7
+ psrlq $32, %mm5 C retval
+
+
+L(simple_top):
+ C eax counter, limbs, negative
+ C ebx src
+ C ecx
+ C edx dst
+ C esi
+ C edi
+ C
+ C mm0 scratch
+ C mm5 return value
+ C mm6 shift
+ C mm7 32-shift
+
+ movq -4(%ebx,%eax,4), %mm0
+ decl %eax
+
+ psrlq %mm7, %mm0
+
+ C
+
+ movd %mm0, 4(%edx,%eax,4)
+ jnz L(simple_top)
+
+
+ movd (%ebx), %mm0
+
+ movd %mm5, %eax
+ psllq %mm6, %mm0
+
+ popl %edi
+ popl %ebx
+
+ movd %mm0, (%edx)
+
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(unroll):
+ C eax size
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd -4(%ebx,%eax,4), %mm5 C src high limb
+ leal (%ebx,%eax,4), %edi
+
+ movd %ecx, %mm6 C lshift
+ andl $4, %edi
+
+ psllq %mm6, %mm5
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process high limb separately (marked xxx) to
+ C make it so.
+ C
+ C source -8(ebx,%eax,4)
+ C |
+ C +-------+-------+-------+--
+ C | |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+ C
+ C dest
+ C -4(edx,%eax,4)
+ C |
+ C +-------+-------+--
+ C | xxx | |
+ C +-------+-------+--
+
+ movq -8(%ebx,%eax,4), %mm0 C unaligned load
+
+ psllq %mm6, %mm0
+ decl %eax
+
+ psrlq $32, %mm0
+
+ C
+
+ movd %mm0, (%edx,%eax,4)
+L(start_src_aligned):
+
+ movq -8(%ebx,%eax,4), %mm1 C src high qword
+ leal (%edx,%eax,4), %edi
+
+ andl $4, %edi
+ psrlq $32, %mm5 C return value
+
+ movq -16(%ebx,%eax,4), %mm3 C src second highest qword
+ jz L(start_dst_aligned)
+
+ C dst isn't aligned, subtract 4 to make it so, and pretend the shift
+ C is 32 bits extra. High limb of dst (marked xxx) handled here
+ C separately.
+ C
+ C source -8(ebx,%eax,4)
+ C |
+ C +-------+-------+--
+ C | mm1 |
+ C +-------+-------+--
+ C 0mod8 4mod8
+ C
+ C dest
+ C -4(edx,%eax,4)
+ C |
+ C +-------+-------+-------+--
+ C | xxx | |
+ C +-------+-------+-------+--
+ C 0mod8 4mod8 0mod8
+
+ movq %mm1, %mm0
+ addl $32, %ecx C new shift
+
+ psllq %mm6, %mm0
+
+ movd %ecx, %mm6
+ psrlq $32, %mm0
+
+ C wasted cycle here waiting for %mm0
+
+ movd %mm0, -4(%edx,%eax,4)
+ subl $4, %edx
+L(start_dst_aligned):
+
+
+ psllq %mm6, %mm1
+ negl %ecx C -shift
+
+ addl $64, %ecx C 64-shift
+ movq %mm3, %mm2
+
+ movd %ecx, %mm7
+ subl $8, %eax C size-8
+
+ psrlq %mm7, %mm3
+
+ por %mm1, %mm3 C mm3 ready to store
+ jc L(finish)
+
+
+ C The comments in mpn_rshift apply here too.
+
+ ALIGN(8)
+L(unroll_loop):
+ C eax counter, limbs
+ C ebx src
+ C ecx
+ C edx dst
+ C esi
+ C edi
+ C
+ C mm0
+ C mm1
+ C mm2 src qword from 48(%ebx,%eax,4)
+ C mm3 dst qword ready to store to 56(%edx,%eax,4)
+ C
+ C mm5 return value
+ C mm6 lshift
+ C mm7 rshift
+
+ movq 8(%ebx,%eax,4), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ movq %mm3, 24(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq (%ebx,%eax,4), %mm3 C
+ psllq %mm6, %mm1 C
+
+ movq %mm0, 16(%edx,%eax,4)
+ movq %mm3, %mm2 C
+
+ psrlq %mm7, %mm3 C
+ subl $4, %eax
+
+ por %mm1, %mm3 C
+ jnc L(unroll_loop)
+
+
+
+L(finish):
+ C eax -4 to -1 representing respectively 0 to 3 limbs remaining
+
+ testb $2, %al
+
+ jz L(finish_no_two)
+
+ movq 8(%ebx,%eax,4), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ movq %mm3, 24(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq %mm1, %mm2
+ movq %mm0, %mm3
+
+ subl $2, %eax
+L(finish_no_two):
+
+
+ C eax -4 or -3 representing respectively 0 or 1 limbs remaining
+ C
+ C mm2 src prev qword, from 48(%ebx,%eax,4)
+ C mm3 dst qword, for 56(%edx,%eax,4)
+
+ testb $1, %al
+ movd %mm5, %eax C retval
+
+ popl %edi
+ jz L(finish_zero)
+
+
+ C One extra src limb, destination was aligned.
+ C
+ C source ebx
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edx+12 edx+4 edx
+ C --+---------------+---------------+-------+
+ C | mm3 | | |
+ C --+---------------+---------------+-------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C One extra src limb, destination was unaligned.
+ C
+ C source ebx
+ C --+---------------+-------+
+ C | mm2 | |
+ C --+---------------+-------+
+ C
+ C dest edx+12 edx+4
+ C --+---------------+---------------+
+ C | mm3 | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword at 4(%edx), and in the aligned case
+ C there's an extra limb of dst to be formed from that extra src limb
+ C left shifted.
+
+
+ movd (%ebx), %mm0
+ psllq %mm6, %mm2
+
+ movq %mm3, 12(%edx)
+ psllq $32, %mm0
+
+ movq %mm0, %mm1
+ psrlq %mm7, %mm0
+
+ por %mm2, %mm0
+ psllq %mm6, %mm1
+
+ movq %mm0, 4(%edx)
+ psrlq $32, %mm1
+
+ andl $32, %ecx
+ popl %ebx
+
+ jz L(finish_one_unaligned)
+
+ movd %mm1, (%edx)
+L(finish_one_unaligned):
+
+ emms
+
+ ret
+
+
+L(finish_zero):
+
+ C No extra src limbs, destination was aligned.
+ C
+ C source ebx
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edx+8 edx
+ C --+---------------+---------------+
+ C | mm3 | |
+ C --+---------------+---------------+
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C No extra src limbs, destination was unaligned.
+ C
+ C source ebx
+ C --+---------------+
+ C | mm2 |
+ C --+---------------+
+ C
+ C dest edx+8 edx+4
+ C --+---------------+-------+
+ C | mm3 | |
+ C --+---------------+-------+
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C The movd for the unaligned case writes the same data to 4(%edx)
+ C that the movq does for the aligned case.
+
+
+ movq %mm3, 8(%edx)
+ andl $32, %ecx
+
+ psllq %mm6, %mm2
+ jz L(finish_zero_unaligned)
+
+ movq %mm2, (%edx)
+L(finish_zero_unaligned):
+
+ psrlq $32, %mm2
+ popl %ebx
+
+ movd %mm5, %eax C retval
+
+ movd %mm2, 4(%edx)
+
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mmx/popham.asm b/rts/gmp/mpn/x86/pentium/mmx/popham.asm
new file mode 100644
index 0000000000..587a07ab3d
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/popham.asm
@@ -0,0 +1,30 @@
+dnl Intel P55 mpn_popcount, mpn_hamdist -- population count and hamming
+dnl distance.
+dnl
+dnl P55: popcount 11.5 cycles/limb, hamdist 12.0 cycles/limb
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+include_mpn(`x86/k6/mmx/popham.asm')
diff --git a/rts/gmp/mpn/x86/pentium/mmx/rshift.asm b/rts/gmp/mpn/x86/pentium/mmx/rshift.asm
new file mode 100644
index 0000000000..7672630d57
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mmx/rshift.asm
@@ -0,0 +1,460 @@
+dnl Intel P5 mpn_rshift -- mpn right shift.
+dnl
+dnl P5: 1.75 cycles/limb.
+
+
+dnl Copyright (C) 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C Shift src,size right by shift many bits and store the result in dst,size.
+C Zeros are shifted in at the left. Return the bits shifted out at the
+C right.
+C
+C It takes 6 mmx instructions to process 2 limbs, making 1.5 cycles/limb,
+C and with a 4 limb loop and 1 cycle of loop overhead the total is 1.75 c/l.
+C
+C Full speed depends on source and destination being aligned. Unaligned mmx
+C loads and stores on P5 don't pair and have a 2 cycle penalty. Some hairy
+C setups and finish-ups are done to ensure alignment for the loop.
+C
+C MMX shifts work out a bit faster even for the simple loop.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+deflit(`FRAME',0)
+
+dnl Minimum 5, because the unrolled loop can't handle less.
+deflit(UNROLL_THRESHOLD, 5)
+
+ .text
+ ALIGN(8)
+
+PROLOGUE(mpn_rshift)
+
+ pushl %ebx
+ pushl %edi
+deflit(`FRAME',8)
+
+ movl PARAM_SIZE, %eax
+ movl PARAM_DST, %edx
+
+ movl PARAM_SRC, %ebx
+ movl PARAM_SHIFT, %ecx
+
+ cmp $UNROLL_THRESHOLD, %eax
+ jae L(unroll)
+
+ decl %eax
+ movl (%ebx), %edi C src low limb
+
+ jnz L(simple)
+
+ shrdl( %cl, %edi, %eax) C eax was decremented to zero
+
+ shrl %cl, %edi
+
+ movl %edi, (%edx) C dst low limb
+ popl %edi C risk of data cache bank clash
+
+ popl %ebx
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(simple):
+ C eax size-1
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd (%ebx), %mm5 C src[0]
+ leal (%ebx,%eax,4), %ebx C &src[size-1]
+
+ movd %ecx, %mm6 C rshift
+ leal -4(%edx,%eax,4), %edx C &dst[size-2]
+
+ psllq $32, %mm5
+ negl %eax
+
+
+C This loop is 5 or 8 cycles, with every second load unaligned and a wasted
+C cycle waiting for the mm0 result to be ready. For comparison a shrdl is 4
+C cycles and would be 8 in a simple loop. Using mmx helps the return value
+C and last limb calculations too.
+
+L(simple_top):
+ C eax counter, limbs, negative
+ C ebx &src[size-1]
+ C ecx return value
+ C edx &dst[size-2]
+ C
+ C mm0 scratch
+ C mm5 return value
+ C mm6 shift
+
+ movq (%ebx,%eax,4), %mm0
+ incl %eax
+
+ psrlq %mm6, %mm0
+
+ movd %mm0, (%edx,%eax,4)
+ jnz L(simple_top)
+
+
+ movd (%ebx), %mm0
+ psrlq %mm6, %mm5 C return value
+
+ psrlq %mm6, %mm0
+ popl %edi
+
+ movd %mm5, %eax
+ popl %ebx
+
+ movd %mm0, 4(%edx)
+
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(unroll):
+ C eax size
+ C ebx src
+ C ecx shift
+ C edx dst
+ C esi
+ C edi
+ C ebp
+deflit(`FRAME',8)
+
+ movd (%ebx), %mm5 C src[0]
+ movl $4, %edi
+
+ movd %ecx, %mm6 C rshift
+ testl %edi, %ebx
+
+ psllq $32, %mm5
+ jz L(start_src_aligned)
+
+
+ C src isn't aligned, process low limb separately (marked xxx) and
+ C step src and dst by one limb, making src aligned.
+ C
+ C source ebx
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+ C
+ C dest edx
+ C --+-------+-------+
+ C | | xxx |
+ C --+-------+-------+
+
+ movq (%ebx), %mm0 C unaligned load
+
+ psrlq %mm6, %mm0
+ addl $4, %ebx
+
+ decl %eax
+
+ movd %mm0, (%edx)
+ addl $4, %edx
+L(start_src_aligned):
+
+
+ movq (%ebx), %mm1
+ testl %edi, %edx
+
+ psrlq %mm6, %mm5 C retval
+ jz L(start_dst_aligned)
+
+ C dst isn't aligned, add 4 to make it so, and pretend the shift is
+ C 32 bits extra. Low limb of dst (marked xxx) handled here
+ C separately.
+ C
+ C source ebx
+ C --+-------+-------+
+ C | mm1 |
+ C --+-------+-------+
+ C 4mod8 0mod8
+ C
+ C dest edx
+ C --+-------+-------+-------+
+ C | xxx |
+ C --+-------+-------+-------+
+ C 4mod8 0mod8 4mod8
+
+ movq %mm1, %mm0
+ addl $32, %ecx C new shift
+
+ psrlq %mm6, %mm0
+
+ movd %ecx, %mm6
+
+ movd %mm0, (%edx)
+ addl $4, %edx
+L(start_dst_aligned):
+
+
+ movq 8(%ebx), %mm3
+ negl %ecx
+
+ movq %mm3, %mm2 C mm2 src qword
+ addl $64, %ecx
+
+ movd %ecx, %mm7
+ psrlq %mm6, %mm1
+
+ leal -12(%ebx,%eax,4), %ebx
+ leal -20(%edx,%eax,4), %edx
+
+ psllq %mm7, %mm3
+ subl $7, %eax C size-7
+
+ por %mm1, %mm3 C mm3 ready to store
+ negl %eax C -(size-7)
+
+ jns L(finish)
+
+
+ C This loop is the important bit, the rest is just support. Careful
+ C instruction scheduling achieves the claimed 1.75 c/l. The
+ C relevant parts of the pairing rules are:
+ C
+ C - mmx loads and stores execute only in the U pipe
+ C - only one mmx shift in a pair
+ C - wait one cycle before storing an mmx register result
+ C - the usual address generation interlock
+ C
+ C Two qword calculations are slightly interleaved. The instructions
+ C marked "C" belong to the second qword, and the "C prev" one is for
+ C the second qword from the previous iteration.
+
+ ALIGN(8)
+L(unroll_loop):
+ C eax counter, limbs, negative
+ C ebx &src[size-12]
+ C ecx
+ C edx &dst[size-12]
+ C esi
+ C edi
+ C
+ C mm0
+ C mm1
+ C mm2 src qword from -8(%ebx,%eax,4)
+ C mm3 dst qword ready to store to -8(%edx,%eax,4)
+ C
+ C mm5 return value
+ C mm6 rshift
+ C mm7 lshift
+
+ movq (%ebx,%eax,4), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ movq %mm3, -8(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq 8(%ebx,%eax,4), %mm3 C
+ psrlq %mm6, %mm1 C
+
+ movq %mm0, (%edx,%eax,4)
+ movq %mm3, %mm2 C
+
+ psllq %mm7, %mm3 C
+ addl $4, %eax
+
+ por %mm1, %mm3 C
+ js L(unroll_loop)
+
+
+L(finish):
+ C eax 0 to 3 representing respectively 3 to 0 limbs remaining
+
+ testb $2, %al
+
+ jnz L(finish_no_two)
+
+ movq (%ebx,%eax,4), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ movq %mm3, -8(%edx,%eax,4) C prev
+ por %mm2, %mm0
+
+ movq %mm1, %mm2
+ movq %mm0, %mm3
+
+ addl $2, %eax
+L(finish_no_two):
+
+
+ C eax 2 or 3 representing respectively 1 or 0 limbs remaining
+ C
+ C mm2 src prev qword, from -8(%ebx,%eax,4)
+ C mm3 dst qword, for -8(%edx,%eax,4)
+
+ testb $1, %al
+ popl %edi
+
+ movd %mm5, %eax C retval
+ jnz L(finish_zero)
+
+
+ C One extra limb, destination was aligned.
+ C
+ C source ebx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edx
+ C +-------+---------------+---------------+--
+ C | | | mm3 |
+ C +-------+---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C One extra limb, destination was unaligned.
+ C
+ C source ebx
+ C +-------+---------------+--
+ C | | mm2 |
+ C +-------+---------------+--
+ C
+ C dest edx
+ C +---------------+---------------+--
+ C | | mm3 |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = ecx = 64-(shift+32)
+
+
+ C In both cases there's one extra limb of src to fetch and combine
+ C with mm2 to make a qword at 8(%edx), and in the aligned case
+ C there's a further extra limb of dst to be formed.
+
+
+ movd 8(%ebx), %mm0
+ psrlq %mm6, %mm2
+
+ movq %mm0, %mm1
+ psllq %mm7, %mm0
+
+ movq %mm3, (%edx)
+ por %mm2, %mm0
+
+ psrlq %mm6, %mm1
+ andl $32, %ecx
+
+ popl %ebx
+ jz L(finish_one_unaligned)
+
+ C dst was aligned, must store one extra limb
+ movd %mm1, 16(%edx)
+L(finish_one_unaligned):
+
+ movq %mm0, 8(%edx)
+
+ emms
+
+ ret
+
+
+L(finish_zero):
+
+ C No extra limbs, destination was aligned.
+ C
+ C source ebx
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edx+4
+ C +---------------+---------------+--
+ C | | mm3 |
+ C +---------------+---------------+--
+ C
+ C mm6 = shift
+ C mm7 = ecx = 64-shift
+
+
+ C No extra limbs, destination was unaligned.
+ C
+ C source ebx
+ C +---------------+--
+ C | mm2 |
+ C +---------------+--
+ C
+ C dest edx+4
+ C +-------+---------------+--
+ C | | mm3 |
+ C +-------+---------------+--
+ C
+ C mm6 = shift+32
+ C mm7 = 64-(shift+32)
+
+
+ C The movd for the unaligned case is clearly the same data as the
+ C movq for the aligned case, it's just a choice between whether one
+ C or two limbs should be written.
+
+
+ movq %mm3, 4(%edx)
+ psrlq %mm6, %mm2
+
+ movd %mm2, 12(%edx)
+ andl $32, %ecx
+
+ popl %ebx
+ jz L(finish_zero_unaligned)
+
+ movq %mm2, 12(%edx)
+L(finish_zero_unaligned):
+
+ emms
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mul_1.asm b/rts/gmp/mpn/x86/pentium/mul_1.asm
new file mode 100644
index 0000000000..08639eca09
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mul_1.asm
@@ -0,0 +1,79 @@
+dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
+dnl
+dnl P5: 13.0 cycles/limb
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t multiplier);
+
+defframe(PARAM_MULTIPLIER,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_mul_1)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST, %edi
+ movl PARAM_SRC, %esi
+ movl PARAM_SIZE, %ecx
+ movl PARAM_MULTIPLIER, %ebp
+
+ leal (%edi,%ecx,4), %edi
+ leal (%esi,%ecx,4), %esi
+ negl %ecx
+ xorl %ebx, %ebx
+ ALIGN(8)
+
+L(oop): adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull %ebp
+
+ addl %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(oop)
+
+ adcl $0, %ebx
+ movl %ebx, %eax
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/mul_basecase.asm b/rts/gmp/mpn/x86/pentium/mul_basecase.asm
new file mode 100644
index 0000000000..d9f79a0831
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/mul_basecase.asm
@@ -0,0 +1,135 @@
+dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
+dnl
+dnl P5: 14.2 cycles/crossproduct (approx)
+
+
+dnl Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xsize,
+C mp_srcptr yp, mp_size_t ysize);
+
+defframe(PARAM_YSIZE, 20)
+defframe(PARAM_YP, 16)
+defframe(PARAM_XSIZE, 12)
+defframe(PARAM_XP, 8)
+defframe(PARAM_WP, 4)
+
+defframe(VAR_COUNTER, -4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_mul_basecase)
+
+ pushl %eax C dummy push for allocating stack slot
+ pushl %esi
+ pushl %ebp
+ pushl %edi
+deflit(`FRAME',16)
+
+ movl PARAM_XP,%esi
+ movl PARAM_WP,%edi
+ movl PARAM_YP,%ebp
+
+ movl (%esi),%eax C load xp[0]
+ mull (%ebp) C multiply by yp[0]
+ movl %eax,(%edi) C store to wp[0]
+ movl PARAM_XSIZE,%ecx C xsize
+ decl %ecx C If xsize = 1, ysize = 1 too
+ jz L(done)
+
+ movl PARAM_XSIZE,%eax
+ pushl %ebx
+FRAME_pushl()
+ movl %edx,%ebx
+ leal (%esi,%eax,4),%esi C make xp point at end
+ leal (%edi,%eax,4),%edi C offset wp by xsize
+ negl %ecx C negate j size/index for inner loop
+ xorl %eax,%eax C clear carry
+
+ ALIGN(8)
+L(oop1): adcl $0,%ebx
+ movl (%esi,%ecx,4),%eax C load next limb at xp[j]
+ mull (%ebp)
+ addl %ebx,%eax
+ movl %eax,(%edi,%ecx,4)
+ incl %ecx
+ movl %edx,%ebx
+ jnz L(oop1)
+
+ adcl $0,%ebx
+ movl PARAM_YSIZE,%eax
+ movl %ebx,(%edi) C most significant limb of product
+ addl $4,%edi C increment wp
+ decl %eax
+ jz L(skip)
+ movl %eax,VAR_COUNTER C set index i to ysize
+
+L(outer):
+ addl $4,%ebp C make ebp point to next y limb
+ movl PARAM_XSIZE,%ecx
+ negl %ecx
+ xorl %ebx,%ebx
+
+ C code at 0x61 here, close enough to aligned
+L(oop2):
+ adcl $0,%ebx
+ movl (%esi,%ecx,4),%eax
+ mull (%ebp)
+ addl %ebx,%eax
+ movl (%edi,%ecx,4),%ebx
+ adcl $0,%edx
+ addl %eax,%ebx
+ movl %ebx,(%edi,%ecx,4)
+ incl %ecx
+ movl %edx,%ebx
+ jnz L(oop2)
+
+ adcl $0,%ebx
+
+ movl %ebx,(%edi)
+ addl $4,%edi
+ movl VAR_COUNTER,%eax
+ decl %eax
+ movl %eax,VAR_COUNTER
+ jnz L(outer)
+
+L(skip):
+ popl %ebx
+ popl %edi
+ popl %ebp
+ popl %esi
+ addl $4,%esp
+ ret
+
+L(done):
+ movl %edx,4(%edi) C store to wp[1]
+ popl %edi
+ popl %ebp
+ popl %esi
+ popl %eax C dummy pop for deallocating stack slot
+ ret
+
+EPILOGUE()
+
diff --git a/rts/gmp/mpn/x86/pentium/rshift.asm b/rts/gmp/mpn/x86/pentium/rshift.asm
new file mode 100644
index 0000000000..e8f5ae8ec8
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/rshift.asm
@@ -0,0 +1,236 @@
+dnl Intel Pentium mpn_rshift -- mpn right shift.
+dnl
+dnl cycles/limb
+dnl P5,P54: 6.0
+dnl P55: 5.375
+
+
+dnl Copyright (C) 1992, 1994, 1995, 1996, 1999, 2000 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,
+C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+deflit(`FRAME',16)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%ebp
+ movl PARAM_SHIFT,%ecx
+
+C We can use faster code for shift-by-1 under certain conditions.
+ cmp $1,%ecx
+ jne L(normal)
+ leal 4(%edi),%eax
+ cmpl %esi,%eax
+ jnc L(special) C jump if res_ptr + 1 >= s_ptr
+ leal (%edi,%ebp,4),%eax
+ cmpl %eax,%esi
+ jnc L(special) C jump if s_ptr >= res_ptr + size
+
+L(normal):
+ movl (%esi),%edx
+ addl $4,%esi
+ xorl %eax,%eax
+ shrdl( %cl, %edx, %eax) C compute carry limb
+ pushl %eax C push carry limb onto stack
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz L(end)
+
+ movl (%edi),%eax C fetch destination cache line
+
+ ALIGN(4)
+L(oop): movl 28(%edi),%eax C fetch destination cache line
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ shrdl( %cl, %eax, %ebx)
+ shrdl( %cl, %edx, %eax)
+ movl %ebx,(%edi)
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ shrdl( %cl, %ebx, %edx)
+ shrdl( %cl, %eax, %ebx)
+ movl %edx,8(%edi)
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ shrdl( %cl, %edx, %eax)
+ shrdl( %cl, %ebx, %edx)
+ movl %eax,16(%edi)
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ shrdl( %cl, %eax, %ebx)
+ shrdl( %cl, %edx, %eax)
+ movl %ebx,24(%edi)
+ movl %eax,28(%edi)
+
+ addl $32,%esi
+ addl $32,%edi
+ decl %ebp
+ jnz L(oop)
+
+L(end): popl %ebp
+ andl $7,%ebp
+ jz L(end2)
+L(oop2):
+ movl (%esi),%eax
+ shrdl( %cl,%eax,%edx) C compute result limb
+ movl %edx,(%edi)
+ movl %eax,%edx
+ addl $4,%esi
+ addl $4,%edi
+ decl %ebp
+ jnz L(oop2)
+
+L(end2):
+ shrl %cl,%edx C compute most significant limb
+ movl %edx,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
+C We loop from least significant end of the arrays, which is only
+C permissible if the source and destination don't overlap, since the
+C function is documented to work for overlapping source and destination.
+
+L(special):
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ shrl %edx
+ incl %ebp
+ decl %ebp
+ jz L(Lend)
+
+ movl (%edi),%eax C fetch destination cache line
+
+ ALIGN(4)
+L(Loop):
+ movl -28(%edi),%eax C fetch destination cache line
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ rcrl %eax
+ movl %ebx,(%edi)
+ rcrl %edx
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ rcrl %ebx
+ movl %edx,-8(%edi)
+ rcrl %eax
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ rcrl %edx
+ movl %eax,-16(%edi)
+ rcrl %ebx
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ rcrl %eax
+ movl %ebx,-24(%edi)
+ rcrl %edx
+ movl %eax,-28(%edi)
+
+ leal -32(%esi),%esi C use leal not to clobber carry
+ leal -32(%edi),%edi
+ decl %ebp
+ jnz L(Loop)
+
+L(Lend):
+ popl %ebp
+ sbbl %eax,%eax C save carry in %eax
+ andl $7,%ebp
+ jz L(Lend2)
+ addl %eax,%eax C restore carry from eax
+L(Loop2):
+ movl %edx,%ebx
+ movl (%esi),%edx
+ rcrl %edx
+ movl %ebx,(%edi)
+
+ leal -4(%esi),%esi C use leal not to clobber carry
+ leal -4(%edi),%edi
+ decl %ebp
+ jnz L(Loop2)
+
+ jmp L(L1)
+L(Lend2):
+ addl %eax,%eax C restore carry from eax
+L(L1): movl %edx,(%edi) C store last limb
+
+ movl $0,%eax
+ rcrl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/pentium/sqr_basecase.asm b/rts/gmp/mpn/x86/pentium/sqr_basecase.asm
new file mode 100644
index 0000000000..c8584df13c
--- /dev/null
+++ b/rts/gmp/mpn/x86/pentium/sqr_basecase.asm
@@ -0,0 +1,520 @@
+dnl Intel P5 mpn_sqr_basecase -- square an mpn number.
+dnl
+dnl P5: approx 8 cycles per crossproduct, or 15.5 cycles per triangular
+dnl product at around 20x20 limbs.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C Calculate src,size squared, storing the result in dst,2*size.
+C
+C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
+C lot of function call overheads are avoided, especially when the size is
+C small.
+
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_sqr_basecase)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %edx
+ movl PARAM_SRC, %eax
+
+ cmpl $2, %edx
+ movl PARAM_DST, %ecx
+
+ je L(two_limbs)
+
+ movl (%eax), %eax
+ ja L(three_or_more)
+
+C -----------------------------------------------------------------------------
+C one limb only
+ C eax src
+ C ebx
+ C ecx dst
+ C edx
+
+ mull %eax
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ ret
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(two_limbs):
+ C eax src
+ C ebx
+ C ecx dst
+ C edx size
+
+ pushl %ebp
+ pushl %edi
+
+ pushl %esi
+ pushl %ebx
+
+ movl %eax, %ebx
+ movl (%eax), %eax
+
+ mull %eax C src[0]^2
+
+ movl %eax, (%ecx) C dst[0]
+ movl %edx, %esi C dst[1]
+
+ movl 4(%ebx), %eax
+
+ mull %eax C src[1]^2
+
+ movl %eax, %edi C dst[2]
+ movl %edx, %ebp C dst[3]
+
+ movl (%ebx), %eax
+
+ mull 4(%ebx) C src[0]*src[1]
+
+ addl %eax, %esi
+ popl %ebx
+
+ adcl %edx, %edi
+
+ adcl $0, %ebp
+ addl %esi, %eax
+
+ adcl %edi, %edx
+ movl %eax, 4(%ecx)
+
+ adcl $0, %ebp
+ popl %esi
+
+ movl %edx, 8(%ecx)
+ movl %ebp, 12(%ecx)
+
+ popl %edi
+ popl %ebp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(three_or_more):
+ C eax src low limb
+ C ebx
+ C ecx dst
+ C edx size
+
+ cmpl $4, %edx
+ pushl %ebx
+deflit(`FRAME',4)
+
+ movl PARAM_SRC, %ebx
+ jae L(four_or_more)
+
+
+C -----------------------------------------------------------------------------
+C three limbs
+ C eax src low limb
+ C ebx src
+ C ecx dst
+ C edx size
+
+ pushl %ebp
+ pushl %edi
+
+ mull %eax C src[0] ^ 2
+
+ movl %eax, (%ecx)
+ movl %edx, 4(%ecx)
+
+ movl 4(%ebx), %eax
+ xorl %ebp, %ebp
+
+ mull %eax C src[1] ^ 2
+
+ movl %eax, 8(%ecx)
+ movl %edx, 12(%ecx)
+
+ movl 8(%ebx), %eax
+ pushl %esi C risk of cache bank clash
+
+ mull %eax C src[2] ^ 2
+
+ movl %eax, 16(%ecx)
+ movl %edx, 20(%ecx)
+
+ movl (%ebx), %eax
+
+ mull 4(%ebx) C src[0] * src[1]
+
+ movl %eax, %esi
+ movl %edx, %edi
+
+ movl (%ebx), %eax
+
+ mull 8(%ebx) C src[0] * src[2]
+
+ addl %eax, %edi
+ movl %edx, %ebp
+
+ adcl $0, %ebp
+ movl 4(%ebx), %eax
+
+ mull 8(%ebx) C src[1] * src[2]
+
+ xorl %ebx, %ebx
+ addl %eax, %ebp
+
+ C eax
+ C ebx zero, will be dst[5]
+ C ecx dst
+ C edx dst[4]
+ C esi dst[1]
+ C edi dst[2]
+ C ebp dst[3]
+
+ adcl $0, %edx
+ addl %esi, %esi
+
+ adcl %edi, %edi
+
+ adcl %ebp, %ebp
+
+ adcl %edx, %edx
+ movl 4(%ecx), %eax
+
+ adcl $0, %ebx
+ addl %esi, %eax
+
+ movl %eax, 4(%ecx)
+ movl 8(%ecx), %eax
+
+ adcl %edi, %eax
+ movl 12(%ecx), %esi
+
+ adcl %ebp, %esi
+ movl 16(%ecx), %edi
+
+ movl %eax, 8(%ecx)
+ movl %esi, 12(%ecx)
+
+ adcl %edx, %edi
+ popl %esi
+
+ movl 20(%ecx), %eax
+ movl %edi, 16(%ecx)
+
+ popl %edi
+ popl %ebp
+
+ adcl %ebx, %eax C no carry out of this
+ popl %ebx
+
+ movl %eax, 20(%ecx)
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+ ALIGN(8)
+L(four_or_more):
+ C eax src low limb
+ C ebx src
+ C ecx dst
+ C edx size
+ C esi
+ C edi
+ C ebp
+ C
+ C First multiply src[0]*src[1..size-1] and store at dst[1..size].
+
+deflit(`FRAME',4)
+
+ pushl %edi
+FRAME_pushl()
+ pushl %esi
+FRAME_pushl()
+
+ pushl %ebp
+FRAME_pushl()
+ leal (%ecx,%edx,4), %edi C dst end of this mul1
+
+ leal (%ebx,%edx,4), %esi C src end
+ movl %ebx, %ebp C src
+
+ negl %edx C -size
+ xorl %ebx, %ebx C clear carry limb and carry flag
+
+ leal 1(%edx), %ecx C -(size-1)
+
+L(mul1):
+ C eax scratch
+ C ebx carry
+ C ecx counter, negative
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp src
+
+ adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull (%ebp)
+
+ addl %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(mul1)
+
+
+ C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for
+ C n=1..size-2.
+ C
+ C The last two products, which are the end corner of the product
+ C triangle, are handled separately to save looping overhead. These
+ C are src[size-3]*src[size-2,size-1] and src[size-2]*src[size-1].
+ C If size is 4 then it's only these that need to be done.
+ C
+ C In the outer loop %esi is a constant, and %edi just advances by 1
+ C limb each time. The size of the operation decreases by 1 limb
+ C each time.
+
+ C eax
+ C ebx carry (needing carry flag added)
+ C ecx
+ C edx
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp
+
+ adcl $0, %ebx
+ movl PARAM_SIZE, %edx
+
+ movl %ebx, (%edi)
+ subl $4, %edx
+
+ negl %edx
+ jz L(corner)
+
+
+L(outer):
+ C ebx previous carry limb to store
+ C edx outer loop counter (negative)
+ C esi &src[size]
+ C edi dst, pointing at stored carry limb of previous loop
+
+ pushl %edx C new outer loop counter
+ leal -2(%edx), %ecx
+
+ movl %ebx, (%edi)
+ addl $4, %edi
+
+ addl $4, %ebp
+ xorl %ebx, %ebx C initial carry limb, clear carry flag
+
+L(inner):
+ C eax scratch
+ C ebx carry (needing carry flag added)
+ C ecx counter, negative
+ C edx scratch
+ C esi &src[size]
+ C edi dst end of this addmul
+ C ebp &src[j]
+
+ adcl $0, %ebx
+ movl (%esi,%ecx,4), %eax
+
+ mull (%ebp)
+
+ addl %ebx, %eax
+ movl (%edi,%ecx,4), %ebx
+
+ adcl $0, %edx
+ addl %eax, %ebx
+
+ movl %ebx, (%edi,%ecx,4)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(inner)
+
+
+ adcl $0, %ebx
+ popl %edx C outer loop counter
+
+ incl %edx
+ jnz L(outer)
+
+
+ movl %ebx, (%edi)
+
+L(corner):
+ C esi &src[size]
+ C edi &dst[2*size-4]
+
+ movl -8(%esi), %eax
+ movl -4(%edi), %ebx C risk of data cache bank clash here
+
+ mull -12(%esi) C src[size-2]*src[size-3]
+
+ addl %eax, %ebx
+ movl %edx, %ecx
+
+ adcl $0, %ecx
+ movl -4(%esi), %eax
+
+ mull -12(%esi) C src[size-1]*src[size-3]
+
+ addl %ecx, %eax
+ movl (%edi), %ecx
+
+ adcl $0, %edx
+ movl %ebx, -4(%edi)
+
+ addl %eax, %ecx
+ movl %edx, %ebx
+
+ adcl $0, %ebx
+ movl -4(%esi), %eax
+
+ mull -8(%esi) C src[size-1]*src[size-2]
+
+ movl %ecx, 0(%edi)
+ addl %eax, %ebx
+
+ adcl $0, %edx
+ movl PARAM_SIZE, %eax
+
+ negl %eax
+ movl %ebx, 4(%edi)
+
+ addl $1, %eax C -(size-1) and clear carry
+ movl %edx, 8(%edi)
+
+
+C -----------------------------------------------------------------------------
+C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
+
+L(lshift):
+ C eax counter, negative
+ C ebx next limb
+ C ecx
+ C edx
+ C esi
+ C edi &dst[2*size-4]
+ C ebp
+
+ movl 12(%edi,%eax,8), %ebx
+
+ rcll %ebx
+ movl 16(%edi,%eax,8), %ecx
+
+ rcll %ecx
+ movl %ebx, 12(%edi,%eax,8)
+
+ movl %ecx, 16(%edi,%eax,8)
+ incl %eax
+
+ jnz L(lshift)
+
+
+ adcl %eax, %eax C high bit out
+ movl PARAM_SRC, %esi
+
+ movl PARAM_SIZE, %ecx C risk of cache bank clash
+ movl %eax, 12(%edi) C dst most significant limb
+
+
+C -----------------------------------------------------------------------------
+C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ...,
+C src[size-1]^2. dst[0] hasn't yet been set at all yet, and just gets the
+C low limb of src[0]^2.
+
+ movl (%esi), %eax C src[0]
+ leal (%esi,%ecx,4), %esi C src end
+
+ negl %ecx
+
+ mull %eax
+
+ movl %eax, 16(%edi,%ecx,8) C dst[0]
+ movl %edx, %ebx
+
+ addl $1, %ecx C size-1 and clear carry
+
+L(diag):
+ C eax scratch (low product)
+ C ebx carry limb
+ C ecx counter, negative
+ C edx scratch (high product)
+ C esi &src[size]
+ C edi &dst[2*size-4]
+ C ebp scratch (fetched dst limbs)
+
+ movl (%esi,%ecx,4), %eax
+ adcl $0, %ebx
+
+ mull %eax
+
+ movl 16-4(%edi,%ecx,8), %ebp
+
+ addl %ebp, %ebx
+ movl 16(%edi,%ecx,8), %ebp
+
+ adcl %eax, %ebp
+ movl %ebx, 16-4(%edi,%ecx,8)
+
+ movl %ebp, 16(%edi,%ecx,8)
+ incl %ecx
+
+ movl %edx, %ebx
+ jnz L(diag)
+
+
+ adcl $0, %edx
+ movl 16-4(%edi), %eax C dst most significant limb
+
+ addl %eax, %edx
+ popl %ebp
+
+ movl %edx, 16-4(%edi)
+ popl %esi C risk of cache bank clash
+
+ popl %edi
+ popl %ebx
+
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/rshift.asm b/rts/gmp/mpn/x86/rshift.asm
new file mode 100644
index 0000000000..c9881fd966
--- /dev/null
+++ b/rts/gmp/mpn/x86/rshift.asm
@@ -0,0 +1,92 @@
+dnl x86 mpn_rshift -- mpn right shift.
+
+dnl Copyright (C) 1992, 1994, 1996, 1999, 2000 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+
+defframe(PARAM_SHIFT,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ .text
+ ALIGN(8)
+PROLOGUE(mpn_rshift)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+deflit(`FRAME',12)
+
+ movl PARAM_DST,%edi
+ movl PARAM_SRC,%esi
+ movl PARAM_SIZE,%edx
+ movl PARAM_SHIFT,%ecx
+
+ leal -4(%edi,%edx,4),%edi
+ leal (%esi,%edx,4),%esi
+ negl %edx
+
+ movl (%esi,%edx,4),%ebx C read least significant limb
+ xorl %eax,%eax
+ shrdl( %cl, %ebx, %eax) C compute carry limb
+ incl %edx
+ jz L(end)
+ pushl %eax C push carry limb onto stack
+ testb $1,%dl
+ jnz L(1) C enter loop in the middle
+ movl %ebx,%eax
+
+ ALIGN(8)
+L(oop): movl (%esi,%edx,4),%ebx C load next higher limb
+ shrdl( %cl, %ebx, %eax) C compute result limb
+ movl %eax,(%edi,%edx,4) C store it
+ incl %edx
+L(1): movl (%esi,%edx,4),%eax
+ shrdl( %cl, %eax, %ebx)
+ movl %ebx,(%edi,%edx,4)
+ incl %edx
+ jnz L(oop)
+
+ shrl %cl,%eax C compute most significant limb
+ movl %eax,(%edi) C store it
+
+ popl %eax C pop carry limb
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+L(end): shrl %cl,%ebx C compute most significant limb
+ movl %ebx,(%edi) C store it
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/udiv.asm b/rts/gmp/mpn/x86/udiv.asm
new file mode 100644
index 0000000000..9fe022b107
--- /dev/null
+++ b/rts/gmp/mpn/x86/udiv.asm
@@ -0,0 +1,44 @@
+dnl x86 mpn_udiv_qrnnd -- 2 by 1 limb division
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *remptr, mp_limb_t high, mp_limb_t low,
+C mp_limb_t divisor);
+
+defframe(PARAM_DIVISOR, 16)
+defframe(PARAM_LOW, 12)
+defframe(PARAM_HIGH, 8)
+defframe(PARAM_REMPTR, 4)
+
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_udiv_qrnnd)
+deflit(`FRAME',0)
+ movl PARAM_LOW, %eax
+ movl PARAM_HIGH, %edx
+ divl PARAM_DIVISOR
+ movl PARAM_REMPTR, %ecx
+ movl %edx, (%ecx)
+ ret
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/umul.asm b/rts/gmp/mpn/x86/umul.asm
new file mode 100644
index 0000000000..3d289d1784
--- /dev/null
+++ b/rts/gmp/mpn/x86/umul.asm
@@ -0,0 +1,43 @@
+dnl mpn_umul_ppmm -- 1x1->2 limb multiplication
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+
+defframe(PARAM_M2, 12)
+defframe(PARAM_M1, 8)
+defframe(PARAM_LOWPTR, 4)
+
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_umul_ppmm)
+deflit(`FRAME',0)
+ movl PARAM_LOWPTR, %ecx
+ movl PARAM_M1, %eax
+ mull PARAM_M2
+ movl %eax, (%ecx)
+ movl %edx, %eax
+ ret
+EPILOGUE()
diff --git a/rts/gmp/mpn/x86/x86-defs.m4 b/rts/gmp/mpn/x86/x86-defs.m4
new file mode 100644
index 0000000000..2dad698002
--- /dev/null
+++ b/rts/gmp/mpn/x86/x86-defs.m4
@@ -0,0 +1,713 @@
+divert(-1)
+
+dnl m4 macros for x86 assembler.
+
+
+dnl Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+dnl Notes:
+dnl
+dnl m4 isn't perfect for processing BSD style x86 assembler code, the main
+dnl problems are,
+dnl
+dnl 1. Doing define(foo,123) and then using foo in an addressing mode like
+dnl foo(%ebx) expands as a macro rather than a constant. This is worked
+dnl around by using deflit() from asm-defs.m4, instead of define().
+dnl
+dnl 2. Immediates in macro definitions need a space or `' to stop the $
+dnl looking like a macro parameter. For example,
+dnl
+dnl define(foo, `mov $ 123, %eax')
+dnl
+dnl This is only a problem in macro definitions, not in ordinary text,
+dnl nor in macro parameters like text passed to forloop() or ifdef().
+
+
+deflit(BYTES_PER_MP_LIMB, 4)
+
+
+dnl --------------------------------------------------------------------------
+dnl Replacement PROLOGUE/EPILOGUE with more sophisticated error checking.
+dnl Nesting and overlapping not allowed.
+dnl
+
+
+dnl Usage: PROLOGUE(functionname)
+dnl
+dnl Generate a function prologue. functionname gets GSYM_PREFIX added.
+dnl Examples,
+dnl
+dnl PROLOGUE(mpn_add_n)
+dnl PROLOGUE(somefun)
+
+define(`PROLOGUE',
+m4_assert_numargs(1)
+m4_assert_defined(`PROLOGUE_cpu')
+`ifdef(`PROLOGUE_current_function',
+`m4_error(`PROLOGUE'(`PROLOGUE_current_function') needs an `EPILOGUE'() before `PROLOGUE'($1)
+)')dnl
+m4_file_seen()dnl
+define(`PROLOGUE_current_function',`$1')dnl
+PROLOGUE_cpu(GSYM_PREFIX`'$1)')
+
+
+dnl Usage: EPILOGUE()
+dnl
+dnl Notice the function name is passed to EPILOGUE_cpu(), letting it use $1
+dnl instead of the long PROLOGUE_current_function symbol.
+
+define(`EPILOGUE',
+m4_assert_numargs(0)
+m4_assert_defined(`EPILOGUE_cpu')
+`ifdef(`PROLOGUE_current_function',,
+`m4_error(`EPILOGUE'() with no `PROLOGUE'()
+)')dnl
+EPILOGUE_cpu(GSYM_PREFIX`'PROLOGUE_current_function)`'dnl
+undefine(`PROLOGUE_current_function')')
+
+m4wrap_prepend(
+`ifdef(`PROLOGUE_current_function',
+`m4_error(`EPILOGUE() for PROLOGUE('PROLOGUE_current_function`) never seen
+')')')
+
+
+dnl Usage: PROLOGUE_assert_inside()
+dnl
+dnl Use this unquoted on a line on its own at the start of a macro
+dnl definition to add some code to check the macro is only used inside a
+dnl PROLOGUE/EPILOGUE pair, and that hence PROLOGUE_current_function is
+dnl defined.
+
+define(PROLOGUE_assert_inside,
+m4_assert_numargs(0)
+``PROLOGUE_assert_inside_internal'(m4_doublequote($`'0))`dnl '')
+
+define(PROLOGUE_assert_inside_internal,
+m4_assert_numargs(1)
+`ifdef(`PROLOGUE_current_function',,
+`m4_error(`$1 used outside a PROLOGUE / EPILOGUE pair
+')')')
+
+
+dnl Usage: L(labelname)
+dnl LF(functionname,labelname)
+dnl
+dnl Generate a local label in the current or given function. For LF(),
+dnl functionname gets GSYM_PREFIX added, the same as with PROLOGUE().
+dnl
+dnl For example, in a function mpn_add_n (and with MPN_PREFIX __gmpn),
+dnl
+dnl L(bar) => L__gmpn_add_n__bar
+dnl LF(somefun,bar) => Lsomefun__bar
+dnl
+dnl The funtion name and label name get two underscores between them rather
+dnl than one to guard against clashing with a separate external symbol that
+dnl happened to be called functionname_labelname. (Though this would only
+dnl happen if the local label prefix is is empty.) Underscores are used so
+dnl the whole label will still be a valid C identifier and so can be easily
+dnl used in gdb.
+
+dnl LSYM_PREFIX can be L$, so defn() is used to prevent L expanding as the
+dnl L macro and making an infinite recursion.
+define(LF,
+m4_assert_numargs(2)
+m4_assert_defined(`LSYM_PREFIX')
+`defn(`LSYM_PREFIX')GSYM_PREFIX`'$1`'__$2')
+
+define(`L',
+m4_assert_numargs(1)
+PROLOGUE_assert_inside()
+`LF(PROLOGUE_current_function,`$1')')
+
+
+dnl Called: PROLOGUE_cpu(gsym)
+dnl EPILOGUE_cpu(gsym)
+
+define(PROLOGUE_cpu,
+m4_assert_numargs(1)
+ `GLOBL $1
+ TYPE($1,`function')
+$1:')
+
+define(EPILOGUE_cpu,
+m4_assert_numargs(1)
+` SIZE($1,.-$1)')
+
+
+
+dnl --------------------------------------------------------------------------
+dnl Various x86 macros.
+dnl
+
+
+dnl Usage: ALIGN_OFFSET(bytes,offset)
+dnl
+dnl Align to `offset' away from a multiple of `bytes'.
+dnl
+dnl This is useful for testing, for example align to something very strict
+dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
+dnl
+dnl Generally you wouldn't execute across the padding, but it's done with
+dnl nop's so it'll work.
+
+define(ALIGN_OFFSET,
+m4_assert_numargs(2)
+`ALIGN($1)
+forloop(`i',1,$2,` nop
+')')
+
+
+dnl Usage: defframe(name,offset)
+dnl
+dnl Make a definition like the following with which to access a parameter
+dnl or variable on the stack.
+dnl
+dnl define(name,`FRAME+offset(%esp)')
+dnl
+dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
+dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
+dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
+dnl zero offset is wanted.
+dnl
+dnl The new macro also gets a check that when it's used FRAME is actually
+dnl defined, and that the final %esp offset isn't negative, which would
+dnl mean an attempt to access something below the current %esp.
+dnl
+dnl deflit() is used rather than a plain define(), so the new macro won't
+dnl delete any following parenthesized expression. name(%edi) will come
+dnl out say as 16(%esp)(%edi). This isn't valid assembler and should
+dnl provoke an error, which is better than silently giving just 16(%esp).
+dnl
+dnl See README.family for more on the suggested way to access the stack
+dnl frame.
+
+define(defframe,
+m4_assert_numargs(2)
+`deflit(`$1',
+m4_assert_defined(`FRAME')
+`defframe_check_notbelow(`$1',$2,FRAME)dnl
+defframe_empty_if_zero(FRAME+($2))(%esp)')')
+
+dnl Called: defframe_empty_if_zero(expression)
+define(defframe_empty_if_zero,
+`ifelse(defframe_empty_if_zero_disabled,1,
+`eval($1)',
+`m4_empty_if_zero($1)')')
+
+dnl Called: defframe_check_notbelow(`name',offset,FRAME)
+define(defframe_check_notbelow,
+m4_assert_numargs(3)
+`ifelse(eval(($3)+($2)<0),1,
+`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
+')')')
+
+
+dnl Usage: FRAME_pushl()
+dnl FRAME_popl()
+dnl FRAME_addl_esp(n)
+dnl FRAME_subl_esp(n)
+dnl
+dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
+dnl %esp of n bytes.
+dnl
+dnl Using these macros is completely optional. Sometimes it makes more
+dnl sense to put explicit deflit(`FRAME',N) forms, especially when there's
+dnl jumps and different sequences of FRAME values need to be used in
+dnl different places.
+
+define(FRAME_pushl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+4))')
+
+define(FRAME_popl,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-4))')
+
+define(FRAME_addl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-($1)))')
+
+define(FRAME_subl_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+($1)))')
+
+
+dnl Usage: defframe_pushl(name)
+dnl
+dnl Do a combination of a FRAME_pushl() and a defframe() to name the stack
+dnl location just pushed. This should come after a pushl instruction.
+dnl Putting it on the same line works and avoids lengthening the code. For
+dnl example,
+dnl
+dnl pushl %eax defframe_pushl(VAR_COUNTER)
+dnl
+dnl Notice the defframe() is done with an unquoted -FRAME thus giving its
+dnl current value without tracking future changes.
+
+define(defframe_pushl,
+`FRAME_pushl()defframe(`$1',-FRAME)')
+
+
+dnl --------------------------------------------------------------------------
+dnl Assembler instruction macros.
+dnl
+
+
+dnl Usage: emms_or_femms
+dnl femms_available_p
+dnl
+dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
+dnl femms instruction is available. emms_or_femms expands to femms if
+dnl available, or emms if not.
+dnl
+dnl emms_or_femms is meant for use in the K6 directory where plain K6
+dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
+dnl supported together.
+dnl
+dnl On K7 femms is no longer faster and is just an alias for emms, so plain
+dnl emms may as well be used.
+
+define(femms_available_p,
+m4_assert_numargs(-1)
+`m4_ifdef_anyof_p(
+ `HAVE_TARGET_CPU_k62',
+ `HAVE_TARGET_CPU_k63',
+ `HAVE_TARGET_CPU_athlon')')
+
+define(emms_or_femms,
+m4_assert_numargs(-1)
+`ifelse(femms_available_p,1,`femms',`emms')')
+
+
+dnl Usage: femms
+dnl
+dnl The gas 2.9.1 that comes with FreeBSD 3.4 doesn't support femms, so the
+dnl following is a replacement using .byte.
+dnl
+dnl If femms isn't available, an emms is generated instead, for convenience
+dnl when testing on a machine without femms.
+
+define(femms,
+m4_assert_numargs(-1)
+`ifelse(femms_available_p,1,
+`.byte 15,14 C AMD 3DNow femms',
+`emms`'dnl
+m4_warning(`warning, using emms in place of femms, use for testing only
+')')')
+
+
+dnl Usage: jadcl0(op)
+dnl
+dnl Issue a jnc/incl as a substitute for adcl $0,op. This isn't an exact
+dnl replacement, since it doesn't set the flags like adcl does.
+dnl
+dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
+dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch
+dnl misprediction penalty is small, and the multiply algorithm used leads
+dnl to a carry bit on average only 1/4 of the time.
+dnl
+dnl jadcl0_disabled can be set to 1 to instead issue an ordinary adcl for
+dnl comparison. For example,
+dnl
+dnl define(`jadcl0_disabled',1)
+dnl
+dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
+dnl the same size as an adcl. This makes it possible to use the exact same
+dnl computed jump code when testing the relative speed of jnc/incl and adcl
+dnl with jadcl0_disabled.
+
+define(jadcl0,
+m4_assert_numargs(1)
+`ifelse(jadcl0_disabled,1,
+ `adcl $`'0, $1',
+ `jnc 1f
+ incl $1
+1:dnl')')
+
+
+dnl Usage: cmov_available_p
+dnl
+dnl Expand to 1 if cmov is available, 0 if not.
+
+define(cmov_available_p,
+`m4_ifdef_anyof_p(
+ `HAVE_TARGET_CPU_pentiumpro',
+ `HAVE_TARGET_CPU_pentium2',
+ `HAVE_TARGET_CPU_pentium3',
+ `HAVE_TARGET_CPU_athlon')')
+
+
+dnl Usage: x86_lookup(target, key,value, key,value, ...)
+dnl x86_lookup_p(target, key,value, key,value, ...)
+dnl
+dnl Look for `target' among the `key' parameters.
+dnl
+dnl x86_lookup expands to the corresponding `value', or generates an error
+dnl if `target' isn't found.
+dnl
+dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not.
+
+define(x86_lookup,
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+define(x86_lookup_p,
+`ifelse(eval($#<3),1, `0',
+`ifelse(`$1',`$2', `1',
+`x86_lookup_p(`$1',shift(shift(shift($@))))')')')
+
+
+dnl Usage: x86_opcode_reg32(reg)
+dnl x86_opcode_reg32_p(reg)
+dnl
+dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given
+dnl 32-bit register, eg. `%ebp' turns into 5.
+dnl
+dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
+dnl if not.
+
+define(x86_opcode_reg32,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_p,
+m4_assert_onearg()
+`x86_lookup_p(`$1',x86_opcode_reg32_list)')
+
+define(x86_opcode_reg32_list,
+``%eax',0,
+`%ecx',1,
+`%edx',2,
+`%ebx',3,
+`%esp',4,
+`%ebp',5,
+`%esi',6,
+`%edi',7')
+
+
+dnl Usage: x86_opcode_tttn(cond)
+dnl
+dnl Expand to the 4-bit "tttn" field value for the given x86 branch
+dnl condition (like `c', `ae', etc).
+
+define(x86_opcode_tttn,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_ttn_list)')
+
+define(x86_opcode_tttn_list,
+``o', 0,
+`no', 1,
+`b', 2, `c', 2, `nae',2,
+`nb', 3, `nc', 3, `ae', 3,
+`e', 4, `z', 4,
+`ne', 5, `nz', 5,
+`be', 6, `na', 6,
+`nbe', 7, `a', 7,
+`s', 8,
+`ns', 9,
+`p', 10, `pe', 10, `npo',10,
+`np', 11, `npe',11, `po', 11,
+`l', 12, `nge',12,
+`nl', 13, `ge', 13,
+`le', 14, `ng', 14,
+`nle',15, `g', 15')
+
+
+dnl Usage: cmovCC(srcreg,dstreg)
+dnl
+dnl Generate a cmov instruction if the target supports cmov, or simulate it
+dnl with a conditional jump if not (the latter being meant only for
+dnl testing). For example,
+dnl
+dnl cmovz( %eax, %ebx)
+dnl
+dnl cmov instructions are generated using .byte sequences, since only
+dnl recent versions of gas know cmov.
+dnl
+dnl The source operand can only be a plain register. (m4 code implementing
+dnl full memory addressing modes exists, believe it or not, but isn't
+dnl currently needed and isn't included.)
+dnl
+dnl All the standard conditions are defined. Attempting to use one without
+dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
+dnl an error. This ensures the necessary .byte sequences aren't
+dnl accidentally missed.
+
+dnl Called: define_cmov_many(cond,tttn,cond,tttn,...)
+define(define_cmov_many,
+`ifelse(m4_length(`$1'),0,,
+`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
+
+dnl Called: define_cmov(cond,tttn)
+define(define_cmov,
+m4_assert_numargs(2)
+`define(`cmov$1',
+m4_instruction_wrapper()
+m4_assert_numargs(2)
+`cmov_internal'(m4_doublequote($`'0),``$1',`$2'',dnl
+m4_doublequote($`'1),m4_doublequote($`'2)))')
+
+define_cmov_many(x86_opcode_tttn_list)
+
+
+dnl Called: cmov_internal(name,cond,tttn,src,dst)
+define(cmov_internal,
+m4_assert_numargs(5)
+`ifelse(cmov_available_p,1,
+`cmov_bytes_tttn(`$1',`$3',`$4',`$5')',
+`m4_warning(`warning, simulating cmov with jump, use for testing only
+')cmov_simulate(`$2',`$4',`$5')')')
+
+dnl Called: cmov_simulate(cond,src,dst)
+dnl If this is going to be used with memory operands for the source it will
+dnl need to be changed to do a fetch even if the condition is false, so as
+dnl to trigger exceptions the same way a real cmov does.
+define(cmov_simulate,
+m4_assert_numargs(3)
+ `j$1 1f C cmov$1 $2, $3
+ jmp 2f
+1: movl $2, $3
+2:')
+
+dnl Called: cmov_bytes_tttn(name,tttn,src,dst)
+define(cmov_bytes_tttn,
+m4_assert_numargs(4)
+`.byte dnl
+15, dnl
+eval(64+$2), dnl
+eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
+ C `$1 $3, $4'')
+
+
+dnl Usage: loop_or_decljnz label
+dnl
+dnl Generate either a "loop" instruction or a "decl %ecx / jnz", whichever
+dnl is better. "loop" is better on K6 and probably on 386, on other chips
+dnl separate decl/jnz is better.
+dnl
+dnl This macro is just for mpn/x86/divrem_1.asm and mpn/x86/mod_1.asm where
+dnl this loop_or_decljnz variation is enough to let the code be shared by
+dnl all chips.
+
+define(loop_or_decljnz,
+`ifelse(loop_is_better_p,1,
+ `loop',
+ `decl %ecx
+ jnz')')
+
+define(loop_is_better_p,
+`m4_ifdef_anyof_p(`HAVE_TARGET_CPU_k6',
+ `HAVE_TARGET_CPU_k62',
+ `HAVE_TARGET_CPU_k63',
+ `HAVE_TARGET_CPU_i386')')
+
+
+dnl Usage: Zdisp(inst,op,op,op)
+dnl
+dnl Generate explicit .byte sequences if necessary to force a byte-sized
+dnl zero displacement on an instruction. For example,
+dnl
+dnl Zdisp( movl, 0,(%esi), %eax)
+dnl
+dnl expands to
+dnl
+dnl .byte 139,70,0 C movl 0(%esi), %eax
+dnl
+dnl If the displacement given isn't 0, then normal assembler code is
+dnl generated. For example,
+dnl
+dnl Zdisp( movl, 4,(%esi), %eax)
+dnl
+dnl expands to
+dnl
+dnl movl 4(%esi), %eax
+dnl
+dnl This means a single Zdisp() form can be used with an expression for the
+dnl displacement, and .byte will be used only if necessary. The
+dnl displacement argument is eval()ed.
+dnl
+dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is
+dnl implemented with a table of instructions and encodings. A new entry is
+dnl needed for any different operation or registers.
+
+define(Zdisp,
+`define(`Zdisp_found',0)dnl
+Zdisp_match( movl, %eax, 0,(%edi), `137,71,0', $@)`'dnl
+Zdisp_match( movl, %ebx, 0,(%edi), `137,95,0', $@)`'dnl
+Zdisp_match( movl, %esi, 0,(%edi), `137,119,0', $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %eax, `139,67,0', $@)`'dnl
+Zdisp_match( movl, 0,(%ebx), %esi, `139,115,0', $@)`'dnl
+Zdisp_match( movl, 0,(%esi), %eax, `139,70,0', $@)`'dnl
+Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
+Zdisp_match( addl, %ebx, 0,(%edi), `1,95,0', $@)`'dnl
+Zdisp_match( addl, %ecx, 0,(%edi), `1,79,0', $@)`'dnl
+Zdisp_match( addl, %esi, 0,(%edi), `1,119,0', $@)`'dnl
+Zdisp_match( subl, %ecx, 0,(%edi), `41,79,0', $@)`'dnl
+Zdisp_match( adcl, 0,(%edx), %esi, `19,114,0', $@)`'dnl
+Zdisp_match( sbbl, 0,(%edx), %esi, `27,114,0', $@)`'dnl
+Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
+Zdisp_match( movq, 0,(%esi), %mm0, `15,111,70,0', $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edi), `15,127,71,0', $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
+Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
+Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
+Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
+Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
+Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
+ifelse(Zdisp_found,0,
+`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
+')')')
+
+define(Zdisp_match,
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+ && m4_stringequal_p(`$2',0)
+ && m4_stringequal_p(`$3',`$8')
+ && m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$7'),0,
+` .byte $5 C `$1 0$3, $4'',
+` $6 $7$8, $9')',
+
+`ifelse(eval(m4_stringequal_p(`$1',`$6')
+ && m4_stringequal_p(`$2',`$7')
+ && m4_stringequal_p(`$3',0)
+ && m4_stringequal_p(`$4',`$9')),1,
+`define(`Zdisp_found',1)dnl
+ifelse(eval(`$8'),0,
+` .byte $5 C `$1 $2, 0$4'',
+` $6 $7, $8$9')')')')
+
+
+dnl Usage: shldl(count,src,dst)
+dnl shrdl(count,src,dst)
+dnl shldw(count,src,dst)
+dnl shrdw(count,src,dst)
+dnl
+dnl Generate a double-shift instruction, possibly omitting a %cl count
+dnl parameter if that's what the assembler requires, as indicated by
+dnl WANT_SHLDL_CL in config.m4. For example,
+dnl
+dnl shldl( %cl, %eax, %ebx)
+dnl
+dnl turns into either
+dnl
+dnl shldl %cl, %eax, %ebx
+dnl or
+dnl shldl %eax, %ebx
+dnl
+dnl Immediate counts are always passed through unchanged. For example,
+dnl
+dnl shrdl( $2, %esi, %edi)
+dnl becomes
+dnl shrdl $2, %esi, %edi
+dnl
+dnl
+dnl If you forget to use the macro form "shldl( ...)" and instead write
+dnl just a plain "shldl ...", an error results. This ensures the necessary
+dnl variant treatment of %cl isn't accidentally bypassed.
+
+define(define_shd_instruction,
+`define($1,
+m4_instruction_wrapper()
+m4_assert_numargs(3)
+`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
+m4_doublequote($`'2),m4_doublequote($`'3)))')
+
+dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
+define_shd_instruction(shldl)
+define_shd_instruction(shrdl)
+define_shd_instruction(shldw)
+define_shd_instruction(shrdw)
+
+dnl Called: shd_instruction(op,count,src,dst)
+define(shd_instruction,
+m4_assert_numargs(4)
+m4_assert_defined(`WANT_SHLDL_CL')
+`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
+``$1' `$3', `$4'',
+``$1' `$2', `$3', `$4'')')
+
+
+dnl Usage: ASSERT(cond, instructions)
+dnl
+dnl If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl flags condition to then be satisfied. For example,
+dnl
+dnl ASSERT(ne, `cmpl %eax, %ebx')
+dnl
+dnl The instructions can be omitted to just assert a flags condition with
+dnl no extra calculation. For example,
+dnl
+dnl ASSERT(nc)
+dnl
+dnl When `instructions' is not empty, a pushf/popf is added to preserve the
+dnl flags, but the instructions themselves must preserve any registers that
+dnl matter. FRAME is adjusted for the push and pop, so the instructions
+dnl given can use defframe() stack variables.
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+`ifelse(WANT_ASSERT,1,
+ `C ASSERT
+ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
+ $2
+ j`$1' 1f
+ ud2 C assertion failed
+1:
+ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
+')')
+
+
+dnl Usage: movl_text_address(label,register)
+dnl
+dnl Get the address of a text segment label, using either a plain movl or a
+dnl position-independent calculation, as necessary. For example,
+dnl
+dnl	movl_text_address(L(foo),%eax)
+dnl
+dnl This macro is only meant for use in ASSERT()s or when testing, since
+dnl the PIC sequence it generates will want to be done with a ret balancing
+dnl the call on CPUs with return address branch prediction.
+dnl
+dnl The addl generated here has a backward reference to 1b, and so won't
+dnl suffer from the two forwards references bug in old gas (described in
+dnl mpn/x86/README.family).
+
+define(movl_text_address,
+`ifdef(`PIC',
+ `call 1f
+1: popl $2 C %eip
+ addl `$'$1-1b, $2',
+ `movl `$'$1, $2')')
+
+
+divert`'dnl
diff --git a/rts/gmp/mpn/z8000/add_n.s b/rts/gmp/mpn/z8000/add_n.s
new file mode 100644
index 0000000000..3a136107fe
--- /dev/null
+++ b/rts/gmp/mpn/z8000/add_n.s
@@ -0,0 +1,53 @@
+! Z8000 __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ unseg
+ .text
+ even
+ global ___gmpn_add_n
+___gmpn_add_n:
+ pop r0,@r6
+ pop r1,@r5
+ add r0,r1
+ ld @r7,r0
+ dec r4
+ jr eq,Lend
+Loop: pop r0,@r6
+ pop r1,@r5
+ adc r0,r1
+ inc r7,#2
+ ld @r7,r0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpn/z8000/gmp-mparam.h b/rts/gmp/mpn/z8000/gmp-mparam.h
new file mode 100644
index 0000000000..4216df673c
--- /dev/null
+++ b/rts/gmp/mpn/z8000/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 16
+#define BYTES_PER_MP_LIMB 2
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 16
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/rts/gmp/mpn/z8000/mul_1.s b/rts/gmp/mpn/z8000/mul_1.s
new file mode 100644
index 0000000000..20fadd340a
--- /dev/null
+++ b/rts/gmp/mpn/z8000/mul_1.s
@@ -0,0 +1,68 @@
+! Z8000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! size r5
+! s2_limb r4
+
+ unseg
+ .text
+ even
+ global ___gmpn_mul_1
+___gmpn_mul_1:
+ sub r2,r2 ! zero carry limb
+ and r4,r4
+ jr mi,Lneg
+
+Lpos: pop r1,@r6
+ ld r9,r1
+ mult rr8,r4
+ and r1,r1 ! shift msb of loaded limb into cy
+ jr mi,Lp ! branch if loaded limb's msb is set
+ add r8,r4 ! hi_limb += sign_comp2
+Lp: add r9,r2 ! lo_limb += cy_limb
+ xor r2,r2
+ adc r2,r8
+ ld @r7,r9
+ inc r7,#2
+ dec r5
+ jr ne,Lpos
+ ret t
+
+Lneg: pop r1,@r6
+ ld r9,r1
+ mult rr8,r4
+ add r8,r1 ! hi_limb += sign_comp1
+ and r1,r1
+ jr mi,Ln
+ add r8,r4 ! hi_limb += sign_comp2
+Ln: add r9,r2 ! lo_limb += cy_limb
+ xor r2,r2
+ adc r2,r8
+ ld @r7,r9
+ inc r7,#2
+ dec r5
+ jr ne,Lneg
+ ret t
diff --git a/rts/gmp/mpn/z8000/sub_n.s b/rts/gmp/mpn/z8000/sub_n.s
new file mode 100644
index 0000000000..bd9a7ad409
--- /dev/null
+++ b/rts/gmp/mpn/z8000/sub_n.s
@@ -0,0 +1,54 @@
+! Z8000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ unseg
+ .text
+ even
+ global ___gmpn_sub_n
+___gmpn_sub_n:
+ pop r0,@r6
+ pop r1,@r5
+ sub r0,r1
+ ld @r7,r0
+ dec r4
+ jr eq,Lend
+Loop: pop r0,@r6
+ pop r1,@r5
+ sbc r0,r1
+ inc r7,#2
+ ld @r7,r0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpn/z8000x/add_n.s b/rts/gmp/mpn/z8000x/add_n.s
new file mode 100644
index 0000000000..7f130785c5
--- /dev/null
+++ b/rts/gmp/mpn/z8000x/add_n.s
@@ -0,0 +1,56 @@
+! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal,
+! non-zero length.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ segm
+ .text
+ even
+ global ___gmpn_add_n
+___gmpn_add_n:
+ popl rr0,@r6
+ popl rr8,@r5
+ addl rr0,rr8
+ ldl @r7,rr0
+ dec r4
+ jr eq,Lend
+Loop: popl rr0,@r6
+ popl rr8,@r5
+ adc r1,r9
+ adc r0,r8
+ inc r7,#4
+ ldl @r7,rr0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ ld r3,r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpn/z8000x/sub_n.s b/rts/gmp/mpn/z8000x/sub_n.s
new file mode 100644
index 0000000000..f416d1d6eb
--- /dev/null
+++ b/rts/gmp/mpn/z8000x/sub_n.s
@@ -0,0 +1,56 @@
+! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the
+! same length > 0 and store difference in a third limb vector.
+
+! Copyright (C) 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 2.1 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr r7
+! s1_ptr r6
+! s2_ptr r5
+! size r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp. We'd
+! then add 2x the number of words written to r7...
+
+ segm
+ .text
+ even
+ global ___gmpn_sub_n
+___gmpn_sub_n:
+ popl rr0,@r6
+ popl rr8,@r5
+ subl rr0,rr8
+ ldl @r7,rr0
+ dec r4
+ jr eq,Lend
+Loop: popl rr0,@r6
+ popl rr8,@r5
+ sbc r1,r9
+ sbc r0,r8
+ inc r7,#4
+ ldl @r7,rr0
+ dec r4
+ jr ne,Loop
+Lend: ld r2,r4 ! use 0 already in r4
+ ld r3,r4
+ adc r2,r2
+ ret t
diff --git a/rts/gmp/mpz/Makefile.am b/rts/gmp/mpz/Makefile.am
new file mode 100644
index 0000000000..cd6fec4e21
--- /dev/null
+++ b/rts/gmp/mpz/Makefile.am
@@ -0,0 +1,58 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+
+SUBDIRS = tests
+
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+noinst_LTLIBRARIES = libmpz.la
+libmpz_la_SOURCES = \
+ abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \
+ bin_ui.c bin_uiui.c cdiv_q.c \
+ cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
+ clear.c clrbit.c cmp.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_ui.c com.c \
+ divexact.c dump.c fac_ui.c fdiv_q.c fdiv_q_2exp.c fdiv_q_ui.c \
+ fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_2exp.c fdiv_r_ui.c fdiv_ui.c \
+ fib_ui.c fits_sint_p.c fits_slong_p.c fits_sshort_p.c fits_uint_p.c \
+ fits_ulong_p.c fits_ushort_p.c gcd.c gcd_ui.c gcdext.c get_d.c get_si.c \
+ get_str.c get_ui.c getlimbn.c hamdist.c init.c inp_raw.c inp_str.c \
+ invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
+ jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
+ lcm.c legendre.c mod.c mul.c mul_2exp.c neg.c nextprime.c \
+ out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
+ powm_ui.c pprime_p.c random.c random2.c realloc.c remove.c root.c rrandomb.c \
+ scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
+ set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \
+ sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
+ tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
+ urandomb.c urandomm.c xor.c
+
+EXTRA_DIST = mul_siui.c
+nodist_libmpz_la_SOURCES = mul_si.c mul_ui.c
+CLEANFILES = $(nodist_libmpz_la_SOURCES)
+
+mul_si.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_si.c
+mul_ui.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_ui.c
diff --git a/rts/gmp/mpz/Makefile.in b/rts/gmp/mpz/Makefile.in
new file mode 100644
index 0000000000..e0f2cdc133
--- /dev/null
+++ b/rts/gmp/mpz/Makefile.in
@@ -0,0 +1,457 @@
+# Makefile.in generated automatically by automake 1.4a from Makefile.am
+
+# Copyright (C) 1994, 1995-8, 1999 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+DESTDIR =
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_FLAG =
+transform = @program_transform_name@
+
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+
+@SET_MAKE@
+build_alias = @build_alias@
+build_triplet = @build@
+host_alias = @host_alias@
+host_triplet = @host@
+target_alias = @target_alias@
+target_triplet = @target@
+AMDEP = @AMDEP@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CPP = @CPP@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+EXEEXT = @EXEEXT@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+SPEED_CYCLECOUNTER_OBJS = @SPEED_CYCLECOUNTER_OBJS@
+STRIP = @STRIP@
+U = @U@
+VERSION = @VERSION@
+gmp_srclinks = @gmp_srclinks@
+install_sh = @install_sh@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+
+# Copyright (C) 1996, 1998, 1999, 2000 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+AUTOMAKE_OPTIONS = gnu no-dependencies
+
+SUBDIRS =
+
+INCLUDES = -I$(top_srcdir) -DOPERATION_$*
+
+noinst_LTLIBRARIES = libmpz.la
+libmpz_la_SOURCES = \
+ abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \
+ bin_ui.c bin_uiui.c cdiv_q.c \
+ cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
+ clear.c clrbit.c cmp.c cmp_si.c cmp_ui.c cmpabs.c cmpabs_ui.c com.c \
+ divexact.c dump.c fac_ui.c fdiv_q.c fdiv_q_2exp.c fdiv_q_ui.c \
+ fdiv_qr.c fdiv_qr_ui.c fdiv_r.c fdiv_r_2exp.c fdiv_r_ui.c fdiv_ui.c \
+ fib_ui.c fits_sint_p.c fits_slong_p.c fits_sshort_p.c fits_uint_p.c \
+ fits_ulong_p.c fits_ushort_p.c gcd.c gcd_ui.c gcdext.c get_d.c get_si.c \
+ get_str.c get_ui.c getlimbn.c hamdist.c init.c inp_raw.c inp_str.c \
+ invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
+ jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
+ lcm.c legendre.c mod.c mul.c mul_2exp.c neg.c nextprime.c \
+ out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
+ powm_ui.c pprime_p.c random.c random2.c realloc.c remove.c root.c rrandomb.c \
+ scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
+ set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \
+ sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
+ tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
+ urandomb.c urandomm.c xor.c
+
+
+EXTRA_DIST = mul_siui.c
+nodist_libmpz_la_SOURCES = mul_si.c mul_ui.c
+CLEANFILES = $(nodist_libmpz_la_SOURCES)
+subdir = mpz
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = ../config.h
+CONFIG_CLEAN_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I..
+CPPFLAGS = @CPPFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+libmpz_la_LDFLAGS =
+libmpz_la_LIBADD =
+am_libmpz_la_OBJECTS = abs.lo add.lo add_ui.lo addmul_ui.lo and.lo \
+array_init.lo bin_ui.lo bin_uiui.lo cdiv_q.lo cdiv_q_ui.lo cdiv_qr.lo \
+cdiv_qr_ui.lo cdiv_r.lo cdiv_r_ui.lo cdiv_ui.lo clear.lo clrbit.lo \
+cmp.lo cmp_si.lo cmp_ui.lo cmpabs.lo cmpabs_ui.lo com.lo divexact.lo \
+dump.lo fac_ui.lo fdiv_q.lo fdiv_q_2exp.lo fdiv_q_ui.lo fdiv_qr.lo \
+fdiv_qr_ui.lo fdiv_r.lo fdiv_r_2exp.lo fdiv_r_ui.lo fdiv_ui.lo \
+fib_ui.lo fits_sint_p.lo fits_slong_p.lo fits_sshort_p.lo \
+fits_uint_p.lo fits_ulong_p.lo fits_ushort_p.lo gcd.lo gcd_ui.lo \
+gcdext.lo get_d.lo get_si.lo get_str.lo get_ui.lo getlimbn.lo \
+hamdist.lo init.lo inp_raw.lo inp_str.lo invert.lo ior.lo iset.lo \
+iset_d.lo iset_si.lo iset_str.lo iset_ui.lo jacobi.lo kronsz.lo \
+kronuz.lo kronzs.lo kronzu.lo lcm.lo legendre.lo mod.lo mul.lo \
+mul_2exp.lo neg.lo nextprime.lo out_raw.lo out_str.lo perfpow.lo \
+perfsqr.lo popcount.lo pow_ui.lo powm.lo powm_ui.lo pprime_p.lo \
+random.lo random2.lo realloc.lo remove.lo root.lo rrandomb.lo scan0.lo \
+scan1.lo set.lo set_d.lo set_f.lo set_q.lo set_si.lo set_str.lo \
+set_ui.lo setbit.lo size.lo sizeinbase.lo sqrt.lo sqrtrem.lo sub.lo \
+sub_ui.lo swap.lo tdiv_ui.lo tdiv_q.lo tdiv_q_2exp.lo tdiv_q_ui.lo \
+tdiv_qr.lo tdiv_qr_ui.lo tdiv_r.lo tdiv_r_2exp.lo tdiv_r_ui.lo \
+tstbit.lo ui_pow_ui.lo urandomb.lo urandomm.lo xor.lo
+nodist_libmpz_la_OBJECTS = mul_si.lo mul_ui.lo
+libmpz_la_OBJECTS = $(am_libmpz_la_OBJECTS) $(nodist_libmpz_la_OBJECTS)
+COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CFLAGS = @CFLAGS@
+CCLD = $(CC)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DIST_SOURCES = $(libmpz_la_SOURCES)
+DIST_COMMON = README Makefile.am Makefile.in
+
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+GZIP_ENV = --best
+depcomp =
+SOURCES = $(libmpz_la_SOURCES) $(nodist_libmpz_la_SOURCES)
+OBJECTS = $(am_libmpz_la_OBJECTS) $(nodist_libmpz_la_OBJECTS)
+
+all: all-redirect
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
+ cd $(top_srcdir) && $(AUTOMAKE) --gnu mpz/Makefile
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+mostlyclean-noinstLTLIBRARIES:
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+
+distclean-noinstLTLIBRARIES:
+
+maintainer-clean-noinstLTLIBRARIES:
+
+mostlyclean-compile:
+ -rm -f *.o core *.core
+ -rm -f *.$(OBJEXT)
+
+clean-compile:
+
+distclean-compile:
+ -rm -f *.tab.c
+
+maintainer-clean-compile:
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+distclean-libtool:
+
+maintainer-clean-libtool:
+
+libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES)
+ $(LINK) $(libmpz_la_LDFLAGS) $(libmpz_la_OBJECTS) $(libmpz_la_LIBADD) $(LIBS)
+.c.o:
+ $(COMPILE) -c $<
+.c.obj:
+ $(COMPILE) -c `cygpath -w $<`
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+
+all-recursive install-data-recursive install-exec-recursive \
+installdirs-recursive install-recursive uninstall-recursive \
+check-recursive installcheck-recursive info-recursive dvi-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+mostlyclean-recursive clean-recursive distclean-recursive \
+maintainer-clean-recursive:
+ @set fnord $(MAKEFLAGS); amf=$$2; \
+ dot_seen=no; \
+ rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \
+ rev="$$subdir $$rev"; \
+ if test "$$subdir" = "."; then dot_seen=yes; else :; fi; \
+ done; \
+ test "$$dot_seen" = "no" && rev=". $$rev"; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+
+tags: TAGS
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ mkid -f$$here/ID $$unique $(LISP)
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \
+ || etags $(ETAGS_ARGS) $$tags $$unique $(LISP)
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ -rm -f TAGS ID
+
+maintainer-clean-tags:
+
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+
+distdir: $(DISTFILES)
+ @for file in $(DISTFILES); do \
+ d=$(srcdir); \
+ if test -d $$d/$$file; then \
+ cp -pR $$d/$$file $(distdir); \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file || :; \
+ fi; \
+ done
+ for subdir in $(SUBDIRS); do \
+ if test "$$subdir" = .; then :; else \
+ test -d $(distdir)/$$subdir \
+ || mkdir $(distdir)/$$subdir \
+ || exit 1; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(top_distdir) distdir=../$(distdir)/$$subdir distdir) \
+ || exit 1; \
+ fi; \
+ done
+info-am:
+info: info-recursive
+dvi-am:
+dvi: dvi-recursive
+check-am: all-am
+check: check-recursive
+installcheck-am:
+installcheck: installcheck-recursive
+install-exec-am:
+install-exec: install-exec-recursive
+
+install-data-am:
+install-data: install-data-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+install: install-recursive
+uninstall-am:
+uninstall: uninstall-recursive
+all-am: Makefile $(LTLIBRARIES)
+all-redirect: all-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install
+installdirs: installdirs-recursive
+installdirs-am:
+
+
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -rm -f Makefile $(CONFIG_CLEAN_FILES)
+ -rm -f config.cache config.log stamp-h stamp-h[0-9]*
+
+maintainer-clean-generic:
+ -rm -f Makefile.in
+mostlyclean-am: mostlyclean-noinstLTLIBRARIES mostlyclean-compile \
+ mostlyclean-libtool mostlyclean-tags \
+ mostlyclean-generic
+
+mostlyclean: mostlyclean-recursive
+
+clean-am: clean-noinstLTLIBRARIES clean-compile clean-libtool \
+ clean-tags clean-generic mostlyclean-am
+
+clean: clean-recursive
+
+distclean-am: distclean-noinstLTLIBRARIES distclean-compile \
+ distclean-libtool distclean-tags distclean-generic \
+ clean-am
+ -rm -f libtool
+
+distclean: distclean-recursive
+
+maintainer-clean-am: maintainer-clean-noinstLTLIBRARIES \
+ maintainer-clean-compile maintainer-clean-libtool \
+ maintainer-clean-tags maintainer-clean-generic \
+ distclean-am
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+maintainer-clean: maintainer-clean-recursive
+
+.PHONY: mostlyclean-noinstLTLIBRARIES distclean-noinstLTLIBRARIES \
+clean-noinstLTLIBRARIES maintainer-clean-noinstLTLIBRARIES \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile mostlyclean-libtool distclean-libtool \
+clean-libtool maintainer-clean-libtool install-recursive \
+uninstall-recursive install-data-recursive uninstall-data-recursive \
+install-exec-recursive uninstall-exec-recursive installdirs-recursive \
+uninstalldirs-recursive all-recursive check-recursive \
+installcheck-recursive info-recursive dvi-recursive \
+mostlyclean-recursive distclean-recursive clean-recursive \
+maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
+distclean-tags clean-tags maintainer-clean-tags distdir info-am info \
+dvi-am dvi check check-am installcheck-am installcheck install-exec-am \
+install-exec install-data-am install-data install-am install \
+uninstall-am uninstall all-redirect all-am all install-strip \
+installdirs-am installdirs mostlyclean-generic distclean-generic \
+clean-generic maintainer-clean-generic clean mostlyclean distclean \
+maintainer-clean
+
+
+mul_si.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_si.c
+mul_ui.c: $(srcdir)/mul_siui.c
+ cp $(srcdir)/mul_siui.c mul_ui.c
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rts/gmp/mpz/README b/rts/gmp/mpz/README
new file mode 100644
index 0000000000..06b481d770
--- /dev/null
+++ b/rts/gmp/mpz/README
@@ -0,0 +1,23 @@
+This directory contains functions for GMP's integer function layer.
+
+In this version of GMP, integers are represented like in the figure below.
+(Please note that the format might change between every version, and that
+depending on the internal format in any way is a bad idea.)
+
+ most least
+significant significant
+ limb limb
+
+ _mp_d
+ /
+ /
+ \/
+ ____ ____ ____ ____ ____
+ |____|____|____|____|____|
+
+ <------- _mp_size ------->
+
+
+The most significant limb will be non-zero. The _mp_size field's sign
+reflects the sign of the number. Its absolute value is the count of limbs
+in the number.
diff --git a/rts/gmp/mpz/abs.c b/rts/gmp/mpz/abs.c
new file mode 100644
index 0000000000..0b5eab1ce6
--- /dev/null
+++ b/rts/gmp/mpz/abs.c
@@ -0,0 +1,51 @@
+/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_abs (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_abs (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t size;
+
+ size = ABS (u->_mp_size);
+
+ if (u != w)
+ {
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ }
+
+ w->_mp_size = size;
+}
diff --git a/rts/gmp/mpz/add.c b/rts/gmp/mpz/add.c
new file mode 100644
index 0000000000..a22c3778fb
--- /dev/null
+++ b/rts/gmp/mpz/add.c
@@ -0,0 +1,123 @@
+/* mpz_add -- Add two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_add (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_add (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+madd (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#else
+madd (u, v, w)
+ mpz_srcptr u;
+ mpz_srcptr v;
+ mpz_ptr w;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_srcptr up, vp;
+ mp_ptr wp;
+ mp_size_t usize, vsize, wsize;
+ mp_size_t abs_usize;
+ mp_size_t abs_vsize;
+
+ usize = u->_mp_size;
+ vsize = v->_mp_size;
+ abs_usize = ABS (usize);
+ abs_vsize = ABS (vsize);
+
+ if (abs_usize < abs_vsize)
+ {
+ /* Swap U and V. */
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (usize, vsize);
+ MP_SIZE_T_SWAP (abs_usize, abs_vsize);
+ }
+
+ /* True: ABS_USIZE >= ABS_VSIZE. */
+
+ /* If not space for w (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (u or v may be the same as w). */
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ wp = w->_mp_d;
+
+ if ((usize ^ vsize) < 0)
+ {
+ /* U and V have different sign. Need to compare them to determine
+ which operand to subtract from which. */
+
+ /* This test is right since ABS_USIZE >= ABS_VSIZE. */
+ if (abs_usize != abs_vsize)
+ {
+ mpn_sub (wp, up, abs_usize, vp, abs_vsize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ else if (mpn_cmp (up, vp, abs_usize) < 0)
+ {
+ mpn_sub_n (wp, vp, up, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize >= 0)
+ wsize = -wsize;
+ }
+ else
+ {
+ mpn_sub_n (wp, up, vp, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ }
+ else
+ {
+ /* U and V have same sign. Add them. */
+ mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize);
+ wp[abs_usize] = cy_limb;
+ wsize = abs_usize + cy_limb;
+ if (usize < 0)
+ wsize = -wsize;
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/add_ui.c b/rts/gmp/mpz/add_ui.c
new file mode 100644
index 0000000000..28dbd71f45
--- /dev/null
+++ b/rts/gmp/mpz/add_ui.c
@@ -0,0 +1,84 @@
+/* mpz_add_ui -- Add an mpz_t and an unsigned one-word integer.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_add_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_add_ui (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int v;
+#endif
+{
+ mp_srcptr up;
+ mp_ptr wp;
+ mp_size_t usize, wsize;
+ mp_size_t abs_usize;
+
+ usize = u->_mp_size;
+ abs_usize = ABS (usize);
+
+ /* If not space for W (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->_mp_d;
+ wp = w->_mp_d;
+
+ if (abs_usize == 0)
+ {
+ wp[0] = v;
+ w->_mp_size = v != 0;
+ return;
+ }
+
+ if (usize >= 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) v);
+ wp[abs_usize] = cy;
+ wsize = abs_usize + cy;
+ }
+ else
+ {
+ /* The signs are different. Need exact comparison to determine
+ which operand to subtract from which. */
+ if (abs_usize == 1 && up[0] < v)
+ {
+ wp[0] = v - up[0];
+ wsize = 1;
+ }
+ else
+ {
+ mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) v);
+ /* Size can decrease with at most one limb. */
+ wsize = -(abs_usize - (wp[abs_usize - 1] == 0));
+ }
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/addmul_ui.c b/rts/gmp/mpz/addmul_ui.c
new file mode 100644
index 0000000000..7b38d3624d
--- /dev/null
+++ b/rts/gmp/mpz/addmul_ui.c
@@ -0,0 +1,214 @@
+/* mpz_addmul_ui(prodsum, multiplier, small_multiplicand) --
+ Add MULTIPLIER times SMALL_MULTIPLICAND to PRODSUM.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static mp_limb_t mpn_neg1 _PROTO ((mp_ptr, mp_size_t));
+
+#if 0
+#undef MPN_NORMALIZE
+#define MPN_NORMALIZE(DST, NLIMBS) \
+ do { \
+ while (--(NLIMBS) >= 0 && (DST)[NLIMBS] == 0) \
+ ; \
+ (NLIMBS)++; \
+ } while (0)
+#undef MPN_NORMALIZE_NOT_ZERO
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
+ do { \
+ while ((DST)[--(NLIMBS)] == 0) \
+ ; \
+ (NLIMBS)++; \
+ } while (0)
+#endif
+
+void
+#if __STDC__
+mpz_addmul_ui (mpz_ptr rz, mpz_srcptr az, unsigned long int bu)
+#else
+mpz_addmul_ui (rz, az, bu)
+ mpz_ptr rz;
+ mpz_srcptr az;
+ unsigned long int bu;
+#endif
+{
+ mp_size_t rn, an;
+ mp_ptr rp, ap;
+
+ an = SIZ (az);
+
+ /* If either multiplier is zero, result is unaffected. */
+ if (bu == 0 || an == 0)
+ return;
+
+ rn = SIZ (rz);
+
+ if (rn == 0)
+ {
+ mp_limb_t cy;
+
+ an = ABS (an);
+ if (ALLOC (rz) <= an)
+ _mpz_realloc (rz, an + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_mul_1 (rp, ap, an, (mp_limb_t) bu);
+ rp[an] = cy;
+ an += cy != 0;
+ SIZ (rz) = SIZ (az) >= 0 ? an : -an;
+ return;
+ }
+
+ if ((an ^ rn) >= 0)
+ {
+ /* Sign of operands are the same--really add. */
+ an = ABS (an);
+ rn = ABS (rn);
+ if (rn > an)
+ {
+ mp_limb_t cy;
+ if (ALLOC (rz) <= rn)
+ _mpz_realloc (rz, rn + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_addmul_1 (rp, ap, an, (mp_limb_t) bu);
+ cy = mpn_add_1 (rp + an, rp + an, rn - an, cy);
+ rp[rn] = cy;
+ rn += cy != 0;
+ SIZ (rz) = SIZ (rz) >= 0 ? rn : -rn;
+ return;
+ }
+ else
+ {
+ mp_limb_t cy;
+ if (ALLOC (rz) <= an)
+ _mpz_realloc (rz, an + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_addmul_1 (rp, ap, rn, (mp_limb_t) bu);
+ if (an != rn)
+ {
+ mp_limb_t cy2;
+ cy2 = mpn_mul_1 (rp + rn, ap + rn, an - rn, (mp_limb_t) bu);
+ cy = cy2 + mpn_add_1 (rp + rn, rp + rn, an - rn, cy);
+ }
+ rn = an;
+ rp[rn] = cy;
+ rn += cy != 0;
+ SIZ (rz) = SIZ (rz) >= 0 ? rn : -rn;
+ return;
+ }
+ }
+ else
+ {
+ /* Sign of operands are different--actually subtract. */
+ an = ABS (an);
+ rn = ABS (rn);
+ if (rn > an)
+ {
+ mp_limb_t cy;
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_submul_1 (rp, ap, an, (mp_limb_t) bu);
+ cy = mpn_sub_1 (rp + an, rp + an, rn - an, cy);
+ if (cy != 0)
+ {
+ mpn_neg1 (rp, rn);
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+ }
+ else
+ {
+ MPN_NORMALIZE (rp, rn);
+ rn = -rn;
+ }
+
+ SIZ (rz) = SIZ (rz) >= 0 ? -rn : rn;
+ return;
+ }
+ else
+ {
+ /* Tricky case. We need to subtract an operand that might be larger
+ than the minuend. To avoid allocating temporary space, we compute
+ a*b-r instead of r-a*b and then negate. */
+ mp_limb_t cy;
+ if (ALLOC (rz) <= an)
+ _mpz_realloc (rz, an + 1);
+ rp = PTR (rz);
+ ap = PTR (az);
+ cy = mpn_submul_1 (rp, ap, rn, (mp_limb_t) bu);
+ if (an != rn)
+ {
+ mp_limb_t cy2;
+ cy -= mpn_neg1 (rp, rn);
+ cy2 = mpn_mul_1 (rp + rn, ap + rn, an - rn, (mp_limb_t) bu);
+ if (cy == ~(mp_limb_t) 0)
+ cy = cy2 - mpn_sub_1 (rp + rn, rp + rn, an - rn, (mp_limb_t) 1);
+ else
+ cy = cy2 + mpn_add_1 (rp + rn, rp + rn, an - rn, cy);
+ rp[an] = cy;
+ rn = an + (cy != 0);
+ rn -= rp[rn - 1] == 0;
+ }
+ else if (cy != 0)
+ {
+ cy -= mpn_neg1 (rp, rn);
+ rp[an] = cy;
+ rn = an + 1;
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+ }
+ else
+ {
+ rn = an;
+ MPN_NORMALIZE (rp, rn);
+ rn = -rn;
+ }
+
+ SIZ (rz) = SIZ (rz) >= 0 ? -rn : rn;
+ return;
+ }
+ }
+}
+
+static mp_limb_t
+#if __STDC__
+mpn_neg1 (mp_ptr rp, mp_size_t rn)
+#else
+mpn_neg1 (rp, rn)
+ mp_ptr rp;
+ mp_size_t rn;
+#endif
+{
+ mp_size_t i;
+
+ while (rn != 0 && rp[0] == 0)
+ rp++, rn--;
+
+ if (rn != 0)
+ {
+ rp[0] = -rp[0];
+ for (i = 1; i < rn; i++)
+ rp[i] = ~rp[i];
+ return 1;
+ }
+ return 0;
+}
diff --git a/rts/gmp/mpz/and.c b/rts/gmp/mpz/and.c
new file mode 100644
index 0000000000..354e9455bf
--- /dev/null
+++ b/rts/gmp/mpz/and.c
@@ -0,0 +1,278 @@
+/* mpz_and -- Logical and.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+#else
+mpz_and (res, op1, op2)
+ mpz_ptr res;
+ mpz_srcptr op1;
+ mpz_srcptr op2;
+#endif
+{
+ mp_srcptr op1_ptr, op2_ptr;
+ mp_size_t op1_size, op2_size;
+ mp_ptr res_ptr;
+ mp_size_t res_size;
+ mp_size_t i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op1_size = op1->_mp_size;
+ op2_size = op2->_mp_size;
+
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+
+ if (op1_size >= 0)
+ {
+ if (op2_size >= 0)
+ {
+ res_size = MIN (op1_size, op2_size);
+ /* First loop finds the size of the result. */
+ for (i = res_size - 1; i >= 0; i--)
+ if ((op1_ptr[i] & op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+
+ /* Handle allocation, now that we know exactly how much space is
+ needed for the result. */
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ /* Second loop computes the real result. */
+ for (i = res_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+
+ res->_mp_size = res_size;
+ return;
+ }
+ else /* op2_size < 0 */
+ {
+ /* Fall through to the code at the end of the function. */
+ }
+ }
+ else
+ {
+ if (op2_size < 0)
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+ mp_size_t res_alloc;
+
+ /* Both operands are negative, so will be the result.
+ -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =
+ = ~(~(OP1 - 1) & ~(OP2 - 1)) + 1 =
+ = ((OP1 - 1) | (OP2 - 1)) + 1 */
+
+ /* It might seem as we could end up with an (invalid) result with
+ a leading zero-limb here when one of the operands is of the
+ type 1,,0,,..,,.0. But some analysis shows that we surely
+ would get carry into the zero-limb in this situation... */
+
+ op1_size = -op1_size;
+ op2_size = -op2_size;
+
+ res_alloc = 1 + MAX (op1_size, op2_size);
+
+ opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+ op1_ptr = opx;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to
+ temporary space--never to the space RES->_mp_d used
+ to point to before reallocation. */
+ }
+
+ if (op1_size >= op2_size)
+ {
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+
+ res->_mp_size = -res_size;
+ TMP_FREE (marker);
+ return;
+ }
+ else
+ {
+ /* We should compute -OP1 & OP2. Swap OP1 and OP2 and fall
+ through to the code that handles OP1 & -OP2. */
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ }
+
+ }
+
+ {
+#if ANDNEW
+ mp_size_t op2_lim;
+ mp_size_t count;
+
+ /* OP2 must be negated as with infinite precision.
+
+ Scan from the low end for a non-zero limb. The first non-zero
+ limb is simply negated (two's complement). Any subsequent
+ limbs are one's complemented. Of course, we don't need to
+ handle more limbs than there are limbs in the other, positive
+ operand as the result for those limbs is going to become zero
+ anyway. */
+
+ /* Scan for the least significant non-zero OP2 limb, and zero the
+ result meanwhile for those limb positions. (We will surely
+ find a non-zero limb, so we can write the loop with one
+ termination condition only.) */
+ for (i = 0; op2_ptr[i] == 0; i++)
+ res_ptr[i] = 0;
+ op2_lim = i;
+
+ op2_size = -op2_size;
+
+ if (op1_size <= op2_size)
+ {
+ /* The ones-extended OP2 is >= than the zero-extended OP1.
+ RES_SIZE <= OP1_SIZE. Find the exact size. */
+ for (i = op1_size - 1; i > op2_lim; i--)
+ if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+ for (i = res_size - 1; i > op2_lim; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+ res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
+ /* Yes, this *can* happen! */
+ MPN_NORMALIZE (res_ptr, res_size);
+ }
+ else
+ {
+ /* The ones-extended OP2 is < than the zero-extended OP1.
+ RES_SIZE == OP1_SIZE, since OP1 is normalized. */
+ res_size = op1_size;
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+ for (i = op2_size - 1; i > op2_lim; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+ res_ptr[op2_lim] = op1_ptr[op2_lim] & -op2_ptr[op2_lim];
+ }
+
+ res->_mp_size = res_size;
+#else
+
+ /* OP1 is positive and zero-extended,
+ OP2 is negative and ones-extended.
+ The result will be positive.
+ OP1 & -OP2 = OP1 & ~(OP2 - 1). */
+
+ mp_ptr opx;
+
+ op2_size = -op2_size;
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ if (op1_size > op2_size)
+ {
+ /* The result has the same size as OP1, since OP1 is normalized
+ and longer than the ones-extended OP2. */
+ res_size = op1_size;
+
+ /* Handle allocation, now that we know exactly how much space is
+ needed for the result. */
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ res_ptr = res->_mp_d;
+ op1_ptr = op1->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ res_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+
+ res->_mp_size = res_size;
+ }
+ else
+ {
+ /* Find out the exact result size. Ignore the high limbs of OP2,
+ OP1 is zero-extended and would make the result zero. */
+ for (i = op1_size - 1; i >= 0; i--)
+ if ((op1_ptr[i] & ~op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+
+ /* Handle allocation, now that we know exactly how much space is
+ needed for the result. */
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ res_ptr = res->_mp_d;
+ op1_ptr = op1->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ for (i = res_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+
+ res->_mp_size = res_size;
+ }
+#endif
+ }
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/array_init.c b/rts/gmp/mpz/array_init.c
new file mode 100644
index 0000000000..1c22046986
--- /dev/null
+++ b/rts/gmp/mpz/array_init.c
@@ -0,0 +1,48 @@
+/* mpz_array_init (array, array_size, size_per_elem) --
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
+#else
+mpz_array_init (arr, arr_size, nbits)
+ mpz_ptr arr;
+ mp_size_t arr_size;
+ mp_size_t nbits;
+#endif
+{
+ register mp_ptr p;
+ register size_t i;
+ mp_size_t nlimbs;
+
+ nlimbs = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ p = (mp_ptr) (*_mp_allocate_func) (arr_size * nlimbs * BYTES_PER_MP_LIMB);
+
+ for (i = 0; i < arr_size; i++)
+ {
+ arr[i]._mp_alloc = nlimbs + 1; /* Yes, lie a little... */
+ arr[i]._mp_size = 0;
+ arr[i]._mp_d = p + i * nlimbs;
+ }
+}
diff --git a/rts/gmp/mpz/bin_ui.c b/rts/gmp/mpz/bin_ui.c
new file mode 100644
index 0000000000..a7a6c98218
--- /dev/null
+++ b/rts/gmp/mpz/bin_ui.c
@@ -0,0 +1,141 @@
+/* mpz_bin_ui - compute n over k.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This is a poor implementation. Look at bin_uiui.c for improvement ideas.
+ In fact consider calling mpz_bin_uiui() when the arguments fit, leaving
+ the code here only for big n.
+
+ The identity bin(n,k) = (-1)^k * bin(-n+k-1,k) can be found in Knuth vol
+ 1 section 1.2.6 part G. */
+
+
+/* Enhancement: use mpn_divexact_1 when it exists */
+#define DIVIDE() \
+ ASSERT (SIZ(r) > 0); \
+ ASSERT_NOCARRY (mpn_divrem_1 (PTR(r), (mp_size_t) 0, \
+ PTR(r), SIZ(r), kacc)); \
+ SIZ(r) -= (PTR(r)[SIZ(r)-1] == 0);
+
+void
+#if __STDC__
+mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
+#else
+mpz_bin_ui (r, n, k)
+ mpz_ptr r;
+ mpz_srcptr n;
+ unsigned long int k;
+#endif
+{
+ mpz_t ni;
+ mp_limb_t i;
+ mpz_t nacc;
+ mp_limb_t kacc;
+ mp_size_t negate;
+
+ if (mpz_sgn (n) < 0)
+ {
+ /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
+ mpz_init (ni);
+ mpz_neg (ni, n);
+ mpz_sub_ui (ni, ni, 1L);
+ negate = (k & 1); /* (-1)^k */
+ }
+ else
+ {
+ /* bin(n,k) == 0 if k>n
+ (no test for this under the n<0 case, since -n+k-1 >= k there) */
+ if (mpz_cmp_ui (n, k) < 0)
+ {
+ mpz_set_ui (r, 0L);
+ return;
+ }
+
+ /* set ni = n-k */
+ mpz_init (ni);
+ mpz_sub_ui (ni, n, k);
+ negate = 0;
+ }
+
+ /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
+ for positive, 1 for negative). */
+ mpz_set_ui (r, 1L);
+
+ /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's
+ whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
+ = ni, and new ni of ni+k-ni = k. */
+ if (mpz_cmp_ui (ni, k) < 0)
+ {
+ unsigned long tmp;
+ tmp = k;
+ k = mpz_get_ui (ni);
+ mpz_set_ui (ni, tmp);
+ }
+
+ kacc = 1;
+ mpz_init_set_ui (nacc, 1);
+
+ for (i = 1; i <= k; i++)
+ {
+ mp_limb_t k1, k0;
+
+#if 0
+ mp_limb_t nacclow;
+ int c;
+
+ nacclow = PTR(nacc)[0];
+ for (c = 0; (((kacc | nacclow) & 1) == 0); c++)
+ {
+ kacc >>= 1;
+ nacclow >>= 1;
+ }
+ mpz_div_2exp (nacc, nacc, c);
+#endif
+
+ mpz_add_ui (ni, ni, 1);
+ mpz_mul (nacc, nacc, ni);
+ umul_ppmm (k1, k0, kacc, i);
+ if (k1 != 0)
+ {
+ /* Accumulator overflow. Perform bignum step. */
+ mpz_mul (r, r, nacc);
+ mpz_set_ui (nacc, 1);
+ DIVIDE ();
+ kacc = i;
+ }
+ else
+ {
+ /* Save new products in accumulators to keep accumulating. */
+ kacc = k0;
+ }
+ }
+
+ mpz_mul (r, r, nacc);
+ DIVIDE ();
+ SIZ(r) = (SIZ(r) ^ -negate) + negate;
+
+ mpz_clear (nacc);
+ mpz_clear (ni);
+}
diff --git a/rts/gmp/mpz/bin_uiui.c b/rts/gmp/mpz/bin_uiui.c
new file mode 100644
index 0000000000..b37541ba54
--- /dev/null
+++ b/rts/gmp/mpz/bin_uiui.c
@@ -0,0 +1,120 @@
+/* mpz_bin_uiui - compute n over k.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Avoid reallocs by rounding up any new size */
+#define ROUNDUP_MASK 15
+
+/* Enhancement: use mpn_divexact_1 when it exists */
+#define MULDIV() \
+ MPZ_REALLOC (r, (SIZ(r)+1)|ROUNDUP_MASK); \
+ PTR(r)[SIZ(r)] = mpn_mul_1 (PTR(r), PTR(r), SIZ(r), nacc); \
+ ASSERT_NOCARRY (mpn_divrem_1 (PTR(r), (mp_size_t) 0, \
+ PTR(r), SIZ(r)+1, kacc)); \
+ SIZ(r) += (PTR(r)[SIZ(r)] != 0);
+
+void
+#if __STDC__
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+#else
+mpz_bin_uiui (r, n, k)
+ mpz_ptr r;
+ unsigned long int n;
+ unsigned long int k;
+#endif
+{
+ unsigned long int i, j;
+ mp_limb_t nacc, kacc;
+ unsigned long int cnt;
+
+ /* bin(n,k) = 0 if k>n. */
+ if (n < k)
+ {
+ mpz_set_ui (r, 0);
+ return;
+ }
+
+ /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+ k = MIN (k, n-k);
+
+ /* bin(n,0) = 1 */
+ if (k == 0)
+ {
+ mpz_set_ui (r, 1);
+ return;
+ }
+
+ j = n - k + 1;
+ mpz_set_ui (r, j);
+
+ /* Initialize accumulators. */
+ nacc = 1;
+ kacc = 1;
+
+ cnt = 0;
+ for (i = 2; i <= k; i++)
+ {
+ mp_limb_t n1, n0, k1, k0;
+
+ j++;
+#if 0
+ /* Remove common multiples of 2. This will allow us to accumulate
+ more in nacc and kacc before we need a bignum step. It would make
+ sense to cancel factors of 3, 5, etc too, but this would be best
+ handled by sieving out factors. Alternatively, we could perform a
+ gcd of the accumulators just as they have overflown, and keep
+ accumulating until the gcd doesn't remove a significant factor. */
+ while (((nacc | kacc) & 1) == 0)
+ {
+ nacc >>= 1;
+ kacc >>= 1;
+ }
+#else
+ cnt = ((nacc | kacc) & 1) ^ 1;
+ nacc >>= cnt;
+ kacc >>= cnt;
+#endif
+ /* Accumulate next multiples. */
+ umul_ppmm (n1, n0, nacc, j);
+ umul_ppmm (k1, k0, kacc, i);
+ if (n1 != 0)
+ {
+ /* Accumulator overflow. Perform bignum step. */
+ MULDIV ();
+ nacc = j;
+ kacc = i;
+ }
+ else
+ {
+ if (k1 != 0) abort ();
+ /* Save new products in accumulators to keep accumulating. */
+ nacc = n0;
+ kacc = k0;
+ }
+ }
+
+ /* Take care of whatever is left in accumulators. */
+ MULDIV ();
+}
diff --git a/rts/gmp/mpz/cdiv_q.c b/rts/gmp/mpz/cdiv_q.c
new file mode 100644
index 0000000000..b15ba8aaa9
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_q.c
@@ -0,0 +1,51 @@
+/* mpz_cdiv_q -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_cdiv_q (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t dividend_size = dividend->_mp_size;
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t rem;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (rem, ABS (divisor_size));
+
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend_size) >= 0 && rem->_mp_size != 0)
+ mpz_add_ui (quot, quot, 1L);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/cdiv_q_ui.c b/rts/gmp/mpz/cdiv_q_ui.c
new file mode 100644
index 0000000000..74f3a90b83
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_q_ui.c
@@ -0,0 +1,67 @@
+/* mpz_cdiv_q_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_q_ui (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/cdiv_qr.c b/rts/gmp/mpz/cdiv_qr.c
new file mode 100644
index 0000000000..29c7c41a4e
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_qr.c
@@ -0,0 +1,64 @@
+/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_cdiv_qr (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t xsize;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the quotient and
+ remainder have been preliminary calculated. We have to copy it to
+ temporary space if it's the same variable as either QUOT or REM. */
+ if (quot == divisor || rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ xsize = dividend->_mp_size ^ divisor_size;;
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if (xsize >= 0 && rem->_mp_size != 0)
+ {
+ mpz_add_ui (quot, quot, 1L);
+ mpz_sub (rem, rem, divisor);
+ }
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/cdiv_qr_ui.c b/rts/gmp/mpz/cdiv_qr_ui.c
new file mode 100644
index 0000000000..a7873c6e20
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_qr_ui.c
@@ -0,0 +1,71 @@
+/* mpz_cdiv_qr_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_qr_ui (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = -(remainder_limb != 0);
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/cdiv_r.c b/rts/gmp/mpz/cdiv_r.c
new file mode 100644
index 0000000000..e96ce7e677
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_r.c
@@ -0,0 +1,59 @@
+/* mpz_cdiv_r -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_cdiv_r (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the remainder has been
+ preliminary calculated. We have to copy it to temporary space if it's
+ the same variable as REM. */
+ if (rem == divisor)
+ {
+
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ mpz_tdiv_r (rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend->_mp_size) >= 0 && rem->_mp_size != 0)
+ mpz_sub (rem, rem, divisor);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/cdiv_r_ui.c b/rts/gmp/mpz/cdiv_r_ui.c
new file mode 100644
index 0000000000..e17e2381c0
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_r_ui.c
@@ -0,0 +1,57 @@
+/* mpz_cdiv_r_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_r_ui (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ remainder_limb = divisor - remainder_limb;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = -(remainder_limb != 0);
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/cdiv_ui.c b/rts/gmp/mpz/cdiv_ui.c
new file mode 100644
index 0000000000..63547a78c0
--- /dev/null
+++ b/rts/gmp/mpz/cdiv_ui.c
@@ -0,0 +1,50 @@
+/* mpz_cdiv_ui -- Division rounding the quotient towards +infinity. The
+ remainder gets the opposite sign as the denominator. In order to make it
+ always fit into the return type, the negative of the true remainder is
+ returned.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_cdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_cdiv_ui (dividend, divisor)
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size >= 0)
+ remainder_limb = divisor - remainder_limb;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/clear.c b/rts/gmp/mpz/clear.c
new file mode 100644
index 0000000000..5224553f9e
--- /dev/null
+++ b/rts/gmp/mpz/clear.c
@@ -0,0 +1,35 @@
+/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of
+ an integer.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_clear (mpz_ptr m)
+#else
+mpz_clear (m)
+ mpz_ptr m;
+#endif
+{
+ (*_mp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);
+}
diff --git a/rts/gmp/mpz/clrbit.c b/rts/gmp/mpz/clrbit.c
new file mode 100644
index 0000000000..865d84902f
--- /dev/null
+++ b/rts/gmp/mpz/clrbit.c
@@ -0,0 +1,114 @@
+/* mpz_clrbit -- clear a specified bit.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_clrbit (mpz_ptr d, unsigned long int bit_index)
+#else
+mpz_clrbit (d, bit_index)
+ mpz_ptr d;
+ unsigned long int bit_index;
+#endif
+{
+ mp_size_t dsize = d->_mp_size;
+ mp_ptr dp = d->_mp_d;
+ mp_size_t limb_index;
+
+ limb_index = bit_index / BITS_PER_MP_LIMB;
+ if (dsize >= 0)
+ {
+ if (limb_index < dsize)
+ {
+ dp[limb_index] &= ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB));
+ MPN_NORMALIZE (dp, dsize);
+ d->_mp_size = dsize;
+ }
+ else
+ ;
+ }
+ else
+ {
+ mp_size_t zero_bound;
+
+ /* Simulate two's complement arithmetic, i.e. simulate
+ 1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+ 2. clear the bit.
+ 3. Set OP = ~OP + 1. */
+
+ dsize = -dsize;
+
+ /* No upper bound on this loop, we're sure there's a non-zero limb
+ sooner or later. */
+ for (zero_bound = 0; ; zero_bound++)
+ if (dp[zero_bound] != 0)
+ break;
+
+ if (limb_index > zero_bound)
+ {
+ if (limb_index < dsize)
+ dp[limb_index] |= (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ else
+ {
+ /* Ugh. The bit should be cleared outside of the end of the
+ number. We have to increase the size of the number. */
+ if (d->_mp_alloc < limb_index + 1)
+ {
+ _mpz_realloc (d, limb_index + 1);
+ dp = d->_mp_d;
+ }
+ MPN_ZERO (dp + dsize, limb_index - dsize);
+ dp[limb_index] = (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ d->_mp_size = -(limb_index + 1);
+ }
+ }
+ else if (limb_index == zero_bound)
+ {
+ dp[limb_index] = ((dp[limb_index] - 1)
+ | ((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB))) + 1;
+ if (dp[limb_index] == 0)
+ {
+ mp_size_t i;
+ for (i = limb_index + 1; i < dsize; i++)
+ {
+ dp[i] += 1;
+ if (dp[i] != 0)
+ goto fin;
+ }
+ /* We got carry all way out beyond the end of D. Increase
+ its size (and allocation if necessary). */
+ dsize++;
+ if (d->_mp_alloc < dsize)
+ {
+ _mpz_realloc (d, dsize);
+ dp = d->_mp_d;
+ }
+ dp[i] = 1;
+ d->_mp_size = -dsize;
+ fin:;
+ }
+ }
+ else
+ ;
+ }
+}
diff --git a/rts/gmp/mpz/cmp.c b/rts/gmp/mpz/cmp.c
new file mode 100644
index 0000000000..60628348e5
--- /dev/null
+++ b/rts/gmp/mpz/cmp.c
@@ -0,0 +1,75 @@
+/* mpz_cmp(u,v) -- Compare U, V. Return positive, zero, or negative
+ based on if U > V, U == V, or U < V.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef BERKELEY_MP
+int
+#if __STDC__
+mpz_cmp (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_cmp (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+int
+#if __STDC__
+mcmp (mpz_srcptr u, mpz_srcptr v)
+#else
+mcmp (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize = v->_mp_size;
+ mp_size_t size;
+ mp_srcptr up, vp;
+ int cmp;
+
+ if (usize != vsize)
+ return usize - vsize;
+
+ if (usize == 0)
+ return 0;
+
+ size = ABS (usize);
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ cmp = mpn_cmp (up, vp, size);
+
+ if (cmp == 0)
+ return 0;
+
+ if ((cmp < 0) == (usize < 0))
+ return 1;
+ else
+ return -1;
+}
diff --git a/rts/gmp/mpz/cmp_si.c b/rts/gmp/mpz/cmp_si.c
new file mode 100644
index 0000000000..0c2212fbe9
--- /dev/null
+++ b/rts/gmp/mpz/cmp_si.c
@@ -0,0 +1,64 @@
+/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.
+ Return positive, zero, or negative based on if U > V, U == V, or U < V.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+_mpz_cmp_si (mpz_srcptr u, signed long int v_digit)
+#else
+_mpz_cmp_si (u, v_digit)
+ mpz_srcptr u;
+ signed long int v_digit;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize;
+ mp_limb_t u_digit;
+
+ vsize = 0;
+ if (v_digit > 0)
+ vsize = 1;
+ else if (v_digit < 0)
+ {
+ vsize = -1;
+ v_digit = -v_digit;
+ }
+
+ if (usize != vsize)
+ return usize - vsize;
+
+ if (usize == 0)
+ return 0;
+
+ u_digit = u->_mp_d[0];
+
+ if (u_digit == (mp_limb_t) (unsigned long) v_digit)
+ return 0;
+
+ if (u_digit > (mp_limb_t) (unsigned long) v_digit)
+ return usize;
+ else
+ return -usize;
+}
diff --git a/rts/gmp/mpz/cmp_ui.c b/rts/gmp/mpz/cmp_ui.c
new file mode 100644
index 0000000000..fd84f301c1
--- /dev/null
+++ b/rts/gmp/mpz/cmp_ui.c
@@ -0,0 +1,53 @@
+/* mpz_cmp_ui.c -- Compare a mpz_t a with an mp_limb_t b. Return positive,
+ zero, or negative based on if a > b, a == b, or a < b.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+_mpz_cmp_ui (mpz_srcptr u, unsigned long int v_digit)
+#else
+_mpz_cmp_ui (u, v_digit)
+ mpz_srcptr u;
+ unsigned long int v_digit;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+
+ if (usize == 0)
+ return -(v_digit != 0);
+
+ if (usize == 1)
+ {
+ mp_limb_t u_digit;
+
+ u_digit = u->_mp_d[0];
+ if (u_digit > v_digit)
+ return 1;
+ if (u_digit < v_digit)
+ return -1;
+ return 0;
+ }
+
+ return (usize > 0) ? 1 : -1;
+}
diff --git a/rts/gmp/mpz/cmpabs.c b/rts/gmp/mpz/cmpabs.c
new file mode 100644
index 0000000000..037d7a9145
--- /dev/null
+++ b/rts/gmp/mpz/cmpabs.c
@@ -0,0 +1,57 @@
+/* mpz_cmpabs(u,v) -- Compare U, V. Return positive, zero, or negative
+ based on if U > V, U == V, or U < V.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_cmpabs (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_cmpabs (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize = v->_mp_size;
+ mp_size_t size;
+ mp_srcptr up, vp;
+ int cmp;
+
+ usize = ABS (usize);
+ vsize = ABS (vsize);
+
+ if (usize != vsize)
+ return usize - vsize;
+
+ if (usize == 0)
+ return 0;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ cmp = mpn_cmp (up, vp, usize);
+
+ return cmp;
+}
diff --git a/rts/gmp/mpz/cmpabs_ui.c b/rts/gmp/mpz/cmpabs_ui.c
new file mode 100644
index 0000000000..db816b5820
--- /dev/null
+++ b/rts/gmp/mpz/cmpabs_ui.c
@@ -0,0 +1,56 @@
+/* mpz_cmpabs_ui.c -- Compare a mpz_t a with an mp_limb_t b. Return positive,
+ zero, or negative based on if a > b, a == b, or a < b.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_cmpabs_ui (mpz_srcptr u, unsigned long int v_digit)
+#else
+mpz_cmpabs_ui (u, v_digit)
+ mpz_srcptr u;
+ unsigned long int v_digit;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+
+ if (usize == 0)
+ return -(v_digit != 0);
+
+ usize = ABS (usize);
+
+ if (usize == 1)
+ {
+ mp_limb_t u_digit;
+
+ u_digit = u->_mp_d[0];
+ if (u_digit > v_digit)
+ return 1;
+ if (u_digit < v_digit)
+ return -1;
+ return 0;
+ }
+
+ return 1;
+}
diff --git a/rts/gmp/mpz/com.c b/rts/gmp/mpz/com.c
new file mode 100644
index 0000000000..18d6427779
--- /dev/null
+++ b/rts/gmp/mpz/com.c
@@ -0,0 +1,93 @@
+/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of
+ SRC to DST.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_com (mpz_ptr dst, mpz_srcptr src)
+#else
+mpz_com (dst, src)
+ mpz_ptr dst;
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size = src->_mp_size;
+ mp_srcptr src_ptr;
+ mp_ptr dst_ptr;
+
+ if (size >= 0)
+ {
+ /* As with infinite precision: one's complement, two's complement.
+ But this can be simplified using the identity -x = ~x + 1.
+ So we're going to compute (~~x) + 1 = x + 1! */
+
+ if (dst->_mp_alloc < size + 1)
+ _mpz_realloc (dst, size + 1);
+
+ src_ptr = src->_mp_d;
+ dst_ptr = dst->_mp_d;
+
+ if (size == 0)
+ {
+ /* Special case, as mpn_add wants the first arg's size >= the
+ second arg's size. */
+ dst_ptr[0] = 1;
+ dst->_mp_size = -1;
+ return;
+ }
+
+ {
+ mp_limb_t cy;
+
+ cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+ if (cy)
+ {
+ dst_ptr[size] = cy;
+ size++;
+ }
+ }
+
+ /* Store a negative size, to indicate ones-extension. */
+ dst->_mp_size = -size;
+ }
+ else
+ {
+ /* As with infinite precision: two's complement, then one's complement.
+ But that can be simplified using the identity -x = ~(x - 1).
+ So we're going to compute ~~(x - 1) = x - 1! */
+ size = -size;
+
+ if (dst->_mp_alloc < size)
+ _mpz_realloc (dst, size);
+
+ src_ptr = src->_mp_d;
+ dst_ptr = dst->_mp_d;
+
+ mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+ size -= dst_ptr[size - 1] == 0;
+
+ /* Store a positive size, to indicate zero-extension. */
+ dst->_mp_size = size;
+ }
+}
diff --git a/rts/gmp/mpz/divexact.c b/rts/gmp/mpz/divexact.c
new file mode 100644
index 0000000000..c2970454fd
--- /dev/null
+++ b/rts/gmp/mpz/divexact.c
@@ -0,0 +1,125 @@
+/* mpz_divexact -- finds quotient when known that quot * den == num && den != 0.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+ 301314194-2, and was done while I was a visiting researcher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ References:
+ T. Jebelean, An algorithm for exact division, Journal of Symbolic
+ Computation, v. 15, 1993, pp. 169-180. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_divexact (quot, num, den)
+ mpz_ptr quot;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+{
+ mp_ptr qp, tp;
+ mp_size_t qsize, tsize;
+ mp_srcptr np, dp;
+ mp_size_t nsize, dsize;
+ TMP_DECL (marker);
+
+ nsize = ABS (num->_mp_size);
+ dsize = ABS (den->_mp_size);
+
+ qsize = nsize - dsize + 1;
+ if (quot->_mp_alloc < qsize)
+ _mpz_realloc (quot, qsize);
+
+ np = num->_mp_d;
+ dp = den->_mp_d;
+ qp = quot->_mp_d;
+
+ if (nsize == 0)
+ {
+ if (dsize == 0)
+ DIVIDE_BY_ZERO;
+ quot->_mp_size = 0;
+ return;
+ }
+
+ if (dsize <= 1)
+ {
+ if (dsize == 1)
+ {
+ mpn_divmod_1 (qp, np, nsize, dp[0]);
+ qsize -= qp[qsize - 1] == 0;
+ quot->_mp_size = (num->_mp_size ^ den->_mp_size) >= 0 ? qsize : -qsize;
+ return;
+ }
+
+ /* Generate divide-by-zero error since dsize == 0. */
+ DIVIDE_BY_ZERO;
+ }
+
+ TMP_MARK (marker);
+
+ /* QUOT <-- NUM/2^r, T <-- DEN/2^r where = r number of twos in DEN. */
+ while (dp[0] == 0)
+ np += 1, nsize -= 1, dp += 1, dsize -= 1;
+ tsize = MIN (qsize, dsize);
+ if ((dp[0] & 1) != 0)
+ {
+ if (quot == den) /* QUOT and DEN overlap. */
+ {
+ tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, tsize);
+ }
+ else
+ tp = (mp_ptr) dp;
+ if (qp != np)
+ MPN_COPY_INCR (qp, np, qsize);
+ }
+ else
+ {
+ unsigned int r;
+ tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+ count_trailing_zeros (r, dp[0]);
+ mpn_rshift (tp, dp, tsize, r);
+ if (dsize > tsize)
+ tp[tsize - 1] |= dp[tsize] << (BITS_PER_MP_LIMB - r);
+ mpn_rshift (qp, np, qsize, r);
+ if (nsize > qsize)
+ qp[qsize - 1] |= np[qsize] << (BITS_PER_MP_LIMB - r);
+ }
+
+ /* Now QUOT <-- QUOT/T. */
+ mpn_bdivmod (qp, qp, qsize, tp, tsize, qsize * BITS_PER_MP_LIMB);
+ MPN_NORMALIZE (qp, qsize);
+
+ quot->_mp_size = (num->_mp_size ^ den->_mp_size) >= 0 ? qsize : -qsize;
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/dump.c b/rts/gmp/mpz/dump.c
new file mode 100644
index 0000000000..dc318ac8cf
--- /dev/null
+++ b/rts/gmp/mpz/dump.c
@@ -0,0 +1,44 @@
+/* mpz_dump - Dump an integer to stdout.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS NOT SAFE TO
+ CALL THIS FUNCTION DIRECTLY. IN FACT, IT IS ALMOST GUARANTEED THAT THIS
+ FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_dump (mpz_srcptr u)
+#else
+mpz_dump (u)
+ mpz_srcptr u;
+#endif
+{
+ char *str;
+
+ str = mpz_get_str (0, 10, u);
+ printf ("%s\n", str);
+ (*_mp_free_func) (str, 0);/* ??? broken alloc interface, pass what size ??? */
+}
diff --git a/rts/gmp/mpz/fac_ui.c b/rts/gmp/mpz/fac_ui.c
new file mode 100644
index 0000000000..85f40f271c
--- /dev/null
+++ b/rts/gmp/mpz/fac_ui.c
@@ -0,0 +1,157 @@
+/* mpz_fac_ui(result, n) -- Set RESULT to N!.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifdef DBG
+#include <stdio.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_fac_ui (mpz_ptr result, unsigned long int n)
+#else
+mpz_fac_ui (result, n)
+ mpz_ptr result;
+ unsigned long int n;
+#endif
+{
+#if SIMPLE_FAC
+
+ /* Be silly. Just multiply the numbers in ascending order. O(n**2). */
+
+ unsigned long int k;
+
+ mpz_set_ui (result, 1L);
+
+ for (k = 2; k <= n; k++)
+ mpz_mul_ui (result, result, k);
+#else
+
+ /* Be smarter. Multiply groups of numbers in ascending order until the
+ product doesn't fit in a limb. Multiply these partial product in a
+ balanced binary tree fashion, to make the operand have as equal sizes
+ as possible. When the operands have about the same size, mpn_mul
+ becomes faster. */
+
+ unsigned long int p, k;
+ mp_limb_t p1, p0;
+
+ /* Stack of partial products, used to make the computation balanced
+ (i.e. make the sizes of the multiplication operands equal). The
+ topmost position of MP_STACK will contain a one-limb partial product,
+ the second topmost will contain a two-limb partial product, and so
+ on. MP_STACK[0] will contain a partial product with 2**t limbs.
+ To compute n! MP_STACK needs to be less than
+ log(n)**2/log(BITS_PER_MP_LIMB), so 30 is surely enough. */
+#define MP_STACK_SIZE 30
+ mpz_t mp_stack[MP_STACK_SIZE];
+
+ /* TOP is an index into MP_STACK, giving the topmost element.
+ TOP_LIMIT_SO_FAR is the largets value it has taken so far. */
+ int top, top_limit_so_far;
+
+ /* Count of the total number of limbs put on MP_STACK so far. This
+ variable plays an essential role in making the compututation balanced.
+ See below. */
+ unsigned int tree_cnt;
+
+ top = top_limit_so_far = -1;
+ tree_cnt = 0;
+ p = 1;
+ for (k = 2; k <= n; k++)
+ {
+ /* Multiply the partial product in P with K. */
+ umul_ppmm (p1, p0, (mp_limb_t) p, (mp_limb_t) k);
+
+ /* Did we get overflow into the high limb, i.e. is the partial
+ product now more than one limb? */
+ if (p1 != 0)
+ {
+ tree_cnt++;
+
+ if (tree_cnt % 2 == 0)
+ {
+ mp_size_t i;
+
+ /* TREE_CNT is even (i.e. we have generated an even number of
+ one-limb partial products), which means that we have a
+ single-limb product on the top of MP_STACK. */
+
+ mpz_mul_ui (mp_stack[top], mp_stack[top], p);
+
+ /* If TREE_CNT is divisable by 4, 8,..., we have two
+ similar-sized partial products with 2, 4,... limbs at
+ the topmost two positions of MP_STACK. Multiply them
+ to form a new partial product with 4, 8,... limbs. */
+ for (i = 4; (tree_cnt & (i - 1)) == 0; i <<= 1)
+ {
+ mpz_mul (mp_stack[top - 1],
+ mp_stack[top], mp_stack[top - 1]);
+ top--;
+ }
+ }
+ else
+ {
+ /* Put the single-limb partial product in P on the stack.
+ (The next time we get a single-limb product, we will
+ multiply the two together.) */
+ top++;
+ if (top > top_limit_so_far)
+ {
+ if (top > MP_STACK_SIZE)
+ abort();
+ /* The stack is now bigger than ever, initialize the top
+ element. */
+ mpz_init_set_ui (mp_stack[top], p);
+ top_limit_so_far++;
+ }
+ else
+ mpz_set_ui (mp_stack[top], p);
+ }
+
+ /* We ignored the last result from umul_ppmm. Put K in P as the
+ first component of the next single-limb partial product. */
+ p = k;
+ }
+ else
+ /* We didn't get overflow in umul_ppmm. Put p0 in P and try
+ with one more value of K. */
+ p = p0; /* bogus if long != mp_limb_t */
+ }
+
+ /* We have partial products in mp_stack[0..top], in descending order.
+ We also have a small partial product in p.
+ Their product is the final result. */
+ if (top < 0)
+ mpz_set_ui (result, p);
+ else
+ mpz_mul_ui (result, mp_stack[top--], p);
+ while (top >= 0)
+ mpz_mul (result, result, mp_stack[top--]);
+
+ /* Free the storage allocated for MP_STACK. */
+ for (top = top_limit_so_far; top >= 0; top--)
+ mpz_clear (mp_stack[top]);
+#endif
+}
diff --git a/rts/gmp/mpz/fdiv_q.c b/rts/gmp/mpz/fdiv_q.c
new file mode 100644
index 0000000000..9d75ca33d2
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_q.c
@@ -0,0 +1,51 @@
+/* mpz_fdiv_q -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_fdiv_q (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t dividend_size = dividend->_mp_size;
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t rem;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (rem, ABS (divisor_size));
+
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend_size) < 0 && rem->_mp_size != 0)
+ mpz_sub_ui (quot, quot, 1L);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/fdiv_q_2exp.c b/rts/gmp/mpz/fdiv_q_2exp.c
new file mode 100644
index 0000000000..8e02180ecc
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_q_2exp.c
@@ -0,0 +1,104 @@
+/* mpz_fdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient
+ towards -infinity.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_q_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
+#else
+mpz_fdiv_q_2exp (w, u, cnt)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t wsize;
+ mp_size_t abs_usize = ABS (usize);
+ mp_size_t limb_cnt;
+ mp_ptr wp;
+ mp_limb_t round = 0;
+
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+ wsize = abs_usize - limb_cnt;
+ if (wsize <= 0)
+ {
+ wp = w->_mp_d;
+ wsize = 0;
+ /* Set ROUND since we know we skip some non-zero words in this case.
+ Well, if U is zero, we don't, but then this will be taken care of
+ below, since rounding only really takes place for negative U. */
+ round = 1;
+ wp[0] = 1;
+ w->_mp_size = -(usize < 0);
+ return;
+ }
+ else
+ {
+ mp_size_t i;
+ mp_ptr up;
+
+ /* Make sure there is enough space. We make an extra limb
+ here to account for possible rounding at the end. */
+ if (w->_mp_alloc < wsize + 1)
+ _mpz_realloc (w, wsize + 1);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ /* Set ROUND if we are about skip some non-zero limbs. */
+ for (i = 0; i < limb_cnt && round == 0; i++)
+ round = up[i];
+
+ cnt %= BITS_PER_MP_LIMB;
+ if (cnt != 0)
+ {
+ round |= mpn_rshift (wp, up + limb_cnt, wsize, cnt);
+ wsize -= wp[wsize - 1] == 0;
+ }
+ else
+ {
+ MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+ }
+ }
+
+ if (usize < 0 && round != 0)
+ {
+ mp_limb_t cy;
+ if (wsize != 0)
+ {
+ cy = mpn_add_1 (wp, wp, wsize, (mp_limb_t) 1);
+ wp[wsize] = cy;
+ wsize += cy;
+ }
+ else
+ {
+ /* We shifted something negative to zero. The result is -1. */
+ wp[0] = 1;
+ wsize = 1;
+ }
+ }
+ w->_mp_size = usize >= 0 ? wsize : -wsize;
+}
diff --git a/rts/gmp/mpz/fdiv_q_ui.c b/rts/gmp/mpz/fdiv_q_ui.c
new file mode 100644
index 0000000000..55d2498693
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_q_ui.c
@@ -0,0 +1,65 @@
+/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_q_ui (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fdiv_qr.c b/rts/gmp/mpz/fdiv_qr.c
new file mode 100644
index 0000000000..06ce50607b
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_qr.c
@@ -0,0 +1,64 @@
+/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_fdiv_qr (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t xsize;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the quotient and
+ remainder have been preliminary calculated. We have to copy it to
+ temporary space if it's the same variable as either QUOT or REM. */
+ if (quot == divisor || rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ xsize = dividend->_mp_size ^ divisor_size;;
+ mpz_tdiv_qr (quot, rem, dividend, divisor);
+
+ if (xsize < 0 && rem->_mp_size != 0)
+ {
+ mpz_sub_ui (quot, quot, 1L);
+ mpz_add (rem, rem, divisor);
+ }
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/fdiv_qr_ui.c b/rts/gmp/mpz/fdiv_qr_ui.c
new file mode 100644
index 0000000000..600c0dacfc
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_qr_ui.c
@@ -0,0 +1,69 @@
+/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_qr_ui (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ {
+ mpn_incr_u (quot_ptr, (mp_limb_t) 1);
+ remainder_limb = divisor - remainder_limb;
+ }
+
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = remainder_limb != 0;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fdiv_r.c b/rts/gmp/mpz/fdiv_r.c
new file mode 100644
index 0000000000..a3652838d2
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_r.c
@@ -0,0 +1,58 @@
+/* mpz_fdiv_r -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_fdiv_r (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the remainder has been
+ preliminary calculated. We have to copy it to temporary space if it's
+ the same variable as REM. */
+ if (rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ mpz_tdiv_r (rem, dividend, divisor);
+
+ if ((divisor_size ^ dividend->_mp_size) < 0 && rem->_mp_size != 0)
+ mpz_add (rem, rem, divisor);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/fdiv_r_2exp.c b/rts/gmp/mpz/fdiv_r_2exp.c
new file mode 100644
index 0000000000..081ce19203
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_r_2exp.c
@@ -0,0 +1,156 @@
+/* mpz_fdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1998, 1999, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_fdiv_r_2exp (mpz_ptr res, mpz_srcptr in, unsigned long int cnt)
+#else
+mpz_fdiv_r_2exp (res, in, cnt)
+ mpz_ptr res;
+ mpz_srcptr in;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t in_size = ABS (in->_mp_size);
+ mp_size_t res_size;
+ mp_size_t limb_cnt = cnt / BITS_PER_MP_LIMB;
+ mp_srcptr in_ptr = in->_mp_d;
+
+ if (in_size > limb_cnt)
+ {
+ /* The input operand is (probably) greater than 2**CNT. */
+ mp_limb_t x;
+
+ x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % BITS_PER_MP_LIMB) - 1);
+ if (x != 0)
+ {
+ res_size = limb_cnt + 1;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ res->_mp_d[limb_cnt] = x;
+ }
+ else
+ {
+ res_size = limb_cnt;
+ MPN_NORMALIZE (in_ptr, res_size);
+
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+ }
+ else
+ {
+ /* The input operand is smaller than 2**CNT. We perform a no-op,
+ apart from that we might need to copy IN to RES, and may need
+ to round the result. */
+ res_size = in_size;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+
+ if (res != in)
+ MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);
+ in_size = in->_mp_size;
+ res->_mp_size = res_size;
+ if (in_size < 0 && res_size != 0)
+ {
+ /* Result should be 2^CNT - RES */
+ mpz_t tmp;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ MPZ_TMP_INIT (tmp, cnt/BITS_PER_MP_LIMB + 2);
+ mpz_set_ui (tmp, 1L);
+ mpz_mul_2exp (tmp, tmp, cnt);
+ mpz_sub (res, tmp, res);
+ TMP_FREE (marker);
+ }
+}
+
+/* This is an alternative ending of the above function using just low-level
+ functions. Tested, but perhaps excessive? */
+#if 0
+ if (in->_mp_size < 0 && res_size != 0)
+ {
+ /* Result should be 2^CNT - RES */
+
+ mp_ptr rp;
+
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+
+ if (res->_mp_alloc <= limb_cnt)
+ _mpz_realloc (res, limb_cnt + 1);
+ rp = PTR(res);
+ if (res_size > limb_cnt)
+ {
+ mpn_nz_neg (rp, rp, res_size);
+ rp[limb_cnt] &= ~(~(mp_limb_t) 0 << cnt % BITS_PER_MP_LIMB);
+ MPN_NORMALIZE_NOT_ZERO (rp, res_size);
+ }
+ else
+ {
+ mp_size_t i;
+ mpn_nz_neg (rp, rp, res_size);
+ for (i = res_size; i < limb_cnt; i++)
+ rp[i] = ~ (mp_limb_t) 0;
+ res_size = limb_cnt;
+ if (cnt % BITS_PER_MP_LIMB != 0)
+ {
+ rp[res_size] = ((mp_limb_t) 1 << (cnt % BITS_PER_MP_LIMB)) - 1;
+ res_size++;
+ }
+ else
+ MPN_NORMALIZE_NOT_ZERO (rp, res_size);
+ }
+ }
+ SIZ(res) = res_size;
+}
+
+static void
+mpn_nz_neg (rp, sp, n)
+ mp_ptr rp, sp;
+ mp_size_t n;
+{
+ mp_size_t i;
+ mp_limb_t x;
+
+ x = sp[0];
+ rp[0] = -x;
+ for (i = 1; x == 0; i++)
+ {
+ x = sp[i];
+ rp[i] = -x;
+ }
+
+ for (; i < n; i++)
+ {
+ rp[i] = ~sp[i];
+ }
+}
+#endif
diff --git a/rts/gmp/mpz/fdiv_r_ui.c b/rts/gmp/mpz/fdiv_r_ui.c
new file mode 100644
index 0000000000..dd5c743d27
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_r_ui.c
@@ -0,0 +1,55 @@
+/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_r_ui (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ remainder_limb = divisor - remainder_limb;
+
+ rem->_mp_d[0] = remainder_limb;
+ rem->_mp_size = remainder_limb != 0;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fdiv_ui.c b/rts/gmp/mpz/fdiv_ui.c
new file mode 100644
index 0000000000..f937b5f6d0
--- /dev/null
+++ b/rts/gmp/mpz/fdiv_ui.c
@@ -0,0 +1,48 @@
+/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.
+ The remainder gets the same sign as the denominator.
+
+Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_fdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_fdiv_ui (dividend, divisor)
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb != 0 && dividend_size < 0)
+ remainder_limb = divisor - remainder_limb;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/fib_ui.c b/rts/gmp/mpz/fib_ui.c
new file mode 100644
index 0000000000..4bebb80d94
--- /dev/null
+++ b/rts/gmp/mpz/fib_ui.c
@@ -0,0 +1,165 @@
+/* mpz_fib_ui(result, n) -- Set RESULT to the Nth Fibonacci number.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* This is fast, but could be made somewhat faster and neater.
+ The timing is somewhat fluctuating for even/odd sizes because
+ of the extra hair used to save variables and operations. Here
+ are a few things one might want to address:
+ 1. Avoid using 4 intermediate variables in mpz_fib_bigcase.
+ 2. Call mpn functions directly. Straightforward for these functions.
+ 3. Merge the three functions into one.
+
+Said by Kevin:
+ Consider using the Lucas numbers L[n] as an auxiliary sequence, making
+ it possible to do the "doubling" operation in mpz_fib_bigcase with two
+ squares rather than two multiplies. The formulas are a little more
+ complicated, something like the following (untested).
+
+ F[2n] = ((F[n]+L[n])^2 - 6*F[n]^2 - 4*(-1)^n) / 2
+ L[2n] = 5*F[n]^2 + 2*(-1)^n
+
+ F[2n+1] = (F[2n] + L[2n]) / 2
+ L[2n+1] = (5*F[2n] + L[2n]) / 2
+
+ The Lucas number that comes for free here could even be returned.
+
+ Maybe there's formulas with two squares using just F[n], but I don't
+ know of any.
+*/
+
+/* Determine the needed storage for Fib(n). */
+#define FIB_SIZE(n) (((mp_size_t) ((n)*0.695)) / BITS_PER_MP_LIMB + 2)
+
+static void mpz_fib_bigcase _PROTO ((mpz_t, mpz_t, unsigned long int));
+static void mpz_fib_basecase _PROTO ((mpz_t, mpz_t, unsigned long int));
+
+
+#ifndef FIB_THRESHOLD
+#define FIB_THRESHOLD 60
+#endif
+
+void
+#if __STDC__
+mpz_fib_ui (mpz_t r, unsigned long int n)
+#else
+mpz_fib_ui (r, n)
+ mpz_t r;
+ unsigned long int n;
+#endif
+{
+ if (n == 0)
+ mpz_set_ui (r, 0);
+ else
+ {
+ mpz_t t1;
+ mpz_init (t1);
+ if (n < FIB_THRESHOLD)
+ mpz_fib_basecase (t1, r, n);
+ else
+ mpz_fib_bigcase (t1, r, n);
+ mpz_clear (t1);
+ }
+}
+
+static void
+#if __STDC__
+mpz_fib_basecase (mpz_t t1, mpz_t t2, unsigned long int n)
+#else
+mpz_fib_basecase (t1, t2, n)
+ mpz_t t1;
+ mpz_t t2;
+ unsigned long int n;
+#endif
+{
+ unsigned long int m, i;
+
+ mpz_set_ui (t1, 0);
+ mpz_set_ui (t2, 1);
+ m = n/2;
+ for (i = 0; i < m; i++)
+ {
+ mpz_add (t1, t1, t2);
+ mpz_add (t2, t1, t2);
+ }
+ if ((n & 1) == 0)
+ {
+ mpz_sub (t1, t2, t1);
+ mpz_sub (t2, t2, t1); /* trick: recover t1 value just overwritten */
+ }
+}
+
+static void
+#if __STDC__
+mpz_fib_bigcase (mpz_t t1, mpz_t t2, unsigned long int n)
+#else
+mpz_fib_bigcase (t1, t2, n)
+ mpz_t t1;
+ mpz_t t2;
+ unsigned long int n;
+#endif
+{
+ unsigned long int n2;
+ int ni, i;
+ mpz_t x1, x2, u1, u2;
+
+ ni = 0;
+ for (n2 = n; n2 >= FIB_THRESHOLD; n2 /= 2)
+ ni++;
+
+ mpz_fib_basecase (t1, t2, n2);
+
+ mpz_init (x1);
+ mpz_init (x2);
+ mpz_init (u1);
+ mpz_init (u2);
+
+ for (i = ni - 1; i >= 0; i--)
+ {
+ mpz_mul_2exp (x1, t1, 1);
+ mpz_mul_2exp (x2, t2, 1);
+
+ mpz_add (x1, x1, t2);
+ mpz_sub (x2, x2, t1);
+
+ mpz_mul (u1, t2, x1);
+ mpz_mul (u2, t1, x2);
+
+ if (((n >> i) & 1) == 0)
+ {
+ mpz_sub (t1, u1, u2);
+ mpz_set (t2, u1);
+ }
+ else
+ {
+ mpz_set (t1, u1);
+ mpz_mul_2exp (t2, u1, 1);
+ mpz_sub (t2, t2, u2);
+ }
+ }
+
+ mpz_clear (x1);
+ mpz_clear (x2);
+ mpz_clear (u1);
+ mpz_clear (u2);
+}
diff --git a/rts/gmp/mpz/fits_sint_p.c b/rts/gmp/mpz/fits_sint_p.c
new file mode 100644
index 0000000000..82e32a24d5
--- /dev/null
+++ b/rts/gmp/mpz/fits_sint_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_sint_p (mpz_srcptr src)
+#else
+mpz_fits_sint_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size > 0)
+ {
+ if (size > 1)
+ return 0;
+ return mpl < ~((~(unsigned int) 0) >> 1);
+ }
+ else
+ {
+ if (size < -1)
+ return 0;
+ return mpl <= ~((~(unsigned int) 0) >> 1);
+ }
+}
diff --git a/rts/gmp/mpz/fits_slong_p.c b/rts/gmp/mpz/fits_slong_p.c
new file mode 100644
index 0000000000..e0669b5aaa
--- /dev/null
+++ b/rts/gmp/mpz/fits_slong_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_slong_p (mpz_srcptr src)
+#else
+mpz_fits_slong_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size > 0)
+ {
+ if (size > 1)
+ return 0;
+ return mpl < ~((~(unsigned long int) 0) >> 1);
+ }
+ else
+ {
+ if (size < -1)
+ return 0;
+ return mpl <= ~((~(unsigned long int) 0) >> 1);
+ }
+}
diff --git a/rts/gmp/mpz/fits_sshort_p.c b/rts/gmp/mpz/fits_sshort_p.c
new file mode 100644
index 0000000000..5b8e31afae
--- /dev/null
+++ b/rts/gmp/mpz/fits_sshort_p.c
@@ -0,0 +1,50 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_sshort_p (mpz_srcptr src)
+#else
+mpz_fits_sshort_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size > 0)
+ {
+ if (size > 1)
+ return 0;
+ return mpl <= (((unsigned short int) ~(unsigned int) 0) >> 1);
+ }
+ else
+ {
+ if (size < -1)
+ return 0;
+ return mpl <= (((unsigned short int) ~(unsigned int) 0) >> 1) + 1;
+ }
+}
diff --git a/rts/gmp/mpz/fits_uint_p.c b/rts/gmp/mpz/fits_uint_p.c
new file mode 100644
index 0000000000..72f62fa723
--- /dev/null
+++ b/rts/gmp/mpz/fits_uint_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_uint_p (mpz_srcptr src)
+#else
+mpz_fits_uint_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size < 0 || size > 1)
+ return 0;
+ return mpl <= (~(unsigned int) 0);
+}
diff --git a/rts/gmp/mpz/fits_ulong_p.c b/rts/gmp/mpz/fits_ulong_p.c
new file mode 100644
index 0000000000..92eb42e86e
--- /dev/null
+++ b/rts/gmp/mpz/fits_ulong_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_ulong_p (mpz_srcptr src)
+#else
+mpz_fits_ulong_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size < 0 || size > 1)
+ return 0;
+ return mpl <= (~(unsigned long int) 0);
+}
diff --git a/rts/gmp/mpz/fits_ushort_p.c b/rts/gmp/mpz/fits_ushort_p.c
new file mode 100644
index 0000000000..bde0edae6e
--- /dev/null
+++ b/rts/gmp/mpz/fits_ushort_p.c
@@ -0,0 +1,41 @@
+/* int mpz_fits_X_p (mpz_t src) -- Return whether src fits the C type X.
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_fits_ushort_p (mpz_srcptr src)
+#else
+mpz_fits_ushort_p (src)
+ mpz_srcptr src;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t mpl;
+
+ mpl = PTR(src)[0];
+ size = SIZ(src);
+ if (size < 0 || size > 1)
+ return 0;
+ return mpl <= ((unsigned short int) ~(unsigned int) 0);
+}
diff --git a/rts/gmp/mpz/gcd.c b/rts/gmp/mpz/gcd.c
new file mode 100644
index 0000000000..0d950dd609
--- /dev/null
+++ b/rts/gmp/mpz/gcd.c
@@ -0,0 +1,180 @@
+/* mpz/gcd.c: Calculate the greatest common divisor of two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_gcd (g, u, v)
+ mpz_ptr g;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
+#else
+gcd (u, v, g)
+ mpz_ptr g;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#endif /* BERKELEY_MP */
+
+{
+ unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
+ mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
+ mp_ptr tp;
+ mp_ptr up = u->_mp_d;
+ mp_size_t usize = ABS (u->_mp_size);
+ mp_ptr vp = v->_mp_d;
+ mp_size_t vsize = ABS (v->_mp_size);
+ mp_size_t gsize;
+ TMP_DECL (marker);
+
+ /* GCD(0, V) == V. */
+ if (usize == 0)
+ {
+ g->_mp_size = vsize;
+ if (g == v)
+ return;
+ if (g->_mp_alloc < vsize)
+ _mpz_realloc (g, vsize);
+ MPN_COPY (g->_mp_d, vp, vsize);
+ return;
+ }
+
+ /* GCD(U, 0) == U. */
+ if (vsize == 0)
+ {
+ g->_mp_size = usize;
+ if (g == u)
+ return;
+ if (g->_mp_alloc < usize)
+ _mpz_realloc (g, usize);
+ MPN_COPY (g->_mp_d, up, usize);
+ return;
+ }
+
+ if (usize == 1)
+ {
+ g->_mp_size = 1;
+ g->_mp_d[0] = mpn_gcd_1 (vp, vsize, up[0]);
+ return;
+ }
+
+ if (vsize == 1)
+ {
+ g->_mp_size = 1;
+ g->_mp_d[0] = mpn_gcd_1 (up, usize, vp[0]);
+ return;
+ }
+
+ TMP_MARK (marker);
+
+ /* Eliminate low zero bits from U and V and move to temporary storage. */
+ while (*up == 0)
+ up++;
+ u_zero_limbs = up - u->_mp_d;
+ usize -= u_zero_limbs;
+ count_trailing_zeros (u_zero_bits, *up);
+ tp = up;
+ up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB);
+ if (u_zero_bits != 0)
+ {
+ mpn_rshift (up, tp, usize, u_zero_bits);
+ usize -= up[usize - 1] == 0;
+ }
+ else
+ MPN_COPY (up, tp, usize);
+
+ while (*vp == 0)
+ vp++;
+ v_zero_limbs = vp - v->_mp_d;
+ vsize -= v_zero_limbs;
+ count_trailing_zeros (v_zero_bits, *vp);
+ tp = vp;
+ vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB);
+ if (v_zero_bits != 0)
+ {
+ mpn_rshift (vp, tp, vsize, v_zero_bits);
+ vsize -= vp[vsize - 1] == 0;
+ }
+ else
+ MPN_COPY (vp, tp, vsize);
+
+ if (u_zero_limbs > v_zero_limbs)
+ {
+ g_zero_limbs = v_zero_limbs;
+ g_zero_bits = v_zero_bits;
+ }
+ else if (u_zero_limbs < v_zero_limbs)
+ {
+ g_zero_limbs = u_zero_limbs;
+ g_zero_bits = u_zero_bits;
+ }
+ else /* Equal. */
+ {
+ g_zero_limbs = u_zero_limbs;
+ g_zero_bits = MIN (u_zero_bits, v_zero_bits);
+ }
+
+ /* Call mpn_gcd. The 2nd argument must not have more bits than the 1st. */
+ vsize = (usize < vsize || (usize == vsize && up[usize-1] < vp[vsize-1]))
+ ? mpn_gcd (vp, vp, vsize, up, usize)
+ : mpn_gcd (vp, up, usize, vp, vsize);
+
+ /* Here G <-- V << (g_zero_limbs*BITS_PER_MP_LIMB + g_zero_bits). */
+ gsize = vsize + g_zero_limbs;
+ if (g_zero_bits != 0)
+ {
+ mp_limb_t cy_limb;
+ gsize += (vp[vsize - 1] >> (BITS_PER_MP_LIMB - g_zero_bits)) != 0;
+ if (g->_mp_alloc < gsize)
+ _mpz_realloc (g, gsize);
+ MPN_ZERO (g->_mp_d, g_zero_limbs);
+
+ tp = g->_mp_d + g_zero_limbs;
+ cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
+ if (cy_limb != 0)
+ tp[vsize] = cy_limb;
+ }
+ else
+ {
+ if (g->_mp_alloc < gsize)
+ _mpz_realloc (g, gsize);
+ MPN_ZERO (g->_mp_d, g_zero_limbs);
+ MPN_COPY (g->_mp_d + g_zero_limbs, vp, vsize);
+ }
+
+ g->_mp_size = gsize;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/gcd_ui.c b/rts/gmp/mpz/gcd_ui.c
new file mode 100644
index 0000000000..f3bec58829
--- /dev/null
+++ b/rts/gmp/mpz/gcd_ui.c
@@ -0,0 +1,65 @@
+/* mpz_gcd_ui -- Calculate the greatest common divisior of two integers.
+
+Copyright (C) 1994, 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_gcd_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_gcd_ui (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int v;
+#endif
+{
+ mp_size_t size;
+ mp_limb_t res;
+
+ size = ABS (u->_mp_size);
+
+ if (size == 0)
+ res = v;
+ else if (v == 0)
+ {
+ if (w != NULL && u != w)
+ {
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ MPN_COPY (w->_mp_d, u->_mp_d, size);
+ }
+ w->_mp_size = size;
+ /* We can't return any useful result for gcd(big,0). */
+ return size > 1 ? 0 : w->_mp_d[0];
+ }
+ else
+ res = mpn_gcd_1 (u->_mp_d, size, (mp_limb_t) v);
+
+ if (w != NULL)
+ {
+ w->_mp_d[0] = res;
+ w->_mp_size = 1;
+ }
+ return res;
+}
diff --git a/rts/gmp/mpz/gcdext.c b/rts/gmp/mpz/gcdext.c
new file mode 100644
index 0000000000..3ba04c84ff
--- /dev/null
+++ b/rts/gmp/mpz/gcdext.c
@@ -0,0 +1,137 @@
+/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
+ g = as + bt.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
+#else
+mpz_gcdext (g, s, t, a, b)
+ mpz_ptr g;
+ mpz_ptr s;
+ mpz_ptr t;
+ mpz_srcptr a;
+ mpz_srcptr b;
+#endif
+{
+ mp_size_t asize, bsize, usize, vsize;
+ mp_srcptr ap, bp;
+ mp_ptr up, vp;
+ mp_size_t gsize, ssize, tmp_ssize;
+ mp_ptr gp, sp, tmp_gp, tmp_sp;
+ mpz_srcptr u, v;
+ mpz_ptr ss, tt;
+ __mpz_struct stmp, gtmp;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* mpn_gcdext requires that U >= V. Therefore, we often have to swap U and
+ V. This in turn leads to a lot of complications. The computed cofactor
+ will be the wrong one, so we have to fix that up at the end. */
+
+ asize = ABS (SIZ (a));
+ bsize = ABS (SIZ (b));
+ ap = PTR (a);
+ bp = PTR (b);
+ if (asize > bsize || (asize == bsize && mpn_cmp (ap, bp, asize) > 0))
+ {
+ usize = asize;
+ vsize = bsize;
+ up = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+ vp = (mp_ptr) TMP_ALLOC ((vsize + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (up, ap, usize);
+ MPN_COPY (vp, bp, vsize);
+ u = a;
+ v = b;
+ ss = s;
+ tt = t;
+ }
+ else
+ {
+ usize = bsize;
+ vsize = asize;
+ up = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+ vp = (mp_ptr) TMP_ALLOC ((vsize + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (up, bp, usize);
+ MPN_COPY (vp, ap, vsize);
+ u = b;
+ v = a;
+ ss = t;
+ tt = s;
+ }
+
+ tmp_gp = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+ tmp_sp = (mp_ptr) TMP_ALLOC ((usize + 1) * BYTES_PER_MP_LIMB);
+
+ if (vsize == 0)
+ {
+ tmp_sp[0] = 1;
+ tmp_ssize = 1;
+ MPN_COPY (tmp_gp, up, usize);
+ gsize = usize;
+ }
+ else
+ gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, up, usize, vp, vsize);
+ ssize = ABS (tmp_ssize);
+
+ PTR (&gtmp) = tmp_gp;
+ SIZ (&gtmp) = gsize;
+
+ PTR (&stmp) = tmp_sp;
+ SIZ (&stmp) = (tmp_ssize ^ SIZ (u)) >= 0 ? ssize : -ssize;
+
+ if (tt != NULL)
+ {
+ if (SIZ (v) == 0)
+ SIZ (tt) = 0;
+ else
+ {
+ mpz_t x;
+ MPZ_TMP_INIT (x, ssize + usize + 1);
+ mpz_mul (x, &stmp, u);
+ mpz_sub (x, &gtmp, x);
+ mpz_tdiv_q (tt, x, v);
+ }
+ }
+
+ if (ss != NULL)
+ {
+ if (ALLOC (ss) < ssize)
+ _mpz_realloc (ss, ssize);
+ sp = PTR (ss);
+ MPN_COPY (sp, tmp_sp, ssize);
+ SIZ (ss) = SIZ (&stmp);
+ }
+
+ if (ALLOC (g) < gsize)
+ _mpz_realloc (g, gsize);
+ gp = PTR (g);
+ MPN_COPY (gp, tmp_gp, gsize);
+ SIZ (g) = gsize;
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/get_d.c b/rts/gmp/mpz/get_d.c
new file mode 100644
index 0000000000..6a7c5856bb
--- /dev/null
+++ b/rts/gmp/mpz/get_d.c
@@ -0,0 +1,128 @@
+/* double mpz_get_d (mpz_t src) -- Return the double approximation to SRC.
+
+Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static int
+#if __STDC__
+mpn_zero_p (mp_ptr p, mp_size_t n)
+#else
+mpn_zero_p (p, n)
+ mp_ptr p;
+ mp_size_t n;
+#endif
+{
+ mp_size_t i;
+
+ for (i = 0; i < n; i++)
+ {
+ if (p[i] != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+
+double
+#if __STDC__
+mpz_get_d (mpz_srcptr src)
+#else
+mpz_get_d (src)
+ mpz_srcptr src;
+#endif
+{
+ double res;
+ mp_size_t size;
+ int negative;
+ mp_ptr qp;
+ mp_limb_t hz, lz;
+ int cnt;
+
+ size = SIZ(src);
+ if (size == 0)
+ return 0.0;
+
+ negative = size < 0;
+ size = ABS (size);
+ qp = PTR(src);
+
+ if (size == 1)
+ {
+ res = qp[size - 1];
+ }
+ else if (size == 2)
+ {
+ res = MP_BASE_AS_DOUBLE * qp[size - 1] + qp[size - 2];
+ }
+ else
+ {
+ count_leading_zeros (cnt, qp[size - 1]);
+
+#if BITS_PER_MP_LIMB == 32
+ if (cnt == 0)
+ {
+ hz = qp[size - 1];
+ lz = qp[size - 2];
+ }
+ else
+ {
+ hz = (qp[size - 1] << cnt) | (qp[size - 2] >> BITS_PER_MP_LIMB - cnt);
+ lz = (qp[size - 2] << cnt) | (qp[size - 3] >> BITS_PER_MP_LIMB - cnt);
+ }
+#if _GMP_IEEE_FLOATS
+ /* Take bits from less significant limbs, but only if they may affect
+ the result. */
+ if ((lz & 0x7ff) == 0x400)
+ {
+ if (cnt != 0)
+ lz += ((qp[size - 3] << cnt) != 0 || ! mpn_zero_p (qp, size - 3));
+ else
+ lz += (! mpn_zero_p (qp, size - 2));
+ }
+#endif
+ res = MP_BASE_AS_DOUBLE * hz + lz;
+ res = __gmp_scale2 (res, (size - 2) * BITS_PER_MP_LIMB - cnt);
+#endif
+#if BITS_PER_MP_LIMB == 64
+ if (cnt == 0)
+ hz = qp[size - 1];
+ else
+ hz = (qp[size - 1] << cnt) | (qp[size - 2] >> BITS_PER_MP_LIMB - cnt);
+#if _GMP_IEEE_FLOATS
+ if ((hz & 0x7ff) == 0x400)
+ {
+ if (cnt != 0)
+ hz += ((qp[size - 2] << cnt) != 0 || ! mpn_zero_p (qp, size - 2));
+ else
+ hz += (! mpn_zero_p (qp, size - 1));
+ }
+#endif
+ res = hz;
+ res = __gmp_scale2 (res, (size - 1) * BITS_PER_MP_LIMB - cnt);
+#endif
+ }
+
+ return negative ? -res : res;
+}
diff --git a/rts/gmp/mpz/get_si.c b/rts/gmp/mpz/get_si.c
new file mode 100644
index 0000000000..8a5d0e4803
--- /dev/null
+++ b/rts/gmp/mpz/get_si.c
@@ -0,0 +1,43 @@
+/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+signed long int
+#if __STDC__
+mpz_get_si (mpz_srcptr op)
+#else
+mpz_get_si (op)
+ mpz_srcptr op;
+#endif
+{
+ mp_size_t size = op->_mp_size;
+ mp_limb_t low_limb = op->_mp_d[0];
+
+ if (size > 0)
+ return low_limb % ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+ else if (size < 0)
+ /* This convoluted expression is necessary to properly handle 0x80000000 */
+ return ~((low_limb - 1) % ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)));
+ else
+ return 0;
+}
diff --git a/rts/gmp/mpz/get_str.c b/rts/gmp/mpz/get_str.c
new file mode 100644
index 0000000000..c7278afb52
--- /dev/null
+++ b/rts/gmp/mpz/get_str.c
@@ -0,0 +1,118 @@
+/* mpz_get_str (string, base, mp_src) -- Convert the multiple precision
+ number MP_SRC to a string STRING of base BASE. If STRING is NULL
+ allocate space for the result. In any case, return a pointer to the
+ result. If STRING is not NULL, the caller must ensure enough space is
+ available to store the result.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+char *
+#if __STDC__
+mpz_get_str (char *res_str, int base, mpz_srcptr x)
+#else
+mpz_get_str (res_str, base, x)
+ char *res_str;
+ int base;
+ mpz_srcptr x;
+#endif
+{
+ mp_ptr xp;
+ mp_size_t x_size = x->_mp_size;
+ unsigned char *str;
+ char *return_str;
+ size_t str_size;
+ char *num_to_text;
+ int i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ if (base >= 0)
+ {
+ if (base == 0)
+ base = 10;
+ num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+ }
+ else
+ {
+ base = -base;
+ num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ }
+
+ /* We allways allocate space for the string. If the caller passed a
+ NULL pointer for RES_STR, we allocate permanent space and return
+ a pointer to that to the caller. */
+ str_size = ((size_t) (ABS (x_size) * BITS_PER_MP_LIMB
+ * __mp_bases[base].chars_per_bit_exactly)) + 3;
+ if (res_str == 0)
+ {
+ /* We didn't get a string from the user. Allocate one (and return
+ a pointer to it). */
+ res_str = (char *) (*_mp_allocate_func) (str_size);
+ /* Make str, the variable used for raw result from mpn_get_str,
+ point to the same string, but just after a possible minus sign. */
+ str = (unsigned char *) res_str + 1;
+ }
+ else
+ {
+ /* Use TMP_ALLOC to get temporary space, since we need a few extra bytes
+ that we can't expect to caller to supply us with. */
+ str = (unsigned char *) TMP_ALLOC (str_size);
+ }
+
+ return_str = res_str;
+
+ if (x_size == 0)
+ {
+ res_str[0] = '0';
+ res_str[1] = 0;
+ TMP_FREE (marker);
+ return res_str;
+ }
+ if (x_size < 0)
+ {
+ *res_str++ = '-';
+ x_size = -x_size;
+ }
+
+ /* Move the number to convert into temporary space, since mpn_get_str
+ clobbers its argument + needs one extra high limb.... */
+ xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (xp, x->_mp_d, x_size);
+
+ str_size = mpn_get_str (str, base, xp, x_size);
+
+ /* mpn_get_str might make some leading zeros. Skip them. */
+ while (*str == 0)
+ {
+ str_size--;
+ str++;
+ }
+
+ /* Translate result to printable chars and move result to RES_STR. */
+ for (i = 0; i < str_size; i++)
+ res_str[i] = num_to_text[str[i]];
+ res_str[str_size] = 0;
+
+ TMP_FREE (marker);
+ return return_str;
+}
diff --git a/rts/gmp/mpz/get_ui.c b/rts/gmp/mpz/get_ui.c
new file mode 100644
index 0000000000..a8ec9e01a4
--- /dev/null
+++ b/rts/gmp/mpz/get_ui.c
@@ -0,0 +1,37 @@
+/* mpz_get_ui(integer) -- Return the least significant digit from INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_get_ui (mpz_srcptr integer)
+#else
+mpz_get_ui (integer)
+ mpz_srcptr integer;
+#endif
+{
+ if (integer->_mp_size == 0)
+ return 0;
+ else
+ return integer->_mp_d[0];
+}
diff --git a/rts/gmp/mpz/getlimbn.c b/rts/gmp/mpz/getlimbn.c
new file mode 100644
index 0000000000..b772ed05c4
--- /dev/null
+++ b/rts/gmp/mpz/getlimbn.c
@@ -0,0 +1,38 @@
+/* mpz_getlimbn(integer,n) -- Return the N:th limb from INTEGER.
+
+Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpz_getlimbn (mpz_srcptr integer, mp_size_t n)
+#else
+mpz_getlimbn (integer, n)
+ mpz_srcptr integer;
+ mp_size_t n;
+#endif
+{
+ if (ABS (integer->_mp_size) <= n || n < 0)
+ return 0;
+ else
+ return integer->_mp_d[n];
+}
diff --git a/rts/gmp/mpz/hamdist.c b/rts/gmp/mpz/hamdist.c
new file mode 100644
index 0000000000..b039a653d2
--- /dev/null
+++ b/rts/gmp/mpz/hamdist.c
@@ -0,0 +1,62 @@
+/* mpz_hamdist(mpz_ptr op1, mpz_ptr op2) -- Compute the hamming distance
+ between OP1 and OP2. If one of the operands is negative, return ~0. (We
+ could make the function well-defined when both operands are negative, but
+ that would probably not be worth the trouble.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_hamdist (mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_hamdist (u, v)
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+{
+ mp_srcptr up, vp;
+ mp_size_t usize, vsize, size;
+ unsigned long int count;
+
+ usize = u->_mp_size;
+ vsize = v->_mp_size;
+
+ if ((usize | vsize) < 0)
+ return ~ (unsigned long int) 0;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ if (usize > vsize)
+ {
+ count = mpn_popcount (up + vsize, usize - vsize);
+ size = vsize;
+ }
+ else
+ {
+ count = mpn_popcount (vp + usize, vsize - usize);
+ size = usize;
+ }
+
+ return count + mpn_hamdist (up, vp, size);
+}
diff --git a/rts/gmp/mpz/init.c b/rts/gmp/mpz/init.c
new file mode 100644
index 0000000000..2e8e4d2cbd
--- /dev/null
+++ b/rts/gmp/mpz/init.c
@@ -0,0 +1,36 @@
+/* mpz_init() -- Make a new multiple precision number with value 0.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init (mpz_ptr x)
+#else
+mpz_init (x)
+ mpz_ptr x;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ x->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/inp_raw.c b/rts/gmp/mpz/inp_raw.c
new file mode 100644
index 0000000000..15e601229d
--- /dev/null
+++ b/rts/gmp/mpz/inp_raw.c
@@ -0,0 +1,101 @@
+/* mpz_inp_raw -- Input a mpz_t in raw, but endianess, and wordsize
+ independent format (as output by mpz_out_raw).
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+#if __STDC__
+mpz_inp_raw (mpz_ptr x, FILE *stream)
+#else
+mpz_inp_raw (x, stream)
+ mpz_ptr x;
+ FILE *stream;
+#endif
+{
+ int i;
+ mp_size_t s;
+ mp_size_t xsize;
+ mp_ptr xp;
+ unsigned int c;
+ mp_limb_t x_limb;
+ mp_size_t in_bytesize;
+ int neg_flag;
+
+ if (stream == 0)
+ stream = stdin;
+
+ /* Read 4-byte size */
+ in_bytesize = 0;
+ for (i = 4 - 1; i >= 0; i--)
+ {
+ c = fgetc (stream);
+ in_bytesize = (in_bytesize << BITS_PER_CHAR) | c;
+ }
+
+ /* Size is stored as a 32 bit word; sign extend in_bytesize for non-32 bit
+ machines. */
+ if (sizeof (mp_size_t) > 4)
+ in_bytesize |= (-(in_bytesize < 0)) << 31;
+
+ neg_flag = in_bytesize < 0;
+ in_bytesize = ABS (in_bytesize);
+ xsize = (in_bytesize + BYTES_PER_MP_LIMB - 1) / BYTES_PER_MP_LIMB;
+
+ if (xsize == 0)
+ {
+ x->_mp_size = 0;
+ return 4; /* we've read 4 bytes */
+ }
+
+ if (x->_mp_alloc < xsize)
+ _mpz_realloc (x, xsize);
+ xp = x->_mp_d;
+
+ x_limb = 0;
+ for (i = (in_bytesize - 1) % BYTES_PER_MP_LIMB; i >= 0; i--)
+ {
+ c = fgetc (stream);
+ x_limb = (x_limb << BITS_PER_CHAR) | c;
+ }
+ xp[xsize - 1] = x_limb;
+
+ for (s = xsize - 2; s >= 0; s--)
+ {
+ x_limb = 0;
+ for (i = BYTES_PER_MP_LIMB - 1; i >= 0; i--)
+ {
+ c = fgetc (stream);
+ x_limb = (x_limb << BITS_PER_CHAR) | c;
+ }
+ xp[s] = x_limb;
+ }
+
+ if (c == EOF)
+ return 0; /* error */
+
+ MPN_NORMALIZE (xp, xsize);
+ x->_mp_size = neg_flag ? -xsize : xsize;
+ return in_bytesize + 4;
+}
diff --git a/rts/gmp/mpz/inp_str.c b/rts/gmp/mpz/inp_str.c
new file mode 100644
index 0000000000..7aa5e1fc30
--- /dev/null
+++ b/rts/gmp/mpz/inp_str.c
@@ -0,0 +1,167 @@
+/* mpz_inp_str(dest_integer, stream, base) -- Input a number in base
+ BASE from stdio stream STREAM and store the result in DEST_INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static int
+#if __STDC__
+digit_value_in_base (int c, int base)
+#else
+digit_value_in_base (c, base)
+ int c;
+ int base;
+#endif
+{
+ int digit;
+
+ if (isdigit (c))
+ digit = c - '0';
+ else if (islower (c))
+ digit = c - 'a' + 10;
+ else if (isupper (c))
+ digit = c - 'A' + 10;
+ else
+ return -1;
+
+ if (digit < base)
+ return digit;
+ return -1;
+}
+
+size_t
+#if __STDC__
+mpz_inp_str (mpz_ptr x, FILE *stream, int base)
+#else
+mpz_inp_str (x, stream, base)
+ mpz_ptr x;
+ FILE *stream;
+ int base;
+#endif
+{
+ char *str;
+ size_t alloc_size, str_size;
+ int c;
+ int negative;
+ mp_size_t xsize;
+ size_t nread;
+
+ if (stream == 0)
+ stream = stdin;
+
+ nread = 0;
+
+ /* Skip whitespace. */
+ do
+ {
+ c = getc (stream);
+ nread++;
+ }
+ while (isspace (c));
+
+ negative = 0;
+ if (c == '-')
+ {
+ negative = 1;
+ c = getc (stream);
+ nread++;
+ }
+
+ if (digit_value_in_base (c, base == 0 ? 10 : base) < 0)
+ return 0; /* error if no digits */
+
+ /* If BASE is 0, try to find out the base by looking at the initial
+ characters. */
+ if (base == 0)
+ {
+ base = 10;
+ if (c == '0')
+ {
+ base = 8;
+ c = getc (stream);
+ nread++;
+ if (c == 'x' || c == 'X')
+ {
+ base = 16;
+ c = getc (stream);
+ nread++;
+ }
+ else if (c == 'b' || c == 'B')
+ {
+ base = 2;
+ c = getc (stream);
+ nread++;
+ }
+ }
+ }
+
+ /* Skip leading zeros. */
+ while (c == '0')
+ {
+ c = getc (stream);
+ nread++;
+ }
+
+ alloc_size = 100;
+ str = (char *) (*_mp_allocate_func) (alloc_size);
+ str_size = 0;
+
+ for (;;)
+ {
+ int dig;
+ if (str_size >= alloc_size)
+ {
+ size_t old_alloc_size = alloc_size;
+ alloc_size = alloc_size * 3 / 2;
+ str = (char *) (*_mp_reallocate_func) (str, old_alloc_size, alloc_size);
+ }
+ dig = digit_value_in_base (c, base);
+ if (dig < 0)
+ break;
+ str[str_size++] = dig;
+ c = getc (stream);
+ }
+
+ ungetc (c, stream);
+
+ /* Make sure the string is not empty, mpn_set_str would fail. */
+ if (str_size == 0)
+ {
+ x->_mp_size = 0;
+ (*_mp_free_func) (str, alloc_size);
+ return nread;
+ }
+
+ xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly))
+ / BITS_PER_MP_LIMB + 2);
+ if (x->_mp_alloc < xsize)
+ _mpz_realloc (x, xsize);
+
+ /* Convert the byte array in base BASE to our bignum format. */
+ xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base);
+ x->_mp_size = negative ? -xsize : xsize;
+
+ (*_mp_free_func) (str, alloc_size);
+ return str_size + nread;
+}
diff --git a/rts/gmp/mpz/invert.c b/rts/gmp/mpz/invert.c
new file mode 100644
index 0000000000..749a0969fc
--- /dev/null
+++ b/rts/gmp/mpz/invert.c
@@ -0,0 +1,77 @@
+/* mpz_invert (inv, x, n). Find multiplicative inverse of X in Z(N).
+ If X has an inverse, return non-zero and store inverse in INVERSE,
+ otherwise, return 0 and put garbage in INVERSE.
+
+Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
+#else
+mpz_invert (inverse, x, n)
+ mpz_ptr inverse;
+ mpz_srcptr x, n;
+#endif
+{
+ mpz_t gcd, tmp;
+ mp_size_t xsize, nsize, size;
+ TMP_DECL (marker);
+
+ xsize = SIZ (x);
+ nsize = SIZ (n);
+ xsize = ABS (xsize);
+ nsize = ABS (nsize);
+ size = MAX (xsize, nsize) + 1;
+
+ /* No inverse exists if the leftside operand is 0. Likewise, no
+ inverse exists if the mod operand is 1. */
+ if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1))
+ return 0;
+
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (gcd, size);
+ MPZ_TMP_INIT (tmp, size);
+ mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n);
+
+ /* If no inverse existed, return with an indication of that. */
+ if (gcd->_mp_size != 1 || (gcd->_mp_d)[0] != 1)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+
+ /* Make sure we return a positive inverse. */
+ if (SIZ (tmp) < 0)
+ {
+ if (SIZ (n) < 0)
+ mpz_sub (inverse, tmp, n);
+ else
+ mpz_add (inverse, tmp, n);
+ }
+ else
+ mpz_set (inverse, tmp);
+
+ TMP_FREE (marker);
+ return 1;
+}
diff --git a/rts/gmp/mpz/ior.c b/rts/gmp/mpz/ior.c
new file mode 100644
index 0000000000..0bb5a806dc
--- /dev/null
+++ b/rts/gmp/mpz/ior.c
@@ -0,0 +1,244 @@
+/* mpz_ior -- Logical inclusive or.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+#else
+mpz_ior (res, op1, op2)
+ mpz_ptr res;
+ mpz_srcptr op1;
+ mpz_srcptr op2;
+#endif
+{
+ mp_srcptr op1_ptr, op2_ptr;
+ mp_size_t op1_size, op2_size;
+ mp_ptr res_ptr;
+ mp_size_t res_size;
+ mp_size_t i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op1_size = op1->_mp_size;
+ op2_size = op2->_mp_size;
+
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+
+ if (op1_size >= 0)
+ {
+ if (op2_size >= 0)
+ {
+ if (op1_size >= op2_size)
+ {
+ if (res->_mp_alloc < op1_size)
+ {
+ _mpz_realloc (res, op1_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op1_ptr)
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ if (res->_mp_alloc < op2_size)
+ {
+ _mpz_realloc (res, op2_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op2_ptr)
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ res->_mp_size = res_size;
+ return;
+ }
+ else /* op2_size < 0 */
+ {
+ /* Fall through to the code at the end of the function. */
+ }
+ }
+ else
+ {
+ if (op2_size < 0)
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+
+ /* Both operands are negative, so will be the result.
+ -((-OP1) | (-OP2)) = -(~(OP1 - 1) | ~(OP2 - 1)) =
+ = ~(~(OP1 - 1) | ~(OP2 - 1)) + 1 =
+ = ((OP1 - 1) & (OP2 - 1)) + 1 */
+
+ op1_size = -op1_size;
+ op2_size = -op2_size;
+
+ res_size = MIN (op1_size, op2_size);
+
+ /* Possible optimization: Decrease mpn_sub precision,
+ as we won't use the entire res of both. */
+ opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);
+ op1_ptr = opx;
+
+ opx = (mp_ptr) TMP_ALLOC (res_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ if (res->_mp_alloc < res_size)
+ {
+ _mpz_realloc (res, res_size);
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to
+ temporary space--never to the space RES->_mp_d used
+ to point to before reallocation. */
+ }
+
+ /* First loop finds the size of the result. */
+ for (i = res_size - 1; i >= 0; i--)
+ if ((op1_ptr[i] & op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+
+ if (res_size != 0)
+ {
+ /* Second loop computes the real result. */
+ for (i = res_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+ }
+ else
+ {
+ res_ptr[0] = 1;
+ res_size = 1;
+ }
+
+ res->_mp_size = -res_size;
+ TMP_FREE (marker);
+ return;
+ }
+ else
+ {
+ /* We should compute -OP1 | OP2. Swap OP1 and OP2 and fall
+ through to the code that handles OP1 | -OP2. */
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ }
+ }
+
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+ mp_size_t res_alloc;
+ mp_size_t count;
+
+ /* Operand 2 negative, so will be the result.
+ -(OP1 | (-OP2)) = -(OP1 | ~(OP2 - 1)) =
+ = ~(OP1 | ~(OP2 - 1)) + 1 =
+ = (~OP1 & (OP2 - 1)) + 1 */
+
+ op2_size = -op2_size;
+
+ res_alloc = op2_size;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+ op2_size -= op2_ptr[op2_size - 1] == 0;
+
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ op1_ptr = op1->_mp_d;
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ if (op1_size >= op2_size)
+ {
+ /* We can just ignore the part of OP1 that stretches above OP2,
+ because the result limbs are zero there. */
+
+ /* First loop finds the size of the result. */
+ for (i = op2_size - 1; i >= 0; i--)
+ if ((~op1_ptr[i] & op2_ptr[i]) != 0)
+ break;
+ res_size = i + 1;
+ count = res_size;
+ }
+ else
+ {
+ res_size = op2_size;
+
+ /* Copy the part of OP2 that stretches above OP1, to RES. */
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+ count = op1_size;
+ }
+
+ if (res_size != 0)
+ {
+ /* Second loop computes the real result. */
+ for (i = count - 1; i >= 0; i--)
+ res_ptr[i] = ~op1_ptr[i] & op2_ptr[i];
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+ }
+ else
+ {
+ res_ptr[0] = 1;
+ res_size = 1;
+ }
+
+ res->_mp_size = -res_size;
+ }
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/iset.c b/rts/gmp/mpz/iset.c
new file mode 100644
index 0000000000..114bc2d542
--- /dev/null
+++ b/rts/gmp/mpz/iset.c
@@ -0,0 +1,49 @@
+/* mpz_init_set (src_integer) -- Make a new multiple precision number with
+ a value copied from SRC_INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_init_set (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+
+ usize = u->_mp_size;
+ size = ABS (usize);
+
+ w->_mp_alloc = MAX (size, 1);
+ w->_mp_d = (mp_ptr) (*_mp_allocate_func) (w->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ w->_mp_size = usize;
+}
diff --git a/rts/gmp/mpz/iset_d.c b/rts/gmp/mpz/iset_d.c
new file mode 100644
index 0000000000..502a8933e2
--- /dev/null
+++ b/rts/gmp/mpz/iset_d.c
@@ -0,0 +1,39 @@
+/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double
+ value VAL.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set_d (mpz_ptr dest, double val)
+#else
+mpz_init_set_d (dest, val)
+ mpz_ptr dest;
+ double val;
+#endif
+{
+ dest->_mp_alloc = 1;
+ dest->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ dest->_mp_size = 0;
+ mpz_set_d (dest, val);
+}
diff --git a/rts/gmp/mpz/iset_si.c b/rts/gmp/mpz/iset_si.c
new file mode 100644
index 0000000000..842db140ef
--- /dev/null
+++ b/rts/gmp/mpz/iset_si.c
@@ -0,0 +1,49 @@
+/* mpz_init_set_si(val) -- Make a new multiple precision number with
+ value val.
+
+Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set_si (mpz_ptr x, signed long int val)
+#else
+mpz_init_set_si (x, val)
+ mpz_ptr x;
+ signed long int val;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ if (val > 0)
+ {
+ x->_mp_d[0] = val;
+ x->_mp_size = 1;
+ }
+ else if (val < 0)
+ {
+ x->_mp_d[0] = (unsigned long) -val;
+ x->_mp_size = -1;
+ }
+ else
+ x->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/iset_str.c b/rts/gmp/mpz/iset_str.c
new file mode 100644
index 0000000000..dfb8c6b230
--- /dev/null
+++ b/rts/gmp/mpz/iset_str.c
@@ -0,0 +1,47 @@
+/* mpz_init_set_str(string, base) -- Convert the \0-terminated string
+ STRING in base BASE to a multiple precision integer. Return a MP_INT
+ structure representing the integer. Allow white space in the
+ string. If BASE == 0 determine the base in the C standard way,
+ i.e. 0xhh...h means base 16, 0oo...o means base 8, otherwise
+ assume base 10.
+
+Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_init_set_str (mpz_ptr x, const char *str, int base)
+#else
+mpz_init_set_str (x, str, base)
+ mpz_ptr x;
+ const char *str;
+ int base;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+
+ /* if str has no digits mpz_set_str leaves x->_mp_size unset */
+ x->_mp_size = 0;
+
+ return mpz_set_str (x, str, base);
+}
diff --git a/rts/gmp/mpz/iset_ui.c b/rts/gmp/mpz/iset_ui.c
new file mode 100644
index 0000000000..759182c556
--- /dev/null
+++ b/rts/gmp/mpz/iset_ui.c
@@ -0,0 +1,39 @@
+/* mpz_init_set_ui(val) -- Make a new multiple precision number with
+ value val.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_init_set_ui (mpz_ptr x, unsigned long int val)
+#else
+mpz_init_set_ui (x, val)
+ mpz_ptr x;
+ unsigned long int val;
+#endif
+{
+ x->_mp_alloc = 1;
+ x->_mp_d = (mp_ptr) (*_mp_allocate_func) (BYTES_PER_MP_LIMB);
+ x->_mp_d[0] = val;
+ x->_mp_size = val != 0;
+}
diff --git a/rts/gmp/mpz/jacobi.c b/rts/gmp/mpz/jacobi.c
new file mode 100644
index 0000000000..9d49e1d0c6
--- /dev/null
+++ b/rts/gmp/mpz/jacobi.c
@@ -0,0 +1,53 @@
+/* mpz_jacobi (op1, op2).
+ Contributed by Bennet Yee (bsy) at Carnegie-Mellon University
+
+Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+
+/* Precondition: both p and q are positive */
+
+int
+#if __STDC__
+mpz_jacobi (mpz_srcptr pi, mpz_srcptr qi)
+#else
+mpz_jacobi (pi, qi)
+ mpz_srcptr pi, qi;
+#endif
+{
+#if GCDCHECK
+ int retval;
+ mpz_t gcdval;
+
+ mpz_init (gcdval);
+ mpz_gcd (gcdval, pi, qi);
+ if (!mpz_cmp_ui (gcdval, 1L))
+ {
+ /* J(ab,cb) = J(ab,c)J(ab,b) = J(ab,c)J(0,b) = J(ab,c)*0 */
+ retval = 0;
+ }
+ else
+ retval = mpz_legendre (pi, qi);
+ mpz_clear (gcdval);
+ return retval;
+#else
+ return mpz_legendre (pi, qi);
+#endif
+}
diff --git a/rts/gmp/mpz/kronsz.c b/rts/gmp/mpz/kronsz.c
new file mode 100644
index 0000000000..c8c6752224
--- /dev/null
+++ b/rts/gmp/mpz/kronsz.c
@@ -0,0 +1,126 @@
+/* mpz_si_kronecker -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+#if __STDC__
+mpz_si_kronecker (long a, mpz_srcptr b)
+#else
+mpz_si_kronecker (a, b)
+ long a;
+ mpz_srcptr b;
+#endif
+{
+ int b_abs_size;
+ mp_srcptr b_ptr;
+ mp_limb_t b_low;
+ int twos;
+ int result_bit1;
+
+ b_abs_size = ABSIZ (b);
+ if (b_abs_size == 0)
+ return JACOBI_S0 (a); /* (a/0) */
+
+ b_ptr = PTR(b);
+ b_low = b_ptr[0];
+
+ /* (0/b) = 1 if b=+/-1, 0 otherwise */
+ if (a == 0)
+ return (b_abs_size == 1) & (b_low == 1);
+
+ /* account for the effect of the sign of b, so can then ignore it */
+ result_bit1 = JACOBI_BSGN_SZ_BIT1 (a, b);
+
+ if ((b_low & 1) == 0)
+ {
+ /* b even */
+
+ if ((a & 1) == 0)
+ return 0; /* (a/b)=0 if both a,b even */
+
+ /* Require MP_BITS_PER_LIMB even, so that (a/2)^MP_BITS_PER_LIMB = 1,
+ and so that therefore there's no need to account for how many zero
+ limbs are stripped. */
+ ASSERT ((BITS_PER_MP_LIMB & 1) == 0);
+
+ MPN_STRIP_LOW_ZEROS_NOT_ZERO (b_ptr, b_abs_size);
+ b_low = b_ptr[0];
+
+ if ((b_low & 1) == 0)
+ {
+ /* odd a, even b */
+
+ mp_limb_t b_shl_bit1;
+
+ count_trailing_zeros (twos, b_low);
+
+ /* b_shl_bit1 is b>>twos, but with only bit 1 guaranteed */
+ if (twos == BITS_PER_MP_LIMB-1)
+ b_shl_bit1 = (b_abs_size == 1) ? 0 : (b_ptr[1] << 1);
+ else
+ b_shl_bit1 = (b_low >> twos);
+
+ result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_shl_bit1);
+ a = ABS(a);
+
+ if (a == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */
+
+ /* twos (a/2), reciprocity to (b/a), and (b/a) = (b mod a / b) */
+ return mpn_jacobi_base (mpn_mod_1_rshift (b_ptr, b_abs_size,
+ twos, a),
+ a,
+ result_bit1
+ ^ JACOBI_TWOS_U_BIT1 (twos, a)
+ ^ JACOBI_RECIP_UU_BIT1 (a, b_shl_bit1));
+ }
+ }
+
+ /* b odd */
+
+ result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
+ a = ABS(a);
+
+ /* (a/1) = 1 for any a */
+ if (b_abs_size == 1 && b_low == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1);
+
+ /* Note a is cast to unsigned because 0x80..00 doesn't fit in a signed. */
+ if ((a & 1) == 0)
+ {
+ count_trailing_zeros (twos, a);
+ a = ((unsigned long) a) >> twos;
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
+ }
+
+ if (a == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1); /* (1/b)=1 */
+
+ /* reciprocity to (b/a), and (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a), a,
+ result_bit1 ^ JACOBI_RECIP_UU_BIT1 (a, b_low));
+}
diff --git a/rts/gmp/mpz/kronuz.c b/rts/gmp/mpz/kronuz.c
new file mode 100644
index 0000000000..b877e6f64c
--- /dev/null
+++ b/rts/gmp/mpz/kronuz.c
@@ -0,0 +1,115 @@
+/* mpz_ui_kronecker -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+int
+#if __STDC__
+mpz_ui_kronecker (unsigned long a, mpz_srcptr b)
+#else
+mpz_ui_kronecker (a, b)
+ unsigned long a;
+ mpz_srcptr b;
+#endif
+{
+ int b_abs_size;
+ mp_srcptr b_ptr;
+ mp_limb_t b_low;
+ int twos;
+ int result_bit1;
+
+ /* (a/0) */
+ b_abs_size = ABSIZ (b);
+ if (b_abs_size == 0)
+ return JACOBI_U0 (a);
+
+ /* (a/-1)=1 when a>=0, so the sign of b is ignored */
+ b_ptr = PTR(b);
+ b_low = b_ptr[0];
+
+ /* (0/1)=1; (0/-1)=1; (0/b)=0 for b!=+/-1
+ (1/b)=1, for any b */
+ if (a <= 1)
+ return (a == 1) | ((b_abs_size == 1) & (b_low == 1));
+
+ if (b_low & 1)
+ {
+ /* (a/1) = 1 for any a */
+ if (b_abs_size == 1 && b_low == 1)
+ return 1;
+
+ count_trailing_zeros (twos, a);
+ a >>= twos;
+ if (a == 1)
+ return JACOBI_TWOS_U (twos, b_low); /* powers of (2/b) only */
+
+ /* powers of (2/b); reciprocity to (b/a); (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a),
+ a,
+ JACOBI_TWOS_U_BIT1 (twos, b_low)
+ ^ JACOBI_RECIP_UU_BIT1 (b_low, a));
+ }
+
+ /* b is even; (a/2)=0 if a is even */
+ if ((a & 1) == 0)
+ return 0;
+
+ /* Require MP_BITS_PER_LIMB even, so (a/2)^MP_BITS_PER_LIMB = 1, and so we
+ don't have to pay attention to how many trailing zero limbs are
+ stripped. */
+ ASSERT ((BITS_PER_MP_LIMB & 1) == 0);
+
+ MPN_STRIP_LOW_ZEROS_NOT_ZERO (b_ptr, b_abs_size);
+ b_low = b_ptr[0];
+
+ if (b_low & 1)
+ /* reciprocity to (b/a); (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1 (b_ptr, b_abs_size, a),
+ a,
+ JACOBI_RECIP_UU_BIT1 (b_low, a));
+
+ count_trailing_zeros (twos, b_low);
+
+ /* reciprocity to get (b/a) */
+ if (twos == BITS_PER_MP_LIMB-1)
+ {
+ if (b_abs_size == 1)
+ {
+ /* b==0x800...00, one limb high bit only, so (a/2)^(BPML-1) */
+ return JACOBI_TWOS_U (BITS_PER_MP_LIMB-1, a);
+ }
+
+ /* b_abs_size > 1 */
+ result_bit1 = JACOBI_RECIP_UU_BIT1 (a, b_ptr[1] << 1);
+ }
+ else
+ result_bit1 = JACOBI_RECIP_UU_BIT1 (a, b_low >> twos);
+
+ /* powers of (a/2); reciprocity to (b/a); (b/a) == (b mod a / a) */
+ return mpn_jacobi_base (mpn_mod_1_rshift (b_ptr, b_abs_size, twos, a),
+ a,
+ JACOBI_TWOS_U_BIT1 (twos, a) ^ result_bit1);
+}
diff --git a/rts/gmp/mpz/kronzs.c b/rts/gmp/mpz/kronzs.c
new file mode 100644
index 0000000000..edfb465976
--- /dev/null
+++ b/rts/gmp/mpz/kronzs.c
@@ -0,0 +1,74 @@
+/* mpz_kronecker_si -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function is expected to be often used with b odd, so there's a test
+ for this before invoking count_trailing_zeros().
+
+ After the absolute value of b is established it's treated as an unsigned
+ long, because 0x80..00 doesn't fit in a signed long. */
+
+/* Compute the Kronecker/Jacobi symbol (a/b); returns -1, 0 or 1.
+   result_bit1 accumulates the sign using the JACOBI_*_BIT1 convention:
+   bit 1 set means the symbol is negated.  */
+int
+#if __STDC__
+mpz_kronecker_si (mpz_srcptr a, long b)
+#else
+mpz_kronecker_si (a, b)
+ mpz_srcptr a;
+ long b;
+#endif
+{
+ int result_bit1;
+ int twos;
+
+ /* (a/0) is 1 for a = +/-1 and 0 otherwise */
+ if (b == 0)
+ return JACOBI_Z0 (a);
+
+ /* fold the sign of b (with the sign of a) into the result bit, then
+    work with |b|; per the file header |b| is handled as unsigned since
+    -LONG_MIN does not fit in a signed long */
+ result_bit1 = JACOBI_BSGN_ZS_BIT1(a, b);
+ b = ABS (b);
+
+ if (b == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1); /* (a/1) = 1 for any a */
+
+ /* b odd: reduce a mod b and finish in the mpn-level routine */
+ if (b & 1)
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, result_bit1);
+
+ /* result 0 if both a,b even */
+ if (mpz_even_p (a))
+ return 0;
+
+ /* (a/2)=(2/a) when a odd */
+ count_trailing_zeros (twos, b);
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, PTR(a)[0]);
+
+ /* strip the factors of 2 from b; unsigned cast keeps the shift clean */
+ b = ((unsigned long) b) >> twos;
+ if (b == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1);
+ else
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, result_bit1);
+}
+
+
diff --git a/rts/gmp/mpz/kronzu.c b/rts/gmp/mpz/kronzu.c
new file mode 100644
index 0000000000..749be5df07
--- /dev/null
+++ b/rts/gmp/mpz/kronzu.c
@@ -0,0 +1,66 @@
+/* mpz_kronecker_ui -- Kronecker/Jacobi symbol. */
+
+/*
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* This function is expected to be often used with b an odd prime, so the
+ code for odd b is nice and short. */
+
+/* Compute the Kronecker/Jacobi symbol (a/b) for unsigned long b;
+   returns -1, 0 or 1.  The odd-b case is tested first since (per the
+   header comment) b is expected to usually be an odd prime.  */
+int
+#if __STDC__
+mpz_kronecker_ui (mpz_srcptr a, unsigned long b)
+#else
+mpz_kronecker_ui (a, b)
+ mpz_srcptr a;
+ unsigned long b;
+#endif
+{
+ int twos;
+
+ if (b & 1)
+ {
+ if (b != 1)
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b, 0);
+ else
+ return 1; /* (a/1)=1 for any a */
+ }
+
+ /* b is even from here on; (a/0) is 1 for a = +/-1, else 0 */
+ if (b == 0)
+ return JACOBI_Z0 (a);
+
+ /* (a/2)=0 if a even */
+ if (mpz_even_p (a))
+ return 0;
+
+ /* (a/2)=(2/a) when a odd */
+ count_trailing_zeros (twos, b);
+ b >>= twos;
+ if (b == 1)
+ return JACOBI_TWOS_U (twos, PTR(a)[0]); /* b was a pure power of 2 */
+
+ return mpn_jacobi_base (mpz_fdiv_ui (a, b), b,
+ JACOBI_TWOS_U_BIT1(twos, PTR(a)[0]));
+}
diff --git a/rts/gmp/mpz/lcm.c b/rts/gmp/mpz/lcm.c
new file mode 100644
index 0000000000..7495882ae5
--- /dev/null
+++ b/rts/gmp/mpz/lcm.c
@@ -0,0 +1,61 @@
+/* mpz/lcm.c: Calculate the least common multiple of two integers.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void *_mpz_realloc ();
+
+/* mpz_lcm -- least common multiple: r = |u*v| / gcd(u,v).
+   lcm(0,x) = lcm(x,0) = 0.  The result is forced non-negative, as the
+   documented mpz_lcm contract requires, even for negative operands
+   (previously u's sign leaked through u/gcd into the product).  */
+void
+#if __STDC__
+mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_lcm (r, u, v)
+ mpz_ptr r;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+{
+ mpz_t g;
+ mp_size_t usize, vsize, size;
+ TMP_DECL (marker);
+
+ usize = ABS (SIZ (u));
+ vsize = ABS (SIZ (v));
+
+ /* test the zero cases before TMP_MARK so this early return needs no
+    balancing TMP_FREE */
+ if (usize == 0 || vsize == 0)
+ {
+ SIZ (r) = 0;
+ return;
+ }
+
+ TMP_MARK (marker);
+
+ /* g holds gcd(u,v) and then u/gcd(u,v), both at most MAX(usize,vsize)
+    limbs */
+ size = MAX (usize, vsize);
+ MPZ_TMP_INIT (g, size);
+
+ mpz_gcd (g, u, v);
+ mpz_divexact (g, u, g);
+ mpz_mul (r, g, v);
+
+ /* gcd is non-negative, but u/gcd and v carry their own signs; the
+    conventional lcm is the absolute value */
+ SIZ (r) = ABS (SIZ (r));
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/legendre.c b/rts/gmp/mpz/legendre.c
new file mode 100644
index 0000000000..ab665f70d0
--- /dev/null
+++ b/rts/gmp/mpz/legendre.c
@@ -0,0 +1,184 @@
+/* mpz_legendre (op1, op2).
+ Contributed by Bennet Yee (bsy) at Carnegie-Mellon University
+
+Copyright (C) 1992, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+
+#if defined (DEBUG)
+#include <stdio.h>
+#endif
+
+/* Precondition: both p and q are positive */
+
+/* Compute the Legendre/Jacobi symbol (pi/qi) by repeated reduction:
+   strip powers of two, reduce p mod q, and apply quadratic reciprocity,
+   swapping p and q each round.  Precondition (per the header comment):
+   both p and q are positive.  Returns -1, 0 or 1.
+   Fix: the optional Q_MINUS_1 path wrote to the undeclared identifier
+   q_minus_q; it now correctly updates q_minus_1.  */
+int
+#if __STDC__
+mpz_legendre (mpz_srcptr pi, mpz_srcptr qi)
+#else
+mpz_legendre (pi, qi)
+mpz_srcptr pi, qi;
+#endif
+{
+ mpz_t p, q, qdiv2;
+#ifdef Q_MINUS_1
+ mpz_t q_minus_1;
+#endif
+ mpz_ptr mtmp;
+ register mpz_ptr pptr, qptr;
+ register int retval = 1;
+ register unsigned long int s;
+
+ /* work on local copies: the reduction below destroys its operands */
+ pptr = p;
+ mpz_init_set (pptr, pi);
+ qptr = q;
+ mpz_init_set (qptr, qi);
+
+#ifdef Q_MINUS_1
+ mpz_init (q_minus_1);
+#endif
+ mpz_init (qdiv2);
+
+tail_recurse2:
+#ifdef DEBUG
+ printf ("tail_recurse2: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ s = mpz_scan1 (qptr, 0);
+ if (s) mpz_tdiv_q_2exp (qptr, qptr, s); /* J(a,2) = 1 */
+#ifdef DEBUG
+ printf ("2 factor decomposition: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ /* postcondition q odd */
+ if (!mpz_cmp_ui (qptr, 1L)) /* J(a,1) = 1 */
+ goto done;
+ mpz_mod (pptr, pptr, qptr); /* J(a,q) = J(b,q) when a == b mod q */
+#ifdef DEBUG
+ printf ("mod out by q: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ /* quick calculation to get approximate size first */
+ /* precondition: p < q */
+ if ((mpz_sizeinbase (pptr, 2) + 1 >= mpz_sizeinbase (qptr,2))
+ && (mpz_tdiv_q_2exp (qdiv2, qptr, 1L), mpz_cmp (pptr, qdiv2) > 0))
+ {
+ /* p > q/2 */
+ mpz_sub (pptr, qptr, pptr);
+ /* J(-1,q) = (-1)^((q-1)/2), q odd */
+ if (mpz_get_ui (qptr) & 2)
+ retval = -retval;
+ }
+ /* p < q/2 */
+#ifdef Q_MINUS_1
+ mpz_sub_ui (q_minus_1, qptr, 1L); /* was q_minus_q: undeclared identifier */
+#endif
+tail_recurse: /* we use tail_recurse only if q has not changed */
+#ifdef DEBUG
+ printf ("tail_recurse1: p=");
+ mpz_out_str (stdout, 10, pptr);
+ printf ("\nq=");
+ mpz_out_str (stdout, 10, qptr);
+ putchar ('\n');
+#endif
+ /*
+ * J(0,q) = 0
+ * this occurs only if gcd(p,q) != 1 which is never true for
+ * Legendre function.
+ */
+ if (!mpz_cmp_ui (pptr, 0L))
+ {
+ retval = 0;
+ goto done;
+ }
+
+ if (!mpz_cmp_ui (pptr, 1L))
+ {
+ /* J(1,q) = 1 */
+ /* retval *= 1; */
+ goto done;
+ }
+#ifdef Q_MINUS_1
+ if (!mpz_cmp (pptr, q_minus_1))
+ {
+ /* J(-1,q) = (-1)^((q-1)/2) */
+ if (mpz_get_ui (qptr) & 2)
+ retval = -retval;
+ /* else retval *= 1; */
+ goto done;
+ }
+#endif
+ /*
+ * we do not handle J(xy,q) except for x==2
+ * since we do not want to factor
+ */
+ if ((s = mpz_scan1 (pptr, 0)) != 0)
+ {
+ /*
+ * J(2,q) = (-1)^((q^2-1)/8)
+ *
+ * Note that q odd guarantees that q^2-1 is divisible by 8:
+ * Let a: q=2a+1. q^2 = 4a^2+4a+1, (q^2-1)/8 = a(a+1)/2, qed
+ *
+ * Now, note that this means that the low two bits of _a_
+ * (or the low bits of q shifted over by 1 determines
+ * the factor).
+ */
+ mpz_tdiv_q_2exp (pptr, pptr, s);
+
+ /* even powers of 2 gives J(2,q)^{2n} = 1 */
+ if (s & 1)
+ {
+ s = mpz_get_ui (qptr) >> 1;
+ s = s * (s + 1);
+ if (s & 2)
+ retval = -retval;
+ }
+ goto tail_recurse;
+ }
+ /*
+ * we know p is odd since we have cast out 2s
+ * precondition that q is odd guarantees both odd.
+ *
+ * quadratic reciprocity
+ * J(p,q) = (-1)^((p-1)(q-1)/4) * J(q,p)
+ */
+ if ((s = mpz_scan1 (pptr, 1)) <= 2 && (s + mpz_scan1 (qptr, 1)) <= 2)
+ retval = -retval;
+
+ mtmp = pptr; pptr = qptr; qptr = mtmp;
+ goto tail_recurse2;
+done:
+ mpz_clear (p);
+ mpz_clear (q);
+ mpz_clear (qdiv2);
+#ifdef Q_MINUS_1
+ mpz_clear (q_minus_1);
+#endif
+ return retval;
+}
diff --git a/rts/gmp/mpz/mod.c b/rts/gmp/mpz/mod.c
new file mode 100644
index 0000000000..87033b333b
--- /dev/null
+++ b/rts/gmp/mpz/mod.c
@@ -0,0 +1,63 @@
+/* mpz_mod -- The mathematical mod function.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* mpz_mod -- mathematical mod: rem = dividend mod divisor with
+   0 <= rem < |divisor|.  Implemented as a truncating remainder
+   (mpz_tdiv_r) followed by an adjustment when the dividend was
+   negative, so the result is always non-negative.  */
+void
+#if __STDC__
+mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
+#else
+mpz_mod (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ mpz_srcptr divisor;
+#endif
+{
+ mp_size_t divisor_size = divisor->_mp_size;
+ mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* We need the original value of the divisor after the remainder has been
+ preliminary calculated. We have to copy it to temporary space if it's
+ the same variable as REM. */
+ if (rem == divisor)
+ {
+ MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
+ mpz_set (temp_divisor, divisor);
+ divisor = temp_divisor;
+ }
+
+ mpz_tdiv_r (rem, dividend, divisor);
+
+ if (rem->_mp_size != 0)
+ {
+ /* tdiv's remainder carries the dividend's sign; add |divisor| to a
+    negative remainder to shift it into [0, |divisor|) */
+ if (dividend->_mp_size < 0)
+ if (divisor->_mp_size < 0)
+ mpz_sub (rem, rem, divisor);
+ else
+ mpz_add (rem, rem, divisor);
+ }
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/mul.c b/rts/gmp/mpz/mul.c
new file mode 100644
index 0000000000..7854788e50
--- /dev/null
+++ b/rts/gmp/mpz/mul.c
@@ -0,0 +1,131 @@
+/* mpz_mul -- Multiply two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+/* Multiply: w = u * v (named mult(u,v,w) in the Berkeley MP
+   compatibility build).  Handles all aliasing of w with u and/or v,
+   and grows w's limb allocation when needed. */
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_mul (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#else
+mult (u, v, w)
+ mpz_srcptr u;
+ mpz_srcptr v;
+ mpz_ptr w;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t vsize = v->_mp_size;
+ mp_size_t wsize;
+ mp_size_t sign_product;
+ mp_ptr up, vp;
+ mp_ptr wp;
+ mp_ptr free_me = NULL;
+ size_t free_me_size;
+ mp_limb_t cy_limb;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ /* signs live in the size fields, so the XOR is negative iff exactly
+    one operand is negative */
+ sign_product = usize ^ vsize;
+ usize = ABS (usize);
+ vsize = ABS (vsize);
+
+ /* mpn_mul below requires the operand with more limbs first */
+ if (usize < vsize)
+ {
+ /* Swap U and V. */
+ {const __mpz_struct *t = u; u = v; v = t;}
+ {mp_size_t t = usize; usize = vsize; vsize = t;}
+ }
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ wp = w->_mp_d;
+
+ /* Ensure W has space enough to store the result. */
+ wsize = usize + vsize;
+ if (w->_mp_alloc < wsize)
+ {
+ /* if w shares limbs with an operand, defer freeing the old block
+    until after the multiply has read it */
+ if (wp == up || wp == vp)
+ {
+ free_me = wp;
+ free_me_size = w->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (wp, w->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ w->_mp_alloc = wsize;
+ wp = (mp_ptr) (*_mp_allocate_func) (wsize * BYTES_PER_MP_LIMB);
+ w->_mp_d = wp;
+ }
+ else
+ {
+ /* Make U and V not overlap with W. */
+ if (wp == up)
+ {
+ /* W and U are identical. Allocate temporary space for U. */
+ up = (mp_ptr) TMP_ALLOC (usize * BYTES_PER_MP_LIMB);
+ /* Is V identical too? Keep it identical with U. */
+ if (wp == vp)
+ vp = up;
+ /* Copy to the temporary space. */
+ MPN_COPY (up, wp, usize);
+ }
+ else if (wp == vp)
+ {
+ /* W and V are identical. Allocate temporary space for V. */
+ vp = (mp_ptr) TMP_ALLOC (vsize * BYTES_PER_MP_LIMB);
+ /* Copy to the temporary space. */
+ MPN_COPY (vp, wp, vsize);
+ }
+ }
+
+ if (vsize == 0)
+ {
+ wsize = 0;
+ }
+ else
+ {
+ cy_limb = mpn_mul (wp, up, usize, vp, vsize);
+ wsize = usize + vsize;
+ /* drop the top limb if the product did not carry into it */
+ wsize -= cy_limb == 0;
+ }
+
+ w->_mp_size = sign_product < 0 ? -wsize : wsize;
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/mul_2exp.c b/rts/gmp/mpz/mul_2exp.c
new file mode 100644
index 0000000000..abea5fed2c
--- /dev/null
+++ b/rts/gmp/mpz/mul_2exp.c
@@ -0,0 +1,76 @@
+/* mpz_mul_2exp -- Multiply a bignum by 2**CNT
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set w = u * 2^cnt.  The shift splits into whole limbs (limb_cnt)
+   plus a within-limb bit shift; low result limbs are zeroed last so
+   that u == w aliasing works. */
+void
+#if __STDC__
+mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
+#else
+mpz_mul_2exp (w, u, cnt)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t usize = u->_mp_size;
+ mp_size_t abs_usize = ABS (usize);
+ mp_size_t wsize;
+ mp_size_t limb_cnt;
+ mp_ptr wp;
+ mp_limb_t wlimb;
+
+ /* 0 * 2^cnt = 0 */
+ if (usize == 0)
+ {
+ w->_mp_size = 0;
+ return;
+ }
+
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+ /* +1 leaves room for a possible carry limb out of mpn_lshift */
+ wsize = abs_usize + limb_cnt + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ wp = w->_mp_d;
+ wsize = abs_usize + limb_cnt;
+
+ cnt %= BITS_PER_MP_LIMB;
+ if (cnt != 0)
+ {
+ wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt);
+ if (wlimb != 0)
+ {
+ wp[wsize] = wlimb;
+ wsize++;
+ }
+ }
+ else
+ {
+ /* pure whole-limb shift; decreasing copy handles the overlap
+    when u == w */
+ MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize);
+ }
+
+ /* Zero all whole limbs at low end. Do it here and not before calling
+ mpn_lshift, not to lose for U == W. */
+ MPN_ZERO (wp, limb_cnt);
+
+ /* the result keeps u's sign */
+ w->_mp_size = usize >= 0 ? wsize : -wsize;
+}
diff --git a/rts/gmp/mpz/mul_siui.c b/rts/gmp/mpz/mul_siui.c
new file mode 100644
index 0000000000..9849cd41b0
--- /dev/null
+++ b/rts/gmp/mpz/mul_siui.c
@@ -0,0 +1,81 @@
+/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to
+ MULTIPLIER times SMALL_MULTIPLICAND.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* One template source yields two functions: compiled with
+   OPERATION_mul_ui it becomes mpz_mul_ui, with OPERATION_mul_si it
+   becomes mpz_mul_si.  The macros select the function name, the
+   signedness of the small operand, and how its absolute value is
+   taken. */
+#ifdef OPERATION_mul_ui
+#define FUNCTION mpz_mul_ui
+#define MULTIPLICAND_UNSIGNED unsigned
+#define MULTIPLICAND_ABS(x) x
+#else
+#ifdef OPERATION_mul_si
+#define FUNCTION mpz_mul_si
+#define MULTIPLICAND_UNSIGNED
+#define MULTIPLICAND_ABS(x) ABS(x)
+#else
+Error, error, unrecognised OPERATION
+#endif
+#endif
+
+
+/* Set prod = mult * small_mult. */
+void
+#if __STDC__
+FUNCTION (mpz_ptr prod, mpz_srcptr mult,
+ MULTIPLICAND_UNSIGNED long int small_mult)
+#else
+FUNCTION (prod, mult, small_mult)
+ mpz_ptr prod;
+ mpz_srcptr mult;
+ MULTIPLICAND_UNSIGNED long int small_mult;
+#endif
+{
+ mp_size_t size = mult->_mp_size;
+ mp_size_t sign_product = size;
+ mp_limb_t cy;
+ mp_size_t prod_size;
+ mp_ptr prod_ptr;
+
+ /* 0 * x = x * 0 = 0 */
+ if (size == 0 || small_mult == 0)
+ {
+ prod->_mp_size = 0;
+ return;
+ }
+ size = ABS (size);
+
+ /* one extra limb for a possible carry out of mpn_mul_1 */
+ prod_size = size + 1;
+ if (prod->_mp_alloc < prod_size)
+ _mpz_realloc (prod, prod_size);
+
+ prod_ptr = prod->_mp_d;
+
+ /* NOTE(review): in the mpz_mul_si build MULTIPLICAND_ABS is ABS on a
+    signed long, which overflows for small_mult == LONG_MIN -- confirm
+    whether that input is excluded by contract. */
+ cy = mpn_mul_1 (prod_ptr, mult->_mp_d, size,
+ (mp_limb_t) MULTIPLICAND_ABS (small_mult));
+ if (cy != 0)
+ {
+ prod_ptr[size] = cy;
+ size++;
+ }
+
+ /* small_mult < 0 is constant-false in the unsigned (mul_ui) build */
+ prod->_mp_size = ((sign_product < 0) ^ (small_mult < 0)) ? -size : size;
+}
diff --git a/rts/gmp/mpz/neg.c b/rts/gmp/mpz/neg.c
new file mode 100644
index 0000000000..566c3a95aa
--- /dev/null
+++ b/rts/gmp/mpz/neg.c
@@ -0,0 +1,53 @@
+/* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set w = -u.  When w and u are distinct objects the limbs are copied
+   and the size field's sign flipped; when they alias, only the sign
+   flips.  Negating zero leaves zero (size 0 negates to 0). */
+void
+#if __STDC__
+mpz_neg (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_neg (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+
+ usize = u->_mp_size;
+
+ if (u != w)
+ {
+ size = ABS (usize);
+
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ }
+
+ /* sign lives in the size field */
+ w->_mp_size = -usize;
+}
diff --git a/rts/gmp/mpz/nextprime.c b/rts/gmp/mpz/nextprime.c
new file mode 100644
index 0000000000..f024dd1206
--- /dev/null
+++ b/rts/gmp/mpz/nextprime.c
@@ -0,0 +1,120 @@
+/* mpz_nextprime(p,t) - compute the next prime > t and store that in p.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Set p to the next prime greater than t.  Each candidate is checked
+   with mpz_probab_prime_p using 5 rounds, so the result is a probable
+   prime, not a certified one. */
+void
+#if __STDC__
+mpz_nextprime (mpz_ptr p, mpz_srcptr t)
+#else
+mpz_nextprime (p, t)
+ mpz_ptr p;
+ mpz_srcptr t;
+#endif
+{
+ /* scan t+1, t+2, ... until a candidate passes the test */
+ mpz_add_ui (p, t, 1L);
+ while (! mpz_probab_prime_p (p, 5))
+ mpz_add_ui (p, p, 1L);
+}
+
+#if 0
+/* This code is not yet tested.  Will be enabled in 3.1.
+   Sieving version: track the odd candidate's residues modulo a table
+   of small odd primes and only run Miller-Rabin on candidates none of
+   those primes divide.
+   Fixes relative to the draft: `status' was a typo for `static';
+   prime_limit was used but never declared; the unused `mpz_t tmp' is
+   removed; TMP_DECL/TMP_MARK/TMP_FREE now bracket the TMP_ALLOC. */
+
+static const unsigned short primes[] =
+{
+3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
+101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,
+191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,
+281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,
+389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487,
+491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601,
+607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,
+719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,
+829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947,
+953,967,971,977,983,991,997
+};
+
+#define NUMBER_OF_PRIMES 167
+
+void
+#if __STDC__
+mpz_nextprime (mpz_ptr p, mpz_srcptr n)
+#else
+mpz_nextprime (p, n)
+ mpz_ptr p;
+ mpz_srcptr n;
+#endif
+{
+ unsigned short *moduli;
+ unsigned long difference;
+ int i;
+ int composite;
+ int prime_limit;
+ TMP_DECL (marker);
+
+ /* First handle tiny numbers */
+ if (mpz_cmp_ui (n, 2) < 0)
+ {
+ mpz_set_ui (p, 2);
+ return;
+ }
+ /* start from the first odd number at or above n+1 */
+ mpz_add_ui (p, n, 1);
+ mpz_setbit (p, 0);
+
+ if (mpz_cmp_ui (p, 7) <= 0)
+ return;
+
+ TMP_MARK (marker);
+
+ prime_limit = NUMBER_OF_PRIMES - 1;
+ if (mpz_cmp_ui (p, primes[prime_limit]) <= 0)
+ /* Just use first three entries (3,5,7) of table for small numbers */
+ prime_limit = 3;
+ if (prime_limit)
+ {
+ /* Compute residues modulo small odd primes */
+ moduli = (unsigned short *) TMP_ALLOC (prime_limit * sizeof moduli[0]);
+ for (i = 0; i < prime_limit; i++)
+ moduli[i] = mpz_fdiv_ui (p, primes[i]);
+ }
+ for (difference = 0; ; difference += 2)
+ {
+ composite = 0;
+
+ /* First check residues; advance each residue by 2 in step with the
+    candidate */
+ for (i = 0; i < prime_limit; i++)
+ {
+ int acc, pr;
+ composite |= (moduli[i] == 0);
+ acc = moduli[i] + 2;
+ pr = primes[i];
+ moduli[i] = acc >= pr ? acc - pr : acc;
+ }
+ if (composite)
+ continue;
+
+ mpz_add_ui (p, p, difference);
+ difference = 0;
+
+ /* Miller-Rabin test */
+ if (mpz_millerrabin (p, 2))
+ break;
+ }
+
+ TMP_FREE (marker);
+}
+#endif
diff --git a/rts/gmp/mpz/out_raw.c b/rts/gmp/mpz/out_raw.c
new file mode 100644
index 0000000000..62709479c5
--- /dev/null
+++ b/rts/gmp/mpz/out_raw.c
@@ -0,0 +1,89 @@
+/* mpz_out_raw -- Output a mpz_t in binary. Use an endianness and word size
+ independent format.
+
+Copyright (C) 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Write x to stream in a portable binary format: a 4-byte big-endian
+   byte count (negated when x < 0) followed by the magnitude bytes,
+   most significant first.  Returns the number of bytes written, or 0
+   on a write error. */
+size_t
+#if __STDC__
+mpz_out_raw (FILE *stream, mpz_srcptr x)
+#else
+mpz_out_raw (stream, x)
+ FILE *stream;
+ mpz_srcptr x;
+#endif
+{
+ int i;
+ mp_size_t s;
+ mp_size_t xsize = ABS (x->_mp_size);
+ mp_srcptr xp = x->_mp_d;
+ mp_size_t out_bytesize;
+ mp_limb_t hi_limb;
+ int n_bytes_in_hi_limb;
+
+ /* a null stream means stdout */
+ if (stream == 0)
+ stream = stdout;
+
+ /* zero is represented by a zero byte count alone */
+ if (xsize == 0)
+ {
+ for (i = 4 - 1; i >= 0; i--)
+ fputc (0, stream);
+ return ferror (stream) ? 0 : 4;
+ }
+
+ /* count the significant bytes in the most significant limb */
+ hi_limb = xp[xsize - 1];
+ for (i = BYTES_PER_MP_LIMB - 1; i > 0; i--)
+ {
+ if ((hi_limb >> i * BITS_PER_CHAR) != 0)
+ break;
+ }
+ n_bytes_in_hi_limb = i + 1;
+ out_bytesize = BYTES_PER_MP_LIMB * (xsize - 1) + n_bytes_in_hi_limb;
+ /* the sign of x is carried by negating the byte count */
+ if (x->_mp_size < 0)
+ out_bytesize = -out_bytesize;
+
+ /* Make the size 4 bytes on all machines, to make the format portable. */
+ for (i = 4 - 1; i >= 0; i--)
+ fputc ((out_bytesize >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR),
+ stream);
+
+ /* Output from the most significant limb to the least significant limb,
+ with each limb also output in decreasing significance order. */
+
+ /* Output the most significant limb separately, since we will only
+ output some of its bytes. */
+ for (i = n_bytes_in_hi_limb - 1; i >= 0; i--)
+ fputc ((hi_limb >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), stream);
+
+ /* Output the remaining limbs. */
+ for (s = xsize - 2; s >= 0; s--)
+ {
+ mp_limb_t x_limb;
+
+ x_limb = xp[s];
+ for (i = BYTES_PER_MP_LIMB - 1; i >= 0; i--)
+ fputc ((x_limb >> (i * BITS_PER_CHAR)) % (1 << BITS_PER_CHAR), stream);
+ }
+ return ferror (stream) ? 0 : ABS (out_bytesize) + 4;
+}
diff --git a/rts/gmp/mpz/out_str.c b/rts/gmp/mpz/out_str.c
new file mode 100644
index 0000000000..bf971b0057
--- /dev/null
+++ b/rts/gmp/mpz/out_str.c
@@ -0,0 +1,108 @@
+/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.
+ integer INTEGER in base BASE.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Print x on stream in the given base and return the number of
+   characters written, or 0 on a write error.  base 0 means decimal;
+   a negative base selects upper-case digits. */
+size_t
+#if __STDC__
+mpz_out_str (FILE *stream, int base, mpz_srcptr x)
+#else
+mpz_out_str (stream, base, x)
+ FILE *stream;
+ int base;
+ mpz_srcptr x;
+#endif
+{
+ mp_ptr xp;
+ mp_size_t x_size = x->_mp_size;
+ unsigned char *str;
+ size_t str_size;
+ size_t i;
+ size_t written;
+ char *num_to_text;
+ TMP_DECL (marker);
+
+ /* a null stream means stdout */
+ if (stream == 0)
+ stream = stdout;
+
+ if (base >= 0)
+ {
+ if (base == 0)
+ base = 10;
+ num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
+ }
+ else
+ {
+ /* negative base: same magnitude, upper-case digit alphabet */
+ base = -base;
+ num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ }
+
+ if (x_size == 0)
+ {
+ fputc ('0', stream);
+ return ferror (stream) ? 0 : 1;
+ }
+
+ written = 0;
+
+ if (x_size < 0)
+ {
+ fputc ('-', stream);
+ x_size = -x_size;
+ written = 1;
+ }
+
+ TMP_MARK (marker);
+ /* worst-case digit count via chars_per_bit_exactly, plus slack for
+    rounding and the terminating NUL below */
+ str_size = ((size_t) (x_size * BITS_PER_MP_LIMB
+ * __mp_bases[base].chars_per_bit_exactly)) + 3;
+ str = (unsigned char *) TMP_ALLOC (str_size);
+
+ /* Move the number to convert into temporary space, since mpn_get_str
+ clobbers its argument + needs one extra high limb.... */
+ xp = (mp_ptr) TMP_ALLOC ((x_size + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (xp, x->_mp_d, x_size);
+
+ str_size = mpn_get_str (str, base, xp, x_size);
+
+ /* mpn_get_str might make some leading zeros. Skip them. */
+ while (*str == 0)
+ {
+ str_size--;
+ str++;
+ }
+
+ /* Translate to printable chars. */
+ for (i = 0; i < str_size; i++)
+ str[i] = num_to_text[str[i]];
+ str[str_size] = 0;
+
+ {
+ size_t fwret;
+ fwret = fwrite ((char *) str, 1, str_size, stream);
+ written += fwret;
+ }
+
+ TMP_FREE (marker);
+ return ferror (stream) ? 0 : written;
+}
diff --git a/rts/gmp/mpz/perfpow.c b/rts/gmp/mpz/perfpow.c
new file mode 100644
index 0000000000..e71670a0be
--- /dev/null
+++ b/rts/gmp/mpz/perfpow.c
@@ -0,0 +1,272 @@
+/* mpz_perfect_power_p(arg) -- Return non-zero if ARG is a perfect power,
+ zero otherwise.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+ We are to determine if c is a perfect power, c = a ^ b.
+ Assume c is divisible by 2^n and that codd = c/2^n is odd.
+ Assume a is divisible by 2^m and that aodd = a/2^m is odd.
+ It is always true that m divides n.
+
+ * If n is prime, either 1) a is 2*aodd and b = n
+ or 2) a = c and b = 1.
+ So for n prime, we readily have a solution.
+ * If n is factorable into the non-trivial factors p1,p2,...
+ Since m divides n, m has a subset of n's factors and b = n / m.
+
+ BUG: Should handle negative numbers, since they can be odd perfect powers.
+*/
+
+/* This is a naive approach to recognizing perfect powers.
+ Many things can be improved. In particular, we should use p-adic
+ arithmetic for computing possible roots. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static unsigned long int gcd _PROTO ((unsigned long int a, unsigned long int b));
+static int isprime _PROTO ((unsigned long int t));
+
+static const unsigned short primes[] =
+{ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
+ 59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,107,109,113,127,131,
+ 137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,
+ 227,229,233,239,241,251,257,263,269,271,277,281,283,293,307,311,
+ 313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,409,
+ 419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,
+ 509,521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,
+ 617,619,631,641,643,647,653,659,661,673,677,683,691,701,709,719,
+ 727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827,
+ 829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,
+ 947,953,967,971,977,983,991,997,0
+};
+#define SMALLEST_OMITTED_PRIME 1009
+
+
+int
+#if __STDC__
+mpz_perfect_power_p (mpz_srcptr u)
+#else
+mpz_perfect_power_p (u)
+ mpz_srcptr u;
+#endif
+{
+ unsigned long int prime;
+ unsigned long int n, n2;
+ int i;
+ unsigned long int rem;
+ mpz_t u2, q;
+ int exact;
+ mp_size_t uns;
+ TMP_DECL (marker);
+
+ if (mpz_cmp_ui (u, 1) <= 0)
+ return 0;
+
+ n2 = mpz_scan1 (u, 0);
+ if (n2 == 1)
+ return 0;
+
+ TMP_MARK (marker);
+
+ uns = ABSIZ (u) - n2 / BITS_PER_MP_LIMB;
+ MPZ_TMP_INIT (q, uns);
+ MPZ_TMP_INIT (u2, uns);
+
+ mpz_tdiv_q_2exp (u2, u, n2);
+
+ if (isprime (n2))
+ goto n2prime;
+
+ for (i = 1; primes[i] != 0; i++)
+ {
+ prime = primes[i];
+ rem = mpz_tdiv_ui (u2, prime);
+ if (rem == 0) /* divisable? */
+ {
+ rem = mpz_tdiv_q_ui (q, u2, prime * prime);
+ if (rem != 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+ mpz_swap (q, u2);
+ for (n = 2;;)
+ {
+ rem = mpz_tdiv_q_ui (q, u2, prime);
+ if (rem != 0)
+ break;
+ mpz_swap (q, u2);
+ n++;
+ }
+
+ n2 = gcd (n2, n);
+ if (n2 == 1)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+
+ /* As soon as n2 becomes a prime number, stop factoring.
+ Either we have u=x^n2 or u is not a perfect power. */
+ if (isprime (n2))
+ goto n2prime;
+ }
+ }
+
+ if (mpz_cmp_ui (u2, 1) == 0)
+ {
+ TMP_FREE (marker);
+ return 1;
+ }
+
+ if (n2 == 0)
+ {
+ unsigned long int nth;
+ /* We did not find any factors above. We have to consider all values
+ of n. */
+ for (nth = 2;; nth++)
+ {
+ if (! isprime (nth))
+ continue;
+#if 0
+ exact = mpz_padic_root (q, u2, nth, PTH);
+ if (exact)
+#endif
+ exact = mpz_root (q, u2, nth);
+ if (exact)
+ {
+ TMP_FREE (marker);
+ return 1;
+ }
+ if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+ }
+ }
+ else
+ {
+ unsigned long int nth;
+      /* We found some factors above.  We just need to consider values of n
+	 that divide n2.  */
+ for (nth = 2; nth <= n2; nth++)
+ {
+ if (! isprime (nth))
+ continue;
+ if (n2 % nth != 0)
+ continue;
+#if 0
+ exact = mpz_padic_root (q, u2, nth, PTH);
+ if (exact)
+#endif
+ exact = mpz_root (q, u2, nth);
+ if (exact)
+ {
+ TMP_FREE (marker);
+ return 1;
+ }
+ if (mpz_cmp_ui (q, SMALLEST_OMITTED_PRIME) < 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+ }
+
+ TMP_FREE (marker);
+ return 0;
+ }
+
+n2prime:
+ exact = mpz_root (NULL, u2, n2);
+ TMP_FREE (marker);
+ return exact;
+}
+
+static unsigned long int
+#if __STDC__
+gcd (unsigned long int a, unsigned long int b)
+#else
+gcd (a, b)
+ unsigned long int a, b;
+#endif
+{
+ int an2, bn2, n2;
+
+ if (a == 0)
+ return b;
+ if (b == 0)
+ return a;
+
+ count_trailing_zeros (an2, a);
+ a >>= an2;
+
+ count_trailing_zeros (bn2, b);
+ b >>= bn2;
+
+ n2 = MIN (an2, bn2);
+
+ while (a != b)
+ {
+ if (a > b)
+ {
+ a -= b;
+ do
+ a >>= 1;
+ while ((a & 1) == 0);
+ }
+ else /* b > a. */
+ {
+ b -= a;
+ do
+ b >>= 1;
+ while ((b & 1) == 0);
+ }
+ }
+
+ return a << n2;
+}
+
+static int
+#if __STDC__
+isprime (unsigned long int t)
+#else
+isprime (t)
+ unsigned long int t;
+#endif
+{
+ unsigned long int q, r, d;
+
+ if (t < 3 || (t & 1) == 0)
+ return t == 2;
+
+ for (d = 3, r = 1; r != 0; d += 2)
+ {
+ q = t / d;
+ r = t - q * d;
+ if (q < d)
+ return 1;
+ }
+ return 0;
+}
diff --git a/rts/gmp/mpz/perfsqr.c b/rts/gmp/mpz/perfsqr.c
new file mode 100644
index 0000000000..92e8d08ea9
--- /dev/null
+++ b/rts/gmp/mpz/perfsqr.c
@@ -0,0 +1,45 @@
+/* mpz_perfect_square_p(arg) -- Return non-zero if ARG is a perfect square,
+ zero otherwise.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_perfect_square_p (mpz_srcptr a)
+#else
+mpz_perfect_square_p (a)
+ mpz_srcptr a;
+#endif
+{
+ mp_size_t asize = a->_mp_size;
+
+ /* No negative numbers are perfect squares. */
+ if (asize < 0)
+ return 0;
+
+ /* Zero is a perfect square. */
+ if (asize == 0)
+ return 1;
+
+ return mpn_perfect_square_p (a->_mp_d, asize);
+}
diff --git a/rts/gmp/mpz/popcount.c b/rts/gmp/mpz/popcount.c
new file mode 100644
index 0000000000..3105258e26
--- /dev/null
+++ b/rts/gmp/mpz/popcount.c
@@ -0,0 +1,42 @@
+/* mpz_popcount(mpz_ptr op) -- Population count of OP. If the operand is
+ negative, return ~0 (a novel representation of infinity).
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_popcount (mpz_srcptr u)
+#else
+mpz_popcount (u)
+ mpz_srcptr u;
+#endif
+{
+ mp_size_t usize;
+
+ usize = u->_mp_size;
+
+ if ((usize) < 0)
+ return ~ (unsigned long int) 0;
+
+ return mpn_popcount (u->_mp_d, usize);
+}
diff --git a/rts/gmp/mpz/pow_ui.c b/rts/gmp/mpz/pow_ui.c
new file mode 100644
index 0000000000..96ca114e4d
--- /dev/null
+++ b/rts/gmp/mpz/pow_ui.c
@@ -0,0 +1,129 @@
+/* mpz_pow_ui(res, base, exp) -- Set RES to BASE**EXP.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_pow_ui (mpz_ptr r, mpz_srcptr b, unsigned long int e)
+#else
+mpz_pow_ui (r, b, e)
+ mpz_ptr r;
+ mpz_srcptr b;
+ unsigned long int e;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+rpow (const MINT *b, signed short int e, MINT *r)
+#else
+rpow (b, e, r)
+ const MINT *b;
+ signed short int e;
+ MINT *r;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_ptr rp, bp, tp, xp;
+ mp_size_t ralloc, rsize, bsize;
+ int cnt, i;
+ mp_limb_t blimb;
+ TMP_DECL (marker);
+
+ bsize = ABS (b->_mp_size);
+
+ /* Single out cases that give result == 0 or 1. These tests are here
+ to simplify the general code below, not to optimize. */
+ if (e == 0)
+ {
+ r->_mp_d[0] = 1;
+ r->_mp_size = 1;
+ return;
+ }
+ if (bsize == 0
+#ifdef BERKELEY_MP
+ || e < 0
+#endif
+ )
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ bp = b->_mp_d;
+
+ blimb = bp[bsize - 1];
+ if (bsize == 1 && blimb < 0x100)
+ {
+ /* Estimate space requirements accurately. Using the code from the
+ `else' path would over-estimate space requirements wildly. */
+ float lb = __mp_bases[blimb].chars_per_bit_exactly;
+ ralloc = 3 + ((mp_size_t) (e / lb) / BITS_PER_MP_LIMB);
+ }
+ else
+ {
+ /* Over-estimate space requirements somewhat. */
+ count_leading_zeros (cnt, blimb);
+ ralloc = bsize * e - cnt * e / BITS_PER_MP_LIMB + 2;
+ }
+
+ TMP_MARK (marker);
+
+  /* The two areas are used to alternately hold the input and receive the
+ product for mpn_mul. (This scheme is used to fulfill the requirements
+ of mpn_mul; that the product space may not be the same as any of the
+ input operands.) */
+ rp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+ tp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+
+ MPN_COPY (rp, bp, bsize);
+ rsize = bsize;
+ count_leading_zeros (cnt, e);
+
+ for (i = BITS_PER_MP_LIMB - cnt - 2; i >= 0; i--)
+ {
+ mpn_mul_n (tp, rp, rp, rsize);
+ rsize = 2 * rsize;
+ rsize -= tp[rsize - 1] == 0;
+ xp = tp; tp = rp; rp = xp;
+
+ if ((e & ((mp_limb_t) 1 << i)) != 0)
+ {
+ rsize = rsize + bsize - (mpn_mul (tp, rp, rsize, bp, bsize) == 0);
+ xp = tp; tp = rp; rp = xp;
+ }
+ }
+
+  /* Now that we know the exact space requirements, reallocate if
+ necessary. */
+ if (r->_mp_alloc < rsize)
+ _mpz_realloc (r, rsize);
+
+ MPN_COPY (r->_mp_d, rp, rsize);
+ r->_mp_size = (e & 1) == 0 || b->_mp_size >= 0 ? rsize : -rsize;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/powm.c b/rts/gmp/mpz/powm.c
new file mode 100644
index 0000000000..e6af855a71
--- /dev/null
+++ b/rts/gmp/mpz/powm.c
@@ -0,0 +1,364 @@
+/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+/* Set c <- (a*b)/R^n mod m; c must have at least 2n allocated limbs.  */
+static void
+#if __STDC__
+mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim)
+#else
+mpz_redc (c, a, b, m, Nprim)
+ mpz_ptr c;
+ mpz_srcptr a;
+ mpz_srcptr b;
+ mpz_srcptr m;
+ mp_limb_t Nprim;
+#endif
+{
+ mp_ptr cp, mp = PTR (m);
+ mp_limb_t cy, cout = 0;
+ mp_limb_t q;
+ size_t j, n = ABSIZ (m);
+
+ ASSERT (ALLOC (c) >= 2 * n);
+
+ mpz_mul (c, a, b);
+ cp = PTR (c);
+ j = ABSIZ (c);
+ MPN_ZERO (cp + j, 2 * n - j);
+ for (j = 0; j < n; j++)
+ {
+ q = cp[0] * Nprim;
+ cy = mpn_addmul_1 (cp, mp, n, q);
+ cout += mpn_add_1 (cp + n, cp + n, n - j, cy);
+ cp++;
+ }
+ cp -= n;
+ if (cout)
+ {
+ cy = cout - mpn_sub_n (cp, cp + n, mp, n);
+ while (cy)
+ cy -= mpn_sub_n (cp, cp, mp, n);
+ }
+ else
+ MPN_COPY (cp, cp + n, n);
+ MPN_NORMALIZE (cp, n);
+ SIZ (c) = SIZ (c) < 0 ? -n : n;
+}
+
+/* average number of calls to redc for an exponent of n bits
+ with the sliding window algorithm of base 2^k: the optimal is
+ obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
+
+ n\k 4 5 6 7 8
+ 128 156* 159 171 200 261
+ 256 309 307* 316 343 403
+ 512 617 607* 610 632 688
+ 1024 1231 1204 1195* 1207 1256
+ 2048 2461 2399 2366 2360* 2396
+ 4096 4918 4787 4707 4665* 4670
+*/
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod)
+#else
+mpz_powm (res, base, e, mod)
+ mpz_ptr res;
+ mpz_srcptr base;
+ mpz_srcptr e;
+ mpz_srcptr mod;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res)
+#else
+pow (base, e, mod, res)
+ mpz_srcptr base;
+ mpz_srcptr e;
+ mpz_srcptr mod;
+ mpz_ptr res;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_limb_t invm, *ep, c, mask;
+ mpz_t xx, *g;
+ mp_size_t n, i, K, j, l, k;
+ int sh;
+ int use_redc;
+
+#ifdef POWM_DEBUG
+ mpz_t exp;
+ mpz_init (exp);
+#endif
+
+ n = ABSIZ (mod);
+
+ if (n == 0)
+ DIVIDE_BY_ZERO;
+
+ if (SIZ (e) == 0)
+ {
+ /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+ depending on if MOD equals 1. */
+ SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1;
+ PTR(res)[0] = 1;
+ return;
+ }
+
+ /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.
+ In REDC each modular multiplication costs about 2*n^2 limbs operations,
+ whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a
+ multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
+ for example using Burnikel-Ziegler's algorithm. This gives a theoretical
+ threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
+ 2.66. */
+ /* For now, also disable REDC when MOD is even, as the inverse can't
+ handle that. */
+
+#ifndef POWM_THRESHOLD
+#define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3)
+#endif
+
+ use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0);
+ if (use_redc)
+ {
+ /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
+ modlimb_invert (invm, PTR(mod)[0]);
+ invm = -invm;
+ }
+
+ /* determines optimal value of k */
+ l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */
+ k = 1;
+ K = 2;
+ while (2 * l > K * (2 + k * (3 + k)))
+ {
+ k++;
+ K *= 2;
+ }
+
+ g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t));
+ /* compute x*R^n where R=2^BITS_PER_MP_LIMB */
+ mpz_init (g[0]);
+ if (use_redc)
+ {
+ mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB);
+ mpz_mod (g[0], g[0], mod);
+ }
+ else
+ mpz_mod (g[0], base, mod);
+
+ /* compute xx^g for odd g < 2^k */
+ mpz_init (xx);
+ if (use_redc)
+ {
+ _mpz_realloc (xx, 2 * n);
+ mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */
+ }
+ else
+ {
+ mpz_mul (xx, g[0], g[0]);
+ mpz_mod (xx, xx, mod);
+ }
+ for (i = 1; i < K / 2; i++)
+ {
+ mpz_init (g[i]);
+ if (use_redc)
+ {
+ _mpz_realloc (g[i], 2 * n);
+ mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */
+ }
+ else
+ {
+ mpz_mul (g[i], g[i - 1], xx);
+ mpz_mod (g[i], g[i], mod);
+ }
+ }
+
+ /* now starts the real stuff */
+ mask = (mp_limb_t) ((1<<k) - 1);
+ ep = PTR (e);
+ i = ABSIZ (e) - 1; /* current index */
+ c = ep[i]; /* current limb */
+ count_leading_zeros (sh, c);
+ sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */
+ sh -= k; /* index of lower bit of ep[i] to take into account */
+ if (sh < 0)
+ { /* k-sh extra bits are needed */
+ if (i > 0)
+ {
+ i--;
+ c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
+ sh += BITS_PER_MP_LIMB;
+ }
+ }
+ else
+ c = c >> sh;
+#ifdef POWM_DEBUG
+ printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm);
+ mpz_set_ui (exp, c);
+#endif
+ j=0;
+ while (c % 2 == 0)
+ {
+ j++;
+ c = (c >> 1);
+ }
+ mpz_set (xx, g[c >> 1]);
+ while (j--)
+ {
+ if (use_redc)
+ mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ }
+
+#ifdef POWM_DEBUG
+ printf ("x^"); mpz_out_str (0, 10, exp);
+ printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
+ putchar ('\n');
+#endif
+
+ while (i > 0 || sh > 0)
+ {
+ c = ep[i];
+ sh -= k;
+ l = k; /* number of bits treated */
+ if (sh < 0)
+ {
+ if (i > 0)
+ {
+ i--;
+ c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
+ sh += BITS_PER_MP_LIMB;
+ }
+ else
+ {
+ l += sh; /* may be less bits than k here */
+ c = c & ((1<<l) - 1);
+ }
+ }
+ else
+ c = c >> sh;
+ c = c & mask;
+
+ /* this while loop implements the sliding window improvement */
+ while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0))
+ {
+ if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ if (sh)
+ {
+ sh--;
+ c = (c<<1) + ((ep[i]>>sh) & 1);
+ }
+ else
+ {
+ i--;
+ sh = BITS_PER_MP_LIMB - 1;
+ c = (c<<1) + (ep[i]>>sh);
+ }
+ }
+
+#ifdef POWM_DEBUG
+ printf ("l=%u c=%lu\n", l, c);
+ mpz_mul_2exp (exp, exp, k);
+ mpz_add_ui (exp, exp, c);
+#endif
+
+ /* now replace xx by xx^(2^k)*x^c */
+ if (c != 0)
+ {
+ j = 0;
+ while (c % 2 == 0)
+ {
+ j++;
+ c = c >> 1;
+ }
+ /* c0 = c * 2^j, i.e. xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */
+ l -= j;
+ while (l--)
+ if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ if (use_redc)
+ mpz_redc (xx, xx, g[c >> 1], mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, g[c >> 1]);
+ mpz_mod (xx, xx, mod);
+ }
+ }
+ else
+ j = l; /* case c=0 */
+ while (j--)
+ {
+ if (use_redc)
+ mpz_redc (xx, xx, xx, mod, invm);
+ else
+ {
+ mpz_mul (xx, xx, xx);
+ mpz_mod (xx, xx, mod);
+ }
+ }
+#ifdef POWM_DEBUG
+ printf ("x^"); mpz_out_str (0, 10, exp);
+ printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
+ putchar ('\n');
+#endif
+ }
+
+ /* now convert back xx to xx/R^n */
+ if (use_redc)
+ {
+ mpz_set_ui (g[0], 1);
+ mpz_redc (xx, xx, g[0], mod, invm);
+ if (mpz_cmp (xx, mod) >= 0)
+ mpz_sub (xx, xx, mod);
+ }
+ mpz_set (res, xx);
+
+ mpz_clear (xx);
+ for (i = 0; i < K / 2; i++)
+ mpz_clear (g[i]);
+ (*_mp_free_func) (g, K / 2 * sizeof (mpz_t));
+}
diff --git a/rts/gmp/mpz/powm_ui.c b/rts/gmp/mpz/powm_ui.c
new file mode 100644
index 0000000000..00f70bd563
--- /dev/null
+++ b/rts/gmp/mpz/powm_ui.c
@@ -0,0 +1,248 @@
+/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_powm_ui (mpz_ptr res, mpz_srcptr base, unsigned long int exp, mpz_srcptr mod)
+#else
+mpz_powm_ui (res, base, exp, mod)
+ mpz_ptr res;
+ mpz_srcptr base;
+ unsigned long int exp;
+ mpz_srcptr mod;
+#endif
+{
+ mp_ptr rp, mp, bp;
+ mp_size_t msize, bsize, rsize;
+ mp_size_t size;
+ int mod_shift_cnt;
+ int negative_result;
+ mp_limb_t *free_me = NULL;
+ size_t free_me_size;
+ TMP_DECL (marker);
+
+ msize = ABS (mod->_mp_size);
+ size = 2 * msize;
+
+ rp = res->_mp_d;
+
+ if (msize == 0)
+ DIVIDE_BY_ZERO;
+
+ if (exp == 0)
+ {
+ /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+ depending on if MOD equals 1. */
+ res->_mp_size = (msize == 1 && (mod->_mp_d)[0] == 1) ? 0 : 1;
+ rp[0] = 1;
+ return;
+ }
+
+ TMP_MARK (marker);
+
+ /* Normalize MOD (i.e. make its most significant bit set) as required by
+ mpn_divmod. This will make the intermediate values in the calculation
+ slightly larger, but the correct result is obtained after a final
+ reduction using the original MOD value. */
+
+ mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB);
+ count_leading_zeros (mod_shift_cnt, mod->_mp_d[msize - 1]);
+ if (mod_shift_cnt != 0)
+ mpn_lshift (mp, mod->_mp_d, msize, mod_shift_cnt);
+ else
+ MPN_COPY (mp, mod->_mp_d, msize);
+
+ bsize = ABS (base->_mp_size);
+ if (bsize > msize)
+ {
+ /* The base is larger than the module. Reduce it. */
+
+ /* Allocate (BSIZE + 1) with space for remainder and quotient.
+ (The quotient is (bsize - msize + 1) limbs.) */
+ bp = (mp_ptr) TMP_ALLOC ((bsize + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (bp, base->_mp_d, bsize);
+ /* We don't care about the quotient, store it above the remainder,
+ at BP + MSIZE. */
+ mpn_divmod (bp + msize, bp, bsize, mp, msize);
+ bsize = msize;
+ /* Canonicalize the base, since we are going to multiply with it
+ quite a few times. */
+ MPN_NORMALIZE (bp, bsize);
+ }
+ else
+ bp = base->_mp_d;
+
+ if (bsize == 0)
+ {
+ res->_mp_size = 0;
+ TMP_FREE (marker);
+ return;
+ }
+
+ if (res->_mp_alloc < size)
+ {
+ /* We have to allocate more space for RES. If any of the input
+ parameters are identical to RES, defer deallocation of the old
+ space. */
+
+ if (rp == mp || rp == bp)
+ {
+ free_me = rp;
+ free_me_size = res->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (rp, res->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ rp = (mp_ptr) (*_mp_allocate_func) (size * BYTES_PER_MP_LIMB);
+ res->_mp_alloc = size;
+ res->_mp_d = rp;
+ }
+ else
+ {
+ /* Make BASE, EXP and MOD not overlap with RES. */
+ if (rp == bp)
+ {
+ /* RES and BASE are identical. Allocate temp. space for BASE. */
+ bp = (mp_ptr) TMP_ALLOC (bsize * BYTES_PER_MP_LIMB);
+ MPN_COPY (bp, rp, bsize);
+ }
+ if (rp == mp)
+ {
+ /* RES and MOD are identical. Allocate temporary space for MOD. */
+ mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB);
+ MPN_COPY (mp, rp, msize);
+ }
+ }
+
+ MPN_COPY (rp, bp, bsize);
+ rsize = bsize;
+
+ {
+ mp_ptr xp = (mp_ptr) TMP_ALLOC (2 * (msize + 1) * BYTES_PER_MP_LIMB);
+ int c;
+ mp_limb_t e;
+ mp_limb_t carry_limb;
+
+ negative_result = (exp & 1) && base->_mp_size < 0;
+
+ e = exp;
+ count_leading_zeros (c, e);
+ e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
+ c = BITS_PER_MP_LIMB - 1 - c;
+
+ /* Main loop.
+
+ Make the result be pointed to alternately by XP and RP. This
+ helps us avoid block copying, which would otherwise be necessary
+ with the overlap restrictions of mpn_divmod. With 50% probability
+ the result after this loop will be in the area originally pointed
+ by RP (==RES->_mp_d), and with 50% probability in the area originally
+ pointed to by XP. */
+
+ while (c != 0)
+ {
+ mp_ptr tp;
+ mp_size_t xsize;
+
+ mpn_mul_n (xp, rp, rp, rsize);
+ xsize = 2 * rsize;
+ xsize -= xp[xsize - 1] == 0;
+ if (xsize > msize)
+ {
+ mpn_divmod (xp + msize, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp; rp = xp; xp = tp;
+ rsize = xsize;
+
+ if ((mp_limb_signed_t) e < 0)
+ {
+ mpn_mul (xp, rp, rsize, bp, bsize);
+ xsize = rsize + bsize;
+ xsize -= xp[xsize - 1] == 0;
+ if (xsize > msize)
+ {
+ mpn_divmod (xp + msize, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp; rp = xp; xp = tp;
+ rsize = xsize;
+ }
+ e <<= 1;
+ c--;
+ }
+
+ /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+ steps. Adjust the result by reducing it with the original MOD.
+
+ Also make sure the result is put in RES->_mp_d (where it already
+ might be, see above). */
+
+ if (mod_shift_cnt != 0)
+ {
+ carry_limb = mpn_lshift (res->_mp_d, rp, rsize, mod_shift_cnt);
+ rp = res->_mp_d;
+ if (carry_limb != 0)
+ {
+ rp[rsize] = carry_limb;
+ rsize++;
+ }
+ }
+ else
+ {
+ MPN_COPY (res->_mp_d, rp, rsize);
+ rp = res->_mp_d;
+ }
+
+ if (rsize >= msize)
+ {
+ mpn_divmod (rp + msize, rp, rsize, mp, msize);
+ rsize = msize;
+ }
+
+ /* Remove any leading zero words from the result. */
+ if (mod_shift_cnt != 0)
+ mpn_rshift (rp, rp, rsize, mod_shift_cnt);
+ MPN_NORMALIZE (rp, rsize);
+ }
+
+ if (negative_result && rsize != 0)
+ {
+ if (mod_shift_cnt != 0)
+ mpn_rshift (mp, mp, msize, mod_shift_cnt);
+ mpn_sub (rp, mp, msize, rp, rsize);
+ rsize = msize;
+ MPN_NORMALIZE (rp, rsize);
+ }
+ res->_mp_size = rsize;
+
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/pprime_p.c b/rts/gmp/mpz/pprime_p.c
new file mode 100644
index 0000000000..82eb678238
--- /dev/null
+++ b/rts/gmp/mpz/pprime_p.c
@@ -0,0 +1,242 @@
+/* mpz_probab_prime_p --
+ An implementation of the probabilistic primality test found in Knuth's
+ Seminumerical Algorithms book. If the function mpz_probab_prime_p()
+ returns 0 then n is not prime. If it returns 1, then n is 'probably'
+ prime. If it returns 2, n is surely prime. The probability of a false
+ positive is (1/4)**reps, where reps is the number of internal passes of the
+ probabilistic algorithm. Knuth indicates that 25 passes are reasonable.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software
+Foundation, Inc. Miller-Rabin code contributed by John Amanatides.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int isprime _PROTO ((unsigned long int t));
+static int mpz_millerrabin _PROTO ((mpz_srcptr n, int reps));
+
+int
+#if __STDC__
+mpz_probab_prime_p (mpz_srcptr n, int reps)
+#else
+mpz_probab_prime_p (n, reps)
+ mpz_srcptr n;
+ int reps;
+#endif
+{
+ mp_limb_t r;
+
+ /* Handle small and negative n. */
+ if (mpz_cmp_ui (n, 1000000L) <= 0)
+ {
+ int is_prime;
+ if (mpz_sgn (n) < 0)
+ {
+ /* Negative number. Negate and call ourselves. */
+ mpz_t n2;
+ mpz_init (n2);
+ mpz_neg (n2, n);
+ is_prime = mpz_probab_prime_p (n2, reps);
+ mpz_clear (n2);
+ return is_prime;
+ }
+ is_prime = isprime (mpz_get_ui (n));
+ return is_prime ? 2 : 0;
+ }
+
+ /* If n is now even, it is not a prime. */
+ if ((mpz_get_ui (n) & 1) == 0)
+ return 0;
+
+ /* Check if n has small factors. */
+ if (UDIV_TIME > (2 * UMUL_TIME + 6))
+ r = mpn_preinv_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP, (mp_limb_t) PP_INVERTED);
+ else
+ r = mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP);
+ if (r % 3 == 0 || r % 5 == 0 || r % 7 == 0 || r % 11 == 0 || r % 13 == 0
+ || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
+#if BITS_PER_MP_LIMB == 64
+ || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
+ || r % 47 == 0 || r % 53 == 0
+#endif
+ )
+ {
+ return 0;
+ }
+
+ /* Do more dividing. We collect small primes, using umul_ppmm, until we
+ overflow a single limb. We divide our number by the small primes product,
+ and look for factors in the remainder. */
+ {
+ unsigned long int ln2;
+ unsigned long int q;
+ mp_limb_t p1, p0, p;
+ unsigned int primes[15];
+ int nprimes;
+
+ nprimes = 0;
+ p = 1;
+ ln2 = mpz_sizeinbase (n, 2) / 30; ln2 = ln2 * ln2;
+ for (q = BITS_PER_MP_LIMB == 64 ? 59 : 31; q < ln2; q += 2)
+ {
+ if (isprime (q))
+ {
+ umul_ppmm (p1, p0, p, q);
+ if (p1 != 0)
+ {
+ r = mpn_mod_1 (PTR(n), SIZ(n), p);
+ while (--nprimes >= 0)
+ if (r % primes[nprimes] == 0)
+ {
+ if (mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) primes[nprimes]) != 0)
+ abort ();
+ return 0;
+ }
+ p = q;
+ nprimes = 0;
+ }
+ else
+ {
+ p = p0;
+ }
+ primes[nprimes++] = q;
+ }
+ }
+ }
+
+ /* Perform a number of Miller-Rabin tests. */
+ return mpz_millerrabin (n, reps);
+}
+
+static int
+#if __STDC__
+isprime (unsigned long int t)
+#else
+isprime (t)
+ unsigned long int t;
+#endif
+{
+ unsigned long int q, r, d;
+
+ if (t < 3 || (t & 1) == 0)
+ return t == 2;
+
+ for (d = 3, r = 1; r != 0; d += 2)
+ {
+ q = t / d;
+ r = t - q * d;
+ if (q < d)
+ return 1;
+ }
+ return 0;
+}
+
+static int millerrabin _PROTO ((mpz_srcptr n, mpz_srcptr nm1,
+ mpz_ptr x, mpz_ptr y,
+ mpz_srcptr q, unsigned long int k));
+
+static int
+#if __STDC__
+mpz_millerrabin (mpz_srcptr n, int reps)
+#else
+mpz_millerrabin (n, reps)
+ mpz_srcptr n;
+ int reps;
+#endif
+{
+ int r;
+ mpz_t nm1, x, y, q;
+ unsigned long int k;
+ gmp_randstate_t rstate;
+ int is_prime;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+
+ MPZ_TMP_INIT (nm1, SIZ (n) + 1);
+ mpz_sub_ui (nm1, n, 1L);
+
+ MPZ_TMP_INIT (x, SIZ (n));
+ MPZ_TMP_INIT (y, 2 * SIZ (n)); /* mpz_powm_ui needs excessive memory!!! */
+
+ /* Perform a Fermat test. */
+ mpz_set_ui (x, 210L);
+ mpz_powm (y, x, nm1, n);
+ if (mpz_cmp_ui (y, 1L) != 0)
+ {
+ TMP_FREE (marker);
+ return 0;
+ }
+
+ MPZ_TMP_INIT (q, SIZ (n));
+
+ /* Find q and k, where q is odd and n = 1 + 2**k * q. */
+ k = mpz_scan1 (nm1, 0L);
+ mpz_tdiv_q_2exp (q, nm1, k);
+
+ gmp_randinit (rstate, GMP_RAND_ALG_DEFAULT, 32L);
+
+ is_prime = 1;
+ for (r = 0; r < reps && is_prime; r++)
+ {
+ do
+ mpz_urandomb (x, rstate, mpz_sizeinbase (n, 2) - 1);
+ while (mpz_cmp_ui (x, 1L) <= 0);
+
+ is_prime = millerrabin (n, nm1, x, y, q, k);
+ }
+
+ gmp_randclear (rstate);
+
+ TMP_FREE (marker);
+ return is_prime;
+}
+
+static int
+#if __STDC__
+millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
+ mpz_srcptr q, unsigned long int k)
+#else
+millerrabin (n, nm1, x, y, q, k)
+ mpz_srcptr n;
+ mpz_srcptr nm1;
+ mpz_ptr x;
+ mpz_ptr y;
+ mpz_srcptr q;
+ unsigned long int k;
+#endif
+{
+ unsigned long int i;
+
+ mpz_powm (y, x, q, n);
+
+ if (mpz_cmp_ui (y, 1L) == 0 || mpz_cmp (y, nm1) == 0)
+ return 1;
+
+ for (i = 1; i < k; i++)
+ {
+ mpz_powm_ui (y, y, 2L, n);
+ if (mpz_cmp (y, nm1) == 0)
+ return 1;
+ if (mpz_cmp_ui (y, 1L) == 0)
+ return 0;
+ }
+ return 0;
+}
diff --git a/rts/gmp/mpz/random.c b/rts/gmp/mpz/random.c
new file mode 100644
index 0000000000..60d9113991
--- /dev/null
+++ b/rts/gmp/mpz/random.c
@@ -0,0 +1,56 @@
+/* mpz_random -- Generate a random mpz_t of specified size.
+ This function is non-portable and generates poor random numbers.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "urandom.h"
+
+void
+#if __STDC__
+mpz_random (mpz_ptr x, mp_size_t size)
+#else
+mpz_random (x, size)
+ mpz_ptr x;
+ mp_size_t size;
+#endif
+{
+ mp_size_t i;
+ mp_limb_t ran;
+ mp_ptr xp;
+ mp_size_t abs_size;
+
+ abs_size = ABS (size);
+
+ if (x->_mp_alloc < abs_size)
+ _mpz_realloc (x, abs_size);
+
+ xp = x->_mp_d;
+
+ for (i = 0; i < abs_size; i++)
+ {
+ ran = urandom ();
+ xp[i] = ran;
+ }
+
+ MPN_NORMALIZE (xp, abs_size);
+ x->_mp_size = size < 0 ? -abs_size : abs_size;
+}
diff --git a/rts/gmp/mpz/random2.c b/rts/gmp/mpz/random2.c
new file mode 100644
index 0000000000..a90af115e9
--- /dev/null
+++ b/rts/gmp/mpz/random2.c
@@ -0,0 +1,48 @@
+/* mpz_random2 -- Generate a positive random mpz_t of specified size, with
+ long runs of consecutive ones and zeros in the binary representation.
+ Meant for testing of other MP routines.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_random2 (mpz_ptr x, mp_size_t size)
+#else
+mpz_random2 (x, size)
+ mpz_ptr x;
+ mp_size_t size;
+#endif
+{
+ mp_size_t abs_size;
+
+ abs_size = ABS (size);
+ if (abs_size != 0)
+ {
+ if (x->_mp_alloc < abs_size)
+ _mpz_realloc (x, abs_size);
+
+ mpn_random2 (x->_mp_d, abs_size);
+ }
+
+ x->_mp_size = size;
+}
diff --git a/rts/gmp/mpz/realloc.c b/rts/gmp/mpz/realloc.c
new file mode 100644
index 0000000000..0b9e447ec3
--- /dev/null
+++ b/rts/gmp/mpz/realloc.c
@@ -0,0 +1,52 @@
+/* _mpz_realloc -- make the mpz_t have NEW_SIZE digits allocated.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void *
+#if __STDC__
+_mpz_realloc (mpz_ptr m, mp_size_t new_size)
+#else
+_mpz_realloc (m, new_size)
+ mpz_ptr m;
+ mp_size_t new_size;
+#endif
+{
+ /* Never allocate zero space. */
+ if (new_size == 0)
+ new_size = 1;
+
+ m->_mp_d = (mp_ptr) (*_mp_reallocate_func) (m->_mp_d,
+ m->_mp_alloc * BYTES_PER_MP_LIMB,
+ new_size * BYTES_PER_MP_LIMB);
+ m->_mp_alloc = new_size;
+
+#if 0
+ /* This might break some code that reads the size field after
+ reallocation, in the case the reallocated destination and a
+ source argument are identical. */
+ if (ABS (m->_mp_size) > new_size)
+ m->_mp_size = 0;
+#endif
+
+ return (void *) m->_mp_d;
+}
diff --git a/rts/gmp/mpz/remove.c b/rts/gmp/mpz/remove.c
new file mode 100644
index 0000000000..bc6675f972
--- /dev/null
+++ b/rts/gmp/mpz/remove.c
@@ -0,0 +1,93 @@
+/* mpz_remove -- divide out a factor and return its multiplicity.
+
+Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
+#else
+mpz_remove (dest, src, f)
+ mpz_ptr dest;
+ mpz_srcptr src;
+ mpz_srcptr f;
+#endif
+{
+ mpz_t fpow[40]; /* inexhaustible...until year 2020 or so */
+ mpz_t x, rem;
+ unsigned long int pwr;
+ int p;
+
+ if (mpz_cmp_ui (f, 1) <= 0 || mpz_sgn (src) == 0)
+ DIVIDE_BY_ZERO;
+ if (mpz_cmp_ui (f, 2) == 0)
+ {
+ unsigned long int s0;
+ s0 = mpz_scan1 (src, 0);
+ mpz_div_2exp (dest, src, s0);
+ return s0;
+ }
+
+ /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an
+ upper bound of the result we're seeking. We could also shift down the
+ operands so that they become odd, to make intermediate values smaller. */
+
+ mpz_init (rem);
+ mpz_init (x);
+
+ pwr = 0;
+ mpz_init (fpow[0]);
+ mpz_set (fpow[0], f);
+ mpz_set (dest, src);
+
+ /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k). */
+ for (p = 0;; p++)
+ {
+ mpz_tdiv_qr (x, rem, dest, fpow[p]);
+ if (SIZ (rem) != 0)
+ break;
+ mpz_init (fpow[p + 1]);
+ mpz_mul (fpow[p + 1], fpow[p], fpow[p]);
+ mpz_set (dest, x);
+ }
+
+ pwr = (1 << p) - 1;
+
+ mpz_clear (fpow[p]);
+
+ /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
+ zero remainder. */
+ while (--p >= 0)
+ {
+ mpz_tdiv_qr (x, rem, dest, fpow[p]);
+ if (SIZ (rem) == 0)
+ {
+ pwr += 1 << p;
+ mpz_set (dest, x);
+ }
+ mpz_clear (fpow[p]);
+ }
+
+ mpz_clear (x);
+ mpz_clear (rem);
+ return pwr;
+}
diff --git a/rts/gmp/mpz/root.c b/rts/gmp/mpz/root.c
new file mode 100644
index 0000000000..0920bf22d3
--- /dev/null
+++ b/rts/gmp/mpz/root.c
@@ -0,0 +1,183 @@
+/* mpz_root(root, u, nth) -- Set ROOT to floor(U^(1/nth)).
+ Return an indication if the result is exact.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* Naive implementation of nth root extraction. It would probably be a
+ better idea to use a division-free Newton iteration. It is insane
+ to use full precision from iteration 1. The mpz_scan1 trick compensates
+ to some extent. It would be natural to avoid representing the low zero
+ bits mpz_scan1 is counting, and at the same time call mpn directly. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+int
+#if __STDC__
+mpz_root (mpz_ptr r, mpz_srcptr c, unsigned long int nth)
+#else
+mpz_root (r, c, nth)
+ mpz_ptr r;
+ mpz_srcptr c;
+ unsigned long int nth;
+#endif
+{
+ mpz_t x, t0, t1, t2;
+ __mpz_struct ccs, *cc = &ccs;
+ unsigned long int nbits;
+ int bit;
+ int exact;
+ int i;
+ unsigned long int lowz;
+ unsigned long int rl;
+
+ /* even roots of negatives provoke an exception */
+ if (mpz_sgn (c) < 0 && (nth & 1) == 0)
+ SQRT_OF_NEGATIVE;
+
+ /* root extraction interpreted as c^(1/nth) means a zeroth root should
+ provoke a divide by zero, do this even if c==0 */
+ if (nth == 0)
+ DIVIDE_BY_ZERO;
+
+ if (mpz_sgn (c) == 0)
+ {
+ if (r != NULL)
+ mpz_set_ui (r, 0);
+ return 1; /* exact result */
+ }
+
+ PTR(cc) = PTR(c);
+ SIZ(cc) = ABSIZ(c);
+
+ nbits = (mpz_sizeinbase (cc, 2) - 1) / nth;
+ if (nbits == 0)
+ {
+ if (r != NULL)
+ mpz_set_ui (r, 1);
+ if (mpz_sgn (c) < 0)
+ {
+ if (r != NULL)
+ SIZ(r) = -SIZ(r);
+ return mpz_cmp_si (c, -1L) == 0;
+ }
+ return mpz_cmp_ui (c, 1L) == 0;
+ }
+
+ mpz_init (x);
+ mpz_init (t0);
+ mpz_init (t1);
+ mpz_init (t2);
+
+ /* Create a one-bit approximation. */
+ mpz_set_ui (x, 0);
+ mpz_setbit (x, nbits);
+
+ /* Make the approximation better, one bit at a time. This odd-looking
+ termination criteria makes large nth get better initial approximation,
+ which avoids slow convergence for such values. */
+ bit = nbits - 1;
+ for (i = 1; (nth >> i) != 0; i++)
+ {
+ mpz_setbit (x, bit);
+ mpz_tdiv_q_2exp (t0, x, bit);
+ mpz_pow_ui (t1, t0, nth);
+ mpz_mul_2exp (t1, t1, bit * nth);
+ if (mpz_cmp (cc, t1) < 0)
+ mpz_clrbit (x, bit);
+
+ bit--; /* check/set next bit */
+ if (bit < 0)
+ {
+ /* We're done. */
+ mpz_pow_ui (t1, x, nth);
+ goto done;
+ }
+ }
+ mpz_setbit (x, bit);
+ mpz_set_ui (t2, 0); mpz_setbit (t2, bit); mpz_add (x, x, t2);
+
+#if DEBUG
+ /* Check that the starting approximation is >= than the root. */
+ mpz_pow_ui (t1, x, nth);
+ if (mpz_cmp (cc, t1) >= 0)
+ abort ();
+#endif
+
+ mpz_add_ui (x, x, 1);
+
+ /* Main loop */
+ do
+ {
+ lowz = mpz_scan1 (x, 0);
+ mpz_tdiv_q_2exp (t0, x, lowz);
+ mpz_pow_ui (t1, t0, nth - 1);
+ mpz_mul_2exp (t1, t1, lowz * (nth - 1));
+ mpz_tdiv_q (t2, cc, t1);
+ mpz_sub (t2, x, t2);
+ rl = mpz_tdiv_q_ui (t2, t2, nth);
+ mpz_sub (x, x, t2);
+ }
+ while (mpz_sgn (t2) != 0);
+
+ /* If we got a non-zero remainder in the last division, we know our root
+ is too large. */
+ mpz_sub_ui (x, x, (mp_limb_t) (rl != 0));
+
+ /* Adjustment loop. If we spend more care on rounding in the loop above,
+ we could probably get rid of this, or greatly simplify it. */
+ {
+ int bad = 0;
+ lowz = mpz_scan1 (x, 0);
+ mpz_tdiv_q_2exp (t0, x, lowz);
+ mpz_pow_ui (t1, t0, nth);
+ mpz_mul_2exp (t1, t1, lowz * nth);
+ while (mpz_cmp (cc, t1) < 0)
+ {
+ bad++;
+ if (bad > 2)
+ abort (); /* abort if our root is far off */
+ mpz_sub_ui (x, x, 1);
+ lowz = mpz_scan1 (x, 0);
+ mpz_tdiv_q_2exp (t0, x, lowz);
+ mpz_pow_ui (t1, t0, nth);
+ mpz_mul_2exp (t1, t1, lowz * nth);
+ }
+ }
+
+ done:
+ exact = mpz_cmp (t1, cc) == 0;
+
+ if (r != NULL)
+ {
+ mpz_set (r, x);
+ if (mpz_sgn (c) < 0)
+ SIZ(r) = -SIZ(r);
+ }
+
+ mpz_clear (t2);
+ mpz_clear (t1);
+ mpz_clear (t0);
+ mpz_clear (x);
+
+ return exact;
+}
diff --git a/rts/gmp/mpz/rrandomb.c b/rts/gmp/mpz/rrandomb.c
new file mode 100644
index 0000000000..7d78243674
--- /dev/null
+++ b/rts/gmp/mpz/rrandomb.c
@@ -0,0 +1,117 @@
+/* mpz_rrandomb -- Generate a positive random mpz_t of specified bit size, with
+ long runs of consecutive ones and zeros in the binary representation.
+ Meant for testing of other MP routines.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+static void gmp_rrandomb _PROTO ((mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits));
+
+void
+#if __STDC__
+mpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+mpz_rrandomb (x, rstate, nbits)
+ mpz_ptr x;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ mp_size_t nl = 0;
+
+ if (nbits != 0)
+ {
+ mp_ptr xp;
+ nl = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ if (x->_mp_alloc < nl)
+ _mpz_realloc (x, nl);
+
+ xp = PTR(x);
+ gmp_rrandomb (xp, rstate, nbits);
+ MPN_NORMALIZE (xp, nl);
+ }
+
+ SIZ(x) = nl;
+}
+
+#define BITS_PER_CHUNK 4
+
+static void
+#if __STDC__
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+gmp_rrandomb (rp, rstate, nbits)
+ mp_ptr rp;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ int nb;
+ int bit_pos;
+ mp_size_t limb_pos;
+ mp_limb_t ran, ranm;
+ mp_limb_t acc;
+ mp_size_t n;
+
+ bit_pos = nbits % BITS_PER_MP_LIMB;
+ limb_pos = nbits / BITS_PER_MP_LIMB;
+ if (bit_pos == 0)
+ {
+ bit_pos = BITS_PER_MP_LIMB;
+ limb_pos--;
+ }
+
+ acc = 0;
+ while (limb_pos >= 0)
+ {
+ _gmp_rand (&ranm, rstate, BITS_PER_CHUNK + 1);
+ ran = ranm;
+ nb = (ran >> 1) + 1;
+ if ((ran & 1) != 0)
+ {
+ /* Generate a string of ones. */
+ if (nb > bit_pos)
+ {
+ rp[limb_pos--] = acc | ((((mp_limb_t) 1) << bit_pos) - 1);
+ bit_pos += BITS_PER_MP_LIMB;
+ bit_pos -= nb;
+ acc = (~(mp_limb_t) 0) << bit_pos;
+ }
+ else
+ {
+ bit_pos -= nb;
+ acc |= ((((mp_limb_t) 1) << nb) - 1) << bit_pos;
+ }
+ }
+ else
+ {
+ /* Generate a string of zeroes. */
+ if (nb > bit_pos)
+ {
+ rp[limb_pos--] = acc;
+ acc = 0;
+ bit_pos += BITS_PER_MP_LIMB;
+ }
+ bit_pos -= nb;
+ }
+ }
+}
diff --git a/rts/gmp/mpz/scan0.c b/rts/gmp/mpz/scan0.c
new file mode 100644
index 0000000000..6c59cf8939
--- /dev/null
+++ b/rts/gmp/mpz/scan0.c
@@ -0,0 +1,35 @@
+/* mpz_scan0(op, startbit) -- Scan for the next set bit, starting at startbit.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_scan0 (mpz_srcptr u, unsigned long int starting_bit)
+#else
+mpz_scan0 (u, starting_bit)
+ mpz_srcptr u;
+ unsigned long int starting_bit;
+#endif
+{
+ return mpn_scan0 (u->_mp_d, starting_bit);
+}
diff --git a/rts/gmp/mpz/scan1.c b/rts/gmp/mpz/scan1.c
new file mode 100644
index 0000000000..3b84e3420c
--- /dev/null
+++ b/rts/gmp/mpz/scan1.c
@@ -0,0 +1,35 @@
+/* mpz_scan1(op, startbit) -- Scan for the next set bit, starting at startbit.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_scan1 (mpz_srcptr u, unsigned long int starting_bit)
+#else
+mpz_scan1 (u, starting_bit)
+ mpz_srcptr u;
+ unsigned long int starting_bit;
+#endif
+{
+ return mpn_scan1 (u->_mp_d, starting_bit);
+}
diff --git a/rts/gmp/mpz/set.c b/rts/gmp/mpz/set.c
new file mode 100644
index 0000000000..06b2eef511
--- /dev/null
+++ b/rts/gmp/mpz/set.c
@@ -0,0 +1,48 @@
+/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set (mpz_ptr w, mpz_srcptr u)
+#else
+mpz_set (w, u)
+ mpz_ptr w;
+ mpz_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+
+ usize = u->_mp_size;
+ size = ABS (usize);
+
+ if (w->_mp_alloc < size)
+ _mpz_realloc (w, size);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ MPN_COPY (wp, up, size);
+ w->_mp_size = usize;
+}
diff --git a/rts/gmp/mpz/set_d.c b/rts/gmp/mpz/set_d.c
new file mode 100644
index 0000000000..e90ed9bc2f
--- /dev/null
+++ b/rts/gmp/mpz/set_d.c
@@ -0,0 +1,96 @@
+/* mpz_set_d(integer, val) -- Assign INTEGER with a double value VAL.
+
+Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_d (mpz_ptr r, double d)
+#else
+mpz_set_d (r, d)
+ mpz_ptr r;
+ double d;
+#endif
+{
+ int negative;
+ mp_limb_t tp[3];
+ mp_ptr rp;
+ mp_size_t rn;
+
+ negative = d < 0;
+ d = ABS (d);
+
+ /* Handle small arguments quickly. */
+ if (d < MP_BASE_AS_DOUBLE)
+ {
+ mp_limb_t tmp;
+ tmp = d;
+ PTR(r)[0] = tmp;
+ SIZ(r) = negative ? -(tmp != 0) : (tmp != 0);
+ return;
+ }
+
+ rn = __gmp_extract_double (tp, d);
+
+ if (ALLOC(r) < rn)
+ _mpz_realloc (r, rn);
+
+ rp = PTR (r);
+
+#if BITS_PER_MP_LIMB == 32
+ switch (rn)
+ {
+ default:
+ MPN_ZERO (rp, rn - 3);
+ rp += rn - 3;
+ /* fall through */
+ case 3:
+ rp[2] = tp[2];
+ rp[1] = tp[1];
+ rp[0] = tp[0];
+ break;
+ case 2:
+ rp[1] = tp[2];
+ rp[0] = tp[1];
+ break;
+ case 1:
+ /* handled in "small aguments" case above */
+ abort ();
+ }
+#else
+ switch (rn)
+ {
+ default:
+ MPN_ZERO (rp, rn - 2);
+ rp += rn - 2;
+ /* fall through */
+ case 2:
+ rp[1] = tp[1], rp[0] = tp[0];
+ break;
+ case 1:
+ /* handled in "small aguments" case above */
+ abort ();
+ }
+#endif
+
+ SIZ(r) = negative ? -rn : rn;
+}
diff --git a/rts/gmp/mpz/set_f.c b/rts/gmp/mpz/set_f.c
new file mode 100644
index 0000000000..2273953dfd
--- /dev/null
+++ b/rts/gmp/mpz/set_f.c
@@ -0,0 +1,64 @@
+/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_f (mpz_ptr w, mpf_srcptr u)
+#else
+mpz_set_f (w, u)
+ mpz_ptr w;
+ mpf_srcptr u;
+#endif
+{
+ mp_ptr wp, up;
+ mp_size_t usize, size;
+ mp_exp_t exp;
+
+ usize = SIZ (u);
+ size = ABS (usize);
+ exp = EXP (u);
+
+ if (w->_mp_alloc < exp)
+ _mpz_realloc (w, exp);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ if (exp <= 0)
+ {
+ SIZ (w) = 0;
+ return;
+ }
+ if (exp < size)
+ {
+ MPN_COPY (wp, up + size - exp, exp);
+ }
+ else
+ {
+ MPN_ZERO (wp, exp - size);
+ MPN_COPY (wp + exp - size, up, size);
+ }
+
+ w->_mp_size = usize >= 0 ? exp : -exp;
+}
diff --git a/rts/gmp/mpz/set_q.c b/rts/gmp/mpz/set_q.c
new file mode 100644
index 0000000000..72d3222a80
--- /dev/null
+++ b/rts/gmp/mpz/set_q.c
@@ -0,0 +1,36 @@
+/* mpz_set_q (dest_integer, src_rational) -- Assign DEST_INTEGER from
+ SRC_rational.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_q (mpz_ptr w, mpq_srcptr u)
+#else
+mpz_set_q (w, u)
+ mpz_ptr w;
+ mpq_srcptr u;
+#endif
+{
+ mpz_tdiv_q (w, mpq_numref (u), mpq_denref (u));
+}
diff --git a/rts/gmp/mpz/set_si.c b/rts/gmp/mpz/set_si.c
new file mode 100644
index 0000000000..9ba2fbaf30
--- /dev/null
+++ b/rts/gmp/mpz/set_si.c
@@ -0,0 +1,48 @@
+/* mpz_set_si(integer, val) -- Assign INTEGER with a small value VAL.
+
+Copyright (C) 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_si (mpz_ptr dest, signed long int val)
+#else
+mpz_set_si (dest, val)
+ mpz_ptr dest;
+ signed long int val;
+#endif
+{
+ /* We don't check if the allocation is enough, since the rest of the
+ package ensures it's at least 1, which is what we need here. */
+ if (val > 0)
+ {
+ dest->_mp_d[0] = val;
+ dest->_mp_size = 1;
+ }
+ else if (val < 0)
+ {
+ dest->_mp_d[0] = (unsigned long) -val;
+ dest->_mp_size = -1;
+ }
+ else
+ dest->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/set_str.c b/rts/gmp/mpz/set_str.c
new file mode 100644
index 0000000000..3ab79c0e89
--- /dev/null
+++ b/rts/gmp/mpz/set_str.c
@@ -0,0 +1,157 @@
+/* mpz_set_str(mp_dest, string, base) -- Convert the \0-terminated
+ string STRING in base BASE to multiple precision integer in
+ MP_DEST. Allow white space in the string. If BASE == 0 determine
+ the base in the C standard way, i.e. 0xhh...h means base 16,
+ 0oo...o means base 8, otherwise assume base 10.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998, 2000 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <string.h>
+#include <ctype.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+static int
+#if __STDC__
+digit_value_in_base (int c, int base)
+#else
+digit_value_in_base (c, base)
+ int c;
+ int base;
+#endif
+{
+ int digit;
+
+ if (isdigit (c))
+ digit = c - '0';
+ else if (islower (c))
+ digit = c - 'a' + 10;
+ else if (isupper (c))
+ digit = c - 'A' + 10;
+ else
+ return -1;
+
+ if (digit < base)
+ return digit;
+ return -1;
+}
+
+int
+#if __STDC__
+mpz_set_str (mpz_ptr x, const char *str, int base)
+#else
+mpz_set_str (x, str, base)
+ mpz_ptr x;
+ const char *str;
+ int base;
+#endif
+{
+ size_t str_size;
+ char *s, *begs;
+ size_t i;
+ mp_size_t xsize;
+ int c;
+ int negative;
+ TMP_DECL (marker);
+
+ /* Skip whitespace. */
+ do
+ c = *str++;
+ while (isspace (c));
+
+ negative = 0;
+ if (c == '-')
+ {
+ negative = 1;
+ c = *str++;
+ }
+
+ if (digit_value_in_base (c, base == 0 ? 10 : base) < 0)
+ return -1; /* error if no digits */
+
+ /* If BASE is 0, try to find out the base by looking at the initial
+ characters. */
+ if (base == 0)
+ {
+ base = 10;
+ if (c == '0')
+ {
+ base = 8;
+ c = *str++;
+ if (c == 'x' || c == 'X')
+ {
+ base = 16;
+ c = *str++;
+ }
+ else if (c == 'b' || c == 'B')
+ {
+ base = 2;
+ c = *str++;
+ }
+ }
+ }
+
+ /* Skip leading zeros. */
+ while (c == '0')
+ c = *str++;
+ /* Make sure the string does not become empty, mpn_set_str would fail. */
+ if (c == 0)
+ {
+ x->_mp_size = 0;
+ return 0;
+ }
+
+ TMP_MARK (marker);
+ str_size = strlen (str - 1);
+ s = begs = (char *) TMP_ALLOC (str_size + 1);
+
+ /* Remove spaces from the string and convert the result from ASCII to a
+ byte array. */
+ for (i = 0; i < str_size; i++)
+ {
+ if (!isspace (c))
+ {
+ int dig = digit_value_in_base (c, base);
+ if (dig < 0)
+ {
+ TMP_FREE (marker);
+ return -1;
+ }
+ *s++ = dig;
+ }
+ c = *str++;
+ }
+
+ str_size = s - begs;
+
+ xsize = (((mp_size_t) (str_size / __mp_bases[base].chars_per_bit_exactly))
+ / BITS_PER_MP_LIMB + 2);
+ if (x->_mp_alloc < xsize)
+ _mpz_realloc (x, xsize);
+
+ /* Convert the byte array in base BASE to our bignum format. */
+ xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base);
+ x->_mp_size = negative ? -xsize : xsize;
+
+ TMP_FREE (marker);
+ return 0;
+}
diff --git a/rts/gmp/mpz/set_ui.c b/rts/gmp/mpz/set_ui.c
new file mode 100644
index 0000000000..d6097c170a
--- /dev/null
+++ b/rts/gmp/mpz/set_ui.c
@@ -0,0 +1,43 @@
+/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_set_ui (mpz_ptr dest, unsigned long int val)
+#else
+mpz_set_ui (dest, val)
+ mpz_ptr dest;
+ unsigned long int val;
+#endif
+{
+ /* We don't check if the allocation is enough, since the rest of the
+ package ensures it's at least 1, which is what we need here. */
+ if (val > 0)
+ {
+ dest->_mp_d[0] = val;
+ dest->_mp_size = 1;
+ }
+ else
+ dest->_mp_size = 0;
+}
diff --git a/rts/gmp/mpz/setbit.c b/rts/gmp/mpz/setbit.c
new file mode 100644
index 0000000000..d4249a434e
--- /dev/null
+++ b/rts/gmp/mpz/setbit.c
@@ -0,0 +1,119 @@
+/* mpz_setbit -- set a specified bit.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1997, 1999 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_setbit (mpz_ptr d, unsigned long int bit_index)
+#else
+mpz_setbit (d, bit_index)
+ mpz_ptr d;
+ unsigned long int bit_index;
+#endif
+{
+ mp_size_t dsize = d->_mp_size;
+ mp_ptr dp = d->_mp_d;
+ mp_size_t limb_index;
+
+ limb_index = bit_index / BITS_PER_MP_LIMB;
+ if (dsize >= 0)
+ {
+ if (limb_index < dsize)
+ {
+ dp[limb_index] |= (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ d->_mp_size = dsize;
+ }
+ else
+ {
+ /* Ugh. The bit should be set outside of the end of the
+ number. We have to increase the size of the number. */
+ if (d->_mp_alloc < limb_index + 1)
+ {
+ _mpz_realloc (d, limb_index + 1);
+ dp = d->_mp_d;
+ }
+ MPN_ZERO (dp + dsize, limb_index - dsize);
+ dp[limb_index] = (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB);
+ d->_mp_size = limb_index + 1;
+ }
+ }
+ else
+ {
+ mp_size_t zero_bound;
+
+ /* Simulate two's complement arithmetic, i.e. simulate
+ 1. Set OP = ~(OP - 1) [with infinitely many leading ones].
+ 2. Set the bit.
+ 3. Set OP = ~OP + 1. */
+
+ dsize = -dsize;
+
+ /* No upper bound on this loop, we're sure there's a non-zero limb
+     sooner or later. */
+ for (zero_bound = 0; ; zero_bound++)
+ if (dp[zero_bound] != 0)
+ break;
+
+ if (limb_index > zero_bound)
+ {
+ if (limb_index < dsize)
+ dp[limb_index] &= ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB));
+ else
+ ;
+ }
+ else if (limb_index == zero_bound)
+ {
+ dp[limb_index] = ((dp[limb_index] - 1)
+ & ~((mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB))) + 1;
+ if (dp[limb_index] == 0)
+ {
+ mp_size_t i;
+ for (i = limb_index + 1; i < dsize; i++)
+ {
+ dp[i] += 1;
+ if (dp[i] != 0)
+ goto fin;
+ }
+ /* We got carry all way out beyond the end of D. Increase
+ its size (and allocation if necessary). */
+ dsize++;
+ if (d->_mp_alloc < dsize)
+ {
+ _mpz_realloc (d, dsize);
+ dp = d->_mp_d;
+ }
+ dp[i] = 1;
+ d->_mp_size = -dsize;
+ fin:;
+ }
+ }
+ else
+ {
+ mpn_decr_u (dp + limb_index,
+ (mp_limb_t) 1 << (bit_index % BITS_PER_MP_LIMB));
+ dsize -= dp[dsize - 1] == 0;
+ d->_mp_size = -dsize;
+ }
+ }
+}
diff --git a/rts/gmp/mpz/size.c b/rts/gmp/mpz/size.c
new file mode 100644
index 0000000000..6574756783
--- /dev/null
+++ b/rts/gmp/mpz/size.c
@@ -0,0 +1,35 @@
+/* mpz_size(x) -- return the number of limbs currently used by the
+ value of integer X.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+size_t
+#if __STDC__
+mpz_size (mpz_srcptr x)
+#else
+mpz_size (x)
+ mpz_srcptr x;
+#endif
+{
+ return ABS (x->_mp_size);
+}
diff --git a/rts/gmp/mpz/sizeinbase.c b/rts/gmp/mpz/sizeinbase.c
new file mode 100644
index 0000000000..734f9c4532
--- /dev/null
+++ b/rts/gmp/mpz/sizeinbase.c
@@ -0,0 +1,60 @@
+/* mpz_sizeinbase(x, base) -- return an approximation to the number of
+   characters the integer X would have printed in base BASE. The
+ approximation is never too small.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+size_t
+#if __STDC__
+mpz_sizeinbase (mpz_srcptr x, int base)
+#else
+mpz_sizeinbase (x, base)
+ mpz_srcptr x;
+ int base;
+#endif
+{
+ mp_size_t size = ABS (x->_mp_size);
+ int lb_base, cnt;
+ size_t totbits;
+
+ /* Special case for X == 0. */
+ if (size == 0)
+ return 1;
+
+ /* Calculate the total number of significant bits of X. */
+ count_leading_zeros (cnt, x->_mp_d[size - 1]);
+ totbits = size * BITS_PER_MP_LIMB - cnt;
+
+ if ((base & (base - 1)) == 0)
+ {
+ /* Special case for powers of 2, giving exact result. */
+
+ count_leading_zeros (lb_base, base);
+ lb_base = BITS_PER_MP_LIMB - lb_base - 1;
+
+ return (totbits + lb_base - 1) / lb_base;
+ }
+ else
+ return (size_t) (totbits * __mp_bases[base].chars_per_bit_exactly) + 1;
+}
diff --git a/rts/gmp/mpz/sqrt.c b/rts/gmp/mpz/sqrt.c
new file mode 100644
index 0000000000..fe82fe407a
--- /dev/null
+++ b/rts/gmp/mpz/sqrt.c
@@ -0,0 +1,86 @@
+/* mpz_sqrt(root, u) -- Set ROOT to floor(sqrt(U)).
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_sqrt (mpz_ptr root, mpz_srcptr op)
+#else
+mpz_sqrt (root, op)
+ mpz_ptr root;
+ mpz_srcptr op;
+#endif
+{
+ mp_size_t op_size, root_size;
+ mp_ptr root_ptr, op_ptr;
+ mp_ptr free_me = NULL;
+ mp_size_t free_me_size;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op_size = op->_mp_size;
+ if (op_size < 0)
+ SQRT_OF_NEGATIVE;
+
+ /* The size of the root is accurate after this simple calculation. */
+ root_size = (op_size + 1) / 2;
+
+ root_ptr = root->_mp_d;
+ op_ptr = op->_mp_d;
+
+ if (root->_mp_alloc < root_size)
+ {
+ if (root_ptr == op_ptr)
+ {
+ free_me = root_ptr;
+ free_me_size = root->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ root->_mp_alloc = root_size;
+ root_ptr = (mp_ptr) (*_mp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
+ root->_mp_d = root_ptr;
+ }
+ else
+ {
+ /* Make OP not overlap with ROOT. */
+ if (root_ptr == op_ptr)
+ {
+ /* ROOT and OP are identical. Allocate temporary space for OP. */
+ op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
+ /* Copy to the temporary space. Hack: Avoid temporary variable
+ by using ROOT_PTR. */
+ MPN_COPY (op_ptr, root_ptr, op_size);
+ }
+ }
+
+ mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+
+ root->_mp_size = root_size;
+
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/sqrtrem.c b/rts/gmp/mpz/sqrtrem.c
new file mode 100644
index 0000000000..99a6453122
--- /dev/null
+++ b/rts/gmp/mpz/sqrtrem.c
@@ -0,0 +1,111 @@
+/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM
+ to the remainder, i.e. X - ROOT**2.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
+#else
+mpz_sqrtrem (root, rem, op)
+ mpz_ptr root;
+ mpz_ptr rem;
+ mpz_srcptr op;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+msqrt (mpz_srcptr op, mpz_ptr root, mpz_ptr rem)
+#else
+msqrt (op, root, rem)
+ mpz_srcptr op;
+ mpz_ptr root;
+ mpz_ptr rem;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_size_t op_size, root_size, rem_size;
+ mp_ptr root_ptr, op_ptr;
+ mp_ptr free_me = NULL;
+ mp_size_t free_me_size;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op_size = op->_mp_size;
+ if (op_size < 0)
+ SQRT_OF_NEGATIVE;
+
+ if (rem->_mp_alloc < op_size)
+ _mpz_realloc (rem, op_size);
+
+ /* The size of the root is accurate after this simple calculation. */
+ root_size = (op_size + 1) / 2;
+
+ root_ptr = root->_mp_d;
+ op_ptr = op->_mp_d;
+
+ if (root->_mp_alloc < root_size)
+ {
+ if (root_ptr == op_ptr)
+ {
+ free_me = root_ptr;
+ free_me_size = root->_mp_alloc;
+ }
+ else
+ (*_mp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+
+ root->_mp_alloc = root_size;
+ root_ptr = (mp_ptr) (*_mp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
+ root->_mp_d = root_ptr;
+ }
+ else
+ {
+ /* Make OP not overlap with ROOT. */
+ if (root_ptr == op_ptr)
+ {
+ /* ROOT and OP are identical. Allocate temporary space for OP. */
+ op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
+ /* Copy to the temporary space. Hack: Avoid temporary variable
+ by using ROOT_PTR. */
+ MPN_COPY (op_ptr, root_ptr, op_size);
+ }
+ }
+
+ rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);
+
+ root->_mp_size = root_size;
+
+ /* Write remainder size last, to enable us to define this function to
+     give only the square root remainder, if the user calls it with
+ ROOT == REM. */
+ rem->_mp_size = rem_size;
+
+ if (free_me != NULL)
+ (*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/sub.c b/rts/gmp/mpz/sub.c
new file mode 100644
index 0000000000..f3ae7c23a0
--- /dev/null
+++ b/rts/gmp/mpz/sub.c
@@ -0,0 +1,123 @@
+/* mpz_sub -- Subtract two integers.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+#ifndef BERKELEY_MP
+void
+#if __STDC__
+mpz_sub (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
+#else
+mpz_sub (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ mpz_srcptr v;
+#endif
+#else /* BERKELEY_MP */
+void
+#if __STDC__
+msub (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
+#else
+msub (u, v, w)
+ mpz_srcptr u;
+ mpz_srcptr v;
+ mpz_ptr w;
+#endif
+#endif /* BERKELEY_MP */
+{
+ mp_srcptr up, vp;
+ mp_ptr wp;
+ mp_size_t usize, vsize, wsize;
+ mp_size_t abs_usize;
+ mp_size_t abs_vsize;
+
+ usize = u->_mp_size;
+ vsize = -v->_mp_size; /* The "-" makes the difference from mpz_add */
+ abs_usize = ABS (usize);
+ abs_vsize = ABS (vsize);
+
+ if (abs_usize < abs_vsize)
+ {
+ /* Swap U and V. */
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (usize, vsize);
+ MP_SIZE_T_SWAP (abs_usize, abs_vsize);
+ }
+
+ /* True: ABS_USIZE >= ABS_VSIZE. */
+
+ /* If not space for w (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (u or v may be the same as w). */
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ wp = w->_mp_d;
+
+ if ((usize ^ vsize) < 0)
+ {
+ /* U and V have different sign. Need to compare them to determine
+ which operand to subtract from which. */
+
+ /* This test is right since ABS_USIZE >= ABS_VSIZE. */
+ if (abs_usize != abs_vsize)
+ {
+ mpn_sub (wp, up, abs_usize, vp, abs_vsize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ else if (mpn_cmp (up, vp, abs_usize) < 0)
+ {
+ mpn_sub_n (wp, vp, up, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize >= 0)
+ wsize = -wsize;
+ }
+ else
+ {
+ mpn_sub_n (wp, up, vp, abs_usize);
+ wsize = abs_usize;
+ MPN_NORMALIZE (wp, wsize);
+ if (usize < 0)
+ wsize = -wsize;
+ }
+ }
+ else
+ {
+ /* U and V have same sign. Add them. */
+ mp_limb_t cy_limb = mpn_add (wp, up, abs_usize, vp, abs_vsize);
+ wp[abs_usize] = cy_limb;
+ wsize = abs_usize + cy_limb;
+ if (usize < 0)
+ wsize = -wsize;
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/sub_ui.c b/rts/gmp/mpz/sub_ui.c
new file mode 100644
index 0000000000..327add8503
--- /dev/null
+++ b/rts/gmp/mpz/sub_ui.c
@@ -0,0 +1,84 @@
+/* mpz_sub_ui -- Subtract an unsigned one-word integer from an MP_INT.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1999 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_sub_ui (mpz_ptr w, mpz_srcptr u, unsigned long int v)
+#else
+mpz_sub_ui (w, u, v)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int v;
+#endif
+{
+ mp_srcptr up;
+ mp_ptr wp;
+ mp_size_t usize, wsize;
+ mp_size_t abs_usize;
+
+ usize = u->_mp_size;
+ abs_usize = ABS (usize);
+
+ /* If not space for W (and possible carry), increase space. */
+ wsize = abs_usize + 1;
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->_mp_d;
+ wp = w->_mp_d;
+
+ if (abs_usize == 0)
+ {
+ wp[0] = v;
+ w->_mp_size = -(v != 0);
+ return;
+ }
+
+ if (usize < 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_add_1 (wp, up, abs_usize, (mp_limb_t) v);
+ wp[abs_usize] = cy;
+ wsize = -(abs_usize + cy);
+ }
+ else
+ {
+ /* The signs are different. Need exact comparison to determine
+ which operand to subtract from which. */
+ if (abs_usize == 1 && up[0] < v)
+ {
+ wp[0] = v - up[0];
+ wsize = -1;
+ }
+ else
+ {
+ mpn_sub_1 (wp, up, abs_usize, (mp_limb_t) v);
+ /* Size can decrease with at most one limb. */
+ wsize = abs_usize - (wp[abs_usize - 1] == 0);
+ }
+ }
+
+ w->_mp_size = wsize;
+}
diff --git a/rts/gmp/mpz/swap.c b/rts/gmp/mpz/swap.c
new file mode 100644
index 0000000000..0070d6ff24
--- /dev/null
+++ b/rts/gmp/mpz/swap.c
@@ -0,0 +1,52 @@
+/* mpz_swap (dest_integer, src_integer) -- Swap U and V.
+
+Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_swap (mpz_ptr u, mpz_ptr v)
+#else
+mpz_swap (u, v)
+ mpz_ptr u;
+ mpz_ptr v;
+#endif
+{
+ mp_ptr up, vp;
+ mp_size_t usize, vsize;
+ mp_size_t ualloc, valloc;
+
+ ualloc = u->_mp_alloc;
+ valloc = v->_mp_alloc;
+ v->_mp_alloc = ualloc;
+ u->_mp_alloc = valloc;
+
+ usize = u->_mp_size;
+ vsize = v->_mp_size;
+ v->_mp_size = usize;
+ u->_mp_size = vsize;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+ v->_mp_d = up;
+ u->_mp_d = vp;
+}
diff --git a/rts/gmp/mpz/tdiv_q.c b/rts/gmp/mpz/tdiv_q.c
new file mode 100644
index 0000000000..21db4ab385
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_q.c
@@ -0,0 +1,91 @@
+/* mpz_tdiv_q -- divide two integers and produce a quotient.
+
+Copyright (C) 1991, 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_tdiv_q (quot, num, den)
+ mpz_ptr quot;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+{
+ mp_size_t ql;
+ mp_size_t ns, ds, nl, dl;
+ mp_ptr np, dp, qp, rp;
+ TMP_DECL (marker);
+
+ ns = SIZ (num);
+ ds = SIZ (den);
+ nl = ABS (ns);
+ dl = ABS (ds);
+ ql = nl - dl + 1;
+
+ if (dl == 0)
+ DIVIDE_BY_ZERO;
+
+ if (ql <= 0)
+ {
+ SIZ (quot) = 0;
+ return;
+ }
+
+ MPZ_REALLOC (quot, ql);
+
+ TMP_MARK (marker);
+ qp = PTR (quot);
+ rp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ np = PTR (num);
+ dp = PTR (den);
+
+ /* FIXME: We should think about how to handle the temporary allocation.
+ Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+ allocate temp space. */
+
+ /* Copy denominator to temporary space if it overlaps with the quotient. */
+ if (dp == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, dl);
+ dp = tp;
+ }
+ /* Copy numerator to temporary space if it overlaps with the quotient. */
+ if (np == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, np, nl);
+ np = tp;
+ }
+
+ mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);
+
+ ql -= qp[ql - 1] == 0;
+
+ SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/tdiv_q_2exp.c b/rts/gmp/mpz/tdiv_q_2exp.c
new file mode 100644
index 0000000000..03d1e01f89
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_q_2exp.c
@@ -0,0 +1,68 @@
+/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient
+   towards zero.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, unsigned long int cnt)
+#else
+mpz_tdiv_q_2exp (w, u, cnt)
+ mpz_ptr w;
+ mpz_srcptr u;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t usize, wsize;
+ mp_size_t limb_cnt;
+
+ usize = u->_mp_size;
+ limb_cnt = cnt / BITS_PER_MP_LIMB;
+ wsize = ABS (usize) - limb_cnt;
+ if (wsize <= 0)
+ w->_mp_size = 0;
+ else
+ {
+ mp_ptr wp;
+ mp_srcptr up;
+
+ if (w->_mp_alloc < wsize)
+ _mpz_realloc (w, wsize);
+
+ wp = w->_mp_d;
+ up = u->_mp_d;
+
+ cnt %= BITS_PER_MP_LIMB;
+ if (cnt != 0)
+ {
+ mpn_rshift (wp, up + limb_cnt, wsize, cnt);
+ wsize -= wp[wsize - 1] == 0;
+ }
+ else
+ {
+ MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+ }
+
+ w->_mp_size = usize >= 0 ? wsize : -wsize;
+ }
+}
diff --git a/rts/gmp/mpz/tdiv_q_ui.c b/rts/gmp/mpz/tdiv_q_ui.c
new file mode 100644
index 0000000000..a2e3462b76
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_q_ui.c
@@ -0,0 +1,64 @@
+/* mpz_tdiv_q_ui(quot, dividend, divisor_limb)
+ -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_q_ui (quot, dividend, divisor)
+ mpz_ptr quot;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb
+ = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ /* The quotient is SIZE limbs, but the most significant might be zero. */
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tdiv_qr.c b/rts/gmp/mpz/tdiv_qr.c
new file mode 100644
index 0000000000..d66f57d9e5
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_qr.c
@@ -0,0 +1,130 @@
+/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,
+ and REM to DIVIDEND mod DIVISOR.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#ifdef BERKELEY_MP
+#include "mp.h"
+#endif
+
+
+#ifndef BERKELEY_MP
+
+void
+#if __STDC__
+mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_tdiv_qr (quot, rem, num, den)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+
+#else /* BERKELEY_MP */
+
+void
+#if __STDC__
+mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
+#else
+mdiv (num, den, quot, rem)
+ mpz_srcptr num;
+ mpz_srcptr den;
+ mpz_ptr quot;
+ mpz_ptr rem;
+#endif
+
+#endif /* BERKELEY_MP */
+{
+ mp_size_t ql;
+ mp_size_t ns, ds, nl, dl;
+ mp_ptr np, dp, qp, rp;
+ TMP_DECL (marker);
+
+ ns = SIZ (num);
+ ds = SIZ (den);
+ nl = ABS (ns);
+ dl = ABS (ds);
+ ql = nl - dl + 1;
+
+ if (dl == 0)
+ DIVIDE_BY_ZERO;
+
+ MPZ_REALLOC (rem, dl);
+
+ if (ql <= 0)
+ {
+ if (num != rem)
+ {
+ mp_ptr np, rp;
+ np = PTR (num);
+ rp = PTR (rem);
+ MPN_COPY (rp, np, nl);
+ SIZ (rem) = SIZ (num);
+ }
+ /* This needs to follow the assignment to rem, in case the
+ numerator and quotient are the same. */
+ SIZ (quot) = 0;
+ return;
+ }
+
+ MPZ_REALLOC (quot, ql);
+
+ TMP_MARK (marker);
+ qp = PTR (quot);
+ rp = PTR (rem);
+ np = PTR (num);
+ dp = PTR (den);
+
+ /* FIXME: We should think about how to handle the temporary allocation.
+ Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+ allocate temp space. */
+
+ /* Copy denominator to temporary space if it overlaps with the quotient
+ or remainder. */
+ if (dp == rp || dp == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, dl);
+ dp = tp;
+ }
+ /* Copy numerator to temporary space if it overlaps with the quotient or
+ remainder. */
+ if (np == rp || np == qp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, np, nl);
+ np = tp;
+ }
+
+ mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);
+
+ ql -= qp[ql - 1] == 0;
+ MPN_NORMALIZE (rp, dl);
+
+ SIZ (quot) = (ns ^ ds) >= 0 ? ql : -ql;
+ SIZ (rem) = ns >= 0 ? dl : -dl;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/tdiv_qr_ui.c b/rts/gmp/mpz/tdiv_qr_ui.c
new file mode 100644
index 0000000000..10368cd340
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_qr_ui.c
@@ -0,0 +1,76 @@
+/* mpz_tdiv_qr_ui(quot,rem,dividend,short_divisor) --
+ Set QUOT to DIVIDEND / SHORT_DIVISOR
+ and REM to DIVIDEND mod SHORT_DIVISOR.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_qr_ui (quot, rem, dividend, divisor)
+ mpz_ptr quot;
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_ptr quot_ptr;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ if (quot->_mp_alloc < size)
+ _mpz_realloc (quot, size);
+
+ quot_ptr = quot->_mp_d;
+
+ remainder_limb = mpn_divmod_1 (quot_ptr, dividend->_mp_d, size,
+ (mp_limb_t) divisor);
+
+ if (remainder_limb == 0)
+ rem->_mp_size = 0;
+ else
+ {
+ /* Store the single-limb remainder. We don't check if there's space
+ for just one limb, since no function ever makes zero space. */
+ rem->_mp_size = dividend_size >= 0 ? 1 : -1;
+ rem->_mp_d[0] = remainder_limb;
+ }
+
+ /* The quotient is SIZE limbs, but the most significant might be zero. */
+ size -= size != 0 && quot_ptr[size - 1] == 0;
+ quot->_mp_size = dividend_size >= 0 ? size : -size;
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tdiv_r.c b/rts/gmp/mpz/tdiv_r.c
new file mode 100644
index 0000000000..9eb87dfabf
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_r.c
@@ -0,0 +1,98 @@
+/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.
+
+Copyright (C) 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
+#else
+mpz_tdiv_r (rem, num, den)
+ mpz_ptr rem;
+ mpz_srcptr num;
+ mpz_srcptr den;
+#endif
+{
+ mp_size_t ql;
+ mp_size_t ns, ds, nl, dl;
+ mp_ptr np, dp, qp, rp;
+ TMP_DECL (marker);
+
+ ns = SIZ (num);
+ ds = SIZ (den);
+ nl = ABS (ns);
+ dl = ABS (ds);
+ ql = nl - dl + 1;
+
+ if (dl == 0)
+ DIVIDE_BY_ZERO;
+
+ MPZ_REALLOC (rem, dl);
+
+ if (ql <= 0)
+ {
+ if (num != rem)
+ {
+ mp_ptr np, rp;
+ np = PTR (num);
+ rp = PTR (rem);
+ MPN_COPY (rp, np, nl);
+ SIZ (rem) = SIZ (num);
+ }
+ return;
+ }
+
+ TMP_MARK (marker);
+ qp = (mp_ptr) TMP_ALLOC (ql * BYTES_PER_MP_LIMB);
+ rp = PTR (rem);
+ np = PTR (num);
+ dp = PTR (den);
+
+ /* FIXME: We should think about how to handle the temporary allocation.
+ Perhaps mpn_tdiv_qr should handle it, since it anyway often needs to
+ allocate temp space. */
+
+ /* Copy denominator to temporary space if it overlaps with the remainder. */
+ if (dp == rp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (dl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, dp, dl);
+ dp = tp;
+ }
+ /* Copy numerator to temporary space if it overlaps with the remainder. */
+ if (np == rp)
+ {
+ mp_ptr tp;
+ tp = (mp_ptr) TMP_ALLOC (nl * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp, np, nl);
+ np = tp;
+ }
+
+ mpn_tdiv_qr (qp, rp, 0L, np, nl, dp, dl);
+
+ MPN_NORMALIZE (rp, dl);
+
+ SIZ (rem) = ns >= 0 ? dl : -dl;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/tdiv_r_2exp.c b/rts/gmp/mpz/tdiv_r_2exp.c
new file mode 100644
index 0000000000..91de170f5c
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_r_2exp.c
@@ -0,0 +1,79 @@
+/* mpz_tdiv_r_2exp -- Divide an integer by 2**CNT and produce a remainder.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, unsigned long int cnt)
+#else
+mpz_tdiv_r_2exp (res, in, cnt)
+ mpz_ptr res;
+ mpz_srcptr in;
+ unsigned long int cnt;
+#endif
+{
+ mp_size_t in_size = ABS (in->_mp_size);
+ mp_size_t res_size;
+ mp_size_t limb_cnt = cnt / BITS_PER_MP_LIMB;
+ mp_srcptr in_ptr = in->_mp_d;
+
+ if (in_size > limb_cnt)
+ {
+ /* The input operand is (probably) greater than 2**CNT. */
+ mp_limb_t x;
+
+ x = in_ptr[limb_cnt] & (((mp_limb_t) 1 << cnt % BITS_PER_MP_LIMB) - 1);
+ if (x != 0)
+ {
+ res_size = limb_cnt + 1;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ res->_mp_d[limb_cnt] = x;
+ }
+ else
+ {
+ res_size = limb_cnt;
+ MPN_NORMALIZE (in_ptr, res_size);
+
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+ }
+ else
+ {
+ /* The input operand is smaller than 2**CNT. We perform a no-op,
+ apart from that we might need to copy IN to RES. */
+ res_size = in_size;
+ if (res->_mp_alloc < res_size)
+ _mpz_realloc (res, res_size);
+
+ limb_cnt = res_size;
+ }
+
+ if (res != in)
+ MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);
+ res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size;
+}
diff --git a/rts/gmp/mpz/tdiv_r_ui.c b/rts/gmp/mpz/tdiv_r_ui.c
new file mode 100644
index 0000000000..2ea411fda1
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_r_ui.c
@@ -0,0 +1,63 @@
+/* mpz_tdiv_r_ui(rem, dividend, divisor_limb)
+   -- Set REM to DIVIDEND mod DIVISOR_LIMB.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_r_ui (rem, dividend, divisor)
+ mpz_ptr rem;
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ if (remainder_limb == 0)
+ rem->_mp_size = 0;
+ else
+ {
+ /* Store the single-limb remainder. We don't check if there's space
+ for just one limb, since no function ever makes zero space. */
+ rem->_mp_size = dividend_size >= 0 ? 1 : -1;
+ rem->_mp_d[0] = remainder_limb;
+ }
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tdiv_ui.c b/rts/gmp/mpz/tdiv_ui.c
new file mode 100644
index 0000000000..7a40a6a7f7
--- /dev/null
+++ b/rts/gmp/mpz/tdiv_ui.c
@@ -0,0 +1,53 @@
+/* mpz_tdiv_ui(dividend, divisor_limb)
+   -- Return DIVIDEND mod DIVISOR_LIMB.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 1998 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+unsigned long int
+#if __STDC__
+mpz_tdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
+#else
+mpz_tdiv_ui (dividend, divisor)
+ mpz_srcptr dividend;
+ unsigned long int divisor;
+#endif
+{
+ mp_size_t dividend_size;
+ mp_size_t size;
+ mp_limb_t remainder_limb;
+
+ if (divisor == 0)
+ DIVIDE_BY_ZERO;
+
+ dividend_size = dividend->_mp_size;
+ size = ABS (dividend_size);
+
+ /* No need for temporary allocation and copying if QUOT == DIVIDEND as
+ the divisor is just one limb, and thus no intermediate remainders
+ need to be stored. */
+
+ remainder_limb = mpn_mod_1 (dividend->_mp_d, size, (mp_limb_t) divisor);
+
+ return remainder_limb;
+}
diff --git a/rts/gmp/mpz/tstbit.c b/rts/gmp/mpz/tstbit.c
new file mode 100644
index 0000000000..b0a8b0b31a
--- /dev/null
+++ b/rts/gmp/mpz/tstbit.c
@@ -0,0 +1,70 @@
+/* mpz_tstbit -- test a specified bit. Simulate 2's complement representation.
+
+Copyright (C) 1997 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+int
+#if __STDC__
+mpz_tstbit (mpz_srcptr d, unsigned long int bit_index)
+#else
+mpz_tstbit (d, bit_index)
+ mpz_srcptr d;
+ unsigned long int bit_index;
+#endif
+{
+ mp_size_t dsize = d->_mp_size;
+ mp_ptr dp = d->_mp_d;
+ mp_size_t limb_index;
+
+ limb_index = bit_index / BITS_PER_MP_LIMB;
+ if (dsize >= 0)
+ {
+ if (limb_index < dsize)
+ return (dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1;
+ else
+ /* Testing a bit outside of a positive number. */
+ return 0;
+ }
+ else
+ {
+ mp_size_t zero_bound;
+
+ dsize = -dsize;
+
+ /* Locate the least significant non-zero limb. */
+ for (zero_bound = 0; dp[zero_bound] == 0; zero_bound++)
+ ;
+
+ if (limb_index > zero_bound)
+ {
+ if (limb_index < dsize)
+ return (~dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1;
+ else
+ /* Testing a bit outside of a negative number. */
+ return 1;
+ }
+ else if (limb_index == zero_bound)
+ return (-dp[limb_index] >> (bit_index % BITS_PER_MP_LIMB)) & 1;
+ else
+ return 0;
+ }
+}
diff --git a/rts/gmp/mpz/ui_pow_ui.c b/rts/gmp/mpz/ui_pow_ui.c
new file mode 100644
index 0000000000..edd2dee625
--- /dev/null
+++ b/rts/gmp/mpz/ui_pow_ui.c
@@ -0,0 +1,139 @@
+/* mpz_ui_pow_ui(res, base, exp) -- Set RES to BASE**EXP.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static void mpz_pow2 _PROTO ((mpz_ptr r, mp_limb_t blimb, unsigned long int e, mp_limb_t rl));
+
+void
+#if __STDC__
+mpz_ui_pow_ui (mpz_ptr r, unsigned long int b, unsigned long int e)
+#else
+mpz_ui_pow_ui (r, b, e)
+ mpz_ptr r;
+ unsigned long int b;
+ unsigned long int e;
+#endif
+{
+ mp_limb_t blimb = b;
+ mp_limb_t rl;
+
+ if (e == 0)
+ {
+ /* For x^0 we return 1, even if x is 0. */
+ r->_mp_d[0] = 1;
+ r->_mp_size = 1;
+ return;
+ }
+
+ /* Compute b^e as (b^n)^(e div n) * b^(e mod n), where n is chosen such that
+ the latter factor is the largest number small enough to fit in a limb. */
+
+ rl = 1;
+ while (e != 0 && blimb < ((mp_limb_t) 1 << BITS_PER_MP_LIMB/2))
+ {
+ if ((e & 1) != 0)
+ rl = rl * blimb;
+ blimb = blimb * blimb;
+ e = e >> 1;
+ }
+
+ /* rl is now b^(e mod n). (I.e., the latter factor above.) */
+
+ if (e == 0)
+ {
+ r->_mp_d[0] = rl;
+ r->_mp_size = rl != 0;
+ return;
+ }
+
+ mpz_pow2 (r, blimb, e, rl);
+}
+
+/* Multi-precision part of exponentiation code. */
+static void
+#if __STDC__
+mpz_pow2 (mpz_ptr r, mp_limb_t blimb, unsigned long int e, mp_limb_t rl)
+#else
+mpz_pow2 (r, blimb, e, rl)
+ mpz_ptr r;
+ mp_limb_t blimb;
+ unsigned long int e;
+ mp_limb_t rl;
+#endif
+{
+ mp_ptr rp, tp;
+ mp_size_t ralloc, rsize;
+ int cnt, i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* Over-estimate temporary space requirements somewhat. */
+ count_leading_zeros (cnt, blimb);
+ ralloc = e - cnt * e / BITS_PER_MP_LIMB + 1;
+
+ /* The two areas are used to alternatingly hold the input and receive the
+ product for mpn_mul. (Needed since mpn_mul_n requires that the product
+ is distinct from either input operand.) */
+ rp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+ tp = (mp_ptr) TMP_ALLOC (ralloc * BYTES_PER_MP_LIMB);
+
+ rp[0] = blimb;
+ rsize = 1;
+
+ count_leading_zeros (cnt, e);
+ for (i = BITS_PER_MP_LIMB - cnt - 2; i >= 0; i--)
+ {
+ mpn_mul_n (tp, rp, rp, rsize);
+ rsize = 2 * rsize;
+ rsize -= tp[rsize - 1] == 0;
+ MP_PTR_SWAP (rp, tp);
+
+ if ((e & ((mp_limb_t) 1 << i)) != 0)
+ {
+ mp_limb_t cy;
+ cy = mpn_mul_1 (rp, rp, rsize, blimb);
+ rp[rsize] = cy;
+ rsize += cy != 0;
+ }
+ }
+
+ /* We will need rsize or rsize+1 limbs for the result. */
+ if (r->_mp_alloc <= rsize)
+ _mpz_realloc (r, rsize + 1);
+
+ /* Multiply the two factors (in rp,rsize and rl) and put the final result
+ in place. */
+ {
+ mp_limb_t cy;
+ cy = mpn_mul_1 (r->_mp_d, rp, rsize, rl);
+ (r->_mp_d)[rsize] = cy;
+ rsize += cy != 0;
+ }
+
+ r->_mp_size = rsize;
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/urandomb.c b/rts/gmp/mpz/urandomb.c
new file mode 100644
index 0000000000..caca086e05
--- /dev/null
+++ b/rts/gmp/mpz/urandomb.c
@@ -0,0 +1,49 @@
+/* mpz_urandomb (rop, state, n) -- Generate a uniform pseudorandom
+ integer in the range 0 to 2^N - 1, inclusive, using STATE as the
+ random state previously initialized by a call to gmp_randinit().
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_urandomb (mpz_t rop, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+mpz_urandomb (rop, rstate, nbits)
+ mpz_t rop;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ mp_ptr rp;
+ mp_size_t size;
+
+ size = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ if (ALLOC (rop) < size)
+ _mpz_realloc (rop, size);
+
+ rp = PTR (rop);
+
+ _gmp_rand (rp, rstate, nbits);
+ MPN_NORMALIZE (rp, size);
+ SIZ (rop) = size;
+}
diff --git a/rts/gmp/mpz/urandomm.c b/rts/gmp/mpz/urandomm.c
new file mode 100644
index 0000000000..69e1bae78a
--- /dev/null
+++ b/rts/gmp/mpz/urandomm.c
@@ -0,0 +1,78 @@
+/* mpz_urandomm (rop, state, n) -- Generate a uniform pseudorandom
+ integer in the range 0 to N-1, using STATE as the random state
+ previously initialized by a call to gmp_randinit().
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+void
+#if __STDC__
+mpz_urandomm (mpz_t rop, gmp_randstate_t rstate, mpz_t n)
+#else
+mpz_urandomm (rop, rstate, n)
+ mpz_t rop;
+ gmp_randstate_t rstate;
+ mpz_t n;
+#endif
+{
+ mpz_t t, p, m;
+ mp_ptr tp;
+ mp_size_t nbits, size;
+ int count;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+
+ /* FIXME: Should check for n == 0 and report error */
+
+ size = SIZ (n);
+ count_leading_zeros (count, PTR (n)[size - 1]);
+ nbits = size * BITS_PER_MP_LIMB - count;
+
+ /* Allocate enough for any mpz function called since a realloc of
+ these will fail. */
+ MPZ_TMP_INIT (t, size);
+ MPZ_TMP_INIT (m, size + 1);
+ MPZ_TMP_INIT (p, size + 1);
+
+ /* Let m = highest possible random number plus 1. */
+ mpz_set_ui (m, 0);
+ mpz_setbit (m, nbits);
+
+ /* Let p = floor(m / n) * n. */
+ mpz_fdiv_q (p, m, n);
+ mpz_mul (p, p, n);
+
+ tp = PTR (t);
+ do
+ {
+ _gmp_rand (tp, rstate, nbits);
+ MPN_NORMALIZE (tp, size); /* FIXME: Really necessary? */
+ SIZ (t) = size;
+ }
+ while (mpz_cmp (t, p) >= 0);
+
+ mpz_mod (rop, t, n);
+
+ TMP_FREE (marker);
+}
diff --git a/rts/gmp/mpz/xor.c b/rts/gmp/mpz/xor.c
new file mode 100644
index 0000000000..69898d1791
--- /dev/null
+++ b/rts/gmp/mpz/xor.c
@@ -0,0 +1,217 @@
+/* mpz_xor -- Logical xor.
+
+Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
+#else
+mpz_xor (res, op1, op2)
+ mpz_ptr res;
+ mpz_srcptr op1;
+ mpz_srcptr op2;
+#endif
+{
+ mp_srcptr op1_ptr, op2_ptr;
+ mp_size_t op1_size, op2_size;
+ mp_ptr res_ptr;
+ mp_size_t res_size, res_alloc;
+ mp_size_t i;
+ TMP_DECL (marker);
+
+ TMP_MARK (marker);
+ op1_size = op1->_mp_size;
+ op2_size = op2->_mp_size;
+
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+
+ if (op1_size >= 0)
+ {
+ if (op2_size >= 0)
+ {
+ if (op1_size >= op2_size)
+ {
+ if (res->_mp_alloc < op1_size)
+ {
+ _mpz_realloc (res, op1_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op1_ptr)
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ if (res->_mp_alloc < op2_size)
+ {
+ _mpz_realloc (res, op2_size);
+ op1_ptr = op1->_mp_d;
+ op2_ptr = op2->_mp_d;
+ res_ptr = res->_mp_d;
+ }
+
+ if (res_ptr != op2_ptr)
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ MPN_NORMALIZE (res_ptr, res_size);
+ res->_mp_size = res_size;
+ return;
+ }
+ else /* op2_size < 0 */
+ {
+ /* Fall through to the code at the end of the function. */
+ }
+ }
+ else
+ {
+ if (op2_size < 0)
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+
+ /* Both operands are negative, the result will be positive.
+ (-OP1) ^ (-OP2) =
+ = ~(OP1 - 1) ^ ~(OP2 - 1) =
+ = (OP1 - 1) ^ (OP2 - 1) */
+
+ op1_size = -op1_size;
+ op2_size = -op2_size;
+
+ /* Possible optimization: Decrease mpn_sub precision,
+ as we won't use the entire res of both. */
+ opx = (mp_ptr) TMP_ALLOC (op1_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
+ op1_ptr = opx;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ res_alloc = MAX (op1_size, op2_size);
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to
+ temporary space--never to the space RES->_mp_d used
+ to point to before reallocation. */
+ }
+
+ if (op1_size > op2_size)
+ {
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
+ op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ MPN_NORMALIZE (res_ptr, res_size);
+ res->_mp_size = res_size;
+ TMP_FREE (marker);
+ return;
+ }
+ else
+ {
+ /* We should compute -OP1 ^ OP2. Swap OP1 and OP2 and fall
+ through to the code that handles OP1 ^ -OP2. */
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ }
+ }
+
+ {
+ mp_ptr opx;
+ mp_limb_t cy;
+ mp_size_t count;
+
+ /* Operand 2 negative, so will be the result.
+ -(OP1 ^ (-OP2)) = -(OP1 ^ ~(OP2 - 1)) =
+ = ~(OP1 ^ ~(OP2 - 1)) + 1 =
+ = (OP1 ^ (OP2 - 1)) + 1 */
+
+ op2_size = -op2_size;
+
+ opx = (mp_ptr) TMP_ALLOC (op2_size * BYTES_PER_MP_LIMB);
+ mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opx;
+
+ res_alloc = MAX (op1_size, op2_size) + 1;
+ if (res->_mp_alloc < res_alloc)
+ {
+ _mpz_realloc (res, res_alloc);
+ op1_ptr = op1->_mp_d;
+ res_ptr = res->_mp_d;
+ /* Don't re-read OP2_PTR. It points to temporary space--never
+ to the space RES->_mp_d used to point to before reallocation. */
+ }
+
+ if (op1_size > op2_size)
+ {
+ MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
+ for (i = op2_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op1_size;
+ }
+ else
+ {
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
+ for (i = op1_size - 1; i >= 0; i--)
+ res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ res_size = op2_size;
+ }
+
+ cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
+ if (cy)
+ {
+ res_ptr[res_size] = cy;
+ res_size++;
+ }
+
+ MPN_NORMALIZE (res_ptr, res_size);
+ res->_mp_size = -res_size;
+ TMP_FREE (marker);
+ }
+}
diff --git a/rts/gmp/rand.c b/rts/gmp/rand.c
new file mode 100644
index 0000000000..d1f9354511
--- /dev/null
+++ b/rts/gmp/rand.c
@@ -0,0 +1,171 @@
+/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include <stdio.h> /* for NULL */
+#if __STDC__
+# include <stdarg.h>
+#else
+# include <varargs.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Array of CL-schemes, ordered in increasing order of the first
+ member (the 'm2exp' value). The end of the array is indicated with
+ an entry containing all zeros. */
+
+/* All multipliers are in the range 0.01*m and 0.99*m, and are
+congruent to 5 (mod 8).
+They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
+(Up to and including 196 bits, merit is >= 3.) */
+
+struct __gmp_rand_lc_scheme_struct
+{
+ unsigned long int m2exp; /* Modulus is 2 ^ m2exp. */
+ char *astr; /* Multiplier in string form. */
+ unsigned long int c; /* Adder. */
+};
+
+struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
+{
+ {32, "43840821", 1},
+ {33, "85943917", 1},
+ {34, "171799469", 1},
+ {35, "343825285", 1},
+ {36, "687285701", 1},
+ {37, "1374564613", 1},
+ {38, "2749193437", 1},
+ {39, "5497652029", 1},
+ {40, "10995212661", 1},
+ {56, "47988680294711517", 1},
+ {64, "13469374875402548381", 1},
+ {100, "203786806069096950756900463357", 1},
+ {128, "96573135900076068624591706046897650309", 1},
+ {156, "43051576988660538262511726153887323360449035333", 1},
+ {196, "1611627857640767981443524165616850972435303571524033586421", 1},
+ {200, "491824250216153841876046962368396460896019632211283945747141", 1},
+ {256, "79336254595106925775099152154558630917988041692672147726148065355845551082677", 1},
+ {0, NULL, 0} /* End of array. */
+};
+
+void
+#if __STDC__
+gmp_randinit (gmp_randstate_t rstate,
+ gmp_randalg_t alg,
+ ...)
+#else
+gmp_randinit (va_alist)
+ va_dcl
+#endif
+{
+ va_list ap;
+#if __STDC__
+#else
+ __gmp_randstate_struct *rstate;
+ gmp_randalg_t alg;
+#endif
+
+#if __STDC__
+ va_start (ap, alg);
+#else
+ va_start (ap);
+
+ rstate = va_arg (ap, __gmp_randstate_struct *);
+ alg = va_arg (ap, gmp_randalg_t);
+#endif
+
+ switch (alg)
+ {
+ case GMP_RAND_ALG_LC: /* Linear congruential. */
+ {
+ unsigned long int size;
+ struct __gmp_rand_lc_scheme_struct *sp;
+ mpz_t a;
+
+ size = va_arg (ap, unsigned long int);
+
+ /* Pick a scheme. */
+ for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++)
+ if (sp->m2exp / 2 >= size)
+ break;
+
+ if (sp->m2exp == 0) /* Nothing big enough found. */
+ {
+ gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
+ return;
+ }
+
+ /* Install scheme. */
+ mpz_init_set_str (a, sp->astr, 0);
+ gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp);
+ mpz_clear (a);
+ break;
+ }
+
+#if 0
+ case GMP_RAND_ALG_BBS: /* Blum, Blum, and Shub. */
+ {
+ mpz_t p, q;
+ mpz_t ztmp;
+
+ /* FIXME: Generate p and q. They must be ``large'' primes,
+ congruent to 3 mod 4. Should we ensure that they meet some
+	 of the criteria for being ``hard primes''? */
+
+ /* These are around 128 bits. */
+ mpz_init_set_str (p, "148028650191182616877187862194899201391", 10);
+ mpz_init_set_str (q, "315270837425234199477225845240496832591", 10);
+
+ /* Allocate algorithm specific data. */
+ rstate->data.bbs = (__gmp_rand_data_bbs *)
+ (*_mp_allocate_func) (sizeof (__gmp_rand_data_bbs));
+
+ mpz_init (rstate->data.bbs->bi); /* The Blum integer. */
+ mpz_mul (rstate->data.bbs->bi, p, q);
+
+ /* Find a seed, x, with gcd (x, bi) == 1. */
+ mpz_init (ztmp);
+ while (1)
+ {
+ mpz_gcd (ztmp, seed, rstate->data.bbs->bi);
+ if (!mpz_cmp_ui (ztmp, 1))
+ break;
+ mpz_add_ui (seed, seed, 1);
+ }
+
+ rstate->alg = alg;
+ rstate->size = size; /* FIXME: Remove. */
+ mpz_set (rstate->seed, seed);
+
+ mpz_clear (p);
+ mpz_clear (q);
+ mpz_clear (ztmp);
+ break;
+ }
+#endif /* 0 */
+
+ default: /* Bad choice. */
+ gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+ }
+
+ va_end (ap);
+}
diff --git a/rts/gmp/randclr.c b/rts/gmp/randclr.c
new file mode 100644
index 0000000000..5cb0291165
--- /dev/null
+++ b/rts/gmp/randclr.c
@@ -0,0 +1,54 @@
+/* gmp_randclear (state) -- Clear and deallocate random state STATE.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randclear (gmp_randstate_t rstate)
+#else
+gmp_randclear (rstate)
+ gmp_randstate_t rstate;
+#endif
+{
+ mpz_clear (rstate->seed);
+
+ switch (rstate->alg)
+ {
+ case GMP_RAND_ALG_LC:
+ mpz_clear (rstate->algdata.lc->a);
+ if (rstate->algdata.lc->m2exp == 0)
+ mpz_clear (rstate->algdata.lc->m);
+ (*_mp_free_func) (rstate->algdata.lc, sizeof (*rstate->algdata.lc));
+ break;
+
+#if 0
+ case GMP_RAND_ALG_BBS:
+ mpz_clear (rstate->algdata.bbs->bi);
+ (*_mp_free_func) (rstate->algdata.bbs, sizeof (*rstate->algdata.bbs));
+ break;
+#endif /* 0 */
+
+ default:
+ gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+ }
+}
diff --git a/rts/gmp/randlc.c b/rts/gmp/randlc.c
new file mode 100644
index 0000000000..7079db827e
--- /dev/null
+++ b/rts/gmp/randlc.c
@@ -0,0 +1,56 @@
+/* gmp_randinit_lc (state, a, c, m) -- Initialize a random state for a
+ linear congruential generator with multiplier A, adder C, and
+ modulus M.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randinit_lc (gmp_randstate_t rstate,
+ mpz_t a,
+ unsigned long int c,
+ mpz_t m)
+#else
+gmp_randinit_lc (rstate, a, c, m)
+ gmp_randstate_t rstate;
+ mpz_t a;
+ unsigned long int c;
+ mpz_t m;
+#endif
+{
+ /* FIXME: Not finished. We don't handle this in _gmp_rand() yet. */
+ abort ();
+
+ mpz_init_set_ui (rstate->seed, 1);
+ _mpz_realloc (rstate->seed, ABSIZ (m));
+
+ /* Allocate algorithm specific data. */
+ rstate->algdata.lc = (__gmp_randata_lc *)
+ (*_mp_allocate_func) (sizeof (__gmp_randata_lc));
+
+ mpz_init_set (rstate->algdata.lc->a, a);
+ rstate->algdata.lc->c = c;
+ mpz_init_set (rstate->algdata.lc->m, m);
+
+ rstate->alg = GMP_RAND_ALG_LC;
+}
diff --git a/rts/gmp/randlc2x.c b/rts/gmp/randlc2x.c
new file mode 100644
index 0000000000..dbd5f041ee
--- /dev/null
+++ b/rts/gmp/randlc2x.c
@@ -0,0 +1,59 @@
+/* gmp_randinit_lc_2exp (state, a, c, m2exp) -- Initialize random
+ state STATE for a linear congruential generator with multiplier A,
+ adder C, and modulus 2 ^ M2EXP.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randinit_lc_2exp (gmp_randstate_t rstate,
+ mpz_t a,
+ unsigned long int c,
+ unsigned long int m2exp)
+#else
+gmp_randinit_lc_2exp (rstate, a, c, m2exp)
+ gmp_randstate_t rstate;
+ mpz_t a;
+ unsigned long int c;
+ unsigned long int m2exp;
+#endif
+{
+ mpz_init_set_ui (rstate->seed, 1);
+ _mpz_realloc (rstate->seed, m2exp / BITS_PER_MP_LIMB
+ + (m2exp % BITS_PER_MP_LIMB != 0));
+
+ /* Allocate algorithm specific data. */
+ rstate->algdata.lc = (__gmp_randata_lc *)
+ (*_mp_allocate_func) (sizeof (__gmp_randata_lc));
+
+ mpz_init_set (rstate->algdata.lc->a, a);
+ rstate->algdata.lc->c = c;
+
+ /* Cover weird case where m2exp is 0, which means that m is used
+ instead of m2exp. */
+ if (m2exp == 0)
+ mpz_init_set_ui (rstate->algdata.lc->m, 0);
+ rstate->algdata.lc->m2exp = m2exp;
+
+ rstate->alg = GMP_RAND_ALG_LC;
+}
diff --git a/rts/gmp/randraw.c b/rts/gmp/randraw.c
new file mode 100644
index 0000000000..c0c3889d33
--- /dev/null
+++ b/rts/gmp/randraw.c
@@ -0,0 +1,360 @@
+/* _gmp_rand (rp, state, nbits) -- Generate a random bitstream of
+ length NBITS in RP. RP must have enough space allocated to hold
+ NBITS.
+
+Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For linear congruential (LC), we use one of algorithms (1) or (2).
+ (gmp-3.0 uses algorithm (1) with 'm' as a power of 2.)
+
+LC algorithm (1).
+
+ X = (aX + c) mod m
+
+[D. Knuth, "The Art of Computer Programming: Volume 2, Seminumerical Algorithms",
+Third Edition, Addison Wesley, 1998, pp. 184-185.]
+
+ X is the seed and the result
+ a is chosen so that
+ a mod 8 = 5 [3.2.1.2] and [3.2.1.3]
+ .01m < a < .99m
+ its binary or decimal digits is not a simple, regular pattern
+ it has no large quotients when Euclid's algorithm is used to find
+ gcd(a, m) [3.3.3]
+ it passes the spectral test [3.3.4]
+ it passes several tests of [3.3.2]
+ c has no factor in common with m (c=1 or c=a can be good)
+ m is large (2^30)
+ is a power of 2 [3.2.1.1]
+
+The least significant digits of the generated number are not very
+random. It should be regarded as a random fraction X/m. To get a
+random integer between 0 and n-1, multiply X/m by n and truncate.
+(Don't use X/n [ex 3.4.1-3])
+
+The ``accuracy'' in t dimensions is one part in ``the t'th root of m'' [3.3.4].
+
+Don't generate more than about m/1000 numbers without changing a, c, or m.
+
+The sequence length depends on chosen a,c,m.
+
+
+LC algorithm (2).
+
+ X = a * (X mod q) - r * (long) (X/q)
+ if X<0 then X+=m
+
+[Knuth, pp. 185-186.]
+
+ X is the seed and the result
+ as a seed is nonzero and less than m
+ a is a primitive root of m (which means that a^2 <= m)
+ q is (long) m / a
+ r is m mod a
+ m is a prime number near the largest easily computed integer
+
+which gives
+
+ X = a * (X % ((long) m / a)) -
+ (M % a) * ((long) (X / ((long) m / a)))
+
+Since m is prime, the least-significant bits of X are just as random as
+the most-significant bits. */
+
+/* Blum, Blum, and Shub.
+
+ [Bruce Schneier, "Applied Cryptography", Second Edition, John Wiley
+ & Sons, Inc., 1996, pp. 417-418.]
+
+ "Find two large prime numbers, p and q, which are congruent to 3
+ modulo 4. The product of those numbers, n, is a blum integer.
+ Choose another random integer, x, which is relatively prime to n.
+ Compute
+ x[0] = x^2 mod n
+ That's the seed for the generator."
+
+ To generate a random bit, compute
+ x[i] = x[i-1]^2 mod n
+ The least significant bit of x[i] is the one we want.
+
+ We can use more than one bit from x[i], namely the
+ log2(bitlength of x[i])
+ least significant bits of x[i].
+
+ So, for a 32-bit seed we get 5 bits per computation.
+
+ The non-predictability of this generator is based on the difficulty
+ of factoring n.
+ */
+
+/* -------------------------------------------------- */
+
+/* lc (rp, state) -- Generate next number in LC sequence. Return the
+ number of valid bits in the result. NOTE: If 'm' is a power of 2
+ (m2exp != 0), discard the lower half of the result. */
+
+static
+unsigned long int
+#if __STDC__
+lc (mp_ptr rp, gmp_randstate_t rstate)
+#else
+lc (rp, rstate)
+ mp_ptr rp;
+ gmp_randstate_t rstate;
+#endif
+{
+ mp_ptr tp, seedp, ap;
+ mp_size_t ta;
+ mp_size_t tn, seedn, an;
+ mp_size_t retval;
+ int shiftcount = 0;
+ unsigned long int m2exp;
+ mp_limb_t c;
+ TMP_DECL (mark);
+
+ m2exp = rstate->algdata.lc->m2exp;
+ c = (mp_limb_t) rstate->algdata.lc->c;
+
+ seedp = PTR (rstate->seed);
+ seedn = SIZ (rstate->seed);
+
+ if (seedn == 0)
+ {
+ /* Seed is 0. Result is C % M. */
+ *rp = c;
+
+ if (m2exp != 0)
+ {
+ /* M is a power of 2. */
+ if (m2exp < BITS_PER_MP_LIMB)
+ {
+ /* Only necessary when M may be smaller than C. */
+ *rp &= (((mp_limb_t) 1 << m2exp) - 1);
+ }
+ }
+ else
+ {
+ /* M is not a power of 2. */
+ abort (); /* FIXME. */
+ }
+
+ /* Save result as next seed. */
+ *seedp = *rp;
+ SIZ (rstate->seed) = 1;
+ return BITS_PER_MP_LIMB;
+ }
+
+ ap = PTR (rstate->algdata.lc->a);
+ an = SIZ (rstate->algdata.lc->a);
+
+ /* Allocate temporary storage. Let there be room for calculation of
+ (A * seed + C) % M, or M if bigger than that. */
+
+ ASSERT_ALWAYS (m2exp != 0); /* FIXME. */
+
+ TMP_MARK (mark);
+ ta = an + seedn + 1;
+ tp = (mp_ptr) TMP_ALLOC (ta * BYTES_PER_MP_LIMB);
+ MPN_ZERO (tp, ta);
+
+ /* t = a * seed */
+ if (seedn >= an)
+ mpn_mul_basecase (tp, seedp, seedn, ap, an);
+ else
+ mpn_mul_basecase (tp, ap, an, seedp, seedn);
+ tn = an + seedn;
+
+ /* t = t + c */
+ mpn_incr_u (tp, c);
+
+ /* t = t % m */
+ if (m2exp != 0)
+ {
+ /* M is a power of 2. The mod operation is trivial. */
+
+ tp[m2exp / BITS_PER_MP_LIMB] &= ((mp_limb_t) 1 << m2exp % BITS_PER_MP_LIMB) - 1;
+ tn = (m2exp + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ }
+ else
+ {
+ abort (); /* FIXME. */
+ }
+
+ /* Save result as next seed. */
+ MPN_COPY (PTR (rstate->seed), tp, tn);
+ SIZ (rstate->seed) = tn;
+
+ if (m2exp != 0)
+ {
+ /* Discard the lower half of the result. */
+ unsigned long int discardb = m2exp / 2;
+ mp_size_t discardl = discardb / BITS_PER_MP_LIMB;
+
+ tn -= discardl;
+ if (tn > 0)
+ {
+ if (discardb % BITS_PER_MP_LIMB != 0)
+ {
+ mpn_rshift (tp, tp + discardl, tn, discardb % BITS_PER_MP_LIMB);
+ MPN_COPY (rp, tp, (discardb + BITS_PER_MP_LIMB -1) / BITS_PER_MP_LIMB);
+ }
+ else /* Even limb boundary. */
+ MPN_COPY_INCR (rp, tp + discardl, tn);
+ }
+ }
+ else
+ {
+ MPN_COPY (rp, tp, tn);
+ }
+
+ TMP_FREE (mark);
+
+ /* Return number of valid bits in the result. */
+ if (m2exp != 0)
+ retval = (m2exp + 1) / 2;
+ else
+ retval = SIZ (rstate->algdata.lc->m) * BITS_PER_MP_LIMB - shiftcount;
+ return retval;
+}
+
+#ifdef RAWRANDEBUG
+/* Set even bits to EVENBITS and odd bits to ! EVENBITS in RP.
+ Number of bits is m2exp in state. */
+/* FIXME: Remove. */
+unsigned long int
+lc_test (mp_ptr rp, gmp_randstate_t s, const int evenbits)
+{
+ unsigned long int rn, nbits;
+ int f;
+
+ nbits = s->algdata.lc->m2exp / 2;
+ rn = nbits / BITS_PER_MP_LIMB + (nbits % BITS_PER_MP_LIMB != 0);
+ MPN_ZERO (rp, rn);
+
+ for (f = 0; f < nbits; f++)
+ {
+ mpn_lshift (rp, rp, rn, 1);
+ if (f % 2 == ! evenbits)
+ rp[0] += 1;
+ }
+
+ return nbits;
+}
+#endif /* RAWRANDEBUG */
+
+void
+#if __STDC__
+_gmp_rand (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
+#else
+_gmp_rand (rp, rstate, nbits)
+ mp_ptr rp;
+ gmp_randstate_t rstate;
+ unsigned long int nbits;
+#endif
+{
+ mp_size_t rn; /* Size of R. */
+
+ rn = (nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+
+ switch (rstate->alg)
+ {
+ case GMP_RAND_ALG_LC:
+ {
+ unsigned long int rbitpos;
+ int chunk_nbits;
+ mp_ptr tp;
+ mp_size_t tn;
+ TMP_DECL (lcmark);
+
+ TMP_MARK (lcmark);
+
+ chunk_nbits = rstate->algdata.lc->m2exp / 2;
+ tn = (chunk_nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+
+ tp = (mp_ptr) TMP_ALLOC (tn * BYTES_PER_MP_LIMB);
+
+ rbitpos = 0;
+ while (rbitpos + chunk_nbits <= nbits)
+ {
+ mp_ptr r2p = rp + rbitpos / BITS_PER_MP_LIMB;
+
+ if (rbitpos % BITS_PER_MP_LIMB != 0)
+ {
+ mp_limb_t savelimb, rcy;
+		/* Target of new chunk is not bit aligned.  Use temp space
+ and align things by shifting it up. */
+ lc (tp, rstate);
+ savelimb = r2p[0];
+ rcy = mpn_lshift (r2p, tp, tn, rbitpos % BITS_PER_MP_LIMB);
+ r2p[0] |= savelimb;
+/* bogus */ if ((chunk_nbits % BITS_PER_MP_LIMB + rbitpos % BITS_PER_MP_LIMB)
+ > BITS_PER_MP_LIMB)
+ r2p[tn] = rcy;
+ }
+ else
+ {
+		/* Target of new chunk is bit aligned.  Let `lc' put bits
+ directly into our target variable. */
+ lc (r2p, rstate);
+ }
+ rbitpos += chunk_nbits;
+ }
+
+ /* Handle last [0..chunk_nbits) bits. */
+ if (rbitpos != nbits)
+ {
+ mp_ptr r2p = rp + rbitpos / BITS_PER_MP_LIMB;
+ int last_nbits = nbits - rbitpos;
+ tn = (last_nbits + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
+ lc (tp, rstate);
+ if (rbitpos % BITS_PER_MP_LIMB != 0)
+ {
+ mp_limb_t savelimb, rcy;
+	      /* Target of new chunk is not bit aligned.  Use temp space
+ and align things by shifting it up. */
+ savelimb = r2p[0];
+ rcy = mpn_lshift (r2p, tp, tn, rbitpos % BITS_PER_MP_LIMB);
+ r2p[0] |= savelimb;
+ if (rbitpos + tn * BITS_PER_MP_LIMB - rbitpos % BITS_PER_MP_LIMB < nbits)
+ r2p[tn] = rcy;
+ }
+ else
+ {
+ MPN_COPY (r2p, tp, tn);
+ }
+ /* Mask off top bits if needed. */
+ if (nbits % BITS_PER_MP_LIMB != 0)
+ rp[nbits / BITS_PER_MP_LIMB]
+ &= ~ ((~(mp_limb_t) 0) << nbits % BITS_PER_MP_LIMB);
+ }
+
+ TMP_FREE (lcmark);
+ break;
+ }
+
+ default:
+ gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+ break;
+ }
+}
diff --git a/rts/gmp/randsd.c b/rts/gmp/randsd.c
new file mode 100644
index 0000000000..3bed14b578
--- /dev/null
+++ b/rts/gmp/randsd.c
@@ -0,0 +1,37 @@
+/* gmp_randseed (state, seed) -- Set initial seed SEED in random state
+ STATE.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randseed (gmp_randstate_t rstate,
+ mpz_t seed)
+#else
+gmp_randseed (rstate, seed)
+ gmp_randstate_t rstate;
+ mpz_t seed;
+#endif
+{
+ mpz_set (rstate->seed, seed);
+}
diff --git a/rts/gmp/randsdui.c b/rts/gmp/randsdui.c
new file mode 100644
index 0000000000..92f412f3ea
--- /dev/null
+++ b/rts/gmp/randsdui.c
@@ -0,0 +1,37 @@
+/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
+ state STATE.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if __STDC__
+gmp_randseed_ui (gmp_randstate_t rstate,
+ unsigned long int seed)
+#else
+gmp_randseed_ui (rstate, seed)
+ gmp_randstate_t rstate;
+     unsigned long int seed;
+#endif
+{
+ mpz_set_ui (rstate->seed, seed);
+}
diff --git a/rts/gmp/stack-alloc.c b/rts/gmp/stack-alloc.c
new file mode 100644
index 0000000000..9ab98fe5f9
--- /dev/null
+++ b/rts/gmp/stack-alloc.c
@@ -0,0 +1,136 @@
+/* Stack allocation routines. This is intended for machines without support
+ for the `alloca' function.
+
+Copyright (C) 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "stack-alloc.h"
+
+#define __need_size_t
+#include <stddef.h>
+#undef __need_size_t
+
+/* gmp-impl.h and stack-alloc.h conflict when not USE_STACK_ALLOC, so these
+ declarations are copied here */
+#if __STDC__
+extern void * (*__gmp_allocate_func) (size_t);
+extern void (*__gmp_free_func) (void *, size_t);
+#else
+extern void * (*__gmp_allocate_func) ();
+extern void (*__gmp_free_func) ();
+#endif
+
+typedef struct tmp_stack tmp_stack;
+
+static unsigned long max_total_allocation = 0;
+static unsigned long current_total_allocation = 0;
+
+static tmp_stack xxx = {&xxx, &xxx, 0};
+static tmp_stack *current = &xxx;
+
+/* The rounded size of the header of each allocation block. */
+#define HSIZ ((sizeof (tmp_stack) + __TMP_ALIGN - 1) & -__TMP_ALIGN)
+
+/* Allocate a block of exactly <size> bytes. This should only be called
+ through the TMP_ALLOC macro, which takes care of rounding/alignment. */
+void *
+#if __STDC__
+__gmp_tmp_alloc (unsigned long size)
+#else
+__gmp_tmp_alloc (size)
+ unsigned long size;
+#endif
+{
+ void *that;
+
+ if (size > (char *) current->end - (char *) current->alloc_point)
+ {
+ void *chunk;
+ tmp_stack *header;
+ unsigned long chunk_size;
+ unsigned long now;
+
+ /* Allocate a chunk that makes the total current allocation somewhat
+ larger than the maximum allocation ever. If size is very large, we
+ allocate that much. */
+
+ now = current_total_allocation + size;
+ if (now > max_total_allocation)
+ {
+ /* We need more temporary memory than ever before. Increase
+ for future needs. */
+ now = now * 3 / 2;
+ chunk_size = now - current_total_allocation + HSIZ;
+ current_total_allocation = now;
+ max_total_allocation = current_total_allocation;
+ }
+ else
+ {
+ chunk_size = max_total_allocation - current_total_allocation + HSIZ;
+ current_total_allocation = max_total_allocation;
+ }
+
+ chunk = (*__gmp_allocate_func) (chunk_size);
+ header = (tmp_stack *) chunk;
+ header->end = (char *) chunk + chunk_size;
+ header->alloc_point = (char *) chunk + HSIZ;
+ header->prev = current;
+ current = header;
+ }
+
+ that = current->alloc_point;
+ current->alloc_point = (char *) that + size;
+ return that;
+}
+
+/* Typically called at function entry. <mark> is assigned so that
+ __gmp_tmp_free can later be used to reclaim all subsequently allocated
+ storage. */
+void
+#if __STDC__
+__gmp_tmp_mark (tmp_marker *mark)
+#else
+__gmp_tmp_mark (mark)
+ tmp_marker *mark;
+#endif
+{
+ mark->which_chunk = current;
+ mark->alloc_point = current->alloc_point;
+}
+
+/* Free everything allocated since <mark> was assigned by __gmp_tmp_mark */
+void
+#if __STDC__
+__gmp_tmp_free (tmp_marker *mark)
+#else
+__gmp_tmp_free (mark)
+ tmp_marker *mark;
+#endif
+{
+ while (mark->which_chunk != current)
+ {
+ tmp_stack *tmp;
+
+ tmp = current;
+ current = tmp->prev;
+ current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);
+ (*__gmp_free_func) (tmp, (char *) tmp->end - (char *) tmp);
+ }
+ current->alloc_point = mark->alloc_point;
+}
diff --git a/rts/gmp/stack-alloc.h b/rts/gmp/stack-alloc.h
new file mode 100644
index 0000000000..f59beec266
--- /dev/null
+++ b/rts/gmp/stack-alloc.h
@@ -0,0 +1,64 @@
+/* Stack allocation routines. This is intended for machines without support
+ for the `alloca' function.
+
+Copyright (C) 1996, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+struct tmp_stack
+{
+ void *end;
+ void *alloc_point;
+ struct tmp_stack *prev;
+};
+
+struct tmp_marker
+{
+ struct tmp_stack *which_chunk;
+ void *alloc_point;
+};
+
+typedef struct tmp_marker tmp_marker;
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if __STDC__
+void *__gmp_tmp_alloc (unsigned long);
+void __gmp_tmp_mark (tmp_marker *);
+void __gmp_tmp_free (tmp_marker *);
+#else
+void *__gmp_tmp_alloc ();
+void __gmp_tmp_mark ();
+void __gmp_tmp_free ();
+#endif
+
+#if defined (__cplusplus)
+}
+#endif
+
+#ifndef __TMP_ALIGN
+#define __TMP_ALIGN 8
+#endif
+
+#define TMP_DECL(marker) tmp_marker marker
+#define TMP_ALLOC(size) \
+ __gmp_tmp_alloc (((unsigned long) (size) + __TMP_ALIGN - 1) & -__TMP_ALIGN)
+#define TMP_MARK(marker) __gmp_tmp_mark (&marker)
+#define TMP_FREE(marker) __gmp_tmp_free (&marker)
diff --git a/rts/gmp/stamp-h.in b/rts/gmp/stamp-h.in
new file mode 100644
index 0000000000..9788f70238
--- /dev/null
+++ b/rts/gmp/stamp-h.in
@@ -0,0 +1 @@
+timestamp
diff --git a/rts/gmp/stamp-vti b/rts/gmp/stamp-vti
new file mode 100644
index 0000000000..e3186186b2
--- /dev/null
+++ b/rts/gmp/stamp-vti
@@ -0,0 +1,3 @@
+@set UPDATED 5 October 2000
+@set EDITION 3.1.1
+@set VERSION 3.1.1
diff --git a/rts/gmp/urandom.h b/rts/gmp/urandom.h
new file mode 100644
index 0000000000..313479e8b7
--- /dev/null
+++ b/rts/gmp/urandom.h
@@ -0,0 +1,86 @@
+/* urandom.h -- define urandom returning a full unsigned long random value.
+
+Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#if defined (__hpux) || defined (__svr4__) || defined (__SVR4)
+/* HPUX lacks random(). */
+static inline mp_limb_t
+urandom ()
+{
+ return mrand48 ();
+}
+#define __URANDOM
+#endif
+
+#if defined(_WIN32) && !(defined(__CYGWIN__) || defined(__CYGWIN32__))
+/* MS CRT supplies just the poxy rand(), with an upper bound of 0x7fff */
+static inline unsigned long
+urandom ()
+{
+  return rand () ^ ((unsigned long) rand () << 15) ^ ((unsigned long) rand () << 30);
+}
+#define __URANDOM
+#endif
+
+#if defined (__alpha) && !defined (__URANDOM)
+/* DEC OSF/1 1.2 random() returns a double. */
+long mrand48 ();
+static inline mp_limb_t
+urandom ()
+{
+ return mrand48 () | (mrand48 () << 32);
+}
+#define __URANDOM
+#endif
+
+#if BITS_PER_MP_LIMB == 32 && !defined (__URANDOM)
+#if defined (__cplusplus)
+extern "C" {
+#endif
+long random ();
+#if defined (__cplusplus)
+}
+#endif
+static inline mp_limb_t
+urandom ()
+{
+ /* random() returns 31 bits, we want 32. */
+ return random () ^ (random () << 1);
+}
+#define __URANDOM
+#endif
+
+#if BITS_PER_MP_LIMB == 64 && !defined (__URANDOM)
+#if defined (__cplusplus)
+extern "C" {
+#endif
+long random ();
+#if defined (__cplusplus)
+}
+#endif
+static inline mp_limb_t
+urandom ()
+{
+ /* random() returns 31 bits, we want 64. */
+ return random () ^ ((mp_limb_t) random () << 31) ^ ((mp_limb_t) random () << 62);
+}
+#define __URANDOM
+#endif
+
diff --git a/rts/gmp/version.c b/rts/gmp/version.c
new file mode 100644
index 0000000000..9d544ee1d8
--- /dev/null
+++ b/rts/gmp/version.c
@@ -0,0 +1,26 @@
+/* gmp_version -- version number compiled into the library */
+
+/*
+Copyright (C) 1996, 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+const char *gmp_version = VERSION;
diff --git a/rts/gmp/version.texi b/rts/gmp/version.texi
new file mode 100644
index 0000000000..e3186186b2
--- /dev/null
+++ b/rts/gmp/version.texi
@@ -0,0 +1,3 @@
+@set UPDATED 5 October 2000
+@set EDITION 3.1.1
+@set VERSION 3.1.1
diff --git a/rts/hooks/FlagDefaults.c b/rts/hooks/FlagDefaults.c
new file mode 100644
index 0000000000..393d39bc39
--- /dev/null
+++ b/rts/hooks/FlagDefaults.c
@@ -0,0 +1,20 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+void
+defaultsHook (void)
+{ /* this is called *after* RTSflags has had
+ its defaults set, but *before* we start
+ processing the RTS command-line options.
+
+ This default version does *nothing*.
+ The user may provide a more interesting
+ one.
+ */
+}
+
diff --git a/rts/hooks/InitEachPE.c b/rts/hooks/InitEachPE.c
new file mode 100644
index 0000000000..cc9cdc0dba
--- /dev/null
+++ b/rts/hooks/InitEachPE.c
@@ -0,0 +1,23 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+#ifdef PAR
+void
+InitEachPEHook (void)
+{ /* In a GUM setup this is called on each
+ PE immediately before SynchroniseSystem.
+ It can be used to read in static data
+ to each PE which has to be available to
+ each PE. See GPH-Maple as an example how to
+ use this in combination with foreign language
+ code:
+ http://www.risc.uni-linz.ac.at/software/ghc-maple/
+ -- HWL
+ */
+}
+#endif
diff --git a/rts/hooks/MallocFail.c b/rts/hooks/MallocFail.c
new file mode 100644
index 0000000000..1218d1d8d0
--- /dev/null
+++ b/rts/hooks/MallocFail.c
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+#include <stdio.h>
+
+void
+MallocFailHook (lnat request_size /* in bytes */, char *msg)
+{
+ fprintf(stderr, "malloc: failed on request for %lu bytes; message: %s\n", request_size, msg);
+}
+
diff --git a/rts/hooks/OnExit.c b/rts/hooks/OnExit.c
new file mode 100644
index 0000000000..dd4c3b4bb0
--- /dev/null
+++ b/rts/hooks/OnExit.c
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+/* Note: by the time this hook has been called, Haskell land
+ * will have been shut down completely.
+ *
+ * ToDo: feed the hook info on whether we're shutting down as a result
+ * of termination or run-time error ?
+ */
+
+void
+OnExitHook ()
+{
+}
diff --git a/rts/hooks/OutOfHeap.c b/rts/hooks/OutOfHeap.c
new file mode 100644
index 0000000000..98db0d7d49
--- /dev/null
+++ b/rts/hooks/OutOfHeap.c
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include <stdio.h>
+
+void
+OutOfHeapHook (lnat request_size, lnat heap_size) /* both sizes in bytes */
+{
+ /* fprintf(stderr, "Heap exhausted;\nwhile trying to allocate %lu bytes in a %lu-byte heap;\nuse `+RTS -H<size>' to increase the total heap size.\n", */
+
+ (void)request_size; /* keep gcc -Wall happy */
+ fprintf(stderr, "Heap exhausted;\nCurrent maximum heap size is %lu bytes (%lu Mb);\nuse `+RTS -M<size>' to increase it.\n",
+ heap_size, heap_size / (1024*1024));
+}
+
diff --git a/rts/hooks/RtsOpts.c b/rts/hooks/RtsOpts.c
new file mode 100644
index 0000000000..b934b05f1b
--- /dev/null
+++ b/rts/hooks/RtsOpts.c
@@ -0,0 +1,13 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Default RTS options.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+#include <stdlib.h>
+
+// Default RTS options can be given by providing an alternate
+// definition for this variable, pointing to a string of RTS options.
+char *ghc_rts_opts = NULL;
diff --git a/rts/hooks/ShutdownEachPEHook.c b/rts/hooks/ShutdownEachPEHook.c
new file mode 100644
index 0000000000..f5e3ba9344
--- /dev/null
+++ b/rts/hooks/ShutdownEachPEHook.c
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+#ifdef PAR
+void
+ShutdownEachPEHook (void)
+{ /* In a GUM setup this routine is called at the end of
+ shutdownParallelSystem on each PE. Useful for
+ cleaning up stuff, especially when interfacing
+ with foreign language code.
+ -- HWL
+ */
+}
+#endif
diff --git a/rts/hooks/StackOverflow.c b/rts/hooks/StackOverflow.c
new file mode 100644
index 0000000000..a395a3a1a5
--- /dev/null
+++ b/rts/hooks/StackOverflow.c
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * User-overridable RTS hooks.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+
+#include <stdio.h>
+
+void
+StackOverflowHook (lnat stack_size) /* in bytes */
+{
+    fprintf(stderr, "Stack space overflow: current size %lu bytes.\nUse `+RTS -Ksize' to increase it.\n", stack_size);
+}
+
diff --git a/rts/package.conf.in b/rts/package.conf.in
new file mode 100644
index 0000000000..935b71d6a6
--- /dev/null
+++ b/rts/package.conf.in
@@ -0,0 +1,152 @@
+/* The RTS is just another package! */
+
+#include "ghcconfig.h"
+#include "RtsConfig.h"
+
+name: PACKAGE
+version: 1.0
+license: BSD3
+maintainer: glasgow-haskell-users@haskell.org
+exposed: True
+
+exposed-modules:
+hidden-modules:
+
+import-dirs:
+
+#ifdef INSTALLING
+library-dirs: LIB_DIR
+# ifdef mingw32_HOST_OS
+ , LIB_DIR"/gcc-lib"
+ /* force the dist-provided gcc-lib/ into scope. */
+# endif
+#else /* !INSTALLING */
+library-dirs: FPTOOLS_TOP_ABS"/rts"
+# if !defined(HAVE_LIBGMP) && !defined(HAVE_FRAMEWORK_GMP)
+ , FPTOOLS_TOP_ABS"/rts/gmp"
+# endif
+#endif
+
+hs-libraries: "HSrts"
+
+extra-libraries: "m" /* for ldexp() */
+#ifndef HAVE_FRAMEWORK_GMP
+ , "gmp"
+#ifdef HAVE_LIBDL
+ , "dl"
+#endif
+#endif
+#ifdef HAVE_LIBRT
+ , "rt"
+#endif
+#ifdef mingw32_HOST_OS
+ ,"wsock32" /* for the linker */
+#endif
+#ifdef WANT_DOTNET_SUPPORT
+ , "oleaut32", "ole32", "uuid"
+#endif
+#if defined(DEBUG) && defined(HAVE_LIBBFD)
+ ,"bfd", "iberty" /* for debugging */
+#endif
+#ifdef HAVE_LIBMINGWEX
+# ifndef INSTALLING /* Bundled Mingw is behind */
+ ,"mingwex"
+# endif
+#endif
+
+#ifdef INSTALLING
+include-dirs: INCLUDE_DIR
+# ifdef mingw32_HOST_OS
+ , INCLUDE_DIR"/mingw"
+# endif
+#else /* !INSTALLING */
+include-dirs: FPTOOLS_TOP_ABS"/includes"
+#endif
+
+includes: Stg.h
+depends:
+hugs-options:
+cc-options:
+
+ld-options:
+#ifdef LEADING_UNDERSCORE
+ "-u", "_GHCziBase_Izh_static_info"
+ , "-u", "_GHCziBase_Czh_static_info"
+ , "-u", "_GHCziFloat_Fzh_static_info"
+ , "-u", "_GHCziFloat_Dzh_static_info"
+ , "-u", "_GHCziPtr_Ptr_static_info"
+ , "-u", "_GHCziWord_Wzh_static_info"
+ , "-u", "_GHCziInt_I8zh_static_info"
+ , "-u", "_GHCziInt_I16zh_static_info"
+ , "-u", "_GHCziInt_I32zh_static_info"
+ , "-u", "_GHCziInt_I64zh_static_info"
+ , "-u", "_GHCziWord_W8zh_static_info"
+ , "-u", "_GHCziWord_W16zh_static_info"
+ , "-u", "_GHCziWord_W32zh_static_info"
+ , "-u", "_GHCziWord_W64zh_static_info"
+ , "-u", "_GHCziStable_StablePtr_static_info"
+ , "-u", "_GHCziBase_Izh_con_info"
+ , "-u", "_GHCziBase_Czh_con_info"
+ , "-u", "_GHCziFloat_Fzh_con_info"
+ , "-u", "_GHCziFloat_Dzh_con_info"
+ , "-u", "_GHCziPtr_Ptr_con_info"
+ , "-u", "_GHCziPtr_FunPtr_con_info"
+ , "-u", "_GHCziStable_StablePtr_con_info"
+ , "-u", "_GHCziBase_False_closure"
+ , "-u", "_GHCziBase_True_closure"
+ , "-u", "_GHCziPack_unpackCString_closure"
+ , "-u", "_GHCziIOBase_stackOverflow_closure"
+ , "-u", "_GHCziIOBase_heapOverflow_closure"
+ , "-u", "_GHCziIOBase_NonTermination_closure"
+ , "-u", "_GHCziIOBase_BlockedOnDeadMVar_closure"
+ , "-u", "_GHCziIOBase_BlockedIndefinitely_closure"
+ , "-u", "_GHCziIOBase_Deadlock_closure"
+ , "-u", "_GHCziIOBase_NestedAtomically_closure"
+ , "-u", "_GHCziWeak_runFinalizzerBatch_closure"
+#else
+ "-u", "GHCziBase_Izh_static_info"
+ , "-u", "GHCziBase_Czh_static_info"
+ , "-u", "GHCziFloat_Fzh_static_info"
+ , "-u", "GHCziFloat_Dzh_static_info"
+ , "-u", "GHCziPtr_Ptr_static_info"
+ , "-u", "GHCziWord_Wzh_static_info"
+ , "-u", "GHCziInt_I8zh_static_info"
+ , "-u", "GHCziInt_I16zh_static_info"
+ , "-u", "GHCziInt_I32zh_static_info"
+ , "-u", "GHCziInt_I64zh_static_info"
+ , "-u", "GHCziWord_W8zh_static_info"
+ , "-u", "GHCziWord_W16zh_static_info"
+ , "-u", "GHCziWord_W32zh_static_info"
+ , "-u", "GHCziWord_W64zh_static_info"
+ , "-u", "GHCziStable_StablePtr_static_info"
+ , "-u", "GHCziBase_Izh_con_info"
+ , "-u", "GHCziBase_Czh_con_info"
+ , "-u", "GHCziFloat_Fzh_con_info"
+ , "-u", "GHCziFloat_Dzh_con_info"
+ , "-u", "GHCziPtr_Ptr_con_info"
+ , "-u", "GHCziPtr_FunPtr_con_info"
+ , "-u", "GHCziStable_StablePtr_con_info"
+ , "-u", "GHCziBase_False_closure"
+ , "-u", "GHCziBase_True_closure"
+ , "-u", "GHCziPack_unpackCString_closure"
+ , "-u", "GHCziIOBase_stackOverflow_closure"
+ , "-u", "GHCziIOBase_heapOverflow_closure"
+ , "-u", "GHCziIOBase_NonTermination_closure"
+ , "-u", "GHCziIOBase_BlockedOnDeadMVar_closure"
+ , "-u", "GHCziIOBase_BlockedIndefinitely_closure"
+ , "-u", "GHCziIOBase_Deadlock_closure"
+ , "-u", "GHCziIOBase_NestedAtomically_closure"
+ , "-u", "GHCziWeak_runFinalizzerBatch_closure"
+#endif
+
+framework-dirs:
+
+#ifdef HAVE_FRAMEWORK_GMP
+frameworks: "GMP"
+#else
+frameworks:
+#endif
+
+haddock-interfaces:
+haddock-html:
+
diff --git a/rts/parallel/0Hash.c b/rts/parallel/0Hash.c
new file mode 100644
index 0000000000..a471e30a66
--- /dev/null
+++ b/rts/parallel/0Hash.c
@@ -0,0 +1,320 @@
+/*-----------------------------------------------------------------------------
+ *
+ * (c) The AQUA Project, Glasgow University, 1995-1998
+ * (c) The GHC Team, 1999
+ *
+ * Dynamically expanding linear hash tables, as described in
+ * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
+ * pp. 446 -- 457.
+ * -------------------------------------------------------------------------- */
+
+/*
+ Replaced with ghc/rts/Hash.c in the new RTS
+*/
+
+#if 0
+
+#include "Rts.h"
+#include "Hash.h"
+#include "RtsUtils.h"
+
+#define HSEGSIZE 1024 /* Size of a single hash table segment */
+ /* Also the minimum size of a hash table */
+#define HDIRSIZE 1024 /* Size of the segment directory */
+ /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
+#define HLOAD 5 /* Maximum average load of a single hash bucket */
+
+#define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
+ /* Number of HashList cells to allocate in one go */
+
+
+/* Linked list of (key, data) pairs for separate chaining */
+struct hashlist {
+ StgWord key;
+ void *data;
+ struct hashlist *next; /* Next cell in bucket chain (same hash value) */
+};
+
+typedef struct hashlist HashList;
+
+struct hashtable {
+ int split; /* Next bucket to split when expanding */
+ int max; /* Max bucket of smaller table */
+ int mask1; /* Mask for doing the mod of h_1 (smaller table) */
+ int mask2; /* Mask for doing the mod of h_2 (larger table) */
+ int kcount; /* Number of keys */
+ int bcount; /* Number of buckets */
+ HashList **dir[HDIRSIZE]; /* Directory of segments */
+};
+
+/* -----------------------------------------------------------------------------
+ * Hash first using the smaller table. If the bucket is less than the
+ * next bucket to be split, re-hash using the larger table.
+ * -------------------------------------------------------------------------- */
+
+static int
+hash(HashTable *table, W_ key)
+{
+ int bucket;
+
+ /* Strip the boring zero bits */
+ key /= sizeof(StgWord);
+
+ /* Mod the size of the hash table (a power of 2) */
+ bucket = key & table->mask1;
+
+ if (bucket < table->split) {
+ /* Mod the size of the expanded hash table (also a power of 2) */
+ bucket = key & table->mask2;
+ }
+ return bucket;
+}
+
+/* -----------------------------------------------------------------------------
+ * Allocate a new segment of the dynamically growing hash table.
+ * -------------------------------------------------------------------------- */
+
+static void
+allocSegment(HashTable *table, int segment)
+{
+ table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
+ "allocSegment");
+}
+
+
+/* -----------------------------------------------------------------------------
+ * Expand the larger hash table by one bucket, and split one bucket
+ * from the smaller table into two parts. Only the bucket referenced
+ * by @table->split@ is affected by the expansion.
+ * -------------------------------------------------------------------------- */
+
+static void
+expand(HashTable *table)
+{
+ int oldsegment;
+ int oldindex;
+ int newbucket;
+ int newsegment;
+ int newindex;
+ HashList *hl;
+ HashList *next;
+ HashList *old, *new;
+
+ if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
+ /* Wow! That's big. Too big, so don't expand. */
+ return;
+
+ /* Calculate indices of bucket to split */
+ oldsegment = table->split / HSEGSIZE;
+ oldindex = table->split % HSEGSIZE;
+
+ newbucket = table->max + table->split;
+
+ /* And the indices of the new bucket */
+ newsegment = newbucket / HSEGSIZE;
+ newindex = newbucket % HSEGSIZE;
+
+ if (newindex == 0)
+ allocSegment(table, newsegment);
+
+ if (++table->split == table->max) {
+ table->split = 0;
+ table->max *= 2;
+ table->mask1 = table->mask2;
+ table->mask2 = table->mask2 << 1 | 1;
+ }
+ table->bcount++;
+
+ /* Split the bucket, paying no attention to the original order */
+
+ old = new = NULL;
+ for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
+ next = hl->next;
+ if (hash(table, hl->key) == newbucket) {
+ hl->next = new;
+ new = hl;
+ } else {
+ hl->next = old;
+ old = hl;
+ }
+ }
+ table->dir[oldsegment][oldindex] = old;
+ table->dir[newsegment][newindex] = new;
+
+ return;
+}
+
+void *
+lookupHashTable(HashTable *table, StgWord key)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next)
+ if (hl->key == key)
+ return hl->data;
+
+ /* It's not there */
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * We allocate the hashlist cells in large chunks to cut down on malloc
+ * overhead. Although we keep a free list of hashlist cells, we make
+ * no effort to actually return the space to the malloc arena.
+ * -------------------------------------------------------------------------- */
+
+static HashList *freeList = NULL;
+
+static HashList *
+allocHashList(void)
+{
+ HashList *hl, *p;
+
+ if ((hl = freeList) != NULL) {
+ freeList = hl->next;
+ } else {
+ hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
+
+ freeList = hl + 1;
+ for (p = freeList; p < hl + HCHUNK - 1; p++)
+ p->next = p + 1;
+ p->next = NULL;
+ }
+ return hl;
+}
+
+static void
+freeHashList(HashList *hl)
+{
+ hl->next = freeList;
+ freeList = hl;
+}
+
+void
+insertHashTable(HashTable *table, StgWord key, void *data)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+
+ /* We want no duplicates */
+ ASSERT(lookupHashTable(table, key) == NULL);
+
+ /* When the average load gets too high, we expand the table */
+ if (++table->kcount >= HLOAD * table->bcount)
+ expand(table);
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ hl = allocHashList();
+
+ hl->key = key;
+ hl->data = data;
+ hl->next = table->dir[segment][index];
+ table->dir[segment][index] = hl;
+
+}
+
+void *
+removeHashTable(HashTable *table, StgWord key, void *data)
+{
+ int bucket;
+ int segment;
+ int index;
+ HashList *hl;
+ HashList *prev = NULL;
+
+ bucket = hash(table, key);
+ segment = bucket / HSEGSIZE;
+ index = bucket % HSEGSIZE;
+
+ for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
+ if (hl->key == key && (data == NULL || hl->data == data)) {
+ if (prev == NULL)
+ table->dir[segment][index] = hl->next;
+ else
+ prev->next = hl->next;
+ table->kcount--;
+ return hl->data;
+ }
+ prev = hl;
+ }
+
+ /* It's not there */
+ ASSERT(data == NULL);
+ return NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * When we free a hash table, we are also good enough to free the
+ * data part of each (key, data) pair, as long as our caller can tell
+ * us how to do it.
+ * -------------------------------------------------------------------------- */
+
+void
+freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
+{
+ long segment;
+ long index;
+ HashList *hl;
+ HashList *next;
+
+ /* The last bucket with something in it is table->max + table->split - 1 */
+ segment = (table->max + table->split - 1) / HSEGSIZE;
+ index = (table->max + table->split - 1) % HSEGSIZE;
+
+ while (segment >= 0) {
+ while (index >= 0) {
+ for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
+ next = hl->next;
+ if (freeDataFun != NULL)
+ (*freeDataFun)(hl->data);
+ freeHashList(hl);
+ }
+ index--;
+ }
+ free(table->dir[segment]);
+ segment--;
+ index = HSEGSIZE - 1;
+ }
+ free(table);
+}
+
+/* -----------------------------------------------------------------------------
+ * When we initialize a hash table, we set up the first segment as well,
+ * initializing all of the first segment's hash buckets to NULL.
+ * -------------------------------------------------------------------------- */
+
+HashTable *
+allocHashTable(void)
+{
+ HashTable *table;
+ HashList **hb;
+
+ table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
+
+ allocSegment(table, 0);
+
+ for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
+ *hb = NULL;
+
+ table->split = 0;
+ table->max = HSEGSIZE;
+ table->mask1 = HSEGSIZE - 1;
+ table->mask2 = 2 * HSEGSIZE - 1;
+ table->kcount = 0;
+ table->bcount = HSEGSIZE;
+
+ return table;
+}
+#endif
diff --git a/rts/parallel/0Parallel.h b/rts/parallel/0Parallel.h
new file mode 100644
index 0000000000..d52bf00fc2
--- /dev/null
+++ b/rts/parallel/0Parallel.h
@@ -0,0 +1,414 @@
+/*
+ Time-stamp: <Mon Oct 04 1999 14:50:28 Stardate: [-30]3692.88 hwloidl>
+
+ Definitions for parallel machines.
+
+This section contains definitions applicable only to programs compiled
+to run on a parallel machine, i.e. on GUM. Some of these definitions
+are also used when simulating parallel execution, i.e. on GranSim.
+ */
+
+/*
+ ToDo: Check the PAR specfic part of this file
+ Move stuff into Closures.h and ClosureMacros.h
+ Clean-up GRAN specific code
+ -- HWL
+ */
+
+#ifndef PARALLEL_H
+#define PARALLEL_H
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#include "Rts.h"
+#include "GranSim.h"
+//#include "ClosureTypes.h"
+
+//@menu
+//* Basic definitions::
+//* Externs and types::
+//* Dummy defs::
+//* Par specific fixed headers::
+//* Parallel only heap objects::
+//* Packing definitions::
+//* End of File::
+//@end menu
+//*/
+
+//@node Basic definitions, Externs and types
+//@section Basic definitions
+
+/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */
+
+/* Needed for dumping routines */
+#if defined(PAR)
+# define TIME ullong
+# define CURRENT_TIME msTime()
+# define TIME_ON_PROC(p) msTime()
+# define CURRENT_PROC thisPE
+# define BINARY_STATS RtsFlags.ParFlags.granSimStats_Binary
+#elif defined(GRAN)
+# define TIME rtsTime
+# define CURRENT_TIME CurrentTime[CurrentProc]
+# define TIME_ON_PROC(p) CurrentTime[p]
+# define CURRENT_PROC CurrentProc
+# define BINARY_STATS RtsFlags.GranFlags.granSimStats_Binary
+#endif
+
+#if defined(PAR)
+# define MAX_PES 256 /* Maximum number of processors */
+ /* MAX_PES is enforced by SysMan, which does not
+ allow more than this many "processors".
+ This is important because PackGA [GlobAddr.lc]
+ **assumes** that a PE# can fit in 8+ bits.
+ */
+#endif
+
+//@node Externs and types, Dummy defs, Basic definitions
+//@section Externs and types
+
+#if defined(PAR)
+/* GUM: one spark queue on each PE, and each PE sees only its own spark queue */
+extern rtsSparkQ pending_sparks_hd;
+extern rtsSparkQ pending_sparks_tl;
+#elif defined(GRAN)
+/* GranSim: a globally visible array of spark queues */
+extern rtsSparkQ pending_sparks_hds[];
+extern rtsSparkQ pending_sparks_tls[];
+#endif
+extern unsigned int /* nat */ spark_queue_len(PEs proc);
+
+extern StgInt SparksAvail; /* How many sparks are available */
+
+/* prototypes of spark routines */
+/* ToDo: check whether all have to be visible -- HWL */
+#if defined(GRAN)
+rtsSpark *newSpark(StgClosure *node, StgInt name, StgInt gran_info, StgInt size_info, StgInt par_info, StgInt local);
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void add_to_spark_queue(rtsSpark *spark);
+void delete_from_spark_queue (rtsSpark *spark);
+#endif
+
+#define STATS_FILENAME_MAXLEN 128
+
+/* Where to write the log file */
+//extern FILE *gr_file;
+extern char gr_filename[STATS_FILENAME_MAXLEN];
+
+#if defined(GRAN)
+int init_gr_simulation(char *rts_argv[], int rts_argc, char *prog_argv[], int prog_argc);
+void end_gr_simulation(void);
+#endif
+
+#if defined(PAR)
+extern I_ do_sp_profile;
+
+extern P_ PendingFetches;
+extern GLOBAL_TASK_ID *PEs;
+
+extern rtsBool IAmMainThread, GlobalStopPending;
+extern rtsBool fishing;
+extern GLOBAL_TASK_ID SysManTask;
+extern int seed; /*pseudo-random-number generator seed:*/
+ /*Initialised in ParInit*/
+extern I_ threadId; /*Number of Threads that have existed on a PE*/
+extern GLOBAL_TASK_ID mytid;
+
+extern int nPEs;
+
+extern rtsBool InGlobalGC; /* Are we in the midst of performing global GC */
+
+extern HashTable *pGAtoGALAtable;
+extern HashTable *LAtoGALAtable;
+extern GALA *freeIndirections;
+extern GALA *liveIndirections;
+extern GALA *freeGALAList;
+extern GALA *liveRemoteGAs;
+extern int thisPE;
+
+void RunParallelSystem (StgPtr program_closure);
+void initParallelSystem();
+void SynchroniseSystem();
+
+void registerTask (GLOBAL_TASK_ID gtid);
+globalAddr *LAGAlookup (P_ addr);
+P_ GALAlookup (globalAddr *ga);
+globalAddr *MakeGlobal (P_ addr, rtsBool preferred);
+globalAddr *setRemoteGA (P_ addr, globalAddr *ga, rtsBool preferred);
+void splitWeight (globalAddr *to, globalAddr *from);
+globalAddr *addWeight (globalAddr *ga);
+void initGAtables();
+W_ taskIDtoPE (GLOBAL_TASK_ID gtid);
+void RebuildLAGAtable();
+
+void *lookupHashTable (HashTable *table, StgWord key);
+void insertHashTable (HashTable *table, StgWord key, void *data);
+void freeHashTable (HashTable *table, void (*freeDataFun) ((void *data)));
+HashTable *allocHashTable();
+void *removeHashTable (HashTable *table, StgWord key, void *data);
+#endif /* PAR */
+
+/* Interface for dumping routines (i.e. writing to log file) */
+void DumpGranEvent(GranEventType name, StgTSO *tso);
+void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len);
+//void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread);
+
+//@node Dummy defs, Par specific fixed headers, Externs and types
+//@section Dummy defs
+
+/*
+Get this out of the way. These are all null definitions.
+*/
+
+
+//# define GA_HDR_SIZE 0
+//# define GA(closure) /*nothing */
+
+//# define SET_GA(closure,ga) /* nothing */
+//# define SET_STATIC_GA(closure) /* nothing */
+//# define SET_GRAN_HDR(closure,pe) /* nothing */
+//# define SET_STATIC_PROCS(closure) /* nothing */
+
+//# define SET_TASK_ACTIVITY(act) /* nothing */
+
+#if defined(GRAN)
+
+# define GA_HDR_SIZE 1
+
+# define PROCS_HDR_POSN PAR_HDR_POSN
+# define PROCS_HDR_SIZE 1
+
+/* Accessing components of the field */
+# define PROCS(closure) ((closure)->header.gran.procs)
+/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */
+#endif
+
+
+//@node Par specific fixed headers, Parallel only heap objects, Dummy defs
+//@section Par specific fixed headers
+
+/*
+Definitions relating to the entire parallel-only fixed-header field.
+
+On GUM, the global addresses for each local closure are stored in a separate
+hash table, rather then with the closure in the heap. We call @getGA@ to
+look up the global address associated with a local closure (0 is returned
+for local closures that have no global address), and @setGA@ to store a new
+global address for a local closure which did not previously have one.
+*/
+
+#if defined(PAR)
+
+# define GA_HDR_SIZE 0
+
+# define GA(closure) getGA(closure)
+
+# define SET_GA(closure, ga) setGA(closure,ga)
+# define SET_STATIC_GA(closure)
+# define SET_GRAN_HDR(closure,pe)
+# define SET_STATIC_PROCS(closure)
+
+# define MAX_GA_WEIGHT 0 /* Treat as 2^n */
+
+W_ PackGA ((W_, int));
+ /* There was a PACK_GA macro here; but we turned it into the PackGA
+ routine [GlobAddr.lc] (because it needs to do quite a bit of
+ paranoia checking. Phil & Will (95/08)
+ */
+
+/* At the moment, there is no activity profiling for GUM. This may change. */
+# define SET_TASK_ACTIVITY(act) /* nothing */
+#endif
+
+//@node Parallel only heap objects, Packing definitions, Par specific fixed headers
+//@section Parallel only heap objects
+
+// NB: The following definitons are BOTH for GUM and GrAnSim -- HWL
+
+/* All in Closures.h and CLosureMacros.h */
+
+//@node Packing definitions, End of File, Parallel only heap objects
+//@section Packing definitions
+
+//@menu
+//* GUM::
+//* GranSim::
+//@end menu
+//*/
+
+//@node GUM, GranSim, Packing definitions, Packing definitions
+//@subsection GUM
+
+#if defined(PAR)
+/*
+Symbolic constants for the packing code.
+
+This constant defines how many words of data we can pack into a single
+packet in the parallel (GUM) system.
+*/
+
+//@menu
+//* Externs::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Externs, Prototypes, GUM, GUM
+//@subsubsection Externs
+
+extern W_ *PackBuffer; /* size: can be set via option */
+extern long *buffer; /* HWL_ */
+extern W_ *freeBuffer; /* HWL_ */
+extern W_ *packBuffer; /* HWL_ */
+
+extern void InitPackBuffer(STG_NO_ARGS);
+extern void InitMoreBuffers(STG_NO_ARGS);
+extern void InitPendingGABuffer(W_ size);
+extern void AllocClosureQueue(W_ size);
+
+//@node Prototypes, Macros, Externs, GUM
+//@subsubsection Prototypes
+
+void InitPackBuffer();
+P_ PackTSO (P_ tso, W_ *size);
+P_ PackStkO (P_ stko, W_ *size);
+P_ AllocateHeap (W_ size); /* Doesn't belong */
+
+void InitClosureQueue ();
+P_ DeQueueClosure();
+void QueueClosure (P_ closure);
+rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+
+P_ get_closure_info (P_ closure, W_ *size, W_ *ptrs, W_ *nonptrs, W_ *vhs, char *type);
+
+rtsBool isOffset (globalAddr *ga),
+ isFixed (globalAddr *ga);
+
+void doGlobalGC();
+
+P_ PackNearbyGraph (P_ closure,W_ *size);
+P_ UnpackGraph (W_ *buffer, globalAddr **gamap, W_ *nGAs);
+
+
+//@node Macros, , Prototypes, GUM
+//@subsubsection Macros
+
+# define PACK_HEAP_REQUIRED \
+ ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (SPEC_HS + 2))
+
+# define MAX_GAS (RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE)
+
+
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+
+# define PACK_HDR_SIZE 1 /* Words of header in a packet */
+
+# define PACK_PLC_SIZE 2 /* Size of a packed PLC in words */
+
+#endif /* PAR */
+
+//@node GranSim, , GUM, Packing definitions
+//@subsection GranSim
+
+#if defined(GRAN)
+/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */
+
+//@menu
+//* Types::
+//* Prototypes::
+//* Macros::
+//@end menu
+//*/
+
+//@node Types, Prototypes, GranSim, GranSim
+//@subsubsection Types
+
+typedef struct rtsPackBuffer_ {
+ StgInt /* nat */ size;
+ StgInt /* nat */ unpacked_size;
+ StgTSO *tso;
+ StgClosure **buffer;
+} rtsPackBuffer;
+
+//@node Prototypes, Macros, Types, GranSim
+//@subsubsection Prototypes
+
+
+/* main packing functions */
+/*
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize);
+void PrintPacket(rtsPackBuffer *buffer);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+*/
+/* important auxiliary functions */
+
+//StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty);
+int IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node);
+int IS_THUNK(StgClosure* closure);
+char *display_info_type(StgClosure* closure, char *str);
+
+/*
+OLD CODE -- HWL
+void InitPackBuffer(void);
+P_ AllocateHeap (W_ size);
+P_ PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize);
+P_ PackOneNode (P_ closure, P_ tso, W_ *packbuffersize);
+P_ UnpackGraph (P_ buffer);
+
+void InitClosureQueue (void);
+P_ DeQueueClosure(void);
+void QueueClosure (P_ closure);
+// rtsBool QueueEmpty();
+void PrintPacket (P_ buffer);
+*/
+
+// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty);
+// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node) ;
+
+//@node Macros, , Prototypes, GranSim
+//@subsubsection Macros
+
+/* These are needed in the packing code to get the size of the packet
+ right. The closures itself are never built in GrAnSim. */
+# define FETCHME_VHS IND_VHS
+# define FETCHME_HS IND_HS
+
+# define FETCHME_GA_LOCN FETCHME_HS
+
+# define FETCHME_CLOSURE_SIZE(closure) IND_CLOSURE_SIZE(closure)
+# define FETCHME_CLOSURE_NoPTRS(closure) 0L
+# define FETCHME_CLOSURE_NoNONPTRS(closure) (IND_CLOSURE_SIZE(closure)-IND_VHS)
+
+# define MAX_GAS (RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE)
+# define PACK_GA_SIZE 3 /* Size of a packed GA in words */
+ /* Size of a packed fetch-me in words */
+# define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS)
+# define PACK_HDR_SIZE 4 /* Words of header in a packet */
+
+# define PACK_HEAP_REQUIRED \
+ (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \
+ 2 * sizeofW(StgInt) + sizeofW(StgTSO*))
+
+# define PACK_FLAG_LOCN 0
+# define PACK_TSO_LOCN 1
+# define PACK_UNPACKED_SIZE_LOCN 2
+# define PACK_SIZE_LOCN 3
+# define MAGIC_PACK_FLAG 0xfabc
+
+#endif /* GRAN */
+
+//@node End of File, , Packing definitions
+//@section End of File
+
+#endif /* defined(PAR) || defined(GRAN) whole file */
+#endif /* Parallel_H */
+
+
diff --git a/rts/parallel/0Unpack.c b/rts/parallel/0Unpack.c
new file mode 100644
index 0000000000..fc4a8e50c3
--- /dev/null
+++ b/rts/parallel/0Unpack.c
@@ -0,0 +1,440 @@
+/*
+ Time-stamp: <Wed Jan 12 2000 13:29:08 Stardate: [-30]4193.85 hwloidl>
+
+ Unpacking closures which have been exported to remote processors
+
+ This module defines routines for unpacking closures in the parallel
+ runtime system (GUM).
+
+ In the case of GrAnSim, this module defines routines for *simulating* the
+ unpacking of closures as it is done in the parallel runtime system.
+*/
+
+/*
+ Code in this file has been merged with Pack.c
+*/
+
+#if 0
+
+//@node Unpacking closures, , ,
+//@section Unpacking closures
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* GUM code::
+//* GranSim Code::
+//* Index::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Unpacking closures, Unpacking closures
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "ParallelDebug.h"
+#include "FetchMe.h"
+#include "Storage.h"
+
+//@node Prototypes, GUM code, Includes, Unpacking closures
+//@subsection Prototypes
+
+void InitPacking(void);
+# if defined(PAR)
+void InitPackBuffer(void);
+# endif
+/* Interface for ADT of closure queues */
+void AllocClosureQueue(nat size);
+void InitClosureQueue(void);
+rtsBool QueueEmpty(void);
+void QueueClosure(StgClosure *closure);
+StgClosure *DeQueueClosure(void);
+
+StgPtr AllocateHeap(nat size);
+
+//@node GUM code, GranSim Code, Prototypes, Unpacking closures
+//@subsection GUM code
+
+#if defined(PAR)
+
+//@node Local Definitions, , GUM code, GUM code
+//@subsubsection Local Definitions
+
+//@cindex PendingGABuffer
+static globalAddr *PendingGABuffer;
+/* is initialised in main; */
+
+//@cindex InitPendingGABuffer
+void
+InitPendingGABuffer(size)
+nat size;
+{
+ PendingGABuffer = (globalAddr *)
+ stgMallocBytes((size-PACK_HDR_SIZE)*2*sizeof(globalAddr),
+ "InitPendingGABuffer");
+}
+
+/*
+ @CommonUp@ commons up two closures which we have discovered to be
+ variants of the same object. One is made an indirection to the other. */
+
+//@cindex CommonUp
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ StgBlockingQueueElement *bqe;
+
+ ASSERT(src != dst);
+ switch (get_itbl(src)->type) {
+ case BLACKHOLE_BQ:
+ bqe = ((StgBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case FETCH_ME_BQ:
+ bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue;
+ break;
+
+ case RBH:
+ bqe = ((StgRBH *)src)->blocking_queue;
+ break;
+
+ case BLACKHOLE:
+ case FETCH_ME:
+ bqe = END_BQ_QUEUE;
+ break;
+
+ default:
+ /* Don't common up anything else */
+ return;
+ }
+ /* We do not use UPD_IND because that would awaken the bq, too */
+ // UPD_IND(src, dst);
+ updateWithIndirection(get_itbl(src), src, dst);
+ //ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst)));
+ if (bqe != END_BQ_QUEUE)
+ awaken_blocked_queue(bqe, src);
+}
+
+/*
+ @UnpackGraph@ unpacks the graph contained in a message buffer. It
+ returns a pointer to the new graph. The @gamap@ parameter is set to
+ point to an array of (oldGA,newGA) pairs which were created as a result
+ of unpacking the buffer; @nGAs@ is set to the number of GA pairs which
+ were created.
+
+ The format of graph in the pack buffer is as defined in @Pack.lc@. */
+
+//@cindex UnpackGraph
+StgClosure *
+UnpackGraph(packBuffer, gamap, nGAs)
+rtsPackBuffer *packBuffer;
+globalAddr **gamap;
+nat *nGAs;
+{
+ nat size, ptrs, nonptrs, vhs;
+ StgWord **buffer, **bufptr, **slotptr;
+ globalAddr ga, *gaga;
+ StgClosure *closure, *existing,
+ *graphroot, *graph, *parent;
+ StgInfoTable *ip, *oldip;
+ nat bufsize, i,
+ pptr = 0, pptrs = 0, pvhs;
+ char str[80];
+
+ InitPackBuffer(); /* in case it isn't already init'd */
+ graphroot = (StgClosure *)NULL;
+
+ gaga = PendingGABuffer;
+
+ InitClosureQueue();
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ buffer = packBuffer->buffer;
+ bufptr = buffer;
+
+ /* allocate heap */
+ if (bufsize > 0) {
+ graph = allocate(bufsize);
+ ASSERT(graph != NULL);
+ }
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight > 0) {
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ /* No more to unpack; just set closure to local address */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacked PLC at %x", ga.payload.plc));
+ closure = ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ /* No more to unpack; just set closure to cached address */
+ ASSERT(parent != (StgClosure *)NULL);
+ closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Now we have to build something. */
+
+ ASSERT(bufsize > 0);
+
+ /*
+ * Close your eyes. You don't want to see where we're looking. You
+ * can't get closure info until you've unpacked the variable header,
+ * but you don't know how big it is until you've got closure info.
+ * So...we trust that the closure in the buffer is organized the
+ * same way as they will be in the heap...at least up through the
+ * end of the variable header.
+ */
+ ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* Fill in the fixed header */
+ for (i = 0; i < FIXED_HS; i++)
+ ((StgPtr)graph)[i] = *bufptr++;
+
+ if (ip->type == FETCH_ME)
+ size = ptrs = nonptrs = vhs = 0;
+
+ /* Fill in the packed variable header */
+ for (i = 0; i < vhs; i++)
+ ((StgPtr)graph)[FIXED_HS + i] = *bufptr++;
+
+ /* Pointers will be filled in later */
+
+ /* Fill in the packed non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ ((StgPtr)graph)[FIXED_HS + i + vhs + ptrs] = *bufptr++;
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing */
+ QueueClosure(graph);
+
+ /*
+ * Common up the new closure with any existing closure having the same
+ * GA
+ */
+
+ if ((existing = GALAlookup(&ga)) == NULL) {
+ globalAddr *newGA;
+ /* Just keep the new object */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacking new (%x, %d, %x)\n",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight));
+
+ closure = graph;
+ newGA = setRemoteGA(graph, &ga, rtsTrue);
+ if (ip->type == FETCH_ME)
+ // FETCHME_GA(closure) = newGA;
+ ((StgFetchMe *)closure)->ga = newGA;
+ } else {
+ /* Two closures, one global name. Someone loses */
+ oldip = get_itbl(existing);
+
+ if ((oldip->type == FETCH_ME || IS_BLACK_HOLE(existing)) &&
+ ip->type != FETCH_ME) {
+
+ /* What we had wasn't worth keeping */
+ closure = graph;
+ CommonUp(existing, graph);
+ } else {
+
+ /*
+ * Either we already had something worthwhile by this name or
+ * the new thing is just another FetchMe. However, the thing we
+ * just unpacked has to be left as-is, or the child unpacking
+ * code will fail. Remember that the way pointer words are
+ * filled in depends on the info pointers of the parents being
+ * the same as when they were packed.
+ */
+ IF_PAR_DEBUG(pack,
+ belch("Unpacking old (%x, %d, %x), keeping %#lx",
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight,
+ existing));
+
+ closure = existing;
+ }
+ /* Pool the total weight in the stored ga */
+ (void) addWeight(&ga);
+ }
+
+ /* Sort out the global address mapping */
+ if ((ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||
+ (ip_MUTABLE(ip) && ip->type != FETCH_ME)) {
+ /* Make up new GAs for single-copy closures */
+ globalAddr *newGA = makeGlobal(closure, rtsTrue);
+
+ ASSERT(closure == graph);
+
+ /* Create an old GA to new GA mapping */
+ *gaga++ = ga;
+ splitWeight(gaga, newGA);
+ ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1));
+ gaga++;
+ }
+ graph += FIXED_HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size);
+ }
+
+ /*
+ * Set parent pointer to point to chosen closure. If we're at the top of
+ * the graph (our parent is NULL), then we want to arrange to return the
+ * chosen closure to our caller (possibly in place of the allocated graph
+ * root.)
+ */
+ if (parent == NULL)
+ graphroot = closure;
+ else
+ ((StgPtr)parent)[FIXED_HS + pvhs + pptr] = (StgWord) closure;
+
+ /* Save closure pointer for resolving offsets */
+ *slotptr = (StgWord) closure;
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+
+ ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp);
+
+ *gamap = PendingGABuffer;
+ *nGAs = (gaga - PendingGABuffer) / 2;
+
+ /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+ ASSERT(graphroot!=NULL);
+ return (graphroot);
+}
+#endif /* PAR */
+
+//@node GranSim Code, Index, GUM code, Unpacking closures
+//@subsection GranSim Code
+
+/*
+ For GrAnSim: In general no actual unpacking should be necessary. We just
+ have to walk over the graph and set the bitmasks appropriately. -- HWL */
+
+//@node Unpacking, , GranSim Code, GranSim Code
+//@subsubsection Unpacking
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+  /* In GranSim no data is ever really duplicated between PEs, so there is
+     nothing to common up; reaching this function indicates a bug. */
+  barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+/* This code fakes the unpacking of a somewhat virtual buffer: in GranSim
+   no data actually moves between PEs, so we only walk the closures listed
+   in the buffer and update each closure's bitmask of owning PEs.  The
+   buffer and its payload are freed here; returns the graph root. */
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+  nat size, ptrs, nonptrs, vhs,
+      bufptr = 0;
+  StgClosure *closure, *graphroot, *graph;
+  StgInfoTable *ip;
+  StgWord bufsize, unpackedsize,
+          pptr = 0, pptrs = 0, pvhs;
+  StgTSO* tso;
+  char str[240], str1[80];
+  int i;
+
+  bufptr = 0;
+  graphroot = buffer->buffer[0];
+
+  tso = buffer->tso;
+
+  /* Unpack the header */
+  unpackedsize = buffer->unpacked_size;
+  bufsize = buffer->size;
+
+  IF_GRAN_DEBUG(pack,
+                belch("<<< Unpacking <<%d>> (buffer @ %p):\n    (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+                      buffer->id, buffer, graphroot, where_is(graphroot),
+                      bufsize, tso->id, tso,
+                      where_is((StgClosure *)tso)));
+
+  do {
+    closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+    /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+    ip = get_closure_info(closure,
+                          &size, &ptrs, &nonptrs, &vhs, str);
+
+    IF_GRAN_DEBUG(pack,
+                  sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ",
+                          closure, (closure_HNF(closure) ? "NF" : "__"),
+                          PROCS(closure)));
+
+    if (ip->type == RBH) {
+      /* Revertible black hole: the node moves to this PE */
+      closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+
+      IF_GRAN_DEBUG(pack,
+                    strcat(str, " (converting RBH) "));
+
+      convertFromRBH(closure);  /* In GUM that's done by convertToFetchMe */
+    } else if (IS_BLACK_HOLE(closure)) {
+      closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+    } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+      /* BUG FIX: the original condition lacked parentheses; `==' binds
+         tighter than `&', so it parsed as procs & (PE_NUMBER(..)==0),
+         which is always 0 and the branch was never taken. */
+      if (closure_HNF(closure))
+        closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+      else
+        closure->header.gran.procs = PE_NUMBER(CurrentProc);  /* Move node */
+    }
+
+    IF_GRAN_DEBUG(pack,
+                  sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+    IF_GRAN_DEBUG(pack, belch(str));
+
+  } while (bufptr<buffer->size) ;   /* (parent != NULL); */
+
+  /* In GrAnSim we allocate pack buffers dynamically! -- HWL */
+  free(buffer->buffer);
+  free(buffer);
+
+  IF_GRAN_DEBUG(pack,
+                belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0));
+
+  return (graphroot);
+}
+#endif /* GRAN */
+#endif
+
+//@node Index, , GranSim Code, Unpacking closures
+//@subsection Index
+
+//@index
+//* CommonUp:: @cindex\s-+CommonUp
+//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer
+//* PendingGABuffer:: @cindex\s-+PendingGABuffer
+//* UnpackGraph:: @cindex\s-+UnpackGraph
+//@end index
diff --git a/rts/parallel/Dist.c b/rts/parallel/Dist.c
new file mode 100644
index 0000000000..eeec780716
--- /dev/null
+++ b/rts/parallel/Dist.c
@@ -0,0 +1,117 @@
+#include "Dist.h"
+
+#ifdef DIST /* whole file */
+
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParallelRts.h"
+#include "Parallel.h" // nPEs,allPEs,mytid
+#include "HLC.h" //for sendReval
+#include "LLC.h" //for pvm stuff
+#include "FetchMe.h" // for BLOCKED_FETCH_info
+#include "Storage.h" // for recordMutable
+
+/* Return the number of PEs in the system (hopefully the result > 0). */
+StgWord32 cGetPECount(void)
+{ return nPEs;
+}
+
+/* return taskID, n is 1..count, n=1 is always the mainPE.
+   NB: no bounds check -- the caller must supply a valid n. */
+StgPEId cGetPEId(StgWord32 n)
+{ return allPEs[n-1];
+}
+
+/* return the taskID of this PE */
+StgPEId cGetMyPEId(void)
+{ return mytid;
+}
+
+/* return the taskID of the owning PE of an MVar/TSO:
+   - MVAR/TSOs get converted to REMOTE_REFs when shipped, and
+     there is no mechanism for using these REMOTE_REFs
+     apart from this code.
+   Barfs on any other closure type.
+*/
+
+StgPEId cGetCertainOwner(StgClosure *mv)
+{ globalAddr *ga;
+  switch(get_itbl(mv)->type)
+  { case TSO:
+    case MVAR:
+      return mytid; // must be local
+    case REMOTE_REF:
+      ga = LAGAlookup(mv);
+      ASSERT(ga);
+      return ga->payload.gc.gtid; // I know its global address
+  }
+  barf("Dist.c:cGetCertainOwner() wrong closure type %s",info_type(mv));
+}
+
+/* for some additional fun, lets look up a certain host...
+   Returns the taskID of the (single) PE running on host h, or 0 if the
+   host is unknown or runs no PE.  Queries PVM for the host/task lists. */
+StgPEId cGetHostOwner(StgByteArray h) //okay h is a C string
+{ int nArch,nHost,nTask,i;
+  StgPEId dtid;
+  struct pvmhostinfo *host;
+  struct pvmtaskinfo *task;
+
+  dtid=0;
+  pvm_config(&nHost,&nArch,&host);
+  for(i=0;i<nHost;i++)
+    if(strcmp(host[i].hi_name,h)==0)
+    { dtid=host[i].hi_tid;
+      break;
+    }
+  if(dtid==0) return 0; // no host of that name
+
+  for(i=0;i<nPEs;i++)
+  { pvm_tasks(allPEs[i],&nTask,&task);
+    ASSERT(nTask==1); //cause we lookup a single task
+    if(task[0].ti_host==dtid)
+      return allPEs[i];
+  }
+  return 0; // know host, but no PE on it
+}
+
+/* Ship the thunk held in MVar @job@ to PE @p@ for remote evaluation.
+   The thunk is packed (becoming an RBH locally) and sent via sendReval;
+   the local copy is then turned into a FETCH_ME_BQ so that any thread
+   entering it blocks until the result arrives.  Note: no ACK message is
+   expected, hence the ga field is left empty. */
+void cRevalIO(StgClosure *job,StgPEId p)
+{ nat size;
+  rtsPackBuffer *buffer=NULL;
+
+  ASSERT(get_itbl(job)->type==MVAR);
+  job=((StgMVar*)job)->value; // extract the job from the MVar
+
+  ASSERT(closure_THUNK(job)); // must be a closure!!!!!
+  ASSERT(p!=mytid);
+
+  buffer = PackNearbyGraph(job, END_TSO_QUEUE, &size,p);
+  ASSERT(buffer != (rtsPackBuffer *)NULL);
+  ASSERT(get_itbl(job)->type==RBH); // packing turned the thunk into an RBH
+
+  IF_PAR_DEBUG(verbose,
+               belch("@;~) %x doing revalIO to %x\n",
+                     mytid,p));
+
+  sendReval(p,size,buffer);
+
+  if (RtsFlags.ParFlags.ParStats.Global &&
+      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+    globalParStats.tot_reval_mess++;
+  }
+
+  /*
+    We turn job into a FETCHME_BQ so that the thread will block
+    when it enters it.
+
+    Note: it will not receive an ACK, thus no GA.
+  */
+
+  ASSERT(get_itbl(job)->type==RBH);
+
+  /* put closure on mutables list, while it is still a RBH */
+  recordMutable((StgMutClosure *)job);
+
+  /* actually turn it into a FETCH_ME_BQ */
+  SET_INFO(job, &FETCH_ME_BQ_info);
+  ((StgFetchMe *)job)->ga = 0; //hope this won't make anyone barf!!!
+  ((StgBlockingQueue*)job)->blocking_queue=END_BQ_QUEUE;
+}
+
+#endif
diff --git a/rts/parallel/Dist.h b/rts/parallel/Dist.h
new file mode 100644
index 0000000000..c67cce2748
--- /dev/null
+++ b/rts/parallel/Dist.h
@@ -0,0 +1,20 @@
+#ifndef __DIST_H
+#define __DIST_H
+
+#ifdef DIST
+
+#include "Rts.h"
+
+/* A PE is named by its task id (cf. cGetMyPEId in Dist.c) */
+typedef StgWord32 StgPEId;
+
+// interface functions for Haskell Language calls
+StgWord32 cGetPECount(void);              // number of PEs in the system
+StgPEId cGetPEId(StgWord32 n);            // taskID of PE n (1..count; 1 = mainPE)
+StgPEId cGetMyPEId(void);                 // taskID of this PE
+StgPEId cGetCertainOwner(StgClosure *mv); // owning PE of an MVar/TSO/REMOTE_REF
+void cRevalIO(StgClosure *job,StgPEId p); // ship a thunk to PE p for evaluation
+StgPEId cGetHostOwner(StgByteArray h);    // taskID of the PE on host h (0 if none)
+
+#endif /* DIST */
+
+#endif /* __DIST_H */
diff --git a/rts/parallel/FetchMe.h b/rts/parallel/FetchMe.h
new file mode 100644
index 0000000000..be5cbf6b54
--- /dev/null
+++ b/rts/parallel/FetchMe.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * Closure types for the parallel system.
+ *
+ * Info-table (EI_) and entry-code (EF_) declarations for closures that
+ * only exist in a parallel setup; definitions live in FetchMe.hc.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* remote pointer; entering it blocks and triggers a fetch (see FetchMe.hc) */
+EI_(stg_FETCH_ME_info);
+EF_(stg_FETCH_ME_entry);
+
+/* FETCH_ME with a blocking queue of threads awaiting the data */
+EI_(stg_FETCH_ME_BQ_info);
+EF_(stg_FETCH_ME_BQ_entry);
+
+/* queue member recording that a remote TSO awaits this closure's value */
+EI_(stg_BLOCKED_FETCH_info);
+EF_(stg_BLOCKED_FETCH_entry);
+
+/* reference to a sticky object on another PE; never enterable */
+EI_(stg_REMOTE_REF_info);
+EF_(stg_REMOTE_REF_entry);
+
+/* NOTE(review): RBH_Save_* presumably hold payload words displaced when a
+   closure is converted to a revertible black hole -- confirm elsewhere */
+EI_(stg_RBH_Save_0_info);
+EF_(stg_RBH_Save_0_entry);
+EI_(stg_RBH_Save_1_info);
+EF_(stg_RBH_Save_1_entry);
+EI_(stg_RBH_Save_2_info);
+EF_(stg_RBH_Save_2_entry);
diff --git a/rts/parallel/FetchMe.hc b/rts/parallel/FetchMe.hc
new file mode 100644
index 0000000000..f142e9e514
--- /dev/null
+++ b/rts/parallel/FetchMe.hc
@@ -0,0 +1,180 @@
+/* ----------------------------------------------------------------------------
+ Time-stamp: <Tue Mar 06 2001 17:01:46 Stardate: [-30]6288.54 hwloidl>
+
+ Entry code for a FETCH_ME closure
+
+ This module defines routines for handling remote pointers (@FetchMe@s)
+ in GUM. It is threaded (@.hc@) because @FetchMe_entry@ will be
+ called during evaluation.
+
+ * --------------------------------------------------------------------------*/
+
+#ifdef PAR /* all of it */
+
+//@menu
+//* Includes::
+//* Info tables::
+//* Index::
+//@end menu
+
+//@node Includes, Info tables
+//@subsection Includes
+
+#include "Stg.h"
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "GranSim.h"
+#include "GranSimRts.h"
+#include "Parallel.h"
+#include "ParallelRts.h"
+#include "FetchMe.h"
+#include "HLC.h"
+#include "StgRun.h" /* for StgReturn and register saving */
+
+/* --------------------------------------------------------------------------
+ FETCH_ME closures.
+
+ A FETCH_ME closure represents data that currently resides on
+ another PE. We issue a fetch message, and wait for the data to be
+ retrieved.
+
+ A word on the ptr/nonptr fields in the macros: they are unused at the
+ moment; all closures defined here have constant size (ie. no payload
+ that varies from closure to closure). Therefore, all routines that
+ need to know the size of these closures have to do a sizeofW(StgFetchMe)
+ etc to get the closure size. See get_closure_info(), evacuate() and
+ checkClosure() (using the same fcts for determining the size of the
+ closures would be a good idea; at least it would be a nice step towards
+ making this code bug free).
+ ------------------------------------------------------------------------ */
+
+//@node Info tables, Index, Includes
+//@subsection Info tables
+
+//@cindex FETCH_ME_info
+INFO_TABLE(stg_FETCH_ME_info, stg_FETCH_ME_entry, 0,2, FETCH_ME,, EF_,"FETCH_ME","FETCH_ME");
+//@cindex FETCH_ME_entry
+/* Entry code for a FETCH_ME: the data lives on another PE.  Convert the
+   closure to a FETCH_ME_BQ, enqueue the current TSO on it and block; the
+   actual fetch message is sent later from blockThread (scheduler). */
+STGFUN(stg_FETCH_ME_entry)
+{
+  FB_
+  TICK_ENT_BH();
+
+  /* the GA must be remote, otherwise we would not have a FETCH_ME */
+  ASSERT(((StgFetchMe *)R1.p)->ga->payload.gc.gtid != mytid);
+
+  /* Turn the FETCH_ME into a FETCH_ME_BQ, and place the current thread
+   * on the blocking queue.
+   */
+  // ((StgFetchMeBlockingQueue *)R1.cl)->header.info = &FETCH_ME_BQ_info; // does the same as SET_INFO
+  SET_INFO((StgClosure *)R1.cl, &stg_FETCH_ME_BQ_info);
+
+  /* Remember GA as a global var (used in blockThread); NB: not thread safe! */
+  ASSERT(theGlobalFromGA.payload.gc.gtid == (GlobalTaskId)0);
+  theGlobalFromGA = *((StgFetchMe *)R1.p)->ga;
+
+  /* Put ourselves on the blocking queue for this black hole */
+  ASSERT(looks_like_ga(((StgFetchMe *)R1.p)->ga));
+  CurrentTSO->link = END_BQ_QUEUE;
+  ((StgFetchMeBlockingQueue *)R1.cl)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+
+  /* jot down why and on what closure we are blocked */
+  CurrentTSO->why_blocked = BlockedOnGA;
+  CurrentTSO->block_info.closure = R1.cl;
+  /* closure is mutable since something has just been added to its BQ */
+  //recordMutable((StgMutClosure *)R1.cl);
+
+  /* sendFetch etc is now done in blockThread, which is called from the
+     scheduler -- HWL */
+
+  BLOCK_NP(1);
+  FE_
+}
+
+/* ---------------------------------------------------------------------------
+ FETCH_ME_BQ
+
+ On the first entry of a FETCH_ME closure, we turn the closure into
+ a FETCH_ME_BQ, which behaves just like a BLACKHOLE_BQ. Any thread
+ entering the FETCH_ME_BQ will be placed in the blocking queue.
+ When the data arrives from the remote PE, all waiting threads are
+ woken up and the FETCH_ME_BQ is overwritten with the fetched data.
+
+ FETCH_ME_BQ_entry is almost identical to BLACKHOLE_BQ_entry -- HWL
+ ------------------------------------------------------------------------ */
+
+INFO_TABLE(stg_FETCH_ME_BQ_info, stg_FETCH_ME_BQ_entry,0,2,FETCH_ME_BQ,,EF_,"FETCH_ME_BQ","FETCH_ME_BQ");
+//@cindex FETCH_ME_BQ_info
+/* Entry code for a FETCH_ME_BQ: a fetch for this closure is already in
+   flight, so just prepend the current TSO to the blocking queue; note
+   BlockedOnGA_NoSend -- no second fetch message is sent. */
+STGFUN(stg_FETCH_ME_BQ_entry)
+{
+  FB_
+  TICK_ENT_BH();
+
+  /* Put ourselves on the blocking queue for this node */
+  CurrentTSO->link = (StgTSO*)((StgBlockingQueue *)R1.p)->blocking_queue;
+  ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO;
+
+  /* jot down why and on what closure we are blocked */
+  CurrentTSO->why_blocked = BlockedOnGA_NoSend;
+  CurrentTSO->block_info.closure = R1.cl;
+
+  /* stg_gen_block is too heavyweight, use a specialised one */
+  BLOCK_NP(1);
+  FE_
+}
+
+/* ---------------------------------------------------------------------------
+ BLOCKED_FETCH_BQ
+
+ A BLOCKED_FETCH closure only ever exists in the blocking queue of a
+ globally visible closure i.e. one with a GA. A BLOCKED_FETCH closure
+ indicates that a TSO on another PE is waiting for the result of this
+ computation. Thus, when updating the closure, the result has to be sent
+ to that PE. The relevant routines handling that are awakenBlockedQueue
+ and blockFetch (for putting BLOCKED_FETCH closure into a BQ).
+ ------------------------------------------------------------------------ */
+
+//@cindex BLOCKED_FETCH_info
+INFO_TABLE(stg_BLOCKED_FETCH_info, stg_BLOCKED_FETCH_entry,0,2,BLOCKED_FETCH,,EF_,"BLOCKED_FETCH","BLOCKED_FETCH");
+//@cindex BLOCKED_FETCH_entry
+/* A BLOCKED_FETCH only ever lives inside a blocking queue; entering one
+   is a bug, so report and shut down. */
+STGFUN(stg_BLOCKED_FETCH_entry)
+{
+  FB_
+  /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */
+  STGCALL2(fprintf,stderr,"BLOCKED_FETCH object entered!\n");
+  STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE);
+  FE_
+}
+
+
+/* ---------------------------------------------------------------------------
+ REMOTE_REF
+
+ A REMOTE_REF closure is generated whenever we wish to refer to a sticky
+ object on another PE.
+ ------------------------------------------------------------------------ */
+
+//@cindex REMOTE_REF_info
+INFO_TABLE(stg_REMOTE_REF_info, stg_REMOTE_REF_entry,0,2,REMOTE_REF,,EF_,"REMOTE_REF","REMOTE_REF");
+//@cindex REMOTE_REF_entry
+/* REMOTE_REFs name sticky objects on other PEs and must never be
+   entered; report and shut down. */
+STGFUN(stg_REMOTE_REF_entry)
+{
+  FB_
+  /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */
+  STGCALL2(fprintf,stderr,"REMOTE REF object entered!\n");
+  STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE);
+  FE_
+}
+
+#endif /* PAR */
+
+//@node Index, , Info tables
+//@subsection Index
+
+//@index
+//* BLOCKED_FETCH_entry:: @cindex\s-+BLOCKED_FETCH_entry
+//* BLOCKED_FETCH_info:: @cindex\s-+BLOCKED_FETCH_info
+//* FETCH_ME_BQ_info:: @cindex\s-+FETCH_ME_BQ_info
+//* FETCH_ME_entry:: @cindex\s-+FETCH_ME_entry
+//* FETCH_ME_info:: @cindex\s-+FETCH_ME_info
+//@end index
diff --git a/rts/parallel/Global.c b/rts/parallel/Global.c
new file mode 100644
index 0000000000..b2541357e1
--- /dev/null
+++ b/rts/parallel/Global.c
@@ -0,0 +1,1090 @@
+/* ---------------------------------------------------------------------------
+ Time-stamp: <Wed Mar 21 2001 16:32:23 Stardate: [-30]6363.44 hwloidl>
+
+ (c) The AQUA/Parade Projects, Glasgow University, 1995
+ The GdH/APART 624 Projects, Heriot-Watt University, Edinburgh, 1999
+
+ Global Address Manipulation.
+
+ The GALA and LAGA tables for mapping global addresses to local addresses
+ (i.e. heap pointers) are defined here. We use the generic hash tables
+ defined in Hash.c.
+ ------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@menu
+//* Includes::
+//* Global tables and lists::
+//* Fcts on GALA tables::
+//* Interface to taskId-PE table::
+//* Interface to LAGA table::
+//* Interface to GALA table::
+//* GC functions for GALA tables::
+//* Index::
+//@end menu
+//*/
+
+//@node Includes, Global tables and lists, Global Address Manipulation, Global Address Manipulation
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "Hash.h"
+#include "HLC.h"
+#include "ParallelRts.h"
+#if defined(DEBUG)
+# include "Sanity.h"
+#include "ParallelDebug.h"
+#endif
+#if defined(DIST)
+# include "Dist.h"
+#endif
+
+/*
+ @globalAddr@ structures are allocated in chunks to reduce malloc overhead.
+*/
+
+//@node Global tables and lists, Fcts on GALA tables, Includes, Global Address Manipulation
+//@subsection Global tables and lists
+
+//@cindex thisPE
+nat thisPE;
+
+//@menu
+//* Free lists::
+//* Hash tables::
+//@end menu
+
+//@node Free lists, Hash tables, Global tables and lists, Global tables and lists
+//@subsubsection Free lists
+
+/* Free list of GALA entries */
+GALA *freeGALAList = NULL;
+
+/* Number of globalAddr cells to allocate in one go */
+#define GCHUNK (1024 * sizeof(StgWord) / sizeof(GALA))
+
+/* Free list of indirections */
+
+//@cindex nextIndirection
+static StgInt nextIndirection = 0;
+//@cindex freeIndirections
+GALA *freeIndirections = NULL;
+
+/* The list of live indirections has to be marked for GC (see makeGlobal) */
+//@cindex liveIndirections
+GALA *liveIndirections = NULL;
+
+/* The list of remote indirections has to be marked for GC (see setRemoteGA) */
+//@cindex liveRemoteGAs
+GALA *liveRemoteGAs = NULL;
+
+//@node Hash tables, , Free lists, Global tables and lists
+//@subsubsection Hash tables
+
+/* Mapping global task ids PEs */
+//@cindex taskIDtoPEtable
+HashTable *taskIDtoPEtable = NULL;
+
+static int nextPE = 0;
+
+/* LAGA table: StgClosure* -> globalAddr*
+ (Remember: globalAddr = (GlobalTaskId, Slot, Weight))
+ Mapping local to global addresses (see interface below)
+*/
+
+//@cindex LAtoGALAtable
+HashTable *LAtoGALAtable = NULL;
+
+/* GALA table: globalAddr* -> StgClosure*
+ (Remember: globalAddr = (GlobalTaskId, Slot, Weight))
+ Mapping global to local addresses (see interface below)
+*/
+
+//@cindex pGAtoGALAtable
+HashTable *pGAtoGALAtable = NULL;
+
+//@node Fcts on GALA tables, Interface to taskId-PE table, Global tables and lists, Global Address Manipulation
+//@subsection Fcts on GALA tables
+
+//@cindex allocGALA
+/* Allocate one GALA entry: reuse the head of freeGALAList when possible,
+   otherwise malloc a chunk of GCHUNK entries, hand out the first and
+   thread the rest onto the free list.  The 0xdead... poison values are
+   only checked/installed in a sanity-checking build. */
+static GALA *
+allocGALA(void)
+{
+  GALA *gl, *p;
+
+  if ((gl = freeGALAList) != NULL) {
+    IF_DEBUG(sanity,
+             ASSERT(gl->ga.weight==0xdead0add);
+             ASSERT(gl->la==(StgPtr)0xdead00aa));
+    freeGALAList = gl->next;
+  } else {
+    gl = (GALA *) stgMallocBytes(GCHUNK * sizeof(GALA), "allocGALA");
+
+    freeGALAList = gl + 1;
+    for (p = freeGALAList; p < gl + GCHUNK - 1; p++) {
+      p->next = p + 1;
+      IF_DEBUG(sanity,
+               p->ga.weight=0xdead0add;
+               p->la=(StgPtr)0xdead00aa);
+    }
+    /* last elem in the new block has NULL pointer in link field */
+    p->next = NULL;
+    IF_DEBUG(sanity,
+             p->ga.weight=0xdead0add;
+             p->la=(StgPtr)0xdead00aa);
+  }
+  IF_DEBUG(sanity,
+           gl->ga.weight=0xdead0add;
+           gl->la=(StgPtr)0xdead00aa);
+  return gl;
+}
+
+//@node Interface to taskId-PE table, Interface to LAGA table, Fcts on GALA tables, Global Address Manipulation
+//@subsection Interface to taskId-PE table
+
+/*
+ We don't really like GLOBAL_TASK_ID, so we keep a table of TASK_ID to
+ PE mappings. The idea is that a PE identifier will fit in 16 bits, whereas
+ a TASK_ID may not.
+*/
+
+//@cindex taskIDtoPE
+/* Map a global task id onto its small PE number via the hash table. */
+PEs
+taskIDtoPE(GlobalTaskId gtid)
+{
+  return ((PEs) lookupHashTable(taskIDtoPEtable, gtid));
+}
+
+//@cindex registerTask
+/* Record the PE number for a task id; PE numbers are handed out in
+   registration order, starting from 1. */
+void
+registerTask(GlobalTaskId gtid) {
+  nextPE++; //start counting from 1
+  if (gtid == mytid)
+    thisPE = nextPE;
+
+  insertHashTable(taskIDtoPEtable, gtid, (void *) (StgWord) nextPE);
+}
+
+//@node Interface to LAGA table, Interface to GALA table, Interface to taskId-PE table, Global Address Manipulation
+//@subsection Interface to LAGA table
+
+/*
+ The local address to global address mapping returns a globalAddr structure
+ (pe task id, slot, weight) for any closure in the local heap which has a
+ global identity. Such closures may be copies of normal form objects with
+ a remote `master' location, @FetchMe@ nodes referencing remote objects, or
+ globally visible objects in the local heap (for which we are the master).
+*/
+
+//@cindex LAGAlookup
+/* Local address -> global address: return the GA under which @addr@ is
+   known, or NULL if it has no global identity. */
+globalAddr *
+LAGAlookup(addr)
+StgClosure *addr;
+{
+  GALA *gala;
+
+  /* We never look for GA's on indirections. -- unknown hacker
+     Well, in fact at the moment we do in the new RTS. -- HWL
+     ToDo: unwind INDs when entering them into the hash table
+
+  ASSERT(IS_INDIRECTION(addr) == NULL);
+  */
+  if ((gala = lookupHashTable(LAtoGALAtable, (StgWord) addr)) == NULL)
+    return NULL;
+  else
+    return &(gala->ga);
+}
+
+//@node Interface to GALA table, GC functions for GALA tables, Interface to LAGA table, Global Address Manipulation
+//@subsection Interface to GALA table
+
+/*
+ We also manage a mapping of global addresses to local addresses, so that
+ we can ``common up'' multiple references to the same object as they arrive
+ in data packets from remote PEs.
+
+ The global address to local address mapping is actually managed via a
+ ``packed global address'' to GALA hash table. The packed global
+ address takes the interesting part of the @globalAddr@ structure
+ (i.e. the pe and slot fields) and packs them into a single word
+ suitable for hashing.
+*/
+
+//@cindex GALAlookup
+/* Global address -> local closure: look the packed GA up in the GALA
+   table; returns NULL if unknown.  Indirections are bypassed on the
+   returned pointer but the table entry itself is left untouched. */
+StgClosure *
+GALAlookup(ga)
+globalAddr *ga;
+{
+  StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+  GALA *gala;
+
+  if ((gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga)) == NULL)
+    return NULL;
+  else {
+    /*
+     * Bypass any indirections when returning a local closure to
+     * the caller.  Note that we do not short-circuit the entry in
+     * the GALA tables right now, because we would have to do a
+     * hash table delete and insert in the LAtoGALAtable to keep
+     * that table up-to-date for preferred GALA pairs.  That's
+     * probably a bit expensive.
+     */
+    return UNWIND_IND((StgClosure *)(gala->la));
+  }
+}
+
+/* ga becomes non-preferred (e.g. due to CommonUp); the GALA entry must
+   exist and must currently be the preferred GA for its LA. */
+void
+GALAdeprecate(ga)
+globalAddr *ga;
+{
+  StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+  GALA *gala;
+
+  gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga);
+  ASSERT(gala!=NULL);
+  ASSERT(gala->preferred==rtsTrue);
+  gala->preferred = rtsFalse;
+}
+
+/*
+ External references to our globally-visible closures are managed through an
+ indirection table. The idea is that the closure may move about as the result
+ of local garbage collections, but its global identity is determined by its
+ slot in the indirection table, which never changes.
+
+ The indirection table is maintained implicitly as part of the global
+ address to local address table. We need only keep track of the
+ highest numbered indirection index allocated so far, along with a free
+ list of lower numbered indices no longer in use.
+*/
+
+/*
+ Allocate an indirection slot for the closure currently at address @addr@.
+*/
+
+//@cindex allocIndirection
+/* Allocate a GALA entry naming @closure@ via a fresh indirection slot:
+   reuse an entry from freeIndirections when possible, otherwise take a
+   fresh GALA cell and assign it the next unused slot number.  The
+   returned entry carries the full weight (MAX_GA_WEIGHT). */
+static GALA *
+allocIndirection(StgClosure *closure)
+{
+  GALA *gala;
+
+  if ((gala = freeIndirections) != NULL) {
+    /* take a recycled indirection; poison values confirm it was free */
+    IF_DEBUG(sanity,
+             ASSERT(gala->ga.weight==0xdead0add);
+             ASSERT(gala->la==(StgPtr)0xdead00aa));
+    freeIndirections = gala->next;
+  } else {
+    gala = allocGALA();
+    IF_DEBUG(sanity,
+             ASSERT(gala->ga.weight==0xdead0add);
+             ASSERT(gala->la==(StgPtr)0xdead00aa));
+    gala->ga.payload.gc.gtid = mytid;
+    gala->ga.payload.gc.slot = nextIndirection++;
+    IF_DEBUG(sanity,
+             if (nextIndirection>=MAX_SLOTS)
+               /* BUG FIX: the %d conversion previously had no matching
+                  argument (undefined behavior in the varargs call) */
+               barf("Cannot handle more than %d slots for GA in a sanity-checking setup (this is no error)", MAX_SLOTS));
+  }
+  gala->ga.weight = MAX_GA_WEIGHT;
+  gala->la = (StgPtr)closure;
+  IF_DEBUG(sanity,
+           gala->next=(struct gala *)0xcccccccc);
+  return gala;
+}
+
+/*
+  This is only used for sanity checking (see LOOKS_LIKE_SLOT):
+  returns one past the highest indirection slot allocated so far.
+*/
+StgInt
+highest_slot (void) { return nextIndirection; }
+
+/*
+ Make a local closure globally visible.
+
+ Called from: GlobaliseAndPackGA
+ Args:
+ closure ... closure to be made visible
+     preferred ... should the new GA become the preferred one (normally true)
+
+ Allocate a GALA structure and add it to the (logical) Indirections table,
+ by inserting it into the LAtoGALAtable hash table and putting it onto the
+ liveIndirections list (only if it is preferred).
+
+ We have to allocate an indirection slot for it, and update both the local
+ address to global address and global address to local address maps.
+*/
+
+//@cindex makeGlobal
+/* Make a local closure globally visible: allocate an indirection GALA,
+   enter it into both hash tables and onto liveIndirections, and return
+   its GA (full details in the comment preceding this function). */
+globalAddr *
+makeGlobal(closure, preferred)
+StgClosure *closure;
+rtsBool preferred;
+{
+  /* check whether we already have a GA for this local closure */
+  GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) closure);
+  /* create an entry in the LAGA table */
+  GALA *newGALA = allocIndirection(closure);
+  StgWord pga = PackGA(thisPE, newGALA->ga.payload.gc.slot);
+
+  IF_DEBUG(sanity,
+           ASSERT(newGALA->next==(struct gala *)0xcccccccc););
+  // ASSERT(HEAP_ALLOCED(closure)); // check that closure might point into the heap; might be static, though
+  ASSERT(GALAlookup(&(newGALA->ga)) == NULL);
+
+  /* global statistics gathering */
+  if (RtsFlags.ParFlags.ParStats.Global &&
+      RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+    globalParStats.local_alloc_GA++;
+  }
+
+  newGALA->la = (StgPtr)closure;
+  newGALA->preferred = preferred;
+
+  if (preferred) {
+    /* The new GA is now the preferred GA for the LA */
+    if (oldGALA != NULL) {
+      oldGALA->preferred = rtsFalse;
+      (void) removeHashTable(LAtoGALAtable, (StgWord) closure, (void *) oldGALA);
+    }
+    insertHashTable(LAtoGALAtable, (StgWord) closure, (void *) newGALA);
+  }
+
+  ASSERT(!isOnLiveIndTable(&(newGALA->ga)));
+  /* put the new GALA entry on the list of live indirections */
+  newGALA->next = liveIndirections;
+  liveIndirections = newGALA;
+
+  insertHashTable(pGAtoGALAtable, pga, (void *) newGALA);
+
+  return &(newGALA->ga);
+}
+
+/*
+ Assign an existing remote global address to an existing closure.
+
+ Called from: Unpack in Pack.c
+ Args:
+ local_closure ... a closure that has just been unpacked
+ remote_ga ... the GA that came with it, ie. the name under which the
+ closure is known while being transferred
+     preferred ... should the new GA become the preferred one (normally true)
+
+ Allocate a GALA structure and add it to the (logical) RemoteGA table,
+ by inserting it into the LAtoGALAtable hash table and putting it onto the
+ liveRemoteGAs list (only if it is preferred).
+
+ We do not retain the @globalAddr@ structure that's passed in as an argument,
+ so it can be a static in the calling routine.
+*/
+
+//@cindex setRemoteGA
+/* Assign an existing remote GA to a freshly unpacked closure (details in
+   the comment preceding this function).  The incoming GA's weight is
+   transferred to the new GALA entry and zeroed in *remote_ga. */
+globalAddr *
+setRemoteGA(local_closure, remote_ga, preferred)
+StgClosure *local_closure;
+globalAddr *remote_ga;
+rtsBool preferred;
+{
+  /* old entry ie the one with the GA generated when sending off the closure */
+  GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) local_closure);
+  /* alloc new entry and fill it with contents of the newly arrived GA */
+  GALA *newGALA = allocGALA();
+  StgWord pga = PackGA(taskIDtoPE(remote_ga->payload.gc.gtid),
+                       remote_ga->payload.gc.slot);
+
+  ASSERT(remote_ga->payload.gc.gtid != mytid);
+  ASSERT(remote_ga->weight > 0);
+  ASSERT(GALAlookup(remote_ga) == NULL);
+
+  newGALA->ga = *remote_ga;
+  newGALA->la = (StgPtr)local_closure;
+  newGALA->preferred = preferred;
+
+  if (preferred) {
+    /* The new GA is now the preferred GA for the LA */
+    if (oldGALA != NULL) {
+      oldGALA->preferred = rtsFalse;
+      (void) removeHashTable(LAtoGALAtable, (StgWord) local_closure, (void *) oldGALA);
+    }
+    insertHashTable(LAtoGALAtable, (StgWord) local_closure, (void *) newGALA);
+  }
+
+  ASSERT(!isOnRemoteGATable(&(newGALA->ga)));
+  /* add new entry to the (logical) RemoteGA table */
+  newGALA->next = liveRemoteGAs;
+  liveRemoteGAs = newGALA;
+
+  insertHashTable(pGAtoGALAtable, pga, (void *) newGALA);
+
+  /*
+    The weight carried by the incoming closure is transferred to the newGALA
+    entry (via the structure assign above).  Therefore, we have to give back
+    the weight to the GA on the other processor, because that indirection is
+    no longer needed.
+  */
+  remote_ga->weight = 0;
+  return &(newGALA->ga);
+}
+
+/*
+ Give me a bit of weight to give away on a new reference to a particular
+ global address. If we run down to nothing, we have to assign a new GA.
+*/
+
+//@cindex splitWeight
+#if 0
+/* Old version (disabled): split off half of @from@'s weight into @to@;
+   if @from@ is nearly out of weight, give it a fresh GA first. */
+void
+splitWeight(to, from)
+globalAddr *to, *from;
+{
+  /* Make sure we have enough weight to split */
+  if (from->weight!=MAX_GA_WEIGHT && from->weight<=3) // fixed by UK in Eden implementation
+    from = makeGlobal(GALAlookup(from), rtsTrue);
+
+  to->payload = from->payload;
+
+  if (from->weight == MAX_GA_WEIGHT)
+    to->weight = 1L << (BITS_IN(unsigned) - 1);
+  else
+    to->weight = from->weight / 2;
+
+  from->weight -= to->weight;
+}
+#else
+/* Hand out weight for a new reference to @from@'s GA: copy the payload
+   into @to@ and split the remaining weight between the two. */
+void
+splitWeight(to, from)
+globalAddr *to, *from;
+{
+  /* Make sure we have enough weight to split */
+  /* Splitting at 2 needed, as weight 1 is not legal in packets (UK+KH) */
+
+  if (from->weight / 2 <= 2) /* old: weight== 1 (UK) */
+    from = makeGlobal(GALAlookup(from), rtsTrue);
+
+  to->payload = from->payload;
+
+  if (from->weight <= 1) /* old == 0 (UK) */
+    to->weight = 1L << (BITS_IN(unsigned) - 1);
+  else
+    to->weight = from->weight / 2;
+
+  from->weight -= to->weight;
+}
+#endif
+/*
+ Here, I am returning a bit of weight that a remote PE no longer needs.
+*/
+
+//@cindex addWeight
+/* A remote PE returns weight it no longer needs: fold it back into our
+   GALA entry for this GA and zero the incoming weight. */
+globalAddr *
+addWeight(ga)
+globalAddr *ga;
+{
+  StgWord pga;
+  GALA *gala;
+
+  ASSERT(LOOKS_LIKE_GA(ga));
+
+  pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot);
+  gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga);
+
+  IF_PAR_DEBUG(weight,
+               fprintf(stderr, "@* Adding weight %x to ", ga->weight);
+               printGA(&(gala->ga));
+               fputc('\n', stderr));
+
+  gala->ga.weight += ga->weight;
+  ga->weight = 0;
+
+  return &(gala->ga);
+}
+
+/*
+ Initialize all of the global address structures: the task ID to PE id
+ map, the local address to global address map, the global address to
+ local address map, and the indirection table.
+*/
+
+//@cindex initGAtables
+/* Allocate the three hash tables: taskID->PE, LA->GALA and pGA->GALA. */
+void
+initGAtables(void)
+{
+  taskIDtoPEtable = allocHashTable();
+  LAtoGALAtable = allocHashTable();
+  pGAtoGALAtable = allocHashTable();
+}
+
+//@cindex PackGA
+/* Pack (pe, slot) into a single StgWord suitable for hashing: the top
+   quarter of the word holds the PE number, the remainder the slot.
+   Exits if the slot does not fit (see explanatory comment below). */
+StgWord
+PackGA (pe, slot)
+StgWord pe;
+int slot;
+{
+  int pe_shift = (BITS_IN(StgWord)*3)/4;
+  int pe_bits = BITS_IN(StgWord) - pe_shift;
+
+  if ( pe_bits < 8 || slot >= (1L << pe_shift) ) { /* big trouble */
+    fflush(stdout);
+    fprintf(stderr, "PackGA: slot# too big (%d) or not enough pe_bits (%d)\n",
+            slot,pe_bits);
+    stg_exit(EXIT_FAILURE);
+  }
+
+  return((((StgWord)(pe)) << pe_shift) | ((StgWord)(slot)));
+
+  /* the idea is to use 3/4 of the bits (e.g., 24) for indirection-
+     table "slot", and 1/4 for the pe# (e.g., 8).
+
+     We check for too many bits in "slot", and double-check (at
+     compile-time?) that we have enough bits for "pe".  We *don't*
+     check for too many bits in "pe", because SysMan enforces a
+     MAX_PEs limit at the very very beginning.
+
+     Phil & Will 95/08
+  */
+}
+
+//@node GC functions for GALA tables, Debugging routines, Interface to GALA table, Global Address Manipulation
+//@subsection GC functions for GALA tables
+
+/*
+ When we do a copying collection, we want to evacuate all of the local
+ entries in the GALA table for which there are outstanding remote
+ pointers (i.e. for which the weight is not MAX_GA_WEIGHT.)
+ This routine has to be run BEFORE doing the GC proper (it's a
+ ``mark roots'' thing).
+*/
+//@cindex markLocalGAs
+/* GC root-marking pass over the liveIndirections list; must run BEFORE
+   the GC proper (see comment above).  For each locally-owned GALA entry:
+     - if remote references exist (weight != MAX_GA_WEIGHT), evacuate the
+       local closure via MarkRoot and fix up the LA->GALA hash table;
+     - if the closure is static (a CAF), just mark it;
+     - otherwise all weight is back home, so the GALA slot is recycled
+       onto freeIndirections.
+   Side effect: the liveIndirections list is reversed by the traversal. */
+void
+markLocalGAs(rtsBool full)
+{
+  GALA *gala, *next, *prev = NULL;
+  StgPtr old_la, new_la;
+  nat n=0, m=0; // debugging only
+  double start_time_GA; // stats only; NOTE(review): never used in this function
+
+  IF_PAR_DEBUG(tables,
+ belch("@@%%%% markLocalGAs (full=%d): Marking LIVE INDIRECTIONS in GALA table starting with GALA at %p\n",
+ full, liveIndirections);
+ printLAGAtable());
+
+  PAR_TICKY_MARK_LOCAL_GAS_START();
+
+  for (gala = liveIndirections, m=0; gala != NULL; gala = next, m++) {
+    IF_PAR_DEBUG(tables,
+ fputs("@@ ",stderr);
+ printGA(&(gala->ga));
+ fprintf(stderr, ";@ %d: LA: %p (%s) ",
+ m, (void*)gala->la, info_type((StgClosure*)gala->la)));
+    next = gala->next;
+    old_la = gala->la;
+    ASSERT(gala->ga.payload.gc.gtid == mytid);   /* it's supposed to be local */
+    if (gala->ga.weight != MAX_GA_WEIGHT) {
+      /* Remote references exist, so we must evacuate the local closure */
+      if (get_itbl((StgClosure *)old_la)->type == EVACUATED) {
+	/* somebody else already evacuated this closure */
+	new_la = (StgPtr)((StgEvacuated *)old_la)->evacuee;
+	IF_PAR_DEBUG(tables,
+		     belch(" already evacuated to %p", new_la));
+      } else {
+#if 1
+	/* unwind any indirections we find */
+	StgClosure *foo = UNWIND_IND((StgClosure *)old_la) ; // debugging only
+	//ASSERT(HEAP_ALLOCED(foo));
+	n++;
+
+	new_la = (StgPtr) MarkRoot(foo);
+	IF_PAR_DEBUG(tables,
+		     belch(" evacuated %p to %p", foo, new_la));
+	/* ToDo: is this the right assertion to check that new_la is in to-space?
+	ASSERT(!HEAP_ALLOCED(new_la) || Bdescr(new_la)->evacuated);
+	*/
+#else
+	new_la = MarkRoot(old_la); // or just evacuate(old_ga)
+	IF_PAR_DEBUG(tables,
+		     belch(" evacuated %p to %p", old_la, new_la));
+#endif
+      }
+
+      gala->la = new_la;
+      /* remove old LA and replace with new LA */
+      if (/* !full && */ gala->preferred && new_la != old_la) {
+	GALA *q;
+	ASSERT(lookupHashTable(LAtoGALAtable, (StgWord)old_la));
+	(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala);
+	if ((q = lookupHashTable(LAtoGALAtable, (StgWord) new_la))!=NULL) {
+	  /* the new address is already hashed: keep only one preferred GA
+	     per local address by deprecating the existing entry */
+	  if (q->preferred && gala->preferred) {
+	    q->preferred = rtsFalse;
+	    IF_PAR_DEBUG(tables,
+			 fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+				 new_la, info_type((StgClosure*)new_la));
+			 printGA(&(q->ga));
+			 fputc('\n', stderr));
+	  }
+	} else {
+	  insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala);
+	}
+	IF_PAR_DEBUG(tables,
+		     belch("__## Hash table update (%p --> %p): ",
+			   old_la, new_la));
+      }
+
+      /* push onto the (reversed) live list */
+      gala->next = prev;
+      prev = gala;
+    } else if(LOOKS_LIKE_STATIC_CLOSURE(gala->la)) {
+      /* to handle the CAFs, is this all?*/
+      MarkRoot(gala->la);
+      IF_PAR_DEBUG(tables,
+		   belch(" processed static closure"));
+      n++;
+      gala->next = prev;
+      prev = gala;
+    } else {
+      /* Since we have all of the weight, this GA is no longer needed */
+      StgWord pga = PackGA(thisPE, gala->ga.payload.gc.slot);
+
+      IF_PAR_DEBUG(free,
+		   belch("@@!! Freeing slot %d",
+			 gala->ga.payload.gc.slot));
+      /* put gala on free indirections list */
+      gala->next = freeIndirections;
+      freeIndirections = gala;
+      (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala);
+      if (/* !full && */ gala->preferred)
+	(void) removeHashTable(LAtoGALAtable, (W_) gala->la, (void *) gala);
+
+      /* poison the freed entry so checkFreeIndirectionsList can verify it */
+      IF_DEBUG(sanity,
+	       gala->ga.weight = 0xdead0add;
+	       gala->la = (StgPtr) 0xdead00aa);
+    }
+  } /* for gala ... */
+  liveIndirections = prev;  /* list has been reversed during the marking */
+
+
+  PAR_TICKY_MARK_LOCAL_GAS_END(n);
+
+  IF_PAR_DEBUG(tables,
+	       belch("@@%%%% markLocalGAs: %d of %d GALAs marked on PE %x",
+		     n, m, mytid));
+}
+
+/*
+ Traverse the GALA table: for every live remote GA check whether it has been
+ touched during GC; if not it is not needed locally and we can free the
+ closure (i.e. let go of its heap space and send a free message to the
+ PE holding its GA).
+ This routine has to be run AFTER doing the GC proper.
+*/
+/* Post-GC pass over the liveRemoteGAs list (see comment above): any
+   remote GA whose closure was not evacuated is dead, so its GALA entry
+   is recycled and a FREE message (returning our weight) is queued for
+   the owning PE.  Survivors get their local address updated to the
+   to-space copy.  Finally the LA->GA table is rebuilt from scratch.
+   Side effect: the liveRemoteGAs list is reversed by the traversal. */
+void
+rebuildGAtables(rtsBool full)
+{
+  GALA *gala, *next, *prev;
+  StgClosure *closure;
+  nat n = 0, size_GA = 0; // stats only (no. of GAs, and their heap size in bytes)
+
+  IF_PAR_DEBUG(tables,
+	       belch("@@%%%% rebuildGAtables (full=%d): rebuilding LIVE REMOTE GAs in GALA table starting with GALA at %p\n",
+		     full, liveRemoteGAs));
+
+  PAR_TICKY_REBUILD_GA_TABLES_START();
+
+  /* set up per-PE buffers so FREE messages can be batched below */
+  prepareFreeMsgBuffers();
+
+  for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) {
+    IF_PAR_DEBUG(tables,
+		 printGA(&(gala->ga)));
+    next = gala->next;
+    ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */
+
+    closure = (StgClosure *) (gala->la);
+    IF_PAR_DEBUG(tables,
+		 fprintf(stderr, " %p (%s) ",
+			 (StgClosure *)closure, info_type(closure)));
+
+    if (/* !full && */ gala->preferred)
+      (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+
+    /* Follow indirection chains to the end, just in case */
+    // should conform with unwinding in markLocalGAs
+    closure = UNWIND_IND(closure);
+
+    /*
+       If closure has been evacuated it is live; otherwise it's dead and we
+       can nuke the GA attached to it in the LAGA table.
+       This approach also drops global aliases for PLCs.
+    */
+
+    //ASSERT(!HEAP_ALLOCED(closure) || !(Bdescr((StgPtr)closure)->evacuated));
+    if (get_itbl(closure)->type == EVACUATED) {
+      closure = ((StgEvacuated *)closure)->evacuee;
+      IF_PAR_DEBUG(tables,
+		   fprintf(stderr, " EVAC %p (%s)\n",
+			   closure, info_type(closure)));
+    } else {
+      /* closure is not alive any more, thus remove GA and send free msg */
+      int pe = taskIDtoPE(gala->ga.payload.gc.gtid);
+      StgWord pga = PackGA(pe, gala->ga.payload.gc.slot);
+
+      /* check that the block containing this closure is not in to-space */
+      IF_PAR_DEBUG(tables,
+		   fprintf(stderr, " !EVAC %p (%s); sending free to PE %d\n",
+			   closure, info_type(closure), pe));
+
+      (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala);
+      freeRemoteGA(pe-1, &(gala->ga)); //-1 cause ids start at 1... not 0
+      gala->next = freeGALAList;
+      freeGALAList = gala;
+      /* poison the freed entry so checkFreeGALAList can verify it */
+      IF_DEBUG(sanity,
+	       gala->ga.weight = 0xdead0add;
+	       gala->la = (StgPtr)0xdead00aa);
+      continue;
+    }
+    gala->la = (StgPtr)closure;
+    if (/* !full && */ gala->preferred) {
+      GALA *q;
+      if ((q = lookupHashTable(LAtoGALAtable, (StgWord) gala->la))!=NULL) {
+	/* keep only one preferred GA per local address (cf. markLocalGAs) */
+	if (q->preferred && gala->preferred) {
+	  q->preferred = rtsFalse;
+	  IF_PAR_DEBUG(tables,
+		       fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+			       gala->la, info_type((StgClosure*)gala->la));
+		       printGA(&(q->ga));
+		       fputc('\n', stderr));
+	}
+      } else {
+	insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+      }
+    }
+    gala->next = prev;
+    prev = gala;
+    /* Global statistics: count GAs and total size
+    if (RtsFlags.ParFlags.ParStats.Global &&
+	RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+      StgInfoTable *info;
+      nat size, ptrs, nonptrs, vhs, i;
+      char str[80];
+
+      info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+      size_GA += size ;
+      n++; // stats: count number of GAs we add to the new table
+    }
+    */
+  }
+  liveRemoteGAs = prev; /* list is reversed during marking */
+
+  /* If we have any remaining FREE messages to send off, do so now */
+  sendFreeMessages();
+
+  PAR_TICKY_CNT_FREE_GA();
+
+  IF_DEBUG(sanity,
+	   checkFreeGALAList();
+	   checkFreeIndirectionsList());
+
+  rebuildLAGAtable();
+
+#if defined(PAR_TICKY)
+  getLAGAtableSize(&n, &size_GA);              // determine no of GAs and global heap
+  PAR_TICKY_REBUILD_GA_TABLES_END(n, size_GA); // record these values
+#endif
+
+  IF_PAR_DEBUG(tables,
+	       belch("@#%%%% rebuildGAtables: After ReBuilding GALA table starting with GALA at %p",
+		     liveRemoteGAs);
+	       printLAGAtable());
+}
+
+/*
+ Rebuild the LA->GA table, assuming that the addresses in the GALAs are
+ correct.
+ A word on the lookupHashTable check in both loops:
+ After GC we may end up with 2 preferred GAs for the same LA! For example,
+ if we received a closure whose GA already exists on this PE we CommonUp
+ both closures, making one an indirection to the other. Before GC everything
+ is fine: one preferred GA refers to the IND, the other preferred GA refers
+ to the closure it points to. After GC, however, we have short cutted the
+ IND and suddenly we have 2 preferred GAs for the same closure. We detect
+ this case in the loop below and deprecate one GA, so that we always just
+ have one preferred GA per LA.
+*/
+
+//@cindex rebuildLAGAtable
+/* Throw away the old LA->GA hash table and repopulate it from the
+   liveIndirections and liveRemoteGAs lists, deprecating duplicate
+   preferred GAs for the same local address (see comment above for why
+   duplicates can arise after GC short-cuts indirections). */
+void
+rebuildLAGAtable(void)
+{
+  GALA *gala;
+  nat n=0, m=0; // debugging
+
+  /* The old LA->GA table is worthless */
+  freeHashTable(LAtoGALAtable, NULL);
+  LAtoGALAtable = allocHashTable();
+
+  IF_PAR_DEBUG(tables,
+	       belch("@@%%%% rebuildLAGAtable: new LAGA table at %p",
+		     LAtoGALAtable));
+
+  /* first pass: locally-owned GAs */
+  for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+    n++;
+    if (gala->preferred) {
+      GALA *q;
+      if ((q = lookupHashTable(LAtoGALAtable, (StgWord) gala->la))!=NULL) {
+	if (q->preferred && gala->preferred) {
+	  /* this deprecates q (see also GALAdeprecate) */
+	  q->preferred = rtsFalse;
+	  (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *)q);
+	  IF_PAR_DEBUG(tables,
+		       fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+			       gala->la, info_type((StgClosure*)gala->la));
+		       printGA(&(q->ga));
+		       fputc('\n', stderr));
+	}
+      }
+      insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+    }
+  }
+
+  /* second pass: remotely-owned GAs, same deprecation logic */
+  for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+    m++;
+    if (gala->preferred) {
+      GALA *q;
+      if ((q = lookupHashTable(LAtoGALAtable, (StgWord) gala->la))!=NULL) {
+	if (q->preferred && gala->preferred) {
+	  /* this deprecates q (see also GALAdeprecate) */
+	  q->preferred = rtsFalse;
+	  (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *)q);
+	  IF_PAR_DEBUG(tables,
+		       fprintf(stderr, "@@## found hash entry for closure %p (%s): deprecated GA ",
+			       (StgClosure*)gala->la, info_type((StgClosure*)gala->la));
+		       printGA(&(q->ga));
+		       fputc('\n', stderr));
+	}
+      }
+      insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala);
+    }
+  }
+
+  IF_PAR_DEBUG(tables,
+	       belch("@@%%%% rebuildLAGAtable: inserted %d entries from liveIndirections and %d entries from liveRemoteGAs",
+		     n,m));
+}
+
+/*
+ Determine the size of the LAGA and GALA tables.
+ Has to be done after rebuilding the tables.
+ Only used for global statistics gathering.
+*/
+
+//@cindex getLAGAtableSize
+/* Count the globalised closures (*nP) and their total heap size in
+   words/bytes as reported by get_closure_info (*sizeP), walking both
+   GALA lists.  A scratch hash table de-duplicates closures reachable
+   from more than one GALA entry.  Stats gathering only; must run after
+   the tables have been rebuilt. */
+void
+getLAGAtableSize(nat *nP, nat *sizeP)
+{
+  GALA *gala;
+  // nat n=0, tot_size=0;
+  StgClosure *closure;
+  StgInfoTable *info;  // NOTE(review): result of get_closure_info is ignored
+  nat size, ptrs, nonptrs, vhs, i;  // NOTE(review): i is unused
+  char str[80];
+  /* IN order to avoid counting closures twice we maintain a hash table
+     of all closures seen so far.
+     ToDo: collect this data while rebuilding the GALA table and make use
+           of the existing hash tables;
+  */
+  HashTable *closureTable;  // hash table for closures encountered already
+
+  closureTable = allocHashTable();
+
+  (*nP) = (*sizeP) = 0;
+  for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+    closure = (StgClosure*) gala->la;
+    if (lookupHashTable(closureTable, (StgWord)closure)==NULL) { // not seen yet
+      insertHashTable(closureTable, (StgWord)closure, (void *)1);
+      info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+      (*sizeP) += size ;   // stats: measure total heap size of global closures
+      (*nP)++;             // stats: count number of GAs
+    }
+  }
+
+  for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+    closure = (StgClosure*) gala->la;
+    if (lookupHashTable(closureTable, (StgWord)closure)==NULL) { // not seen yet
+      insertHashTable(closureTable, (StgWord)closure, (void *)1);
+      info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+      (*sizeP) += size ;   // stats: measure total heap size of global closures
+      (*nP)++;             // stats: count number of GAs
+    }
+  }
+
+  freeHashTable(closureTable, NULL);
+}
+
+//@node Debugging routines, Index, GC functions for GALA tables, Global Address Manipulation
+//@subsection Debugging routines
+
+//@cindex printGA
+/* Print a global address as ((gtid, slot, weight)) to stderr.
+   Debugging only. */
+void
+printGA (globalAddr *ga)
+{
+  fprintf(stderr, "((%x, %d, %x))",
+	  ga->payload.gc.gtid,
+	  ga->payload.gc.slot,
+	  ga->weight);
+}
+
+//@cindex printGALA
+/* Print one GALA entry: its GA, the local address with the closure's
+   info type, and whether it is the preferred GA.  Debugging only. */
+void
+printGALA (GALA *gala)
+{
+  printGA(&(gala->ga));
+  fprintf(stderr, " -> %p (%s)",
+	  (StgClosure*)gala->la, info_type((StgClosure*)gala->la));
+  fprintf(stderr, " %s",
+	  (gala->preferred) ? "PREF" : "____");
+}
+
+/*
+ Printing the LA->GA table.
+*/
+
+//@cindex printLiveIndTable
+/* Dump the liveIndirections list, cross-checking each entry against the
+   LA->GA hash table ("====" found and identical, "####" found but a
+   different entry, "...." not hashed).  Debugging only. */
+void
+printLiveIndTable(void)
+{
+  GALA *gala, *q;
+  nat n=0; // debugging
+
+  belch("@@%%%%:: logical LiveIndTable (%p) (liveIndirections=%p):",
+	LAtoGALAtable, liveIndirections);
+
+  for (gala = liveIndirections; gala != NULL; gala = gala->next) {
+    n++;
+    printGALA(gala);
+    /* check whether this gala->la is hashed into the LAGA table */
+    q = lookupHashTable(LAtoGALAtable, (StgWord)(gala->la));
+    fprintf(stderr, "\t%s\n", (q==NULL) ? "...." : (q==gala) ? "====" : "####");
+    //ASSERT(lookupHashTable(LAtoGALAtable, (StgWord)(gala->la)));
+  }
+  belch("@@%%%%:: %d live indirections",
+	n);
+}
+
+/* Dump the liveRemoteGAs list, with the same LAGA-table cross-check
+   markers as printLiveIndTable.  Debugging only. */
+void
+printRemoteGATable(void)
+{
+  GALA *gala, *q;
+  nat m=0; // debugging
+
+  belch("@@%%%%:: logical RemoteGATable (%p) (liveRemoteGAs=%p):",
+	LAtoGALAtable, liveRemoteGAs);
+
+  for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) {
+    m++;
+    printGALA(gala);
+    /* check whether this gala->la is hashed into the LAGA table */
+    q = lookupHashTable(LAtoGALAtable, (StgWord)(gala->la));
+    fprintf(stderr, "\t%s\n", (q==NULL) ? "...." : (q==gala) ? "====" : "####");
+    // ASSERT(lookupHashTable(LAtoGALAtable, (StgWord)(gala->la)));
+  }
+  belch("@@%%%%:: %d remote GAs",
+	m);
+}
+
+//@cindex printLAGAtable
+/* Print both halves of the logical LAGA table (local indirections and
+   remote GAs).  Debugging only. */
+void
+printLAGAtable(void)
+{
+  belch("@@%%: LAGAtable (%p) with liveIndirections=%p, liveRemoteGAs=%p:",
+	LAtoGALAtable, liveIndirections, liveRemoteGAs);
+
+  printLiveIndTable();
+  printRemoteGATable();
+}
+
+/*
+ Check whether a GA is already in a list.
+*/
+/* Linear search of liveIndirections for an entry whose GA matches *ga
+   on all three fields (weight, slot, gtid).  Debug/sanity use only. */
+rtsBool
+isOnLiveIndTable(globalAddr *ga)
+{
+  GALA *gala;
+
+  for (gala = liveIndirections; gala != NULL; gala = gala->next)
+    if (gala->ga.weight==ga->weight &&
+	gala->ga.payload.gc.slot==ga->payload.gc.slot &&
+	gala->ga.payload.gc.gtid==ga->payload.gc.gtid)
+      return rtsTrue;
+
+  return rtsFalse;
+}
+
+/* Linear search of liveRemoteGAs for an entry whose GA matches *ga on
+   all three fields (weight, slot, gtid).  Debug/sanity use only. */
+rtsBool
+isOnRemoteGATable(globalAddr *ga)
+{
+  GALA *gala;
+
+  for (gala = liveRemoteGAs; gala != NULL; gala = gala->next)
+    if (gala->ga.weight==ga->weight &&
+	gala->ga.payload.gc.slot==ga->payload.gc.slot &&
+	gala->ga.payload.gc.gtid==ga->payload.gc.gtid)
+      return rtsTrue;
+
+  return rtsFalse;
+}
+
+/*
+ Sanity check for free lists.
+*/
+/* Sanity check: every entry on freeGALAList must carry the poison
+   values (0xdead0add / 0xdead00aa) written when it was freed in
+   rebuildGAtables. */
+void
+checkFreeGALAList(void) {
+  GALA *gl;
+
+  for (gl=freeGALAList; gl != NULL; gl=gl->next) {
+    ASSERT(gl->ga.weight==0xdead0add);
+    ASSERT(gl->la==(StgPtr)0xdead00aa);
+  }
+}
+
+/* Sanity check: every entry on freeIndirections must carry the poison
+   values (0xdead0add / 0xdead00aa) written when it was freed in
+   markLocalGAs. */
+void
+checkFreeIndirectionsList(void) {
+  GALA *gl;
+
+  for (gl=freeIndirections; gl != NULL; gl=gl->next) {
+    ASSERT(gl->ga.weight==0xdead0add);
+    ASSERT(gl->la==(StgPtr)0xdead00aa);
+  }
+}
+#endif /* PAR -- whole file */
+
+//@node Index, , Debugging routines, Global Address Manipulation
+//@subsection Index
+
+//@index
+//* DebugPrintLAGAtable:: @cindex\s-+DebugPrintLAGAtable
+//* GALAlookup:: @cindex\s-+GALAlookup
+//* LAGAlookup:: @cindex\s-+LAGAlookup
+//* LAtoGALAtable:: @cindex\s-+LAtoGALAtable
+//* PackGA:: @cindex\s-+PackGA
+//* addWeight:: @cindex\s-+addWeight
+//* allocGALA:: @cindex\s-+allocGALA
+//* allocIndirection:: @cindex\s-+allocIndirection
+//* freeIndirections:: @cindex\s-+freeIndirections
+//* initGAtables:: @cindex\s-+initGAtables
+//* liveIndirections:: @cindex\s-+liveIndirections
+//* liveRemoteGAs:: @cindex\s-+liveRemoteGAs
+//* makeGlobal:: @cindex\s-+makeGlobal
+//* markLocalGAs:: @cindex\s-+markLocalGAs
+//* nextIndirection:: @cindex\s-+nextIndirection
+//* pGAtoGALAtable:: @cindex\s-+pGAtoGALAtable
+//* printGA:: @cindex\s-+printGA
+//* printGALA:: @cindex\s-+printGALA
+//* rebuildLAGAtable:: @cindex\s-+rebuildLAGAtable
+//* registerTask:: @cindex\s-+registerTask
+//* setRemoteGA:: @cindex\s-+setRemoteGA
+//* splitWeight:: @cindex\s-+splitWeight
+//* taskIDtoPE:: @cindex\s-+taskIDtoPE
+//* taskIDtoPEtable:: @cindex\s-+taskIDtoPEtable
+//* thisPE:: @cindex\s-+thisPE
+//@end index
diff --git a/rts/parallel/GranSim.c b/rts/parallel/GranSim.c
new file mode 100644
index 0000000000..b1cc0962be
--- /dev/null
+++ b/rts/parallel/GranSim.c
@@ -0,0 +1,3015 @@
+/*
+ Time-stamp: <Tue Mar 06 2001 00:17:42 Stardate: [-30]6285.06 hwloidl>
+
+ Variables and functions specific to GranSim the parallelism simulator
+ for GPH.
+*/
+
+//@node GranSim specific code, , ,
+//@section GranSim specific code
+
+/*
+ Macros for dealing with the new and improved GA field for simulating
+ parallel execution. Based on @CONCURRENT@ package. The GA field now
+ contains a mask, where the n-th bit stands for the n-th processor, where
+ this data can be found. In case of multiple copies, several bits are
+ set. The total number of processors is bounded by @MAX_PROC@, which
+ should be <= the length of a word in bits. -- HWL
+*/
+
+//@menu
+//* Includes::
+//* Prototypes and externs::
+//* Constants and Variables::
+//* Initialisation::
+//* Global Address Operations::
+//* Global Event Queue::
+//* Spark queue functions::
+//* Scheduling functions::
+//* Thread Queue routines::
+//* GranSim functions::
+//* GranSimLight routines::
+//* Code for Fetching Nodes::
+//* Idle PEs::
+//* Routines directly called from Haskell world::
+//* Emiting profiling info for GrAnSim::
+//* Dumping routines::
+//* Index::
+//@end menu
+
+//@node Includes, Prototypes and externs, GranSim specific code, GranSim specific code
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "StgMiscClosures.h"
+#include "StgTypes.h"
+#include "Schedule.h"
+#include "SchedAPI.h" // for pushClosure
+#include "GranSimRts.h"
+#include "GranSim.h"
+#include "ParallelRts.h"
+#include "ParallelDebug.h"
+#include "Sparks.h"
+#include "Storage.h" // for recordMutable
+
+
+//@node Prototypes and externs, Constants and Variables, Includes, GranSim specific code
+//@subsection Prototypes and externs
+
+#if defined(GRAN)
+
+/* Prototypes */
+static inline PEs ga_to_proc(StgWord);
+static inline rtsBool any_idle(void);
+static inline nat idlers(void);
+ PEs where_is(StgClosure *node);
+
+static rtsBool stealSomething(PEs proc, rtsBool steal_spark, rtsBool steal_thread);
+static rtsBool stealSpark(PEs proc);
+static rtsBool stealThread(PEs proc);
+static rtsBool stealSparkMagic(PEs proc);
+static rtsBool stealThreadMagic(PEs proc);
+/* subsumed by stealSomething
+static void stealThread(PEs proc);
+static void stealSpark(PEs proc);
+*/
+static rtsTime sparkStealTime(void);
+static nat natRandom(nat from, nat to);
+static PEs findRandomPE(PEs proc);
+static void sortPEsByTime (PEs proc, PEs *pes_by_time,
+ nat *firstp, nat *np);
+
+void GetRoots(void);
+
+#endif /* GRAN */
+
+//@node Constants and Variables, Initialisation, Prototypes and externs, GranSim specific code
+//@subsection Constants and Variables
+
+#if defined(GRAN) || defined(PAR)
+/* See GranSim.h for the definition of the enum gran_event_types */
+char *gran_event_names[] = {
+ "START", "START(Q)",
+ "STEALING", "STOLEN", "STOLEN(Q)",
+ "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)",
+ "SCHEDULE", "DESCHEDULE",
+ "END",
+ "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED",
+ "ALLOC",
+ "TERMINATE",
+ "SYSTEM_START", "SYSTEM_END", /* only for debugging */
+ "??"
+};
+#endif
+
+#if defined(GRAN) /* whole file */
+char *proc_status_names[] = {
+ "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",
+ "UnknownProcStatus"
+};
+
+/* For internal use (event statistics) only */
+char *event_names[] =
+ { "ContinueThread", "StartThread", "ResumeThread",
+ "MoveSpark", "MoveThread", "FindWork",
+ "FetchNode", "FetchReply",
+ "GlobalBlock", "UnblockThread"
+ };
+
+//@cindex CurrentProc
+PEs CurrentProc = 0;
+
+/*
+ ToDo: Create a structure for the processor status and put all the
+ arrays below into it.
+ -- HWL */
+
+//@cindex CurrentTime
+/* One clock for each PE */
+rtsTime CurrentTime[MAX_PROC];
+
+/* Useful to restrict communication; cf fishing model in GUM */
+nat OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC];
+
+/* Status of each PE (new since but independent of GranSim Light) */
+rtsProcStatus procStatus[MAX_PROC];
+
+# if defined(GRAN) && defined(GRAN_CHECK)
+/* To check if the RTS ever tries to run a thread that should be blocked
+ because of fetching remote data */
+StgTSO *BlockedOnFetch[MAX_PROC];
+# define FETCH_MASK_TSO 0x08000000 /* only bits 0, 1, 2 should be used */
+# endif
+
+nat SparksAvail = 0; /* How many sparks are available */
+nat SurplusThreads = 0; /* How many excess threads are there */
+
+/* Do we need to reschedule following a fetch? */
+rtsBool NeedToReSchedule = rtsFalse, IgnoreEvents = rtsFalse, IgnoreYields = rtsFalse;
+rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; /* checked from the threaded world! */
+
+//@cindex spark queue
+/* GranSim: a globally visible array of spark queues */
+rtsSparkQ pending_sparks_hds[MAX_PROC];
+rtsSparkQ pending_sparks_tls[MAX_PROC];
+
+nat sparksIgnored = 0, sparksCreated = 0;
+
+GlobalGranStats globalGranStats;
+
+nat gran_arith_cost, gran_branch_cost, gran_load_cost,
+ gran_store_cost, gran_float_cost;
+
+/*
+Old comment from 0.29. ToDo: Check and update -- HWL
+
+The following variables control the behaviour of GrAnSim. In general, there
+is one RTS option for enabling each of these features. In getting the
+desired setup of GranSim the following questions have to be answered:
+\begin{itemize}
+\item {\em Which scheduling algorithm} to use (@RtsFlags.GranFlags.DoFairSchedule@)?
+ Currently only unfair scheduling is supported.
+\item What to do when remote data is fetched (@RtsFlags.GranFlags.DoAsyncFetch@)?
+ Either block and wait for the
+ data or reschedule and do some other work.
+ Thus, if this variable is true, asynchronous communication is
+ modelled. Block on fetch mainly makes sense for incremental fetching.
+
+ There is also a simplified fetch variant available
+ (@RtsFlags.GranFlags.SimplifiedFetch@). This variant does not use events to model
+ communication. It is faster but the results will be less accurate.
+\item How aggressive to be in getting work after a reschedule on fetch
+ (@RtsFlags.GranFlags.FetchStrategy@)?
+ This is determined by the so-called {\em fetching
+ strategy\/}. Currently, there are four possibilities:
+ \begin{enumerate}
+ \item Only run a runnable thread.
+ \item Turn a spark into a thread, if necessary.
+ \item Steal a remote spark, if necessary.
+ \item Steal a runnable thread from another processor, if necessary.
+ \end{itemize}
+ The variable @RtsFlags.GranFlags.FetchStrategy@ determines how far to go in this list
+ when rescheduling on a fetch.
+\item Should sparks or threads be stolen first when looking for work
+ (@RtsFlags.GranFlags.DoStealThreadsFirst@)?
+ The default is to steal sparks first (much cheaper).
+\item Should the RTS use a lazy thread creation scheme
+ (@RtsFlags.GranFlags.DoAlwaysCreateThreads@)? By default yes i.e.\ sparks are only
+ turned into threads when work is needed. Also note, that sparks
+ can be discarded by the RTS (this is done in the case of an overflow
+ of the spark pool). Setting @RtsFlags.GranFlags.DoAlwaysCreateThreads@ to @True@ forces
+ the creation of threads at the next possibility (i.e.\ when new work
+ is demanded the next time).
+\item Should data be fetched closure-by-closure or in packets
+ (@RtsFlags.GranFlags.DoBulkFetching@)? The default strategy is a GRIP-like incremental
+ (i.e.\ closure-by-closure) strategy. This makes sense in a
+ low-latency setting but is bad in a high-latency system. Setting
+ @RtsFlags.GranFlags.DoBulkFetching@ to @True@ enables bulk (packet) fetching. Other
+ parameters determine the size of the packets (@pack_buffer_size@) and the number of
+ thunks that should be put into one packet (@RtsFlags.GranFlags.ThunksToPack@).
+\item If there is no other possibility to find work, should runnable threads
+ be moved to an idle processor (@RtsFlags.GranFlags.DoThreadMigration@)? In any case, the
+ RTS tried to get sparks (either local or remote ones) first. Thread
+ migration is very expensive, since a whole TSO has to be transferred
+ and probably data locality becomes worse in the process. Note, that
+ the closure, which will be evaluated next by that TSO is not
+ transferred together with the TSO (that might block another thread).
+\item Should the RTS distinguish between sparks created by local nodes and
+ stolen sparks (@RtsFlags.GranFlags.PreferSparksOfLocalNodes@)? The idea is to improve
+ data locality by preferring sparks of local nodes (it is more likely
+ that the data for those sparks is already on the local processor).
+ However, such a distinction also imposes an overhead on the spark
+ queue management, and typically a large number of sparks are
+ generated during execution. By default this variable is set to @False@.
+\item Should the RTS use granularity control mechanisms? The idea of a
+ granularity control mechanism is to make use of granularity
+ information provided via annotation of the @par@ construct in order
+ to prefer bigger threads when either turning a spark into a thread or
+ when choosing the next thread to schedule. Currently, three such
+ mechanisms are implemented:
+ \begin{itemize}
+ \item Cut-off: The granularity information is interpreted as a
+ priority. If a threshold priority is given to the RTS, then
+ only those sparks with a higher priority than the threshold
+ are actually created. Other sparks are immediately discarded.
+ This is similar to a usual cut-off mechanism often used in
+ parallel programs, where parallelism is only created if the
+ input data is large enough. With this option, the choice is
+ hidden in the RTS and only the threshold value has to be
+ provided as a parameter to the runtime system.
+ \item Priority Sparking: This mechanism keeps priorities for sparks
+ and chooses the spark with the highest priority when turning
+ a spark into a thread. After that the priority information is
+ discarded. The overhead of this mechanism comes from
+ maintaining a sorted spark queue.
+ \item Priority Scheduling: This mechanism keeps the granularity
+ information for threads, too. Thus, on each reschedule the
+ largest thread is chosen. This mechanism has a higher
+ overhead, as the thread queue is sorted, too.
+ \end{itemize}
+\end{itemize}
+*/
+
+//@node Initialisation, Global Address Operations, Constants and Variables, GranSim specific code
+//@subsection Initialisation
+
+/* Zero the global GranSim statistics record.  The memset covers every
+   field; the #if 0 block below documents the individual fields it
+   replaces. */
+void
+init_gr_stats (void) {
+  memset(&globalGranStats, '\0', sizeof(GlobalGranStats));
+#if 0
+  /* event stats */
+  globalGranStats.noOfEvents = 0;
+  for (i=0; i<MAX_EVENT; i++) globalGranStats.event_counts[i]=0;
+
+  /* communication stats */
+  globalGranStats.fetch_misses = 0;
+  globalGranStats.tot_low_pri_sparks = 0;
+
+  /* obscure stats */
+  globalGranStats.rs_sp_count = 0;
+  globalGranStats.rs_t_count = 0;
+  globalGranStats.ntimes_total = 0,
+  globalGranStats.fl_total = 0;
+  globalGranStats.no_of_steals = 0;
+
+  /* spark queue stats */
+  globalGranStats.tot_sq_len = 0,
+  globalGranStats.tot_sq_probes = 0;
+  globalGranStats.tot_sparks = 0;
+  globalGranStats.withered_sparks = 0;
+  globalGranStats.tot_add_threads = 0;
+  globalGranStats.tot_tq_len = 0;
+  globalGranStats.non_end_add_threads = 0;
+
+  /* thread stats */
+  globalGranStats.tot_threads_created = 0;
+  for (i=0; i<MAX_PROC; i++) globalGranStats.threads_created_on_PE[i]=0;
+#endif /* 0 */
+}
+
+//@node Global Address Operations, Global Event Queue, Initialisation, GranSim specific code
+//@subsection Global Address Operations
+/*
+ ----------------------------------------------------------------------
+ Global Address Operations
+
+ These functions perform operations on the global-address (ga) part of a
+ closure. The ga is the only new field (1 word) in a closure introduced by
+ GrAnSim. It serves as a bitmask, indicating on which processor the
+ closure is residing. Since threads are described by Thread State Object
+ (TSO), which is nothing but another kind of closure, this scheme allows
+ gives placement information about threads.
+
+ A ga is just a bitmask, so the operations on them are mainly bitmask
+ manipulating functions. Note, that there are important macros like PROCS,
+ IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@.
+
+ NOTE: In GrAnSim-light we don't maintain placement information. This
+ allows to simulate an arbitrary number of processors. The price we have
+ to pay is the lack of costing any communication properly. In short,
+ GrAnSim-light is meant to reveal the maximal parallelism in a program.
+ From an implementation point of view the important thing is: {\em
+ GrAnSim-light does not maintain global-addresses}. */
+
+/* ga_to_proc returns the first processor marked in the bitmask ga.
+ Normally only one bit in ga should be set. But for PLCs all bits
+ are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */
+
+//@cindex ga_to_proc
+
+/* Return the index of the first PE whose bit is set in the bitmask ga
+   (see comment above).  Asserts that at least one bit within the
+   configured number of PEs is set. */
+static inline PEs
+ga_to_proc(StgWord ga)
+{
+  PEs i;
+  for (i = 0; i < RtsFlags.GranFlags.proc && !IS_LOCAL_TO(ga, i); i++);
+  ASSERT(i<RtsFlags.GranFlags.proc);  // ga must mark at least one valid PE
+  return (i);
+}
+
+/* NB: This takes a *node* rather than just a ga as input */
+//@cindex where_is
+/* Return the (first) PE on which the given closure resides, by reading
+   its PROCS bitmask.  NB: takes a *node*, not a ga. */
+PEs
+where_is(StgClosure *node)
+{ return (ga_to_proc(PROCS(node))); }
+
+// debugging only
+//@cindex is_unique
+/* rtsTrue iff exactly one PE bit is set in the closure's PROCS mask.
+   Returns rtsFalse early on finding a second instance; the trailing
+   ASSERT fires if *no* instance was found at all.
+   NB: the braceless else binds to the inner "if (unique)", which is
+   what the indentation (and the logic) intends. */
+rtsBool
+is_unique(StgClosure *node)
+{
+  PEs i;
+  rtsBool unique = rtsFalse;
+
+  for (i = 0; i < RtsFlags.GranFlags.proc ; i++)
+    if (IS_LOCAL_TO(PROCS(node), i))
+      if (unique)          // exactly 1 instance found so far
+	return rtsFalse;   // found a 2nd instance => not unique
+      else
+	unique = rtsTrue;  // found 1st instance
+  ASSERT(unique);          // otherwise returned from within loop
+  return (unique);
+}
+
+//@cindex any_idle
+/* rtsTrue iff at least one PE is currently Idle.
+   BUG FIX: the original version computed the flag but fell off the end
+   of this non-void function without returning it, which is undefined
+   behaviour when the caller uses the result (C99 6.9.1p12). */
+static inline rtsBool
+any_idle(void) { /* any (map (\ i -> procStatus[i] == Idle)) [0,..,MAX_PROC] */
+  PEs i;
+  rtsBool any_idle;
+  for(i=0, any_idle=rtsFalse;
+      !any_idle && i<RtsFlags.GranFlags.proc;
+      any_idle = any_idle || procStatus[i] == Idle, i++)
+    {} ;
+  return any_idle;  /* was missing: result was never returned */
+}
+
+//@cindex idlers
+/* Count how many PEs (of the configured RtsFlags.GranFlags.proc) are
+   currently in the Idle state. */
+static inline nat
+idlers(void) { /* number of idle PEs */
+  PEs i, j;
+  for(i=0, j=0;
+      i<RtsFlags.GranFlags.proc;
+      j += (procStatus[i] == Idle) ? 1 : 0, i++)
+    {} ;
+  return j;
+}
+
+//@node Global Event Queue, Spark queue functions, Global Address Operations, GranSim specific code
+//@subsection Global Event Queue
+/*
+The following routines implement an ADT of an event-queue (FIFO).
+ToDo: Put that in an own file(?)
+*/
+
+/* Pointer to the global event queue; events are currently malloc'ed */
+rtsEventQ EventHd = NULL;
+
+//@cindex get_next_event
+/* Pop and return the head of the global event queue.  Barfs if the
+   queue is empty (deadlock / circular dependency in the simulated
+   program).
+   Ownership note: the returned event is only valid until the *next*
+   call -- the previously returned event (kept in the static 'entry')
+   is free()d lazily here, so callers must not retain events. */
+rtsEvent *
+get_next_event(void)
+{
+  static rtsEventQ entry = NULL;
+
+  if (EventHd == NULL) {
+    barf("No next event. This may be caused by a circular data dependency in the program.");
+  }
+
+  /* free the event handed out on the previous call */
+  if (entry != NULL)
+    free((char *)entry);
+
+  if (RtsFlags.GranFlags.GranSimStats.Global) {     /* count events */
+    globalGranStats.noOfEvents++;
+    globalGranStats.event_counts[EventHd->evttype]++;
+  }
+
+  entry = EventHd;
+
+  IF_GRAN_DEBUG(event_trace,
+	   print_event(entry));
+
+  EventHd = EventHd->next;
+  return(entry);
+}
+
+/* When getting the time of the next event we ignore CONTINUETHREAD events:
+ we don't want to be interrupted before the end of the current time slice
+ unless there is something important to handle.
+*/
+//@cindex get_time_of_next_event
+/* Time stamp of the next non-ContinueThread event, or 0 if there is
+   none (see comment above for why ContinueThread events are skipped). */
+rtsTime
+get_time_of_next_event(void)
+{
+  rtsEventQ event = EventHd;
+
+  while (event != NULL && event->evttype==ContinueThread) {
+    event = event->next;
+  }
+  if(event == NULL)
+      return ((rtsTime) 0);
+  else
+      return (event->time);
+}
+
+/* ToDo: replace malloc/free with a free list */
+//@cindex insert_event
+/* Insert 'newentry' into the global event queue, keeping it sorted by
+   time with type-dependent tie-breaking (see the long comments below).
+   Within the loop: 'continue' means keep scanning past this event;
+   'break' out of the switch drops into the insertion code, which links
+   newentry in *before* the current event and then leaves the loop. */
+void
+insert_event(newentry)
+rtsEvent *newentry;
+{
+  rtsEventType evttype = newentry->evttype;
+  rtsEvent *event, **prev;
+
+  /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */
+
+  /* Search the queue and insert at the right point:
+     FINDWORK before everything, CONTINUETHREAD after everything.
+
+     This ensures that we find any available work after all threads have
+     executed the current cycle.  This level of detail would normally be
+     irrelevant, but matters for ridiculously low latencies...
+  */
+
+  /* Changed the ordering: Now FINDWORK comes after everything but
+     CONTINUETHREAD. This makes sure that a MOVESPARK comes before a
+     FINDWORK. This is important when a GranSimSparkAt happens and
+     DoAlwaysCreateThreads is turned on. Also important if a GC occurs
+     when trying to build a new thread (see much_spark) -- HWL 02/96 */
+
+  if(EventHd == NULL)
+    EventHd = newentry;
+  else {
+    for (event = EventHd, prev=(rtsEvent**)&EventHd;
+	 event != NULL;
+	 prev = (rtsEvent**)&(event->next), event = event->next) {
+      switch (evttype) {
+	case FindWork: if ( event->time < newentry->time ||
+	        	    ( (event->time == newentry->time) &&
+			      (event->evttype != ContinueThread) ) )
+	 	         continue;
+		       else
+		         break;
+	case ContinueThread: if ( event->time <= newentry->time )
+			       continue;
+			     else
+			       break;
+	default: if ( event->time < newentry->time ||
+		      ((event->time == newentry->time) &&
+		       (event->evttype == newentry->evttype)) )
+		   continue;
+		 else
+		   break;
+      }
+      /* Insert newentry here (i.e. before event) */
+      *prev = newentry;
+      newentry->next = event;
+      break;
+    }
+    /* scanned off the end: newentry belongs at the tail */
+    if (event == NULL)
+      *prev = newentry;
+  }
+}
+
+//@cindex new_event
+/* Allocate a fresh event, fill in every field from the arguments and
+   hand it to insert_event, which places it at the correct queue
+   position.  (K&R header kept for consistency with the rest of file.) */
+void
+new_event(proc,creator,time,evttype,tso,node,spark)
+PEs proc, creator;
+rtsTime time;
+rtsEventType evttype;
+StgTSO *tso;
+StgClosure *node;
+rtsSpark *spark;
+{
+  rtsEvent *ev;
+
+  ev = (rtsEvent *) stgMallocBytes(sizeof(rtsEvent), "new_event");
+
+  ev->proc    = proc;
+  ev->creator = creator;
+  ev->time    = time;
+  ev->evttype = evttype;
+  ev->tso     = tso;
+  ev->node    = node;
+  ev->spark   = spark;
+  ev->gc_info = 0;
+  ev->next    = NULL;
+
+  insert_event(ev);
+
+  IF_DEBUG(gran,
+	   fprintf(stderr, "GRAN: new_event: \n");
+	   print_event(ev));
+}
+
+//@cindex prepend_event
+/* Push 'event' onto the front of the global event queue; undone by
+   grab_event.  Only used around garbage collection. */
+void
+prepend_event(event)
+rtsEvent *event;
+{
+  rtsEvent *old_head = EventHd;
+
+  event->next = old_head;
+  EventHd = event;
+}
+
+//@cindex grab_event
+/* Pop and return the event at the head of the queue WITHOUT freeing
+   anything; this is the inverse of prepend_event. */
+rtsEventQ
+grab_event(void)
+{
+  rtsEventQ head = EventHd;
+
+  if (head == NULL) {
+    barf("No next event (in grab_event). This may be caused by a circular data dependency in the program.");
+  }
+
+  EventHd = head->next;
+  return head;
+}
+
+//@cindex traverse_eventq_for_gc
+/* Prepare the event queue for GC when bulk (GUM-style) fetching is on:
+   every in-flight FetchReply (whose 'node' is really a pack buffer) is
+   unwound back into a FetchNode for the original closure, so the graph
+   is simply re-fetched after GC instead of having the buffer traced. */
+void
+traverse_eventq_for_gc(void)
+{
+  rtsEventQ event = EventHd;
+  StgWord bufsize;
+  StgClosure *closurep;
+  StgTSO *tsop;
+  StgPtr buffer, bufptr;
+  PEs proc, creator;
+
+  /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the
+     orig closure (root of packed graph). This means that a graph, which is
+     between processors at the time of GC is fetched again at the time when
+     it would have arrived, had there been no GC. Slightly inaccurate but
+     safe for GC.
+     This is only needed for GUM style fetching. -- HWL */
+  if (!RtsFlags.GranFlags.DoBulkFetching)
+    return;
+
+  for(event = EventHd; event!=NULL; event=event->next) {
+    if (event->evttype==FetchReply) {
+      buffer = stgCast(StgPtr,event->node);
+      ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG);  /* It's a pack buffer */
+      bufsize = buffer[PACK_SIZE_LOCN];
+      closurep = stgCast(StgClosure*,buffer[PACK_HDR_SIZE]);
+      tsop = stgCast(StgTSO*,buffer[PACK_TSO_LOCN]);
+      proc = event->proc;
+      creator = event->creator;                 /* similar to unpacking */
+      /* revert any RBHs inside the pack buffer before dropping it */
+      for (bufptr=buffer+PACK_HDR_SIZE;
+	   bufptr<(buffer+bufsize);
+	   bufptr++) {
+	  // if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) ||
+	  //      (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) {
+	  if ( GET_INFO(stgCast(StgClosure*,bufptr)) ) {
+	      convertFromRBH(stgCast(StgClosure *,bufptr));
+	  }
+      }
+      free(buffer);
+      /* turn the reply back into a request, with proc/creator swapped */
+      event->evttype = FetchNode;
+      event->proc = creator;
+      event->creator = proc;
+      event->node = closurep;
+      event->tso = tsop;
+      event->gc_info = 0;
+    }
+  }
+}
+
+/* Register the closure fields of every queued event (tso, node,
+   spark->node -- depending on the event type) as GC roots, updating the
+   fields with the possibly-moved pointers MarkRoot returns. */
+void
+markEventQueue(void)
+{
+  StgClosure *MarkRoot(StgClosure *root); // prototype
+
+  rtsEventQ event = EventHd;
+  nat len;
+
+  /* iterate over eventq and register relevant fields in event as roots */
+  for(event = EventHd, len =  0; event!=NULL; event=event->next, len++) {
+    switch (event->evttype) {
+      case ContinueThread:
+      case MoveThread:
+	/* only the TSO is live */
+	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+	break;
+      case StartThread:
+      case ResumeThread:
+      case FetchNode:
+      case GlobalBlock:
+      case UnblockThread:
+	/* both TSO and node are live (bodies were identical; merged) */
+	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+	break;
+      case MoveSpark:
+	event->spark->node = (StgClosure *)MarkRoot((StgClosure *)event->spark->node);
+	break;
+      case FindWork:
+	/* nothing to mark */
+	break;
+      case FetchReply:
+	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso);
+	if (RtsFlags.GranFlags.DoBulkFetching)
+	  // ToDo: traverse_eventq_for_gc if GUM-Fetching!!! HWL
+	  belch("ghuH: packets in BulkFetching not marked as roots; may be fatal");
+	else
+	  event->node = (StgClosure *)MarkRoot((StgClosure *)event->node);
+	break;
+      default:
+	barf("markEventQueue: trying to mark unknown event @ %p", event);
+    }
+  }
+  IF_DEBUG(gc,
+	   belch("GC: markEventQueue: %d events in queue", len));
+}
+
+/*
+  Prune all ContinueThread events related to tso or node in the eventq.
+  Currently used if a thread leaves STG land with ThreadBlocked status,
+  i.e. it blocked on a closure and has been put on its blocking queue.  It
+  will be reawakened via a call to awakenBlockedQueue. Until then no
+  event affecting this tso should appear in the eventq.  A bit of a hack,
+  because ideally we shouldn't generate such spurious ContinueThread events
+  in the first place.
+*/
+//@cindex prune_eventq
+/* Remove (and free) every ContinueThread event for 'tso' from the global
+   event queue.  'node' is currently unused and must be NULL. */
+void
+prune_eventq(tso, node)
+StgTSO *tso;
+StgClosure *node;
+{ rtsEventQ prev = (rtsEventQ)NULL, event = EventHd;
+
+  /* node unused for now */
+  ASSERT(node==NULL);
+  /* tso must be valid, then */
+  ASSERT(tso!=END_TSO_QUEUE);
+  /* classic singly-linked-list deletion with a trailing 'prev' pointer */
+  while (event != NULL) {
+    if (event->evttype==ContinueThread &&
+	(event->tso==tso)) {
+      IF_GRAN_DEBUG(event_trace, // ToDo: use another debug flag
+		    belch("prune_eventq: pruning ContinueThread event for TSO %d (%p) on PE %d @ %lx (%p)",
+			  event->tso->id, event->tso, event->proc, event->time, event));
+      if (prev==(rtsEventQ)NULL) { // beginning of eventq
+	EventHd = event->next;
+	free(event);
+	event = EventHd;
+      } else {
+	prev->next = event->next;
+	free(event);
+	event = prev->next;
+      }
+    } else { // no pruning necessary; go to next event
+      prev = event;
+      event = event->next;
+    }
+  }
+}
+
+//@cindex print_event
+/* Pretty-print a single event to stderr (debugging aid). */
+void
+print_event(event)
+rtsEvent *event;
+{
+  char str_tso[24], str_node[24]; /* widened from 16: "%p" can emit up to
+                                     18 chars on 64-bit platforms */
+  StgThreadID tso_id;
+
+  /* BUGFIX: test for NULL *before* dereferencing; the old code read
+     event->tso first, so its NULL check could never fire in time. */
+  if (event==NULL) {
+    fprintf(stderr,"Evt: NIL\n");
+    return;
+  }
+
+  if (event->tso==END_TSO_QUEUE) {
+    strcpy(str_tso, "______");
+    tso_id = 0;
+  } else {
+    sprintf(str_tso, "%p", event->tso);
+    tso_id = (event->tso==NULL) ? 0 : event->tso->id;
+  }
+  if (event->node==(StgClosure*)NULL) {
+    strcpy(str_node, "______");
+  } else {
+    sprintf(str_node, "%p", event->node);
+  }
+
+  fprintf(stderr, "Evt: %s (%u), PE %u [%u], Time %lu, TSO %d (%s), Node %s\n",
+	      event_names[event->evttype], event->evttype,
+              event->proc, event->creator, event->time,
+	      tso_id, str_tso, str_node
+	      /*, event->spark, event->next */ );
+}
+
+//@cindex print_eventq
+/* Dump every event in the queue rooted at 'hd' to stderr. */
+void
+print_eventq(hd)
+rtsEvent *hd;
+{
+  rtsEvent *cursor;
+
+  fprintf(stderr,"Event Queue with root at %p:\n", hd);
+  for (cursor = hd; cursor != NULL; cursor = cursor->next) {
+    print_event(cursor);
+  }
+}
+
+/*
+ Spark queue functions are now all in Sparks.c!!
+*/
+//@node Scheduling functions, Thread Queue routines, Spark queue functions, GranSim specific code
+//@subsection Scheduling functions
+
+/*
+ These functions are variants of thread initialisation and therefore
+ related to initThread and friends in Schedule.c. However, they are
+ specific to a GranSim setup in storing more info in the TSO's statistics
+ buffer and sorting the thread queues etc.
+*/
+
+/*
+ A large portion of startThread deals with maintaining a sorted thread
+ queue, which is needed for the Priority Sparking option. Without that
+ complication the code boils down to FIFO handling.
+*/
+//@cindex insertThread
+/* Add 'tso' to the run queue of processor 'proc' (which must be the
+   current one).  Without priority scheduling this is plain FIFO append;
+   with -bZ (DoPriorityScheduling) the queue is kept sorted by descending
+   gran.pri, never displacing the currently-running head element.
+   Charges threadqueuetime plus, for priority insertion, a per-step
+   pri_sched_overhead cost. */
+void
+insertThread(tso, proc)
+StgTSO* tso;
+PEs proc;
+{
+  StgTSO *prev = NULL, *next = NULL;
+  nat count = 0;
+  rtsBool found = rtsFalse;
+
+  ASSERT(CurrentProc==proc);
+  ASSERT(!is_on_queue(tso,proc));
+  /* Idle proc: put the thread on the run queue
+     same for pri spark and basic version */
+  if (run_queue_hds[proc] == END_TSO_QUEUE)
+    {
+      /* too strong!
+      ASSERT((CurrentProc==MainProc &&
+	      CurrentTime[MainProc]==0 &&
+	      procStatus[MainProc]==Idle) ||
+	     procStatus[proc]==Starting);
+      */
+      run_queue_hds[proc] = run_queue_tls[proc] = tso;
+
+      CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime;
+
+      /* new_event of ContinueThread has been moved to do_the_startthread */
+
+      /* too strong!
+      ASSERT(procStatus[proc]==Idle ||
+	     procStatus[proc]==Fishing ||
+	     procStatus[proc]==Starting);
+      procStatus[proc] = Busy;
+      */
+      return;
+    }
+
+  if (RtsFlags.GranFlags.Light)
+    GranSimLight_insertThread(tso, proc);
+
+  /* Only for Pri Scheduling: find place where to insert tso into queue */
+  if (RtsFlags.GranFlags.DoPriorityScheduling && tso->gran.pri!=0)
+    /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo'
+       (Klingon: lifted from add_to_spark_queue; skip the first elem) */
+    for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count=0;
+	 (next != END_TSO_QUEUE) &&
+	 !(found = tso->gran.pri >= next->gran.pri);
+	 prev = next, next = next->link, count++)
+      {
+	ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+	       (prev==(StgTSO*)NULL || prev->link==next));
+      }
+
+  ASSERT(!found || next != END_TSO_QUEUE);
+  ASSERT(procStatus[proc]!=Idle);
+
+  if (found) {
+    /* found can only be rtsTrue if pri scheduling enabled */
+    ASSERT(RtsFlags.GranFlags.DoPriorityScheduling);
+    if (RtsFlags.GranFlags.GranSimStats.Global)
+      globalGranStats.non_end_add_threads++;
+    /* Add tso to ThreadQueue between prev and next */
+    tso->link = next;
+    if ( next == (StgTSO*)END_TSO_QUEUE ) {
+      /* NB: run_queue_tl presumably aliases run_queue_tls[CurrentProc]
+         (CurrentProc==proc is asserted above) -- confirm */
+      run_queue_tl = tso;
+    } else {
+      /* no back link for TSO chain */
+    }
+
+    if ( prev == (StgTSO*)END_TSO_QUEUE ) {
+      /* Never add TSO as first elem of thread queue; the first */
+      /* element should be the one that is currently running -- HWL */
+      IF_DEBUG(gran,
+	       belch("GRAN: Qagh: NewThread (w/ PriorityScheduling): Trying to add TSO %p (PRI=%d) as first elem of threadQ (%p) on proc %u (@ %u)\n",
+		     tso, tso->gran.pri, run_queue_hd, proc,
+		     CurrentTime[proc]));
+    } else {
+      prev->link = tso;
+    }
+  } else { /* !found */ /* or not pri sparking! */
+    /* Add TSO to the end of the thread queue on that processor */
+    run_queue_tls[proc]->link = tso;
+    run_queue_tls[proc] = tso;
+  }
+  ASSERT(RtsFlags.GranFlags.DoPriorityScheduling || count==0);
+  /* count search steps only ever non-zero under priority scheduling */
+  CurrentTime[proc] += count * RtsFlags.GranFlags.Costs.pri_sched_overhead +
+                       RtsFlags.GranFlags.Costs.threadqueuetime;
+
+  /* ToDo: check if this is still needed -- HWL
+  if (RtsFlags.GranFlags.DoThreadMigration)
+    ++SurplusThreads;
+
+  if (RtsFlags.GranFlags.GranSimStats.Full &&
+      !(( event_type == GR_START || event_type == GR_STARTQ) &&
+	RtsFlags.GranFlags.labelling) )
+    DumpRawGranEvent(proc, creator, event_type+1, tso, node,
+		     tso->gran.sparkname, spark_queue_len(proc));
+  */
+
+# if defined(GRAN_CHECK)
+  /* Check if thread queue is sorted. Only for testing, really!  HWL */
+  if ( RtsFlags.GranFlags.DoPriorityScheduling &&
+       (RtsFlags.GranFlags.Debug.sortedQ) ) {
+    rtsBool sorted = rtsTrue;
+    StgTSO *prev, *next;
+
+    if (run_queue_hds[proc]==END_TSO_QUEUE ||
+	run_queue_hds[proc]->link==END_TSO_QUEUE) {
+      /* just 1 elem => ok */
+    } else {
+      /* Qu' wa'DIch yIleghQo' (ignore first elem)! */
+      for (prev = run_queue_hds[proc]->link, next = prev->link;
+	   (next != END_TSO_QUEUE) ;
+	   prev = next, next = prev->link) {
+	ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+	       (prev==(StgTSO*)NULL || prev->link==next));
+	sorted = sorted &&
+	         (prev->gran.pri >= next->gran.pri);
+      }
+    }
+    if (!sorted) {
+      fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n",
+	      CurrentProc);
+      G_THREADQ(run_queue_hd,0x1);
+    }
+  }
+# endif
+}
+
+/*
+  GranSimLight_insertThread, which is only used for GranSim Light, is
+  similar to insertThread above in that it adds a TSO to a thread queue.
+  However, it assumes that the thread queue is sorted by local clocks and
+  it inserts the TSO at the right place in the queue. Don't create any
+  event, just insert.
+*/
+//@cindex GranSimLight_insertThread
+/* GranSim-Light only: insert 'tso' into the one-and-only thread queue,
+   which is kept sorted by the TSOs' local clocks (gran.clock).  Returns
+   rtsTrue.  If the insertion produces a new queue head, a ContinueThread
+   event is scheduled for it. */
+rtsBool
+GranSimLight_insertThread(tso, proc)
+StgTSO* tso;
+PEs proc;
+{
+  StgTSO *prev, *next;
+  nat count = 0;
+  rtsBool found = rtsFalse;
+
+  ASSERT(RtsFlags.GranFlags.Light);
+
+  /* In GrAnSim-Light we always have an idle `virtual' proc.
+     The semantics of the one-and-only thread queue is different here:
+     all threads in the queue are running (each on its own virtual processor);
+     the queue is only needed internally in the simulator to interleave the
+     reductions of the different processors.
+     The one-and-only thread queue is sorted by the local clocks of the TSOs.
+  */
+  ASSERT(run_queue_hds[proc] != END_TSO_QUEUE);
+  ASSERT(tso->link == END_TSO_QUEUE);
+
+  /* If only one thread in queue so far we emit DESCHEDULE in debug mode */
+  if (RtsFlags.GranFlags.GranSimStats.Full &&
+      (RtsFlags.GranFlags.Debug.checkLight) &&
+      (run_queue_hd->link == END_TSO_QUEUE)) {
+    DumpRawGranEvent(proc, proc, GR_DESCHEDULE,
+		     run_queue_hds[proc], (StgClosure*)NULL,
+		     tso->gran.sparkname, spark_queue_len(proc)); // ToDo: check spar_queue_len
+    // resched = rtsTrue;
+  }
+
+  /* this routine should only be used in a GrAnSim Light setup */
+  /* && CurrentProc must be 0 in GrAnSim Light setup */
+  ASSERT(RtsFlags.GranFlags.Light && CurrentProc==0);
+
+  /* Idle proc; same for pri spark and basic version */
+  if (run_queue_hd==END_TSO_QUEUE)
+    {
+      run_queue_hd = run_queue_tl = tso;
+      /* MAKE_BUSY(CurrentProc); */
+      return rtsTrue;
+    }
+
+  /* walk the queue until a TSO with a later local clock is found */
+  for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count = 0;
+       (next != END_TSO_QUEUE) &&
+       !(found = (tso->gran.clock < next->gran.clock));
+       prev = next, next = next->link, count++)
+    {
+      ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) &&
+	     (prev==(StgTSO*)NULL || prev->link==next));
+    }
+
+  /* found can only be rtsTrue if pri sparking enabled */
+  if (found) {
+    /* Add tso to ThreadQueue between prev and next */
+    tso->link = next;
+    if ( next == END_TSO_QUEUE ) {
+      run_queue_tls[proc] = tso;
+    } else {
+      /* no back link for TSO chain */
+    }
+
+    if ( prev == END_TSO_QUEUE ) {
+      run_queue_hds[proc] = tso;
+    } else {
+      prev->link = tso;
+    }
+  } else { /* !found */ /* or not pri sparking! */
+    /* Add TSO to the end of the thread queue on that processor */
+    run_queue_tls[proc]->link = tso;
+    run_queue_tls[proc] = tso;
+  }
+
+  /* NOTE(review): prev==END_TSO_QUEUE can't hold here, since prev starts
+     at the (non-empty) queue head above -- this event seems unreachable;
+     confirm before relying on it */
+  if ( prev == END_TSO_QUEUE ) { /* new head of queue */
+    new_event(proc, proc, CurrentTime[proc],
+	      ContinueThread,
+	      tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+  }
+  /*
+  if (RtsFlags.GranFlags.GranSimStats.Full &&
+      !(( event_type == GR_START || event_type == GR_STARTQ) &&
+	RtsFlags.GranFlags.labelling) )
+    DumpRawGranEvent(proc, creator, gr_evttype, tso, node,
+		     tso->gran.sparkname, spark_queue_len(proc));
+  */
+  return rtsTrue;
+}
+
+/*
+  endThread is responsible for general clean-up after the thread tso has
+  finished.  This includes emitting statistics into the profile etc.
+*/
+void
+endThread(StgTSO *tso, PEs proc)
+{
+  ASSERT(procStatus[proc]==Busy);        // coming straight out of STG land
+  ASSERT(tso->what_next==ThreadComplete);
+  // ToDo: prune ContinueThreads for this TSO from event queue
+  DumpEndEvent(proc, tso, rtsFalse /* not mandatory */);
+
+  /* if this was the last thread on this PE then make it Idle */
+  if (run_queue_hds[proc]==END_TSO_QUEUE) {
+    /* CONSISTENCY FIX: the emptiness test is on 'proc', so set the status
+       of 'proc' too; the old code wrote procStatus[CurrentProc], silently
+       assuming CurrentProc==proc. */
+    procStatus[proc] = Idle;
+  }
+}
+
+//@node Thread Queue routines, GranSim functions, Scheduling functions, GranSim specific code
+//@subsection Thread Queue routines
+
+/*
+ Check whether given tso resides on the run queue of the current processor.
+ Only used for debugging.
+*/
+
+//@cindex is_on_queue
+/* Debugging only: does 'tso' occur in the run queue of processor 'proc'? */
+rtsBool
+is_on_queue (StgTSO *tso, PEs proc)
+{
+  StgTSO *t;
+
+  for (t = run_queue_hds[proc]; t != END_TSO_QUEUE; t = t->link) {
+    if (t == tso)
+      return rtsTrue;
+  }
+  return rtsFalse;
+}
+
+/* This routine is only used for keeping a statistics of thread queue
+ lengths to evaluate the impact of priority scheduling. -- HWL
+ {spark_queue_len}vo' jInIHta'
+*/
+//@cindex thread_queue_len
+/* Length of the thread queue on processor 'proc'.  Only used to gather
+   statistics on queue lengths under priority scheduling. -- HWL */
+nat
+thread_queue_len(PEs proc)
+{
+  StgTSO *next;
+  nat len;
+
+  /* simplified: the old loop also carried a 'prev' pointer that was only
+     ever used to step to prev->link, i.e. next->link */
+  for (len = 0, next = run_queue_hds[proc];
+       next != END_TSO_QUEUE;
+       len++, next = next->link)
+    {}
+
+  return (len);
+}
+
+//@node GranSim functions, GranSimLight routines, Thread Queue routines, GranSim specific code
+//@subsection GranSim functions
+
+/* ----------------------------------------------------------------- */
+/* The main event handling functions; called from Schedule.c (schedule) */
+/* ----------------------------------------------------------------- */
+
+//@cindex do_the_globalblock
+
+/* Handle a GlobalBlock event (GUM-style bulk fetching only): the TSO
+   blocked on a remote node; delegate to blockFetch, which may discover
+   the node has become local in the meantime. */
+void
+do_the_globalblock(rtsEvent* event)
+{
+  PEs proc          = event->proc;    /* proc that requested node */
+  StgTSO *tso       = event->tso;     /* tso that requested node */
+  StgClosure *node  = event->node;    /* requested, remote node */
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the GlobalBlock\n"));
+  /* There should be no GLOBALBLOCKs in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+  /* GlobalBlock events only valid with GUM fetching */
+  ASSERT(RtsFlags.GranFlags.DoBulkFetching);
+
+  IF_GRAN_DEBUG(bq, // globalBlock,
+		if (IS_LOCAL_TO(PROCS(node),proc)) {
+		  belch("## Qagh: GlobalBlock: Blocking TSO %d (%p) on LOCAL node %p (PE %d).\n",
+			tso->id, tso, node, proc);
+		});
+
+  /* CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.munpacktime; */
+  if ( blockFetch(tso,proc,node) != 0 )
+    return;                     /* node has become local by now */
+
+#if 0
+ ToDo: check whether anything has to be done at all after blockFetch -- HWL
+
+  if (!RtsFlags.GranFlags.DoAsyncFetch) { /* head of queue is next thread */
+    StgTSO* tso = run_queue_hds[proc];       /* awaken next thread */
+    if (tso != (StgTSO*)NULL) {
+      new_event(proc, proc, CurrentTime[proc],
+		ContinueThread,
+		tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+      CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime;
+      if (RtsFlags.GranFlags.GranSimStats.Full)
+        DumpRawGranEvent(proc, CurrentProc, GR_SCHEDULE, tso,
+			 (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc));  // ToDo: check sparkname and spar_queue_len
+      procStatus[proc] = Busy;                  /* might have been fetching */
+    } else {
+      procStatus[proc] = Idle;                     /* no work on proc now */
+    }
+  } else {  /* RtsFlags.GranFlags.DoAsyncFetch i.e. block-on-fetch */
+	      /* other thread is already running */
+	      /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL
+	      new_event(proc,proc,CurrentTime[proc],
+		       CONTINUETHREAD,EVENT_TSO(event),
+		       (RtsFlags.GranFlags.DoBulkFetching ? closure :
+		       EVENT_NODE(event)),NULL);
+	      */
+  }
+#endif
+}
+
+//@cindex do_the_unblock
+
+/* Handle an UnblockThread event: a FetchReply arrived or a blocking
+   queue was awakened.  Charges block time (block-on-fetch mode) and
+   always schedules a ResumeThread for the TSO after threadqueuetime. */
+void
+do_the_unblock(rtsEvent* event)
+{
+  PEs proc = event->proc,       /* proc that requested node */
+    creator = event->creator;   /* proc that requested node */
+  StgTSO* tso = event->tso;     /* tso that requested node */
+  StgClosure* node = event->node;  /* requested, remote node */
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the UnBlock\n"))
+  /* There should be no UNBLOCKs in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+  /* UnblockThread means either FetchReply has arrived or
+     a blocking queue has been awakened;
+     ToDo: check with assertions
+  ASSERT(procStatus[proc]==Fetching || IS_BLACK_HOLE(event->node));
+  */
+  if (!RtsFlags.GranFlags.DoAsyncFetch) { /* block-on-fetch */
+    /* We count block-on-fetch as normal block time */
+    tso->gran.blocktime += CurrentTime[proc] - tso->gran.blockedat;
+    /* Dumping now done when processing the event
+       No costs for contextswitch or thread queueing in this case
+       if (RtsFlags.GranFlags.GranSimStats.Full)
+         DumpRawGranEvent(proc, CurrentProc, GR_RESUME, tso,
+                          (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc));
+    */
+    /* Maybe do this in FetchReply already
+    if (procStatus[proc]==Fetching)
+      procStatus[proc] = Busy;
+    */
+    /*
+    new_event(proc, proc, CurrentTime[proc],
+	      ContinueThread,
+	      tso, node, (rtsSpark*)NULL);
+    */
+  } else {
+    /* Asynchr comm causes additional costs here: */
+    /* Bring the TSO from the blocked queue into the threadq */
+  }
+  /* In all cases, the UnblockThread causes a ResumeThread to be scheduled */
+  new_event(proc, proc,
+	    CurrentTime[proc]+RtsFlags.GranFlags.Costs.threadqueuetime,
+	    ResumeThread,
+	    tso, node, (rtsSpark*)NULL);
+}
+
+//@cindex do_the_fetchnode
+
+/* Handle a FetchNode event: 'creator' asked 'proc' (the current PE) for
+   'node'.  Retries handleFetchRequest after a GC when it runs out of
+   heap -- note that the GC retry path currently barf()s first (marked
+   ToDo below), so it is effectively disabled. */
+void
+do_the_fetchnode(rtsEvent* event)
+{
+  PEs proc = event->proc,       /* proc that holds the requested node */
+    creator = event->creator;   /* proc that requested node */
+  StgTSO* tso = event->tso;
+  StgClosure* node = event->node;  /* requested, remote node */
+  rtsFetchReturnCode rc;
+
+  ASSERT(CurrentProc==proc);
+  /* There should be no FETCHNODEs in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchNode\n"));
+
+  CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+  /* ToDo: check whether this is the right place for dumping the event */
+  if (RtsFlags.GranFlags.GranSimStats.Full)
+    DumpRawGranEvent(creator, proc, GR_FETCH, tso, node, (StgInt)0, 0);
+
+  do {
+    rc = handleFetchRequest(node, proc, creator, tso);
+    if (rc == OutOfHeap) {                                   /* trigger GC */
+# if defined(GRAN_CHECK)  && defined(GRAN)
+      if (RtsFlags.GcFlags.giveStats)
+	fprintf(RtsFlags.GcFlags.statsFile,"***** veQ boSwI'  PackNearbyGraph(node %p, tso %p (%d))\n",
+		node, tso, tso->id);
+# endif
+      /* NOTE(review): this barf makes the GC/retry code below unreachable */
+      barf("//// do_the_fetchnode: out of heap after handleFetchRequest; ToDo: call GarbageCollect()");
+      prepend_event(event);
+      GarbageCollect(GetRoots, rtsFalse);
+      // HWL: ToDo: check whether a ContinueThread has to be issued
+      // HWL old: ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse);
+# if 0 && defined(GRAN_CHECK)  && defined(GRAN)
+      if (RtsFlags.GcFlags.giveStats) {
+	fprintf(RtsFlags.GcFlags.statsFile,"***** SAVE_Hp=%p, SAVE_HpLim=%p, PACK_HEAP_REQUIRED=%d\n",
+		Hp, HpLim, 0) ; // PACK_HEAP_REQUIRED);  ???
+	fprintf(stderr,"***** No. of packets so far: %d (total size: %d)\n",
+		globalGranStats.tot_packets, globalGranStats.tot_packet_size);
+      }
+# endif
+      event = grab_event();
+      // Hp -= PACK_HEAP_REQUIRED; // ???
+
+      /* GC knows that events are special and follows the pointer i.e. */
+      /* events are valid even if they moved. An EXIT is triggered */
+      /* if there is not enough heap after GC. */
+    }
+  } while (rc == OutOfHeap);
+}
+
+//@cindex do_the_fetchreply
+/* Handle a FetchReply event: the requested node (or, with bulk fetching,
+   a pack buffer containing a whole subgraph) has arrived at 'proc'.
+   On a fetch miss (incremental fetching only) the node is chased to its
+   new owner via a fresh FetchNode event and the TSO stays asleep;
+   otherwise a ResumeThread is scheduled for the requesting TSO. */
+void
+do_the_fetchreply(rtsEvent* event)
+{
+  PEs proc = event->proc,       /* proc that requested node */
+    creator = event->creator;   /* proc that holds the requested node */
+  StgTSO* tso = event->tso;
+  StgClosure* node = event->node;  /* requested, remote node */
+  StgClosure* closure=(StgClosure*)NULL;
+
+  ASSERT(CurrentProc==proc);
+  ASSERT(RtsFlags.GranFlags.DoAsyncFetch || procStatus[proc]==Fetching);
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchReply\n"));
+  /* There should be no FETCHREPLYs in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+
+  /* assign message unpack costs *before* dumping the event */
+  CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+  /* ToDo: check whether this is the right place for dumping the event */
+  if (RtsFlags.GranFlags.GranSimStats.Full)
+    DumpRawGranEvent(proc, creator, GR_REPLY, tso, node,
+		      tso->gran.sparkname, spark_queue_len(proc));
+
+  /* THIS SHOULD NEVER HAPPEN
+     If tso is in the BQ of node this means that it actually entered the
+     remote closure, due to a missing GranSimFetch at the beginning of the
+     entry code; therefore, this is actually a faked fetch, triggered from
+     within GranSimBlock;
+     since tso is both in the EVQ and the BQ for node, we have to take it out
+     of the BQ first before we can handle the FetchReply;
+     ToDo: special cases in awakenBlockedQueue, since the BQ magically moved.
+  */
+  if (tso->block_info.closure!=(StgClosure*)NULL) {
+    IF_GRAN_DEBUG(bq,
+		  belch("## ghuH: TSO %d (%p) in FetchReply is blocked on node %p (shouldn't happen AFAIK)",
+			tso->id, tso, node));
+    // unlink_from_bq(tso, node);
+  }
+
+  if (RtsFlags.GranFlags.DoBulkFetching) {      /* bulk (packet) fetching */
+    rtsPackBuffer *buffer = (rtsPackBuffer*)node;
+    nat size = buffer->size;
+
+    /* NB: Fetch misses can't occur with GUM fetching, as */
+    /* updatable closure are turned into RBHs and therefore locked */
+    /* for other processors that try to grab them. */
+
+    closure = UnpackGraph(buffer);
+    CurrentTime[proc] += size * RtsFlags.GranFlags.Costs.munpacktime;
+  } else  // incremental fetching
+  /* Copy or  move node to CurrentProc */
+  if (fetchNode(node, creator, proc)) {
+    /* Fetch has failed i.e. node has been grabbed by another PE */
+    PEs p = where_is(node);
+    rtsTime fetchtime;
+
+    if (RtsFlags.GranFlags.GranSimStats.Global)
+      globalGranStats.fetch_misses++;
+
+    IF_GRAN_DEBUG(thunkStealing,
+		 belch("== Qu'vatlh! fetch miss @ %u: node %p is at proc %u (rather than proc %u)\n",
+		       CurrentTime[proc],node,p,creator));
+
+    CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+    /* Count fetch again !? */
+    ++(tso->gran.fetchcount);
+    tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime;
+
+    fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+                RtsFlags.GranFlags.Costs.latency;
+
+    /* Chase the grabbed node */
+    new_event(p, proc, fetchtime,
+	      FetchNode,
+	      tso, node, (rtsSpark*)NULL);
+
+# if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */
+       IF_GRAN_DEBUG(blockOnFetch,
+		     BlockedOnFetch[CurrentProc] = tso;) /*-rtsTrue;-*/
+
+       IF_GRAN_DEBUG(blockOnFetch_sanity,
+		     tso->type |=  FETCH_MASK_TSO;)
+# endif
+
+    CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+    return; /* NB: no REPLy has been processed; tso still sleeping */
+  }
+
+  /* -- Qapla'! Fetch has been successful; node is here, now  */
+  ++(event->tso->gran.fetchcount);
+  event->tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime;
+
+  /* this is now done at the beginning of this routine
+  if (RtsFlags.GranFlags.GranSimStats.Full)
+    DumpRawGranEvent(proc,event->creator, GR_REPLY, event->tso,
+		      (RtsFlags.GranFlags.DoBulkFetching ?
+		       closure :
+		       event->node),
+		      tso->gran.sparkname, spark_queue_len(proc));
+  */
+
+  ASSERT(OutstandingFetches[proc] > 0);
+  --OutstandingFetches[proc];
+  new_event(proc, proc, CurrentTime[proc],
+	    ResumeThread,
+	    event->tso, (RtsFlags.GranFlags.DoBulkFetching ?
+			 closure :
+			 event->node),
+	    (rtsSpark*)NULL);
+}
+
+//@cindex do_the_movethread
+
+/* Handle a MoveThread event (thread migration, -bM only): a stolen TSO
+   arrives at 'proc'; charge unpack costs, re-home its bitmask, insert it
+   into the local run queue and settle the outstanding fish. */
+void
+do_the_movethread(rtsEvent* event) {
+  PEs proc = event->proc,       /* proc that requested node */
+    creator = event->creator;   /* proc that holds the requested node */
+  StgTSO* tso = event->tso;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveThread\n"));
+
+ ASSERT(CurrentProc==proc);
+ /* There should be no MOVETHREADs in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+ /* MOVETHREAD events should never occur without -bM */
+ ASSERT(RtsFlags.GranFlags.DoThreadMigration);
+ /* Bitmask of moved thread should be 0 */
+ ASSERT(PROCS(tso)==0);
+ ASSERT(procStatus[proc] == Fishing ||
+	RtsFlags.GranFlags.DoAsyncFetch);
+ ASSERT(OutstandingFishes[proc]>0);
+
+ /* ToDo: exact costs for unpacking the whole TSO  */
+ CurrentTime[proc] += 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* ToDo: check whether this is the right place for dumping the event */
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+   DumpRawGranEvent(proc, creator,
+		    GR_STOLEN, tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+ // ToDo: check cost functions
+ --OutstandingFishes[proc];
+ SET_GRAN_HDR(tso, ThisPE);         // adjust the bitmask for the TSO
+ insertThread(tso, proc);
+
+ if (procStatus[proc]==Fishing)
+   procStatus[proc] = Idle;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+   globalGranStats.tot_TSOs_migrated++;
+}
+
+//@cindex do_the_movespark
+
+/* Handle a MoveSpark event: a stolen spark arrives at 'proc'; charge
+   unpack costs, record profiling/statistics, add the spark to the local
+   spark queue and settle the outstanding fish.  The PE goes from Fishing
+   to Idle so that the next handleIdlePEs turns the spark into a thread. */
+void
+do_the_movespark(rtsEvent* event) {
+ PEs proc = event->proc,       /* proc that requested spark */
+   creator = event->creator;   /* proc that holds the requested spark */
+ StgTSO* tso = event->tso;
+ rtsSparkQ spark = event->spark;
+
+ IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveSpark\n"))
+
+ ASSERT(CurrentProc==proc);
+ ASSERT(spark!=NULL);
+ ASSERT(procStatus[proc] == Fishing ||
+	RtsFlags.GranFlags.DoAsyncFetch);
+ ASSERT(OutstandingFishes[proc]>0);
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* record movement of spark only if spark profiling is turned on */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+    DumpRawGranEvent(proc, creator,
+		     SP_ACQUIRED,
+		     tso, spark->node, spark->name, spark_queue_len(proc));
+
+ /* global statistics */
+ if ( RtsFlags.GranFlags.GranSimStats.Global &&
+      !closure_SHOULD_SPARK(spark->node))
+   globalGranStats.withered_sparks++;
+   /* Not adding the spark to the spark queue would be the right */
+   /* thing here, but it also would be cheating, as this info can't be */
+   /* available in a real system. -- HWL */
+
+ --OutstandingFishes[proc];
+
+ add_to_spark_queue(spark);
+
+ IF_GRAN_DEBUG(randomSteal, // ToDo: spark-distribution flag
+	       print_sparkq_stats());
+
+ /* Should we treat stolen sparks specially? Currently, we don't. */
+
+ if (procStatus[proc]==Fishing)
+   procStatus[proc] = Idle;
+
+ /* add_to_spark_queue will increase the time of the current proc. */
+ /*
+   If proc was fishing, it is Idle now with the new spark in its spark
+   pool. This means that the next time handleIdlePEs is called, a local
+   FindWork will be created on this PE to turn the spark into a thread. Of
+   course another PE might steal the spark in the meantime (that's why we
+   are using events rather than inlining all the operations in the first
+   place). */
+}
+
+/*
+  In the Constellation class version of GranSim the semantics of StartThread
+  events has changed. Now, StartThread has to perform 3 basic operations:
+   - create a new thread (previously this was done in ActivateSpark);
+   - insert the thread into the run queue of the current processor
+   - generate a new event for actually running the new thread
+  Note that the insertThread is called via createThread.
+*/
+
+//@cindex do_the_startthread
+
+void
+do_the_startthread(rtsEvent *event)
+{
+  /* Handle a StartThread or ResumeThread event on the current proc:
+     create (StartThread) or re-insert (ResumeThread) the TSO, record a
+     profiling event, and schedule a ContinueThread so it actually runs. */
+  PEs proc          = event->proc;  /* proc that requested node */
+  StgTSO *tso       = event->tso;   /* tso that requested node; END_TSO_QUEUE for StartThread */
+  StgClosure  *node = event->node;  /* requested, remote node */
+  rtsSpark *spark   = event->spark; /* only dereferenced for StartThread events */
+  GranEventType gr_evttype;         /* event type written to the .gr profile */
+
+  ASSERT(CurrentProc==proc);
+  ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0);
+  ASSERT(event->evttype == ResumeThread || event->evttype == StartThread);
+  /* if this was called via StartThread: */
+  ASSERT(event->evttype!=StartThread || tso == END_TSO_QUEUE); // not yet created
+  // ToDo: check: ASSERT(event->evttype!=StartThread || procStatus[proc]==Starting);
+  /* if this was called via ResumeThread: */
+  ASSERT(event->evttype!=ResumeThread ||
+         RtsFlags.GranFlags.DoAsyncFetch || !is_on_queue(tso,proc));
+
+  if ( event->evttype == StartThread ) {
+    /* Queued variant if the run queue is non-empty.  FIX: assign the
+       function-scope gr_evttype instead of shadowing it with a
+       block-local redeclaration as the old code did (the outer variable
+       was dead). */
+    gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+                 GR_START : GR_STARTQ;
+
+    tso = createThread(BLOCK_SIZE_W, spark->gran_info);// implicit insertThread!
+    pushClosure(tso, node);
+
+    // ToDo: fwd info on local/global spark to thread -- HWL
+    // tso->gran.exported = spark->exported;
+    // tso->gran.locked = !spark->global;
+    tso->gran.sparkname = spark->name;
+
+    ASSERT(CurrentProc==proc);
+    if (RtsFlags.GranFlags.GranSimStats.Full)
+      DumpGranEvent(gr_evttype,tso);
+
+    CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcreatetime;
+  } else { // event->evttype == ResumeThread
+    gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?
+                 GR_RESUME : GR_RESUMEQ;
+
+    insertThread(tso, proc);
+
+    ASSERT(CurrentProc==proc);
+    if (RtsFlags.GranFlags.GranSimStats.Full)
+      DumpGranEvent(gr_evttype,tso);
+  }
+
+  ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); // non-empty run queue
+  procStatus[proc] = Busy;
+  /* make sure that this thread is actually run */
+  new_event(proc, proc,
+            CurrentTime[proc],
+            ContinueThread,
+            tso, node, (rtsSpark*)NULL);
+
+  /* A wee bit of statistics gathering */
+  if (RtsFlags.GranFlags.GranSimStats.Global) {
+    globalGranStats.tot_add_threads++;
+    globalGranStats.tot_tq_len += thread_queue_len(CurrentProc);
+  }
+
+}
+
+//@cindex do_the_findwork
+void
+do_the_findwork(rtsEvent* event)
+{
+  /* Search for work on behalf of a PE: either a local FindWork
+     (creator==proc) or, with GUM style fishing, a fish sent by creator.
+     NB: the old function-scope copy of event->spark was never read (it
+     was immediately shadowed below) and has been removed, together with
+     the unused `prev' local. */
+  PEs proc = event->proc,       /* proc to search for work */
+      creator = event->creator; /* proc that requested work */
+  /* ToDo: check that this size is safe -- HWL */
+#if 0
+  ToDo: check available heap
+
+  nat req_heap = sizeofW(StgTSO) + MIN_STACK_WORDS;
+  // add this? -- HWL:RtsFlags.ConcFlags.stkChunkSize;
+#endif
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the Findwork\n"));
+
+  /* If GUM style fishing is enabled, the contents of the spark field says
+     what to steal (spark(1) or thread(2)); */
+  ASSERT(!(RtsFlags.GranFlags.Fishing && event->spark==(rtsSpark*)0));
+
+  /* Make sure that we have enough heap for creating a new
+     thread. This is a conservative estimate of the required heap.
+     This eliminates special checks for GC around NewThread within
+     ActivateSpark. */
+
+#if 0
+  ToDo: check available heap
+
+  if (Hp + req_heap > HpLim ) {
+    IF_DEBUG(gc,
+             belch("GC: Doing GC from within Findwork handling (that's bloody dangerous if you ask me)");)
+    GarbageCollect(GetRoots);
+    // ReallyPerformThreadGC(req_heap, rtsFalse); old -- HWL
+    Hp -= req_heap;
+    if (procStatus[CurrentProc]==Sparking)
+      procStatus[CurrentProc]=Idle;
+    return;
+  }
+#endif
+
+  if ( RtsFlags.GranFlags.DoAlwaysCreateThreads ||
+       RtsFlags.GranFlags.Fishing ||
+       ((procStatus[proc]==Idle || procStatus[proc]==Sparking) &&
+        (RtsFlags.GranFlags.FetchStrategy >= 2 ||
+         OutstandingFetches[proc] == 0)) )
+   {
+    rtsBool found;
+    rtsSparkQ spark;            /* spark found by findLocalSpark (if any) */
+
+    /* ToDo: check */
+    ASSERT(procStatus[proc]==Sparking ||
+           RtsFlags.GranFlags.DoAlwaysCreateThreads ||
+           RtsFlags.GranFlags.Fishing);
+
+    /* SImmoHwI' yInej! Search spark queue! */
+    /* gimme_spark (event, &found, &spark); */
+    findLocalSpark(event, &found, &spark);
+
+    if (!found) { /* pagh vumwI' */
+      /*
+        If no spark has been found this can mean 2 things:
+         1/ The FindWork was a fish (i.e. a message sent by another PE) and
+            the spark pool of the receiver is empty
+            --> the fish has to be forwarded to another PE
+         2/ The FindWork was local to this PE (i.e. no communication; in this
+            case creator==proc) and the spark pool of the PE is not empty
+            contains only sparks of closures that should not be sparked
+            (note: if the spark pool were empty, handleIdlePEs wouldn't have
+            generated a FindWork in the first place)
+            --> the PE has to be made idle to trigger stealing sparks the next
+                time handleIdlePEs is performed
+      */
+
+      ASSERT(pending_sparks_hds[proc]==(rtsSpark*)NULL);
+      if (creator==proc) {
+        /* local FindWork */
+        if (procStatus[proc]==Busy) {
+          belch("ghuH: PE %d in Busy state while processing local FindWork (spark pool is empty!) @ %lx",
+                proc, CurrentTime[proc]);
+          procStatus[proc] = Idle;
+        }
+      } else {
+        /* global FindWork i.e. a Fish */
+        ASSERT(RtsFlags.GranFlags.Fishing);
+        /* actually this generates another request from the originating PE */
+        ASSERT(OutstandingFishes[creator]>0);
+        OutstandingFishes[creator]--;
+        /* ToDo: assign costs for sending fish to proc not to creator */
+        stealSpark(creator); /* might steal from same PE; ToDo: fix */
+        ASSERT(RtsFlags.GranFlags.maxFishes!=1 || procStatus[creator] == Fishing);
+        /* any assertions on state of proc possible here? */
+      }
+    } else {
+      /* DaH chu' Qu' yIchen! Now create new work! */
+      IF_GRAN_DEBUG(findWork,
+                    belch("+- munching spark %p; creating thread for node %p",
+                          spark, spark->node));
+      activateSpark (event, spark);
+      ASSERT(spark != (rtsSpark*)NULL);
+      spark = delete_from_sparkq (spark, proc, rtsTrue);
+    }
+
+    IF_GRAN_DEBUG(findWork,
+                  belch("+- Contents of spark queues at the end of FindWork @ %lx",
+                        CurrentTime[proc]);
+                  print_sparkq_stats());
+
+    /* ToDo: check ; not valid if GC occurs in ActivateSpark */
+    ASSERT(!found ||
+            /* forward fish or */
+            (proc!=creator ||
+            /* local spark or */
+            (proc==creator && procStatus[proc]==Starting)) ||
+           //(!found && procStatus[proc]==Idle) ||
+           RtsFlags.GranFlags.DoAlwaysCreateThreads);
+  } else {
+    IF_GRAN_DEBUG(findWork,
+                  belch("+- RTS refuses to findWork on PE %d @ %lx",
+                        proc, CurrentTime[proc]);
+                  belch("  procStatus[%d]=%s, fetch strategy=%d, outstanding fetches[%d]=%d",
+                        proc, proc_status_names[procStatus[proc]],
+                        RtsFlags.GranFlags.FetchStrategy,
+                        proc, OutstandingFetches[proc]));
+  }
+}
+
+//@node GranSimLight routines, Code for Fetching Nodes, GranSim functions, GranSim specific code
+//@subsection GranSimLight routines
+
+/*
+ This code is called from the central scheduler after having rgabbed a
+ new event and is only needed for GranSim-Light. It mainly adjusts the
+ ActiveTSO so that all costs that have to be assigned from within the
+ scheduler are assigned to the right TSO. The choice of ActiveTSO depends
+ on the type of event that has been found.
+*/
+
+void
+GranSimLight_enter_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+  /* GranSim-Light only: on entering the "system area" pick the TSO whose
+     virtual clock should absorb scheduler costs, based on the event type,
+     and switch CurrentTime to that TSO's clock. */
+  StgTSO *ActiveTSO = *ActiveTSOp;
+
+  ASSERT (RtsFlags.GranFlags.Light);
+
+  /* Restore local clock of the virtual processor attached to CurrentTSO.
+     All costs will be associated to the `virt. proc' on which the tso
+     is living. */
+  if (ActiveTSO != NULL) { /* already in system area */
+    ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+    if (RtsFlags.GranFlags.DoFairSchedule)
+      {
+        if (RtsFlags.GranFlags.GranSimStats.Full &&
+            RtsFlags.GranFlags.Debug.checkLight)
+          DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+      }
+  }
+  switch (event->evttype)
+    {
+    case ContinueThread:
+    case FindWork: /* inaccurate this way */
+      ActiveTSO = run_queue_hd;
+      break;
+    case ResumeThread:
+    case StartThread:
+    case MoveSpark: /* has tso of virt proc in tso field of event */
+      ActiveTSO = event->tso;
+      break;
+    default: barf("Illegal event type %s (%d) in GrAnSim Light setup\n",
+                  event_names[event->evttype],event->evttype);
+    }
+  CurrentTime[CurrentProc] = ActiveTSO->gran.clock;
+  if (RtsFlags.GranFlags.DoFairSchedule) {
+    if (RtsFlags.GranFlags.GranSimStats.Full &&
+        RtsFlags.GranFlags.Debug.checkLight)
+      DumpGranEvent(GR_SYSTEM_START,ActiveTSO);
+  }
+  /* FIX: propagate the newly chosen ActiveTSO back through the
+     out-parameter; previously it was never written, so the caller's
+     notion of the active TSO went stale. */
+  *ActiveTSOp = ActiveTSO;
+}
+
+void
+GranSimLight_leave_system(event, ActiveTSOp)
+rtsEvent *event;
+StgTSO **ActiveTSOp;
+{
+  /* GranSim-Light only: on leaving the "system area" save the active
+     TSO's clock, clear the active TSO, and restore CurrentTime from the
+     clock of CurrentTSO. */
+  StgTSO *ActiveTSO = *ActiveTSOp;
+
+  ASSERT(RtsFlags.GranFlags.Light);
+
+  /* Save time of `virt. proc' which was active since last getevent and
+     restore time of `virt. proc' where CurrentTSO is living on. */
+  if(RtsFlags.GranFlags.DoFairSchedule) {
+    if (RtsFlags.GranFlags.GranSimStats.Full &&
+        RtsFlags.GranFlags.Debug.checkLight) // ToDo: clean up flags
+      DumpGranEvent(GR_SYSTEM_END,ActiveTSO);
+  }
+  ActiveTSO->gran.clock = CurrentTime[CurrentProc];
+  ActiveTSO = (StgTSO*)NULL;
+  CurrentTime[CurrentProc] = CurrentTSO->gran.clock;
+  if (RtsFlags.GranFlags.DoFairSchedule /* && resched */ ) {
+    // resched = rtsFalse;
+    if (RtsFlags.GranFlags.GranSimStats.Full &&
+        RtsFlags.GranFlags.Debug.checkLight)
+      DumpGranEvent(GR_SCHEDULE,run_queue_hd);
+  }
+  /* FIX: write the cleared ActiveTSO (NULL) back through the
+     out-parameter; previously the local assignment was lost, leaving the
+     caller with a dangling ActiveTSO after leaving the system area. */
+  *ActiveTSOp = ActiveTSO;
+  /*
+  if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure &&
+      (TimeOfNextEvent == 0 ||
+       TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) {
+    new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000,
+              CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL);
+    TimeOfNextEvent = get_time_of_next_event();
+  }
+  */
+}
+
+//@node Code for Fetching Nodes, Idle PEs, GranSimLight routines, GranSim specific code
+//@subsection Code for Fetching Nodes
+
+/*
+ The following GrAnSim routines simulate the fetching of nodes from a
+ remote processor. We use a 1 word bitmask to indicate on which processor
+ a node is lying. Thus, moving or copying a node from one processor to
+ another just requires an appropriate change in this bitmask (using
+ @SET_GA@). Additionally, the clocks have to be updated.
+
+ A special case arises when the node that is needed by processor A has
+ been moved from a processor B to a processor C between sending out a
+ @FETCH@ (from A) and its arrival at B. In that case the @FETCH@ has to
+ be forwarded to C. This is simulated by issuing another FetchNode event
+ on processor C with A as creator.
+*/
+
+/* ngoqvam che' {GrAnSim}! */
+
+/* Fetch node "node" to processor "p" */
+
+//@cindex fetchNode
+
+rtsFetchReturnCode
+fetchNode(node,from,to)
+StgClosure* node;
+PEs from, to;
+{
+  /* Fetch "node" to processor "to" (the current proc) by updating the
+     node's processor bitmask: HNF nodes are copied (bit OR-ed in),
+     thunks are moved (mask overwritten).  Returns NodeHasMoved when the
+     node is local to neither "from" nor "to", Ok otherwise.
+     NB: the unused local `closure' of the old version has been removed. */
+
+  /* In case of RtsFlags.GranFlags.DoBulkFetching this fct should never be
+     entered! Instead, UnpackGraph is used in ReSchedule */
+  ASSERT(to==CurrentProc);
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+  /* fetchNode should never be entered with DoBulkFetching */
+  ASSERT(!RtsFlags.GranFlags.DoBulkFetching);
+
+  /* Now fetch the node */
+  if (!IS_LOCAL_TO(PROCS(node),from) &&   /* node was moved away from both */
+      !IS_LOCAL_TO(PROCS(node),to) )      /* PEs in the meantime => forward */
+    return NodeHasMoved;
+
+  if (closure_HNF(node)) /* node already in head normal form? */
+    node->header.gran.procs |= PE_NUMBER(to); /* Copy node */
+  else
+    node->header.gran.procs = PE_NUMBER(to); /* Move node */
+
+  return Ok;
+}
+
+/*
+ Process a fetch request.
+
+ Cost of sending a packet of size n = C + P*n
+ where C = packet construction constant,
+ P = cost of packing one word into a packet
+ [Should also account for multiple packets].
+*/
+
+//@cindex handleFetchRequest
+
+/*
+  Serve a fetch request for "node", sent by PE "from" to PE "to", on
+  behalf of "tso".  Three outcomes, mirroring the three branches below:
+   - node already local to the demander  => immediately schedule a
+     FetchReply (NodeIsLocal), or fake an FMBQ if the node is a BH under
+     bulk fetching (NodeIsBH);
+   - node still here                     => pack it (bulk) or send the
+     single closure back via FetchReply (Ok), or OutOfHeap if packing
+     fails;
+   - node moved to a third PE            => forward the FetchNode there
+     (NodeHasMoved).
+  Cost model: mpacktime per (word of) packed data, latency for the
+  message, mtidytime for cleanup -- all charged to CurrentTime[to].
+*/
+rtsFetchReturnCode
+handleFetchRequest(node,to,from,tso)
+StgClosure* node;   // the node which is requested
+PEs to, from;       // fetch request: from -> to
+StgTSO* tso;        // the tso which needs the node
+{
+  ASSERT(!RtsFlags.GranFlags.Light);
+  /* ToDo: check assertion */
+  ASSERT(OutstandingFetches[from]>0);
+
+  /* probably wrong place; */
+  ASSERT(CurrentProc==to);
+
+  if (IS_LOCAL_TO(PROCS(node), from)) /* Somebody else moved node already => */
+    { /* start tso */
+      IF_GRAN_DEBUG(thunkStealing,
+		    fprintf(stderr,"ghuH: handleFetchRequest entered with local node %p (%s) (PE %d)\n",
+			    node, info_type(node), from));
+
+      if (RtsFlags.GranFlags.DoBulkFetching) {
+	nat size;
+	rtsPackBuffer *graph;
+
+	/* Create a 1-node-buffer and schedule a FETCHREPLY now */
+	graph = PackOneNode(node, tso, &size);
+	new_event(from, to, CurrentTime[to],
+		  FetchReply,
+		  tso, (StgClosure *)graph, (rtsSpark*)NULL);
+      } else {
+	new_event(from, to, CurrentTime[to],
+		  FetchReply,
+		  tso, node, (rtsSpark*)NULL);
+      }
+      IF_GRAN_DEBUG(thunkStealing,
+		    belch("== majQa'! closure %p is local on PE %d already (this is a good thing)", node, from));
+      return (NodeIsLocal);
+    }
+  else if (IS_LOCAL_TO(PROCS(node), to) ) /* Is node still here? */
+    {
+      if (RtsFlags.GranFlags.DoBulkFetching) { /* {GUM}vo' ngoqvam vInIHta' */
+	nat size; /* (code from GUM) */
+	StgClosure* graph;
+
+	if (IS_BLACK_HOLE(node)) { /* block on BH or RBH */
+	  new_event(from, to, CurrentTime[to],
+		    GlobalBlock,
+		    tso, node, (rtsSpark*)NULL);
+	  /* Note: blockFetch is done when handling GLOBALBLOCK event;
+	     make sure the TSO stays out of the run queue */
+	  /* When this thread is reawoken it does the usual: it tries to
+	     enter the updated node and issues a fetch if it's remote.
+	     It has forgotten that it has sent a fetch already (i.e. a
+	     FETCHNODE is swallowed by a BH, leaving the thread in a BQ) */
+	  --OutstandingFetches[from];
+
+	  IF_GRAN_DEBUG(thunkStealing,
+			belch("== majQa'! closure %p on PE %d is a BH (demander=PE %d); faking a FMBQ",
+			      node, to, from));
+	  if (RtsFlags.GranFlags.GranSimStats.Global) {
+	    globalGranStats.tot_FMBQs++;
+	  }
+	  return (NodeIsBH);
+	}
+
+	/* The tso requesting the node is blocked and cannot be on a run queue */
+	ASSERT(!is_on_queue(tso, from));
+
+	// ToDo: check whether graph is ever used as an rtsPackBuffer!!
+	if ((graph = (StgClosure *)PackNearbyGraph(node, tso, &size, 0)) == NULL)
+	  return (OutOfHeap); /* out of heap */
+
+	/* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+	/* Send a reply to the originator */
+	/* ToDo: Replace that by software costs for doing graph packing! */
+	CurrentTime[to] += size * RtsFlags.GranFlags.Costs.mpacktime;
+
+	new_event(from, to,
+		  CurrentTime[to]+RtsFlags.GranFlags.Costs.latency,
+		  FetchReply,
+		  tso, (StgClosure *)graph, (rtsSpark*)NULL);
+
+	CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+	return (Ok);
+      } else { /* incremental (single closure) fetching */
+	/* Actual moving/copying of node is done on arrival; see FETCHREPLY */
+	/* Send a reply to the originator */
+	CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime;
+
+	new_event(from, to,
+		  CurrentTime[to]+RtsFlags.GranFlags.Costs.latency,
+		  FetchReply,
+		  tso, node, (rtsSpark*)NULL);
+
+	CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+	return (Ok);
+      }
+    }
+  else /* Qu'vatlh! node has been grabbed by another proc => forward */
+    {
+      PEs node_loc = where_is(node);
+      rtsTime fetchtime;
+
+      IF_GRAN_DEBUG(thunkStealing,
+		    belch("== Qu'vatlh! node %p has been grabbed by PE %d from PE %d (demander=%d) @ %d\n",
+			  node,node_loc,to,from,CurrentTime[to]));
+      if (RtsFlags.GranFlags.GranSimStats.Global) {
+	globalGranStats.fetch_misses++;
+      }
+
+      /* Prepare FORWARD message to proc p_new */
+      CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime;
+
+      /* forwarded fetch cannot arrive before both PEs' clocks plus latency */
+      fetchtime = stg_max(CurrentTime[to], CurrentTime[node_loc]) +
+	RtsFlags.GranFlags.Costs.latency;
+
+      new_event(node_loc, from, fetchtime,
+		FetchNode,
+		tso, node, (rtsSpark*)NULL);
+
+      CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime;
+
+      return (NodeHasMoved);
+    }
+}
+
+/*
+ blockFetch blocks a BlockedFetch node on some kind of black hole.
+
+ Taken from gum/HLComms.lc. [find a better place for that ?] -- HWL
+
+ {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't
+ create @FMBQ@'s (FetchMe blocking queues) to cope with global
+ blocking. Instead, non-local TSO are put into the BQ in the same way as
+ local TSOs. However, we have to check if a TSO is local or global in
+ order to account for the latencies involved and for keeping track of the
+ number of fetches that are really going on.
+*/
+
+//@cindex blockFetch
+
+/*
+  Block "tso" (which was running on PE "proc") on closure "bh".
+  If bh is no longer a black hole the tso is immediately re-awoken via an
+  UnblockThread event (NodeIsNoBH); otherwise the tso is linked onto bh's
+  blocking queue, turning a plain BLACKHOLE into a BLACKHOLE_BQ first
+  (returns Ok).  A FETCH_ME_BQ here is impossible in GranSim => barf.
+*/
+rtsFetchReturnCode
+blockFetch(tso, proc, bh)
+StgTSO* tso; /* TSO which gets blocked */
+PEs proc; /* PE where that tso was running */
+StgClosure* bh; /* closure to block on (BH, RBH, BQ) */
+{
+  StgInfoTable *info;
+
+  IF_GRAN_DEBUG(bq,
+		fprintf(stderr,"## blockFetch: blocking TSO %p (%d)[PE %d] on node %p (%s) [PE %d]. No graph is packed!\n",
+			tso, tso->id, proc, bh, info_type(bh), where_is(bh)));
+
+  if (!IS_BLACK_HOLE(bh)) { /* catches BHs and RBHs */
+    IF_GRAN_DEBUG(bq,
+		  fprintf(stderr,"## blockFetch: node %p (%s) is not a BH => awakening TSO %p (%d) [PE %u]\n",
+			  bh, info_type(bh), tso, tso->id, proc));
+
+    /* No BH anymore => immediately unblock tso */
+    new_event(proc, proc, CurrentTime[proc],
+	      UnblockThread,
+	      tso, bh, (rtsSpark*)NULL);
+
+    /* Is this always a REPLY to a FETCH in the profile ? */
+    if (RtsFlags.GranFlags.GranSimStats.Full)
+      DumpRawGranEvent(proc, proc, GR_REPLY, tso, bh, (StgInt)0, 0);
+    return (NodeIsNoBH);
+  }
+
+  /* DaH {BQ}Daq Qu' Suq 'e' wISov!
+     Now we know that we have to put the tso into the BQ.
+     2 cases: If block-on-fetch, tso is at head of threadq =>
+     => take it out of threadq and into BQ
+     If reschedule-on-fetch, tso is only pointed to be event
+     => just put it into BQ
+
+     ngoq ngo'!!
+     if (!RtsFlags.GranFlags.DoAsyncFetch) {
+     GranSimBlock(tso, proc, bh);
+     } else {
+     if (RtsFlags.GranFlags.GranSimStats.Full)
+     DumpRawGranEvent(proc, where_is(bh), GR_BLOCK, tso, bh, (StgInt)0, 0);
+     ++(tso->gran.blockcount);
+     tso->gran.blockedat = CurrentTime[proc];
+     }
+  */
+
+  /* after scheduling the GlobalBlock event the TSO is not put into the
+     run queue again; it is only pointed to via the event we are
+     processing now; in GranSim 4.xx there is no difference between
+     synchr and asynchr comm here */
+  ASSERT(!is_on_queue(tso, proc));
+  ASSERT(tso->link == END_TSO_QUEUE);
+
+  GranSimBlock(tso, proc, bh); /* GranSim statistics gathering */
+
+  /* Now, put tso into BQ (similar to blocking entry codes) */
+  info = get_itbl(bh);
+  switch (info -> type) {
+    case RBH:
+    case BLACKHOLE:
+    case CAF_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here
+    case SE_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here
+    case SE_CAF_BLACKHOLE:// ToDo: check whether this is a possibly ITBL here
+      /* basically an inlined version of BLACKHOLE_entry -- HWL */
+      /* Change the BLACKHOLE into a BLACKHOLE_BQ */
+      ((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info;
+      /* Put ourselves on the blocking queue for this black hole */
+      // tso->link=END_TSO_QUEUE; not necessary; see assertion above
+      ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)tso;
+      tso->block_info.closure = bh;
+      recordMutable((StgMutClosure *)bh);
+      break;
+
+    case BLACKHOLE_BQ:
+      /* basically an inlined version of BLACKHOLE_BQ_entry -- HWL */
+      /* prepend tso to the existing blocking queue */
+      tso->link = (StgTSO *) (((StgBlockingQueue*)bh)->blocking_queue);
+      ((StgBlockingQueue*)bh)->blocking_queue = (StgBlockingQueueElement *)tso;
+      recordMutable((StgMutClosure *)bh);
+
+# if 0 && defined(GC_MUT_REQUIRED)
+      ToDo: check whether recordMutable is necessary -- HWL
+      /*
+       * If we modify a black hole in the old generation, we have to make
+       * sure it goes on the mutables list
+       */
+
+      if (bh <= StorageMgrInfo.OldLim) {
+	MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables;
+	StorageMgrInfo.OldMutables = bh;
+      } else
+	MUT_LINK(bh) = MUT_NOT_LINKED;
+# endif
+      break;
+
+    case FETCH_ME_BQ:
+      barf("Qagh: FMBQ closure (%p) found in GrAnSim (TSO=%p (%d))\n",
+	   bh, tso, tso->id);
+
+    default:
+      {
+	G_PRINT_NODE(bh);
+	barf("Qagh: thought %p was a black hole (IP %p (%s))",
+	     bh, info, info_type(bh));
+      }
+  }
+  return (Ok);
+}
+
+
+//@node Idle PEs, Routines directly called from Haskell world, Code for Fetching Nodes, GranSim specific code
+//@subsection Idle PEs
+
+/*
+ Export work to idle PEs. This function is called from @ReSchedule@
+ before dispatching on the current event. @HandleIdlePEs@ iterates over
+ all PEs, trying to get work for idle PEs. Note, that this is a
+ simplification compared to GUM's fishing model. We try to compensate for
+ that by making the cost for stealing work dependent on the number of
+ idle processors and thereby on the probability with which a randomly
+ sent fish would find work.
+*/
+
+//@cindex handleIdlePEs
+
+/*
+  Give every Idle PE a chance to get work: first via a local FindWork
+  event if its own spark pool is non-empty, otherwise by stealing a
+  spark and/or thread from another PE (bounded by maxFishes).
+*/
+void
+handleIdlePEs(void)
+{
+  PEs p;
+
+  IF_DEBUG(gran, fprintf(stderr, "GRAN: handling Idle PEs\n"))
+
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+
+  /* Could check whether there are idle PEs if it's a cheap check */
+  for (p = 0; p < RtsFlags.GranFlags.proc; p++)
+    if (procStatus[p]==Idle) /* && IS_SPARKING(p) && IS_STARTING(p) */
+      /* First look for local work i.e. examine local spark pool! */
+      if (pending_sparks_hds[p]!=(rtsSpark *)NULL) {
+	new_event(p, p, CurrentTime[p],
+		  FindWork,
+		  (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+	procStatus[p] = Sparking;
+      } else if ((RtsFlags.GranFlags.maxFishes==0 ||
+		  OutstandingFishes[p]<RtsFlags.GranFlags.maxFishes) ) {
+
+	/* If no local work then try to get remote work!
+	   Qu' Hopbe' pagh tu'lu'pu'chugh Qu' Hop yISuq ! */
+	if (RtsFlags.GranFlags.DoStealThreadsFirst &&
+	    (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0))
+	  {
+	    if (SurplusThreads > 0l) /* Steal a thread */
+	      stealThread(p);
+
+	    /* NOTE(review): this `break' leaves the *whole* PE loop as
+	       soon as one PE stops being Idle, so later idle PEs get no
+	       chance in this round; `continue' looks like the intended
+	       behaviour -- confirm before changing. */
+	    if (procStatus[p]!=Idle)
+	      break;
+	  }
+
+	if (SparksAvail > 0 &&
+	    (RtsFlags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[p] == 0)) /* Steal a spark */
+	  stealSpark(p);
+
+	if (SurplusThreads > 0 &&
+	    (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0)) /* Steal a thread */
+	  stealThread(p);
+      }
+}
+
+/*
+ Steal a spark and schedule moving it to proc. We want to look at PEs in
+ clock order -- most retarded first. Currently sparks are only stolen
+ from the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually,
+ this should be changed to first steal from the former then from the
+ latter.
+
+ We model a sort of fishing mechanism by counting the number of sparks
+ and threads we are currently stealing. */
+
+/*
+ Return a random nat value in the intervall [from, to)
+*/
+static nat
+natRandom(from, to)
+nat from, to;
+{
+  /* Uniformly pick a nat in [from, to); degenerate case from==to
+     yields from.  Uses random() scaled into the requested range. */
+  nat range, result;
+
+  ASSERT(from<=to);
+  range = to - from;
+  /* random() returns a value in [0, RAND_MAX]; scale it into the range */
+  result = (nat) ((float)from + ((float)random()*(float)range)/(float)RAND_MAX);
+  if (result==to)           /* clamp the (rare) top-of-range rounding case */
+    result = from;
+  ASSERT(from<=result && (result<to || from==to));
+  return result;
+}
+
+/*
+ Find any PE other than proc. Used for GUM style fishing only.
+*/
+static PEs
+findRandomPE (proc)
+PEs proc;
+{
+  /* Pick a PE to fish from: a random one when RandomSteal is on,
+     otherwise always PE 0.  NB: despite the header comment above, the
+     result may coincide with proc (cf. the ToDo at the call site). */
+  nat chosen;
+
+  ASSERT(RtsFlags.GranFlags.Fishing);
+  chosen = RtsFlags.GranFlags.RandomSteal
+             ? natRandom(0,RtsFlags.GranFlags.proc)  /* full range of PEs */
+             : 0;
+  IF_GRAN_DEBUG(randomSteal,
+		belch("^^ RANDOM_STEAL (fishing): stealing from PE %d (current proc is %d)",
+		      chosen, proc));
+
+  return (PEs)chosen;
+}
+
+/*
+ Magic code for stealing sparks/threads makes use of global knowledge on
+ spark queues.
+*/
+static void
+sortPEsByTime (proc, pes_by_time, firstp, np)
+PEs proc;
+PEs *pes_by_time;
+nat *firstp, *np;
+{
+  /* Fill pes_by_time with the PEs (other than proc) that have a
+     non-empty spark pool and a clock <= the current proc's, sorted by
+     ascending clock.  *np is the number of entries; *firstp is the index
+     of the first entry whose clock is later than proc's.
+     FIX: the swap temporary was declared rtsTime although it holds a PE
+     id; it is now a PEs.  The unused locals (temp/upb/r/q and the
+     dead #if 0 block) have been removed. */
+  PEs p, n, i, j;
+  nat first;
+
+  ASSERT(!RtsFlags.GranFlags.Fishing);
+
+  /* pes_by_time shall contain processors from which we may steal sparks */
+  for(n=0, p=0; p < RtsFlags.GranFlags.proc; ++p)
+    if ((proc != p) &&                                 // not the current proc
+        (pending_sparks_hds[p] != (rtsSpark *)NULL) && // non-empty spark pool
+        (CurrentTime[p] <= CurrentTime[CurrentProc]))
+      pes_by_time[n++] = p;
+
+  /* sort pes_by_time (simple selection-style pairwise sort; n is tiny) */
+  for(i=0; i < n; ++i)
+    for(j=i+1; j < n; ++j)
+      if (CurrentTime[pes_by_time[i]] > CurrentTime[pes_by_time[j]]) {
+        PEs temp = pes_by_time[i];
+        pes_by_time[i] = pes_by_time[j];
+        pes_by_time[j] = temp;
+      }
+
+  /* Choose random processor to steal spark from; first look at processors */
+  /* that are earlier than the current one (i.e. proc) */
+  for(first=0;
+      (first < n) && (CurrentTime[pes_by_time[first]] <= CurrentTime[proc]);
+      ++first)
+    /* nothing */ ;
+
+  /* if the assertion below is true we can get rid of first */
+  /* ASSERT(first==n); */
+  /* ToDo: check if first is really needed; find cleaner solution */
+
+  *firstp = first;
+  *np = n;
+}
+
+/*
+ Steal a spark (piece of work) from any processor and bring it to proc.
+*/
+//@cindex stealSpark
+static rtsBool
+/* Steal a spark from any PE and bring it to proc; returns whether the
+   steal (or fish) succeeded.  FIX: the result of stealSomething was
+   previously dropped, falling off the end of a non-void function. */
+stealSpark(PEs proc) { return stealSomething(proc, rtsTrue, rtsFalse); }
+
+/*
+ Steal a thread from any processor and bring it to proc i.e. thread migration
+*/
+//@cindex stealThread
+static rtsBool
+/* Steal a thread from any PE and migrate it to proc; returns whether the
+   steal (or fish) succeeded.  FIX: the result of stealSomething was
+   previously dropped, falling off the end of a non-void function. */
+stealThread(PEs proc) { return stealSomething(proc, rtsFalse, rtsTrue); }
+
+/*
+ Steal a spark or a thread and schedule moving it to proc.
+*/
+//@cindex stealSomething
+static rtsBool
+stealSomething(proc, steal_spark, steal_thread)
+PEs proc;                          // PE that needs work (stealer)
+rtsBool steal_spark, steal_thread; // should a spark and/or thread be stolen
+{
+  /* Steal a spark and/or thread for proc.  Without GUM style fishing this
+     delegates to the *Magic routines (global-knowledge stealing); with
+     fishing it merely sends a FindWork "fish" to a random PE and always
+     reports success (the fish itself may come back empty-handed).
+     NB: the unused locals (spark/prev/next/stolen) of the old version
+     have been removed. */
+  PEs p;
+  rtsTime fish_arrival_time;
+
+  ASSERT(steal_spark || steal_thread);
+
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+  ASSERT(!steal_thread || RtsFlags.GranFlags.DoThreadMigration);
+
+  if (!RtsFlags.GranFlags.Fishing) {
+    // ToDo: check if stealing threads is prefered over stealing sparks
+    if (steal_spark) {
+      if (stealSparkMagic(proc))
+	return rtsTrue;
+      else // no spark found
+	if (steal_thread)
+	  return stealThreadMagic(proc);
+	else // no thread found
+	  return rtsFalse;
+    } else { // ASSERT(steal_thread);
+      return stealThreadMagic(proc);
+    }
+    barf("stealSomething: never reached");
+  }
+
+  /* The rest of this function does GUM style fishing */
+
+  p = findRandomPE(proc); /* find a random PE other than proc */
+
+  /* Message packing costs for sending a Fish; qeq jabbI'ID */
+  CurrentTime[proc] += RtsFlags.GranFlags.Costs.mpacktime;
+
+  /* use another GranEvent for requesting a thread? */
+  if (steal_spark && RtsFlags.GranFlags.GranSimStats.Sparks)
+    DumpRawGranEvent(p, proc, SP_REQUESTED,
+		     (StgTSO*)NULL, (StgClosure *)NULL, (StgInt)0, 0);
+
+  /* time of the fish arrival on the remote PE */
+  fish_arrival_time = CurrentTime[proc] + RtsFlags.GranFlags.Costs.latency;
+
+  /* Phps use an own Fish event for that? */
+  /* The contents of the spark component is a HACK:
+      1 means give me a spark;
+      2 means give me a thread
+      0 means give me nothing (this should never happen)
+  */
+  new_event(p, proc, fish_arrival_time,
+	    FindWork,
+	    (StgTSO*)NULL, (StgClosure*)NULL,
+	    (steal_spark ? (rtsSpark*)1 : steal_thread ? (rtsSpark*)2 : (rtsSpark*)0));
+
+  ++OutstandingFishes[proc];
+  /* only with Async fetching? */
+  if (procStatus[proc]==Idle)
+    procStatus[proc]=Fishing;
+
+  /* time needed to clean up buffers etc after sending a message */
+  CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+  /* If GUM style fishing stealing always succeeds because it only consists
+     of sending out a fish; of course, when the fish may return
+     empty-handed! */
+  return rtsTrue;
+}
+
+/*
+ This version of stealing a spark makes use of the global info on all
+ spark pools etc which is not available in a real parallel system.
+ This could be extended to test e.g. the impact of perfect load information.
+*/
+//@cindex stealSparkMagic
+static rtsBool
+stealSparkMagic(proc)
+PEs proc;
+{
+  /* Global-knowledge spark stealing (no fishing): examine candidate PEs
+     in clock order (earliest first, randomised within the eligible
+     prefix) and steal the first spark that SHOULD_SPARK; sparks that
+     should not be sparked are pruned on the way.  Returns whether a
+     spark was stolen (a MoveSpark event has then been scheduled).
+     FIX: the "nothing stolen" debug belch had four %d conversions but
+     only three arguments (varargs UB with this debug flag on); the
+     format string now matches its arguments. */
+  PEs p=0, i=0, j=0, n=0, first, upb;
+  rtsSpark *spark=NULL, *next;
+  PEs pes_by_time[MAX_PROC];
+  rtsBool stolen = rtsFalse;
+  rtsTime stealtime;
+
+  /* Should never be entered in GrAnSim Light setup */
+  ASSERT(!RtsFlags.GranFlags.Light);
+
+  sortPEsByTime(proc, pes_by_time, &first, &n);
+
+  while (!stolen && n>0) {
+    upb = (first==0) ? n : first;
+    i = natRandom(0,upb); /* choose a random eligible PE */
+    p = pes_by_time[i];
+
+    IF_GRAN_DEBUG(randomSteal,
+		  belch("^^ stealSparkMagic (random_steal, not fishing): stealing spark from PE %d (current proc is %d)",
+			p, proc));
+
+    ASSERT(pending_sparks_hds[p]!=(rtsSpark *)NULL); /* non-empty spark pool */
+
+    /* Now go through rtsSparkQ and steal the first eligible spark */
+
+    spark = pending_sparks_hds[p];
+    while (!stolen && spark != (rtsSpark*)NULL)
+      {
+	/* NB: no prev pointer is needed here because all sparks that are not
+	   chosen are pruned
+	*/
+	if ((procStatus[p]==Idle || procStatus[p]==Sparking || procStatus[p] == Fishing) &&
+	    spark->next==(rtsSpark*)NULL)
+	  {
+	    /* Be social! Don't steal the only spark of an idle processor
+	       not {spark} neH yInIH !! */
+	    break; /* next PE */
+	  }
+	else if (closure_SHOULD_SPARK(spark->node))
+	  {
+	    /* Don't Steal local sparks;
+	       ToDo: optionally prefer local over global sparks
+	    if (!spark->global) {
+	      prev=spark;
+	      continue; next spark
+	    }
+	    */
+	    /* found a spark! */
+
+	    /* Prepare message for sending spark */
+	    CurrentTime[p] += RtsFlags.GranFlags.Costs.mpacktime;
+
+	    if (RtsFlags.GranFlags.GranSimStats.Sparks)
+	      DumpRawGranEvent(p, (PEs)0, SP_EXPORTED,
+			       (StgTSO*)NULL, spark->node,
+			       spark->name, spark_queue_len(p));
+
+	    /* steal arrives no earlier than both clocks plus the steal cost */
+	    stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+			   CurrentTime[p] :
+			   CurrentTime[proc])
+	                + sparkStealTime();
+
+	    new_event(proc, p /* CurrentProc */, stealtime,
+		      MoveSpark,
+		      (StgTSO*)NULL, spark->node, spark);
+
+	    stolen = rtsTrue;
+	    ++OutstandingFishes[proc]; /* no. of sparks currently on the fly */
+	    if (procStatus[proc]==Idle)
+	      procStatus[proc] = Fishing;
+	    ++(spark->global); /* record that this is a global spark */
+	    ASSERT(SparksAvail>0);
+	    --SparksAvail; /* on-the-fly sparks are not available */
+	    next = delete_from_sparkq(spark, p, rtsFalse); // don't dispose!
+	    CurrentTime[p] += RtsFlags.GranFlags.Costs.mtidytime;
+	  }
+	else /* !(closure_SHOULD_SPARK(SPARK_NODE(spark))) */
+	  {
+	    IF_GRAN_DEBUG(checkSparkQ,
+			  belch("^^ pruning spark %p (node %p) in stealSparkMagic",
+				spark, spark->node));
+
+	    /* if the spark points to a node that should not be sparked,
+	       prune the spark queue at this point */
+	    if (RtsFlags.GranFlags.GranSimStats.Sparks)
+	      DumpRawGranEvent(p, (PEs)0, SP_PRUNED,
+			       (StgTSO*)NULL, spark->node,
+			       spark->name, spark_queue_len(p));
+	    if (RtsFlags.GranFlags.GranSimStats.Global)
+	      globalGranStats.pruned_sparks++;
+
+	    ASSERT(SparksAvail>0);
+	    --SparksAvail;
+	    spark = delete_from_sparkq(spark, p, rtsTrue);
+	  }
+	/* unlink spark (may have been freed!) from sparkq;
+	   if (prev == NULL) // spark was head of spark queue
+	     pending_sparks_hds[p] = spark->next;
+	   else
+	     prev->next = spark->next;
+	   if (spark->next == NULL)
+	     pending_sparks_tls[p] = prev;
+	   else
+	     next->prev = prev;
+	*/
+      } /* while ... iterating over sparkq */
+
+    /* ToDo: assert that PE p still has work left after stealing the spark */
+
+    if (!stolen && (n>0)) { /* nothing stealable from proc p :( */
+      ASSERT(pes_by_time[i]==p);
+
+      /* remove p from the list (at pos i) */
+      for (j=i; j+1<n; j++)
+	pes_by_time[j] = pes_by_time[j+1];
+      n--;
+
+      /* update index to first proc which is later (or equal) than proc */
+      for ( ;
+	    (first>0) &&
+	      (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+	    first--)
+	/* nothing */ ;
+    }
+  } /* while ... iterating over PEs in pes_by_time */
+
+  IF_GRAN_DEBUG(randomSteal,
+		if (stolen)
+		  belch("^^ stealSparkMagic: spark %p (node=%p) stolen by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+			spark, spark->node, proc, p,
+			SparksAvail, idlers());
+		else
+		  belch("^^ stealSparkMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)",
+			proc, SparksAvail, idlers()));
+
+  if (RtsFlags.GranFlags.GranSimStats.Global &&
+      stolen && (i!=0)) { /* only for statistics */
+    globalGranStats.rs_sp_count++;
+    globalGranStats.ntimes_total += n;
+    globalGranStats.fl_total += first;
+    globalGranStats.no_of_steals++;
+  }
+
+  return stolen;
+}
+
+/*
+ The old stealThread code, which makes use of global info and does not
+ send out fishes.
+ NB: most of this is the same as in stealSparkMagic;
+ only the pieces specific to processing thread queues are different;
+ long live polymorphism!
+*/
+
+//@cindex stealThreadMagic
+static rtsBool
+stealThreadMagic(proc)
+PEs proc;
+{
+ PEs p=0, i=0, j=0, n=0, first, upb;
+ StgTSO *tso=END_TSO_QUEUE;
+ PEs pes_by_time[MAX_PROC];
+ rtsBool stolen = rtsFalse;
+ rtsTime stealtime;
+
+ /* Should never be entered in GrAnSim Light setup */
+ ASSERT(!RtsFlags.GranFlags.Light);
+
+ sortPEsByTime(proc, pes_by_time, &first, &n);
+
+ while (!stolen && n>0) {
+ upb = (first==0) ? n : first;
+ i = natRandom(0,upb); /* choose a random eligible PE */
+ p = pes_by_time[i];
+
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealThreadMagic (random_steal, not fishing): stealing thread from PE %d (current proc is %d)",
+ p, proc));
+
+ /* Steal the first exportable thread in the runnable queue but
+ never steal the first in the queue for social reasons;
+ not Qu' wa'DIch yInIH !!
+ */
+ /* Would be better to search through queue and have options which of
+ the threads to pick when stealing */
+ if (run_queue_hds[p] == END_TSO_QUEUE) {
+ IF_GRAN_DEBUG(randomSteal,
+ belch("^^ stealThreadMagic: No thread to steal from PE %d (stealer=PE %d)",
+ p, proc));
+ } else {
+ tso = run_queue_hds[p]->link; /* tso is *2nd* thread in thread queue */
+ /* Found one */
+ stolen = rtsTrue;
+
+ /* update links in queue */
+ run_queue_hds[p]->link = tso->link;
+ if (run_queue_tls[p] == tso)
+ run_queue_tls[p] = run_queue_hds[p];
+
+ /* ToDo: Turn magic constants into params */
+
+ CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mpacktime;
+
+ stealtime = (CurrentTime[p] > CurrentTime[proc] ?
+ CurrentTime[p] :
+ CurrentTime[proc])
+ + sparkStealTime()
+ + 4l * RtsFlags.GranFlags.Costs.additional_latency
+ + 5l * RtsFlags.GranFlags.Costs.munpacktime;
+
+ /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */
+ SET_GRAN_HDR(tso,Nowhere /* PE_NUMBER(proc) */);
+
+ /* Move from one queue to another */
+ new_event(proc, p, stealtime,
+ MoveThread,
+ tso, (StgClosure*)NULL, (rtsSpark*)NULL);
+
+ /* MAKE_BUSY(proc); not yet; only when thread is in threadq */
+ ++OutstandingFishes[proc];
+ if (procStatus[proc])
+ procStatus[proc] = Fishing;
+ --SurplusThreads;
+
+ if(RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(p, proc,
+ GR_STEALING,
+ tso, (StgClosure*)NULL, (StgInt)0, 0);
+
+ /* costs for tidying up buffer after having sent it */
+ CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mtidytime;
+ }
+
+ /* ToDo: assert that PE p still has work left after stealing the spark */
+
+ if (!stolen && (n>0)) { /* nothing stealable from proc p :( */
+ ASSERT(pes_by_time[i]==p);
+
+ /* remove p from the list (at pos i) */
+ for (j=i; j+1<n; j++)
+ pes_by_time[j] = pes_by_time[j+1];
+ n--;
+
+ /* update index to first proc which is later (or equal) than proc */
+ for ( ;
+ (first>0) &&
+ (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]);
+ first--)
+ /* nothing */ ;
+ }
+ } /* while ... iterating over PEs in pes_by_time */
+
+ IF_GRAN_DEBUG(randomSteal,
+ if (stolen)
+ belch("^^ stealThreadMagic: stolen TSO %d (%p) by PE %d from PE %d (SparksAvail=%d; idlers=%d)",
+ tso->id, tso, proc, p,
+ SparksAvail, idlers());
+ else
+ belch("stealThreadMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)",
+ proc, SparksAvail, idlers()));
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ stolen && (i!=0)) { /* only for statistics */
+ /* ToDo: more statistics on avg thread queue length etc */
+ globalGranStats.rs_t_count++;
+ globalGranStats.no_of_migrates++;
+ }
+
+ return stolen;
+}
+
+//@cindex sparkStealTime
+static rtsTime
+sparkStealTime(void)
+{
+ double fishdelay, sparkdelay, latencydelay;
+ fishdelay = (double)RtsFlags.GranFlags.proc/2;
+ sparkdelay = fishdelay -
+ ((fishdelay-1.0)/(double)(RtsFlags.GranFlags.proc-1))*((double)idlers());
+ latencydelay = sparkdelay*((double)RtsFlags.GranFlags.Costs.latency);
+
+ return((rtsTime)latencydelay);
+}
+
+//@node Routines directly called from Haskell world, Emiting profiling info for GrAnSim, Idle PEs, GranSim specific code
+//@subsection Routines directly called from Haskell world
+/*
+The @GranSim...@ routines in here are directly called via macros from the
+threaded world.
+
+First some auxiliary routines.
+*/
+
+/* Take the current thread off the thread queue and thereby activate the
+ next thread. It's assumed that the next ReSchedule after this uses
+ NEW_THREAD as param.
+ This fct is called from GranSimBlock and GranSimFetch
+*/
+
+//@cindex ActivateNextThread
+
+void
+ActivateNextThread (proc)
+PEs proc;
+{
+ StgTSO *t;
+ /*
+ This routine is entered either via GranSimFetch or via GranSimBlock.
+ It has to prepare the CurrentTSO for being blocked and update the
+ run queue and other statistics on PE proc. The actual enqueuing to the
+ blocking queue (if coming from GranSimBlock) is done in the entry code
+ of the BLACKHOLE and BLACKHOLE_BQ closures (see StgMiscClosures.hc).
+ */
+ /* ToDo: add assertions here!! */
+ //ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE);
+
+ // Only necessary if the running thread is at front of the queue
+ // run_queue_hds[proc] = run_queue_hds[proc]->link;
+ ASSERT(CurrentProc==proc);
+ ASSERT(!is_on_queue(CurrentTSO,proc));
+ if (run_queue_hds[proc]==END_TSO_QUEUE) {
+ /* NB: this routine is only entered with asynchr comm (see assertion) */
+ procStatus[proc] = Idle;
+ } else {
+ /* ToDo: check cost assignment */
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime;
+ if (RtsFlags.GranFlags.GranSimStats.Full &&
+ (!RtsFlags.GranFlags.Light || RtsFlags.GranFlags.Debug.checkLight))
+ /* right flag !?? ^^^ */
+ DumpRawGranEvent(proc, 0, GR_SCHEDULE, run_queue_hds[proc],
+ (StgClosure*)NULL, (StgInt)0, 0);
+ }
+}
+
+/*
+ The following GranSim fcts are stg-called from the threaded world.
+*/
+
+/* Called from HP_CHK and friends (see StgMacros.h) */
+//@cindex GranSimAllocate
+void
+GranSimAllocate(n)
+StgInt n;
+{
+ CurrentTSO->gran.allocs += n;
+ ++(CurrentTSO->gran.basicblocks);
+
+ if (RtsFlags.GranFlags.GranSimStats.Heap) {
+ DumpRawGranEvent(CurrentProc, 0, GR_ALLOC, CurrentTSO,
+ (StgClosure*)NULL, (StgInt)0, n);
+ }
+
+ CurrentTSO->gran.exectime += RtsFlags.GranFlags.Costs.heapalloc_cost;
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.heapalloc_cost;
+}
+
+/*
+ Subtract the values added above, if a heap check fails and
+ so has to be redone.
+*/
+//@cindex GranSimUnallocate
+void
+GranSimUnallocate(n)
+StgInt n;
+{
+ CurrentTSO->gran.allocs -= n;
+ --(CurrentTSO->gran.basicblocks);
+
+ CurrentTSO->gran.exectime -= RtsFlags.GranFlags.Costs.heapalloc_cost;
+ CurrentTime[CurrentProc] -= RtsFlags.GranFlags.Costs.heapalloc_cost;
+}
+
+/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */
+//@cindex GranSimExec
+void
+GranSimExec(ariths,branches,loads,stores,floats)
+StgWord ariths,branches,loads,stores,floats;
+{
+ StgWord cost = RtsFlags.GranFlags.Costs.arith_cost*ariths +
+ RtsFlags.GranFlags.Costs.branch_cost*branches +
+ RtsFlags.GranFlags.Costs.load_cost * loads +
+ RtsFlags.GranFlags.Costs.store_cost*stores +
+ RtsFlags.GranFlags.Costs.float_cost*floats;
+
+ CurrentTSO->gran.exectime += cost;
+ CurrentTime[CurrentProc] += cost;
+}
+
+/*
+ Fetch the node if it isn't local
+ -- result indicates whether fetch has been done.
+
+ This is GRIP-style single item fetching.
+*/
+
+//@cindex GranSimFetch
+StgInt
+GranSimFetch(node /* , liveness_mask */ )
+StgClosure *node;
+/* StgInt liveness_mask; */
+{
+ /* reset the return value (to be checked within STG land) */
+ NeedToReSchedule = rtsFalse;
+
+ if (RtsFlags.GranFlags.Light) {
+ /* Always reschedule in GrAnSim-Light to prevent one TSO from
+ running off too far
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ ContinueThread,CurrentTSO,node,NULL);
+ */
+ return(0);
+ }
+
+ /* Faking an RBH closure:
+ If the bitmask of the closure is 0 then this node is a fake RBH;
+ */
+ if (node->header.gran.procs == Nowhere) {
+ IF_GRAN_DEBUG(bq,
+ belch("## Found fake RBH (node %p); delaying TSO %d (%p)",
+ node, CurrentTSO->id, CurrentTSO));
+
+ new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc]+10000,
+ ContinueThread, CurrentTSO, node, (rtsSpark*)NULL);
+
+ /* Rescheduling (GranSim internal) is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+
+ /* Note: once a node has been fetched, this test will be passed */
+ if (!IS_LOCAL_TO(PROCS(node),CurrentProc))
+ {
+ PEs p = where_is(node);
+ rtsTime fetchtime;
+
+ IF_GRAN_DEBUG(thunkStealing,
+ if (p==CurrentProc)
+ belch("GranSimFetch: Trying to fetch from own processor%u\n", p););
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+ /* NB: Fetch is counted on arrival (FetchReply) */
+
+ fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) +
+ RtsFlags.GranFlags.Costs.latency;
+
+ new_event(p, CurrentProc, fetchtime,
+ FetchNode, CurrentTSO, node, (rtsSpark*)NULL);
+
+ if (fetchtime<TimeOfNextEvent)
+ TimeOfNextEvent = fetchtime;
+
+ /* About to block */
+ CurrentTSO->gran.blockedat = CurrentTime[CurrentProc];
+
+ ++OutstandingFetches[CurrentProc];
+
+ if (RtsFlags.GranFlags.DoAsyncFetch)
+ /* if asynchr comm is turned on, activate the next thread in the q */
+ ActivateNextThread(CurrentProc);
+ else
+ procStatus[CurrentProc] = Fetching;
+
+#if 0
+ /* ToDo: nuke the entire if (anything special for fair schedule?) */
+ if (RtsFlags.GranFlags.DoAsyncFetch)
+ {
+ /* Remove CurrentTSO from the queue -- assumes head of queue == CurrentTSO */
+ if(!RtsFlags.GranFlags.DoFairSchedule)
+ {
+ /* now done in do_the_fetchnode
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(CurrentProc, p, GR_FETCH, CurrentTSO,
+ node, (StgInt)0, 0);
+ */
+ ActivateNextThread(CurrentProc);
+
+# if 0 && defined(GRAN_CHECK)
+ if (RtsFlags.GranFlags.Debug.blockOnFetch_sanity) {
+ if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) {
+ fprintf(stderr,"FetchNode: TSO 0x%x has fetch-mask set @ %d\n",
+ CurrentTSO,CurrentTime[CurrentProc]);
+ stg_exit(EXIT_FAILURE);
+ } else {
+ TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO;
+ }
+ }
+# endif
+ CurrentTSO->link = END_TSO_QUEUE;
+ /* CurrentTSO = END_TSO_QUEUE; */
+
+ /* CurrentTSO is pointed to by the FetchNode event; it is
+ on no run queue any more */
+ } else { /* fair scheduling currently not supported -- HWL */
+ barf("Asynchr communication is not yet compatible with fair scheduling\n");
+ }
+ } else { /* !RtsFlags.GranFlags.DoAsyncFetch */
+ procStatus[CurrentProc] = Fetching; // ToDo: BlockedOnFetch;
+ /* now done in do_the_fetchnode
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(CurrentProc, p,
+ GR_FETCH, CurrentTSO, node, (StgInt)0, 0);
+ */
+ IF_GRAN_DEBUG(blockOnFetch,
+ BlockedOnFetch[CurrentProc] = CurrentTSO;); /*- rtsTrue; -*/
+ }
+#endif /* 0 */
+
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+
+ /* Rescheduling (GranSim internal) is necessary */
+ NeedToReSchedule = rtsTrue;
+
+ return(1);
+ }
+ return(0);
+}
+
+//@cindex GranSimSpark
+void
+GranSimSpark(local,node)
+StgInt local;
+StgClosure *node;
+{
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if (RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(CurrentProc, (PEs)0, SP_SPARK,
+ END_TSO_QUEUE, node, (StgInt)0, spark_queue_len(CurrentProc)-1);
+
+ /* Force the PE to take notice of the spark */
+ if(RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc],
+ FindWork,
+ END_TSO_QUEUE, (StgClosure*)NULL, (rtsSpark*)NULL);
+ if (CurrentTime[CurrentProc]<TimeOfNextEvent)
+ TimeOfNextEvent = CurrentTime[CurrentProc];
+ }
+
+ if(local)
+ ++CurrentTSO->gran.localsparks;
+ else
+ ++CurrentTSO->gran.globalsparks;
+}
+
+//@cindex GranSimSparkAt
+void
+GranSimSparkAt(spark,where,identifier)
+rtsSpark *spark;
+StgClosure *where; /* This should be a node; alternatively could be a GA */
+StgInt identifier;
+{
+ PEs p = where_is(where);
+ GranSimSparkAtAbs(spark,p,identifier);
+}
+
+//@cindex GranSimSparkAtAbs
+void
+GranSimSparkAtAbs(spark,proc,identifier)
+rtsSpark *spark;
+PEs proc;
+StgInt identifier;
+{
+ rtsTime exporttime;
+
+ if (spark == (rtsSpark *)NULL) /* Note: Granularity control might have */
+ return; /* turned a spark into a NULL. */
+
+ /* ++SparksAvail; Nope; do that in add_to_spark_queue */
+ if(RtsFlags.GranFlags.GranSimStats.Sparks)
+ DumpRawGranEvent(proc,0,SP_SPARKAT,
+ END_TSO_QUEUE, spark->node, (StgInt)0, spark_queue_len(proc));
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime;
+ exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?
+ CurrentTime[proc]: CurrentTime[CurrentProc])
+ + RtsFlags.GranFlags.Costs.latency;
+ } else {
+ exporttime = CurrentTime[CurrentProc];
+ }
+
+ if ( RtsFlags.GranFlags.Light )
+ /* Need CurrentTSO in event field to associate costs with creating
+ spark even in a GrAnSim Light setup */
+ new_event(proc, CurrentProc, exporttime,
+ MoveSpark,
+ CurrentTSO, spark->node, spark);
+ else
+ new_event(proc, CurrentProc, exporttime,
+ MoveSpark, (StgTSO*)NULL, spark->node, spark);
+ /* Bit of a hack to treat placed sparks the same as stolen sparks */
+ ++OutstandingFishes[proc];
+
+ /* Force the PE to take notice of the spark (FINDWORK is put after a
+ MoveSpark into the sparkq!) */
+ if (RtsFlags.GranFlags.DoAlwaysCreateThreads) {
+ new_event(CurrentProc,CurrentProc,exporttime+1,
+ FindWork,
+ (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL);
+ }
+
+ if (exporttime<TimeOfNextEvent)
+ TimeOfNextEvent = exporttime;
+
+ if (proc!=CurrentProc) {
+ CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime;
+ ++CurrentTSO->gran.globalsparks;
+ } else {
+ ++CurrentTSO->gran.localsparks;
+ }
+}
+
+/*
+ This function handles local and global blocking. It's called either
+ from threaded code (RBH_entry, BH_entry etc) or from blockFetch when
+ trying to fetch a BH or RBH
+*/
+
+//@cindex GranSimBlock
+void
+GranSimBlock(tso, proc, node)
+StgTSO *tso;
+PEs proc;
+StgClosure *node;
+{
+ PEs node_proc = where_is(node),
+ tso_proc = where_is((StgClosure *)tso);
+
+ ASSERT(tso_proc==CurrentProc);
+ // ASSERT(node_proc==CurrentProc);
+ IF_GRAN_DEBUG(bq,
+ if (node_proc!=CurrentProc)
+ belch("## ghuH: TSO %d (%lx) [PE %d] blocks on non-local node %p [PE %d] (no simulation of FETCHMEs)",
+ tso->id, tso, tso_proc, node, node_proc));
+ ASSERT(tso->link==END_TSO_QUEUE);
+ ASSERT(!is_on_queue(tso,proc)); // tso must not be on run queue already!
+ //ASSERT(tso==run_queue_hds[proc]);
+
+ IF_DEBUG(gran,
+ belch("GRAN: TSO %d (%p) [PE %d] blocks on closure %p @ %lx",
+ tso->id, tso, proc, node, CurrentTime[proc]));
+
+
+ /* THIS SHOULD NEVER HAPPEN!
+ If tso tries to block on a remote node (i.e. node_proc!=CurrentProc)
+ we have missed a GranSimFetch before entering this closure;
+ we hack around it for now, faking a FetchNode;
+ because GranSimBlock is entered via a BLACKHOLE(_BQ) closure,
+ tso will be blocked on this closure until the FetchReply occurs.
+
+ ngoq Dogh!
+
+ if (node_proc!=CurrentProc) {
+ StgInt ret;
+ ret = GranSimFetch(node);
+ IF_GRAN_DEBUG(bq,
+ if (ret)
+ belch(".. GranSimBlock: faking a FetchNode of node %p from %d to %d",
+ node, node_proc, CurrentProc););
+ return;
+ }
+ */
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,(StgInt)0,0);
+
+ ++(tso->gran.blockcount);
+ /* Distinction between local and global block is made in blockFetch */
+ tso->gran.blockedat = CurrentTime[proc];
+
+ CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime;
+ ActivateNextThread(proc);
+ /* tso->link = END_TSO_QUEUE; not really necessary; only for testing */
+}
+
+#endif /* GRAN */
+
+//@node Index, , Dumping routines, GranSim specific code
+//@subsection Index
+
+//@index
+//* ActivateNextThread:: @cindex\s-+ActivateNextThread
+//* CurrentProc:: @cindex\s-+CurrentProc
+//* CurrentTime:: @cindex\s-+CurrentTime
+//* GranSimAllocate:: @cindex\s-+GranSimAllocate
+//* GranSimBlock:: @cindex\s-+GranSimBlock
+//* GranSimExec:: @cindex\s-+GranSimExec
+//* GranSimFetch:: @cindex\s-+GranSimFetch
+//* GranSimLight_insertThread:: @cindex\s-+GranSimLight_insertThread
+//* GranSimSpark:: @cindex\s-+GranSimSpark
+//* GranSimSparkAt:: @cindex\s-+GranSimSparkAt
+//* GranSimSparkAtAbs:: @cindex\s-+GranSimSparkAtAbs
+//* GranSimUnallocate:: @cindex\s-+GranSimUnallocate
+//* any_idle:: @cindex\s-+any_idle
+//* blockFetch:: @cindex\s-+blockFetch
+//* do_the_fetchnode:: @cindex\s-+do_the_fetchnode
+//* do_the_fetchreply:: @cindex\s-+do_the_fetchreply
+//* do_the_findwork:: @cindex\s-+do_the_findwork
+//* do_the_globalblock:: @cindex\s-+do_the_globalblock
+//* do_the_movespark:: @cindex\s-+do_the_movespark
+//* do_the_movethread:: @cindex\s-+do_the_movethread
+//* do_the_startthread:: @cindex\s-+do_the_startthread
+//* do_the_unblock:: @cindex\s-+do_the_unblock
+//* fetchNode:: @cindex\s-+fetchNode
+//* ga_to_proc:: @cindex\s-+ga_to_proc
+//* get_next_event:: @cindex\s-+get_next_event
+//* get_time_of_next_event:: @cindex\s-+get_time_of_next_event
+//* grab_event:: @cindex\s-+grab_event
+//* handleFetchRequest:: @cindex\s-+handleFetchRequest
+//* handleIdlePEs:: @cindex\s-+handleIdlePEs
+//* idlers:: @cindex\s-+idlers
+//* insertThread:: @cindex\s-+insertThread
+//* insert_event:: @cindex\s-+insert_event
+//* is_on_queue:: @cindex\s-+is_on_queue
+//* is_unique:: @cindex\s-+is_unique
+//* new_event:: @cindex\s-+new_event
+//* prepend_event:: @cindex\s-+prepend_event
+//* print_event:: @cindex\s-+print_event
+//* print_eventq:: @cindex\s-+print_eventq
+//* prune_eventq :: @cindex\s-+prune_eventq
+//* spark queue:: @cindex\s-+spark queue
+//* sparkStealTime:: @cindex\s-+sparkStealTime
+//* stealSomething:: @cindex\s-+stealSomething
+//* stealSpark:: @cindex\s-+stealSpark
+//* stealSparkMagic:: @cindex\s-+stealSparkMagic
+//* stealThread:: @cindex\s-+stealThread
+//* stealThreadMagic:: @cindex\s-+stealThreadMagic
+//* thread_queue_len:: @cindex\s-+thread_queue_len
+//* traverse_eventq_for_gc:: @cindex\s-+traverse_eventq_for_gc
+//* where_is:: @cindex\s-+where_is
+//@end index
diff --git a/rts/parallel/GranSimRts.h b/rts/parallel/GranSimRts.h
new file mode 100644
index 0000000000..fc31a1f0a6
--- /dev/null
+++ b/rts/parallel/GranSimRts.h
@@ -0,0 +1,268 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Tue Mar 06 2001 00:18:30 Stardate: [-30]6285.06 hwloidl>
+
+ Variables and functions specific to GranSim.
+ ----------------------------------------------------------------------- */
+
+#ifndef GRANSIM_RTS_H
+#define GRANSIM_RTS_H
+
+//@node Headers for GranSim objs used only in the RTS internally, , ,
+//@section Headers for GranSim objs used only in the RTS internally
+
+//@menu
+//* Event queue::
+//* Spark handling routines::
+//* Processor related stuff::
+//* Local types::
+//* Statistics gathering::
+//* Prototypes::
+//@end menu
+//*/ fool highlight
+
+//@node Event queue, Spark handling routines, Headers for GranSim objs used only in the RTS internally, Headers for GranSim objs used only in the RTS internally
+//@subsection Event queue
+
+#if defined(GRAN) || defined(PAR)
+/* Granularity event types for output (see DumpGranEvent) */
+typedef enum GranEventType_ {
+ GR_START = 0, GR_STARTQ,
+ GR_STEALING, GR_STOLEN, GR_STOLENQ,
+ GR_FETCH, GR_REPLY, GR_BLOCK, GR_RESUME, GR_RESUMEQ,
+ GR_SCHEDULE, GR_DESCHEDULE,
+ GR_END,
+ SP_SPARK, SP_SPARKAT, SP_USED, SP_PRUNED, SP_EXPORTED, SP_ACQUIRED, SP_REQUESTED,
+ GR_ALLOC,
+ GR_TERMINATE,
+ GR_SYSTEM_START, GR_SYSTEM_END, /* only for debugging */
+ GR_EVENT_MAX
+} GranEventType;
+
+extern char *gran_event_names[];
+#endif
+
+#if defined(GRAN) /* whole file */
+
+/* Event Types (internal use only) */
+typedef enum rtsEventType_ {
+ ContinueThread = 0, /* Continue running the first thread in the queue */
+ StartThread, /* Start a newly created thread */
+ ResumeThread, /* Resume a previously running thread */
+ MoveSpark, /* Move a spark from one PE to another */
+ MoveThread, /* Move a thread from one PE to another */
+ FindWork, /* Search for work */
+ FetchNode, /* Fetch a node */
+ FetchReply, /* Receive a node */
+ GlobalBlock, /* Block a TSO on a remote node */
+ UnblockThread /* Make a TSO runnable */
+} rtsEventType;
+
+/* Number of last event type */
+#define MAX_EVENT 9
+
+typedef struct rtsEvent_ {
+ PEs proc; /* Processor id */
+ PEs creator; /* Processor id of PE that created the event */
+ rtsEventType evttype; /* rtsEvent type */
+ rtsTime time; /* Time at which event happened */
+ StgTSO *tso; /* Associated TSO, if relevant */
+ StgClosure *node; /* Associated node, if relevant */
+ rtsSpark *spark; /* Associated SPARK, if relevant */
+ StgInt gc_info; /* Counter of heap objects to mark (used in GC only)*/
+ struct rtsEvent_ *next;
+ } rtsEvent;
+
+typedef rtsEvent *rtsEventQ;
+
+extern rtsEventQ EventHd;
+
+/* Interface for ADT of Event Queue */
+rtsEvent *get_next_event(void);
+rtsTime get_time_of_next_event(void);
+void insert_event(rtsEvent *newentry);
+void new_event(PEs proc, PEs creator, rtsTime time,
+ rtsEventType evttype, StgTSO *tso,
+ StgClosure *node, rtsSpark *spark);
+void print_event(rtsEvent *event);
+void print_eventq(rtsEvent *hd);
+void prepend_event(rtsEvent *event);
+rtsEventQ grab_event(void);
+void prune_eventq(StgTSO *tso, StgClosure *node);
+
+void traverse_eventq_for_gc(void);
+void markEventQueue(void);
+
+//@node Spark handling routines, Processor related stuff, Event queue, Headers for GranSim objs used only in the RTS internally
+//@subsection Spark handling routines
+
+/* These functions are only used in the RTS internally; see GranSim.h for rest */
+void disposeSpark(rtsSpark *spark);
+void disposeSparkQ(rtsSparkQ spark);
+void print_spark(rtsSpark *spark);
+void print_sparkq(PEs proc);
+void print_sparkq_stats(void);
+nat spark_queue_len(PEs proc);
+rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too);
+void markSparkQueue(void);
+
+//@node Processor related stuff, Local types, Spark handling routines, Headers for GranSim objs used only in the RTS internally
+//@subsection Processor related stuff
+
+typedef enum rtsProcStatus_ {
+ Idle = 0, /* empty threadq */
+ Sparking, /* non-empty sparkq; FINDWORK has been issued */
+ Starting, /* STARTTHREAD has been issued */
+ Fetching, /* waiting for remote data (only if block-on-fetch) */
+ Fishing, /* waiting for remote spark/thread */
+ Busy /* non-empty threadq, with head of queue active */
+} rtsProcStatus;
+
+/*
+#define IS_IDLE(proc) (procStatus[proc] == Idle)
+#define IS_SPARKING(proc) (procStatus[proc] == Sparking)
+#define IS_STARTING(proc) (procStatus[proc] == Starting)
+#define IS_FETCHING(proc) (procStatus[proc] == Fetching)
+#define IS_FISHING(proc) (procStatus[proc] == Fishing)
+#define IS_BUSY(proc) (procStatus[proc] == Busy)
+#define ANY_IDLE (any_idle())
+#define MAKE_IDLE(proc) procStatus[proc] = Idle
+#define MAKE_SPARKING(proc) procStatus[proc] = Sparking
+#define MAKE_STARTING(proc) procStatus[proc] = Starting
+#define MAKE_FETCHING(proc) procStatus[proc] = Fetching
+#define MAKE_FISHING(proc) procStatus[proc] = Fishing
+#define MAKE_BUSY(proc) procStatus[proc] = Busy
+*/
+
+//@node Local types, Statistics gathering, Processor related stuff, Headers for GranSim objs used only in the RTS internally
+//@subsection Local types
+
+/* Return codes of HandleFetchRequest:
+ 0 ... ok (FETCHREPLY event with a buffer containing addresses of the
+ nearby graph has been scheduled)
+ 1 ... node is already local (fetched by somebody else; no event is
+ scheduled in here)
+ 2 ... fetch request has been forwarded to the PE that now contains the
+ node
+ 3 ... node is a black hole (BH, BQ or RBH); no event is scheduled, and
+ the current TSO is put into the blocking queue of that node
+ 4 ... out of heap in PackNearbyGraph; GC should be triggered in calling
+ function to guarantee that the tso and node inputs are valid
+ (they may be moved during GC).
+ Return codes of blockFetch:
+ 0 ... ok; tso is now at beginning of BQ attached to the bh closure
+ 1 ... the bh closure is no longer a BH; tso is immediately unblocked
+*/
+
+typedef enum rtsFetchReturnCode_ {
+ Ok = 0,
+ NodeIsLocal,
+ NodeHasMoved,
+ NodeIsBH,
+ NodeIsNoBH,
+ OutOfHeap,
+} rtsFetchReturnCode;
+
+//@node Statistics gathering, Prototypes, Local types, Headers for GranSim objs used only in the RTS internally
+//@subsection Statistics gathering
+
+extern unsigned int /* nat */ OutstandingFetches[], OutstandingFishes[];
+extern rtsProcStatus procStatus[];
+extern StgTSO *BlockedOnFetch[];
+
+/* global structure for collecting statistics */
+typedef struct GlobalGranStats_ {
+ /* event stats */
+ nat noOfEvents;
+ nat event_counts[MAX_EVENT];
+
+ /* communication stats */
+ nat fetch_misses;
+ nat tot_fake_fetches; // GranSim internal; faked Fetches are a kludge!!
+ nat tot_low_pri_sparks;
+
+ /* load distribution statistics */
+ nat rs_sp_count, rs_t_count, ntimes_total, fl_total,
+ no_of_steals, no_of_migrates;
+
+ /* spark queue stats */
+ nat tot_sq_len, tot_sq_probes, tot_sparks;
+ nat tot_add_threads, tot_tq_len, non_end_add_threads;
+
+ /* packet statistics */
+ nat tot_packets, tot_packet_size, tot_cuts, tot_thunks;
+
+ /* thread stats */
+ nat tot_threads_created, threads_created_on_PE[MAX_PROC],
+ tot_TSOs_migrated;
+
+ /* spark stats */
+ nat pruned_sparks, withered_sparks;
+ nat tot_sparks_created, sparks_created_on_PE[MAX_PROC];
+
+ /* scheduling stats */
+ nat tot_yields, tot_stackover, tot_heapover;
+
+ /* blocking queue statistics */
+ rtsTime tot_bq_processing_time;
+ nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs;
+} GlobalGranStats;
+
+extern GlobalGranStats globalGranStats;
+
+//@node Prototypes, , Statistics gathering, Headers for GranSim objs used only in the RTS internally
+//@subsection Prototypes
+
+/* Generally useful fcts */
+PEs where_is(StgClosure *node);
+rtsBool is_unique(StgClosure *node);
+
+/* Prototypes of event handling functions; needed in Schedule.c:ReSchedule() */
+void do_the_globalblock (rtsEvent* event);
+void do_the_unblock (rtsEvent* event);
+void do_the_fetchnode (rtsEvent* event);
+void do_the_fetchreply (rtsEvent* event);
+void do_the_movethread (rtsEvent* event);
+void do_the_movespark (rtsEvent* event);
+void do_the_startthread(rtsEvent *event);
+void do_the_findwork(rtsEvent* event);
+void gimme_spark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res);
+rtsBool munch_spark (rtsEvent *event, rtsSparkQ spark);
+
+/* GranSimLight routines */
+void GranSimLight_enter_system(rtsEvent *event, StgTSO **ActiveTSOp);
+void GranSimLight_leave_system(rtsEvent *event, StgTSO **ActiveTSOp);
+
+/* Communication related routines */
+rtsFetchReturnCode fetchNode(StgClosure* node, PEs from, PEs to);
+rtsFetchReturnCode handleFetchRequest(StgClosure* node, PEs curr_proc, PEs p, StgTSO* tso);
+void handleIdlePEs(void);
+
+long int random(void); /* used in stealSpark() and stealThread() in GranSim.c */
+
+/* Scheduling fcts defined in GranSim.c */
+void insertThread(StgTSO *tso, PEs proc);
+void endThread(StgTSO *tso, PEs proc);
+rtsBool GranSimLight_insertThread(StgTSO *tso, PEs proc);
+nat thread_queue_len(PEs proc);
+
+/* For debugging */
+rtsBool is_on_queue (StgTSO *tso, PEs proc);
+#endif
+
+#if defined(GRAN) || defined(PAR)
+/*
+ Interface for dumping routines (i.e. writing to log file).
+ These routines are shared with GUM (and could also be used for SMP).
+*/
+void DumpGranEvent(GranEventType name, StgTSO *tso);
+void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread);
+void DumpTSO(StgTSO *tso);
+void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node,
+ StgInt sparkname, StgInt len);
+void DumpVeryRawGranEvent(rtsTime time, PEs proc, PEs p, GranEventType name,
+ StgTSO *tso, StgClosure *node,
+ StgInt sparkname, StgInt len);
+#endif
+
+#endif /* GRANSIM_RTS_H */
diff --git a/rts/parallel/HLC.h b/rts/parallel/HLC.h
new file mode 100644
index 0000000000..793ac840f9
--- /dev/null
+++ b/rts/parallel/HLC.h
@@ -0,0 +1,63 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sun Mar 18 2001 20:16:14 Stardate: [-30]6349.22 hwloidl>
+
+ High Level Communications Header (HLC.h)
+
+ Contains the high-level definitions (i.e. communication
+ subsystem independent) used by GUM
+ Phil Trinder, Glasgow University, 12 December 1994
+ H-W. Loidl, Heriot-Watt, November 1999
+ ----------------------------------------------------------------------- */
+
+#ifndef __HLC_H
+#define __HLC_H
+
+#ifdef PAR
+
+#include "LLC.h"
+
+#define NEW_FISH_AGE 0
+#define NEW_FISH_HISTORY 0
+#define NEW_FISH_HUNGER 0
+#define FISH_LIFE_EXPECTANCY 10
+
+
+//@node GUM Message Sending and Unpacking Functions
+//@subsection GUM Message Sending and Unpacking Functions
+
+rtsBool initMoreBuffers(void);
+
+void sendFetch (globalAddr *ga, globalAddr *bqga, int load);
+void sendResume(globalAddr *rga, int nelem, rtsPackBuffer *packBuffer);
+void sendAck (GlobalTaskId task, int ngas, globalAddr *gagamap);
+void sendFish (GlobalTaskId destPE, GlobalTaskId origPE, int age, int history, int hunger);
+void sendFree (GlobalTaskId destPE, int nelem, P_ data);
+void sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *packBuffer);
+void sendReval(GlobalTaskId origPE, int nelem, rtsPackBuffer *data);
+
+//@node Message-Processing Functions
+//@subsection Message-Processing Functions
+
+rtsBool processMessages(void);
+void processFetches(void);
+void processTheRealFetches(void);
+
+//@node Miscellaneous Functions
+//@subsection Miscellaneous Functions
+
+void prepareFreeMsgBuffers(void);
+void freeRemoteGA (int pe, globalAddr *ga);
+void sendFreeMessages(void);
+
+GlobalTaskId choosePE(void);
+StgClosure *createBlockedFetch (globalAddr ga, globalAddr rga);
+void waitForTermination(void);
+
+/* Message bouncing (startup and shutdown, mainly) */
+void bounceFish(void);
+void bounceReval(void);
+
+void DebugPrintGAGAMap (globalAddr *gagamap, int nGAs);
+
+#endif /* PAR */
+#endif /* __HLC_H */
diff --git a/rts/parallel/HLComms.c b/rts/parallel/HLComms.c
new file mode 100644
index 0000000000..b0982e441c
--- /dev/null
+++ b/rts/parallel/HLComms.c
@@ -0,0 +1,1810 @@
+/* ----------------------------------------------------------------------------
+ * Time-stamp: <Wed Mar 21 2001 16:34:41 Stardate: [-30]6363.45 hwloidl>
+ *
+ * High Level Communications Routines (HLComms.lc)
+ *
+ * Contains the high-level routines (i.e. communication
+ * subsystem independent) used by GUM
+ *
+ * GUM 0.2x: Phil Trinder, Glasgow University, 12 December 1994
+ * GUM 3.xx: Phil Trinder, Simon Marlow July 1998
+ * GUM 4.xx: H-W. Loidl, Heriot-Watt University, November 1999 -
+ *
+ * ------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@node High Level Communications Routines, , ,
+//@section High Level Communications Routines
+
+//@menu
+//* Macros etc::
+//* Includes::
+//* GUM Message Sending and Unpacking Functions::
+//* Message-Processing Functions::
+//* GUM Message Processor::
+//* Miscellaneous Functions::
+//* Index::
+//@end menu
+
+//@node Macros etc, Includes, High Level Communications Routines, High Level Communications Routines
+//@subsection Macros etc
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+//@node Includes, GUM Message Sending and Unpacking Functions, Macros etc, High Level Communications Routines
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Storage.h" // for recordMutable
+#include "HLC.h"
+#include "Parallel.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "Sparks.h"
+#include "FetchMe.h" // for BLOCKED_FETCH_info etc
+#if defined(DEBUG)
+# include "ParallelDebug.h"
+#endif
+#include "StgMacros.h" // inlined IS_... fcts
+
+#ifdef DIST
+#include "SchedAPI.h" //for createIOThread
+extern unsigned int context_switch;
+#endif /* DIST */
+
+//@node GUM Message Sending and Unpacking Functions, Message-Processing Functions, Includes, High Level Communications Routines
+//@subsection GUM Message Sending and Unpacking Functions
+
+/*
+ * GUM Message Sending and Unpacking Functions
+ */
+
+/*
+ * Allocate space for message processing
+ */
+
+//@cindex gumPackBuffer
+static rtsPackBuffer *gumPackBuffer;
+
+//@cindex initMoreBuffers
+/* Allocate the file-static gumPackBuffer shared by the message
+   processors below; rtsFalse on allocation failure, rtsTrue otherwise. */
+rtsBool
+initMoreBuffers(void)
+{
+ if ((gumPackBuffer = (rtsPackBuffer *)stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+ "initMoreBuffers")) == NULL)
+ return rtsFalse;
+ return rtsTrue;
+}
+
+/*
+ * SendFetch packs the two global addresses and a load into a message +
+ * sends it.
+
+//@cindex FETCH
+
+ Structure of a FETCH message:
+
+ | GA 1 | GA 2 |
+ +------------------------------------+------+
+ | gtid | slot | weight | gtid | slot | load |
+ +------------------------------------+------+
+ */
+
+//@cindex sendFetch
+/* Send a FETCH for the remote object rga; lga names the local closure
+   to resume into when the data comes back.  The 6 words sent here must
+   match the order consumed by unpackFetch on the receiving PE. */
+void
+sendFetch(globalAddr *rga, globalAddr *lga, int load)
+{
+ ASSERT(rga->weight > 0 && lga->weight > 0);
+ IF_PAR_DEBUG(fetch,
+ belch("~^** Sending Fetch for ((%x, %d, 0)); locally ((%x, %d, %x)), load = %d",
+ rga->payload.gc.gtid, rga->payload.gc.slot,
+ lga->payload.gc.gtid, lga->payload.gc.slot, lga->weight,
+ load));
+
+
+ /* ToDo: Dump event
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga->payload.gc.gtid),
+ GR_FETCH, CurrentTSO, (StgClosure *)(lga->payload.gc.slot),
+ 0, spark_queue_len(ADVISORY_POOL));
+ */
+
+ sendOpV(PP_FETCH, rga->payload.gc.gtid, 6,
+ (StgWord) rga->payload.gc.gtid, (StgWord) rga->payload.gc.slot,
+ (StgWord) lga->weight, (StgWord) lga->payload.gc.gtid,
+ (StgWord) lga->payload.gc.slot, (StgWord) load);
+}
+
+/*
+ * unpackFetch unpacks a FETCH message into two Global addresses and a load
+ * figure.
+*/
+
+//@cindex unpackFetch
+/* Inverse of sendFetch: consume 6 words from the incoming message
+   stream.  On the receiving side lga is the object fetched *from us*,
+   so its weight is pinned to 1 here (sender kept the real weight). */
+static void
+unpackFetch(globalAddr *lga, globalAddr *rga, int *load)
+{
+ long buf[6];
+
+ GetArgs(buf, 6);
+
+ IF_PAR_DEBUG(fetch,
+ belch("~^** Unpacking Fetch for ((%x, %d, 0)) to ((%x, %d, %x)), load = %d",
+ (GlobalTaskId) buf[0], (int) buf[1],
+ (GlobalTaskId) buf[3], (int) buf[4], buf[2], buf[5]));
+
+ lga->weight = 1;
+ lga->payload.gc.gtid = (GlobalTaskId) buf[0];
+ lga->payload.gc.slot = (int) buf[1];
+
+ rga->weight = (unsigned) buf[2];
+ rga->payload.gc.gtid = (GlobalTaskId) buf[3];
+ rga->payload.gc.slot = (int) buf[4];
+
+ *load = (int) buf[5];
+
+ ASSERT(rga->weight > 0);
+}
+
+/*
+ * SendResume packs the remote blocking queue's GA and data into a message
+ * and sends it.
+
+//@cindex RESUME
+
+ Structure of a RESUME message:
+
+ -------------------------------
+ | weight | slot | n | data ...
+ -------------------------------
+
+ data is a packed graph represented as an rtsPackBuffer
+ n is the size of the graph (as returned by PackNearbyGraph) + packet hdr size
+ */
+
+//@cindex sendResume
+/* Send a RESUME carrying the packed graph in packBuffer back to the
+   PE that issued the fetch (rga).  nelem must equal the buffer's own
+   size field; header + debug headroom are added to the wire length. */
+void
+sendResume(globalAddr *rga, int nelem, rtsPackBuffer *packBuffer)
+{
+ IF_PAR_DEBUG(fetch,
+ belch("~^[] Sending Resume (packet <<%d>> with %d elems) for ((%x, %d, %x)) to [%x]",
+ packBuffer->id, nelem,
+ rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight,
+ rga->payload.gc.gtid));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ ASSERT(nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+nelem) == END_OF_BUFFER_MARKER));
+
+ sendOpNV(PP_RESUME, rga->payload.gc.gtid,
+ nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM, (StgPtr)packBuffer,
+ 2, (rtsWeight) rga->weight, (StgWord) rga->payload.gc.slot);
+}
+
+/*
+ * unpackResume unpacks a Resume message into two Global addresses and
+ * a data array.
+ */
+
+//@cindex unpackResume
+/* Inverse of sendResume: read (weight, slot, total-size) then the flat
+   pack buffer from the message stream.  lga's gtid is always mytid,
+   since a RESUME answers a fetch this PE originated. */
+static void
+unpackResume(globalAddr *lga, int *nelem, rtsPackBuffer *packBuffer)
+{
+ long buf[3];
+
+ GetArgs(buf, 3);
+
+ /*
+ RESUME event is written in awaken_blocked_queue
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(lga->payload.gc.gtid),
+ GR_RESUME, END_TSO_QUEUE, (StgClosure *)NULL, 0, 0);
+ */
+
+ lga->weight = (unsigned) buf[0];
+ lga->payload.gc.gtid = mytid;
+ lga->payload.gc.slot = (int) buf[1];
+
+ /* buf[2] is the total wire size; strip header + headroom to get nelem */
+ *nelem = (int) buf[2] - PACK_BUFFER_HDR_SIZE - DEBUG_HEADROOM;
+ GetArgs(packBuffer, *nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM);
+
+ IF_PAR_DEBUG(fetch,
+ belch("~^[] Unpacking Resume (packet <<%d>> with %d elems) for ((%x, %d, %x))",
+ packBuffer->id, *nelem, mytid, (int) buf[1], (unsigned) buf[0]));
+
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+*nelem) == END_OF_BUFFER_MARKER));
+}
+
+/*
+ * SendAck packs the global address being acknowledged, together with
+ * an array of global addresses for any closures shipped and sends them.
+
+//@cindex ACK
+
+ Structure of an ACK message:
+
+ | GA 1 | GA 2 |
+ +---------------------------------------------+-------
+ | weight | gtid | slot | weight | gtid | slot | ..... ngas times
+ + --------------------------------------------+-------
+
+ */
+
+//@cindex sendAck
+/* Send an ACK containing ngas (oldGA, newGA) pairs, flattened 6 longs
+   per pair into gumPackBuffer (reused as a long array). */
+void
+sendAck(GlobalTaskId task, int ngas, globalAddr *gagamap)
+{
+ static long *buffer;
+ long *p;
+ int i;
+
+ if(ngas==0)
+ return; //don't send unnecessary messages!!
+
+ /* reuse the shared pack buffer as scratch space for the flat pairs */
+ buffer = (long *) gumPackBuffer;
+
+ for(i = 0, p = buffer; i < ngas; i++, p += 6) {
+ /* NOTE(review): asserts the *second* GA of the current pair
+ (gagamap[1]); presumably the new GA must carry weight — confirm */
+ ASSERT(gagamap[1].weight > 0);
+ p[0] = (long) gagamap->weight;
+ p[1] = (long) gagamap->payload.gc.gtid;
+ p[2] = (long) gagamap->payload.gc.slot;
+ gagamap++;
+ p[3] = (long) gagamap->weight;
+ p[4] = (long) gagamap->payload.gc.gtid;
+ p[5] = (long) gagamap->payload.gc.slot;
+ gagamap++;
+ }
+ IF_PAR_DEBUG(schedule,
+ belch("~^,, Sending Ack (%d pairs) to [%x]\n",
+ ngas, task));
+
+ sendOpN(PP_ACK, task, p - buffer, (StgPtr)buffer);
+}
+
+/*
+ * unpackAck unpacks an Acknowledgement message into a Global address,
+ * a count of the number of global addresses following and a map of
+ * Global addresses
+ */
+
+//@cindex unpackAck
+/* Inverse of sendAck: read the flat-array length, derive the pair
+   count, then unpack 6 longs per (oldGA, newGA) pair into gagamap.
+   Caller must supply a gagamap large enough for 2 * *ngas entries. */
+static void
+unpackAck(int *ngas, globalAddr *gagamap)
+{
+ long GAarraysize;
+ long buf[6];
+
+ GetArgs(&GAarraysize, 1);
+
+ *ngas = GAarraysize / 6;
+
+ IF_PAR_DEBUG(schedule,
+ belch("~^,, Unpacking Ack (%d pairs) on [%x]\n",
+ *ngas, mytid));
+
+ while (GAarraysize > 0) {
+ GetArgs(buf, 6);
+ gagamap->weight = (rtsWeight) buf[0];
+ gagamap->payload.gc.gtid = (GlobalTaskId) buf[1];
+ gagamap->payload.gc.slot = (int) buf[2];
+ gagamap++;
+ gagamap->weight = (rtsWeight) buf[3];
+ gagamap->payload.gc.gtid = (GlobalTaskId) buf[4];
+ gagamap->payload.gc.slot = (int) buf[5];
+ ASSERT(gagamap->weight > 0);
+ gagamap++;
+ GAarraysize -= 6;
+ }
+}
+
+/*
+ * SendFish packs the global address being acknowledged, together with
+ * an array of global addresses for any closures shipped and sends them.
+
+//@cindex FISH
+
+ Structure of a FISH message:
+
+ +----------------------------------+
+ | orig PE | age | history | hunger |
+ +----------------------------------+
+ */
+
+//@cindex sendFish
+/* Send a FISH (work-stealing probe) to destPE on behalf of origPE.
+   Only when this PE launches its own fish (origPE == mytid) does the
+   outstanding-fish counter go up; forwarded fish leave it alone. */
+void
+sendFish(GlobalTaskId destPE, GlobalTaskId origPE,
+ int age, int history, int hunger)
+{
+ IF_PAR_DEBUG(fish,
+ belch("~^$$ Sending Fish to [%x] (%d outstanding fishes)",
+ destPE, outstandingFishes));
+
+ sendOpV(PP_FISH, destPE, 4,
+ (StgWord) origPE, (StgWord) age, (StgWord) history, (StgWord) hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsTrue;
+ outstandingFishes++;
+ }
+}
+
+/*
+ * unpackFish unpacks a FISH message into the global task id of the
+ * originating PE and 3 data fields: the age, history and hunger of the
+ * fish. The history + hunger are not currently used.
+
+ */
+
+//@cindex unpackFish
+/* Inverse of sendFish: consume the 4 fish words (originating PE, age,
+   history, hunger) from the incoming message stream. */
+static void
+unpackFish(GlobalTaskId *origPE, int *age, int *history, int *hunger)
+{
+ long buf[4];
+
+ GetArgs(buf, 4);
+
+ IF_PAR_DEBUG(fish,
+ belch("~^$$ Unpacking Fish from [%x] (age=%d)",
+ (GlobalTaskId) buf[0], (int) buf[1]));
+
+ *origPE = (GlobalTaskId) buf[0];
+ *age = (int) buf[1];
+ *history = (int) buf[2];
+ *hunger = (int) buf[3];
+}
+
+/*
+ * SendFree sends (weight, slot) pairs for GAs that we no longer need
+ * references to.
+
+//@cindex FREE
+
+ Structure of a FREE message:
+
+ +-----------------------------
+ | n | weight_1 | slot_1 | ...
+ +-----------------------------
+ */
+//@cindex sendFree
+/* Send a FREE message returning weight for GAs owned by pe; data holds
+   nelem words as (weight, slot) pairs, hence nelem/2 GAs. */
+void
+sendFree(GlobalTaskId pe, int nelem, StgPtr data)
+{
+ IF_PAR_DEBUG(free,
+ belch("~^!! Sending Free (%d GAs) to [%x]",
+ nelem/2, pe));
+
+ sendOpN(PP_FREE, pe, nelem, data);
+}
+
+/*
+ * unpackFree unpacks a FREE message into the amount of data shipped and
+ * a data block.
+ */
+//@cindex unpackFree
+/* Inverse of sendFree: read the element count, then that many words of
+   (weight, slot) pairs into data (caller-supplied buffer). */
+static void
+unpackFree(int *nelem, StgWord *data)
+{
+ long buf[1];
+
+ GetArgs(buf, 1);
+ *nelem = (int) buf[0];
+
+ IF_PAR_DEBUG(free,
+ belch("~^!! Unpacking Free (%d GAs)",
+ *nelem/2));
+
+ GetArgs(data, *nelem);
+}
+
+/*
+ * SendSchedule sends a closure to be evaluated in response to a Fish
+ * message. The message is directed to the PE that originated the Fish
+ * (origPE), and includes the packed closure (data) along with its size
+ * (nelem).
+
+//@cindex SCHEDULE
+
+ Structure of a SCHEDULE message:
+
+ +------------------------------------
+ | PE | n | pack buffer of a graph ...
+ +------------------------------------
+ */
+//@cindex sendSchedule
+/* Send a SCHEDULE (packed sparkable graph) to origPE, normally in
+   reply to a FISH from that PE.  nelem must match the buffer's own
+   size field; header + debug headroom are added to the wire length. */
+void
+sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *packBuffer)
+{
+ IF_PAR_DEBUG(schedule,
+ belch("~^-- Sending Schedule (packet <<%d>> with %d elems) to [%x]\n",
+ packBuffer->id, nelem, origPE));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ ASSERT(nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+nelem) == END_OF_BUFFER_MARKER));
+
+ sendOpN(PP_SCHEDULE, origPE,
+ nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM, (StgPtr)packBuffer);
+}
+
+/*
+ * unpackSchedule unpacks a SCHEDULE message into the Global address of
+ * the closure shipped, the amount of data shipped (nelem) and the data
+ * block (data).
+ */
+
+//@cindex unpackSchedule
+/* Inverse of sendSchedule (also used for REVAL in the DIST build):
+   read the total size word, then the flat pack buffer. */
+static void
+unpackSchedule(int *nelem, rtsPackBuffer *packBuffer)
+{
+ long buf[1];
+
+ /* first, just unpack 1 word containing the total size (including header) */
+ GetArgs(buf, 1);
+ /* no. of elems, not counting the header of the pack buffer */
+ *nelem = (int) buf[0] - PACK_BUFFER_HDR_SIZE - DEBUG_HEADROOM;
+
+ /* automatic cast of flat pvm-data to rtsPackBuffer */
+ GetArgs(packBuffer, *nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM);
+
+ IF_PAR_DEBUG(schedule,
+ belch("~^-- Unpacking Schedule (packet <<%d>> with %d elems) on [%x]\n",
+ packBuffer->id, *nelem, mytid));
+
+ ASSERT(*nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+*nelem) == END_OF_BUFFER_MARKER));
+}
+
+#ifdef DIST
+/* sendReval is almost identical to the Schedule version, so we can unpack with unpackSchedule */
+/* DIST only: ship a graph to origPE for revaluation (PP_REVAL tag,
+   otherwise the same wire format as SCHEDULE). */
+void
+sendReval(GlobalTaskId origPE, int nelem, rtsPackBuffer *packBuffer)
+{
+ IF_PAR_DEBUG(schedule,
+ belch("~^-- Sending Reval (packet <<%d>> with %d elems) to [%x]\n",
+ packBuffer->id, nelem, origPE));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ ASSERT(nelem==packBuffer->size);
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*(packBuffer->buffer+nelem) == END_OF_BUFFER_MARKER));
+
+ sendOpN(PP_REVAL, origPE,
+ nelem + PACK_BUFFER_HDR_SIZE + DEBUG_HEADROOM, (StgPtr)packBuffer);
+}
+
+/* DIST only: after TSO t finishes a revaluation, look up the result
+   via the GA recorded in the TSO (revalTid/revalSlot) and ship it back
+   to the requesting PE as a RESUME. */
+void FinishReval(StgTSO *t)
+{ StgClosure *res;
+ globalAddr ga;
+ nat size;
+ rtsPackBuffer *buffer=NULL;
+
+ ga.payload.gc.slot = t->revalSlot;
+ ga.payload.gc.gtid = t->revalTid;
+ ga.weight = 0;
+
+ //find where the reval result is
+ res = GALAlookup(&ga);
+ ASSERT(res);
+
+ IF_PAR_DEBUG(schedule,
+ printGA(&ga);
+ belch(" needs the result %08x\n",res));
+
+ //send off the result
+ buffer = PackNearbyGraph(res, END_TSO_QUEUE, &size,ga.payload.gc.gtid);
+ ASSERT(buffer != (rtsPackBuffer *)NULL);
+ sendResume(&ga, size, buffer);
+
+ IF_PAR_DEBUG(schedule,
+ belch("@;~) Reval Finished"));
+}
+
+#endif /* DIST */
+
+//@node Message-Processing Functions, GUM Message Processor, GUM Message Sending and Unpacking Functions, High Level Communications Routines
+//@subsection Message-Processing Functions
+
+/*
+ * Message-Processing Functions
+ *
+ * The following routines process incoming GUM messages. Often reissuing
+ * messages in response.
+ *
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+
+/*
+ * processFetches constructs and sends resume messages for every
+ * BlockedFetch which is ready to be awakened.
+ * awaken_blocked_queue (in Schedule.c) is responsible for moving
+ * BlockedFetches from a blocking queue to the PendingFetches queue.
+ */
+void GetRoots(void);
+extern StgBlockedFetch *PendingFetches;
+
+/* Walk the PendingFetches queue and return its length (debug helper);
+   sanity-checks that every element is a BLOCKED_FETCH closure. */
+nat
+pending_fetches_len(void)
+{
+ StgBlockedFetch *bf;
+ nat n;
+
+ for (n=0, bf=PendingFetches; bf != END_BF_QUEUE; n++, bf = (StgBlockedFetch *)(bf->link)) {
+ ASSERT(get_itbl(bf)->type==BLOCKED_FETCH);
+ }
+ return n;
+}
+
+//@cindex processFetches
+/* Drain the PendingFetches queue: for each BLOCKED_FETCH, either
+   forward the fetch (target is itself a FETCH_ME), re-block on a black
+   hole, or pack the local graph and send a RESUME to the requester.
+   The queue is emptied on exit. */
+void
+processFetches(void) {
+ StgBlockedFetch *bf, *next;
+ StgClosure *closure;
+ StgInfoTable *ip;
+ globalAddr rga;
+ static rtsPackBuffer *packBuffer;
+
+ IF_PAR_DEBUG(verbose,
+ belch("____ processFetches: %d pending fetches (root @ %p)",
+ pending_fetches_len(), PendingFetches));
+
+ for (bf = PendingFetches;
+ bf != END_BF_QUEUE;
+ bf=next) {
+ /* the PendingFetches list contains only BLOCKED_FETCH closures */
+ ASSERT(get_itbl(bf)->type==BLOCKED_FETCH);
+ /* store link (we might overwrite it via blockFetch later on) */
+ next = (StgBlockedFetch *)(bf->link);
+
+ /*
+ * Find the target at the end of the indirection chain, and
+ * process it in much the same fashion as the original target
+ * of the fetch. Though we hope to find graph here, we could
+ * find a black hole (of any flavor) or even a FetchMe.
+ */
+ closure = bf->node;
+ /*
+ We evacuate BQs and update the node fields where necessary in GC.c
+ So, if we find an EVACUATED closure, something has gone Very Wrong
+ (and therefore we let the RTS crash most ungracefully).
+ */
+ ASSERT(get_itbl(closure)->type != EVACUATED);
+ // closure = ((StgEvacuated *)closure)->evacuee;
+
+ closure = UNWIND_IND(closure);
+ //while ((ind = IS_INDIRECTION(closure)) != NULL) { closure = ind; }
+
+ ip = get_itbl(closure);
+ if (ip->type == FETCH_ME) {
+ /* Forward the Fetch to someone else */
+ rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+ rga.payload.gc.slot = bf->ga.payload.gc.slot;
+ rga.weight = bf->ga.weight;
+
+ sendFetch(((StgFetchMe *)closure)->ga, &rga, 0 /* load */);
+
+ // Global statistics: count no. of fetches
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fetch_mess++;
+ }
+
+ IF_PAR_DEBUG(fetch,
+ belch("__-> processFetches: Forwarding fetch from %lx to %lx",
+ mytid, rga.payload.gc.gtid));
+
+ } else if (IS_BLACK_HOLE(closure)) {
+ IF_PAR_DEBUG(verbose,
+ belch("__++ processFetches: trying to send a BLACK_HOLE => doing a blockFetch on closure %p (%s)",
+ closure, info_type(closure)));
+ bf->node = closure;
+ blockFetch(bf, closure);
+ } else {
+ /* We now have some local graph to send back */
+ nat size;
+
+ packBuffer = gumPackBuffer;
+ IF_PAR_DEBUG(verbose,
+ belch("__*> processFetches: PackNearbyGraph of closure %p (%s)",
+ closure, info_type(closure)));
+
+ if ((packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, bf->ga.payload.gc.gtid)) == NULL) {
+ // Put current BF back on list
+ bf->link = (StgBlockingQueueElement *)PendingFetches;
+ PendingFetches = (StgBlockedFetch *)bf;
+ // ToDo: check that nothing more has to be done to prepare for GC!
+ /* NB: barf aborts, so the GC-and-retry code below is currently
+ unreachable — left in as the intended recovery path (ToDo) */
+ barf("processFetches: out of heap while packing graph; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ bf = PendingFetches;
+ PendingFetches = (StgBlockedFetch *)(bf->link);
+ closure = bf->node;
+ packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, bf->ga.payload.gc.gtid);
+ ASSERT(packBuffer != (rtsPackBuffer *)NULL);
+ }
+ rga.payload.gc.gtid = bf->ga.payload.gc.gtid;
+ rga.payload.gc.slot = bf->ga.payload.gc.slot;
+ rga.weight = bf->ga.weight;
+
+ sendResume(&rga, size, packBuffer);
+
+ // Global statistics: count no. of resume messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_resume_mess++;
+ }
+ }
+ }
+ PendingFetches = END_BF_QUEUE;
+}
+
+#if 0
+/*
+ Alternatively to sending fetch messages directly from the FETCH_ME_entry
+ code we could just store the data about the remote data in a global
+ variable and send the fetch request from the main scheduling loop (similar
+ to processFetches above). This would save an expensive STGCALL in the entry
+ code because we have to go back to the scheduler anyway.
+*/
+//@cindex processTheRealFetches
+/* (compiled out via #if 0) Alternative fetch path: send the fetch
+   recorded in theGlobalFromGA/theGlobalToGA from the scheduler loop
+   instead of from the FETCH_ME entry code. */
+void
+processTheRealFetches(void) {
+ StgBlockedFetch *bf;
+ StgClosure *closure, *next;
+
+ IF_PAR_DEBUG(verbose,
+ belch("__ processTheRealFetches: ");
+ printGA(&theGlobalFromGA);
+ printGA(&theGlobalToGA));
+
+ ASSERT(theGlobalFromGA.payload.gc.gtid != 0 &&
+ theGlobalToGA.payload.gc.gtid != 0);
+
+ /* the old version did this in the FETCH_ME entry code */
+ sendFetch(&theGlobalFromGA, &theGlobalToGA, 0/*load*/);
+
+}
+#endif
+
+
+/*
+ Way of dealing with unwanted fish.
+ Used during startup/shutdown, or from unknown PEs
+*/
+/* Deal with a FISH we cannot service (startup/shutdown, or from an
+   unknown PE): retire it if it is our own, otherwise send it straight
+   home to die. */
+void
+bounceFish(void) {
+ GlobalTaskId origPE;
+ int age, history, hunger;
+
+ /* IF_PAR_DEBUG(verbose, */
+ /* NB: this belch is deliberately unconditional (debug wrapper
+ commented out above) */
+ belch(".... [%x] Bouncing unwanted FISH",mytid);
+
+ unpackFish(&origPE, &age, &history, &hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsFalse; // fish has come home
+ outstandingFishes--;
+ last_fish_arrived_at = CURRENT_TIME; // remember time (see schedule fct)
+ return; // that's all
+ }
+
+ /* otherwise, send it home to die */
+ sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+}
+
+/*
+ * processFish unpacks a fish message, reissuing it if it's our own,
+ * sending work if we have it or sending it onwards otherwise.
+ */
+//@cindex processFish
+/* Handle an incoming FISH: retire it if it is our own come home, send
+   a SCHEDULE if we have an exportable spark, otherwise forward the
+   fish (aged) to another PE, or home to die once too old. */
+static void
+processFish(void)
+{
+ GlobalTaskId origPE;
+ int age, history, hunger;
+ rtsSpark spark;
+ static rtsPackBuffer *packBuffer;
+
+ unpackFish(&origPE, &age, &history, &hunger);
+
+ if (origPE == mytid) {
+ //fishing = rtsFalse; // fish has come home
+ outstandingFishes--;
+ last_fish_arrived_at = CURRENT_TIME; // remember time (see schedule fct)
+ return; // that's all
+ }
+
+ ASSERT(origPE != mytid);
+ IF_PAR_DEBUG(fish,
+ belch("$$__ processing fish; %d sparks available",
+ spark_queue_len(&(MainRegTable.rSparks))));
+ while ((spark = findSpark(rtsTrue/*for_export*/)) != NULL) {
+ nat size;
+ // StgClosure *graph;
+
+ packBuffer = gumPackBuffer;
+ ASSERT(closure_SHOULD_SPARK((StgClosure *)spark));
+ if ((packBuffer = PackNearbyGraph(spark, END_TSO_QUEUE, &size,origPE)) == NULL) {
+ IF_PAR_DEBUG(fish,
+ belch("$$ GC while trying to satisfy FISH via PackNearbyGraph of node %p",
+ (StgClosure *)spark));
+ /* NB: barf aborts; the GC-and-retry below is the intended but
+ currently unreachable recovery path (ToDo) */
+ barf("processFish: out of heap while packing graph; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ /* Now go back and try again */
+ } else {
+ IF_PAR_DEBUG(verbose,
+ if (RtsFlags.ParFlags.ParStats.Sparks)
+ belch("==== STEALING spark %x; sending to %x", spark, origPE));
+
+ IF_PAR_DEBUG(fish,
+ belch("$$-- Replying to FISH from %x by sending graph @ %p (%s)",
+ origPE,
+ (StgClosure *)spark, info_type((StgClosure *)spark)));
+ sendSchedule(origPE, size, packBuffer);
+ disposeSpark(spark);
+ // Global statistics: count no. of schedule messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_schedule_mess++;
+ }
+
+ break;
+ }
+ }
+ if (spark == (rtsSpark)NULL) {
+ IF_PAR_DEBUG(fish,
+ belch("$$^^ No sparks available for FISH from %x",
+ origPE));
+ /* We have no sparks to give */
+ if (age < FISH_LIFE_EXPECTANCY) {
+ /* and the fish is still young, send it to another PE to look for work */
+ sendFish(choosePE(), origPE,
+ (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+ } else { /* otherwise, send it home to die */
+ sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER);
+ // Global statistics: count no. of fish messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fish_mess++;
+ }
+ }
+ }
+} /* processFish */
+
+/*
+ * processFetch either returns the requested data (if available)
+ * or blocks the remote blocking queue on a black hole (if not).
+ */
+
+//@cindex processFetch
+/* Handle an incoming FETCH for a local GA: forward it (target is a
+   FETCH_ME), common up a fetch that came back to its own sender, block
+   it on a black hole via a BLOCKED_FETCH, or pack and RESUME the local
+   graph. */
+static void
+processFetch(void)
+{
+ globalAddr ga, rga;
+ int load;
+ StgClosure *closure;
+ StgInfoTable *ip;
+
+ unpackFetch(&ga, &rga, &load);
+ IF_PAR_DEBUG(fetch,
+ belch("%%%%__ Rcvd Fetch for ((%x, %d, 0)), Resume ((%x, %d, %x)) (load %d) from %x",
+ ga.payload.gc.gtid, ga.payload.gc.slot,
+ rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight, load,
+ rga.payload.gc.gtid));
+
+ closure = GALAlookup(&ga);
+ ASSERT(closure != (StgClosure *)NULL);
+ ip = get_itbl(closure);
+ if (ip->type == FETCH_ME) {
+ /* Forward the Fetch to someone else */
+ sendFetch(((StgFetchMe *)closure)->ga, &rga, load);
+
+ // Global statistics: count no. of fetches
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_fetch_mess++;
+ }
+ } else if (rga.payload.gc.gtid == mytid) {
+ /* Our own FETCH forwarded back around to us */
+ StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)GALAlookup(&rga);
+
+ IF_PAR_DEBUG(fetch,
+ belch("%%%%== Fetch returned to sending PE; closure=%p (%s); receiver=%p (%s)",
+ closure, info_type(closure), fmbq, info_type((StgClosure*)fmbq)));
+ /* We may have already discovered that the fetch target is our own. */
+ if ((StgClosure *)fmbq != closure)
+ CommonUp((StgClosure *)fmbq, closure);
+ (void) addWeight(&rga);
+ } else if (IS_BLACK_HOLE(closure)) {
+ /* This includes RBH's and FMBQ's */
+ StgBlockedFetch *bf;
+
+ /* Can we assert something on the remote GA? */
+ ASSERT(GALAlookup(&rga) == NULL);
+
+ /* If we're hitting a BH or RBH or FMBQ we have to put a BLOCKED_FETCH
+ closure into the BQ in order to denote that when updating this node
+ the result should be sent to the originator of this fetch message. */
+ bf = (StgBlockedFetch *)createBlockedFetch(ga, rga);
+ IF_PAR_DEBUG(fetch,
+ belch("%%++ Blocking Fetch ((%x, %d, %x)) on %p (%s)",
+ rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight,
+ closure, info_type(closure)));
+ blockFetch(bf, closure);
+ } else {
+ /* The target of the FetchMe is some local graph */
+ nat size;
+ // StgClosure *graph;
+ rtsPackBuffer *buffer = (rtsPackBuffer *)NULL;
+
+ if ((buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, rga.payload.gc.gtid)) == NULL) {
+ /* NB: barf aborts; the GC-and-retry below is the intended but
+ currently unreachable recovery path (ToDo) */
+ barf("processFetch: out of heap while packing graph; ToDo: call GC here");
+ GarbageCollect(GetRoots, rtsFalse);
+ closure = GALAlookup(&ga);
+ buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size, rga.payload.gc.gtid);
+ ASSERT(buffer != (rtsPackBuffer *)NULL);
+ }
+ sendResume(&rga, size, buffer);
+
+ // Global statistics: count no. of resume messages
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_resume_mess++;
+ }
+ }
+}
+
+/*
+ The list of pending fetches must be a root-list for GC.
+ This routine is called from GC.c (same as marking GAs etc).
+*/
+/* GC root hook (called from GC.c): treat the head of the
+   PendingFetches queue as a root; the rest of the queue is reached via
+   the scavenged link fields of BLOCKED_FETCH closures. */
+void
+markPendingFetches(rtsBool major_gc) {
+
+ /* No need to traverse the list; this is done via the scavenge code
+ for a BLOCKED_FETCH closure, which evacuates the link field */
+
+ if (PendingFetches != END_BF_QUEUE ) {
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@@@ PendingFetches is root; evaced from %p to",
+ PendingFetches));
+
+ PendingFetches = MarkRoot((StgClosure*)PendingFetches);
+
+ /* NOTE(review): the closing half of the message above is emitted
+ under a different debug class (verbose vs tables), so the line is
+ truncated unless both are enabled — confirm intent */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, " %p\n", PendingFetches));
+
+ } else {
+ IF_PAR_DEBUG(tables,
+ fprintf(stderr, "@@@@ PendingFetches is empty; no need to mark it\n"));
+ }
+}
+
+/*
+ * processFree unpacks a FREE message and adds the weights to our GAs.
+ */
+//@cindex processFree
+/* Handle an incoming FREE: unpack the (weight, slot) pairs into the
+   shared buffer and return each weight to the corresponding local GA. */
+static void
+processFree(void)
+{
+ int nelem;
+ static StgWord *buffer;
+ int i;
+ globalAddr ga;
+
+ buffer = (StgWord *)gumPackBuffer;
+ unpackFree(&nelem, buffer);
+ IF_PAR_DEBUG(free,
+ belch("!!__ Rcvd Free (%d GAs)", nelem / 2));
+
+ ga.payload.gc.gtid = mytid;
+ for (i = 0; i < nelem;) {
+ ga.weight = (rtsWeight) buffer[i++];
+ ga.payload.gc.slot = (int) buffer[i++];
+ IF_PAR_DEBUG(free,
+ fprintf(stderr, "!!-- Processing free ");
+ printGA(&ga);
+ fputc('\n', stderr);
+ );
+ (void) addWeight(&ga);
+ }
+}
+
+/*
+ * processResume unpacks a RESUME message into the graph, filling in
+ * the LA -> GA, and GA -> LA tables. Threads blocked on the original
+ * FetchMe (now a blocking queue) are awakened, and the blocking queue
+ * is converted into an indirection. Finally it sends an ACK in response
+ * which contains any newly allocated GAs.
+ */
+
+//@cindex processResume
+/* Handle an incoming RESUME: unpack the graph into the heap, common it
+   up with the waiting FETCH_ME_BQ, emit profiling REPLY events, and
+   ACK any new GAs back to the sender. */
+static void
+processResume(GlobalTaskId sender)
+{
+ int nelem;
+ nat nGAs;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph, *old;
+ globalAddr lga;
+ globalAddr *gagamap;
+
+ packBuffer = (rtsPackBuffer *)gumPackBuffer;
+ unpackResume(&lga, &nelem, packBuffer);
+
+ IF_PAR_DEBUG(fetch,
+ fprintf(stderr, "[]__ Rcvd Resume for ");
+ printGA(&lga);
+ fputc('\n', stderr));
+ IF_PAR_DEBUG(packet,
+ PrintPacket((rtsPackBuffer *)packBuffer));
+
+ /*
+ * We always unpack the incoming graph, even if we've received the
+ * requested node in some other data packet (and already awakened
+ * the blocking queue).
+ if (SAVE_Hp + packBuffer[0] >= SAVE_HpLim) {
+ ReallyPerformThreadGC(packBuffer[0], rtsFalse);
+ SAVE_Hp -= packBuffer[0];
+ }
+ */
+
+ // ToDo: Check for GC here !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ /* Do this *after* GC; we don't want to release the object early! */
+
+ if (lga.weight > 0)
+ (void) addWeight(&lga);
+
+ old = GALAlookup(&lga);
+
+ /* ToDo: The closure that requested this graph must be one of these two?*/
+ ASSERT(get_itbl(old)->type == FETCH_ME_BQ ||
+ get_itbl(old)->type == RBH);
+
+ if (RtsFlags.ParFlags.ParStats.Full) {
+ StgBlockingQueueElement *bqe, *last_bqe;
+
+ IF_PAR_DEBUG(fetch,
+ belch("[]-- Resume is REPLY to closure %lx", old));
+
+ /* Write REPLY events to the log file, indicating that the remote
+ data has arrived
+ NB: we emit a REPLY only for the *last* elem in the queue; this is
+ the one that triggered the fetch message; all other entries
+ have just added themselves to the queue, waiting for the data
+ they know that has been requested (see entry code for FETCH_ME_BQ)
+ */
+ if ((get_itbl(old)->type == FETCH_ME_BQ ||
+ get_itbl(old)->type == RBH)) {
+ for (bqe = ((StgFetchMeBlockingQueue *)old)->blocking_queue,
+ last_bqe = END_BQ_QUEUE;
+ get_itbl(bqe)->type==TSO ||
+ get_itbl(bqe)->type==BLOCKED_FETCH;
+ last_bqe = bqe, bqe = bqe->link) { /* nothing */ }
+
+ ASSERT(last_bqe==END_BQ_QUEUE ||
+ get_itbl((StgClosure *)last_bqe)->type == TSO);
+
+ /* last_bqe now points to the TSO that triggered the FETCH */
+ if (get_itbl((StgClosure *)last_bqe)->type == TSO)
+ DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(sender),
+ GR_REPLY, ((StgTSO *)last_bqe), ((StgTSO *)last_bqe)->block_info.closure,
+ 0, spark_queue_len(&(MainRegTable.rSparks)));
+ }
+ }
+
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+
+ /*
+ * Sometimes, unpacking will common up the resumee with the
+ * incoming graph, but if it hasn't, we'd better do so now.
+ */
+
+ if (get_itbl(old)->type == FETCH_ME_BQ)
+ CommonUp(old, newGraph);
+
+ IF_PAR_DEBUG(fetch,
+ belch("[]-- Ready to resume unpacked graph at %p (%s)",
+ newGraph, info_type(newGraph)));
+
+ IF_PAR_DEBUG(tables,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ sendAck(sender, nGAs, gagamap);
+}
+
+/*
+ * processSchedule unpacks a SCHEDULE message into the graph, filling
+ * in the LA -> GA, and GA -> LA tables. The root of the graph is added to
+ * the local spark queue. Finally it sends an ACK in response
+ * which contains any newly allocated GAs.
+ */
+//@cindex processSchedule
+/* Handle an incoming SCHEDULE (reply to one of our FISHes): unpack the
+   graph, add its root to the local spark pool, ACK any new GAs, and
+   retire one outstanding fish. */
+static void
+processSchedule(GlobalTaskId sender)
+{
+ nat nelem, nGAs;
+ rtsBool success;
+ static rtsPackBuffer *packBuffer;
+ StgClosure *newGraph;
+ globalAddr *gagamap;
+
+ packBuffer = gumPackBuffer; /* HWL */
+ unpackSchedule(&nelem, packBuffer);
+
+ IF_PAR_DEBUG(schedule,
+ belch("--__ Rcvd Schedule (%d elems)", nelem));
+ IF_PAR_DEBUG(packet,
+ PrintPacket(packBuffer));
+
+ /*
+ * For now, the graph is a closure to be sparked as an advisory
+ * spark, but in future it may be a complete spark with
+ * required/advisory status, priority etc.
+ */
+
+ /*
+ space_required = packBuffer[0];
+ if (SAVE_Hp + space_required >= SAVE_HpLim) {
+ ReallyPerformThreadGC(space_required, rtsFalse);
+ SAVE_Hp -= space_required;
+ }
+ */
+ // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!
+ newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+ ASSERT(newGraph != NULL);
+ success = add_to_spark_queue(newGraph, &(MainRegTable.rSparks));
+
+ if (RtsFlags.ParFlags.ParStats.Full &&
+ RtsFlags.ParFlags.ParStats.Sparks &&
+ success)
+ DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,
+ GR_STOLEN, ((StgTSO *)NULL), newGraph,
+ 0, 0 /* spark_queue_len(ADVISORY_POOL) */);
+
+ IF_PAR_DEBUG(schedule,
+ if (success)
+ belch("--^^ added spark to unpacked graph %p (%s); %d sparks available on [%x] (%s)",
+ newGraph, info_type(newGraph), spark_queue_len(&(MainRegTable.rSparks)), mytid);
+ else
+ belch("--^^ received non-sparkable closure %p (%s); nothing added to spark pool; %d sparks available on [%x]",
+ newGraph, info_type(newGraph), spark_queue_len(&(MainRegTable.rSparks)), mytid));
+ IF_PAR_DEBUG(packet,
+ belch("*< Unpacked graph with root at %p (%s):",
+ newGraph, info_type(newGraph));
+ PrintGraph(newGraph, 0));
+
+ IF_PAR_DEBUG(tables,
+ DebugPrintGAGAMap(gagamap, nGAs));
+
+ sendAck(sender, nGAs, gagamap);
+
+ /* this SCHEDULE satisfies one of our outstanding fish */
+ //fishing = rtsFalse;
+ ASSERT(outstandingFishes>0);
+ outstandingFishes--;
+}
+
+/*
+ * processAck unpacks an ACK, and uses the GAGA map to convert RBH's
+ * (which represent shared thunks that have been shipped) into fetch-mes
+ * to remote GAs.  For every (oldGA,newGA) pair in the map we either
+ * create a FetchMe for the remote copy (if we don't know the closure
+ * under newGA yet) or CommonUp the two copies; in both cases the
+ * weight carried by the message is folded back into our tables.
+ */
+//@cindex processAck
+static void
+processAck(void)
+{
+  nat nGAs;                   // number of (oldGA,newGA) pairs in the ACK
+  globalAddr *gaga;           // cursor over the pair array (steps by 2)
+  globalAddr gagamap[256]; // ToDo: elim magic constant!! MAX_GAS * 2];??
+  // NOTE(review): unpackAck fills 2*nGAs entries of this fixed buffer;
+  // nothing here bounds nGAs <= 128 -- presumably the sender guarantees
+  // it, TODO confirm.
+
+  unpackAck(&nGAs, gagamap);
+
+  IF_PAR_DEBUG(tables,
+	       belch(",,,, Rcvd Ack (%d pairs)", nGAs);
+	       DebugPrintGAGAMap(gagamap, nGAs));
+
+  IF_DEBUG(sanity,
+	   checkGAGAMap(gagamap, nGAs));
+
+  /*
+   * For each (oldGA, newGA) pair, set the GA of the corresponding
+   * thunk to the newGA, convert the thunk to a FetchMe, and return
+   * the weight from the oldGA.
+   */
+  for (gaga = gagamap; gaga < gagamap + nGAs * 2; gaga += 2) {
+    StgClosure *old_closure = GALAlookup(gaga);
+    StgClosure *new_closure = GALAlookup(gaga + 1);
+
+    ASSERT(old_closure != NULL);
+    if (new_closure == NULL) {
+      /* We don't have this closure, so we make a fetchme for it */
+      globalAddr *ga = setRemoteGA(old_closure, gaga + 1, rtsTrue);
+
+      /* convertToFetchMe should be done unconditionally here.
+	 Currently, we assign GAs to CONSTRs, too, (a bit of a hack),
+	 so we have to check whether it is an RBH before converting
+
+	 ASSERT(get_itbl(old_closure)==RBH);
+      */
+      if (get_itbl(old_closure)->type==RBH)
+	convertToFetchMe((StgRBH *)old_closure, ga);
+    } else {
+      /*
+       * Oops...we've got this one already; update the RBH to
+       * point to the object we already know about, whatever it
+       * happens to be.
+       */
+      CommonUp(old_closure, new_closure);
+
+      /*
+       * Increase the weight of the object by the amount just
+       * received in the second part of the ACK pair.
+       */
+      (void) addWeight(gaga + 1);
+    }
+    (void) addWeight(gaga);  /* in both branches: reclaim oldGA's weight */
+  }
+
+  /* check the sanity of the LAGA and GALA tables after mincing them */
+  IF_DEBUG(sanity, checkLAGAtable(rtsFalse));
+}
+
+#ifdef DIST
+
+/* Stub: bounce a REVAL back to its sender (needed when a REVAL reaches
+   a PE that does not yet know the sender); currently unimplemented and
+   aborts the task. */
+void
+bounceReval(void) {
+  barf("Task %x: TODO: should send NACK in response to REVAL",mytid);
+}
+
+/* processReval (DIST only): unpack a REVAL message (same wire format as
+   SCHEDULE), ack every GA pair except the head, and spawn a dedicated
+   high-priority thread to revaluate the unpacked graph on behalf of the
+   sender. */
+static void
+processReval(GlobalTaskId sender) //similar to schedule...
+{ nat nelem, space_required, nGAs;
+  // NOTE(review): space_required is only referenced by the commented-out
+  // GC check below, and the local `ga` below is never used.
+  static rtsPackBuffer *packBuffer;
+  StgClosure *newGraph;
+  globalAddr *gagamap;
+  StgTSO* tso;
+  globalAddr *ga;
+
+  packBuffer = gumPackBuffer; /* HWL */
+  unpackSchedule(&nelem, packBuffer); /* okay, since the structure is the same */
+
+  IF_PAR_DEBUG(packet,
+	       belch("@;~) [%x] Rcvd Reval (%d elems)", mytid, nelem);
+	       PrintPacket(packBuffer));
+
+  /*
+  space_required = packBuffer[0];
+  if (SAVE_Hp + space_required >= SAVE_HpLim) {
+    ReallyPerformThreadGC(space_required, rtsFalse);
+    SAVE_Hp -= space_required;
+  }
+  */
+
+  // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!
+  newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs);
+  ASSERT(newGraph != NULL);
+
+  IF_PAR_DEBUG(packet,
+	       belch("@;~) Unpacked graph with root at %p (%s):",
+		     newGraph, info_type(newGraph));
+	       PrintGraph(newGraph, 0));
+
+  IF_PAR_DEBUG(tables,
+	       DebugPrintGAGAMap(gagamap, nGAs));
+
+  IF_PAR_DEBUG(tables,
+	       printLAGAtable();
+	       DebugPrintGAGAMap(gagamap, nGAs));
+
+  //We don't send an Ack to the head!!!!
+  /* skip the first GA pair (the head); ack only the remaining nGAs-1 */
+  ASSERT(nGAs>0);
+  sendAck(sender, nGAs-1, gagamap+2);
+
+  IF_PAR_DEBUG(verbose,
+	       belch("@;~) About to create Reval thread on behalf of %x",
+		     sender));
+
+  /* run the revaluation in its own thread, remembering who asked for it */
+  tso=createGenThread(RtsFlags.GcFlags.initialStkSize,newGraph);
+  tso->priority=RevalPriority;
+  tso->revalSlot=gagamap->payload.gc.slot;//record who sent the reval
+  tso->revalTid =gagamap->payload.gc.gtid;
+  scheduleThread(tso);
+  context_switch = 1; // switch at the earliest opportunity
+}
+#endif
+
+
+//@node GUM Message Processor, Miscellaneous Functions, Message-Processing Functions, High Level Communications Routines
+//@subsection GUM Message Processor
+
+/*
+ * GUM Message Processor
+
+ * processMessages processes any messages that have arrived, calling
+ * appropriate routines depending on the message tag
+ * (opcode). N.B. Unless profiling it assumes that there {\em ARE} messages
+ * present and performs a blocking receive! During profiling it
+ * busy-waits in order to record idle time.
+ */
+
+//@cindex processMessages
+/*
+ * Blocking message-dispatch loop: receives at least one packet (GetPacket
+ * blocks), dispatches on (sender, opcode), and keeps draining while more
+ * packets are waiting.  Returns rtsTrue iff a FINISH was received, which
+ * the caller propagates to shut this PE down.
+ */
+rtsBool
+processMessages(void)
+{
+  rtsPacket packet;
+  OpCode opcode;
+  GlobalTaskId task;
+  rtsBool receivedFinish = rtsFalse;
+
+  do {
+    packet = GetPacket();  /* Get next message; block until one available */
+    getOpcodeAndSender(packet, &opcode, &task);
+
+    if (task==SysManTask) {
+      /* control messages from the system manager */
+      switch (opcode) {
+      case PP_PETIDS:
+	processPEtids();
+	break;
+
+      case PP_FINISH:
+	IF_PAR_DEBUG(verbose,
+		     belch("==== received FINISH [%p]", mytid));
+	/* this boolean value is returned and propagated to the main
+	   scheduling loop, thus shutting-down this PE */
+	receivedFinish = rtsTrue;
+	break;
+
+      default:
+	barf("Task %x: received unknown opcode %x from SysMan",mytid, opcode);
+      }
+    } else if (taskIDtoPE(task)==0) {
+      /* When a new PE joins then potentially FISH & REVAL messages may
+	 reach PEs before they are notified of the new PE's existence.  The
+	 only solution is to bounce/fail these messages back to the sender.
+	 But we will worry about it once we start seeing these race
+	 conditions!  */
+      switch (opcode) {
+      case PP_FISH:
+	bounceFish();
+	break;
+#ifdef DIST
+      case PP_REVAL:
+	bounceReval();
+	break;
+#endif
+      case PP_PETIDS:
+	belch("Task %x: Ignoring PVM session opened by another SysMan %x",mytid,task);
+	break;
+
+      case PP_FINISH:
+	break;
+
+      default:
+	belch("Task %x: Ignoring opcode %x from unknown PE %x",mytid, opcode, task);
+      }
+    } else
+      /* ordinary PE-to-PE traffic */
+      switch (opcode) {
+      case PP_FETCH:
+	processFetch();
+	// Global statistics: count no. of fetches
+	if (RtsFlags.ParFlags.ParStats.Global &&
+	    RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+	  globalParStats.rec_fetch_mess++;
+	}
+	break;
+
+      case PP_RESUME:
+	processResume(task);
+	// Global statistics: count no. of resumes
+	if (RtsFlags.ParFlags.ParStats.Global &&
+	    RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+	  globalParStats.rec_resume_mess++;
+	}
+	break;
+
+      case PP_ACK:
+	processAck();
+	break;
+
+      case PP_FISH:
+	processFish();
+	// Global statistics: count no. of fish
+	if (RtsFlags.ParFlags.ParStats.Global &&
+	    RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+	  globalParStats.rec_fish_mess++;
+	}
+	break;
+
+      case PP_FREE:
+	processFree();
+	break;
+
+      case PP_SCHEDULE:
+	processSchedule(task);
+	// Global statistics: count no. of schedules
+	if (RtsFlags.ParFlags.ParStats.Global &&
+	    RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+	  globalParStats.rec_schedule_mess++;
+	}
+	break;
+
+#ifdef DIST
+      case PP_REVAL:
+	processReval(task);
+	// Global statistics: count no. of revals
+	if (RtsFlags.ParFlags.ParStats.Global &&
+	    RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+	  globalParStats.rec_reval_mess++;
+	}
+	break;
+#endif
+
+      default:
+	/* Anything we're not prepared to deal with. */
+	barf("Task %x: Unexpected opcode %x from %x",
+	     mytid, opcode, task);
+      } /* switch */
+
+  } while (PacketsWaiting());	/* While there are messages: process them */
+  return receivedFinish;
+} /* processMessages */
+
+//@node Miscellaneous Functions, Index, GUM Message Processor, High Level Communications Routines
+//@subsection Miscellaneous Functions
+
+/*
+ * blockFetch blocks a BlockedFetch node on some kind of black hole.
+ * bf is linked onto bh's blocking queue; a plain BLACKHOLE is first
+ * upgraded to a BLACKHOLE_BQ.  In every case bh is recorded as mutable
+ * because its blocking queue now contains a (possibly old-to-new)
+ * pointer.
+ */
+//@cindex blockFetch
+void
+blockFetch(StgBlockedFetch *bf, StgClosure *bh) {
+  bf->node = bh;
+  switch (get_itbl(bh)->type) {
+  case BLACKHOLE:
+    /* no queue yet: turn the plain black hole into a blocking queue
+       with bf as its only element */
+    bf->link = END_BQ_QUEUE;
+    //((StgBlockingQueue *)bh)->header.info = &stg_BLACKHOLE_BQ_info;
+    SET_INFO(bh, &stg_BLACKHOLE_BQ_info);  // turn closure into a blocking queue
+    ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+    // put bh on the mutables list
+    recordMutable((StgMutClosure *)bh);
+    break;
+
+  case BLACKHOLE_BQ:
+    /* enqueue bf on blocking queue of closure bh */
+    bf->link = ((StgBlockingQueue *)bh)->blocking_queue;
+    ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+    // put bh on the mutables list; ToDo: check
+    recordMutable((StgMutClosure *)bh);
+    break;
+
+  case FETCH_ME_BQ:
+    /* enqueue bf on blocking queue of closure bh */
+    bf->link = ((StgFetchMeBlockingQueue *)bh)->blocking_queue;
+    ((StgFetchMeBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+    // put bh on the mutables list; ToDo: check
+    recordMutable((StgMutClosure *)bh);
+    break;
+
+  case RBH:
+    /* enqueue bf on blocking queue of closure bh */
+    bf->link = ((StgRBH *)bh)->blocking_queue;
+    ((StgRBH *)bh)->blocking_queue = (StgBlockingQueueElement *)bf;
+
+    // put bh on the mutables list; ToDo: check
+    recordMutable((StgMutClosure *)bh);
+    break;
+
+  default:
+    /* NOTE(review): the second argument is an info-table pointer printed
+       with %#lx -- works on LP64 but is not a portable format match. */
+    barf("blockFetch: thought %p was a black hole (IP %#lx, %s)",
+	 (StgClosure *)bh, get_itbl((StgClosure *)bh),
+	 info_type((StgClosure *)bh));
+  }
+  IF_PAR_DEBUG(bq,
+	       belch("##++ blockFetch: after block the BQ of %p (%s) is:",
+		     bh, info_type(bh));
+	       print_bq(bh));
+}
+
+
+/*
+ @blockThread@ is called from the main scheduler whenever tso returns with
+ a ThreadBlocked return code; tso has already been added to a blocking
+ queue (that's done in the entry code of the closure, because it is a
+ cheap operation we have to do in any case); the main purpose of this
+ routine is to send a Fetch message in case we are blocking on a FETCHME(_BQ)
+ closure, which is indicated by the tso.why_blocked field;
+ we also write an entry into the log file if we are generating one
+
+ Should update exectime etc in the entry code already; but we don't have
+ something like ``system time'' in the log file anyway, so this should
+ even out the inaccuracies.
+*/
+
+//@cindex blockThread
+/*
+ * Called from the scheduler when tso has returned ThreadBlocked; tso is
+ * already on a blocking queue.  Dispatches on tso->why_blocked: for a
+ * first block on a remote GA we send a FETCH message; otherwise we just
+ * record stats/log events.  See the long comment above for details.
+ */
+void
+blockThread(StgTSO *tso)
+{
+  globalAddr *remote_ga=NULL;   // GA of the remote closure we block on
+  globalAddr *local_ga;         // fresh GA for the newly created FMBQ
+  globalAddr fmbq_ga;           // split-off half sent with the FETCH
+
+  // ASSERT(we are on some blocking queue)
+  ASSERT(tso->block_info.closure != (StgClosure *)NULL);
+
+  /*
+    We have to check why this thread has been blocked.
+  */
+  switch (tso->why_blocked) {
+    case BlockedOnGA:
+      /* the closure must be a FETCH_ME_BQ; tso came in here via
+	 FETCH_ME entry code */
+      ASSERT(get_itbl(tso->block_info.closure)->type==FETCH_ME_BQ);
+
+      /* HACK: the link field is used to hold the GA between FETCH_ME_entry
+	 end this point; if something (eg. GC) happens inbetween the whole
+	 thing will blow up
+	 The problem is that the ga field of the FETCH_ME has been overwritten
+	 with the head of the blocking queue (which is tso).
+      */
+      ASSERT(looks_like_ga(&theGlobalFromGA));
+      // ASSERT(tso->link!=END_TSO_QUEUE && tso->link!=NULL);
+      remote_ga = &theGlobalFromGA; //tso->link;
+      tso->link = (StgTSO*)END_BQ_QUEUE;
+      /* it was tso which turned node from FETCH_ME into FETCH_ME_BQ =>
+	 we have to send a Fetch message here! */
+      if (RtsFlags.ParFlags.ParStats.Full) {
+	/* Note that CURRENT_TIME may perform an unsafe call */
+	tso->par.exectime += CURRENT_TIME - tso->par.blockedat;
+	tso->par.fetchcount++;
+	tso->par.blockedat = CURRENT_TIME;
+	/* we are about to send off a FETCH message, so dump a FETCH event */
+	DumpRawGranEvent(CURRENT_PROC,
+			 taskIDtoPE(remote_ga->payload.gc.gtid),
+			 GR_FETCH, tso, tso->block_info.closure, 0, 0);
+      }
+      /* Phil T. claims that this was a workaround for a hard-to-find
+       * bug, hence I'm leaving it out for now --SDM
+       */
+      /* Assign a brand-new global address to the newly created FMBQ */
+      local_ga = makeGlobal(tso->block_info.closure, rtsFalse);
+      splitWeight(&fmbq_ga, local_ga);
+      /* splitWeight halves; a fresh GA's weight is MAX_GA_WEIGHT, so the
+	 split-off half must be exactly the top bit */
+      ASSERT(fmbq_ga.weight == 1U << (BITS_IN(unsigned) - 1));
+
+      sendFetch(remote_ga, &fmbq_ga, 0/*load*/);
+
+      // Global statistics: count no. of fetches
+      if (RtsFlags.ParFlags.ParStats.Global &&
+	  RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+	globalParStats.tot_fetch_mess++;
+      }
+
+      /* poison the stashed GA so a stale reuse trips the sanity check */
+      IF_DEBUG(sanity,
+	       theGlobalFromGA.payload.gc.gtid = (GlobalTaskId)0);
+      break;
+
+    case BlockedOnGA_NoSend:
+      /* the closure must be a FETCH_ME_BQ; tso came in here via
+	 FETCH_ME_BQ entry code */
+      ASSERT(get_itbl(tso->block_info.closure)->type==FETCH_ME_BQ);
+
+      /* Fetch message has been sent already */
+      if (RtsFlags.ParFlags.ParStats.Full) {
+	/* Note that CURRENT_TIME may perform an unsafe call */
+	tso->par.exectime += CURRENT_TIME - tso->par.blockedat;
+	tso->par.blockcount++;
+	tso->par.blockedat = CURRENT_TIME;
+	/* dump a block event, because fetch has been sent already */
+	DumpRawGranEvent(CURRENT_PROC, thisPE,
+			 GR_BLOCK, tso, tso->block_info.closure, 0, 0);
+      }
+      break;
+
+    case BlockedOnMVar:
+    case BlockedOnBlackHole:
+      /* the closure must be a BLACKHOLE_BQ or an RBH; tso came in here via
+	 BLACKHOLE(_BQ) or CAF_BLACKHOLE or RBH entry code */
+      ASSERT(get_itbl(tso->block_info.closure)->type==MVAR ||
+	     get_itbl(tso->block_info.closure)->type==BLACKHOLE_BQ ||
+	     get_itbl(tso->block_info.closure)->type==RBH);
+
+      /* if collecting stats update the execution time etc */
+      if (RtsFlags.ParFlags.ParStats.Full) {
+	/* Note that CURRENT_TIME may perform an unsafe call */
+	tso->par.exectime += CURRENT_TIME - tso->par.blockedat;
+	tso->par.blockcount++;
+	tso->par.blockedat = CURRENT_TIME;
+	DumpRawGranEvent(CURRENT_PROC, thisPE,
+			 GR_BLOCK, tso, tso->block_info.closure, 0, 0);
+      }
+      break;
+
+    case BlockedOnDelay:
+      /* Whats sort of stats shall we collect for an explicit threadDelay? */
+      IF_PAR_DEBUG(verbose,
+		   belch("##++ blockThread: TSO %d blocked on ThreadDelay",
+			 tso->id));
+      break;
+
+    /* Check that the following is impossible to happen, indeed
+    case BlockedOnException:
+    case BlockedOnRead:
+    case BlockedOnWrite:
+    */
+    default:
+      barf("blockThread: impossible why_blocked code %d for TSO %d",
+	   tso->why_blocked, tso->id);
+  }
+
+  IF_PAR_DEBUG(verbose,
+	       belch("##++ blockThread: TSO %d blocked on closure %p (%s); %s",
+		     tso->id, tso->block_info.closure, info_type(tso->block_info.closure),
+		     (tso->why_blocked==BlockedOnGA) ? "Sent FETCH for GA" : ""));
+
+  IF_PAR_DEBUG(bq,
+	       print_bq(tso->block_info.closure));
+}
+
+/*
+ * ChoosePE selects a GlobalTaskId from the array of PEs 'at random'.
+ * Important properties:
+ * - it varies during execution, even if the PE is idle
+ * - it's different for each PE
+ * - we never send a fish to ourselves
+ */
+extern long lrand48 (void);
+
+//@cindex choosePE
+GlobalTaskId
+choosePE(void)
+{
+  /* Pick a pseudo-random peer index; if it happens to be this task,
+     step to the next slot so we never send a FISH to ourselves. */
+  long idx = lrand48() % nPEs;
+
+  if (allPEs[idx] == mytid)     /* never fish from yourself */
+    idx = (idx + 1) % nPEs;
+  return allPEs[idx];
+}
+
+/*
+ * allocate a BLOCKED_FETCH closure and fill it with the relevant fields
+ * of the ga argument; called from processFetch when the local closure is
+ * under evaluation
+ */
+//@cindex createBlockedFetch
+/*
+ * Allocate a BLOCKED_FETCH closure on the heap and initialise it from
+ * the remote GA (rga); ga names the local closure under evaluation.
+ * Returns the new closure; aborts if allocation fails (see NOTE below).
+ */
+StgClosure *
+createBlockedFetch (globalAddr ga, globalAddr rga)
+{
+  StgBlockedFetch *bf;
+  StgClosure *closure;
+
+  closure = GALAlookup(&ga);
+  if ((bf = (StgBlockedFetch *)allocate(_HS + sizeofW(StgBlockedFetch))) == NULL) {
+    /* NOTE(review): barf() does not return, so the GC-and-retry code
+       below is dead; presumably barf was meant to fire only if the
+       second allocate also fails -- TODO confirm and reorder.
+       (Also "BlocekdFetch" in the message is a typo.) */
+    barf("createBlockedFetch: out of heap while allocating heap for a BlocekdFetch; ToDo: call GC here");
+    GarbageCollect(GetRoots, rtsFalse);
+    closure = GALAlookup(&ga);
+    bf = (StgBlockedFetch *)allocate(_HS + sizeofW(StgBlockedFetch));
+    // ToDo: check whether really guaranteed to succeed 2nd time around
+  }
+
+  ASSERT(bf != (StgBlockedFetch *)NULL);
+  SET_INFO((StgClosure *)bf, &stg_BLOCKED_FETCH_info);
+  // ToDo: check whether other header info is needed
+  bf->node = closure;
+  bf->ga.payload.gc.gtid = rga.payload.gc.gtid;
+  bf->ga.payload.gc.slot = rga.payload.gc.slot;
+  bf->ga.weight = rga.weight;
+  // bf->link = NULL;  debugging
+
+  IF_PAR_DEBUG(schedule,
+	       fprintf(stderr, "%%%%// created BF: bf=%p (%s) of closure , GA: ",
+		       bf, info_type((StgClosure*)bf));
+	       printGA(&(bf->ga));
+	       fputc('\n',stderr));
+  return (StgClosure *)bf;
+}
+
+/*
+ * waitForTermination enters a loop ignoring spurious messages while
+ * waiting for the termination sequence to be completed.
+ */
+//@cindex waitForTermination
+/* Loop forever draining messages; never returns normally.  Termination
+   happens inside processUnexpectedMessage, which calls stg_exit on
+   receipt of PP_FINISH. */
+void
+waitForTermination(void)
+{
+  do {
+    rtsPacket p = GetPacket();
+    processUnexpectedMessage(p);
+  } while (rtsTrue);
+}
+
+#ifdef DEBUG
+//@cindex DebugPrintGAGAMap
+/* Dump nGAs (oldGA -> newGA) pairs to stderr, one line per pair.
+   Note: the loop counter is nat (unsigned) compared against int nGAs;
+   fine for nGAs >= 0, which is what callers pass. */
+void
+DebugPrintGAGAMap(globalAddr *gagamap, int nGAs)
+{
+  nat i;
+
+  for (i = 0; i < nGAs; ++i, gagamap += 2)
+    fprintf(stderr, "__ gagamap[%d] = ((%x, %d, %x)) -> ((%x, %d, %x))\n", i,
+	    gagamap[0].payload.gc.gtid, gagamap[0].payload.gc.slot, gagamap[0].weight,
+	    gagamap[1].payload.gc.gtid, gagamap[1].payload.gc.slot, gagamap[1].weight);
+}
+
+//@cindex checkGAGAMap
+/* Sanity-check that both halves of every (oldGA,newGA) pair look like
+   well-formed global addresses. */
+void
+checkGAGAMap(globalAddr *gagamap, int nGAs)
+{
+  nat i;
+
+  for (i = 0; i < (nat)nGAs; ++i, gagamap += 2) {
+    ASSERT(looks_like_ga(gagamap));
+    ASSERT(looks_like_ga(gagamap+1));
+  }
+}
+#endif
+
+//@cindex freeMsgBuffer
+static StgWord **freeMsgBuffer = NULL;
+//@cindex freeMsgIndex
+static nat *freeMsgIndex = NULL;
+
+//@cindex prepareFreeMsgBuffers
+/*
+ * Lazily allocate one FREE-message staging buffer per remote PE (our own
+ * slot stays NULL) on first call, then reset every buffer's fill index
+ * to 0.  Buffers persist for the lifetime of the process.
+ */
+void
+prepareFreeMsgBuffers(void)
+{
+  nat i;
+
+  /* Allocate the freeMsg buffers just once and then hang onto them. */
+  if (freeMsgIndex == NULL) {
+    freeMsgIndex = (nat *) stgMallocBytes(nPEs * sizeof(nat),
+					  "prepareFreeMsgBuffers (Index)");
+    /* NOTE(review): element type is StgWord*, sized here as long* --
+       same size on supported platforms, but sizeof(StgWord *) would be
+       the accurate spelling. */
+    freeMsgBuffer = (StgWord **) stgMallocBytes(nPEs * sizeof(long *),
+					        "prepareFreeMsgBuffers (Buffer)");
+
+    for(i = 0; i < nPEs; i++)
+      if (i != (thisPE-1))
+	freeMsgBuffer[i] = (StgPtr) stgMallocWords(RtsFlags.ParFlags.packBufferSize,
+						   "prepareFreeMsgBuffers (Buffer #i)");
+      else
+	freeMsgBuffer[i] = 0;   /* never send FREEs to ourselves */
+  }
+
+  /* Initialize the freeMsg buffer pointers to point to the start of their
+     buffers */
+  for (i = 0; i < nPEs; i++)
+    freeMsgIndex[i] = 0;
+}
+
+//@cindex freeRemoteGA
+/*
+ * Queue a (weight, slot) pair for GA ga into pe's FREE-message buffer,
+ * flushing the buffer first if the two new words would not fit.  The GA
+ * must already have been removed from the GALA table.  Under sanity
+ * checking the GA is poisoned afterwards to catch stale reuse.
+ */
+void
+freeRemoteGA(int pe, globalAddr *ga)
+{
+  nat i;
+
+  ASSERT(GALAlookup(ga) == NULL);
+
+  if ((i = freeMsgIndex[pe]) + 2 >= RtsFlags.ParFlags.packBufferSize) {
+    IF_PAR_DEBUG(free,
+		 belch("!! Filled a free message buffer (sending remaining messages indivisually)"));
+
+    /* flush what we have so the two words below always fit */
+    sendFree(ga->payload.gc.gtid, i, freeMsgBuffer[pe]);
+    i = 0;
+  }
+  freeMsgBuffer[pe][i++] = (StgWord) ga->weight;
+  freeMsgBuffer[pe][i++] = (StgWord) ga->payload.gc.slot;
+  freeMsgIndex[pe] = i;
+
+  /* poison the freed GA so accidental reuse is visible in sanity builds */
+  IF_DEBUG(sanity,
+	   ga->weight = 0xdead0add;
+	   ga->payload.gc.gtid = 0xbbbbbbbb;
+	   ga->payload.gc.slot = 0xbbbbbbbb;);
+}
+
+//@cindex sendFreeMessages
+/* Flush every non-empty FREE-message buffer to its PE.  Our own slot is
+   always empty (index 0), so the NULL buffer for thisPE is never read. */
+void
+sendFreeMessages(void)
+{
+  nat i;
+
+  for (i = 0; i < nPEs; i++)
+    if (freeMsgIndex[i] > 0)
+      sendFree(allPEs[i], freeMsgIndex[i], freeMsgBuffer[i]);
+}
+
+/* synchronises with the other PEs. Receives and records in a global
+ * variable the task-id of SysMan. If this is the main thread (discovered
+ * in main.lc), identifies itself to SysMan. Finally it receives
+ * from SysMan an array of the Global Task Ids of each PE, which is
+ * returned as the value of the function.
+ */
+
+#if defined(PAR_TICKY)
+/* Has to see freeMsgIndex, so must be defined here not in ParTicky.c */
+//@cindex stats_CntFreeGA
+void
+stats_CntFreeGA (void) { // stats only
+
+ // Global statistics: residency of thread and spark pool
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ nat i, s;
+
+ globalParStats.cnt_free_GA++;
+ for (i = 0, s = 0; i < nPEs; i++)
+ s += globalParStats.tot_free_GA += freeMsgIndex[i]/2;
+
+ if ( s > globalParStats.res_free_GA )
+ globalParStats.res_free_GA = s;
+ }
+}
+#endif /* PAR_TICKY */
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Miscellaneous Functions, High Level Communications Routines
+//@subsection Index
+
+//@index
+//* ACK:: @cindex\s-+ACK
+//* DebugPrintGAGAMap:: @cindex\s-+DebugPrintGAGAMap
+//* FETCH:: @cindex\s-+FETCH
+//* FISH:: @cindex\s-+FISH
+//* FREE:: @cindex\s-+FREE
+//* RESUME:: @cindex\s-+RESUME
+//* SCHEDULE:: @cindex\s-+SCHEDULE
+//* blockFetch:: @cindex\s-+blockFetch
+//* choosePE:: @cindex\s-+choosePE
+//* freeMsgBuffer:: @cindex\s-+freeMsgBuffer
+//* freeMsgIndex:: @cindex\s-+freeMsgIndex
+//* freeRemoteGA:: @cindex\s-+freeRemoteGA
+//* gumPackBuffer:: @cindex\s-+gumPackBuffer
+//* initMoreBuffers:: @cindex\s-+initMoreBuffers
+//* prepareFreeMsgBuffers:: @cindex\s-+prepareFreeMsgBuffers
+//* processAck:: @cindex\s-+processAck
+//* processFetch:: @cindex\s-+processFetch
+//* processFetches:: @cindex\s-+processFetches
+//* processFish:: @cindex\s-+processFish
+//* processFree:: @cindex\s-+processFree
+//* processMessages:: @cindex\s-+processMessages
+//* processResume:: @cindex\s-+processResume
+//* processSchedule:: @cindex\s-+processSchedule
+//* sendAck:: @cindex\s-+sendAck
+//* sendFetch:: @cindex\s-+sendFetch
+//* sendFish:: @cindex\s-+sendFish
+//* sendFree:: @cindex\s-+sendFree
+//* sendFreeMessages:: @cindex\s-+sendFreeMessages
+//* sendResume:: @cindex\s-+sendResume
+//* sendSchedule:: @cindex\s-+sendSchedule
+//* unpackAck:: @cindex\s-+unpackAck
+//* unpackFetch:: @cindex\s-+unpackFetch
+//* unpackFish:: @cindex\s-+unpackFish
+//* unpackFree:: @cindex\s-+unpackFree
+//* unpackResume:: @cindex\s-+unpackResume
+//* unpackSchedule:: @cindex\s-+unpackSchedule
+//* waitForTermination:: @cindex\s-+waitForTermination
+//@end index
diff --git a/rts/parallel/LLC.h b/rts/parallel/LLC.h
new file mode 100644
index 0000000000..536e431bef
--- /dev/null
+++ b/rts/parallel/LLC.h
@@ -0,0 +1,130 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Sun Mar 18 2001 21:23:50 Stardate: [-30]6349.45 hwloidl>
+
+ Low Level Communications Header (LLC.h)
+
+ Contains the definitions used by the Low-level Communications
+ module of the GUM Haskell runtime environment.
+ Based on the Graph for PVM implementation.
+
+ Phil Trinder, Glasgow University, 13th Dec 1994
+ Adapted for the 4.xx RTS
+ H-W. Loidl, Heriot-Watt, November 1999
+ ----------------------------------------------------------------------- */
+
+#ifndef __LLC_H
+#define __LLC_H
+
+#ifdef PAR
+
+//@node Low Level Communications Header, , ,
+//@section Low Level Communications Header
+
+//@menu
+//* Includes::
+//* Macros and Constants::
+//* PVM macros::
+//* Externs::
+//@end menu
+
+//@node Includes, Macros and Constants, Low Level Communications Header, Low Level Communications Header
+//@subsection Includes
+
+#include "Rts.h"
+#include "Parallel.h"
+
+#include "PEOpCodes.h"
+#include "pvm3.h"
+
+//@node Macros and Constants, PVM macros, Includes, Low Level Communications Header
+//@subsection Macros and Constants
+
+#define ANY_TASK (-1) /* receive messages from any task */
+#define ANY_GLOBAL_TASK ANY_TASK
+#define ANY_OPCODE (-1) /* receive any opcode */
+#define ALL_GROUP (-1) /* wait for barrier from every group member */
+
+#define PEGROUP "PE"
+
+#define MGRGROUP "MGR"
+#define SYSGROUP "SYS"
+
+
+#define PETASK "PE"
+
+//@node PVM macros, Externs, Macros and Constants, Low Level Communications Header
+//@subsection PVM macros
+
+#define sync(gp,op) do { \
+ broadcast(gp,op); \
+ pvm_barrier(gp,ALL_GROUP); \
+ } while(0)
+
+#define broadcast(gp,op) do { \
+ pvm_initsend(PvmDataDefault); \
+ pvm_bcast(gp,op); \
+ } while(0)
+
+#define checkComms(c,s) do { \
+ if ((c)<0) { \
+ pvm_perror(s); \
+ stg_exit(EXIT_FAILURE); \
+ }} while(0)
+
+#define _my_gtid pvm_mytid()
+#define GetPacket() pvm_recv(ANY_TASK,ANY_OPCODE)
+#define PacketsWaiting() (pvm_probe(ANY_TASK,ANY_OPCODE) != 0)
+
+#define SPARK_THREAD_DESCRIPTOR 1
+#define GLOBAL_THREAD_DESCRIPTOR 2
+
+#define _extract_jump_field(v) (v)
+
+#define MAX_DATA_WORDS_IN_PACKET 1024
+
+/* basic PVM packing */
+/* Each of these packs exactly one long into the current PVM send buffer;
+   the position parameter of PutArgN is ignored, so PutArg1/PutArg2/
+   PutArgN all expand identically (kept distinct for readability at the
+   call sites). */
+#define PutArg1(a) pvm_pklong((long *)&(a),1,1)
+#define PutArg2(a) pvm_pklong((long *)&(a),1,1)
+#define PutArgN(n,a) pvm_pklong((long *)&(a),1,1)
+#define PutArgs(b,n) pvm_pklong((long *)b,n,1)
+
+/* NOTE(review): the literal `?' is only accepted because PutArgN never
+   expands its first parameter; supply a real index if that ever changes. */
+#define PutLit(l) { int a = l; PutArgN(?,a); }
+
+/* basic PVM unpacking (mirror images of the Put* macros above) */
+#define GetArg1(a) pvm_upklong((long *)&(a),1,1)
+#define GetArg2(a) pvm_upklong((long *)&(a),1,1)
+#define GetArgN(n,a) pvm_upklong((long *)&(a),1,1)
+#define GetArgs(b,n) pvm_upklong((long *)b,n,1)
+
+//@node Externs, , PVM macros, Low Level Communications Header
+//@subsection Externs
+
+/* basic message passing routines */
+extern void sendOp (OpCode,GlobalTaskId),
+ sendOp1 (OpCode,GlobalTaskId,StgWord),
+ sendOp2 (OpCode,GlobalTaskId,StgWord,StgWord),
+ sendOpV (OpCode,GlobalTaskId,int,...),
+ sendOpN (OpCode,GlobalTaskId,int,StgPtr),
+ sendOpNV (OpCode,GlobalTaskId,int,StgPtr,int,...);
+
+extern void broadcastOpN(OpCode op, char *group, int n, StgPtr args);
+
+/* extracting data out of a packet */
+OpCode getOpcode (rtsPacket p);
+void getOpcodeAndSender (rtsPacket p, OpCode *popcode,
+ GlobalTaskId *psender_id);
+GlobalTaskId senderTask (rtsPacket p);
+rtsPacket waitForPEOp(OpCode op, GlobalTaskId who, void(*processUnexpected)(rtsPacket) );
+
+/* Init and shutdown routines */
+void startUpPE (void);
+void shutDownPE(void);
+int getExitCode(int nbytes, GlobalTaskId *sender_idp);
+
+/* aux functions */
+char *getOpName (unsigned op); // returns string of opcode
+void processUnexpectedMessage (rtsPacket);
+//void NullException(void);
+
+#endif /*PAR */
+#endif /*defined __LLC_H */
diff --git a/rts/parallel/LLComms.c b/rts/parallel/LLComms.c
new file mode 100644
index 0000000000..baa6dddf0c
--- /dev/null
+++ b/rts/parallel/LLComms.c
@@ -0,0 +1,489 @@
+/* ----------------------------------------------------------------------------
+ * Time-stamp: <Mon Mar 19 2001 22:10:38 Stardate: [-30]6354.62 hwloidl>
+ *
+ * GUM Low-Level Inter-Task Communication
+ *
+ * This module defines PVM Routines for PE-PE communication.
+ *
+ * P. Trinder, December 5th. 1994.
+ * P. Trinder, July 1998
+ * H-W. Loidl, November 1999 -
+ --------------------------------------------------------------------------- */
+
+#ifdef PAR /* whole file */
+
+//@node GUM Low-Level Inter-Task Communication, , ,
+//@section GUM Low-Level Inter-Task Communication
+
+/*
+ *This module defines the routines which communicate between PEs. The
+ *code is based on Kevin Hammond's GRIP RTS. (OpCodes.h defines
+ *PEOp1 etc. in terms of sendOp1 etc.).
+ *
+ *Routine & Arguments
+ * &
+ *sendOp & 0 \\
+ *sendOp1 & 1 \\
+ *sendOp2 & 2 \\
+ *sendOpN & vector \\
+ *sendOpV & variable \\
+ *sendOpNV & variable+ vector \\
+ *
+ *First the standard include files.
+ */
+
+//@menu
+//* Macros etc::
+//* Includes::
+//* Auxiliary functions::
+//* Index::
+//@end menu
+
+//@node Macros etc, Includes, GUM Low-Level Inter-Task Communication, GUM Low-Level Inter-Task Communication
+//@subsection Macros etc
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+#define UNUSED /* nothing */
+
+//@node Includes, Auxiliary functions, Macros etc, GUM Low-Level Inter-Task Communication
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Parallel.h"
+#include "ParallelRts.h"
+#if defined(DEBUG)
+# include "ParallelDebug.h"
+#endif
+#include "LLC.h"
+
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+/* Cannot use std macro when compiling for SysMan */
+/* debugging enabled */
+// #define IF_PAR_DEBUG(c,s) { s; }
+/* debugging disabled */
+#define IF_PAR_DEBUG(c,s) /* nothing */
+
+//@node Auxiliary functions, Index, Includes, GUM Low-Level Inter-Task Communication
+//@subsection Auxiliary functions
+
+/*
+ * heapChkCounter tracks the number of heap checks since the last probe.
+ * Not currently used!  We check for messages when a thread is rescheduled.
+ */
+int heapChkCounter = 0;
+
+/*
+ * Then some miscellaneous functions.
+ * getOpName returns the character-string name of any OpCode.
+ */
+
+char *UserPEOpNames[] = { PEOP_NAMES };
+
+//@cindex getOpName
+/* Map a PE opcode to its printable name; out-of-range codes yield a
+   fixed fallback string. */
+char *
+getOpName(nat op)
+{
+  if (op < MIN_PEOPS || op > MAX_PEOPS)
+    return "Unknown PE OpCode";
+  return UserPEOpNames[op - MIN_PEOPS];
+}
+
+/*
+ * traceSendOp handles the tracing of messages.
+ */
+
+//@cindex traceSendOp
+/* Trace an outgoing message (opcode, up to two data words, sender,
+   destination).  Note: IF_PAR_DEBUG is #defined away in this file, so
+   this currently compiles to just the getOpName call. */
+static void
+traceSendOp(OpCode op, GlobalTaskId dest UNUSED,
+	    unsigned int data1 UNUSED, unsigned int data2 UNUSED)
+{
+  char *OpName;
+
+  OpName = getOpName(op);
+  IF_PAR_DEBUG(trace,
+	       fprintf(stderr," %s [%x,%x] sent from %x to %x",
+		       OpName, data1, data2, mytid, dest));
+}
+
+/*
+ * sendOp sends a 0-argument message with OpCode {\em op} to
+ * the global task {\em task}.
+ */
+
+//@cindex sendOp
+/* Send a 0-argument message with opcode op to global task `task`. */
+void
+sendOp(OpCode op, GlobalTaskId task)
+{
+  traceSendOp(op, task,0,0);
+
+  pvm_initsend(PvmDataRaw);
+  pvm_send(task, op);
+}
+
+/*
+ * sendOp1 sends a 1-argument message with OpCode {\em op}
+ * to the global task {\em task}.
+ */
+
+//@cindex sendOp1
+/* Send a 1-argument message with opcode op to global task `task`. */
+void
+sendOp1(OpCode op, GlobalTaskId task, StgWord arg1)
+{
+  traceSendOp(op, task, arg1,0);
+
+  pvm_initsend(PvmDataRaw);
+  PutArg1(arg1);
+  pvm_send(task, op);
+}
+
+
+/*
+ * sendOp2 is used by the FP code only.
+ */
+
+//@cindex sendOp2
+/* Send a 2-argument message with opcode op to global task `task`.
+   (Used by the FP code only.) */
+void
+sendOp2(OpCode op, GlobalTaskId task, StgWord arg1, StgWord arg2)
+{
+  traceSendOp(op, task, arg1, arg2);
+
+  pvm_initsend(PvmDataRaw);
+  PutArg1(arg1);
+  PutArg2(arg2);
+  pvm_send(task, op);
+}
+
+/*
+ *
+ * sendOpV takes a variable number of arguments, as specified by {\em n}.
+ * For example,
+ *
+ * sendOpV( PP_STATS, StatsTask, 3, start_time, stop_time, sparkcount);
+ */
+
+//@cindex sendOpV
+/* Send a message carrying n variadic StgWord arguments, e.g.
+     sendOpV(PP_STATS, StatsTask, 3, start_time, stop_time, sparkcount);
+   Every variadic argument must be an StgWord (va_arg reads them as such). */
+void
+sendOpV(OpCode op, GlobalTaskId task, int n, ...)
+{
+  va_list ap;
+  int i;
+  StgWord arg;
+
+  va_start(ap, n);
+
+  traceSendOp(op, task, 0, 0);
+
+  pvm_initsend(PvmDataRaw);
+
+  for (i = 0; i < n; ++i) {
+    arg = va_arg(ap, StgWord);
+    PutArgN(i, arg);
+  }
+  va_end(ap);
+
+  pvm_send(task, op);
+}
+
+/*
+ *
+ * sendOpNV takes a variable-size datablock, as specified by {\em
+ * nelem} and a variable number of arguments, as specified by {\em
+ * narg}. N.B. The datablock and the additional arguments are contiguous
+ * and are copied over together. For example,
+ *
+ * sendOpNV(PP_RESUME, tsoga.pe, 6, nelem, data,
+ * (W_) ga.weight, (W_) ga.loc.gc.gtid, (W_) ga.loc.gc.slot,
+ * (W_) tsoga.weight, (W_) tsoga.loc.gc.gtid, (W_) tsoga.loc.gc.slot);
+ *
+ * Important: The variable arguments must all be StgWords.
+
+ sendOpNV(_, tid, m, n, data, x1, ..., xm):
+
+ | n elems
+ +------------------------------
+ | x1 | ... | xm | n | data ....
+ +------------------------------
+ */
+
+//@cindex sendOpNV
+/*
+ * Send a message carrying narg variadic StgWord arguments followed by a
+ * word holding nelem and then the nelem-word datablock (layout described
+ * in the comment above).  Every variadic argument must be an StgWord.
+ *
+ * Fix: the per-argument debug fprintf used %d for an StgWord, which is
+ * undefined behaviour on LP64 platforms; it now uses %lu with an
+ * explicit cast.
+ */
+void
+sendOpNV(OpCode op, GlobalTaskId task, int nelem,
+	 StgWord *datablock, int narg, ...)
+{
+  va_list ap;
+  int i;
+  StgWord arg;
+
+  va_start(ap, narg);
+
+  traceSendOp(op, task, 0, 0);
+  IF_PAR_DEBUG(trace,
+	       fprintf(stderr,"~~ sendOpNV: op = %x (%s), task = %x, narg = %d, nelem = %d",
+		       op, getOpName(op), task, narg, nelem));
+
+  pvm_initsend(PvmDataRaw);
+
+  for (i = 0; i < narg; ++i) {
+    arg = va_arg(ap, StgWord);
+    IF_PAR_DEBUG(trace,
+		 fprintf(stderr,"~~ sendOpNV: arg = %lu\n",(unsigned long)arg));
+    PutArgN(i, arg);
+  }
+  arg = (StgWord) nelem;
+  PutArgN(narg, arg);  /* trailing word tells the receiver how much data follows */
+
+/*  for (i=0; i < nelem; ++i) fprintf(stderr, "%d ",datablock[i]); */
+/*  fprintf(stderr," in sendOpNV\n");*/
+
+  PutArgs(datablock, nelem);
+  va_end(ap);
+
+  pvm_send(task, op);
+}
+
+/*
+ * sendOpN take a variable size array argument, whose size is given by
+ * {\em n}. For example,
+ *
+ * sendOpN( PP_STATS, StatsTask, 3, stats_array);
+ */
+
+//@cindex sendOpN
+/* Send a message carrying an n-word array: first a word holding n, then
+   the n words of args, e.g.
+     sendOpN(PP_STATS, StatsTask, 3, stats_array); */
+void
+sendOpN(OpCode op, GlobalTaskId task, int n, StgPtr args)
+{
+  long arg;
+
+  traceSendOp(op, task, 0, 0);
+
+  pvm_initsend(PvmDataRaw);
+  arg = (long) n;
+  PutArgN(0, arg);      /* length prefix */
+  PutArgs(args, n);
+  pvm_send(task, op);
+}
+
+/*
+ * broadcastOpN is as sendOpN but broadcasts to all members of a group.
+ */
+
+/* As sendOpN, but broadcasts the length-prefixed array to every member
+   of the PVM group `group` instead of a single task. */
+void
+broadcastOpN(OpCode op, char *group, int n, StgPtr args)
+{
+  long arg;
+
+  //traceSendOp(op, task, 0, 0);
+
+  pvm_initsend(PvmDataRaw);
+  arg = (long) n;
+  PutArgN(0, arg);      /* length prefix */
+  PutArgs(args, n);
+  pvm_bcast(group, op);
+}
+
+/*
+ waitForPEOp waits for a packet from global task who with the
+ OpCode op. If ignore is true all other messages are simply ignored;
+ otherwise they are handled by processUnexpected.
+ */
+//@cindex waitForPEOp
+/*
+ * Block until a packet matching (op, who) arrives and return it.
+ * ANY_OPCODE / ANY_TASK act as wildcards.  Non-matching packets are
+ * passed to processUnexpected when supplied, otherwise silently
+ * dropped (their PVM buffers are not explicitly freed here).
+ */
+rtsPacket
+waitForPEOp(OpCode op, GlobalTaskId who, void(*processUnexpected)(rtsPacket) )
+{
+  rtsPacket p;
+  int nbytes;
+  OpCode opCode;
+  GlobalTaskId sender_id;
+  rtsBool match;
+
+  IF_PAR_DEBUG(verbose,
+	       fprintf(stderr,"~~ waitForPEOp: expecting op = %x (%s), who = [%x]\n",
+		       op, getOpName(op), who));
+
+  do {
+    /* pvm_recv returns a negative buffer id on error; retry after logging */
+    while((p = pvm_recv(ANY_TASK,ANY_OPCODE)) < 0)
+      pvm_perror("waitForPEOp: Waiting for PEOp");
+
+    pvm_bufinfo( p, &nbytes, &opCode, &sender_id );
+    match = (op == ANY_OPCODE || op == opCode) &&
+            (who == ANY_TASK || who == sender_id);
+
+    if (match) {
+      IF_PAR_DEBUG(verbose,
+		   fprintf(stderr,
+			   "~~waitForPEOp: Qapla! received: OpCode = %#x (%s), sender_id = [%x]",
+			   opCode, getOpName(opCode), sender_id));
+
+      return(p);
+    }
+
+    /* Handle the unexpected OpCodes */
+    if (processUnexpected!=NULL) {
+      (*processUnexpected)(p);
+    } else {
+      IF_PAR_DEBUG(verbose,
+		   fprintf(stderr,
+			   "~~ waitForPEOp: ignoring OpCode = %#x (%s), sender_id = [%x]",
+			   opCode, getOpName(opCode), sender_id));
+    }
+
+  } while(rtsTrue);
+}
+
+/*
+ processUnexpected processes unexpected messages. If the message is a
+ FINISH it exits the prgram, and PVM gracefully
+ */
+//@cindex processUnexpectedMessage
+void
+processUnexpectedMessage(rtsPacket packet) {
+ OpCode opCode = getOpcode(packet);
+
+ IF_PAR_DEBUG(verbose,
+ GlobalTaskId sender = senderTask(packet);
+ fprintf(stderr,"~~ [%x] processUnexpected: Received %x (%s), sender %x\n",
+ mytid, opCode, getOpName(opCode), sender));
+
+ switch (opCode) {
+ case PP_FINISH:
+ stg_exit(EXIT_SUCCESS);
+ break;
+
+ /* Anything we're not prepared to deal with. Note that ALL OpCodes
+ are discarded during termination -- this helps prevent bizarre
+ race conditions. */
+ default:
+ // if (!GlobalStopPending)
+ {
+ GlobalTaskId errorTask;
+ OpCode opCode;
+
+ getOpcodeAndSender(packet, &opCode, &errorTask);
+ fprintf(stderr,"== Task %x: Unexpected OpCode %x from %x in processUnexpected",
+ mytid, opCode, errorTask );
+
+ stg_exit(EXIT_FAILURE);
+ }
+ }
+}
+
+//@cindex getOpcode
+OpCode
+getOpcode(rtsPacket p)
+{
+ int nbytes;
+ OpCode OpCode;
+ GlobalTaskId sender_id;
+ /* read PVM buffer */
+ pvm_bufinfo(p, &nbytes, &OpCode, &sender_id);
+ /* return tag of the buffer as opcode */
+ return(OpCode);
+}
+
+//@cindex getOpcodeAndSender
+void
+getOpcodeAndSender(rtsPacket p, OpCode *opCodep, GlobalTaskId *senderIdp)
+{
+ int nbytes;
+ /* read PVM buffer */
+ pvm_bufinfo(p, &nbytes, opCodep, senderIdp);
+}
+
+//@cindex senderTask
+GlobalTaskId
+senderTask(rtsPacket p)
+{
+ int nbytes;
+ OpCode opCode;
+ GlobalTaskId sender_id;
+ /* read PVM buffer */
+ pvm_bufinfo(p, &nbytes, &opCode, &sender_id);
+ return(sender_id);
+}
+
+/*
+ * startUpPE does the low-level comms specific startup stuff for a
+ * PE. It initialises the comms system, joins the appropriate groups
+ * allocates the PE buffer
+ */
+
+//@cindex startUpPE
+void
+startUpPE(void)
+{
+ mytid = _my_gtid; /* Initialise PVM and get task id into global var.*/
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Task id = [%x], No. PEs = %d \n",
+ mytid, mytid, nPEs));
+ checkComms(pvm_joingroup(PEGROUP), "PEStartup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] PEStartup: Joined PEGROUP\n", mytid));
+}
+
+/*
+ * PEShutdown does the low-level comms-specific shutdown stuff for a
+ * single PE. It leaves the groups and then exits from pvm.
+ */
+//@cindex shutDownPE
+void
+shutDownPE(void)
+{
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "== [%x] PEshutdown\n", mytid));
+
+ checkComms(pvm_lvgroup(PEGROUP),"PEShutDown");
+ checkComms(pvm_exit(),"PEShutDown");
+}
+
+/*
+ Extract the exit code out of a PP_FINISH packet (used in SysMan)
+*/
+int
+getExitCode(int nbytes, GlobalTaskId *sender_idp) {
+ int exitCode=0;
+
+ if (nbytes==4) { // Notification from a task doing pvm_exit
+ GetArgs(sender_idp,1); // Presumably this must be MainPE Id
+ exitCode = -1;
+ } else if (nbytes==8) { // Doing a controlled shutdown
+ GetArgs(&exitCode,1); // HACK: controlled shutdown == 2 values
+ GetArgs(&exitCode,1);
+ } else {
+ exitCode = -2; // everything else
+ }
+ return exitCode;
+}
+
+#endif /* PAR -- whole file */
+
+//@node Index, , Auxiliary functions, GUM Low-Level Inter-Task Communication
+//@subsection Index
+
+//@index
+//* getOpName:: @cindex\s-+getOpName
+//* traceSendOp:: @cindex\s-+traceSendOp
+//* sendOp:: @cindex\s-+sendOp
+//* sendOp1:: @cindex\s-+sendOp1
+//* sendOp2:: @cindex\s-+sendOp2
+//* sendOpV:: @cindex\s-+sendOpV
+//* sendOpNV:: @cindex\s-+sendOpNV
+//* sendOpN:: @cindex\s-+sendOpN
+//* waitForPEOp:: @cindex\s-+waitForPEOp
+//* processUnexpectedMessage:: @cindex\s-+processUnexpectedMessage
+//* getOpcode:: @cindex\s-+getOpcode
+//* getOpcodeAndSender:: @cindex\s-+getOpcodeAndSender
+//* senderTask:: @cindex\s-+senderTask
+//* startUpPE:: @cindex\s-+startUpPE
+//* shutDownPE:: @cindex\s-+shutDownPE
+//@end index
diff --git a/rts/parallel/PEOpCodes.h b/rts/parallel/PEOpCodes.h
new file mode 100644
index 0000000000..2d18b439f2
--- /dev/null
+++ b/rts/parallel/PEOpCodes.h
@@ -0,0 +1,58 @@
+#ifndef PEOPCODES_H
+#define PEOPCODES_H
+
+/************************************************************************
+* PEOpCodes.h *
+* *
+* This file contains definitions for all the GUM PE Opcodes *
+* It's based on the GRAPH for PVM version *
+* Phil Trinder, Glasgow University 8th December 1994 *
+* *
+ RFPointon, December 1999
+ - removed PP_SYSMAN_TID, introduced PP_READY
+ - removed PP_MAIN_TASK, introduced PP_NEWPE
+ - added PP_REVAL
+************************************************************************/
+
+#define REPLY_OK 0x00
+
+/*Startup + Shutdown*/
+#define PP_READY 0x50 /* sent PEs -> SysMan */
+#define PP_NEWPE 0x51 /* sent via newHost notify -> SysMan */
+#define PP_FINISH 0x52 /* sent PEs & via taskExit notfiy -> SysMan */
+#define PP_PETIDS 0x53 /* sent sysman -> PEs */
+
+/* Stats stuff */
+#define PP_STATS 0x54
+#define PP_STATS_ON 0x55
+#define PP_STATS_OFF 0x56
+
+//#define PP_FAIL 0x57
+
+/*Garbage Collection*/
+#define PP_GC_INIT 0x58
+#define PP_FULL_SYSTEM 0x59
+#define PP_GC_POLL 0x5a
+
+/*GUM Messages*/
+#define PP_FETCH 0x5b
+#define PP_RESUME 0x5c
+#define PP_ACK 0x5d
+#define PP_FISH 0x5e
+#define PP_SCHEDULE 0x5f
+#define PP_FREE 0x60
+#define PP_REVAL 0x61
+
+
+#define MIN_PEOPS 0x50
+#define MAX_PEOPS 0x61
+
+#define PEOP_NAMES "Ready", "NewPE", \
+ "Finish", "PETIDS", \
+ "Stats", "Stats_On", "Stats_Off", \
+ "Fail", \
+ "GCInit", "FullSystem", "GCPoll", \
+ "Fetch","Resume","ACK","Fish","Schedule", \
+ "Free","REval"
+
+#endif /* PEOPCODES_H */
diff --git a/rts/parallel/Pack.c b/rts/parallel/Pack.c
new file mode 100644
index 0000000000..e8653f6303
--- /dev/null
+++ b/rts/parallel/Pack.c
@@ -0,0 +1,4293 @@
+/*
+ Time-stamp: <Wed Mar 21 2001 16:32:47 Stardate: [-30]6363.44 hwloidl>
+
+ Graph packing and unpacking code for sending it to another processor
+ and retrieving the original graph structure from the packet.
+ In the old RTS the code was split into Pack.c and Unpack.c (now deceased)
+ Used in GUM and GrAnSim.
+
+ The GrAnSim version of the code defines routines for *simulating* the
+ packing of closures in the same way it is done in the parallel runtime
+ system. Basically GrAnSim only puts the addresses of the closures to be
+ transferred into a buffer. This buffer will then be associated with the
+ event of transferring the graph. When this event is scheduled, the
+ @UnpackGraph@ routine is called and the buffer can be discarded
+ afterwards.
+
+ Note that in GranSim we need many buffers, not just one per PE.
+*/
+
+//@node Graph packing, , ,
+//@section Graph packing
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+//@menu
+//* Includes::
+//* Prototypes::
+//* Global variables::
+//* ADT of Closure Queues::
+//* Initialisation for packing::
+//* Packing Functions::
+//* Low level packing routines::
+//* Unpacking routines::
+//* Aux fcts for packing::
+//* Printing Packet Contents::
+//* End of file::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Graph packing, Graph packing
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ClosureTypes.h"
+#include "Storage.h"
+#include "Hash.h"
+#include "Parallel.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+# if defined(DEBUG)
+# include "Sanity.h"
+# include "Printer.h"
+# include "ParallelDebug.h"
+# endif
+#include "FetchMe.h"
+
+/* Which RTS flag should be used to get the size of the pack buffer ? */
+# if defined(PAR)
+# define RTS_PACK_BUFFER_SIZE RtsFlags.ParFlags.packBufferSize
+# else /* GRAN */
+# define RTS_PACK_BUFFER_SIZE RtsFlags.GranFlags.packBufferSize
+# endif
+
+//@node Prototypes, Global variables, Includes, Graph packing
+//@subsection Prototypes
+/*
+ Code declarations.
+*/
+
+//@node ADT of closure queues, Init for packing, Prototypes, Prototypes
+//@subsubsection ADT of closure queues
+
+static inline void InitClosureQueue(void);
+static inline rtsBool QueueEmpty(void);
+static inline void QueueClosure(StgClosure *closure);
+static inline StgClosure *DeQueueClosure(void);
+
+//@node Init for packing, Packing routines, ADT of closure queues, Prototypes
+//@subsubsection Init for packing
+
+static void InitPacking(rtsBool unpack);
+# if defined(PAR)
+rtsBool InitPackBuffer(void);
+# elif defined(GRAN)
+rtsPackBuffer *InstantiatePackBuffer (void);
+static void reallocPackBuffer (void);
+# endif
+
+//@node Packing routines, Low level packing fcts, Init for packing, Prototypes
+//@subsubsection Packing routines
+
+static void PackClosure (StgClosure *closure);
+
+//@node Low level packing fcts, Unpacking routines, Packing routines, Prototypes
+//@subsubsection Low level packing fcts
+
+# if defined(GRAN)
+static void Pack (StgClosure *data);
+# else
+static void Pack (StgWord data);
+
+static void PackGeneric(StgClosure *closure);
+static void PackArray(StgClosure *closure);
+static void PackPLC (StgPtr addr);
+static void PackOffset (int offset);
+static void PackPAP(StgPAP *pap);
+static rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize);
+static rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize);
+static void PackFetchMe(StgClosure *closure);
+
+static void GlobaliseAndPackGA (StgClosure *closure);
+# endif
+
+//@node Unpacking routines, Aux fcts for packing, Low level packing fcts, Prototypes
+//@subsubsection Unpacking routines
+
+# if defined(PAR)
+void InitPendingGABuffer(nat size);
+void CommonUp(StgClosure *src, StgClosure *dst);
+static StgClosure *SetGAandCommonUp(globalAddr *gaP, StgClosure *closure,
+ rtsBool hasGA);
+static nat FillInClosure(StgWord ***bufptrP, StgClosure *graph);
+static void LocateNextParent(StgClosure **parentP,
+ nat *pptrP, nat *pptrsP, nat *sizeP);
+StgClosure *UnpackGraph(rtsPackBuffer *packBuffer,
+ globalAddr **gamap,
+ nat *nGAs);
+static StgClosure *UnpackClosure (StgWord ***bufptrP, StgClosure **graphP,
+ globalAddr *ga);
+static StgWord **UnpackGA(StgWord **bufptr, globalAddr *ga);
+static StgClosure *UnpackOffset(globalAddr *ga);
+static StgClosure *UnpackPLC(globalAddr *ga);
+static void UnpackArray(StgWord ***bufptrP, StgClosure *graph);
+static nat UnpackPAP(StgWord ***bufptrP, StgClosure *graph);
+
+# elif defined(GRAN)
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer* buffer);
+#endif
+
+//@node Aux fcts for packing, , Unpacking routines, Prototypes
+//@subsubsection Aux fcts for packing
+
+# if defined(PAR)
+static void DonePacking(void);
+static void AmPacking(StgClosure *closure);
+static int OffsetFor(StgClosure *closure);
+static rtsBool NotYetPacking(int offset);
+static inline rtsBool RoomToPack (nat size, nat ptrs);
+static inline rtsBool isOffset(globalAddr *ga);
+static inline rtsBool isFixed(globalAddr *ga);
+static inline rtsBool isConstr(globalAddr *ga);
+static inline rtsBool isUnglobalised(globalAddr *ga);
+# elif defined(GRAN)
+static void DonePacking(void);
+static rtsBool NotYetPacking(StgClosure *closure);
+# endif
+
+//@node Global variables, ADT of Closure Queues, Prototypes, Graph packing
+//@subsection Global variables
+/*
+ Static data declarations
+*/
+
+static nat pack_locn, /* ptr to first free loc in pack buffer */
+ clq_size, clq_pos,
+ buf_id = 1; /* identifier for buffer */
+static nat unpacked_size;
+static rtsBool roomInBuffer;
+#if defined(PAR)
+static GlobalTaskId dest_gtid=0; /* destination for message to send */
+#endif
+
+/*
+ The pack buffer
+ To be pedantic: in GrAnSim we're packing *addresses* of closures,
+ not the closures themselves.
+*/
+static rtsPackBuffer *globalPackBuffer = NULL, /* for packing a graph */
+ *globalUnpackBuffer = NULL; /* for unpacking a graph */
+
+
+/*
+ Bit of a hack for testing if a closure is the root of the graph. This is
+ set in @PackNearbyGraph@ and tested in @PackClosure@.
+*/
+
+static nat packed_thunks = 0;
+static StgClosure *graph_root;
+
+# if defined(PAR)
+/*
+ The offset hash table is used during packing to record the location in
+ the pack buffer of each closure which is packed.
+*/
+//@cindex offsetTable
+static HashTable *offsetTable;
+
+//@cindex PendingGABuffer
+static globalAddr *PendingGABuffer, *gaga;
+
+# endif /* PAR */
+
+
+//@node ADT of Closure Queues, Initialisation for packing, Global variables, Graph packing
+//@subsection ADT of Closure Queues
+
+//@menu
+//* Closure Queues::
+//* Init routines::
+//* Basic routines::
+//@end menu
+
+//@node Closure Queues, Init routines, ADT of Closure Queues, ADT of Closure Queues
+//@subsubsection Closure Queues
+/*
+ Closure Queues
+
+ These routines manage the closure queue.
+*/
+
+static nat clq_pos, clq_size;
+
+static StgClosure **ClosureQueue = NULL; /* HWL: init in main */
+
+#if defined(DEBUG)
+static char graphFingerPrint[MAX_FINGER_PRINT_LEN];
+#endif
+
+//@node Init routines, Basic routines, Closure Queues, ADT of Closure Queues
+//@subsubsection Init routines
+
+/* @InitClosureQueue@ allocates and initialises the closure queue. */
+
+//@cindex InitClosureQueue
+static inline void
+InitClosureQueue(void)
+{
+ clq_pos = clq_size = 0;
+
+ if (ClosureQueue==NULL)
+ ClosureQueue = (StgClosure**) stgMallocWords(RTS_PACK_BUFFER_SIZE,
+ "InitClosureQueue");
+}
+
+//@node Basic routines, Types of Global Addresses, Init routines, ADT of Closure Queues
+//@subsubsection Basic routines
+
+/*
+ QueueEmpty returns rtsTrue if the closure queue is empty; rtsFalse otherwise.
+*/
+
+//@cindex QueueEmpty
+static inline rtsBool
+QueueEmpty(void)
+{
+ return(clq_pos >= clq_size);
+}
+
+/* QueueClosure adds its argument to the closure queue. */
+
+//@cindex QueueClosure
+static inline void
+QueueClosure(closure)
+StgClosure *closure;
+{
+ if(clq_size < RTS_PACK_BUFFER_SIZE ) {
+ IF_PAR_DEBUG(paranoia,
+ belch(">__> <<%d>> Q: %p (%s); %d elems in q",
+ globalPackBuffer->id, closure, info_type(closure), clq_size-clq_pos));
+ ClosureQueue[clq_size++] = closure;
+ } else {
+ barf("Closure Queue Overflow (EnQueueing %p (%s))",
+ closure, info_type(closure));
+ }
+}
+
+/* DeQueueClosure returns the head of the closure queue. */
+
+//@cindex DeQueueClosure
+static inline StgClosure *
+DeQueueClosure(void)
+{
+ if(!QueueEmpty()) {
+ IF_PAR_DEBUG(paranoia,
+ belch(">__> <<%d>> DeQ: %p (%s); %d elems in q",
+ globalPackBuffer->id, ClosureQueue[clq_pos], info_type(ClosureQueue[clq_pos]),
+ clq_size-clq_pos));
+ return(ClosureQueue[clq_pos++]);
+ } else {
+ return((StgClosure*)NULL);
+ }
+}
+
+/* DeQueueClosure returns the head of the closure queue. */
+
+#if defined(DEBUG)
+//@cindex PrintQueueClosure
+static void
+PrintQueueClosure(void)
+{
+ nat i;
+
+ fputs("Closure queue:", stderr);
+ for (i=clq_pos; i < clq_size; i++)
+ fprintf(stderr, "%p (%s), ",
+ (StgClosure *)ClosureQueue[clq_pos++],
+ info_type(ClosureQueue[clq_pos++]));
+ fputc('\n', stderr);
+}
+#endif
+
+//@node Types of Global Addresses, , Basic routines, ADT of Closure Queues
+//@subsubsection Types of Global Addresses
+
+/*
+ Types of Global Addresses
+
+ These routines determine whether a GA is one of a number of special types
+ of GA.
+*/
+
+# if defined(PAR)
+//@cindex isOffset
+static inline rtsBool
+isOffset(globalAddr *ga)
+{
+ return (ga->weight == 1U && ga->payload.gc.gtid == (GlobalTaskId)0);
+}
+
+//@cindex isFixed
+static inline rtsBool
+isFixed(globalAddr *ga)
+{
+ return (ga->weight == 0U);
+}
+
+//@cindex isConstr
+static inline rtsBool
+isConstr(globalAddr *ga)
+{
+ return (ga->weight == 2U);
+}
+
+//@cindex isUnglobalised
+static inline rtsBool
+isUnglobalised(globalAddr *ga)
+{
+ return (ga->weight == 2U);
+}
+# endif
+
+//@node Initialisation for packing, Packing Functions, ADT of Closure Queues, Graph packing
+//@subsection Initialisation for packing
+/*
+ Simple Packing Routines
+
+ About packet sizes in GrAnSim: In GrAnSim we use a malloced block of
+ gransim_pack_buffer_size words to simulate a packet of pack_buffer_size
+ words. In the simulated PackBuffer we only keep the addresses of the
+ closures that would be packed in the parallel system (see Pack). To
+ decide if a packet overflow occurs pack_buffer_size must be compared
+ versus unpacked_size (see RoomToPack). Currently, there is no multi
+ packet strategy implemented, so in the case of an overflow we just stop
+ adding closures to the closure queue. If an overflow of the simulated
+ packet occurs, we just realloc some more space for it and carry on as
+ usual. -- HWL
+*/
+
+# if defined(GRAN)
+rtsPackBuffer *
+InstantiatePackBuffer (void) {
+ extern rtsPackBuffer *globalPackBuffer;
+
+ globalPackBuffer = (rtsPackBuffer *) stgMallocWords(sizeofW(rtsPackBuffer),
+ "InstantiatePackBuffer: failed to alloc packBuffer");
+ globalPackBuffer->size = RtsFlags.GranFlags.packBufferSize_internal;
+ globalPackBuffer->buffer = (StgWord **) stgMallocWords(RtsFlags.GranFlags.packBufferSize_internal,
+ "InstantiatePackBuffer: failed to alloc GranSim internal packBuffer");
+ /* NB: gransim_pack_buffer_size instead of pack_buffer_size -- HWL */
+ /* stgMallocWords is now simple allocate in Storage.c */
+
+ return (globalPackBuffer);
+}
+
+/*
+ Reallocate the GranSim internal pack buffer to make room for more closure
+ pointers. This is independent of the check for packet overflow as in GUM
+*/
+static void
+reallocPackBuffer (void) {
+
+ ASSERT(pack_locn >= (int)globalPackBuffer->size+sizeofW(rtsPackBuffer));
+
+ IF_GRAN_DEBUG(packBuffer,
+ belch("** Increasing size of PackBuffer %p to %d words (PE %u @ %d)\n",
+ globalPackBuffer, globalPackBuffer->size+REALLOC_SZ,
+ CurrentProc, CurrentTime[CurrentProc]));
+
+ globalPackBuffer = (rtsPackBuffer*)realloc(globalPackBuffer,
+ sizeof(StgClosure*)*(REALLOC_SZ +
+ (int)globalPackBuffer->size +
+ sizeofW(rtsPackBuffer))) ;
+ if (globalPackBuffer==(rtsPackBuffer*)NULL)
+ barf("Failing to realloc %d more words for PackBuffer %p (PE %u @ %d)\n",
+ REALLOC_SZ, globalPackBuffer, CurrentProc, CurrentTime[CurrentProc]);
+
+ globalPackBuffer->size += REALLOC_SZ;
+
+ ASSERT(pack_locn < globalPackBuffer->size+sizeofW(rtsPackBuffer));
+}
+# endif
+
+# if defined(PAR)
+/* @initPacking@ initialises the packing buffer etc. */
+//@cindex InitPackBuffer
+rtsBool
+InitPackBuffer(void)
+{
+ if (globalPackBuffer==(rtsPackBuffer*)NULL) {
+ if ((globalPackBuffer = (rtsPackBuffer *)
+ stgMallocWords(sizeofW(rtsPackBuffer)+RtsFlags.ParFlags.packBufferSize+DEBUG_HEADROOM,
+ "InitPackBuffer")) == NULL)
+ return rtsFalse;
+ }
+ return rtsTrue;
+}
+
+# endif
+//@cindex InitPacking
+static void
+InitPacking(rtsBool unpack)
+{
+# if defined(GRAN)
+ globalPackBuffer = InstantiatePackBuffer(); /* for GrAnSim only -- HWL */
+ /* NB: free in UnpackGraph */
+# elif defined(PAR)
+ if (unpack) {
+ /* allocate a GA-to-GA map (needed for ACK message) */
+ InitPendingGABuffer(RtsFlags.ParFlags.packBufferSize);
+ } else {
+ /* allocate memory to pack the graph into */
+ InitPackBuffer();
+ }
+# endif
+ /* init queue of closures seen during packing */
+ InitClosureQueue();
+
+ if (unpack)
+ return;
+
+ globalPackBuffer->id = buf_id++; /* buffer id are only used for debugging! */
+ pack_locn = 0; /* the index into the actual pack buffer */
+ unpacked_size = 0; /* the size of the whole graph when unpacked */
+ roomInBuffer = rtsTrue;
+ packed_thunks = 0; /* total number of thunks packed so far */
+# if defined(PAR)
+ offsetTable = allocHashTable();
+# endif
+}
+
+//@node Packing Functions, Low level packing routines, Initialisation for packing, Graph packing
+//@subsection Packing Functions
+
+//@menu
+//* Packing Sections of Nearby Graph::
+//* Packing Closures::
+//@end menu
+
+//@node Packing Sections of Nearby Graph, Packing Closures, Packing Functions, Packing Functions
+//@subsubsection Packing Sections of Nearby Graph
+/*
+ Packing Sections of Nearby Graph
+
+ @PackNearbyGraph@ packs a closure and associated graph into a static
+ buffer (@PackBuffer@). It returns the address of this buffer and the
+ size of the data packed into the buffer (in its second parameter,
+ @packBufferSize@). The associated graph is packed in a depth first
+ manner, hence it uses an explicit queue of closures to be packed rather
+ than simply using a recursive algorithm. Once the packet is full,
+ closures (other than primitive arrays) are packed as FetchMes, and their
+ children are not queued for packing. */
+
+//@cindex PackNearbyGraph
+
+/* NB: this code is shared between GranSim and GUM;
+ tso only used in GranSim */
+rtsPackBuffer *
+PackNearbyGraph(closure, tso, packBufferSize, dest)
+StgClosure* closure;
+StgTSO* tso;
+nat *packBufferSize;
+GlobalTaskId dest;
+{
+ IF_PAR_DEBUG(resume,
+ graphFingerPrint[0] = '\0');
+
+ ASSERT(RTS_PACK_BUFFER_SIZE > 0);
+ ASSERT(_HS==1); // HWL HACK; compile time constant
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+ PAR_TICKY_PACK_NEARBY_GRAPH_START();
+#endif
+
+ /* ToDo: check that we have enough heap for the packet
+ ngoq ngo'
+ if (Hp + PACK_HEAP_REQUIRED > HpLim)
+ return NULL;
+ */
+ InitPacking(rtsFalse);
+# if defined(PAR)
+ dest_gtid=dest; //-1 to disable
+# elif defined(GRAN)
+ graph_root = closure;
+# endif
+
+ IF_GRAN_DEBUG(pack,
+ belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [PE %d]\n demanded by TSO %d (%p) [PE %u]",
+ globalPackBuffer->id, globalPackBuffer, closure, where_is(closure),
+ tso->id, tso, where_is((StgClosure*)tso)));
+
+ IF_GRAN_DEBUG(pack,
+ belch("** PrintGraph of %p is:", closure);
+ PrintGraph(closure,0));
+
+ IF_PAR_DEBUG(resume,
+ GraphFingerPrint(closure, graphFingerPrint);
+ ASSERT(strlen(graphFingerPrint)<=MAX_FINGER_PRINT_LEN);
+ belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [%x]\n demanded by TSO %d (%p); Finger-print is\n {%s}",
+ globalPackBuffer->id, globalPackBuffer, closure, mytid,
+ tso->id, tso, graphFingerPrint));
+
+ IF_PAR_DEBUG(packet,
+ belch("** PrintGraph of %p is:", closure);
+ belch("** pack_locn=%d", pack_locn);
+ PrintGraph(closure,0));
+
+ QueueClosure(closure);
+ do {
+ PackClosure(DeQueueClosure());
+ } while (!QueueEmpty());
+
+# if defined(PAR)
+
+ /* Record how much space the graph needs in packet and in heap */
+ globalPackBuffer->tso = tso; // currently unused, I think (debugging?)
+ globalPackBuffer->unpacked_size = unpacked_size;
+ globalPackBuffer->size = pack_locn;
+
+ /* Check for buffer overflow (again) */
+ ASSERT(pack_locn <= RtsFlags.ParFlags.packBufferSize+DEBUG_HEADROOM);
+ IF_DEBUG(sanity, // write magic end-of-buffer word
+ globalPackBuffer->buffer[pack_locn] = END_OF_BUFFER_MARKER);
+ *packBufferSize = pack_locn;
+
+# else /* GRAN */
+
+ /* Record how much space is needed to unpack the graph */
+ // PackBuffer[PACK_FLAG_LOCN] = (P_) MAGIC_PACK_FLAG; for testing
+ globalPackBuffer->tso = tso;
+ globalPackBuffer->unpacked_size = unpacked_size;
+
+ // ASSERT(pack_locn <= PackBuffer[PACK_SIZE_LOCN]+PACK_HDR_SIZE);
+ /* ToDo: Print an earlier, more meaningful message */
+ if (pack_locn==0) /* i.e. packet is empty */
+ barf("EMPTY PACKET! Can't transfer closure %p at all!!\n",
+ closure);
+ globalPackBuffer->size = pack_locn;
+ *packBufferSize = pack_locn;
+
+# endif
+
+ DonePacking(); /* {GrAnSim}vaD 'ut'Ha' */
+
+# if defined(GRAN)
+ IF_GRAN_DEBUG(pack ,
+ belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d",
+ globalPackBuffer->id, closure, globalPackBuffer->size, packed_thunks, globalPackBuffer->unpacked_size));
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_packets++;
+ globalGranStats.tot_packet_size += pack_locn;
+ }
+
+ IF_GRAN_DEBUG(pack, PrintPacket(globalPackBuffer));
+# elif defined(PAR)
+ IF_PAR_DEBUG(packet,
+ belch("** Finished <<%d>> packing graph %p (%s); closures packed: %d; thunks packed: %d; size of graph: %d",
+ globalPackBuffer->id, closure, info_type(closure),
+ globalPackBuffer->size, packed_thunks,
+ globalPackBuffer->unpacked_size));;
+
+ IF_DEBUG(sanity, // do a sanity check on the packet just constructed
+ checkPacket(globalPackBuffer));
+# endif /* GRAN */
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+ PAR_TICKY_PACK_NEARBY_GRAPH_END(globalPackBuffer->size, packed_thunks);
+#endif
+
+ return (globalPackBuffer);
+}
+
+//@cindex PackOneNode
+
+# if defined(GRAN)
+/* This version is used when the node is already local */
+
+rtsPackBuffer *
+PackOneNode(closure, tso, packBufferSize)
+StgClosure* closure;
+StgTSO* tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *globalPackBuffer;
+ int i, clpack_locn;
+
+ InitPacking(rtsFalse);
+
+ IF_GRAN_DEBUG(pack,
+ belch("** PackOneNode: %p (%s)[PE %d] requested by TSO %d (%p) [PE %d]",
+ closure, info_type(closure),
+ where_is(closure), tso->id, tso, where_is((StgClosure *)tso)));
+
+ Pack(closure);
+
+ /* Record how much space is needed to unpack the graph */
+ globalPackBuffer->tso = tso;
+ globalPackBuffer->unpacked_size = unpacked_size;
+
+ /* Set the size parameter */
+ ASSERT(pack_locn <= RTS_PACK_BUFFER_SIZE);
+ globalPackBuffer->size = pack_locn;
+ *packBufferSize = pack_locn;
+
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ globalGranStats.tot_packets++;
+ globalGranStats.tot_packet_size += pack_locn;
+ }
+ IF_GRAN_DEBUG(pack,
+ PrintPacket(globalPackBuffer));
+
+ return (globalPackBuffer);
+}
+# endif /* GRAN */
+
+#if defined(GRAN)
+
+/*
+ PackTSO and PackStkO are entry points for two special kinds of closure
+ which are used in the parallel RTS. Compared with other closures they
+ are rather awkward to pack because they don't follow the normal closure
+ layout (where all pointers occur before all non-pointers). Luckily,
+ they're only needed when migrating threads between processors. */
+
+//@cindex PackTSO
+rtsPackBuffer*
+PackTSO(tso, packBufferSize)
+StgTSO *tso;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *globalPackBuffer;
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing TSO %d (%p)", tso->id, tso));
+ *packBufferSize = 0;
+ // PackBuffer[0] = PackBuffer[1] = 0; ???
+ return(globalPackBuffer);
+}
+
+//@cindex PackStkO
+static rtsPackBuffer*
+PackStkO(stko, packBufferSize)
+StgPtr stko;
+nat *packBufferSize;
+{
+ extern rtsPackBuffer *globalPackBuffer;
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing STKO %p", stko));
+ *packBufferSize = 0;
+ // PackBuffer[0] = PackBuffer[1] = 0;
+ return(globalPackBuffer);
+}
+
+static void
+PackFetchMe(StgClosure *closure)
+{
+ barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!");
+}
+
+#elif defined(PAR)
+
+static rtsPackBuffer*
+PackTSO(tso, packBufferSize)
+StgTSO *tso;
+nat *packBufferSize;
+{
+ barf("{PackTSO}Daq Qagh: trying to pack a TSO %d (%p) of size %d; thread migrations not supported, yet",
+ tso->id, tso, packBufferSize);
+}
+
+rtsPackBuffer*
+PackStkO(stko, packBufferSize)
+StgPtr stko;
+nat *packBufferSize;
+{
+ barf("{PackStkO}Daq Qagh: trying to pack a STKO (%p) of size %d; thread migrations not supported, yet",
+ stko, packBufferSize);
+}
+
+//@cindex PackFetchMe
+static void
+PackFetchMe(StgClosure *closure)
+{
+ StgInfoTable *ip;
+ nat i;
+ int offset;
+#if defined(DEBUG)
+ nat x = pack_locn;
+#endif
+
+#if defined(GRAN)
+ barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!");
+#else
+ offset = OffsetFor(closure);
+ if (!NotYetPacking(offset)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing FETCH_ME for closure %p (s) as offset to %d",
+ closure, info_type(closure), offset));
+ PackOffset(offset);
+ // unpacked_size += 0; // unpacked_size unchanged (closure is shared!!)
+ return;
+ }
+
+ /* Need a GA even when packing a constructed FETCH_ME (cruel world!) */
+ AmPacking(closure);
+ /* FMs must be always globalised */
+ GlobaliseAndPackGA(closure);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing FETCH_ME for closure %p (%s) with GA: ((%x, %d, %x))",
+ closure, info_type(closure),
+ globalPackBuffer->buffer[pack_locn-2],
+ globalPackBuffer->buffer[pack_locn-1],
+ globalPackBuffer->buffer[pack_locn-3]));
+
+ /* Pack a FetchMe closure instead of closure */
+ ip = &stg_FETCH_ME_info;
+ /* this assumes that the info ptr is always the first word in a closure*/
+ Pack((StgWord)ip);
+ for (i = 1; i < _HS; ++i) // pack rest of fixed header
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ unpacked_size += sizeofW(StgFetchMe);
+ /* size of FETCHME in packed is the same as that constant */
+ ASSERT(pack_locn-x==PACK_FETCHME_SIZE);
+ /* In the pack buffer the pointer to a GA (in the FetchMe closure)
+ is expanded to the full GA; this is a compile-time const */
+ //ASSERT(PACK_FETCHME_SIZE == sizeofW(StgFetchMe)-1+PACK_GA_SIZE);
+#endif
+}
+
+#endif
+
+#ifdef DIST
+static void
+PackRemoteRef(StgClosure *closure)
+{
+ StgInfoTable *ip;
+ nat i;
+ int offset;
+
+ offset = OffsetFor(closure);
+ if (!NotYetPacking(offset)) {
+ PackOffset(offset);
+ unpacked_size += 2;
+ return;
+ }
+
+ /* Need a GA even when packing a constructed REMOTE_REF (cruel world!) */
+ AmPacking(closure);
+
+ /* basically we just Globalise, but for sticky things we can't have multiple GAs,
+ so we must prevent the GAs being split.
+
+ In returning things to the true sticky owner, this case is already handled, but for
+ anything else we just give up at the moment... This needs to be fixed!
+ */
+ { globalAddr *ga;
+ ga = LAGAlookup(closure); // surely this ga must exist?
+
+ // ***************************************************************************
+ // ***************************************************************************
+ // REMOTE_REF HACK - dual is in SetGAandCommonUp
+ // - prevents the weight from ever reaching zero
+ if(ga != NULL)
+ ga->weight=0x06660666; //anything apart from 0 really...
+ // ***************************************************************************
+ // ***************************************************************************
+
+ if((ga != NULL)&&(ga->weight / 2 <= 2))
+ barf("Cant split the weight any further when packing REMOTE_REF for closure %p (%s) with GA: ((%x, %d, %x))",
+ closure, info_type(closure),
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight);
+ }
+ GlobaliseAndPackGA(closure);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing REMOTE_REF for closure %p (%s) with GA: ((%x, %d, %x))",
+ closure, info_type(closure),
+ globalPackBuffer->buffer[pack_locn-2],
+ globalPackBuffer->buffer[pack_locn-1],
+ globalPackBuffer->buffer[pack_locn-3]));
+
+ /* Pack a REMOTE_REF closure instead of closure */
+ ip = &stg_REMOTE_REF_info;
+ /* this assumes that the info ptr is always the first word in a closure*/
+ Pack((StgWord)ip);
+ for (i = 1; i < _HS; ++i) // pack rest of fixed header
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ unpacked_size += PACK_FETCHME_SIZE;
+}
+#endif /* DIST */
+
+//@node Packing Closures, , Packing Sections of Nearby Graph, Packing Functions
+//@subsubsection Packing Closures
+/*
+ Packing Closures
+
+ @PackClosure@ is the heart of the normal packing code. It packs a single
+ closure into the pack buffer, skipping over any indirections and
+ globalising it as necessary, queues any child pointers for further
+ packing, and turns it into a @FetchMe@ or revertible black hole (@RBH@)
+ locally if it was a thunk. Before the actual closure is packed, a
+ suitable global address (GA) is inserted in the pack buffer. There is
+ always room to pack a fetch-me to the closure (guaranteed by the
+ RoomToPack calculation), and this is packed if there is no room for the
+ entire closure.
+
+ Space is allocated for any primitive array children of a closure, and
+ hence a primitive array can always be packed along with it's parent
+ closure. */
+
+//@cindex PackClosure
+
+# if defined(PAR)
+
+/*
+  PackClosure (GUM version): pack a single closure into the global pack
+  buffer, dispatching on its info-table type.  Indirections are shorted
+  out first; a closure that is already in the buffer is packed as an
+  offset (back-reference) so that sharing survives unpacking.  Static
+  closures become PLCs, black-hole-like closures become FETCH_MEs, and
+  ordinary data/thunks are delegated to PackGeneric/PackArray/PackPAP.
+*/
+void
+PackClosure(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info;
+ nat clpack_locn;
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(get_itbl(closure)));
+
+ closure = UNWIND_IND(closure);
+ /* now closure is the thing we want to pack */
+ info = get_itbl(closure);
+
+ clpack_locn = OffsetFor(closure);
+
+ /* If the closure has been packed already, just pack an indirection to it
+ to guarantee that the graph doesn't become a tree when unpacked */
+ if (!NotYetPacking(clpack_locn)) {
+ PackOffset(clpack_locn);
+ return;
+ }
+
+ switch (info->type) {
+
+ case CONSTR_CHARLIKE:
+ /* charlike: pack a pointer to the pre-allocated static closure */
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a charlike closure %d",
+ ((StgIntCharlikeClosure*)closure)->data));
+
+ PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data));
+ // NB: unpacked_size of a PLC is 0
+ return;
+
+ case CONSTR_INTLIKE:
+ {
+ StgInt val = ((StgIntCharlikeClosure*)closure)->data;
+
+ /* small ints have pre-allocated static closures; big ones don't */
+ if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a small intlike %d as a PLC",
+ val));
+ PackPLC((StgPtr)INTLIKE_CLOSURE(val));
+ // NB: unpacked_size of a PLC is 0
+ return;
+ } else {
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a big intlike %d as a normal closure",
+ val));
+ PackGeneric(closure);
+ return;
+ }
+ }
+
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_0_1:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_0_2:
+ /* it's a constructor (i.e. plain data) */
+ IF_PAR_DEBUG(pack,
+ belch("*>^^ Packing a CONSTR %p (%s) using generic packing",
+ closure, info_type(closure)));
+ PackGeneric(closure);
+ return;
+
+ case THUNK_STATIC: // ToDo: check whether that's ok
+ case FUN_STATIC: // ToDo: check whether that's ok
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:// For now we ship indirections to CAFs: They are
+ // evaluated on each PE if needed
+ IF_PAR_DEBUG(pack,
+ belch("*>~~ Packing a %p (%s) as a PLC",
+ closure, info_type(closure)));
+
+ PackPLC((StgPtr)closure);
+ // NB: unpacked_size of a PLC is 0
+ return;
+
+ case THUNK_SELECTOR:
+ {
+ StgClosure *selectee = ((StgSelector *)closure)->selectee;
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** Found THUNK_SELECTOR at %p (%s) pointing to %p (%s); using PackGeneric",
+ closure, info_type(closure),
+ selectee, info_type(selectee)));
+ PackGeneric(closure);
+ /* inlined code; probably could use PackGeneric
+ Pack((StgWord)(*(StgPtr)closure));
+ Pack((StgWord)(selectee));
+ QueueClosure(selectee);
+ unpacked_size += 2;
+ */
+ }
+ return;
+
+ /* ordinary functions and thunks: plain generic packing */
+ case FUN:
+ case FUN_1_0:
+ case FUN_0_1:
+ case FUN_2_0:
+ case FUN_1_1:
+ case FUN_0_2:
+ case THUNK:
+ case THUNK_1_0:
+ case THUNK_0_1:
+ case THUNK_2_0:
+ case THUNK_1_1:
+ case THUNK_0_2:
+ PackGeneric(closure);
+ return;
+
+ case AP_UPD:
+ case PAP:
+ /*
+ barf("*> Packing of PAP not implemented %p (%s)",
+ closure, info_type(closure));
+
+ Currently we don't pack PAPs; we pack a FETCH_ME to the closure,
+ instead. Note that since PAPs contain a chunk of stack as payload,
+ implementing packing of PAPs is a first step towards thread migration.
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing a PAP closure at %p (%s) as a FETCH_ME",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ */
+ PackPAP((StgPAP *)closure);
+ return;
+
+ case CAF_BLACKHOLE:
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case SE_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case RBH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+
+ /* If it's a (revertible) black-hole, pack a FetchMe closure to it */
+ //ASSERT(pack_locn > PACK_HDR_SIZE);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing a BH-like closure at %p (%s) as a FETCH_ME",
+ closure, info_type(closure)));
+ /* NB: in case of a FETCH_ME this might build up a chain of FETCH_MEs;
+ phps short-cut the GA here */
+ PackFetchMe(closure);
+ return;
+
+#ifdef DIST
+ case REMOTE_REF:
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing %p (%s) as a REMOTE_REF",
+ closure, info_type(closure)));
+ PackRemoteRef(closure);
+ /* we hopefully don't end up with a chain of REMOTE_REFs!!!!!!!!!! */
+
+ return;
+#endif
+
+ case TSO:
+ case MVAR:
+#ifdef DIST
+ /* only the GdH (DIST) variant can ship stateful objects, as refs */
+ IF_PAR_DEBUG(pack,
+ belch("*>.. Packing %p (%s) as a RemoteRef",
+ closure, info_type(closure)));
+ PackRemoteRef(closure);
+#else
+ barf("{Pack}Daq Qagh: Only GdH can pack %p (%s)",
+ closure, info_type(closure));
+#endif
+ return;
+
+ case ARR_WORDS:
+ PackArray(closure);
+ return;
+
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ case MUT_VAR:
+ /*
+ Eventually, this should use the same packing routine as ARR_WORDS
+
+ GlobaliseAndPackGA(closure);
+ PackArray(closure);
+ return;
+ */
+ barf("Qagh{Pack}Doq: packing of mutable closures not yet implemented: %p (%s)",
+ closure, info_type(closure));
+
+# ifdef DEBUG
+ /* the cases below are diagnosed only in a DEBUG build; otherwise
+ they fall into the default barf */
+ case BCO:
+ barf("{Pack}Daq Qagh: found BCO closure %p (%s); GUM hates interpreted code",
+ closure, info_type(closure));
+ /* never reached */
+
+ // check error cases only in a debugging setup
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ barf("{Pack}Daq Qagh: found return vector %p (%s) when packing (thread migration not implemented)",
+ closure, info_type(closure));
+ /* never reached */
+
+ case UPDATE_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ barf("{Pack}Daq Qagh: found stack frame %p (%s) when packing (thread migration not implemented)",
+ closure, info_type(closure));
+ /* never reached */
+
+ case BLOCKED_FETCH:
+ case EVACUATED:
+ /* something's very wrong */
+ barf("{Pack}Daq Qagh: found %s (%p) when packing",
+ info_type(closure), closure);
+ /* never reached */
+
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ barf("Pack: found IND_... after shorting out indirections %d (%s)",
+ (nat)(info->type), info_type(closure));
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ barf("Pack: found foreign thingy; not yet implemented in %d (%s)",
+ (nat)(info->type), info_type(closure));
+#endif
+
+ default:
+ barf("Pack: strange closure %d", (nat)(info->type));
+ } /* switch */
+}
+
+/*
+ Pack a constructor of unknown size.
+ Similar to PackGeneric but without creating GAs.
+*/
+#if 0
+//@cindex PackConstr
+/*
+  PackConstr (currently compiled out by the surrounding #if 0):
+  pack a constructor closure of unknown size.  Like PackGeneric, but
+  never allocates a global address -- it always packs the literal 2,
+  the marker for an unglobalised closure.
+*/
+static void
+PackConstr(StgClosure *closure)
+{
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs, i;
+ char str[80];
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(closure->header.info));
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(info->type == CONSTR ||
+ info->type == CONSTR_1_0 ||
+ info->type == CONSTR_0_1 ||
+ info->type == CONSTR_2_0 ||
+ info->type == CONSTR_1_1 ||
+ info->type == CONSTR_0_2);
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "*>^^ packing a constructor at %p (%s) (size=%d, ptrs=%d, nonptrs=%d)\n",
+ closure, info_type(closure), size, ptrs, nonptrs));
+
+ /* Primitive arrays have gone; now we have (MUT_)ARR_WORDS etc */
+
+ /* if the buffer is full, fall back to packing a FETCH_ME instead */
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* Record the location of the GA */
+ AmPacking(closure);
+
+ /* Pack Constructor marker */
+ Pack((StgWord)2);
+
+ /* pack fixed and variable header */
+ for (i = 0; i < _HS + vhs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ /* register all ptrs for further packing */
+ for (i = 0; i < ptrs; ++i)
+ QueueClosure(((StgClosure *) *(((StgPtr)closure)+(_HS+vhs)+i)));
+
+ /* pack non-ptrs */
+ for (i = 0; i < nonptrs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+(_HS+vhs)+ptrs+i));
+}
+#endif
+
+/*
+ Generic packing code.
+ This code is performed for `ordinary' closures such as CONSTR, THUNK etc.
+*/
+//@cindex PackGeneric
+/*
+  PackGeneric: pack an `ordinary' closure (CONSTR, FUN, THUNK, ...).
+  Packed layout: | GA or marker | FIXED HDR | VAR HDR | PTRS | NON-PTRS |.
+  Pointer fields are not packed inline -- each one is queued (QueueClosure)
+  for later packing.  If the closure is an updatable thunk it is turned
+  into a revertible black hole (RBH) locally after packing, so that its
+  result can be sent back via the GA recorded here.  Falls back to
+  packing a FETCH_ME when the buffer is full or the per-packet thunk
+  quota (RtsFlags.ParFlags.thunksToPack) is exhausted.
+*/
+static void
+PackGeneric(StgClosure *closure)
+{
+ StgInfoTable *info;
+ StgClosure *rbh;
+ nat size, ptrs, nonptrs, vhs, i, m;
+ char str[80];
+
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(closure));
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(!IS_BLACK_HOLE(closure));
+
+ IF_PAR_DEBUG(pack,
+ fprintf(stderr, "*>== %p (%s): generic packing (size=%d, ptrs=%d, nonptrs=%d)\n",
+ closure, info_type(closure), size, ptrs, nonptrs));
+
+ /* packing strategies: how many thunks to add to a packet;
+ default is infinity i.e. RtsFlags.ParFlags.thunksToPack==0 */
+ if (RtsFlags.ParFlags.thunksToPack &&
+ packed_thunks >= RtsFlags.ParFlags.thunksToPack &&
+ closure_THUNK(closure)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& refusing to pack more than %d thunks per packet; packing FETCH_ME for closure %p (%s)",
+ packed_thunks, closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* Primitive arrays have gone; now we have (MUT_)ARR_WORDS etc */
+
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* Record the location of the GA */
+ AmPacking(closure);
+ /* Allocate a GA for this closure and put it into the buffer */
+ /* Checks for globalisation scheme; default: globalise everything thunks */
+ if ( RtsFlags.ParFlags.globalising == 0 ||
+ (closure_THUNK(closure) && !closure_UNPOINTED(closure)) )
+ GlobaliseAndPackGA(closure);
+ else
+ Pack((StgWord)2); // marker for unglobalised closure
+
+
+ ASSERT(!(info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+ info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR));
+
+ /* At last! A closure we can actually pack! */
+ /* BUGFIX: this exemption formerly used `||':
+ (t != FETCH_ME) || (t != REMOTE_REF) is a tautology (always true),
+ so EVERY mutable closure -- including the two types meant to be
+ exempt -- would barf. The intended condition needs `&&'. */
+ if (ip_MUTABLE(info) && ((info->type != FETCH_ME) && (info->type != REMOTE_REF)))
+ barf("*>// %p (%s) PackClosure: trying to replicate a Mutable closure!",
+ closure, info_type(closure));
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* pack fixed and variable header */
+ for (i = 0; i < _HS + vhs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+
+ /* register all ptrs for further packing */
+ for (i = 0; i < ptrs; ++i)
+ QueueClosure(((StgClosure *) *(((StgPtr)closure)+(_HS+vhs)+i)));
+
+ /* pack non-ptrs */
+ for (i = 0; i < nonptrs; ++i)
+ Pack((StgWord)*(((StgPtr)closure)+(_HS+vhs)+ptrs+i));
+
+ // ASSERT(_HS+vhs+ptrs+nonptrs==size);
+ if ((m=_HS+vhs+ptrs+nonptrs)<size) {
+ IF_PAR_DEBUG(pack,
+ belch("*>** WARNING: slop in closure %p (%s); filling %d words; SHOULD NEVER HAPPEN",
+ closure, info_type(closure), size-m));
+ for (i=m; i<size; i++)
+ Pack((StgWord)*(((StgPtr)closure)+i));
+ }
+
+ unpacked_size += size;
+ //unpacked_size += (size < MIN_UPD_SIZE) ? MIN_UPD_SIZE : size;
+
+ /*
+ * Record that this is a revertable black hole so that we can fill in
+ * its address from the fetch reply. Problem: unshared thunks may cause
+ * space leaks this way, their GAs should be deallocated following an
+ * ACK.
+ */
+
+ if (closure_THUNK(closure) && !closure_UNPOINTED(closure)) {
+ rbh = convertToRBH(closure);
+ ASSERT(size>=_HS+MIN_UPD_SIZE); // min size for any updatable closure
+ ASSERT(rbh == closure); // rbh at the same position (minced version)
+ packed_thunks++;
+ } else if ( closure==graph_root ) {
+ packed_thunks++; // root of graph is counted as a thunk
+ }
+}
+/*
+ Pack an array of words.
+ ToDo: implement packing of MUT_ARRAYs
+*/
+
+//@cindex PackArray
<docstring/>
+/*
+  PackArray: pack an ARR_WORDS closure as |GA/marker|info|words|payload...|.
+  Mutable array variants appear in the ASSERT but their packing is not
+  implemented yet (PackClosure barfs on them before reaching here).
+  NOTE(review): the RoomToPack call uses the nonptrs count returned by
+  get_closure_info; this assumes that count covers the n payload words
+  packed below -- confirm against get_closure_info for ARR_WORDS.
+*/
+static void
+PackArray(StgClosure *closure)
+{
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ nat i, n;
+ char str[80];
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+
+ ASSERT(info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+ info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR);
+
+ n = ((StgArrWords *)closure)->words;
+ // this includes the header!: arr_words_sizeW(stgCast(StgArrWords*,q));
+
+ IF_PAR_DEBUG(pack,
+ belch("*>== %p (%s): packing an array of %d words (size=%d)\n",
+ closure, info_type(closure), n,
+ arr_words_sizeW((StgArrWords *)closure)));
+
+ /* check that we have enough room in the pack buffer */
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ closure, info_type(closure)));
+ PackFetchMe(closure);
+ return;
+ }
+
+ /* global stats about arrays sent */
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.tot_arrs++;
+ globalParStats.tot_arr_size += ((StgArrWords *)closure)->words;
+ }
+
+ /* record offset of the closure and allocate a GA */
+ AmPacking(closure);
+ /* Checks for globalisation scheme; default: globalise everything thunks */
+ if ( RtsFlags.ParFlags.globalising == 0 ||
+ (closure_THUNK(closure) && !closure_UNPOINTED(closure)) )
+ GlobaliseAndPackGA(closure);
+ else
+ Pack((StgWord)2); // marker for unglobalised closure
+
+ /* Pack the header (2 words: info ptr and the number of words to follow) */
+ Pack((StgWord)*(StgPtr)closure);
+ Pack(((StgArrWords *)closure)->words);
+
+ /* pack the payload of the closure (all non-ptrs) */
+ for (i=0; i<n; i++)
+ Pack((StgWord)((StgArrWords *)closure)->payload[i]);
+
+ unpacked_size += arr_words_sizeW((StgArrWords *)closure);
+}
+
+/*
+ Pack a PAP closure.
+ Note that the representation of a PAP in the buffer is different from
+ its representation in the heap. In particular, pointers to local
+ closures are packed directly as FETCHME closures, using
+ PACK_FETCHME_SIZE words to represent a 1 word pointer in the original graph
+ structure. To account for the difference in size we store the packed
+ size of the closure as part of the PAP's variable header in the buffer.
+*/
+
+//@cindex PackPAP
+/*
+  PackPAP: pack a PAP (partial application).  The buffer representation
+  differs from the heap one: heap pointers found in the stack-chunk
+  payload are packed as PLCs or FETCH_MEs (PACK_FETCHME_SIZE words each),
+  so the packed payload size is recorded in an extra word reserved at
+  pack_start and filled in at the very end of this function.
+*/
+static void
+PackPAP(StgPAP *pap) {
+ nat n, i, j, pack_start;
+ StgPtr p, q;
+ const StgInfoTable* info;
+ StgWord bitmap;
+ /* debugging only */
+ StgPtr end;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+ nat unpacked_size_before_PAP, FMs_in_PAP=0; // debugging only
+
+ /* This is actually a setup invariant; checked here 'cause it affects PAPs*/
+ //ASSERT(PACK_FETCHME_SIZE == sizeofW(StgFetchMe)-1+PACK_GA_SIZE);
+ ASSERT(NotYetPacking(OffsetFor((StgClosure *)pap)));
+ IF_DEBUG(sanity,
+ unpacked_size_before_PAP = unpacked_size);
+
+ n = (nat)(pap->n_args);
+
+ /* get info about basic layout of the closure */
+ info = get_closure_info((StgClosure *)pap, &size, &ptrs, &nonptrs, &vhs, str);
+ ASSERT(ptrs==0 && nonptrs==0 && size==pap_sizeW(pap));
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** %p (%s): PackPAP: packing PAP with %d words (size=%d; ptrs=%d; nonptrs=%d:",
+ (StgClosure *)pap, info_type((StgClosure *)pap),
+ n, size, ptrs, nonptrs);
+ printClosure((StgClosure *)pap));
+
+ /* check that we have enough room in the pack buffer */
+ if (!RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>&& pack buffer is full; packing FETCH_ME for closure %p (%s)",
+ (StgClosure *)pap, info_type((StgClosure *)pap)));
+ PackFetchMe((StgClosure *)pap);
+ return;
+ }
+
+ /* record offset of the closure and allocate a GA */
+ AmPacking((StgClosure *)pap);
+ /* Checks for globalisation scheme; default: globalise everything thunks */
+ if ( RtsFlags.ParFlags.globalising == 0 ||
+ (closure_THUNK(pap) && !closure_UNPOINTED(pap)) )
+ GlobaliseAndPackGA((StgClosure *)pap);
+ else
+ Pack((StgWord)2); // marker for unglobalised closure
+
+ /* Pack the PAP header */
+ Pack((StgWord)(pap->header.info));
+ Pack((StgWord)(pap->n_args));
+ Pack((StgWord)(pap->fun));
+ pack_start = pack_locn; // to compute size of PAP in buffer
+ Pack((StgWord)0); // this will be filled in later (size of PAP in buffer)
+
+ /* Pack the payload of a PAP i.e. a stack chunk */
+ /* pointers to start of stack chunk */
+ p = (StgPtr)(pap->payload);
+ end = (StgPtr)((nat)pap+pap_sizeW(pap)*sizeof(StgWord)); // (StgPtr)((nat)pap+sizeof(StgPAP)+sizeof(StgPtr)*n);
+ while (p<end) {
+ /* the loop body has been borrowed from scavenge_stack */
+ q = (StgPtr)*p;
+
+ /* If we've got a tag, pack all words in that block */
+ if (IS_ARG_TAG((W_)q)) { // q stands for the no. of non-ptrs to follow
+ nat m = ARG_TAG((W_)q); // first word after this block
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: packing %d words (tagged), starting @ %p",
+ p, m, p));
+ for (i=0; i<m+1; i++)
+ Pack((StgWord)*(p+i));
+ p += m+1; // m words + the tag
+ continue;
+ }
+
+ /* If q is is a pointer to a (heap allocated) closure we pack a FETCH_ME
+ ToDo: provide RTS flag to also pack these closures
+ */
+ if (! LOOKS_LIKE_GHC_INFO(q) ) {
+ /* distinguish static closure (PLC) from other closures (FM) */
+ switch (get_itbl((StgClosure*)q)->type) {
+ case CONSTR_CHARLIKE:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: packing a charlike closure %d",
+ ((StgIntCharlikeClosure*)q)->data));
+
+ PackPLC((StgPtr)CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)q)->data));
+ p++;
+ break;
+
+ case CONSTR_INTLIKE:
+ {
+ StgInt val = ((StgIntCharlikeClosure*)q)->data;
+
+ if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: Packing ptr to a small intlike %d as a PLC", val));
+ PackPLC((StgPtr)INTLIKE_CLOSURE(val));
+ p++;
+ break;
+ } else {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: Packing a ptr to a big intlike %d as a FM",
+ val));
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)q);
+ p++;
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ break;
+ }
+ }
+ case THUNK_STATIC: // ToDo: check whether that's ok
+ case FUN_STATIC: // ToDo: check whether that's ok
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP: packing a ptr to a %p (%s) as a PLC",
+ q, info_type((StgClosure *)q)));
+
+ PackPLC((StgPtr)q);
+ p++;
+ break;
+ }
+ default:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: packing FM to %p (%s)",
+ p, q, info_type((StgClosure*)q)));
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)q);
+ p++;
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ break;
+ }
+ continue;
+ }
+
+ /*
+ * Otherwise, q must be the info pointer of an activation
+ * record. All activation records have 'bitmap' style layout
+ * info.
+ */
+ info = get_itbl((StgClosure *)p);
+ switch (info->type) {
+
+ /* Dynamic bitmap: the mask is stored on the stack */
+ case RET_DYN:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: RET_DYN",
+ p));
+
+ /* Pack the header as is */
+ Pack((StgWord)(((StgRetDyn *)p)->info));
+ Pack((StgWord)(((StgRetDyn *)p)->liveness));
+ Pack((StgWord)(((StgRetDyn *)p)->ret_addr));
+
+ bitmap = ((StgRetDyn *)p)->liveness;
+ p = (P_)&((StgRetDyn *)p)->payload[0];
+ goto small_bitmap;
+
+ /* probably a slow-entry point return address: */
+ case FUN:
+ case FUN_STATIC:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: FUN or FUN_STATIC",
+ p));
+
+ Pack((StgWord)(((StgClosure *)p)->header.info));
+ p++;
+
+ goto follow_srt; //??
+ }
+
+ /* Using generic code here; could inline as in scavenge_stack */
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *frame = (StgUpdateFrame *)p;
+ nat type = get_itbl(frame->updatee)->type;
+
+ ASSERT(type==BLACKHOLE || type==CAF_BLACKHOLE || type==BLACKHOLE_BQ);
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: UPDATE_FRAME (updatee=%p; link=%p)",
+ p, frame->updatee, frame->link));
+
+ Pack((StgWord)(frame->header.info));
+ Pack((StgWord)(frame->link)); // ToDo: fix intra-stack pointer
+ Pack((StgWord)(frame->updatee)); // ToDo: follow link
+
+ p += 3;
+ }
+ /* NOTE(review): no `continue' here -- control falls through into
+ the STOP_FRAME case below and packs another header at the
+ advanced p; this looks like a missing `continue'. Confirm
+ against the PAP unpacking code before changing. */
+
+ /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */
+ case STOP_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: STOP_FRAME",
+ p));
+ Pack((StgWord)((StgStopFrame *)p)->header.info);
+ p++;
+ }
+ /* NOTE(review): falls through into CATCH_FRAME -- same concern
+ as for UPDATE_FRAME above. */
+
+ case CATCH_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: CATCH_FRAME (handler=%p)",
+ p, ((StgCatchFrame *)p)->handler));
+
+ Pack((StgWord)((StgCatchFrame *)p)->header.info);
+ Pack((StgWord)((StgCatchFrame *)p)->link); // ToDo: fix intra-stack pointer
+ Pack((StgWord)((StgCatchFrame *)p)->exceptions_blocked);
+ Pack((StgWord)((StgCatchFrame *)p)->handler);
+ p += 4;
+ }
+ /* NOTE(review): falls through into SEQ_FRAME -- same concern
+ as for UPDATE_FRAME above. */
+
+ case SEQ_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: UPDATE_FRAME (link=%p)",
+ p, ((StgSeqFrame *)p)->link));
+
+ Pack((StgWord)((StgSeqFrame *)p)->header.info);
+ Pack((StgWord)((StgSeqFrame *)p)->link); // ToDo: fix intra-stack pointer
+
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+
+ p = (StgPtr)&(((StgClosure *)p)->payload);
+ goto small_bitmap;
+ }
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: RET_{BCO,SMALL,VEC_SMALL} (bitmap=%o)",
+ p, info->layout.bitmap));
+
+
+ Pack((StgWord)((StgClosure *)p)->header.info);
+ p++;
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+ /* this assumes that the payload starts immediately after the info-ptr */
+
+ small_bitmap:
+ /* a clear bitmap bit marks a pointer: pack it as a FETCH_ME */
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)*p++); // pack a FetchMe to the closure
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ Pack((StgWord)*p++);
+ }
+ bitmap = bitmap >> 1;
+ }
+
+ follow_srt:
+ IF_PAR_DEBUG(pack,
+ belch("*>-- PackPAP: nothing to do for follow_srt"));
+ continue;
+
+ /* large bitmap (> 32 entries) */
+ case RET_BIG:
+ case RET_VEC_BIG:
+ {
+ StgPtr q;
+ StgLargeBitmap *large_bitmap;
+
+ IF_PAR_DEBUG(pack,
+ belch("*>** PackPAP @ %p: RET_{BIG,VEC_BIG} (large_bitmap=%p)",
+ p, info->layout.large_bitmap));
+
+
+ Pack((StgWord)((StgClosure *)p)->header.info);
+ p++;
+
+ large_bitmap = info->layout.large_bitmap;
+
+ for (j=0; j<large_bitmap->size; j++) {
+ bitmap = large_bitmap->bitmap[j];
+ q = p + BITS_IN(W_);
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)*p++); // ToDo: pack pointer(StgClosure *)*p = evacuate((StgClosure *)*p);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ Pack((StgWord)*p++);
+ }
+ bitmap = bitmap >> 1;
+ }
+ if (j+1 < large_bitmap->size) {
+ while (p < q) {
+ Pack((StgWord)(ARGTAG_MAX+1));
+ PackFetchMe((StgClosure *)*p++); // ToDo: pack pointer (StgClosure *)*p = evacuate((StgClosure *)*p);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ }
+ }
+ }
+
+ /* and don't forget to follow the SRT */
+ goto follow_srt;
+ }
+
+ default:
+ barf("PackPAP: weird activation record found on stack (@ %p): %d",
+ p, (int)(info->type));
+ }
+ }
+ // fill in size of the PAP (only the payload!) in buffer
+ globalPackBuffer->buffer[pack_start] = (StgWord)(pack_locn - pack_start - 1*sizeofW(StgWord));
+ /*
+ We can use the generic pap_sizeW macro to compute the size of the
+ unpacked PAP because whenever we pack a new FETCHME as part of the
+ PAP's payload we also adjust unpacked_size accordingly (smart, aren't we?)
+
+ NB: the current PAP (un-)packing code relies on the fact that
+ the size of the unpacked PAP + size of all unpacked FMs is the same as
+ the size of the packed PAP!!
+ */
+ unpacked_size += pap_sizeW(pap); // sizeofW(pap) + (nat)(globalPackBuffer->buffer[pack_start]);
+ IF_DEBUG(sanity,
+ ASSERT(unpacked_size-unpacked_size_before_PAP==pap_sizeW(pap)+FMs_in_PAP*sizeofW(StgFetchMe)));
+}
+# else /* GRAN */
+
+/* Fake the packing of a closure */
+
+/*
+  PackClosure (GranSim version): simulate the packing of a closure.
+  Nothing is serialised; the closure pointer itself is recorded in the
+  internal pack buffer (via Pack) and unpacked_size is adjusted so that
+  the simulation sees realistic packet sizes.  Indirections are NOT
+  shorted out here, and updatable thunks are converted to RBHs just as
+  in GUM.  Fixes in this revision: swapped %s/%p specifiers in one
+  debug belch, a format string that lacked a %s for its fifth argument,
+  %#x used for a pointer, and a `default:' label with no statement
+  (invalid C) at the end of the switch.
+*/
+void
+PackClosure(closure)
+StgClosure *closure;
+{
+ StgInfoTable *info, *childInfo;
+ nat size, ptrs, nonptrs, vhs;
+ char info_hdr_ty[80];
+ nat i;
+ StgClosure *indirectee, *rbh;
+ char str[80];
+ rtsBool is_mutable, will_be_rbh, no_more_thunks_please;
+
+ is_mutable = rtsFalse;
+
+ /* In GranSim we don't pack and unpack closures -- we just simulate
+ packing by updating the bitmask. So, the graph structure is unchanged
+ i.e. we don't short out indirections here. -- HWL */
+
+ /* Nothing to do with packing but good place to (sanity) check closure;
+ if the closure is a thunk, it must be unique; otherwise we have copied
+ work at some point before that which violates one of our main global
+ assertions in GranSim/GUM */
+ ASSERT(!closure_THUNK(closure) || is_unique(closure));
+
+ IF_GRAN_DEBUG(pack,
+ belch("** Packing closure %p (%s)",
+ closure, info_type(closure)));
+
+ if (where_is(closure) != where_is(graph_root)) {
+ IF_GRAN_DEBUG(pack,
+ belch("** faking a FETCHME [current PE: %d, closure's PE: %d]",
+ where_is(graph_root), where_is(closure)));
+
+ /* GUM would pack a FETCHME here; simulate that by increasing the */
+ /* unpacked size accordingly but don't pack anything -- HWL */
+ unpacked_size += _HS + 2 ; // sizeofW(StgFetchMe);
+ return;
+ }
+
+ /* If the closure's not already being packed */
+ if (!NotYetPacking(closure))
+ /* Don't have to do anything in GrAnSim if closure is already */
+ /* packed -- HWL */
+ {
+ IF_GRAN_DEBUG(pack,
+ belch("** Closure %p is already packed and omitted now!",
+ closure));
+ return;
+ }
+
+ switch (get_itbl(closure)->type) {
+ /* ToDo: check for sticky bit here? */
+ /* BH-like closures which must not be moved to another PE */
+ case CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case SE_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case SE_CAF_BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case BLACKHOLE: /* # of ptrs, nptrs: 0,2 */
+ case BLACKHOLE_BQ: /* # of ptrs, nptrs: 1,1 */
+ case RBH: /* # of ptrs, nptrs: 1,1 */
+ /* same for these parallel specific closures */
+ case BLOCKED_FETCH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ IF_GRAN_DEBUG(pack,
+ belch("** Avoid packing BH-like closures (%p, %s)!",
+ closure, info_type(closure)));
+ /* Just ignore RBHs i.e. they stay where they are */
+ return;
+
+ case THUNK_SELECTOR:
+ {
+ StgClosure *selectee = ((StgSelector *)closure)->selectee;
+
+ IF_GRAN_DEBUG(pack,
+ belch("** Avoid packing THUNK_SELECTOR (%p, %s) but queuing %p (%s)!",
+ closure, info_type(closure), selectee, info_type(selectee)));
+ QueueClosure(selectee);
+ IF_GRAN_DEBUG(pack,
+ belch("** [%p (%s) (Queueing closure) ....]",
+ selectee, info_type(selectee)));
+ }
+ return;
+
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ /* For now we ship indirections to CAFs:
+ * They are evaluated on each PE if needed */
+ IF_GRAN_DEBUG(pack,
+ belch("** Nothing to pack for %p (%s)!",
+ closure, info_type(closure)));
+ // Pack(closure); GUM only
+ return;
+
+ case CONSTR_CHARLIKE:
+ case CONSTR_INTLIKE:
+ /* BUGFIX: format specifiers were swapped (%s got the pointer,
+ %p got the string); now matches the argument order */
+ IF_GRAN_DEBUG(pack,
+ belch("** Nothing to pack for %p (%s)!",
+ closure, info_type(closure)));
+ // PackPLC(((StgIntCharlikeClosure *)closure)->data); GUM only
+ return;
+
+ case AP_UPD:
+ case PAP:
+ /* partial applications; special treatment necessary? */
+ break;
+
+ case MVAR:
+ barf("{PackClosure}Daq Qagh: found an MVAR (%p, %s); ToDo: implement proper treatment of MVARs",
+ closure, info_type(closure));
+
+ case ARR_WORDS:
+ case MUT_VAR:
+ case MUT_ARR_PTRS:
+ case MUT_ARR_PTRS_FROZEN:
+ /* Mutable objects; require special treatment to ship all data */
+ is_mutable = rtsTrue;
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ /* weak pointers and other FFI objects */
+ barf("{PackClosure}Daq Qagh: found an FFI object (%p, %s); FFI not yet supported by GranSim, sorry",
+ closure, info_type(closure));
+
+ case TSO:
+ /* parallel objects */
+ barf("{PackClosure}Daq Qagh: found a TSO when packing (%p, %s); thread migration not yet implemented, sorry",
+ closure, info_type(closure));
+
+ case BCO:
+ /* Hugs objects (i.e. closures used by the interpreter) */
+ barf("{PackClosure}Daq Qagh: found a Hugs closure when packing (%p, %s); GranSim not yet integrated with Hugs, sorry",
+ closure, info_type(closure));
+
+ case IND: /* # of ptrs, nptrs: 1,0 */
+ case IND_STATIC: /* # of ptrs, nptrs: 1,0 */
+ case IND_PERM: /* # of ptrs, nptrs: 1,1 */
+ case IND_OLDGEN: /* # of ptrs, nptrs: 1,1 */
+ case IND_OLDGEN_PERM: /* # of ptrs, nptrs: 1,1 */
+ /* we shouldn't find an indirection here, because we have shorted them
+ out at the beginning of this functions already.
+ */
+ break;
+ /* should be:
+ barf("{PackClosure}Daq Qagh: found indirection when packing (%p, %s)",
+ closure, info_type(closure));
+ */
+
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ case STOP_FRAME:
+ /* stack frames; should never be found when packing for now;
+ once we support thread migration these have to be covered properly
+ */
+ barf("{PackClosure}Daq Qagh: found stack frame when packing (%p, %s)",
+ closure, info_type(closure));
+
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ /* vectored returns; should never be found when packing; */
+ barf("{PackClosure}Daq Qagh: found vectored return (%p, %s)",
+ closure, info_type(closure));
+
+ case INVALID_OBJECT:
+ barf("{PackClosure}Daq Qagh: found Invalid object (%p, %s)",
+ closure, info_type(closure));
+
+ default:
+ /*
+ Here we know that the closure is a CONSTR, FUN or THUNK (maybe
+ a specialised version with wired in #ptr/#nptr info; currently
+ we treat these specialised versions like the generic version)
+ */
+ break; /* BUGFIX: a label must precede a statement; fall through
+ to the generic packing code below */
+ } /* switch */
+
+ /* Otherwise it's not Fixed */
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ will_be_rbh = closure_THUNK(closure) && !closure_UNPOINTED(closure);
+
+ /* BUGFIX: the format string lacked a %s for the fifth argument
+ (the will-become-RBH note), leaving it unprinted */
+ IF_GRAN_DEBUG(pack,
+ belch("** Info on closure %p (%s): size=%d; ptrs=%d; %s",
+ closure, info_type(closure),
+ size, ptrs,
+ (will_be_rbh) ? "will become RBH" : "will NOT become RBH"));
+
+ // check whether IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) -- HWL
+ no_more_thunks_please =
+ (RtsFlags.GranFlags.ThunksToPack>0) &&
+ (packed_thunks>=RtsFlags.GranFlags.ThunksToPack);
+
+ /*
+ should be covered by get_closure_info
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ info->type == BLACKHOLE || info->type == RBH )
+ size = ptrs = nonptrs = vhs = 0;
+ */
+ /* Now peek ahead to see whether the closure has any primitive */
+ /* array children */
+ /*
+ ToDo: fix this code
+ for (i = 0; i < ptrs; ++i) {
+ P_ childInfo;
+ W_ childSize, childPtrs, childNonPtrs, childVhs;
+
+ childInfo = get_closure_info(((StgPtrPtr) (closure))[i + _HS + vhs],
+ &childSize, &childPtrs, &childNonPtrs,
+ &childVhs, junk_str);
+ if (IS_BIG_MOTHER(childInfo)) {
+ reservedPAsize += PACK_GA_SIZE + _HS +
+ childVhs + childNonPtrs +
+ childPtrs * PACK_FETCHME_SIZE;
+ PAsize += PACK_GA_SIZE + _HS + childSize;
+ PAptrs += childPtrs;
+ }
+ }
+ */
+ /* Don't pack anything (GrAnSim) if it's a black hole, or the buffer
+ * is full and it isn't a primitive array. N.B. Primitive arrays are
+ * always packed (because their parents index into them directly) */
+
+ if (IS_BLACK_HOLE(closure))
+ /*
+ ToDo: fix this code
+ ||
+ !(RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs)
+ || IS_BIG_MOTHER(info)))
+ */
+ return;
+
+ /* At last! A closure we can actually pack! */
+
+ if (closure_MUTABLE(closure)) // not nec. && (info->type != FETCHME))
+ belch("ghuH: Replicated a Mutable closure!");
+
+ if (RtsFlags.GranFlags.GranSimStats.Global &&
+ no_more_thunks_please && will_be_rbh) {
+ globalGranStats.tot_cuts++;
+ if ( RtsFlags.GranFlags.Debug.pack )
+ /* BUGFIX: %#x is wrong for a pointer (truncates on 64 bit); use %p */
+ belch("** PackClosure (w/ ThunksToPack=%d): Cutting tree with root at %p\n",
+ RtsFlags.GranFlags.ThunksToPack, closure);
+ } else if (will_be_rbh || (closure==graph_root) ) {
+ packed_thunks++;
+ globalGranStats.tot_thunks++;
+ }
+
+ if (no_more_thunks_please && will_be_rbh)
+ return; /* don't pack anything */
+
+ /* actual PACKING done here -- HWL */
+ Pack(closure);
+ for (i = 0; i < ptrs; ++i) {
+ /* extract i-th pointer from closure */
+ QueueClosure((StgClosure *)(closure->payload[i]));
+ IF_GRAN_DEBUG(pack,
+ belch("** [%p (%s) (Queueing closure) ....]",
+ closure->payload[i],
+ info_type(*stgCast(StgPtr*,((closure)->payload+(i))))));
+ //^^^^^^^^^^^ payloadPtr(closure,i))));
+ }
+
+ /*
+ for packing words (GUM only) do something like this:
+
+ for (i = 0; i < ptrs; ++i) {
+ Pack(payloadWord(obj,i+j));
+ }
+ */
+ /* Turn thunk into a revertible black hole. */
+ if (will_be_rbh) {
+ rbh = convertToRBH(closure);
+ ASSERT(rbh != NULL);
+ }
+}
+# endif /* PAR */
+
+//@node Low level packing routines, Unpacking routines, Packing Functions, Graph packing
+//@subsection Low level packing routines
+
+/*
+ @Pack@ is the basic packing routine. It just writes a word of data into
+ the pack buffer and increments the pack location. */
+
+//@cindex Pack
+
+# if defined(PAR)
+/* GUM: write one word of data into the pack buffer and advance the pack
+   location.  The ASSERT guards against overrunning the configured buffer;
+   callers presumably checked for room beforehand (cf. RoomToPack) --
+   TODO confirm against call sites. */
+static void
+Pack(data)
+StgWord data;
+{
+  ASSERT(pack_locn < RtsFlags.ParFlags.packBufferSize);
+  globalPackBuffer->buffer[pack_locn++] = data;
+}
+#endif
+
+#if defined(GRAN)
+/* GranSim variant: packs a closure *pointer* (not a raw word) into the
+   internal pack buffer, growing the buffer if necessary, and adds the
+   closure's (heap) size to unpacked_size for simulation accounting. */
+static void
+Pack(closure)
+StgClosure *closure;
+{
+  StgInfoTable *info;
+  nat size, ptrs, nonptrs, vhs;
+  char str[80];
+
+  /* This checks the size of the GrAnSim internal pack buffer. The simulated
+     pack buffer is checked via RoomToPack (as in GUM) */
+  if (pack_locn >= (int)globalPackBuffer->size+sizeofW(rtsPackBuffer))
+    reallocPackBuffer();
+
+  if (closure==(StgClosure*)NULL)
+    belch("Qagh {Pack}Daq: Trying to pack 0");
+  globalPackBuffer->buffer[pack_locn++] = closure;
+  /* ASSERT: Data is a closure in GrAnSim here */
+  info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+  // ToDo: is check for MIN_UPD_SIZE really needed? */
+  /* account at least MIN_UPD_SIZE payload so the closure stays updatable */
+  unpacked_size += _HS + (size < MIN_UPD_SIZE ?
+                          MIN_UPD_SIZE :
+                          size);
+}
+# endif  /* GRAN */
+
+/*
+ If a closure is local, make it global. Then, divide its weight for
+ export. The GA is then packed into the pack buffer. */
+
+# if defined(PAR)
+//@cindex GlobaliseAndPackGA
+/* Ensure `closure` has a global address (creating one via makeGlobal if it
+   is not yet in the LAGA table), then pack a GA for it into the buffer as
+   three words: weight, gtid, slot.  If the destination PE already owns the
+   closure, a sentinel weight of 0xFFFFFFFF is packed instead of splitting
+   the weight (0,1,2 are reserved tag values -- see UnpackGA). */
+static void
+GlobaliseAndPackGA(closure)
+StgClosure *closure;
+{
+  globalAddr *ga;
+  globalAddr packGA;
+
+  if ((ga = LAGAlookup(closure)) == NULL) {
+    ga = makeGlobal(closure, rtsTrue);
+
+    // Global statistics: increase amount of global data by closure-size
+    if (RtsFlags.ParFlags.ParStats.Global &&
+        RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+      StgInfoTable *info;
+      nat size, ptrs, nonptrs, vhs, i, m; // stats only!!
+      char str[80]; // stats only!!
+
+      info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+      globalParStats.tot_global += size;
+    }
+  }
+  ASSERT(ga->weight==MAX_GA_WEIGHT || ga->weight > 2);
+
+  if(dest_gtid==ga->payload.gc.gtid)
+  { packGA.payload = ga->payload;
+    packGA.weight = 0xFFFFFFFF; // 0,1,2 are used already
+  }
+  else
+  { splitWeight(&packGA, ga);
+    ASSERT(packGA.weight > 0);
+  }
+
+  IF_PAR_DEBUG(pack,
+               fprintf(stderr, "*>## %p (%s): Globalising (%s) closure with GA ",
+                       closure, info_type(closure),
+                       ( (ga->payload.gc.gtid==dest_gtid)?"returning":
+                         ( (ga->payload.gc.gtid==mytid)?"creating":"sharing" ) ));
+               printGA(&packGA);
+               fputc('\n', stderr));
+
+
+  Pack((StgWord) packGA.weight);
+  Pack((StgWord) packGA.payload.gc.gtid);
+  Pack((StgWord) packGA.payload.gc.slot);
+}
+
+/*
+ @PackPLC@ makes up a bogus GA for a PLC. Weight 0 implies that a PLC
+ address follows instead of PE, slot. */
+
+//@cindex PackPLC
+
+/* Pack a bogus two-word GA for a static closure (PLC): weight 0 signals
+   that a raw static address follows instead of (PE, slot). */
+static void
+PackPLC(addr)
+StgPtr addr;
+{
+  Pack(0L);			/* weight */
+  Pack((StgWord) addr);		/* address */
+}
+
+/*
+ @PackOffset@ packs a special GA value that will be interpreted as an
+ offset to a closure in the pack buffer. This is used to avoid unfolding
+ the graph structure into a tree. */
+
+/* Pack a special three-word GA (weight 1, pe 0, slot = offset) that the
+   unpacker interprets as an offset to a closure already in the pack
+   buffer; this preserves sharing instead of unfolding the graph. */
+static void
+PackOffset(offset)
+int offset;
+{
+  /*
+  IF_PAR_DEBUG(pack,
+	       belch("** Packing Offset %d at pack location %u",
+		     offset, pack_locn));
+  */
+  Pack(1L);			/* weight */
+  Pack(0L);			/* pe */
+  Pack(offset);		        /* slot/offset */
+}
+# endif /* PAR */
+
+//@node Unpacking routines, Aux fcts for packing, Low level packing routines, Graph packing
+//@subsection Unpacking routines
+
+/*
+ This was formerly in the (now deceased) module Unpack.c
+
+ Unpacking closures which have been exported to remote processors
+
+ This module defines routines for unpacking closures in the parallel
+ runtime system (GUM).
+
+ In the case of GrAnSim, this module defines routines for *simulating* the
+ unpacking of closures as it is done in the parallel runtime system.
+*/
+
+//@node GUM code, GranSim Code, Unpacking routines, Unpacking routines
+//@subsubsection GUM code
+
+#if defined(PAR)
+
+//@cindex InitPendingGABuffer
+/* Allocate (once) the buffer holding (oldGA,newGA) pairs produced during
+   unpacking, and reset the cursor `gaga` to its start.  The buffer holds
+   `size` pairs, i.e. size*2 globalAddr entries.  Never freed here --
+   presumably lives for the whole run; TODO confirm. */
+void
+InitPendingGABuffer(size)
+nat size;
+{
+  if (PendingGABuffer==(globalAddr *)NULL)
+    PendingGABuffer = (globalAddr *)
+      stgMallocBytes(size*2*sizeof(globalAddr),
+		     "InitPendingGABuffer");
+
+  /* current location in the buffer */
+  gaga = PendingGABuffer;
+}
+
+/*
+ @CommonUp@ commons up two closures which we have discovered to be
+ variants of the same object. One is made an indirection to the other. */
+
+//@cindex CommonUp
+/* Common up two closures discovered to be variants of the same object:
+   `src` is overwritten with an indirection to `dst` (UPD_IND), which also
+   awakens any blocking queue attached to src.  Closures too small to hold
+   an indirection, and types not listed below, are left untouched. */
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+  /* NOTE(review): bqe is extracted per closure type but not used after the
+     switch in this function; UPD_IND appears to handle queue wake-up. */
+  StgBlockingQueueElement *bqe;
+#if defined(DEBUG)
+  StgInfoTable *info;
+  nat size, ptrs, nonptrs, vhs, i;
+  char str[80];
+
+  /* get info about basic layout of the closure */
+  /* `size` is only available in DEBUG builds; the ASSERTs below that use
+     it compile away otherwise, so this is consistent. */
+  info = get_closure_info(src, &size, &ptrs, &nonptrs, &vhs, str);
+#endif
+
+  ASSERT(src != (StgClosure *)NULL && dst != (StgClosure *)NULL);
+  ASSERT(src != dst);
+
+  IF_PAR_DEBUG(pack,
+	       belch("*___ CommonUp %p (%s) --> %p (%s)",
+		     src, info_type(src), dst, info_type(dst)));
+
+  switch (get_itbl(src)->type) {
+  case BLACKHOLE_BQ:
+    bqe = ((StgBlockingQueue *)src)->blocking_queue;
+    break;
+
+  case FETCH_ME_BQ:
+    bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue;
+    break;
+
+  case RBH:
+    bqe = ((StgRBH *)src)->blocking_queue;
+    break;
+
+  case BLACKHOLE:
+  case FETCH_ME:
+    bqe = END_BQ_QUEUE;
+    break;
+
+    /* These closures are too small to be updated with an indirection!!! */
+  case CONSTR_1_0:
+  case CONSTR_0_1:
+    ASSERT(size<_HS+MIN_UPD_SIZE); // that's why we have to avoid UPD_IND
+    return;
+
+    /* currently we also common up 2 CONSTRs; this should reduce heap
+     * consumption but also does more work; not sure whether it's worth doing
+     */
+  case CONSTR:
+  case CONSTR_2_0:
+  case CONSTR_1_1:
+  case CONSTR_0_2:
+  case ARR_WORDS:
+  case MUT_ARR_PTRS:
+  case MUT_ARR_PTRS_FROZEN:
+  case MUT_VAR:
+    break;
+
+  default:
+    /* Don't common up anything else */
+    return;
+  }
+
+  /* closure must be big enough to permit update with ind */
+  ASSERT(size>=_HS+MIN_UPD_SIZE);
+  /* NB: this also awakens the blocking queue for src */
+  UPD_IND(src, dst);
+}
+
+/*
+ * Common up the new closure with any existing closure having the same
+ * GA
+ */
+//@cindex SetGAandCommonUp
+/* Bind the freshly unpacked `closure` to its global address `ga`, or -- if
+   a closure with that GA already exists locally -- common the two up and
+   return the surviving closure.  Also records (oldGA,newGA) pairs in the
+   gaga buffer for thunks, so the sender can be sent an ACK mapping.
+   Returns the closure the caller should link into the graph (may differ
+   from the argument when commoning-up kept the pre-existing copy). */
+static StgClosure *
+SetGAandCommonUp(globalAddr *ga, StgClosure *closure, rtsBool hasGA)
+{
+  StgClosure *existing;
+  StgInfoTable *ip, *oldip;
+  globalAddr *newGA;
+
+  if (!hasGA)
+    return closure;
+
+  /* should we already have a local copy? */
+  /* 0xFFFFFFFF is the sentinel packed by GlobaliseAndPackGA when the
+     destination PE already owned the closure */
+  if (ga->weight==0xFFFFFFFF) {
+    ASSERT(ga->payload.gc.gtid==mytid); //sanity
+    ga->weight=0;
+    /* probably should also ASSERT that a commonUp takes place...*/
+  }
+
+  ip = get_itbl(closure);
+  if ((existing = GALAlookup(ga)) == NULL) {
+    /* Just keep the new object */
+    IF_PAR_DEBUG(pack,
+		 belch("*<## New local object for GA ((%x, %d, %x)) is %p (%s)",
+		       ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight,
+		       closure, info_type(closure)));
+
+    // make an entry binding closure to ga in the RemoteGA table
+    newGA = setRemoteGA(closure, ga, rtsTrue);
+    // if local closure is a FETCH_ME etc fill in the global indirection
+    if (ip->type == FETCH_ME || ip->type == REMOTE_REF)
+      ((StgFetchMe *)closure)->ga = newGA;
+  } else {
+
+
+#ifdef DIST
+// ***************************************************************************
+// ***************************************************************************
+// REMOTE_REF HACK  - dual is in PackRemoteRef
+// - prevents the weight ever being updated
+    if (ip->type == REMOTE_REF)
+      ga->weight=0;
+// ***************************************************************************
+// ***************************************************************************
+#endif /* DIST */
+
+    /* Two closures, one global name.  Someone loses */
+    oldip = get_itbl(existing);
+    /* keep the new copy only if the existing one is "worthless" (a
+       FETCH_ME, a black hole, or a shareable CONSTR) and the new one is
+       not itself a FETCH_ME */
+    if ((oldip->type == FETCH_ME ||
+	 IS_BLACK_HOLE(existing) ||
+	 /* try to share evaluated closures */
+	 oldip->type == CONSTR ||
+	 oldip->type == CONSTR_1_0 ||
+	 oldip->type == CONSTR_0_1 ||
+	 oldip->type == CONSTR_2_0 ||
+	 oldip->type == CONSTR_1_1 ||
+	 oldip->type == CONSTR_0_2
+	 ) &&
+	ip->type != FETCH_ME)
+    {
+      IF_PAR_DEBUG(pack,
+		   belch("*<#- Duplicate local object for GA ((%x, %d, %x)); redirecting %p (%s) -> %p (%s)",
+			 ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight,
+			 existing, info_type(existing), closure, info_type(closure)));
+
+      /*
+       * What we had wasn't worth keeping, so make the old closure an
+       * indirection to the new closure (copying BQs if necessary) and
+       * make sure that the old entry is not the preferred one for this
+       * closure.
+       */
+      CommonUp(existing, closure);
+      //GALAdeprecate(ga);
+#if defined(DEBUG)
+      {
+	StgInfoTable *info;
+	nat size, ptrs, nonptrs, vhs, i;
+	char str[80];
+
+	/* get info about basic layout of the closure */
+	info = get_closure_info(GALAlookup(ga), &size, &ptrs, &nonptrs, &vhs, str);
+
+	/* now ga indirectly refers to the new closure */
+	ASSERT(size<_HS+MIN_UPD_SIZE ||
+	       UNWIND_IND(GALAlookup(ga))==closure);
+      }
+#endif
+    } else {
+      /*
+       * Either we already had something worthwhile by this name or
+       * the new thing is just another FetchMe.  However, the thing we
+       * just unpacked has to be left as-is, or the child unpacking
+       * code will fail.  Remember that the way pointer words are
+       * filled in depends on the info pointers of the parents being
+       * the same as when they were packed.
+       */
+      IF_PAR_DEBUG(pack,
+		   belch("*<#@ Duplicate local object for GA ((%x, %d, %x)); keeping %p (%s) nuking unpacked %p (%s)",
+			 ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight,
+			 existing, info_type(existing), closure, info_type(closure)));
+
+      /* overwrite 2nd word; indicates that the closure is garbage */
+      IF_DEBUG(sanity,
+	       ((StgFetchMe*)closure)->ga = (globalAddr*)GARBAGE_MARKER;
+	       IF_PAR_DEBUG(pack,
+			    belch("++++ unpacked closure %p (%s) is garbage: %p",
+				  closure, info_type(closure), *(closure+1))));
+
+      closure = existing;
+#if 0
+      // HACK
+      ty = get_itbl(closure)->type;
+      if (ty == CONSTR ||
+	  ty == CONSTR_1_0 ||
+	  ty == CONSTR_0_1 ||
+	  ty == CONSTR_2_0 ||
+	  ty == CONSTR_1_1 ||
+	  ty == CONSTR_0_2)
+	CommonUp(closure, graph);
+#endif
+    }
+    /* We don't use this GA after all, so give back the weight */
+    (void) addWeight(ga);
+  }
+
+  /* if we have unpacked a FETCH_ME, we have a GA, too */
+  ASSERT(get_itbl(closure)->type!=FETCH_ME ||
+	 looks_like_ga(((StgFetchMe*)closure)->ga));
+
+  /* Sort out the global address mapping */
+  if (ip_THUNK(ip)){
+    // || // (ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||
+    //(ip_MUTABLE(ip) && ip->type != FETCH_ME)) {
+    /* Make up new GAs for single-copy closures */
+    globalAddr *newGA = makeGlobal(closure, rtsTrue);
+
+    // It's a new GA and therefore has the full weight
+    ASSERT(newGA->weight==0);
+
+    /* Create an old GA to new GA mapping */
+    *gaga++ = *ga;
+    splitWeight(gaga, newGA);
+    /* inlined splitWeight; we know that newGALA has full weight
+    newGA->weight = gaga->weight = 1L << (BITS_IN(unsigned) - 1);
+    gaga->payload = newGA->payload;
+    */
+    ASSERT(gaga->weight == 1U << (BITS_IN(unsigned) - 1));
+    gaga++;
+  }
+  return closure;
+}
+
+/*
+ Copies a segment of the buffer, starting at @bufptr@, representing a closure
+ into the heap at @graph@.
+ */
+//@cindex FillInClosure
+/* Copy one closure from the pack buffer (at *bufptrP) into the heap at
+   `graph`: fixed header, variable header and non-pointers are copied;
+   pointer fields are left to be filled in later by the parent-tracking
+   loop in UnpackGraph.  ARR_WORDS and PAP/AP_UPD closures are delegated
+   to their specialised unpackers.  Advances *bufptrP past the closure and
+   returns the closure's size in words in the heap. */
+static nat
+FillInClosure(StgWord ***bufptrP, StgClosure *graph)
+{
+  StgInfoTable *ip;
+  StgWord **bufptr = *bufptrP;
+  nat ptrs, nonptrs, vhs, i, size;
+  char str[80];
+
+  ASSERT(LOOKS_LIKE_GHC_INFO(((StgClosure*)bufptr)->header.info));
+
+  /*
+   * Close your eyes.  You don't want to see where we're looking. You
+   * can't get closure info until you've unpacked the variable header,
+   * but you don't know how big it is until you've got closure info.
+   * So...we trust that the closure in the buffer is organized the
+   * same way as they will be in the heap...at least up through the
+   * end of the variable header.
+   */
+  ip = get_closure_info((StgClosure *)bufptr, &size, &ptrs, &nonptrs, &vhs, str);
+
+  /* Make sure that nothing sans the fixed header is filled in
+     The ga field of the FETCH_ME is filled in in SetGAandCommonUp */
+  if (ip->type == FETCH_ME || ip->type == REMOTE_REF) {
+    ASSERT(size>=_HS+MIN_UPD_SIZE);    // size of the FM in the heap
+    ptrs = nonptrs = vhs = 0;      // i.e. only unpack FH from buffer
+  }
+  /* ToDo: check whether this is really needed */
+  if (ip->type == ARR_WORDS) {
+    UnpackArray(bufptrP, graph);
+    return arr_words_sizeW((StgArrWords *)bufptr);
+  }
+
+  if (ip->type == PAP || ip->type == AP_UPD) {
+    return UnpackPAP(bufptrP, graph); // includes size of unpacked FMs
+  }
+
+  /*
+     Remember, the generic closure layout is as follows:
+        +-------------------------------------------------+
+        | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+        +-------------------------------------------------+
+  */
+  /* Fill in the fixed header */
+  for (i = 0; i < _HS; i++)
+    ((StgPtr)graph)[i] = (StgWord)*bufptr++;
+
+  /* Fill in the packed variable header */
+  for (i = 0; i < vhs; i++)
+    ((StgPtr)graph)[_HS + i] = (StgWord)*bufptr++;
+
+  /* Pointers will be filled in later */
+
+  /* Fill in the packed non-pointers */
+  for (i = 0; i < nonptrs; i++)
+    ((StgPtr)graph)[_HS + i + vhs + ptrs] = (StgWord)*bufptr++;
+
+  /* Indirections are never packed */
+  // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+  // return bufptr;
+   *bufptrP = bufptr;
+   ASSERT(((ip->type==FETCH_ME || ip->type==REMOTE_REF)&& sizeofW(StgFetchMe)==size) ||
+	  _HS+vhs+ptrs+nonptrs == size);
+  return size;
+}
+
+/*
+ Find the next pointer field in the parent closure.
+ If the current parent has been completely unpacked already, get the
+ next closure from the global closure queue.
+*/
+//@cindex LocateNextParent
+/* Advance *pptrP to the next pointer field of the current parent closure;
+   when the parent is exhausted, dequeue the next parent from the global
+   closure queue and reset the pointer index.  On return *parentP may be
+   NULL (queue empty => unpacking done); otherwise *pptrP/*pptrsP/*sizeP
+   describe the (possibly new) parent. */
+static void
+LocateNextParent(parentP, pptrP, pptrsP, sizeP)
+StgClosure **parentP;
+nat *pptrP, *pptrsP, *sizeP;
+{
+  StgInfoTable *ip; // debugging
+  nat nonptrs, pvhs;
+  char str[80];
+
+  /* pptr as an index into the current parent; find the next pointer field
+     in the parent by increasing pptr; if that takes us off the closure
+     (i.e. *pptr + 1 > *pptrs) grab a new parent from the closure queue
+  */
+  (*pptrP)++;
+  while (*pptrP + 1 > *pptrsP) {
+    /* *parentP has been constructed (all pointer set); so check it now */
+    IF_DEBUG(sanity,
+	     if ((*parentP!=(StgClosure*)NULL) &&         // not root
+		 (*((StgPtr)(*parentP)+1)!=GARBAGE_MARKER) && // not commoned up
+		 (get_itbl(*parentP)->type != FETCH_ME))
+	     checkClosure(*parentP));
+
+    *parentP = DeQueueClosure();
+
+    if (*parentP == NULL)
+      break;
+    else {
+      ip = get_closure_info(*parentP, sizeP, pptrsP, &nonptrs,
+			    &pvhs, str);
+      *pptrP = 0;
+    }
+  }
+  /* *parentP points to the new (or old) parent; */
+  /* *pptr, *pptrs and *size have been updated referring to the new parent */
+}
+
+/*
+ UnpackClosure is the heart of the unpacking routine. It is called for
+ every closure found in the packBuffer. Any prefix such as GA, PLC marker
+ etc has been unpacked into the *ga structure.
+ UnpackClosure does the following:
+ - check for the kind of the closure (PLC, Offset, std closure)
+ - copy the contents of the closure from the buffer into the heap
+ - update LAGA tables (in particular if we end up with 2 closures
+ having the same GA, we make one an indirection to the other)
+ - set the GAGA map in order to send back an ACK message
+
+ At the end of this function *graphP has been updated to point to the
+ next free word in the heap for unpacking the rest of the graph and
+ *bufptrP points to the next word in the pack buffer to be unpacked.
+*/
+
+/* Unpack one closure from the buffer.  Dispatches on the kind of prefix
+   already decoded into *ga: a PLC (static-closure address), an offset
+   (back-reference into the buffer), or a real closure body, which is
+   copied into the heap at *graphP, queued for child processing, and
+   possibly commoned up with an existing local copy.  *bufptrP and *graphP
+   are advanced; the (possibly commoned-up) closure is returned. */
+static  StgClosure*
+UnpackClosure (StgWord ***bufptrP, StgClosure **graphP, globalAddr *ga) {
+  StgClosure *closure;
+  nat size;
+  rtsBool hasGA = rtsFalse, unglobalised = rtsFalse;
+
+  /* Now unpack the closure body, if there is one; three cases:
+     - PLC: closure is just a pointer to a static closure
+     - Offset: closure has been unpacked already
+     - else: copy data from packet into closure
+  */
+  if (isFixed(ga)) {
+    closure = UnpackPLC(ga);
+  } else if (isOffset(ga)) {
+    closure = UnpackOffset(ga);
+  } else {
+    /* if not PLC or Offset it must be a GA and then the closure */
+    ASSERT(RtsFlags.ParFlags.globalising!=0 || LOOKS_LIKE_GA(ga));
+    /* check whether this is an unglobalised closure */
+    unglobalised = isUnglobalised(ga);
+    /* Now we have to build something. */
+    hasGA = !isConstr(ga);
+    /* the new closure will be built here */
+    closure = *graphP;
+
+    /* fill in the closure from the buffer */
+    size = FillInClosure(/*in/out*/bufptrP, /*in*/closure);
+    /* if it is unglobalised, it may not be a thunk!! */
+    ASSERT(!unglobalised || !closure_THUNK(closure));
+
+    /* Add to queue for processing */
+    QueueClosure(closure);
+
+    /* common up with other graph if necessary */
+    if (!unglobalised)
+      closure = SetGAandCommonUp(ga, closure, hasGA);
+
+    /* if we unpacked a THUNK, check that it is large enough to update */
+    ASSERT(!closure_THUNK(closure) || size>=_HS+MIN_UPD_SIZE);
+    /* graph shall point to next free word in the heap */
+    *graphP += size;
+    //*graphP += (size < _HS+MIN_UPD_SIZE) ? _HS+MIN_UPD_SIZE : size; // see ASSERT
+  }
+  return closure;
+}
+
+/*
+ @UnpackGraph@ unpacks the graph contained in a message buffer. It
+ returns a pointer to the new graph. The @gamap@ parameter is set to
+ point to an array of (oldGA,newGA) pairs which were created as a result
+ of unpacking the buffer; @nGAs@ is set to the number of GA pairs which
+ were created.
+
+ The format of graph in the pack buffer is as defined in @Pack.lc@. */
+
+//@cindex UnpackGraph
+/* Top-level unpacker: reconstructs in the heap the graph contained in
+   `packBuffer` and returns its root.  *gamap is set to the array of
+   (oldGA,newGA) pairs created while unpacking (for the ACK message) and
+   *nGAs to the number of such pairs.  Allocates packBuffer->unpacked_size
+   words of heap up front, then walks the buffer closure by closure,
+   wiring each unpacked closure into the next pointer slot of its parent. */
+StgClosure *
+UnpackGraph(packBuffer, gamap, nGAs)
+rtsPackBuffer *packBuffer;
+globalAddr **gamap;
+nat *nGAs;
+{
+  StgWord **bufptr, **slotptr;
+  globalAddr gaS;
+  StgClosure *closure, *graphroot, *graph, *parent;
+  nat size, heapsize, bufsize,
+      pptr = 0, pptrs = 0, pvhs = 0;
+  nat unpacked_closures = 0, unpacked_thunks = 0; // stats only
+
+  IF_PAR_DEBUG(resume,
+	       graphFingerPrint[0] = '\0');
+
+  ASSERT(_HS==1); // HWL HACK; compile time constant
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+  PAR_TICKY_UNPACK_GRAPH_START();
+#endif
+
+  /* Initialisation */
+  InitPacking(rtsTrue);      // same as in PackNearbyGraph
+  globalUnpackBuffer = packBuffer;
+
+  IF_DEBUG(sanity, // do a sanity check on the incoming packet
+	   checkPacket(packBuffer));
+
+  ASSERT(gaga==PendingGABuffer);
+  graphroot = (StgClosure *)NULL;
+
+  /* Unpack the header */
+  bufsize = packBuffer->size;
+  heapsize = packBuffer->unpacked_size;
+  bufptr = packBuffer->buffer;
+
+  /* allocate heap */
+  if (heapsize > 0) {
+    graph = (StgClosure *)allocate(heapsize);
+    ASSERT(graph != NULL);
+    // parallel global statistics: increase amount of global data
+    if (RtsFlags.ParFlags.ParStats.Global &&
+	RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+      globalParStats.tot_global += heapsize;
+    }
+  }
+
+  /* iterate over the buffer contents and unpack all closures */
+  parent = (StgClosure *)NULL;
+  do {
+    /* check that we aren't at the end of the buffer, yet */
+    IF_DEBUG(sanity, ASSERT(*bufptr != END_OF_BUFFER_MARKER));
+
+    /* This is where we will ultimately save the closure's address */
+    slotptr = bufptr;
+
+    /* fill in gaS from buffer; gaS may receive GA, PLC- or offset-marker */
+    bufptr = UnpackGA(/*in*/bufptr, /*out*/&gaS);
+
+    /* this allocates heap space, updates LAGA tables etc */
+    closure = UnpackClosure (/*in/out*/&bufptr, /*in/out*/&graph, /*in*/&gaS);
+    unpacked_closures++; // stats only; doesn't count FMs in PAP!!!
+    unpacked_thunks += (closure_THUNK(closure)) ? 1 : 0; // stats only
+
+    /*
+     * Set parent pointer to point to chosen closure.  If we're at the top of
+     * the graph (our parent is NULL), then we want to arrange to return the
+     * chosen closure to our caller (possibly in place of the allocated graph
+     * root.)
+     */
+    if (parent == NULL)
+      graphroot = closure;
+    else
+      ((StgPtr)parent)[_HS + pvhs + pptr] = (StgWord) closure;
+
+    /* Save closure pointer for resolving offsets */
+    *slotptr = (StgWord*) closure;
+
+    /* Locate next parent pointer */
+    LocateNextParent(&parent, &pptr, &pptrs, &size);
+
+    IF_DEBUG(sanity,
+	     gaS.weight = 0xdeadffff;
+	     gaS.payload.gc.gtid = 0xdead;
+	     gaS.payload.gc.slot = 0xdeadbeef;);
+  } while (parent != NULL);
+
+  IF_PAR_DEBUG(resume,
+	       GraphFingerPrint(graphroot, graphFingerPrint);
+	       ASSERT(strlen(graphFingerPrint)<=MAX_FINGER_PRINT_LEN);
+	       belch(">>> Fingerprint of graph rooted at %p (after unpacking <<%d>>:\n    {%s}",
+		     graphroot, packBuffer->id, graphFingerPrint));
+
+  /* we unpacked exactly as many words as there are in the buffer */
+  ASSERT(bufsize == (nat) (bufptr-(packBuffer->buffer)));
+  /* we filled no more heap closure than we allocated at the beginning;
+     ideally this should be a ==;
+     NB: test is only valid if we unpacked anything at all (graphroot might
+         end up to be a PLC!), therefore the strange test for HEAP_ALLOCED
+  */
+
+  /*
+  {
+   StgInfoTable *info = get_itbl(graphroot);
+   ASSERT(!HEAP_ALLOCED(graphroot) || heapsize >= (nat) (graph-graphroot) ||
+	  // ToDo: check whether CAFs are really a special case here!!
+	  info->type==CAF_BLACKHOLE || info->type==FETCH_ME || info->type==FETCH_ME_BQ);
+  }
+  */
+
+  /* check for magic end-of-buffer word */
+  IF_DEBUG(sanity, ASSERT(*bufptr == END_OF_BUFFER_MARKER));
+
+  *gamap = PendingGABuffer;
+  *nGAs = (gaga - PendingGABuffer) / 2;
+
+  IF_PAR_DEBUG(tables,
+	       belch("** LAGA table after unpacking closure %p:",
+		     graphroot);
+	       printLAGAtable());
+
+  /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */
+  ASSERT(graphroot!=NULL);
+
+  IF_DEBUG(sanity,
+           {
+	     StgPtr p;
+
+	     /* check the unpacked graph */
+	     //checkHeapChunk(graphroot,graph-sizeof(StgWord));
+
+	     // if we do sanity checks, then wipe the pack buffer after unpacking
+	     for (p=(StgPtr)packBuffer->buffer; p<(StgPtr)(packBuffer->buffer)+(packBuffer->size); )
+	       *p++ = 0xdeadbeef;
+            });
+
+  /* reset the global variable */
+  globalUnpackBuffer = (rtsPackBuffer*)NULL;
+
+#if defined(PAR_TICKY) // HWL HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK
+  PAR_TICKY_UNPACK_GRAPH_END(unpacked_closures, unpacked_thunks);
+#endif
+
+  return (graphroot);
+}
+
+//@cindex UnpackGA
+/* Decode the GA/PLC prefix of a packed closure into *ga.  The first word
+   is the weight, which doubles as a tag:
+     == 2 : unglobalised closure follows immediately (no further GA words);
+     >  0 : a proper GA (or offset marker): gtid and slot words follow;
+     == 0 : a PLC: a single static-closure address follows.
+   Returns the buffer pointer advanced past the prefix. */
+static  StgWord **
+UnpackGA(StgWord **bufptr, globalAddr *ga)
+{
+  /* First, unpack the next GA or PLC */
+  ga->weight = (rtsWeight) *bufptr++;
+
+  if (ga->weight == 2) {  // unglobalised closure to follow
+    // nothing to do; closure starts at *bufptr
+  } else if (ga->weight > 0) { // fill in GA
+    ga->payload.gc.gtid = (GlobalTaskId) *bufptr++;
+    ga->payload.gc.slot = (int) *bufptr++;
+  } else {
+    ga->payload.plc = (StgPtr) *bufptr++;
+  }
+  return bufptr;
+}
+
+//@cindex UnpackPLC
+/* A PLC needs no unpacking: the packed "GA" already carries the static
+   closure's local address; just return it. */
+static  StgClosure *
+UnpackPLC(globalAddr *ga)
+{
+  /* No more to unpack; just set closure to local address */
+  IF_PAR_DEBUG(pack,
+	       belch("*<^^ Unpacked PLC at %x", ga->payload.plc));
+  return (StgClosure*)ga->payload.plc;
+}
+
+//@cindex UnpackOffset
+/* Resolve an offset marker: the closure was unpacked earlier and its heap
+   address was cached back into the pack buffer (see UnpackGraph's
+   *slotptr write); fetch it from the slot recorded in ga->payload.gc.slot. */
+static  StgClosure *
+UnpackOffset(globalAddr *ga)
+{
+  /* globalUnpackBuffer is a global var init in UnpackGraph */
+  ASSERT(globalUnpackBuffer!=(rtsPackBuffer*)NULL);
+  /* No more to unpack; just set closure to cached address */
+  IF_PAR_DEBUG(pack,
+	       belch("*<__ Unpacked indirection to %p (was OFFSET %d)",
+		     (StgClosure *)((globalUnpackBuffer->buffer)[ga->payload.gc.slot]),
+		     ga->payload.gc.slot));
+  return (StgClosure *)(globalUnpackBuffer->buffer)[ga->payload.gc.slot];
+}
+
+/*
+ Input: *bufptrP, *graphP ... ptrs to the pack buffer and into the heap.
+
+ *bufptrP points to something that should be unpacked as a FETCH_ME:
+ |
+ v
+ +-------------------------------
+ | GA | FH of FM
+ +-------------------------------
+
+ The first 3 words starting at *bufptrP are the GA address; the next
+ word is the generic FM info ptr followed by the remaining FH (if any)
+ The result after unpacking will be a FETCH_ME closure, pointed to by
+ *graphP at the start of the fct;
+ |
+ v
+ +------------------------+
+ | FH of FM | ptr to a GA |
+ +------------------------+
+
+ The ptr field points into the RemoteGA table, which holds the actual GA.
+ *bufptrP has been updated to point to the next word in the buffer.
+ *graphP has been updated to point to the first free word at the end.
+*/
+
+/* Unpack a FETCH_ME closure from *bufptrP into the heap at *graphP.
+   If the packed prefix is an offset, return the previously unpacked
+   closure instead (leaving *graphP unchanged).  Otherwise build the FM,
+   common it up via SetGAandCommonUp (which also installs its ga field),
+   advance *graphP by sizeofW(StgFetchMe) and return the surviving closure. */
+static StgClosure*
+UnpackFetchMe (StgWord ***bufptrP, StgClosure **graphP) {
+  StgClosure *closure, *foo;
+  globalAddr gaS;
+
+  /* This fct relies on size of FM < size of FM in pack buffer */
+  ASSERT(sizeofW(StgFetchMe)<=PACK_FETCHME_SIZE);
+
+  /* fill in gaS from buffer */
+  *bufptrP = UnpackGA(*bufptrP, &gaS);
+  /* might be an offset to a closure in the pack buffer */
+  if (isOffset(&gaS)) {
+    belch("*<   UnpackFetchMe: found OFFSET to %d when unpacking FM at buffer loc %p",
+	  gaS.payload.gc.slot, *bufptrP);
+
+    closure = UnpackOffset(&gaS);
+    /* return address of previously unpacked closure; leaves *graphP unchanged */
+    return closure;
+  }
+
+  /* we have a proper GA at hand */
+  ASSERT(LOOKS_LIKE_GA(&gaS));
+
+  IF_DEBUG(sanity,
+	   if (isFixed(&gaS))
+	   barf("*<   UnpackFetchMe: found PLC where FM was expected %p (%s)",
+		*bufptrP, info_type((StgClosure*)*bufptrP)));
+
+  IF_PAR_DEBUG(pack,
+	       belch("*<_- Unpacked @ %p a FETCH_ME to GA ",
+		     *graphP);
+	       printGA(&gaS);
+	       fputc('\n', stderr));
+
+  /* the next thing must be the IP to a FETCH_ME closure */
+  ASSERT(get_itbl((StgClosure *)*bufptrP)->type == FETCH_ME);
+
+  closure = *graphP;
+  /* fill in the closure from the buffer */
+  FillInClosure(bufptrP, closure);
+
+  /* the newly built closure is a FETCH_ME */
+  ASSERT(get_itbl(closure)->type == FETCH_ME);
+
+  /* common up with other graph if necessary
+     this also assigns the contents of gaS to the ga field of the FM closure */
+  foo = SetGAandCommonUp(&gaS, closure, rtsTrue);
+
+  ASSERT(foo!=closure || LOOKS_LIKE_GA(((StgFetchMe*)closure)->ga));
+
+  IF_PAR_DEBUG(pack,
+	       if (foo==closure) {  // only if not commoned up
+		 belch("*<_- current FM @ %p next FM @ %p; unpacked FM @ %p is ",
+		       *graphP, *graphP+sizeofW(StgFetchMe), closure);
+		 printClosure(closure);
+	       });
+  *graphP += sizeofW(StgFetchMe);
+  return foo;
+}
+
+/*
+ Unpack an array of words.
+ Could use generic unpack most of the time, but cleaner to separate it.
+ ToDo: implement packing of MUT_ARRAYs
+*/
+
+//@cindex UnpackArray
+/* Unpack an ARR_WORDS-style closure: copy the two header words (info ptr
+   and word count -- note the _HS==1 assumption) and then the raw payload
+   words from the buffer into the heap closure at `graph`.  Advances
+   *bufptrP past the whole array. */
+static void
+UnpackArray(StgWord ***bufptrP, StgClosure *graph)
+{
+  StgInfoTable *info;
+  StgWord **bufptr=*bufptrP;
+  nat size, ptrs, nonptrs, vhs, i, n;
+  char str[80];
+
+  /* yes, I know I am paranoid; but who's asking !? */
+  IF_DEBUG(sanity,
+	   info = get_closure_info((StgClosure*)bufptr,
+				   &size, &ptrs, &nonptrs, &vhs, str);
+	   ASSERT(info->type == ARR_WORDS || info->type == MUT_ARR_PTRS ||
+		  info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR));
+
+  n = ((StgArrWords *)bufptr)->words;
+  // this includes the header!: arr_words_sizeW(stgCast(StgArrWords*,q));
+
+  IF_PAR_DEBUG(pack,
+	       if (n<100)
+	         belch("*<== unpacking an array of %d words %p (%s) (size=%d) |%s|\n",
+		     n, (StgClosure*)bufptr, info_type((StgClosure*)bufptr),
+		     arr_words_sizeW((StgArrWords *)bufptr),
+		     /* print array (string?) */
+		     ((StgArrWords *)graph)->payload);
+	       else
+	         belch("*<== unpacking an array of %d words %p (%s) (size=%d)\n",
+		     n, (StgClosure*)bufptr, info_type((StgClosure*)bufptr),
+		     arr_words_sizeW((StgArrWords *)bufptr)));
+
+  /* Unpack the header (2 words: info ptr and the number of words to follow) */
+  ((StgArrWords *)graph)->header.info = (StgInfoTable*)*bufptr++;  // assumes _HS==1; yuck!
+  ((StgArrWords *)graph)->words = (StgWord)*bufptr++;
+
+  /* unpack the payload of the closure (all non-ptrs) */
+  for (i=0; i<n; i++)
+    ((StgArrWords *)graph)->payload[i] = (StgWord)*bufptr++;
+
+  ASSERT(bufptr==*bufptrP+arr_words_sizeW((StgArrWords *)*bufptrP));
+  *bufptrP = bufptr;
+}
+
+/*
+ Unpack a PAP in the buffer into a heap closure.
+ For each FETCHME we find in the packed PAP we have to unpack a separate
+ FETCHME closure and insert a pointer to this closure into the PAP.
+ We unpack all FETCHMEs into an area after the PAP proper (the `FM area').
+ Note that the size of a FETCHME in the buffer is exactly the same as
+ the size of an unpacked FETCHME plus 1 word for the pointer to it.
+ Therefore, we just allocate packed_size words in the heap for the unpacking.
+ After this routine the heap starting from *graph looks like this:
+
+ graph
+ |
+ v PAP closure | FM area |
+ +------------------------------------------------------------+
+ | PAP header | n_args | fun | payload ... | FM_1 | FM_2 .... |
+ +------------------------------------------------------------+
+
+ where payload contains pointers to each of the unpacked FM_1, FM_2 ...
+ The size of the PAP closure plus all FMs is _HS+2+packed_size.
+*/
+
+//@cindex UnpackPAP
+static nat
+UnpackPAP(StgWord ***bufptrP, StgClosure *graph)
+{
+ nat n, i, j, packed_size = 0;
+ StgPtr p, q, end, payload_start, p_FMs;
+ const StgInfoTable* info;
+ StgWord bitmap;
+ StgWord **bufptr = *bufptrP;
+#if defined(DEBUG)
+ nat FMs_in_PAP=0;
+ void checkPAPSanity(StgPAP *graph, StgPtr p_FM_begin, StgPtr p_FM_end);
+#endif
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP: unpacking PAP @ %p with %d words to closure %p",
+ *bufptr, *(bufptr+1), graph));
+
+ /* Unpack the PAP header (both fixed and variable) */
+ ((StgPAP *)graph)->header.info = (StgInfoTable*)*bufptr++;
+ n = ((StgPAP *)graph)->n_args = (StgWord)*bufptr++;
+ ((StgPAP *)graph)->fun = (StgClosure*)*bufptr++;
+ packed_size = (nat)*bufptr++;
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP: PAP header is [%p, %d, %p] %d",
+ ((StgPAP *)graph)->header.info,
+ ((StgPAP *)graph)->n_args,
+ ((StgPAP *)graph)->fun,
+ packed_size));
+
+ payload_start = (StgPtr)bufptr;
+ /* p points to the current word in the heap */
+ p = (StgPtr)((StgPAP *)graph)->payload; // payload of PAP will be unpacked here
+ p_FMs = (StgPtr)graph+pap_sizeW((StgPAP*)graph); // FMs will be unpacked here
+ end = (StgPtr) payload_start+packed_size;
+ /*
+ The main loop unpacks the PAP in *bufptr into *p, with *p_FMS as the
+ FM area for unpacking all FETCHMEs encountered during unpacking.
+ */
+ while ((StgPtr)bufptr<end) {
+ /* be sure that we don't write more than we allocated for this closure */
+ ASSERT(p_FMs <= (StgPtr)(graph+_HS+2+packed_size));
+ /* be sure that the unpacked PAP doesn't run into the FM area */
+ ASSERT(p < (StgPtr)(graph+pap_sizeW((StgPAP*)graph)));
+ /* the loop body has been borrowed from scavenge_stack */
+ q = *bufptr; // let q be the contents of the current pointer into the buffer
+
+ /* Test whether the next thing is a FETCH_ME.
+ In PAPs FETCH_ME are encoded via a starting marker of ARGTAG_MAX+1
+ */
+ if (q==(StgPtr)(ARGTAG_MAX+1)) {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: unpacking FM; filling in ptr to FM area: %p",
+ p, p_FMs));
+ bufptr++; // skip ARGTAG_MAX+1 marker
+ // Unpack a FM into the FM area after the PAP proper and insert pointer
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ continue;
+ }
+
+ /* Test whether it is a PLC */
+ if (q==(StgPtr)0) { // same as isFixed(q)
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: unpacking PLC to %p",
+ p, *(bufptr+1)));
+ bufptr++; // skip 0 marker
+ *p++ = (StgWord)*bufptr++;
+ continue;
+ }
+
+ /* If we've got a tag, pack all words in that block */
+ if (IS_ARG_TAG((W_)q)) { // q stands for the no. of non-ptrs to follow
+ nat m = ARG_SIZE(q); // first word after this block
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: unpacking %d words (tagged), starting @ %p",
+ p, m, p));
+ for (i=0; i<m+1; i++)
+ *p++ = (StgWord)*bufptr++;
+ continue;
+ }
+
+ /*
+ * Otherwise, q must be the info pointer of an activation
+ * record. All activation records have 'bitmap' style layout
+ * info.
+ */
+ info = get_itbl((StgClosure *)q);
+ switch (info->type) {
+
+ /* Dynamic bitmap: the mask is stored on the stack */
+ case RET_DYN:
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: RET_DYN",
+ p));
+
+ /* Pack the header as is */
+ ((StgRetDyn *)p)->info = (StgWord)*bufptr++;
+ ((StgRetDyn *)p)->liveness = (StgWord)*bufptr++;
+ ((StgRetDyn *)p)->ret_addr = (StgWord)*bufptr++;
+ p += 3;
+
+ //bitmap = ((StgRetDyn *)p)->liveness;
+ //p = (P_)&((StgRetDyn *)p)->payload[0];
+ goto small_bitmap;
+
+ /* probably a slow-entry point return address: */
+ case FUN:
+ case FUN_STATIC:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: FUN or FUN_STATIC",
+ p));
+
+ ((StgClosure *)p)->header.info = (StgInfoTable*)*bufptr;
+ p++;
+
+ goto follow_srt; //??
+ }
+
+ /* Using generic code here; could inline as in scavenge_stack */
+ case UPDATE_FRAME:
+ {
+ StgUpdateFrame *frame = (StgUpdateFrame *)p;
+ //nat type = get_itbl(frame->updatee)->type;
+
+ //ASSERT(type==BLACKHOLE || type==CAF_BLACKHOLE || type==BLACKHOLE_BQ);
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnackPAP @ %p: UPDATE_FRAME",
+ p));
+
+ ((StgUpdateFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+ ((StgUpdateFrame *)p)->link = (StgUpdateFrame*)*bufptr++; // ToDo: fix intra-stack pointer
+ ((StgUpdateFrame *)p)->updatee = (StgClosure*)*bufptr++; // ToDo: follow link
+
+ p += 3;
+ }
+
+ /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */
+ case STOP_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: STOP_FRAME",
+ p));
+ ((StgStopFrame *)p)->header.info = (StgInfoTable*)*bufptr;
+ p++;
+ }
+
+ case CATCH_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: CATCH_FRAME",
+ p));
+
+ ((StgCatchFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+ ((StgCatchFrame *)p)->link = (StgUpdateFrame*)*bufptr++;
+ ((StgCatchFrame *)p)->exceptions_blocked = (StgInt)*bufptr++;
+ ((StgCatchFrame *)p)->handler = (StgClosure*)*bufptr++;
+ p += 4;
+ }
+
+ case SEQ_FRAME:
+ {
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: UPDATE_FRAME",
+ p));
+
+ ((StgSeqFrame *)p)->header.info = (StgInfoTable*)*bufptr++;
+ ((StgSeqFrame *)p)->link = (StgUpdateFrame*)*bufptr++;
+
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+
+ p = (StgPtr)&(((StgClosure *)p)->payload);
+ goto small_bitmap;
+ }
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: RET_{BCO,SMALL,VEC_SMALL}",
+ p));
+
+
+ ((StgClosure *)p)->header.info = (StgInfoTable*)*bufptr++;
+ p++;
+ // ToDo: handle bitmap
+ bitmap = info->layout.bitmap;
+ /* this assumes that the payload starts immediately after the info-ptr */
+
+ small_bitmap:
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ *p++ = (StgWord)*bufptr++;
+ }
+ bitmap = bitmap >> 1;
+ }
+
+ follow_srt:
+ belch("*<-- UnpackPAP: nothing to do for follow_srt");
+ continue;
+
+ /* large bitmap (> 32 entries) */
+ case RET_BIG:
+ case RET_VEC_BIG:
+ {
+ StgPtr q;
+ StgLargeBitmap *large_bitmap;
+
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP @ %p: RET_{BIG,VEC_BIG} (large_bitmap=%p)",
+ p, info->layout.large_bitmap));
+
+
+ ((StgClosure *)p)->header.info = (StgInfoTable*)*bufptr++;
+ p++;
+
+ large_bitmap = info->layout.large_bitmap;
+
+ for (j=0; j<large_bitmap->size; j++) {
+ bitmap = large_bitmap->bitmap[j];
+ q = p + BITS_IN(W_);
+ while (bitmap != 0) {
+ if ((bitmap & 1) == 0) {
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ } else {
+ *p++ = (StgWord)*bufptr;
+ }
+ bitmap = bitmap >> 1;
+ }
+ if (j+1 < large_bitmap->size) {
+ while (p < q) {
+ *p++ = (StgWord)UnpackFetchMe(&bufptr, (StgClosure**)&p_FMs);
+ IF_DEBUG(sanity, FMs_in_PAP++);
+ }
+ }
+ }
+
+ /* and don't forget to follow the SRT */
+ goto follow_srt;
+ }
+
+ default:
+ barf("UnpackPAP: weird activation record found on stack: %d",
+ (int)(info->type));
+ }
+ }
+ IF_PAR_DEBUG(pack,
+ belch("*<** UnpackPAP finished; unpacked closure @ %p is:",
+ (StgClosure *)graph);
+ printClosure((StgClosure *)graph));
+
+ IF_DEBUG(sanity, /* check sanity of unpacked PAP */
+ checkClosure(graph));
+
+ *bufptrP = bufptr;
+ /*
+ Now p points to the first word after the PAP proper and p_FMs points
+ to the next free word in the heap; everything between p and p_FMs are
+ FETCHMEs
+ */
+ IF_DEBUG(sanity,
+ checkPAPSanity(graph, p, p_FMs));
+
+ /* we have to return the size of PAP + FMs as size of the unpacked thing */
+ ASSERT(graph+pap_sizeW((StgPAP*)graph)==p);
+ return (nat)((StgClosure*)p_FMs-graph);
+}
+
+#if defined(DEBUG)
+/*
+ Check sanity of a PAP after unpacking the PAP.
+ This means that there is slice of heap after the PAP containing FETCHMEs
+*/
+void
+checkPAPSanity(StgPAP *graph, StgPtr p_FM_begin, StgPtr p_FM_end)
+{
+ StgPtr xx;
+
+ /* check that the main unpacked closure is a PAP */
+ ASSERT(graph->header.info = &stg_PAP_info);
+ checkClosure(graph);
+ /* check that all of the closures in the FM-area are FETCHMEs */
+ for (xx=p_FM_begin; xx<p_FM_end; xx += sizeofW(StgFetchMe)) {
+ /* must be a FETCHME closure */
+ ASSERT(((StgClosure*)xx)->header.info == &stg_FETCH_ME_info);
+ /* it might have been commoned up (=> marked as garbage);
+ otherwise it points to a GA */
+ ASSERT((((StgFetchMe*)xx)->ga)==GARBAGE_MARKER ||
+ LOOKS_LIKE_GA(((StgFetchMe*)xx)->ga));
+ }
+ /* traverse the payload of the PAP */
+ for (xx=graph->payload; xx-(StgPtr)(graph->payload)<graph->n_args; xx++) {
+ /* if the current elem is a pointer into the FM area, check that
+ the GA field is ok */
+ ASSERT(!(p_FM_begin<(StgPtr)*xx && (StgPtr)*xx<p_FM_end) ||
+ LOOKS_LIKE_GA(((StgFetchMe*)*xx)->ga));
+ }
+}
+#endif /* DEBUG */
+#endif /* PAR */
+
+//@node GranSim Code, , GUM code, Unpacking routines
+//@subsubsection GranSim Code
+
+/*
+ For GrAnSim: No actual unpacking should be necessary. We just
+ have to walk over the graph and set the bitmasks appropriately.
+ Since we use RBHs similarly to GUM but without an ACK message/event
+ we have to revert the RBH from within the UnpackGraph routine (good luck!)
+ -- HWL
+*/
+
+#if defined(GRAN)
+void
+CommonUp(StgClosure *src, StgClosure *dst)
+{
+ barf("CommonUp: should never be entered in a GranSim setup");
+}
+
+StgClosure*
+UnpackGraph(buffer)
+rtsPackBuffer* buffer;
+{
+ nat size, ptrs, nonptrs, vhs,
+ bufptr = 0;
+ StgClosure *closure, *graphroot, *graph;
+ StgInfoTable *ip;
+ StgWord bufsize, unpackedsize,
+ pptr = 0, pptrs = 0, pvhs;
+ StgTSO* tso;
+ char str[240], str1[80];
+ int i;
+
+ bufptr = 0;
+ graphroot = buffer->buffer[0];
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpackedsize = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ IF_GRAN_DEBUG(pack,
+ belch("<<< Unpacking <<%d>> (buffer @ %p):\n (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]",
+ buffer->id, buffer, graphroot, where_is(graphroot),
+ bufsize, tso->id, tso,
+ where_is((StgClosure *)tso)));
+
+ do {
+ closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */
+
+ /* Actually only ip is needed; rest is useful for TESTING -- HWL */
+ ip = get_closure_info(closure,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str, "** (%p): Changing bitmask[%s]: 0x%x ",
+ closure, (closure_HNF(closure) ? "NF" : "__"),
+ PROCS(closure)));
+
+ if (get_itbl(closure)->type == RBH) {
+ /* if it's an RBH, we have to revert it into a normal closure, thereby
+ awakening the blocking queue; not that this is code currently not
+ needed in GUM, but it should be added with the new features in
+ GdH (and the implementation of an NACK message)
+ */
+ // closure->header.gran.procs = PE_NUMBER(CurrentProc);
+ SET_GRAN_HDR(closure, PE_NUMBER(CurrentProc)); /* Move node */
+
+ IF_GRAN_DEBUG(pack,
+ strcat(str, " (converting RBH) "));
+
+ convertFromRBH(closure); /* In GUM that's done by convertToFetchMe */
+
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a RBH; after reverting: IP=%p",
+ closure, info_type(closure), get_itbl(closure)));
+ } else if (IS_BLACK_HOLE(closure)) {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a BH; copying node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) {
+ if (closure_HNF(closure)) {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is a HNF; copying node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */
+ } else {
+ IF_GRAN_DEBUG(pack,
+ belch(":: closure %p (%s) is no (R)BH or HNF; moving node to %d",
+ closure, info_type(closure), CurrentProc));
+ closure->header.gran.procs = PE_NUMBER(CurrentProc); /* Move node */
+ }
+ }
+
+ IF_GRAN_DEBUG(pack,
+ sprintf(str1, "0x%x", PROCS(closure)); strcat(str, str1));
+ IF_GRAN_DEBUG(pack, belch(str));
+
+ } while (bufptr<buffer->size) ; /* (parent != NULL); */
+
+ /* In GrAnSim we allocate pack buffers dynamically! -- HWL */
+ free(buffer->buffer);
+ free(buffer);
+
+ IF_GRAN_DEBUG(pack,
+ belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0));
+
+ return (graphroot);
+}
+#endif /* GRAN */
+
+//@node Aux fcts for packing, Printing Packet Contents, Unpacking routines, Graph packing
+//@subsection Aux fcts for packing
+
+//@menu
+//* Offset table::
+//* Packet size::
+//* Types of Global Addresses::
+//* Closure Info::
+//@end menu
+
+//@node Offset table, Packet size, Aux fcts for packing, Aux fcts for packing
+//@subsubsection Offset table
+
+/*
+ DonePacking is called when we've finished packing. It releases memory
+ etc. */
+
+//@cindex DonePacking
+
+# if defined(PAR)
+
+static void
+DonePacking(void)
+{
+ freeHashTable(offsetTable, NULL);
+ offsetTable = NULL;
+}
+
+/*
+ AmPacking records that the closure is being packed. Note the abuse of
+ the data field in the hash table -- this saves calling @malloc@! */
+
+//@cindex AmPacking
+
+static void
+AmPacking(closure)
+StgClosure *closure;
+{
+ insertHashTable(offsetTable, (StgWord) closure, (void *) (StgWord) pack_locn);
+}
+
+/*
+ OffsetFor returns an offset for a closure which is already being packed. */
+
+//@cindex OffsetFor
+
+static int
+OffsetFor(closure)
+StgClosure *closure;
+{
+ return (int) (StgWord) lookupHashTable(offsetTable, (StgWord) closure);
+}
+
+/*
+ NotYetPacking determines whether the closure's already being packed.
+ Offsets $<$ @PACK_HDR_SIZE@ (e.g. 0) mean no. */
+
+//@cindex NotYetPacking
+
+static rtsBool
+NotYetPacking(offset)
+int offset;
+{
+ return(offset == 0); // ToDo: what if root is found again?? FIX
+}
+
+# else /* GRAN */
+
+static void
+DonePacking(void)
+{
+ /* nothing */
+}
+
+/*
+ NotYetPacking searches through the whole pack buffer for closure. */
+
+static rtsBool
+NotYetPacking(closure)
+StgClosure *closure;
+{ nat i;
+ rtsBool found = rtsFalse;
+
+ for (i=0; (i<pack_locn) && !found; i++)
+ found = globalPackBuffer->buffer[i]==closure;
+
+ return (!found);
+}
+# endif
+
+//@node Packet size, Closure Info, Offset table, Aux fcts for packing
+//@subsubsection Packet size
+
+/*
+ The size needed if all currently queued closures are packed as FETCH_ME
+ closures. This represents the headroom we must have when packing the
+ buffer in order to maintain all links in the graphs.
+*/
+// ToDo: check and merge cases
+#if defined(PAR)
+static nat
+QueuedClosuresMinSize (nat ptrs) {
+ return ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE;
+}
+#else /* GRAN */
+static nat
+QueuedClosuresMinSize (nat ptrs) {
+ return ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE;
+}
+#endif
+
+/*
+ RoomToPack determines whether there's room to pack the closure into
+ the pack buffer based on
+
+ o how full the buffer is already,
+ o the closures' size and number of pointers (which must be packed as GAs),
+ o the size and number of pointers held by any primitive arrays that it
+ points to
+
+ It has a *side-effect* (naughty, naughty) in assigning roomInBuffer
+ to rtsFalse.
+*/
+
+//@cindex RoomToPack
+static rtsBool
+RoomToPack(size, ptrs)
+nat size, ptrs;
+{
+# if defined(PAR)
+ if (roomInBuffer &&
+ (pack_locn + // where we are in the buffer right now
+ size + // space needed for the current closure
+ QueuedClosuresMinSize(ptrs) // space for queued closures as FETCH_MEs
+ + 1 // headroom (DEBUGGING only)
+ >=
+ RTS_PACK_BUFFER_SIZE))
+ {
+ roomInBuffer = rtsFalse;
+ }
+# else /* GRAN */
+ if (roomInBuffer &&
+ (unpacked_size +
+ size +
+ QueuedClosuresMinSize(ptrs)
+ >=
+ RTS_PACK_BUFFER_SIZE))
+ {
+ roomInBuffer = rtsFalse;
+ }
+# endif
+ return (roomInBuffer);
+}
+
+//@node Closure Info, , Packet size, Aux fcts for packing
+//@subsubsection Closure Info
+
+/*
+ Closure Info
+
+ @get_closure_info@ determines the size, number of pointers etc. for this
+ type of closure -- see @SMInfoTables.lh@ for the legal info. types etc.
+
+[Can someone please keep this function up to date. I keep needing it
+ (or something similar) for interpretive code, and it keeps
+ bit-rotting. {\em It really belongs somewhere else too}. KH @@ 17/2/95] */
+
+#if 0
+
+// {Parallel.h}Daq ngoqvam vIroQpu'
+
+# if defined(GRAN) || defined(PAR)
+/* extracting specific info out of closure; currently only used in GRAN -- HWL */
+//@cindex get_closure_info
+StgInfoTable*
+get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty)
+StgClosure* node;
+nat *size, *ptrs, *nonptrs, *vhs;
+char *info_hdr_ty;
+{
+ StgInfoTable *info;
+
+ info = get_itbl(node);
+ /* the switch shouldn't be necessary, really; just use default case */
+ switch (info->type) {
+#if 0
+ case CONSTR_1_0:
+ case THUNK_1_0:
+ case FUN_1_0:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 1; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 0; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_0_1:
+ case THUNK_0_1:
+ case FUN_0_1:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 0; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 1; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_2_0:
+ case THUNK_2_0:
+ case FUN_2_0:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 2; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 0; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_1_1:
+ case THUNK_1_1:
+ case FUN_1_1:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 1; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 1; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+
+ case CONSTR_0_2:
+ case THUNK_0_2:
+ case FUN_0_2:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) 0; // (info->layout.payload.ptrs);
+ *nonptrs = (nat) 2; // (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+#endif
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to
+ *size = sizeW_fromITBL(rip);
+ *ptrs = (nat) (rip->layout.payload.ptrs);
+ *nonptrs = (nat) (rip->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return rip; // NB: we return the reverted info ptr for a RBH!!!!!!
+ }
+
+ default:
+ *size = sizeW_fromITBL(info);
+ *ptrs = (nat) (info->layout.payload.ptrs);
+ *nonptrs = (nat) (info->layout.payload.nptrs);
+ *vhs = (nat) 0; // unknown
+ info_hdr_type(node, info_hdr_ty);
+ return info;
+ }
+}
+
+//@cindex IS_BLACK_HOLE
+rtsBool
+IS_BLACK_HOLE(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse);
+}
+
+//@cindex IS_INDIRECTION
+StgClosure *
+IS_INDIRECTION(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ switch (info->type) {
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ /* relies on indirectee being at same place for all these closure types */
+ return (((StgInd*)node) -> indirectee);
+ default:
+ return NULL;
+ }
+}
+
+/*
+rtsBool
+IS_THUNK(StgClosure* node)
+{
+ StgInfoTable *info;
+ info = get_itbl(node);
+ return ((info->type == THUNK ||
+ info->type == THUNK_STATIC ||
+ info->type == THUNK_SELECTOR) ? rtsTrue : rtsFalse);
+}
+*/
+
+# endif /* GRAN */
+#endif /* 0 */
+
+# if 0
+/* ngoq ngo' */
+
+P_
+get_closure_info(closure, size, ptrs, nonptrs, vhs, type)
+P_ closure;
+W_ *size, *ptrs, *nonptrs, *vhs;
+char *type;
+{
+ P_ ip = (P_) INFO_PTR(closure);
+
+ if (closure==NULL) {
+ fprintf(stderr, "Qagh {get_closure_info}Daq: NULL closure\n");
+ *size = *ptrs = *nonptrs = *vhs = 0;
+ strcpy(type,"ERROR in get_closure_info");
+ return;
+ } else if (closure==PrelBase_Z91Z93_closure) {
+ /* fprintf(stderr, "Qagh {get_closure_info}Daq: PrelBase_Z91Z93_closure closure\n"); */
+ *size = *ptrs = *nonptrs = *vhs = 0;
+ strcpy(type,"PrelBase_Z91Z93_closure");
+ return;
+ };
+
+ ip = (P_) INFO_PTR(closure);
+
+ switch (INFO_TYPE(ip)) {
+ case INFO_SPEC_U_TYPE:
+ case INFO_SPEC_S_TYPE:
+ case INFO_SPEC_N_TYPE:
+ *size = SPEC_CLOSURE_SIZE(closure);
+ *ptrs = SPEC_CLOSURE_NoPTRS(closure);
+ *nonptrs = SPEC_CLOSURE_NoNONPTRS(closure);
+ *vhs = 0 /*SPEC_VHS*/;
+ strcpy(type,"SPEC");
+ break;
+
+ case INFO_GEN_U_TYPE:
+ case INFO_GEN_S_TYPE:
+ case INFO_GEN_N_TYPE:
+ *size = GEN_CLOSURE_SIZE(closure);
+ *ptrs = GEN_CLOSURE_NoPTRS(closure);
+ *nonptrs = GEN_CLOSURE_NoNONPTRS(closure);
+ *vhs = GEN_VHS;
+ strcpy(type,"GEN");
+ break;
+
+ case INFO_DYN_TYPE:
+ *size = DYN_CLOSURE_SIZE(closure);
+ *ptrs = DYN_CLOSURE_NoPTRS(closure);
+ *nonptrs = DYN_CLOSURE_NoNONPTRS(closure);
+ *vhs = DYN_VHS;
+ strcpy(type,"DYN");
+ break;
+
+ case INFO_TUPLE_TYPE:
+ *size = TUPLE_CLOSURE_SIZE(closure);
+ *ptrs = TUPLE_CLOSURE_NoPTRS(closure);
+ *nonptrs = TUPLE_CLOSURE_NoNONPTRS(closure);
+ *vhs = TUPLE_VHS;
+ strcpy(type,"TUPLE");
+ break;
+
+ case INFO_DATA_TYPE:
+ *size = DATA_CLOSURE_SIZE(closure);
+ *ptrs = DATA_CLOSURE_NoPTRS(closure);
+ *nonptrs = DATA_CLOSURE_NoNONPTRS(closure);
+ *vhs = DATA_VHS;
+ strcpy(type,"DATA");
+ break;
+
+ case INFO_IMMUTUPLE_TYPE:
+ case INFO_MUTUPLE_TYPE:
+ *size = MUTUPLE_CLOSURE_SIZE(closure);
+ *ptrs = MUTUPLE_CLOSURE_NoPTRS(closure);
+ *nonptrs = MUTUPLE_CLOSURE_NoNONPTRS(closure);
+ *vhs = MUTUPLE_VHS;
+ strcpy(type,"(IM)MUTUPLE");
+ break;
+
+ case INFO_STATIC_TYPE:
+ *size = STATIC_CLOSURE_SIZE(closure);
+ *ptrs = STATIC_CLOSURE_NoPTRS(closure);
+ *nonptrs = STATIC_CLOSURE_NoNONPTRS(closure);
+ *vhs = STATIC_VHS;
+ strcpy(type,"STATIC");
+ break;
+
+ case INFO_CAF_TYPE:
+ case INFO_IND_TYPE:
+ *size = IND_CLOSURE_SIZE(closure);
+ *ptrs = IND_CLOSURE_NoPTRS(closure);
+ *nonptrs = IND_CLOSURE_NoNONPTRS(closure);
+ *vhs = IND_VHS;
+ strcpy(type,"CAF|IND");
+ break;
+
+ case INFO_CONST_TYPE:
+ *size = CONST_CLOSURE_SIZE(closure);
+ *ptrs = CONST_CLOSURE_NoPTRS(closure);
+ *nonptrs = CONST_CLOSURE_NoNONPTRS(closure);
+ *vhs = CONST_VHS;
+ strcpy(type,"CONST");
+ break;
+
+ case INFO_SPEC_RBH_TYPE:
+ *size = SPEC_RBH_CLOSURE_SIZE(closure);
+ *ptrs = SPEC_RBH_CLOSURE_NoPTRS(closure);
+ *nonptrs = SPEC_RBH_CLOSURE_NoNONPTRS(closure);
+ if (*ptrs <= 2) {
+ *nonptrs -= (2 - *ptrs);
+ *ptrs = 1;
+ } else
+ *ptrs -= 1;
+ *vhs = SPEC_RBH_VHS;
+ strcpy(type,"SPEC_RBH");
+ break;
+
+ case INFO_GEN_RBH_TYPE:
+ *size = GEN_RBH_CLOSURE_SIZE(closure);
+ *ptrs = GEN_RBH_CLOSURE_NoPTRS(closure);
+ *nonptrs = GEN_RBH_CLOSURE_NoNONPTRS(closure);
+ if (*ptrs <= 2) {
+ *nonptrs -= (2 - *ptrs);
+ *ptrs = 1;
+ } else
+ *ptrs -= 1;
+ *vhs = GEN_RBH_VHS;
+ strcpy(type,"GEN_RBH");
+ break;
+
+ case INFO_CHARLIKE_TYPE:
+ *size = CHARLIKE_CLOSURE_SIZE(closure);
+ *ptrs = CHARLIKE_CLOSURE_NoPTRS(closure);
+ *nonptrs = CHARLIKE_CLOSURE_NoNONPTRS(closure);
+ *vhs = CHARLIKE_VHS;
+ strcpy(type,"CHARLIKE");
+ break;
+
+ case INFO_INTLIKE_TYPE:
+ *size = INTLIKE_CLOSURE_SIZE(closure);
+ *ptrs = INTLIKE_CLOSURE_NoPTRS(closure);
+ *nonptrs = INTLIKE_CLOSURE_NoNONPTRS(closure);
+ *vhs = INTLIKE_VHS;
+ strcpy(type,"INTLIKE");
+ break;
+
+# if !defined(GRAN)
+ case INFO_FETCHME_TYPE:
+ *size = FETCHME_CLOSURE_SIZE(closure);
+ *ptrs = FETCHME_CLOSURE_NoPTRS(closure);
+ *nonptrs = FETCHME_CLOSURE_NoNONPTRS(closure);
+ *vhs = FETCHME_VHS;
+ strcpy(type,"FETCHME");
+ break;
+
+ case INFO_FMBQ_TYPE:
+ *size = FMBQ_CLOSURE_SIZE(closure);
+ *ptrs = FMBQ_CLOSURE_NoPTRS(closure);
+ *nonptrs = FMBQ_CLOSURE_NoNONPTRS(closure);
+ *vhs = FMBQ_VHS;
+ strcpy(type,"FMBQ");
+ break;
+# endif
+
+ case INFO_BQ_TYPE:
+ *size = BQ_CLOSURE_SIZE(closure);
+ *ptrs = BQ_CLOSURE_NoPTRS(closure);
+ *nonptrs = BQ_CLOSURE_NoNONPTRS(closure);
+ *vhs = BQ_VHS;
+ strcpy(type,"BQ");
+ break;
+
+ case INFO_BH_TYPE:
+ *size = BH_CLOSURE_SIZE(closure);
+ *ptrs = BH_CLOSURE_NoPTRS(closure);
+ *nonptrs = BH_CLOSURE_NoNONPTRS(closure);
+ *vhs = BH_VHS;
+ strcpy(type,"BH");
+ break;
+
+ case INFO_TSO_TYPE:
+ *size = 0; /* TSO_CLOSURE_SIZE(closure); */
+ *ptrs = 0; /* TSO_CLOSURE_NoPTRS(closure); */
+ *nonptrs = 0; /* TSO_CLOSURE_NoNONPTRS(closure); */
+ *vhs = TSO_VHS;
+ strcpy(type,"TSO");
+ break;
+
+ case INFO_STKO_TYPE:
+ *size = 0;
+ *ptrs = 0;
+ *nonptrs = 0;
+ *vhs = STKO_VHS;
+ strcpy(type,"STKO");
+ break;
+
+ default:
+ fprintf(stderr, "get_closure_info: Unexpected closure type (%lu), closure %lx\n",
+ INFO_TYPE(ip), (StgWord) closure);
+ EXIT(EXIT_FAILURE);
+ }
+
+ return ip;
+}
+# endif
+
+# if 0
+// Use allocate in Storage.c instead
+/*
+ @AllocateHeap@ will bump the heap pointer by @size@ words if the space
+ is available, but it will not perform garbage collection.
+ ToDo: check whether we can use an existing STG allocation routine -- HWL
+*/
+
+
+//@cindex AllocateHeap
+StgPtr
+AllocateHeap(size)
+nat size;
+{
+ StgPtr newClosure;
+
+ /* Allocate a new closure */
+ if (Hp + size > HpLim)
+ return NULL;
+
+ newClosure = Hp + 1;
+ Hp += size;
+
+ return newClosure;
+}
+# endif
+
+# if defined(PAR)
+
+//@cindex doGlobalGC
+void
+doGlobalGC(void)
+{
+ fprintf(stderr,"Splat -- we just hit global GC!\n");
+ stg_exit(EXIT_FAILURE);
+ //fishing = rtsFalse;
+ outstandingFishes--;
+}
+
+# endif /* PAR */
+
+//@node Printing Packet Contents, End of file, Aux fcts for packing, Graph packing
+//@subsection Printing Packet Contents
+/*
+ Printing Packet Contents
+ */
+
+#if defined(DEBUG) || defined(GRAN_CHECK)
+
+//@cindex PrintPacket
+
+#if defined(PAR)
+void
+PrintPacket(packBuffer)
+rtsPackBuffer *packBuffer;
+{
+ StgClosure *parent, *graphroot, *closure_start;
+ const StgInfoTable *ip;
+ globalAddr ga;
+ StgWord **bufptr, **slotptr;
+
+ nat bufsize;
+ nat pptr = 0, pptrs = 0, pvhs;
+ nat locn = 0;
+ nat i;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* disable printing if a non-std globalisation scheme is used; ToDo: FIX */
+ if (RtsFlags.ParFlags.globalising != 0)
+ return;
+
+ /* NB: this whole routine is more or less a copy of UnpackGraph with all
+ unpacking components replaced by printing fcts
+ Long live higher-order fcts!
+ */
+ /* Initialisation */
+ //InitPackBuffer(); /* in case it isn't already init'd */
+ InitClosureQueue();
+ // ASSERT(gaga==PendingGABuffer);
+ graphroot = (StgClosure *)NULL;
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ bufptr = packBuffer->buffer;
+
+ fprintf(stderr, "*. Printing <<%d>> (buffer @ %p):\n",
+ packBuffer->id, packBuffer);
+ fprintf(stderr, "*. size: %d; unpacked_size: %d; tso: %p; buffer: %p\n",
+ packBuffer->size, packBuffer->unpacked_size,
+ packBuffer->tso, packBuffer->buffer);
+
+ parent = (StgClosure *)NULL;
+
+ do {
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+ locn = slotptr-(packBuffer->buffer); // index of closure in buffer
+
+ /* First, unpack the next GA or PLC */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight == 2) { // unglobalised closure to follow
+ // nothing to do; closure starts at *bufptr
+ } else if (ga.weight > 0) { // fill in GA
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ fprintf(stderr, "*. %u: PLC @ %p\n", locn, ga.payload.plc);
+ // closure = ga.payload.plc;
+ } else if (isOffset(&ga)) {
+ fprintf(stderr, "*. %u: OFFSET TO %d\n", locn, ga.payload.gc.slot);
+ // closure = (StgClosure *) buffer[ga.payload.gc.slot];
+ } else {
+ /* Print normal closures */
+
+ ASSERT(bufsize > 0);
+
+ fprintf(stderr, "*. %u: ((%x, %d, %x)) ", locn,
+ ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight);
+
+ closure_start = (StgClosure*)bufptr;
+ ip = get_closure_info((StgClosure *)bufptr,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ /* ToDo: check whether this is really needed */
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF) {
+ size = _HS;
+ ptrs = nonptrs = vhs = 0;
+ }
+ /* ToDo: check whether this is really needed */
+ if (ip->type == ARR_WORDS) {
+ ptrs = vhs = 0;
+ nonptrs = ((StgArrWords *)bufptr)->words;
+ size = arr_words_sizeW((StgArrWords *)bufptr);
+ }
+
+ /* special code for printing a PAP in a buffer */
+ if (ip->type == PAP || ip->type == AP_UPD) {
+ vhs = 3;
+ ptrs = 0;
+ nonptrs = (nat)((StgPAP *)bufptr)->payload[0];
+ size = _HS+vhs+ptrs+nonptrs;
+ }
+
+ /*
+ Remember, the generic closure layout is as follows:
+ +-------------------------------------------------+
+ | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS |
+ +-------------------------------------------------+
+ */
+ /* Print fixed header */
+ fprintf(stderr, "FH [");
+ for (i = 0; i < _HS; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF)
+ size = ptrs = nonptrs = vhs = 0;
+
+ // VH is always empty in the new RTS
+ ASSERT(vhs==0 ||
+ ip->type == PAP || ip->type == AP_UPD);
+ /* Print variable header */
+ fprintf(stderr, "] VH [");
+ for (i = 0; i < vhs; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ //fprintf(stderr, "] %d PTRS [", ptrs);
+ /* Pointers will be filled in later */
+
+ fprintf(stderr, " ] (%d, %d) [", ptrs, nonptrs);
+ /* Print non-pointers */
+ for (i = 0; i < nonptrs; i++)
+ fprintf(stderr, " %p", *bufptr++);
+
+ fprintf(stderr, "] (%s)\n", str);
+
+ /* Indirections are never packed */
+ // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE);
+
+ /* Add to queue for processing
+ When just printing the packet we do not have an unpacked closure
+ in hand, so we feed it the packet entry;
+ again, this assumes that at least the fixed header of the closure
+ has the same layout in the packet; also we may not overwrite entries
+ in the packet (done in Unpack), but for printing that's a bad idea
+ anyway */
+ QueueClosure((StgClosure *)closure_start);
+
+ /* No Common up needed for printing */
+
+ /* No Sort out the global address mapping for printing */
+
+ } /* normal closure case */
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+ fprintf(stderr, "*. --- End packet <<%d>> (claimed size=%d; real size=%d)---\n",
+ packBuffer->id, packBuffer->size, size);
+
+}
+
+/*
+ Doing a sanity check on a packet.
+ This does a full iteration over the packet, as in PrintPacket.
+*/
+//@cindex checkPacket
+void
+checkPacket(packBuffer)
+rtsPackBuffer *packBuffer;
+{
+ StgClosure *parent, *graphroot, *closure_start;
+ const StgInfoTable *ip;
+ globalAddr ga;
+ StgWord **bufptr, **slotptr;
+
+ nat bufsize;
+ nat pptr = 0, pptrs = 0, pvhs;
+ nat locn = 0;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /* NB: this whole routine is more or less a copy of UnpackGraph with all
+ unpacking components replaced by printing fcts
+ Long live higher-order fcts!
+ */
+ /* Initialisation */
+ //InitPackBuffer(); /* in case it isn't already init'd */
+ InitClosureQueue();
+ // ASSERT(gaga==PendingGABuffer);
+ graphroot = (StgClosure *)NULL;
+
+ /* Unpack the header */
+ bufsize = packBuffer->size;
+ bufptr = packBuffer->buffer;
+ parent = (StgClosure *)NULL;
+ ASSERT(bufsize > 0);
+ do {
+ /* check that we are not at the end of the buffer, yet */
+ IF_DEBUG(sanity, ASSERT(*bufptr != END_OF_BUFFER_MARKER));
+
+ /* This is where we will ultimately save the closure's address */
+ slotptr = bufptr;
+ locn = slotptr-(packBuffer->buffer); // index of closure in buffer
+ ASSERT(locn<=bufsize);
+
+ /* First, check whether we have a GA, a PLC, or an OFFSET at hand */
+ ga.weight = (rtsWeight) *bufptr++;
+
+ if (ga.weight == 2) { // unglobalised closure to follow
+ // nothing to do; closure starts at *bufptr
+ } else if (ga.weight > 0) { // fill in GA
+ ga.payload.gc.gtid = (GlobalTaskId) *bufptr++;
+ ga.payload.gc.slot = (int) *bufptr++;
+ } else
+ ga.payload.plc = (StgPtr) *bufptr++;
+
+ /* Now unpack the closure body, if there is one */
+ if (isFixed(&ga)) {
+ /* It's a PLC */
+ ASSERT(LOOKS_LIKE_STATIC(ga.payload.plc));
+ } else if (isOffset(&ga)) {
+ ASSERT(ga.payload.gc.slot<=(int)bufsize);
+ } else {
+ /* normal closure */
+ ASSERT(!RtsFlags.ParFlags.globalising==0 || LOOKS_LIKE_GA(&ga));
+
+ closure_start = (StgClosure*)bufptr;
+ ASSERT(LOOKS_LIKE_GHC_INFO((StgPtr)*bufptr));
+ ip = get_closure_info((StgClosure *)bufptr,
+ &size, &ptrs, &nonptrs, &vhs, str);
+
+ /* ToDo: check whether this is really needed */
+ if (ip->type == FETCH_ME || ip->type == REMOTE_REF) {
+ size = _HS;
+ ptrs = nonptrs = vhs = 0;
+ }
+ /* ToDo: check whether this is really needed */
+ if (ip->type == ARR_WORDS) {
+ ptrs = vhs = 0;
+ nonptrs = ((StgArrWords *)bufptr)->words+1; // payload+words
+ size = arr_words_sizeW((StgArrWords *)bufptr);
+ ASSERT(size==_HS+vhs+nonptrs);
+ }
+ /* special code for printing a PAP in a buffer */
+ if (ip->type == PAP || ip->type == AP_UPD) {
+ vhs = 3;
+ ptrs = 0;
+ nonptrs = (nat)((StgPAP *)bufptr)->payload[0];
+ size = _HS+vhs+ptrs+nonptrs;
+ }
+
+ /* no checks on contents of closure (pointers aren't packed anyway) */
+ ASSERT(_HS+vhs+nonptrs>=MIN_NONUPD_SIZE);
+ bufptr += _HS+vhs+nonptrs;
+
+ /* Add to queue for processing */
+ QueueClosure((StgClosure *)closure_start);
+
+ /* No Common up needed for checking */
+
+ /* No Sort out the global address mapping for checking */
+
+ } /* normal closure case */
+
+ /* Locate next parent pointer */
+ pptr++;
+ while (pptr + 1 > pptrs) {
+ parent = DeQueueClosure();
+
+ if (parent == NULL)
+ break;
+ else {
+ //ASSERT(LOOKS_LIKE_GHC_INFO((StgPtr)*parent));
+ (void) get_closure_info(parent, &size, &pptrs, &nonptrs,
+ &pvhs, str);
+ pptr = 0;
+ }
+ }
+ } while (parent != NULL);
+ /* we unpacked exactly as many words as there are in the buffer */
+ ASSERT(packBuffer->size == bufptr-(packBuffer->buffer));
+ /* check for magic end-of-buffer word */
+ IF_DEBUG(sanity, ASSERT(*bufptr == END_OF_BUFFER_MARKER));
+}
+#else /* GRAN */
+void
+PrintPacket(buffer)
+rtsPackBuffer *buffer;
+{
+ // extern char *info_hdr_type(P_ infoptr); /* defined in Threads.lc */
+ // extern char *display_info_type(P_ infoptr); /* defined in Threads.lc */
+
+ StgInfoTable *info;
+ nat size, ptrs, nonptrs, vhs;
+ char info_hdr_ty[80];
+ char str1[80], str2[80], junk_str[80];
+
+ /* globalAddr ga; */
+
+ nat bufsize, unpacked_size ;
+ StgClosure *parent;
+ nat pptr = 0, pptrs = 0, pvhs;
+
+ nat unpack_locn = 0;
+ nat gastart = unpack_locn;
+ nat closurestart = unpack_locn;
+
+ StgTSO *tso;
+ StgClosure *closure, *p;
+
+ nat i;
+
+ fprintf(stderr, "*** Printing <<%d>> (buffer @ %p):\n", buffer->id, buffer);
+ fprintf(stderr, " size: %d; unpacked_size: %d; tso: %d (%p); buffer: %p\n",
+ buffer->size, buffer->unpacked_size, buffer->tso, buffer->buffer);
+ fputs(" contents: ", stderr);
+ for (unpack_locn=0; unpack_locn<buffer->size; unpack_locn++) {
+ closure = buffer->buffer[unpack_locn];
+ fprintf(stderr, ", %p (%s)",
+ closure, info_type(closure));
+ }
+ fputc('\n', stderr);
+
+#if 0
+ /* traverse all elements of the graph; omitted for now, but might be usefule */
+ InitClosureQueue();
+
+ tso = buffer->tso;
+
+ /* Unpack the header */
+ unpacked_size = buffer->unpacked_size;
+ bufsize = buffer->size;
+
+ fprintf(stderr, "Packet %p, size %u (unpacked size is %u); demanded by TSO %d (%p)[PE %d]\n--- Begin ---\n",
+ buffer, bufsize, unpacked_size,
+ tso->id, tso, where_is((StgClosure*)tso));
+
+ do {
+ closurestart = unpack_locn;
+ closure = buffer->buffer[unpack_locn++];
+
+ fprintf(stderr, "[%u]: (%p) ", closurestart, closure);
+
+ info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str1);
+ strcpy(str2, str1);
+ fprintf(stderr, "(%s|%s) ", str1, str2);
+
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ IS_BLACK_HOLE(closure))
+ size = ptrs = nonptrs = vhs = 0;
+
+ if (closure_THUNK(closure)) {
+ if (closure_UNPOINTED(closure))
+ fputs("UNPOINTED ", stderr);
+ else
+ fputs("POINTED ", stderr);
+ }
+ if (IS_BLACK_HOLE(closure)) {
+ fputs("BLACK HOLE\n", stderr);
+ } else {
+ /* Fixed header */
+ fprintf(stderr, "FH [");
+ for (i = 0, p = (StgClosure*)&(closure->header); i < _HS; i++, p++)
+ fprintf(stderr, " %p", *p);
+
+ /* Variable header
+ if (vhs > 0) {
+ fprintf(stderr, "] VH [%p", closure->payload[_HS]);
+
+ for (i = 1; i < vhs; i++)
+ fprintf(stderr, " %p", closure->payload[_HS+i]);
+ }
+ */
+ fprintf(stderr, "] PTRS %u", ptrs);
+
+ /* Non-pointers */
+ if (nonptrs > 0) {
+ fprintf(stderr, " NPTRS [%p", closure->payload[_HS+vhs]);
+
+ for (i = 1; i < nonptrs; i++)
+ fprintf(stderr, " %p", closure->payload[_HS+vhs+i]);
+
+ putc(']', stderr);
+ }
+ putc('\n', stderr);
+ }
+ } while (unpack_locn<bufsize) ; /* (parent != NULL); */
+
+ fprintf(stderr, "--- End ---\n\n");
+#endif /* 0 */
+}
+#endif /* PAR */
+#endif /* DEBUG || GRAN_CHECK */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node End of file, , Printing Packet Contents, Graph packing
+//@subsection End of file
+
+//@index
+//* AllocateHeap:: @cindex\s-+AllocateHeap
+//* AmPacking:: @cindex\s-+AmPacking
+//* CommonUp:: @cindex\s-+CommonUp
+//* DeQueueClosure:: @cindex\s-+DeQueueClosure
+//* DeQueueClosure:: @cindex\s-+DeQueueClosure
+//* DonePacking:: @cindex\s-+DonePacking
+//* FillInClosure:: @cindex\s-+FillInClosure
+//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE
+//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION
+//* InitClosureQueue:: @cindex\s-+InitClosureQueue
+//* InitPendingGABuffer:: @cindex\s-+InitPendingGABuffer
+//* LocateNextParent:: @cindex\s-+LocateNextParent
+//* NotYetPacking:: @cindex\s-+NotYetPacking
+//* OffsetFor:: @cindex\s-+OffsetFor
+//* Pack:: @cindex\s-+Pack
+//* PackArray:: @cindex\s-+PackArray
+//* PackClosure:: @cindex\s-+PackClosure
+//* PackFetchMe:: @cindex\s-+PackFetchMe
+//* PackGeneric:: @cindex\s-+PackGeneric
+//* PackNearbyGraph:: @cindex\s-+PackNearbyGraph
+//* PackOneNode:: @cindex\s-+PackOneNode
+//* PackPAP:: @cindex\s-+PackPAP
+//* PackPLC:: @cindex\s-+PackPLC
+//* PackStkO:: @cindex\s-+PackStkO
+//* PackTSO:: @cindex\s-+PackTSO
+//* PendingGABuffer:: @cindex\s-+PendingGABuffer
+//* PrintPacket:: @cindex\s-+PrintPacket
+//* QueueClosure:: @cindex\s-+QueueClosure
+//* QueueEmpty:: @cindex\s-+QueueEmpty
+//* RoomToPack:: @cindex\s-+RoomToPack
+//* SetGAandCommonUp:: @cindex\s-+SetGAandCommonUp
+//* UnpackGA:: @cindex\s-+UnpackGA
+//* UnpackGraph:: @cindex\s-+UnpackGraph
+//* UnpackOffset:: @cindex\s-+UnpackOffset
+//* UnpackPLC:: @cindex\s-+UnpackPLC
+//* doGlobalGC:: @cindex\s-+doGlobalGC
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* InitPackBuffer:: @cindex\s-+initPackBuffer
+//* isFixed:: @cindex\s-+isFixed
+//* isOffset:: @cindex\s-+isOffset
+//* offsetTable:: @cindex\s-+offsetTable
+//@end index
+
diff --git a/rts/parallel/ParInit.c b/rts/parallel/ParInit.c
new file mode 100644
index 0000000000..22c9119c89
--- /dev/null
+++ b/rts/parallel/ParInit.c
@@ -0,0 +1,322 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Wed Mar 21 2001 16:37:16 Stardate: [-30]6363.46 hwloidl>
+
+ Initialising the parallel RTS
+
+ An extension based on Kevin Hammond's GRAPH for PVM version
+ P. Trinder, January 17th 1995.
+ Adapted for the new RTS
+ P. Trinder, July 1997.
+ H-W. Loidl, November 1999.
+
+ ------------------------------------------------------------------------ */
+
+#ifdef PAR /* whole file */
+
+//@menu
+//* Includes::
+//* Global variables::
+//* Initialisation Routines::
+//@end menu
+
+//@node Includes, Global variables
+//@subsection Includes
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+#include <setjmp.h>
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "ParallelRts.h"
+#include "Sparks.h"
+#include "LLC.h"
+#include "HLC.h"
+
+//@node Global variables, Initialisation Routines, Includes
+//@subsection Global variables
+
+/* Global conditions defined here. */
+
+rtsBool IAmMainThread = rtsFalse; /* Set for the main thread */
+
+/* Task identifiers for various interesting global tasks. */
+
+GlobalTaskId IOTask = 0, /* The IO Task Id */
+ SysManTask = 0, /* The System Manager Task Id */
+ mytid = 0; /* This PE's Task Id */
+
+rtsTime main_start_time; /* When the program started */
+rtsTime main_stop_time; /* When the program finished */
+jmp_buf exit_parallel_system; /* How to abort from the RTS */
+
+
+//rtsBool fishing = rtsFalse; /* We have no fish out in the stream */
+rtsTime last_fish_arrived_at = 0; /* Time of arrival of most recent fish*/
+nat outstandingFishes = 0; /* Number of active fishes */
+
+//@cindex spark queue
+/* GranSim: a globally visible array of spark queues */
+rtsSpark *pending_sparks_hd[SPARK_POOLS], /* ptr to start of a spark pool */
+ *pending_sparks_tl[SPARK_POOLS], /* ptr to end of a spark pool */
+ *pending_sparks_lim[SPARK_POOLS],
+ *pending_sparks_base[SPARK_POOLS];
+
+//@cindex spark_limit
+/* max number of sparks permitted on the PE;
+ see RtsFlags.ParFlags.maxLocalSparks */
+nat spark_limit[SPARK_POOLS];
+
+//@cindex PendingFetches
+/* A list of fetch reply messages not yet processed; this list is filled
+ by awaken_blocked_queue and processed by processFetches */
+StgBlockedFetch *PendingFetches = END_BF_QUEUE;
+
+//@cindex allPEs
+GlobalTaskId *allPEs;
+
+//@cindex nPEs
+nat nPEs = 0;
+
+//@cindex sparksIgnored
+nat sparksIgnored = 0, sparksCreated = 0,
+ threadsIgnored = 0, threadsCreated = 0;
+
+//@cindex advisory_thread_count
+nat advisory_thread_count = 0;
+
+globalAddr theGlobalFromGA;
+
+/* For flag handling see RtsFlags.h */
+
+//@node Prototypes
+//@subsection Prototypes
+
+/* Needed for FISH messages (initialisation of random number generator) */
+void srand48 (long);
+time_t time (time_t *);
+
+//@node Initialisation Routines, , Global variables
+//@subsection Initialisation Routines
+
+/*
+ par_exit defines how to terminate the program. If the exit code is
+ non-zero (i.e. an error has occurred), the PE should not halt until
+ outstanding error messages have been processed. Otherwise, messages
+ might be sent to non-existent Task Ids. The infinite loop will actually
+ terminate, since STG_Exception will call myexit\tr{(0)} when
+ it received a PP_FINISH from the system manager task.
+*/
+//@cindex shutdownParallelSystem
+void
+shutdownParallelSystem(StgInt n)
+{
+ /* use the file specified via -S */
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+
+ IF_PAR_DEBUG(verbose,
+ if (n==0)
+ belch("==== entered shutdownParallelSystem ...");
+ else
+ belch("==== entered shutdownParallelSystem (ERROR %d)...", n);
+ );
+
+ stopPEComms(n);
+
+#if 0
+ if (sf!=(FILE*)NULL)
+ fprintf(sf, "PE %x: %u sparks created, %u sparks Ignored, %u threads created, %u threads Ignored",
+ (W_) mytid, sparksCreated, sparksIgnored,
+ threadsCreated, threadsIgnored);
+#endif
+
+ ShutdownEachPEHook();
+}
+
+//@cindex initParallelSystem
+void
+initParallelSystem(void)
+{
+ /* Don't buffer standard channels... */
+ setbuf(stdout,NULL);
+ setbuf(stderr,NULL);
+
+ srand48(time(NULL) * getpid()); /* Initialise Random-number generator seed*/
+ /* used to select target of FISH message*/
+ if (!InitPackBuffer())
+ barf("InitPackBuffer");
+
+ if (!initMoreBuffers())
+ barf("initMoreBuffers");
+
+ if (!initSparkPools())
+ barf("initSparkPools");
+}
+
+/*
+ * SynchroniseSystem synchronises the reduction task with the system
+ * manager, and initialises the Global address tables (LAGA & GALA)
+ */
+
+//@cindex synchroniseSystem
+void
+synchroniseSystem(void)
+{
+ /* Only in debug mode? */
+ fprintf(stderr, "==== Starting parallel execution on %d processors ...\n", nPEs);
+
+ InitEachPEHook(); /* HWL: hook to be execed on each PE */
+
+ /* Initialize global address tables */
+ initGAtables();
+
+ initParallelSystem();
+
+ startPEComms();
+}
+
+/*
+ Do the startup stuff (this is PVM specific!).
+ Determines global vars: mytid, IAmMainThread, SysManTask, nPEs
+ Called at the beginning of RtsStartup.startupHaskell
+*/
+void
+startupParallelSystem(char *argv[]) {
+ mytid = pvm_mytid(); /* Connect to PVM */
+
+ if (*argv[0] == '-') { /* Look to see whether we're the Main Thread */
+ IAmMainThread = rtsTrue;
+ sscanf(argv[0],"-%0X",&SysManTask); /* extract SysMan task ID*/
+ argv++; /* Strip off flag argument */
+ } else {
+ SysManTask = pvm_parent();
+ }
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] %s PE located SysMan at %x\n",
+ mytid, IAmMainThread?"Main":"Remote", SysManTask));
+
+ nPEs = atoi(argv[1]);
+}
+
+/*
+ Exception handler during startup.
+*/
+void *
+processUnexpectedMessageDuringStartup(rtsPacket p) {
+ OpCode opCode;
+ GlobalTaskId sender_id;
+
+ getOpcodeAndSender(p, &opCode, &sender_id);
+
+ switch(opCode) {
+ case PP_FISH:
+ bounceFish();
+ break;
+#if defined(DIST)
+ case PP_REVAL:
+ bounceReval();
+ break;
+#endif
+ case PP_FINISH:
+ stg_exit(EXIT_SUCCESS);
+ break;
+ default:
+ fprintf(stderr,"== Task %x: Unexpected OpCode %x (%s) from %x in startPEComms\n",
+ mytid, opCode, getOpName(opCode), sender_id);
+ }
+}
+
+void
+startPEComms(void){
+
+ startUpPE();
+ allPEs = (GlobalTaskId *) stgMallocBytes(sizeof(GlobalTaskId) * MAX_PES,
+ "(PEs)");
+
+ /* Send our tid and IAmMainThread flag back to SysMan */
+ sendOp1(PP_READY, SysManTask, (StgWord)IAmMainThread);
+ /* Wait until we get the PE-Id table from Sysman */
+ waitForPEOp(PP_PETIDS, SysManTask, processUnexpectedMessageDuringStartup);
+
+ IF_PAR_DEBUG(verbose,
+ belch("==-- startPEComms: methinks we just received a PP_PETIDS message"));
+
+ /* Digest the PE table we received */
+ processPEtids();
+}
+
+void
+processPEtids(void) {
+ long newPE;
+ nat i, sentPEs, currentPEs;
+
+ nPEs=0;
+
+ currentPEs = nPEs;
+
+ IF_PAR_DEBUG(verbose,
+ belch("==-- processPEtids: starting to iterate over a PVM buffer"));
+ /* ToDo: this has to go into LLComms !!! */
+ GetArgs(&sentPEs,1);
+
+ ASSERT(sentPEs > currentPEs);
+ ASSERT(sentPEs < MAX_PES); /* enforced by SysMan too*/
+
+ for (i = 0; i < sentPEs; i++) {
+ GetArgs(&newPE,1);
+ if (i<currentPEs) {
+ ASSERT(newPE == allPEs[i]);
+ } else {
+#if defined(DIST)
+ // breaks with PAR && !DEBUG
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "[%x] registering %d'th %x\n", mytid, i, newPE));
+ if(!looks_like_tid(newPE))
+ barf("unacceptable taskID %x\n",newPE);
+#endif
+ allPEs[i] = newPE;
+ nPEs++;
+ registerTask(newPE);
+ }
+ }
+
+ IF_PAR_DEBUG(verbose,
+ /* debugging */
+ belch("++++ [%x] PE table as I see it:", mytid);
+ for (i = 0; i < sentPEs; i++) {
+ belch("++++ allPEs[%d] = %x", i, allPEs[i]);
+ });
+}
+
+void
+stopPEComms(StgInt n) {
+ if (n != 0) {
+ /* In case sysman doesn't know about us yet...
+ pvm_initsend(PvmDataDefault);
+ PutArgs(&IAmMainThread,1);
+ pvm_send(SysManTask, PP_READY);
+ */
+ sendOp(PP_READY, SysManTask);
+ }
+
+ sendOp2(PP_FINISH, SysManTask, n, n);
+ waitForPEOp(PP_FINISH, SysManTask, NULL);
+ fflush(gr_file);
+ shutDownPE();
+}
+
+#endif /* PAR -- whole file */
+
+//@index
+//* PendingFetches:: @cindex\s-+PendingFetches
+//* SynchroniseSystem:: @cindex\s-+SynchroniseSystem
+//* allPEs:: @cindex\s-+allPEs
+//* initParallelSystem:: @cindex\s-+initParallelSystem
+//* nPEs:: @cindex\s-+nPEs
+//* par_exit:: @cindex\s-+par_exit
+//* spark queue:: @cindex\s-+spark queue
+//* sparksIgnored:: @cindex\s-+sparksIgnored
+//@end index
+
diff --git a/rts/parallel/ParInit.h b/rts/parallel/ParInit.h
new file mode 100644
index 0000000000..a22a50bae6
--- /dev/null
+++ b/rts/parallel/ParInit.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ * ParInit.h,1
+ *
+ * Phil Trinder
+ * July 1998
+ *
+ * External Parallel Initialisation Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef PARINIT_H
+#define PARINIT_H
+
+extern void RunParallelSystem (P_);
+extern void initParallelSystem(void);
+extern void SynchroniseSystem(void);
+extern void par_exit(I_);
+
+#endif /* PARINIT_H */
diff --git a/rts/parallel/ParTicky.c b/rts/parallel/ParTicky.c
new file mode 100644
index 0000000000..347c2b8bca
--- /dev/null
+++ b/rts/parallel/ParTicky.c
@@ -0,0 +1,450 @@
+/* -------------------------------------------------------------------------
+ *
+ * (c) Hans-Wolfgang Loidl, 2000-
+ *
+ * Parallel ticky profiling, monitoring basic RTS operations in GUM.
+ * Similar in structure to TICKY_TICKY profiling, but doesn't need a
+ * separate way of building GHC.
+ *-------------------------------------------------------------------------- */
+
+#if defined(PAR) && defined(PAR_TICKY)
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+//#include "StoragePriv.h"
+//#include "MBlock.h"
+//#include "Schedule.h"
+#include "GC.h"
+#include "Stats.h"
+#include "ParTicky.h" // ToDo: move into Rts.h
+#include "ParallelRts.h"
+
+#if defined(PAR) && defined(HAVE_GETRUSAGE)
+#include <sys/resource.h>
+#endif
+
+/* external data */
+extern double ElapsedTimeStart;
+
+extern ullong GC_tot_alloc;
+extern ullong GC_tot_copied;
+
+extern lnat MaxResidency; /* in words; for stats only */
+extern lnat ResidencySamples; /* for stats only */
+
+/* ngIplu' {Stats.c}vo' */
+#define BIG_STRING_LEN 512
+
+/* ngIplu' {Ticky.c}vo' */
+#define INTAVG(a,b) ((b == 0) ? 0.0 : ((double) (a) / (double) (b)))
+#define PC(a) (100.0 * a)
+
+#define AVG(thing) \
+ StgDouble avg##thing = INTAVG(tot##thing,ctr##thing)
+
+
+#if 0
+void
+set_foo_time(double *x) {
+ *x = usertime();
+}
+
+double
+get_foo_time(double x) {
+ fprintf(stderr, "get_foo_time: %7.2f (%7.5f,%7.5f) \n",
+ usertime()-x,usertime(),x);
+ return (usertime()-x);
+}
+#endif
+
+static double start_time_GA = 0.0;
+static double start_mark = 0.0;
+static double start_pack = 0.0;
+static double start_unpack = 0.0;
+
+void
+par_ticky_Par_start (void) {
+# if !defined(HAVE_GETRUSAGE) || irix_HOST_OS || defined(_WIN32)
+ fprintf(stderr, "|| sorry don't have RUSAGE\n");
+ return ;
+# else
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ struct rusage t;
+ double utime, stime;
+
+ if (RtsFlags.GcFlags.giveStats>1 && sf != NULL) {
+ getrusage(RUSAGE_SELF, &t);
+
+ utime = t.ru_utime.tv_sec + 1e-6*t.ru_utime.tv_usec;
+ stime = t.ru_stime.tv_sec + 1e-6*t.ru_stime.tv_usec;
+
+ fprintf(stderr, "|| user time: %5.2f; system time: %5.2f\n",
+ utime, stime);
+ fprintf(stderr, "|| max RSS: %ld; int SM size: %ld; int USM data size: %ld; int USS size: %ld\n",
+ t.ru_maxrss, t.ru_ixrss, t.ru_idrss, t.ru_isrss);
+ }
+#endif
+}
+
+#if 0
+FYI:
+ struct rusage
+ {
+ struct timeval ru_utime; /* user time used */
+ struct timeval ru_stime; /* system time used */
+ long ru_maxrss; /* maximum resident set size */
+ long ru_ixrss; /* integral shared memory size */
+ long ru_idrss; /* integral unshared data size */
+ long ru_isrss; /* integral unshared stack size */
+ long ru_minflt; /* page reclaims */
+ long ru_majflt; /* page faults */
+ long ru_nswap; /* swaps */
+ long ru_inblock; /* block input operations */
+ long ru_oublock; /* block output operations */
+ long ru_msgsnd; /* messages sent */
+ long ru_msgrcv; /* messages received */
+ long ru_nsignals; /* signals received */
+ long ru_nvcsw; /* voluntary context switches */
+ long ru_nivcsw; /* involuntary context switches */
+ };
+#endif
+
+
+void
+par_ticky_rebuildGAtables_start(void) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ //set_foo_time(&start_time_GA);
+ start_time_GA = usertime();
+ }
+}
+
+void
+par_ticky_rebuildGAtables_end(nat n, nat size_GA) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ static double foo = 0.0;
+ foo = usertime() - start_time_GA; // get_foo_time(start_time_GA);
+ globalParStats.cnt_rebuild_GA++;
+ globalParStats.tot_rebuild_GA += n;
+ if ( n > globalParStats.res_rebuild_GA )
+ globalParStats.res_rebuild_GA = n;
+ // fprintf(stderr, "rebuildGAtables: footime=%7.2f (%11.5f, %11.5f)\n",
+ // foo, usertime(), start_time_GA);
+ globalParStats.time_rebuild_GA += foo;
+ globalParStats.tot_size_GA += size_GA;
+ if ( size_GA > globalParStats.res_size_GA )
+ globalParStats.res_size_GA = size_GA;
+ }
+ // fprintf(stderr, ">> n: %d; size: %d;; tot: %d; res: %d\n",
+ // n, size_GA, globalParStats.tot_size_GA, globalParStats.res_size_GA);
+}
+
+void
+par_ticky_markLocalGAs_start(void) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_time_GA = usertime();
+ }
+}
+
+void
+par_ticky_markLocalGAs_end(nat n) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.cnt_mark_GA++;
+ globalParStats.tot_mark_GA += n;
+ if ( n > globalParStats.res_mark_GA )
+ globalParStats.res_mark_GA = n;
+ globalParStats.time_mark_GA += usertime() - start_time_GA;
+ }
+}
+
+void
+par_ticky_markSparkQueue_start(void) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_mark=usertime();
+ }
+}
+
+void
+par_ticky_markSparkQueue_end(nat n) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.time_sparks += usertime() - start_mark;
+
+ globalParStats.tot_sparks_marked += n;
+ if ( n > globalParStats.res_sparks_marked )
+ globalParStats.res_sparks_marked = n;
+ }
+}
+
+void
+par_ticky_PackNearbyGraph_start (void) {
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_pack=usertime();
+ }
+}
+
+void
+par_ticky_PackNearbyGraph_end(nat n, nat thunks) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.time_pack += usertime() - start_pack;
+
+ globalParStats.tot_packets++;
+ globalParStats.tot_packet_size += n;
+ if ( n > globalParStats.res_packet_size )
+ globalParStats.res_packet_size = n;
+ globalParStats.tot_thunks += thunks;
+ if ( thunks > globalParStats.res_thunks )
+ globalParStats.res_thunks = thunks;
+ }
+}
+
+void
+par_ticky_UnpackGraph_start (void) {
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ start_unpack=usertime();
+ }
+}
+
+void
+par_ticky_UnpackGraph_end(nat n, nat thunks) {
+ // collect parallel global statistics (currently done together with GC stats)
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ globalParStats.time_unpack += usertime() - start_unpack;
+
+ globalParStats.rec_packets++;
+ globalParStats.rec_packet_size += n;
+ /*
+ if ( n > globalParStats.res_packet_size )
+ globalParStats.res_packet_size = n;
+ */
+ globalParStats.rec_thunks += thunks;
+ /*
+ if ( thunks > globalParStats.res_thunks )
+ globalParStats.res_thunks = thunks;
+ */
+ }
+}
+
+void
+par_ticky_TP (void) {
+ StgSparkPool *pool;
+ nat tp_size, sp_size; // stats only
+
+ // Global stats gathering
+ /* the spark pool for the current PE */
+ pool = &(MainRegTable.rSparks); // generalise to cap = &MainRegTable
+
+ // Global statistics: residency of thread and spark pool
+ if (RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS) {
+ tp_size = run_queue_len() + 1; // add the TSO just poped
+ // No: there may be many blocked threads being awoken at the same time
+ // ASSERT(tp_size <= RtsFlags.ParFlags.maxThreads);
+ globalParStats.tot_tp += tp_size;
+ globalParStats.emp_tp += (tp_size==0) ? 1 : 0;
+ globalParStats.cnt_tp++;
+ if ( tp_size > globalParStats.res_tp)
+ globalParStats.res_tp = tp_size;
+ // fprintf(stderr, "run_queue_len() = %d (max %d)\n", run_queue_len(), globalParStats.res_tp);
+ sp_size = spark_queue_len(pool);
+ //ASSERT(sp_size <= RtsFlags.ParFlags.maxLocalSparks);
+ globalParStats.tot_sp += sp_size;
+ globalParStats.emp_sp += (sp_size==0) ? 1 : 0;
+ globalParStats.cnt_sp++;
+ if ( sp_size > globalParStats.res_sp)
+ globalParStats.res_sp = sp_size;
+ // fprintf(stderr, "spark_queue_len(pool) = %d (max %d)\n", spark_queue_len(pool), globalParStats.res_sp);
+ }
+}
+
+void
+globalParStat_exit(void)
+{
+ FILE *sf = RtsFlags.GcFlags.statsFile;
+ double time, etime;
+
+ /* print only if GC stats is enabled, too; i.e. -sstderr */
+ if (!(RtsFlags.ParFlags.ParStats.Global &&
+ RtsFlags.GcFlags.giveStats > NO_GC_STATS))
+ return;
+
+ time = usertime();
+ etime = elapsedtime() - ElapsedTimeStart;
+ // fprintf(stderr, "foo=%7.2f\n", time);
+
+ if (sf != NULL){
+ char temp[BIG_STRING_LEN];
+
+ // GC_tot_alloc += alloc;
+ fprintf(sf,"\n");
+
+ fprintf(sf, "%11d threads created\n",
+ globalParStats.tot_threads_created);
+ /*
+ Would need to add a ++ to the par macro to use this
+
+ fprintf(sf, "%11d sparks created\n",
+ globalParStats.tot_sparks_created);
+ fprintf(sf, "%11d sparks ignored\n",
+ globalParStats.tot_sparks_ignored);
+ */
+ ullong_format_string(globalParStats.res_tp, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s thread pool residency", temp);
+ fprintf(sf, " (avg: %3.2f; %d times (%2.2f%%) of %d empty)\n",
+ (double)globalParStats.tot_tp/(double)globalParStats.cnt_tp,
+ globalParStats.emp_tp,
+ globalParStats.emp_tp*100.0/(double)globalParStats.cnt_tp,
+ globalParStats.cnt_tp);
+ ullong_format_string(globalParStats.res_sp, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s spark pool residency", temp);
+
+ fprintf(sf, " (avg: %3.2f; %d times (%2.2f%%) of %d empty)\n",
+ (double)globalParStats.tot_sp/(double)globalParStats.cnt_sp,
+ globalParStats.emp_sp,
+ globalParStats.emp_sp*100.0/(double)globalParStats.cnt_sp,
+ globalParStats.cnt_sp);
+ //ullong_format_string(globalParStats.tot_fishes, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11d messages sent (%d fish, %d fetch, %d resume, %d schedule",
+ globalParStats.tot_fish_mess+globalParStats.tot_fetch_mess+
+ globalParStats.tot_resume_mess+globalParStats.tot_schedule_mess,
+ globalParStats.tot_fish_mess, globalParStats.tot_fetch_mess,
+ globalParStats.tot_resume_mess, globalParStats.tot_schedule_mess);
+#if defined(DIST)
+ fprintf(sf, "%d revals", globalParStats.tot_reval_mess);
+#endif
+ fprintf(sf,")\n");
+ fprintf(sf, "%11d messages received (%d fish, %d fetch, %d resume, %d schedule",
+ globalParStats.rec_fish_mess+globalParStats.rec_fetch_mess+
+ globalParStats.rec_resume_mess+globalParStats.rec_schedule_mess,
+ globalParStats.rec_fish_mess, globalParStats.rec_fetch_mess,
+ globalParStats.rec_resume_mess, globalParStats.rec_schedule_mess);
+#if defined(DIST)
+ fprintf(sf, "%d revals", globalParStats.rec_reval_mess);
+#endif
+ fprintf(sf,")\n\n");
+
+ ullong_format_string(globalParStats.tot_size_GA*sizeof(W_), temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes of global heap in total ", temp);
+ fprintf(sf, "(%5.2f%% of total allocated heap)\n",
+ globalParStats.tot_size_GA*sizeof(W_)*100.0/(double)GC_tot_alloc*sizeof(W_));
+ ullong_format_string(globalParStats.res_size_GA*sizeof(W_), temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes global heap residency ", temp);
+ fprintf(sf, "(%5.2f%% of max heap residency)\n",
+ globalParStats.res_size_GA*sizeof(W_)*100.0/(double)MaxResidency*sizeof(W_));
+
+ //ullong_format_string(globalParStats.res_mark_GA, temp, rtsTrue/*commas*/);
+ //fprintf(sf, "%11s GAs residency in GALA table ", temp);
+ // ullong_format_string(globalParStats.tot_mark_GA, temp, rtsTrue/*commas*/);
+ //fprintf(sf, "(avg %5.2f; %d samples)\n",
+ // (double)globalParStats.tot_mark_GA/(double)globalParStats.cnt_mark_GA,
+ // globalParStats.cnt_mark_GA);
+
+ ullong_format_string(globalParStats.local_alloc_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s GAs locally allocated (calls to makeGlobal)\n", temp);
+
+ ullong_format_string(globalParStats.tot_rebuild_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s live GAs in total (after rebuilding tables)\n", temp);
+ ullong_format_string(globalParStats.res_rebuild_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s GAs residency (after rebuilding tables) ", temp);
+ fprintf(sf, "(avg %5.2f; %d samples)\n",
+ (double)globalParStats.tot_rebuild_GA/(double)globalParStats.cnt_rebuild_GA,
+ globalParStats.cnt_rebuild_GA);
+ ullong_format_string(globalParStats.res_free_GA, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s residency of freeing GAs", temp);
+ fprintf(sf, " (avg %5.2f; %d samples)\n",
+ (double)globalParStats.tot_free_GA/(double)globalParStats.cnt_free_GA,
+ globalParStats.cnt_free_GA);
+
+ fprintf(sf, "%11.2fs spent marking GAs (%7.2f%% of %7.2fs)\n",
+ globalParStats.time_mark_GA,
+ globalParStats.time_mark_GA*100./time, time);
+ fprintf(sf, "%11.2fs spent rebuilding GALA tables (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs)\n",
+ globalParStats.time_rebuild_GA,
+ globalParStats.time_rebuild_GA*100./time, time,
+ globalParStats.time_rebuild_GA*100./etime, etime);
+
+ ullong_format_string(globalParStats.tot_sparks_marked, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s sparks marked\t", temp);
+ ullong_format_string(globalParStats.res_sparks_marked, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%6s spark mark residency\n", temp);
+ fprintf(sf, "%11.2fs spent marking sparks (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n",
+ globalParStats.time_sparks,
+ globalParStats.time_sparks*100./time, time,
+ globalParStats.time_sparks*100./etime, etime);
+
+ fprintf(sf,"\n");
+
+ ullong_format_string(globalParStats.tot_packets, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s packets sent\n", temp);
+ ullong_format_string(globalParStats.tot_packet_size, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes of graph sent in total (max %d; avg %.2f)\n",
+ temp, globalParStats.res_packet_size,
+ (double)globalParStats.tot_packet_size/(double)globalParStats.tot_packets);
+ ullong_format_string(globalParStats.tot_thunks, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s thunks sent in total (max %d; avg %.2f)\n",
+ temp, globalParStats.res_thunks,
+ (double)globalParStats.tot_thunks/(double)globalParStats.tot_packets);
+ fprintf(sf, "%11.2fs spent packing graph structures (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n",
+ globalParStats.time_pack,
+ globalParStats.time_pack*100./time, time,
+ globalParStats.time_pack*100./etime, etime);
+
+ ullong_format_string(globalParStats.rec_packets, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s packets received\n", temp);
+ ullong_format_string(globalParStats.rec_packet_size, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytes of graph received in total (max %d; avg %.2f)\n",
+ temp, globalParStats.rec_res_packet_size,
+ (double)globalParStats.rec_packet_size/(double)globalParStats.rec_packets);
+ ullong_format_string(globalParStats.rec_thunks, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s thunks received in total (max %d; avg %.2f)\n",
+ temp, globalParStats.rec_res_thunks,
+ (double)globalParStats.rec_thunks/(double)globalParStats.rec_packets);
+ fprintf(sf, "%11.2fs spent unpacking graph structures (%7.2f%% of %7.2fs; %7.2f%% of %7.2fs elapsed)\n",
+ globalParStats.time_unpack,
+ globalParStats.time_unpack*100./time, time,
+ globalParStats.time_unpack*100./etime, etime);
+
+ fprintf(sf,"\n");
+
+ ullong_format_string(globalParStats.tot_arrs, temp, rtsTrue/*commas*/);
+ fprintf(sf, "%11s bytearrays sent; ", temp);
+ ullong_format_string(globalParStats.tot_arr_size, temp, rtsTrue/*commas*/);
+ fprintf(sf, " %s bytes in total (avg %.2f)\n",
+ temp,
+ (double)globalParStats.tot_arr_size/(double)globalParStats.tot_arrs);
+
+ fprintf(sf,"\n");
+
+ fprintf(sf, "%11d yields, %d stack overflows, %d heap overflows\n",
+ globalParStats.tot_yields, globalParStats.tot_stackover,
+ globalParStats.tot_heapover);
+
+ fprintf(sf,"\n");
+
+ //fprintf(stderr, "Printing this pathetic statistics took %7.2fs (start @ %7.2f)\n",
+ // usertime()-time, time);
+
+ fflush(sf);
+ // Open filehandle needed by other stats printing fcts
+ // fclose(sf);
+ }
+}
+
+#endif
+
diff --git a/rts/parallel/ParTicky.h b/rts/parallel/ParTicky.h
new file mode 100644
index 0000000000..1d6e7435c9
--- /dev/null
+++ b/rts/parallel/ParTicky.h
@@ -0,0 +1,60 @@
+/* --------------------------------------------------------------------------
+ *
+ * (c) Hans-Wolfgang Loidl, 2000-
+ *
+ * Header for ParTicky.c
+ *
+ * --------------------------------------------------------------------------*/
+
+#if defined(PAR_TICKY)
+
+/* macros */
+#define PAR_TICKY_PAR_START() par_ticky_Par_start ()
+#define PAR_TICKY_PAR_END() globalParStat_exit ()
+#define PAR_TICKY_REBUILD_GA_TABLES_START() par_ticky_rebuildGAtables_start()
+#define PAR_TICKY_REBUILD_GA_TABLES_END(n, size_GA) par_ticky_rebuildGAtables_end(n, size_GA)
+#define PAR_TICKY_MARK_LOCAL_GAS_START() par_ticky_markLocalGAs_start()
+#define PAR_TICKY_MARK_LOCAL_GAS_END(n) par_ticky_markLocalGAs_end(n)
+#define PAR_TICKY_MARK_SPARK_QUEUE_START() par_ticky_markSparkQueue_start()
+#define PAR_TICKY_MARK_SPARK_QUEUE_END(n) par_ticky_markSparkQueue_end(n)
+#define PAR_TICKY_PACK_NEARBY_GRAPH_START() (par_ticky_PackNearbyGraph_start())
+#define PAR_TICKY_PACK_NEARBY_GRAPH_END(n, thunks) par_ticky_PackNearbyGraph_end(n, thunks)
+#define PAR_TICKY_UNPACK_GRAPH_START() par_ticky_UnpackGraph_start()
+#define PAR_TICKY_UNPACK_GRAPH_END(n,thunks) par_ticky_UnpackGraph_end(n,thunks)
+#define PAR_TICKY_TP() par_ticky_TP()
+#define PAR_TICKY_CNT_FREE_GA() stats_CntFreeGA()
+
+/* prototypes */
+extern void par_ticky_Par_start (void) ;
+extern void par_ticky_rebuildGAtables_start(void) ;
+extern void par_ticky_rebuildGAtables_end(nat n, nat size_GA) ;
+extern void par_ticky_markLocalGAs_start(void) ;
+extern void par_ticky_markLocalGAs_end(nat n) ;
+extern void par_ticky_markSparkQueue_start(void) ;
+extern void par_ticky_markSparkQueue_end(nat n) ;
+extern void par_ticky_PackNearbyGraph_start (void) ;
+extern void par_ticky_PackNearbyGraph_end(nat n, nat thunks) ;
+extern void par_ticky_UnpackGraph_start (void) ;
+extern void par_ticky_UnpackGraph_end(nat n, nat thunks) ;
+extern void par_ticky_TP (void) ;
+extern void globalParStat_exit(void);
+
+#else
+
+#define PAR_TICKY_PAR_START()
+#define PAR_TICKY_PAR_END()
+#define PAR_TICKY_REBUILD_GA_TABLES_START()
+#define PAR_TICKY_REBUILD_GA_TABLES_END(n, size_GA)
+#define PAR_TICKY_MARK_LOCAL_GAS_START()
+#define PAR_TICKY_MARK_LOCAL_GAS_END(n)
+#define PAR_TICKY_MARK_SPARK_QUEUE_START()
+#define PAR_TICKY_MARK_SPARK_QUEUE_END(n)
+#define PAR_TICKY_PACK_NEARBY_GRAPH_START ()
+#define PAR_TICKY_PACK_NEARBY_GRAPH_END(n, thunks)
+#define PAR_TICKY_UNPACK_GRAPH_START ()
+#define PAR_TICKY_UNPACK_GRAPH_END(n, thunks)
+#define PAR_TICKY_TP ()
+#define PAR_TICKY_CNT_FREE_GA()
+
+#endif
+
diff --git a/rts/parallel/ParTypes.h b/rts/parallel/ParTypes.h
new file mode 100644
index 0000000000..910a6f2d99
--- /dev/null
+++ b/rts/parallel/ParTypes.h
@@ -0,0 +1,38 @@
+/* ---------------------------------------------------------------------------
+ * Time-stamp: <Tue Nov 09 1999 16:31:38 Stardate: [-30]3873.44 hwloidl>
+ *
+ * Runtime system types for GUM
+ *
+ * ------------------------------------------------------------------------- */
+
+#ifndef PARTYPES_H
+#define PARTYPES_H
+
+#ifdef PAR /* all of it */
+
+// now in Parallel.h
+//typedef struct hashtable HashTable;
+//typedef struct hashlist HashList;
+
+/* Global addresses now live in Parallel.h (needed in Closures.h) */
+// gaddr
+
+// now in Parallel.h
+/* (GA, LA) pairs
+typedef struct gala {
+ globalAddr ga;
+ StgPtr la;
+ struct gala *next;
+ rtsBool preferred;
+} rtsGaLa;
+*/
+
+#if defined(GRAN)
+typedef unsigned long TIME;
+typedef unsigned char Proc;
+typedef unsigned char EVTTYPE;
+#endif
+
+#endif /* PAR */
+
+#endif /* ! PARTYPES_H */
diff --git a/rts/parallel/Parallel.c b/rts/parallel/Parallel.c
new file mode 100644
index 0000000000..414b7e4406
--- /dev/null
+++ b/rts/parallel/Parallel.c
@@ -0,0 +1,1140 @@
+/*
+ Time-stamp: <Wed Mar 21 2001 16:42:40 Stardate: [-30]6363.48 hwloidl>
+
+ Basic functions for use in either GranSim or GUM.
+*/
+
+#if defined(GRAN) || defined(PAR) /* whole file */
+
+//@menu
+//* Includes::
+//* Variables and constants::
+//* Writing to the log-file::
+//* Global statistics::
+//* Dumping routines::
+//@end menu
+//*/ fool highlight
+
+//@node Includes, Variables and constants
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "Storage.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+
+//@node Variables and constants, Writing to the log-file, Includes
+//@subsection Variables and constants
+
+/* Where to write the log file */
+FILE *gr_file = NULL;
+char gr_filename[STATS_FILENAME_MAXLEN];
+
+#if defined(PAR)
+/* Global statistics */
+GlobalParStats globalParStats;
+#endif
+
+#if defined(PAR)
+ullong startTime = 0;
+#endif
+
+#if defined(PAR) && !defined(DEBUG)
+// HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCKKKKKKKKKKKK
+// Definitely the wrong place for info_type in !DEBUG (see Printer.c) -- HWL
+
/* Human-readable names of the closure types, indexed by the `type'
   field of an info table; used by the info_type* helpers below.
   NOTE(review): must stay in sync with the closure-type enumeration
   (see Printer.c, which owns the DEBUG version of this table). */
+static char *closure_type_names[] = {
+  "INVALID_OBJECT",          	/* 0  */
+  "CONSTR",                  	/* 1  */
+  "CONSTR_1_0",			/* 2  */
+  "CONSTR_0_1",			/* 3  */
+  "CONSTR_2_0",			/* 4  */
+  "CONSTR_1_1",			/* 5  */
+  "CONSTR_0_2",			/* 6  */
+  "CONSTR_INTLIKE",          	/* 7  */
+  "CONSTR_CHARLIKE",          	/* 8  */
+  "CONSTR_STATIC",           	/* 9  */
+  "CONSTR_NOCAF_STATIC",     	/* 10 */
+  "FUN",                     	/* 11 */
+  "FUN_1_0",			/* 12 */
+  "FUN_0_1",			/* 13 */
+  "FUN_2_0",			/* 14 */
+  "FUN_1_1",			/* 15 */
+  "FUN_0_2",			/* 16 */
+  "FUN_STATIC",              	/* 17 */
+  "THUNK",                   	/* 18 */
+  "THUNK_1_0",			/* 19 */
+  "THUNK_0_1",			/* 20 */
+  "THUNK_2_0",			/* 21 */
+  "THUNK_1_1",			/* 22 */
+  "THUNK_0_2",			/* 23 */
+  "THUNK_STATIC",            	/* 24 */
+  "THUNK_SELECTOR",          	/* 25 */
+  "BCO",                     	/* 26 */
+  "AP_UPD",                  	/* 27 */
+  "PAP",                     	/* 28 */
+  "IND",                     	/* 29 */
+  "IND_OLDGEN",              	/* 30 */
+  "IND_PERM",                	/* 31 */
+  "IND_OLDGEN_PERM",         	/* 32 */
+  "IND_STATIC",              	/* 33 */
+  "CAF_UNENTERED",           	/* 34 */
+  "CAF_ENTERED",		/* 35 */
+  "CAF_BLACKHOLE",		/* 36 */
+  "RET_BCO",                 	/* 37 */
+  "RET_SMALL",               	/* 38 */
+  "RET_VEC_SMALL",           	/* 39 */
+  "RET_BIG",                 	/* 40 */
+  "RET_VEC_BIG",             	/* 41 */
+  "RET_DYN",                 	/* 42 */
+  "UPDATE_FRAME",            	/* 43 */
+  "CATCH_FRAME",             	/* 44 */
+  "STOP_FRAME",              	/* 45 */
+  "SEQ_FRAME",               	/* 46 */
+  "BLACKHOLE",               	/* 47 */
+  "BLACKHOLE_BQ",            	/* 48 */
+  "SE_BLACKHOLE",		/* 49 */
+  "SE_CAF_BLACKHOLE",		/* 50 */
+  "MVAR",                    	/* 51 */
+  "ARR_WORDS",               	/* 52 */
+  "MUT_ARR_PTRS",            	/* 53 */
+  "MUT_ARR_PTRS_FROZEN",     	/* 54 */
+  "MUT_VAR",                 	/* 55 */
+  "WEAK",                    	/* 56 */
+  "FOREIGN",                 	/* 57 */
+  "STABLE_NAME",             	/* 58 */
+  "TSO",                     	/* 59 */
+  "BLOCKED_FETCH",           	/* 60 */
+  "FETCH_ME",                	/* 61 */
+  "FETCH_ME_BQ",             	/* 62 */
+  "RBH",                     	/* 63 */
+  "EVACUATED",               	/* 64 */
+  "REMOTE_REF",              	/* 65 */
+  "N_CLOSURE_TYPES"         	/* 66 */
+};
+
/* Return the printable name of `closure's closure type, looked up in
   closure_type_names above via the info-table `type' field. */
+char *
+info_type(StgClosure *closure){ 
+  return closure_type_names[get_itbl(closure)->type];
+}
+
/* As info_type, but starting from an info-table pointer rather than a
   closure (useful when only the info pointer is at hand). */
+char *
+info_type_by_ip(StgInfoTable *ip){ 
+  return closure_type_names[ip->type];
+}
+
/* Copy the closure-type name of `closure' into the caller-supplied
   buffer `res'.
   NOTE(review): strcpy is unbounded here -- `res' must hold at least
   the longest table entry ("CONSTR_NOCAF_STATIC" + NUL, 20 bytes);
   confirm at call sites. */
+void
+info_hdr_type(StgClosure *closure, char *res){ 
+  strcpy(res,closure_type_names[get_itbl(closure)->type]);
+}
+#endif
+
+//@node Writing to the log-file, Global statistics, Variables and constants
+//@subsection Writing to the log-file
+/*
+ Writing to the log-file
+
+ These routines dump event-based info to the main log-file.
+ The code for writing log files is shared between GranSim and GUM.
+*/
+
+/*
+ * If you're not using GNUC and you're on a 32-bit machine, you're
+ * probably out of luck here. However, since CONCURRENT currently
+ * requires GNUC, I'm not too worried about it. --JSM
+ */
+
+//@cindex init_gr_simulation
+#if defined(GRAN)
+void
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ nat i;
+ char *extension = RtsFlags.GranFlags.GranSimStats.Binary ? "gb" : "gr";
+
+ if (RtsFlags.GranFlags.GranSimStats.Global)
+ init_gr_stats();
+
+ /* init global constants for costs of basic operations */
+ gran_arith_cost = RtsFlags.GranFlags.Costs.arith_cost;
+ gran_branch_cost = RtsFlags.GranFlags.Costs.branch_cost;
+ gran_load_cost = RtsFlags.GranFlags.Costs.load_cost;
+ gran_store_cost = RtsFlags.GranFlags.Costs.store_cost;
+ gran_float_cost = RtsFlags.GranFlags.Costs.float_cost;
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+
+ if (!RtsFlags.GranFlags.GranSimStats.Full)
+ return;
+
+ sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension);
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL) {
+ barf("Can't open granularity simulation report file %s\n",
+ gr_filename);
+ }
+
+ setbuf(gr_file, NULL); /* turn buffering off */
+
+ /* write header with program name, options and setup to gr_file */
+ fputs("Granularity Simulation for ", gr_file);
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+
+ fputs("\nStart time: ", gr_file);
+ fputs(time_str(), gr_file); /* defined in RtsUtils.c */
+ fputc('\n', gr_file);
+
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("General Parameters:\n\n", gr_file);
+
+ if (RtsFlags.GranFlags.Light)
+ fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RtsFlags.GranFlags.DoThreadMigration?"":"Don't ",
+ RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" :
+ "Block on Fetch");
+ else
+ fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n",
+ RtsFlags.GranFlags.proc,RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair",
+ RtsFlags.GranFlags.DoThreadMigration?"":"Don't ",
+ RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"",
+ RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" :
+ "Block on Fetch");
+
+ if (RtsFlags.GranFlags.DoBulkFetching)
+ if (RtsFlags.GranFlags.ThunksToPack)
+ fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n",
+ RtsFlags.GranFlags.ThunksToPack,
+ RtsFlags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n",
+ RtsFlags.GranFlags.packBufferSize);
+ else
+ fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n");
+
+ fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n",
+ RtsFlags.GranFlags.FetchStrategy,
+ RtsFlags.GranFlags.FetchStrategy==0 ?
+ " block (block-on-fetch)":
+ RtsFlags.GranFlags.FetchStrategy==1 ?
+ "only run runnable threads":
+ RtsFlags.GranFlags.FetchStrategy==2 ?
+ "create threads only from local sparks":
+ RtsFlags.GranFlags.FetchStrategy==3 ?
+ "create threads from local or global sparks":
+ RtsFlags.GranFlags.FetchStrategy==4 ?
+ "create sparks and steal threads if necessary":
+ "unknown");
+
+ if (RtsFlags.GranFlags.DoPrioritySparking)
+ fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n");
+
+ if (RtsFlags.GranFlags.DoPriorityScheduling)
+ fprintf(gr_file, "Priority Scheduling (i.e. keep threads ordered by priority)\n");
+
+ fprintf(gr_file, "Thread Creation Time %u, Thread Queue Time %u\n",
+ RtsFlags.GranFlags.Costs.threadcreatetime,
+ RtsFlags.GranFlags.Costs.threadqueuetime);
+ fprintf(gr_file, "Thread DeSchedule Time %u, Thread Schedule Time %u\n",
+ RtsFlags.GranFlags.Costs.threaddescheduletime,
+ RtsFlags.GranFlags.Costs.threadscheduletime);
+ fprintf(gr_file, "Thread Context-Switch Time %u\n",
+ RtsFlags.GranFlags.Costs.threadcontextswitchtime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Communication Metrics:\n\n", gr_file);
+ fprintf(gr_file,
+ "Latency %u (1st) %u (rest), Fetch %u, Notify %u (Global) %u (Local)\n",
+ RtsFlags.GranFlags.Costs.latency,
+ RtsFlags.GranFlags.Costs.additional_latency,
+ RtsFlags.GranFlags.Costs.fetchtime,
+ RtsFlags.GranFlags.Costs.gunblocktime,
+ RtsFlags.GranFlags.Costs.lunblocktime);
+ fprintf(gr_file,
+ "Message Creation %u (+ %u after send), Message Read %u\n",
+ RtsFlags.GranFlags.Costs.mpacktime,
+ RtsFlags.GranFlags.Costs.mtidytime,
+ RtsFlags.GranFlags.Costs.munpacktime);
+ fputs("\n\n--------------------\n\n", gr_file);
+
+ fputs("Instruction Metrics:\n\n", gr_file);
+ fprintf(gr_file, "Arith %u, Branch %u, Load %u, Store %u, Float %u, Alloc %u\n",
+ RtsFlags.GranFlags.Costs.arith_cost,
+ RtsFlags.GranFlags.Costs.branch_cost,
+ RtsFlags.GranFlags.Costs.load_cost,
+ RtsFlags.GranFlags.Costs.store_cost,
+ RtsFlags.GranFlags.Costs.float_cost,
+ RtsFlags.GranFlags.Costs.heapalloc_cost);
+ fputs("\n\n++++++++++++++++++++\n\n", gr_file);
+
+# if 0
+ /* binary log files are currently not supported */
+ if (RtsFlags.GranFlags.GranSimStats.Binary)
+ grputw(sizeof(rtsTime));
+# endif
+
+ return (0);
+}
+
+#elif defined(PAR)
+
+void init_gr_stats (void);
+
+void
+init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv)
+char *prog_argv[], *rts_argv[];
+int prog_argc, rts_argc;
+{
+ nat i;
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+ char *extension = RtsFlags.ParFlags.ParStats.Binary ? "gb" : "gr";
+
+ sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension);
+
+ if (!RtsFlags.ParFlags.ParStats.Full)
+ return;
+
+ if (RtsFlags.ParFlags.ParStats.Global)
+ init_gr_stats();
+
+ if ((gr_file = fopen(gr_filename, "w")) == NULL)
+ barf("Can't open activity report file %s\n", gr_filename);
+
+ setbuf(gr_file, NULL); /* turn buffering off */
+
+ /* write header with program name, options and setup to gr_file */
+ for (i = 0; i < prog_argc; ++i) {
+ fputs(prog_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+
+ if (rts_argc > 0) {
+ fputs("+RTS ", gr_file);
+
+ for (i = 0; i < rts_argc; ++i) {
+ fputs(rts_argv[i], gr_file);
+ fputc(' ', gr_file);
+ }
+ }
+ fputc('\n', gr_file);
+
+ /* record the absolute start time to allow synchronisation of log-files */
+ fputs("Start-Time: ", gr_file);
+ fputs(time_str(), gr_file);
+ fputc('\n', gr_file);
+
+ ASSERT(startTime==0);
+ // startTime = msTime();
+ startTime = CURRENT_TIME;
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+ fprintf(gr_file, "PE %2u [%s]: TIME\n", thisPE, time_string);
+
+# if 0
+ ngoq Dogh'q' vImuS
+ IF_PAR_DEBUG(verbose,
+ belch("== Start-time: %ld (%s)",
+ startTime, time_string));
+
+ if (startTime > LL(1000000000)) {
+ fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,
+ (rtsTime) (startTime / LL(1000000000)),
+ (rtsTime) (startTime % LL(1000000000)));
+ } else {
+ fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime);
+ }
+ /* binary log files are currently not supported */
+ if (RtsFlags.GranFlags.GranSimStats.Binary)
+ grputw(sizeof(rtsTime));
+# endif
+
+ return;
+}
+
+void
+init_gr_stats (void) {
+ // memset(&globalParStats, '\0', sizeof(GlobalParStats));
+
+ globalParStats.tot_mark_GA = globalParStats.tot_rebuild_GA = globalParStats.tot_free_GA = globalParStats.res_mark_GA = globalParStats.res_rebuild_GA = globalParStats.res_free_GA = globalParStats.tot_size_GA = globalParStats.res_size_GA = globalParStats.tot_global = globalParStats.tot_local = 0;
+ globalParStats.cnt_mark_GA = globalParStats.cnt_rebuild_GA = globalParStats.cnt_free_GA = globalParStats.res_free_GA = globalParStats.local_alloc_GA = 0;
+
+ globalParStats.time_mark_GA = 0.0;
+ globalParStats.time_rebuild_GA = 0.0;
+ globalParStats.time_sparks = 0.0;
+ globalParStats.time_pack = 0.0;
+
+ globalParStats.res_sp = globalParStats.res_tp = globalParStats.tot_sp = globalParStats.tot_tp = globalParStats.cnt_sp = globalParStats.cnt_tp = globalParStats.emp_sp = globalParStats.emp_tp = 0;
+ globalParStats.tot_packets = globalParStats.tot_packet_size = globalParStats.tot_thunks = globalParStats.res_packet_size = globalParStats.res_thunks = globalParStats.rec_res_packet_size = globalParStats.rec_res_thunks = 0;
+
+ globalParStats.tot_fish_mess = globalParStats.tot_fetch_mess = globalParStats.tot_resume_mess = globalParStats.tot_schedule_mess = 0;
+ globalParStats.rec_fish_mess = globalParStats.rec_resume_mess = globalParStats.rec_schedule_mess = 0;
+ globalParStats.rec_fetch_mess = 0;
+#if defined(DIST)
+ globalParStats.tot_reval_mess = 0;
+ globalParStats.rec_reval_mess = 0;
+#endif
+
+ globalParStats.tot_threads_created = globalParStats.tot_sparks_created = globalParStats.tot_sparks_ignored = globalParStats.tot_sparks_marked = globalParStats.res_sparks_created = globalParStats.res_sparks_ignored = globalParStats.res_sparks_marked = 0;
+ globalParStats.tot_yields = globalParStats.tot_stackover = globalParStats.tot_heapover = 0;
+
+ globalParStats.tot_arrs = globalParStats.tot_arr_size = 0;
+}
+
+#endif /* PAR */
+
+//@cindex end_gr_simulation
+#if defined(GRAN)
+void
+end_gr_simulation(void)
+{
+ char time_string[TIME_STR_LEN];
+
+ ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/);
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+
+ /* Print event stats */
+ if (RtsFlags.GranFlags.GranSimStats.Global) {
+ nat i;
+
+ fprintf(stderr,"Total yields: %d\n",
+ globalGranStats.tot_yields);
+
+ fprintf(stderr,"Total number of threads created: %d ; per PE:\n",
+ globalGranStats.tot_threads_created);
+ for (i=0; i<RtsFlags.GranFlags.proc; i++) {
+ fprintf(stderr," PE %d: %d\t",
+ i, globalGranStats.threads_created_on_PE[i]);
+ if (i+1 % 4 == 0) fprintf(stderr,"\n");
+ }
+ if (RtsFlags.GranFlags.proc+1 % 4 != 0) fprintf(stderr,"\n");
+ fprintf(stderr,"Total number of threads migrated: %d\n",
+ globalGranStats.tot_TSOs_migrated);
+
+ fprintf(stderr,"Total number of sparks created: %d ; per PE:\n",
+ globalGranStats.tot_sparks_created);
+ for (i=0; i<RtsFlags.GranFlags.proc; i++) {
+ fprintf(stderr," PE %d: %d\t",
+ i, globalGranStats.sparks_created_on_PE[i]);
+ if (i+1 % 4 == 0) fprintf(stderr,"\n");
+ }
+ if (RtsFlags.GranFlags.proc+1 % 4 != 0) fprintf(stderr,"\n");
+
+ fprintf(stderr,"Event statistics (number of events: %d):\n",
+ globalGranStats.noOfEvents);
+ for (i=0; i<=MAX_EVENT; i++) {
+ fprintf(stderr," %s (%d): \t%d \t%f%%\t%f%%\n",
+ event_names[i],i,globalGranStats.event_counts[i],
+ (float)(100*globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents),
+ (i==ContinueThread ? 0.0 :
+ (float)(100*(globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents-globalGranStats.event_counts[ContinueThread])) ));
+ }
+ fprintf(stderr,"Randomized steals: %ld sparks, %ld threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f)\n\t(Threads: %ld)",
+ globalGranStats.rs_sp_count,
+ globalGranStats.rs_t_count,
+ globalGranStats.no_of_steals,
+ (float)globalGranStats.ntimes_total/(float)stg_max(globalGranStats.no_of_steals,1),
+ (float)globalGranStats.fl_total/(float)stg_max(globalGranStats.no_of_steals,1),
+ globalGranStats.no_of_migrates);
+ fprintf(stderr,"Moved sparks: %d Withered sparks: %d (%.2f %%)\n",
+ globalGranStats.tot_sparks, globalGranStats.withered_sparks,
+ ( globalGranStats.tot_sparks == 0 ? 0 :
+ (float)(100*globalGranStats.withered_sparks)/(float)(globalGranStats.tot_sparks)) );
+ /* Print statistics about priority sparking */
+ if (RtsFlags.GranFlags.DoPrioritySparking) {
+ fprintf(stderr,"About Priority Sparking:\n");
+ fprintf(stderr," Total no. NewThreads: %d Avg. spark queue len: %.2f \n", globalGranStats.tot_sq_probes, (float)globalGranStats.tot_sq_len/(float)globalGranStats.tot_sq_probes);
+ }
+ /* Print statistics about priority sparking */
+ if (RtsFlags.GranFlags.DoPriorityScheduling) {
+ fprintf(stderr,"About Priority Scheduling:\n");
+ fprintf(stderr," Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",
+ globalGranStats.tot_add_threads, globalGranStats.non_end_add_threads,
+ (float)globalGranStats.tot_tq_len/(float)globalGranStats.tot_add_threads);
+ }
+ /* Blocking queue statistics */
+ if (1) {
+ fprintf(stderr,"Blocking queue statistcs:\n");
+ fprintf(stderr," Total no. of FMBQs generated: %d\n",
+ globalGranStats.tot_FMBQs);
+ fprintf(stderr," Total no. of bqs awakened: %d\n",
+ globalGranStats.tot_awbq);
+ fprintf(stderr," Total length of all bqs: %d\tAvg length of bqs: %.2f\n",
+ globalGranStats.tot_bq_len, (float)globalGranStats.tot_bq_len/(float)globalGranStats.tot_awbq);
+ fprintf(stderr," Percentage of local TSOs in BQs: %.2f\n",
+ (float)globalGranStats.tot_bq_len*100.0/(float)globalGranStats.tot_bq_len);
+ fprintf(stderr," Total time spent processing BQs: %lx\n",
+ globalGranStats.tot_bq_processing_time);
+ }
+
+ /* Fetch misses and thunk stealing */
+ fprintf(stderr,"Number of fetch misses: %d\n",
+ globalGranStats.fetch_misses);
+
+ /* Print packet statistics if GUMM fetching is turned on */
+ if (RtsFlags.GranFlags.DoBulkFetching) {
+ fprintf(stderr,"Packet statistcs:\n");
+ fprintf(stderr," Total no. of packets: %d Avg. packet size: %.2f \n", globalGranStats.tot_packets, (float)globalGranStats.tot_packet_size/(float)globalGranStats.tot_packets);
+ fprintf(stderr," Total no. of thunks: %d Avg. thunks/packet: %.2f \n", globalGranStats.tot_thunks, (float)globalGranStats.tot_thunks/(float)globalGranStats.tot_packets);
+ fprintf(stderr," Total no. of cuts: %d Avg. cuts/packet: %.2f\n", globalGranStats.tot_cuts, (float)globalGranStats.tot_cuts/(float)globalGranStats.tot_packets);
+ /*
+ if (closure_queue_overflows>0)
+ fprintf(stderr," Number of closure queue overflows: %u\n",
+ closure_queue_overflows);
+ */
+ }
+ } /* RtsFlags.GranFlags.GranSimStats.Global */
+
+# if defined(GRAN_COUNT)
+# error "GRAN_COUNT not supported; should be parallel ticky profiling, really"
+ fprintf(stderr,"Update count statistics:\n");
+ fprintf(stderr," Total number of updates: %u\n",nUPDs);
+ fprintf(stderr," Needed to awaken BQ: %u with avg BQ len of: %f\n",
+ nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ);
+ fprintf(stderr," Number of PAPs: %u\n",nPAPs);
+# endif
+
+ fprintf(stderr, "Simulation finished after @ %s @ cycles. %d sparks created, %d sparks ignored. Check %s for details.\n",
+ time_string, sparksCreated, sparksIgnored, gr_filename);
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ fclose(gr_file);
+}
+
+#elif defined(PAR)
+
+/*
+ Under GUM we print only one line.
+*/
+void
+end_gr_simulation(void)
+{
+ char time_string[TIME_STR_LEN];
+
+ ullong_format_string(CURRENT_TIME-startTime, time_string, rtsFalse/*no commas!*/);
+
+ fprintf(stderr, "Computation finished after @ %s @ ms. %d sparks created, %d sparks ignored. Check %s for details.\n",
+ time_string, sparksCreated, sparksIgnored, gr_filename);
+
+ if (RtsFlags.ParFlags.ParStats.Full)
+ fclose(gr_file);
+}
+#endif /* PAR */
+
+//@node Global statistics, Dumping routines, Writing to the log-file
+//@subsection Global statistics
+/*
+ Called at the end of execution
+*/
+
+//@node Dumping routines, , Global statistics
+//@subsection Dumping routines
+
+//@cindex DumpGranEvent
+void
+DumpGranEvent(name, tso)
+GranEventType name;
+StgTSO *tso;
+{
+ DumpRawGranEvent(CURRENT_PROC, (PEs)0, name, tso, &stg_END_TSO_QUEUE_closure, (StgInt)0, (StgInt)0);
+}
+
+//@cindex DumpRawGranEvent
+void
+DumpRawGranEvent(proc, p, name, tso, node, sparkname, len)
+PEs proc, p; /* proc ... where it happens; p ... where node lives */
+GranEventType name;
+StgTSO *tso;
+StgClosure *node;
+StgInt sparkname, len;
+{
+# if defined(GRAN)
+ DumpVeryRawGranEvent(TIME_ON_PROC(proc),
+ proc, p, name, tso, node, sparkname, len);
+# elif defined(PAR)
+ DumpVeryRawGranEvent(CURRENT_TIME,
+ proc, p, name, tso, node, sparkname, len);
+# endif
+}
+
+//@cindex DumpVeryRawGranEvent
+void
+DumpVeryRawGranEvent(time, proc, p, name, tso, node, sparkname, len)
+rtsTime time;
+PEs proc, p; /* proc ... where it happens; p ... where node lives */
+GranEventType name;
+StgTSO *tso;
+StgClosure *node;
+StgInt sparkname, len;
+{
+ FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1
+ StgWord id;
+ char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN];
+# if defined(GRAN)
+ ullong_format_string(time,
+ time_string, rtsFalse/*no commas!*/);
+# elif defined(PAR)
+ ullong_format_string(time,
+ time_string, rtsFalse/*no commas!*/);
+# endif
+ output_file = gr_file;
+
+# if defined(GRAN)
+
+ if (RtsFlags.GranFlags.GranSimStats.Full)
+ ASSERT(output_file!=NULL);
+
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# elif defined(PAR)
+
+ if (RtsFlags.ParFlags.ParStats.Full)
+ ASSERT(output_file!=NULL);
+
+ if (RtsFlags.ParFlags.ParStats.Suppressed)
+ return;
+
+# endif
+
+ id = tso == NULL ? -1 : tso->id;
+ if (node==stgCast(StgClosure*,&stg_END_TSO_QUEUE_closure))
+ strcpy(node_str,"________"); /* "END_TSO_QUEUE"); */
+ else
+ sprintf(node_str,"0x%-6lx",node);
+
+ if (name > GR_EVENT_MAX)
+ name = GR_EVENT_MAX;
+
+ if (BINARY_STATS)
+ barf("binary log files not yet supported");
+#if 0
+ /* ToDo: fix code for writing binary GrAnSim statistics */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((StgWord)node);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc)); /* this line is bound to */
+ grputw(id); /* do the wrong thing */
+ break;
+ default:
+ grputw(name);
+ grputw(proc);
+ abort(); /* die please: a single word */
+ /* doesn't represent long long times */
+ grputw(TIME_ON_PROC(proc));
+ grputw((StgWord)node);
+ }
+#endif
+ else /* !BINARY_STATS */
+ switch (name) {
+ case GR_START:
+ case GR_STARTQ:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],
+ id,node_str,sparkname,len);
+ break;
+ case GR_FETCH:
+ case GR_REPLY:
+ case GR_BLOCK:
+ case GR_STOLEN:
+ case GR_STOLENQ:
+ case GR_STEALING:
+ fprintf(output_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n",
+ proc, time_string, gran_event_names[name],
+ id,node_str,p);
+ break;
+ case GR_RESUME:
+ case GR_RESUMEQ:
+ case GR_SCHEDULE:
+ case GR_DESCHEDULE:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx \n",
+ proc,time_string,gran_event_names[name],id);
+ break;
+ case GR_ALLOC:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t \tallocating %u words\n",
+ proc,time_string,gran_event_names[name],id,len);
+ break;
+ default:
+ fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n",
+ proc,time_string,gran_event_names[name],id,node_str,len);
+ }
+}
+
+//@cindex DumpGranInfo
+void
+DumpEndEvent(proc, tso, mandatory_thread)
+PEs proc;
+StgTSO *tso;
+rtsBool mandatory_thread;
+{
+ FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1
+ char time_string[TIME_STR_LEN];
+# if defined(GRAN)
+ ullong_format_string(TIME_ON_PROC(proc),
+ time_string, rtsFalse/*no commas!*/);
+# elif defined(PAR)
+ ullong_format_string(CURRENT_TIME,
+ time_string, rtsFalse/*no commas!*/);
+# endif
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+#if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+#endif
+
+ if (BINARY_STATS) {
+ barf("binary log files not yet supported");
+#if 0
+ grputw(GR_END);
+ grputw(proc);
+ abort(); /* die please: a single word doesn't represent long long times */
+ grputw(CURRENT_TIME); /* this line is bound to fail */
+ grputw(tso->id);
+#ifdef PAR
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+ grputw(0);
+#else
+ grputw(tso->gran.sparkname);
+ grputw(tso->gran.startedat);
+ grputw(tso->gran.exported);
+ grputw(tso->gran.basicblocks);
+ grputw(tso->gran.allocs);
+ grputw(tso->gran.exectime);
+ grputw(tso->gran.blocktime);
+ grputw(tso->gran.blockcount);
+ grputw(tso->gran.fetchtime);
+ grputw(tso->gran.fetchcount);
+ grputw(tso->gran.localsparks);
+ grputw(tso->gran.globalsparks);
+#endif
+ grputw(mandatory_thread);
+#endif /* 0 */
+ } else {
+
+ /*
+ * NB: DumpGranEvent cannot be used because PE may be wrong
+ * (as well as the extra info)
+ */
+ fprintf(output_file, "PE %2u [%s]: END %lx, SN %u, ST %lu, EXP %s, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u), LS %u, GS %u, MY %s\n"
+ ,proc
+ ,time_string
+ ,tso->id
+#if defined(GRAN)
+ ,tso->gran.sparkname
+ ,tso->gran.startedat
+ ,((tso->gran.exported) ? 'T' : 'F')
+ ,tso->gran.basicblocks
+ ,tso->gran.allocs
+ ,tso->gran.exectime
+ ,tso->gran.blocktime
+ ,tso->gran.blockcount
+ ,tso->gran.fetchtime
+ ,tso->gran.fetchcount
+ ,tso->gran.localsparks
+ ,tso->gran.globalsparks
+#elif defined(PAR)
+ ,tso->par.sparkname
+ ,tso->par.startedat
+ ,(tso->par.exported) ? "T" : "F"
+ ,tso->par.basicblocks
+ ,tso->par.allocs
+ ,tso->par.exectime
+ ,tso->par.blocktime
+ ,tso->par.blockcount
+ ,tso->par.fetchtime
+ ,tso->par.fetchcount
+ ,tso->par.localsparks
+ ,tso->par.globalsparks
+#endif
+ ,(mandatory_thread ? "T" : "F")
+ );
+ }
+}
+
+//@cindex DumpTSO
+void
+DumpTSO(tso)
+StgTSO *tso;
+{
+ FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1
+
+ output_file = gr_file;
+ ASSERT(output_file!=NULL);
+ fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %u, LINK 0x%lx, TYPE %s\n"
+ ,tso
+#if defined(GRAN)
+ ,tso->gran.sparkname
+#elif defined(PAR)
+ ,tso->par.sparkname
+#endif
+ ,tso->id
+ ,tso->link
+ ,/*tso->state==T_MAIN?"MAIN":
+ TSO_TYPE(tso)==T_FAIL?"FAIL":
+ TSO_TYPE(tso)==T_REQUIRED?"REQUIRED":
+ TSO_TYPE(tso)==T_ADVISORY?"ADVISORY":
+ */
+ "???"
+ );
+
+ fprintf(output_file,"TSO %lx: SN %u, ST %u, GBL %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u) LS %u, GS %u\n"
+ ,tso->id
+#if defined(GRAN)
+ ,tso->gran.sparkname
+ ,tso->gran.startedat
+ ,tso->gran.exported?'T':'F'
+ ,tso->gran.basicblocks
+ ,tso->gran.allocs
+ ,tso->gran.exectime
+ ,tso->gran.blocktime
+ ,tso->gran.blockcount
+ ,tso->gran.fetchtime
+ ,tso->gran.fetchcount
+ ,tso->gran.localsparks
+ ,tso->gran.globalsparks
+#elif defined(PAR)
+ ,tso->par.sparkname
+ ,tso->par.startedat
+ ,tso->par.exported?'T':'F'
+ ,tso->par.basicblocks
+ ,tso->par.allocs
+ ,tso->par.exectime
+ ,tso->par.blocktime
+ ,tso->par.blockcount
+ ,tso->par.fetchtime
+ ,tso->par.fetchcount
+ ,tso->par.localsparks
+ ,tso->par.globalsparks
+#endif
+ );
+}
+
+#if 0
+/*
+ ToDo: fix binary output of log files, and support new log file format.
+*/
+/*
+ Output a terminate event and an 8-byte time.
+*/
+
+//@cindex grterminate
+void
+grterminate(v)
+rtsTime v;
+{
+ if (!BINARY_STATS)
+ barf("grterminate: binary statistics not enabled\n");
+
+# if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ DumpGranEvent(GR_TERMINATE, stgCast(StgTSO*,&stg_END_TSO_QUEUE_closure));
+
+ if (sizeof(rtsTime) == 4) {
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ putc('\0', gr_file);
+ } else {
+ putc(v >> 56l, gr_file);
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ }
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+}
+
+/*
+ Length-coded output: first 3 bits contain length coding
+
+ 00x 1 byte
+ 01x 2 bytes
+ 10x 4 bytes
+ 110 8 bytes
+ 111 5 or 9 bytes
+*/
+
+//@cindex grputw
+void
+grputw(v)
+rtsTime v;
+{
+ if (!BINARY_STATS)
+ barf("grputw: binary statistics not enabled\n");
+
+# if defined(GRAN)
+ if (RtsFlags.GranFlags.GranSimStats.Suppressed)
+ return;
+# endif
+
+ if (v <= 0x3fl) { /* length v = 1 byte */
+ fputc(v & 0x3f, gr_file);
+ } else if (v <= 0x3fffl) { /* length v = 2 byte */
+ fputc((v >> 8l) | 0x40l, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (v <= 0x3fffffffl) { /* length v = 4 byte */
+ fputc((v >> 24l) | 0x80l, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else if (sizeof(TIME) == 4) {
+ fputc(0x70, gr_file);
+ fputc((v >> 24l) & 0xffl, gr_file);
+ fputc((v >> 16l) & 0xffl, gr_file);
+ fputc((v >> 8l) & 0xffl, gr_file);
+ fputc(v & 0xffl, gr_file);
+ } else {
+ if (v <= 0x3fffffffffffffl)
+ putc((v >> 56l) | 0x60l, gr_file);
+ else {
+ putc(0x70, gr_file);
+ putc((v >> 56l) & 0xffl, gr_file);
+ }
+
+ putc((v >> 48l) & 0xffl, gr_file);
+ putc((v >> 40l) & 0xffl, gr_file);
+ putc((v >> 32l) & 0xffl, gr_file);
+ putc((v >> 24l) & 0xffl, gr_file);
+ putc((v >> 16l) & 0xffl, gr_file);
+ putc((v >> 8l) & 0xffl, gr_file);
+ putc(v & 0xffl, gr_file);
+ }
+}
+#endif /* 0 */
+
+/*
+ extracting specific info out of a closure; used in packing (GranSim, GUM)
+*/
+//@cindex get_closure_info
/*
 * Decompose a closure for packing.  On return:
 *   *size    -- total closure size in words (including the header)
 *   *ptrs    -- number of pointer fields in the payload
 *   *nonptrs -- number of non-pointer words in the payload
 *   *vhs     -- variable header size: size - ptrs - nonptrs - header
 *   info_hdr_ty -- printable tag for the closure kind (the real name
 *                  lookup is only enabled in the #if 0 debug branches)
 * Returns the closure's info-table pointer -- except for an RBH, where
 * the *reverted* info table is both used for the layout and returned.
 */
+StgInfoTable*
+get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs,
+		 nat *vhs, char *info_hdr_ty)
+{
+  StgInfoTable *info;
+
+  ASSERT(LOOKS_LIKE_COOL_CLOSURE(node));
+  info = get_itbl(node);
+  /* the switch shouldn't be necessary, really; just use default case */
+  switch (info->type) {
/* Revertible black hole: describe the closure it will revert to,
   not the RBH itself. */
+  case RBH:
+    {
+      StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to
+      *size = sizeW_fromITBL(rip);
+      *ptrs = (nat) (rip->layout.payload.ptrs);
+      *nonptrs = (nat) (rip->layout.payload.nptrs);
+      *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+      info_hdr_type(node, info_hdr_ty);
+#else
+      strcpy(info_hdr_ty, "RBH");
+#endif
+      return rip;  // NB: we return the reverted info ptr for a RBH!!!!!!
+    }
+
+#if defined(PAR)
/* GUM-specific closures: fixed sizes, no payload described by the
   info table, hence ptrs = nonptrs = 0 and everything else is vhs. */
+  /* Closures specific to GUM */
+  case FETCH_ME:
+    *size = sizeofW(StgFetchMe);
+    *ptrs = (nat)0;
+    *nonptrs = (nat)0;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+    info_hdr_type(node, info_hdr_ty);
+#else
+    strcpy(info_hdr_ty, "FETCH_ME");
+#endif
+    return info;
+
+#ifdef DIST
+  case REMOTE_REF:  //same as for FETCH_ME...
+    *size = sizeofW(StgFetchMe);
+    *ptrs = (nat)0;
+    *nonptrs = (nat)0;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+    info_hdr_type(node, info_hdr_ty);
+#else
+    strcpy(info_hdr_ty, "REMOTE_REF");
+#endif
+    return info;
+#endif /* DIST */
+
+  case FETCH_ME_BQ:
+    *size = sizeofW(StgFetchMeBlockingQueue);
+    *ptrs = (nat)0;
+    *nonptrs = (nat)0;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+    info_hdr_type(node, info_hdr_ty);
+#else
+    strcpy(info_hdr_ty, "FETCH_ME_BQ");
+#endif
+    return info;
+
+  case BLOCKED_FETCH:
+    *size = sizeofW(StgBlockedFetch);
+    *ptrs = (nat)0;
+    *nonptrs = (nat)0;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+    info_hdr_type(node, info_hdr_ty);
+#else
+    strcpy(info_hdr_ty, "BLOCKED_FETCH");
+#endif
+    return info;
+#endif /* PAR */
+
/* Closures whose info tables do not describe their true layout;
   sizes are computed from the objects themselves instead.
   NOTE(review): info_hdr_ty is NOT written in the THUNK_SELECTOR,
   ARR_WORDS, PAP and AP_UPD cases -- callers must not rely on it
   being set for these types. */
+  /* these magic constants are outrageous!! why does the ITBL lie about it? */
+  case THUNK_SELECTOR:
+    *size = THUNK_SELECTOR_sizeW();
+    *ptrs = 1;
+    *nonptrs = MIN_UPD_SIZE-*ptrs;   // weird
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+    return info;
+
+  case ARR_WORDS:
+    /* ToDo: check whether this can be merged with the default case */
+    *size = arr_words_sizeW((StgArrWords *)node);
+    *ptrs = 0;
+    *nonptrs = ((StgArrWords *)node)->words;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+    return info;
+
+  case PAP:
+    /* ToDo: check whether this can be merged with the default case */
+    *size = pap_sizeW((StgPAP *)node);
+    *ptrs = 0;
+    *nonptrs = 0;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+    return info;
+
+  case AP_UPD:
+    /* ToDo: check whether this can be merged with the default case */
+    *size = AP_sizeW(((StgAP_UPD *)node)->n_args);
+    *ptrs = 0;
+    *nonptrs = 0;
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+    return info;
+
/* Everything else: trust the info table's layout description. */
+  default:
+    *size = sizeW_fromITBL(info);
+    *ptrs = (nat) (info->layout.payload.ptrs);
+    *nonptrs = (nat) (info->layout.payload.nptrs);
+    *vhs = *size - *ptrs - *nonptrs - sizeofW(StgHeader);
+#if 0 /* DEBUG */
+      info_hdr_type(node, info_hdr_ty);
+#else
+      strcpy(info_hdr_ty, "UNKNOWN");
+#endif
+    return info;
+  }
+}
+
+//@cindex IS_BLACK_HOLE
+rtsBool
+IS_BLACK_HOLE(StgClosure* node)
+{
+ // StgInfoTable *info;
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(node));
+ switch (get_itbl(node)->type) {
+ case BLACKHOLE:
+ case BLACKHOLE_BQ:
+ case RBH:
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ return rtsTrue;
+ default:
+ return rtsFalse;
+ }
+//return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse);
+}
+
+//@cindex IS_INDIRECTION
+StgClosure *
+IS_INDIRECTION(StgClosure* node)
+{
+ StgInfoTable *info;
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(node));
+ info = get_itbl(node);
+ switch (info->type) {
+ case IND:
+ case IND_OLDGEN:
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ case IND_STATIC:
+ /* relies on indirectee being at same place for all these closure types */
+ return (((StgInd*)node) -> indirectee);
+#if 0
+ case EVACUATED: // counting as ind to use in GC routines, too
+ // could use the same code as above (evacuee is at same pos as indirectee)
+ return (((StgEvacuated *)node) -> evacuee);
+#endif
+ default:
+ return NULL;
+ }
+}
+
+//@cindex unwindInd
+StgClosure *
+UNWIND_IND (StgClosure *closure)
+{
+ StgClosure *next;
+
+ while ((next = IS_INDIRECTION((StgClosure *)closure)) != NULL)
+ closure = next;
+
+ ASSERT(next==(StgClosure *)NULL);
+ ASSERT(LOOKS_LIKE_COOL_CLOSURE(closure));
+ return closure;
+}
+
+#endif /* GRAN || PAR whole file */
diff --git a/rts/parallel/ParallelDebug.c b/rts/parallel/ParallelDebug.c
new file mode 100644
index 0000000000..b357af6379
--- /dev/null
+++ b/rts/parallel/ParallelDebug.c
@@ -0,0 +1,1955 @@
+/*
+ Time-stamp: <Sun Mar 18 2001 19:32:56 Stardate: [-30]6349.07 hwloidl>
+
+ Various debugging routines for GranSim and GUM
+*/
+
+#if defined(DEBUG) && (defined(GRAN) || defined(PAR)) /* whole file */
+
+//@node Debugging routines for GranSim and GUM, , ,
+//@section Debugging routines for GranSim and GUM
+
+//@menu
+//* Includes::
+//* Constants and Variables::
+//* Closures::
+//* Threads::
+//* Events::
+//* Sparks::
+//* Processors::
+//* Shortcuts::
+//* Printing info type::
+//* Printing Packet Contents::
+//* End of File::
+//@end menu
+//*/
+
+//@node Includes, Prototypes, Debugging routines for GranSim and GUM, Debugging routines for GranSim and GUM
+//@subsection Includes
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+#include "StgMiscClosures.h"
+#include "Printer.h"
+# if defined(DEBUG)
+# include "Hash.h"
+# include "Storage.h"
+# include "ParallelDebug.h"
+# endif
+
+//@node Prototypes, Constants and Variables, Includes, Debugging routines for GranSim and GUM
+//@subsection Prototypes
+/*
+rtsBool isOffset(globalAddr *ga);
+rtsBool isFixed(globalAddr *ga);
+*/
+//@node Constants and Variables, Closures, Prototypes, Debugging routines for GranSim and GUM
+//@subsection Constants and Variables
+
+static HashTable *tmpClosureTable; // used in GraphFingerPrint and PrintGraph
+
+#if defined(PAR)
+/* One printable character per closure type, indexed by the closure-type
+ enumeration; used to build compact "fingerprints" of heap graphs. */
+static char finger_print_char[] = {
+ '/', /* INVALID_OBJECT 0 */
+ 'C', /* CONSTR 1 */
+ 'C', /* CONSTR_1_0 2 */
+ 'C', /* CONSTR_0_1 3 */
+ 'C', /* CONSTR_2_0 4 */
+ 'C', /* CONSTR_1_1 5 */
+ 'C', /* CONSTR_0_2 6 */
+ 'I', /* CONSTR_INTLIKE 7 */
+ 'I', /* CONSTR_CHARLIKE 8 */
+ 'S', /* CONSTR_STATIC 9 */
+ 'S', /* CONSTR_NOCAF_STATIC 10 */
+ 'F', /* FUN 11 */
+ 'F', /* FUN_1_0 12 */
+ 'F', /* FUN_0_1 13 */
+ 'F', /* FUN_2_0 14 */
+ 'F', /* FUN_1_1 15 */
+ 'F', /* FUN_0_2 16 */
+ 'S', /* FUN_STATIC 17 */
+ 'T', /* THUNK 18 */
+ 'T', /* THUNK_1_0 19 */
+ 'T', /* THUNK_0_1 20 */
+ 'T', /* THUNK_2_0 21 */
+ 'T', /* THUNK_1_1 22 */
+ 'T', /* THUNK_0_2 23 */
+ 'S', /* THUNK_STATIC 24 */
+ 'E', /* THUNK_SELECTOR 25 */
+ 'b', /* BCO 26 */
+ 'p', /* AP_UPD 27 */
+ 'p', /* PAP 28 */
+ '_', /* IND 29 */
+ '_', /* IND_OLDGEN 30 */
+ '_', /* IND_PERM 31 */
+ '_', /* IND_OLDGEN_PERM 32 */
+ '_', /* IND_STATIC 33 */
+ '?', /* ***unused*** 34 */
+ '?', /* ***unused*** 35 */
+ '^', /* RET_BCO 36 */
+ '^', /* RET_SMALL 37 */
+ '^', /* RET_VEC_SMALL 38 */
+ '^', /* RET_BIG 39 */
+ '^', /* RET_VEC_BIG 40 */
+ '^', /* RET_DYN 41 */
+ '~', /* UPDATE_FRAME 42 */
+ '~', /* CATCH_FRAME 43 */
+ '~', /* STOP_FRAME 44 */
+ '~', /* SEQ_FRAME 45 */
+ 'o', /* CAF_BLACKHOLE 46 */
+ 'o', /* BLACKHOLE 47 */
+ 'o', /* BLACKHOLE_BQ 48 */
+ 'o', /* SE_BLACKHOLE 49 */
+ 'o', /* SE_CAF_BLACKHOLE 50 */
+ 'm', /* MVAR 51 */
+ 'a', /* ARR_WORDS 52 */
+ 'a', /* MUT_ARR_PTRS 53 */
+ 'a', /* MUT_ARR_PTRS_FROZEN 54 */
+ 'q', /* MUT_VAR 55 */
+ 'w', /* WEAK 56 */
+ 'f', /* FOREIGN 57 */
+ 's', /* STABLE_NAME 58 */
+ '@', /* TSO 59 */
+ '#', /* BLOCKED_FETCH 60 */
+ '>', /* FETCH_ME 61 */
+ '>', /* FETCH_ME_BQ 62 */
+ '$', /* RBH 63 */
+ 'v', /* EVACUATED 64 */
+ '>' /* REMOTE_REF 65 */
+ /* ASSERT(there are N_CLOSURE_TYPES (==66) in this array) */
+};
+#endif /* PAR */
+
+#if defined(GRAN) && defined(GRAN_CHECK)
+//@node Closures, Threads, Constants and Variables, Debugging routines for GranSim and GUM
+//@subsection Closures
+
+/* Print one closure in full: address, optional GA / cost centre /
+ processor bitmask, info pointer and type, then its pointer and
+ non-pointer payload words. For blocking closures the attached
+ blocking queue is printed via G_PRINT_BQ. */
+void
+G_PRINT_NODE(node)
+StgClosure* node;
+{
+ StgInfoTable *info_ptr;
+ StgTSO* bqe;
+ nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0;
+ char info_hdr_ty[80], info_ty[80];
+
+ if (node==NULL) {
+ fprintf(stderr,"NULL\n");
+ return;
+ } else if (node==END_TSO_QUEUE) {
+ fprintf(stderr,"END_TSO_QUEUE\n");
+ return;
+ }
+ /* size_and_ptrs(node,&size,&ptrs); */
+ info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_hdr_ty);
+
+ /* vhs = var_hdr_size(node); */
+ display_info_type(info_ptr,info_ty);
+
+ fprintf(stderr,"Node: 0x%lx", node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ if (info_ptr->type==TSO)
+ fprintf(stderr," TSO: 0x%lx (%x) IP: 0x%lx (%s), type %s \n ",
+ (StgTSO*)node, ((StgTSO*)node)->id, info_ptr, info_hdr_ty, info_ty);
+ else
+ fprintf(stderr," IP: 0x%lx (%s), type %s \n VHS: %d, size: %ld, ptrs:%ld, nonptrs: %ld\n ",
+ info_ptr,info_hdr_ty,info_ty,vhs,size,ptrs,nonptrs);
+
+ /* For now, we ignore the variable header */
+
+ fprintf(stderr," Ptrs: ");
+ for(i=0; i < ptrs; ++i)
+ {
+ if ( (i+1) % 6 == 0)
+ fprintf(stderr,"\n ");
+ fprintf(stderr," 0x%lx[P]",node->payload[i]);
+ };
+
+ fprintf(stderr," Data: ");
+ for(i=0; i < nonptrs; ++i)
+ {
+ if( (i+1) % 6 == 0)
+ fprintf(stderr,"\n ");
+ fprintf(stderr," %lu[D]",node->payload[ptrs+i]);
+ }
+ fprintf(stderr, "\n");
+
+
+ switch (info_ptr->type)
+ {
+ case TSO:
+ fprintf(stderr,"\n TSO_LINK: %#lx",
+ ((StgTSO*)node)->link);
+ break;
+
+ case BLACKHOLE:
+ case RBH:
+ bqe = ((StgBlockingQueue*)node)->blocking_queue;
+ fprintf(stderr," BQ of %#lx: ", node);
+ G_PRINT_BQ(bqe);
+ break;
+ case FETCH_ME:
+ case FETCH_ME_BQ:
+ printf("Panic: found FETCH_ME or FETCH_ME_BQ Infotable in GrAnSim system.\n");
+ break;
+ default:
+ /* do nothing */
+ break; /* BUGFIX: a label must be followed by a statement (ISO C) */
+ }
+}
+
+/* Print a node in the compact one-line "packet" format used by
+ PrintPacket: fixed header words, variable header, pointer count and
+ non-pointer words. Blocking/fetch closures are shown with empty
+ layout since their payload is not meaningful here. */
+void
+G_PPN(node) /* Extracted from PrintPacket in Pack.lc */
+StgClosure* node;
+{
+ StgInfoTable *info ;
+ nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0, locn = 0;
+ char info_type[80];
+
+ /* size_and_ptrs(node,&size,&ptrs); */
+ info = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type);
+
+ if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||
+ info->type == BLACKHOLE || info->type == RBH )
+ size = ptrs = nonptrs = vhs = 0;
+
+ if (closure_THUNK(node)) {
+ if (!closure_UNPOINTED(node))
+ fputs("SHARED ", stderr);
+ else
+ fputs("UNSHARED ", stderr);
+ }
+ if (info->type==BLACKHOLE) {
+ fputs("BLACK HOLE\n", stderr);
+ } else {
+ /* Fixed header: the first _HS words of the closure */
+ fprintf(stderr, "(%s) FH [%#lx", info_type, node[locn++]);
+ for (i = 1; i < _HS; i++)
+ fprintf(stderr, " %#lx", node[locn++]);
+
+ /* Variable header */
+ if (vhs > 0) {
+ fprintf(stderr, "] VH [%#lx", node->payload[0]);
+
+ for (i = 1; i < vhs; i++)
+ fprintf(stderr, " %#lx", node->payload[i]);
+ }
+
+ fprintf(stderr, "] PTRS %u", ptrs);
+
+ /* Non-pointers */
+ if (nonptrs > 0) {
+ fprintf(stderr, " NPTRS [%#lx", node->payload[ptrs]);
+
+ for (i = 1; i < nonptrs; i++)
+ fprintf(stderr, " %#lx", node->payload[ptrs+i]);
+
+ putc(']', stderr);
+ }
+ putc('\n', stderr);
+ }
+
+}
+
+#if 0
+// ToDo: fix this!! -- HWL
+/* Disabled: dump the fields of a closure's info table. */
+void
+G_INFO_TABLE(node)
+StgClosure *node;
+{
+ StgInfoTable *info_ptr;
+ nat size = 0, ptrs = 0, nonptrs = 0, vhs = 0;
+ char info_type[80], hdr_type[80];
+
+ /* NOTE(review): info_ptr is read here before it is assigned below --
+ one reason this block is compiled out */
+ info_hdr_type(info_ptr, hdr_type);
+
+ // get_itbl(node);
+ info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type);
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ info_type,info_ptr,(W_) ENTRY_CODE(info_ptr),
+ size, ptrs);
+ // INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+
+ if (closure_THUNK(node) && !closure_UNPOINTED(node) ) {
+ fprintf(stderr," RBH InfoPtr: %#lx\n",
+ RBH_INFOPTR(info_ptr));
+ }
+
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(USE_COST_CENTRES)
+ fprintf(stderr,"Cost Centre (?): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif /* _INFO_COMPACTING */
+}
+#endif /* 0 */
+
+//@cindex G_PRINT_BQ
+/* Print the blocking queue hanging off 'node' (a closure with a
+ blocking_queue field, e.g. BLACKHOLE_BQ or RBH). Each element is
+ shown with its address, link and whether it is local (L) to
+ CurrentProc or global (G). */
+void
+G_PRINT_BQ(node)
+StgClosure* node;
+{
+ StgInfoTable *info;
+ StgTSO *tso, *last;
+ char str[80], str0[80];
+
+ fprintf(stderr,"\n[PE %d] @ %lu BQ: ",
+ CurrentProc,CurrentTime[CurrentProc]);
+ if ( node == (StgClosure*)NULL ) {
+ fprintf(stderr," NULL.\n");
+ return;
+ }
+ if ( node == END_TSO_QUEUE ) {
+ fprintf(stderr," _|_\n");
+ return;
+ }
+ tso = ((StgBlockingQueue*)node)->blocking_queue;
+ /* BUGFIX: the loop previously tested and inspected 'node', which is
+ never advanced, so it looped forever printing the queue head; the
+ traversal variable is 'tso'. */
+ while (tso != END_TSO_QUEUE) {
+ PEs proc;
+
+ /* Find where the tso lives */
+ proc = where_is((StgClosure *)tso);
+ info = get_itbl(tso);
+
+ switch (info->type) {
+ case TSO:
+ strcpy(str0,"TSO");
+ break;
+ case BLOCKED_FETCH:
+ strcpy(str0,"BLOCKED_FETCH");
+ break;
+ default:
+ strcpy(str0,"???");
+ break;
+ }
+
+ if(proc == CurrentProc)
+ fprintf(stderr," %#lx (%x) L %s,",
+ tso, tso->link, str0);
+ else
+ fprintf(stderr," %#lx (%x) G (PE %d) %s,",
+ tso, tso->link, proc, str0);
+
+ last = tso;
+ tso = last->link;
+ }
+ if ( tso == END_TSO_QUEUE )
+ fprintf(stderr," _|_\n");
+}
+
+//@node Threads, Events, Closures, Debugging routines for GranSim and GUM
+//@subsection Threads
+
+/* Print the run queue of the current processor (run_queue_hd). */
+void
+G_CURR_THREADQ(verbose)
+StgInt verbose;
+{
+ fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc);
+ G_THREADQ(run_queue_hd, verbose);
+}
+
+/* Print a thread queue starting at 'closure'; with 'verbose' each TSO
+ is dumped via G_TSO, otherwise only its address is printed. Prints
+ "NIL" when the queue is empty. */
+void
+G_THREADQ(closure, verbose)
+StgTSO* closure;
+StgInt verbose;
+{
+ StgTSO* x;
+
+ fprintf(stderr,"Thread Queue: ");
+ for (x=closure; x!=END_TSO_QUEUE; x=x->link)
+ if (verbose)
+ G_TSO(x,0);
+ else
+ fprintf(stderr," %#lx",x);
+
+ if (closure==END_TSO_QUEUE)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+/* Print the contents of a TSO. 'verbose' is a bit mask:
+ 0x08 = one-line summary only; 0x04 = include stack pointers;
+ 0x01 = include GranSim statistics fields; 0x02 = also print the
+ blocking queue starting at this TSO. */
+void
+G_TSO(closure,verbose)
+StgTSO* closure;
+StgInt verbose;
+{
+
+ if (closure==END_TSO_QUEUE) {
+ /* BUGFIX: the %#lx conversion had no matching argument */
+ fprintf(stderr,"TSO at %#lx is END_TSO_QUEUE!\n", closure);
+ return;
+ }
+
+ if ( verbose & 0x08 ) { /* short info */
+ fprintf(stderr,"[TSO @ %#lx, PE %d]: Id: %#lx, Link: %#lx\n",
+ closure,where_is(closure),
+ closure->id,closure->link);
+ return;
+ }
+
+ fprintf(stderr,"TSO at %#lx has the following contents:\n",
+ closure);
+
+ fprintf(stderr,"> Id: \t%#lx",closure->id);
+ // fprintf(stderr,"\tstate: \t%#lx",closure->state);
+ fprintf(stderr,"\twhat_next: \t%#lx",closure->what_next);
+ fprintf(stderr,"\tlink: \t%#lx\n",closure->link);
+ fprintf(stderr,"\twhy_blocked: \t%d", closure->why_blocked);
+ fprintf(stderr,"\tblock_info: \t%p\n", closure->block_info);
+ // fprintf(stderr,"\tType: \t%s\n",type_name[TSO_TYPE(closure)]);
+ fprintf(stderr,">PRI: \t%#lx", closure->gran.pri);
+ fprintf(stderr,"\tMAGIC: \t%#lx %s\n", closure->gran.magic,
+ (closure->gran.magic==TSO_MAGIC ? "it IS a TSO" : "THIS IS NO TSO!!"));
+ if ( verbose & 0x04 ) {
+ fprintf(stderr, "Stack: stack @ %#lx (stack_size: %u; max_stack_size: %u)\n",
+ closure->stack, closure->stack_size, closure->max_stack_size);
+ fprintf(stderr, " sp: %#lx, su: %#lx, splim: %#lx\n",
+ closure->sp, closure->su, closure->splim);
+ }
+ // fprintf(stderr,"\n");
+ if (verbose & 0x01) {
+ // fprintf(stderr,"} LOCKED: \t%#lx",closure->locked);
+ fprintf(stderr,"} SPARKNAME: \t%#lx\n", closure->gran.sparkname);
+ fprintf(stderr,"} STARTEDAT: \t%#lx", closure->gran.startedat);
+ fprintf(stderr,"\tEXPORTED: \t%#lx\n", closure->gran.exported);
+ fprintf(stderr,"} BASICBLOCKS: \t%#lx", closure->gran.basicblocks);
+ fprintf(stderr,"\tALLOCS: \t%#lx\n", closure->gran.allocs);
+ fprintf(stderr,"} EXECTIME: \t%#lx", closure->gran.exectime);
+ fprintf(stderr,"\tFETCHTIME: \t%#lx\n", closure->gran.fetchtime);
+ fprintf(stderr,"} FETCHCOUNT: \t%#lx", closure->gran.fetchcount);
+ fprintf(stderr,"\tBLOCKTIME: \t%#lx\n", closure->gran.blocktime);
+ fprintf(stderr,"} BLOCKCOUNT: \t%#lx", closure->gran.blockcount);
+ fprintf(stderr,"\tBLOCKEDAT: \t%#lx\n", closure->gran.blockedat);
+ fprintf(stderr,"} GLOBALSPARKS:\t%#lx", closure->gran.globalsparks);
+ fprintf(stderr,"\tLOCALSPARKS:\t%#lx\n", closure->gran.localsparks);
+ }
+ if ( verbose & 0x02 ) {
+ fprintf(stderr,"BQ that starts with this TSO: ");
+ G_PRINT_BQ(closure);
+ }
+}
+
+//@node Events, Sparks, Threads, Debugging routines for GranSim and GUM
+//@subsection Events
+
+/* Print a single GranSim event: full dump via print_event when
+ 'verbose', otherwise just its address. */
+void
+G_EVENT(event, verbose)
+rtsEventQ event;
+StgInt verbose;
+{
+ if (verbose) {
+ print_event(event);
+ }else{
+ fprintf(stderr," %#lx",event);
+ }
+}
+
+/* Print the whole GranSim event queue, head first. */
+void
+G_EVENTQ(verbose)
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ rtsEventQ x;
+
+ fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=x->next) {
+ G_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+/* Print only the events of the event queue that belong to PE 'pe'. */
+void
+G_PE_EQ(pe,verbose)
+PEs pe;
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ rtsEventQ x;
+
+ fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd);
+ for (x=EventHd; x!=NULL; x=x->next) {
+ if (x->proc==pe)
+ G_EVENT(x,verbose);
+ }
+ if (EventHd==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+//@node Sparks, Processors, Events, Debugging routines for GranSim and GUM
+//@subsection Sparks
+
+/* Print one spark: full dump via print_spark when 'verbose',
+ otherwise just its address. NULL is rejected with a message. */
+void
+G_SPARK(spark, verbose)
+rtsSparkQ spark;
+StgInt verbose;
+{
+ if (spark==(rtsSpark*)NULL) {
+ belch("G_SPARK: NULL spark; aborting");
+ return;
+ }
+ if (verbose)
+ print_spark(spark);
+ else
+ fprintf(stderr," %#lx",spark);
+}
+
+/* Print a spark queue starting at 'spark'. NULL is rejected with a
+ message rather than treated as an empty queue. */
+void
+G_SPARKQ(spark,verbose)
+rtsSparkQ spark;
+StgInt verbose;
+{
+ rtsSparkQ x;
+
+ if (spark==(rtsSpark*)NULL) {
+ belch("G_SPARKQ: NULL spark; aborting");
+ return;
+ }
+
+ fprintf(stderr,"RtsSparkQ (hd @%#lx):\n",spark);
+ for (x=spark; x!=NULL; x=x->next) {
+ G_SPARK(x,verbose);
+ }
+ /* NOTE(review): spark is known non-NULL here (early return above),
+ so the "NIL" branch below is dead code */
+ if (spark==NULL)
+ fprintf(stderr,"NIL\n");
+ else
+ fprintf(stderr,"\n");
+}
+
+/* Print the spark queue of the current processor. */
+void
+G_CURR_SPARKQ(verbose)
+StgInt verbose;
+{
+ G_SPARKQ(pending_sparks_hd,verbose);
+}
+
+//@node Processors, Shortcuts, Sparks, Debugging routines for GranSim and GUM
+//@subsection Processors
+
+/* Print the status of processor 'proc': its clock, activity state,
+ run queue, current TSO (if it is the active PE), the next event,
+ and (verbose & 0x1) its spark queues. */
+void
+G_PROC(proc,verbose)
+StgInt proc;
+StgInt verbose;
+{
+ extern rtsEventQ EventHd;
+ extern char *proc_status_names[];
+
+ fprintf(stderr,"Status of proc %d at time %d (%#lx): %s (%s)\n",
+ proc,CurrentTime[proc],CurrentTime[proc],
+ (CurrentProc==proc)?"ACTIVE":"INACTIVE",
+ proc_status_names[procStatus[proc]]);
+ G_THREADQ(run_queue_hds[proc],verbose & 0x2);
+ if ( (CurrentProc==proc) )
+ G_TSO(CurrentTSO,1);
+
+ if (EventHd!=NULL)
+ fprintf(stderr,"Next event (%s) is on proc %d\n",
+ event_names[EventHd->evttype],EventHd->proc);
+
+ if (verbose & 0x1) {
+ fprintf(stderr,"\nREQUIRED sparks: ");
+ G_SPARKQ(pending_sparks_hds[proc],1);
+ fprintf(stderr,"\nADVISORY_sparks: ");
+ /* NOTE(review): this prints the same queue as the REQUIRED line
+ above -- looks like a copy/paste; confirm whether a separate
+ advisory-spark queue was intended */
+ G_SPARKQ(pending_sparks_hds[proc],1);
+ }
+}
+
+//@node Shortcuts, Printing info type, Processors, Debugging routines for GranSim and GUM
+//@subsection Shortcuts
+
+/* The one/two-letter wrappers below exist so the routines above can be
+ invoked conveniently from a debugger (e.g. "call GCP()" in gdb). */
+
+/* Debug Processor */
+void
+GP(proc)
+StgInt proc;
+{ G_PROC(proc,1);
+}
+
+/* Debug Current Processor */
+void
+GCP(){ G_PROC(CurrentProc,2); }
+
+/* Debug TSO */
+void
+GT(StgPtr tso){
+ G_TSO(tso,1);
+}
+
+/* Debug CurrentTSO */
+void
+GCT(){
+ fprintf(stderr,"Current Proc: %d\n",CurrentProc);
+ G_TSO(CurrentTSO,1);
+}
+
+/* Shorthand for debugging event queue */
+void
+GEQ() { G_EVENTQ(1); }
+
+/* Shorthand for debugging thread queue of a processor */
+void
+GTQ(PEs p) { G_THREADQ(run_queue_hds[p],1); }
+
+/* Shorthand for debugging thread queue of current processor */
+void
+GCTQ() { G_THREADQ(run_queue_hds[CurrentProc],1); }
+
+/* Shorthand for debugging spark queue of a processor */
+void
+GSQ(PEs p) { G_SPARKQ(pending_sparks_hds[p],1); }
+
+/* Shorthand for debugging spark queue of current processor */
+void
+GCSQ() { G_CURR_SPARKQ(1); }
+
+/* Shorthand for printing a node */
+void
+GN(StgPtr node) { G_PRINT_NODE(node); }
+
+/* Shorthand for printing info table */
+#if 0
+// ToDo: fix -- HWL
+void
+GIT(StgPtr node) { G_INFO_TABLE(node); }
+#endif
+
+/* Print head and tail pointers of every processor's run queue. */
+void
+printThreadQPtrs(void)
+{
+ PEs p;
+ for (p=0; p<RtsFlags.GranFlags.proc; p++) {
+ /* BUGFIX: the format has three conversions (%d,%p,%p) but only two
+ arguments were supplied; the PE number 'p' was missing */
+ fprintf(stderr,", PE %d: (hd=%p,tl=%p)",
+ p, run_queue_hds[p], run_queue_tls[p]);
+ }
+}
+
+/* Thin wrappers around G_THREADQ/G_SPARKQ in terse and verbose forms.
+ FIX: dropped the stray `;' after each body -- an extra semicolon at
+ file scope is not ISO C. */
+void
+printThreadQ(StgTSO *tso) { G_THREADQ(tso, 0); }
+
+void
+printSparkQ(rtsSpark *spark) { G_SPARKQ(spark, 0); }
+
+void
+printThreadQ_verbose(StgTSO *tso) { G_THREADQ(tso, 1); }
+
+void
+printSparkQ_verbose(rtsSpark *spark) { G_SPARKQ(spark, 1); }
+
+/* Shorthand for some of ADRs debugging functions */
+
+#endif /* GRAN && GRAN_CHECK*/
+
+#if 0
+/* Everything down to the matching #endif is compiled out: old
+ pre-RTS-rewrite debugging helpers (they use the obsolete INFO_PTR /
+ size_and_ptrs / var_hdr_size interface), kept only for reference. */
+void
+DEBUG_PRINT_NODE(node)
+StgPtr node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ StgInt size = 0, ptrs = 0, i, vhs = 0;
+ char info_type[80];
+
+ info_hdr_type(info_ptr, info_type);
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ fprintf(stderr,"Node: 0x%lx", (W_) node);
+
+#if defined(PAR)
+ fprintf(stderr," [GA: 0x%lx]",GA(node));
+#endif
+
+#if defined(PROFILING)
+ fprintf(stderr," [CC: 0x%lx]",CC_HDR(node));
+#endif
+
+#if defined(GRAN)
+ fprintf(stderr," [Bitmask: 0%lo]",PROCS(node));
+#endif
+
+ fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n",
+ info_ptr,info_type,size,ptrs);
+
+ /* For now, we ignore the variable header */
+
+ for(i=0; i < size; ++i)
+ {
+ if(i == 0)
+ fprintf(stderr,"Data: ");
+
+ else if(i % 6 == 0)
+ fprintf(stderr,"\n ");
+
+ if(i < ptrs)
+ fprintf(stderr," 0x%lx[P]",*(node+_HS+vhs+i));
+ else
+ fprintf(stderr," %lu[D]",*(node+_HS+vhs+i));
+ }
+ fprintf(stderr, "\n");
+}
+
+
+#define INFO_MASK 0x80000000
+
+/* Recursively print the graph below 'node', marking visited nodes by
+ setting INFO_MASK in their info pointer to avoid cycles. */
+void
+DEBUG_TREE(node)
+StgPtr node;
+{
+ W_ size = 0, ptrs = 0, i, vhs = 0;
+
+ /* Don't print cycles */
+ if((INFO_PTR(node) & INFO_MASK) != 0)
+ return;
+
+ size_and_ptrs(node,&size,&ptrs);
+ vhs = var_hdr_size(node);
+
+ DEBUG_PRINT_NODE(node);
+ fprintf(stderr, "\n");
+
+ /* Mark the node -- may be dangerous */
+ INFO_PTR(node) |= INFO_MASK;
+
+ for(i = 0; i < ptrs; ++i)
+ DEBUG_TREE((StgPtr)node[i+vhs+_HS]);
+
+ /* Unmark the node */
+ INFO_PTR(node) &= ~INFO_MASK;
+}
+
+
+/* Print the info-table fields of 'node' (old info-table layout). */
+void
+DEBUG_INFO_TABLE(node)
+StgPtr node;
+{
+ W_ info_ptr = INFO_PTR(node);
+ char *iStgPtrtype = info_hdr_type(info_ptr);
+
+ fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n",
+ iStgPtrtype,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr));
+#if defined(PAR)
+ fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr));
+#endif
+
+#if defined(PROFILING)
+ fprintf(stderr,"Cost Centre (?): 0x%lx\n",INFO_CAT(info_ptr));
+#endif
+
+#if defined(_INFO_COPYING)
+ fprintf(stderr,"Evacuate Entry: 0x%lx;\tScavenge Entry: 0x%lx\n",
+ INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr));
+#endif
+
+#if defined(_INFO_COMPACTING)
+ fprintf(stderr,"Scan Link: 0x%lx;\tScan Move: 0x%lx\n",
+ (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr));
+ fprintf(stderr,"Mark: 0x%lx;\tMarked: 0x%lx;\t",
+ (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr));
+#if 0 /* avoid INFO_TYPE */
+ if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE)
+ fprintf(stderr,"plus specialised code\n");
+ else
+ fprintf(stderr,"Marking: 0x%lx\n",(W_) INFO_MARKING_1S(info_ptr));
+#endif /* 0 */
+#endif /* _INFO_COMPACTING */
+}
+#endif /* 0 */
+
+//@node Printing info type, Printing Packet Contents, Shortcuts, Debugging routines for GranSim and GUM
+//@subsection Printing info type
+
+/* Write a short textual tag describing closure-flag properties of
+ 'closure' into caller-supplied buffer 'str' and return 'str'.
+ NOTE(review): because this is an else-if chain after strcpy(str,""),
+ only the FIRST matching property is reported -- confirm whether
+ independent ifs (accumulating all flags) were intended. */
+char *
+display_info_type(closure, str)
+StgClosure *closure;
+char *str;
+{
+ strcpy(str,"");
+ if ( closure_HNF(closure) )
+ strcat(str,"|_HNF ");
+ else if ( closure_BITMAP(closure) )
+ strcat(str,"|_BTM");
+ else if ( !closure_SHOULD_SPARK(closure) )
+ strcat(str,"|_NS");
+ else if ( closure_STATIC(closure) )
+ strcat(str,"|_STA");
+ else if ( closure_THUNK(closure) )
+ strcat(str,"|_THU");
+ else if ( closure_MUTABLE(closure) )
+ strcat(str,"|_MUT");
+ else if ( closure_UNPOINTED(closure) )
+ strcat(str,"|_UPT");
+ else if ( closure_SRT(closure) )
+ strcat(str,"|_SRT");
+
+ return(str);
+}
+
+/*
+ PrintPacket is in Pack.c because it makes use of closure queues
+*/
+
+#if defined(GRAN) || defined(PAR)
+
+/*
+ Print graph rooted at q. The structure of this recursive printing routine
+ should be the same as in the graph traversals when packing a graph in
+ GUM. Thus, it demonstrates the structure of such a generic graph
+ traversal, and in particular, how to extract pointer and non-pointer info
+ from the multitude of different heap objects available.
+
+ {evacuate}Daq ngoqvam nIHlu'pu'!!
+*/
+
+/* Entry point for printing the heap graph rooted at 'p'. Sets up the
+ cycle-detection hash table used by the worker PrintGraph_, runs the
+ traversal, then frees the table again. */
+void
+PrintGraph(StgClosure *p, int indent_level)
+{
+ void PrintGraph_(StgClosure *p, int indent_level);
+
+ ASSERT(tmpClosureTable==NULL);
+
+ /* init hash table */
+ tmpClosureTable = allocHashTable();
+
+ /* now do the real work */
+ PrintGraph_(p, indent_level);
+
+ /* nuke hash table */
+ freeHashTable(tmpClosureTable, NULL);
+ tmpClosureTable = NULL;
+}
+
+/*
+ This is the actual worker function.
+ All recursive calls should be made to this function.
+*/
+void
+PrintGraph_(StgClosure *p, int indent_level)
+{
+ StgPtr x, q;
+ rtsBool printed = rtsFalse;
+ nat i, j;
+ const StgInfoTable *info;
+
+ /* check whether we have met this node already to break cycles */
+ if (lookupHashTable(tmpClosureTable, (StgWord)p)) { // ie. already touched
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ fprintf(stderr, "#### cylce to %p", p);
+ return;
+ }
+
+ /* record that we are processing this closure */
+ insertHashTable(tmpClosureTable, (StgWord) p, (void *)rtsTrue/*non-NULL*/);
+
+ q = p; /* save ptr to object */
+
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ ASSERT(p!=(StgClosure*)NULL);
+ ASSERT(LOOKS_LIKE_STATIC(p) ||
+ LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) ||
+ IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)));
+
+ printClosure(p); // prints contents of this one closure
+
+ /* indentation */
+ for (j=0; j<indent_level; j++)
+ fputs(" ", stderr);
+
+ info = get_itbl((StgClosure *)p);
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ fprintf(stderr, "BCO (%p)\n", p);
+ /*
+ for (i = 0; i < bco->n_ptrs; i++) {
+ // bcoConstCPtr(bco,i) =
+ PrintGraph_(bcoConstCPtr(bco,i), indent_level+1);
+ }
+ */
+ // p += bco_sizeW(bco);
+ break;
+ }
+
+ case MVAR:
+ /* treat MVars specially, because we don't want to PrintGraph the
+ * mut_link field in the middle of the closure.
+ */
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ // evac_gen = 0;
+ fprintf(stderr, "MVAR (%p) with 3 pointers (head, tail, value)\n", p);
+ // (StgClosure *)mvar->head =
+ PrintGraph_((StgClosure *)mvar->head, indent_level+1);
+ // (StgClosure *)mvar->tail =
+ PrintGraph_((StgClosure *)mvar->tail, indent_level+1);
+ //(StgClosure *)mvar->value =
+ PrintGraph_((StgClosure *)mvar->value, indent_level+1);
+ // p += sizeofW(StgMVar);
+ // evac_gen = saved_evac_gen;
+ break;
+ }
+
+ case THUNK_2_0:
+ if (!printed) {
+ fprintf(stderr, "THUNK_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_2_0:
+ if (!printed) {
+ fprintf(stderr, "FUN_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_2_0:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // ((StgClosure *)p)->payload[1] =
+ PrintGraph_(((StgClosure *)p)->payload[1],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_0:
+ // scavenge_srt(info);
+ fprintf(stderr, "THUNK_1_0 (%p) with 1 pointer\n", p);
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+ break;
+
+ case FUN_1_0:
+ if (!printed) {
+ fprintf(stderr, "FUN_1_0 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_1_0:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_1:
+ fprintf(stderr, "THUNK_0_1 (%p) with 0 pointers\n", p);
+ // scavenge_srt(info);
+ // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */
+ break;
+
+ case FUN_0_1:
+ fprintf(stderr, "FUN_0_1 (%p) with 0 pointers\n", p);
+ //scavenge_srt(info);
+ case CONSTR_0_1:
+ fprintf(stderr, "CONSTR_0_1 (%p) with 0 pointers\n", p);
+ //p += sizeofW(StgHeader) + 1;
+ break;
+
+ case THUNK_0_2:
+ if (!printed) {
+ fprintf(stderr, "THUNK_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_0_2:
+ if (!printed) {
+ fprintf(stderr, "FUN_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_0_2:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_0_2 (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case THUNK_1_1:
+ if (!printed) {
+ fprintf(stderr, "THUNK_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ case FUN_1_1:
+ if (!printed) {
+ fprintf(stderr, "FUN_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ case CONSTR_1_1:
+ if (!printed) {
+ fprintf(stderr, "CONSTR_1_1 (%p) with 1 pointer\n", p);
+ printed = rtsTrue;
+ }
+ // ((StgClosure *)p)->payload[0] =
+ PrintGraph_(((StgClosure *)p)->payload[0],
+ indent_level+1);
+ // p += sizeofW(StgHeader) + 2;
+ break;
+
+ case FUN:
+ if (!printed) {
+ fprintf(stderr, "FUN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case THUNK:
+ if (!printed) {
+ fprintf(stderr, "THUNK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ // scavenge_srt(info);
+ /* fall through */
+
+ case CONSTR:
+ if (!printed) {
+ fprintf(stderr, "CONSTR (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* basically same as loop in STABLE_NAME case */
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ PrintGraph_(((StgClosure *)p)->payload[i],
+ indent_level+1);
+ break;
+ /* NOT fall through */
+
+ case WEAK:
+ if (!printed) {
+ fprintf(stderr, "WEAK (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case FOREIGN:
+ if (!printed) {
+ fprintf(stderr, "FOREIGN (%p) with %d pointers\n", p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case STABLE_NAME:
+ {
+ StgPtr end;
+
+ if (!printed) {
+ fprintf(stderr, "STABLE_NAME (%p) with %d pointers (not followed!)\n",
+ p, info->layout.payload.ptrs);
+ printed = rtsTrue;
+ }
+ end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ // (StgClosure *)*p =
+ //PrintGraph_((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ //fputs("\n", stderr);
+ // p += info->layout.payload.nptrs;
+ break;
+ }
+
+ case IND_PERM:
+ //if (step->gen->no != 0) {
+ // SET_INFO(((StgClosure *)p), &IND_OLDGEN_PERM_info);
+ //}
+ if (!printed) {
+ fprintf(stderr, "IND_PERM (%p) with indirection to\n",
+ p, ((StgIndOldGen *)p)->indirectee);
+ printed = rtsTrue;
+ }
+ /* fall through */
+
+ case IND_OLDGEN_PERM:
+ if (!printed) {
+ fprintf(stderr, "IND_OLDGEN_PERM (%p) with indirection to %p\n",
+ p, ((StgIndOldGen *)p)->indirectee);
+ printed = rtsTrue;
+ }
+ // ((StgIndOldGen *)p)->indirectee =
+ PrintGraph_(((StgIndOldGen *)p)->indirectee,
+ indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordOldToNewPtrs((StgMutClosure *)p);
+ //}
+ // p += sizeofW(StgIndOldGen);
+ break;
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ fprintf(stderr, "MUT_VAR (%p) pointing to %p\n", p, ((StgMutVar *)p)->var);
+ if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) {
+ //evac_gen = 0;
+ PrintGraph_(((StgMutVar *)p)->var, indent_level+1);
+ //evac_gen = saved_evac_gen;
+ }
+ //p += sizeofW(StgMutVar);
+ break;
+
+ case CAF_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case SE_CAF_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "SE_CAF_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case SE_BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "SE_BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ case BLACKHOLE:
+ if (!printed) {
+ fprintf(stderr, "BLACKHOLE (%p) with 0 pointers\n", p);
+ printed = rtsTrue;
+ }
+ //p += BLACKHOLE_sizeW();
+ break;
+
+ case BLACKHOLE_BQ:
+ {
+ StgBlockingQueue *bh = (StgBlockingQueue *)p;
+ // (StgClosure *)bh->blocking_queue =
+ fprintf(stderr, "BLACKHOLE_BQ (%p) pointing to %p\n",
+ p, (StgClosure *)bh->blocking_queue);
+ PrintGraph_((StgClosure *)bh->blocking_queue, indent_level+1);
+ //if (failed_to_evac) {
+ // failed_to_evac = rtsFalse;
+ // recordMutable((StgMutClosure *)bh);
+ //}
+ // p += BLACKHOLE_sizeW();
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ fprintf(stderr, "THUNK_SELECTOR (%p) pointing to %p\n",
+ p, s->selectee);
+ PrintGraph_(s->selectee, indent_level+1);
+ // p += THUNK_SELECTOR_sizeW();
+ break;
+ }
+
+ case IND:
+ fprintf(stderr, "IND (%p) pointing to %p\n", p, ((StgInd*)p)->indirectee);
+ PrintGraph_(((StgInd*)p)->indirectee, indent_level+1);
+ break;
+
+ case IND_OLDGEN:
+ fprintf(stderr, "IND_OLDGEN (%p) pointing to %p\n",
+ p, ((StgIndOldGen*)p)->indirectee);
+ PrintGraph_(((StgIndOldGen*)p)->indirectee, indent_level+1);
+ break;
+
+ case CONSTR_INTLIKE:
+ fprintf(stderr, "CONSTR_INTLIKE (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_CHARLIKE:
+ fprintf(stderr, "CONSTR_CHARLIKE (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_STATIC:
+ fprintf(stderr, "CONSTR_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case CONSTR_NOCAF_STATIC:
+ fprintf(stderr, "CONSTR_NOCAF_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case THUNK_STATIC:
+ fprintf(stderr, "THUNK_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case FUN_STATIC:
+ fprintf(stderr, "FUN_STATIC (%p) with 0 pointers\n", p);
+ break;
+ case IND_STATIC:
+ fprintf(stderr, "IND_STATIC (%p) with 0 pointers\n", p);
+ break;
+
+ case RET_BCO:
+ fprintf(stderr, "RET_BCO (%p) with 0 pointers\n", p);
+ break;
+ case RET_SMALL:
+ fprintf(stderr, "RET_SMALL (%p) with 0 pointers\n", p);
+ break;
+ case RET_VEC_SMALL:
+ fprintf(stderr, "RET_VEC_SMALL (%p) with 0 pointers\n", p);
+ break;
+ case RET_BIG:
+ fprintf(stderr, "RET_BIG (%p) with 0 pointers\n", p);
+ break;
+ case RET_VEC_BIG:
+ fprintf(stderr, "RET_VEC_BIG (%p) with 0 pointers\n", p);
+ break;
+ case RET_DYN:
+ fprintf(stderr, "RET_DYN (%p) with 0 pointers\n", p);
+ break;
+ case UPDATE_FRAME:
+ fprintf(stderr, "UPDATE_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case STOP_FRAME:
+ fprintf(stderr, "STOP_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case CATCH_FRAME:
+ fprintf(stderr, "CATCH_FRAME (%p) with 0 pointers\n", p);
+ break;
+ case SEQ_FRAME:
+ fprintf(stderr, "SEQ_FRAME (%p) with 0 pointers\n", p);
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ fprintf(stderr, "AP_UPD (%p) with 0 pointers\n", p);
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * PrintGraph_ the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+
+ fprintf(stderr, "PAP (%p) pointing to %p\n", p, pap->fun);
+ // pap->fun =
+ //PrintGraph_(pap->fun, indent_level+1);
+ //scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args);
+ //p += pap_sizeW(pap);
+ break;
+ }
+
+ case ARR_WORDS:
+ /* an array of (non-mutable) words */
+ fprintf(stderr, "ARR_WORDS (%p) of %d non-ptrs (maybe a string?)\n",
+ p, ((StgArrWords *)q)->words);
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ StgPtr next;
+
+ fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)\n",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+ // evac_gen = 0; /* repeatedly mutable */
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // (StgClosure *)*p =
+ // PrintGraph_((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ fputs("\n", stderr);
+ //evac_gen = saved_evac_gen;
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ StgPtr start = p, next;
+
+ fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)",
+ p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p));
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // (StgClosure *)*p =
+ // PrintGraph_((StgClosure *)*p, indent_level+1);
+ fprintf(stderr, ", %p", *p);
+ }
+ fputs("\n", stderr);
+ //if (failed_to_evac) {
+ /* we can do this easier... */
+ // recordMutable((StgMutClosure *)start);
+ // failed_to_evac = rtsFalse;
+ //}
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso;
+
+ tso = (StgTSO *)p;
+ fprintf(stderr, "TSO (%p) with link field %p\n", p, (StgClosure *)tso->link);
+ // evac_gen = 0;
+ /* chase the link field for any TSOs on the same queue */
+ // (StgClosure *)tso->link =
+ PrintGraph_((StgClosure *)tso->link, indent_level+1);
+ //if (tso->blocked_on) {
+ // tso->blocked_on = PrintGraph_(tso->blocked_on);
+ //}
+ /* scavenge this thread's stack */
+ //scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
+ //evac_gen = saved_evac_gen;
+ //p += tso_sizeW(tso);
+ break;
+ }
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ {
+ StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p));
+ //if (LOOKS_LIKE_GHC_INFO(rip))
+ // fprintf(stderr, "RBH (%p) with 0 pointers (reverted type=%s)\n",
+ // p, info_type_by_ip(rip));
+ //else
+ fprintf(stderr, "RBH (%p) with 0 pointers (reverted IP=%x)\n",
+ p, rip);
+ }
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ fprintf(stderr, "BLOCKED_FETCH (%p) with 0 pointers (link=%p)\n",
+ p, ((StgBlockedFetch *)p)->link);
+ break;
+ case FETCH_ME:
+ fprintf(stderr, "FETCH_ME (%p) with 0 pointers\n", p);
+ break;
+ case FETCH_ME_BQ:
+ fprintf(stderr, "FETCH_ME_BQ (%p) with 0 pointers (blocking_queue=%p)\n",
+ p, ((StgFetchMeBlockingQueue *)p)->blocking_queue);
+ break;
+#endif
+
+#ifdef DIST
+ case REMOTE_REF:
+ fprintf(stderr, "REMOTE_REF (%p) with 0 pointers\n", p);
+ break;
+#endif
+
+ case EVACUATED:
+ fprintf(stderr, "EVACUATED (%p) with 0 pointers (evacuee=%p)\n",
+ p, ((StgEvacuated *)p)->evacuee);
+ break;
+
+ default:
+ barf("PrintGraph_: unknown closure %d (%s)",
+ info -> type, info_type(info));
+ }
+
+ /* If we didn't manage to promote all the objects pointed to by
+ * the current object, then we have to designate this object as
+ * mutable (because it contains old-to-new generation pointers).
+ */
+ //if (failed_to_evac) {
+ // mkMutCons((StgClosure *)q, &generations[evac_gen]);
+ // failed_to_evac = rtsFalse;
+ //}
+}
+
+# if defined(PAR)
+/*
+ Generate a finger-print for a graph.
+ A finger-print is a string, with each char representing one node;
+ depth-first traversal
+*/
+
+void
+GraphFingerPrint(StgClosure *p, char *finger_print)
+{
+ void GraphFingerPrint_(StgClosure *p, char *finger_print);
+
+ ASSERT(tmpClosureTable==NULL);
+ ASSERT(strlen(finger_print)==0);
+
+ /* init hash table */
+ tmpClosureTable = allocHashTable();
+
+ /* now do the real work */
+ GraphFingerPrint_(p, finger_print);
+
+ /* nuke hash table */
+ freeHashTable(tmpClosureTable, NULL);
+ tmpClosureTable = NULL;
+}
+
+/*
+ This is the actual worker functions.
+ All recursive calls should be made to this function.
+*/
+void
+GraphFingerPrint_(StgClosure *p, char *finger_print)
+{
+ StgPtr x, q;
+ rtsBool printed = rtsFalse;
+ nat i, j, len;
+ const StgInfoTable *info;
+
+ q = p; /* save ptr to object */
+ len = strlen(finger_print);
+ ASSERT(len<=MAX_FINGER_PRINT_LEN);
+ /* at most 7 chars for this node (I think) */
+ if (len+7>=MAX_FINGER_PRINT_LEN)
+ return;
+
+ /* check whether we have met this node already to break cycles */
+ if (lookupHashTable(tmpClosureTable, (StgWord)p)) { // ie. already touched
+ strcat(finger_print, "#");
+ return;
+ }
+
+ /* record that we are processing this closure */
+ insertHashTable(tmpClosureTable, (StgWord) p, (void *)rtsTrue/*non-NULL*/);
+
+ ASSERT(p!=(StgClosure*)NULL);
+ ASSERT(LOOKS_LIKE_STATIC(p) ||
+ LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) ||
+ IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)));
+
+ info = get_itbl((StgClosure *)p);
+ // append char for this node
+ finger_print[len] = finger_print_char[info->type]; finger_print[len+1] = '\0';
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ //%% fprintf(stderr, "BCO (%p) with %d pointers\n", p, bco->n_ptrs);
+ /*
+ for (i = 0; i < bco->n_ptrs; i++) {
+ // bcoConstCPtr(bco,i) =
+ GraphFingerPrint_(bcoConstCPtr(bco,i), finger_print);
+ }
+ */
+ // p += bco_sizeW(bco);
+ break;
+ }
+
+ case MVAR:
+ break;
+
+ case THUNK_2_0:
+ case FUN_2_0:
+ case CONSTR_2_0:
+ // append char for this node
+ strcat(finger_print, "22(");
+ GraphFingerPrint_(((StgClosure *)p)->payload[0], finger_print);
+ GraphFingerPrint_(((StgClosure *)p)->payload[1], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ break;
+
+ case THUNK_1_0:
+ case FUN_1_0:
+ case CONSTR_1_0:
+ // append char for this node
+ strcat(finger_print, "12(");
+ GraphFingerPrint_(((StgClosure *)p)->payload[0], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ break;
+
+ case THUNK_0_1:
+ case FUN_0_1:
+ case CONSTR_0_1:
+ // append char for this node
+ strcat(finger_print, "01");
+ break;
+
+ case THUNK_0_2:
+ case FUN_0_2:
+ case CONSTR_0_2:
+ // append char for this node
+ strcat(finger_print, "02");
+ break;
+
+ case THUNK_1_1:
+ case FUN_1_1:
+ case CONSTR_1_1:
+ // append char for this node
+ strcat(finger_print, "11(");
+ GraphFingerPrint_(((StgClosure *)p)->payload[0], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ break;
+
+ case FUN:
+ case THUNK:
+ case CONSTR:
+ /* basically same as loop in STABLE_NAME case */
+ {
+ char str[6];
+ sprintf(str,"%d?(",info->layout.payload.ptrs);
+ strcat(finger_print,str);
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ GraphFingerPrint_(((StgClosure *)p)->payload[i], finger_print);
+ if (strlen(finger_print)+2<MAX_FINGER_PRINT_LEN)
+ strcat(finger_print, ")");
+ }
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ {
+ StgPtr end;
+ char str[6];
+ sprintf(str,"%d?", info->layout.payload.ptrs);
+ strcat(finger_print,str);
+
+ //end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ //for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ // GraphFingerPrint_((StgClosure *)*p, finger_print);
+ //}
+ break;
+ }
+
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ GraphFingerPrint_(((StgIndOldGen *)p)->indirectee, finger_print);
+ break;
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) {
+ GraphFingerPrint_(((StgMutVar *)p)->var, finger_print);
+ }
+ break;
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ break;
+
+ case BLACKHOLE_BQ:
+ {
+ StgBlockingQueue *bh = (StgBlockingQueue *)p;
+ // GraphFingerPrint_((StgClosure *)bh->blocking_queue, finger_print);
+ break;
+ }
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ GraphFingerPrint_(s->selectee, finger_print);
+ break;
+ }
+
+ case IND:
+ GraphFingerPrint_(((StgInd*)p)->indirectee, finger_print);
+ break;
+
+ case IND_OLDGEN:
+ GraphFingerPrint_(((StgIndOldGen*)p)->indirectee, finger_print);
+ break;
+
+ case IND_STATIC:
+ GraphFingerPrint_(((StgIndOldGen*)p)->indirectee, finger_print);
+ break;
+
+ case CONSTR_INTLIKE:
+ case CONSTR_CHARLIKE:
+ case CONSTR_STATIC:
+ case CONSTR_NOCAF_STATIC:
+ case THUNK_STATIC:
+ case FUN_STATIC:
+ break;
+
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_VEC_SMALL:
+ case RET_BIG:
+ case RET_VEC_BIG:
+ case RET_DYN:
+ case UPDATE_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case SEQ_FRAME:
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * GraphFingerPrint_ the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+ char str[6];
+ sprintf(str,"%d",pap->n_args);
+ strcat(finger_print,str);
+ //GraphFingerPrint_(pap->fun, finger_print); // ??
+ break;
+ }
+
+ case ARR_WORDS:
+ {
+ char str[6];
+ sprintf(str,"%d",((StgArrWords*)p)->words);
+ strcat(finger_print,str);
+ }
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ char str[6];
+ sprintf(str,"%d",((StgMutArrPtrs*)p)->ptrs);
+ strcat(finger_print,str);
+ }
+ {
+ StgPtr next;
+ //next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ //for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // GraphFingerPrint_((StgClosure *)*p, finger_print);
+ //}
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ char str[6];
+ sprintf(str,"%d",((StgMutArrPtrs*)p)->ptrs);
+ strcat(finger_print,str);
+ }
+ {
+ StgPtr start = p, next;
+ //next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ //for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ // GraphFingerPrint_((StgClosure *)*p, finger_print);
+ //}
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso = (StgTSO *)p;
+ char str[6];
+ sprintf(str,"%d",tso->id);
+ strcat(finger_print,str);
+ }
+ //GraphFingerPrint_((StgClosure *)tso->link, indent_level+1);
+ break;
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ {
+ // use this
+ // StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p));
+ }
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ break;
+ case FETCH_ME:
+ break;
+ case FETCH_ME_BQ:
+ break;
+#endif
+#ifdef DIST
+ case REMOTE_REF:
+ break;
+#endif
+ case EVACUATED:
+ break;
+
+ default:
+ barf("GraphFingerPrint_: unknown closure %d (%s)",
+ info -> type, info_type(info));
+ }
+
+}
+# endif /* PAR */
+
+/*
+ Do a sanity check on the whole graph, down to a recursion level of level.
+ Same structure as PrintGraph (nona).
+*/
+void
+checkGraph(StgClosure *p, int rec_level)
+{
+ StgPtr x, q;
+ nat i, j;
+ const StgInfoTable *info;
+
+ if (rec_level==0)
+ return;
+
+ q = p; /* save ptr to object */
+
+ /* First, the obvious generic checks */
+ ASSERT(p!=(StgClosure*)NULL);
+ checkClosure(p); /* see Sanity.c for what's actually checked */
+
+ info = get_itbl((StgClosure *)p);
+ /* the rest of this fct recursively traverses the graph */
+ switch (info -> type) {
+
+ case BCO:
+ {
+ StgBCO* bco = stgCast(StgBCO*,p);
+ nat i;
+ /*
+ for (i = 0; i < bco->n_ptrs; i++) {
+ checkGraph(bcoConstCPtr(bco,i), rec_level-1);
+ }
+ */
+ break;
+ }
+
+ case MVAR:
+ /* treat MVars specially, because we don't want to PrintGraph the
+ * mut_link field in the middle of the closure.
+ */
+ {
+ StgMVar *mvar = ((StgMVar *)p);
+ checkGraph((StgClosure *)mvar->head, rec_level-1);
+ checkGraph((StgClosure *)mvar->tail, rec_level-1);
+ checkGraph((StgClosure *)mvar->value, rec_level-1);
+ break;
+ }
+
+ case THUNK_2_0:
+ case FUN_2_0:
+ case CONSTR_2_0:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ checkGraph(((StgClosure *)p)->payload[1], rec_level-1);
+ break;
+
+ case THUNK_1_0:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ break;
+
+ case FUN_1_0:
+ case CONSTR_1_0:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ break;
+
+ case THUNK_0_1:
+ break;
+
+ case FUN_0_1:
+ case CONSTR_0_1:
+ break;
+
+ case THUNK_0_2:
+ case FUN_0_2:
+ case CONSTR_0_2:
+ break;
+
+ case THUNK_1_1:
+ case FUN_1_1:
+ case CONSTR_1_1:
+ checkGraph(((StgClosure *)p)->payload[0], rec_level-1);
+ break;
+
+ case FUN:
+ case THUNK:
+ case CONSTR:
+ for (i=0; i<info->layout.payload.ptrs; i++)
+ checkGraph(((StgClosure *)p)->payload[i], rec_level-1);
+ break;
+
+ case WEAK:
+ case FOREIGN:
+ case STABLE_NAME:
+ {
+ StgPtr end;
+
+ end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+ for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) {
+ checkGraph(*(StgClosure **)p, rec_level-1);
+ }
+ break;
+ }
+
+ case IND_PERM:
+ case IND_OLDGEN_PERM:
+ checkGraph(((StgIndOldGen *)p)->indirectee, rec_level-1);
+ break;
+
+ case MUT_VAR:
+ /* ignore MUT_CONSs */
+ if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) {
+ checkGraph(((StgMutVar *)p)->var, rec_level-1);
+ }
+ break;
+
+ case CAF_BLACKHOLE:
+ case SE_CAF_BLACKHOLE:
+ case SE_BLACKHOLE:
+ case BLACKHOLE:
+ break;
+
+ case BLACKHOLE_BQ:
+ break;
+
+ case THUNK_SELECTOR:
+ {
+ StgSelector *s = (StgSelector *)p;
+ checkGraph(s->selectee, rec_level-1);
+ break;
+ }
+
+ case IND:
+ checkGraph(((StgInd*)p)->indirectee, rec_level-1);
+ break;
+
+ case IND_OLDGEN:
+ checkGraph(((StgIndOldGen*)p)->indirectee, rec_level-1);
+ break;
+
+ case CONSTR_INTLIKE:
+ break;
+ case CONSTR_CHARLIKE:
+ break;
+ case CONSTR_STATIC:
+ break;
+ case CONSTR_NOCAF_STATIC:
+ break;
+ case THUNK_STATIC:
+ break;
+ case FUN_STATIC:
+ break;
+ case IND_STATIC:
+ break;
+
+ case RET_BCO:
+ break;
+ case RET_SMALL:
+ break;
+ case RET_VEC_SMALL:
+ break;
+ case RET_BIG:
+ break;
+ case RET_VEC_BIG:
+ break;
+ case RET_DYN:
+ break;
+ case UPDATE_FRAME:
+ break;
+ case STOP_FRAME:
+ break;
+ case CATCH_FRAME:
+ break;
+ case SEQ_FRAME:
+ break;
+
+ case AP_UPD: /* same as PAPs */
+ case PAP:
+ /* Treat a PAP just like a section of stack, not forgetting to
+ * checkGraph the function pointer too...
+ */
+ {
+ StgPAP* pap = stgCast(StgPAP*,p);
+
+ checkGraph(pap->fun, rec_level-1);
+ break;
+ }
+
+ case ARR_WORDS:
+ break;
+
+ case MUT_ARR_PTRS:
+ /* follow everything */
+ {
+ StgPtr next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ checkGraph(*(StgClosure **)p, rec_level-1);
+ }
+ break;
+ }
+
+ case MUT_ARR_PTRS_FROZEN:
+ /* follow everything */
+ {
+ StgPtr start = p, next;
+
+ next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+ for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+ checkGraph(*(StgClosure **)p, rec_level-1);
+ }
+ break;
+ }
+
+ case TSO:
+ {
+ StgTSO *tso;
+
+ tso = (StgTSO *)p;
+ checkGraph((StgClosure *)tso->link, rec_level-1);
+ break;
+ }
+
+#if defined(GRAN) || defined(PAR)
+ case RBH:
+ break;
+#endif
+#if defined(PAR)
+ case BLOCKED_FETCH:
+ break;
+ case FETCH_ME:
+ break;
+ case FETCH_ME_BQ:
+ break;
+#endif
+ case EVACUATED:
+ barf("checkGraph: found EVACUATED closure %p (%s)",
+ p, info_type(p));
+ break;
+
+ default:
+ }
+}
+
+#endif /* GRAN */
+
+#endif /* GRAN || PAR */
+
+//@node End of File, , Printing Packet Contents, Debugging routines for GranSim and GUM
+//@subsection End of File
diff --git a/rts/parallel/ParallelDebug.h b/rts/parallel/ParallelDebug.h
new file mode 100644
index 0000000000..f8aaeb85d4
--- /dev/null
+++ b/rts/parallel/ParallelDebug.h
@@ -0,0 +1,79 @@
+/*
+ Time-stamp: <Tue Mar 06 2001 00:25:14 Stardate: [-30]6285.08 hwloidl>
+
+ Prototypes of all parallel debugging functions.
+*/
+
+#ifndef PARALLEL_DEBUG_H
+#define PARALLEL_DEBUG_H
+
+#if defined(DEBUG) && (defined(GRAN) || defined(PAR))
+/* max length of the string holding a finger-print for a graph */
+#define MAX_FINGER_PRINT_LEN 10000
+// (10*RtsFlags.ParFlags.packBufferSize)
+#endif
+
+#if defined(DEBUG) && defined(GRAN)
+void G_PRINT_NODE(StgClosure* node);
+void G_PPN(StgClosure* node);
+void G_INFO_TABLE(StgClosure* node);
+void G_CURR_THREADQ(StgInt verbose);
+void G_THREADQ(StgTSO* closure, StgInt verbose);
+void G_TSO(StgTSO* closure, StgInt verbose);
+void G_EVENT(rtsEventQ event, StgInt verbose);
+void G_EVENTQ(StgInt verbose);
+void G_PE_EQ(PEs pe, StgInt verbose);
+void G_SPARK(rtsSparkQ spark, StgInt verbose);
+void G_SPARKQ(rtsSparkQ spark, StgInt verbose);
+void G_CURR_SPARKQ(StgInt verbose);
+void G_PROC(StgInt proc, StgInt verbose);
+void GP(StgInt proc);
+void GCP(void);
+void GT(StgPtr tso);
+void GCT(void);
+void GEQ(void);
+void GTQ(PEs p);
+void GCTQ(void);
+void GSQ(PEs p);
+void GCSQ(void);
+void GN(StgPtr node);
+void GIT(StgPtr node);
+#endif
+
+#if defined(GRAN) || defined(PAR)
+
+char *display_info_type(StgClosure *closure, char *str);
+void info_hdr_type(StgClosure *closure, char *res);
+char *info_type(StgClosure *closure);
+char *info_type_by_ip(StgInfoTable *ip);
+
+void PrintPacket(rtsPackBuffer *buffer);
+void PrintGraph(StgClosure *p, int indent_level);
+void GraphFingerPrint(StgClosure *p, char *finger_print);
+void checkGraph(StgClosure *p, int rec_level);
+
+void checkPacket(rtsPackBuffer *packBuffer);
+
+#endif /* GRAN || PAR */
+
+#if defined(PAR)
+
+/* don't want to import Schedule.h and Sanity.h everywhere */
+extern void print_bq (StgClosure *node);
+extern void checkBQ (StgBlockingQueueElement *bqe, StgClosure *closure);
+
+void checkGAGAMap(globalAddr *gagamap, int nGAs);
+extern rtsBool isOnLiveIndTable(globalAddr *ga);
+extern void rebuildGAtables(rtsBool full);
+extern void rebuildLAGAtable(void);
+extern void checkLAGAtable(rtsBool check_closures);
+extern void checkHeapChunk(StgPtr start, StgPtr end);
+extern void printGA (globalAddr *ga);
+extern void printGALA (GALA *gala);
+extern void printLiveIndTable(void);
+extern void printRemoteGATable(void);
+extern void printLAGAtable(void);
+
+#endif
+
+#endif /* PARALLEL_DEBUG_H */
diff --git a/rts/parallel/ParallelRts.h b/rts/parallel/ParallelRts.h
new file mode 100644
index 0000000000..d421296d19
--- /dev/null
+++ b/rts/parallel/ParallelRts.h
@@ -0,0 +1,253 @@
+/* --------------------------------------------------------------------------
+ Time-stamp: <Tue Mar 06 2001 00:25:50 Stardate: [-30]6285.08 hwloidl>
+
+ Variables and functions specific to the parallel RTS (i.e. GUM or GranSim)
+ ----------------------------------------------------------------------- */
+
+#ifndef PARALLEL_RTS_H
+#define PARALLEL_RTS_H
+
+#include "ParTicky.h"
+
+/* HWL HACK: compile time sanity checks; shouldn't be necessary at all */
+#if defined(PAR) && defined(GRAN)
+# error "Both PAR and GRAN defined"
+#endif
+
+#if defined(DEBUG)
+/* Paranoia debugging: we add an end-of-buffer marker to every pack buffer
+ (only when sanity checking RTS is enabled, of course) */
+#define DEBUG_HEADROOM 1
+#define END_OF_BUFFER_MARKER 0x1111bbbb
+#define GARBAGE_MARKER 0x1111eeee
+#else
+#define DEBUG_HEADROOM 0
+#endif /* DEBUG */
+
+#if defined(GRAN) || defined(PAR)
+
+#if defined(GRAN)
+
+/* Statistics info */
+extern nat tot_packets, tot_packet_size, tot_cuts, tot_thunks;
+
+/* Pack.c */
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize, GlobalTaskId dest);
+rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize);
+rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize);
+rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize);
+void PackFetchMe(StgClosure *closure);
+
+/* Unpack.c */
+StgClosure* UnpackGraph(rtsPackBuffer* buffer);
+void InitPendingGABuffer(nat size);
+
+/* RBH.c */
+StgClosure *convertToRBH(StgClosure *closure);
+void convertFromRBH(StgClosure *closure);
+
+/* HLComms.c */
+rtsFetchReturnCode blockFetch(StgTSO* tso, PEs proc, StgClosure* bh);
+void blockThread(StgTSO *tso);
+
+#endif
+#if defined(PAR)
+
+/* Statistics info */
+
+/* global structure for collecting statistics */
+typedef struct GlobalParStats_ {
+ /* GALA and LAGA table info */
+ nat tot_mark_GA, tot_rebuild_GA, tot_free_GA,
+ res_mark_GA, res_rebuild_GA, res_free_GA,
+ cnt_mark_GA, cnt_rebuild_GA, cnt_free_GA,
+ res_size_GA, tot_size_GA, local_alloc_GA, tot_global, tot_local;
+
+ /* time spent managing the GAs */
+ double time_mark_GA, time_rebuild_GA;
+
+ /* spark queue stats */
+ nat res_sp, tot_sp, cnt_sp, emp_sp;
+ // nat tot_sq_len, tot_sq_probes, tot_sparks;
+ /* thread queue stats */
+ nat res_tp, tot_tp, cnt_tp, emp_tp;
+ //nat tot_add_threads, tot_tq_len, non_end_add_threads;
+
+ /* packet statistics */
+ nat tot_packets, tot_packet_size, tot_thunks,
+ res_packet_size, res_thunks,
+ rec_packets, rec_packet_size, rec_thunks,
+ rec_res_packet_size, rec_res_thunks;
+ /* time spent packing stuff */
+ double time_pack, time_unpack;
+
+ /* thread stats */
+ nat tot_threads_created;
+
+ /* spark stats */
+ //nat pruned_sparks, withered_sparks;
+ nat tot_sparks_created, tot_sparks_ignored, tot_sparks_marked,
+ res_sparks_created, res_sparks_ignored, res_sparks_marked; // , sparks_created_on_PE[MAX_PROC];
+ double time_sparks;
+
+ /* scheduling stats */
+ nat tot_yields, tot_stackover, tot_heapover;
+
+ /* message statistics */
+ nat tot_fish_mess, tot_fetch_mess, tot_resume_mess, tot_schedule_mess;
+ nat rec_fish_mess, rec_fetch_mess, rec_resume_mess, rec_schedule_mess;
+#if defined(DIST)
+ nat tot_reval_mess;
+ nat rec_reval_mess;
+#endif
+
+ /* blocking queue statistics
+ rtsTime tot_bq_processing_time;
+ nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs;
+ */
+
+ /* specialised info on arrays (for GPH/Maple mainly) */
+ nat tot_arrs, tot_arr_size;
+} GlobalParStats;
+
+extern GlobalParStats globalParStats;
+
+void globalParStat_exit(void);
+
+/* Pack.c */
+rtsBool InitPackBuffer(void);
+rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,
+ nat *packBufferSize, GlobalTaskId dest);
+
+/* Unpack.c */
+void CommonUp(StgClosure *src, StgClosure *dst);
+StgClosure *UnpackGraph(rtsPackBuffer *buffer, globalAddr **gamap,
+ nat *nGAs);
+
+/* RBH.c */
+StgClosure *convertToRBH(StgClosure *closure);
+void convertToFetchMe(StgRBH *rbh, globalAddr *ga);
+
+/* HLComms.c */
+void blockFetch(StgBlockedFetch *bf, StgClosure *bh);
+void blockThread(StgTSO *tso);
+
+/* Global.c */
+void GALAdeprecate(globalAddr *ga);
+
+/* HLComms.c */
+nat pending_fetches_len(void);
+
+/* ParInit.c */
+void initParallelSystem(void);
+void shutdownParallelSystem(StgInt n);
+void synchroniseSystem(void);
+void par_exit(I_);
+
+#endif
+
+/* this routine should be moved to a more general module; currently in Pack.c
+StgInfoTable* get_closure_info(StgClosure* node,
+ nat *size, nat *ptrs, nat *nonptrs, nat *vhs,
+ char *info_hdr_ty);
+*/
+void doGlobalGC(void);
+
+//@node GC routines, Debugging routines, Spark handling routines
+//@subsection GC routines
+
+#if defined(PAR)
+/* HLComms.c */
+void freeRemoteGA(int pe, globalAddr *ga);
+void sendFreeMessages(void);
+void markPendingFetches(rtsBool major_gc);
+
+/* Global.c */
+void markLocalGAs(rtsBool full);
+void RebuildGAtables(rtsBool full);
+void RebuildLAGAtable(void);
+#endif
+
+//@node Debugging routines, Generating .gr profiles, GC routines
+//@subsection Debugging routines
+
+#if defined(PAR)
+void printGA (globalAddr *ga);
+void printGALA (GALA *gala);
+void printLAGAtable(void);
+
+rtsBool isOnLiveIndTable(globalAddr *ga);
+rtsBool isOnRemoteGATable(globalAddr *ga);
+void checkFreeGALAList(void);
+void checkFreeIndirectionsList(void);
+#endif
+
+//@node Generating .gr profiles, Index, Debugging routines
+//@subsection Generating .gr profiles
+
+#define STATS_FILENAME_MAXLEN 128
+
+/* Where to write the log file */
+//@cindex gr_file
+//@cindex gr_filename
+extern FILE *gr_file;
+extern char gr_filename[STATS_FILENAME_MAXLEN];
+
+//@cindex init_gr_stats
+//@cindex init_gr_simulation
+//@cindex end_gr_simulation
+void init_gr_stats (void);
+void init_gr_simulation(int rts_argc, char *rts_argv[],
+ int prog_argc, char *prog_argv[]);
+void end_gr_simulation(void);
+
+// TODO: move fcts in here (as static inline)
+StgInfoTable* get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty);
+rtsBool IS_BLACK_HOLE(StgClosure* node);
+StgClosure *IS_INDIRECTION(StgClosure* node) ;
+StgClosure *UNWIND_IND (StgClosure *closure);
+
+
+#endif /* defined(PAR) || defined(GRAN) */
+
+//@node Common macros, Index, Generating .gr profiles
+//@subsection Common macros
+
+#define LOOKS_LIKE_PTR(r) \
+ (LOOKS_LIKE_STATIC_CLOSURE(r) || \
+ ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1)))
+
+/* see Sanity.c for this kind of test; doing this in these basic fcts
+ is paranoid (nuke it after debugging!)
+*/
+
+/* pathetic version of the check whether p can be a closure */
+#define LOOKS_LIKE_COOL_CLOSURE(p) 1
+
+//LOOKS_LIKE_GHC_INFO(get_itbl(p))
+
+ /* Is it a static closure (i.e. in the data segment)? */ \
+ /*
+#define LOOKS_LIKE_COOL_CLOSURE(p) \
+ ((LOOKS_LIKE_STATIC(p)) ? \
+ closure_STATIC(p) \
+ : !closure_STATIC(p) && LOOKS_LIKE_PTR(p))
+ */
+
+#endif /* PARALLEL_RTS_H */
+
+//@node Index, , Index
+//@subsection Index
+
+//@index
+//* IS_BLACK_HOLE:: @cindex\s-+IS_BLACK_HOLE
+//* IS_INDIRECTION:: @cindex\s-+IS_INDIRECTION
+//* end_gr_simulation:: @cindex\s-+end_gr_simulation
+//* get_closure_info:: @cindex\s-+get_closure_info
+//* gr_file:: @cindex\s-+gr_file
+//* gr_filename:: @cindex\s-+gr_filename
+//* init_gr_simulation:: @cindex\s-+init_gr_simulation
+//* unwindInd:: @cindex\s-+unwindInd
+//@end index
diff --git a/rts/parallel/RBH.c b/rts/parallel/RBH.c
new file mode 100644
index 0000000000..1612209027
--- /dev/null
+++ b/rts/parallel/RBH.c
@@ -0,0 +1,337 @@
+/*
+ Time-stamp: <Tue Mar 13 2001 19:07:13 Stardate: [-30]6323.98 hwloidl>
+
+ Revertible Black Hole Manipulation.
+ Used in GUM and GranSim during the packing of closures. These black holes
+ must be revertible because a GC might occur while the packet is being
+ transmitted. In this case all RBHs have to be reverted.
+ */
+
+#if defined(PAR) || defined(GRAN) /* whole file */
+
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "RtsUtils.h"
+#include "GranSimRts.h"
+#include "ParallelRts.h"
+# if defined(DEBUG)
+# include "ParallelDebug.h"
+# endif
+#include "Storage.h" // for recordMutable
+#include "StgMacros.h" // inlined IS_... fcts
+
+/*
+ Turn a closure into a revertible black hole. After the conversion, the
+ first two words of the closure (after the fixed header, of course) will
+ be a link to the mutables list (if appropriate for the garbage
+ collector), and a pointer to the blocking queue. The blocking queue is
+ terminated by a 2-word SPEC closure which holds the original contents of
+ the first two words of the closure.
+*/
+
+//@menu
+//* Externs and prototypes::
+//* Conversion Functions::
+//* Index::
+//@end menu
+
+//@node Externs and prototypes, Conversion Functions
+//@section Externs and prototypes
+
+EXTFUN(stg_RBH_Save_0_info);
+EXTFUN(stg_RBH_Save_1_info);
+EXTFUN(stg_RBH_Save_2_info);
+
+//@node Conversion Functions, Index, Externs and prototypes
+//@section Conversion Functions
+
+/*
+ A closure is turned into an RBH upon packing it (see PackClosure in Pack.c).
+ This is needed in case we have to do a GC before the packet is turned
+ into a graph on the PE receiving the packet.
+*/
+//@cindex convertToRBH
+StgClosure *
+convertToRBH(closure)
+StgClosure *closure;
+{
+ StgRBHSave *rbh_save;
+ StgInfoTable *info_ptr, *rbh_info_ptr, *old_info;
+ nat size, ptrs, nonptrs, vhs;
+ char str[80];
+
+ /*
+ Closure layout before this routine runs amuck:
+ +-------------------
+ | HEADER | DATA ...
+ +-------------------
+ | FIXED_HS |
+ */
+ /*
+ Turn closure into an RBH. This is done by modifying the info_ptr,
+ grabbing the info_ptr of the RBH for this closure out of its
+ ITBL. Additionally, we have to save the words from the closure, which
+ will hold the link to the blocking queue. For this purpose we use the
+ RBH_Save_N closures, with N being the number of pointers for this
+ closure. */
+ IF_GRAN_DEBUG(pack,
+ belch("*>:: %p (%s): Converting closure into an RBH",
+ closure, info_type(closure)));
+ IF_PAR_DEBUG(pack,
+ belch("*>:: %p (%s): Converting closure into an RBH",
+ closure, info_type(closure)));
+
+ ASSERT(closure_THUNK(closure));
+
+ IF_GRAN_DEBUG(pack,
+ old_info = get_itbl(closure));
+
+ /* Allocate a new closure for the holding data ripped out of closure */
+ if ((rbh_save = (StgRBHSave *)allocate(_HS + 2)) == NULL)
+ return NULL; /* have to Garbage Collect; check that in the caller! */
+
+ info_ptr = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str);
+ ASSERT(size >= _HS+MIN_UPD_SIZE);
+
+ /* Fill in the RBH_Save closure with the original data from closure */
+ rbh_save->payload[0] = (StgPtr) ((StgRBH *)closure)->blocking_queue;
+ rbh_save->payload[1] = (StgPtr) ((StgRBH *)closure)->mut_link;
+
+ /* Set the info_ptr for the rbh_Save closure according to the number of
+ pointers in the original */
+
+ rbh_info_ptr = (StgInfoTable *) (ptrs == 0 ? &stg_RBH_Save_0_info :
+ ptrs == 1 ? &stg_RBH_Save_1_info :
+ &stg_RBH_Save_2_info);
+ SET_INFO(rbh_save, rbh_info_ptr);
+ /* same bitmask as the original closure */
+ SET_GRAN_HDR(rbh_save, PROCS(closure));
+
+ /* Init the blocking queue of the RBH and have it point to the saved data */
+ ((StgRBH *)closure)->blocking_queue = (StgBlockingQueueElement *)rbh_save;
+
+ ASSERT(LOOKS_LIKE_GHC_INFO(RBH_INFOPTR(get_itbl(closure))));
+ /* Turn the closure into a RBH; a great system, indeed! */
+ SET_INFO(closure, RBH_INFOPTR(get_itbl(closure)));
+
+ /*
+ add closure to the mutable list!
+ do this after having turned the closure into an RBH, because an
+ RBH is mutable but the closure it was before wasn't mutable
+ */
+ recordMutable((StgMutClosure *)closure);
+
+ //IF_GRAN_DEBUG(pack,
+ /* sanity check; make sure that reverting the RBH yields the
+ orig closure, again */
+ //ASSERT(REVERT_INFOPTR(get_itbl(closure))==old_info));
+
+ /*
+ Closure layout after this routine has run amuck:
+ +---------------------
+ | RBH-HEADER | | | ...
+ +--------------|---|--
+ | FIXED_HS | | v
+ | Mutable-list ie another StgMutClosure
+ v
+ +---------
+ | RBH_SAVE with 0-2 words of DATA
+ +---------
+ */
+
+ return closure;
+}
+
+/*
+ An RBH closure is turned into a FETCH_ME when reveiving an ACK message
+ indicating that the transferred closure has been unpacked on the other PE
+ (see processAck in HLComms.c). The ACK also contains the new GA of the
+ closure to which the FETCH_ME closure has to point.
+
+ Converting a closure to a FetchMe is trivial, unless the closure has
+ acquired a blocking queue. If that has happened, we first have to awaken
+ the blocking queue. What a nuisance! Fortunately, @AwakenBlockingQueue@
+ should now know what to do.
+
+ A note on GrAnSim: In GrAnSim we don't have FetchMe closures. However,
+ we have to turn a RBH back to its original form when the simulated
+ transfer of the closure has been finished. Therefore we need the
+ @convertFromRBH@ routine below. After converting the RBH back to its
+ original form and awakening all TSOs, the first TSO will reenter the
+ closure which is now local and carry on merrily reducing it (the other
+ TSO will be less merrily blocked on the now local closure; we're costing
+ the difference between local and global blocks in the BQ code). -- HWL
+*/
+
+# if defined(PAR)
+
+EXTFUN(stg_FETCH_ME_info);
+
+//@cindex convertToFetchMe
+void
+convertToFetchMe(rbh, ga)
+StgRBH *rbh;
+globalAddr *ga;
+{
+ // StgInfoTable *ip = get_itbl(rbh);
+ StgBlockingQueueElement *bqe = rbh->blocking_queue;
+
+ ASSERT(get_itbl(rbh)->type==RBH);
+
+ IF_PAR_DEBUG(pack,
+ belch("**:: Converting RBH %p (%s) into a FETCH_ME for GA ((%x, %d, %x))",
+ rbh, info_type(rbh),
+ ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight));
+
+ /* put closure on mutables list, while it is still a RBH */
+ recordMutable((StgMutClosure *)rbh);
+
+ /* actually turn it into a FETCH_ME */
+ SET_INFO((StgClosure *)rbh, &stg_FETCH_ME_info);
+
+ /* set the global pointer in the FETCH_ME closure to the given value */
+ ((StgFetchMe *)rbh)->ga = ga;
+
+ IF_PAR_DEBUG(pack,
+ if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+ belch("**:: Awakening non-empty BQ of RBH closure %p (first TSO is %d (%p)",
+ rbh, ((StgTSO *)bqe)->id, ((StgTSO *)bqe)));
+
+ /* awaken all TSOs and BLOCKED_FETCHES on the blocking queue */
+ if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH)
+ awakenBlockedQueue(bqe, (StgClosure *)rbh);
+}
+# else /* GRAN */
+/* Prototype */
+// void UnlinkFromMUT(StgPtr closure);
+
+/*
+ This routine in fact reverts the RBH into its original form; this code
+ should be of interest for GUM, too, but is not needed in the current version.
+ convertFromRBH is called where GUM uses convertToFetchMe.
+*/
+void
+convertFromRBH(closure)
+StgClosure *closure;
+{
+ StgBlockingQueueElement *bqe = ((StgRBH*)closure)->blocking_queue;
+ char str[NODE_STR_LEN]; // debugging only
+ StgInfoTable *rip = REVERT_INFOPTR(get_itbl(closure)); // debugging only
+
+ IF_GRAN_DEBUG(pack,
+ if (get_itbl(bqe)->type==TSO)
+ sprintf(str, "%d (%p)",
+ ((StgTSO *)bqe)->id, ((StgTSO *)bqe));
+ else
+ strcpy(str, "empty");
+ belch("*<:: Reverting RBH %p (%s) into a ??? closure again; BQ start: %s",
+ closure, info_type(closure), str));
+
+ ASSERT(get_itbl(closure)->type==RBH);
+
+ /* awakenBlockedQueue also restores the RBH_Save closure
+ (have to call it even if there are no TSOs in the queue!) */
+ awakenBlockedQueue(bqe, closure);
+
+ /* Put back old info pointer (grabbed from the RBH's info table).
+ We do that *after* awakening the BQ to be sure node is an RBH when
+ calling awakenBlockedQueue (different in GUM!)
+ */
+ SET_INFO(closure, REVERT_INFOPTR(get_itbl(closure)));
+
+ /* put closure on mutables list */
+ recordMutable((StgMutClosure *)closure);
+
+# if 0 /* rest of this fct */
+ /* ngoq ngo' */
+ /* FETCHME_GA(closure) = ga; */
+ if (IS_MUTABLE(INFO_PTR(bqe))) {
+ PROC old_proc = CurrentProc, /* NB: For AwakenBlockingQueue, */
+ new_proc = where_is(closure); /* CurrentProc must be where */
+ /* closure lives. */
+ CurrentProc = new_proc;
+
+# if defined(GRAN_CHECK)
+ if (RTSflags.GranFlags.debug & 0x100)
+ fprintf(stderr,"===== AwBQ of node 0x%lx (%s) [PE %2u]\n",
+ closure, (isSpec ? "SPEC_RBH" : "GEN_RBH"), new_proc);
+# endif
+
+ rbh_save = AwakenBlockingQueue(bqe); /* AwakenBlockingQueue(bqe); */
+ CurrentProc = old_proc;
+ } else {
+ rbh_save = bqe;
+ }
+
+ /* Put data from special RBH save closures back into the closure */
+ if ( rbh_save == NULL ) {
+ fprintf(stderr,"convertFromRBH: No RBH_Save_? closure found at end of BQ!\n");
+ EXIT(EXIT_FAILURE);
+ } else {
+ closure[isSpec ? SPEC_HS : GEN_HS] = rbh_save[SPEC_HS];
+ closure[(isSpec ? SPEC_HS : GEN_HS) + 1] = rbh_save[SPEC_HS + 1];
+ }
+# endif /* 0 */
+
+# if 0 && (defined(GCap) || defined(GCgn))
+ /* ngoq ngo' */
+ /* If we convert from an RBH in the old generation,
+ we have to make sure it goes on the mutables list */
+
+ if(closure <= StorageMgrInfo.OldLim) {
+ if (IS_MUTABLE(INFO_PTR(closure)) && MUT_LINK(closure) == MUT_NOT_LINKED) {
+ MUT_LINK(closure) = (StgWord) StorageMgrInfo.OldMutables;
+ StorageMgrInfo.OldMutables = closure;
+ }
+ }
+# endif /* 0 */
+}
+#endif /* PAR */
+
+/* Remove closure from the mutables list */
+#if 0
+/* ngoq ngo' */
+void
+UnlinkFromMUT(StgPtr closure)
+{
+ StgPtr curr = StorageMgrInfo.OldMutables, prev = NULL;
+
+ while (curr != NULL && curr != closure) {
+ ASSERT(MUT_LINK(curr)!=MUT_NOT_LINKED);
+ prev=curr;
+ curr=MUT_LINK(curr);
+ }
+ if (curr==closure) {
+ if (prev==NULL)
+ StorageMgrInfo.OldMutables = MUT_LINK(curr);
+ else
+ MUT_LINK(prev) = MUT_LINK(curr);
+ MUT_LINK(curr) = MUT_NOT_LINKED;
+ }
+
+# if 0 && (defined(GCap) || defined(GCgn))
+ {
+ closq newclos;
+ extern closq ex_RBH_q;
+
+ newclos = (closq) stgMallocBytes(sizeof(struct clos), "UnlinkFromMUT");
+ CLOS_CLOSURE(newclos) = closure;
+ CLOS_PREV(newclos) = NULL;
+ CLOS_NEXT(newclos) = ex_RBH_q;
+ if (ex_RBH_q!=NULL)
+ CLOS_PREV(ex_RBH_q) = newclos;
+ ex_RBH_q = newclos;
+ }
+# endif
+}
+#endif /* PAR */
+
+#endif /* PAR || GRAN -- whole file */
+
+//@node Index, , Conversion Functions
+//@section Index
+
+//@index
+//* convertToFetchMe:: @cindex\s-+convertToFetchMe
+//* convertToRBH:: @cindex\s-+convertToRBH
+//@end index
diff --git a/rts/parallel/SysMan.c b/rts/parallel/SysMan.c
new file mode 100644
index 0000000000..40bcf6a19e
--- /dev/null
+++ b/rts/parallel/SysMan.c
@@ -0,0 +1,650 @@
+/* ----------------------------------------------------------------------------
+ Time-stamp: <Wed Mar 21 2001 17:16:28 Stardate: [-30]6363.59 hwloidl>
+
+ GUM System Manager Program
+ Handles startup, shutdown and global synchronisation of the parallel system.
+
+ The Parade/AQUA Projects, Glasgow University, 1994-1995.
+ GdH/APART Projects, Heriot-Watt University, Edinburgh, 1997-2000.
+
+ ------------------------------------------------------------------------- */
+
+//@node GUM System Manager Program, , ,
+//@section GUM System Manager Program
+
+//@menu
+//* General docu::
+//* Includes::
+//* Macros etc::
+//* Variables::
+//* Prototypes::
+//* Aux startup and shutdown fcts::
+//* Main fct::
+//* Message handlers::
+//* Auxiliary fcts::
+//* Index::
+//@end menu
+
+//@node General docu, Includes, GUM System Manager Program, GUM System Manager Program
+//@subsection General docu
+
+/*
+The Sysman task currently controls initiation, termination, of a
+parallel Haskell program running under GUM. In the future it may
+control global GC synchronisation and statistics gathering. Based on
+K. Hammond's SysMan.lc in Graph for PVM. SysMan is unusual in that it
+is not part of the executable produced by ghc: it is a free-standing
+program that spawns PVM tasks (logical PEs) to evaluate the
+program. After initialisation it runs in parallel with the PE tasks,
+awaiting messages.
+
+OK children, buckle down for some serious weirdness, it works like this ...
+
+o The argument vector (argv) for SysMan has one of the following 2 shapes:
+
+-------------------------------------------------------------------------------
+| SysMan path | debug flag | pvm-executable path | Num. PEs | Program Args ...|
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------
+| SysMan path | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+The "pvm-executable path" is an absolute path of where PVM stashes the
+code for each PE. The arguments passed on to each PE-executable
+spawned by PVM are:
+
+-------------------------------
+| Num. PEs | Program Args ... |
+-------------------------------
+
+The arguments passed to the Main-thread PE-executable are
+
+-------------------------------------------------------------------
+| main flag | pvm-executable path | Num. PEs | Program Args ... |
+-------------------------------------------------------------------
+
+o SysMan's algorithm is as follows.
+
+o use PVM to spawn (nPE-1) PVM tasks
+o fork SysMan to create the main-thread PE. This permits the main-thread to
+ read and write to stdin and stdout.
+o Wait for all the PE-tasks to reply back saying they are ready and if they were the
+ main thread or not.
+o Broadcast an array of the PE task-ids out to all of the PE-tasks.
+o Enter a loop awaiting incoming messages, e.g. failure, Garbage-collection,
+ termination.
+
+The forked Main-thread algorithm, in SysMan, is as follows.
+
+o disconnects from PVM.
+o sets a flag in argv to indicate that it is the main thread.
+o `exec's a copy of the pvm-executable (i.e. the program being run)
+
+
+The pvm-executable run by each PE-task, is initialised as follows.
+
+o Registers with PVM, obtaining a task-id.
+o If it was main it gets SysMan's task-id from argv otherwise it can use pvm_parent.
+o Sends a ready message to SysMan together with a flag indicating if it was main or not.
+o Receives from SysMan the array of task-ids of the other PEs.
+o If the number of task-ids sent was larger than expected then it must have been a task
+ generated after the rest of the program had started, so it sends its own task-id message
+ to all the tasks it was told about.
+o Begins execution.
+
+*/
+
+//@node Includes, Macros etc, General docu, GUM System Manager Program
+//@subsection Includes
+
+/* Evidently not Posix */
+/* #include "PosixSource.h" */
+
+#include "Rts.h"
+#include "ParTypes.h"
+#include "LLC.h"
+#include "Parallel.h"
+#include "ParallelRts.h" // stats only
+
+//@node Macros etc, Variables, Includes, GUM System Manager Program
+//@subsection Macros etc
+
+/* SysMan is put on top of the GHC routine that does the RtsFlags handling.
+ So, we cannot use the standard macros. For the time being we use a macro
+ that is fixed at compile time.
+*/
+
+#ifdef IF_PAR_DEBUG
+#undef IF_PAR_DEBUG
+#endif
+
+/* debugging enabled */
+//#define IF_PAR_DEBUG(c,s) { s; }
+/* debugging disabled */
+#define IF_PAR_DEBUG(c,s) /* nothing */
+
+void *stgMallocBytes (int n, char *msg);
+
+//@node Variables, Prototypes, Macros etc, GUM System Manager Program
+//@subsection Variables
+
+/*
+ The following definitions are included so that SysMan can be linked with the Low
+ Level Communications module (LLComms). They are not used in SysMan.
+*/
+GlobalTaskId mytid;
+
+static unsigned PEsArrived = 0;
+static GlobalTaskId gtids[MAX_PES];
+static GlobalTaskId sysman_id, sender_id;
+static unsigned PEsTerminated = 0;
+static rtsBool Finishing = rtsFalse;
+static long PEbuffer[MAX_PES];
+nat nSpawn = 0; // current no. of spawned tasks (see gtids)
+nat nPEs = 0; // number of PEs specified on startup
+nat nextPE;
+/* PVM-ish variables */
+char *petask, *pvmExecutable;
+char **pargv;
+int cc, spawn_flag = PvmTaskDefault;
+
+#if 0 && defined(PAR_TICKY)
+/* ToDo: use allGlobalParStats to collect stats of all PEs */
+GlobalParStats *allGlobalParStats[MAX_PES];
+#endif
+
+//@node Prototypes, Aux startup and shutdown fcts, Variables, GUM System Manager Program
+//@subsection Prototypes
+
+/* prototypes for message handlers called from the main loop of SysMan */
+void newPE(int nbytes, int opcode, int sender_id);
+void readyPE(int nbytes, int opcode, int sender_id);
+void finishPE(int nbytes, int opcode, int sender_id, int exit_code);
+
+//@node Aux startup and shutdown fcts, Main fct, Prototypes, GUM System Manager Program
+//@subsection Aux startup and shutdown fcts
+
+/*
+ Create the PE Tasks. We spawn (nPEs-1) pvm threads: the Main Thread
+ (which starts execution and performs IO) is created by forking SysMan
+*/
+static int
+createPEs(int total_nPEs) {
+ int i, spawn_nPEs, iSpawn = 0, nArch, nHost;
+ struct pvmhostinfo *hostp;
+ int sysman_host;
+
+ spawn_nPEs = total_nPEs-1;
+ if (spawn_nPEs > 0) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] Spawning %d PEs(%s) ...\n",
+ sysman_id, spawn_nPEs, petask);
+ fprintf(stderr, " args: ");
+ for (i = 0; pargv[i]; ++i)
+ fprintf(stderr, "%s, ", pargv[i]);
+ fprintf(stderr, "\n"));
+
+ pvm_config(&nHost,&nArch,&hostp);
+ sysman_host=pvm_tidtohost(sysman_id);
+
+ /* create PEs on the specific machines in the specified order! */
+ for (i=0; (iSpawn<spawn_nPEs) && (i<nHost); i++)
+ if (hostp[i].hi_tid != sysman_host) {
+ checkComms(pvm_spawn(petask, pargv, spawn_flag+PvmTaskHost,
+ hostp[i].hi_name, 1, gtids+iSpawn),
+ "SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] Spawned PE %d onto %s\n",
+ sysman_id, i, hostp[i].hi_name));
+ iSpawn++;
+ }
+
+ /* create additional PEs anywhere you like */
+ if (iSpawn<spawn_nPEs) {
+ checkComms(pvm_spawn(petask, pargv, spawn_flag, "",
+ spawn_nPEs-iSpawn, gtids+iSpawn),
+ "SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Spawned %d additional PEs anywhere\n",
+ sysman_id, spawn_nPEs-iSpawn));
+ }
+ }
+
+#if 0
+ /* old code with random placement of PEs; make that a variant? */
+# error "Broken startup in SysMan"
+ { /* let pvm place the PEs anywhere; not used anymore */
+ checkComms(pvm_spawn(petask, pargv, spawn_flag, "", spawn_nPEs, gtids),"SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Spawned\n", sysman_id));
+
+ }
+#endif
+
+ // iSpawn=spawn_nPEs;
+
+ return iSpawn;
+}
+
+/*
+ Check if this pvm task is in the list of tasks we spawned and are waiting
+ on, if so then remove it.
+*/
+
+static rtsBool
+alreadySpawned (GlobalTaskId g) {
+ unsigned int i;
+
+ for (i=0; i<nSpawn; i++)
+ if (g==gtids[i]) {
+ nSpawn--;
+ gtids[i] = gtids[nSpawn]; //the last takes its place
+ return rtsTrue;
+ }
+ return rtsFalse;
+}
+
+static void
+broadcastFinish(void) {
+ int i,j;
+ int tids[MAX_PES]; /* local buffer of all surviving PEs */
+
+ for (i=0, j=0; i<nPEs; i++)
+ if (PEbuffer[i])
+ tids[j++]=PEbuffer[i]; //extract valid tids
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Broadcasting Finish to %d PEs; initiating shutdown\n",
+ sysman_id, j));
+
+ /* ToDo: move into LLComms.c */
+ pvm_initsend(PvmDataDefault);
+ pvm_mcast(tids,j,PP_FINISH);
+}
+
+static void
+broadcastPEtids (void) {
+ nat i;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan sending PE table to all PEs\n", sysman_id);
+ /* debugging */
+ fprintf(stderr,"++++ [%x] PE table as seen by SysMan:\n", mytid);
+ for (i = 0; i < nPEs; i++) {
+ fprintf(stderr,"++++ PEbuffer[%d] = %x\n", i, PEbuffer[i]);
+ }
+ )
+
+ broadcastOpN(PP_PETIDS, PEGROUP, nPEs, &PEbuffer);
+}
+
+//@node Main fct, Message handlers, Aux startup and shutdown fcts, GUM System Manager Program
+//@subsection Main fct
+
+//@cindex main
+int
+main (int argc, char **argv) {
+ int rbufid;
+ int opcode, nbytes, nSpawn;
+ unsigned int i;
+
+ setbuf(stdout, NULL); // disable buffering of stdout
+ setbuf(stderr, NULL); // disable buffering of stderr
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,
+ "==== RFP: GdH enabled SysMan reporting for duty\n"));
+
+ if (argc > 1) {
+ if (*argv[1] == '-') {
+ spawn_flag = PvmTaskDebug;
+ argv[1] = argv[0];
+ argv++; argc--;
+ }
+ sysman_id = pvm_mytid(); /* This must be the first PVM call */
+
+ if (sysman_id<0) {
+ fprintf(stderr, "==== PVM initialisation failure\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ Get the full path and filename of the pvm executable (stashed in some
+ PVM directory), and the number of PEs from the command line.
+ */
+ pvmExecutable = argv[1];
+ nPEs = atoi(argv[2]);
+
+ if (nPEs==0) {
+ /* as usual 0 means infinity: use all PEs specified in PVM config */
+ int nArch, nHost;
+ struct pvmhostinfo *hostp;
+
+ /* get info on PVM config */
+ pvm_config(&nHost,&nArch,&hostp);
+ nPEs=nHost;
+ sprintf(argv[2],"%d",nPEs); /* ToCheck: does this work on all archs */
+ }
+
+ /* get the name of the binary to execute */
+ if ((petask = getenv(PETASK)) == NULL) // PETASK set by driver
+ petask = PETASK;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] nPEs: %d; executable: |%s|\n",
+ sysman_id, nPEs, petask));
+
+ /* Check that we can create the number of PE and IMU tasks requested.
+ ^^^
+ This comment is most entertaining since we haven't been using IMUs
+ for the last 10 years or so -- HWL */
+ if ((nPEs > MAX_PES) || (nPEs<1)) {
+ fprintf(stderr,"==** SysMan: No more than %d PEs allowed (%d requested)\n Reconfigure GUM setting MAX_PE in ghc/includes/Parallel.h to a higher value\n",
+ MAX_PES, nPEs);
+ exit(EXIT_FAILURE);
+ }
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] is SysMan Task\n", sysman_id));
+
+ /* Initialise the PE task arguments from Sysman's arguments */
+ pargv = argv + 2;
+
+ /* Initialise list of all PE identifiers */
+ PEsArrived=0;
+ nextPE=1;
+ for (i=0; i<nPEs; i++)
+ PEbuffer[i]=0;
+
+ /* start up the required number of PEs */
+ nSpawn = createPEs(nPEs);
+
+ /*
+ Create the MainThread PE by forking SysMan. This arcane coding
+ is required to allow MainThread to read stdin and write to stdout.
+ PWT 18/1/96
+ */
+ //nPEs++; /* Record that the number of PEs is increasing */
+ if ((cc = fork())) {
+ checkComms(cc,"SysMan fork"); /* Parent continues as SysMan */
+
+ PEbuffer[0]=0; /* we accept the first main and assume its valid. */
+ PEsArrived=1; /* assume you've got main */
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Sysman successfully initialized!\n",
+ sysman_id));
+
+//@cindex message handling loop
+ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+ /* Main message handling loop */
+ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+ /* Process incoming messages */
+ while (1) {
+ if ((rbufid = pvm_recv(ANY_TASK, ANY_OPCODE)) < 0) {
+ pvm_perror("==** Sysman: Receiving Message (pvm_recv)");
+ /* never reached */
+ }
+
+ pvm_bufinfo(rbufid, &nbytes, &opcode, &sender_id);
+
+ /* very low level debugging
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"== [%x] SysMan: Message received by SysMan: rbufid=%x, nbytes = %d, opcode = %x, sender_id = %x\n",
+ sysman_id, rbufid, nbytes, opcode, sender_id));
+ */
+
+ switch (opcode) {
+
+ case PP_NEWPE: /* a new PE is registering for work */
+ newPE(nbytes, opcode, sender_id);
+ break;
+
+ case PP_READY: /* startup complete; let PEs start working */
+ readyPE(nbytes, opcode, sender_id);
+ break;
+
+
+ case PP_GC_INIT: /* start global GC */
+ /* This Function not yet implemented for GUM */
+ fprintf(stderr,"==** Global GC requested by PE %x. Not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_STATS_ON: /* enable statistics gathering */
+ fprintf(stderr,"==** PP_STATS_ON requested by %x. Not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_STATS_OFF: /* disable statistics gathering */
+ fprintf(stderr,"==** PP_STATS_OFF requested by %x. Not yet implemented for GUM!\n",
+ sender_id);
+ break;
+
+ case PP_FINISH:
+ {
+ int exit_code = getExitCode(nbytes, &sender_id);
+ finishPE(nbytes, opcode, sender_id, exit_code);
+ break;
+
+ default:
+ {
+ /*
+ char *opname = GetOpName(opcode);
+ fprintf(stderr,"Sysman: Unrecognised opcode %s (%x)\n",
+ opname,opcode); */
+ fprintf(stderr,"==** Qagh: Sysman: Unrecognised opcode (%x)\n",
+ opcode);
+ }
+ break;
+ } /* switch */
+ } /* else */
+ } /* while 1 */
+ /* end of SysMan!! */
+ } else {
+ /* forked main thread begins here */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== Main Thread PE has been forked; doing an execv(%s,...)\n",
+ pvmExecutable));
+ pvmendtask(); // Disconnect from PVM to avoid confusion:
+ // executable reconnects
+
+ // RFP: assumes that length(argv[0])>=9 !!!
+ sprintf(argv[0],"-%08X",sysman_id); /*flag that its the Main Thread PE and include sysman's id*/
+ execv(pvmExecutable,argv); /* Parent task becomes Main Thread PE */
+ } /* else */
+ } /* argc > 1 */
+} /* main */
+
+//@node Message handlers, Auxiliary fcts, Main fct, GUM System Manager Program
+//@subsection Message handlers
+
+/*
+ Received PP_NEWPE:
+ A new PE has been added to the configuration.
+*/
+void
+newPE(int nbytes, int opcode, int sender_id) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan detected a new host\n",
+ sysman_id));
+
+ /* Determine the new machine... assume it's the last on the config list? */
+ if (nSpawn < MAX_PES) {
+ int nArch,nHost;
+ struct pvmhostinfo *hostp;
+
+ /* get configuration of PVM machine */
+ pvm_config(&nHost,&nArch,&hostp);
+ nHost--;
+ checkComms(pvm_spawn(petask, pargv, spawn_flag+PvmTaskHost,
+ hostp[nHost].hi_name, 1, gtids+nSpawn),
+ "SysMan loop");
+ nSpawn++;
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr, "==== [%x] Spawned onto %s\n",
+ sysman_id, hostp[nHost].hi_name));
+ }
+}
+
+/*
+ Received PP_READY:
+ Let it be known that PE @sender_id@ participates in the computation.
+*/
+void
+readyPE(int nbytes, int opcode, int sender_id) {
+ int i = 0, flag = 1;
+ long isMain;
+ int nArch, nHost;
+ struct pvmhostinfo *hostp;
+
+ //ASSERT(opcode==PP_READY);
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan received PP_READY message from %x\n",
+ sysman_id, sender_id));
+
+ pvm_config(&nHost,&nArch,&hostp);
+
+ GetArg1(isMain);
+
+ //if ((isMain && (PEbuffer[0]==0)) || alreadySpawned(sender_id)) {
+ if (nPEs >= MAX_PES) {
+ fprintf(stderr,"==== [%x] SysMan doesn't need PE %d (max %d PEs allowed)\n",
+ sysman_id, sender_id, MAX_PES);
+ pvm_kill(sender_id);
+ } else {
+ if (isMain) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan found Main PE %x\n",
+ sysman_id, sender_id));
+ PEbuffer[0]=sender_id;
+ } else {
+ /* search for PE in list of PEs */
+ for(i=1; i<nPEs; i++)
+ if (PEbuffer[i]==sender_id) {
+ flag=0;
+ break;
+ }
+ /* it's a new PE: add it to the list of PEs */
+ if (flag)
+ PEbuffer[nextPE++] = sender_id;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan: found PE %d as [%x] on host %s\n",
+ sysman_id, PEsArrived, sender_id, hostp[PEsArrived].hi_name));
+
+ PEbuffer[PEsArrived++] = sender_id;
+ }
+
+
+ /* enable better handling of unexpected terminations */
+ checkComms( pvm_notify(PvmTaskExit, PP_FINISH, 1, &sender_id),
+ "SysMan loop");
+
+ /* finished registration of all PEs => enable notification */
+ if ((PEsArrived==nPEs) && PEbuffer[0]) {
+ checkComms( pvm_notify(PvmHostAdd, PP_NEWPE, -1, 0),
+ "SysMan startup");
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan initialising notificaton for new hosts\n", sysman_id));
+ }
+
+ /* finished notification => send off the PE ids */
+ if ((PEsArrived>=nPEs) && PEbuffer[0]) {
+ if (PEsArrived>nPEs) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Weird: %d PEs registered, but we only asked for %d\n", sysman_id, PEsArrived, nPEs));
+ // nPEs=PEsArrived;
+ }
+ broadcastPEtids();
+ }
+ }
+}
+
+/*
+ Received PP_FINISH:
+ Shut down the corresponding PE. Check whether it is a regular shutdown
+ or an uncontrolled termination.
+*/
+void
+finishPE(int nbytes, int opcode, int sender_id, int exitCode) {
+ int i;
+
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] SysMan received PP_FINISH message from %x (exit code: %d)\n",
+ sysman_id, sender_id, exitCode));
+
+ /* Is it relevant to us? Count the first message */
+ for (i=0; i<nPEs; i++)
+ if (PEbuffer[i] == sender_id) {
+ PEsTerminated++;
+ PEbuffer[i]=0;
+
+ /* handle exit code */
+ if (exitCode<0) { /* a task exit before a controlled finish? */
+ fprintf(stderr,"==== [%x] Termination at %x with exit(%d)\n",
+ sysman_id, sender_id, exitCode);
+ } else if (exitCode>0) { /* an abnormal exit code? */
+ fprintf(stderr,"==== [%x] Uncontrolled termination at %x with exit(%d)\n",
+ sysman_id, sender_id, exitCode);
+ } else if (!Finishing) { /* exitCode==0 which is good news */
+ if (i!=0) { /* someone other than main PE terminated first? */
+ fprintf(stderr,"==== [%x] Unexpected early termination at %x\n",
+ sysman_id, sender_id);
+ } else {
+ /* start shutdown by broadcasting FINISH to other PEs */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Initiating shutdown (requested by [%x] RIP) (exit code: %d)\n", sysman_id, sender_id, exitCode));
+ Finishing = rtsTrue;
+ broadcastFinish();
+ }
+ } else {
+ /* we are in a shutdown already */
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Finish from %x during shutdown (%d PEs terminated so far; %d total)\n",
+ sysman_id, sender_id, PEsTerminated, nPEs));
+ }
+
+ if (PEsTerminated >= nPEs) {
+ IF_PAR_DEBUG(verbose,
+ fprintf(stderr,"==== [%x] Global Shutdown, Goodbye!! (SysMan has received FINISHes from all PEs)\n", sysman_id));
+ //broadcastFinish();
+ /* received finish from everybody; now, we can exit, too */
+ exit(EXIT_SUCCESS); /* Qapla'! */
+ }
+ }
+}
+
+//@node Auxiliary fcts, Index, Message handlers, GUM System Manager Program
+//@subsection Auxiliary fcts
+
+/* Needed here because its used in loads of places like LLComms etc */
+
+//@cindex stg_exit
+
+/*
+ * called from STG-land to exit the program
+ */
+
+void
+stg_exit(I_ n)
+{
+ fprintf(stderr, "==// [%x] %s in SysMan code; sending PP_FINISH to all PEs ...\n",
+ mytid,(n!=0)?"FAILURE":"FINISH");
+ broadcastFinish();
+ //broadcastFinish();
+ pvm_exit();
+ exit(n);
+}
+
+//@node Index, , Auxiliary fcts, GUM System Manager Program
+//@subsection Index
+
+//@index
+//* main:: @cindex\s-+main
+//* message handling loop:: @cindex\s-+message handling loop
+//* stgMallocBytes:: @cindex\s-+stgMallocBytes
+//* stg_exit:: @cindex\s-+stg_exit
+//@end index
diff --git a/rts/posix/GetTime.c b/rts/posix/GetTime.c
new file mode 100644
index 0000000000..3a0764cb91
--- /dev/null
+++ b/rts/posix/GetTime.c
@@ -0,0 +1,141 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2005
+ *
+ * Machine-dependent time measurement functions
+ *
+ * ---------------------------------------------------------------------------*/
+
+// Not POSIX, due to use of ru_majflt in getPageFaults()
+// #include "PosixSource.h"
+
+#include "Rts.h"
+#include "GetTime.h"
+
+#ifdef HAVE_TIME_H
+# include <time.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+#endif
+
+#if HAVE_SYS_RESOURCE_H
+# include <sys/resource.h>
+#endif
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TIMES_H
+# include <sys/times.h>
+#endif
+
+#if ! ((defined(HAVE_GETRUSAGE) && !irix_HOST_OS) || defined(HAVE_TIMES))
+#error No implementation for getProcessCPUTime() available.
+#endif
+
+#if defined(HAVE_GETTIMEOFDAY) && defined(HAVE_GETRUSAGE) && !irix_HOST_OS
+// we'll implement getProcessCPUTime() and getProcessElapsedTime()
+// separately, using getrusage() and gettimeofday() respectively
+
+Ticks getProcessCPUTime(void)
+{
+ struct rusage t;
+ getrusage(RUSAGE_SELF, &t);
+ return (t.ru_utime.tv_sec * TICKS_PER_SECOND +
+ ((Ticks)t.ru_utime.tv_usec * TICKS_PER_SECOND)/1000000);
+}
+
+Ticks getProcessElapsedTime(void)
+{
+ struct timeval tv;
+ gettimeofday(&tv, (struct timezone *) NULL);
+ return (tv.tv_sec * TICKS_PER_SECOND +
+ ((Ticks)tv.tv_usec * TICKS_PER_SECOND)/1000000);
+}
+
+void getProcessTimes(Ticks *user, Ticks *elapsed)
+{
+ *user = getProcessCPUTime();
+ *elapsed = getProcessElapsedTime();
+}
+
+#elif defined(HAVE_TIMES)
+
+// we'll use the old times() API.
+
+Ticks getProcessCPUTime(void)
+{
+ Ticks user, elapsed;
+ getProcessTimes(&user,&elapsed);
+ return user;
+}
+
+Ticks getProcessElapsedTime(void)
+{
+ Ticks user, elapsed;
+ getProcessTimes(&user,&elapsed);
+ return elapsed;
+}
+
+void getProcessTimes(Ticks *user, Ticks *elapsed)
+{
+ static nat ClockFreq = 0;
+
+ if (ClockFreq == 0) {
+#if defined(HAVE_SYSCONF)
+ long ticks;
+ ticks = sysconf(_SC_CLK_TCK);
+ if ( ticks == -1 ) {
+ errorBelch("sysconf\n");
+ stg_exit(EXIT_FAILURE);
+ }
+ ClockFreq = ticks;
+#elif defined(CLK_TCK) /* defined by POSIX */
+ ClockFreq = CLK_TCK;
+#elif defined(HZ)
+ ClockFreq = HZ;
+#elif defined(CLOCKS_PER_SEC)
+ ClockFreq = CLOCKS_PER_SEC;
+#else
+ errorBelch("can't get clock resolution");
+ stg_exit(EXIT_FAILURE);
+#endif
+ }
+
+ struct tms t;
+ clock_t r = times(&t);
+ *user = (((Ticks)t.tms_utime * TICKS_PER_SECOND) / ClockFreq);
+ *elapsed = (((Ticks)r * TICKS_PER_SECOND) / ClockFreq);
+}
+
+#endif // HAVE_TIMES
+
+Ticks getThreadCPUTime(void)
+{
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
+ // clock_gettime() gives us per-thread CPU time. It isn't
+ // reliable on Linux, but it's the best we have.
+ struct timespec ts;
+ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
+ return (ts.tv_sec * TICKS_PER_SECOND +
+ ((Ticks)ts.tv_nsec * TICKS_PER_SECOND) / 1000000000);
+#else
+ return getProcessCPUTime();
+#endif
+}
+
+nat
+getPageFaults(void)
+{
+#if !defined(HAVE_GETRUSAGE) || irix_HOST_OS
+ return 0;
+#else
+ struct rusage t;
+ getrusage(RUSAGE_SELF, &t);
+ return(t.ru_majflt);
+#endif
+}
+
diff --git a/rts/posix/Itimer.c b/rts/posix/Itimer.c
new file mode 100644
index 0000000000..83ed84d6ef
--- /dev/null
+++ b/rts/posix/Itimer.c
@@ -0,0 +1,226 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-1999
+ *
+ * Interval timer for profiling and pre-emptive scheduling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/*
+ * The interval timer is used for profiling and for context switching in the
+ * threaded build. Though POSIX 1003.1b includes a standard interface for
+ * such things, no one really seems to be implementing them yet. Even
+ * Solaris 2.3 only seems to provide support for @CLOCK_REAL@, whereas we're
+ * keen on getting access to @CLOCK_VIRTUAL@.
+ *
+ * Hence, we use the old-fashioned @setitimer@ that just about everyone seems
+ * to support. So much for standards.
+ */
+#include "Rts.h"
+#include "RtsFlags.h"
+#include "Timer.h"
+#include "Ticker.h"
+#include "posix/Itimer.h"
+#include "Proftimer.h"
+#include "Schedule.h"
+#include "posix/Select.h"
+
+/* As recommended in the autoconf manual */
+# ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+# else
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+# endif
+
+#ifdef HAVE_SIGNAL_H
+# include <signal.h>
+#endif
+
+/* Major bogosity:
+ *
+ * In the threaded RTS, we can't set the virtual timer because the
+ * thread which has the virtual timer might be sitting waiting for a
+ * capability, and the virtual timer only ticks in CPU time.
+ *
+ * So, possible solutions:
+ *
+ * (1) tick in realtime. Not very good, because this ticker is used for
+ * profiling, and this will give us unreliable time profiling
+ * results. Furthermore, this requires picking a single OS thread
+ * to be the timekeeper, which is a bad idea because the thread in
+ * question might just be making a temporary call into Haskell land.
+ *
+ * (2) save/restore the virtual timer around excursions into STG land.
+ * Sounds great, but I tried it and the resolution of the virtual timer
+ * isn't good enough (on Linux) - most of our excursions fall
+ * within the timer's resolution and we never make any progress.
+ *
+ * (3) have a virtual timer in every OS thread. Might be reasonable,
+ * because most of the time there is only ever one of these
+ * threads running, so it approximates a single virtual timer.
+ * But still quite bogus (and I got crashes when I tried this).
+ *
+ * For now, we're using (1), but this needs a better solution. --SDM
+ */
+#ifdef THREADED_RTS
+#define ITIMER_FLAVOUR ITIMER_REAL
+#define ITIMER_SIGNAL SIGALRM
+#else
+#define ITIMER_FLAVOUR ITIMER_VIRTUAL
+#define ITIMER_SIGNAL SIGVTALRM
+#endif
+
+static
+int
+install_vtalrm_handler(TickProc handle_tick)
+{
+ struct sigaction action;
+
+ action.sa_handler = handle_tick;
+
+ sigemptyset(&action.sa_mask);
+
+#ifdef SA_RESTART
+ // specify SA_RESTART. One consequence if we don't do this is
+ // that readline gets confused by the -threaded RTS. It seems
+ // that if a SIGALRM handler is installed without SA_RESTART,
+ // readline installs its own SIGALRM signal handler (see
+ // readline's signals.c), and this somehow causes readline to go
+ // wrong when the input exceeds a single line (try it).
+ action.sa_flags = SA_RESTART;
+#else
+ action.sa_flags = 0;
+#endif
+
+ return sigaction(ITIMER_SIGNAL, &action, NULL);
+}
+
+int
+startTicker(nat ms, TickProc handle_tick)
+{
+# ifndef HAVE_SETITIMER
+ /* debugBelch("No virtual timer on this system\n"); */
+ return -1;
+# else
+ struct itimerval it;
+
+ install_vtalrm_handler(handle_tick);
+
+#if !defined(THREADED_RTS)
+ timestamp = getourtimeofday();
+#endif
+
+ it.it_value.tv_sec = ms / 1000;
+ it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec));
+ it.it_interval = it.it_value;
+ return (setitimer(ITIMER_FLAVOUR, &it, NULL));
+# endif
+}
+
+int
+stopTicker()
+{
+# ifndef HAVE_SETITIMER
+ /* debugBelch("No virtual timer on this system\n"); */
+ return -1;
+# else
+ struct itimerval it;
+
+ it.it_value.tv_sec = 0;
+ it.it_value.tv_usec = 0;
+ it.it_interval = it.it_value;
+ return (setitimer(ITIMER_FLAVOUR, &it, NULL));
+# endif
+}
+
+# if 0
+/* This is a potential POSIX version */
+int
+startTicker(nat ms)
+{
+ struct sigevent se;
+ struct itimerspec it;
+ timer_t tid;
+
+#if !defined(THREADED_RTS)
+ timestamp = getourtimeofday();
+#endif
+
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = ITIMER_SIGNAL;
+ se.sigev_value.sival_int = ITIMER_SIGNAL;
+ if (timer_create(CLOCK_VIRTUAL, &se, &tid)) {
+ barf("can't create virtual timer");
+ }
+ it.it_value.tv_sec = ms / 1000;
+ it.it_value.tv_nsec = 1000000 * (ms - 1000 * it.it_value.tv_sec);
+ it.it_interval = it.it_value;
+ return timer_settime(tid, TIMER_RELTIME, &it, NULL);
+}
+
+int
+stopTicker()
+{
+ struct sigevent se;
+ struct itimerspec it;
+ timer_t tid;
+
+#if !defined(THREADED_RTS)
+ timestamp = getourtimeofday();
+#endif
+
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = ITIMER_SIGNAL;
+ se.sigev_value.sival_int = ITIMER_SIGNAL;
+ if (timer_create(CLOCK_VIRTUAL, &se, &tid)) {
+ barf("can't create virtual timer");
+ }
+ it.it_value.tv_sec = 0;
+ it.it_value.tv_nsec = 0;
+ it.it_interval = it.it_value;
+ return timer_settime(tid, TIMER_RELTIME, &it, NULL);
+}
+# endif
+
+#if 0
+/* Currently unused */
+void
+block_vtalrm_signal(void)
+{
+ sigset_t signals;
+
+ sigemptyset(&signals);
+ sigaddset(&signals, ITIMER_SIGNAL);
+
+ (void) sigprocmask(SIG_BLOCK, &signals, NULL);
+}
+
+void
+unblock_vtalrm_signal(void)
+{
+ sigset_t signals;
+
+ sigemptyset(&signals);
+ sigaddset(&signals, ITIMER_SIGNAL);
+
+ (void) sigprocmask(SIG_UNBLOCK, &signals, NULL);
+}
+#endif
+
+/* gettimeofday() takes around 1us on our 500MHz PIII. Since we're
+ * only calling it 50 times/s, it shouldn't have any great impact.
+ */
+lnat
+getourtimeofday(void)
+{
+ struct timeval tv;
+ gettimeofday(&tv, (struct timezone *) NULL);
+ // cast to lnat because nat may be 64 bit when int is only 32 bit
+ return ((lnat)tv.tv_sec * TICK_FREQUENCY +
+ (lnat)tv.tv_usec * TICK_FREQUENCY / 1000000);
+}
diff --git a/rts/posix/Itimer.h b/rts/posix/Itimer.h
new file mode 100644
index 0000000000..09d01bde54
--- /dev/null
+++ b/rts/posix/Itimer.h
@@ -0,0 +1,19 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2005
+ *
+ * Interval timer for profiling and pre-emptive scheduling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef ITIMER_H
+#define ITIMER_H
+
+/* Current wall-clock time in RTS ticks (TICK_FREQUENCY per second). */
+extern lnat getourtimeofday ( void );
+#if 0
+/* unused: per-thread masking of the interval-timer signal */
+extern void block_vtalrm_signal ( void );
+extern void unblock_vtalrm_signal ( void );
+#endif
+
+#endif /* ITIMER_H */
diff --git a/rts/posix/OSThreads.c b/rts/posix/OSThreads.c
new file mode 100644
index 0000000000..07bd762130
--- /dev/null
+++ b/rts/posix/OSThreads.c
@@ -0,0 +1,166 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001-2005
+ *
+ * Accessing OS threads functionality in a (mostly) OS-independent
+ * manner.
+ *
+ * --------------------------------------------------------------------------*/
+
+#if defined(DEBUG) && defined(__linux__)
+/* We want GNU extensions in DEBUG mode for mutex error checking */
+#define _GNU_SOURCE
+#endif
+
+#include "Rts.h"
+#if defined(THREADED_RTS)
+#include "OSThreads.h"
+#include "RtsUtils.h"
+
+#if HAVE_STRING_H
+#include <string.h>
+#endif
+
+#if !defined(HAVE_PTHREAD_H)
+#error pthreads.h is required for the threaded RTS on Posix platforms
+#endif
+
+/*
+ * This (allegedly) OS threads independent layer was initially
+ * abstracted away from code that used Pthreads, so the functions
+ * provided here are mostly just wrappers to the Pthreads API.
+ *
+ */
+
+/* Initialise a condition variable with default attributes.
+ * Return value of pthread_cond_init is ignored here.
+ */
+void
+initCondition( Condition* pCond )
+{
+ pthread_cond_init(pCond, NULL);
+ return;
+}
+
+/* Destroy a condition variable; the caller must ensure no thread is
+ * still waiting on it (POSIX requirement).
+ */
+void
+closeCondition( Condition* pCond )
+{
+ pthread_cond_destroy(pCond);
+ return;
+}
+
+/* Wake all waiters. Returns rtsTrue on success (pthread call == 0). */
+rtsBool
+broadcastCondition ( Condition* pCond )
+{
+ return (pthread_cond_broadcast(pCond) == 0);
+}
+
+/* Wake at least one waiter. Returns rtsTrue on success. */
+rtsBool
+signalCondition ( Condition* pCond )
+{
+ return (pthread_cond_signal(pCond) == 0);
+}
+
+/* Atomically release *pMut and wait on *pCond; pMut is re-acquired
+ * before returning. Returns rtsTrue on success.
+ */
+rtsBool
+waitCondition ( Condition* pCond, Mutex* pMut )
+{
+ return (pthread_cond_wait(pCond,pMut) == 0);
+}
+
+/* Voluntarily give up the CPU so other runnable OS threads can run. */
+void
+yieldThread()
+{
+ sched_yield();
+ return;
+}
+
+/* Terminate the calling OS thread (does not return). */
+void
+shutdownThread()
+{
+ pthread_exit(NULL);
+}
+
+/* Spawn a new OS thread running startProc(param), storing its id in
+ * *pId. The thread is immediately detached, so it is never joined and
+ * its resources are reclaimed automatically on exit.
+ * Returns 0 on success, otherwise the pthread_create error code.
+ */
+int
+createOSThread (OSThreadId* pId, OSThreadProc *startProc, void *param)
+{
+ int result = pthread_create(pId, NULL, (void *(*)(void *))startProc, param);
+ if(!result)
+ pthread_detach(*pId);
+ return result;
+}
+
+/* Identity of the calling OS thread. */
+OSThreadId
+osThreadId()
+{
+ return pthread_self();
+}
+
+/* Initialise a mutex. In DEBUG builds on Linux an error-checking
+ * mutex is used so that recursive locking / unlocking by a non-owner
+ * is reported instead of deadlocking.
+ * NOTE(review): the _GNU_SOURCE define at the top of this file is
+ * guarded by defined(__linux__) while this uses linux_HOST_OS —
+ * confirm the two conditions always agree.
+ */
+void
+initMutex(Mutex* pMut)
+{
+#if defined(DEBUG) && defined(linux_HOST_OS)
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr,PTHREAD_MUTEX_ERRORCHECK_NP);
+ pthread_mutex_init(pMut,&attr);
+#else
+ pthread_mutex_init(pMut,NULL);
+#endif
+ return;
+}
+
+/* Allocate a new thread-local storage key (no destructor).
+ * Aborts the RTS with the textual error if allocation fails.
+ */
+void
+newThreadLocalKey (ThreadLocalKey *key)
+{
+ int r;
+ if ((r = pthread_key_create(key, NULL)) != 0) {
+ barf("newThreadLocalKey: %s", strerror(r));
+ }
+}
+
+/* Read the calling thread's value for *key; NULL if never set. */
+void *
+getThreadLocalVar (ThreadLocalKey *key)
+{
+ return pthread_getspecific(*key);
+ // Note: a return value of NULL can indicate that either the key
+ // is not valid, or the key is valid and the data value has not
+ // yet been set. We need to use the latter case, so we cannot
+ // detect errors here.
+}
+
+/* Set the calling thread's value for *key; aborts on failure. */
+void
+setThreadLocalVar (ThreadLocalKey *key, void *value)
+{
+ int r;
+ if ((r = pthread_setspecific(*key,value)) != 0) {
+ barf("setThreadLocalVar: %s", strerror(r));
+ }
+}
+
+/* Thread entry point for forkOS: acquire a Capability, run the Haskell
+ * IO action referenced by the stable pointer 'entry', then release the
+ * Capability. The action's result is discarded.
+ */
+static void *
+forkOS_createThreadWrapper ( void * entry )
+{
+ Capability *cap;
+ cap = rts_lock();
+ cap = rts_evalStableIO(cap, (HsStablePtr) entry, NULL);
+ rts_unlock(cap);
+ return NULL;
+}
+
+/* Implements Control.Concurrent.forkOS: start a fresh (detached) OS
+ * thread that evaluates the given Haskell IO action.
+ * Returns 0 on success, otherwise the pthread_create error code.
+ */
+int
+forkOS_createThread ( HsStablePtr entry )
+{
+ pthread_t tid;
+ int result = pthread_create(&tid, NULL,
+ forkOS_createThreadWrapper, (void*)entry);
+ if(!result)
+ pthread_detach(tid);
+ return result;
+}
+
+#else /* !defined(THREADED_RTS) */
+
+/* Non-threaded RTS: forkOS is unsupported; always report failure (-1). */
+int
+forkOS_createThread ( HsStablePtr entry STG_UNUSED )
+{
+ return -1;
+}
+
+#endif /* !defined(THREADED_RTS) */
diff --git a/rts/posix/Select.c b/rts/posix/Select.c
new file mode 100644
index 0000000000..e21ced03ab
--- /dev/null
+++ b/rts/posix/Select.c
@@ -0,0 +1,279 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1995-2002
+ *
+ * Support for concurrent non-blocking I/O and thread waiting.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* we're outside the realms of POSIX here... */
+/* #include "PosixSource.h" */
+
+#include "Rts.h"
+#include "Schedule.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "Timer.h"
+#include "Itimer.h"
+#include "Signals.h"
+#include "Capability.h"
+#include "posix/Select.h"
+
+# ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+# endif
+
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# endif
+
+#include <errno.h>
+#include <string.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if !defined(THREADED_RTS)
+/* last timestamp */
+lnat timestamp = 0;
+
+/*
+ * The threaded RTS uses an IO-manager thread in Haskell instead (see GHC.Conc)
+ */
+
+/* There's a clever trick here to avoid problems when the time wraps
+ * around. Since our maximum delay is smaller than 31 bits of ticks
+ * (it's actually 31 bits of microseconds), we can safely check
+ * whether a timer has expired even if our timer will wrap around
+ * before the target is reached, using the following formula:
+ *
+ * (int)((uint)current_time - (uint)target_time) < 0
+ *
+ * if this is true, then our time has expired.
+ * (idea due to Andy Gill).
+ */
+/* Pop every thread from the (time-ordered) sleeping_queue whose wakeup
+ * target has passed, and make it runnable again on MainCapability.
+ * 'ticks' is the current time from getourtimeofday(); the signed
+ * subtraction implements the wrap-around-safe comparison described in
+ * the comment above.
+ * Returns rtsTrue iff at least one thread was woken.
+ */
+static rtsBool
+wakeUpSleepingThreads(lnat ticks)
+{
+ StgTSO *tso;
+ rtsBool flag = rtsFalse;
+
+ while (sleeping_queue != END_TSO_QUEUE &&
+ (int)(ticks - sleeping_queue->block_info.target) > 0) {
+ tso = sleeping_queue;
+ sleeping_queue = tso->link;
+ tso->why_blocked = NotBlocked;
+ tso->link = END_TSO_QUEUE;
+ IF_DEBUG(scheduler,debugBelch("Waking up sleeping thread %d\n", tso->id));
+ // MainCapability: this code is !THREADED_RTS
+ pushOnRunQueue(&MainCapability,tso);
+ flag = rtsTrue;
+ }
+ return flag;
+}
+
+/* Argument 'wait' says whether to wait for I/O to become available,
+ * or whether to just check and return immediately. If there are
+ * other threads ready to run, we normally do the non-waiting variety,
+ * otherwise we wait (see Schedule.c).
+ *
+ * SMP note: must be called with sched_mutex locked.
+ *
+ * Windows: select only works on sockets, so this doesn't really work,
+ * though it makes things better than before. MsgWaitForMultipleObjects
+ * should really be used, though it only seems to work for read handles,
+ * not write handles.
+ *
+ */
+/* Single-threaded RTS event wait: block (or poll, if !wait) in select()
+ * on the file descriptors of all threads on blocked_queue, with a
+ * timeout derived from the earliest sleeper on sleeping_queue; wake
+ * whatever became ready. Returns as soon as some thread is runnable,
+ * a signal handler needs starting, or the scheduler is interrupting.
+ */
+void
+awaitEvent(rtsBool wait)
+{
+ StgTSO *tso, *prev, *next;
+ rtsBool ready;
+ fd_set rfd,wfd;
+ int numFound;
+ int maxfd = -1;
+ // NOTE(review): select_succeeded is initialised rtsTrue and never
+ // cleared anywhere in this function — confirm whether a
+ // "select_succeeded = rtsFalse" assignment was lost; as written the
+ // unblocking pass below always runs.
+ rtsBool select_succeeded = rtsTrue;
+ rtsBool unblock_all = rtsFalse;
+ struct timeval tv;
+ lnat min, ticks;
+
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+
+ IF_DEBUG(scheduler,
+ debugBelch("scheduler: checking for threads blocked on I/O");
+ if (wait) {
+ debugBelch(" (waiting)");
+ }
+ debugBelch("\n");
+ );
+
+ /* loop until we've woken up some threads. This loop is needed
+ * because the select timing isn't accurate, we sometimes sleep
+ * for a while but not long enough to wake up a thread in
+ * a threadDelay.
+ */
+ do {
+
+ ticks = timestamp = getourtimeofday();
+ if (wakeUpSleepingThreads(ticks)) {
+ return;
+ }
+
+ // Compute the select() timeout in microseconds: zero for a poll,
+ // else time until the earliest sleeper, else "forever".
+ if (!wait) {
+ min = 0;
+ } else if (sleeping_queue != END_TSO_QUEUE) {
+ min = (sleeping_queue->block_info.target - ticks)
+ * TICK_MILLISECS * 1000;
+ } else {
+ // NOTE(review): 0x7ffffff is 2^27-1, but the header comment
+ // above talks about "31 bits of microseconds" — possibly a
+ // typo for 0x7fffffff; confirm the intended maximum delay.
+ min = 0x7ffffff;
+ }
+
+ /*
+ * Collect all of the fd's that we're interested in
+ */
+ FD_ZERO(&rfd);
+ FD_ZERO(&wfd);
+
+ for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
+ next = tso->link;
+
+ switch (tso->why_blocked) {
+ case BlockedOnRead:
+ {
+ int fd = tso->block_info.fd;
+ if (fd >= FD_SETSIZE) {
+ barf("awaitEvent: descriptor out of range");
+ }
+ maxfd = (fd > maxfd) ? fd : maxfd;
+ FD_SET(fd, &rfd);
+ continue;
+ }
+
+ case BlockedOnWrite:
+ {
+ int fd = tso->block_info.fd;
+ if (fd >= FD_SETSIZE) {
+ barf("awaitEvent: descriptor out of range");
+ }
+ maxfd = (fd > maxfd) ? fd : maxfd;
+ FD_SET(fd, &wfd);
+ continue;
+ }
+
+ default:
+ barf("AwaitEvent");
+ }
+ }
+
+ /* Check for any interesting events */
+
+ tv.tv_sec = min / 1000000;
+ tv.tv_usec = min % 1000000;
+
+ // Retry select() while it fails with EINTR; other errors are
+ // handled below.
+ while ((numFound = select(maxfd+1, &rfd, &wfd, NULL, &tv)) < 0) {
+ if (errno != EINTR) {
+ /* Handle bad file descriptors by unblocking all the
+ waiting threads. Why? Because a thread might have been
+ a bit naughty and closed a file descriptor while another
+ was blocked waiting. This is less-than-good programming
+ practice, but having the RTS as a result fall over isn't
+ acceptable, so we simply unblock all the waiting threads
+ should we see a bad file descriptor & give the threads
+ a chance to clean up their act.
+
+ Note: assume here that threads becoming unblocked
+ will try to read/write the file descriptor before trying
+ to issue a threadWaitRead/threadWaitWrite again (==> an
+ IOError will result for the thread that's got the bad
+ file descriptor.) Hence, there's no danger of a bad
+ file descriptor being repeatedly select()'ed on, so
+ the RTS won't loop.
+ */
+ if ( errno == EBADF ) {
+ unblock_all = rtsTrue;
+ break;
+ } else {
+ perror("select");
+ barf("select failed");
+ }
+ }
+
+ /* We got a signal; could be one of ours. If so, we need
+ * to start up the signal handler straight away, otherwise
+ * we could block for a long time before the signal is
+ * serviced.
+ */
+#if defined(RTS_USER_SIGNALS)
+ if (signals_pending()) {
+ startSignalHandlers(&MainCapability);
+ return; /* still hold the lock */
+ }
+#endif
+
+ /* we were interrupted, return to the scheduler immediately.
+ */
+ if (sched_state >= SCHED_INTERRUPTING) {
+ return; /* still hold the lock */
+ }
+
+ /* check for threads that need waking up
+ */
+ wakeUpSleepingThreads(getourtimeofday());
+
+ /* If new runnable threads have arrived, stop waiting for
+ * I/O and run them.
+ */
+ if (!emptyRunQueue(&MainCapability)) {
+ return; /* still hold the lock */
+ }
+ }
+
+ /* Step through the waiting queue, unblocking every thread that now has
+ * a file descriptor in a ready state.
+ */
+
+ prev = NULL;
+ if (select_succeeded || unblock_all) {
+ for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) {
+ next = tso->link;
+ switch (tso->why_blocked) {
+ case BlockedOnRead:
+ ready = unblock_all || FD_ISSET(tso->block_info.fd, &rfd);
+ break;
+ case BlockedOnWrite:
+ ready = unblock_all || FD_ISSET(tso->block_info.fd, &wfd);
+ break;
+ default:
+ barf("awaitEvent");
+ }
+
+ if (ready) {
+ IF_DEBUG(scheduler,debugBelch("Waking up blocked thread %d\n", tso->id));
+ tso->why_blocked = NotBlocked;
+ tso->link = END_TSO_QUEUE;
+ pushOnRunQueue(&MainCapability,tso);
+ } else {
+ // Thread stays blocked: relink it onto the rebuilt queue.
+ if (prev == NULL)
+ blocked_queue_hd = tso;
+ else
+ prev->link = tso;
+ prev = tso;
+ }
+ }
+
+ // Terminate the rebuilt blocked queue (empty or at 'prev').
+ if (prev == NULL)
+ blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;
+ else {
+ prev->link = END_TSO_QUEUE;
+ blocked_queue_tl = prev;
+ }
+ }
+
+ } while (wait && sched_state == SCHED_RUNNING
+ && emptyRunQueue(&MainCapability));
+}
+
+#endif /* THREADED_RTS */
diff --git a/rts/posix/Select.h b/rts/posix/Select.h
new file mode 100644
index 0000000000..8825562974
--- /dev/null
+++ b/rts/posix/Select.h
@@ -0,0 +1,26 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2005
+ *
+ * Prototypes for functions in Select.c
+ *
+ * -------------------------------------------------------------------------*/
+
+#ifndef SELECT_H
+#define SELECT_H
+
+#if !defined(THREADED_RTS)
+/* In Select.c */
+/* Last time awaitEvent() sampled the clock, in RTS ticks
+ * (see getourtimeofday() in Itimer.c). */
+extern lnat RTS_VAR(timestamp);
+
+/* awaitEvent(rtsBool wait)
+ *
+ * Checks for blocked threads that need to be woken.
+ *
+ * Called from STG : NO
+ * Locks assumed : sched_mutex
+ */
+void awaitEvent(rtsBool wait); /* In Select.c */
+#endif
+
+#endif /* SELECT_H */
diff --git a/rts/posix/Signals.c b/rts/posix/Signals.c
new file mode 100644
index 0000000000..5f5f77fd39
--- /dev/null
+++ b/rts/posix/Signals.c
@@ -0,0 +1,510 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Signal processing / handling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* This is non-Posix-compliant.
+ #include "PosixSource.h"
+*/
+#include "Rts.h"
+#include "SchedAPI.h"
+#include "Schedule.h"
+#include "RtsSignals.h"
+#include "posix/Signals.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+
+#ifdef alpha_HOST_ARCH
+# if defined(linux_HOST_OS)
+# include <asm/fpu.h>
+# else
+# include <machine/fpu.h>
+# endif
+#endif
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#ifdef HAVE_SIGNAL_H
+# include <signal.h>
+#endif
+
+#include <stdlib.h>
+
+/* This curious flag is provided for the benefit of the Haskell binding
+ * to POSIX.1 to control whether or not to include SA_NOCLDSTOP when
+ * installing a SIGCHLD handler.
+ */
+StgInt nocldstop = 0;
+
+/* -----------------------------------------------------------------------------
+ * The table of signal handlers
+ * -------------------------------------------------------------------------- */
+
+#if defined(RTS_USER_SIGNALS)
+
+/* SUP: The type of handlers is a little bit, well, doubtful... */
+StgInt *signal_handlers = NULL; /* Dynamically grown array of signal handlers */
+static StgInt nHandlers = 0; /* Size of handlers array */
+
+static nat n_haskell_handlers = 0;
+
+/* -----------------------------------------------------------------------------
+ * Allocate/resize the table of signal handlers.
+ * -------------------------------------------------------------------------- */
+
+/* Grow the signal_handlers table so that index 'sig' is valid,
+ * initialising any newly-added slots to STG_SIG_DFL. No-op if the
+ * table is already big enough. Allocation failure is handled inside
+ * stgMallocBytes/stgReallocBytes.
+ */
+static void
+more_handlers(I_ sig)
+{
+ StgInt i;
+
+ if (sig < nHandlers)
+ return;
+
+ if (signal_handlers == NULL)
+ signal_handlers = (StgInt *)stgMallocBytes((sig + 1) * sizeof(StgInt), "more_handlers");
+ else
+ signal_handlers = (StgInt *)stgReallocBytes(signal_handlers, (sig + 1) * sizeof(StgInt), "more_handlers");
+
+ for(i = nHandlers; i <= sig; i++)
+ // Fill in the new slots with default actions
+ signal_handlers[i] = STG_SIG_DFL;
+
+ nHandlers = sig + 1;
+}
+
+/* -----------------------------------------------------------------------------
+ * Pending Handlers
+ *
+ * The mechanism for starting handlers differs between the threaded
+ * (THREADED_RTS) and non-threaded versions of the RTS.
+ *
+ * When the RTS is single-threaded, we just write the pending signal
+ * handlers into a buffer, and start a thread for each one in the
+ * scheduler loop.
+ *
+ * When THREADED_RTS, the problem is that signals might be
+ * delivered to multiple threads, so we would need to synchronise
+ * access to pending_handler_buf somehow. Using thread
+ * synchronisation from a signal handler isn't possible in general
+ * (some OSs support it, eg. MacOS X, but not all). So instead:
+ *
+ * - the signal handler writes the signal number into the pipe
+ * managed by the IO manager thread (see GHC.Conc).
+ * - the IO manager picks up the signal number and calls
+ * startSignalHandler() to start the thread.
+ *
+ * This also has the nice property that we don't need to arrange to
+ * wake up a worker task to start the signal handler: the IO manager
+ * wakes up when we write into the pipe.
+ *
+ * -------------------------------------------------------------------------- */
+
+// Here's the pipe into which we will send our signals
+static int io_manager_pipe = -1;
+
+/* Record the write end of the IO manager's pipe; generic_handler()
+ * writes signal numbers into it in the threaded RTS. A value of -1
+ * (the initial state) means "no IO manager yet" and signals are
+ * silently dropped in that case.
+ */
+void
+setIOManagerPipe (int fd)
+{
+ // only called when THREADED_RTS, but unconditionally
+ // compiled here because GHC.Conc depends on it.
+ io_manager_pipe = fd;
+}
+
+#if !defined(THREADED_RTS)
+
+#define N_PENDING_HANDLERS 16
+
+StgPtr pending_handler_buf[N_PENDING_HANDLERS];
+StgPtr *next_pending_handler = pending_handler_buf;
+
+#endif /* THREADED_RTS */
+
+/* -----------------------------------------------------------------------------
+ * SIGCONT handler
+ *
+ * It seems that shells tend to put stdin back into blocking mode
+ * following a suspend/resume of the process. Here we arrange to put
+ * it back into non-blocking mode. We don't do anything to
+ * stdout/stderr because these handles don't get put into non-blocking
+ * mode at all - see the comments on stdout/stderr in PrelHandle.hsc.
+ * -------------------------------------------------------------------------- */
+
+/* SIGCONT handler: restore non-blocking mode on stdin (fd 0), which
+ * shells tend to reset when the process is resumed (see comment above).
+ */
+static void
+cont_handler(int sig STG_UNUSED)
+{
+ setNonBlockingFd(0);
+}
+
+/* -----------------------------------------------------------------------------
+ * Low-level signal handler
+ *
+ * Places the requested handler on a stack of pending handlers to be
+ * started up at the next context switch.
+ * -------------------------------------------------------------------------- */
+
+/* The one low-level handler installed for every Haskell-registered
+ * signal. Threaded RTS: forward the signal number to the IO manager
+ * pipe. Non-threaded RTS: push the handler closure onto
+ * pending_handler_buf for the scheduler to start later. In both modes
+ * it then unblocks the signal, runs the SIGCONT fixup if applicable,
+ * and requests a context switch.
+ */
+static void
+generic_handler(int sig)
+{
+ sigset_t signals;
+
+#if defined(THREADED_RTS)
+
+ if (io_manager_pipe != -1)
+ {
+ // Write the signal number into the pipe as a single byte. We
+ // hope that signals fit into a byte...
+ StgWord8 csig = (StgWord8)sig;
+ // NOTE(review): the write() result is ignored; a full pipe
+ // would silently drop the signal — confirm this is acceptable.
+ write(io_manager_pipe, &csig, 1);
+ }
+ // If the IO manager hasn't told us what the FD of the write end
+ // of its pipe is, there's not much we can do here, so just ignore
+ // the signal..
+
+#else /* not THREADED_RTS */
+
+ /* Can't call allocate from here. Probably can't call malloc
+ either. However, we have to schedule a new thread somehow.
+
+ It's probably ok to request a context switch and allow the
+ scheduler to start the handler thread, but how do we
+ communicate this to the scheduler?
+
+ We need some kind of locking, but with low overhead (i.e. no
+ blocking signals every time around the scheduler).
+
+ Signal Handlers are atomic (i.e. they can't be interrupted), and
+ we can make use of this. We just need to make sure the
+ critical section of the scheduler can't be interrupted - the
+ only way to do this is to block signals. However, we can lower
+ the overhead by only blocking signals when there are any
+ handlers to run, i.e. the set of pending handlers is
+ non-empty.
+ */
+
+ /* We use a stack to store the pending signals. We can't
+ dynamically grow this since we can't allocate any memory from
+ within a signal handler.
+
+ Hence unfortunately we have to bomb out if the buffer
+ overflows. It might be acceptable to carry on in certain
+ circumstances, depending on the signal.
+ */
+
+ // Push the handler closure (from its stable pointer) onto the stack.
+ *next_pending_handler++ = deRefStablePtr((StgStablePtr)signal_handlers[sig]);
+
+ // stack full?
+ if (next_pending_handler == &pending_handler_buf[N_PENDING_HANDLERS]) {
+ errorBelch("too many pending signals");
+ stg_exit(EXIT_FAILURE);
+ }
+
+#endif /* THREADED_RTS */
+
+ // re-establish the signal handler, and carry on
+ sigemptyset(&signals);
+ sigaddset(&signals, sig);
+ sigprocmask(SIG_UNBLOCK, &signals, NULL);
+
+ // *always* do the SIGCONT handler, even if the user overrides it.
+ if (sig == SIGCONT) {
+ cont_handler(sig);
+ }
+
+ // Ask the scheduler to switch threads soon so the handler runs.
+ context_switch = 1;
+}
+
+/* -----------------------------------------------------------------------------
+ * Blocking/Unblocking of the user signals
+ * -------------------------------------------------------------------------- */
+
+/* Set of signals with Haskell handlers installed (see stg_sig_install). */
+static sigset_t userSignals;
+/* Mask saved by blockUserSignals(), restored by unblockUserSignals(). */
+static sigset_t savedSignals;
+
+/* Reset the set of Haskell-handled signals to empty (RTS startup). */
+void
+initUserSignals(void)
+{
+ sigemptyset(&userSignals);
+}
+
+/* Block every Haskell-handled signal, saving the previous mask.
+ * Must be paired with unblockUserSignals(); not re-entrant, since
+ * there is only one savedSignals slot.
+ */
+void
+blockUserSignals(void)
+{
+ sigprocmask(SIG_BLOCK, &userSignals, &savedSignals);
+}
+
+/* Restore the signal mask saved by the matching blockUserSignals(). */
+void
+unblockUserSignals(void)
+{
+ sigprocmask(SIG_SETMASK, &savedSignals, NULL);
+}
+
+/* rtsTrue iff at least one Haskell signal handler is installed. */
+rtsBool
+anyUserHandlers(void)
+{
+ return n_haskell_handlers != 0;
+}
+
+#if !defined(THREADED_RTS)
+/* Sleep (pause()) until a signal arrives or the scheduler starts
+ * shutting down; used when all Haskell threads are blocked on signals.
+ */
+void
+awaitUserSignals(void)
+{
+ while (!signals_pending() && sched_state == SCHED_RUNNING) {
+ pause();
+ }
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Install a Haskell signal handler.
+ * -------------------------------------------------------------------------- */
+
+/* Install, reset, or remove the Haskell handler for 'sig'.
+ * spi selects the action: STG_SIG_IGN / STG_SIG_DFL / STG_SIG_HAN /
+ * STG_SIG_RST (RST = one-shot via SA_RESETHAND). For HAN/RST,
+ * *handler is a stable pointer to the Haskell handler closure.
+ * 'mask' optionally points at a sigset_t to use as sa_mask.
+ * Returns the previous spi (or STG_SIG_HAN with *handler set to the
+ * previous stable pointer), or STG_SIG_ERR on failure.
+ */
+int
+stg_sig_install(int sig, int spi, StgStablePtr *handler, void *mask)
+{
+ sigset_t signals, osignals;
+ struct sigaction action;
+ StgInt previous_spi;
+
+ // Block the signal until we figure out what to do
+ // Count on this to fail if the signal number is invalid
+ if (sig < 0 || sigemptyset(&signals) ||
+ sigaddset(&signals, sig) || sigprocmask(SIG_BLOCK, &signals, &osignals)) {
+ return STG_SIG_ERR;
+ }
+
+ more_handlers(sig);
+
+ previous_spi = signal_handlers[sig];
+
+ action.sa_flags = 0;
+
+ switch(spi) {
+ case STG_SIG_IGN:
+ signal_handlers[sig] = STG_SIG_IGN;
+ sigdelset(&userSignals, sig);
+ action.sa_handler = SIG_IGN;
+ break;
+
+ case STG_SIG_DFL:
+ signal_handlers[sig] = STG_SIG_DFL;
+ sigdelset(&userSignals, sig);
+ action.sa_handler = SIG_DFL;
+ break;
+
+ case STG_SIG_HAN:
+ case STG_SIG_RST:
+ signal_handlers[sig] = (StgInt)*handler;
+ sigaddset(&userSignals, sig);
+ action.sa_handler = generic_handler;
+ if (spi == STG_SIG_RST) {
+ action.sa_flags = SA_RESETHAND;
+ }
+ // NOTE(review): the counter is incremented even when this
+ // replaces an existing Haskell handler, and is not decremented
+ // for IGN/DFL above — confirm whether n_haskell_handlers is
+ // meant to track installs rather than live handlers.
+ n_haskell_handlers++;
+ break;
+
+ default:
+ barf("stg_sig_install: bad spi");
+ }
+
+ if (mask != NULL)
+ action.sa_mask = *(sigset_t *)mask;
+ else
+ sigemptyset(&action.sa_mask);
+
+ // Honour the POSIX binding's nocldstop flag for SIGCHLD.
+ action.sa_flags |= sig == SIGCHLD && nocldstop ? SA_NOCLDSTOP : 0;
+
+ if (sigaction(sig, &action, NULL) ||
+ sigprocmask(SIG_SETMASK, &osignals, NULL))
+ {
+ // need to return an error code, so avoid a stable pointer leak
+ // by freeing the previous handler if there was one.
+ // (Stable pointers compare >= 0; the STG_SIG_* codes are the
+ // non-handler values — presumably negative; verify in Signals.h.)
+ if (previous_spi >= 0) {
+ freeStablePtr(stgCast(StgStablePtr,signal_handlers[sig]));
+ n_haskell_handlers--;
+ }
+ return STG_SIG_ERR;
+ }
+
+ if (previous_spi == STG_SIG_DFL || previous_spi == STG_SIG_IGN
+ || previous_spi == STG_SIG_ERR) {
+ return previous_spi;
+ } else {
+ // Hand the old stable pointer back so the caller can free it.
+ *handler = (StgStablePtr)previous_spi;
+ return STG_SIG_HAN;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Creating new threads for signal handlers.
+ * -------------------------------------------------------------------------- */
+
+#if !defined(THREADED_RTS)
+/* Drain pending_handler_buf (filled by generic_handler), creating one
+ * fresh Haskell IO thread per pending handler closure and scheduling
+ * it on 'cap'. User signals are blocked for the duration so the
+ * buffer cannot be mutated underneath us by a signal handler.
+ */
+void
+startSignalHandlers(Capability *cap)
+{
+ blockUserSignals();
+
+ while (next_pending_handler != pending_handler_buf) {
+
+ next_pending_handler--;
+
+ scheduleThread (cap,
+ createIOThread(cap,
+ RtsFlags.GcFlags.initialStkSize,
+ (StgClosure *) *next_pending_handler));
+ }
+
+ unblockUserSignals();
+}
+
+/* ----------------------------------------------------------------------------
+ * Mark signal handlers during GC.
+ *
+ * We do this rather than trying to start all the signal handlers
+ * prior to GC, because that requires extra heap for the new threads.
+ * Signals must be blocked (see blockUserSignals() above) during GC to
+ * avoid race conditions.
+ * -------------------------------------------------------------------------- */
+
+#if !defined(THREADED_RTS)
+/* GC root marking: evacuate every handler closure currently queued in
+ * pending_handler_buf so it survives the collection (see the comment
+ * block above for why this is done instead of starting the handlers
+ * before GC).
+ */
+void
+markSignalHandlers (evac_fn evac)
+{
+ StgPtr *p;
+
+ p = next_pending_handler;
+ while (p != pending_handler_buf) {
+ p--;
+ evac((StgClosure **)p);
+ }
+}
+#else
+/* Threaded RTS: pending handlers live with the IO manager, not in a
+ * local buffer, so there is nothing to mark here.
+ */
+void
+markSignalHandlers (evac_fn evac STG_UNUSED)
+{
+}
+#endif
+
+#else /* !RTS_USER_SIGNALS */
+/* Platforms without RTS user-signal support: accept the call but do
+ * nothing, reporting that the previous disposition was the default.
+ */
+StgInt
+stg_sig_install(StgInt sig STG_UNUSED,
+ StgInt spi STG_UNUSED,
+ StgStablePtr* handler STG_UNUSED,
+ void* mask STG_UNUSED)
+{
+ //barf("User signals not supported");
+ return STG_SIG_DFL;
+}
+
+#endif
+
+#if defined(RTS_USER_SIGNALS)
+/* -----------------------------------------------------------------------------
+ * SIGINT handler.
+ *
+ * We like to shutdown nicely after receiving a SIGINT, write out the
+ * stats, write profiling info, close open files and flush buffers etc.
+ * -------------------------------------------------------------------------- */
+#ifdef SMP
+/* The thread that called startupHaskell(); SIGINT received by any
+ * other thread is re-routed to it (see shutdown_handler). */
+pthread_t startup_guy;
+#endif
+
+/* SIGINT handler: first ^C requests an orderly RTS shutdown via
+ * interruptStgRts(); a second ^C while shutdown is already in
+ * progress exits immediately with EXIT_INTERRUPTED.
+ */
+static void
+shutdown_handler(int sig STG_UNUSED)
+{
+#ifdef SMP
+ // if I'm a worker thread, send this signal to the guy who
+ // originally called startupHaskell(). Since we're handling
+ // the signal, it won't be a "send to all threads" type of signal
+ // (according to the POSIX threads spec).
+ if (pthread_self() != startup_guy) {
+ pthread_kill(startup_guy, sig);
+ return;
+ }
+#endif
+
+ // If we're already trying to interrupt the RTS, terminate with
+ // extreme prejudice. So the first ^C tries to exit the program
+ // cleanly, and the second one just kills it.
+ if (sched_state >= SCHED_INTERRUPTING) {
+ stg_exit(EXIT_INTERRUPTED);
+ } else {
+ interruptStgRts();
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Install default signal handlers.
+ *
+ * The RTS installs a default signal handler for catching
+ * SIGINT, so that we can perform an orderly shutdown.
+ *
+ * Haskell code may install their own SIGINT handler, which is
+ * fine, provided they're so kind as to put back the old one
+ * when they de-install.
+ *
+ * In addition to handling SIGINT, the RTS also handles SIGFPE
+ * by ignoring it. Apparently IEEE requires floating-point
+ * exceptions to be ignored by default, but alpha-dec-osf3
+ * doesn't seem to do so.
+ * -------------------------------------------------------------------------- */
+/* Install the RTS's default signal handlers: shutdown_handler on
+ * SIGINT (orderly shutdown) and cont_handler on SIGCONT (restore
+ * non-blocking stdin). The SIGFPE-ignoring code is deliberately
+ * disabled — see the comment below. On SMP builds, also records the
+ * startup thread so worker threads can forward SIGINT to it.
+ */
+void
+initDefaultHandlers()
+{
+ struct sigaction action,oact;
+
+#ifdef SMP
+ startup_guy = pthread_self();
+#endif
+
+ // install the SIGINT handler
+ action.sa_handler = shutdown_handler;
+ sigemptyset(&action.sa_mask);
+ action.sa_flags = 0;
+ if (sigaction(SIGINT, &action, &oact) != 0) {
+ errorBelch("warning: failed to install SIGINT handler");
+ }
+
+#if defined(HAVE_SIGINTERRUPT)
+ // Ensure syscalls are interrupted (not restarted) by SIGINT.
+ siginterrupt(SIGINT, 1); // isn't this the default? --SDM
+#endif
+
+ // install the SIGCONT handler
+ action.sa_handler = cont_handler;
+ sigemptyset(&action.sa_mask);
+ action.sa_flags = 0;
+ if (sigaction(SIGCONT, &action, &oact) != 0) {
+ errorBelch("warning: failed to install SIGCONT handler");
+ }
+
+ // install the SIGFPE handler
+
+ // In addition to handling SIGINT, also handle SIGFPE by ignoring it.
+ // Apparently IEEE requires floating-point exceptions to be ignored by
+ // default, but alpha-dec-osf3 doesn't seem to do so.
+
+ // Commented out by SDM 2/7/2002: this causes an infinite loop on
+ // some architectures when an integer division by zero occurs: we
+ // don't recover from the floating point exception, and the
+ // program just generates another one immediately.
+#if 0
+ action.sa_handler = SIG_IGN;
+ sigemptyset(&action.sa_mask);
+ action.sa_flags = 0;
+ if (sigaction(SIGFPE, &action, &oact) != 0) {
+ errorBelch("warning: failed to install SIGFPE handler");
+ }
+#endif
+
+#ifdef alpha_HOST_ARCH
+ // Alpha: disable FP traps so IEEE exceptions are ignored in hardware.
+ ieee_set_fp_control(0);
+#endif
+}
+
+#endif /* RTS_USER_SIGNALS */
diff --git a/rts/posix/Signals.h b/rts/posix/Signals.h
new file mode 100644
index 0000000000..39477f8c6a
--- /dev/null
+++ b/rts/posix/Signals.h
@@ -0,0 +1,26 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Signal processing / handling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef POSIX_SIGNALS_H
+#define POSIX_SIGNALS_H
+
+extern rtsBool anyUserHandlers(void);
+
+#if !defined(THREADED_RTS)
+
+extern StgPtr pending_handler_buf[];
+extern StgPtr *next_pending_handler;
+#define signals_pending() (next_pending_handler != pending_handler_buf)
+void startSignalHandlers(Capability *cap);
+
+#endif
+
+extern StgInt *signal_handlers;
+
+#endif /* POSIX_SIGNALS_H */
+
diff --git a/rts/win32/AsyncIO.c b/rts/win32/AsyncIO.c
new file mode 100644
index 0000000000..7bcf571cf8
--- /dev/null
+++ b/rts/win32/AsyncIO.c
@@ -0,0 +1,345 @@
+/* AsyncIO.c
+ *
+ * Integrating Win32 asynchronous I/O with the GHC RTS.
+ *
+ * (c) sof, 2002-2003.
+ */
+#include "Rts.h"
+#include "RtsUtils.h"
+#include <windows.h>
+#include <stdio.h>
+#include "Schedule.h"
+#include "RtsFlags.h"
+#include "Capability.h"
+#include "win32/AsyncIO.h"
+#include "win32/IOManager.h"
+
+/*
+ * Overview:
+ *
+ * Haskell code issues asynchronous I/O requests via the
+ * async{Read,Write,DoOp}# primops. These cause addIORequest()
+ * to be invoked, which forwards the request to the underlying
+ * asynchronous I/O subsystem. Each request is tagged with a unique
+ * ID.
+ *
+ * addIORequest() returns this ID, so that when the blocked CH
+ * thread is added onto blocked_queue, its TSO is annotated with
+ * it. Upon completion of an I/O request, the async I/O handling
+ * code makes a back-call to signal its completion; the local
+ * onIOComplete() routine. It adds the IO request ID (along with
+ * its result data) to a queue of completed requests before returning.
+ *
+ * The queue of completed IO requests is read by the thread operating
+ * the RTS scheduler. It de-queues the CH threads corresponding
+ * to the request IDs, making them runnable again.
+ *
+ */
+
+typedef struct CompletedReq {
+ unsigned int reqID;
+ int len;
+ int errCode;
+} CompletedReq;
+
+#define MAX_REQUESTS 200
+
+static CRITICAL_SECTION queue_lock;
+static HANDLE completed_req_event;
+static HANDLE abandon_req_wait;
+static HANDLE wait_handles[2];
+static CompletedReq completedTable[MAX_REQUESTS];
+static int completed_hw;
+static HANDLE completed_table_sema;
+static int issued_reqs;
+
+static void
+onIOComplete(unsigned int reqID,
+ int fd STG_UNUSED,
+ int len,
+ void* buf STG_UNUSED,
+ int errCode)
+{
+ DWORD dwRes;
+ /* Deposit result of request in queue/table..when there's room. */
+ dwRes = WaitForSingleObject(completed_table_sema, INFINITE);
+ switch (dwRes) {
+ case WAIT_OBJECT_0:
+ break;
+ default:
+ /* Not likely */
+ fprintf(stderr, "onIOComplete: failed to grab table semaphore, dropping request 0x%x\n", reqID);
+ fflush(stderr);
+ return;
+ }
+ EnterCriticalSection(&queue_lock);
+ if (completed_hw == MAX_REQUESTS) {
+ /* Shouldn't happen */
+ fprintf(stderr, "onIOComplete: ERROR -- Request table overflow (%d); dropping.\n", reqID);
+ fflush(stderr);
+ } else {
+#if 0
+ fprintf(stderr, "onCompl: %d %d %d %d %d\n",
+ reqID, len, errCode, issued_reqs, completed_hw);
+ fflush(stderr);
+#endif
+ completedTable[completed_hw].reqID = reqID;
+ completedTable[completed_hw].len = len;
+ completedTable[completed_hw].errCode = errCode;
+ completed_hw++;
+ issued_reqs--;
+ if (completed_hw == 1) {
+ /* The event is used to wake up the scheduler thread should it
+ * be blocked waiting for requests to complete. The event resets once
+ * that thread has cleared out the request queue/table.
+ */
+ SetEvent(completed_req_event);
+ }
+ }
+ LeaveCriticalSection(&queue_lock);
+}
+
+unsigned int
+addIORequest(int fd,
+ int forWriting,
+ int isSock,
+ int len,
+ char* buf)
+{
+ EnterCriticalSection(&queue_lock);
+ issued_reqs++;
+ LeaveCriticalSection(&queue_lock);
+#if 0
+ fprintf(stderr, "addIOReq: %d %d %d\n", fd, forWriting, len); fflush(stderr);
+#endif
+ return AddIORequest(fd,forWriting,isSock,len,buf,onIOComplete);
+}
+
+unsigned int
+addDelayRequest(int msecs)
+{
+ EnterCriticalSection(&queue_lock);
+ issued_reqs++;
+ LeaveCriticalSection(&queue_lock);
+#if 0
+ fprintf(stderr, "addDelayReq: %d\n", msecs); fflush(stderr);
+#endif
+ return AddDelayRequest(msecs,onIOComplete);
+}
+
+unsigned int
+addDoProcRequest(void* proc, void* param)
+{
+ EnterCriticalSection(&queue_lock);
+ issued_reqs++;
+ LeaveCriticalSection(&queue_lock);
+#if 0
+ fprintf(stderr, "addProcReq: %p %p\n", proc, param); fflush(stderr);
+#endif
+ return AddProcRequest(proc,param,onIOComplete);
+}
+
+
+int
+startupAsyncIO()
+{
+ if (!StartIOManager()) {
+ return 0;
+ }
+ InitializeCriticalSection(&queue_lock);
+ /* Create a pair of events:
+ *
+ * - completed_req_event -- signals the deposit of request result; manual reset.
+ * - abandon_req_wait -- external OS thread tells current RTS/Scheduler
+ * thread to abandon wait for IO request completion.
+ * Auto reset.
+ */
+ completed_req_event = CreateEvent (NULL, TRUE, FALSE, NULL);
+ abandon_req_wait = CreateEvent (NULL, FALSE, FALSE, NULL);
+ wait_handles[0] = completed_req_event;
+ wait_handles[1] = abandon_req_wait;
+ completed_hw = 0;
+ if ( !(completed_table_sema = CreateSemaphore (NULL, MAX_REQUESTS, MAX_REQUESTS, NULL)) ) {
+ DWORD rc = GetLastError();
+ fprintf(stderr, "startupAsyncIO: CreateSemaphore failed 0x%x\n", rc);
+ fflush(stderr);
+ }
+
+ return ( completed_req_event != INVALID_HANDLE_VALUE &&
+ abandon_req_wait != INVALID_HANDLE_VALUE &&
+ completed_table_sema != NULL );
+}
+
+void
+shutdownAsyncIO()
+{
+ CloseHandle(completed_req_event);
+ ShutdownIOManager();
+}
+
+/*
+ * Function: awaitRequests(wait)
+ *
+ * Check for the completion of external IO work requests. Worker
+ * threads signal completion of IO requests by depositing them
+ * in a table (completedTable). awaitRequests() matches up
+ * requests in that table with threads on the blocked_queue,
+ * making the threads whose IO requests have completed runnable
+ * again.
+ *
+ * awaitRequests() is called by the scheduler periodically _or_ if
+ * it is out of work, and needs to wait for the completion of IO
+ * requests to make further progress. In the latter scenario,
+ * awaitRequests() will simply block waiting for worker threads
+ * to complete if the 'completedTable' is empty.
+ */
+int
+awaitRequests(rtsBool wait)
+{
+#ifndef THREADED_RTS
+ // none of this is actually used in the threaded RTS
+
+start:
+#if 0
+ fprintf(stderr, "awaitRequests(): %d %d %d\n", issued_reqs, completed_hw, wait);
+ fflush(stderr);
+#endif
+ EnterCriticalSection(&queue_lock);
+ /* Nothing immediately available & we won't wait */
+ if ((!wait && completed_hw == 0)
+#if 0
+ // If we just return when wait==rtsFalse, we'll go into a busy
+ // wait loop, so I disabled this condition --SDM 18/12/2003
+ (issued_reqs == 0 && completed_hw == 0)
+#endif
+ ) {
+ LeaveCriticalSection(&queue_lock);
+ return 0;
+ }
+ if (completed_hw == 0) {
+ /* empty table, drop lock and wait */
+ LeaveCriticalSection(&queue_lock);
+ if ( wait && sched_state == SCHED_RUNNING ) {
+ DWORD dwRes = WaitForMultipleObjects(2, wait_handles, FALSE, INFINITE);
+ switch (dwRes) {
+ case WAIT_OBJECT_0:
+ /* a request was completed */
+ break;
+ case WAIT_OBJECT_0 + 1:
+ case WAIT_TIMEOUT:
+ /* timeout (unlikely) or told to abandon waiting */
+ return 0;
+ case WAIT_FAILED: {
+ DWORD dw = GetLastError();
+ fprintf(stderr, "awaitRequests: wait failed -- error code: %lu\n", dw); fflush(stderr);
+ return 0;
+ }
+ default:
+ fprintf(stderr, "awaitRequests: unexpected wait return code %lu\n", dwRes); fflush(stderr);
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+ goto start;
+ } else {
+ int i;
+ StgTSO *tso, *prev;
+
+ for (i=0; i < completed_hw; i++) {
+ /* For each of the completed requests, match up their Ids
+ * with those of the threads on the blocked_queue. If the
+ * thread that made the IO request has been subsequently
+ * killed (and removed from blocked_queue), no match will
+ * be found for that request Id.
+ *
+ * i.e., killing a Haskell thread doesn't attempt to cancel
+ * the IO request it is blocked on.
+ *
+ */
+ unsigned int rID = completedTable[i].reqID;
+
+ prev = NULL;
+ for(tso = blocked_queue_hd ; tso != END_TSO_QUEUE; prev = tso, tso = tso->link) {
+
+ switch(tso->why_blocked) {
+ case BlockedOnRead:
+ case BlockedOnWrite:
+ case BlockedOnDoProc:
+ if (tso->block_info.async_result->reqID == rID) {
+ /* Found the thread blocked waiting on request; stodgily fill
+ * in its result block.
+ */
+ tso->block_info.async_result->len = completedTable[i].len;
+ tso->block_info.async_result->errCode = completedTable[i].errCode;
+
+ /* Drop the matched TSO from blocked_queue */
+ if (prev) {
+ prev->link = tso->link;
+ } else {
+ blocked_queue_hd = tso->link;
+ }
+ if (blocked_queue_tl == tso) {
+ blocked_queue_tl = prev ? prev : END_TSO_QUEUE;
+ }
+
+ /* Terminates the run queue + this inner for-loop. */
+ tso->link = END_TSO_QUEUE;
+ tso->why_blocked = NotBlocked;
+ pushOnRunQueue(&MainCapability, tso);
+ break;
+ }
+ break;
+ default:
+ if (tso->why_blocked != NotBlocked) {
+ barf("awaitRequests: odd thread state");
+ }
+ break;
+ }
+ }
+ /* Signal that there's completed table slots available */
+ if ( !ReleaseSemaphore(completed_table_sema, 1, NULL) ) {
+ DWORD dw = GetLastError();
+ fprintf(stderr, "awaitRequests: failed to signal semaphore (error code=0x%x)\n", dw);
+ fflush(stderr);
+ }
+ }
+ completed_hw = 0;
+ ResetEvent(completed_req_event);
+ LeaveCriticalSection(&queue_lock);
+ return 1;
+ }
+#endif /* !THREADED_RTS */
+}
+
+/*
+ * Function: abandonRequestWait()
+ *
+ * Wake up a thread that's blocked waiting for new IO requests
+ * to complete (via awaitRequests().)
+ */
+void
+abandonRequestWait( void )
+{
+ /* the event is auto-reset, but in case there's no thread
+ * already waiting on the event, we want to return it to
+ * a non-signalled state.
+ *
+ * Careful! There is no synchronisation between
+ * abandonRequestWait and awaitRequest, which means that
+ * abandonRequestWait might be called just before a thread
+ * goes into a wait, and we miss the abandon signal. So we
+ * must SetEvent() here rather than PulseEvent() to ensure
+ * that the event isn't lost. We can re-optimise by resetting
+ * the event somewhere safe if we know the event has been
+ * properly serviced (see resetAbandon() below). --SDM 18/12/2003
+ */
+ SetEvent(abandon_req_wait);
+}
+
+void
+resetAbandonRequestWait( void )
+{
+ ResetEvent(abandon_req_wait);
+}
+
diff --git a/rts/win32/AsyncIO.h b/rts/win32/AsyncIO.h
new file mode 100644
index 0000000000..2077ea0cf7
--- /dev/null
+++ b/rts/win32/AsyncIO.h
@@ -0,0 +1,25 @@
+/* AsyncIO.h
+ *
+ * Integrating Win32 asynchronous I/O with the GHC RTS.
+ *
+ * (c) sof, 2002-2003.
+ */
+#ifndef __ASYNCHIO_H__
+#define __ASYNCHIO_H__
+extern unsigned int
+addIORequest(int fd,
+ int forWriting,
+ int isSock,
+ int len,
+ char* buf);
+extern unsigned int addDelayRequest(int msecs);
+extern unsigned int addDoProcRequest(void* proc, void* param);
+extern int startupAsyncIO(void);
+extern void shutdownAsyncIO(void);
+
+extern int awaitRequests(rtsBool wait);
+
+extern void abandonRequestWait(void);
+extern void resetAbandonRequestWait(void);
+
+#endif /* __ASYNCHIO_H__ */
diff --git a/rts/win32/AwaitEvent.c b/rts/win32/AwaitEvent.c
new file mode 100644
index 0000000000..43e188fb34
--- /dev/null
+++ b/rts/win32/AwaitEvent.c
@@ -0,0 +1,51 @@
+#if !defined(THREADED_RTS) /* to the end */
+/*
+ * Wait/check for external events. Periodically, the
+ * Scheduler checks for the completion of external operations,
+ * like the expiration of timers, completion of I/O requests
+ * issued by Haskell threads.
+ *
+ * If the Scheduler is otherwise out of work, it'll block
+ * herein waiting for external events to occur.
+ *
+ * This file mirrors the select()-based functionality
+ * for POSIX / Unix platforms in rts/Select.c, but for
+ * Win32.
+ *
+ */
+#include "Rts.h"
+#include "Schedule.h"
+#include "AwaitEvent.h"
+#include <windows.h>
+#include "win32/AsyncIO.h"
+
+// Used to avoid calling abandonRequestWait() if we don't need to.
+// Protected by sched_mutex.
+static nat workerWaitingForRequests = 0;
+
+void
+awaitEvent(rtsBool wait)
+{
+ int ret;
+
+ do {
+ /* Try to de-queue completed IO requests
+ */
+ workerWaitingForRequests = 1;
+ ret = awaitRequests(wait);
+ workerWaitingForRequests = 0;
+ if (!ret) {
+ return; /* still hold the lock */
+ }
+
+ // Return to the scheduler if:
+ //
+ // - we were interrupted
+ // - new threads have arrived
+
+ } while (wait
+ && sched_state == SCHED_RUNNING
+ && emptyRunQueue(&MainCapability)
+ );
+}
+#endif
diff --git a/rts/win32/ConsoleHandler.c b/rts/win32/ConsoleHandler.c
new file mode 100644
index 0000000000..d7096db632
--- /dev/null
+++ b/rts/win32/ConsoleHandler.c
@@ -0,0 +1,313 @@
+/*
+ * Console control handler support.
+ *
+ */
+#include "Rts.h"
+#include <windows.h>
+#include "ConsoleHandler.h"
+#include "SchedAPI.h"
+#include "Schedule.h"
+#include "RtsUtils.h"
+#include "RtsFlags.h"
+#include "AsyncIO.h"
+#include "RtsSignals.h"
+
+extern int stg_InstallConsoleEvent(int action, StgStablePtr *handler);
+
+static BOOL WINAPI shutdown_handler(DWORD dwCtrlType);
+static BOOL WINAPI generic_handler(DWORD dwCtrlType);
+
+static rtsBool deliver_event = rtsTrue;
+static StgInt console_handler = STG_SIG_DFL;
+
+static HANDLE hConsoleEvent = INVALID_HANDLE_VALUE;
+
+#define N_PENDING_EVENTS 16
+StgInt stg_pending_events = 0; /* number of undelivered events */
+DWORD stg_pending_buf[N_PENDING_EVENTS]; /* their associated event numbers. */
+
+/*
+ * Function: initUserSignals()
+ *
+ * Initialize the console handling substrate.
+ */
+void
+initUserSignals(void)
+{
+ stg_pending_events = 0;
+ console_handler = STG_SIG_DFL;
+ if (hConsoleEvent == INVALID_HANDLE_VALUE) {
+ hConsoleEvent =
+ CreateEvent ( NULL, /* default security attributes */
+ TRUE, /* manual-reset event */
+ FALSE, /* initially non-signalled */
+ NULL); /* no name */
+ }
+ return;
+}
+
+/*
+ * Function: shutdown_handler()
+ *
+ * Local function that performs the default handling of Ctrl+C kind
+ * events; gently shutting down the RTS
+ *
+ * To repeat Signals.c remark -- user code may choose to override the
+ * default handler. Which is fine, assuming they put back the default
+ * handler when/if they de-install the custom handler.
+ *
+ */
+static BOOL WINAPI shutdown_handler(DWORD dwCtrlType)
+{
+ switch (dwCtrlType) {
+
+ case CTRL_CLOSE_EVENT:
+ /* see generic_handler() comment re: this event */
+ return FALSE;
+ case CTRL_C_EVENT:
+ case CTRL_BREAK_EVENT:
+
+ // If we're already trying to interrupt the RTS, terminate with
+ // extreme prejudice. So the first ^C tries to exit the program
+ // cleanly, and the second one just kills it.
+ if (sched_state >= SCHED_INTERRUPTING) {
+ stg_exit(EXIT_INTERRUPTED);
+ } else {
+ interruptStgRts();
+ /* Cheesy pulsing of an event to wake up a waiting RTS thread, if any */
+ abandonRequestWait();
+ resetAbandonRequestWait();
+ }
+ return TRUE;
+
+ /* shutdown + logoff events are not handled here. */
+ default:
+ return FALSE;
+ }
+}
+
+
+/*
+ * Function: initDefaultHandlers()
+ *
+ * Install any default signal/console handlers. Currently we install a
+ * Ctrl+C handler that shuts down the RTS in an orderly manner.
+ */
+void initDefaultHandlers(void)
+{
+ if ( !SetConsoleCtrlHandler(shutdown_handler, TRUE) ) {
+ errorBelch("warning: failed to install default console handler");
+ }
+}
+
+
+/*
+ * Function: blockUserSignals()
+ *
+ * Temporarily block the delivery of further console events. Needed to
+ * avoid race conditions when GCing the stack of outstanding handlers or
+ * when emptying the stack by running the handlers.
+ *
+ */
+void
+blockUserSignals(void)
+{
+ deliver_event = rtsFalse;
+}
+
+
+/*
+ * Function: unblockUserSignals()
+ *
+ * The inverse of blockUserSignals(); re-enable the delivery of console events.
+ */
+void
+unblockUserSignals(void)
+{
+ deliver_event = rtsTrue;
+}
+
+
+/*
+ * Function: awaitUserSignals()
+ *
+ * Wait for the next console event. Currently a NOP (returns immediately.)
+ */
+void awaitUserSignals(void)
+{
+ return;
+}
+
+
+/*
+ * Function: startSignalHandlers()
+ *
+ * Run the handlers associated with the stacked up console events. Console
+ * event delivery is blocked for the duration of this call.
+ */
+void startSignalHandlers(Capability *cap)
+{
+ StgStablePtr handler;
+
+ if (console_handler < 0) {
+ return;
+ }
+
+ blockUserSignals();
+ ACQUIRE_LOCK(&sched_mutex);
+
+ handler = deRefStablePtr((StgStablePtr)console_handler);
+ while (stg_pending_events > 0) {
+ stg_pending_events--;
+ scheduleThread(cap,
+ createIOThread(cap,
+ RtsFlags.GcFlags.initialStkSize,
+ rts_apply(cap,
+ (StgClosure *)handler,
+ rts_mkInt(cap,
+ stg_pending_buf[stg_pending_events]))));
+ }
+
+ RELEASE_LOCK(&sched_mutex);
+ unblockUserSignals();
+}
+
+/*
+ * Function: markSignalHandlers()
+ *
+ * Evacuate the handler stack. _Assumes_ that console event delivery
+ * has already been blocked.
+ */
+void markSignalHandlers (evac_fn evac)
+{
+ if (console_handler >= 0) {
+ StgPtr p = deRefStablePtr((StgStablePtr)console_handler);
+ evac((StgClosure**)(void *)&p);
+ }
+}
+
+
+/*
+ * Function: generic_handler()
+ *
+ * Local function which handles an incoming console event (done in a separate OS thread),
+ * recording the event in stg_pending_events.
+ */
+static BOOL WINAPI generic_handler(DWORD dwCtrlType)
+{
+ ACQUIRE_LOCK(&sched_mutex);
+
+ /* Ultra-simple -- up the counter + signal a switch. */
+ switch(dwCtrlType) {
+ case CTRL_CLOSE_EVENT:
+ /* Don't support the delivery of this event; if we
+ * indicate that we've handled it here and the Haskell handler
+ * doesn't take proper action (e.g., terminate the OS process),
+ * the user of the app will be unable to kill/close it. Not
+ * good, so disable the delivery for now.
+ */
+ return FALSE;
+ default:
+ if (!deliver_event) return TRUE;
+
+ if ( stg_pending_events < N_PENDING_EVENTS ) {
+ stg_pending_buf[stg_pending_events] = dwCtrlType;
+ stg_pending_events++;
+ }
+ /* Cheesy pulsing of an event to wake up a waiting RTS thread, if any */
+ abandonRequestWait();
+ resetAbandonRequestWait();
+ return TRUE;
+ }
+
+ RELEASE_LOCK(&sched_mutex);
+}
+
+
+/*
+ * Function: rts_InstallConsoleEvent()
+ *
+ * Install/remove a console event handler.
+ */
+int
+rts_InstallConsoleEvent(int action, StgStablePtr *handler)
+{
+ StgInt previous_hdlr = console_handler;
+
+ switch (action) {
+ case STG_SIG_IGN:
+ console_handler = STG_SIG_IGN;
+ if ( !SetConsoleCtrlHandler(NULL, TRUE) ) {
+ errorBelch("warning: unable to ignore console events");
+ }
+ break;
+ case STG_SIG_DFL:
+ console_handler = STG_SIG_IGN;
+ if ( !SetConsoleCtrlHandler(NULL, FALSE) ) {
+ errorBelch("warning: unable to restore default console event handling");
+ }
+ break;
+ case STG_SIG_HAN:
+ console_handler = (StgInt)*handler;
+ if ( previous_hdlr < 0 ) {
+ /* Only install generic_handler() once */
+ if ( !SetConsoleCtrlHandler(generic_handler, TRUE) ) {
+ errorBelch("warning: unable to install console event handler");
+ }
+ }
+ break;
+ }
+
+ if (previous_hdlr == STG_SIG_DFL ||
+ previous_hdlr == STG_SIG_IGN) {
+ return previous_hdlr;
+ } else {
+ *handler = (StgStablePtr)previous_hdlr;
+ return STG_SIG_HAN;
+ }
+}
+
+/*
+ * Function: rts_HandledConsoleEvent()
+ *
+ * Signal that a Haskell console event handler has completed its run.
+ * The explicit notification that a Haskell handler has completed is
+ * required to better handle the delivery of Ctrl-C/Break events whilst
+ * an async worker thread is handling a read request on stdin. The
+ * Win32 console implementation will abort such a read request when Ctrl-C
+ * is delivered. That leaves the worker thread in a bind: should it
+ * abandon the request (the Haskell thread reading from stdin has been
+ * thrown an exception to signal the delivery of Ctrl-C & hence have
+ * aborted the I/O request) or simply ignore the aborted read and retry?
+ * (the Haskell thread reading from stdin isn't concerned with the
+ * delivery and handling of Ctrl-C.) With both scenarios being
+ * possible, the worker thread needs to be told -- that is, did the
+ * console event handler cause the IO request to be abandoned?
+ *
+ */
+void
+rts_ConsoleHandlerDone(int ev)
+{
+ if ( (DWORD)ev == CTRL_BREAK_EVENT ||
+ (DWORD)ev == CTRL_C_EVENT ) {
+ /* only these two cause stdin system calls to abort.. */
+ SetEvent(hConsoleEvent); /* event is manual-reset */
+ Sleep(0); /* yield */
+ ResetEvent(hConsoleEvent); /* turn it back off again */
+ }
+}
+
+/*
+ * Function: rts_waitConsoleHandlerCompletion()
+ *
+ * Esoteric entry point used by worker thread that got woken
+ * up as part of Ctrl-C delivery.
+ */
+int
+rts_waitConsoleHandlerCompletion()
+{
+ /* As long as the worker doesn't need to do a multiple wait,
+ * let's keep this HANDLE private to this 'module'.
+ */
+ return (WaitForSingleObject(hConsoleEvent, INFINITE) == WAIT_OBJECT_0);
+}
diff --git a/rts/win32/ConsoleHandler.h b/rts/win32/ConsoleHandler.h
new file mode 100644
index 0000000000..b09adf71cb
--- /dev/null
+++ b/rts/win32/ConsoleHandler.h
@@ -0,0 +1,63 @@
+/*
+ * Console control handler support.
+ *
+ */
+#ifndef __CONSOLEHANDLER_H__
+#define __CONSOLEHANDLER_H__
+
+/*
+ * Console control handlers lets an application handle Ctrl+C, Ctrl+Break etc.
+ * in Haskell under Win32. Akin to the Unix signal SIGINT.
+ *
+ * The API offered by ConsoleHandler.h is identical to that of the signal handling
+ * code (which isn't supported under win32.) Unsurprisingly, the underlying impl
+ * is derived from the signal handling code also.
+ */
+
+/*
+ * Function: signals_pending()
+ *
+ * Used by the RTS to check whether new signals have been 'recently' reported.
+ * If so, the RTS arranges for the delivered signals to be handled by
+ * de-queueing them from their table, running the associated Haskell
+ * signal handler.
+ */
+extern StgInt stg_pending_events;
+
+#define signals_pending() ( stg_pending_events > 0)
+
+/*
+ * Function: anyUserHandlers()
+ *
+ * Used by the Scheduler to decide whether its worth its while to stick
+ * around waiting for an external signal when there are no threads
+ * runnable. A console handler is used to handle termination events (Ctrl+C)
+ * and isn't considered a 'user handler'.
+ */
+#define anyUserHandlers() (rtsFalse)
+
+/*
+ * Function: startSignalHandlers()
+ *
+ * Run the handlers associated with the queued up console events. Console
+ * event delivery is blocked for the duration of this call.
+ */
+extern void startSignalHandlers(Capability *cap);
+
+/*
+ * Function: handleSignalsInThisThread()
+ *
+ * Have current (OS) thread assume responsibility of handling console events/signals.
+ * Currently not used (by the console event handling code.)
+ */
+extern void handleSignalsInThisThread(void);
+
+/*
+ * Function: rts_waitConsoleHandlerCompletion()
+ *
+ * Esoteric entry point used by worker thread that got woken
+ * up as part Ctrl-C delivery.
+ */
+extern int rts_waitConsoleHandlerCompletion(void);
+
+#endif /* __CONSOLEHANDLER_H__ */
diff --git a/rts/win32/GetTime.c b/rts/win32/GetTime.c
new file mode 100644
index 0000000000..584b994d53
--- /dev/null
+++ b/rts/win32/GetTime.c
@@ -0,0 +1,101 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2005
+ *
+ * Machine-dependent time measurement functions
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "GetTime.h"
+
+#include <windows.h>
+
+#ifdef HAVE_TIME_H
+# include <time.h>
+#endif
+
+#define HNS_PER_SEC 10000000LL /* FILETIMES are in units of 100ns */
+/* Convert FILETIMEs into secs */
+
+static INLINE_ME Ticks
+fileTimeToTicks(FILETIME ft)
+{
+ Ticks t;
+ t = ((Ticks)ft.dwHighDateTime << 32) | ft.dwLowDateTime;
+ t = (t * TICKS_PER_SECOND) / HNS_PER_SEC;
+ return t;
+}
+
+static int is_win9x = -1;
+
+static INLINE_ME rtsBool
+isWin9x(void)
+{
+ if (is_win9x < 0) {
+ /* figure out whether we're on a Win9x box or not. */
+ OSVERSIONINFO oi;
+ BOOL b;
+
+ /* Need to init the size field first.*/
+ oi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+ b = GetVersionEx(&oi);
+
+ is_win9x = ( (b && (oi.dwPlatformId & VER_PLATFORM_WIN32_WINDOWS)) ? 1 : 0);
+ }
+ return is_win9x;
+}
+
+
+void
+getProcessTimes(Ticks *user, Ticks *elapsed)
+{
+ *user = getProcessCPUTime();
+ *elapsed = getProcessElapsedTime();
+}
+
+Ticks
+getProcessCPUTime(void)
+{
+ FILETIME creationTime, exitTime, userTime, kernelTime = {0,0};
+
+ if (isWin9x()) return getProcessElapsedTime();
+
+ if (!GetProcessTimes(GetCurrentProcess(), &creationTime,
+ &exitTime, &kernelTime, &userTime)) {
+ return 0;
+ }
+
+ return fileTimeToTicks(userTime);
+}
+
+Ticks
+getProcessElapsedTime(void)
+{
+ FILETIME system_time;
+ GetSystemTimeAsFileTime(&system_time);
+ return fileTimeToTicks(system_time);
+}
+
+Ticks
+getThreadCPUTime(void)
+{
+ FILETIME creationTime, exitTime, userTime, kernelTime = {0,0};
+
+ if (isWin9x()) return getProcessCPUTime();
+
+ if (!GetThreadTimes(GetCurrentThread(), &creationTime,
+ &exitTime, &kernelTime, &userTime)) {
+ return 0;
+ }
+
+ return fileTimeToTicks(userTime);
+}
+
+nat
+getPageFaults(void)
+{
+ /* ToDo (on NT): better, get this via the performance data
+ that's stored in the registry. */
+ return 0;
+}
diff --git a/rts/win32/IOManager.c b/rts/win32/IOManager.c
new file mode 100644
index 0000000000..a67c3504c1
--- /dev/null
+++ b/rts/win32/IOManager.c
@@ -0,0 +1,510 @@
+/* IOManager.c
+ *
+ * Non-blocking / asynchronous I/O for Win32.
+ *
+ * (c) sof, 2002-2003.
+ */
+#include "Rts.h"
+#include "IOManager.h"
+#include "WorkQueue.h"
+#include "ConsoleHandler.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <io.h>
+#include <winsock.h>
+#include <process.h>
+
+/*
+ * Internal state maintained by the IO manager.
+ */
+typedef struct IOManagerState {
+ CritSection manLock;
+ WorkQueue* workQueue;
+ int queueSize;
+ int numWorkers;
+ int workersIdle;
+ HANDLE hExitEvent;
+ unsigned int requestID;
+ /* fields for keeping track of active WorkItems */
+ CritSection active_work_lock;
+ WorkItem* active_work_items;
+} IOManagerState;
+
+/* ToDo: wrap up this state via a IOManager handle instead? */
+static IOManagerState* ioMan;
+
+static void RegisterWorkItem ( IOManagerState* iom, WorkItem* wi);
+static void DeregisterWorkItem( IOManagerState* iom, WorkItem* wi);
+
+/*
+ * The routine executed by each worker thread.
+ */
+static
+unsigned
+WINAPI
+IOWorkerProc(PVOID param)
+{
+ HANDLE hWaits[2];
+ DWORD rc;
+ IOManagerState* iom = (IOManagerState*)param;
+ WorkQueue* pq = iom->workQueue;
+ WorkItem* work;
+ int len = 0, fd = 0;
+ DWORD errCode = 0;
+ void* complData;
+
+ hWaits[0] = (HANDLE)iom->hExitEvent;
+ hWaits[1] = GetWorkQueueHandle(pq);
+
+ while (1) {
+ /* The error code is communicated back on completion of request; reset. */
+ errCode = 0;
+
+ EnterCriticalSection(&iom->manLock);
+ /* Signal that the worker is idle.
+ *
+ * 'workersIdle' is used when determining whether or not to
+ * increase the worker thread pool when adding a new request.
+ * (see addIORequest().)
+ */
+ iom->workersIdle++;
+ LeaveCriticalSection(&iom->manLock);
+
+ /*
+ * A possible future refinement is to make long-term idle threads
+ * wake up and decide to shut down should the number of idle threads
+ * be above some threshold.
+ *
+ */
+ rc = WaitForMultipleObjects( 2, hWaits, FALSE, INFINITE );
+
+ if (rc == WAIT_OBJECT_0) {
+ // we received the exit event
+ return 0;
+ }
+
+ EnterCriticalSection(&iom->manLock);
+ /* Signal that the thread is 'non-idle' and about to consume
+ * a work item.
+ */
+ iom->workersIdle--;
+ iom->queueSize--;
+ LeaveCriticalSection(&iom->manLock);
+
+ if ( rc == (WAIT_OBJECT_0 + 1) ) {
+ /* work item available, fetch it. */
+ if (FetchWork(pq,(void**)&work)) {
+ work->abandonOp = 0;
+ RegisterWorkItem(iom,work);
+ if ( work->workKind & WORKER_READ ) {
+ if ( work->workKind & WORKER_FOR_SOCKET ) {
+ len = recv(work->workData.ioData.fd,
+ work->workData.ioData.buf,
+ work->workData.ioData.len,
+ 0);
+ if (len == SOCKET_ERROR) {
+ errCode = WSAGetLastError();
+ }
+ } else {
+ while (1) {
+ /* Do the read(), with extra-special handling for Ctrl+C */
+ len = read(work->workData.ioData.fd,
+ work->workData.ioData.buf,
+ work->workData.ioData.len);
+ if ( len == 0 && work->workData.ioData.len != 0 ) {
+ /* Given the following scenario:
+ * - a console handler has been registered that handles Ctrl+C
+ * events.
+ * - we've not tweaked the 'console mode' settings to turn on
+ * ENABLE_PROCESSED_INPUT.
+ * - we're blocked waiting on input from standard input.
+ * - the user hits Ctrl+C.
+ *
+ * The OS will invoke the console handler (in a separate OS thread),
+ * and the above read() (i.e., under the hood, a ReadFile() op) returns
+ * 0, with the error set to ERROR_OPERATION_ABORTED. We don't
+ * want to percolate this error condition back to the Haskell user.
+ * Do this by waiting for the completion of the Haskell console handler.
+ * If upon completion of the console handler routine, the Haskell thread
+ * that issued the request is found to have been thrown an exception,
+ * the worker abandons the request (since that's what the Haskell thread
+ * has done.) If the Haskell thread hasn't been interrupted, the worker
+ * retries the read request as if nothing happened.
+ */
+ if ( (GetLastError()) == ERROR_OPERATION_ABORTED ) {
+ /* For now, only abort when dealing with the standard input handle.
+ * i.e., for all others, an error is raised.
+ */
+ HANDLE h = (HANDLE)GetStdHandle(STD_INPUT_HANDLE);
+ if ( _get_osfhandle(work->workData.ioData.fd) == (long)h ) {
+ if (rts_waitConsoleHandlerCompletion()) {
+ /* If the Scheduler has set work->abandonOp, the Haskell thread has
+ * been thrown an exception (=> the worker must abandon this request.)
+ * We test for this below before invoking the on-completion routine.
+ */
+ if (work->abandonOp) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ } else {
+ break; /* Treat it like an error */
+ }
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ if (len == -1) { errCode = errno; }
+ }
+ complData = work->workData.ioData.buf;
+ fd = work->workData.ioData.fd;
+ } else if ( work->workKind & WORKER_WRITE ) {
+ if ( work->workKind & WORKER_FOR_SOCKET ) {
+ len = send(work->workData.ioData.fd,
+ work->workData.ioData.buf,
+ work->workData.ioData.len,
+ 0);
+ if (len == SOCKET_ERROR) {
+ errCode = WSAGetLastError();
+ }
+ } else {
+ len = write(work->workData.ioData.fd,
+ work->workData.ioData.buf,
+ work->workData.ioData.len);
+ if (len == -1) { errCode = errno; }
+ }
+ complData = work->workData.ioData.buf;
+ fd = work->workData.ioData.fd;
+ } else if ( work->workKind & WORKER_DELAY ) {
+ /* Approximate implementation of threadDelay;
+ *
+ * Note: Sleep() is in milliseconds, not micros.
+ */
+ Sleep(work->workData.delayData.msecs / 1000);
+ len = work->workData.delayData.msecs;
+ complData = NULL;
+ fd = 0;
+ errCode = 0;
+ } else if ( work->workKind & WORKER_DO_PROC ) {
+ /* perform operation/proc on behalf of Haskell thread. */
+ if (work->workData.procData.proc) {
+ /* The procedure is assumed to encode result + success/failure
+ * via its param.
+ */
+ errCode=work->workData.procData.proc(work->workData.procData.param);
+ } else {
+ errCode=1;
+ }
+ complData = work->workData.procData.param;
+ } else {
+ fprintf(stderr, "unknown work request type (%d) , ignoring.\n", work->workKind);
+ fflush(stderr);
+ continue;
+ }
+ if (!work->abandonOp) {
+ work->onCompletion(work->requestID,
+ fd,
+ len,
+ complData,
+ errCode);
+ }
+ /* Free the WorkItem */
+ DeregisterWorkItem(iom,work);
+ free(work);
+ } else {
+ fprintf(stderr, "unable to fetch work; fatal.\n"); fflush(stderr);
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "waiting failed (%lu); fatal.\n", rc); fflush(stderr);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static
+BOOL
+NewIOWorkerThread(IOManagerState* iom)
+{
+ unsigned threadId;
+ return ( 0 != _beginthreadex(NULL,
+ 0,
+ IOWorkerProc,
+ (LPVOID)iom,
+ 0,
+ &threadId) );
+}
+
+/* Initialise the global I/O manager state: its work queue, the
+ * manual-reset shutdown event, and both critical sections.
+ * Returns FALSE (with everything torn down again) on any failure.
+ */
+BOOL
+StartIOManager(void)
+{
+  WorkQueue* queue;
+  HANDLE     exitEvent;
+
+  queue = NewWorkQueue();
+  if (!queue) {
+    return FALSE;
+  }
+
+  ioMan = (IOManagerState*)malloc(sizeof(IOManagerState));
+  if (!ioMan) {
+    FreeWorkQueue(queue);
+    return FALSE;
+  }
+
+  /* A manual-reset event, signalled once at shutdown time. */
+  exitEvent = CreateEvent ( NULL, TRUE, FALSE, NULL );
+  if (!exitEvent) {
+    FreeWorkQueue(queue);
+    free(ioMan);
+    return FALSE;
+  }
+
+  ioMan->hExitEvent = exitEvent;
+  InitializeCriticalSection(&ioMan->manLock);
+  InitializeCriticalSection(&ioMan->active_work_lock);
+  ioMan->workQueue         = queue;
+  ioMan->numWorkers        = 0;
+  ioMan->workersIdle       = 0;
+  ioMan->queueSize         = 0;
+  ioMan->requestID         = 1;
+  ioMan->active_work_items = NULL;
+
+  return TRUE;
+}
+
+/*
+ * Function: depositWorkItem()
+ *
+ * Local function which deposits a WorkItem onto a work queue,
+ * deciding in the process whether or not the thread pool needs
+ * to be augmented with another thread to handle the new request.
+ *
+ * Returns the request ID on success, 0 if the queue submission failed.
+ * NOTE(review): assumes ioMan is non-NULL -- callers must check.
+ */
+static
+int
+depositWorkItem( unsigned int reqID,
+		 WorkItem* wItem )
+{
+  EnterCriticalSection(&ioMan->manLock);
+
+#if 0
+  fprintf(stderr, "depositWorkItem: %d/%d\n", ioMan->workersIdle, ioMan->numWorkers);
+  fflush(stderr);
+#endif
+  /* A new worker thread is created when there are fewer idle threads
+   * than non-consumed queue requests. This ensures that requests will
+   * be dealt with in a timely manner.
+   *
+   * [Long explanation of why the previous thread pool policy lead to
+   * trouble]
+   *
+   * Previously, the thread pool was augmented iff no idle worker threads
+   * were available. That strategy runs the risk of repeatedly adding to
+   * the request queue without expanding the thread pool to handle this
+   * sudden spike in queued requests.
+   * [How? Assume workersIdle is 1, and addIORequest() is called. No new
+   * thread is created and the request is simply queued. If addIORequest()
+   * is called again _before the OS schedules a worker thread to pull the
+   * request off the queue_, workersIdle is still 1 and another request is
+   * simply added to the queue. Once the worker thread is run, only one
+   * request is de-queued, leaving the 2nd request in the queue]
+   *
+   * Assuming none of the queued requests take an inordinate amount of time
+   * to complete, the request queue would eventually be drained. But if that's
+   * not the case, the later requests will end up languishing in the queue
+   * indefinitely. The non-timely handling of requests may cause Concurrent
+   * Haskell applications to misbehave / hang; bad.
+   *
+   */
+  ioMan->queueSize++;
+  if ( (ioMan->workersIdle < ioMan->queueSize) ) {
+      /* see if giving up our quantum ferrets out some idle threads.
+       */
+      LeaveCriticalSection(&ioMan->manLock);
+      Sleep(0);
+      EnterCriticalSection(&ioMan->manLock);
+      if ( (ioMan->workersIdle < ioMan->queueSize) ) {
+	  /* No, go ahead and create another. */
+	  ioMan->numWorkers++;
+	  /* Drop the lock before the (potentially slow) thread creation;
+	   * the counter bump above reserves our slot. */
+	  LeaveCriticalSection(&ioMan->manLock);
+	  NewIOWorkerThread(ioMan);
+      } else {
+	  LeaveCriticalSection(&ioMan->manLock);
+      }
+  } else {
+      LeaveCriticalSection(&ioMan->manLock);
+  }
+
+  if (SubmitWork(ioMan->workQueue,wItem)) {
+      /* Note: the work item has potentially been consumed by a worker thread
+       * (and freed) at this point, so we cannot use wItem's requestID.
+       */
+      return reqID;
+  } else {
+      return 0;
+  }
+}
+
+/*
+ * Function: AddIORequest()
+ *
+ * Conduit to underlying WorkQueue's SubmitWork(); adds IO
+ * request to work queue, deciding whether or not to augment
+ * the thread pool in the process.
+ *
+ * Returns the (non-zero) request ID on success, 0 on failure.
+ */
+int
+AddIORequest ( int   fd,
+	       BOOL  forWriting,
+	       BOOL  isSocket,
+	       int   len,
+	       char* buffer,
+	       CompletionProc onCompletion)
+{
+  WorkItem*    wItem;
+  unsigned int reqID;
+
+  /* Check ioMan *before* touching it -- the old code incremented
+   * ioMan->requestID ahead of the NULL test (and leaked wItem on the
+   * !ioMan path).
+   */
+  if (!ioMan) return 0;
+
+  wItem = (WorkItem*)malloc(sizeof(WorkItem));
+  if (!wItem) return 0;
+
+  reqID = ioMan->requestID++;
+
+  /* Fill in the blanks */
+  wItem->workKind = ( isSocket ? WORKER_FOR_SOCKET : 0 ) |
+		    ( forWriting ? WORKER_WRITE : WORKER_READ );
+  wItem->workData.ioData.fd  = fd;
+  wItem->workData.ioData.len = len;
+  wItem->workData.ioData.buf = buffer;
+  wItem->link = NULL;
+
+  wItem->onCompletion = onCompletion;
+  wItem->requestID    = reqID;
+  /* abandonOp was previously left uninitialised, so a worker could read
+   * garbage via work->abandonOp and silently drop the completion. */
+  wItem->abandonOp    = 0;
+
+  return depositWorkItem(reqID, wItem);
+}
+
+/*
+ * Function: AddDelayRequest()
+ *
+ * Like AddIORequest(), but this time adding a delay request to
+ * the request queue.  Returns the request ID, or FALSE (0) on failure.
+ */
+BOOL
+AddDelayRequest ( unsigned int msecs,
+		  CompletionProc onCompletion)
+{
+  WorkItem*    wItem;
+  unsigned int reqID;
+
+  /* Check ioMan *before* touching it -- the old code incremented
+   * ioMan->requestID ahead of the NULL test.
+   */
+  if (!ioMan) return FALSE;
+
+  wItem = (WorkItem*)malloc(sizeof(WorkItem));
+  if (!wItem) return FALSE;
+
+  reqID = ioMan->requestID++;
+
+  /* Fill in the blanks */
+  wItem->workKind = WORKER_DELAY;
+  wItem->workData.delayData.msecs = msecs;
+  wItem->onCompletion = onCompletion;
+  wItem->requestID    = reqID;
+  /* abandonOp was previously left uninitialised here. */
+  wItem->abandonOp    = 0;
+  wItem->link         = NULL;
+
+  return depositWorkItem(reqID, wItem);
+}
+
+/*
+ * Function: AddProcRequest()
+ *
+ * Add an asynchronous procedure request; 'proc' is run by a worker
+ * thread with 'param' as its argument.  Returns the request ID, or
+ * FALSE (0) on failure.
+ */
+BOOL
+AddProcRequest ( void* proc,
+		 void* param,
+		 CompletionProc onCompletion)
+{
+  WorkItem*    wItem;
+  unsigned int reqID;
+
+  /* Check ioMan *before* touching it -- the old code incremented
+   * ioMan->requestID ahead of the NULL test.
+   */
+  if (!ioMan) return FALSE;
+
+  wItem = (WorkItem*)malloc(sizeof(WorkItem));
+  if (!wItem) return FALSE;
+
+  reqID = ioMan->requestID++;
+
+  /* Fill in the blanks */
+  wItem->workKind = WORKER_DO_PROC;
+  wItem->workData.procData.proc  = proc;
+  wItem->workData.procData.param = param;
+  wItem->onCompletion = onCompletion;
+  wItem->requestID    = reqID;
+  wItem->abandonOp    = 0;
+  wItem->link         = NULL;
+
+  return depositWorkItem(reqID, wItem);
+}
+
+/* Signal the worker threads to exit.  Safe to call even if the manager
+ * was never (successfully) started.
+ */
+void ShutdownIOManager ( void )
+{
+  /* Previously this dereferenced ioMan unconditionally, crashing if
+   * StartIOManager() had failed or never run. */
+  if (!ioMan) return;
+
+  SetEvent(ioMan->hExitEvent);
+  // ToDo: we can't free this now, because the worker thread(s)
+  // haven't necessarily finished with it yet.  Perhaps it should
+  // have a reference count or something.
+  // free(ioMan);
+  // ioMan = NULL;
+}
+
+/* Keep track of WorkItems currently being serviced: push 'wi' onto
+ * the front of the manager's singly-linked active list.
+ * (Parameter renamed so it no longer shadows the global 'ioMan'.)
+ */
+static
+void
+RegisterWorkItem(IOManagerState* iom,
+		 WorkItem* wi)
+{
+  EnterCriticalSection(&iom->active_work_lock);
+  wi->link = iom->active_work_items;
+  iom->active_work_items = wi;
+  LeaveCriticalSection(&iom->active_work_lock);
+}
+
+/* Remove the entry whose requestID matches 'wi' from the active list;
+ * complains on stderr (but does not fail) if it isn't found.
+ * (Parameter renamed so it no longer shadows the global 'ioMan'.)
+ */
+static
+void
+DeregisterWorkItem(IOManagerState* iom,
+		   WorkItem* wi)
+{
+  WorkItem **link;
+
+  EnterCriticalSection(&iom->active_work_lock);
+  /* Walk via a pointer-to-link so unlinking needs no 'prev' bookkeeping. */
+  for (link = &iom->active_work_items; *link; link = &(*link)->link) {
+    if ((*link)->requestID == wi->requestID) {
+      *link = (*link)->link;
+      LeaveCriticalSection(&iom->active_work_lock);
+      return;
+    }
+  }
+  fprintf(stderr, "DeregisterWorkItem: unable to locate work item %d\n", wi->requestID);
+  LeaveCriticalSection(&iom->active_work_lock);
+}
+
+
+/*
+ * Function: abandonWorkRequest()
+ *
+ * Signal that a work request isn't of interest. Called by the Scheduler
+ * if a blocked Haskell thread has an exception thrown to it.
+ *
+ * Note: we're not aborting the system call that a worker might be blocked on
+ * here, just disabling the propagation of its result once its finished. We
+ * may have to go the whole hog here and switch to overlapped I/O so that we
+ * can abort blocked system calls.
+ */
+void
+abandonWorkRequest ( int reqID )
+{
+  WorkItem *ptr;
+  EnterCriticalSection(&ioMan->active_work_lock);
+  for(ptr=ioMan->active_work_items;ptr;ptr=ptr->link) {
+    if (ptr->requestID == (unsigned int)reqID ) {
+	/* Setting abandonOp suppresses the onCompletion callback when
+	 * the worker finishes (the worker tests !work->abandonOp). */
+	ptr->abandonOp = 1;
+	LeaveCriticalSection(&ioMan->active_work_lock);
+	return;
+    }
+  }
+  /* Note: if the request ID isn't present, the worker will have
+   * finished sometime since awaitRequests() last drained the completed
+   * request table; i.e., not an error.
+   */
+  LeaveCriticalSection(&ioMan->active_work_lock);
+}
diff --git a/rts/win32/IOManager.h b/rts/win32/IOManager.h
new file mode 100644
index 0000000000..4893e2387c
--- /dev/null
+++ b/rts/win32/IOManager.h
@@ -0,0 +1,110 @@
+/* IOManager.h
+ *
+ * Non-blocking / asynchronous I/O for Win32.
+ *
+ * (c) sof, 2002-2003
+ */
+#ifndef __IOMANAGER_H__
+#define __IOMANAGER_H__
+/* NOTE(review): double-underscore guard names are reserved identifiers
+ * in C; harmless in practice, but IOMANAGER_H would be cleaner. */
+/* On the yucky side..suppress -Wmissing-declarations warnings when
+ * including <windows.h>
+ */
+extern void* GetCurrentFiber ( void );
+extern void* GetFiberData ( void );
+#include <windows.h>
+
+/*
+  The IOManager subsystem provides a non-blocking view
+  of I/O operations. It lets one (or more) OS thread(s)
+  issue multiple I/O requests, which the IOManager then
+  handles independently of/concurrent to the thread(s)
+  that issued the request. Upon completion, the issuing
+  thread can inspect the result of the I/O operation &
+  take appropriate action.
+
+  The IOManager is intended used with the GHC RTS to
+  implement non-blocking I/O in Concurrent Haskell.
+ */
+
+/*
+ * Our WorkQueue holds WorkItems, encoding IO and
+ * delay requests.
+ *
+ * A CompletionProc is invoked by a worker thread once a request
+ * finishes; 'errCode' 0 means success.
+ */
+typedef void (*CompletionProc)(unsigned int requestID,
+			       int   fd,
+			       int   len,
+			       void* buf,
+			       int   errCode);
+
+/*
+ * Asynchronous procedure calls executed by a worker thread
+ * take a generic state argument pointer and return an int by
+ * default.
+ */
+typedef int (*DoProcProc)(void *param);
+
+/* Per-request payload; which member is valid depends on workKind. */
+typedef union workData {
+    struct {
+	int   fd;
+	int   len;
+	char *buf;
+    } ioData;
+    struct {
+	int   msecs;
+    } delayData;
+    struct {
+	DoProcProc proc;
+	void* param;
+    } procData;
+} WorkData;
+
+typedef struct WorkItem {
+  unsigned int     workKind;     /* bitmask of the WORKER_* flags below */
+  WorkData         workData;
+  unsigned int     requestID;    /* unique id handed back via CompletionProc */
+  CompletionProc   onCompletion;
+  unsigned int     abandonOp;    /* non-zero => suppress onCompletion */
+  struct WorkItem *link;         /* next item in the active-work list */
+} WorkItem;
+
+/* NOTE(review): no definition of 'onComplete' is visible in IOManager.c;
+ * confirm this extern is still referenced/defined elsewhere. */
+extern CompletionProc onComplete;
+
+/* the kind of operations supported; you could easily imagine
+ * that instead of passing a tag describing the work to be performed,
+ * a function pointer is passed instead. Maybe later.
+ */
+#define WORKER_READ       1
+#define WORKER_WRITE      2
+#define WORKER_DELAY      4
+#define WORKER_FOR_SOCKET 8
+#define WORKER_DO_PROC    16
+
+/*
+ * Starting up and shutting down.
+ */
+extern BOOL StartIOManager     ( void );
+extern void ShutdownIOManager  ( void );
+
+/*
+ * Adding I/O and delay requests. With each request a
+ * completion routine is supplied, which the worker thread
+ * will invoke upon completion.
+ *
+ * Each returns the (non-zero) request ID on success, 0 on failure.
+ * NOTE(review): the definitions in IOManager.c declare the Add*Request
+ * functions as returning BOOL; BOOL is typedef'ed to int on Win32, so
+ * the declarations here are link-compatible -- but confirm.
+ */
+extern int AddDelayRequest ( unsigned int msecs,
+			     CompletionProc onCompletion);
+
+extern int AddIORequest ( int fd,
+			  BOOL forWriting,
+			  BOOL isSocket,
+			  int len,
+			  char* buffer,
+			  CompletionProc onCompletion);
+
+extern int AddProcRequest ( void* proc,
+			    void* data,
+			    CompletionProc onCompletion);
+
+extern void abandonWorkRequest ( int reqID );
+
+#endif /* __IOMANAGER_H__ */
diff --git a/rts/win32/OSThreads.c b/rts/win32/OSThreads.c
new file mode 100644
index 0000000000..c772be38f4
--- /dev/null
+++ b/rts/win32/OSThreads.c
@@ -0,0 +1,199 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001-2005
+ *
+ * Accessing OS threads functionality in a (mostly) OS-independent
+ * manner.
+ *
+ * --------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#if defined(THREADED_RTS)
+#include "OSThreads.h"
+#include "RtsUtils.h"
+
+/* For reasons not yet clear, the entire contents of process.h is protected
+ * by __STRICT_ANSI__ not being defined.
+ */
+#undef __STRICT_ANSI__
+#include <process.h>
+
+/* Win32 threads and synchronisation objects */
+
+/* A Condition is represented by a Win32 Event object;
+ * a Mutex by a Mutex kernel object.
+ *
+ * ToDo: go through the defn and usage of these to
+ * make sure the semantics match up with that of
+ * the (assumed) pthreads behaviour. This is really
+ * just a first pass at getting something compilable.
+ */
+
+/* Create a Condition: an auto-reset, initially non-signalled,
+ * process-local Win32 Event object.
+ */
+void
+initCondition( Condition* pCond )
+{
+    *pCond = CreateEvent(NULL,
+			 FALSE, /* auto reset */
+			 FALSE, /* initially not signalled */
+			 NULL); /* unnamed => process-local. */
+    if (*pCond == NULL) {
+	errorBelch("initCondition: unable to create");
+    }
+}
+
+/* Dispose of a Condition's underlying event handle. */
+void
+closeCondition( Condition* pCond )
+{
+    if ( 0 == CloseHandle(*pCond) ) {
+	errorBelch("closeCondition: failed to close");
+    }
+}
+
+/* Wake all threads currently waiting on the condition.
+ *
+ * NOTE(review): PulseEvent releases only the threads already blocked in
+ * a wait at the instant of the call, and Microsoft's documentation
+ * flags it as unreliable for building condition variables (a waiter
+ * pre-empted between releasing the lock and entering the wait misses
+ * the pulse).  This matches the "first pass" caveat at the top of the
+ * file -- revisit if lost wakeups are observed.
+ */
+rtsBool
+broadcastCondition ( Condition* pCond )
+{
+  PulseEvent(*pCond);
+  return rtsTrue;
+}
+
+/* Wake one waiter (the event is auto-reset, so a single waiting
+ * thread is released).  Aborts the RTS if SetEvent fails.
+ */
+rtsBool
+signalCondition ( Condition* pCond )
+{
+    if ( 0 == SetEvent(*pCond) ) {
+	barf("SetEvent: %d", GetLastError());
+    }
+    return rtsTrue;
+}
+
+/* Wait on 'pCond', releasing 'pMut' while blocked and re-acquiring it
+ * before returning.
+ *
+ * NOTE(review): unlike pthread_cond_wait(), the unlock and the wait are
+ * not a single atomic step here; a signal landing between RELEASE_LOCK()
+ * and WaitForSingleObject() is only picked up because the (auto-reset)
+ * event stays signalled until some waiter consumes it.  Multi-waiter
+ * semantics deserve the scrutiny promised in the ToDo above.
+ */
+rtsBool
+waitCondition ( Condition* pCond, Mutex* pMut )
+{
+  RELEASE_LOCK(pMut);
+  WaitForSingleObject(*pCond, INFINITE);
+  /* Hmm..use WaitForMultipleObjects() ? */
+  ACQUIRE_LOCK(pMut);
+  return rtsTrue;
+}
+
+/* Surrender the remainder of this thread's time slice. */
+void
+yieldThread()
+{
+    Sleep(0);
+}
+
+/* Terminate the calling thread (does not return). */
+void
+shutdownThread()
+{
+  _endthreadex(0);
+}
+
+/* Create a new OS thread running startProc(param), storing its id in
+ * *pId.
+ *
+ * Returns 0 on success and non-zero on failure -- note the inverted
+ * sense: _beginthreadex() yields 0 when it fails.
+ */
+int
+createOSThread (OSThreadId* pId, OSThreadProc *startProc, void *param)
+{
+
+  return (_beginthreadex ( NULL,  /* default security attributes */
+			   0,
+			   (unsigned (__stdcall *)(void *)) startProc,
+			   param,
+			   0,
+			   (unsigned*)pId) == 0);
+}
+
+/* The calling thread's id; OSThreadIds are Win32 thread identifiers. */
+OSThreadId
+osThreadId()
+{
+  return GetCurrentThreadId();
+}
+
+#ifdef USE_CRITICAL_SECTIONS
+/* Mutexes as critical sections: cheaper than kernel Mutex objects but
+ * process-local only.  The spin count lets a contending thread spin
+ * briefly before blocking.
+ */
+void
+initMutex (Mutex* pMut)
+{
+  InitializeCriticalSectionAndSpinCount(pMut,4000);
+}
+#else
+/* Mutexes as Win32 Mutex kernel objects.
+ * NOTE(review): no failure check -- a NULL from CreateMutex is stored
+ * silently; confirm callers can tolerate that.
+ */
+void
+initMutex (Mutex* pMut)
+{
+  HANDLE h = CreateMutex ( NULL,  /* default sec. attributes */
+			   FALSE, /* not owned => initially signalled */
+			   NULL
+			   );
+  *pMut = h;
+  return;
+}
+#endif
+
+/* Allocate a fresh TLS slot, aborting the RTS if none remain. */
+void
+newThreadLocalKey (ThreadLocalKey *key)
+{
+    DWORD idx = TlsAlloc();
+    if (idx == TLS_OUT_OF_INDEXES) {
+	barf("newThreadLocalKey: out of keys");
+    }
+    *key = idx;
+}
+
+/* Read the calling thread's value for TLS slot 'key'.
+ *
+ * NULL is a legitimate stored value, so in DEBUG builds "stored NULL"
+ * is distinguished from "lookup failed" via GetLastError() (per MSDN,
+ * TlsGetValue clears the last-error code on success).
+ */
+void *
+getThreadLocalVar (ThreadLocalKey *key)
+{
+    void *r;
+    r = TlsGetValue(*key);
+#ifdef DEBUG
+    // r is allowed to be NULL - it can mean that either there was an
+    // error or the stored value is in fact NULL.
+    if (GetLastError() != NO_ERROR) {
+	barf("getThreadLocalVar: key not found");
+    }
+#endif
+    return r;
+}
+
+/* Store 'value' in this thread's slot for 'key'; aborts on failure. */
+void
+setThreadLocalVar (ThreadLocalKey *key, void *value)
+{
+    if (!TlsSetValue(*key, value)) {
+	barf("setThreadLocalVar: %d", GetLastError());
+    }
+}
+
+
+/* Thread entry point for forkOS_createThread: acquire a capability,
+ * run the Haskell IO action whose stable pointer was smuggled in via
+ * 'entry', and release the capability again.
+ */
+static unsigned __stdcall
+forkOS_createThreadWrapper ( void * entry )
+{
+    Capability *cap;
+    cap = rts_lock();
+    cap = rts_evalStableIO(cap, (HsStablePtr) entry, NULL);
+    rts_unlock(cap);
+    return 0;
+}
+
+/* Spawn a bound OS thread running the Haskell action 'entry'.
+ * Returns 0 on success, non-zero on failure (same inverted sense as
+ * createOSThread).
+ *
+ * NOTE(review): pId is an unsigned long written through an (unsigned*)
+ * cast -- benign on Win32 where both are 32 bits, but worth confirming
+ * for any 64-bit port.
+ */
+int
+forkOS_createThread ( HsStablePtr entry )
+{
+    unsigned long pId;
+    return (_beginthreadex ( NULL,  /* default security attributes */
+			     0,
+			     forkOS_createThreadWrapper,
+			     (void*)entry,
+			     0,
+			     (unsigned*)&pId) == 0);
+}
+
+#else /* !defined(THREADED_RTS) */
+
+/* Non-threaded RTS: bound threads are unsupported, so always fail. */
+int
+forkOS_createThread ( HsStablePtr entry STG_UNUSED )
+{
+    return -1;
+}
+
+#endif /* !defined(THREADED_RTS) */
diff --git a/rts/win32/Ticker.c b/rts/win32/Ticker.c
new file mode 100644
index 0000000000..ab791d8dc7
--- /dev/null
+++ b/rts/win32/Ticker.c
@@ -0,0 +1,124 @@
+/*
+ * RTS periodic timers.
+ *
+ */
+#include "Rts.h"
+#include "Timer.h"
+#include "Ticker.h"
+#include <windows.h>
+#include <stdio.h>
+#include <process.h>
+#include "OSThreads.h"
+
+/*
+ * Provide a timer service for the RTS, periodically
+ * notifying it that a number of 'ticks' has passed.
+ *
+ */
+
+/* To signal shutdown of the timer service, we use a local
+ * event which the timer thread listens to (and stopVirtTimer()
+ * signals.)
+ */
+static HANDLE hStopEvent = INVALID_HANDLE_VALUE;
+static HANDLE tickThread = INVALID_HANDLE_VALUE;
+
+static TickProc tickProc = NULL;
+
+/*
+ * Ticking is done by a separate thread which periodically
+ * wakes up to handle a tick.
+ *
+ * This is the portable way of providing a timer service under
+ * Win32; features like waitable timers or timer queues are only
+ * supported by a subset of the Win32 platforms (notably not
+ * under Win9x.)
+ *
+ */
+/* Body of the timer thread: sleeps for the tick interval by waiting
+ * (with timeout) on hStopEvent, calling tickProc on each timeout and
+ * exiting once the event becomes signalled.
+ *
+ * 'param' carries the tick interval in milliseconds, smuggled in as an
+ * integer cast to a pointer by startTicker(); the (int) cast back is
+ * benign only for small values -- NOTE(review): confirm on 64-bit.
+ */
+static
+unsigned
+WINAPI
+TimerProc(PVOID param)
+{
+  int ms = (int)param;
+  DWORD waitRes;
+
+  /* interpret a < 0 timeout period as 'instantaneous' */
+ if (ms < 0) ms = 0;
+
+  while (1) {
+    waitRes = WaitForSingleObject(hStopEvent, ms);
+
+    switch (waitRes) {
+    case WAIT_OBJECT_0:
+      /* event has become signalled: drop our callback reference and
+       * close the stop event before exiting the thread. */
+      tickProc = NULL;
+      CloseHandle(hStopEvent);
+      return 0;
+    case WAIT_TIMEOUT:
+      /* tick */
+      tickProc(0);
+      break;
+    case WAIT_FAILED: {
+	DWORD dw = GetLastError();
+	fprintf(stderr, "TimerProc: wait failed -- error code: %lu\n", dw); fflush(stderr);
+	break;
+    }
+    default:
+      fprintf(stderr, "TimerProc: unexpected result %lu\n", waitRes); fflush(stderr);
+      break;
+    }
+  }
+  /* not reached: the loop only exits via the return above */
+  return 0;
+}
+
+
+/* Start the periodic tick service: create the shutdown event and spawn
+ * the timer thread, which calls handle_tick every 'ms' milliseconds.
+ * Returns non-zero on success, 0 on failure.
+ */
+int
+startTicker(nat ms, TickProc handle_tick)
+{
+  unsigned threadId;
+  /* 'hStopEvent' is a manual-reset event that's signalled upon
+   * shutdown of timer service (=> timer thread.)
+   */
+  hStopEvent = CreateEvent ( NULL,
+			     TRUE,
+			     FALSE,
+			     NULL);
+  /* CreateEvent() reports failure with NULL, not INVALID_HANDLE_VALUE,
+   * so the old '== INVALID_HANDLE_VALUE' test could never fire.  Also
+   * restore the sentinel so stopTicker() won't later SetEvent() a NULL
+   * handle.
+   */
+  if (hStopEvent == NULL) {
+    hStopEvent = INVALID_HANDLE_VALUE;
+    return 0;
+  }
+  tickProc = handle_tick;
+  tickThread = (HANDLE)(long)_beginthreadex( NULL,
+			       0,
+			       TimerProc,
+			       (LPVOID)ms,
+			       0,
+			       &threadId);
+  return (tickThread != 0);
+}
+
+/* Stop the tick service, waiting for the timer thread to exit.
+ * Returns 0 on success, 1 if the thread's exit code couldn't be read.
+ */
+int
+stopTicker(void)
+{
+    // We must wait for the ticker thread to terminate, since if we
+    // are in a DLL that is about to be unloaded, the ticker thread
+    // cannot be allowed to return to a missing DLL.
+
+    if (hStopEvent != INVALID_HANDLE_VALUE &&
+	tickThread != INVALID_HANDLE_VALUE) {
+	DWORD exitCode;
+	SetEvent(hStopEvent);
+	while (1) {
+	    /* Give the thread 20ms to notice the event and exit... */
+	    WaitForSingleObject(tickThread, 20);
+	    if (!GetExitCodeThread(tickThread, &exitCode)) {
+		return 1;
+	    }
+	    if (exitCode != STILL_ACTIVE) {
+		tickThread = INVALID_HANDLE_VALUE;
+		return 0;
+	    }
+	    /* ...otherwise kill it outright and re-check.
+	     * NOTE(review): TerminateThread is a blunt instrument (no
+	     * cleanup runs in the victim) -- presumably acceptable here
+	     * because we're about to unload anyway; confirm. */
+	    TerminateThread(tickThread, 0);
+	}
+    }
+    return 0;
+}
diff --git a/rts/win32/WorkQueue.c b/rts/win32/WorkQueue.c
new file mode 100644
index 0000000000..85a23608be
--- /dev/null
+++ b/rts/win32/WorkQueue.c
@@ -0,0 +1,215 @@
+/*
+ * A fixed-size queue; MT-friendly.
+ *
+ * (c) sof, 2002-2003.
+ */
+#include "WorkQueue.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static void queue_error_rc( char* loc, DWORD err);
+static void queue_error( char* loc, char* reason);
+
+
+/* Wrapper around the OS call to create a (counting) semaphore.
+ * Returns NULL -- after logging the Win32 error -- on failure.
+ */
+static Semaphore
+newSemaphore(int initCount, int max)
+{
+  Semaphore s = CreateSemaphore(NULL,      /* default security attributes */
+				initCount, /* LONG lInitialCount */
+				max,       /* LONG lMaxCount */
+				NULL);     /* anonymous / no object name */
+  if (s == NULL) {
+    queue_error_rc("newSemaphore", GetLastError());
+  }
+  return s;
+}
+
+/*
+ * Function: NewWorkQueue
+ *
+ * The queue constructor: allocates the queue and initialises both
+ * semaphores to match the fixed queue capacity.  Returns NULL on any
+ * failure, with partial state torn down again.
+ */
+WorkQueue*
+NewWorkQueue()
+{
+  WorkQueue* q = (WorkQueue*)malloc(sizeof(WorkQueue));
+
+  if (!q) {
+    queue_error("NewWorkQueue", "malloc() failed");
+    return q;
+  }
+
+  q->head = 0;
+  q->tail = 0;
+  InitializeCriticalSection(&q->queueLock);
+  q->workAvailable = newSemaphore(0, WORKQUEUE_SIZE);
+  q->roomAvailable = newSemaphore(WORKQUEUE_SIZE, WORKQUEUE_SIZE);
+
+  if (q->workAvailable && q->roomAvailable) {
+    return q;
+  }
+
+  /* One of the sync objects failed to materialise; tear everything down
+   * (FreeWorkQueue only closes the handles that were created). */
+  FreeWorkQueue(q);
+  return NULL;
+}
+
+/* Dispose of a queue and its semaphores; any threads blocked waiting
+ * on either semaphore will as a result be woken up.
+ * Tolerates partially-constructed queues (NULL semaphore handles).
+ */
+void
+FreeWorkQueue ( WorkQueue* pq )
+{
+  if ( pq->workAvailable ) {
+    CloseHandle(pq->workAvailable);
+  }
+  if ( pq->roomAvailable ) {
+    /* Was CloseHandle(pq->workAvailable) -- a copy/paste slip that
+     * closed workAvailable twice and leaked roomAvailable. */
+    CloseHandle(pq->roomAvailable);
+  }
+  free(pq);
+  return;
+}
+
+/* Expose the 'work available' semaphore handle so callers can wait on
+ * it directly (e.g. alongside other handles); NULL if pq is NULL.
+ */
+HANDLE
+GetWorkQueueHandle ( WorkQueue* pq )
+{
+  return pq ? pq->workAvailable : NULL;
+}
+
+/*
+ * Function: GetWork
+ *
+ * Fetch a work item from the queue, blocking if none available.
+ * A return value of FALSE indicates an error / fatal condition.
+ */
+BOOL
+GetWork ( WorkQueue* pq, void** ppw )
+{
+  DWORD rc;
+
+  if (!pq) {
+    queue_error("GetWork", "NULL WorkQueue object");
+    return FALSE;
+  }
+  if (!ppw) {
+    queue_error("GetWork", "NULL WorkItem object");
+    return FALSE;
+  }
+
+  /* Block waiting for work item to become available */
+  if ( (rc = WaitForSingleObject( pq->workAvailable, INFINITE)) != WAIT_OBJECT_0 ) {
+    queue_error_rc("GetWork.WaitForSingleObject(workAvailable)",
+		   ( (WAIT_FAILED == rc) ? GetLastError() : rc));
+    return FALSE;
+  }
+
+  /* The semaphore unit we just consumed entitles us to one dequeue. */
+  return FetchWork(pq,ppw);
+}
+
+/*
+ * Function: FetchWork
+ *
+ * Dequeue the item at the head of the queue.  Unlike GetWork(), this
+ * does NOT block waiting for work: the caller must already have
+ * consumed a unit of 'workAvailable' (cf. GetWork()).
+ * A return value of FALSE indicates an error / fatal condition.
+ */
+BOOL
+FetchWork ( WorkQueue* pq, void** ppw )
+{
+  DWORD rc;
+
+  if (!pq) {
+    queue_error("FetchWork", "NULL WorkQueue object");
+    return FALSE;
+  }
+  if (!ppw) {
+    queue_error("FetchWork", "NULL WorkItem object");
+    return FALSE;
+  }
+
+  EnterCriticalSection(&pq->queueLock);
+  *ppw = pq->items[pq->head];
+  /* For sanity's sake, zero out the pointer. */
+  pq->items[pq->head] = NULL;
+  pq->head = (pq->head + 1) % WORKQUEUE_SIZE;
+  /* A slot has been vacated; let one blocked producer through. */
+  rc = ReleaseSemaphore(pq->roomAvailable,1, NULL);
+  LeaveCriticalSection(&pq->queueLock);
+  if ( 0 == rc ) {
+    queue_error_rc("FetchWork.ReleaseSemaphore()", GetLastError());
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/*
+ * Function: SubmitWork
+ *
+ * Add a work item to the queue, blocking if no room available.
+ * A return value of FALSE indicates an error / fatal condition.
+ */
+BOOL
+SubmitWork ( WorkQueue* pq, void* pw )
+{
+  DWORD rc;
+
+  if (!pq) {
+    queue_error("SubmitWork", "NULL WorkQueue object");
+    return FALSE;
+  }
+  if (!pw) {
+    queue_error("SubmitWork", "NULL WorkItem object");
+    return FALSE;
+  }
+
+  /* Block until there is room in the queue */
+  if ( (rc = WaitForSingleObject( pq->roomAvailable, INFINITE)) != WAIT_OBJECT_0 ) {
+    /* The error message previously said "workAvailable", but it is
+     * roomAvailable being waited on here. */
+    queue_error_rc("SubmitWork.WaitForSingleObject(roomAvailable)",
+		   ( (WAIT_FAILED == rc) ? GetLastError() : rc));
+
+    return FALSE;
+  }
+
+  EnterCriticalSection(&pq->queueLock);
+  pq->items[pq->tail] = pw;
+  pq->tail = (pq->tail + 1) % WORKQUEUE_SIZE;
+  /* Wake one consumer blocked on GetWork(). */
+  rc = ReleaseSemaphore(pq->workAvailable,1, NULL);
+  LeaveCriticalSection(&pq->queueLock);
+  if ( 0 == rc ) {
+    queue_error_rc("SubmitWork.ReleaseSemaphore()", GetLastError());
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* Error handling */
+
+static void
+queue_error_rc( char* loc,
+ DWORD err)
+{
+ fprintf(stderr, "%s failed: return code = 0x%lx\n", loc, err);
+ fflush(stderr);
+ return;
+}
+
+
+/* Report a failure with a human-readable reason string. */
+static void
+queue_error( char* loc, char* reason)
+{
+  fprintf(stderr, "%s failed: %s\n", loc, reason);
+  fflush(stderr);
+}
+
diff --git a/rts/win32/WorkQueue.h b/rts/win32/WorkQueue.h
new file mode 100644
index 0000000000..bde82a3a77
--- /dev/null
+++ b/rts/win32/WorkQueue.h
@@ -0,0 +1,37 @@
+/* WorkQueue.h
+ *
+ * A fixed-size queue; MT-friendly.
+ *
+ * (c) sof, 2002-2003
+ *
+ */
+#ifndef __WORKQUEUE_H__
+#define __WORKQUEUE_H__
+#include <windows.h>
+
+/* This is a fixed-size queue. */
+#define WORKQUEUE_SIZE 16
+
+typedef HANDLE Semaphore;
+typedef CRITICAL_SECTION CritSection;
+
+typedef struct WorkQueue {
+    /* the master lock, need to be grabbed prior to
+       using any of the other elements of the struct. */
+  CritSection   queueLock;
+    /* consumers/workers block waiting for 'workAvailable' */
+  Semaphore     workAvailable;
+    /* producers block on 'roomAvailable' when the queue is full */
+  Semaphore     roomAvailable;
+  int           head;
+  int           tail;
+    /* Fixed-size ring buffer of queued items.  The element type was
+     * 'void**', but the queue stores and returns plain 'void*' payloads
+     * (cf. SubmitWork()/FetchWork() in WorkQueue.c), so 'void*' is the
+     * correct element type.  Same size/layout either way. */
+  void*         items[WORKQUEUE_SIZE];
+} WorkQueue;
+
+extern WorkQueue* NewWorkQueue ( void );
+extern void FreeWorkQueue ( WorkQueue* pq );
+extern HANDLE GetWorkQueueHandle ( WorkQueue* pq );
+extern BOOL GetWork ( WorkQueue* pq, void** ppw );
+extern BOOL FetchWork ( WorkQueue* pq, void** ppw );
+extern int SubmitWork ( WorkQueue* pq, void* pw );
+
+#endif /* __WORKQUEUE_H__ */