summaryrefslogtreecommitdiff
path: root/driver
diff options
context:
space:
mode:
Diffstat (limited to 'driver')
-rw-r--r--driver/Makefile28
-rw-r--r--driver/ghc-usage.txt80
-rw-r--r--driver/ghc/Makefile31
-rw-r--r--driver/ghc/ghc.sh2
-rw-r--r--driver/ghci-usage.txt26
-rw-r--r--driver/ghci/Makefile69
-rw-r--r--driver/ghci/ghci.c168
-rw-r--r--driver/ghci/ghci.icobin0 -> 766 bytes
-rw-r--r--driver/ghci/ghci.rc1
-rw-r--r--driver/ghci/ghci.sh2
-rw-r--r--driver/ghci/ghcii.sh3
-rw-r--r--driver/mangler/Makefile22
-rw-r--r--driver/mangler/ghc-asm.lprl1775
-rw-r--r--driver/ordering-passes257
-rw-r--r--driver/split/Makefile17
-rw-r--r--driver/split/ghc-split.lprl618
-rw-r--r--driver/test_mangler29
17 files changed, 3128 insertions, 0 deletions
diff --git a/driver/Makefile b/driver/Makefile
new file mode 100644
index 0000000000..3a87ab43c5
--- /dev/null
+++ b/driver/Makefile
@@ -0,0 +1,28 @@
+# -----------------------------------------------------------------------------=
+# $Id: Makefile,v 1.76 2005/03/02 09:49:11 simonmar Exp $
+#
+# (c) The University of Glasgow 2002
+#
+
+# Relative path from this directory to the top of the source tree; the
+# shared build-system fragments are included from $(TOP)/mk.
+TOP=..
+include $(TOP)/mk/boilerplate.mk
+
+# Sub-directories of driver/ that have their own Makefile.
+# NOTE(review): presumably the rules in target.mk recurse into these -- confirm.
+SUBDIRS = mangler split ghc ghci
+
+# Building 'boot' or 'all' requires both package configuration files to
+# exist.
+boot all :: package.conf.inplace package.conf
+
+# Each file is created containing just the text "[]".  The rules have no
+# prerequisites, so a file that already exists is never rewritten.
+package.conf.inplace :
+ echo "[]" > $@
+
+package.conf :
+ echo "[]" > $@
+
+# Force datadir to be the same as libdir ('override' wins even over a
+# command-line setting) and add this directory's data files to the
+# install list.
+# NOTE(review): presumably INSTALL_DATAS are installed into $(datadir) by
+# target.mk -- confirm.
+override datadir = $(libdir)
+INSTALL_DATAS += package.conf ghc-usage.txt ghci-usage.txt
+
+# Since cleaning effectively uninstalls all the packages, we must
+# remove the stamp files that the build system uses to avoid unnecessarily
+# re-installing packages.
+CLEAN_FILES += package.conf* stamp-pkg-conf*
+
+include $(TOP)/mk/target.mk
diff --git a/driver/ghc-usage.txt b/driver/ghc-usage.txt
new file mode 100644
index 0000000000..e95d5846b9
--- /dev/null
+++ b/driver/ghc-usage.txt
@@ -0,0 +1,80 @@
+Usage:
+
+ $$ [command-line-options-and-input-files]
+
+To compile and link a complete Haskell program, run the compiler like
+so:
+
+ $$ --make Main
+
+where the module Main is in a file named Main.hs (or Main.lhs) in the
+current directory. The other modules in the program will be located
+and compiled automatically, and the linked program will be placed in
+the file `a.out' (or `Main.exe' on Windows).
+
+Alternatively, $$ can be used to compile files individually. Each
+input file is guided through (some of the) possible phases of a
+compilation:
+
+ - unlit: extract code from a "literate program"
+ - hscpp: run code through the C pre-processor (if -cpp flag given)
+ - hsc: run the Haskell compiler proper
+ - gcc: run the C compiler (if compiling via C)
+ - as: run the assembler
+ - ld: run the linker
+
+For each input file, the phase to START with is determined by the
+file's suffix:
+
+ - .lhs literate Haskell unlit
+ - .hs plain Haskell ghc
+ - .hc C from the Haskell compiler gcc
+ - .c C not from the Haskell compiler gcc
+ - .s assembly language as
+ - other passed directly to the linker ld
+
+The phase at which to STOP processing is determined by a command-line
+option:
+
+ -E stop after generating preprocessed, de-litted Haskell
+ (used in conjunction with -cpp)
+ -C stop after generating C (.hc output)
+ -S stop after generating assembler (.s output)
+ -c stop after generating object files (.o output)
+
+Other commonly-used options are:
+
+ -v[n] Control verbosity (n is 0--5, normal verbosity level is 1,
+ -v alone is equivalent to -v3)
+
+ -fglasgow-exts Allow Glasgow extensions (unboxed types, etc.)
+
+ -O An `optimising' package of compiler flags, for faster code
+
+ -prof Compile for cost-centre profiling
+ (add -auto-all for automagic cost-centres on all
+ top-level functions)
+
+ -H14m Increase compiler's heap size (might make compilation
+ faster, especially on large source files).
+
+ -M Output Makefile rules recording the
+ dependencies of a list of Haskell files.
+
+Given the above, here are some TYPICAL invocations of $$:
+
+ # compile a Haskell module to a .o file, optimising:
+ % $$ -c -O Foo.hs
+ # link three .o files into an executable called "test":
+ % $$ -o test Foo.o Bar.o Baz.o
+ # compile a Haskell module to C (a .hc file), using a bigger heap:
+ % $$ -C -H16m Foo.hs
+ # compile Haskell-produced C (.hc) to assembly language:
+ % $$ -S Foo.hc
+
+The User's Guide has more information about GHC's *many* options. An
+online copy can be found here:
+
+ http://www.haskell.org/ghc/documentation.html
+
+------------------------------------------------------------------------
diff --git a/driver/ghc/Makefile b/driver/ghc/Makefile
new file mode 100644
index 0000000000..26965569a4
--- /dev/null
+++ b/driver/ghc/Makefile
@@ -0,0 +1,31 @@
+#-----------------------------------------------------------------------------
+# $Id: Makefile,v 1.6 2001/10/23 16:32:30 rrt Exp $
+#
+
+# Relative path from this directory to the top of the source tree.
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+
+# -----------------------------------------------------------------------------
+# ghc script
+
+# GHCBIN is the compiler binary the generated script will exec; GHCLIB is
+# the directory handed to it via -B (see TOPDIROPT below).
+# For a binary distribution (BIN_DIST=1) the values are not make paths:
+# "$$" yields a literal "$", so the text written out starts with
+# $\"\"libexecdir / $\"\"libdir.
+# NOTE(review): presumably this leaves shell references to libexecdir and
+# libdir that are resolved when the distribution is installed -- confirm.
+ifeq "$(BIN_DIST)" "1"
+GHCBIN=$$\"\"libexecdir/ghc-$(ProjectVersion)
+GHCLIB=$$\"\"libdir
+else
+GHCBIN=$(libexecdir)/ghc-$(ProjectVersion)
+GHCLIB=$(libdir)
+endif # BIN_DIST
+
+# On every platform except mingw32 the script is named ghc-<version> and
+# LINK is set to "ghc"; on mingw32 both SCRIPT_PROG and LINK stay unset
+# here.
+# NOTE(review): LINK presumably names an unversioned link to the script
+# created by target.mk -- confirm.
+ifneq "$(TARGETPLATFORM)" "i386-unknown-mingw32"
+SCRIPT_PROG = ghc-$(ProjectVersion)
+LINK = ghc
+endif
+# The script is built from ghc.sh, run by $(SHELL), with GHCBIN and
+# TOPDIROPT listed as the variables to substitute into it.
+SCRIPT_OBJS = ghc.sh
+INTERP = $(SHELL)
+TOPDIROPT = -B$(GHCLIB)
+SCRIPT_SUBST_VARS = GHCBIN TOPDIROPT
+INSTALL_SCRIPTS += $(SCRIPT_PROG)
+
+# -----------------------------------------------------------------------------
+
+include $(TOP)/mk/target.mk
diff --git a/driver/ghc/ghc.sh b/driver/ghc/ghc.sh
new file mode 100644
index 0000000000..780b9498be
--- /dev/null
+++ b/driver/ghc/ghc.sh
@@ -0,0 +1,2 @@
+# Mini-driver for GHC
+exec $GHCBIN $TOPDIROPT ${1+"$@"}
diff --git a/driver/ghci-usage.txt b/driver/ghci-usage.txt
new file mode 100644
index 0000000000..4a633fc3e1
--- /dev/null
+++ b/driver/ghci-usage.txt
@@ -0,0 +1,26 @@
+Usage:
+
+ ghci [command-line-options-and-input-files]
+
+The kinds of input files that can be given on the command-line
+include:
+
+ - Haskell source files (.hs or .lhs suffix)
+ - Object files (.o suffix, or .obj on Windows)
+ - Dynamic libraries (.so suffix, or .dll on Windows)
+
+In addition, ghci accepts most of the command-line options that plain
+GHC does. Some of the options that are commonly used are:
+
+ -fglasgow-exts Allow Glasgow extensions (unboxed types, etc.)
+
+ -i<dir> Search for imported modules in the directory <dir>.
+
+ -H32m Increase GHC's default heap size to 32m
+
+ -cpp Enable CPP processing of source files
+
+Full details can be found in the User's Guide, an online copy of which
+can be found here:
+
+ http://www.haskell.org/ghc/documentation.html
diff --git a/driver/ghci/Makefile b/driver/ghci/Makefile
new file mode 100644
index 0000000000..7c67ac546c
--- /dev/null
+++ b/driver/ghci/Makefile
@@ -0,0 +1,69 @@
+#-----------------------------------------------------------------------------
+# $Id: Makefile,v 1.11 2005/05/05 00:58:38 sof Exp $
+#
+
+# Relative path from this directory to the top of the source tree.
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+
+# hack for ghci-inplace script, see below
+# INSTALLING defaults to 1 here; the recursive $(MAKE) near the end of this
+# file passes INSTALLING=0 on the command line, which takes precedence over
+# this assignment.
+INSTALLING=1
+
+# -----------------------------------------------------------------------------
+# ghci script
+
+# Choose the compiler binary (GHCBIN) and the -B directory (GHCLIB):
+#  * INSTALLING=1, BIN_DIST=1: "$$" yields a literal "$", so the values
+#    start with $\"\"libexecdir / $\"\"libdir rather than a make path
+#    (NOTE(review): presumably resolved when the distribution is
+#    installed -- confirm);
+#  * INSTALLING=1 otherwise: the configured libexecdir / libdir;
+#  * INSTALLING=0: absolute paths inside the build tree.
+ifeq "$(INSTALLING)" "1"
+ifeq "$(BIN_DIST)" "1"
+GHCBIN=$$\"\"libexecdir/ghc-$(ProjectVersion)
+GHCLIB=$$\"\"libdir
+else
+GHCBIN=$(libexecdir)/ghc-$(ProjectVersion)
+GHCLIB=$(libdir)
+endif # BIN_DIST
+else
+GHCBIN=$(FPTOOLS_TOP_ABS)/$(GHC_COMPILER_DIR_REL)/ghc-$(ProjectVersion)
+GHCLIB=$(FPTOOLS_TOP_ABS)
+endif
+
+# Names of the script in its installed and in-place flavours; SCRIPT_PROG
+# is set to one of them further down.
+INSTALLED_SCRIPT_PROG = ghci-$(ProjectVersion)
+INPLACE_SCRIPT_PROG = ghci-inplace
+
+# On mingw32 a C program named ghci is built and the icon resource
+# ghci.res is added to its objects; on every other platform C_SRCS is
+# emptied.
+ifeq "$(TARGETPLATFORM)" "i386-unknown-mingw32"
+C_PROG = ghci
+C_OBJS += ghci.res
+else
+C_SRCS=
+endif
+
+# The script is built from ghci.sh, run by $(SHELL), with GHCBIN and
+# TOPDIROPT listed as the variables to substitute into it.
+SCRIPT_OBJS = ghci.sh
+INTERP = $(SHELL)
+SCRIPT_SUBST_VARS = GHCBIN TOPDIROPT
+# What gets installed: the generated script everywhere except mingw32,
+# where ghcii.sh and the C program are installed instead.
+# Note that SCRIPT_PROG is only assigned below this point.
+ifneq "$(TARGETPLATFORM)" "i386-unknown-mingw32"
+INSTALL_SCRIPTS += $(SCRIPT_PROG)
+else
+INSTALL_SCRIPTS += ghcii.sh
+INSTALL_PROGS += $(C_PROG)
+endif
+TOPDIROPT = -B$(GHCLIB)
+
+# Pick the script name for this pass; LINK is set to "ghci" only when
+# installing on a non-mingw32 platform.
+ifeq "$(INSTALLING)" "1"
+SCRIPT_PROG = $(INSTALLED_SCRIPT_PROG)
+ifneq "$(TARGETPLATFORM)" "i386-unknown-mingw32"
+LINK = ghci
+endif
+else
+SCRIPT_PROG = $(INPLACE_SCRIPT_PROG)
+endif
+
+# don't recurse on 'make install'
+#
+# In the default (INSTALLING=1) pass, these targets additionally re-run
+# make on the same target with INSTALLING=0 BIN_DIST=0, i.e. for the
+# in-place variant of the script.
+ifeq "$(INSTALLING)" "1"
+all clean distclean maintainer-clean ::
+ $(MAKE) INSTALLING=0 BIN_DIST=0 $(MFLAGS) $@
+endif
+
+# mingw32 only: compile the resource script (which references ghci.ico)
+# into a COFF object with windres.
+ifeq "$(TARGETPLATFORM)" "i386-unknown-mingw32"
+ghci.res : ghci.rc ghci.ico
+ windres -o ghci.res -i ghci.rc -O coff
+endif
+
+include $(TOP)/mk/target.mk
diff --git a/driver/ghci/ghci.c b/driver/ghci/ghci.c
new file mode 100644
index 0000000000..f21a12a4ba
--- /dev/null
+++ b/driver/ghci/ghci.c
@@ -0,0 +1,168 @@
+/*
+ *
+ * $Id: ghci.c,v 1.10 2005/05/05 00:58:38 sof Exp $
+ *
+ * ghci wrapper for Win32 only
+ *
+ * This wrapper invokes ghc.exe with the added command-line
+ * option "--interactive".
+ * (On Unix this is done by the ghci.sh shell script, but
+ * that does not work so well on Win32.)
+ *
+ * (c) The GHC Team 2001
+ *
+ * ghc.exe is searched for using the 'normal' search rules
+ * for DLLs / EXEs (i.e., first in the same dir as this wrapper,
+ * then system dirs, then PATH).
+ *
+ * To compile:
+ *
+ * MSVC: cl /o ghci.exe /c ghci.c
+ * mingw: gcc -mno-cygwin -o ghci.exe ghci.c
+ *
+ * If you want to associate your own icon with the wrapper,
+ * here's how to do it:
+ *
+ * * Create a one-line .rc file, ghci.rc (say), containing
+ * 0 ICON "hsicon.ico"
+ * (subst the string literal for the name of your icon file).
+ * * Compile it up (assuming the .ico file is in the same dir
+ * as the .rc file):
+ *
+ * MSVC: rc /i. /fo ghci.res ghci.rc
+ * mingw: windres -o ghci.res -i ghci.rc -O coff
+ *
+ * * Add the resulting .res file to the link line of the wrapper:
+ *
+ * MSVC: cl /o ghci.exe /c ghci.c ghci.res
+ * mingw: gcc -mno-cygwin -o ghci.exe ghci.c ghci.res
+ *
+ */
+
+#include <windows.h>
+#include <stdio.h>
+#include <process.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <io.h>
+
+#define BINARY_NAME "ghc.exe"
+#define IACTIVE_OPTION "--interactive"
+
+#define errmsg(msg) fprintf(stderr, msg "\n"); fflush(stderr)
+#define errmsg1(msg,val) fprintf(stderr, msg "\n",val); fflush(stderr)
+
+/*
+ * Append 'arg' to the buffer at 'dst' as one double-quoted token and
+ * return the position just past the closing quote.
+ *
+ * Embedded double quotes, and any backslashes directly in front of a
+ * double quote (including our own closing quote), are escaped following
+ * the Microsoft C runtime's command-line parsing rules, so that the child
+ * process sees exactly the string we were given.
+ *
+ * The caller must provide room for 2*strlen(arg) + 2 characters.
+ */
+static char*
+append_quoted_arg(char* dst, const char* arg)
+{
+  unsigned int nbs;   /* length of the current run of backslashes */
+  unsigned int k;
+
+  *dst++ = '"';
+  for (;;) {
+    nbs = 0;
+    while (*arg == '\\') {
+      nbs++;
+      arg++;
+    }
+    if (*arg == '\0') {
+      /* The run sits right before our closing quote: double it so the
+         quote is not read as an escaped one. */
+      for (k = 0; k < 2*nbs; k++) {
+        *dst++ = '\\';
+      }
+      break;
+    }
+    if (*arg == '"') {
+      /* Double the run and add one more backslash to escape the quote. */
+      for (k = 0; k < 2*nbs + 1; k++) {
+        *dst++ = '\\';
+      }
+    } else {
+      /* Backslashes not followed by a quote are literal. */
+      for (k = 0; k < nbs; k++) {
+        *dst++ = '\\';
+      }
+    }
+    *dst++ = *arg++;
+  }
+  *dst++ = '"';
+  return dst;
+}
+
+/*
+ * Locate ghc.exe, start it with "--interactive" followed by our own
+ * arguments, wait for it to finish and return its exit code.
+ *
+ * Returns 1 if ghc.exe cannot be found or started, if memory runs out,
+ * or if waiting for the child fails.
+ */
+int
+main(int argc, char** argv)
+{
+  TCHAR binPath[FILENAME_MAX+1];
+  TCHAR binPathShort[MAX_PATH+1];
+  DWORD dwSize = FILENAME_MAX;
+  DWORD exitCode = 1;
+  TCHAR* szEnd;
+  int i;
+  int rc = 1;
+  char* new_cmdline;
+  char* ptr;
+  unsigned int cmdline_len = 0;
+
+  STARTUPINFO si;
+  PROCESS_INFORMATION pi;
+
+  ZeroMemory(&pi, sizeof(PROCESS_INFORMATION));
+  ZeroMemory(&si, sizeof(STARTUPINFO));
+  si.cb = sizeof(STARTUPINFO);
+
+  /* The "_" environment variable being set is taken as a sign that we
+     were started from a Unix-style shell. */
+  if ( getenv("_") ) {
+    printf("WARNING: GHCi invoked via 'ghci.exe' in *nix-like shells (cygwin-bash, in particular)\n");
+    printf("         doesn't handle Ctrl-C well; use the 'ghcii.sh' shell wrapper instead\n");
+    fflush(stdout);
+  }
+
+  /* Locate the binary we want to start up */
+  if ( !SearchPath(NULL,
+                   BINARY_NAME,
+                   NULL,
+                   dwSize,
+                   (char*)binPath,
+                   &szEnd) ) {
+    errmsg1("%s: Unable to locate ghc.exe", argv[0]);
+    return 1;
+  }
+
+  dwSize = MAX_PATH;
+  /* Turn the path into short form - LFN form causes problems
+     when passed in argv[0]. */
+  if ( !(GetShortPathName(binPath, binPathShort, dwSize)) ) {
+    errmsg1("%s: Unable to locate ghc.exe", argv[0]);
+    return 1;
+  }
+
+  /* Compute an upper bound on the length of the flattened 'argv',
+     including the extra IACTIVE_OPTION (and spaces!) */
+  cmdline_len += 1 + strlen(IACTIVE_OPTION);
+  for(i=1;i<argc;i++) {
+    /* Note: play it safe and quote all argv strings.  Escaping can at
+       most double an argument's length; add the separating space and
+       the two enclosing quotes. */
+    cmdline_len += 1 + 2*strlen(argv[i]) + 2;
+  }
+  new_cmdline = (char*)malloc(sizeof(char) * (cmdline_len + 1));
+  if (!new_cmdline) {
+    errmsg1("%s: failed to start up ghc.exe; insufficient memory", argv[0]);
+    return 1;
+  }
+
+  strcpy(new_cmdline, " " IACTIVE_OPTION);
+  ptr = new_cmdline + strlen(" " IACTIVE_OPTION);
+  for(i=1;i<argc;i++) {
+    *ptr++ = ' ';
+    ptr = append_quoted_arg(ptr, argv[i]);
+  }
+  *ptr = '\0';
+
+  /* Note: Used to use _spawnv(_P_WAIT, ...) here, but it suffered
+     from the parent intercepting console events such as Ctrl-C,
+     which it shouldn't. Installing an ignore-all console handler
+     didn't do the trick either.
+
+     Irrespective of this issue, using CreateProcess() is preferable,
+     as it makes this wrapper work on both mingw and cygwin.
+  */
+#if 0
+  fprintf(stderr, "Invoking ghc: %s %s\n", binPathShort, new_cmdline); fflush(stderr);
+#endif
+  if (!CreateProcess(binPathShort,
+                     new_cmdline,
+                     NULL,
+                     NULL,
+                     TRUE,
+                     0, /* dwCreationFlags */
+                     NULL, /* lpEnvironment */
+                     NULL, /* lpCurrentDirectory */
+                     &si, /* lpStartupInfo */
+                     &pi) ) {
+    /* Report first: free() must not get a chance to disturb the
+       last-error value. */
+    errmsg1("Unable to start ghc.exe (error code: %lu)", GetLastError());
+    free(new_cmdline);
+    return 1;
+  }
+  /* Disable handling of console events in the parent by dropping its
+   * connection to the console. This has the (minor) downside of not being
+   * able to subsequently emit any error messages to the console.
+   */
+  FreeConsole();
+
+  switch (WaitForSingleObject(pi.hProcess, INFINITE) ) {
+  case WAIT_OBJECT_0:
+    /* The child has finished: hand its exit code on to our caller
+       instead of always reporting success. */
+    if (GetExitCodeProcess(pi.hProcess, &exitCode)) {
+      rc = (int)exitCode;
+    } else {
+      rc = 1;
+    }
+    break;
+  case WAIT_ABANDONED:
+  case WAIT_FAILED:
+    /* in the event we get any hard errors, bring the child to a halt. */
+    TerminateProcess(pi.hProcess,1);
+    rc = 1;
+    break;
+  default:
+    rc = 1;
+    break;
+  }
+
+  CloseHandle(pi.hThread);
+  CloseHandle(pi.hProcess);
+  free(new_cmdline);
+  return rc;
+}
diff --git a/driver/ghci/ghci.ico b/driver/ghci/ghci.ico
new file mode 100644
index 0000000000..680be76e71
--- /dev/null
+++ b/driver/ghci/ghci.ico
Binary files differ
diff --git a/driver/ghci/ghci.rc b/driver/ghci/ghci.rc
new file mode 100644
index 0000000000..01ed2f4081
--- /dev/null
+++ b/driver/ghci/ghci.rc
@@ -0,0 +1 @@
+0 ICON "ghci.ico"
diff --git a/driver/ghci/ghci.sh b/driver/ghci/ghci.sh
new file mode 100644
index 0000000000..b0200477b8
--- /dev/null
+++ b/driver/ghci/ghci.sh
@@ -0,0 +1,2 @@
+# Mini-driver for GHCi
+exec $GHCBIN $TOPDIROPT --interactive ${1+"$@"}
diff --git a/driver/ghci/ghcii.sh b/driver/ghci/ghcii.sh
new file mode 100644
index 0000000000..70d98988b8
--- /dev/null
+++ b/driver/ghci/ghcii.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Mini-driver for GHCi: run the 'ghc' found via this script's own path
+# ($0/../ghc) with --interactive, passing the caller's arguments through.
+# "$0" is quoted so that an installation path containing spaces is not
+# split into several words.
+# ${1+"$@"} expands to "$@" when at least one argument was given and to
+# nothing otherwise.
+exec "$0"/../ghc --interactive ${1+"$@"}
diff --git a/driver/mangler/Makefile b/driver/mangler/Makefile
new file mode 100644
index 0000000000..7b482e1d98
--- /dev/null
+++ b/driver/mangler/Makefile
@@ -0,0 +1,22 @@
+#-----------------------------------------------------------------------------
+# $Id: Makefile,v 1.3 2001/03/23 16:36:21 simonmar Exp $
+
+# Relative path from this directory to the top of the source tree.
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+
+# Name of the script produced in this directory.
+SCRIPT_PROG = ghc-asm
+
+# The script is run by perl.
+INTERP=perl
+
+# Only TARGETPLATFORM is listed for substitution into the script.
+SCRIPT_SUBST_VARS := TARGETPLATFORM
+
+# Add the script to the list of libexec scripts to install.
+INSTALL_LIBEXEC_SCRIPTS += $(SCRIPT_PROG)
+
+# SCRIPT_OBJS is not assigned in this file.
+# NOTE(review): presumably it is derived by the included mk files from the
+# literate source ghc-asm.lprl -- confirm.
+CLEAN_FILES += $(SCRIPT_OBJS)
+
+# needed for bootstrapping with HC files
+ifeq "$(BootingFromHc)" "YES"
+boot :: all
+endif
+
+include $(TOP)/mk/target.mk
diff --git a/driver/mangler/ghc-asm.lprl b/driver/mangler/ghc-asm.lprl
new file mode 100644
index 0000000000..902593ea7f
--- /dev/null
+++ b/driver/mangler/ghc-asm.lprl
@@ -0,0 +1,1775 @@
+%************************************************************************
+%* *
+\section[Driver-asm-fiddling]{Fiddling with assembler files}
+%* *
+%************************************************************************
+
+Tasks:
+\begin{itemize}
+\item
+Utterly stomp out C functions' prologues and epilogues; i.e., the
+stuff to do with the C stack.
+\item
+Any other required tidying up.
+\end{itemize}
+
+General note [chak]: Many regexps are very fragile because they rely on white
+space being in the right place. This caused trouble with gcc 2.95 (at least
+on Linux), where the use of white space in .s files generated by gcc suddenly
+changed. To guarantee compatibility across different versions of gcc, make
+sure (at least on i386-.*-linux) that regexps tolerate varying amounts of white
+space between an assembler statement and its arguments as well as after the
+comma separating multiple arguments.
+
+\emph{For the time being, I have corrected the regexps for i386-.*-linux. I
+didn't touch all the regexps for other i386 platforms, as I don't have
+a box to test these changes.}
+
+HPPA specific notes:
+\begin{itemize}
+\item
+The HP linker is very picky about symbols being in the appropriate
+space (code vs. data). When we mangle the threaded code to put the
+info tables just prior to the code, they wind up in code space
+rather than data space. This means that references to *_info from
+un-mangled parts of the RTS (e.g. unthreaded GC code) get
+unresolved symbols. Solution: mini-mangler for .c files on HP. I
+think this should really be triggered in the driver by a new -rts
+option, so that user code doesn't get mangled inappropriately.
+\item
+With reversed tables, jumps are to the _info label rather than to
+the _entry label. The _info label is just an address in code
+space, rather than an entry point with the descriptive blob we
+talked about yesterday. As a result, you can't use the call-style
+JMP_ macro. However, some JMP_ macros take _info labels as targets
+and some take code entry points within the RTS. The latter won't
+work with the goto-style JMP_ macro. Sigh. Solution: Use the goto
+style JMP_ macro, and mangle some more assembly, changing all
+"RP'literal" and "LP'literal" references to "R'literal" and
+"L'literal," so that you get the real address of the code, rather
+than the descriptive blob. Also change all ".word P%literal"
+entries in info tables and vector tables to just ".word literal,"
+for the same reason. Advantage: No more ridiculous call sequences.
+\end{itemize}
+
+%************************************************************************
+%* *
+\subsection{Top-level code}
+%* *
+%************************************************************************
+
+\begin{code}
+# Platform the mangler works for.  $TARGETPLATFORM is not assigned in this
+# file (the mangler Makefile lists TARGETPLATFORM in SCRIPT_SUBST_VARS).
+$TargetPlatform = $TARGETPLATFORM;
+
+# Program name: $0 with everything up to the last '/' removed.
+$Pgm = $0;
+$Pgm =~ s|.*/||;
+
+# First argument: input file; second argument: output file.
+($ifile, $ofile) = @ARGV[0, 1];
+
+# i386 targets only: an optional third argument sets $StolenX86Regs;
+# when it is missing or empty the value is 4.
+if ( $TargetPlatform =~ /^i386-/ ) {
+    $StolenX86Regs = ($ARGV[2] eq '') ? 4 : $ARGV[2];
+}
+
+# Do the actual work, then exit successfully.
+&mangle_asm($ifile, $ofile);
+
+exit(0);
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Constants for various architectures}
+%* *
+%************************************************************************
+
+\begin{code}
+sub init_TARGET_STUFF {
+
+ #--------------------------------------------------------#
+ if ( $TargetPlatform =~ /^alpha-.*-.*/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\$L?C(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\$.*\.\.ng:|\.align\s+\d+|\.(globl|ent)\s+\S+|\#.*|\.(file|loc)\s+\S+\s+\S+|\.text|\.r?data)\n)';
+ $T_COPY_DIRVS = '^\s*(\$.*\.\.ng:|\#|\.(file|globl|ent|loc))';
+
+ $T_DOT_WORD = '\.(long|quad|byte|word)';
+ $T_DOT_GLOBAL = '^\t\.globl';
+ $T_HDR_literal = "\.rdata\n\t\.align 3\n";
+ $T_HDR_misc = "\.text\n\t\.align 3\n";
+ $T_HDR_data = "\.data\n\t\.align 3\n";
+ $T_HDR_rodata = "\.rdata\n\t\.align 3\n";
+ $T_HDR_closure = "\.data\n\t\.align 3\n";
+ $T_HDR_info = "\.text\n\t\.align 3\n";
+ $T_HDR_entry = "\.text\n\t\.align 3\n";
+ $T_HDR_vector = "\.text\n\t\.align 3\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^hppa/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DONT THINK THIS APPLIES'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^L\$C(\d+)$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = '';
+
+ $T_MOVE_DIRVS = '^((\s+\.(IMPORT|EXPORT|PARAM).*|\s+\.align\s+\d+|\s+\.(SPACE|SUBSPA)\s+\S+|\s*)\n)';
+ $T_COPY_DIRVS = '^\s+\.(IMPORT|EXPORT)';
+
+ $T_DOT_WORD = '\.(blockz|word|half|byte)';
+ $T_DOT_GLOBAL = '^\s+\.EXPORT';
+ $T_HDR_literal = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$LIT\$\n";
+ $T_HDR_misc = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
+ $T_HDR_data = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
+ $T_HDR_rodata = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
+ $T_HDR_closure = "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$DATA\$\n\t\.align 4\n";
+ $T_HDR_info = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
+ $T_HDR_entry = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
+ $T_HDR_vector = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$CODE\$\n\t\.align 4\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^i386-.*-(linuxaout|freebsd2|nextstep3|cygwin32|mingw32)$/ ) {
+ # NeXT added but not tested. CaS
+
+ $T_STABBY = 1; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '^#'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^LC(\d+):$';
+ $T_POST_LBL = ':';
+ $T_X86_PRE_LLBL_PAT = 'L';
+ $T_X86_PRE_LLBL = 'L';
+ $T_X86_BADJMP = '^\tjmp [^L\*]';
+
+ $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.stab[^n].*|\.type\s+.*|\.size\s+.*|\.lcomm.*)\n)';
+ $T_COPY_DIRVS = '\.(globl|stab|lcomm)';
+ $T_DOT_WORD = '\.(long|word|value|byte|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_literal = "\.text\n\t\.align 2\n";
+ $T_HDR_misc = "\.text\n\t\.align 2,0x90\n";
+ $T_HDR_data = "\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\.text\n\t\.align 2\n";
+ $T_HDR_closure = "\.data\n\t\.align 2\n";
+ $T_HDR_info = "\.text\n\t\.align 2\n"; # NB: requires padding
+ $T_HDR_entry = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?)
+ $T_HDR_vector = "\.text\n\t\.align 2\n"; # NB: requires padding
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^i386-.*-(solaris2|linux|gnu|freebsd|netbsd|openbsd|kfreebsdgnu)$/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = # regexp that says what comes before APP/NO_APP
+ ($TargetPlatform =~ /-(linux|gnu|freebsd|netbsd|openbsd)$/) ? '#' : '/' ;
+ $T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+ $T_X86_PRE_LLBL_PAT = '\.L';
+ $T_X86_PRE_LLBL = '.L';
+ $T_X86_BADJMP = '^\tjmp\s+[^\.\*]';
+
+ $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s.*|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
+ $T_COPY_DIRVS = '^\s*\.(globl|type|size|local)';
+
+ $T_DOT_WORD = '\.(long|value|word|byte|zero)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_literal = "\.section\t\.rodata\n"; # or just use .text??? (WDP 95/11)
+ $T_HDR_misc = "\.text\n\t\.align 4\n";
+ $T_HDR_data = "\.data\n\t\.align 4\n";
+ $T_HDR_rodata = "\.section\t\.rodata\n\t\.align 4\n";
+ $T_HDR_closure = "\.data\n\t\.align 4\n";
+ $T_HDR_info = "\.text\n\t\.align 4\n";
+ $T_HDR_entry = "\.text\n"; # no .align so we're right next to _info (arguably wrong...?)
+ $T_HDR_vector = "\.text\n\t\.align 4\n"; # NB: requires padding
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^ia64-.*-linux$/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '#';
+ $T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*\.(global|proc|pred\.safe_across_calls|text|data|section|subsection|align|size|type|ident)\s+.*\n)';
+ $T_COPY_DIRVS = '\.(global|proc)';
+
+ $T_DOT_WORD = '\.(long|value|byte|zero)';
+ $T_DOT_GLOBAL = '\.global';
+ $T_HDR_literal = "\.section\t\.rodata\n";
+ $T_HDR_misc = "\.text\n\t\.align 8\n";
+ $T_HDR_data = "\.data\n\t\.align 8\n";
+ $T_HDR_rodata = "\.section\t\.rodata\n\t\.align 8\n";
+ $T_HDR_closure = "\.data\n\t\.align 8\n";
+ $T_HDR_info = "\.text\n\t\.align 8\n";
+ $T_HDR_entry = "\.text\n\t\.align 16\n";
+ $T_HDR_vector = "\.text\n\t\.align 8\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^x86_64-.*-(linux|openbsd)$/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '#';
+ $T_CONST_LBL = '^\.LC(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*\.(globl|text|data|section|align|size|type|ident|local)\s+.*\n)';
+ $T_COPY_DIRVS = '\.(globl|type|size|local)';
+
+ $T_DOT_WORD = '\.(quad|long|value|byte|zero)';
+ $T_DOT_GLOBAL = '\.global';
+
+ $T_HDR_literal16 = "\.section\t\.rodata.cst16\n\t.align 16\n";
+ $T_HDR_literal = "\.section\t\.rodata\n";
+
+ $T_HDR_misc = "\.text\n\t\.align 8\n";
+ $T_HDR_data = "\.data\n\t\.align 8\n";
+ $T_HDR_rodata = "\.section\t\.rodata\n\t\.align 8\n";
+
+ # the assembler on x86_64/Linux refuses to generate code for
+ # .quad x - y
+ # where x is in the text section and y in the rodata section.
+ # It works if y is in the text section, though. This is probably
+ # going to cause difficulties for PIC, I imagine.
+ $T_HDR_relrodata= "\.text\n\t\.align 8\n";
+
+ $T_HDR_closure = "\.data\n\t\.align 8\n";
+ $T_HDR_info = "\.text\n\t\.align 8\n";
+ $T_HDR_entry = "\.text\n\t\.align 8\n";
+ $T_HDR_vector = "\.text\n\t\.align 8\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^m68k-.*-sunos4/ ) {
+
+ $T_STABBY = 1; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '^# MAY NOT APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^LC(\d+):$';
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\.align\s+\d+|\.proc\s+\d+|\.const|\.cstring|\.globl\s+\S+|\.text|\.data|\.even|\.stab[^n].*)\n)';
+ $T_COPY_DIRVS = '\.(globl|proc|stab)';
+
+ $T_DOT_WORD = '\.long';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_literal = "\.text\n\t\.even\n";
+ $T_HDR_misc = "\.text\n\t\.even\n";
+ $T_HDR_data = "\.data\n\t\.even\n";
+ $T_HDR_rodata = "\.text\n\t\.even\n";
+ $T_HDR_closure = "\.data\n\t\.even\n";
+ $T_HDR_info = "\.text\n\t\.even\n";
+ $T_HDR_entry = "\.text\n\t\.even\n";
+ $T_HDR_vector = "\.text\n\t\.even\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^mips-.*/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '^\s*#'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\$LC(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\.align\s+\d+|\.(globl|ent)\s+\S+|\.text|\.r?data)\n)';
+ $T_COPY_DIRVS = '\.(globl|ent)';
+
+ $T_DOT_WORD = '\.word';
+ $T_DOT_GLOBAL = '^\t\.globl';
+ $T_HDR_literal = "\t\.rdata\n\t\.align 2\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.rdata\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^powerpc-apple-darwin.*/ ) {
+ # Apple PowerPC Darwin/MacOS X.
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\LC\d+:'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
+ $T_COPY_DIRVS = '\.(globl|lcomm)';
+
+ $T_DOT_WORD = '\.(long|short|byte|fill|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_toc = "\.toc\n";
+ $T_HDR_literal = "\t\.const\n\t\.align 2\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.const\n\t\.align 2\n";
+ $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ ) {
+ # Apple x86 (i386) Darwin/MacOS X.
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DOESNT APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\LC\d+:'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+ $T_X86_PRE_LLBL_PAT = 'L';
+ $T_X86_PRE_LLBL = 'L';
+ $T_X86_BADJMP = '^\tjmp [^L\*]';
+
+ $T_MOVE_DIRVS = '^(\s*(\.align \d+|\.text|\.data|\.const_data|\.cstring|\.non_lazy_symbol_pointer|\.const|\.static_const|\.literal4|\.literal8|\.static_data|\.globl \S+|\.section .*|\.lcomm.*)\n)';
+ $T_COPY_DIRVS = '\.(globl|lcomm)';
+
+ $T_DOT_WORD = '\.(long|short|byte|fill|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_toc = "\.toc\n";
+ $T_HDR_literal16= "\t\.literal8\n\t\.align 4\n";
+ $T_HDR_literal = "\t\.const\n\t\.align 4\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.const\n\t\.align 2\n";
+ $T_HDR_relrodata= "\t\.const_data\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ ) {
+ # PowerPC Linux
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '^#'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\.LC\d+:'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
+ $T_COPY_DIRVS = '^\s*\.(globl|type|size|local)';
+
+ $T_DOT_WORD = '\.(long|short|byte|fill|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_toc = "\.toc\n";
+ $T_HDR_literal = "\t\.section\t.rodata\n\t\.align 2\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.section\t.rodata\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^powerpc64-.*-linux/ ) {
+ # PowerPC 64 Linux
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = '\.'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '^#'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\.LC\d+:'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^(\s*(\.(p2)?align\s+\d+(,\s*0x90)?|\.globl\s+\S+|\.text|\.data|\.section\s+.*|\.type\s+.*|\.size\s+\S+\s*,\s*\d+|\.ident.*|\.local.*)\n)';
+ $T_COPY_DIRVS = '^\s*\.(globl|type|size|local)';
+
+ $T_DOT_WORD = '\.(long|short|byte|fill|space)';
+ $T_DOT_GLOBAL = '\.globl';
+ $T_HDR_toc = "\.toc\n";
+ $T_HDR_literal = "\t\.section\t\".toc\",\"aw\"\n";
+ $T_HDR_misc = "\t\.text\n\t\.align 2\n";
+ $T_HDR_data = "\t\.data\n\t\.align 2\n";
+ $T_HDR_rodata = "\t\.section\t.rodata\n\t\.align 2\n";
+ $T_HDR_closure = "\t\.data\n\t\.align 2\n";
+ $T_HDR_info = "\t\.text\n\t\.align 2\n";
+ $T_HDR_entry = "\t\.text\n\t\.align 2\n";
+ $T_HDR_vector = "\t\.text\n\t\.align 2\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^sparc-.*-(solaris2|openbsd)/ ) {
+
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = 'DOES NOT SEEM TO APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\s+\.local\s+\S+|\.text|\.data|\.stab.*|\s*\.section.*|\s+\.type.*|\s+\.size.*)\n)';
+ $T_COPY_DIRVS = '\.(global|local|proc|stab)';
+
+ $T_DOT_WORD = '\.(long|word|byte|half|skip|uahalf|uaword)';
+ $T_DOT_GLOBAL = '^\t\.global';
+ $T_HDR_literal = "\.text\n\t\.align 8\n";
+ $T_HDR_misc = "\.text\n\t\.align 4\n";
+ $T_HDR_data = "\.data\n\t\.align 8\n";
+ $T_HDR_rodata = "\.text\n\t\.align 4\n";
+ $T_HDR_closure = "\.data\n\t\.align 4\n";
+ $T_HDR_info = "\.text\n\t\.align 4\n";
+ $T_HDR_entry = "\.text\n\t\.align 4\n";
+ $T_HDR_vector = "\.text\n\t\.align 4\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^sparc-.*-sunos4/ ) {
+
+ $T_STABBY = 1; # 1 iff .stab things (usually if a.out format)
+ $T_US = '_'; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '^# DOES NOT SEEM TO APPLY'; # regexp that says what comes before APP/NO_APP
+ $T_CONST_LBL = '^LC(\d+):$';
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.stab.*)\n)';
+ $T_COPY_DIRVS = '\.(global|proc|stab)';
+
+ $T_DOT_WORD = '\.word';
+ $T_DOT_GLOBAL = '^\t\.global';
+ $T_HDR_literal = "\.text\n\t\.align 8\n";
+ $T_HDR_misc = "\.text\n\t\.align 4\n";
+ $T_HDR_data = "\.data\n\t\.align 8\n";
+ $T_HDR_rodata = "\.text\n\t\.align 4\n";
+ $T_HDR_closure = "\.data\n\t\.align 4\n";
+ $T_HDR_info = "\.text\n\t\.align 4\n";
+ $T_HDR_entry = "\.text\n\t\.align 4\n";
+ $T_HDR_vector = "\.text\n\t\.align 4\n";
+
+ #--------------------------------------------------------#
+ } elsif ( $TargetPlatform =~ /^sparc-.*-linux/ ) {
+ $T_STABBY = 0; # 1 iff .stab things (usually if a.out format)
+ $T_US = ''; # _ if symbols have an underscore on the front
+ $T_PRE_APP = '#'; # regexp that says what comes before APP/NO_APP
+ # Probably doesn't apply anyway
+ $T_CONST_LBL = '^\.LLC(\d+):$'; # regexp for what such a lbl looks like
+ $T_POST_LBL = ':';
+
+ $T_MOVE_DIRVS = '^((\s+\.align\s+\d+|\s+\.proc\s+\d+|\s+\.global\s+\S+|\.text|\.data|\.seg|\.stab.*|\s+?\.section.*|\s+\.type.*|\s+\.size.*)\n)';
+ $T_COPY_DIRVS = '\.(global|globl|proc|stab)';
+
+ $T_DOT_WORD = '\.(long|word|nword|xword|byte|half|short|skip|uahalf|uaword)';
+ $T_DOT_GLOBAL = '^\t\.global';
+ $T_HDR_literal = "\.text\n\t\.align 8\n";
+ $T_HDR_misc = "\.text\n\t\.align 4\n";
+ $T_HDR_data = "\.data\n\t\.align 8\n";
+ $T_HDR_rodata = "\.text\n\t\.align 4\n";
+ $T_HDR_closure = "\.data\n\t\.align 4\n";
+ $T_HDR_info = "\.text\n\t\.align 4\n";
+ $T_HDR_entry = "\.text\n\t\.align 4\n";
+ $T_HDR_vector = "\.text\n\t\.align 4\n";
+
+ #--------------------------------------------------------#
+ } else {
+ print STDERR "$Pgm: don't know how to mangle assembly language for: $TargetPlatform\n";
+ exit 1;
+ }
+
+ if($T_HDR_relrodata eq "") {
+ # default values:
+ # relrodata defaults to rodata.
+ $T_HDR_relrodata = $T_HDR_rodata;
+ }
+
+if ( 0 ) {
+print STDERR "T_STABBY: $T_STABBY\n";
+print STDERR "T_US: $T_US\n";
+print STDERR "T_PRE_APP: $T_PRE_APP\n";
+print STDERR "T_CONST_LBL: $T_CONST_LBL\n";
+print STDERR "T_POST_LBL: $T_POST_LBL\n";
+if ( $TargetPlatform =~ /^i386-/ ) {
+ print STDERR "T_X86_PRE_LLBL_PAT: $T_X86_PRE_LLBL_PAT\n";
+ print STDERR "T_X86_PRE_LLBL: $T_X86_PRE_LLBL\n";
+ print STDERR "T_X86_BADJMP: $T_X86_BADJMP\n";
+}
+print STDERR "T_MOVE_DIRVS: $T_MOVE_DIRVS\n";
+print STDERR "T_COPY_DIRVS: $T_COPY_DIRVS\n";
+print STDERR "T_DOT_WORD: $T_DOT_WORD\n";
+print STDERR "T_HDR_literal: $T_HDR_literal\n";
+print STDERR "T_HDR_misc: $T_HDR_misc\n";
+print STDERR "T_HDR_data: $T_HDR_data\n";
+print STDERR "T_HDR_rodata: $T_HDR_rodata\n";
+print STDERR "T_HDR_closure: $T_HDR_closure\n";
+print STDERR "T_HDR_info: $T_HDR_info\n";
+print STDERR "T_HDR_entry: $T_HDR_entry\n";
+print STDERR "T_HDR_vector: $T_HDR_vector\n";
+}
+
+}
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Mangle away}
+%* *
+%************************************************************************
+
+\begin{code}
+sub mangle_asm {
+ local($in_asmf, $out_asmf) = @_;
+
+ # multi-line regexp matching:
+ local($*) = 1;
+ local($i, $c);
+
+
+ &init_TARGET_STUFF();
+ &init_FUNNY_THINGS();
+
+ open(INASM, "< $in_asmf")
+ || &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n");
+ open(OUTASM,"> $out_asmf")
+ || &tidy_up_and_die(1,"$Pgm: failed to open `$out_asmf' (to write)\n");
+
+ # read whole file, divide into "chunks":
+ # record some info about what we've found...
+
+ @chk = (); # contents of the chunk
+ $numchks = 0; # number of them
+ @chkcat = (); # what category of thing in each chunk
+ @chksymb = (); # what symbol(base) is defined in this chunk
+ %entrychk = (); # ditto, its entry code
+ %closurechk = (); # ditto, the (static) closure
+ %srtchk = (); # ditto, its SRT (for top-level things)
+ %infochk = (); # given a symbol base, say what chunk its info tbl is in
+ %vectorchk = (); # ditto, return vector table
+ $EXTERN_DECLS = ''; # .globl <foo> .text (MIPS only)
+
+ $i = 0; $chkcat[0] = 'misc'; $chk[0] = '';
+
+ while (<INASM>) {
+ tr/\r//d if $TargetPlatform =~ /-mingw32$/; # In case Perl doesn't convert line endings
+ next if $T_STABBY && /^\.stab.*${T_US}__stg_split_marker/o;
+ next if $T_STABBY && /^\.stab.*ghc.*c_ID/;
+ next if /^\t\.def.*endef$/;
+ next if /${T_PRE_APP}(NO_)?APP/o;
+ next if /^;/ && $TargetPlatform =~ /^hppa/;
+
+ next if /(^$|^\t\.file\t|^ # )/ && $TargetPlatform =~ /^(mips|ia64)-/;
+
+ if ( $TargetPlatform =~ /^mips-/
+ && /^\t\.(globl\S+\.text|comm\t)/ ) {
+ $EXTERN_DECLS .= $_ unless /(__DISCARD__|\b(PK_|ASSIGN_)(FLT|DBL)\b)/;
+ # Treat .comm variables as data. These show up in two (known) places:
+ #
+ # - the module_registered variable used in the __stginit fragment.
+ # even though these are declared static and initialised, gcc 3.3
+ # likes to make them .comm, presumably to save space in the
+ # object file.
+ #
+ # - global variables used to pass arguments from C to STG in
+ # a foreign export. (is this still true? --SDM)
+ #
+ } elsif ( /^\t\.comm.*$/ ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'data';
+ $chksymb[$i] = '';
+
+ # Labels ending "_str": these are literal strings.
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_str${T_POST_LBL}$/ ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'rodata';
+ $chksymb[$i] = '';
+ } elsif ( $TargetPlatform =~ /-darwin/
+ && (/^\s*\.subsections_via_symbols/
+ ||/^\s*\.no_dead_strip.*/)) {
+ # Don't allow Apple's linker to do any dead-stripping of symbols
+ # in this file, because it will mess up info-tables in mangled
+ # code.
+ # The .no_dead_strip directives are actually put there by
+ # the gcc3 "used" attribute on entry points.
+
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && (
+ /^\s*\.picsymbol_stub/
+ || /^\s*\.section __TEXT,__picsymbol_stub\d,.*/
+ || /^\s*\.section __TEXT,__picsymbolstub\d,.*/
+ || /^\s*\.symbol_stub/
+ || /^\s*\.section __TEXT,__symbol_stub\d,.*/
+ || /^\s*\.section __TEXT,__symbolstub\d,.*/
+ || /^\s*\.lazy_symbol_pointer/
+ || /^\s*\.non_lazy_symbol_pointer/
+ || /^\s*\.section __IMPORT.*/))
+ {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'dyld';
+ $chksymb[$i] = '';
+ $dyld_section = $_;
+
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.data/)
+ { # non_lazy_symbol_ptrs that point to local symbols
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'dyld';
+ $chksymb[$i] = '';
+ $dyld_section = $_;
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^\s*\.align/)
+ { # non_lazy_symbol_ptrs that point to local symbols
+ $dyld_section .= $_;
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' && /^L_.*:$/)
+ { # non_lazy_symbol_ptrs that point to local symbols
+ $chk[++$i] = $dyld_section . $_;
+ $chkcat[$i] = 'dyld';
+ $chksymb[$i] = '';
+
+ } elsif ( /^\s+/ ) { # most common case first -- a simple line!
+ # duplicated from the bottom
+
+ $chk[$i] .= $_;
+
+ } elsif ( /\.\.ng:$/ && $TargetPlatform =~ /^alpha-/ ) {
+ # Alphas: Local labels not to be confused with new chunks
+ $chk[$i] .= $_;
+ # NB: all the rest start with a non-space
+
+ } elsif ( $TargetPlatform =~ /^mips-/
+ && /^\d+:/ ) { # a funny-looking very-local label
+ $chk[$i] .= $_;
+
+ } elsif ( /$T_CONST_LBL/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'literal';
+ $chksymb[$i] = $1;
+
+ } elsif ( /^${T_US}__stg_split_marker(\d*)${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'splitmarker';
+ $chksymb[$i] = $1;
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_info${T_POST_LBL}$/o ) {
+ $symb = $1;
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'infotbl';
+ $chksymb[$i] = $symb;
+
+ die "Info table already? $symb; $i\n" if defined($infochk{$symb});
+
+ $infochk{$symb} = $i;
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_(entry|ret)${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'entry';
+ $chksymb[$i] = $1;
+
+ $entrychk{$1} = $i;
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_closure${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'closure';
+ $chksymb[$i] = $1;
+
+ $closurechk{$1} = $i;
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_srt${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'srt';
+ $chksymb[$i] = $1;
+
+ $srtchk{$1} = $i;
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_ct${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'data';
+ $chksymb[$i] = '';
+
+ } elsif ( /^${T_US}(stg_ap_stack_entries|stg_stack_save_entries|stg_arg_bitmaps)${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'data';
+ $chksymb[$i] = '';
+
+ } elsif ( /^(${T_US}__gnu_compiled_c|gcc2_compiled\.)${T_POST_LBL}/o ) {
+ ; # toss it
+
+ } elsif ( /^${T_US}[A-Za-z0-9_]+\.\d+${T_POST_LBL}$/o
+ || /^${T_US}.*_CAT${T_POST_LBL}$/o # PROF: _entryname_CAT
+ || /^${T_US}.*_done${T_POST_LBL}$/o # PROF: _module_done
+ || /^${T_US}_module_registered${T_POST_LBL}$/o # PROF: _module_registered
+ ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'data';
+ $chksymb[$i] = '';
+
+ } elsif ( /^([A-Za-z0-9_]+)\s+\.comm/ && $TargetPlatform =~ /^hppa/ ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'bss';
+ $chksymb[$i] = '';
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_cc(s)?${T_POST_LBL}$/o ) {
+ # all CC_ symbols go in the data section...
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'data';
+ $chksymb[$i] = '';
+
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_(alt|dflt)${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'misc';
+ $chksymb[$i] = '';
+ } elsif ( /^${T_US}([A-Za-z0-9_]+)_vtbl${T_POST_LBL}$/o ) {
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'vector';
+ $chksymb[$i] = $1;
+
+ $vectorchk{$1} = $i;
+
+ } elsif ( $TargetPlatform =~ /^i386-.*-solaris2/
+ && /^[A-Za-z0-9][A-Za-z0-9_]*:/ ) {
+ # Some Solaris system headers contain function definitions (as
+ # opposed to mere prototypes), which end up in the .hc file when
+ # a Haskell module foreign imports the corresponding system
+ # functions (most notably stat()). We put them into the text
+ # segment. Note that this currently does not extend to function
+ # names starting with an underscore.
+ # - chak 7/2001
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'misc';
+ $chksymb[$i] = $1;
+
+ } elsif ( $TargetPlatform =~ /^i386-apple-darwin/ && /^(___i686\.get_pc_thunk\.[abcd]x):/o) {
+ # To handle PIC on Darwin/x86, we need to appropriately pass through
+ # the get_pc_thunk functions. They need to be put into a special section
+ # marked as coalesced (otherwise the .weak_definition doesn't work
+ # on Darwin).
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'get_pc_thunk';
+ $chksymb[$i] = $1;
+
+ } elsif ( /^${T_US}[A-Za-z0-9_]/o
+ && ( $TargetPlatform !~ /^hppa/ # need to avoid local labels in this case
+ || ! /^L\$\d+$/ )
+ && ( $TargetPlatform !~ /^powerpc64/ # we need to avoid local labels in this case
+ || ! /^\.L\d+:$/ ) ) {
+ local($thing);
+ chop($thing = $_);
+ $thing =~ s/:$//;
+ $chk[++$i] = $_;
+ $chksymb[$i] = '';
+ if (
+ /^${T_US}stg_.*${T_POST_LBL}$/o # RTS internals
+ || /^${T_US}__stg_.*${T_POST_LBL}$/o # more RTS internals
+ || /^${T_US}__fexp_.*${T_POST_LBL}$/o # foreign export
+ || /^${T_US}.*_slow${T_POST_LBL}$/o # slow entry
+ || /^${T_US}__stginit.*${T_POST_LBL}$/o # __stginit<module>
+ || /^${T_US}.*_btm${T_POST_LBL}$/o # large bitmaps
+ || /^${T_US}.*_fast${T_POST_LBL}$/o # primops
+ || /^_uname:/o # x86/Solaris2
+ )
+ {
+ $chkcat[$i] = 'misc';
+ } elsif (
+ /^${T_US}.*_srtd${T_POST_LBL}$/o # large bitmaps
+ || /^${T_US}.*_closure_tbl${T_POST_LBL}$/o # closure tables
+ )
+ {
+ $chkcat[$i] = 'relrodata';
+ } else
+ {
+ print STDERR "Warning: retaining unknown function \`$thing' in output from C compiler\n";
+ $chkcat[$i] = 'unknown';
+ }
+
+ } elsif ( $TargetPlatform =~ /^powerpc-.*-linux/ && /^\.LCTOC1 = /o ) {
+ # PowerPC Linux's large-model PIC (-fPIC) generates a global offset
+ # table "by hand". Be sure to copy it over.
+ # Note that this label and all entries in the table should actually
+ # go into the .got2 section, but it isn't easy to distinguish them
+ # from other constant literals (.LC\d+), so we just put everything
+ # in .rodata.
+ $chk[++$i] = $_;
+ $chkcat[$i] = 'literal';
+ $chksymb[$i] = 'LCTOC1';
+ } else { # simple line (duplicated at the top)
+
+ $chk[$i] .= $_;
+ }
+ }
+ $numchks = $#chk + 1;
+
+ # open CHUNKS, ">/tmp/chunks1" or die "Cannot open /tmp/chunks1: $!\n";
+ # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] }
+ # close CHUNKS;
+
+ # the division into chunks is imperfect;
+ # we throw some things over the fence into the next
+ # chunk.
+ #
+ # also, there are things we would like to know
+ # about the whole module before we start spitting
+ # output.
+
+ local($FIRST_MANGLABLE) = ($TargetPlatform =~ /^(alpha-|hppa|mips-)/) ? 1 : 0;
+ local($FIRST_TOSSABLE ) = ($TargetPlatform =~ /^(hppa|mips-)/) ? 1 : 0;
+
+# print STDERR "first chunk to mangle: $FIRST_MANGLABLE\n";
+
+ # Alphas: NB: we start meddling at chunk 1, not chunk 0
+ # The first ".rdata" is quite magical; as of GCC 2.7.x, it
+ # spits a ".quad 0" in after the very first ".rdata"; we
+ # detect this special case (tossing the ".quad 0")!
+ local($magic_rdata_seen) = 0;
+
+ # HPPAs, MIPSen: also start meddling at chunk 1
+
+ for ($i = $FIRST_TOSSABLE; $i < $numchks; $i++) {
+ $c = $chk[$i]; # convenience copy
+
+# print STDERR "\nCHK $i (BEFORE) (",$chkcat[$i],"):\n", $c;
+
+ # toss all prologue stuff; HPPA is pretty weird
+ # (see elsewhere)
+ $c = &hppa_mash_prologue($c) if $TargetPlatform =~ /^hppa-/;
+
+ # be slightly paranoid to make sure there's
+ # nothing surprising in there
+ if ( $c =~ /--- BEGIN ---/ ) {
+ if (($p, $r) = split(/--- BEGIN ---/, $c)) {
+
+ # remove junk whitespace around the split point
+ $p =~ s/\t+$//;
+ $r =~ s/^\s*\n//;
+
+ if ($TargetPlatform =~ /^i386-/) {
+ if ($p =~ /^\tsubl\s+\$(\d+),\s*\%esp\n/) {
+ if ($1 >= 8192) {
+ die "Error: reserved stack space exceeded!\n Possible workarounds: compile with -fasm, or try another version of gcc.\n"
+ }
+ }
+
+ # gcc 3.4.3 puts this kind of stuff in the prologue, eg.
+ # when compiling PrimOps.cmm with -optc-O2:
+ # xorl %ecx, %ecx
+ # xorl %edx, %edx
+ # movl %ecx, 16(%esp)
+ # movl %edx, 20(%esp)
+ # but then the code of the function doesn't assume
+ # anything about the contents of these stack locations.
+ # I think it's to do with the use of inline functions for
+ # PK_Word64() and friends, where gcc is initialising the
+ # contents of the struct to zero, and failing to optimise
+ # away the initialisation. Let's live dangerously and
+ # discard these initialisations.
+
+ $p =~ s/^\tpushl\s+\%e(di|si|bx)\n//g;
+ $p =~ s/^\txorl\s+\%e(ax|cx|dx),\s*\%e(ax|cx|dx)\n//g;
+ $p =~ s/^\tmovl\s+\%e(ax|cx|dx|si|di),\s*\d*\(\%esp\)\n//g;
+ $p =~ s/^\tmovl\s+\$\d+,\s*\d*\(\%esp\)\n//g;
+ $p =~ s/^\tsubl\s+\$\d+,\s*\%esp\n//;
+ $p =~ s/^\tmovl\s+\$\d+,\s*\%eax\n\tcall\s+__alloca\n// if ($TargetPlatform =~ /^.*-(cygwin32|mingw32)/);
+
+ if ($TargetPlatform =~ /^i386-apple-darwin/) {
+ $pcrel_label = $p;
+ $pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = "";
+ $pcrel_reg = $p;
+ $pcrel_reg =~ s/(.|\n)*.*___i686\.get_pc_thunk\.([abcd]x)\n(.|\n)*/$2/ or $pcrel_reg = "";
+ $p =~ s/^\s+call\s+___i686\.get_pc_thunk\..x//;
+ $p =~ s/^\"?L\d+\$pb\"?:\n//;
+
+ if ($pcrel_reg eq "bx") {
+ # Bad gcc. Goes and uses %ebx, our BaseReg, for PIC. Bad gcc.
+ die "Darwin/x86: -fPIC -via-C doesn't work yet, use -fasm. Aborting."
+ }
+ }
+
+ } elsif ($TargetPlatform =~ /^x86_64-/) {
+ $p =~ s/^\tpushq\s+\%r(bx|bp|12|13|14)\n//g;
+ $p =~ s/^\tmovq\s+\%r(bx|bp|12|13|14),\s*\d*\(\%rsp\)\n//g;
+ $p =~ s/^\tsubq\s+\$\d+,\s*\%rsp\n//;
+
+ } elsif ($TargetPlatform =~ /^ia64-/) {
+ $p =~ s/^\t\.prologue .*\n//;
+ $p =~ s/^\t\.save ar\.pfs, r\d+\n\talloc r\d+ = ar\.pfs, 0, 3[12], \d+, 0\n//;
+ $p =~ s/^\t\.fframe \d+\n\tadds r12 = -\d+, r12\n//;
+ $p =~ s/^\t\.save rp, r\d+\n\tmov r\d+ = b0\n//;
+ $p =~ s/^\t\.(mii|mmi)\n//g; # bundling is no longer sensible
+ $p =~ s/^\t;;\n//g; # discard stops
+ $p =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
+
+ # GCC 3.3 saves r1 in the prologue, move this to the body
+ if ($p =~ /^\tmov r\d+ = r1\n/) {
+ $p = $` . $';
+ $r = $& . $r;
+ }
+ } elsif ($TargetPlatform =~ /^m68k-/) {
+ $p =~ s/^\tlink a6,#-?\d.*\n//;
+ $p =~ s/^\tpea a6@\n\tmovel sp,a6\n//;
+ # The above showed up in the asm code,
+ # so I added it here.
+ # I hope it's correct.
+ # CaS
+ $p =~ s/^\tmovel d2,sp\@-\n//;
+ $p =~ s/^\tmovel d5,sp\@-\n//; # SMmark.* only?
+ $p =~ s/^\tmoveml \#0x[0-9a-f]+,sp\@-\n//; # SMmark.* only?
+ } elsif ($TargetPlatform =~ /^mips-/) {
+ # the .frame/.mask/.fmask that we use is the same
+ # as that produced by GCC for miniInterpret; this
+ # gives GDB some chance of figuring out what happened
+ $FRAME = "\t.frame\t\$sp,2168,\$31\n\t.mask\t0x90000000,-4\n\t.fmask\t0x00000000,0\n";
+ $p =~ s/^\t\.(frame).*\n/__FRAME__/g;
+ $p =~ s/^\t\.(mask|fmask).*\n//g;
+ $p =~ s/^\t\.cprestore.*\n/\t\.cprestore 416\n/; # 16 + 100 4-byte args
+ $p =~ s/^\tsubu\t\$sp,\$sp,\d+\n//;
+ $p =~ s/^\tsw\t\$31,\d+\(\$sp\)\n//;
+ $p =~ s/^\tsw\t\$fp,\d+\(\$sp\)\n//;
+ $p =~ s/^\tsw\t\$28,\d+\(\$sp\)\n//;
+ $p =~ s/__FRAME__/$FRAME/;
+ } elsif ($TargetPlatform =~ /^powerpc-apple-darwin.*/) {
+ $pcrel_label = $p;
+ $pcrel_label =~ s/(.|\n)*^(\"?L\d+\$pb\"?):\n(.|\n)*/$2/ or $pcrel_label = "";
+
+ $p =~ s/^\tmflr r0\n//;
+ $p =~ s/^\tbl saveFP # f\d+\n//;
+ $p =~ s/^\tbl saveFP ; save f\d+-f\d+\n//;
+ $p =~ s/^\"?L\d+\$pb\"?:\n//;
+ $p =~ s/^\tstmw r\d+,-\d+\(r1\)\n//;
+ $p =~ s/^\tstfd f\d+,-\d+\(r1\)\n//g;
+ $p =~ s/^\tstw r0,\d+\(r1\)\n//g;
+ $p =~ s/^\tstwu r1,-\d+\(r1\)\n//;
+ $p =~ s/^\tstw r\d+,-\d+\(r1\)\n//g;
+ $p =~ s/^\tbcl 20,31,L\d+\$pb\n//;
+ $p =~ s/^L\d+\$pb:\n//;
+ $p =~ s/^\tmflr r31\n//;
+
+ # This is bad: GCC 3 seems to zero-fill some local variables in the prologue
+ # under some circumstances, only when generating position dependent code.
+ # I have no idea why, and I don't think it is necessary, so let's toss it.
+ $p =~ s/^\tli r\d+,0\n//g;
+ $p =~ s/^\tstw r\d+,\d+\(r1\)\n//g;
+ } elsif ($TargetPlatform =~ /^powerpc-.*-linux/) {
+ $p =~ s/^\tmflr 0\n//;
+ $p =~ s/^\tstmw \d+,\d+\(1\)\n//;
+ $p =~ s/^\tstfd \d+,\d+\(1\)\n//g;
+ $p =~ s/^\tstw r0,8\(1\)\n//;
+ $p =~ s/^\tstwu 1,-\d+\(1\)\n//;
+ $p =~ s/^\tstw \d+,\d+\(1\)\n//g;
+
+ # GCC's "large-model" PIC (-fPIC)
+ $pcrel_label = $p;
+ $pcrel_label =~ s/(.|\n)*^.LCF(\d+):\n(.|\n)*/$2/ or $pcrel_label = "";
+
+ $p =~ s/^\tbcl 20,31,.LCF\d+\n//;
+ $p =~ s/^.LCF\d+:\n//;
+ $p =~ s/^\tmflr 30\n//;
+ $p =~ s/^\tlwz 0,\.LCL\d+-\.LCF\d+\(30\)\n//;
+ $p =~ s/^\tadd 30,0,30\n//;
+
+ # This is bad: GCC 3 seems to zero-fill some local variables in the prologue
+ # under some circumstances, only when generating position dependent code.
+ # I have no idea why, and I don't think it is necessary, so let's toss it.
+ $p =~ s/^\tli \d+,0\n//g;
+ $p =~ s/^\tstw \d+,\d+\(1\)\n//g;
+ } elsif ($TargetPlatform =~ /^powerpc64-.*-linux/) {
+ $p =~ s/^\tmr 31,1\n//;
+ $p =~ s/^\tmflr 0\n//;
+ $p =~ s/^\tstmw \d+,\d+\(1\)\n//;
+ $p =~ s/^\tstfd \d+,-?\d+\(1\)\n//g;
+ $p =~ s/^\tstd r0,8\(1\)\n//;
+ $p =~ s/^\tstdu 1,-\d+\(1\)\n//;
+ $p =~ s/^\tstd \d+,-?\d+\(1\)\n//g;
+
+ # This is bad: GCC 3 seems to zero-fill some local variables in the prologue
+ # under some circumstances, only when generating position dependent code.
+ # I have no idea why, and I don't think it is necessary, so let's toss it.
+ $p =~ s/^\tli \d+,0\n//g;
+ $p =~ s/^\tstd \d+,\d+\(1\)\n//g;
+ } else {
+ print STDERR "$Pgm: unknown prologue mangling? $TargetPlatform\n";
+ }
+
+ # HWL HACK: dont die, just print a warning
+ #print stderr "HWL: this should die! Prologue junk?: $p\n" if $p =~ /^\t[^\.]/;
+ die "Prologue junk?: $p\n" if $p =~ /^\s+[^\s\.]/;
+
+ # For PIC, we want to keep part of the prologue
+ if ($TargetPlatform =~ /^powerpc-apple-darwin.*/ && $pcrel_label ne "") {
+ # Darwin: load the current instruction pointer into register r31
+ $p .= "bcl 20,31,$pcrel_label\n";
+ $p .= "$pcrel_label:\n";
+ $p .= "\tmflr r31\n";
+ } elsif ($TargetPlatform =~ /^powerpc-.*-linux/ && $pcrel_label ne "") {
+ # Linux: load the GOT pointer into register 30
+ $p .= "\tbcl 20,31,.LCF$pcrel_label\n";
+ $p .= ".LCF$pcrel_label:\n";
+ $p .= "\tmflr 30\n";
+ $p .= "\tlwz 0,.LCL$pcrel_label-.LCF$pcrel_label(30)\n";
+ $p .= "\tadd 30,0,30\n";
+ } elsif ($TargetPlatform =~ /^i386-apple-darwin.*/ && $pcrel_label ne "") {
+ $p .= "\tcall ___i686.get_pc_thunk.$pcrel_reg\n";
+ $p .= "$pcrel_label:\n";
+ }
+
+ # glue together what's left
+ $c = $p . $r;
+ }
+ }
+
+ if ( $TargetPlatform =~ /^mips-/ ) {
+ # MIPS: first, this basic sequence may occur "--- END ---" or not
+ $c =~ s/^\tlw\t\$31,\d+\(\$sp\)\n\taddu\t\$sp,\$sp,\d+\n\tj\t\$31\n\t\.end/\t\.end/;
+ }
+
+ # toss all epilogue stuff; again, paranoidly
+ if ( $c =~ /--- END ---/ ) {
+ if (($r, $e) = split(/--- END ---/, $c)) {
+ if ($TargetPlatform =~ /^i386-/) {
+ $e =~ s/^\tret\n//;
+ $e =~ s/^\tpopl\s+\%edi\n//;
+ $e =~ s/^\tpopl\s+\%esi\n//;
+ $e =~ s/^\tpopl\s+\%edx\n//;
+ $e =~ s/^\tpopl\s+\%ecx\n//;
+ $e =~ s/^\taddl\s+\$\d+,\s*\%esp\n//;
+ $e =~ s/^\tsubl\s+\$-\d+,\s*\%esp\n//;
+ } elsif ($TargetPlatform =~ /^ia64-/) {
+ $e =~ s/^\tmov ar\.pfs = r\d+\n//;
+ $e =~ s/^\tmov b0 = r\d+\n//;
+ $e =~ s/^\t\.restore sp\n\tadds r12 = \d+, r12\n//;
+ $e =~ s/^\tbr\.ret\.sptk\.many b0\n//;
+ $e =~ s/^\t\.(mii|mmi|mib)\n//g; # bundling is no longer sensible
+ $e =~ s/^\t;;\n//g; # discard stops - stop at end of body is sufficient
+ $e =~ s/^\t\/\/.*\n//g; # gcc inserts timings in // comments
+ } elsif ($TargetPlatform =~ /^m68k-/) {
+ $e =~ s/^\tunlk a6\n//;
+ $e =~ s/^\trts\n//;
+ } elsif ($TargetPlatform =~ /^mips-/) {
+ $e =~ s/^\tlw\t\$31,\d+\(\$sp\)\n//;
+ $e =~ s/^\tlw\t\$fp,\d+\(\$sp\)\n//;
+ $e =~ s/^\taddu\t\$sp,\$sp,\d+\n//;
+ $e =~ s/^\tj\t\$31\n//;
+ } elsif ($TargetPlatform =~ /^powerpc-apple-darwin.*/) {
+ $e =~ s/^\taddi r1,r1,\d+\n//;
+ $e =~ s/^\tlwz r\d+,\d+\(r1\)\n//;
+ $e =~ s/^\tlmw r\d+,-\d+\(r1\)\n//;
+ $e =~ s/^\tmtlr r0\n//;
+ $e =~ s/^\tblr\n//;
+ $e =~ s/^\tb restFP ;.*\n//;
+ } elsif ($TargetPlatform =~ /^powerpc64-.*-linux/) {
+ $e =~ s/^\tmr 3,0\n//;
+ $e =~ s/^\taddi 1,1,\d+\n//;
+ $e =~ s/^\tld 0,16\(1\)\n//;
+ $e =~ s/^\tmtlr 0\n//;
+
+ # callee-save registers
+ $e =~ s/^\tld \d+,-?\d+\(1\)\n//g;
+ $e =~ s/^\tlfd \d+,-?\d+\(1\)\n//g;
+
+ # get rid of the debug junk along with the blr
+ $e =~ s/^\tblr\n\t.long .*\n\t.byte .*\n//;
+
+ # in case we missed it with the last one, get the blr alone
+ $e =~ s/^\tblr\n//;
+ } else {
+ print STDERR "$Pgm: unknown epilogue mangling? $TargetPlatform\n";
+ }
+
+ print STDERR "WARNING: Epilogue junk?: $e\n" if $e =~ /^\t\s*[^\.\s\n]/;
+
+ # glue together what's left
+ $c = $r . $e;
+ $c =~ s/\n\t\n/\n/; # junk blank line
+ }
+ }
+
+ # On SPARCs, we don't do --- BEGIN/END ---, we just
+ # toss the register-windowing save/restore/ret* instructions
+ # directly unless they've been generated by function definitions in header
+ # files on Solaris:
+ if ( $TargetPlatform =~ /^sparc-/ ) {
+ if ( ! ( $TargetPlatform =~ /solaris2$/ && $chkcat[$i] eq 'unknown' )) {
+ $c =~ s/^\t(save.*|restore.*|ret|retl)\n//g;
+ }
+ # throw away PROLOGUE comments
+ $c =~ s/^\t!#PROLOGUE# 0\n\t!#PROLOGUE# 1\n//;
+ }
+
+ # On Alphas, the prologue mangling is done a little later (below)
+
+ # toss all calls to __DISCARD__
+ $c =~ s/^\t(call|jbsr|jal)\s+${T_US}__DISCARD__\n//go;
+ $c =~ s/^\tjsr\s+\$26\s*,\s*${T_US}__DISCARD__\n//go if $TargetPlatform =~ /^alpha-/;
+ $c =~ s/^\tbl\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /^powerpc-apple-darwin.*/;
+ $c =~ s/^\tbl\s+__DISCARD__(\@plt)?\n//go if $TargetPlatform =~ /^powerpc-.*-linux/;
+ $c =~ s/^\tbl\s+\.__DISCARD__\n\s+nop\n//go if $TargetPlatform =~ /^powerpc64-.*-linux/;
+ $c =~ s/^\tcall\s+L___DISCARD__\$stub\n//go if $TargetPlatform =~ /i386-apple-darwin.*/;
+
+ # IA64: mangle tailcalls into jumps here
+ if ($TargetPlatform =~ /^ia64-/) {
+ while ($c =~ s/^\tbr\.call\.sptk\.many b0 = (.*)\n(?:^\.L([0-9]*):\n)?(?:\t;;\n)?(?:\tmov r1 = r\d+\n)?(?:\t;;\n)?\t--- TAILCALL ---\n(?:\t;;\n\tbr \.L\d+\n)?/\tbr\.few $1\n/) {
+ # Eek, the gcc optimiser is getting smarter... if we see a jump to the --- TAILCALL ---
+ # marker then we reapply the substitution at the source sites
+ $c =~ s/^\tbr \.L$2\n/\t--- TAILCALL ---\n/g if ($2);
+ }
+ }
+
+ # MIPS: that may leave some gratuitous asm macros around
+ # (no harm done; but we get rid of them to be tidier)
+ $c =~ s/^\t\.set\tnoreorder\n\t\.set\tnomacro\n\taddu\t(\S+)\n\t\.set\tmacro\n\t\.set\treorder\n/\taddu\t$1\n/
+ if $TargetPlatform =~ /^mips-/;
+
+ # toss stack adjustment after DoSparks
+ $c =~ s/^(\tjbsr _DoSparks\n)\taddqw #8,sp/$1/g
+ if $TargetPlatform =~ /^m68k-/; # this looks old...
+
+ if ( $TargetPlatform =~ /^alpha-/ &&
+ ! $magic_rdata_seen &&
+ $c =~ /^\s*\.rdata\n\t\.quad 0\n\t\.align \d\n/ ) {
+ $c =~ s/^\s*\.rdata\n\t\.quad 0\n\t\.align (\d)\n/\.rdata\n\t\.align $1\n/;
+ $magic_rdata_seen = 1;
+ }
+
+ # pick some end-things and move them to the next chunk
+
+ # pin a funny end-thing on (for easier matching):
+ $c .= 'FUNNY#END#THING';
+
+ while ( $c =~ /${T_MOVE_DIRVS}FUNNY#END#THING/o ) {
+
+ $to_move = $1;
+
+ # on x86 we try not to copy any directives into a literal
+ # chunk, rather we keep looking for the next real chunk. This
+ # is because we get things like
+ #
+ # .globl blah_closure
+ # .LC32
+ # .string "..."
+ # blah_closure:
+ # ...
+ #
+ if ( $TargetPlatform =~ /^(i386|sparc|powerpc)/ && $to_move =~ /${T_COPY_DIRVS}/ ) {
+ $j = $i + 1;
+ while ( $j < $numchks && $chk[$j] =~ /$T_CONST_LBL/) {
+ $j++;
+ }
+ if ( $j < $numchks ) {
+ $chk[$j] = $to_move . $chk[$j];
+ }
+ }
+
+ elsif ( $i < ($numchks - 1)
+ && ( $to_move =~ /${T_COPY_DIRVS}/
+ || ($TargetPlatform =~ /^hppa/ && $to_move =~ /align/ && $chkcat[$i+1] eq 'literal') )) {
+ $chk[$i + 1] = $to_move . $chk[$i + 1];
+ # otherwise they're tossed
+ }
+
+ $c =~ s/${T_MOVE_DIRVS}FUNNY#END#THING/FUNNY#END#THING/o;
+ }
+
+ if ( $TargetPlatform =~ /^alpha-/ && $c =~ /^\t\.ent\s+(\S+)/ ) {
+ $ent = $1;
+ # toss all prologue stuff, except for loading gp, and the ..ng address
+ unless ($c =~ /\.ent.*\n\$.*\.\.ng:/) {
+ if (($p, $r) = split(/^\t\.prologue/, $c)) {
+ if (($keep, $junk) = split(/\.\.ng:/, $p)) {
+ $keep =~ s/^\t\.frame.*\n/\t.frame \$30,0,\$26,0\n/;
+ $keep =~ s/^\t\.(mask|fmask).*\n//g;
+ $c = $keep . "..ng:\n";
+ } else {
+ print STDERR "malformed code block ($ent)?\n"
+ }
+ }
+ $c .= "\t.prologue" . $r;
+ }
+ }
+
+ $c =~ s/FUNNY#END#THING//;
+
+# print STDERR "\nCHK $i (AFTER) (",$chkcat[$i],"):\n", $c;
+
+ $chk[$i] = $c; # update w/ convenience copy
+ }
+
+ # open CHUNKS, ">/tmp/chunks2" or die "Cannot open /tmp/chunks2: $!\n";
+ # for (my $i = 0; $i < @chk; ++$i) { print CHUNKS "======= $i =======\n", $chk[$i] }
+ # close CHUNKS;
+
+ if ( $TargetPlatform =~ /^alpha-/ ) {
+ # print out the header stuff first
+ $chk[0] =~ s/^(\t\.file.*)"(ghc\d+\.c)"/$1"$ifile_root.hc"/;
+ print OUTASM $chk[0];
+
+ } elsif ( $TargetPlatform =~ /^hppa/ ) {
+ print OUTASM $chk[0];
+
+ } elsif ( $TargetPlatform =~ /^mips-/ ) {
+ $chk[0] = "\t\.file\t1 \"$ifile_root.hc\"\n" . $chk[0];
+
+ # get rid of horrible "<dollar>Revision: .*$" strings
+ local(@lines0) = split(/\n/, $chk[0]);
+ local($z) = 0;
+ while ( $z <= $#lines0 ) {
+ if ( $lines0[$z] =~ /^\t\.byte\t0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f$/ ) {
+ undef($lines0[$z]);
+ $z++;
+ while ( $z <= $#lines0 ) {
+ undef($lines0[$z]);
+ last if $lines0[$z] =~ /[,\t]0x0$/;
+ $z++;
+ }
+ }
+ $z++;
+ }
+ $chk[0] = join("\n", @lines0);
+ $chk[0] =~ s/\n\n+/\n/;
+ print OUTASM $chk[0];
+ }
+
+ # print out all the literal strings next
+ for ($i = 0; $i < $numchks; $i++) {
+ if ( $chkcat[$i] eq 'literal' ) {
+
+ # HACK: try to detect 16-byte constants and align them
+ # on a 16-byte boundary. x86_64 sometimes needs 128-bit
+ # aligned constants, and so does Darwin/x86.
+ if ( $TargetPlatform =~ /^x86_64/
+ || $TargetPlatform =~ /^i386-apple-darwin/ ) {
+ $z = $chk[$i];
+ if ($z =~ /(\.long.*\n.*\.long.*\n.*\.long.*\n.*\.long|\.quad.*\n.*\.quad)/) {
+ print OUTASM $T_HDR_literal16;
+ } else {
+ print OUTASM $T_HDR_literal;
+ }
+ } else {
+ print OUTASM $T_HDR_literal;
+ }
+
+ print OUTASM $chk[$i];
+ print OUTASM "; end literal\n" if $TargetPlatform =~ /^hppa/; # for the splitter
+
+ $chkcat[$i] = 'DONE ALREADY';
+ }
+ }
+
+ # on the HPPA, print out all the bss next
+ if ( $TargetPlatform =~ /^hppa/ ) {
+ for ($i = 1; $i < $numchks; $i++) {
+ if ( $chkcat[$i] eq 'bss' ) {
+ print OUTASM "\t.SPACE \$PRIVATE\$\n\t.SUBSPA \$BSS\$\n\t.align 4\n";
+ print OUTASM $chk[$i];
+
+ $chkcat[$i] = 'DONE ALREADY';
+ }
+ }
+ }
+
+ for ($i = $FIRST_MANGLABLE; $i < $numchks; $i++) {
+# print STDERR "$i: cat $chkcat[$i], symb $chksymb[$i]\n";
+
+ next if $chkcat[$i] eq 'DONE ALREADY';
+
+ if ( $chkcat[$i] eq 'misc' || $chkcat[$i] eq 'unknown' ) {
+ if ($chk[$i] ne '') {
+ print OUTASM $T_HDR_misc;
+ &print_doctored($chk[$i], 0);
+ }
+
+ } elsif ( $chkcat[$i] eq 'toss' ) {
+ print STDERR "*** NB: TOSSING code for $chksymb[$i] !!! ***\n";
+
+ } elsif ( $chkcat[$i] eq 'data' ) {
+ if ($chk[$i] ne '') {
+ print OUTASM $T_HDR_data;
+ print OUTASM $chk[$i];
+ }
+
+ } elsif ( $chkcat[$i] eq 'splitmarker' ) {
+ # we can just re-constitute this one...
+ # NB: we emit _three_ underscores no matter what,
+ # so ghc-split doesn't have to care.
+ print OUTASM "___stg_split_marker",$chksymb[$i],"${T_POST_LBL}\n";
+
+ } elsif ( $chkcat[$i] eq 'closure'
+ || $chkcat[$i] eq 'srt'
+ || $chkcat[$i] eq 'infotbl'
+ || $chkcat[$i] eq 'entry') { # do them in that order
+ $symb = $chksymb[$i];
+
+ # CLOSURE
+ if ( defined($closurechk{$symb}) ) {
+ print OUTASM $T_HDR_closure;
+ print OUTASM $chk[$closurechk{$symb}];
+ $chkcat[$closurechk{$symb}] = 'DONE ALREADY';
+ }
+
+ # SRT
+ if ( defined($srtchk{$symb}) ) {
+ print OUTASM $T_HDR_relrodata;
+ print OUTASM $chk[$srtchk{$symb}];
+ $chkcat[$srtchk{$symb}] = 'DONE ALREADY';
+ }
+
+ # INFO TABLE
+ if ( defined($infochk{$symb}) ) {
+
+ print OUTASM $T_HDR_info;
+ print OUTASM &rev_tbl($symb, $chk[$infochk{$symb}], 1);
+
+ # entry code will be put here!
+
+ $chkcat[$infochk{$symb}] = 'DONE ALREADY';
+ }
+
+ # ENTRY POINT
+ if ( defined($entrychk{$symb}) ) {
+
+ $c = $chk[$entrychk{$symb}];
+
+ # If this is an entry point with an info table,
+ # eliminate the entry symbol and all directives involving it.
+ if (defined($infochk{$symb}) && $TargetPlatform !~ /^ia64-/) {
+ @o = ();
+ foreach $l (split(/\n/,$c)) {
+ next if $l =~ /^.*$symb_(entry|ret)${T_POST_LBL}/;
+
+ # If we have .type/.size directives involving foo_entry,
+ # then make them refer to foo_info instead. The information
+ # in these directives is used by the cachegrind annotator,
+ # so it is worthwhile keeping.
+ if ($l =~ /^\s*\.(type|size).*$symb_(entry|ret)/) {
+ $l =~ s/$symb(_entry|_ret)/${symb}_info/g;
+ push(@o,$l);
+ next;
+ }
+ next if $l =~ /^\s*\..*$symb.*\n?/;
+ push(@o,$l);
+ }
+ $c = join("\n",@o) . "\n";
+ }
+
+ print OUTASM $T_HDR_entry;
+
+ &print_doctored($c, 1); # NB: the 1!!!
+
+ $chkcat[$entrychk{$symb}] = 'DONE ALREADY';
+ }
+
+ } elsif ( $chkcat[$i] eq 'vector' ) {
+ $symb = $chksymb[$i];
+
+ # VECTOR TABLE
+ if ( defined($vectorchk{$symb}) ) {
+ print OUTASM $T_HDR_vector;
+ print OUTASM &rev_tbl($symb, $chk[$vectorchk{$symb}], 0);
+
+ # direct return code will be put here!
+ $chkcat[$vectorchk{$symb}] = 'DONE ALREADY';
+
+ } elsif ( $TargetPlatform =~ /^alpha-/ ) {
+ # Alphas: the commented nop is for the splitter, to ensure
+ # that no module ends with a label as the very last
+ # thing. (The linker will adjust the label to point
+ # to the first code word of the next module linked in,
+ # even if alignment constraints cause the label to move!)
+
+ print OUTASM "\t# nop\n";
+ }
+
+ } elsif ( $chkcat[$i] eq 'rodata' ) {
+ print OUTASM $T_HDR_rodata;
+ print OUTASM $chk[$i];
+ $chkcat[$i] = 'DONE ALREADY';
+ } elsif ( $chkcat[$i] eq 'relrodata' ) {
+ print OUTASM $T_HDR_relrodata;
+ print OUTASM $chk[$i];
+ $chkcat[$i] = 'DONE ALREADY';
+ } elsif ( $chkcat[$i] eq 'toc' ) {
+ # silly optimisation to print tocs, since they come in groups...
+ print OUTASM $T_HDR_toc;
+ local($j) = $i;
+ while ($chkcat[$j] eq 'toc')
+ { if ( $chk[$j] !~ /\.tc UpdatePAP\[TC\]/ # not needed: always turned into a jump.
+ )
+ {
+ print OUTASM $chk[$j];
+ }
+ $chkcat[$j] = 'DONE ALREADY';
+ $j++;
+ }
+
+ } elsif ( $TargetPlatform =~ /^.*-apple-darwin.*/ && $chkcat[$i] eq 'dyld' ) {
+ # apple-darwin: dynamic linker stubs
+ if($chk[$i] !~ /\.indirect_symbol ___DISCARD__/)
+ { # print them out unchanged, but remove the stubs for __DISCARD__
+ print OUTASM $chk[$i];
+ }
+ } elsif ( $TargetPlatform =~ /^i386-apple-darwin.*/ && $chkcat[$i] eq 'get_pc_thunk' ) {
+ # i386-apple-darwin: __i686.get_pc_thunk.[abcd]x
+ print OUTASM ".section __TEXT,__textcoal_nt,coalesced,no_toc\n";
+ print OUTASM $chk[$i];
+ } else {
+ &tidy_up_and_die(1,"$Pgm: unknown chkcat (ghc-asm: $TargetPlatform)\n$chkcat[$i]\n$chk[$i]\n");
+ }
+ }
+
+ print OUTASM $EXTERN_DECLS if $TargetPlatform =~ /^mips-/;
+
+ # finished
+ close(OUTASM) || &tidy_up_and_die(1,"Failed writing to $out_asmf\n");
+ close(INASM) || &tidy_up_and_die(1,"Failed reading from $in_asmf\n");
+}
+\end{code}
+
+\begin{code}
+sub hppa_mash_prologue { # OK, epilogue, too
+    # Doctor the text of one HP-PA chunk and hand back the result.
+    # The argument is copied first, so the caller's string is left alone.
+    local($asm) = @_;
+
+    # Everything from .ENTRY up to the BEGIN marker is prologue: drop it.
+    $asm =~ s/^\s+\.ENTRY[^\0]*--- BEGIN ---/\t.ENTRY/;
+
+    # Lie about our .CALLINFO
+    $asm =~ s/^\s+\.CALLINFO.*$/\t.CALLINFO NO_CALLS,NO_UNWIND/;
+
+    # Drop the P from every LP' and RP'
+    $asm =~ s/LP'/L'/g;
+    $asm =~ s/RP'/R'/g;
+
+    # Likewise, everything from the END marker up to .EXIT is epilogue.
+    $asm =~ s/^\s+--- END ---[^\0]*\.EXIT/\t.EXIT/;
+
+    # The _info stuff was moved to the code segment; make references agree.
+    $asm =~ s/_info,DATA/_info,CODE/g;
+
+    return($asm);
+}
+\end{code}
+
+\begin{code}
+# print_doctored($chunk, $need_fallthru_patch)
+#
+# Writes one chunk of assembly text to OUTASM, after optional doctoring.
+#
+# * On x86_64 targets two "load a register, then jump through it" pairs
+#   are first collapsed into a single jmp.
+# * The chunk is then printed as it stands, and we return, unless the
+#   target is i386 AND the chunk has a line starting with a tab and a
+#   lower-case letter AND the chunk does not match an __stginit_ label.
+# * Otherwise (i386) jumps are rewritten, an "entry patch" is inserted
+#   before the first instruction and an "exit patch" before each jmp
+#   that is not to a local label; the patches copy %esi and/or %edi
+#   to/from slots addressed off %ebx, depending on $StolenX86Regs.
+#
+# $need_fallthru_patch: when true, the exit patch is also printed after
+# the chunk.
+#
+# NOTE(review): the /^.../ patterns are applied to a multi-line chunk, so
+# they presumably rely on multi-line matching ($*) being switched on
+# elsewhere in this file -- confirm.
+sub print_doctored {
+ local($_, $need_fallthru_patch) = @_;
+
+ if ( $TargetPlatform =~ /^x86_64-/ ) {
+ # Catch things like
+ #
+ # movq -4(%ebp), %rax
+ # jmp *%rax
+ #
+ # and optimise:
+ #
+ s/^\tmovq\s+(-?\d*\(\%r(bx|bp|13)\)),\s*(\%r(ax|cx|dx|10|11))\n\tjmp\s+\*\3/\tjmp\t\*$1/g;
+ s/^\tmovl\s+\$${T_US}(.*),\s*(\%e(ax|cx|si|di))\n\tjmp\s+\*\%r\3/\tjmp\t$T_US$1/g;
+ }
+
+ # Everything that is not i386 code with real instructions in it is
+ # printed as-is (x86_64 chunks leave here too, after the rewrite above).
+ if ( $TargetPlatform !~ /^i386-/
+ || ! /^\t[a-z]/ # no instructions in here, apparently
+ || /^${T_US}__stginit_[A-Za-z0-9_]+${T_POST_LBL}/) {
+ print OUTASM $_;
+ return;
+ }
+
+ # OK, must do some x86 **HACKING**
+
+ local($entry_patch) = '';
+ local($exit_patch) = '';
+
+ # gotta watch out for weird instructions that
+ # invisibly smash various regs:
+ # rep* %ecx used for counting
+ # scas* %edi used for destination index
+ # cmps* %e[sd]i used for indices
+ # loop* %ecx used for counting
+ #
+ # SIGH.
+
+ # We cater for:
+ # * use of STG reg [ nn(%ebx) ] where no machine reg avail
+ #
+ # * GCC used an "STG reg" for its own purposes
+ #
+ # * some secret uses of machine reg, requiring STG reg
+ # to be saved/restored
+
+ # The most dangerous "GCC uses" of an "STG reg" are when
+ # the reg holds the target of a jmp -- it's tricky to
+ # insert the patch-up code before we get to the target!
+ # So here we change the jmps:
+
+ # --------------------------------------------------------
+ # it can happen that we have jumps of the form...
+ # jmp *<something involving %esp>
+ # or
+ # jmp <something involving another naughty register...>
+ #
+ # a reasonably-common case is:
+ #
+ # movl $_blah,<bad-reg>
+ # jmp *<bad-reg>
+ #
+ s/^\tmovl\s+\$${T_US}(.*),\s*(\%e[acd]x)\n\tjmp\s+\*\2/\tjmp $T_US$1/g;
+
+ # Catch things like
+ #
+ # movl -4(%ebx), %eax
+ # jmp *%eax
+ #
+ # and optimise:
+ #
+ s/^\tmovl\s+(-?\d*\(\%e(bx|si)\)),\s*(\%e[acd]x)\n\tjmp\s+\*\3/\tjmp\t\*$1/g;
+
+ # Re-route jumps that go through %esi (and, below, %edi) via %eax,
+ # and give up if any jmp/call still mentions the register afterwards.
+ if ($StolenX86Regs <= 2 ) { # YURGH! spurious uses of esi?
+ s/^\tmovl\s+(.*),\s*\%esi\n\tjmp\s+\*%esi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g;
+ s/^\tjmp\s+\*(.*\(.*\%esi.*\))\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g;
+ s/^\tjmp\s+\*\%esi\n/\tmovl \%esi,\%eax\n\tjmp \*\%eax\n/g;
+ die "$Pgm: (mangler) still have jump involving \%esi!\n$_"
+ if /(jmp|call)\s+.*\%esi/;
+ }
+ if ($StolenX86Regs <= 3 ) { # spurious uses of edi?
+ s/^\tmovl\s+(.*),\s*\%edi\n\tjmp\s+\*%edi\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g;
+ s/^\tjmp\s+\*(.*\(.*\%edi.*\))\n/\tmovl $1,\%eax\n\tjmp \*\%eax\n/g;
+ s/^\tjmp\s+\*\%edi\n/\tmovl \%edi,\%eax\n\tjmp \*\%eax\n/g;
+ die "$Pgm: (mangler) still have jump involving \%edi!\n$_"
+ if /(jmp|call)\s+.*\%edi/;
+ }
+
+ # OK, now we can decide what our patch-up code is going to
+ # be:
+
+ # Offsets into register table - you'd better update these magic
+ # numbers should you change its contents!
+ # local($OFFSET_R1)=0; No offset for R1 in new RTS.
+ local($OFFSET_Hp)=88;
+
+ # Note funky ".=" stuff; we're *adding* to these _patch guys
+ if ( $StolenX86Regs <= 2
+ && ( /[^0-9]\(\%ebx\)/ || /\%esi/ || /^\tcmps/ ) ) { # R1 (esi)
+ $entry_patch .= "\tmovl \%esi,(\%ebx)\n";
+ $exit_patch .= "\tmovl (\%ebx),\%esi\n";
+
+ # nothing for call_{entry,exit} because %esi is callee-save
+ }
+ if ( $StolenX86Regs <= 3
+ && ( /${OFFSET_Hp}\(\%ebx\)/ || /\%edi/ || /^\t(scas|cmps)/ ) ) { # Hp (edi)
+ $entry_patch .= "\tmovl \%edi,${OFFSET_Hp}(\%ebx)\n";
+ $exit_patch .= "\tmovl ${OFFSET_Hp}(\%ebx),\%edi\n";
+
+ # nothing for call_{entry,exit} because %edi is callee-save
+ }
+
+ # --------------------------------------------------------
+ # next, here we go with non-%esp patching!
+ #
+ # (no /g here: only the first instruction line gets the entry patch)
+ s/^(\t[a-z])/$entry_patch$1/; # before first instruction
+
+# Before calling GC we must set up the exit condition before the call
+# and entry condition when we come back
+
+ # fix _all_ non-local jumps:
+
+ if ( $TargetPlatform =~ /^.*-apple-darwin.*/ ) {
+ # On Darwin, we've got local-looking jumps that are
+ # actually global (i.e. jumps to Lfoo$stub or via
+ # Lfoo$non_lazy_ptr), so we fix those first.
+ # In fact, we just fix everything that contains a dollar
+ # because false positives don't hurt here.
+
+ s/^(\tjmp\s+\*?L.*\$.*\n)/$exit_patch$1/g;
+ }
+
+ # Hide jumps to local labels behind placeholder mnemonics so that the
+ # blanket "jmp" substitution below skips them; they are restored
+ # straight afterwards.
+ s/^\tjmp\s+\*${T_X86_PRE_LLBL_PAT}/\tJMP___SL/go;
+ s/^\tjmp\s+${T_X86_PRE_LLBL_PAT}/\tJMP___L/go;
+
+ s/^(\tjmp\s+.*\n)/$exit_patch$1/g; # here's the fix...
+
+ s/^\tJMP___SL/\tjmp \*${T_X86_PRE_LLBL}/go;
+ s/^\tJMP___L/\tjmp ${T_X86_PRE_LLBL}/go;
+
+ if ($StolenX86Regs == 2 ) {
+ die "ARGH! Jump uses \%esi or \%edi with -monly-2-regs:\n$_"
+ if /^\t(jmp|call)\s+.*\%e(si|di)/;
+ } elsif ($StolenX86Regs == 3 ) {
+ die "ARGH! Jump uses \%edi with -monly-3-regs:\n$_"
+ if /^\t(jmp|call)\s+.*\%edi/;
+ }
+
+ # --------------------------------------------------------
+ # that's it -- print it
+ #
+ #die "Funny jumps?\n$_" if /${T_X86_BADJMP}/o; # paranoia
+
+ print OUTASM $_;
+
+ if ( $need_fallthru_patch ) { # exit patch for end of slow entry code
+ print OUTASM $exit_patch;
+ # ToDo: make it not print if there is a "jmp" at the end
+ }
+}
+\end{code}
+
+\begin{code}
+sub init_FUNNY_THINGS {
+    # Reset the global %KNOWN_FUNNY_THING table; it currently has no entries.
+    # An entry would be a pattern string followed by 1, for example:
+    #     "${T_US}stg_.*${T_POST_LBL}", 1,
+    %KNOWN_FUNNY_THING = ();
+}
+\end{code}
+
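+The following table rearrangement (still called \tr{rev_tbl}) is used for
+both info tables and return vectors. In both cases, we put the label at
+the end of the table, and paste some code (that which is normally
+referred to by the first entry in the table) right after the table
+itself. (The code pasting is done elsewhere.) The original scheme also
+removed the first entry from the table and reversed the table; the code
+below no longer reverses the words, and the removal of an initial zero
+word is commented out.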
+
+\begin{code}
+# rev_tbl($symb, $tbl, $discard1)
+#
+# Rearranges the assembly text of one info table or vector table ($tbl)
+# and returns the new text, laid out as:
+#
+#     header lines that are not label/.global lines
+#     (hppa only) the .IMPORT lines found among the table words
+#     the table words, in their original order
+#     the label and .global lines
+#     whatever followed the table words
+#
+# On ia64 targets $tbl is returned untouched.
+#
+#   $symb     - symbol the table belongs to; not consulted by the body
+#   $tbl      - the table's assembly text, lines separated by "\n"
+#   $discard1 - only referenced by the commented-out code that used to
+#               drop an initial zero word
+#
+# Side effects: sets the globals $infoname and $line; exits with status 1
+# on an Alpha .quad constant it cannot split.
+sub rev_tbl {
+    local($symb, $tbl, $discard1) = @_;
+
+    return ($tbl) if ($TargetPlatform =~ /^ia64-/);
+
+    local($before) = '';
+    local($label) = '';
+    local(@imports) = (); # hppa only
+    local(@words) = ();
+    local($after) = '';
+    local(@lines) = split(/\n/, $tbl);
+    local($i, $j);
+
+    # Deal with the header...
+    for ($i = 0; $i <= $#lines && $lines[$i] !~ /^\t?${T_DOT_WORD}\s+/o; $i++) {
+        # NB: comma operator -- the "next if" governs both halves, so
+        # label-ish lines go to $label and everything else to $before.
+        $label .= $lines[$i] . "\n",
+            next if $lines[$i] =~ /^[A-Za-z0-9_]+_info${T_POST_LBL}$/o
+                 || $lines[$i] =~ /${T_DOT_GLOBAL}/o
+                 || $lines[$i] =~ /^${T_US}\S+_vtbl${T_POST_LBL}$/o;
+
+        $before .= $lines[$i] . "\n"; # otherwise...
+    }
+
+    # Pull the bare foo_info name out of the label text.  If no _info
+    # label is present the substitution fails and $infoname stays equal
+    # to $label.
+    $infoname = $label;
+    $infoname =~ s/(.|\n)*^([A-Za-z0-9_]+_info)${T_POST_LBL}$(.|\n)*/$2/;
+
+    # Grab the table data...
+    if ( $TargetPlatform !~ /^hppa/ ) {
+        for ( ; $i <= $#lines && $lines[$i] =~ /^\t?${T_DOT_WORD}\s+/o; $i++) {
+            $line = $lines[$i];
+            # Convert addresses of SRTs, slow entrypoints and large bitmaps
+            # to offsets (relative to the info label),
+            # in order to support position independent code.
+            # (At most one of these rewrites is applied to each line.)
+            $line =~ s/$infoname/0/
+                || $line =~ s/([A-Za-z0-9_]+_srtd)$/$1 - $infoname/
+                || $line =~ s/([A-Za-z0-9_]+_srt(\+\d+)?)$/$1 - $infoname/
+                || $line =~ s/([A-Za-z0-9_]+_slow)$/$1 - $infoname/
+                || $line =~ s/([A-Za-z0-9_]+_btm)$/$1 - $infoname/
+                || $line =~ s/([A-Za-z0-9_]+_alt)$/$1 - $infoname/
+                || $line =~ s/([A-Za-z0-9_]+_dflt)$/$1 - $infoname/
+                || $line =~ s/([A-Za-z0-9_]+_ret)$/$1 - $infoname/;
+            push(@words, $line);
+        }
+    } else { # hppa weirdness
+        for ( ; $i <= $#lines && $lines[$i] =~ /^\s+(${T_DOT_WORD}|\.IMPORT)/; $i++) {
+            # FIXME: the RTS now expects offsets instead of addresses
+            # for all labels in info tables.
+            if ($lines[$i] =~ /^\s+\.IMPORT/) {
+                push(@imports, $lines[$i]);
+            } else {
+                # We don't use HP's ``function pointers''
+                # We just use labels in code space, like normal people
+                $lines[$i] =~ s/P%//;
+                push(@words, $lines[$i]);
+            }
+        }
+    }
+
+    # Now throw away any initial zero word from the table.  This is a hack
+    # that lets us reduce the size of info tables when the SRT field is not
+    # needed: see comments StgFunInfoTable in InfoTables.h.
+    #
+    # The .zero business is for Linux/ELF.
+    # The .skip business is for Sparc/Solaris/ELF.
+    # The .blockz business is for HPPA.
+#    if ($discard1) {
+#       if ($words[0] =~ /^\t?(${T_DOT_WORD}\s+0|\.zero\s+4|\.skip\s+4|\.blockz\s+4)/) {
+#           shift(@words);
+#       }
+#    }
+
+    # Whatever is left over follows the label.
+    for (; $i <= $#lines; $i++) {
+        $after .= $lines[$i] . "\n";
+    }
+
+    # Alphas: If we have anonymous text (not part of a procedure), the
+    # linker may complain about missing exception information.  Bleh.
+    # To suppress this, we place a .ent/.end pair around the code.
+    # At the same time, we have to be careful and not enclose any leading
+    # .file/.loc directives.
+    if ( $TargetPlatform =~ /^alpha-/ && $label =~ /^([A-Za-z0-9_]+):$/) {
+        local ($ident) = $1;
+        $before =~ s/^((\s*\.(file|loc)\s+[^\n]*\n)*)/$1\t.ent $ident\n/;
+        $after .= "\t.end $ident\n";
+    }
+
+    # Alphas: The heroic Simon Marlow found a bug in the Digital UNIX
+    # assembler (!) wherein .quad constants inside .text sections are
+    # first narrowed to 32 bits then sign-extended back to 64 bits.
+    # This obviously screws up our 64-bit bitmaps, so we work around
+    # the bug by replacing .quad with .align 3 + .long + .long [ccshan]
+    if ( $TargetPlatform =~ /^alpha-/ ) {
+        foreach (@words) {
+            if (/^\s*\.quad\s+([-+0-9].*\S)\s*$/ && length $1 >= 10) {
+                local ($number) = $1;
+                if ($number =~ /^([-+])?(0x?)?([0-9]+)$/) {
+                    local ($sign, $base, $digits) = ($1, $2, $3);
+                    # no prefix -> decimal, "0" -> octal, "0x" -> hex
+                    $base = (10, 8, 16)[length $base];
+                    local ($hi, $lo) = (0, 0);
+                    # Accumulate the value as two 32-bit halves.
+                    foreach $i (split(//, $digits)) {
+                        $j = $lo * $base + $i;
+                        $lo = $j % 4294967296;
+                        $hi = $hi * $base + ($j - $lo) / 4294967296;
+                    }
+                    if ($sign eq "-") {
+                        # Two's-complement negation of the pair ($hi,$lo).
+                        # When the low word is zero the "+1" carries into
+                        # the high word and the low word stays zero;
+                        # otherwise the high word is simply inverted.
+                        if ($lo == 0) {
+                            $hi = ($hi == 0) ? 0 : 4294967296 - $hi;
+                        } else {
+                            $lo = 4294967296 - $lo;
+                            $hi = 4294967295 - $hi;
+                        }
+                    }
+                    $_ = "\t.align 3\n\t.long $lo\n\t.long $hi\n";
+                    # printf STDERR "TURNING %s into 0x %08x %08x\n", $number, $hi, $lo;
+                } else {
+                    print STDERR "Cannot handle \".quad $number\" in info table\n";
+                    exit 1;
+                }
+            }
+        }
+    }
+
+    $tbl = $before
+         . (($TargetPlatform !~ /^hppa/) ? '' : join("\n", @imports) . "\n")
+         . join("\n", @words) . "\n"
+         . $label . $after;
+
+#   print STDERR "before=$before\n";
+#   print STDERR "label=$label\n";
+#   print STDERR "words=",(reverse @words),"\n";
+#   print STDERR "after=$after\n";
+
+    $tbl;
+}
+\end{code}
+
+The HP is a major nuisance. The threaded code mangler moved info
+tables from data space to code space, but unthreaded code in the RTS
+still has references to info tables in data space. Since the HP
+linker is very precise about where symbols live, we need to patch the
+references in the unthreaded RTS as well.
+
+\begin{code}
+sub mini_mangle_asm_hppa {
+    # Copy the assembly file $in_asmf to $out_asmf one line at a time,
+    # patching the first "_info,DATA" and the first "P%_PR" on each line.
+    local($in_asmf, $out_asmf) = @_;
+
+    unless (open(INASM, "< $in_asmf")) {
+        &tidy_up_and_die(1,"$Pgm: failed to open `$in_asmf' (to read)\n");
+    }
+    unless (open(OUTASM,"> $out_asmf")) {
+        &tidy_up_and_die(1,"$Pgm: failed to open `$out_asmf' (to write)\n");
+    }
+
+    while (<INASM>) {
+        # _info references now belong to code space
+        s/_info,DATA/_info,CODE/;
+        s/P%_PR/_PR/;
+        print OUTASM $_;
+    }
+
+    # all done; make sure both handles closed cleanly
+    close(OUTASM) || &tidy_up_and_die(1,"Failed writing to $out_asmf\n");
+    close(INASM) || &tidy_up_and_die(1,"Failed reading from $in_asmf\n");
+}
+
+\end{code}
+
+\begin{code}
+sub tidy_up_and_die {
+    # Print $msg on STDERR and leave the program: exit status 0 when
+    # $return_val is 0, exit status 1 for any other value.
+    local($return_val, $msg) = @_;
+
+    print STDERR $msg;
+
+    exit(1) unless $return_val == 0;
+    exit(0);
+}
+\end{code}
diff --git a/driver/ordering-passes b/driver/ordering-passes
new file mode 100644
index 0000000000..305f3f06b4
--- /dev/null
+++ b/driver/ordering-passes
@@ -0,0 +1,257 @@
+ Ordering the compiler's passes
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Change notes
+~~~~~~~~~~~~
+1 Nov 94 * NB: if float-out is done after strictness, remember to
+ switch off demandedness flags on floated bindings!
+13 Oct 94 * Run Float Inwards once more after strictness-simplify [andre]
+ 4 Oct 94 * Do simplification between float-in and strictness [andre]
+ * Ignore-inline-pragmas flag for final simplification [andre]
+
+Aug 94 Original: Simon, Andy, Andre
+
+
+
+
+This ordering obeys all the constraints except (5)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ full laziness
+ simplify with foldr/build
+ float-in
+ simplify
+ strictness
+ float-in
+
+[check FFT2 still gets benefits with this ordering]
+
+=================================
+ Constraints
+=================================
+
+1. float-in before strictness.
+Reason: floating inwards moves definitions inwards to a site at which
+the binding might well be strict.
+
+Example let x = ... in
+ y = x+1
+ in
+ ...
+===>
+ let y = let x = ... in x+1
+ in ...
+
+The strictness analyser will do a better job of the latter
+than the former.
+
+2. Don't simplify between float-in and strictness,
+unless you disable float-let-out-of-let, otherwise
+the simplifier's local floating might undo some
+useful floating-in.
+
+Example let f = let y = .. in \x-> x+y
+ in ...
+===>
+ let y = ...
+ f = \x -> x+y
+ in ...
+
+This is a bad move, because now y isn't strict.
+In the pre-float case, the binding for y is strict.
+Mind you, this isn't a very common case, and
+it's easy to disable float-let-from-let.
+
+3. Want full-laziness before foldr/build.
+Reason: Give priority to sharing rather than deforestation.
+
+Example \z -> let xs = build g
+ in foldr k z xs
+===>
+ let xs = build g
+ in \z -> foldr k z xs
+
+In the post-full-laziness case, xs is shared between all
+applications of the function. If we did foldr/build
+first, we'd have got
+
+ \z -> g k z
+
+and now we can't share xs.
+
+
+4. Want strictness after foldr/build.
+Reason: foldr/build makes new function definitions which
+can benefit from strictness analysis.
+
+Example: sum [1..10]
+===> (f/b)
+ let g x a | x > 10 = a
+ | otherwise = g (x+1) (a+x)
+
+Here we clearly want to get strictness analysis on g.
+
+
+5. Want full laziness after strictness
+Reason: absence may allow something to be floated out
+which would not otherwise be.
+
+Example \z -> let x = f (a,z) in ...
+===> (absence anal + inline wrapper of f)
+ \z -> let x = f.wrk a in ...
+===> (full laziness)
+ let x= f.wrk a in \z -> ...
+
+TOO BAD. This doesn't look a common case to me.
+
+
+6. Want float-in after foldr/build.
+Reason: Desugaring list comprehensions + foldr/build
+gives rise to new float-in opportunities.
+
+Example ...some list comp...
+==> (foldr/build)
+ let v = h xs in
+ case ... of
+ [] -> v
+ (y:ys) -> ...(t v)...
+==> (simplifier)
+ let v = h xs in
+ case ... of
+ [] -> h xs
+ (y:ys) -> ...(t v)...
+
+Now v could usefully be floated into the second branch.
+
+7. Want simplify after float-inwards.
+[Occurred in the prelude, compiling ITup2.hs, function dfun.Ord.(*,*)]
+This is due to the following (that happens with dictionaries):
+
+let a1 = case v of (a,b) -> a
+in let m1 = \ c -> case c of I# c# -> case c# of 1 -> a1 5
+ 2 -> 6
+in let m2 = \ c -> case c of I# c# ->
+ case c# +# 1# of cc# -> let cc = I# cc#
+ in m1 cc
+ in (m1,m2)
+
+floating inwards will push the definition of a1 into m1 (supposing
+it is only used there):
+
+in let m1 = let a1 = case v of (a,b) -> a
+ in \ c -> case c of I# c# -> case c# of 1 -> a1 5
+ 2 -> 6
+in let m2 = \ c -> case c of I# c# ->
+ case c# +# 1# of cc# -> let cc = I# cc#
+ in m1 cc
+ in (m1,m2)
+
+if we do strictness analysis now we will not get a worker-wrapper
+for m1, because of the "let a1 ..." (notice that a1 is not strict in
+its body).
+
+Not having this worker wrapper might be very bad, because it might
+mean that we will have to rebox arguments to m1 if they are
+already unboxed, generating extra allocations, as occurs with m2 (cc)
+above.
+
+To solve this problem we have decided to run the simplifier after
+float-inwards, so that lets whose body is a HNF are floated out,
+undoing the float-inwards transformation in these cases.
+We are then back to the original code, which would have a worker-wrapper
+for m1 after strictness analysis and would avoid the extra let in m2.
+
+What we lose in this case are the opportunities for case-floating
+that could be presented if, for example, a1 would indeed be demanded (strict)
+after the floating inwards.
+
+The only way of having the best of both is if we have the worker/wrapper
+pass explicitly called, and then we could do with
+
+float-in
+strictness analysis
+simplify
+strictness analysis
+worker-wrapper generation
+
+as we would
+a) be able to detect the strictness of m1 after the
+ first call to the strictness analyser, and exploit it with the simplifier
+ (in case it was strict).
+b) after the call to the simplifier (if m1 was not demanded)
+ it would be floated out just like we currently do, before strictness
+ analysis II and worker/wrapperisation.
+
+The reason to not do worker/wrapperisation twice is to avoid
+generating wrappers for wrappers which could happen.
+
+
+8. If full laziness is ever done after strictness, remember to switch off
+demandedness flags on floated bindings! This isn't done at the moment.
+
+
+Ignore-inline-pragmas flag for final simplification
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[Occurred in the prelude, compiling ITup2.hs, function dfun.Ord.(*,*)]
+Sometimes (e.g. in dictionary methods) we generate
+worker/wrappers for functions but the wrappers are never
+inlined. In dictionaries we often have
+
+dict = let f1 = ...
+ f2 = ...
+ ...
+ in (f1,f2,...)
+
+and if we create worker/wrappers for f1,...,fn the wrappers will not
+be inlined anywhere, and we will have ended up with extra
+closures (one for the worker and one for the wrapper) and extra
+function calls, as when we access the dictionary we will be accessing
+the wrapper, which will call the worker.
+The simplifier never inlines workers into wrappers, as the wrappers
+themselves have INLINE pragmas attached to them (so that they are always
+inlined, and we do not know in advance how many times they will be inlined).
+
+To solve this problem, in the last call to the simplifier we will
+ignore these inline pragmas and handle the workers and the wrappers
+as normal definitions. This will allow a worker to be inlined into
+the wrapper if it satisfies all the criteria for inlining (e.g. it is
+the only occurrence of the worker etc.).
+
+Run Float Inwards once more after strictness-simplify
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[Occurred in the prelude, compiling IInt.hs, function const.Int.index.wrk]
+When workers are generated after strictness analysis (worker/wrapper),
+we generate them with "reboxing" lets, which simply rebox the unboxed
+arguments, as it may be the case that the worker will need the
+original boxed value:
+
+f x y = case x of
+ (a,b) -> case y of
+ (c,d) -> case a == c of
+ True -> (x,x)
+ False -> ((1,1),(2,2))
+
+==> (worker/wrapper)
+
+f_wrapper x y = case x of
+ (a,b) -> case y of
+ (c,d) -> f_worker a b c d
+
+f_worker a b c d = let x = (a,b)
+ y = (c,d)
+ in case a == c of
+ True -> (x,x)
+ False -> ((1,1),(2,2))
+
+in this case the simplifier will remove the binding for y as it is not
+used (we expected this to happen very often, but we do not know how
+many "reboxers" are eventually removed and how many are kept), and
+will keep the binding for x. But notice that x is only used in *one*
+of the branches in the case, but is always being allocated! The
+floating inwards pass would push its definition into the True branch.
+A similar benefit occurs if it is only used inside a let definition.
+These are basically the advantages of floating inwards, but they are
+only exposed after the S.A./worker-wrapperisation of the code! As we
+also have reasons to float inwards before S.A. we have to run it
+twice.
+
diff --git a/driver/split/Makefile b/driver/split/Makefile
new file mode 100644
index 0000000000..6b545de20f
--- /dev/null
+++ b/driver/split/Makefile
@@ -0,0 +1,17 @@
+#-----------------------------------------------------------------------------
+# $Id: Makefile,v 1.2 2000/11/03 16:54:52 simonmar Exp $
+#
+# Makefile for driver/split: describes the ghc-split script to the shared
+# rules included from $(TOP)/mk. It defines no rules of its own.
+
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+
+# Name of the script provided by this directory.
+SCRIPT_PROG = ghc-split
+
+# Interpreter for the script.
+# NOTE(review): presumably consumed by the script rules in $(TOP)/mk -- confirm.
+INTERP=perl
+
+# ghc-split reads $TARGETPLATFORM when it starts up.
+# NOTE(review): presumably this asks the mk rules to define that variable
+# in the generated script -- confirm against $(TOP)/mk.
+SCRIPT_SUBST_VARS := TARGETPLATFORM
+
+INSTALL_LIBEXEC_SCRIPTS += $(SCRIPT_PROG)
+
+# SCRIPT_OBJS is not assigned in this file.
+# NOTE(review): presumably set by the included mk files -- confirm.
+CLEAN_FILES += $(SCRIPT_OBJS)
+
+include $(TOP)/mk/target.mk
diff --git a/driver/split/ghc-split.lprl b/driver/split/ghc-split.lprl
new file mode 100644
index 0000000000..4d159ec04f
--- /dev/null
+++ b/driver/split/ghc-split.lprl
@@ -0,0 +1,618 @@
+%************************************************************************
+%* *
+\section[Driver-obj-splitting]{Splitting into many \tr{.o} files (for libraries)}
+%* *
+%************************************************************************
+
+\begin{code}
+# Main program.  Arguments:
+#   $ARGV[0]  assembly file to split
+#   $ARGV[1]  prefix for the output files (piece N goes to <prefix>__N.s)
+#   $ARGV[2]  file that receives the number of pieces written
+#
+# NOTE(review): $TARGETPLATFORM is not assigned anywhere in this file;
+# presumably the build system defines it ahead of this code -- confirm.
+$TargetPlatform = $TARGETPLATFORM;
+
+# program name without its directory part, for messages
+($Pgm = $0) =~ s|.*/||;
+$ifile = $ARGV[0];
+$Tmp_prefix = $ARGV[1];
+$Output = $ARGV[2];
+
+&split_asm_file($ifile);
+
+# Report how many pieces were produced.  A failed close is fatal, like
+# every other close in this script; otherwise a short or failed write
+# would leave a bad count behind unnoticed.
+open(OUTPUT, "> $Output") || &tidy_up_and_die(1,"$Pgm: failed to open `$Output' (to write)\n");
+print OUTPUT "$NoOfSplitFiles\n";
+close(OUTPUT) || &tidy_up_and_die(1,"$Pgm: failed writing `$Output'\n");
+
+exit(0);
+\end{code}
+
+
+\begin{code}
+# split_asm_file($asm_file)
+#
+# Splits $asm_file at its split markers.  The text before the first
+# marker is the prologue; each later piece is written, with the prologue
+# in front, to "${Tmp_prefix}__N.s" for N = 1, 2, ...  If there are no
+# later pieces, a single file holding just the prologue is written.
+# The number of files written is left in the global $NoOfSplitFiles.
+#
+# Relies on ReadTMPIUpToAMarker leaving the first unread "real" line in
+# $_ (empty at end of file).
+sub split_asm_file {
+ local($asm_file) = @_;
+
+ open(TMPI, "< $asm_file") || &tidy_up_and_die(1,"$Pgm: failed to open `$asm_file' (to read)\n");
+
+ # Target-specific pre-passes over the whole file; each one rewinds TMPI
+ # when it is done.
+ &collectExports_hppa() if $TargetPlatform =~ /^hppa/;
+ &collectExports_mips() if $TargetPlatform =~ /^mips/;
+ &collectDyldStuff_darwin() if $TargetPlatform =~ /-apple-darwin/;
+
+ $octr = 0; # output file counter
+ $* = 1; # multi-line matches are OK
+
+ %LocalConstant = (); # we have to subvert C compiler's commoning-up of constants...
+
+ $s_stuff = &ReadTMPIUpToAMarker( '', $octr );
+ # that first stuff is a prologue for all .s outputs
+ $prologue_stuff = &process_asm_block ( $s_stuff );
+ # $_ already has some of the next stuff in it...
+
+# &tidy_up_and_die(1,"$Pgm: no split markers in .s file!\n")
+# if $prologue_stuff eq $s_stuff;
+
+ # lie about where this stuff came from
+ # Note the \Q: this ignores regex meta-chars in $Tmp_prefix.
+ # NOTE(review): $ifile_root is not assigned in the visible part of this
+ # file; if it really is unset here the replacement text is just ".hc".
+ # Confirm where it is meant to come from.
+ $prologue_stuff =~ s/\Q"$Tmp_prefix.c"/"$ifile_root.hc"/g;
+
+ while ( $_ ne '' ) { # not EOF
+ $octr++;
+
+ # grab and de-mangle a section of the .s file...
+ $s_stuff = &ReadTMPIUpToAMarker ( $_, $octr );
+ $this_piece = &process_asm_block ( $s_stuff );
+
+ # output to a file of its own
+ # open a new output file...
+ $ofname = "${Tmp_prefix}__${octr}.s";
+ open(OUTF, "> $ofname") || die "$Pgm: can't open output file: $ofname\n";
+
+ print OUTF $prologue_stuff;
+ print OUTF $this_piece;
+
+ close(OUTF)
+ || &tidy_up_and_die(1,"$Pgm:Failed writing ${Tmp_prefix}__${octr}.s\n");
+ }
+
+ # Make sure that we still have some output when the input file is empty
+ if ( $octr == 0 ) {
+ $octr = 1;
+ $ofname = "${Tmp_prefix}__${octr}.s";
+ open(OUTF, "> $ofname") || die "$Pgm: can't open output file: $ofname\n";
+
+ print OUTF $prologue_stuff;
+
+ close(OUTF)
+ || &tidy_up_and_die(1,"$Pgm:Failed writing ${Tmp_prefix}__${octr}.s\n");
+ }
+
+ $NoOfSplitFiles = $octr;
+
+ close(TMPI) || &tidy_up_and_die(1,"Failed reading $asm_file\n");
+}
+
+sub collectExports_hppa { # Note: HP-PA only
+    # Scan all of TMPI for .EXPORT lines.  For each exported label, store
+    # a matching ".IMPORT label,DATA" or ".IMPORT label,CODE" line in the
+    # global %LocalExport, keyed by the label with every "$" backslashed.
+    # TMPI is rewound afterwards.
+
+    %LocalExport = (); # NB: global table
+
+    while (<TMPI>) {
+        next unless /^\s+\.EXPORT\s+([^,]+),.*\n/;
+
+        local($label) = $1;
+        local($body) = "\t.IMPORT $label" . (/,DATA/ ? ",DATA\n" : ",CODE\n");
+
+        $label =~ s/\$/\\\$/g;
+        $LocalExport{$label} = $body;
+    }
+
+    seek(TMPI, 0, 0);
+}
+
+sub collectExports_mips { # Note: MIPS only
+    # (not really sure this is necessary [WDP 95/05])
+    #
+    # Gather every "<tab>.globl <name> .<something>" line of TMPI into the
+    # global string $UNDEFINED_FUNS, then rewind TMPI.
+
+    $UNDEFINED_FUNS = ''; # NB: global table
+
+    while (<TMPI>) {
+        # just save 'em all
+        next unless /^\t\.globl\s+\S+ \.\S+\n/;
+        $UNDEFINED_FUNS .= $_;
+    }
+
+    seek(TMPI, 0, 0);
+}
+
+# collectDyldStuff_darwin()
+#
+# Scans all of TMPI for chunks introduced by a label of the form
+# "L_name$something:".  For each such name, the section line and alignment
+# line in force at the label, the label line itself and the lines that
+# follow it are appended to two global tables keyed by "_name":
+#
+#   %DyldChunksDefined  the chunk exactly as found
+#   %DyldChunks         the same, except that when the section line contains
+#                       ".data" it is changed to ".non_lazy_symbol_pointer"
+#                       and the body is replaced by an .indirect_symbol entry
+#
+# TMPI is rewound afterwards.
+sub collectDyldStuff_darwin {
+    local($chunk_label,$label,$cur_section,$section,$chunk,$alignment,$cur_alignment);
+
+    %DyldChunks = (); # NB: global table
+    %DyldChunksDefined = (); # NB: global table
+
+    $cur_section = '';
+    $section = '';
+    $label = '';
+    $chunk = '';
+    $alignment = '';
+    $cur_alignment = '';
+
+    while ( 1 ) {
+        $_ = <TMPI>;
+        if ( $_ eq '' || /^L(_.+)\$.+:/ ) {
+            # End of file or start of the next chunk: flush the chunk
+            # collected so far, if there is one.
+            if ( $label ne '' ) {
+                $DyldChunksDefined{$label} .= $section . $alignment . $chunk_label . $chunk;
+                if( $section =~ s/\.data/\.non_lazy_symbol_pointer/ ) {
+                    $chunk = "\t.indirect_symbol $label\n\t.long 0\n";
+                }
+                $DyldChunks{$label} .= $section . $alignment . $chunk_label . $chunk;
+                print STDERR "### dyld chunk: $label\n$section$alignment$chunk\n###\n" if $Dump_asm_splitting_info;
+            }
+            last if ($_ eq '');
+
+            # $1 still refers to the label match above: the substitution
+            # on $section ran inside the nested block.
+            $chunk = '';
+            $chunk_label = $_;
+            $label = $1;
+            $section = $cur_section;
+            $alignment = $cur_alignment;
+            print STDERR "label: $label\n" if $Dump_asm_splitting_info;
+        } elsif ( /^\s*\.(symbol_stub|picsymbol_stub|lazy_symbol_pointer|non_lazy_symbol_pointer|data|section __IMPORT,.*)/ ) {
+            $cur_section = $_;
+            # plain print: the section text is data, not a printf format
+            print STDERR "section: $cur_section\n" if $Dump_asm_splitting_info;
+            $cur_alignment = ''
+        } elsif ( /^\s*\.section\s+__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,\d+/ ) {
+            $cur_section = $_;
+            print STDERR "section: $cur_section\n" if $Dump_asm_splitting_info;
+            # always make sure we align things
+            # (double quotes and a trailing newline, so that this is a
+            # real directive line like the ones read from the file)
+            $cur_alignment = "\t.align 2\n"
+        } elsif ( /^\s*\.align.*/ ) {
+            $cur_alignment = $_;
+            print STDERR "alignment: $cur_alignment\n" if $Dump_asm_splitting_info;
+        } else {
+            $chunk .= $_;
+        }
+    }
+
+    seek(TMPI, 0, 0);
+}
+
+# ReadTMPIUpToAMarker($str, $count)
+#
+# Appends lines from TMPI to $str until a split-marker line or end of file
+# is reached, and returns the accumulated text.
+#
+#   $str   - text already read that belongs to this block
+#   $count - block number, used only in the debugging dump
+#
+# On return the global $_ holds the first line after the marker that is
+# not skipped by the second loop below, or is empty at end of file;
+# callers use it as the start of the next block.
+sub ReadTMPIUpToAMarker {
+ local($str, $count) = @_; # already read bits
+
+
+ for ( $_ = <TMPI>; $_ ne '' && ! /_?__stg_split_marker/; $_ = <TMPI> ) {
+ $str .= $_;
+ }
+ # if not EOF, then creep forward until next "real" line
+ # (throwing everything away).
+ # that first "real" line will stay in $_.
+
+ # This loop is intended to pick up the body of the split_marker function
+ # Note that the assembler mangler will already have eliminated this code
+ # if it's been invoked (which it probably has).
+
+ while ($_ ne '' && (/_?__stg_split_marker/
+ || /^L[^C].*:$/
+ || /^\.stab/
+ || /\t\.proc/
+ || /\t\.stabd/
+ || /\t\.even/
+ || /\tunlk a6/
+ || /^\t!#PROLOGUE/
+ || /\t\.prologue/
+ || /\t\.frame/
+ # || /\t\.end/ NOT! Let the split_marker regexp catch it
+ # || /\t\.ent/ NOT! Let the split_marker regexp catch it
+ || /^\s+(save|retl?|restore|nop)/)) {
+ $_ = <TMPI>;
+ }
+
+ print STDERR "### BLOCK:$count:\n$str" if $Dump_asm_splitting_info;
+
+ # return str
+ # (carriage returns are stripped first on mingw32 targets)
+ $str =~ tr/\r//d if $TargetPlatform =~ /-mingw32$/; # in case Perl doesn't convert line endings
+ $str;
+}
+\end{code}
+
+We must (a)~strip the marker off the block, (b)~record any literal C
+constants that are defined here, and (c)~inject copies of any C constants
+that are used-but-not-defined here.
+
+\begin{code}
+sub process_asm_block {
+    # Pass $str to the block processor that matches $TargetPlatform and
+    # return whatever it returns.  The tests are tried in the order
+    # written, so any *-apple-darwin target is handled by the darwin
+    # processor.  An unrecognised platform is fatal.
+    local($str) = @_;
+
+    if ($TargetPlatform =~ /-apple-darwin/) {
+        return(&process_asm_block_darwin($str));
+    } elsif ($TargetPlatform =~ /^m68k-/) {
+        return(&process_asm_block_m68k($str));
+    } elsif ($TargetPlatform =~ /^sparc-/) {
+        return(&process_asm_block_sparc($str));
+    } elsif ($TargetPlatform =~ /^i[34]86-/) {
+        return(&process_asm_block_iX86($str));
+    } elsif ($TargetPlatform =~ /^x86_64-/) {
+        return(&process_asm_block_x86_64($str));
+    } elsif ($TargetPlatform =~ /^alpha-/) {
+        return(&process_asm_block_alpha($str));
+    } elsif ($TargetPlatform =~ /^hppa/) {
+        return(&process_asm_block_hppa($str));
+    } elsif ($TargetPlatform =~ /^mips-/) {
+        return(&process_asm_block_mips($str));
+    } elsif ($TargetPlatform =~ /^powerpc-[^-]+-linux/) {
+        return(&process_asm_block_powerpc_linux($str));
+    }
+
+    # nothing matched
+    &tidy_up_and_die(1,"$Pgm: no process_asm_block for $TargetPlatform\n");
+}
+
+sub process_asm_block_sparc {
+    # Sparc version of the per-block clean-up: strip the split marker,
+    # move any string constants defined in this block into the global
+    # %LocalConstant, then put a copy of every recorded constant that the
+    # block mentions in front of it.  Returns the new block text.
+    local($str) = @_;
+
+    # strip the marker
+    if ( ! $OptimiseC ) {
+        $str =~ s/(\.text\n\t\.align .\n)\t\.global\s+.*_?__stg_split_marker.*\n\t\.proc.*\n/$1/;
+        $str =~ s/(\t\.align .\n)\t\.global\s+.*_?__stg_split_marker.*\n\t\.proc.*\n/$1/;
+    } else {
+        $str =~ s/_?__stg_split_marker.*:\n//;
+    }
+
+    # make sure the *.hc filename gets saved; not just ghc*.c (temp name)
+    $str =~ s/^\.stabs "(ghc\d+\.c)"/.stabs "$ifile_root.hc"/g; # HACK HACK
+
+    # remove/record any literal constants defined here
+    while ( $str =~ /(\t\.align .\n\.?(L?LC\d+):\n(\t\.asci[iz].*\n)+)/ ) {
+        local($body, $label) = ($1, $2);
+
+        if ( $LocalConstant{$label} ) {
+            &tidy_up_and_die(1,"Local constant label $label already defined!\n");
+        }
+
+        $LocalConstant{$label} = $body;
+
+        # take the definition just recorded out of the block
+        $str =~ s/\t\.align .\n\.?LL?C\d+:\n(\t\.asci[iz].*\n)+//;
+    }
+
+    # inject definitions for any local constants now used herein
+    foreach $k (keys %LocalConstant) {
+        $str = $LocalConstant{$k} . $str if $str =~ /\b$k\b/;
+    }
+
+    if ( $Dump_asm_splitting_info ) {
+        print STDERR "### STRIPPED BLOCK (sparc):\n$str";
+    }
+
+    return $str;
+}
+
+sub process_asm_block_m68k {
+    # m68k version of the per-block clean-up: strip the split marker, put
+    # a .text/.even header in front, move any LCn string constants defined
+    # in this block into the global %LocalConstant, then put a copy of
+    # every recorded constant that the block mentions in front of it.
+    # Returns the new block text.
+    local($str) = @_;
+
+    # strip the marker
+    $str =~ s/(\.text\n\t\.even\n)\t\.globl\s+.*_?__stg_split_marker.*\n/$1/;
+    $str =~ s/(\t\.even\n)\t\.globl\s+.*_?__stg_split_marker.*\n/$1/;
+
+    # it seems prudent to stick on one of these:
+    $str = "\.text\n\t.even\n" . $str;
+
+    # remove/record any literal constants defined here
+    while ( $str =~ /((LC\d+):\n\t\.ascii.*\n)/ ) {
+        local($body, $label) = ($1, $2);
+
+        if ( $LocalConstant{$label} ) {
+            &tidy_up_and_die(1,"Local constant label $label already defined!\n");
+        }
+
+        $LocalConstant{$label} = $body;
+
+        # take the definition just recorded out of the block
+        $str =~ s/LC\d+:\n\t\.ascii.*\n//;
+    }
+
+    # inject definitions for any local constants now used herein
+    foreach $k (keys %LocalConstant) {
+        $str = $LocalConstant{$k} . $str if $str =~ /\b$k\b/;
+    }
+
+    if ( $Dump_asm_splitting_info ) {
+        print STDERR "### STRIPPED BLOCK (m68k):\n$str";
+    }
+
+    return $str;
+}
+
+sub process_asm_block_alpha {
+    # Alpha version of the per-block clean-up: strip the split marker,
+    # move any $Cn constants defined in this block into the global
+    # %LocalConstant (wrapped in .rdata ... .text), put a copy of every
+    # recorded constant that the block mentions in front of it, and turn
+    # a "# nop" comment that follows an .end line into a real nop placed
+    # before that .end.  Returns the new block text.
+    local($str) = @_;
+
+    # strip the marker
+    if ( ! $OptimiseC ) {
+        $str =~ s/(\t\.align .\n)\t\.globl\s+.*_?__stg_split_marker.*\n\t\.ent.*\n/$1/;
+    } else {
+        $str =~ s/_?__stg_split_marker.*:\n//;
+    }
+
+    # remove/record any literal constants defined here
+    while ( $str =~ /(\.rdata\n\t\.align \d\n)?(\$(C\d+):\n\t\..*\n)/ ) {
+        local($body, $label) = ($2, $3);
+
+        if ( $LocalConstant{$label} ) {
+            &tidy_up_and_die(1,"Local constant label $label already defined!\n");
+        }
+
+        $LocalConstant{$label} = ".rdata\n\t.align 3\n" . $body . "\t.text\n";
+
+        # take the definition just recorded out of the block
+        $str =~ s/(\.rdata\n\t\.align \d\n)?\$C\d+:\n\t\..*\n//;
+    }
+
+    # inject definitions for any local constants now used herein
+    foreach $k (keys %LocalConstant) {
+        $str = $LocalConstant{$k} . $str if $str =~ /\$\b$k\b/;
+    }
+
+    # Slide the dummy direct return code into the vtbl .ent/.end block,
+    # to keep the label fixed if it's the last thing in a module, and
+    # to avoid having any anonymous text that the linker will complain about
+    $str =~ s/(\t\.end [A-Za-z0-9_]+)\n\t# nop/\tnop\n$1/g;
+
+    if ( $Dump_asm_splitting_info ) {
+        print STDERR "### STRIPPED BLOCK (alpha):\n$str";
+    }
+
+    return $str;
+}
+
+sub process_asm_block_iX86 {
+ # Post-process one chunk of iX86 assembly text and return the result.
+ # The split marker's .globl line is removed, a .text/.align header is
+ # prepended, "LCn:" constants defined in the chunk are moved into the
+ # global %LocalConstant, and the recorded text of every constant the
+ # chunk refers to is put back at its front.
+ local($str) = @_;
+
+ # strip the marker
+ # (the .text/.align directives captured in $1 are kept; only the
+ # .globl line naming the marker goes away)
+
+ $str =~ s/(\.text\n\t\.align .(,0x90)?\n)\.globl\s+.*_?__stg_split_marker.*\n/$1/;
+ $str =~ s/(\t\.align .(,0x90)?\n)\.globl\s+.*_?__stg_split_marker.*\n/$1/;
+
+ # it seems prudent to stick on one of these:
+ $str = "\.text\n\t.align 4\n" . $str;
+
+ # remove/record any literal constants defined here
+ # [perl made uglier to work around the perl 5.7/5.8 bug documented at
+ # http://bugs6.perl.org/rt2/Ticket/Display.html?id=1760 and illustrated
+ # by the seg fault of perl -e '("x\n" x 5000) =~ /(.*\n)+/'
+ # -- ccshan 2002-09-05]
+ # The outer pattern therefore takes at most 100 data lines after the
+ # label ({1,100} instead of +); the inner loop picks up any that remain.
+ while ( ($str =~ /(\.?(LC\d+):\n(\t\.(ascii|string).*\n|\s*\.byte.*\n){1,100})/ )) {
+ local($label) = $2;
+ local($body) = $1;
+ # Save the text before and after the match, and force $* to 0 for the
+ # rest of this iteration.  NOTE(review): per perlvar, $* == 0 makes ^
+ # match only at the start of the string, which is what the inner loop
+ # needs; $* is not supported by newer Perl releases -- confirm the
+ # Perl this runs under still accepts it.
+ local($prefix, $suffix, $*) = ($`, $', 0);
+
+ &tidy_up_and_die(1,"Local constant label $label already defined!\n")
+ if $LocalConstant{$label};
+
+ # Append data lines that directly follow the matched definition,
+ # up to 100 per pass, consuming them from the front of $suffix.
+ while ( $suffix =~ /^((\t\.(ascii|string).*\n|\s*\.byte.*\n){1,100})/ ) {
+ $body .= $1;
+ $suffix = $';
+ }
+ $LocalConstant{$label} = $body;
+ # Splice the whole definition out of the chunk.
+ $str = $prefix . $suffix;
+ }
+
+ # inject definitions for any local constants now used herein
+ # (%LocalConstant is a global that is not reset here, so entries
+ # recorded by other calls are candidates as well)
+ foreach $k (keys %LocalConstant) {
+ if ( $str =~ /\b$k\b/ ) {
+ $str = $LocalConstant{$k} . $str;
+ }
+ }
+
+ print STDERR "### STRIPPED BLOCK (iX86):\n$str" if $Dump_asm_splitting_info;
+
+ # The processed chunk is the sub's return value.
+ $str;
+}
+\end{code}
+
+\begin{code}
+sub process_asm_block_x86_64 {
+ # Post-process one chunk of x86_64 assembly text and return the result.
+ # "LCn:" constants defined in the chunk are moved into the global
+ # %LocalConstant, and the recorded text of every constant the chunk
+ # refers to is put back at its front.  No split marker is stripped and
+ # no section header is prepended in this sub.
+ local($str) = @_;
+
+ # remove/record any literal constants defined here
+ # [perl made uglier to work around the perl 5.7/5.8 bug documented at
+ # http://bugs6.perl.org/rt2/Ticket/Display.html?id=1760 and illustrated
+ # by the seg fault of perl -e '("x\n" x 5000) =~ /(.*\n)+/'
+ # -- ccshan 2002-09-05]
+ # The outer pattern therefore takes at most 100 data lines after the
+ # label ({1,100} instead of +); the inner loop picks up any that remain.
+ while ( ($str =~ /(\.?(LC\d+):\n(\t\.(ascii|string).*\n|\s*\.byte.*\n){1,100})/ )) {
+ local($label) = $2;
+ local($body) = $1;
+ # Save the text before and after the match, and force $* to 0 for the
+ # rest of this iteration.  NOTE(review): per perlvar, $* == 0 makes ^
+ # match only at the start of the string, which is what the inner loop
+ # needs; $* is not supported by newer Perl releases -- confirm the
+ # Perl this runs under still accepts it.
+ local($prefix, $suffix, $*) = ($`, $', 0);
+
+ &tidy_up_and_die(1,"Local constant label $label already defined!\n")
+ if $LocalConstant{$label};
+
+ # Append data lines that directly follow the matched definition,
+ # up to 100 per pass, consuming them from the front of $suffix.
+ while ( $suffix =~ /^((\t\.(ascii|string).*\n|\s*\.byte.*\n){1,100})/ ) {
+ $body .= $1;
+ $suffix = $';
+ }
+ $LocalConstant{$label} = $body;
+ # Splice the whole definition out of the chunk.
+ $str = $prefix . $suffix;
+ }
+
+ # inject definitions for any local constants now used herein
+ # (%LocalConstant is a global that is not reset here, so entries
+ # recorded by other calls are candidates as well)
+ foreach $k (keys %LocalConstant) {
+ if ( $str =~ /\b$k\b/ ) {
+ $str = $LocalConstant{$k} . $str;
+ }
+ }
+
+ print STDERR "### STRIPPED BLOCK (x86_64):\n$str" if $Dump_asm_splitting_info;
+
+ # The processed chunk is the sub's return value.
+ $str;
+}
+\end{code}
+
+\begin{code}
+sub process_asm_block_hppa {
+    # Post-process one chunk of HPPA assembly text and return the result:
+    #   - the ___stg_split_marker line is removed;
+    #   - leading .IMPORT lines are moved into the global $Imports string;
+    #   - "L$Cn ... ; end literal" constants are moved into %LocalConstant;
+    #   - recorded constants and %LocalExport entries that the chunk
+    #     refers to, followed by everything in $Imports, are prepended.
+    local($str) = @_;
+
+    # strip the marker
+    $str =~ s/___stg_split_marker.*\n//;
+
+    # remove/record any imports defined here
+    # ($Imports is a global that keeps growing across calls)
+    while ( $str =~ /^(\s+\.IMPORT\s.*\n)/ ) {
+        $Imports .= $1;
+
+        $str =~ s/^\s+\.IMPORT.*\n//;
+    }
+
+    # remove/record any literal constants defined here
+    # ($body stops before the "; end literal" line, but that line is part
+    # of the match and so is removed from the chunk along with the rest)
+    while ( $str =~ /^(\s+\.align.*\n(L\$C\d+)\n(\s.*\n)+); end literal\n/ ) {
+        local($label) = $2;
+        local($body) = $1;
+        local($prefix) = $`;
+        local($suffix) = $';
+        # Escape '$' so the label can later be interpolated into a
+        # pattern and still match the literal text.
+        $label =~ s/\$/\\\$/g;
+
+        &tidy_up_and_die(1,"Local constant label $label already defined!\n")
+            if $LocalConstant{$label};
+
+        $LocalConstant{$label} = "\t.SPACE \$TEXT\$\n\t.SUBSPA \$LIT\$\n\n" . $body;
+
+        $str = $prefix . $suffix;
+    }
+
+    # inject definitions for any local constants now used herein
+    foreach $k (keys %LocalConstant) {
+        if ( $str =~ /\b$k\b/ ) {
+            $str = $LocalConstant{$k} . $str;
+        }
+    }
+
+    # inject required imports for local exports in other chunks
+    # The EXPORT test must name $str explicitly: a bare /.../ is matched
+    # against $_, not against this chunk, so the "already exported here"
+    # check would look at unrelated text.
+    foreach $k (keys %LocalExport) {
+        if ( $str =~ /\b$k\b/ && $str !~ /EXPORT\s+$k\b/ ) {
+            $str = $LocalExport{$k} . $str;
+        }
+    }
+
+    # inject collected imports
+
+    $str = $Imports . $str;
+
+    print STDERR "### STRIPPED BLOCK (hppa):\n$str" if $Dump_asm_splitting_info;
+
+    # The processed chunk is the sub's return value.
+    $str;
+}
+\end{code}
+
+\begin{code}
+sub process_asm_block_mips {
+    # Post-process one chunk of mips assembly text and return the result:
+    # strip the split marker, move "$LCn:" byte constants into the global
+    # %LocalConstant, prepend the recorded constants this chunk uses, and
+    # append $UNDEFINED_FUNS.
+    local($str) = @_;
+
+    # The marker is removed in one of two ways, selected by $OptimiseC.
+    if ( ! $OptimiseC ) {
+        $str =~ s/(\t\.align .\n)\t\.globl\s+.*_?__stg_split_marker.*\n\t\.ent.*\n/$1/;
+    } else {
+        $str =~ s/_?__stg_split_marker.*:\n//;
+    }
+
+    # Pull each "$LCn:" definition with its .byte lines (and an optional
+    # .rdata/.align preamble) out of the chunk.  The hash key is the label
+    # without its '$'.
+    while ( $str =~ s/(\t\.rdata\n\t\.align \d\n)?(\$(LC\d+):\n(\t\.byte\t.*\n)+)// ) {
+        local($label, $body) = ($3, $2);
+
+        if ( $LocalConstant{$label} ) {
+            &tidy_up_and_die(1,"Local constant label $label already defined!\n");
+        }
+
+        # The stored form carries its own .rdata header and switches back
+        # to .text afterwards.
+        $LocalConstant{$label} = join('', "\t.rdata\n\t.align 2\n", $body, "\t.text\n");
+    }
+
+    # Re-attach, at the front, every recorded constant referenced as "$label".
+    foreach $k (keys %LocalConstant) {
+        $str = $LocalConstant{$k} . $str if $str =~ /\$\b$k\b/;
+    }
+
+    # Turn a "# nop" placeholder that follows an .end line into a real nop
+    # placed before that .end, so the dummy direct return code sits inside
+    # the .ent/.end block: this keeps the label fixed if it is the last
+    # thing in a module and avoids anonymous text the linker complains about.
+    $str =~ s/(\t\.end [A-Za-z0-9_]+)\n\t# nop/\tnop\n$1/g;
+
+    # Every chunk gets the whole of $UNDEFINED_FUNS tacked on the end,
+    # whether or not it needs all of it.
+    $str .= $UNDEFINED_FUNS;
+
+    if ( $Dump_asm_splitting_info ) {
+        print STDERR "### STRIPPED BLOCK (mips):\n$str";
+    }
+
+    return $str;
+}
+\end{code}
+
+\begin{code}
+# The logic for both Darwin/PowerPC and Darwin/x86 ends up being the same.
+
+sub process_asm_block_darwin {
+    # Post-process one chunk of Darwin assembly text and return the result:
+    #   - the ___stg_split_marker line is removed;
+    #   - everything from the first "L_...$...:" label onwards is dropped;
+    #   - .const/.align/"LCn:" constants are moved into %LocalConstant;
+    #   - recorded constants the chunk refers to are prepended;
+    #   - entries from %DyldChunks / %DyldChunksDefined for symbols the
+    #     chunk refers to are appended.
+    local($str) = @_;
+    local($dyld_stuff) = '';
+
+    # strip the marker
+    $str =~ s/___stg_split_marker.*\n//;
+
+    # Cut the chunk off at the first label of the form "L_...$...:".
+    # NOTE(review): presumably this is where the dyld stubs start, which
+    # are re-attached selectively below -- confirm against the caller.
+    $str =~ s/L_.*\$.*:\n(.|\n)*//;
+
+    # remove/record any literal constants defined here
+    # (the dot of ".const" is escaped so that only the literal directive
+    # matches, consistent with "\.align" on the following line)
+    while ( $str =~ s/^(\s+\.const.*\n\s+\.align.*\n(LC\d+):\n(\s\.(byte|short|long|fill|space|ascii).*\n)+)// ) {
+        local($label) = $2;
+        local($body) = $1;
+
+        &tidy_up_and_die(1,"Local constant label $label already defined!\n")
+            if $LocalConstant{$label};
+
+        $LocalConstant{$label} = $body;
+    }
+
+    # inject definitions for any local constants now used herein
+    # (a label counts as used when followed by a word boundary or a '[')
+    foreach $k (keys %LocalConstant) {
+        if ( $str =~ /\b$k(\b|\[)/ ) {
+            $str = $LocalConstant{$k} . $str;
+        }
+    }
+
+    # For every known symbol referenced here as "L<sym>$...", collect the
+    # variant for symbols defined in this chunk (a "<sym>:" line) or the
+    # ordinary one otherwise.
+    # NOTE(review): /^$k:$/ has no /m, so finding "<sym>:" on an interior
+    # line relies on multi-line matching being enabled elsewhere -- confirm.
+    foreach $k (keys %DyldChunks) {
+        if ( $str =~ /\bL$k\$/ ) {
+            if ( $str =~ /^$k:$/ ) {
+                $dyld_stuff .= $DyldChunksDefined{$k};
+            } else {
+                $dyld_stuff .= $DyldChunks{$k};
+            }
+        }
+    }
+
+    $str .= "\n" . $dyld_stuff;
+
+    print STDERR "### STRIPPED BLOCK (darwin):\n$str" if $Dump_asm_splitting_info;
+
+    # The processed chunk is the sub's return value.
+    $str;
+}
+\end{code}
+
+\begin{code}
+sub process_asm_block_powerpc_linux {
+    # Post-process one chunk of PowerPC Linux assembly text and return the
+    # result: the __stg_split_marker line is removed, ".LCn:" constants in
+    # a .rodata section are moved into the global %LocalConstant, and the
+    # recorded text of every constant the chunk refers to is prepended.
+    local($str) = @_;
+
+    # strip the marker
+    $str =~ s/__stg_split_marker.*\n//;
+
+    # remove/record any literal constants defined here
+    # (the dot of ".section" is escaped so that only the literal directive
+    # matches, consistent with "\.rodata" and "\.align" in the same pattern)
+    while ( $str =~ s/^(\s+\.section\s+\.rodata\n\s+\.align.*\n(\.LC\d+):\n(\s\.(byte|short|long|quad|2byte|4byte|8byte|fill|space|ascii|string).*\n)+)// ) {
+        local($label) = $2;
+        local($body) = $1;
+
+        &tidy_up_and_die(1,"Local constant label $label already defined!\n")
+            if $LocalConstant{$label};
+
+        $LocalConstant{$label} = $body;
+    }
+
+    # inject definitions for any local constants now used herein
+    # (labels start with '.', so \b cannot anchor their front; a label
+    # counts as used when preceded by whitespace or a comma)
+    foreach $k (keys %LocalConstant) {
+        if ( $str =~ /[\s,]$k\b/ ) {
+            $str = $LocalConstant{$k} . $str;
+        }
+    }
+
+    print STDERR "### STRIPPED BLOCK (powerpc linux):\n$str" if $Dump_asm_splitting_info;
+
+    # The processed chunk is the sub's return value.
+    $str;
+}
+\end{code}
+
+\begin{code}
+sub tidy_up_and_die {
+    # Write $msg to STDERR and terminate the program.  The exit status is
+    # 0 when $return_val is numerically 0 and 1 for anything else.
+    # Despite the name, nothing is cleaned up here.
+    local($return_val, $msg) = @_;
+
+    print STDERR $msg;
+
+    exit(0) if $return_val == 0;
+    exit(1);
+}
+\end{code}
diff --git a/driver/test_mangler b/driver/test_mangler
new file mode 100644
index 0000000000..96cf31ca68
--- /dev/null
+++ b/driver/test_mangler
@@ -0,0 +1,29 @@
+#! /usr/bin/perl
+# a simple wrapper to test a .s-file mangler
+#
+# usage: test_mangler <target-platform> [asm-file ...]
+#
+# reads the named files (or stdin when none are given), runs the text
+# through mangle_asm via two temporary files, and writes the result to stdout
+
+push(@INC,"/net/dazdak/BUILDS/gransim-4.04/i386-unknown-linux/ghc/driver");
+
+# $TargetPlatform has to be set before ghc-asm.prl is loaded, so refuse to
+# go on without it instead of silently running with it undefined.
+die "usage: $0 <target-platform> [asm-file ...]\n" unless @ARGV;
+$TargetPlatform = shift(@ARGV);
+
+require("ghc-asm.prl") || die "require mangler failed!\n";
+
+$SpX86Mangling = 1;
+$StolenX86Regs = 4;
+
+# NOTE(review): these names are predictable (/tmp + pid); fine for a
+# developer's test harness, not for anything run in a hostile /tmp.
+$tmp_in  = "/tmp/mangle1.$$";
+$tmp_out = "/tmp/mangle2.$$";
+
+# Copy the input to the first temporary file: mangle_asm works on file names.
+open(INP, "> $tmp_in") || die "Can't open tmp file 1: $!\n";
+while (<>) {
+    print INP $_;
+}
+close(INP) || die "Can't close tmp file 1: $!\n";
+
+&mangle_asm($tmp_in, $tmp_out);
+
+# Copy the mangled output to stdout.
+open(INP, "< $tmp_out") || die "Can't open tmp file 2: $!\n";
+while (<INP>) {
+    print STDOUT $_;
+}
+close(INP) || die "Can't close tmp file 2: $!\n";
+
+unlink($tmp_in, $tmp_out);
+exit(0);