Import libffi-3.0.4, and use it to provide FFI support in GHCi

This replaces the hand-rolled architecture-specific FFI support in GHCi with the standard libffi as used in GCJ, Python and other projects. I've bundled the complete libffi-3.0.4 tarball in the source tree in the same way as we do for GMP, the difference being that we always build and install our own libffi regardless of whether there's one on the system (it's small, and we don't want dependency/versioning headaches). In particular this means that unregisterised builds will now have a fully working GHCi including FFI out of the box, provided libffi supports the platform. There is also code in the RTS to use libffi in place of rts/Adjustor.c, but it is currently not enabled if we already have support in Adjustor.c for the current platform. We need to assess the performance impact before using libffi here too (in GHCi we don't care too much about performance).
author: Simon Marlow <simonmar@microsoft.com> 2008-04-08 18:34:34 +0000
committer: Simon Marlow <simonmar@microsoft.com> 2008-04-08 18:34:34 +0000
commit: e0fcf61dca4dfac99cb5417e1bc4cbee18822cf2 (patch)
tree: 32382c35525e71b5f20dea714fb748b0870431e0
parent: 92986843271f2f400ebc0759b30eb11d47eaa193 (diff)
download: haskell-e0fcf61dca4dfac99cb5417e1bc4cbee18822cf2.tar.gz
12 files changed, 169 insertions, 907 deletions
diff --git a/Makefile b/Makefile
index e6d9866ed0..870e0c4dc6 100644
--- a/Makefile
+++ b/Makefile
@@ -67,12 +67,12 @@ include $(TOP)/mk/boilerplate.mk
 
 # We can't 'make boot' in libraries until stage1 is built
 ifeq "$(BootingFromHc)" "YES"
-SUBDIRS_BUILD = gmp includes rts compat compiler docs utils driver
+SUBDIRS_BUILD = gmp libffi includes rts compat compiler docs utils driver
 else
-SUBDIRS_BUILD = gmp includes compat utils driver docs compiler rts libraries/Cabal/doc
+SUBDIRS_BUILD = gmp libffi includes compat utils driver docs compiler rts libraries/Cabal/doc
 endif
 
-SUBDIRS = gmp includes compat utils driver docs rts libraries compiler libraries/Cabal/doc
+SUBDIRS = gmp libffi includes compat utils driver docs rts libraries compiler libraries/Cabal/doc
 
 # Sanity check that all the boot libraries are in the tree, to catch
 # failure to run darcs-all.
diff --git a/compiler/Makefile b/compiler/Makefile
index 2dd620328d..216e5f8a30 100644
--- a/compiler/Makefile
+++ b/compiler/Makefile
@@ -308,7 +308,7 @@ else
 	@echo "cRelocatableBuild     = False"                 >> $(CONFIG_HS)
 endif
 	@echo "cLibFFI               :: Bool"                 >> $(CONFIG_HS)
-ifeq "$(UseLibFFI)" "YES"
+ifeq "$(UseLibFFIForAdjustors)" "YES"
 	@echo "cLibFFI               = True"                  >> $(CONFIG_HS)
 else
 	@echo "cLibFFI               = False"                 >> $(CONFIG_HS)
@@ -457,11 +457,6 @@ ALL_DIRS += javaGen
 SRC_HC_OPTS += -DJAVA
 endif
 
-ifeq ($(UseLibFFI),YES)
-SRC_HC_OPTS += -DUSE_LIBFFI
-SRC_HSC2HS_OPTS += -DUSE_LIBFFI
-endif
-
 ifeq "$(BootingFromHc)" "YES"
 # HC files are always from a self-booted compiler
 bootstrapped = YES
@@ -517,6 +512,9 @@ ifeq "$(GhcDebugged)" "YES"
 SRC_LD_OPTS += -debug
 endif
 
+SRC_HC_OPTS     += -I$(FPTOOLS_TOP)/libffi/build/include
+SRC_HSC2HS_OPTS += -I$(FPTOOLS_TOP)/libffi/build/include
+
 ALL_DIRS += ghci
 
 # If we are going to use dynamic libraries instead of .o files for ghci,
diff --git a/compiler/ghci/ByteCodeFFI.lhs b/compiler/ghci/ByteCodeFFI.lhs
index d72f1ac94e..5c2b35f1aa 100644
--- a/compiler/ghci/ByteCodeFFI.lhs
+++ b/compiler/ghci/ByteCodeFFI.lhs
@@ -1,19 +1,10 @@
 %
-% (c) The University of Glasgow 2001-2006
+% (c) The University of Glasgow 2001-2008
 %
 
 ByteCodeGen: Generate machine-code sequences for foreign import
 
 \begin{code}
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-#ifdef USE_LIBFFI
-
 module ByteCodeFFI ( moan64, newExec ) where
 
 import Outputable
@@ -21,844 +12,6 @@ import System.IO
 import Foreign
 import Foreign.C
 
-#else
-
-module ByteCodeFFI ( mkMarshalCode, moan64, newExec ) where
-
-#include "HsVersions.h"
-
-import TyCon
-import Outputable
-import SMRep
-import ForeignCall
-import Panic
-
--- DON'T remove apparently unused imports here .. 
--- there is ifdeffery below
-import Control.Exception ( throwDyn )
-import Data.Bits	( Bits(..), shiftR, shiftL )
-import Data.List        ( mapAccumL )
-
-import Data.Word	( Word8, Word32 )
-import Foreign		( Ptr, FunPtr, castPtrToFunPtr,
-			  Storable, sizeOf, pokeArray )
-import Foreign.C	( CUInt )
-import System.IO.Unsafe ( unsafePerformIO )
-import System.IO	( hPutStrLn, stderr )
--- import Debug.Trace	( trace )
-\end{code}
-
-%************************************************************************
-%*									*
-\subsection{The platform-dependent marshall-code-generator.}
-%*									*
-%************************************************************************
-
-\begin{code}
-
--- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
-#include "nativeGen/NCG.h"
-
-{-
-Make a piece of code which expects to see the Haskell stack
-looking like this.  It is given a pointer to the lowest word in
-the stack -- presumably the tag of the placeholder.
-                 
-                  <arg_n>
-                  ...
-                  <arg_1>
-                  Addr# address_of_C_fn
-                  <placeholder-for-result#> (must be an unboxed type)
-
-We cope with both ccall and stdcall for the C fn.  However, this code
-itself expects only to be called using the ccall convention -- that is,
-we don't clear our own (single) arg off the C stack.
--}
-mkMarshalCode :: CCallConv
-              -> (Int, PrimRep) -> Int -> [(Int, PrimRep)] 
-              -> IO (FunPtr ())
-mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
-   = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep) 
-                                   addr_offW arg_offs_n_reps
-     in  newExec bytes
-
-mkMarshalCode_wrk :: CCallConv 
-                  -> (Int, PrimRep) -> Int -> [(Int, PrimRep)] 
-                  -> [Word8]
-
-mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
-
-#if i386_TARGET_ARCH
-
-   = let -- Don't change this without first consulting Intel Corp :-)
-         bytes_per_word = 4
-
-         offsets_to_pushW
-            = concat
-              [   -- reversed because x86 is little-endian
-                  reverse [a_offW .. a_offW + primRepSizeW a_rep - 1]
-
-                -- reversed because args are pushed L -> R onto C stack
-                | (a_offW, a_rep) <- reverse arg_offs_n_reps
-              ]
-         
-         arguments_size = bytes_per_word * length offsets_to_pushW
-#if darwin_TARGET_OS
-             -- Darwin: align stack frame size to a multiple of 16 bytes
-         stack_frame_size = (arguments_size + 15) .&. complement 15
-         stack_frame_pad = stack_frame_size - arguments_size
-#else
-         stack_frame_size = arguments_size
-#endif
-
-         -- some helpers to assemble x86 insns.
-         movl_offespmem_esi offB	-- movl   offB(%esp), %esi
-            = [0x8B, 0xB4, 0x24] ++ lit32 offB
-         movl_offesimem_ecx offB	-- movl   offB(%esi), %ecx
-            = [0x8B, 0x8E] ++ lit32 offB
-         save_regs			-- pushl  all intregs except %esp
-            = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
-         restore_regs			-- popl   ditto
-            = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
-         pushl_ecx			-- pushl  %ecx
-            = [0x51]
-         call_star_ecx			-- call   * %ecx
-            = [0xFF, 0xD1]
-         add_lit_esp lit		-- addl   $lit, %esp
-            = [0x81, 0xC4] ++ lit32 lit
-         movl_eax_offesimem offB	-- movl   %eax, offB(%esi)
-            = [0x89, 0x86] ++ lit32 offB
-         movl_edx_offesimem offB	-- movl   %edx, offB(%esi)
-            = [0x89, 0x96] ++ lit32 offB
-         ret				-- ret
-            = [0xC3]
-         fstpl_offesimem offB		-- fstpl   offB(%esi)
-            = [0xDD, 0x9E] ++ lit32 offB
-         fstps_offesimem offB		-- fstps   offB(%esi)
-            = [0xD9, 0x9E] ++ lit32 offB
-         {-
-             2 0000 8BB42478    movl    0x12345678(%esp), %esi
-             2      563412
-             3 0007 8B8E7856    movl    0x12345678(%esi), %ecx
-             3      3412
-             4              
-             5 000d 50535152    pushl %eax ; pushl %ebx ; pushl %ecx ; pushl  %edx
-             6 0011 565755      pushl %esi ; pushl %edi ; pushl %ebp
-             7              
-             8 0014 5D5F5E      popl %ebp ; popl %edi ; popl %esi 
-             9 0017 5A595B58    popl %edx ; popl %ecx ; popl %ebx ; popl %eax
-            10              
-            11 001b 51          pushl %ecx
-            12 001c FFD1        call * %ecx
-            13              
-            14 001e 81C47856    addl $0x12345678, %esp
-            14      3412
-            15 0024 89867856    movl %eax, 0x12345678(%esi)
-            15      3412
-            16 002a 89967856    movl %edx, 0x12345678(%esi)
-            16      3412
-            17           
-            18 0030 DD967856    fstl    0x12345678(%esi)
-            18      3412
-            19 0036 DD9E7856    fstpl   0x12345678(%esi)
-            19      3412
-            20 003c D9967856    fsts    0x12345678(%esi)
-            20      3412
-            21 0042 D99E7856    fstps   0x12345678(%esi)
-            18              
-            19 0030 C3          ret
-            20              
-
-         -}
-
-     in
-     --trace (show (map fst arg_offs_n_reps))
-     (
-     {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is 
-        arg passed from the interpreter.
-
-        Push all callee saved regs.  Push all of them anyway ...
-           pushl       %eax
-           pushl       %ebx
-           pushl       %ecx
-           pushl       %edx
-           pushl       %esi
-           pushl       %edi
-           pushl       %ebp
-     -}
-     save_regs
-
-     {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
-        We'll use %esi as a temporary to point at the H stack, and
-        %ecx as a temporary to copy via.
-
-           movl        28+4(%esp), %esi
-     -}
-     ++ movl_offespmem_esi 32
-
-#if darwin_TARGET_OS
-     {- On Darwin, add some padding so that the stack stays aligned. -}
-     ++ (if stack_frame_pad /= 0
-            then add_lit_esp (-stack_frame_pad)
-            else [])
-#endif
-
-     {- For each arg in args_offs_n_reps, examine the associated
-        CgRep to determine how many words there are.  This gives a
-        bunch of offsets on the H stack to copy to the C stack:
-
-           movl        off1(%esi), %ecx
-           pushl       %ecx
-     -}
-     ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW) 
-                            ++ pushl_ecx) 
-                  offsets_to_pushW
-
-     {- Get the addr to call into %ecx, bearing in mind that there's 
-        an Addr# tag at the indicated location, and do the call:
-
-           movl        4*(1 /*tag*/ +addr_offW)(%esi), %ecx
-           call        * %ecx
-     -}
-     ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
-     ++ call_star_ecx
-
-     {- Nuke the args just pushed and re-establish %esi at the 
-        H-stack ptr:
-
-           addl        $4*number_of_args_pushed, %esp (ccall only)
-           movl        28+4(%esp), %esi
-     -}
-     ++ (if   cconv /= StdCallConv
-         then add_lit_esp stack_frame_size
-         else [])
-     ++ movl_offespmem_esi 32
-
-     {- Depending on what the return type is, get the result 
-        from %eax or %edx:%eax or %st(0).
-
-           movl        %eax, 4(%esi)        -- assuming tagged result
-        or
-           movl        %edx, 4(%esi)
-           movl        %eax, 8(%esi)
-        or
-           fstpl       4(%esi)
-        or
-           fstps       4(%esi)
-     -}
-     ++ let i32 = movl_eax_offesimem 0
-            i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
-            f32 = fstps_offesimem 0
-            f64 = fstpl_offesimem 0
-        in
-        case r_rep of
-           VoidRep   -> []
-           IntRep    -> i32
-           WordRep   -> i32
-           Int64Rep  -> i64
-           Word64Rep -> i64
-           AddrRep   -> i32
-           FloatRep  -> f32
-           DoubleRep -> f64
-           other     -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)" 
-                               (ppr r_rep)
-
-     {- Restore all the pushed regs and go home.
-
-           pushl        %ebp
-           pushl        %edi
-           pushl        %esi
-           pushl        %edx
-           pushl        %ecx
-           pushl        %ebx
-           pushl        %eax
-
-           ret
-     -}
-     ++ restore_regs
-     ++ ret
-     )
-
-#elif x86_64_TARGET_ARCH
-
-   =
-     -- the address of the H stack is in %rdi.  We need to move it out, so
-     -- we can use %rdi as an arg reg for the following call:
-    pushq_rbp ++
-    movq_rdi_rbp ++
-	
-     -- ####### load / push the args
-
-     let
-	(stack_args, fregs_unused, reg_loads) = 
-	   load_arg_regs arg_offs_n_reps int_loads float_loads []
-
-	tot_arg_size = bytes_per_word * length stack_args
-
-	-- On entry to the called function, %rsp should be aligned
-	-- on a 16-byte boundary +8 (i.e. the first stack arg after
-	-- the return address is 16-byte aligned).  In STG land
-	-- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
-	-- need to make sure we push a multiple of 16-bytes of args,
-	-- plus the return address, to get the correct alignment.
-	(real_size, adjust_rsp)
-	  | tot_arg_size `rem` 16 == 0 	  = (tot_arg_size, [])
-	  | otherwise           	  = (tot_arg_size + 8, subq_lit_rsp 8)
-
-	(stack_pushes, stack_words) =
-		push_args stack_args [] 0
-
-	-- we need to know the number of SSE regs used in the call, see later
-	n_sse_regs_used = length float_loads - length fregs_unused
-     in
-        concat reg_loads
-     ++ adjust_rsp
-     ++ concat stack_pushes -- push in reverse order
-
-     -- ####### make the call
-
-	-- use %r10 to make the call, because we don't have to save it.
-        --      movq 8*addr_offW(%rbp), %r10
-     ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
-
-	-- The x86_64 ABI requires us to set %al to the number of SSE
-	-- registers that contain arguments, if the called routine
-	-- is a varargs function.  We don't know whether it's a
-	-- varargs function or not, so we have to assume it is.
-	--
-	-- It's not safe to omit this assignment, even if the number
-	-- of SSE regs in use is zero.  If %al is larger than 8
-	-- on entry to a varargs function, seg faults ensue.
-     ++ movq_lit_rax n_sse_regs_used
-     ++ call_star_r10
-
-	-- pop the args from the stack, only in ccall mode 
-	-- (in stdcall the callee does it).
-     ++ (if   cconv /= StdCallConv
-         then addq_lit_rsp real_size
-         else [])
-
-     -- ####### place the result in the right place and return
-
-     ++ assign_result
-     ++ popq_rbp
-     ++ ret
-
-  where
-     bytes_per_word = 8
-
-     -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
-     -- flt arg regs: xmm0..xmm7
-     int_loads   = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
- 		     movq_rbpoff_rcx, movq_rbpoff_r8,  movq_rbpoff_r9 ]
-     float_loads = [ 0..7 ]
-
-     load_arg_regs args [] [] code     =  (args, [], code)
-     load_arg_regs [] iregs fregs code =  ([], fregs, code)
-     load_arg_regs ((off,rep):args) iregs fregs code
-	| FloatRep  <- rep =
-            case fregs of
-              [] -> push_this_arg
-              n : frest ->
-		load_arg_regs args iregs frest 
-                      (mov_f32_rbpoff_xmm n (bytes_per_word * off) : code)
-	| DoubleRep <- rep =
-            case fregs of
-              [] -> push_this_arg
-              n : frest ->
-		load_arg_regs args iregs frest 
-                       (mov_f64_rbpoff_xmm n (bytes_per_word * off) : code)
-	| (mov_reg:irest) <- iregs =
-		load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
-	| otherwise =
-		 push_this_arg
-	where
-	   push_this_arg = ((off,rep):args',fregs', code')
-		where (args',fregs',code') = load_arg_regs args iregs fregs code
-
-     push_args [] code pushed_words = (code, pushed_words)
-     push_args ((off,rep):args) code pushed_words
-	| FloatRep  <- rep =
-		push_args args (push_f32_rbpoff (bytes_per_word * off) : code) 
-			(pushed_words+1)
-	| DoubleRep <- rep =
-		push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
-			(pushed_words+1)
-	| otherwise =
-		push_args args (pushq_rbpoff (bytes_per_word * off) : code)
-			(pushed_words+1)
-
-
-     assign_result = 
-	case r_rep of
-	  DoubleRep -> f64
-	  FloatRep  -> f32
-          VoidRep   -> []
-	  _other    -> i64
-	where
-	  i64 = movq_rax_rbpoff 0
-	  f32 = mov_f32_xmm0_rbpoff 0
-	  f64 = mov_f64_xmm0_rbpoff 0
-
---    ######### x86_64 machine code:
-
---   0:   48 89 fd                mov    %rdi,%rbp
---   3:   48 8b bd 78 56 34 12    mov    0x12345678(%rbp),%rdi
---   a:   48 8b b5 78 56 34 12    mov    0x12345678(%rbp),%rsi
---  11:   48 8b 95 78 56 34 12    mov    0x12345678(%rbp),%rdx
---  18:   48 8b 8d 78 56 34 12    mov    0x12345678(%rbp),%rcx
---  1f:   4c 8b 85 78 56 34 12    mov    0x12345678(%rbp),%r8
---  26:   4c 8b 8d 78 56 34 12    mov    0x12345678(%rbp),%r9
---  2d:   4c 8b 95 78 56 34 12    mov    0x12345678(%rbp),%r10
---  34:   48 c7 c0 78 56 34 12    mov    $0x12345678,%rax
---  3b:   48 89 85 78 56 34 12    mov    %rax,0x12345678(%rbp)
---  42:   f3 0f 10 bd 78 56 34 12 movss  0x12345678(%rbp),%xmm7
---  4a:   f2 0f 10 9d 78 56 34 12 movsd  0x12345678(%rbp),%xmm3
---  52:   f2 44 0f 10 85 78 56 34 12 movsd  0x12345678(%rbp),%xmm8
---  5b:   f3 0f 11 9d 78 56 34 12 movss  %xmm3,0x12345678(%rbp)
---  63:   f2 0f 11 9d 78 56 34 12 movsd  %xmm3,0x12345678(%rbp)
---  6b:   f2 44 0f 11 85 78 56 34 12 movsd  %xmm8,0x12345678(%rbp)
---  74:   ff b5 78 56 34 12       pushq  0x12345678(%rbp)
---  7a:   f3 44 0f 11 04 24       movss  %xmm8,(%rsp)
---  80:   f2 44 0f 11 04 24       movsd  %xmm8,(%rsp)
---  86:   48 81 ec 78 56 34 12    sub    $0x12345678,%rsp
---  8d:   48 81 c4 78 56 34 12    add    $0x12345678,%rsp
---  94:   41 ff d2                callq  *%r10
---  97:   55                      push   %rbp
---  98:   5d                      pop    %rbp
---  99:   c3                      retq   
-
-     movq_rdi_rbp         = [0x48,0x89,0xfd]
-     movq_rbpoff_rdi  off = [0x48, 0x8b, 0xbd] ++ lit32 off
-     movq_rbpoff_rsi  off = [0x48, 0x8b, 0xb5] ++ lit32 off
-     movq_rbpoff_rdx  off = [0x48, 0x8b, 0x95] ++ lit32 off
-     movq_rbpoff_rcx  off = [0x48, 0x8b, 0x8d] ++ lit32 off 
-     movq_rbpoff_r8   off = [0x4c, 0x8b, 0x85] ++ lit32 off
-     movq_rbpoff_r9   off = [0x4c, 0x8b, 0x8d] ++ lit32 off
-     movq_rbpoff_r10  off = [0x4c, 0x8b, 0x95] ++ lit32 off
-     movq_lit_rax     lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
-     movq_rax_rbpoff  off = [0x48, 0x89, 0x85] ++ lit32 off
-     mov_f32_rbpoff_xmm n off
-         = 0xf3 : if n >= 8 then 0x44 : rest else rest
-         where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
-     mov_f64_rbpoff_xmm n off
-         = 0xf2 : if n >= 8 then 0x44 : rest else rest
-         where rest = [0x0f, 0x10, 0x85 + (n.&.7)`shiftL`3] ++ lit32 off
-     mov_f32_xmm0_rbpoff  off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
-     mov_f64_xmm0_rbpoff  off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
-     pushq_rbpoff     off = [0xff, 0xb5] ++ lit32 off
-     push_f32_rbpoff  off = 
-	subq_lit_rsp 8 ++			 -- subq $8, %rsp
-	mov_f32_rbpoff_xmm 8 off ++		 -- movss off(%rbp), %xmm8
-	[0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24]	 -- movss %xmm8, (%rsp)
-     push_f64_rbpoff  off =
-	subq_lit_rsp 8 ++			 -- subq $8, %rsp
-	mov_f64_rbpoff_xmm 8 off ++		 -- movsd off(%rbp), %xmm8
-	[0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24]     -- movsd %xmm8, (%rsp)
-     subq_lit_rsp     lit = [0x48, 0x81, 0xec] ++ lit32 lit
-     addq_lit_rsp     lit = [0x48, 0x81, 0xc4] ++ lit32 lit
-     call_star_r10 = [0x41,0xff,0xd2]
-     ret = [0xc3]
-     pushq_rbp = [0x55]
-     popq_rbp = [0x5d]
-
-#elif sparc_TARGET_ARCH
-
-   = let -- At least for sparc V8
-         bytes_per_word = 4
-
-         -- speaks for itself
-         w32_to_w8s_bigEndian :: Word32 -> [Word8]
-         w32_to_w8s_bigEndian w
-            =  [fromIntegral (0xFF .&. (w `shiftR` 24)),
-                fromIntegral (0xFF .&. (w `shiftR` 16)),
-                fromIntegral (0xFF .&. (w `shiftR` 8)),
-                fromIntegral (0xFF .&. w)]
-
-         offsets_to_pushW
-            = concat
-              [  [a_offW .. a_offW + primRepSizeW a_rep - 1]
-
-                | (a_offW, a_rep) <- arg_offs_n_reps
-              ]
-
-         total_argWs    = length offsets_to_pushW
-         argWs_on_stack = if total_argWs > 6 then total_argWs - 6 
-                                             else 0
-
-         -- The stack pointer must be kept 8-byte aligned, which means
-         -- we need to calculate this quantity too
-         argWs_on_stack_ROUNDED_UP
-            | odd argWs_on_stack = 1 + argWs_on_stack
-            | otherwise          = argWs_on_stack
-
-         -- some helpers to assemble sparc insns.
-         -- REGS
-         iReg, oReg, gReg, fReg :: Int -> Word32
-         iReg = fromIntegral . (+ 24)
-         oReg = fromIntegral . (+ 8)
-         gReg = fromIntegral . (+ 0)
-         fReg = fromIntegral
-
-         sp = oReg 6
-         i0 = iReg 0
-         i7 = iReg 7
-         o0 = oReg 0
-         o1 = oReg 1
-         o7 = oReg 7
-         g0 = gReg 0
-         g1 = gReg 1
-         f0 = fReg 0
-         f1 = fReg 1
-
-         -- INSN templates
-         insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
-         insn_r_r_i op3 rs1 rd imm13
-            = (3 `shiftL` 30) 
-              .|. (rs1 `shiftL` 25)
-              .|. (op3 `shiftL` 19)
-              .|. (rd `shiftL` 14) 
-              .|. (1 `shiftL` 13) 
-              .|. mkSimm13 imm13
-
-         insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
-         insn_r_i_r op3 rs1 imm13 rd
-            = (2 `shiftL` 30) 
-              .|. (rd `shiftL` 25)
-              .|. (op3 `shiftL` 19)
-              .|. (rs1 `shiftL` 14) 
-              .|. (1 `shiftL` 13) 
-              .|. mkSimm13 imm13
-
-         mkSimm13 :: Int -> Word32
-         mkSimm13 imm13 
-            = let imm13w = (fromIntegral imm13) :: Word32
-              in  imm13w .&. 0x1FFF             
-
-         -- REAL (non-synthetic) insns
-         -- or %rs1, %rs2, %rd
-         mkOR :: Word32 -> Word32 -> Word32 -> Word32
-         mkOR rs1 rs2 rd 
-            = (2 `shiftL` 30) 
-              .|. (rd `shiftL` 25)
-              .|. (op3_OR `shiftL` 19)
-              .|. (rs1 `shiftL` 14) 
-              .|. (0 `shiftL` 13) 
-              .|. rs2
-              where op3_OR = 2 :: Word32
-
-         -- ld(int)   [%rs + imm13], %rd
-         mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
-
-         -- st(int)   %rs, [%rd + imm13]
-         mkST   = insn_r_r_i 0x04 -- op3_ST
-
-         -- st(float) %rs, [%rd + imm13]
-         mkSTF  = insn_r_r_i 0x24 -- op3_STF
-
-         -- jmpl     %rs + imm13, %rd
-         mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
-
-         -- save     %rs + imm13, %rd
-         mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
-
-         -- restore  %rs + imm13, %rd
-         mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
-
-         -- SYNTHETIC insns
-         mkNOP             = mkOR g0 g0 g0
-         mkCALL reg        = mkJMPL reg 0 o7
-         mkRET             = mkJMPL i7 8 g0
-         mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
-     in
-     --trace (show (map fst arg_offs_n_reps))
-     concatMap w32_to_w8s_bigEndian (
-
-     {- On entry, %o0 is the arg passed from the interpreter.  After
-        the initial save insn, it will be in %i0.  Studying the sparc
-        docs one would have thought that the minimum frame size is 92
-        bytes, but gcc always uses at least 112, and indeed there are
-        segfaults a-plenty with 92.  So I use 112 here as well.  I
-        don't understand why, tho.  
-     -}
-     [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
-
-     {- For each arg in args_offs_n_reps, examine the associated
-        CgRep to determine how many words there are.  This gives a
-        bunch of offsets on the H stack.  Move the first 6 words into
-        %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
-        Use %g1 as a temp. 
-     -}
-     ++ let doArgW (offW, wordNo)
-              | wordNo < 6
-              = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
-              | otherwise
-              = [mkLD i0 (bytes_per_word * offW) g1,
-                 mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
-        in  
-            concatMap doArgW (zip offsets_to_pushW [0 ..])
-
-     {- Get the addr to call into %g1, bearing in mind that there's 
-        an Addr# tag at the indicated location, and do the call:
-
-           ld     [4*(1 /*tag*/ +addr_offW) + %i0], %g1
-           call   %g1
-     -}
-     ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
-         mkCALL g1,
-         mkNOP]
-
-     {- Depending on what the return type is, get the result 
-        from %o0 or %o1:%o0 or %f0 or %f1:%f0.
-
-           st          %o0, [%i0 + 4]        -- 32 bit int
-        or
-           st          %o0, [%i0 + 4]        -- 64 bit int
-           st          %o1, [%i0 + 8]        -- or the other way round?
-        or
-           st          %f0, [%i0 + 4]        -- 32 bit float
-        or
-           st          %f0, [%i0 + 4]        -- 64 bit float
-           st          %f1, [%i0 + 8]        -- or the other way round?
-
-     -}
-     ++ let i32 = [mkST o0 i0 0]
-            i64 = [mkST o0 i0 0, mkST o1 i0 4]
-            f32 = [mkSTF f0 i0 0]
-            f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
-        in
-            case r_rep of
-               VoidRep   -> []
-               IntRep    -> i32
-               WordRep   -> i32
-               AddrRep   -> i32
-               FloatRep  -> f32
-               DoubleRep -> f64
-               other     -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)" 
-                                   (ppr r_rep)
-
-     ++ [mkRET,
-         mkRESTORE_TRIVIAL]  -- this is in the delay slot of the RET
-     )
-#elif powerpc_TARGET_ARCH && darwin_TARGET_OS
-
-   = let
-         bytes_per_word = 4
-
-         -- speaks for itself
-         w32_to_w8s_bigEndian :: Word32 -> [Word8]
-         w32_to_w8s_bigEndian w
-            =  [fromIntegral (0xFF .&. (w `shiftR` 24)),
-                fromIntegral (0xFF .&. (w `shiftR` 16)),
-                fromIntegral (0xFF .&. (w `shiftR` 8)),
-                fromIntegral (0xFF .&. w)]
-
-         -- addr and result bits offsetsW
-         a_off = addr_offW * bytes_per_word
-         result_off  = r_offW * bytes_per_word
-
-         linkageArea = 24
-         parameterArea = sum [ primRepSizeW a_rep * bytes_per_word
-                        | (_, a_rep) <- arg_offs_n_reps ]
-         savedRegisterArea = 4
-         frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
-         padTo16 x = case x `mod` 16 of
-            0 -> x
-            y -> x - y + 16
-             
-         pass_parameters [] _ _ = []
-         pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
-            let
-               haskellArgOffset = a_offW * bytes_per_word
-               offsetW' = offsetW + primRepSizeW a_rep
-               
-               pass_word w 
-                   | offsetW + w < 8 =
-                      [0x801f0000    -- lwz rX, src(r31)
-                        .|. (fromIntegral src .&. 0xFFFF)
-                        .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
-                   | otherwise =
-                      [0x801f0000    -- lwz r0, src(r31)
-                        .|. (fromIntegral src .&. 0xFFFF),
-                       0x90010000    -- stw r0, dst(r1)
-                        .|. (fromIntegral dst .&. 0xFFFF)]
-                  where
-                     src = haskellArgOffset + w*bytes_per_word
-                     dst = linkageArea + (offsetW+w) * bytes_per_word
-            in
-               case a_rep of
-                  FloatRep | nextFPR < 14 ->
-                      (0xc01f0000    -- lfs fX, haskellArgOffset(r31)
-                        .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
-                        .|. (fromIntegral nextFPR `shiftL` 21))
-                      : pass_parameters args (nextFPR+1) offsetW'
-                  DoubleRep | nextFPR < 14 ->
-                      (0xc81f0000    -- lfd fX, haskellArgOffset(r31)
-                        .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
-                        .|. (fromIntegral nextFPR `shiftL` 21))
-                      : pass_parameters args (nextFPR+1) offsetW'
-                  _ ->
-                      concatMap pass_word [0 .. primRepSizeW a_rep - 1]
-                      ++ pass_parameters args nextFPR offsetW'              
-               
-         gather_result = case r_rep of
-            VoidRep -> []
-            FloatRep -> 
-               [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
-               -- stfs f1, result_off(r31)
-            DoubleRep -> 
-               [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
-               -- stfd f1, result_off(r31)
-            _ | primRepSizeW r_rep == 2 ->
-               [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
-                0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
-               -- stw r3, result_off(r31)
-               -- stw r4, result_off+4(r31)
-            _ | primRepSizeW r_rep == 1 ->
-               [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
-               -- stw r3, result_off(r31)
-     in
-         concatMap w32_to_w8s_bigEndian $ [
-            0x7c0802a6,         -- mflr r0
-            0x93e1fffc,         -- stw r31,-4(r1)
-            0x90010008,         -- stw r0,8(r1)
-            0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
-                                -- stwu r1, -frameSize(r1)
-            0x7c7f1b78          -- mr r31, r3
-         ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
-            0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
-                                -- lwz r12, a_off(r31)
-            0x7d8903a6,         -- mtctr r12
-            0x4e800421          -- bctrl
-         ] ++ gather_result ++ [
-            0x80210000,         -- lwz r1, 0(r1)
-            0x83e1fffc,         -- lwz r31, -4(r1)
-            0x80010008,         -- lwz r0, 8(r1)
-            0x7c0803a6,         -- mtlr r0
-            0x4e800020          -- blr
-         ]
-
-#elif powerpc_TARGET_ARCH && linux_TARGET_OS
-
-   -- All offsets here are measured in Words (not bytes).  This includes
-   -- arguments to the load/store machine code generators, alignment numbers
-   -- and the final 'framesize' among others.
-
-   = concatMap w32_to_w8s_bigEndian $ [
-            0x7c0802a6,                         -- mflr r0
-            0x93e1fffc,                         -- stw r31,-4(r1)
-            0x90010008,                         -- stw r0,8(r1)
-            0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
-            0x7c7f1b78                          -- mr r31, r3
-            ] ++ pass_parameters ++             -- pass the parameters
-            loadWord 12 addr_offW ++ [          -- lwz r12, a_off(r31)
-            0x7d8903a6,                         -- mtctr r12
-            0x4e800421                          -- bctrl
-            ] ++ gather_result ++ [             -- save the return value
-            0x80210000,                         -- lwz r1, 0(r1)
-            0x83e1fffc,                         -- lwz r31, -4(r1)
-            0x80010008,                         -- lwz r0, 8(r1)
-            0x7c0803a6,                         -- mtlr r0
-            0x4e800020                          -- blr
-         ]
-
-   where
-     gather_result :: [Word32]
-     gather_result = case r_rep of
-       VoidRep   -> []
-       FloatRep  -> storeFloat  1 r_offW
-       DoubleRep -> storeDouble 1 r_offW
-       Int64Rep  -> storeLong   3 r_offW
-       Word64Rep -> storeLong   3 r_offW
-       _         -> storeWord   3 r_offW
-
-     pass_parameters :: [Word32]
-     pass_parameters = concat params
-
-     -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
-     framesize = alignedTo 4 (argsize + 8)
-
-     ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
-
-     -- handle one argument, returning machine code and the updated state
-     loadparam :: (Int, Int, Int) -> (Int, PrimRep) ->
-                  ((Int, Int, Int), [Word32])
-
-     loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
-       FloatRep | fpr <= 8  -> ( (gpr, fpr + 1, stack),  loadFloat fpr ofs )
-       FloatRep             -> ( (gpr, fpr, stack + 1),  stackWord stack ofs )
-
-       DoubleRep | fpr <= 8 -> ( (gpr, fpr + 1, stack),  loadDouble fpr ofs )
-       DoubleRep            -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
-
-       r | is64 r && even gpr  -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
-       r | is64 r && gpr <= 9  -> ( (gpr + 2, fpr, stack),  loadLong gpr ofs )
-       r | is64 r              -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
-       _ | gpr <= 10        -> ( (gpr + 1, fpr, stack),  loadWord gpr ofs )
-       _                    -> ( (gpr, fpr, stack + 1),  stackWord stack ofs )
-      where astack = alignedTo 2 stack
-
-            is64 Int64Rep = True
-            is64 Word64Rep = True
-            is64 _ = False
-
-     alignedTo :: Int -> Int -> Int
-     alignedTo alignment x = case x `mod` alignment of
-                               0 -> x
-                               y -> x - y + alignment
-
-     -- convenience macros to do multiple-instruction data moves
-     stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
-     stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
-     loadLong  dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
-     storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
-
-     -- load data from the Haskell stack (relative to r31)
-     loadFloat   = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
-     loadDouble  = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
-     loadWord    = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
-
-     -- store data to the Haskell stack (relative to r31)
-     storeFloat  = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
-     storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
-     storeWord   = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
-
-     -- store data to the C stack (relative to r1)
-     storeWordC  = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
-
-     -- machine code building blocks
-     loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
-     loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
-
-     register :: Int -> Word32
-     register reg = fromIntegral reg `shiftL` 21
-
-     offset :: Int -> Word32
-     offset ofs   = fromIntegral (ofs * 4) .&. 0xFFFF
-
-     -- speaks for itself
-     w32_to_w8s_bigEndian :: Word32 -> [Word8]
-     w32_to_w8s_bigEndian w =  [fromIntegral (0xFF .&. (w `shiftR` 24)),
-                                fromIntegral (0xFF .&. (w `shiftR` 16)),
-                                fromIntegral (0xFF .&. (w `shiftR` 8)),
-                                fromIntegral (0xFF .&. w)]
-
-#else 
-
-   = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
-
-#endif
-
-#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-lit32 :: Int -> [Word8]
-lit32 i = let w32 = (fromIntegral i) :: Word32
-          in  map (fromIntegral . ( .&. 0xFF))
-                  [w32, w32 `shiftR` 8, 
-                   w32 `shiftR` 16,  w32 `shiftR` 24]
-#endif
-
-#endif /* !USE_LIBFFI */
-
 moan64 :: String -> SDoc -> a
 moan64 msg pp_rep
    = unsafePerformIO (
diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs
index 2e0079e6a8..007f3eb155 100644
--- a/compiler/ghci/ByteCodeGen.lhs
+++ b/compiler/ghci/ByteCodeGen.lhs
@@ -21,9 +21,7 @@ import ByteCodeItbls
 import ByteCodeAsm
 import ByteCodeLink
 import ByteCodeFFI
-#ifdef USE_LIBFFI
 import LibFFI
-#endif
 
 import Outputable
 import Name
@@ -1063,19 +1061,11 @@ generateCCall d0 s p ccall_spec@(CCallSpec target cconv safety) fn args_r_to_l
 	 stk_offset   = d_after_r - s
 
      -- in
-#if !defined(USE_LIBFFI)
-     -- In the native case, we build marshalling code and attach the
-     -- address of that to the CCALL instruction
-     addr_of_marshaller <- ioToBc (mkMarshalCode cconv
-                                (r_offW, r_rep) addr_offW
-                                (zip args_offW a_reps))
-#else
      -- the only difference in libffi mode is that we prepare a cif
      -- describing the call type by calling libffi, and we attach the
      -- address of this to the CCALL instruction.
      token <- ioToBc $ prepForeignCall cconv a_reps r_rep
      let addr_of_marshaller = castPtrToFunPtr token
-#endif
 
      recordItblMallocBc (ItblPtr (castFunPtrToPtr addr_of_marshaller))
      let
diff --git a/compiler/ghci/LibFFI.hsc b/compiler/ghci/LibFFI.hsc
index 3708238133..7f24d01a7d 100644
--- a/compiler/ghci/LibFFI.hsc
+++ b/compiler/ghci/LibFFI.hsc
@@ -6,12 +6,6 @@
 --
 -----------------------------------------------------------------------------
 
-#ifndef USE_LIBFFI
-
-module LibFFI () where
-
-#else
-
 #include <ffi.h>
 
 module LibFFI (
@@ -142,5 +136,3 @@ foreign import ccall "ffi_prep_cif"
 --            -> Ptr ()                    -- put result here
 --            -> Ptr (Ptr ())              -- arg values
 --            -> IO ()
-
-#endif
diff --git a/libffi/Makefile b/libffi/Makefile
new file mode 100644
index 0000000000..3a7cb693f7
--- /dev/null
+++ b/libffi/Makefile
@@ -0,0 +1,124 @@
+
+TOP=..
+
+include $(TOP)/mk/boilerplate.mk
+
+# -----------------------------------------------------------------------------
+# This Makefile is copied from the one we use for GMP in ../gmp.
+#
+# We use libffi's own configuration stuff.
+
+PLATFORM := $(shell echo $(HOSTPLATFORM) | sed 's/i[567]86/i486/g')
+
+# 2007-09-26
+#     set -o igncr 
+# is not a valid command on non-Cygwin-systems.
+# Let it fail silently instead of aborting the build.
+#
+# 2007-07-05
+# We do
+#     set -o igncr; export SHELLOPTS
+# here as otherwise checking the size of limbs
+# makes the build fall over on Cygwin. See the thread
+# http://www.cygwin.com/ml/cygwin/2006-12/msg00011.html
+# for more details.
+
+# 2007-07-05
+# Passing
+#     as_ln_s='cp -p'
+# isn't sufficient to stop cygwin using symlinks the mingw gcc can't
+# follow, as it isn't used consistently. Instead we put an ln.bat in
+# path that always fails.
+
+LIBFFI_TARBALL := $(firstword $(wildcard libffi*.tar.gz))
+LIBFFI_DIR := $(subst .tar.gz,,$(LIBFFI_TARBALL))
+
+ifeq "$(findstring dyn, $(GhcRTSWays))" "dyn"
+BUILD_SHARED=yes
+else
+BUILD_SHARED=no
+endif
+
+boot :: stamp.ffi.static
+BINDIST_STAMPS = stamp.ffi.static
+INSTALL_HEADERS += ffi.h
+INSTALL_LIBS += libffi.a
+
+ifeq "$(BUILD_SHARED)" "yes"
+boot :: stamp.ffi.shared
+BINDIST_STAMPS += stamp.ffi.shared
+INSTALL_LIBS += libffi.dll.a
+INSTALL_PROGS += libffi-3.dll
+endif
+
+install all :: $(INSTALL_HEADERS) $(INSTALL_LIBS) $(INSTALL_PROGS)
+
+stamp.ffi.static:
+	$(RM) -rf $(LIBFFI_DIR) build
+	$(TAR) -zxf $(LIBFFI_TARBALL)
+	mv $(LIBFFI_DIR) build
+#	chmod +x ln
+	(set -o igncr 2>/dev/null) && set -o igncr; export SHELLOPTS; \
+	    PATH=`pwd`:$$PATH; \
+	    export PATH; \
+	    cd build && \
+	    CC=$(WhatGccIsCalled) $(SHELL) configure \
+	          --enable-shared=no --host=$(PLATFORM) --build=$(PLATFORM)
+	touch $@
+
+stamp.ffi.shared:
+	$(RM) -rf $(LIBFFI_DIR) build-shared
+	$(TAR) -zxf $(LIBFFI_TARBALL)
+	mv $(LIBFFI_DIR) build-shared
+#	chmod +x ln
+	(set -o igncr 2>/dev/null) && set -o igncr; export SHELLOPTS; \
+	    PATH=`pwd`:$$PATH; \
+	    export PATH; \
+	    cd build-shared && \
+	    CC=$(WhatGccIsCalled) $(SHELL) configure \
+	          --enable-shared=yes --disable-static --host=$(PLATFORM) --build=$(PLATFORM)
+	touch $@
+
+ffi.h: stamp.ffi.static
+	$(CP) build/include/ffi.h .
+
+libffi.a: stamp.ffi.static
+	$(MAKE) -C build MAKEFLAGS=
+	$(CP) build/.libs/libffi.a .
+	$(RANLIB) libffi.a
+
+libffi-3.dll: stamp.ffi.shared
+	$(MAKE) -C build-shared MAKEFLAGS=
+	$(CP) build-shared/.libs/libffi-3.dll .
+
+libffi.dll.a: libffi-3.dll
+	$(CP) build-shared/.libs/libffi.dll.a .
+
+clean distclean maintainer-clean ::
+	$(RM) -f stamp.ffi.static stamp.ffi.shared ffi.h
+	$(RM) -f libffi.a libffi-3.dll libffi.dll.a
+	$(RM) -rf build
+	$(RM) -rf build-shared
+
+#-----------------------------------------------------------------------------
+#
+# binary-dist
+
+include $(TOP)/mk/target.mk
+
+binary-dist:
+	$(INSTALL_DIR)                         $(BIN_DIST_DIR)/libffi
+	$(INSTALL_DATA)    Makefile            $(BIN_DIST_DIR)/libffi/
+ifneq "$(HaveLibFFI)" "YES"
+	$(INSTALL_DATA)    $(BINDIST_STAMPS)   $(BIN_DIST_DIR)/libffi/
+  ifneq "$(INSTALL_PROGS)" ""
+	$(INSTALL_DATA)    $(INSTALL_PROGS)    $(BIN_DIST_DIR)/libffi/
+  endif
+  ifneq "$(INSTALL_LIBS)" ""
+	$(INSTALL_DATA)    $(INSTALL_LIBS)     $(BIN_DIST_DIR)/libffi/
+  endif
+  ifneq "$(INSTALL_HEADERS)" ""
+	$(INSTALL_HEADER)  $(INSTALL_HEADERS)  $(BIN_DIST_DIR)/libffi/
+  endif
+endif
+
diff --git a/libffi/libffi-3.0.4.tar.gz b/libffi/libffi-3.0.4.tar.gz
new file mode 100644
index 0000000000..0b20310aae
--- /dev/null
+++ b/libffi/libffi-3.0.4.tar.gz
diff --git a/rts/Adjustor.c b/rts/Adjustor.c
index 8c38df68d3..40bc9ddd07 100644
--- a/rts/Adjustor.c
+++ b/rts/Adjustor.c
@@ -42,9 +42,9 @@ Haskell side.
 #include "RtsUtils.h"
 #include <stdlib.h>
 
-#if defined(USE_LIBFFI)
+#if defined(USE_LIBFFI_FOR_ADJUSTORS)
 
-#include <ffi.h>
+#include "ffi.h"
 #include <string.h>
 
 void
@@ -1172,4 +1172,4 @@ if ( *(unsigned char*)ptr != 0xe8 ) {
  freeExec(ptr);
 }
 
-#endif // !USE_LIBFFI
+#endif // !USE_LIBFFI_FOR_ADJUSTORS
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 39628569c0..ab59533666 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -28,9 +28,7 @@
 #include <errno.h>
 #endif
 
-#ifdef USE_LIBFFI
-#include <ffi.h>
-#endif
+#include "ffi.h"
 
 /* --------------------------------------------------------------------------
  * The bytecode interpreter
@@ -1347,7 +1345,6 @@ run_BCO:
                So we make a copy of the argument block.
             */
 
-#ifdef USE_LIBFFI
 #define ROUND_UP_WDS(p)  ((((StgWord)(p)) + sizeof(W_)-1)/sizeof(W_))
 
             ffi_cif *cif = (ffi_cif *)marshall_fn;
@@ -1384,10 +1381,6 @@ run_BCO:
 
             // this is the function we're going to call
             fn = (void(*)(void))Sp[ret_size];
-#else
-	    W_ arguments[stk_offset];
-	    memcpy(arguments, Sp, sizeof(W_) * stk_offset);
-#endif
 
 	    // Restore the Haskell thread's current value of errno
 	    errno = cap->r.rCurrentTSO->saved_errno;
@@ -1415,11 +1408,7 @@ run_BCO:
 	    tok = suspendThread(&cap->r);
 
 	    // We already made a copy of the arguments above.
-#ifdef USE_LIBFFI
             ffi_call(cif, fn, ret, argptrs);
-#else
-	    marshall_fn ( arguments );
-#endif
 
 	    // And restart the thread again, popping the RET_DYN frame.
 	    cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - sizeof(StgFunTable)));
@@ -1441,11 +1430,7 @@ run_BCO:
 		
 	    // Copy the return value back to the TSO stack.  It is at
             // most 2 words large, and resides at arguments[0].
-#ifdef USE_LIBFFI
             memcpy(Sp, ret, sizeof(W_) * stg_min(stk_offset,ret_size));
-#else
-	    memcpy(Sp, arguments, sizeof(W_) * stg_min(stk_offset,2));
-#endif
 
 	    goto nextInsn;
 	}
diff --git a/rts/Linker.c b/rts/Linker.c
index 59143b9b0e..b193f014c3 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -441,6 +441,22 @@ typedef struct _RtsSymbolVal {
    SymX(console_handler)
 #endif
 
+#define RTS_LIBFFI_SYMBOLS                      \
+     Sym(ffi_prep_cif)                          \
+     Sym(ffi_call)                              \
+     Sym(ffi_type_void)                         \
+     Sym(ffi_type_float)                        \
+     Sym(ffi_type_double)                       \
+     Sym(ffi_type_sint64)                       \
+     Sym(ffi_type_uint64)                       \
+     Sym(ffi_type_sint32)                       \
+     Sym(ffi_type_uint32)                       \
+     Sym(ffi_type_sint16)                       \
+     Sym(ffi_type_uint16)                       \
+     Sym(ffi_type_sint8)                        \
+     Sym(ffi_type_uint8)                        \
+     Sym(ffi_type_pointer)
+
 #ifdef TABLES_NEXT_TO_CODE
 #define RTS_RET_SYMBOLS /* nothing */
 #else
@@ -828,6 +844,7 @@ RTS_MINGW_ONLY_SYMBOLS
 RTS_CYGWIN_ONLY_SYMBOLS
 RTS_DARWIN_ONLY_SYMBOLS
 RTS_LIBGCC_SYMBOLS
+RTS_LIBFFI_SYMBOLS
 #undef Sym
 #undef SymX
 #undef SymX_redirect
@@ -860,6 +877,7 @@ static RtsSymbolVal rtsSyms[] = {
       RTS_CYGWIN_ONLY_SYMBOLS
       RTS_DARWIN_ONLY_SYMBOLS
       RTS_LIBGCC_SYMBOLS
+      RTS_LIBFFI_SYMBOLS
 #if defined(darwin_HOST_OS) && defined(i386_HOST_ARCH)
       // dyld stub code contains references to this,
       // but it should never be called because we treat
diff --git a/rts/Makefile b/rts/Makefile
index 6fb168824a..516879af9f 100644
--- a/rts/Makefile
+++ b/rts/Makefile
@@ -159,9 +159,8 @@ SRC_CC_OPTS += -DNOSMP
 SRC_HC_OPTS += -optc-DNOSMP
 endif
 
-ifeq "$(UseLibFFI)" "YES"
-SRC_CC_OPTS += -DUSE_LIBFFI
-PACKAGE_CPP_OPTS += -DUSE_LIBFFI
+ifeq "$(UseLibFFIForAdjustors)" "YES"
+SRC_CC_OPTS += -DUSE_LIBFFI_FOR_ADJUSTORS
 endif
 
 ifneq "$(DYNAMIC_RTS)" "YES"
@@ -202,6 +201,9 @@ RtsUtils_CC_OPTS += -DTargetPlatform=$(DQ)$(TARGETPLATFORM)$(DQ)
 RtsUtils_CC_OPTS += -DGhcUnregisterised=$(DQ)$(GhcUnregisterised)$(DQ)
 RtsUtils_CC_OPTS += -DGhcEnableTablesNextToCode=$(DQ)$(GhcEnableTablesNextToCode)$(DQ)
 
+# ffi.h triggers prototype warnings, so disable them here:
+Interpreter_CC_OPTS += -Wno-strict-prototypes
+
 StgCRun_CC_OPTS += -w
 Typeable_CC_OPTS += -w
 RetainerProfile_CC_OPTS += -w
@@ -312,12 +314,6 @@ CLEAN_FILES += $(AUTO_APPLY_CMM)
 endif
 
 # -----------------------------------------------------------------------------
-# Compile GMP only if we don't have it already
-#
-# We use GMP's own configuration stuff, because it's all rather hairy
-# and not worth re-implementing in our Makefile framework.
-
-CLEAN_FILES += gmp/libgmp.a
 
 # Need to get the GMP vars in through CPP to package.conf.in, and put
 # quotes around each element.
@@ -343,6 +339,14 @@ SRC_HSC2HS_OPTS += -I../gmp/gmpbuild
 SRC_LD_OPTS += -L../gmp/gmpbuild
 
 #-----------------------------------------------------------------------------
+# libffi stuff
+
+SRC_CC_OPTS     += -I../libffi/build/include
+SRC_HC_OPTS     += -I../libffi/build/include
+SRC_HSC2HS_OPTS += -I../libffi/build/include
+SRC_LD_OPTS     += -L../libffi/build/include
+
+#-----------------------------------------------------------------------------
 #
 # Building the GUM SysMan
 #
@@ -429,7 +433,6 @@ endif
 
 binary-dist:
 	$(INSTALL_DIR)                         $(BIN_DIST_DIR)/rts
-	$(INSTALL_DIR)                         $(BIN_DIST_DIR)/rts/gmp
 	$(INSTALL_DATA)    Makefile            $(BIN_DIST_DIR)/rts/
 	$(INSTALL_DATA)    package.conf.in     $(BIN_DIST_DIR)/rts/
 ifneq "$(INSTALL_LIBS)" ""
diff --git a/rts/package.conf.in b/rts/package.conf.in
index 1642101ba0..7e0ee020a1 100644
--- a/rts/package.conf.in
+++ b/rts/package.conf.in
@@ -25,11 +25,13 @@ library-dirs:		FPTOOLS_TOP_ABS"/rts" GMP_LIB_DIRS
 # if !defined(HAVE_LIBGMP) && !defined(HAVE_FRAMEWORK_GMP)
 			, FPTOOLS_TOP_ABS"/gmp"
 # endif
+                        , FPTOOLS_TOP_ABS"/libffi"
 #endif
 
 hs-libraries:   "HSrts"
 
 extra-libraries:		"m"		/* for ldexp() */
+			      , "ffi"
 #ifndef HAVE_FRAMEWORK_GMP
 			      , "gmp"
 #ifdef HAVE_LIBDL
@@ -56,9 +58,6 @@ extra-libraries:		"m"		/* for ldexp() */
 #if USE_PAPI
 			     , "papi"
 #endif
-#ifdef USE_LIBFFI
-			      , "ffi"
-#endif
 
 #ifdef INSTALLING
 include-dirs:		INCLUDE_DIR GMP_INCLUDE_DIRS
author	Simon Marlow <simonmar@microsoft.com>	2008-04-08 18:34:34 +0000
committer	Simon Marlow <simonmar@microsoft.com>	2008-04-08 18:34:34 +0000
commit	e0fcf61dca4dfac99cb5417e1bc4cbee18822cf2 (patch)
tree	32382c35525e71b5f20dea714fb748b0870431e0
parent	92986843271f2f400ebc0759b30eb11d47eaa193 (diff)
download	haskell-e0fcf61dca4dfac99cb5417e1bc4cbee18822cf2.tar.gz