add support for x86_64; foreign import is now supported in GHCi on x86_64

author: Simon Marlow <simonmar@microsoft.com> 2006-04-05 13:33:44 +0000
committer: Simon Marlow <simonmar@microsoft.com> 2006-04-05 13:33:44 +0000
commit: c88a984f05a4f93c1086a95a49c109b5c29867b6 (patch)
tree: a8f0e47dc94743843b676267c015eac7ef2a223f /ghc/compiler
parent: 18e65b5a41257108a7963ef9e3220e5700b89679 (diff)
download: haskell-c88a984f05a4f93c1086a95a49c109b5c29867b6.tar.gz
1 files changed, 176 insertions, 7 deletions
diff --git a/ghc/compiler/ghci/ByteCodeFFI.lhs b/ghc/compiler/ghci/ByteCodeFFI.lhs
index dd55e4999a..61e70d64e4 100644
--- a/ghc/compiler/ghci/ByteCodeFFI.lhs
+++ b/ghc/compiler/ghci/ByteCodeFFI.lhs
@@ -24,7 +24,7 @@ import DATA_WORD	( Word8, Word32 )
 import Foreign		( Ptr )
 import System.IO.Unsafe ( unsafePerformIO )
 import IO		( hPutStrLn, stderr )
--- import Debug.Trace	( trace )
+import Debug.Trace	( trace )
 \end{code}
 
 %************************************************************************
@@ -74,7 +74,7 @@ mkMarshalCode :: CCallConv
 mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
    = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep) 
                                    addr_offW arg_offs_n_reps
-     in  Foreign.newArray bytes
+     in  trace (show bytes) $ Foreign.newArray bytes
 
 
 
@@ -133,11 +133,6 @@ mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
             = [0xDD, 0x9E] ++ lit32 offB
          fstps_offesimem offB		-- fstps   offB(%esi)
             = [0xD9, 0x9E] ++ lit32 offB
-         lit32 :: Int -> [Word8]
-         lit32 i = let w32 = (fromIntegral i) :: Word32
-                   in  map (fromIntegral . ( .&. 0xFF))
-                           [w32, w32 `shiftR` 8, 
-                            w32 `shiftR` 16,  w32 `shiftR` 24]
          {-
              2 0000 8BB42478    movl    0x12345678(%esp), %esi
              2      563412
@@ -278,6 +273,173 @@ mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
      ++ ret
      )
 
+#elif x86_64_TARGET_ARCH
+
+   =
+     -- the address of the H stack is in %rdi.  We need to move it out, so
+     -- we can use %rdi as an arg reg for the following call:
+    pushq_rbp ++
+    movq_rdi_rbp ++
+	
+     -- ####### load / push the args
+
+     let
+	(stack_args, fregs_unused, reg_loads) = 
+	   load_arg_regs arg_offs_n_reps int_loads float_loads []
+
+	tot_arg_size = bytes_per_word * length stack_args
+
+	-- On entry to the called function, %rsp should be aligned
+	-- on a 16-byte boundary +8 (i.e. the first stack arg after
+	-- the return address is 16-byte aligned).  In STG land
+	-- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+	-- need to make sure we push a multiple of 16-bytes of args,
+	-- plus the return address, to get the correct alignment.
+	(real_size, adjust_rsp)
+	  | tot_arg_size `rem` 16 == 0 	  = (tot_arg_size, [])
+	  | otherwise           	  = (tot_arg_size + 8, subq_lit_rsp 8)
+
+	(stack_pushes, stack_words) =
+		push_args stack_args [] 0
+
+	-- we need to know the number of SSE regs used in the call, see later
+	n_sse_regs_used = length float_loads - length fregs_unused
+     in
+        concat reg_loads
+     ++ adjust_rsp
+     ++ concat stack_pushes -- push in reverse order
+
+     -- ####### make the call
+
+	-- use %r10 to make the call, because we don't have to save it.
+        --      movq 8*addr_offW(%rbp), %r10
+     ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
+
+	-- The x86_64 ABI requires us to set %al to the number of SSE
+	-- registers that contain arguments, if the called routine
+	-- is a varargs function.  We don't know whether it's a
+	-- varargs function or not, so we have to assume it is.
+	--
+	-- It's not safe to omit this assignment, even if the number
+	-- of SSE regs in use is zero.  If %al is larger than 8
+	-- on entry to a varargs function, seg faults ensue.
+     ++ movq_lit_rax n_sse_regs_used
+     ++ call_star_r10
+
+	-- pop the args from the stack, only in ccall mode 
+	-- (in stdcall the callee does it).
+     ++ (if   cconv /= StdCallConv
+         then addq_lit_rsp real_size
+         else [])
+
+     -- ####### place the result in the right place and return
+
+     ++ assign_result
+     ++ popq_rbp
+     ++ ret
+
+  where
+     bytes_per_word = 8
+
+     -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
+     -- flt arg regs: xmm0..xmm7
+     int_loads   = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
+ 		     movq_rbpoff_rcx, movq_rbpoff_r8,  movq_rbpoff_r9 ]
+     float_loads = [ (mov_f32_rbpoff_xmm n, mov_f64_rbpoff_xmm n) | n <- [0..7] ]
+
+     load_arg_regs args [] [] code     =  (args, [], code)
+     load_arg_regs [] iregs fregs code =  ([], fregs, code)
+     load_arg_regs ((off,rep):args) iregs fregs code
+	| FloatArg  <- rep, ((mov_f32,_):frest) <- fregs =
+		load_arg_regs args iregs frest (mov_f32 (bytes_per_word * off) : code)
+	| DoubleArg <- rep, ((_,mov_f64):frest) <- fregs =
+		load_arg_regs args iregs frest (mov_f64 (bytes_per_word * off) : code)
+	| (mov_reg:irest) <- iregs =
+		load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
+	| otherwise =
+		 push_this_arg
+	where
+	   push_this_arg = ((off,rep):args',fregs', code')
+		where (args',fregs',code') = load_arg_regs args iregs fregs code
+
+     push_args [] code pushed_words = (code, pushed_words)
+     push_args ((off,rep):args) code pushed_words
+	| FloatArg  <- rep =
+		push_args args (push_f32_rbpoff (bytes_per_word * off) : code) 
+			(pushed_words+1)
+	| DoubleArg <- rep =
+		push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
+			(pushed_words+1)
+	| otherwise =
+		push_args args (pushq_rbpoff (bytes_per_word * off) : code)
+			(pushed_words+1)
+
+
+     assign_result = 
+	case r_rep of
+	  DoubleArg -> f64
+	  FloatArg  -> f32
+          VoidArg   -> []
+	  _other    -> i64
+	where
+	  i64 = movq_rax_rbpoff 0
+	  f32 = mov_f32_xmm0_rbpoff 0
+	  f64 = mov_f64_xmm0_rbpoff 0
+
+-- ######### x86_64 machine code:
+
+--   0:   48 89 fd                mov    %rdi,%rbp
+--   3:   48 8b bd 78 56 34 12    mov    0x12345678(%rbp),%rdi
+--   a:   48 8b b5 78 56 34 12    mov    0x12345678(%rbp),%rsi
+--  11:   48 8b 95 78 56 34 12    mov    0x12345678(%rbp),%rdx
+--  18:   48 8b 8d 78 56 34 12    mov    0x12345678(%rbp),%rcx
+--  1f:   4c 8b 85 78 56 34 12    mov    0x12345678(%rbp),%r8
+--  26:   4c 8b 8d 78 56 34 12    mov    0x12345678(%rbp),%r9
+--  2d:   4c 8b 95 78 56 34 12    mov    0x12345678(%rbp),%r10
+--  34:   48 c7 c0 78 56 34 12    mov    $0x12345678,%rax
+--  3b:   48 89 85 78 56 34 12    mov    %rax,0x12345678(%rbp)
+--  42:   f3 0f 10 85 78 56 34 12 movss  0x12345678(%rbp),%xmm0
+--  4a:   f2 0f 10 85 78 56 34 12 movsd  0x12345678(%rbp),%xmm0
+--  52:   f3 0f 11 85 78 56 34 12 movss  %xmm0,0x12345678(%rbp)
+--  5a:   f2 0f 11 85 78 56 34 12 movsd  %xmm0,0x12345678(%rbp)
+--  62:   ff b5 78 56 34 12       pushq  0x12345678(%rbp)
+--  68:   f3 44 0f 11 04 24       movss  %xmm8,(%rsp)
+--  6e:   f2 44 0f 11 04 24       movsd  %xmm8,(%rsp)
+--  74:   48 81 ec 78 56 34 12    sub    $0x12345678,%rsp
+--  7b:   48 81 c4 78 56 34 12    add    $0x12345678,%rsp
+--  82:   41 ff d2                callq  *%r10
+--  85:   c3                      retq   
+
+     movq_rdi_rbp         = [0x48,0x89,0xfd]
+     movq_rbpoff_rdi  off = [0x48, 0x8b, 0xbd] ++ lit32 off
+     movq_rbpoff_rsi  off = [0x48, 0x8b, 0xb5] ++ lit32 off
+     movq_rbpoff_rdx  off = [0x48, 0x8b, 0x95] ++ lit32 off
+     movq_rbpoff_rcx  off = [0x48, 0x8b, 0x8d] ++ lit32 off 
+     movq_rbpoff_r8   off = [0x4c, 0x8b, 0x85] ++ lit32 off
+     movq_rbpoff_r9   off = [0x4c, 0x8b, 0x8d] ++ lit32 off
+     movq_rbpoff_r10  off = [0x4c, 0x8b, 0x95] ++ lit32 off
+     movq_lit_rax     lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
+     movq_rax_rbpoff  off = [0x48, 0x89, 0x85] ++ lit32 off
+     mov_f32_rbpoff_xmm n off = [0xf3, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
+     mov_f64_rbpoff_xmm n off = [0xf2, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
+     mov_f32_xmm0_rbpoff  off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
+     mov_f64_xmm0_rbpoff  off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
+     pushq_rbpoff     off = [0xff, 0xb5] ++ lit32 off
+     push_f32_rbpoff  off = 
+	mov_f32_rbpoff_xmm 8 off ++		 -- movss off(%rbp), %xmm8
+	[0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] ++	 -- movss %xmm8, (%rsp)
+	subq_lit_rsp 8				 -- subq $8, %rsp
+     push_f64_rbpoff  off =
+	mov_f64_rbpoff_xmm 8 off ++		 -- movsd off(%rbp), %xmm8
+	[0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] ++  -- movsd %xmm8, (%rsp)
+	subq_lit_rsp 8				 -- subq $8, %rsp
+     subq_lit_rsp     lit = [0x48, 0x81, 0xec] ++ lit32 lit
+     addq_lit_rsp     lit = [0x48, 0x81, 0xc4] ++ lit32 lit
+     call_star_r10 = [0x41,0xff,0xd2]
+     ret = [0xc3]
+     pushq_rbp = [0x55]
+     popq_rbp = [0x5d]
+
 #elif sparc_TARGET_ARCH
 
    = let -- At least for sparc V8
@@ -659,5 +821,12 @@ mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
 
 #endif
 
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+lit32 :: Int -> [Word8]
+lit32 i = let w32 = (fromIntegral i) :: Word32
+          in  map (fromIntegral . ( .&. 0xFF))
+                  [w32, w32 `shiftR` 8, 
+                   w32 `shiftR` 16,  w32 `shiftR` 24]
+#endif
 \end{code}
author	Simon Marlow <simonmar@microsoft.com>	2006-04-05 13:33:44 +0000
committer	Simon Marlow <simonmar@microsoft.com>	2006-04-05 13:33:44 +0000
commit	c88a984f05a4f93c1086a95a49c109b5c29867b6 (patch)
tree	a8f0e47dc94743843b676267c015eac7ef2a223f /ghc/compiler
parent	18e65b5a41257108a7963ef9e3220e5700b89679 (diff)
download	haskell-c88a984f05a4f93c1086a95a49c109b5c29867b6.tar.gz