summaryrefslogtreecommitdiff
path: root/compiler/ghci/ByteCodeFFI.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/ghci/ByteCodeFFI.lhs')
-rw-r--r--compiler/ghci/ByteCodeFFI.lhs832
1 files changed, 832 insertions, 0 deletions
diff --git a/compiler/ghci/ByteCodeFFI.lhs b/compiler/ghci/ByteCodeFFI.lhs
new file mode 100644
index 0000000000..61e70d64e4
--- /dev/null
+++ b/compiler/ghci/ByteCodeFFI.lhs
@@ -0,0 +1,832 @@
+%
+% (c) The University of Glasgow 2001
+%
+\section[ByteCodeGen]{Generate machine-code sequences for foreign import}
+
+\begin{code}
+module ByteCodeFFI ( mkMarshalCode, moan64 ) where
+
+#include "HsVersions.h"
+
+import Outputable
+import SMRep ( CgRep(..), cgRepSizeW )
+import ForeignCall ( CCallConv(..) )
+import Panic
+
+-- DON'T remove apparently unused imports here ..
+-- there is ifdeffery below
+import Control.Exception ( throwDyn )
+import DATA_BITS ( Bits(..), shiftR, shiftL )
+import Foreign ( newArray )
+import Data.List ( mapAccumL )
+
+import DATA_WORD ( Word8, Word32 )
+import Foreign ( Ptr )
+import System.IO.Unsafe ( unsafePerformIO )
+import IO ( hPutStrLn, stderr )
+import Debug.Trace ( trace )
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{The platform-dependent marshall-code-generator.}
+%* *
+%************************************************************************
+
+\begin{code}
+
+moan64 :: String -> SDoc -> a
+moan64 msg pp_rep
+ = unsafePerformIO (
+ hPutStrLn stderr (
+ "\nGHCi's bytecode generation machinery can't handle 64-bit\n" ++
+ "code properly yet. You can work around this for the time being\n" ++
+ "by compiling this module and all those it imports to object code,\n" ++
+ "and re-starting your GHCi session. The panic below contains information,\n" ++
+ "intended for the GHC implementors, about the exact place where GHC gave up.\n"
+ )
+ )
+ `seq`
+ pprPanic msg pp_rep
+
+
+-- For sparc_TARGET_ARCH, i386_TARGET_ARCH, etc.
+#include "nativeGen/NCG.h"
+
+{-
+Make a piece of code which expects to see the Haskell stack
+looking like this. It is given a pointer to the lowest word in
+the stack -- presumably the tag of the placeholder.
+
+ <arg_n>
+ ...
+ <arg_1>
+ Addr# address_of_C_fn
+ <placeholder-for-result#> (must be an unboxed type)
+
+We cope with both ccall and stdcall for the C fn. However, this code
+itself expects only to be called using the ccall convention -- that is,
+we don't clear our own (single) arg off the C stack.
+-}
+mkMarshalCode :: CCallConv
+ -> (Int, CgRep) -> Int -> [(Int, CgRep)]
+ -> IO (Ptr Word8)
+mkMarshalCode cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
+ = let bytes = mkMarshalCode_wrk cconv (r_offW, r_rep)
+ addr_offW arg_offs_n_reps
+ in trace (show bytes) $ Foreign.newArray bytes
+
+
+
+
+mkMarshalCode_wrk :: CCallConv
+ -> (Int, CgRep) -> Int -> [(Int, CgRep)]
+ -> [Word8]
+
+mkMarshalCode_wrk cconv (r_offW, r_rep) addr_offW arg_offs_n_reps
+
+#if i386_TARGET_ARCH
+
+ = let -- Don't change this without first consulting Intel Corp :-)
+ bytes_per_word = 4
+
+ offsets_to_pushW
+ = concat
+ [ -- reversed because x86 is little-endian
+ reverse [a_offW .. a_offW + cgRepSizeW a_rep - 1]
+
+ -- reversed because args are pushed L -> R onto C stack
+ | (a_offW, a_rep) <- reverse arg_offs_n_reps
+ ]
+
+ arguments_size = bytes_per_word * length offsets_to_pushW
+#if darwin_TARGET_OS
+ -- Darwin: align stack frame size to a multiple of 16 bytes
+ stack_frame_size = (arguments_size + 15) .&. complement 15
+ stack_frame_pad = stack_frame_size - arguments_size
+#else
+ stack_frame_size = arguments_size
+#endif
+
+ -- some helpers to assemble x86 insns.
+ movl_offespmem_esi offB -- movl offB(%esp), %esi
+ = [0x8B, 0xB4, 0x24] ++ lit32 offB
+ movl_offesimem_ecx offB -- movl offB(%esi), %ecx
+ = [0x8B, 0x8E] ++ lit32 offB
+ save_regs -- pushl all intregs except %esp
+ = [0x50, 0x53, 0x51, 0x52, 0x56, 0x57, 0x55]
+ restore_regs -- popl ditto
+ = [0x5D, 0x5F, 0x5E, 0x5A, 0x59, 0x5B, 0x58]
+ pushl_ecx -- pushl %ecx
+ = [0x51]
+ call_star_ecx -- call * %ecx
+ = [0xFF, 0xD1]
+ add_lit_esp lit -- addl $lit, %esp
+ = [0x81, 0xC4] ++ lit32 lit
+ movl_eax_offesimem offB -- movl %eax, offB(%esi)
+ = [0x89, 0x86] ++ lit32 offB
+ movl_edx_offesimem offB -- movl %edx, offB(%esi)
+ = [0x89, 0x96] ++ lit32 offB
+ ret -- ret
+ = [0xC3]
+ fstpl_offesimem offB -- fstpl offB(%esi)
+ = [0xDD, 0x9E] ++ lit32 offB
+ fstps_offesimem offB -- fstps offB(%esi)
+ = [0xD9, 0x9E] ++ lit32 offB
+ {-
+ 2 0000 8BB42478 movl 0x12345678(%esp), %esi
+ 2 563412
+ 3 0007 8B8E7856 movl 0x12345678(%esi), %ecx
+ 3 3412
+ 4
+ 5 000d 50535152 pushl %eax ; pushl %ebx ; pushl %ecx ; pushl %edx
+ 6 0011 565755 pushl %esi ; pushl %edi ; pushl %ebp
+ 7
+ 8 0014 5D5F5E popl %ebp ; popl %edi ; popl %esi
+ 9 0017 5A595B58 popl %edx ; popl %ecx ; popl %ebx ; popl %eax
+ 10
+ 11 001b 51 pushl %ecx
+ 12 001c FFD1 call * %ecx
+ 13
+ 14 001e 81C47856 addl $0x12345678, %esp
+ 14 3412
+ 15 0024 89867856 movl %eax, 0x12345678(%esi)
+ 15 3412
+ 16 002a 89967856 movl %edx, 0x12345678(%esi)
+ 16 3412
+ 17
+ 18 0030 DD967856 fstl 0x12345678(%esi)
+ 18 3412
+ 19 0036 DD9E7856 fstpl 0x12345678(%esi)
+ 19 3412
+ 20 003c D9967856 fsts 0x12345678(%esi)
+ 20 3412
+ 21 0042 D99E7856 fstps 0x12345678(%esi)
+ 18
+ 19 0030 C3 ret
+ 20
+
+ -}
+
+ in
+ --trace (show (map fst arg_offs_n_reps))
+ (
+ {- On entry, top of C stack 0(%esp) is the RA and 4(%esp) is
+ arg passed from the interpreter.
+
+ Push all callee saved regs. Push all of them anyway ...
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ -}
+ save_regs
+
+ {- Now 28+0(%esp) is RA and 28+4(%esp) is the arg (the H stack ptr).
+ We'll use %esi as a temporary to point at the H stack, and
+ %ecx as a temporary to copy via.
+
+ movl 28+4(%esp), %esi
+ -}
+ ++ movl_offespmem_esi 32
+
+#if darwin_TARGET_OS
+ {- On Darwin, add some padding so that the stack stays aligned. -}
+ ++ (if stack_frame_pad /= 0
+ then add_lit_esp (-stack_frame_pad)
+ else [])
+#endif
+
+ {- For each arg in args_offs_n_reps, examine the associated
+ CgRep to determine how many words there are. This gives a
+ bunch of offsets on the H stack to copy to the C stack:
+
+ movl off1(%esi), %ecx
+ pushl %ecx
+ -}
+ ++ concatMap (\offW -> movl_offesimem_ecx (bytes_per_word * offW)
+ ++ pushl_ecx)
+ offsets_to_pushW
+
+ {- Get the addr to call into %ecx, bearing in mind that there's
+ an Addr# tag at the indicated location, and do the call:
+
+ movl 4*(1 /*tag*/ +addr_offW)(%esi), %ecx
+ call * %ecx
+ -}
+ ++ movl_offesimem_ecx (bytes_per_word * addr_offW)
+ ++ call_star_ecx
+
+ {- Nuke the args just pushed and re-establish %esi at the
+ H-stack ptr:
+
+ addl $4*number_of_args_pushed, %esp (ccall only)
+ movl 28+4(%esp), %esi
+ -}
+ ++ (if cconv /= StdCallConv
+ then add_lit_esp stack_frame_size
+ else [])
+ ++ movl_offespmem_esi 32
+
+ {- Depending on what the return type is, get the result
+ from %eax or %edx:%eax or %st(0).
+
+ movl %eax, 4(%esi) -- assuming tagged result
+ or
+ movl %edx, 4(%esi)
+ movl %eax, 8(%esi)
+ or
+ fstpl 4(%esi)
+ or
+ fstps 4(%esi)
+ -}
+ ++ let i32 = movl_eax_offesimem 0
+ i64 = movl_eax_offesimem 0 ++ movl_edx_offesimem 4
+ f32 = fstps_offesimem 0
+ f64 = fstpl_offesimem 0
+ in
+ case r_rep of
+ NonPtrArg -> i32
+ DoubleArg -> f64
+ FloatArg -> f32
+ -- LongArg -> i64
+ VoidArg -> []
+ other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(x86)"
+ (ppr r_rep)
+
+ {- Restore all the pushed regs and go home.
+
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+ pushl %eax
+
+ ret
+ -}
+ ++ restore_regs
+ ++ ret
+ )
+
+#elif x86_64_TARGET_ARCH
+
+ =
+ -- the address of the H stack is in %rdi. We need to move it out, so
+ -- we can use %rdi as an arg reg for the following call:
+ pushq_rbp ++
+ movq_rdi_rbp ++
+
+ -- ####### load / push the args
+
+ let
+ (stack_args, fregs_unused, reg_loads) =
+ load_arg_regs arg_offs_n_reps int_loads float_loads []
+
+ tot_arg_size = bytes_per_word * length stack_args
+
+ -- On entry to the called function, %rsp should be aligned
+ -- on a 16-byte boundary +8 (i.e. the first stack arg after
+ -- the return address is 16-byte aligned). In STG land
+ -- %rsp is kept 16-byte aligned (see StgCRun.c), so we just
+ -- need to make sure we push a multiple of 16-bytes of args,
+ -- plus the return address, to get the correct alignment.
+ (real_size, adjust_rsp)
+ | tot_arg_size `rem` 16 == 0 = (tot_arg_size, [])
+ | otherwise = (tot_arg_size + 8, subq_lit_rsp 8)
+
+ (stack_pushes, stack_words) =
+ push_args stack_args [] 0
+
+ -- we need to know the number of SSE regs used in the call, see later
+ n_sse_regs_used = length float_loads - length fregs_unused
+ in
+ concat reg_loads
+ ++ adjust_rsp
+ ++ concat stack_pushes -- push in reverse order
+
+ -- ####### make the call
+
+ -- use %r10 to make the call, because we don't have to save it.
+ -- movq 8*addr_offW(%rbp), %r10
+ ++ movq_rbpoff_r10 (bytes_per_word * addr_offW)
+
+ -- The x86_64 ABI requires us to set %al to the number of SSE
+ -- registers that contain arguments, if the called routine
+ -- is a varargs function. We don't know whether it's a
+ -- varargs function or not, so we have to assume it is.
+ --
+ -- It's not safe to omit this assignment, even if the number
+ -- of SSE regs in use is zero. If %al is larger than 8
+ -- on entry to a varargs function, seg faults ensue.
+ ++ movq_lit_rax n_sse_regs_used
+ ++ call_star_r10
+
+ -- pop the args from the stack, only in ccall mode
+ -- (in stdcall the callee does it).
+ ++ (if cconv /= StdCallConv
+ then addq_lit_rsp real_size
+ else [])
+
+ -- ####### place the result in the right place and return
+
+ ++ assign_result
+ ++ popq_rbp
+ ++ ret
+
+ where
+ bytes_per_word = 8
+
+ -- int arg regs: rdi,rsi,rdx,rcx,r8,r9
+ -- flt arg regs: xmm0..xmm7
+ int_loads = [ movq_rbpoff_rdi, movq_rbpoff_rsi, movq_rbpoff_rdx,
+ movq_rbpoff_rcx, movq_rbpoff_r8, movq_rbpoff_r9 ]
+ float_loads = [ (mov_f32_rbpoff_xmm n, mov_f64_rbpoff_xmm n) | n <- [0..7] ]
+
+ load_arg_regs args [] [] code = (args, [], code)
+ load_arg_regs [] iregs fregs code = ([], fregs, code)
+ load_arg_regs ((off,rep):args) iregs fregs code
+ | FloatArg <- rep, ((mov_f32,_):frest) <- fregs =
+ load_arg_regs args iregs frest (mov_f32 (bytes_per_word * off) : code)
+ | DoubleArg <- rep, ((_,mov_f64):frest) <- fregs =
+ load_arg_regs args iregs frest (mov_f64 (bytes_per_word * off) : code)
+ | (mov_reg:irest) <- iregs =
+ load_arg_regs args irest fregs (mov_reg (bytes_per_word * off) : code)
+ | otherwise =
+ push_this_arg
+ where
+ push_this_arg = ((off,rep):args',fregs', code')
+ where (args',fregs',code') = load_arg_regs args iregs fregs code
+
+ push_args [] code pushed_words = (code, pushed_words)
+ push_args ((off,rep):args) code pushed_words
+ | FloatArg <- rep =
+ push_args args (push_f32_rbpoff (bytes_per_word * off) : code)
+ (pushed_words+1)
+ | DoubleArg <- rep =
+ push_args args (push_f64_rbpoff (bytes_per_word * off) : code)
+ (pushed_words+1)
+ | otherwise =
+ push_args args (pushq_rbpoff (bytes_per_word * off) : code)
+ (pushed_words+1)
+
+
+ assign_result =
+ case r_rep of
+ DoubleArg -> f64
+ FloatArg -> f32
+ VoidArg -> []
+ _other -> i64
+ where
+ i64 = movq_rax_rbpoff 0
+ f32 = mov_f32_xmm0_rbpoff 0
+ f64 = mov_f64_xmm0_rbpoff 0
+
+-- ######### x86_64 machine code:
+
+-- 0: 48 89 fd mov %rdi,%rbp
+-- 3: 48 8b bd 78 56 34 12 mov 0x12345678(%rbp),%rdi
+-- a: 48 8b b5 78 56 34 12 mov 0x12345678(%rbp),%rsi
+-- 11: 48 8b 95 78 56 34 12 mov 0x12345678(%rbp),%rdx
+-- 18: 48 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%rcx
+-- 1f: 4c 8b 85 78 56 34 12 mov 0x12345678(%rbp),%r8
+-- 26: 4c 8b 8d 78 56 34 12 mov 0x12345678(%rbp),%r9
+-- 2d: 4c 8b 95 78 56 34 12 mov 0x12345678(%rbp),%r10
+-- 34: 48 c7 c0 78 56 34 12 mov $0x12345678,%rax
+-- 3b: 48 89 85 78 56 34 12 mov %rax,0x12345678(%rbp)
+-- 42: f3 0f 10 85 78 56 34 12 movss 0x12345678(%rbp),%xmm0
+-- 4a: f2 0f 10 85 78 56 34 12 movsd 0x12345678(%rbp),%xmm0
+-- 52: f3 0f 11 85 78 56 34 12 movss %xmm0,0x12345678(%rbp)
+-- 5a: f2 0f 11 85 78 56 34 12 movsd %xmm0,0x12345678(%rbp)
+-- 62: ff b5 78 56 34 12 pushq 0x12345678(%rbp)
+-- 68: f3 44 0f 11 04 24 movss %xmm8,(%rsp)
+-- 6e: f2 44 0f 11 04 24 movsd %xmm8,(%rsp)
+-- 74: 48 81 ec 78 56 34 12 sub $0x12345678,%rsp
+-- 7b: 48 81 c4 78 56 34 12 add $0x12345678,%rsp
+-- 82: 41 ff d2 callq *%r10
+-- 85: c3 retq
+
+ movq_rdi_rbp = [0x48,0x89,0xfd]
+ movq_rbpoff_rdi off = [0x48, 0x8b, 0xbd] ++ lit32 off
+ movq_rbpoff_rsi off = [0x48, 0x8b, 0xb5] ++ lit32 off
+ movq_rbpoff_rdx off = [0x48, 0x8b, 0x95] ++ lit32 off
+ movq_rbpoff_rcx off = [0x48, 0x8b, 0x8d] ++ lit32 off
+ movq_rbpoff_r8 off = [0x4c, 0x8b, 0x85] ++ lit32 off
+ movq_rbpoff_r9 off = [0x4c, 0x8b, 0x8d] ++ lit32 off
+ movq_rbpoff_r10 off = [0x4c, 0x8b, 0x95] ++ lit32 off
+ movq_lit_rax lit = [0x48, 0xc7, 0xc0] ++ lit32 lit
+ movq_rax_rbpoff off = [0x48, 0x89, 0x85] ++ lit32 off
+ mov_f32_rbpoff_xmm n off = [0xf3, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
+ mov_f64_rbpoff_xmm n off = [0xf2, 0x0f, 0x10, 0x85 + n`shiftL`3] ++ lit32 off
+ mov_f32_xmm0_rbpoff off = [0xf3, 0x0f, 0x11, 0x85] ++ lit32 off
+ mov_f64_xmm0_rbpoff off = [0xf2, 0x0f, 0x11, 0x85] ++ lit32 off
+ pushq_rbpoff off = [0xff, 0xb5] ++ lit32 off
+ push_f32_rbpoff off =
+ mov_f32_rbpoff_xmm 8 off ++ -- movss off(%rbp), %xmm8
+ [0xf3, 0x44, 0x0f, 0x11, 0x04, 0x24] ++ -- movss %xmm8, (%rsp)
+ subq_lit_rsp 8 -- subq $8, %rsp
+ push_f64_rbpoff off =
+ mov_f64_rbpoff_xmm 8 off ++ -- movsd off(%rbp), %xmm8
+ [0xf2, 0x44, 0x0f, 0x11, 0x04, 0x24] ++ -- movsd %xmm8, (%rsp)
+ subq_lit_rsp 8 -- subq $8, %rsp
+ subq_lit_rsp lit = [0x48, 0x81, 0xec] ++ lit32 lit
+ addq_lit_rsp lit = [0x48, 0x81, 0xc4] ++ lit32 lit
+ call_star_r10 = [0x41,0xff,0xd2]
+ ret = [0xc3]
+ pushq_rbp = [0x55]
+ popq_rbp = [0x5d]
+
+#elif sparc_TARGET_ARCH
+
+ = let -- At least for sparc V8
+ bytes_per_word = 4
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w
+ = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+ offsets_to_pushW
+ = concat
+ [ [a_offW .. a_offW + cgRepSizeW a_rep - 1]
+
+ | (a_offW, a_rep) <- arg_offs_n_reps
+ ]
+
+ total_argWs = length offsets_to_pushW
+ argWs_on_stack = if total_argWs > 6 then total_argWs - 6
+ else 0
+
+ -- The stack pointer must be kept 8-byte aligned, which means
+ -- we need to calculate this quantity too
+ argWs_on_stack_ROUNDED_UP
+ | odd argWs_on_stack = 1 + argWs_on_stack
+ | otherwise = argWs_on_stack
+
+ -- some helpers to assemble sparc insns.
+ -- REGS
+ iReg, oReg, gReg, fReg :: Int -> Word32
+ iReg = fromIntegral . (+ 24)
+ oReg = fromIntegral . (+ 8)
+ gReg = fromIntegral . (+ 0)
+ fReg = fromIntegral
+
+ sp = oReg 6
+ i0 = iReg 0
+ i7 = iReg 7
+ o0 = oReg 0
+ o1 = oReg 1
+ o7 = oReg 7
+ g0 = gReg 0
+ g1 = gReg 1
+ f0 = fReg 0
+ f1 = fReg 1
+
+ -- INSN templates
+ insn_r_r_i :: Word32 -> Word32 -> Word32 -> Int -> Word32
+ insn_r_r_i op3 rs1 rd imm13
+ = (3 `shiftL` 30)
+ .|. (rs1 `shiftL` 25)
+ .|. (op3 `shiftL` 19)
+ .|. (rd `shiftL` 14)
+ .|. (1 `shiftL` 13)
+ .|. mkSimm13 imm13
+
+ insn_r_i_r :: Word32 -> Word32 -> Int -> Word32 -> Word32
+ insn_r_i_r op3 rs1 imm13 rd
+ = (2 `shiftL` 30)
+ .|. (rd `shiftL` 25)
+ .|. (op3 `shiftL` 19)
+ .|. (rs1 `shiftL` 14)
+ .|. (1 `shiftL` 13)
+ .|. mkSimm13 imm13
+
+ mkSimm13 :: Int -> Word32
+ mkSimm13 imm13
+ = let imm13w = (fromIntegral imm13) :: Word32
+ in imm13w .&. 0x1FFF
+
+ -- REAL (non-synthetic) insns
+ -- or %rs1, %rs2, %rd
+ mkOR :: Word32 -> Word32 -> Word32 -> Word32
+ mkOR rs1 rs2 rd
+ = (2 `shiftL` 30)
+ .|. (rd `shiftL` 25)
+ .|. (op3_OR `shiftL` 19)
+ .|. (rs1 `shiftL` 14)
+ .|. (0 `shiftL` 13)
+ .|. rs2
+ where op3_OR = 2 :: Word32
+
+ -- ld(int) [%rs + imm13], %rd
+ mkLD rs1 imm13 rd = insn_r_r_i 0x00{-op3_LD-} rd rs1 imm13
+
+ -- st(int) %rs, [%rd + imm13]
+ mkST = insn_r_r_i 0x04 -- op3_ST
+
+ -- st(float) %rs, [%rd + imm13]
+ mkSTF = insn_r_r_i 0x24 -- op3_STF
+
+ -- jmpl %rs + imm13, %rd
+ mkJMPL = insn_r_i_r 0x38 -- op3_JMPL
+
+ -- save %rs + imm13, %rd
+ mkSAVE = insn_r_i_r 0x3C -- op3_SAVE
+
+ -- restore %rs + imm13, %rd
+ mkRESTORE = insn_r_i_r 0x3D -- op3_RESTORE
+
+ -- SYNTHETIC insns
+ mkNOP = mkOR g0 g0 g0
+ mkCALL reg = mkJMPL reg 0 o7
+ mkRET = mkJMPL i7 8 g0
+ mkRESTORE_TRIVIAL = mkRESTORE g0 0 g0
+ in
+ --trace (show (map fst arg_offs_n_reps))
+ concatMap w32_to_w8s_bigEndian (
+
+ {- On entry, %o0 is the arg passed from the interpreter. After
+ the initial save insn, it will be in %i0. Studying the sparc
+ docs one would have thought that the minimum frame size is 92
+ bytes, but gcc always uses at least 112, and indeed there are
+ segfaults a-plenty with 92. So I use 112 here as well. I
+ don't understand why, tho.
+ -}
+ [mkSAVE sp (- ({-92-}112 + 4*argWs_on_stack_ROUNDED_UP)) sp]
+
+ {- For each arg in args_offs_n_reps, examine the associated
+ CgRep to determine how many words there are. This gives a
+ bunch of offsets on the H stack. Move the first 6 words into
+ %o0 .. %o5 and the rest on the stack, starting at [%sp+92].
+ Use %g1 as a temp.
+ -}
+ ++ let doArgW (offW, wordNo)
+ | wordNo < 6
+ = [mkLD i0 (bytes_per_word * offW) (oReg wordNo)]
+ | otherwise
+ = [mkLD i0 (bytes_per_word * offW) g1,
+ mkST g1 sp (92 + bytes_per_word * (wordNo - 6))]
+ in
+ concatMap doArgW (zip offsets_to_pushW [0 ..])
+
+ {- Get the addr to call into %g1, bearing in mind that there's
+ an Addr# tag at the indicated location, and do the call:
+
+ ld [4*(1 /*tag*/ +addr_offW) + %i0], %g1
+ call %g1
+ -}
+ ++ [mkLD i0 (bytes_per_word * addr_offW) g1,
+ mkCALL g1,
+ mkNOP]
+
+ {- Depending on what the return type is, get the result
+ from %o0 or %o1:%o0 or %f0 or %f1:%f0.
+
+ st %o0, [%i0 + 4] -- 32 bit int
+ or
+ st %o0, [%i0 + 4] -- 64 bit int
+ st %o1, [%i0 + 8] -- or the other way round?
+ or
+ st %f0, [%i0 + 4] -- 32 bit float
+ or
+ st %f0, [%i0 + 4] -- 64 bit float
+ st %f1, [%i0 + 8] -- or the other way round?
+
+ -}
+ ++ let i32 = [mkST o0 i0 0]
+ i64 = [mkST o0 i0 0, mkST o1 i0 4]
+ f32 = [mkSTF f0 i0 0]
+ f64 = [mkSTF f0 i0 0, mkSTF f1 i0 4]
+ in
+ case r_rep of
+ NonPtrArg -> i32
+ DoubleArg -> f64
+ FloatArg -> f32
+ VoidArg -> []
+ other -> moan64 "ByteCodeFFI.mkMarshalCode_wrk(sparc)"
+ (ppr r_rep)
+
+ ++ [mkRET,
+ mkRESTORE_TRIVIAL] -- this is in the delay slot of the RET
+ )
+#elif powerpc_TARGET_ARCH && darwin_TARGET_OS
+
+ = let
+ bytes_per_word = 4
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w
+ = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+ -- addr and result bits offsetsW
+ a_off = addr_offW * bytes_per_word
+ result_off = r_offW * bytes_per_word
+
+ linkageArea = 24
+ parameterArea = sum [ cgRepSizeW a_rep * bytes_per_word
+ | (_, a_rep) <- arg_offs_n_reps ]
+ savedRegisterArea = 4
+ frameSize = padTo16 (linkageArea + max parameterArea 32 + savedRegisterArea)
+ padTo16 x = case x `mod` 16 of
+ 0 -> x
+ y -> x - y + 16
+
+ pass_parameters [] _ _ = []
+ pass_parameters ((a_offW, a_rep):args) nextFPR offsetW =
+ let
+ haskellArgOffset = a_offW * bytes_per_word
+ offsetW' = offsetW + cgRepSizeW a_rep
+
+ pass_word w
+ | offsetW + w < 8 =
+ [0x801f0000 -- lwz rX, src(r31)
+ .|. (fromIntegral src .&. 0xFFFF)
+ .|. (fromIntegral (offsetW+w+3) `shiftL` 21)]
+ | otherwise =
+ [0x801f0000 -- lwz r0, src(r31)
+ .|. (fromIntegral src .&. 0xFFFF),
+ 0x90010000 -- stw r0, dst(r1)
+ .|. (fromIntegral dst .&. 0xFFFF)]
+ where
+ src = haskellArgOffset + w*bytes_per_word
+ dst = linkageArea + (offsetW+w) * bytes_per_word
+ in
+ case a_rep of
+ FloatArg | nextFPR < 14 ->
+ (0xc01f0000 -- lfs fX, haskellArgOffset(r31)
+ .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
+ .|. (fromIntegral nextFPR `shiftL` 21))
+ : pass_parameters args (nextFPR+1) offsetW'
+ DoubleArg | nextFPR < 14 ->
+ (0xc81f0000 -- lfd fX, haskellArgOffset(r31)
+ .|. (fromIntegral haskellArgOffset .&. 0xFFFF)
+ .|. (fromIntegral nextFPR `shiftL` 21))
+ : pass_parameters args (nextFPR+1) offsetW'
+ _ ->
+ concatMap pass_word [0 .. cgRepSizeW a_rep - 1]
+ ++ pass_parameters args nextFPR offsetW'
+
+ gather_result = case r_rep of
+ VoidArg -> []
+ FloatArg ->
+ [0xd03f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stfs f1, result_off(r31)
+ DoubleArg ->
+ [0xd83f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stfd f1, result_off(r31)
+ _ | cgRepSizeW r_rep == 2 ->
+ [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF),
+ 0x909f0000 .|. (fromIntegral (result_off+4) .&. 0xFFFF)]
+ -- stw r3, result_off(r31)
+ -- stw r4, result_off+4(r31)
+ _ | cgRepSizeW r_rep == 1 ->
+ [0x907f0000 .|. (fromIntegral result_off .&. 0xFFFF)]
+ -- stw r3, result_off(r31)
+ in
+ concatMap w32_to_w8s_bigEndian $ [
+ 0x7c0802a6, -- mflr r0
+ 0x93e1fffc, -- stw r31,-4(r1)
+ 0x90010008, -- stw r0,8(r1)
+ 0x94210000 .|. (fromIntegral (-frameSize) .&. 0xFFFF),
+ -- stwu r1, -frameSize(r1)
+ 0x7c7f1b78 -- mr r31, r3
+ ] ++ pass_parameters arg_offs_n_reps 1 0 ++ [
+ 0x819f0000 .|. (fromIntegral a_off .&. 0xFFFF),
+ -- lwz r12, a_off(r31)
+ 0x7d8903a6, -- mtctr r12
+ 0x4e800421 -- bctrl
+ ] ++ gather_result ++ [
+ 0x80210000, -- lwz r1, 0(r1)
+ 0x83e1fffc, -- lwz r31, -4(r1)
+ 0x80010008, -- lwz r0, 8(r1)
+ 0x7c0803a6, -- mtlr r0
+ 0x4e800020 -- blr
+ ]
+
+#elif powerpc_TARGET_ARCH && linux_TARGET_OS
+
+ -- All offsets here are measured in Words (not bytes). This includes
+ -- arguments to the load/store machine code generators, alignment numbers
+ -- and the final 'framesize' among others.
+
+ = concatMap w32_to_w8s_bigEndian $ [
+ 0x7c0802a6, -- mflr r0
+ 0x93e1fffc, -- stw r31,-4(r1)
+ 0x90010008, -- stw r0,8(r1)
+ 0x94210000 .|. offset (-framesize), -- stwu r1, -frameSize(r1)
+ 0x7c7f1b78 -- mr r31, r3
+ ] ++ pass_parameters ++ -- pass the parameters
+ loadWord 12 addr_offW ++ [ -- lwz r12, a_off(r31)
+ 0x7d8903a6, -- mtctr r12
+ 0x4e800421 -- bctrl
+ ] ++ gather_result ++ [ -- save the return value
+ 0x80210000, -- lwz r1, 0(r1)
+ 0x83e1fffc, -- lwz r31, -4(r1)
+ 0x80010008, -- lwz r0, 8(r1)
+ 0x7c0803a6, -- mtlr r0
+ 0x4e800020 -- blr
+ ]
+
+ where
+ gather_result :: [Word32]
+ gather_result = case r_rep of
+ VoidArg -> []
+ FloatArg -> storeFloat 1 r_offW
+ DoubleArg -> storeDouble 1 r_offW
+ LongArg -> storeLong 3 r_offW
+ _ -> storeWord 3 r_offW
+
+ pass_parameters :: [Word32]
+ pass_parameters = concat params
+
+ -- vector aligned (4 word = 16 bytes) with 8 extra words of buffer space
+ framesize = alignedTo 4 (argsize + 8)
+
+ ((_,_,argsize), params) = mapAccumL loadparam (3,1,2) arg_offs_n_reps
+
+ -- handle one argument, returning machine code and the updated state
+ loadparam :: (Int, Int, Int) -> (Int, CgRep) ->
+ ((Int, Int, Int), [Word32])
+
+ loadparam (gpr, fpr, stack) (ofs, rep) = case rep of
+ FloatArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadFloat fpr ofs )
+ FloatArg -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+
+ DoubleArg | fpr <= 8 -> ( (gpr, fpr + 1, stack), loadDouble fpr ofs )
+ DoubleArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+
+ LongArg | even gpr -> loadparam (gpr + 1, fpr, stack) (ofs, rep)
+ LongArg | gpr <= 9 -> ( (gpr + 2, fpr, stack), loadLong gpr ofs )
+ LongArg -> ( (gpr, fpr, astack + 2), stackLong astack ofs )
+
+ _ | gpr <= 10 -> ( (gpr + 1, fpr, stack), loadWord gpr ofs )
+ _ -> ( (gpr, fpr, stack + 1), stackWord stack ofs )
+ where astack = alignedTo 2 stack
+
+ alignedTo :: Int -> Int -> Int
+ alignedTo alignment x = case x `mod` alignment of
+ 0 -> x
+ y -> x - y + alignment
+
+ -- convenience macros to do multiple-instruction data moves
+ stackWord dst src = loadWord 0 src ++ storeWordC 0 dst
+ stackLong dst src = stackWord dst src ++ stackWord (dst + 1) (src + 1)
+ loadLong dst src = loadWord dst src ++ loadWord (dst + 1) (src + 1)
+ storeLong dst src = storeWord dst src ++ storeWord (dst + 1) (src + 1)
+
+ -- load data from the Haskell stack (relative to r31)
+ loadFloat = loadstoreInstr 0xc01f0000 -- lfs fpr, ofs(r31)
+ loadDouble = loadstoreInstr 0xc81f0000 -- lfd fpr, ofs(r31)
+ loadWord = loadstoreInstr 0x801f0000 -- lwz gpr, ofs(r31)
+
+ -- store data to the Haskell stack (relative to r31)
+ storeFloat = loadstoreInstr 0xd01f0000 -- stfs fpr, ofs(r31)
+ storeDouble = loadstoreInstr 0xd81f0000 -- stfd fpr, ofs(r31)
+ storeWord = loadstoreInstr 0x901f0000 -- stw gpr, ofs(r31)
+
+ -- store data to the C stack (relative to r1)
+ storeWordC = loadstoreInstr 0x90010000 -- stw gpr, ofs(r1)
+
+ -- machine code building blocks
+ loadstoreInstr :: Word32 -> Int -> Int -> [Word32]
+ loadstoreInstr code reg ofs = [ code .|. register reg .|. offset ofs ]
+
+ register :: Int -> Word32
+ register reg = fromIntegral reg `shiftL` 21
+
+ offset :: Int -> Word32
+ offset ofs = fromIntegral (ofs * 4) .&. 0xFFFF
+
+ -- speaks for itself
+ w32_to_w8s_bigEndian :: Word32 -> [Word8]
+ w32_to_w8s_bigEndian w = [fromIntegral (0xFF .&. (w `shiftR` 24)),
+ fromIntegral (0xFF .&. (w `shiftR` 16)),
+ fromIntegral (0xFF .&. (w `shiftR` 8)),
+ fromIntegral (0xFF .&. w)]
+
+#else
+
+ = throwDyn (InstallationError "foreign import is not implemented for GHCi on this platform.")
+
+#endif
+
+#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
+lit32 :: Int -> [Word8]
+lit32 i = let w32 = (fromIntegral i) :: Word32
+ in map (fromIntegral . ( .&. 0xFF))
+ [w32, w32 `shiftR` 8,
+ w32 `shiftR` 16, w32 `shiftR` 24]
+#endif
+\end{code}
+