diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 2006-05-31 08:16:34 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 2006-05-31 08:16:34 +0000 |
commit | d9be4fee6af0646b153cf0cc562b632f67ceca58 (patch) | |
tree | c0634a5d96c084ad6acd0de8f73281539935d016 | |
parent | 0b0804968f90e9b3cef55a614c777ba7f7d65a47 (diff) | |
download | ocaml-d9be4fee6af0646b153cf0cc562b632f67ceca58.tar.gz |
Portage PowerPC 64 bits
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@7430 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | asmcomp/power/arch.ml | 7 | ||||
-rw-r--r-- | asmcomp/power/emit.mlp | 207 | ||||
-rw-r--r-- | asmcomp/power/proc.ml | 11 | ||||
-rw-r--r-- | asmrun/power-rhapsody.S | 245 | ||||
-rw-r--r-- | asmrun/stack.h | 4 | ||||
-rwxr-xr-x | configure | 13 | ||||
-rw-r--r-- | otherlibs/num/bng_ppc.c | 8 |
7 files changed, 285 insertions, 210 deletions
diff --git a/asmcomp/power/arch.ml b/asmcomp/power/arch.ml index ace7bebd03..729601650b 100644 --- a/asmcomp/power/arch.ml +++ b/asmcomp/power/arch.ml @@ -39,8 +39,11 @@ type addressing_mode = let big_endian = true -let size_addr = 4 -let size_int = 4 +let ppc64 = + match Config.model with "ppc64" -> true | _ -> false + +let size_addr = if ppc64 then 8 else 4 +let size_int = size_addr let size_float = 8 (* Operations on addressing modes *) diff --git a/asmcomp/power/emit.mlp b/asmcomp/power/emit.mlp index d35cb242c7..274e111a3b 100644 --- a/asmcomp/power/emit.mlp +++ b/asmcomp/power/emit.mlp @@ -33,16 +33,17 @@ let stack_offset = ref 0 let frame_size () = let size = !stack_offset + (* Trap frame, outgoing parameters *) - 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + (* Local variables *) - (if !contains_calls then 4 else 0) in (* The return address *) + size_int * num_stack_slots.(0) + (* Local int variables *) + size_float * num_stack_slots.(1) + (* Local float variables *) + (if !contains_calls then size_int else 0) in (* The return address *) Misc.align size 16 let slot_offset loc cls = match loc with Local n -> if cls = 0 - then !stack_offset + num_stack_slots.(1) * 8 + n * 4 - else !stack_offset + n * 8 + then !stack_offset + num_stack_slots.(1) * size_float + n * size_int + else !stack_offset + n * size_float | Incoming n -> frame_size() + n | Outgoing n -> n @@ -85,6 +86,22 @@ let rodata_space = | "rhapsody" -> " .const\n" | _ -> assert false +(* Names of instructions that differ in 32 and 64-bit modes *) + +let lg = if ppc64 then "ld" else "lwz" +let stg = if ppc64 then "std" else "stw" +let lwa = if ppc64 then "lwa" else "lwz" +let cmpg = if ppc64 then "cmpd" else "cmpw" +let cmplg = if ppc64 then "cmpld" else "cmplw" +let datag = if ppc64 then ".quad" else ".long" +let aligng = if ppc64 then 3 else 2 +let mullg = if ppc64 then "mulld" else "mullw" +let divg = if ppc64 then "divd" else "divw" +let tglle = if ppc64 then "tdlle" else "twlle" +let sragi = if ppc64 then "sradi" else "srawi" +let slgi = if ppc64 then "sldi" else "slwi" +let fctigz = if ppc64 then "fctidz" else "fctiwz" + (* Output a pseudo-register *) let emit_reg r = @@ -127,7 +144,7 @@ let is_immediate n = n <= 32767 && n >= -32768 let is_native_immediate n = - n <= Nativeint.of_int 32767 && n >= Nativeint.of_int (-32768) + n <= 32767n && n >= -32768n (* Output a "upper 16 bits" or "lower 16 bits" operator. *) @@ -154,13 +171,16 @@ let emit_symbol_offset (s, d) = if d > 0 then `+`; if d <> 0 then emit_int d +let valid_offset instr ofs = + ofs land 3 = 0 || (instr <> "ld" && instr <> "std") + let emit_load_store instr addressing_mode addr n arg = match addressing_mode with Ibased(s, d) -> ` addis {emit_gpr 11}, 0, {emit_upper emit_symbol_offset (s,d)}\n`; ` {emit_string instr} {emit_reg arg}, {emit_lower emit_symbol_offset (s,d)}({emit_gpr 11})\n` | Iindexed ofs -> - if is_immediate ofs then + if is_immediate ofs && valid_offset instr ofs then ` {emit_string instr} {emit_reg arg}, {emit_int ofs}({emit_reg addr.(n)})\n` else begin ` lis {emit_gpr 0}, {emit_int(high ofs)}\n`; @@ -213,18 +233,19 @@ let record_frame live = `{emit_label lbl}:` let emit_frame fd = - ` .long {emit_label fd.fd_lbl} + 4\n`; + ` {emit_string datag} {emit_label fd.fd_lbl} + 4\n`; ` .short {emit_int fd.fd_frame_size}\n`; ` .short {emit_int (List.length fd.fd_live_offset)}\n`; List.iter (fun n -> ` .short {emit_int n}\n`) fd.fd_live_offset; - ` .align 2\n` + ` .align {emit_int aligng}\n` -(* Record floating-point literals *) +(* Record floating-point and large integer literals *) let float_literals = ref ([] : (string * int) list) +let int_literals = ref ([] : (nativeint * int) list) (* Record external C functions to be called in a position-independent way (for MacOSX) *) @@ -237,7 +258,7 @@ let emit_external s = ` .non_lazy_symbol_pointer\n`; `L{emit_symbol s}$non_lazy_ptr:\n`; ` .indirect_symbol {emit_symbol s}\n`; - ` .long 0\n` + ` {emit_string datag} 0\n` (* Names for conditional branches after comparisons *) @@ -247,21 +268,21 @@ let branch_for_comparison = function | Cge -> "bge" | Clt -> "blt" let name_for_int_comparison = function - Isigned cmp -> ("cmpw", branch_for_comparison cmp) - | Iunsigned cmp -> ("cmplw", branch_for_comparison cmp) + Isigned cmp -> (cmpg, branch_for_comparison cmp) + | Iunsigned cmp -> (cmplg, branch_for_comparison cmp) (* Names for various instructions *) let name_for_intop = function Iadd -> "add" - | Imul -> "mullw" - | Idiv -> "divw" + | Imul -> if ppc64 then "mulld" else "mullw" + | Idiv -> if ppc64 then "divd" else "divw" | Iand -> "and" | Ior -> "or" | Ixor -> "xor" - | Ilsl -> "slw" - | Ilsr -> "srw" - | Iasr -> "sraw" + | Ilsl -> if ppc64 then "sld" else "slw" + | Ilsr -> if ppc64 then "srd" else "srw" + | Iasr -> if ppc64 then "srad" else "sraw" | _ -> Misc.fatal_error "Emit.Intop" let name_for_intop_imm = function @@ -270,9 +291,9 @@ let name_for_intop_imm = function | Iand -> "andi." | Ior -> "ori" | Ixor -> "xori" - | Ilsl -> "slwi" - | Ilsr -> "srwi" - | Iasr -> "srawi" + | Ilsl -> if ppc64 then "sldi" else "slwi" + | Ilsr -> if ppc64 then "srdi" else "srwi" + | Iasr -> if ppc64 then "sradi" else "srawi" | _ -> Misc.fatal_error "Emit.Intop_imm" let name_for_floatop1 = function @@ -443,11 +464,11 @@ let rec emit_instr i dslot = | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} -> ` fmr {emit_reg dst}, {emit_reg src}\n` | {loc = Reg rs; typ = (Int | Addr)}, {loc = Stack sd} -> - ` stw {emit_reg src}, {emit_stack dst}\n` + ` {emit_string stg} {emit_reg src}, {emit_stack dst}\n` | {loc = Reg rs; typ = Float}, {loc = Stack sd} -> ` stfd {emit_reg src}, {emit_stack dst}\n` | {loc = Stack ss; typ = (Int | Addr)}, {loc = Reg rd} -> - ` lwz {emit_reg dst}, {emit_stack src}\n` + ` {emit_string lg} {emit_reg dst}, {emit_stack src}\n` | {loc = Stack ss; typ = Float}, {loc = Reg rd} -> ` lfd {emit_reg dst}, {emit_stack src}\n` | (_, _) -> @@ -456,10 +477,15 @@ let rec emit_instr i dslot = | Lop(Iconst_int n) -> if is_native_immediate n then ` li {emit_reg i.res.(0)}, {emit_nativeint n}\n` - else begin + else if n >= -0x8000_0000n && n <= 0x7FFF_FFFFn then begin ` lis {emit_reg i.res.(0)}, {emit_int(nativehigh n)}\n`; if nativelow n <> 0 then ` ori {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_int(nativelow n)}\n` + end else begin + let lbl = new_label() in + int_literals := (n, lbl) :: !int_literals; + ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; + ` {emit_string lg} {emit_reg i.res.(0)}, {emit_lower emit_label lbl}({emit_gpr 11})\n` end | Lop(Iconst_float s) -> let lbl = new_label() in @@ -480,7 +506,7 @@ let rec emit_instr i dslot = let n = frame_size() in ` mtctr {emit_reg i.arg.(0)}\n`; if !contains_calls then begin - ` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`; + ` {emit_string lg} {emit_gpr 11}, {emit_int(n - size_addr)}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`; ` mtlr {emit_gpr 11}\n` end else begin @@ -494,7 +520,7 @@ let rec emit_instr i dslot = else begin let n = frame_size() in if !contains_calls then begin - ` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`; + ` {emit_string lg} {emit_gpr 11}, {emit_int(n - size_addr)}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int n}\n`; ` mtlr {emit_gpr 11}\n` end else begin @@ -508,7 +534,7 @@ let rec emit_instr i dslot = if pic_externals then begin external_functions := StringSet.add s !external_functions; ` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`; - ` lwz {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n` + ` {emit_string lg} {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n` end else begin ` addis {emit_gpr 11}, 0, {emit_upper emit_symbol s}\n`; ` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_symbol s}\n` @@ -519,7 +545,7 @@ let rec emit_instr i dslot = if pic_externals then begin external_functions := StringSet.add s !external_functions; ` addis {emit_gpr 11}, 0, ha16(L{emit_symbol s}$non_lazy_ptr)\n`; - ` lwz {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`; + ` {emit_string lg} {emit_gpr 11}, lo16(L{emit_symbol s}$non_lazy_ptr)({emit_gpr 11})\n`; ` mtctr {emit_gpr 11}\n`; ` bctrl\n` end else @@ -535,9 +561,11 @@ let rec emit_instr i dslot = | Byte_signed -> "lbz" | Sixteen_unsigned -> "lhz" | Sixteen_signed -> "lha" + | Thirtytwo_unsigned -> "lwz" + | Thirtytwo_signed -> if ppc64 then "lwa" else "lwz" + | Word -> lg | Single -> "lfs" - | Double | Double_u -> "lfd" - | _ -> "lwz" in + | Double | Double_u -> "lfd" in emit_load_store loadinstr addr i.arg 0 i.res.(0); if chunk = Byte_signed then ` extsb {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` @@ -546,43 +574,44 @@ let rec emit_instr i dslot = match chunk with Byte_unsigned | Byte_signed -> "stb" | Sixteen_unsigned | Sixteen_signed -> "sth" + | Thirtytwo_unsigned | Thirtytwo_signed -> "stw" + | Word -> stg | Single -> "stfs" - | Double | Double_u -> "stfd" - | _ -> "stw" in + | Double | Double_u -> "stfd" in emit_load_store storeinstr addr i.arg 1 i.arg.(0) | Lop(Ialloc n) -> if !call_gc_label = 0 then call_gc_label := new_label(); ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; - ` cmplw {emit_gpr 31}, {emit_gpr 30}\n`; - ` addi {emit_reg i.res.(0)}, {emit_gpr 31}, 4\n`; + ` {emit_string cmplg} {emit_gpr 31}, {emit_gpr 30}\n`; + ` addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n`; record_frame i.live; ` bltl {emit_label !call_gc_label}\n` | Lop(Ispecific(Ialloc_far n)) -> if !call_gc_label = 0 then call_gc_label := new_label(); let lbl = new_label() in ` addi {emit_gpr 31}, {emit_gpr 31}, {emit_int(-n)}\n`; - ` cmplw {emit_gpr 31}, {emit_gpr 30}\n`; + ` {emit_string cmplg} {emit_gpr 31}, {emit_gpr 30}\n`; ` bge {emit_label lbl}\n`; record_frame i.live; ` bl {emit_label !call_gc_label}\n`; - `{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, 4\n` + `{emit_label lbl}: addi {emit_reg i.res.(0)}, {emit_gpr 31}, {emit_int size_addr}\n` | Lop(Iintop Isub) -> (* subfc has swapped arguments *) ` subfc {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}\n` | Lop(Iintop Imod) -> - ` divw {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; - ` mullw {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`; + ` {emit_string divg} {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` {emit_string mullg} {emit_gpr 0}, {emit_gpr 0}, {emit_reg i.arg.(1)}\n`; ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` | Lop(Iintop(Icomp cmp)) -> begin match cmp with Isigned c -> - ` cmpw {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` {emit_string cmpg} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; emit_set_comp c i.res.(0) | Iunsigned c -> - ` cmplw {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` {emit_string cmplg} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; emit_set_comp c i.res.(0) end | Lop(Iintop Icheckbound) -> - ` twlle {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` + ` {emit_string tglle} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Iintop op) -> let instr = name_for_intop op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` @@ -590,25 +619,25 @@ let rec emit_instr i dslot = ` addi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int(-n)}\n` | Lop(Iintop_imm(Idiv, n)) -> (* n is guaranteed to be a power of 2 *) let l = Misc.log2 n in - ` srawi {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`; + ` {emit_string sragi} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int l}\n`; ` addze {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` | Lop(Iintop_imm(Imod, n)) -> (* n is guaranteed to be a power of 2 *) let l = Misc.log2 n in - ` srawi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`; + ` {emit_string sragi} {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int l}\n`; ` addze {emit_gpr 0}, {emit_gpr 0}\n`; - ` slwi {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`; + ` {emit_string slgi} {emit_gpr 0}, {emit_gpr 0}, {emit_int l}\n`; ` subfc {emit_reg i.res.(0)}, {emit_gpr 0}, {emit_reg i.arg.(0)}\n` | Lop(Iintop_imm(Icomp cmp, n)) -> begin match cmp with Isigned c -> - ` cmpwi {emit_reg i.arg.(0)}, {emit_int n}\n`; + ` {emit_string cmpg}i {emit_reg i.arg.(0)}, {emit_int n}\n`; emit_set_comp c i.res.(0) | Iunsigned c -> - ` cmplwi {emit_reg i.arg.(0)}, {emit_int n}\n`; + ` {emit_string cmplg}i {emit_reg i.arg.(0)}, {emit_int n}\n`; emit_set_comp c i.res.(0) end | Lop(Iintop_imm(Icheckbound, n)) -> - ` twllei {emit_reg i.arg.(0)}, {emit_int n}\n` + ` {emit_string tglle}i {emit_reg i.arg.(0)}, {emit_int n}\n` | Lop(Iintop_imm(op, n)) -> let instr = name_for_intop_imm op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_int n}\n` @@ -619,29 +648,37 @@ let rec emit_instr i dslot = let instr = name_for_floatop2 op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Ifloatofint) -> - let lbl = new_label() in - float_literals := ("4.503601774854144e15", lbl) :: !float_literals; - (* That float above represents 0x4330000080000000 *) - ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; - ` lfd {emit_fpr 0}, {emit_lower emit_label lbl}({emit_gpr 11})\n`; - ` lis {emit_gpr 0}, 0x4330\n`; - ` stwu {emit_gpr 0}, -8({emit_gpr 1})\n`; - ` xoris {emit_gpr 0}, {emit_reg i.arg.(0)}, 0x8000\n`; - ` stw {emit_gpr 0}, 4({emit_gpr 1})\n`; - ` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`; - ` addi {emit_gpr 1}, {emit_gpr 1}, 8\n`; - ` fsub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_fpr 0}\n` + if ppc64 then begin + ` stdu {emit_reg i.arg.(0)}, -16({emit_gpr 1})\n`; + ` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`; + ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; + ` fcfid {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` + end else begin + let lbl = new_label() in + float_literals := ("4.503601774854144e15", lbl) :: !float_literals; + (* That float above represents 0x4330000080000000 *) + ` addis {emit_gpr 11}, 0, {emit_upper emit_label lbl}\n`; + ` lfd {emit_fpr 0}, {emit_lower emit_label lbl}({emit_gpr 11})\n`; + ` lis {emit_gpr 0}, 0x4330\n`; + ` stwu {emit_gpr 0}, -16({emit_gpr 1})\n`; + ` xoris {emit_gpr 0}, {emit_reg i.arg.(0)}, 0x8000\n`; + ` stw {emit_gpr 0}, 4({emit_gpr 1})\n`; + ` lfd {emit_reg i.res.(0)}, 0({emit_gpr 1})\n`; + ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; + ` fsub {emit_reg i.res.(0)}, {emit_reg i.res.(0)}, {emit_fpr 0}\n` + end | Lop(Iintoffloat) -> - ` fctiwz {emit_fpr 0}, {emit_reg i.arg.(0)}\n`; - ` stfdu {emit_fpr 0}, -8({emit_gpr 1})\n`; - ` lwz {emit_reg i.res.(0)}, 4({emit_gpr 1})\n`; - ` addi {emit_gpr 1}, {emit_gpr 1}, 8\n` + let ofs = if ppc64 then 0 else 4 in + ` {emit_string fctigz} {emit_fpr 0}, {emit_reg i.arg.(0)}\n`; + ` stfdu {emit_fpr 0}, -16({emit_gpr 1})\n`; + ` {emit_string lg} {emit_reg i.res.(0)}, {emit_int ofs}({emit_gpr 1})\n`; + ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n` | Lop(Ispecific sop) -> let instr = name_for_specific sop in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` | Lreloadretaddr -> let n = frame_size() in - ` lwz {emit_gpr 11}, {emit_int(n - 4)}({emit_gpr 1})\n`; + ` {emit_string lg} {emit_gpr 11}, {emit_int(n - size_addr)}({emit_gpr 1})\n`; ` mtlr {emit_gpr 11}\n` | Lreturn -> let n = frame_size() in @@ -655,11 +692,11 @@ let rec emit_instr i dslot = | Lcondbranch(tst, lbl) -> begin match tst with Itruetest -> - ` cmpwi {emit_reg i.arg.(0)}, 0\n`; + ` {emit_string cmpg}i {emit_reg i.arg.(0)}, 0\n`; emit_delay dslot; ` bne {emit_label lbl}\n` | Ifalsetest -> - ` cmpwi {emit_reg i.arg.(0)}, 0\n`; + ` {emit_string cmpg}i {emit_reg i.arg.(0)}, 0\n`; emit_delay dslot; ` beq {emit_label lbl}\n` | Iinttest cmp -> @@ -699,7 +736,7 @@ let rec emit_instr i dslot = ` beq {emit_label lbl}\n` end | Lcondbranch3(lbl0, lbl1, lbl2) -> - ` cmpwi {emit_reg i.arg.(0)}, 1\n`; + ` {emit_string cmpg}i {emit_reg i.arg.(0)}, 1\n`; emit_delay dslot; begin match lbl0 with None -> () @@ -718,8 +755,8 @@ let rec emit_instr i dslot = ` addis {emit_gpr 11}, 0, {emit_upper emit_label !lbl_jumptbl}\n`; ` addi {emit_gpr 11}, {emit_gpr 11}, {emit_lower emit_label !lbl_jumptbl}\n`; ` addi {emit_gpr 0}, {emit_reg i.arg.(0)}, {emit_int !num_jumptbl_entries}\n`; - ` slwi {emit_gpr 0}, {emit_gpr 0}, 2\n`; - ` lwzx {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; + ` {emit_string slgi} {emit_gpr 0}, {emit_gpr 0}, 2\n`; + ` {emit_string lwa}x {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; ` add {emit_gpr 0}, {emit_gpr 11}, {emit_gpr 0}\n`; ` mtctr {emit_gpr 0}\n`; ` bctr\n`; @@ -732,18 +769,18 @@ let rec emit_instr i dslot = | Lpushtrap -> stack_offset := !stack_offset + 16; ` mflr {emit_gpr 0}\n`; - ` stwu {emit_gpr 0}, -16({emit_gpr 1})\n`; - ` stw {emit_gpr 29}, 4({emit_gpr 1})\n`; + ` {emit_string stg}u {emit_gpr 0}, -16({emit_gpr 1})\n`; + ` {emit_string stg} {emit_gpr 29}, {emit_int size_addr}({emit_gpr 1})\n`; ` mr {emit_gpr 29}, {emit_gpr 1}\n` | Lpoptrap -> - ` lwz {emit_gpr 29}, 4({emit_gpr 1})\n`; + ` {emit_string lg} {emit_gpr 29}, {emit_int size_addr}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; stack_offset := !stack_offset - 16 | Lraise -> - ` lwz {emit_gpr 0}, 0({emit_gpr 29})\n`; + ` {emit_string lg} {emit_gpr 0}, 0({emit_gpr 29})\n`; ` mr {emit_gpr 1}, {emit_gpr 29}\n`; ` mtlr {emit_gpr 0}\n`; - ` lwz {emit_gpr 29}, 4({emit_gpr 1})\n`; + ` {emit_string lg} {emit_gpr 29}, {emit_int size_addr}({emit_gpr 1})\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, 16\n`; ` blr\n` @@ -802,6 +839,7 @@ let fundecl fundecl = stack_offset := 0; call_gc_label := 0; float_literals := []; + int_literals := []; ` .globl {emit_symbol fundecl.fun_name}\n`; begin match Config.system with | "elf" | "bsd" -> @@ -815,7 +853,7 @@ let fundecl fundecl = if !contains_calls then begin ` mflr {emit_gpr 0}\n`; ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n`; - ` stw {emit_gpr 0}, {emit_int(n - 4)}({emit_gpr 1})\n` + ` {emit_string stg} {emit_gpr 0}, {emit_int(n - size_addr)}({emit_gpr 1})\n` end else begin if n > 0 then ` addi {emit_gpr 1}, {emit_gpr 1}, {emit_int(-n)}\n` @@ -828,14 +866,18 @@ let fundecl fundecl = `{emit_label !call_gc_label}:\n`; ` b {emit_symbol "caml_call_gc"}\n` end; - (* Emit the floating-point literals *) - if !float_literals <> [] then begin + (* Emit the numeric literals *) + if !float_literals <> [] || !int_literals <> [] then begin emit_string rodata_space; ` .align 3\n`; List.iter (fun (f, lbl) -> `{emit_label lbl}: .double 0d{emit_string f}\n`) - !float_literals + !float_literals; + List.iter + (fun (n, lbl) -> + `{emit_label lbl}: {emit_string datag} {emit_nativeint n}\n`) + !int_literals end (* Emission of data *) @@ -859,15 +901,15 @@ let emit_item = function | Cint32 n -> ` .long {emit_nativeint n}\n` | Cint n -> - ` .long {emit_nativeint n}\n` + ` {emit_string datag} {emit_nativeint n}\n` | Csingle f -> ` .float 0d{emit_string f}\n` | Cdouble f -> ` .double 0d{emit_string f}\n` | Csymbol_address s -> - ` .long {emit_symbol s}\n` + ` {emit_string datag} {emit_symbol s}\n` | Clabel_address lbl -> - ` .long {emit_label (lbl + 100000)}\n` + ` {emit_string datag} {emit_label (lbl + 100000)}\n` | Cstring s -> emit_bytes_directive " .byte " s | Cskip n -> @@ -915,16 +957,17 @@ let end_assembly() = let lbl_end = Compilenv.make_symbol (Some "code_end") in declare_global_data lbl_end; `{emit_symbol lbl_end}:\n`; + ` .long 0\n`; emit_string data_space; let lbl_end = Compilenv.make_symbol (Some "data_end") in declare_global_data lbl_end; `{emit_symbol lbl_end}:\n`; - ` .long 0\n`; + ` {emit_string datag} 0\n`; (* Emit the frame descriptors *) emit_string rodata_space; let lbl = Compilenv.make_symbol (Some "frametable") in declare_global_data lbl; `{emit_symbol lbl}:\n`; - ` .long {emit_int (List.length !frame_descriptors)}\n`; + ` {emit_string datag} {emit_int (List.length !frame_descriptors)}\n`; List.iter emit_frame !frame_descriptors; frame_descriptors := [] diff --git a/asmcomp/power/proc.ml b/asmcomp/power/proc.ml index 445770a114..48611bab9e 100644 --- a/asmcomp/power/proc.ml +++ b/asmcomp/power/proc.ml @@ -145,7 +145,8 @@ let loc_results res = (* C calling conventions under PowerOpen: use GPR 3-10 and FPR 1-13 just like ML calling conventions, but always reserve stack space for all arguments. - Also, using a float register automatically reserves two int registers. + Also, using a float register automatically reserves two int registers + (in 32-bit mode) or one int register (in 64-bit mode). (If we were to call a non-prototyped C function, each float argument would have to go both in a float reg and in the matching pair of integer regs.) @@ -161,7 +162,7 @@ let poweropen_external_conventions first_int last_int let loc = Array.create (Array.length arg) Reg.dummy in let int = ref first_int in let float = ref first_float in - let ofs = ref 56 in + let ofs = ref (14 * size_addr) in for i = 0 to Array.length arg - 1 do match arg.(i).typ with Int | Addr as ty -> @@ -180,7 +181,7 @@ let poweropen_external_conventions first_int last_int loc.(i) <- stack_slot (Outgoing !ofs) Float; ofs := !ofs + size_float end; - int := !int + 2 + int := !int + (if ppc64 then 1 else 2) done; (loc, Misc.align !ofs 16) (* Keep stack 16-aligned *) @@ -238,7 +239,9 @@ let assemble_file infile outfile = match Config.system with | "elf" -> Ccomp.command ("as -u -m ppc -o " ^ outfile ^ " " ^ infile) - | "rhapsody" | "bsd" -> + | "rhapsody" -> + Ccomp.command ("as -arch " ^ Config.model ^ " -o " ^ outfile ^ " " ^ infile) + | "bsd" -> Ccomp.command ("as -o " ^ outfile ^ " " ^ infile) | _ -> assert false diff --git a/asmrun/power-rhapsody.S b/asmrun/power-rhapsody.S index e483c90ada..370d201dc4 100644 --- a/asmrun/power-rhapsody.S +++ b/asmrun/power-rhapsody.S @@ -13,17 +13,30 @@ /* $Id$ */ +#ifdef __ppc64__ +#define X(a,b) b +#else +#define X(a,b) a +#endif + +#define WORD X(4,8) +#define lg X(lwz,ld) +#define lgu X(lwzu,ldu) +#define stg X(stw,std) +#define stgu X(stwu,stdu) +#define gdata X(.long,.quad) + .macro Addrglobal /* reg, glob */ addis $0, 0, ha16($1) addi $0, $0, lo16($1) .endmacro .macro Loadglobal /* reg,glob,tmp */ addis $2, 0, ha16($1) - lwz $0, lo16($1)($2) + lg $0, lo16($1)($2) .endmacro .macro Storeglobal /* reg,glob,tmp */ addis $2, 0, ha16($1) - stw $0, lo16($1)($2) + stg $0, lo16($1)($2) .endmacro .text @@ -33,13 +46,13 @@ .globl _caml_call_gc _caml_call_gc: /* Set up stack frame */ - stwu r1, -0x1A0(r1) - /* 0x1A0 = 4*32 (int regs) + 8*32 (float regs) + 32 (space for C call) */ +#define FRAMESIZE (32*WORD + 32*8 + 32) + stwu r1, -FRAMESIZE(r1) /* Record return address into Caml code */ mflr r0 Storeglobal r0, _caml_last_return_address, r11 /* Record lowest stack address */ - addi r0, r1, 0x1A0 + addi r0, r1, FRAMESIZE Storeglobal r0, _caml_bottom_of_stack, r11 /* Record pointer to register array */ addi r0, r1, 8*32 + 32 @@ -49,30 +62,30 @@ _caml_call_gc: /* Save exception pointer (if e.g. a sighandler raises) */ Storeglobal r29, _caml_exception_pointer, r11 /* Save all registers used by the code generator */ - addi r11, r1, 8*32 + 32 - 4 - stwu r3, 4(r11) - stwu r4, 4(r11) - stwu r5, 4(r11) - stwu r6, 4(r11) - stwu r7, 4(r11) - stwu r8, 4(r11) - stwu r9, 4(r11) - stwu r10, 4(r11) - stwu r14, 4(r11) - stwu r15, 4(r11) - stwu r16, 4(r11) - stwu r17, 4(r11) - stwu r18, 4(r11) - stwu r19, 4(r11) - stwu r20, 4(r11) - stwu r21, 4(r11) - stwu r22, 4(r11) - stwu r23, 4(r11) - stwu r24, 4(r11) - stwu r25, 4(r11) - stwu r26, 4(r11) - stwu r27, 4(r11) - stwu r28, 4(r11) + addi r11, r1, 8*32 + 32 - WORD + stgu r3, WORD(r11) + stgu r4, WORD(r11) + stgu r5, WORD(r11) + stgu r6, WORD(r11) + stgu r7, WORD(r11) + stgu r8, WORD(r11) + stgu r9, WORD(r11) + stgu r10, WORD(r11) + stgu r14, WORD(r11) + stgu r15, WORD(r11) + stgu r16, WORD(r11) + stgu r17, WORD(r11) + stgu r18, WORD(r11) + stgu r19, WORD(r11) + stgu r20, WORD(r11) + stgu r21, WORD(r11) + stgu r22, WORD(r11) + stgu r23, WORD(r11) + stgu r24, WORD(r11) + stgu r25, WORD(r11) + stgu r26, WORD(r11) + stgu r27, WORD(r11) + stgu r28, WORD(r11) addi r11, r1, 32 - 8 stfdu f1, 8(r11) stfdu f2, 8(r11) @@ -111,30 +124,30 @@ _caml_call_gc: Loadglobal r31, _caml_young_ptr, r11 Loadglobal r30, _caml_young_limit, r11 /* Restore all regs used by the code generator */ - addi r11, r1, 8*32 + 32 - 4 - lwzu r3, 4(r11) - lwzu r4, 4(r11) - lwzu r5, 4(r11) - lwzu r6, 4(r11) - lwzu r7, 4(r11) - lwzu r8, 4(r11) - lwzu r9, 4(r11) - lwzu r10, 4(r11) - lwzu r14, 4(r11) - lwzu r15, 4(r11) - lwzu r16, 4(r11) - lwzu r17, 4(r11) - lwzu r18, 4(r11) - lwzu r19, 4(r11) - lwzu r20, 4(r11) - lwzu r21, 4(r11) - lwzu r22, 4(r11) - lwzu r23, 4(r11) - lwzu r24, 4(r11) - lwzu r25, 4(r11) - lwzu r26, 4(r11) - lwzu r27, 4(r11) - lwzu r28, 4(r11) + addi r11, r1, 8*32 + 32 - WORD + lgu r3, WORD(r11) + lgu r4, WORD(r11) + lgu r5, WORD(r11) + lgu r6, WORD(r11) + lgu r7, WORD(r11) + lgu r8, WORD(r11) + lgu r9, WORD(r11) + lgu r10, WORD(r11) + lgu r14, WORD(r11) + lgu r15, WORD(r11) + lgu r16, WORD(r11) + lgu r17, WORD(r11) + lgu r18, WORD(r11) + lgu r19, WORD(r11) + lgu r20, WORD(r11) + lgu r21, WORD(r11) + lgu r22, WORD(r11) + lgu r23, WORD(r11) + lgu r24, WORD(r11) + lgu r25, WORD(r11) + lgu r26, WORD(r11) + lgu r27, WORD(r11) + lgu r28, WORD(r11) addi r11, r1, 32 - 8 lfdu f1, 8(r11) lfdu f2, 8(r11) @@ -175,9 +188,10 @@ _caml_call_gc: li r12, 0 Storeglobal r12, _caml_last_return_address, r11 /* Deallocate stack frame */ - addi r1, r1, 0x1A0 + addi r1, r1, FRAMESIZE /* Return */ blr +#undef FRAMESIZE /* Call a C function from Caml */ @@ -218,8 +232,8 @@ _caml_raise_exception: li r0, 0 Storeglobal r0, _caml_last_return_address, r11 /* Pop trap frame */ - lwz r0, 0(r1) - lwz r29, 4(r1) + lg r0, 0(r1) + lg r29, WORD(r1) mtlr r0 addi r1, r1, 16 /* Branch to handler */ @@ -234,32 +248,32 @@ _caml_start_program: /* Code shared between caml_start_program and caml_callback */ L102: /* Allocate and link stack frame */ - stwu r1, -256(r1) +#define FRAMESIZE (16 + 20*WORD + 18*8) + stgu r1, -FRAMESIZE(r1) /* Save return address */ mflr r0 - stw r0, 256+4(r1) + stg r0, WORD(r1) /* Save all callee-save registers */ - /* GPR 14 at sp+16 ... GPR 31 at sp+84 - FPR 14 at sp+92 ... FPR 31 at sp+228 */ - addi r11, r1, 16-4 - stwu r14, 4(r11) - stwu r15, 4(r11) - stwu r16, 4(r11) - stwu r17, 4(r11) - stwu r18, 4(r11) - stwu r19, 4(r11) - stwu r20, 4(r11) - stwu r21, 4(r11) - stwu r22, 4(r11) - stwu r23, 4(r11) - stwu r24, 4(r11) - stwu r25, 4(r11) - stwu r26, 4(r11) - stwu r27, 4(r11) - stwu r28, 4(r11) - stwu r29, 4(r11) - stwu r30, 4(r11) - stwu r31, 4(r11) + /* GPR14 ... GPR31, then FPR14 ... FPR31 starting at sp+16 */ + addi r11, r1, 16-WORD + stgu r14, WORD(r11) + stgu r15, WORD(r11) + stgu r16, WORD(r11) + stgu r17, WORD(r11) + stgu r18, WORD(r11) + stgu r19, WORD(r11) + stgu r20, WORD(r11) + stgu r21, WORD(r11) + stgu r22, WORD(r11) + stgu r23, WORD(r11) + stgu r24, WORD(r11) + stgu r25, WORD(r11) + stgu r26, WORD(r11) + stgu r27, WORD(r11) + stgu r28, WORD(r11) + stgu r29, WORD(r11) + stgu r30, WORD(r11) + stgu r31, WORD(r11) stfdu f14, 8(r11) stfdu f15, 8(r11) stfdu f16, 8(r11) @@ -279,22 +293,22 @@ L102: stfdu f30, 8(r11) stfdu f31, 8(r11) /* Set up a callback link */ - addi r1, r1, -16 + addi r1, r1, -32 Loadglobal r9, _caml_bottom_of_stack, r11 Loadglobal r10, _caml_last_return_address, r11 Loadglobal r11, _caml_gc_regs, r11 - stw r9, 0(r1) - stw r10, 4(r1) - stw r11, 8(r1) + stg r9, 0(r1) + stg r10, WORD(r1) + stg r11, 2*WORD(r1) /* Build an exception handler to catch exceptions escaping out of Caml */ bl L103 b L104 L103: addi r1, r1, -16 mflr r0 - stw r0, 0(r1) + stg r0, 0(r1) Loadglobal r11, _caml_exception_pointer, r11 - stw r11, 4(r1) + stg r11, WORD(r1) mr r29, r1 /* Reload allocation pointers */ Loadglobal r31, _caml_young_ptr, r11 @@ -307,40 +321,40 @@ L103: L105: bctrl /* Pop the trap frame, restoring caml_exception_pointer */ - lwz r9, 4(r1) + lg r9, WORD(r1) Storeglobal r9, _caml_exception_pointer, r11 addi r1, r1, 16 /* Pop the callback link, restoring the global variables */ L106: - lwz r9, 0(r1) - lwz r10, 4(r1) - lwz r11, 8(r1) + lg r9, 0(r1) + lg r10, WORD(r1) + lg r11, 2*WORD(r1) Storeglobal r9, _caml_bottom_of_stack, r12 Storeglobal r10, _caml_last_return_address, r12 Storeglobal r11, _caml_gc_regs, r12 - addi r1, r1, 16 + addi r1, r1, 32 /* Update allocation pointer */ Storeglobal r31, _caml_young_ptr, r11 /* Restore callee-save registers */ - addi r11, r1, 16-4 - lwzu r14, 4(r11) - lwzu r15, 4(r11) - lwzu r16, 4(r11) - lwzu r17, 4(r11) - lwzu r18, 4(r11) - lwzu r19, 4(r11) - lwzu r20, 4(r11) - lwzu r21, 4(r11) - lwzu r22, 4(r11) - lwzu r23, 4(r11) - lwzu r24, 4(r11) - lwzu r25, 4(r11) - lwzu r26, 4(r11) - lwzu r27, 4(r11) - lwzu r28, 4(r11) - lwzu r29, 4(r11) - lwzu r30, 4(r11) - lwzu r31, 4(r11) + addi r11, r1, 16-WORD + lgu r14, WORD(r11) + lgu r15, WORD(r11) + lgu r16, WORD(r11) + lgu r17, WORD(r11) + lgu r18, WORD(r11) + lgu r19, WORD(r11) + lgu r20, WORD(r11) + lgu r21, WORD(r11) + lgu r22, WORD(r11) + lgu r23, WORD(r11) + lgu r24, WORD(r11) + lgu r25, WORD(r11) + lgu r26, WORD(r11) + lgu r27, WORD(r11) + lgu r28, WORD(r11) + lgu r29, WORD(r11) + lgu r30, WORD(r11) + lgu r31, WORD(r11) lfdu f14, 8(r11) lfdu f15, 8(r11) lfdu f16, 8(r11) @@ -360,10 +374,10 @@ L106: lfdu f30, 8(r11) lfdu f31, 8(r11) /* Reload return address */ - lwz r0, 256+4(r1) + lg r0, WORD(r1) mtlr r0 /* Return */ - addi r1, r1, 256 + addi r1, r1, FRAMESIZE blr /* The trap handler: */ @@ -373,6 +387,7 @@ L104: /* Encode exception bucket as an exception result and return it */ ori r3, r3, 2 b L106 +#undef FRAMESIZE /* Callback from C to Caml */ @@ -382,7 +397,7 @@ _caml_callback_exn: mr r0, r3 /* Closure */ mr r3, r4 /* Argument */ mr r4, r0 - lwz r12, 0(r4) /* Code pointer */ + lg r12, 0(r4) /* Code pointer */ b L102 .globl _caml_callback2_exn @@ -409,8 +424,8 @@ _caml_callback3_exn: .const .globl _caml_system__frametable _caml_system__frametable: - .long 1 /* one descriptor */ - .long L105 + 4 /* return address into callback */ + gdata 1 /* one descriptor */ + gdata L105 + 4 /* return address into callback */ .short -1 /* negative size count => use callback link */ .short 0 /* no roots here */ - + .align X(2,3) diff --git a/asmrun/stack.h b/asmrun/stack.h index 9805bfb696..e8d1b5807f 100644 --- a/asmrun/stack.h +++ b/asmrun/stack.h @@ -53,9 +53,9 @@ #endif #ifdef TARGET_power -#define Saved_return_address(sp) *((intnat *)((sp) - 4)) +#define Saved_return_address(sp) *((intnat *)((sp) - SIZEOF_PTR)) #define Already_scanned(sp, retaddr) ((retaddr) & 1) -#define Mark_scanned(sp, retaddr) (*((intnat *)((sp) - 4)) = (retaddr) | 1) +#define Mark_scanned(sp, retaddr) (*((intnat *)((sp) - SIZEOF_PTR)) = (retaddr) | 1) #define Mask_already_scanned(retaddr) ((retaddr) & ~1) #ifdef SYS_aix #define Trap_frame_size 32 @@ -325,9 +325,11 @@ echo "Checking the sizes of integers and pointers..." set `sh ./runtest sizes.c` case "$2,$3" in 4,4) echo "OK, this is a regular 32 bit architecture." - echo "#undef ARCH_SIXTYFOUR" >> m.h;; + echo "#undef ARCH_SIXTYFOUR" >> m.h + arch64=false;; *,8) echo "Wow! A 64 bit architecture!" - echo "#define ARCH_SIXTYFOUR" >> m.h;; + echo "#define ARCH_SIXTYFOUR" >> m.h + arch64=true;; *,*) echo "This architecture seems to be neither 32 bits nor 64 bits." echo "Objective Caml won't run on this architecture." exit 2;; @@ -537,7 +539,7 @@ if test $withsharedlibs = "yes"; then mksharedlibrpath="-rpath " shared_libraries_supported=true;; powerpc-apple-darwin*) - mksharedlib="cc -bundle -flat_namespace -undefined suppress -o" + mksharedlib="$bytecc -bundle -flat_namespace -undefined suppress -o" bytecccompopts="$dl_defs $bytecccompopts" #sharedcccompopts="-fnocommon" dl_needs_underscore=true @@ -587,7 +589,8 @@ case "$host" in powerpc-*-linux*) arch=power; model=ppc; system=elf;; powerpc-*-netbsd*) arch=power; model=ppc; system=bsd;; powerpc-*-rhapsody*) arch=power; model=ppc; system=rhapsody;; - powerpc-*-darwin*) arch=power; model=ppc; system=rhapsody;; + powerpc-*-darwin*) arch=power; system=rhapsody + if $arch64; then model=ppc64; else model=ppc; fi;; arm*-*-linux*) arch=arm; system=linux;; arm*-*-gnu*) arch=arm; system=gnu;; ia64-*-linux*) arch=ia64; system=linux;; @@ -654,7 +657,7 @@ case "$arch,$model,$system" in hppa,*,*) aspp="$cc"; asppflags='-traditional -c -DSYS_$(SYSTEM)';; power,*,elf) aspp='gcc'; asppflags='-c';; power,*,bsd) aspp='gcc'; asppflags='-c -DSYS_$(SYSTEM)';; - power,*,rhapsody) ;; + power,*,rhapsody) aspp="$bytecc"; asppflags='-c';; arm,*,linux) aspp='gcc'; asppflags='-c -DSYS_$(SYSTEM)';; arm,*,gnu) aspp='gcc'; asppflags='-c -DSYS_$(SYSTEM)';; ia64,*,*) asflags=-xexplicit diff --git a/otherlibs/num/bng_ppc.c b/otherlibs/num/bng_ppc.c index d0e33a2f25..32d573cd0f 100644 --- a/otherlibs/num/bng_ppc.c +++ b/otherlibs/num/bng_ppc.c @@ -79,8 +79,16 @@ : "=&r" (res), "=&r" (carryaccu) \ : "r" (arg1), "r" (arg2), "r" (arg3), "1" (carryaccu)) +#ifdef __ppc64__ +#define BngMult(resh,resl,arg1,arg2) \ + asm("mulld %0, %2, %3 \n\t" \ + "mulhdu %1, %2, %3" \ + : "=&r" (resl), "=r" (resh) \ + : "r" (arg1), "r" (arg2)) +#else #define BngMult(resh,resl,arg1,arg2) \ asm("mullw %0, %2, %3 \n\t" \ "mulhwu %1, %2, %3" \ : "=&r" (resl), "=r" (resh) \ : "r" (arg1), "r" (arg2)) +#endif
\ No newline at end of file |