diff options
Diffstat (limited to 'asmcomp/arm/emit.mlp')
-rw-r--r-- | asmcomp/arm/emit.mlp | 919 |
1 files changed, 620 insertions, 299 deletions
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp index dd995f26be..b0baf86523 100644 --- a/asmcomp/arm/emit.mlp +++ b/asmcomp/arm/emit.mlp @@ -2,11 +2,12 @@ (* *) (* OCaml *) (* *) -(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* Benedikt Meurer, University of Siegen *) (* *) -(* Copyright 1998 Institut National de Recherche en Informatique et *) -(* en Automatique. All rights reserved. This file is distributed *) -(* under the terms of the Q Public License version 1.0. *) +(* Copyright 1998 Institut National de Recherche en Informatique *) +(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *) +(* reserved. This file is distributed under the terms of the Q *) +(* Public License version 1.0. *) (* *) (***********************************************************************) @@ -36,16 +37,25 @@ let emit_label lbl = let emit_data_label lbl = emit_string ".Ld"; emit_int lbl -(* Output a symbol *) +(* Symbols *) let emit_symbol s = Emitaux.emit_symbol '$' s +let emit_call s = + if !Clflags.dlcode || !pic_code + then `bl {emit_symbol s}(PLT)` + else `bl {emit_symbol s}` + +let emit_jump s = + if !Clflags.dlcode || !pic_code + then `b {emit_symbol s}(PLT)` + else `b {emit_symbol s}` + (* Output a pseudo-register *) -let emit_reg r = - match r.loc with - | Reg r -> emit_string (register_name r) +let emit_reg = function + {loc = Reg r} -> emit_string (register_name r) | _ -> fatal_error "Emit_arm.emit_reg" (* Layout of the stack frame *) @@ -56,14 +66,23 @@ let frame_size () = let sz = !stack_offset + 4 * num_stack_slots.(0) + + 8 * num_stack_slots.(1) + + 8 * num_stack_slots.(2) + (if !contains_calls then 4 else 0) in Misc.align sz 8 let slot_offset loc cl = match loc with - Incoming n -> frame_size() + n - | Local n -> !stack_offset + n * 4 - | Outgoing n -> n + Incoming n -> + assert (n >= 0); + frame_size() + n + | Local n -> + if cl = 0 + then !stack_offset + n * 4 + else !stack_offset + num_stack_slots.(0) * 4 + n * 8 + | Outgoing n -> + assert (n >= 0); + n (* Output a stack reference *) @@ -82,20 +101,13 @@ let emit_addressing addr r n = (* Record live pointers at call points *) -type frame_descr = - { fd_lbl: int; (* Return address *) - fd_frame_size: int; (* Size of stack frame *) - fd_live_offset: int list } (* Offsets/regs of live addresses *) - -let frame_descriptors = ref([] : frame_descr list) - -let record_frame live = +let record_frame_label live dbg = let lbl = new_label() in let live_offset = ref [] in Reg.Set.iter (function {typ = Addr; loc = Reg r} -> - live_offset := (r lsl 1) + 1 :: !live_offset + live_offset := ((r lsl 1) + 1) :: !live_offset | {typ = Addr; loc = Stack s} as reg -> live_offset := slot_offset s (register_class reg) :: !live_offset | _ -> ()) @@ -103,18 +115,57 @@ let record_frame live = frame_descriptors := { fd_lbl = lbl; fd_frame_size = frame_size(); - fd_live_offset = !live_offset } :: !frame_descriptors; - `{emit_label lbl}:` - -let emit_frame fd = - ` .word {emit_label fd.fd_lbl} + 4\n`; - ` .short {emit_int fd.fd_frame_size}\n`; - ` .short {emit_int (List.length fd.fd_live_offset)}\n`; - List.iter - (fun n -> - ` .short {emit_int n}\n`) - fd.fd_live_offset; - ` .align 2\n` + fd_live_offset = !live_offset; + fd_debuginfo = dbg } :: !frame_descriptors; + lbl + +let record_frame live dbg = + let lbl = record_frame_label live dbg in `{emit_label lbl}:` + +(* Record calls to the GC -- we've moved them out of the way *) + +type gc_call = + { gc_lbl: label; (* Entry label *) + gc_return_lbl: label; (* Where to branch after GC *) + gc_frame_lbl: label } (* Label of frame descriptor *) + +let call_gc_sites = ref ([] : gc_call list) + +let emit_call_gc gc = + `{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`; + `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n` + +(* Record calls to caml_ml_array_bound_error. + In debug mode, we maintain one call to caml_ml_array_bound_error + per bound check site. Otherwise, we can share a single call. *) + +type bound_error_call = + { bd_lbl: label; (* Entry label *) + bd_frame_lbl: label } (* Label of frame descriptor *) + +let bound_error_sites = ref ([] : bound_error_call list) + +let bound_error_label dbg = + if !Clflags.debug || !bound_error_sites = [] then begin + let lbl_bound_error = new_label() in + let lbl_frame = record_frame_label Reg.Set.empty dbg in + bound_error_sites := + { bd_lbl = lbl_bound_error; + bd_frame_lbl = lbl_frame } :: !bound_error_sites; + lbl_bound_error + end else begin + let bd = List.hd !bound_error_sites in bd.bd_lbl + end + +let emit_call_bound_error bd = + `{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`; + `{emit_label bd.bd_frame_lbl}:\n` + +(* Negate a comparison *) + +let negate_integer_comparison = function + Isigned cmp -> Isigned(negate_comparison cmp) + | Iunsigned cmp -> Iunsigned(negate_comparison cmp) (* Names of various instructions *) @@ -124,22 +175,13 @@ let name_for_comparison = function | Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls" | Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi" -let name_for_float_comparison cmp neg = - match cmp with - Ceq -> if neg then "ne" else "eq" - | Cne -> if neg then "eq" else "ne" - | Cle -> if neg then "hi" else "ls" - | Cge -> if neg then "lt" else "ge" - | Clt -> if neg then "pl" else "mi" - | Cgt -> if neg then "le" else "gt" - let name_for_int_operation = function Iadd -> "add" | Isub -> "sub" | Imul -> "mul" - | Iand -> "and" - | Ior -> "orr" - | Ixor -> "eor" + | Iand -> "and" + | Ior -> "orr" + | Ixor -> "eor" | _ -> assert false let name_for_shift_operation = function @@ -148,193 +190,306 @@ let name_for_shift_operation = function | Iasr -> "asr" | _ -> assert false -let name_for_shift_int_operation = function - Ishiftadd -> "add" - | Ishiftsub -> "sub" - | Ishiftsubrev -> "rsb" - -(* Recognize immediate operands *) - -(* Immediate operands are 8-bit immediate values, zero-extended, and rotated - right by 0, 2, 4, ... 30 bits. - We check only with 8-bit values shifted left 0 to 24 bits. *) - -let rec is_immed n shift = - shift <= 24 && - (Nativeint.logand n (Nativeint.shift_left (Nativeint.of_int 0xFF) shift) = n - || is_immed n (shift + 2)) - -let is_immediate n = is_immed n 0 - (* General functional to decompose a non-immediate integer constant - into 8-bit chunks shifted left 0 ... 24 bits *) + into 8-bit chunks shifted left 0 ... 30 bits. *) let decompose_intconst n fn = let i = ref n in let shift = ref 0 in let ninstr = ref 0 in - while !i <> 0n do - if Nativeint.to_int (Nativeint.shift_right !i !shift) land 3 = 0 then + while !i <> 0l do + if Int32.logand (Int32.shift_right !i !shift) 3l = 0l then shift := !shift + 2 else begin - let mask = Nativeint.shift_left 0xFFn !shift in - let bits = Nativeint.logand !i mask in - fn bits; + let bits = Int32.logand !i (Int32.shift_left 0xffl !shift) in + i := Int32.sub !i bits; shift := !shift + 8; - i := Nativeint.sub !i bits; - incr ninstr + incr ninstr; + fn bits end done; !ninstr (* Load an integer constant into a register *) -let emit_intconst r n = - let nr = Nativeint.lognot n in +let emit_intconst dst n = + let nr = Int32.lognot n in if is_immediate n then begin - ` mov {emit_reg r}, #{emit_nativeint n}\n`; 1 + (* Use movs here to enable 16-bit T1 encoding *) + ` movs {emit_reg dst}, #{emit_int32 n}\n`; 1 end else if is_immediate nr then begin - ` mvn {emit_reg r}, #{emit_nativeint nr}\n`; 1 + ` mvn {emit_reg dst}, #{emit_int32 nr}\n`; 1 + end else if !arch > ARMv6 then begin + let nl = Int32.logand 0xffffl n in + let nh = Int32.logand 0xffffl (Int32.shift_right_logical n 16) in + if nh = 0l then begin + ` movw {emit_reg dst}, #{emit_int32 nl}\n`; 1 + end else if Int32.logand nl 0xffl = nl then begin + ` movs {emit_reg dst}, #{emit_int32 nl}\n`; + ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2 + end else begin + ` movw {emit_reg dst}, #{emit_int32 nl}\n`; + ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2 + end end else begin let first = ref true in decompose_intconst n (fun bits -> if !first - then ` mov {emit_reg r}, #{emit_nativeint bits} @ {emit_nativeint n}\n` - else ` add {emit_reg r}, {emit_reg r}, #{emit_nativeint bits}\n`; + then ` mov {emit_reg dst}, #{emit_int32 bits} @ {emit_int32 n}\n` + else ` add {emit_reg dst}, {emit_reg dst}, #{emit_int32 bits}\n`; first := false) end (* Adjust sp (up or down) by the given byte amount *) -let emit_stack_adjustment instr n = - if n <= 0 then 0 else - decompose_intconst (Nativeint.of_int n) - (fun bits -> - ` {emit_string instr} sp, sp, #{emit_nativeint bits}\n`) +let emit_stack_adjustment n = + if n = 0 then 0 else begin + let instr = if n < 0 then "sub" else "add" in + let ninstr = decompose_intconst (Int32.of_int (abs n)) + (fun bits -> + ` {emit_string instr} sp, sp, #{emit_int32 bits}\n`) in + cfi_adjust_cfa_offset (-n); + ninstr + end + +(* Deallocate the stack frame before a return or tail call *) + +let output_epilogue f = + let n = frame_size() in + if n > 0 then begin + let ninstr = emit_stack_adjustment n in + let ninstr = ninstr + f () in + (* reset CFA back cause function body may continue *) + cfi_adjust_cfa_offset n; + ninstr + end else + f () (* Name of current function *) let function_name = ref "" (* Entry point for tail recursive calls *) let tailrec_entry_point = ref 0 -(* Table of symbols referenced *) -let symbol_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t) -(* Table of floating-point literals *) -let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t) -(* Total space (in word) occupied by pending literals *) +(* Pending floating-point literals *) +let float_literals = ref ([] : (string * label) list) +(* Pending relative references to the global offset table *) +let gotrel_literals = ref ([] : (label * label) list) +(* Pending symbol literals *) +let symbol_literals = ref ([] : (string * label) list) +(* Total space (in words) occupied by pending literals *) let num_literals = ref 0 -(* Label a symbol or float constant *) -let label_constant tbl s size = +(* Label a floating-point literal *) +let float_literal f = try - Hashtbl.find tbl s + List.assoc f !float_literals with Not_found -> let lbl = new_label() in - Hashtbl.add tbl s lbl; - num_literals := !num_literals + size; + num_literals := !num_literals + 2; + float_literals := (f, lbl) :: !float_literals; lbl -(* Emit all pending constants *) - -let emit_constants () = - Hashtbl.iter - (fun s lbl -> - `{emit_label lbl}: .word {emit_symbol s}\n`) - symbol_constants; - Hashtbl.iter - (fun s lbl -> - `{emit_label lbl}: .double {emit_string s}\n`) - float_constants; - Hashtbl.clear symbol_constants; - Hashtbl.clear float_constants; +(* Label a GOTREL literal *) +let gotrel_literal l = + let lbl = new_label() in + num_literals := !num_literals + 1; + gotrel_literals := (l, lbl) :: !gotrel_literals; + lbl + +(* Label a symbol literal *) +let symbol_literal s = + try + List.assoc s !symbol_literals + with Not_found -> + let lbl = new_label() in + num_literals := !num_literals + 1; + symbol_literals := (s, lbl) :: !symbol_literals; + lbl + +(* Emit all pending literals *) +let emit_literals() = + if !float_literals <> [] then begin + ` .align 3\n`; + List.iter + (fun (f, lbl) -> + `{emit_label lbl}: .double {emit_string f}\n`) + !float_literals; + float_literals := [] + end; + if !symbol_literals <> [] then begin + let offset = if !thumb then 4 else 8 in + let suffix = if !pic_code then "(GOT)" else "" in + ` .align 2\n`; + List.iter + (fun (l, lbl) -> + `{emit_label lbl}: .word _GLOBAL_OFFSET_TABLE_-({emit_label l}+{emit_int offset})\n`) + !gotrel_literals; + List.iter + (fun (s, lbl) -> + `{emit_label lbl}: .word {emit_symbol s}{emit_string suffix}\n`) + !symbol_literals; + gotrel_literals := []; + symbol_literals := [] + end; num_literals := 0 +(* Emit code to load the address of a symbol *) + +let emit_load_symbol_addr dst s = + if !pic_code then begin + let lbl_pic = new_label() in + let lbl_got = gotrel_literal lbl_pic in + let lbl_sym = symbol_literal s in + (* Both r3 and r12 are marked as clobbered in PIC mode (cf. proc.ml), + so use r12 as temporary scratch register unless the destination is + r12, then we use r3 instead. *) + let tmp = if dst.loc = Reg 8 (*r12*) + then phys_reg 3 (*r3*) + else phys_reg 8 (*r12*) in + ` ldr {emit_reg tmp}, {emit_label lbl_got}\n`; + ` ldr {emit_reg dst}, {emit_label lbl_sym}\n`; + `{emit_label lbl_pic}: add {emit_reg tmp}, pc, {emit_reg tmp}\n`; + ` ldr {emit_reg dst}, [{emit_reg tmp}, {emit_reg dst}] @ {emit_symbol s}\n`; + 4 + end else if !arch > ARMv6 && not !Clflags.dlcode && !fastcode_flag then begin + ` movw {emit_reg dst}, #:lower16:{emit_symbol s}\n`; + ` movt {emit_reg dst}, #:upper16:{emit_symbol s}\n`; + 2 + end else begin + let lbl = symbol_literal s in + ` ldr {emit_reg dst}, {emit_label lbl} @ {emit_symbol s}\n`; + 1 + end + (* Output the assembly code for an instruction *) let emit_instr i = + emit_debug_info i.dbg; match i.desc with Lend -> 0 | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in if src.loc = dst.loc then 0 else begin - match (src, dst) with - {loc = Reg rs; typ = Int|Addr}, {loc = Reg rd; typ = Int|Addr} -> - ` mov {emit_reg dst}, {emit_reg src}\n`; 1 - | {loc = Reg rs; typ = Int|Addr}, {loc = Stack sd} -> - ` str {emit_reg src}, {emit_stack dst}\n`; 1 - | {loc = Stack ss; typ = Int|Addr}, {loc = Reg rd} -> - ` ldr {emit_reg dst}, {emit_stack src}\n`; 1 + begin match (src, dst) with + {loc = Reg _; typ = Float}, {loc = Reg _} -> + ` fcpyd {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _}, {loc = Reg _} -> + ` mov {emit_reg dst}, {emit_reg src}\n` + | {loc = Reg _; typ = Float}, _ -> + ` fstd {emit_reg src}, {emit_stack dst}\n` + | {loc = Reg _}, _ -> + ` str {emit_reg src}, {emit_stack dst}\n` + | {typ = Float}, _ -> + ` fldd {emit_reg dst}, {emit_stack src}\n` | _ -> - assert false + ` ldr {emit_reg dst}, {emit_stack src}\n` + end; 1 end | Lop(Iconst_int n) -> - emit_intconst i.res.(0) n - | Lop(Iconst_float s) -> - let bits = Int64.bits_of_float (float_of_string s) in - let high_bits = Int64.to_nativeint (Int64.shift_right_logical bits 32) - and low_bits = Int64.to_nativeint bits in - if is_immediate low_bits && is_immediate high_bits then begin - ` mov {emit_reg i.res.(0)}, #{emit_nativeint low_bits} @ {emit_string s}\n`; - ` mov {emit_reg i.res.(1)}, #{emit_nativeint high_bits}\n`; - 2 + emit_intconst i.res.(0) (Nativeint.to_int32 n) + | Lop(Iconst_float f) when !fpu = Soft -> + ` @ {emit_string f}\n`; + let bits = Int64.bits_of_float (float_of_string f) in + let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32) + and low_bits = Int64.to_int32 bits in + if is_immediate low_bits || is_immediate high_bits then begin + let ninstr_low = emit_intconst i.res.(0) low_bits + and ninstr_high = emit_intconst i.res.(1) high_bits in + ninstr_low + ninstr_high end else begin - let lbl = label_constant float_constants s 2 in - ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string s}\n`; + let lbl = float_literal f in + ` ldr {emit_reg i.res.(0)}, {emit_label lbl}\n`; ` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`; 2 end + | Lop(Iconst_float f) -> + let encode imm = + let sg = Int64.to_int (Int64.shift_right_logical imm 63) in + let ex = Int64.to_int (Int64.shift_right_logical imm 52) in + let ex = (ex land 0x7ff) - 1023 in + let mn = Int64.logand imm 0xfffffffffffffL in + if Int64.logand mn 0xffffffffffffL <> 0L || ex < -3 || ex > 4 + then + None + else begin + let mn = Int64.to_int (Int64.shift_right_logical mn 48) in + if mn land 0x0f <> mn then + None + else + let ex = ((ex + 3) land 0x07) lxor 0x04 in + Some((sg lsl 7) lor (ex lsl 4) lor mn) + end in + begin match encode (Int64.bits_of_float (float_of_string f)) with + None -> + let lbl = float_literal f in + ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n` + | Some imm8 -> + ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n` + end; 1 | Lop(Iconst_symbol s) -> - let lbl = label_constant symbol_constants s 1 in - ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_symbol s}\n`; 1 + emit_load_symbol_addr i.res.(0) s | Lop(Icall_ind) -> - ` mov lr, pc\n`; - `{record_frame i.live} bx {emit_reg i.arg.(0)}\n`; 2 + if !arch >= ARMv5 then begin + ` blx {emit_reg i.arg.(0)}\n`; + `{record_frame i.live i.dbg}\n`; 1 + end else begin + ` mov lr, pc\n`; + ` bx {emit_reg i.arg.(0)}\n`; + `{record_frame i.live i.dbg}\n`; 2 + end | Lop(Icall_imm s) -> - `{record_frame i.live} bl {emit_symbol s}\n`; 1 + ` {emit_call s}\n`; + `{record_frame i.live i.dbg}\n`; 1 | Lop(Itailcall_ind) -> - let n = frame_size() in - if !contains_calls then - ` ldr lr, [sp, #{emit_int (n-4)}]\n`; - let ninstr = emit_stack_adjustment "add" n in - ` bx {emit_reg i.arg.(0)}\n`; - 2 + ninstr + output_epilogue begin fun () -> + if !contains_calls then + ` ldr lr, [sp, #{emit_int (-4)}]\n`; + ` bx {emit_reg i.arg.(0)}\n`; 2 + end | Lop(Itailcall_imm s) -> if s = !function_name then begin ` b {emit_label !tailrec_entry_point}\n`; 1 end else begin - let n = frame_size() in - if !contains_calls then - ` ldr lr, [sp, #{emit_int (n-4)}]\n`; - let ninstr = emit_stack_adjustment "add" n in - ` b {emit_symbol s}\n`; - 2 + ninstr - end - | Lop(Iextcall(s, alloc)) -> - if alloc then begin - let lbl = label_constant symbol_constants s 1 in - ` ldr r12, {emit_label lbl} @ {emit_symbol s}\n`; - `{record_frame i.live} bl caml_c_call\n`; 2 - end else begin - ` bl {emit_symbol s}\n`; 1 + output_epilogue begin fun () -> + if !contains_calls then + ` ldr lr, [sp, #{emit_int (-4)}]\n`; + ` {emit_jump s}\n`; 2 + end end + | Lop(Iextcall(s, false)) -> + ` {emit_call s}\n`; 1 + | Lop(Iextcall(s, true)) -> + let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) s in + ` {emit_call "caml_c_call"}\n`; + `{record_frame i.live i.dbg}\n`; + 1 + ninstr | Lop(Istackoffset n) -> assert (n mod 8 = 0); - let ninstr = - if n >= 0 - then emit_stack_adjustment "sub" n - else emit_stack_adjustment "add" (-n) in + let ninstr = emit_stack_adjustment (-n) in stack_offset := !stack_offset + n; ninstr - | Lop(Iload((Double | Double_u), addr)) -> - let addr' = offset_addressing addr 4 in - if i.res.(0).loc <> i.arg.(0).loc then begin - ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`; - ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n` - end else begin - ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`; - ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n` - end; - 2 + | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 -> + ` flds s14, {emit_addressing addr i.arg 0}\n`; + ` fcvtds {emit_reg i.res.(0)}, s14\n`; 2 + | Lop(Iload((Double | Double_u), addr)) when !fpu = Soft -> + (* Use LDM or LDRD if possible *) + begin match i.res.(0), i.res.(1), addr with + {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0 + when rt < rt2 -> + ` ldm {emit_reg i.arg.(0)}, \{{emit_reg i.res.(0)}, {emit_reg i.res.(1)}}\n`; 1 + | {loc = Reg rt}, {loc = Reg rt2}, addr + when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 -> + ` ldrd {emit_reg i.res.(0)}, {emit_reg i.res.(1)}, {emit_addressing addr i.arg 0}\n`; 1 + | _ -> + let addr' = offset_addressing addr 4 in + if i.res.(0).loc <> i.arg.(0).loc then begin + ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`; + ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n` + end else begin + ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`; + ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n` + end; 2 + end | Lop(Iload(size, addr)) -> let r = i.res.(0) in let instr = @@ -343,65 +498,114 @@ let emit_instr i = | Byte_signed -> "ldrsb" | Sixteen_unsigned -> "ldrh" | Sixteen_signed -> "ldrsh" + | Double + | Double_u -> "fldd" | _ (* 32-bit quantities *) -> "ldr" in - ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; - 1 - | Lop(Istore((Double | Double_u), addr)) -> - let addr' = offset_addressing addr 4 in - ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`; - ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; - 2 + ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1 + | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 -> + ` fcvtsd s14, {emit_reg i.arg.(0)}\n`; + ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2 + | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft -> + (* Use STM or STRD if possible *) + begin match i.arg.(0), i.arg.(1), addr with + {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0 + when rt < rt2 -> + ` stm {emit_reg i.arg.(2)}, \{{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}}\n`; 1 + | {loc = Reg rt}, {loc = Reg rt2}, addr + when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 -> + ` strd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_addressing addr i.arg 2}\n`; 1 + | _ -> + let addr' = offset_addressing addr 4 in + ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`; + ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2 + end | Lop(Istore(size, addr)) -> let r = i.arg.(0) in let instr = match size with - Byte_unsigned | Byte_signed -> "strb" - | Sixteen_unsigned | Sixteen_signed -> "strh" + Byte_unsigned + | Byte_signed -> "strb" + | Sixteen_unsigned + | Sixteen_signed -> "strh" + | Double + | Double_u -> "fstd" | _ (* 32-bit quantities *) -> "str" in - ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; - 1 + ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1 | Lop(Ialloc n) -> + let lbl_frame = record_frame_label i.live i.dbg in if !fastcode_flag then begin - let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in - ` sub alloc_ptr, alloc_ptr, r12\n`; + let lbl_redo = new_label() in + `{emit_label lbl_redo}:`; + let ninstr = decompose_intconst + (Int32.of_int n) + (fun i -> + ` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in ` cmp alloc_ptr, alloc_limit\n`; - `{record_frame i.live} blcc caml_call_gc\n`; ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; - 4 + ni - end else if n = 8 || n = 12 || n = 16 then begin - `{record_frame i.live} bl caml_alloc{emit_int ((n-4)/4)}\n`; - ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; 2 + let lbl_call_gc = new_label() in + ` bcc {emit_label lbl_call_gc}\n`; + call_gc_sites := + { gc_lbl = lbl_call_gc; + gc_return_lbl = lbl_redo; + gc_frame_lbl = lbl_frame } :: !call_gc_sites; + 3 + ninstr end else begin - let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in - `{record_frame i.live} bl caml_allocN\n`; - ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; - 2 + ni + let ninstr = + begin match n with + 8 -> ` {emit_call "caml_alloc1"}\n`; 1 + | 12 -> ` {emit_call "caml_alloc2"}\n`; 1 + | 16 -> ` {emit_call "caml_alloc3"}\n`; 1 + | _ -> let ninstr = emit_intconst (phys_reg 7) (Int32.of_int n) in + ` {emit_call "caml_allocN"}\n`; 1 + ninstr + end in + `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; + 1 + ninstr end | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> let shift = name_for_shift_operation op in ` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} {emit_reg i.arg.(1)}\n`; 1 | Lop(Iintop(Icomp cmp)) -> - let comp = name_for_comparison cmp in + let compthen = name_for_comparison cmp in + let compelse = name_for_comparison (negate_integer_comparison cmp) in ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; - ` mov {emit_reg i.res.(0)}, #0\n`; - ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3 - | Lop(Iintop(Icheckbound)) -> + ` ite {emit_string compthen}\n`; + ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`; + ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4 + | Lop(Iintop_imm(Icomp cmp, n)) -> + let compthen = name_for_comparison cmp in + let compelse = name_for_comparison (negate_integer_comparison cmp) in + ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; + ` ite {emit_string compthen}\n`; + ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`; + ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4 + | Lop(Iintop Icheckbound) -> + let lbl = bound_error_label i.dbg in ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; - ` blls caml_ml_array_bound_error\n`; 2 + ` bls {emit_label lbl}\n`; 2 + | Lop(Iintop_imm(Icheckbound, n)) -> + let lbl = bound_error_label i.dbg in + ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; + ` bls {emit_label lbl}\n`; 2 + | Lop(Ispecific(Ishiftcheckbound shift)) -> + let lbl = bound_error_label i.dbg in + ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`; + ` bcs {emit_label lbl}\n`; 2 | Lop(Iintop op) -> let instr = name_for_int_operation op in - ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1 + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1 | Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *) let l = Misc.log2 n in let r = i.res.(0) in ` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`; - if n <= 256 then + if n <= 256 then begin + ` it lt\n`; ` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n` - else begin + end else begin + ` itt lt\n`; ` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`; ` sublt {emit_reg r}, {emit_reg r}, #1\n` end; - ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 4 + ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5 | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *) let l = Misc.log2 n in let a = i.arg.(0) in @@ -412,47 +616,78 @@ let emit_instr i = ` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`; ` bpl {emit_label lbl}\n`; ` cmp {emit_reg r}, #0\n`; + ` it ne\n`; ` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`; - `{emit_label lbl}:\n`; 6 + `{emit_label lbl}:\n`; 7 | Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) -> let shift = name_for_shift_operation op in ` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} #{emit_int n}\n`; 1 - | Lop(Iintop_imm(Icomp cmp, n)) -> - let comp = name_for_comparison cmp in - ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; - ` mov {emit_reg i.res.(0)}, #0\n`; - ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3 - | Lop(Iintop_imm(Icheckbound, n)) -> - ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; - ` blls caml_ml_array_bound_error\n`; 2 | Lop(Iintop_imm(op, n)) -> let instr = name_for_int_operation op in - ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1 - | Lop(Inegf) -> (* argument and result in (r0, r1) *) - ` eor r1, r1, #0x80000000\n`; 1 - | Lop(Iabsf) -> (* argument and result in (r0, r1) *) - ` bic r1, r1, #0x80000000\n`; 1 - | Lop(Ifloatofint | Iintoffloat | Iaddf | Isubf | Imulf | Idivf) -> - assert false + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1 + | Lop(Iabsf | Inegf as op) when !fpu = Soft -> + let instr = (match op with + Iabsf -> "bic" + | Inegf -> "eor" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(1)}, {emit_reg i.arg.(1)}, #0x80000000\n`; 1 + | Lop(Iabsf | Inegf | Ispecific Isqrtf as op) -> + let instr = (match op with + Iabsf -> "fabsd" + | Inegf -> "fnegd" + | Ispecific Isqrtf -> "fsqrtd" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1 + | Lop(Ifloatofint) -> + ` fmsr s14, {emit_reg i.arg.(0)}\n`; + ` fsitod {emit_reg i.res.(0)}, s14\n`; 2 + | Lop(Iintoffloat) -> + ` ftosizd s14, {emit_reg i.arg.(0)}\n`; + ` fmrs {emit_reg i.res.(0)}, s14\n`; 2 + | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) -> + let instr = (match op with + Iaddf -> "faddd" + | Isubf -> "fsubd" + | Imulf -> "fmuld" + | Idivf -> "fdivd" + | Ispecific Inegmulf -> "fnmuld" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + 1 + | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) -> + let instr = (match op with + Imuladdf -> "fmacd" + | Inegmuladdf -> "fnmacd" + | Imulsubf -> "fmscd" + | Inegmulsubf -> "fnmscd" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; + 1 | Lop(Ispecific(Ishiftarith(op, shift))) -> - let instr = name_for_shift_int_operation op in - ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`; + let instr = (match op with + Ishiftadd -> "add" + | Ishiftsub -> "sub" + | Ishiftsubrev -> "rsb") in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`; if shift >= 0 then `, lsl #{emit_int shift}\n` else `, asr #{emit_int (-shift)}\n`; 1 - | Lop(Ispecific(Ishiftcheckbound shift)) -> - ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`; - ` blcs caml_ml_array_bound_error\n`; 2 | Lop(Ispecific(Irevsubimm n)) -> ` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1 + | Lop(Ispecific(Imuladd | Imulsub as op)) -> + let instr = (match op with + Imuladd -> "mla" + | Imulsub -> "mls" + | _ -> assert false) in + ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1 | Lreloadretaddr -> let n = frame_size() in ` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1 | Lreturn -> - let ninstr = emit_stack_adjustment "add" (frame_size()) in - ` bx lr\n`; - ninstr + 1 + output_epilogue begin fun () -> + ` bx lr\n`; 1 + end | Llabel lbl -> `{emit_label lbl}:\n`; 0 | Lbranch lbl -> @@ -461,29 +696,41 @@ let emit_instr i = begin match tst with Itruetest -> ` cmp {emit_reg i.arg.(0)}, #0\n`; - ` bne {emit_label lbl}\n` + ` bne {emit_label lbl}\n`; 2 | Ifalsetest -> ` cmp {emit_reg i.arg.(0)}, #0\n`; - ` beq {emit_label lbl}\n` + ` beq {emit_label lbl}\n`; 2 | Iinttest cmp -> ` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; let comp = name_for_comparison cmp in - ` b{emit_string comp} {emit_label lbl}\n` + ` b{emit_string comp} {emit_label lbl}\n`; 2 | Iinttest_imm(cmp, n) -> ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`; let comp = name_for_comparison cmp in - ` b{emit_string comp} {emit_label lbl}\n` + ` b{emit_string comp} {emit_label lbl}\n`; 2 | Ifloattest(cmp, neg) -> - assert false + let comp = (match (cmp, neg) with + (Ceq, false) | (Cne, true) -> "eq" + | (Cne, false) | (Ceq, true) -> "ne" + | (Clt, false) -> "cc" + | (Clt, true) -> "cs" + | (Cle, false) -> "ls" + | (Cle, true) -> "hi" + | (Cgt, false) -> "gt" + | (Cgt, true) -> "le" + | (Cge, false) -> "ge" + | (Cge, true) -> "lt") in + ` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` fmstat\n`; + ` b{emit_string comp} {emit_label lbl}\n`; 3 | Ioddtest -> ` tst {emit_reg i.arg.(0)}, #1\n`; - ` bne {emit_label lbl}\n` + ` bne {emit_label lbl}\n`; 2 | Ieventest -> ` tst {emit_reg i.arg.(0)}, #1\n`; - ` beq {emit_label lbl}\n` - end; - 2 - | Lcondbranch3(lbl0, lbl1, lbl2) -> + ` beq {emit_label lbl}\n`; 2 + end + | Lcondbranch3(lbl0, lbl1, lbl2) -> ` cmp {emit_reg i.arg.(0)}, #1\n`; begin match lbl0 with None -> () @@ -498,108 +745,151 @@ let emit_instr i = | Some lbl -> ` bgt {emit_label lbl}\n` end; 4 - | Lswitch jumptbl -> - ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`; - ` mov r0, r0\n`; (* nop *) - for i = 0 to Array.length jumptbl - 1 do - ` .word {emit_label jumptbl.(i)}\n` - done; + | Lswitch jumptbl -> + if !arch > ARMv6 && !thumb then begin + (* The Thumb-2 TBH instruction supports only forward branches, + so we need to generate appropriate trampolines for all labels + that appear before this switch instruction (PR#5623) *) + let tramtbl = Array.copy jumptbl in + ` tbh [pc, {emit_reg i.arg.(0)}, lsl #1]\n`; + for j = 0 to Array.length tramtbl - 1 do + let rec label i = + match i.desc with + Lend -> new_label() + | Llabel lbl when lbl = tramtbl.(j) -> lbl + | _ -> label i.next in + tramtbl.(j) <- label i.next; + ` .short ({emit_label tramtbl.(j)}-.)/2+{emit_int j}\n` + done; + (* Generate the necessary trampolines *) + for j = 0 to Array.length tramtbl - 1 do + if tramtbl.(j) <> jumptbl.(j) then + `{emit_label tramtbl.(j)}: b {emit_label jumptbl.(j)}\n` + done + end else if not !pic_code then begin + ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`; + ` nop\n`; + for j = 0 to Array.length jumptbl - 1 do + ` .word {emit_label jumptbl.(j)}\n` + done + end else begin + (* Slightly slower, but position-independent *) + ` add pc, pc, {emit_reg i.arg.(0)}, lsl #2\n`; + ` nop\n`; + for j = 0 to Array.length jumptbl - 1 do + ` b {emit_label jumptbl.(j)}\n` + done + end; 2 + Array.length jumptbl | Lsetuptrap lbl -> ` bl {emit_label lbl}\n`; 1 | Lpushtrap -> stack_offset := !stack_offset + 8; - ` stmfd sp!, \{trap_ptr, lr}\n`; + ` push \{trap_ptr, lr}\n`; + cfi_adjust_cfa_offset 8; ` mov trap_ptr, sp\n`; 2 | Lpoptrap -> - ` ldmfd sp!, \{trap_ptr, lr}\n`; + ` pop \{trap_ptr, lr}\n`; + cfi_adjust_cfa_offset (-8); stack_offset := !stack_offset - 8; 1 | Lraise -> - ` mov sp, trap_ptr\n`; - ` ldmfd sp!, \{trap_ptr, pc}\n`; 2 + if !Clflags.debug then begin + ` {emit_call "caml_raise_exn"}\n`; + `{record_frame Reg.Set.empty i.dbg}\n`; 1 + end else begin + ` mov sp, trap_ptr\n`; + ` pop \{trap_ptr, pc}\n`; 2 + end (* Emission of an instruction sequence *) -let no_fallthrough = function - Lop(Itailcall_ind | Itailcall_imm _) -> true - | Lreturn -> true - | Lbranch _ -> true - | Lswitch _ -> true - | Lraise -> true - | _ -> false - let rec emit_all ninstr i = if i.desc = Lend then () else begin let n = emit_instr i in let ninstr' = ninstr + n in - let limit = 511 - !num_literals in - if ninstr' >= limit - 64 && no_fallthrough i.desc then begin - emit_constants(); + (* fldd can address up to +/-1KB, ldr can address up to +/-4KB *) + let limit = (if !fpu >= VFPv3_D16 && !float_literals <> [] + then 127 + else 511) in + let limit = limit - !num_literals in + if ninstr' >= limit - 64 && not(has_fallthrough i.desc) then begin + emit_literals(); emit_all 0 i.next - end else - if ninstr' >= limit then begin + end else if !num_literals != 0 && ninstr' >= limit then begin let lbl = new_label() in ` b {emit_label lbl}\n`; - emit_constants(); + emit_literals(); `{emit_label lbl}:\n`; emit_all 0 i.next end else emit_all ninstr' i.next end +(* Emission of the profiling prelude *) + +let emit_profile() = + match Config.system with + "linux_eabi" | "linux_eabihf" -> + ` push \{lr}\n`; + ` {emit_call "__gnu_mcount_nc"}\n` + | _ -> () + (* Emission of a function declaration *) let fundecl fundecl = function_name := fundecl.fun_name; fastcode_flag := fundecl.fun_fast; tailrec_entry_point := new_label(); + float_literals := []; + gotrel_literals := []; + symbol_literals := []; stack_offset := 0; - Hashtbl.clear symbol_constants; - Hashtbl.clear float_constants; + call_gc_sites := []; + bound_error_sites := []; ` .text\n`; ` .align 2\n`; - ` .global {emit_symbol fundecl.fun_name}\n`; + ` .globl {emit_symbol fundecl.fun_name}\n`; + if !arch > ARMv6 && !thumb then + ` .thumb\n` + else + ` .arm\n`; ` .type {emit_symbol fundecl.fun_name}, %function\n`; `{emit_symbol fundecl.fun_name}:\n`; + emit_debug_info fundecl.fun_dbg; + cfi_startproc(); + if !Clflags.gprofile then emit_profile(); let n = frame_size() in - ignore(emit_stack_adjustment "sub" n); - if !contains_calls then - ` str lr, [sp, #{emit_int(n - 4)}]\n`; + if n > 0 then begin + ignore(emit_stack_adjustment (-n)); + if !contains_calls then + ` str lr, [sp, #{emit_int(n - 4)}]\n` + end; `{emit_label !tailrec_entry_point}:\n`; emit_all 0 fundecl.fun_body; - emit_constants() + emit_literals(); + List.iter emit_call_gc !call_gc_sites; + List.iter emit_call_bound_error !bound_error_sites; + cfi_endproc(); + ` .type {emit_symbol fundecl.fun_name}, %function\n`; + ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n` (* Emission of data *) let emit_item = function - Cglobal_symbol s -> - ` .global {emit_symbol s}\n`; - | Cdefine_symbol s -> - `{emit_symbol s}:\n` - | Cdefine_label lbl -> - `{emit_data_label lbl}:\n` - | Cint8 n -> - ` .byte {emit_int n}\n` - | Cint16 n -> - ` .short {emit_int n}\n` - | Cint32 n -> - ` .word {emit_nativeint n}\n` - | Cint n -> - ` .word {emit_nativeint n}\n` - | Csingle f -> - emit_float32_directive ".long" f - | Cdouble f -> - emit_float64_split_directive ".long" f - | Csymbol_address s -> - ` .word {emit_symbol s}\n` - | Clabel_address lbl -> - ` .word {emit_data_label lbl}\n` - | Cstring s -> - emit_string_directive " .ascii " s - | Cskip n -> - if n > 0 then ` .space {emit_int n}\n` - | Calign n -> - ` .align {emit_int(Misc.log2 n)}\n` + Cglobal_symbol s -> ` .globl {emit_symbol s}\n`; + | Cdefine_symbol s -> `{emit_symbol s}:\n` + | Cdefine_label lbl -> `{emit_data_label lbl}:\n` + | Cint8 n -> ` .byte {emit_int n}\n` + | Cint16 n -> ` .short {emit_int n}\n` + | Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n` + | Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n` + | Csingle f -> ` .single {emit_string f}\n` + | Cdouble f -> ` .double {emit_string f}\n` + | Csymbol_address s -> ` .word {emit_symbol s}\n` + | Clabel_address lbl -> ` .word {emit_data_label lbl}\n` + | Cstring s -> emit_string_directive " .ascii " s + | Cskip n -> if n > 0 then ` .space {emit_int n}\n` + | Calign n -> ` .align {emit_int(Misc.log2 n)}\n` let data l = ` .data\n`; @@ -608,32 +898,63 @@ let data l = (* Beginning / end of an assembly file *) let begin_assembly() = - `trap_ptr .req r11\n`; - `alloc_ptr .req r8\n`; - `alloc_limit .req r10\n`; + reset_debug_info(); + ` .syntax unified\n`; + begin match !arch with + | ARMv4 -> ` .arch armv4t\n` + | ARMv5 -> ` .arch armv5t\n` + | ARMv5TE -> ` .arch armv5te\n` + | ARMv6 -> ` .arch armv6\n` + | ARMv6T2 -> ` .arch armv6t2\n` + | ARMv7 -> ` .arch armv7-a\n` + end; + begin match !fpu with + Soft -> ` .fpu softvfp\n` + | VFPv3_D16 -> ` .fpu vfpv3-d16\n` + | VFPv3 -> ` .fpu vfpv3\n` + end; + `trap_ptr .req r8\n`; + `alloc_ptr .req r10\n`; + `alloc_limit .req r11\n`; let lbl_begin = Compilenv.make_symbol (Some "data_begin") in ` .data\n`; - ` .global {emit_symbol lbl_begin}\n`; + ` .globl {emit_symbol lbl_begin}\n`; `{emit_symbol lbl_begin}:\n`; let lbl_begin = Compilenv.make_symbol (Some "code_begin") in ` .text\n`; - ` .global {emit_symbol lbl_begin}\n`; + ` .globl {emit_symbol lbl_begin}\n`; `{emit_symbol lbl_begin}:\n` let end_assembly () = let lbl_end = Compilenv.make_symbol (Some "code_end") in ` .text\n`; - ` .global {emit_symbol lbl_end}\n`; + ` .globl {emit_symbol lbl_end}\n`; `{emit_symbol lbl_end}:\n`; let lbl_end = Compilenv.make_symbol (Some "data_end") in ` .data\n`; - ` .global {emit_symbol lbl_end}\n`; + ` .globl {emit_symbol lbl_end}\n`; `{emit_symbol lbl_end}:\n`; - ` .word 0\n`; + ` .long 0\n`; let lbl = Compilenv.make_symbol (Some "frametable") in - ` .data\n`; - ` .global {emit_symbol lbl}\n`; + ` .globl {emit_symbol lbl}\n`; `{emit_symbol lbl}:\n`; - ` .word {emit_int (List.length !frame_descriptors)}\n`; - List.iter emit_frame !frame_descriptors; - frame_descriptors := [] + emit_frames + { efa_label = (fun lbl -> + ` .type {emit_label lbl}, %function\n`; + ` .word {emit_label lbl}\n`); + efa_16 = (fun n -> ` .short {emit_int n}\n`); + efa_32 = (fun n -> ` .long {emit_int32 n}\n`); + efa_word = (fun n -> ` .word {emit_int n}\n`); + efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`); + efa_label_rel = (fun lbl ofs -> + ` .word {emit_label lbl} - . + {emit_int32 ofs}\n`); + efa_def_label = (fun lbl -> `{emit_label lbl}:\n`); + efa_string = (fun s -> emit_string_directive " .asciz " s) }; + ` .type {emit_symbol lbl}, %object\n`; + ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`; + begin match Config.system with + "linux_eabihf" | "linux_eabi" -> + (* Mark stack as non-executable *) + ` .section .note.GNU-stack,\"\",%progbits\n` + | _ -> () + end |