summaryrefslogtreecommitdiff
path: root/asmcomp/arm/emit.mlp
diff options
context:
space:
mode:
Diffstat (limited to 'asmcomp/arm/emit.mlp')
-rw-r--r--asmcomp/arm/emit.mlp919
1 files changed, 620 insertions, 299 deletions
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index dd995f26be..b0baf86523 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -2,11 +2,12 @@
(* *)
(* OCaml *)
(* *)
-(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* Benedikt Meurer, University of Siegen *)
(* *)
-(* Copyright 1998 Institut National de Recherche en Informatique et *)
-(* en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0. *)
+(* Copyright 1998 Institut National de Recherche en Informatique *)
+(* et en Automatique. Copyright 2012 Benedikt Meurer. All rights *)
+(* reserved. This file is distributed under the terms of the Q *)
+(* Public License version 1.0. *)
(* *)
(***********************************************************************)
@@ -36,16 +37,25 @@ let emit_label lbl =
let emit_data_label lbl =
emit_string ".Ld"; emit_int lbl
-(* Output a symbol *)
+(* Symbols *)
let emit_symbol s =
Emitaux.emit_symbol '$' s
+let emit_call s =
+ if !Clflags.dlcode || !pic_code
+ then `bl {emit_symbol s}(PLT)`
+ else `bl {emit_symbol s}`
+
+let emit_jump s =
+ if !Clflags.dlcode || !pic_code
+ then `b {emit_symbol s}(PLT)`
+ else `b {emit_symbol s}`
+
(* Output a pseudo-register *)
-let emit_reg r =
- match r.loc with
- | Reg r -> emit_string (register_name r)
+let emit_reg = function
+ {loc = Reg r} -> emit_string (register_name r)
| _ -> fatal_error "Emit_arm.emit_reg"
(* Layout of the stack frame *)
@@ -56,14 +66,23 @@ let frame_size () =
let sz =
!stack_offset +
4 * num_stack_slots.(0) +
+ 8 * num_stack_slots.(1) +
+ 8 * num_stack_slots.(2) +
(if !contains_calls then 4 else 0)
in Misc.align sz 8
let slot_offset loc cl =
match loc with
- Incoming n -> frame_size() + n
- | Local n -> !stack_offset + n * 4
- | Outgoing n -> n
+ Incoming n ->
+ assert (n >= 0);
+ frame_size() + n
+ | Local n ->
+ if cl = 0
+ then !stack_offset + n * 4
+ else !stack_offset + num_stack_slots.(0) * 4 + n * 8
+ | Outgoing n ->
+ assert (n >= 0);
+ n
(* Output a stack reference *)
@@ -82,20 +101,13 @@ let emit_addressing addr r n =
(* Record live pointers at call points *)
-type frame_descr =
- { fd_lbl: int; (* Return address *)
- fd_frame_size: int; (* Size of stack frame *)
- fd_live_offset: int list } (* Offsets/regs of live addresses *)
-
-let frame_descriptors = ref([] : frame_descr list)
-
-let record_frame live =
+let record_frame_label live dbg =
let lbl = new_label() in
let live_offset = ref [] in
Reg.Set.iter
(function
{typ = Addr; loc = Reg r} ->
- live_offset := (r lsl 1) + 1 :: !live_offset
+ live_offset := ((r lsl 1) + 1) :: !live_offset
| {typ = Addr; loc = Stack s} as reg ->
live_offset := slot_offset s (register_class reg) :: !live_offset
| _ -> ())
@@ -103,18 +115,57 @@ let record_frame live =
frame_descriptors :=
{ fd_lbl = lbl;
fd_frame_size = frame_size();
- fd_live_offset = !live_offset } :: !frame_descriptors;
- `{emit_label lbl}:`
-
-let emit_frame fd =
- ` .word {emit_label fd.fd_lbl} + 4\n`;
- ` .short {emit_int fd.fd_frame_size}\n`;
- ` .short {emit_int (List.length fd.fd_live_offset)}\n`;
- List.iter
- (fun n ->
- ` .short {emit_int n}\n`)
- fd.fd_live_offset;
- ` .align 2\n`
+ fd_live_offset = !live_offset;
+ fd_debuginfo = dbg } :: !frame_descriptors;
+ lbl
+
+let record_frame live dbg =
+ let lbl = record_frame_label live dbg in `{emit_label lbl}:`
+
+(* Record calls to the GC -- we've moved them out of the way *)
+
+type gc_call =
+ { gc_lbl: label; (* Entry label *)
+ gc_return_lbl: label; (* Where to branch after GC *)
+ gc_frame_lbl: label } (* Label of frame descriptor *)
+
+let call_gc_sites = ref ([] : gc_call list)
+
+let emit_call_gc gc =
+ `{emit_label gc.gc_lbl}: {emit_call "caml_call_gc"}\n`;
+ `{emit_label gc.gc_frame_lbl}: b {emit_label gc.gc_return_lbl}\n`
+
+(* Record calls to caml_ml_array_bound_error.
+ In debug mode, we maintain one call to caml_ml_array_bound_error
+ per bound check site. Otherwise, we can share a single call. *)
+
+type bound_error_call =
+ { bd_lbl: label; (* Entry label *)
+ bd_frame_lbl: label } (* Label of frame descriptor *)
+
+let bound_error_sites = ref ([] : bound_error_call list)
+
+let bound_error_label dbg =
+ if !Clflags.debug || !bound_error_sites = [] then begin
+ let lbl_bound_error = new_label() in
+ let lbl_frame = record_frame_label Reg.Set.empty dbg in
+ bound_error_sites :=
+ { bd_lbl = lbl_bound_error;
+ bd_frame_lbl = lbl_frame } :: !bound_error_sites;
+ lbl_bound_error
+ end else begin
+ let bd = List.hd !bound_error_sites in bd.bd_lbl
+ end
+
+let emit_call_bound_error bd =
+ `{emit_label bd.bd_lbl}: {emit_call "caml_ml_array_bound_error"}\n`;
+ `{emit_label bd.bd_frame_lbl}:\n`
+
+(* Negate a comparison *)
+
+let negate_integer_comparison = function
+ Isigned cmp -> Isigned(negate_comparison cmp)
+ | Iunsigned cmp -> Iunsigned(negate_comparison cmp)
(* Names of various instructions *)
@@ -124,22 +175,13 @@ let name_for_comparison = function
| Iunsigned Ceq -> "eq" | Iunsigned Cne -> "ne" | Iunsigned Cle -> "ls"
| Iunsigned Cge -> "cs" | Iunsigned Clt -> "cc" | Iunsigned Cgt -> "hi"
-let name_for_float_comparison cmp neg =
- match cmp with
- Ceq -> if neg then "ne" else "eq"
- | Cne -> if neg then "eq" else "ne"
- | Cle -> if neg then "hi" else "ls"
- | Cge -> if neg then "lt" else "ge"
- | Clt -> if neg then "pl" else "mi"
- | Cgt -> if neg then "le" else "gt"
-
let name_for_int_operation = function
Iadd -> "add"
| Isub -> "sub"
| Imul -> "mul"
- | Iand -> "and"
- | Ior -> "orr"
- | Ixor -> "eor"
+ | Iand -> "and"
+ | Ior -> "orr"
+ | Ixor -> "eor"
| _ -> assert false
let name_for_shift_operation = function
@@ -148,193 +190,306 @@ let name_for_shift_operation = function
| Iasr -> "asr"
| _ -> assert false
-let name_for_shift_int_operation = function
- Ishiftadd -> "add"
- | Ishiftsub -> "sub"
- | Ishiftsubrev -> "rsb"
-
-(* Recognize immediate operands *)
-
-(* Immediate operands are 8-bit immediate values, zero-extended, and rotated
- right by 0, 2, 4, ... 30 bits.
- We check only with 8-bit values shifted left 0 to 24 bits. *)
-
-let rec is_immed n shift =
- shift <= 24 &&
- (Nativeint.logand n (Nativeint.shift_left (Nativeint.of_int 0xFF) shift) = n
- || is_immed n (shift + 2))
-
-let is_immediate n = is_immed n 0
-
(* General functional to decompose a non-immediate integer constant
- into 8-bit chunks shifted left 0 ... 24 bits *)
+ into 8-bit chunks shifted left 0 ... 30 bits. *)
let decompose_intconst n fn =
let i = ref n in
let shift = ref 0 in
let ninstr = ref 0 in
- while !i <> 0n do
- if Nativeint.to_int (Nativeint.shift_right !i !shift) land 3 = 0 then
+ while !i <> 0l do
+ if Int32.logand (Int32.shift_right !i !shift) 3l = 0l then
shift := !shift + 2
else begin
- let mask = Nativeint.shift_left 0xFFn !shift in
- let bits = Nativeint.logand !i mask in
- fn bits;
+ let bits = Int32.logand !i (Int32.shift_left 0xffl !shift) in
+ i := Int32.sub !i bits;
shift := !shift + 8;
- i := Nativeint.sub !i bits;
- incr ninstr
+ incr ninstr;
+ fn bits
end
done;
!ninstr
(* Load an integer constant into a register *)
-let emit_intconst r n =
- let nr = Nativeint.lognot n in
+let emit_intconst dst n =
+ let nr = Int32.lognot n in
if is_immediate n then begin
- ` mov {emit_reg r}, #{emit_nativeint n}\n`; 1
+ (* Use movs here to enable 16-bit T1 encoding *)
+ ` movs {emit_reg dst}, #{emit_int32 n}\n`; 1
end else if is_immediate nr then begin
- ` mvn {emit_reg r}, #{emit_nativeint nr}\n`; 1
+ ` mvn {emit_reg dst}, #{emit_int32 nr}\n`; 1
+ end else if !arch > ARMv6 then begin
+ let nl = Int32.logand 0xffffl n in
+ let nh = Int32.logand 0xffffl (Int32.shift_right_logical n 16) in
+ if nh = 0l then begin
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`; 1
+ end else if Int32.logand nl 0xffl = nl then begin
+ ` movs {emit_reg dst}, #{emit_int32 nl}\n`;
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
+ end else begin
+ ` movw {emit_reg dst}, #{emit_int32 nl}\n`;
+ ` movt {emit_reg dst}, #{emit_int32 nh}\n`; 2
+ end
end else begin
let first = ref true in
decompose_intconst n
(fun bits ->
if !first
- then ` mov {emit_reg r}, #{emit_nativeint bits} @ {emit_nativeint n}\n`
- else ` add {emit_reg r}, {emit_reg r}, #{emit_nativeint bits}\n`;
+ then ` mov {emit_reg dst}, #{emit_int32 bits} @ {emit_int32 n}\n`
+ else ` add {emit_reg dst}, {emit_reg dst}, #{emit_int32 bits}\n`;
first := false)
end
(* Adjust sp (up or down) by the given byte amount *)
-let emit_stack_adjustment instr n =
- if n <= 0 then 0 else
- decompose_intconst (Nativeint.of_int n)
- (fun bits ->
- ` {emit_string instr} sp, sp, #{emit_nativeint bits}\n`)
+let emit_stack_adjustment n =
+ if n = 0 then 0 else begin
+ let instr = if n < 0 then "sub" else "add" in
+ let ninstr = decompose_intconst (Int32.of_int (abs n))
+ (fun bits ->
+ ` {emit_string instr} sp, sp, #{emit_int32 bits}\n`) in
+ cfi_adjust_cfa_offset (-n);
+ ninstr
+ end
+
+(* Deallocate the stack frame before a return or tail call *)
+
+let output_epilogue f =
+ let n = frame_size() in
+ if n > 0 then begin
+ let ninstr = emit_stack_adjustment n in
+ let ninstr = ninstr + f () in
+ (* reset CFA back cause function body may continue *)
+ cfi_adjust_cfa_offset n;
+ ninstr
+ end else
+ f ()
(* Name of current function *)
let function_name = ref ""
(* Entry point for tail recursive calls *)
let tailrec_entry_point = ref 0
-(* Table of symbols referenced *)
-let symbol_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
-(* Table of floating-point literals *)
-let float_constants = (Hashtbl.create 11 : (string, int) Hashtbl.t)
-(* Total space (in word) occupied by pending literals *)
+(* Pending floating-point literals *)
+let float_literals = ref ([] : (string * label) list)
+(* Pending relative references to the global offset table *)
+let gotrel_literals = ref ([] : (label * label) list)
+(* Pending symbol literals *)
+let symbol_literals = ref ([] : (string * label) list)
+(* Total space (in words) occupied by pending literals *)
let num_literals = ref 0
-(* Label a symbol or float constant *)
-let label_constant tbl s size =
+(* Label a floating-point literal *)
+let float_literal f =
try
- Hashtbl.find tbl s
+ List.assoc f !float_literals
with Not_found ->
let lbl = new_label() in
- Hashtbl.add tbl s lbl;
- num_literals := !num_literals + size;
+ num_literals := !num_literals + 2;
+ float_literals := (f, lbl) :: !float_literals;
lbl
-(* Emit all pending constants *)
-
-let emit_constants () =
- Hashtbl.iter
- (fun s lbl ->
- `{emit_label lbl}: .word {emit_symbol s}\n`)
- symbol_constants;
- Hashtbl.iter
- (fun s lbl ->
- `{emit_label lbl}: .double {emit_string s}\n`)
- float_constants;
- Hashtbl.clear symbol_constants;
- Hashtbl.clear float_constants;
+(* Label a GOTREL literal *)
+let gotrel_literal l =
+ let lbl = new_label() in
+ num_literals := !num_literals + 1;
+ gotrel_literals := (l, lbl) :: !gotrel_literals;
+ lbl
+
+(* Label a symbol literal *)
+let symbol_literal s =
+ try
+ List.assoc s !symbol_literals
+ with Not_found ->
+ let lbl = new_label() in
+ num_literals := !num_literals + 1;
+ symbol_literals := (s, lbl) :: !symbol_literals;
+ lbl
+
+(* Emit all pending literals *)
+let emit_literals() =
+ if !float_literals <> [] then begin
+ ` .align 3\n`;
+ List.iter
+ (fun (f, lbl) ->
+ `{emit_label lbl}: .double {emit_string f}\n`)
+ !float_literals;
+ float_literals := []
+ end;
+ if !symbol_literals <> [] then begin
+ let offset = if !thumb then 4 else 8 in
+ let suffix = if !pic_code then "(GOT)" else "" in
+ ` .align 2\n`;
+ List.iter
+ (fun (l, lbl) ->
+ `{emit_label lbl}: .word _GLOBAL_OFFSET_TABLE_-({emit_label l}+{emit_int offset})\n`)
+ !gotrel_literals;
+ List.iter
+ (fun (s, lbl) ->
+ `{emit_label lbl}: .word {emit_symbol s}{emit_string suffix}\n`)
+ !symbol_literals;
+ gotrel_literals := [];
+ symbol_literals := []
+ end;
num_literals := 0
+(* Emit code to load the address of a symbol *)
+
+let emit_load_symbol_addr dst s =
+ if !pic_code then begin
+ let lbl_pic = new_label() in
+ let lbl_got = gotrel_literal lbl_pic in
+ let lbl_sym = symbol_literal s in
+ (* Both r3 and r12 are marked as clobbered in PIC mode (cf. proc.ml),
+ so use r12 as temporary scratch register unless the destination is
+ r12, then we use r3 instead. *)
+ let tmp = if dst.loc = Reg 8 (*r12*)
+ then phys_reg 3 (*r3*)
+ else phys_reg 8 (*r12*) in
+ ` ldr {emit_reg tmp}, {emit_label lbl_got}\n`;
+ ` ldr {emit_reg dst}, {emit_label lbl_sym}\n`;
+ `{emit_label lbl_pic}: add {emit_reg tmp}, pc, {emit_reg tmp}\n`;
+ ` ldr {emit_reg dst}, [{emit_reg tmp}, {emit_reg dst}] @ {emit_symbol s}\n`;
+ 4
+ end else if !arch > ARMv6 && not !Clflags.dlcode && !fastcode_flag then begin
+ ` movw {emit_reg dst}, #:lower16:{emit_symbol s}\n`;
+ ` movt {emit_reg dst}, #:upper16:{emit_symbol s}\n`;
+ 2
+ end else begin
+ let lbl = symbol_literal s in
+ ` ldr {emit_reg dst}, {emit_label lbl} @ {emit_symbol s}\n`;
+ 1
+ end
+
(* Output the assembly code for an instruction *)
let emit_instr i =
+ emit_debug_info i.dbg;
match i.desc with
Lend -> 0
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
if src.loc = dst.loc then 0 else begin
- match (src, dst) with
- {loc = Reg rs; typ = Int|Addr}, {loc = Reg rd; typ = Int|Addr} ->
- ` mov {emit_reg dst}, {emit_reg src}\n`; 1
- | {loc = Reg rs; typ = Int|Addr}, {loc = Stack sd} ->
- ` str {emit_reg src}, {emit_stack dst}\n`; 1
- | {loc = Stack ss; typ = Int|Addr}, {loc = Reg rd} ->
- ` ldr {emit_reg dst}, {emit_stack src}\n`; 1
+ begin match (src, dst) with
+ {loc = Reg _; typ = Float}, {loc = Reg _} ->
+ ` fcpyd {emit_reg dst}, {emit_reg src}\n`
+ | {loc = Reg _}, {loc = Reg _} ->
+ ` mov {emit_reg dst}, {emit_reg src}\n`
+ | {loc = Reg _; typ = Float}, _ ->
+ ` fstd {emit_reg src}, {emit_stack dst}\n`
+ | {loc = Reg _}, _ ->
+ ` str {emit_reg src}, {emit_stack dst}\n`
+ | {typ = Float}, _ ->
+ ` fldd {emit_reg dst}, {emit_stack src}\n`
| _ ->
- assert false
+ ` ldr {emit_reg dst}, {emit_stack src}\n`
+ end; 1
end
| Lop(Iconst_int n) ->
- emit_intconst i.res.(0) n
- | Lop(Iconst_float s) ->
- let bits = Int64.bits_of_float (float_of_string s) in
- let high_bits = Int64.to_nativeint (Int64.shift_right_logical bits 32)
- and low_bits = Int64.to_nativeint bits in
- if is_immediate low_bits && is_immediate high_bits then begin
- ` mov {emit_reg i.res.(0)}, #{emit_nativeint low_bits} @ {emit_string s}\n`;
- ` mov {emit_reg i.res.(1)}, #{emit_nativeint high_bits}\n`;
- 2
+ emit_intconst i.res.(0) (Nativeint.to_int32 n)
+ | Lop(Iconst_float f) when !fpu = Soft ->
+ ` @ {emit_string f}\n`;
+ let bits = Int64.bits_of_float (float_of_string f) in
+ let high_bits = Int64.to_int32 (Int64.shift_right_logical bits 32)
+ and low_bits = Int64.to_int32 bits in
+ if is_immediate low_bits || is_immediate high_bits then begin
+ let ninstr_low = emit_intconst i.res.(0) low_bits
+ and ninstr_high = emit_intconst i.res.(1) high_bits in
+ ninstr_low + ninstr_high
end else begin
- let lbl = label_constant float_constants s 2 in
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string s}\n`;
+ let lbl = float_literal f in
+ ` ldr {emit_reg i.res.(0)}, {emit_label lbl}\n`;
` ldr {emit_reg i.res.(1)}, {emit_label lbl} + 4\n`;
2
end
+ | Lop(Iconst_float f) ->
+ let encode imm =
+ let sg = Int64.to_int (Int64.shift_right_logical imm 63) in
+ let ex = Int64.to_int (Int64.shift_right_logical imm 52) in
+ let ex = (ex land 0x7ff) - 1023 in
+ let mn = Int64.logand imm 0xfffffffffffffL in
+ if Int64.logand mn 0xffffffffffffL <> 0L || ex < -3 || ex > 4
+ then
+ None
+ else begin
+ let mn = Int64.to_int (Int64.shift_right_logical mn 48) in
+ if mn land 0x0f <> mn then
+ None
+ else
+ let ex = ((ex + 3) land 0x07) lxor 0x04 in
+ Some((sg lsl 7) lor (ex lsl 4) lor mn)
+ end in
+ begin match encode (Int64.bits_of_float (float_of_string f)) with
+ None ->
+ let lbl = float_literal f in
+ ` fldd {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_string f}\n`
+ | Some imm8 ->
+ ` fconstd {emit_reg i.res.(0)}, #{emit_int imm8} @ {emit_string f}\n`
+ end; 1
| Lop(Iconst_symbol s) ->
- let lbl = label_constant symbol_constants s 1 in
- ` ldr {emit_reg i.res.(0)}, {emit_label lbl} @ {emit_symbol s}\n`; 1
+ emit_load_symbol_addr i.res.(0) s
| Lop(Icall_ind) ->
- ` mov lr, pc\n`;
- `{record_frame i.live} bx {emit_reg i.arg.(0)}\n`; 2
+ if !arch >= ARMv5 then begin
+ ` blx {emit_reg i.arg.(0)}\n`;
+ `{record_frame i.live i.dbg}\n`; 1
+ end else begin
+ ` mov lr, pc\n`;
+ ` bx {emit_reg i.arg.(0)}\n`;
+ `{record_frame i.live i.dbg}\n`; 2
+ end
| Lop(Icall_imm s) ->
- `{record_frame i.live} bl {emit_symbol s}\n`; 1
+ ` {emit_call s}\n`;
+ `{record_frame i.live i.dbg}\n`; 1
| Lop(Itailcall_ind) ->
- let n = frame_size() in
- if !contains_calls then
- ` ldr lr, [sp, #{emit_int (n-4)}]\n`;
- let ninstr = emit_stack_adjustment "add" n in
- ` bx {emit_reg i.arg.(0)}\n`;
- 2 + ninstr
+ output_epilogue begin fun () ->
+ if !contains_calls then
+ ` ldr lr, [sp, #{emit_int (-4)}]\n`;
+ ` bx {emit_reg i.arg.(0)}\n`; 2
+ end
| Lop(Itailcall_imm s) ->
if s = !function_name then begin
` b {emit_label !tailrec_entry_point}\n`; 1
end else begin
- let n = frame_size() in
- if !contains_calls then
- ` ldr lr, [sp, #{emit_int (n-4)}]\n`;
- let ninstr = emit_stack_adjustment "add" n in
- ` b {emit_symbol s}\n`;
- 2 + ninstr
- end
- | Lop(Iextcall(s, alloc)) ->
- if alloc then begin
- let lbl = label_constant symbol_constants s 1 in
- ` ldr r12, {emit_label lbl} @ {emit_symbol s}\n`;
- `{record_frame i.live} bl caml_c_call\n`; 2
- end else begin
- ` bl {emit_symbol s}\n`; 1
+ output_epilogue begin fun () ->
+ if !contains_calls then
+ ` ldr lr, [sp, #{emit_int (-4)}]\n`;
+ ` {emit_jump s}\n`; 2
+ end
end
+ | Lop(Iextcall(s, false)) ->
+ ` {emit_call s}\n`; 1
+ | Lop(Iextcall(s, true)) ->
+ let ninstr = emit_load_symbol_addr (phys_reg 7 (* r7 *)) s in
+ ` {emit_call "caml_c_call"}\n`;
+ `{record_frame i.live i.dbg}\n`;
+ 1 + ninstr
| Lop(Istackoffset n) ->
assert (n mod 8 = 0);
- let ninstr =
- if n >= 0
- then emit_stack_adjustment "sub" n
- else emit_stack_adjustment "add" (-n) in
+ let ninstr = emit_stack_adjustment (-n) in
stack_offset := !stack_offset + n;
ninstr
- | Lop(Iload((Double | Double_u), addr)) ->
- let addr' = offset_addressing addr 4 in
- if i.res.(0).loc <> i.arg.(0).loc then begin
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
- end else begin
- ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
- ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
- end;
- 2
+ | Lop(Iload(Single, addr)) when !fpu >= VFPv3_D16 ->
+ ` flds s14, {emit_addressing addr i.arg 0}\n`;
+ ` fcvtds {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iload((Double | Double_u), addr)) when !fpu = Soft ->
+ (* Use LDM or LDRD if possible *)
+ begin match i.res.(0), i.res.(1), addr with
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
+ when rt < rt2 ->
+ ` ldm {emit_reg i.arg.(0)}, \{{emit_reg i.res.(0)}, {emit_reg i.res.(1)}}\n`; 1
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
+ ` ldrd {emit_reg i.res.(0)}, {emit_reg i.res.(1)}, {emit_addressing addr i.arg 0}\n`; 1
+ | _ ->
+ let addr' = offset_addressing addr 4 in
+ if i.res.(0).loc <> i.arg.(0).loc then begin
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`;
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`
+ end else begin
+ ` ldr {emit_reg i.res.(1)}, {emit_addressing addr' i.arg 0}\n`;
+ ` ldr {emit_reg i.res.(0)}, {emit_addressing addr i.arg 0}\n`
+ end; 2
+ end
| Lop(Iload(size, addr)) ->
let r = i.res.(0) in
let instr =
@@ -343,65 +498,114 @@ let emit_instr i =
| Byte_signed -> "ldrsb"
| Sixteen_unsigned -> "ldrh"
| Sixteen_signed -> "ldrsh"
+ | Double
+ | Double_u -> "fldd"
| _ (* 32-bit quantities *) -> "ldr" in
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`;
- 1
- | Lop(Istore((Double | Double_u), addr)) ->
- let addr' = offset_addressing addr 4 in
- ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
- ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`;
- 2
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 0}\n`; 1
+ | Lop(Istore(Single, addr)) when !fpu >= VFPv3_D16 ->
+ ` fcvtsd s14, {emit_reg i.arg.(0)}\n`;
+ ` fsts s14, {emit_addressing addr i.arg 1}\n`; 2
+ | Lop(Istore((Double | Double_u), addr)) when !fpu = Soft ->
+ (* Use STM or STRD if possible *)
+ begin match i.arg.(0), i.arg.(1), addr with
+ {loc = Reg rt}, {loc = Reg rt2}, Iindexed 0
+ when rt < rt2 ->
+ ` stm {emit_reg i.arg.(2)}, \{{emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}}\n`; 1
+ | {loc = Reg rt}, {loc = Reg rt2}, addr
+ when !arch >= ARMv5TE && rt mod 2 == 0 && rt2 = rt + 1 ->
+ ` strd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_addressing addr i.arg 2}\n`; 1
+ | _ ->
+ let addr' = offset_addressing addr 4 in
+ ` str {emit_reg i.arg.(0)}, {emit_addressing addr i.arg 2}\n`;
+ ` str {emit_reg i.arg.(1)}, {emit_addressing addr' i.arg 2}\n`; 2
+ end
| Lop(Istore(size, addr)) ->
let r = i.arg.(0) in
let instr =
match size with
- Byte_unsigned | Byte_signed -> "strb"
- | Sixteen_unsigned | Sixteen_signed -> "strh"
+ Byte_unsigned
+ | Byte_signed -> "strb"
+ | Sixteen_unsigned
+ | Sixteen_signed -> "strh"
+ | Double
+ | Double_u -> "fstd"
| _ (* 32-bit quantities *) -> "str" in
- ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`;
- 1
+ ` {emit_string instr} {emit_reg r}, {emit_addressing addr i.arg 1}\n`; 1
| Lop(Ialloc n) ->
+ let lbl_frame = record_frame_label i.live i.dbg in
if !fastcode_flag then begin
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
- ` sub alloc_ptr, alloc_ptr, r12\n`;
+ let lbl_redo = new_label() in
+ `{emit_label lbl_redo}:`;
+ let ninstr = decompose_intconst
+ (Int32.of_int n)
+ (fun i ->
+ ` sub alloc_ptr, alloc_ptr, #{emit_int32 i}\n`) in
` cmp alloc_ptr, alloc_limit\n`;
- `{record_frame i.live} blcc caml_call_gc\n`;
` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
- 4 + ni
- end else if n = 8 || n = 12 || n = 16 then begin
- `{record_frame i.live} bl caml_alloc{emit_int ((n-4)/4)}\n`;
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`; 2
+ let lbl_call_gc = new_label() in
+ ` bcc {emit_label lbl_call_gc}\n`;
+ call_gc_sites :=
+ { gc_lbl = lbl_call_gc;
+ gc_return_lbl = lbl_redo;
+ gc_frame_lbl = lbl_frame } :: !call_gc_sites;
+ 3 + ninstr
end else begin
- let ni = emit_intconst (phys_reg 8 (*r12*)) (Nativeint.of_int n) in
- `{record_frame i.live} bl caml_allocN\n`;
- ` add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
- 2 + ni
+ let ninstr =
+ begin match n with
+ 8 -> ` {emit_call "caml_alloc1"}\n`; 1
+ | 12 -> ` {emit_call "caml_alloc2"}\n`; 1
+ | 16 -> ` {emit_call "caml_alloc3"}\n`; 1
+ | _ -> let ninstr = emit_intconst (phys_reg 7) (Int32.of_int n) in
+ ` {emit_call "caml_allocN"}\n`; 1 + ninstr
+ end in
+ `{emit_label lbl_frame}: add {emit_reg i.res.(0)}, alloc_ptr, #4\n`;
+ 1 + ninstr
end
| Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
let shift = name_for_shift_operation op in
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop(Icomp cmp)) ->
- let comp = name_for_comparison cmp in
+ let compthen = name_for_comparison cmp in
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
- ` mov {emit_reg i.res.(0)}, #0\n`;
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
- | Lop(Iintop(Icheckbound)) ->
+ ` ite {emit_string compthen}\n`;
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
+ | Lop(Iintop_imm(Icomp cmp, n)) ->
+ let compthen = name_for_comparison cmp in
+ let compelse = name_for_comparison (negate_integer_comparison cmp) in
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
+ ` ite {emit_string compthen}\n`;
+ ` mov{emit_string compthen} {emit_reg i.res.(0)}, #1\n`;
+ ` mov{emit_string compelse} {emit_reg i.res.(0)}, #0\n`; 4
+ | Lop(Iintop Icheckbound) ->
+ let lbl = bound_error_label i.dbg in
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
- ` blls caml_ml_array_bound_error\n`; 2
+ ` bls {emit_label lbl}\n`; 2
+ | Lop(Iintop_imm(Icheckbound, n)) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
+ ` bls {emit_label lbl}\n`; 2
+ | Lop(Ispecific(Ishiftcheckbound shift)) ->
+ let lbl = bound_error_label i.dbg in
+ ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
+ ` bcs {emit_label lbl}\n`; 2
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
| Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
let r = i.res.(0) in
` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
- if n <= 256 then
+ if n <= 256 then begin
+ ` it lt\n`;
` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
- else begin
+ end else begin
+ ` itt lt\n`;
` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
` sublt {emit_reg r}, {emit_reg r}, #1\n`
end;
- ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 4
+ ` mov {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5
| Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
let l = Misc.log2 n in
let a = i.arg.(0) in
@@ -412,47 +616,78 @@ let emit_instr i =
` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
` bpl {emit_label lbl}\n`;
` cmp {emit_reg r}, #0\n`;
+ ` it ne\n`;
` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
- `{emit_label lbl}:\n`; 6
+ `{emit_label lbl}:\n`; 7
| Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) ->
let shift = name_for_shift_operation op in
` mov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_string shift} #{emit_int n}\n`; 1
- | Lop(Iintop_imm(Icomp cmp, n)) ->
- let comp = name_for_comparison cmp in
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
- ` mov {emit_reg i.res.(0)}, #0\n`;
- ` mov{emit_string comp} {emit_reg i.res.(0)}, #1\n`; 3
- | Lop(Iintop_imm(Icheckbound, n)) ->
- ` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
- ` blls caml_ml_array_bound_error\n`; 2
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
- | Lop(Inegf) -> (* argument and result in (r0, r1) *)
- ` eor r1, r1, #0x80000000\n`; 1
- | Lop(Iabsf) -> (* argument and result in (r0, r1) *)
- ` bic r1, r1, #0x80000000\n`; 1
- | Lop(Ifloatofint | Iintoffloat | Iaddf | Isubf | Imulf | Idivf) ->
- assert false
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
+ | Lop(Iabsf | Inegf as op) when !fpu = Soft ->
+ let instr = (match op with
+ Iabsf -> "bic"
+ | Inegf -> "eor"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(1)}, {emit_reg i.arg.(1)}, #0x80000000\n`; 1
+ | Lop(Iabsf | Inegf | Ispecific Isqrtf as op) ->
+ let instr = (match op with
+ Iabsf -> "fabsd"
+ | Inegf -> "fnegd"
+ | Ispecific Isqrtf -> "fsqrtd"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`; 1
+ | Lop(Ifloatofint) ->
+ ` fmsr s14, {emit_reg i.arg.(0)}\n`;
+ ` fsitod {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iintoffloat) ->
+ ` ftosizd s14, {emit_reg i.arg.(0)}\n`;
+ ` fmrs {emit_reg i.res.(0)}, s14\n`; 2
+ | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific Inegmulf as op) ->
+ let instr = (match op with
+ Iaddf -> "faddd"
+ | Isubf -> "fsubd"
+ | Imulf -> "fmuld"
+ | Idivf -> "fdivd"
+ | Ispecific Inegmulf -> "fnmuld"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+ 1
+ | Lop(Ispecific(Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf as op)) ->
+ let instr = (match op with
+ Imuladdf -> "fmacd"
+ | Inegmuladdf -> "fnmacd"
+ | Imulsubf -> "fmscd"
+ | Inegmulsubf -> "fnmscd"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`;
+ 1
| Lop(Ispecific(Ishiftarith(op, shift))) ->
- let instr = name_for_shift_int_operation op in
- ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
+ let instr = (match op with
+ Ishiftadd -> "add"
+ | Ishiftsub -> "sub"
+ | Ishiftsubrev -> "rsb") in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}`;
if shift >= 0
then `, lsl #{emit_int shift}\n`
else `, asr #{emit_int (-shift)}\n`;
1
- | Lop(Ispecific(Ishiftcheckbound shift)) ->
- ` cmp {emit_reg i.arg.(1)}, {emit_reg i.arg.(0)}, lsr #{emit_int shift}\n`;
- ` blcs caml_ml_array_bound_error\n`; 2
| Lop(Ispecific(Irevsubimm n)) ->
` rsb {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n`; 1
+ | Lop(Ispecific(Imuladd | Imulsub as op)) ->
+ let instr = (match op with
+ Imuladd -> "mla"
+ | Imulsub -> "mls"
+ | _ -> assert false) in
+ ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n`; 1
| Lreloadretaddr ->
let n = frame_size() in
` ldr lr, [sp, #{emit_int(n-4)}]\n`; 1
| Lreturn ->
- let ninstr = emit_stack_adjustment "add" (frame_size()) in
- ` bx lr\n`;
- ninstr + 1
+ output_epilogue begin fun () ->
+ ` bx lr\n`; 1
+ end
| Llabel lbl ->
`{emit_label lbl}:\n`; 0
| Lbranch lbl ->
@@ -461,29 +696,41 @@ let emit_instr i =
begin match tst with
Itruetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
- ` bne {emit_label lbl}\n`
+ ` bne {emit_label lbl}\n`; 2
| Ifalsetest ->
` cmp {emit_reg i.arg.(0)}, #0\n`;
- ` beq {emit_label lbl}\n`
+ ` beq {emit_label lbl}\n`; 2
| Iinttest cmp ->
` cmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
let comp = name_for_comparison cmp in
- ` b{emit_string comp} {emit_label lbl}\n`
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
| Iinttest_imm(cmp, n) ->
` cmp {emit_reg i.arg.(0)}, #{emit_int n}\n`;
let comp = name_for_comparison cmp in
- ` b{emit_string comp} {emit_label lbl}\n`
+ ` b{emit_string comp} {emit_label lbl}\n`; 2
| Ifloattest(cmp, neg) ->
- assert false
+ let comp = (match (cmp, neg) with
+ (Ceq, false) | (Cne, true) -> "eq"
+ | (Cne, false) | (Ceq, true) -> "ne"
+ | (Clt, false) -> "cc"
+ | (Clt, true) -> "cs"
+ | (Cle, false) -> "ls"
+ | (Cle, true) -> "hi"
+ | (Cgt, false) -> "gt"
+ | (Cgt, true) -> "le"
+ | (Cge, false) -> "ge"
+ | (Cge, true) -> "lt") in
+ ` fcmpd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`;
+ ` fmstat\n`;
+ ` b{emit_string comp} {emit_label lbl}\n`; 3
| Ioddtest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
- ` bne {emit_label lbl}\n`
+ ` bne {emit_label lbl}\n`; 2
| Ieventest ->
` tst {emit_reg i.arg.(0)}, #1\n`;
- ` beq {emit_label lbl}\n`
- end;
- 2
- | Lcondbranch3(lbl0, lbl1, lbl2) ->
+ ` beq {emit_label lbl}\n`; 2
+ end
+ | Lcondbranch3(lbl0, lbl1, lbl2) ->
` cmp {emit_reg i.arg.(0)}, #1\n`;
begin match lbl0 with
None -> ()
@@ -498,108 +745,151 @@ let emit_instr i =
| Some lbl -> ` bgt {emit_label lbl}\n`
end;
4
- | Lswitch jumptbl ->
- ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
- ` mov r0, r0\n`; (* nop *)
- for i = 0 to Array.length jumptbl - 1 do
- ` .word {emit_label jumptbl.(i)}\n`
- done;
+ | Lswitch jumptbl ->
+ if !arch > ARMv6 && !thumb then begin
+ (* The Thumb-2 TBH instruction supports only forward branches,
+ so we need to generate appropriate trampolines for all labels
+ that appear before this switch instruction (PR#5623) *)
+ let tramtbl = Array.copy jumptbl in
+ ` tbh [pc, {emit_reg i.arg.(0)}, lsl #1]\n`;
+ for j = 0 to Array.length tramtbl - 1 do
+ let rec label i =
+ match i.desc with
+ Lend -> new_label()
+ | Llabel lbl when lbl = tramtbl.(j) -> lbl
+ | _ -> label i.next in
+ tramtbl.(j) <- label i.next;
+ ` .short ({emit_label tramtbl.(j)}-.)/2+{emit_int j}\n`
+ done;
+ (* Generate the necessary trampolines *)
+ for j = 0 to Array.length tramtbl - 1 do
+ if tramtbl.(j) <> jumptbl.(j) then
+ `{emit_label tramtbl.(j)}: b {emit_label jumptbl.(j)}\n`
+ done
+ end else if not !pic_code then begin
+ ` ldr pc, [pc, {emit_reg i.arg.(0)}, lsl #2]\n`;
+ ` nop\n`;
+ for j = 0 to Array.length jumptbl - 1 do
+ ` .word {emit_label jumptbl.(j)}\n`
+ done
+ end else begin
+ (* Slightly slower, but position-independent *)
+ ` add pc, pc, {emit_reg i.arg.(0)}, lsl #2\n`;
+ ` nop\n`;
+ for j = 0 to Array.length jumptbl - 1 do
+ ` b {emit_label jumptbl.(j)}\n`
+ done
+ end;
2 + Array.length jumptbl
| Lsetuptrap lbl ->
` bl {emit_label lbl}\n`; 1
| Lpushtrap ->
stack_offset := !stack_offset + 8;
- ` stmfd sp!, \{trap_ptr, lr}\n`;
+ ` push \{trap_ptr, lr}\n`;
+ cfi_adjust_cfa_offset 8;
` mov trap_ptr, sp\n`; 2
| Lpoptrap ->
- ` ldmfd sp!, \{trap_ptr, lr}\n`;
+ ` pop \{trap_ptr, lr}\n`;
+ cfi_adjust_cfa_offset (-8);
stack_offset := !stack_offset - 8; 1
| Lraise ->
- ` mov sp, trap_ptr\n`;
- ` ldmfd sp!, \{trap_ptr, pc}\n`; 2
+ if !Clflags.debug then begin
+ ` {emit_call "caml_raise_exn"}\n`;
+ `{record_frame Reg.Set.empty i.dbg}\n`; 1
+ end else begin
+ ` mov sp, trap_ptr\n`;
+ ` pop \{trap_ptr, pc}\n`; 2
+ end
(* Emission of an instruction sequence *)
-let no_fallthrough = function
- Lop(Itailcall_ind | Itailcall_imm _) -> true
- | Lreturn -> true
- | Lbranch _ -> true
- | Lswitch _ -> true
- | Lraise -> true
- | _ -> false
-
let rec emit_all ninstr i =
if i.desc = Lend then () else begin
let n = emit_instr i in
let ninstr' = ninstr + n in
- let limit = 511 - !num_literals in
- if ninstr' >= limit - 64 && no_fallthrough i.desc then begin
- emit_constants();
+ (* fldd can address up to +/-1KB, ldr can address up to +/-4KB *)
+ let limit = (if !fpu >= VFPv3_D16 && !float_literals <> []
+ then 127
+ else 511) in
+ let limit = limit - !num_literals in
+ if ninstr' >= limit - 64 && not(has_fallthrough i.desc) then begin
+ emit_literals();
emit_all 0 i.next
- end else
- if ninstr' >= limit then begin
+ end else if !num_literals != 0 && ninstr' >= limit then begin
let lbl = new_label() in
` b {emit_label lbl}\n`;
- emit_constants();
+ emit_literals();
`{emit_label lbl}:\n`;
emit_all 0 i.next
end else
emit_all ninstr' i.next
end
+(* Emission of the profiling prelude *)
+
+let emit_profile() =
+ match Config.system with
+ "linux_eabi" | "linux_eabihf" ->
+ ` push \{lr}\n`;
+ ` {emit_call "__gnu_mcount_nc"}\n`
+ | _ -> ()
+
(* Emission of a function declaration *)
let fundecl fundecl =
function_name := fundecl.fun_name;
fastcode_flag := fundecl.fun_fast;
tailrec_entry_point := new_label();
+ float_literals := [];
+ gotrel_literals := [];
+ symbol_literals := [];
stack_offset := 0;
- Hashtbl.clear symbol_constants;
- Hashtbl.clear float_constants;
+ call_gc_sites := [];
+ bound_error_sites := [];
` .text\n`;
` .align 2\n`;
- ` .global {emit_symbol fundecl.fun_name}\n`;
+ ` .globl {emit_symbol fundecl.fun_name}\n`;
+ if !arch > ARMv6 && !thumb then
+ ` .thumb\n`
+ else
+ ` .arm\n`;
` .type {emit_symbol fundecl.fun_name}, %function\n`;
`{emit_symbol fundecl.fun_name}:\n`;
+ emit_debug_info fundecl.fun_dbg;
+ cfi_startproc();
+ if !Clflags.gprofile then emit_profile();
let n = frame_size() in
- ignore(emit_stack_adjustment "sub" n);
- if !contains_calls then
- ` str lr, [sp, #{emit_int(n - 4)}]\n`;
+ if n > 0 then begin
+ ignore(emit_stack_adjustment (-n));
+ if !contains_calls then
+ ` str lr, [sp, #{emit_int(n - 4)}]\n`
+ end;
`{emit_label !tailrec_entry_point}:\n`;
emit_all 0 fundecl.fun_body;
- emit_constants()
+ emit_literals();
+ List.iter emit_call_gc !call_gc_sites;
+ List.iter emit_call_bound_error !bound_error_sites;
+ cfi_endproc();
+ ` .type {emit_symbol fundecl.fun_name}, %function\n`;
+ ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`
(* Emission of data *)
let emit_item = function
- Cglobal_symbol s ->
- ` .global {emit_symbol s}\n`;
- | Cdefine_symbol s ->
- `{emit_symbol s}:\n`
- | Cdefine_label lbl ->
- `{emit_data_label lbl}:\n`
- | Cint8 n ->
- ` .byte {emit_int n}\n`
- | Cint16 n ->
- ` .short {emit_int n}\n`
- | Cint32 n ->
- ` .word {emit_nativeint n}\n`
- | Cint n ->
- ` .word {emit_nativeint n}\n`
- | Csingle f ->
- emit_float32_directive ".long" f
- | Cdouble f ->
- emit_float64_split_directive ".long" f
- | Csymbol_address s ->
- ` .word {emit_symbol s}\n`
- | Clabel_address lbl ->
- ` .word {emit_data_label lbl}\n`
- | Cstring s ->
- emit_string_directive " .ascii " s
- | Cskip n ->
- if n > 0 then ` .space {emit_int n}\n`
- | Calign n ->
- ` .align {emit_int(Misc.log2 n)}\n`
+ Cglobal_symbol s -> ` .globl {emit_symbol s}\n`;
+ | Cdefine_symbol s -> `{emit_symbol s}:\n`
+ | Cdefine_label lbl -> `{emit_data_label lbl}:\n`
+ | Cint8 n -> ` .byte {emit_int n}\n`
+ | Cint16 n -> ` .short {emit_int n}\n`
+ | Cint32 n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
+ | Cint n -> ` .long {emit_int32 (Nativeint.to_int32 n)}\n`
+ | Csingle f -> ` .single {emit_string f}\n`
+ | Cdouble f -> ` .double {emit_string f}\n`
+ | Csymbol_address s -> ` .word {emit_symbol s}\n`
+ | Clabel_address lbl -> ` .word {emit_data_label lbl}\n`
+ | Cstring s -> emit_string_directive " .ascii " s
+ | Cskip n -> if n > 0 then ` .space {emit_int n}\n`
+ | Calign n -> ` .align {emit_int(Misc.log2 n)}\n`
let data l =
` .data\n`;
@@ -608,32 +898,63 @@ let data l =
(* Beginning / end of an assembly file *)
let begin_assembly() =
- `trap_ptr .req r11\n`;
- `alloc_ptr .req r8\n`;
- `alloc_limit .req r10\n`;
+ reset_debug_info();
+ ` .syntax unified\n`;
+ begin match !arch with
+ | ARMv4 -> ` .arch armv4t\n`
+ | ARMv5 -> ` .arch armv5t\n`
+ | ARMv5TE -> ` .arch armv5te\n`
+ | ARMv6 -> ` .arch armv6\n`
+ | ARMv6T2 -> ` .arch armv6t2\n`
+ | ARMv7 -> ` .arch armv7-a\n`
+ end;
+ begin match !fpu with
+ Soft -> ` .fpu softvfp\n`
+ | VFPv3_D16 -> ` .fpu vfpv3-d16\n`
+ | VFPv3 -> ` .fpu vfpv3\n`
+ end;
+ `trap_ptr .req r8\n`;
+ `alloc_ptr .req r10\n`;
+ `alloc_limit .req r11\n`;
let lbl_begin = Compilenv.make_symbol (Some "data_begin") in
` .data\n`;
- ` .global {emit_symbol lbl_begin}\n`;
+ ` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`;
let lbl_begin = Compilenv.make_symbol (Some "code_begin") in
` .text\n`;
- ` .global {emit_symbol lbl_begin}\n`;
+ ` .globl {emit_symbol lbl_begin}\n`;
`{emit_symbol lbl_begin}:\n`
let end_assembly () =
let lbl_end = Compilenv.make_symbol (Some "code_end") in
` .text\n`;
- ` .global {emit_symbol lbl_end}\n`;
+ ` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
let lbl_end = Compilenv.make_symbol (Some "data_end") in
` .data\n`;
- ` .global {emit_symbol lbl_end}\n`;
+ ` .globl {emit_symbol lbl_end}\n`;
`{emit_symbol lbl_end}:\n`;
- ` .word 0\n`;
+ ` .long 0\n`;
let lbl = Compilenv.make_symbol (Some "frametable") in
- ` .data\n`;
- ` .global {emit_symbol lbl}\n`;
+ ` .globl {emit_symbol lbl}\n`;
`{emit_symbol lbl}:\n`;
- ` .word {emit_int (List.length !frame_descriptors)}\n`;
- List.iter emit_frame !frame_descriptors;
- frame_descriptors := []
+ emit_frames
+ { efa_label = (fun lbl ->
+ ` .type {emit_label lbl}, %function\n`;
+ ` .word {emit_label lbl}\n`);
+ efa_16 = (fun n -> ` .short {emit_int n}\n`);
+ efa_32 = (fun n -> ` .long {emit_int32 n}\n`);
+ efa_word = (fun n -> ` .word {emit_int n}\n`);
+ efa_align = (fun n -> ` .align {emit_int(Misc.log2 n)}\n`);
+ efa_label_rel = (fun lbl ofs ->
+ ` .word {emit_label lbl} - . + {emit_int32 ofs}\n`);
+ efa_def_label = (fun lbl -> `{emit_label lbl}:\n`);
+ efa_string = (fun s -> emit_string_directive " .asciz " s) };
+ ` .type {emit_symbol lbl}, %object\n`;
+ ` .size {emit_symbol lbl}, .-{emit_symbol lbl}\n`;
+ begin match Config.system with
+ "linux_eabihf" | "linux_eabi" ->
+ (* Mark stack as non-executable *)
+ ` .section .note.GNU-stack,\"\",%progbits\n`
+ | _ -> ()
+ end