29 files changed, 535 insertions, 383 deletions
diff --git a/asmcomp/afl_instrument.ml b/asmcomp/afl_instrument.ml
index b8f2d9f7a5..9c21154e5d 100644
--- a/asmcomp/afl_instrument.ml
+++ b/asmcomp/afl_instrument.ml
@@ -103,7 +103,7 @@ let instrument_initialiser c dbg =
      calls *)
   with_afl_logging
     (Csequence
-       (Cop (Cextcall ("caml_setup_afl", typ_int, false, None),
+       (Cop (Cextcall ("caml_setup_afl", typ_int, [], false, None),
              [Cconst_int (0, dbg ())],
              dbg ()),
         c))
diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml
index 61d55c7795..f82a7e41ac 100644
--- a/asmcomp/amd64/proc.ml
+++ b/asmcomp/amd64/proc.ml
@@ -165,7 +165,7 @@ let calling_conventions first_int last_int first_float last_float make_stack
   let float = ref first_float in
   let ofs = ref 0 in
   for i = 0 to Array.length arg - 1 do
-    match arg.(i).typ with
+    match arg.(i) with
     | Val | Int | Addr as ty ->
         if !int <= last_int then begin
           loc.(i) <- phys_reg !int;
@@ -234,7 +234,7 @@ let win64_loc_external_arguments arg =
   let reg = ref 0
   and ofs = ref 32 in
   for i = 0 to Array.length arg - 1 do
-    match arg.(i).typ with
+    match arg.(i) with
     | Val | Int | Addr as ty ->
         if !reg < 4 then begin
           loc.(i) <- phys_reg win64_int_external_arguments.(!reg);
@@ -254,15 +254,14 @@ let win64_loc_external_arguments arg =
   done;
   (loc, Misc.align !ofs 16)  (* keep stack 16-aligned *)
 
-let loc_external_arguments arg =
-  let arg =
-    Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg
-  in
-  let loc, alignment =
-    if win64 then win64_loc_external_arguments arg
+let loc_external_arguments ty_args =
+  let arg = Cmm.machtype_of_exttype_list ty_args in
+  let loc, stack_ofs =
+    if win64
+    then win64_loc_external_arguments arg
     else unix_loc_external_arguments arg
   in
-  Array.map (fun reg -> [|reg|]) loc, alignment
+  Array.map (fun reg -> [|reg|]) loc, stack_ofs
 
 let loc_exn_bucket = rax
 
diff --git a/asmcomp/amd64/selection.ml b/asmcomp/amd64/selection.ml
index 4ef5fec2e2..cbaeb72c78 100644
--- a/asmcomp/amd64/selection.ml
+++ b/asmcomp/amd64/selection.ml
@@ -135,7 +135,7 @@ method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
 
 method! is_simple_expr e =
   match e with
-  | Cop(Cextcall (fn, _, _, _), args, _)
+  | Cop(Cextcall (fn, _, _, _, _), args, _)
     when List.mem fn inline_ops ->
       (* inlined ops are simple if their arguments are *)
       List.for_all self#is_simple_expr args
@@ -144,7 +144,7 @@ method! is_simple_expr e =
 
 method! effects_of e =
   match e with
-  | Cop(Cextcall(fn, _, _, _), args, _)
+  | Cop(Cextcall(fn, _, _, _, _), args, _)
     when List.mem fn inline_ops ->
       Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
   | _ ->
@@ -197,7 +197,7 @@ method! select_operation op args dbg =
       self#select_floatarith true Imulf Ifloatmul args
   | Cdivf ->
       self#select_floatarith false Idivf Ifloatdiv args
-  | Cextcall("sqrt", _, false, _) ->
+  | Cextcall("sqrt", _, _, false, _) ->
      begin match args with
        [Cop(Cload {memory_chunk=(Double|Double_u as chunk)}, [loc], _dbg)] ->
          let (addr, arg) = self#select_addressing chunk loc in
@@ -217,12 +217,12 @@ method! select_operation op args dbg =
       | _ ->
           super#select_operation op args dbg
       end
-  | Cextcall("caml_bswap16_direct", _, _, _) ->
+  | Cextcall("caml_bswap16_direct", _, _, _, _) ->
       (Ispecific (Ibswap 16), args)
-  | Cextcall("caml_int32_direct_bswap", _, _, _) ->
+  | Cextcall("caml_int32_direct_bswap", _, _, _, _) ->
       (Ispecific (Ibswap 32), args)
-  | Cextcall("caml_int64_direct_bswap", _, _, _)
-  | Cextcall("caml_nativeint_direct_bswap", _, _, _) ->
+  | Cextcall("caml_int64_direct_bswap", _, _, _, _)
+  | Cextcall("caml_nativeint_direct_bswap", _, _, _, _) ->
       (Ispecific (Ibswap 64), args)
   (* AMD64 does not support immediate operands for multiply high signed *)
   | Cmulhi ->
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index 9ac9cf13a2..2c79f380f5 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -111,67 +111,58 @@ let loc_spacetime_node_hole = Reg.dummy  (* Spacetime unsupported *)
 
 (* Calling conventions *)
 
+let loc_int last_int make_stack int ofs =
+  if !int <= last_int then begin
+    let l = phys_reg !int in
+    incr int; l
+  end else begin
+    let l = stack_slot (make_stack !ofs) Int in
+    ofs := !ofs + size_int; l
+  end
+
+let loc_float last_float make_stack float ofs =
+  assert (abi = EABI_HF);
+  assert (!fpu >= VFPv2);
+  if !float <= last_float then begin
+    let l = phys_reg !float in
+    incr float; l
+  end else begin
+    ofs := Misc.align !ofs size_float;
+    let l = stack_slot (make_stack !ofs) Float in
+    ofs := !ofs + size_float; l
+  end
+
+let loc_int_pair last_int make_stack int ofs =
+  (* 64-bit quantities split across two registers must either be in a
+     consecutive pair of registers where the lowest numbered is an
+     even-numbered register; or in a stack slot that is 8-byte aligned. *)
+  int := Misc.align !int 2;
+  if !int <= last_int - 1 then begin
+    let reg_lower = phys_reg !int in
+    let reg_upper = phys_reg (1 + !int) in
+    int := !int + 2;
+    [| reg_lower; reg_upper |]
+  end else begin
+    let size_int64 = size_int * 2 in
+    ofs := Misc.align !ofs size_int64;
+    let stack_lower = stack_slot (make_stack !ofs) Int in
+    let stack_upper = stack_slot (make_stack (size_int + !ofs)) Int in
+    ofs := !ofs + size_int64;
+    [| stack_lower; stack_upper |]
+  end
+
 let calling_conventions first_int last_int first_float last_float make_stack
       arg =
-  let loc = Array.make (Array.length arg) [| Reg.dummy |] in
+  let loc = Array.make (Array.length arg) Reg.dummy in
   let int = ref first_int in
   let float = ref first_float in
   let ofs = ref 0 in
   for i = 0 to Array.length arg - 1 do
     match arg.(i) with
-    | [| arg |] ->
-      begin match arg.typ with
-      | Val | Int | Addr as ty ->
-          if !int <= last_int then begin
-            loc.(i) <- [| phys_reg !int |];
-            incr int
-          end else begin
-            loc.(i) <- [| stack_slot (make_stack !ofs) ty |];
-            ofs := !ofs + size_int
-          end
-      | Float ->
-          assert (abi = EABI_HF);
-          assert (!fpu >= VFPv2);
-          if !float <= last_float then begin
-            loc.(i) <- [| phys_reg !float |];
-            incr float
-          end else begin
-            ofs := Misc.align !ofs size_float;
-            loc.(i) <- [| stack_slot (make_stack !ofs) Float |];
-            ofs := !ofs + size_float
-          end
-      end
-    | [| arg1; arg2 |] ->
-      (* Passing of 64-bit quantities to external functions. *)
-      begin match arg1.typ, arg2.typ with
-      | Int, Int ->
-          (* 64-bit quantities split across two registers must either be in a
-             consecutive pair of registers where the lowest numbered is an
-             even-numbered register; or in a stack slot that is 8-byte
-             aligned. *)
-          int := Misc.align !int 2;
-          if !int <= last_int - 1 then begin
-            let reg_lower = phys_reg !int in
-            let reg_upper = phys_reg (1 + !int) in
-            loc.(i) <- [| reg_lower; reg_upper |];
-            int := !int + 2
-          end else begin
-            let size_int64 = size_int * 2 in
-            ofs := Misc.align !ofs size_int64;
-            let stack_lower = stack_slot (make_stack !ofs) Int in
-            let stack_upper = stack_slot (make_stack (size_int + !ofs)) Int in
-            loc.(i) <- [| stack_lower; stack_upper |];
-            ofs := !ofs + size_int64
-          end
-      | _, _ ->
-        let f = function Int -> "I" | Addr -> "A" | Val -> "V" | Float -> "F" in
-        fatal_error (Printf.sprintf "Proc.calling_conventions: bad register \
-            type(s) for multi-register argument: %s, %s"
-          (f arg1.typ) (f arg2.typ))
-      end
-    | _ ->
-      fatal_error "Proc.calling_conventions: bad number of registers for \
-        multi-register argument"
+    | Val | Int | Addr ->
+        loc.(i) <- loc_int last_int make_stack int ofs
+    | Float ->
+        loc.(i) <- loc_float last_float make_stack float ofs
   done;
   (loc, Misc.align !ofs 8)  (* keep stack 8-aligned *)
 
@@ -187,40 +178,50 @@ let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
 
 let max_arguments_for_tailcalls = 8
 
-let single_regs arg = Array.map (fun arg -> [| arg |]) arg
-let ensure_single_regs res =
-  Array.map (function
-      | [| res |] -> res
-      | _ -> failwith "Proc.ensure_single_regs")
-    res
-
 let loc_arguments arg =
-  let (loc, alignment) =
-    calling_conventions 0 7 100 115 outgoing (single_regs arg)
-  in
-  ensure_single_regs loc, alignment
+  calling_conventions 0 7 100 115 outgoing arg
+
 let loc_parameters arg =
-  let (loc, _) = calling_conventions 0 7 100 115 incoming (single_regs arg) in
-  ensure_single_regs loc
+  let (loc, _) = calling_conventions 0 7 100 115 incoming arg in loc
+
 let loc_results res =
-  let (loc, _) =
-    calling_conventions 0 7 100 115 not_supported (single_regs res)
-  in
-  ensure_single_regs loc
+  let (loc, _) = calling_conventions 0 7 100 115 not_supported res in loc
 
 (* C calling convention:
      first integer args in r0...r3
+     first 64-bit integer args in r0-r1, r2-r3
      first float args in d0...d7 (EABI+VFP)
+     first float args in r0-r1, r2-r3 (soft FP)
      remaining args on stack.
-   Return values in r0...r1 or d0. *)
+   Return values in r0, r0-r1, or d0. *)
+
+let external_calling_conventions first_int last_int first_float last_float
+                                 make_stack ty_args =
+  let loc = Array.make (List.length ty_args) [| Reg.dummy |] in
+  let int = ref first_int in
+  let float = ref first_float in
+  let ofs = ref 0 in
+  List.iteri
+    (fun i ty_arg ->
+      match ty_arg with
+      | XInt | XInt32 ->
+        loc.(i) <- [| loc_int last_int make_stack int ofs |]
+      | XInt64 ->
+        loc.(i) <- loc_int_pair last_int make_stack int ofs
+      | XFloat ->
+        loc.(i) <-
+         (if abi = EABI_HF
+          then [| loc_float last_float make_stack float ofs |]
+          else loc_int_pair last_int make_stack int ofs))
+    ty_args;
+  (loc, Misc.align !ofs 8)  (* keep stack 8-aligned *)
+
+let loc_external_arguments ty_args =
+  external_calling_conventions 0 3 100 107 outgoing ty_args
 
-let loc_external_arguments arg =
-  calling_conventions 0 3 100 107 outgoing arg
 let loc_external_results res =
-  let (loc, _) =
-    calling_conventions 0 1 100 100 not_supported (single_regs res)
-  in
-  ensure_single_regs loc
+  let (loc, _) = calling_conventions 0 1 100 100 not_supported res
+  in loc
 
 let loc_exn_bucket = phys_reg 0
 
diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml
index ec4ec2b542..7659ea87af 100644
--- a/asmcomp/arm/selection.ml
+++ b/asmcomp/arm/selection.ml
@@ -79,7 +79,7 @@ let pseudoregs_for_operation op arg res =
       (arg', res)
   (* We use __aeabi_idivmod for Cmodi only, and hence we care only
      for the remainder in r1, so fix up the destination register. *)
-  | Iextcall { func = "__aeabi_idivmod"; alloc = false; } ->
+  | Iextcall { func = "__aeabi_idivmod"; _ } ->
       (arg, [|r1|])
   (* Other instructions are regular *)
   | _ -> raise Use_default
@@ -108,25 +108,25 @@ method is_immediate n =
 
 method! is_simple_expr = function
   (* inlined floating-point ops are simple if their arguments are *)
-  | Cop(Cextcall("sqrt", _, _, _), args, _) when !fpu >= VFPv2 ->
+  | Cop(Cextcall("sqrt", _, _, _, _), args, _) when !fpu >= VFPv2 ->
       List.for_all self#is_simple_expr args
   (* inlined byte-swap ops are simple if their arguments are *)
-  | Cop(Cextcall("caml_bswap16_direct", _, _, _), args, _)
+  | Cop(Cextcall("caml_bswap16_direct", _, _, _, _), args, _)
     when !arch >= ARMv6T2 ->
       List.for_all self#is_simple_expr args
-  | Cop(Cextcall("caml_int32_direct_bswap",_,_,_), args, _)
+  | Cop(Cextcall("caml_int32_direct_bswap", _, _, _, _), args, _)
     when !arch >= ARMv6 ->
       List.for_all self#is_simple_expr args
   | e -> super#is_simple_expr e
 
 method! effects_of e =
   match e with
-  | Cop(Cextcall("sqrt", _, _, _), args, _) when !fpu >= VFPv2 ->
+  | Cop(Cextcall("sqrt", _, _, _, _), args, _) when !fpu >= VFPv2 ->
       Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
-  | Cop(Cextcall("caml_bswap16_direct", _, _, _), args, _)
+  | Cop(Cextcall("caml_bswap16_direct", _, _, _, _), args, _)
     when !arch >= ARMv6T2 ->
       Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
-  | Cop(Cextcall("caml_int32_direct_bswap",_,_,_), args, _)
+  | Cop(Cextcall("caml_int32_direct_bswap",_ ,_ , _, _), args, _)
     when !arch >= ARMv6 ->
       Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
   | e -> super#effects_of e
@@ -180,8 +180,9 @@ method select_shift_arith op dbg arithop arithrevop args =
       | op_args -> op_args
       end
 
-method private iextcall (func, alloc) =
-  Iextcall { func; alloc; label_after = Cmm.new_label (); }
+method private iextcall func ty_res ty_args =
+  Iextcall { func; ty_res; ty_args;
+             alloc = false; label_after = Cmm.new_label (); }
 
 method! select_operation op args dbg =
   match (op, args) with
@@ -216,15 +217,15 @@ method! select_operation op args dbg =
       (Iintop Imulh, args)
   (* Turn integer division/modulus into runtime ABI calls *)
   | (Cdivi, args) ->
-      (self#iextcall("__aeabi_idiv", false), args)
+      (self#iextcall "__aeabi_idiv" typ_int [], args)
   | (Cmodi, args) ->
       (* See above for fix up of return register *)
-      (self#iextcall("__aeabi_idivmod", false), args)
+      (self#iextcall "__aeabi_idivmod" typ_int [], args)
   (* Recognize 16-bit bswap instruction (ARMv6T2 because we need movt) *)
-  | (Cextcall("caml_bswap16_direct", _, _, _), args) when !arch >= ARMv6T2 ->
+  | (Cextcall("caml_bswap16_direct", _, _, _, _), args) when !arch >= ARMv6T2 ->
       (Ispecific(Ibswap 16), args)
   (* Recognize 32-bit bswap instructions (ARMv6 and above) *)
-  | (Cextcall("caml_int32_direct_bswap", _, _, _), args)
+  | (Cextcall("caml_int32_direct_bswap", _, _, _, _), args)
     when !arch >= ARMv6 ->
       (Ispecific(Ibswap 32), args)
   (* Turn floating-point operations into runtime ABI calls for softfp *)
@@ -235,12 +236,18 @@ method! select_operation op args dbg =
 method private select_operation_softfp op args dbg =
   match (op, args) with
   (* Turn floating-point operations into runtime ABI calls *)
-  | (Caddf, args) -> (self#iextcall("__aeabi_dadd", false), args)
-  | (Csubf, args) -> (self#iextcall("__aeabi_dsub", false), args)
-  | (Cmulf, args) -> (self#iextcall("__aeabi_dmul", false), args)
-  | (Cdivf, args) -> (self#iextcall("__aeabi_ddiv", false), args)
-  | (Cfloatofint, args) -> (self#iextcall("__aeabi_i2d", false), args)
-  | (Cintoffloat, args) -> (self#iextcall("__aeabi_d2iz", false), args)
+  | (Caddf, args) ->
+      (self#iextcall "__aeabi_dadd" typ_float [XFloat;XFloat], args)
+  | (Csubf, args) ->
+      (self#iextcall "__aeabi_dsub" typ_float [XFloat;XFloat], args)
+  | (Cmulf, args) ->
+      (self#iextcall "__aeabi_dmul" typ_float [XFloat;XFloat], args)
+  | (Cdivf, args) ->
+      (self#iextcall "__aeabi_ddiv" typ_float [XFloat;XFloat], args)
+  | (Cfloatofint, args) ->
+      (self#iextcall "__aeabi_i2d" typ_float [XInt], args)
+  | (Cintoffloat, args) ->
+      (self#iextcall "__aeabi_d2iz" typ_int [XFloat], args)
   | (Ccmpf comp, args) ->
       let comp, func =
         match comp with
@@ -256,14 +263,16 @@ method private select_operation_softfp op args dbg =
         | CFnge -> Ceq, "__aeabi_dcmpge"
       in
       (Iintop_imm(Icomp(Iunsigned comp), 0),
-       [Cop(Cextcall(func, typ_int, false, None), args, dbg)])
+       [Cop(Cextcall(func, typ_int, [XFloat;XFloat], false, None),
+            args, dbg)])
   (* Add coercions around loads and stores of 32-bit floats *)
   | (Cload {memory_chunk=Single; mutability; is_atomic=false}, args) ->
-      (self#iextcall("__aeabi_f2d", false),
+      (self#iextcall "__aeabi_f2d" typ_float [XInt],
         [Cop(Cload {memory_chunk=Word_int; mutability; is_atomic=false}, args, dbg)])
   | (Cstore (Single, init), [arg1; arg2]) ->
       let arg2' =
-        Cop(Cextcall("__aeabi_d2f", typ_int, false, None), [arg2], dbg) in
+        Cop(Cextcall("__aeabi_d2f", typ_int, [XFloat], false, None),
+            [arg2], dbg) in
       self#select_operation (Cstore (Word_int, init)) [arg1; arg2'] dbg
   (* Other operations are regular *)
   | (op, args) -> super#select_operation op args dbg
@@ -288,7 +297,7 @@ method private select_operation_vfpv3 op args dbg =
   | (Csubf, [Cop(Cmulf, args, _); arg]) ->
       (Ispecific Imulsubf, arg :: args)
   (* Recognize floating-point square root *)
-  | (Cextcall("sqrt", _, false, _), args) ->
+  | (Cextcall("sqrt", _, _, false, _), args) ->
       (Ispecific Isqrtf, args)
   (* Other operations are regular *)
   | (op, args) -> super#select_operation op args dbg
diff --git a/asmcomp/arm64/arch.ml b/asmcomp/arm64/arch.ml
index 9cf923c6c3..41edd3e4a6 100644
--- a/asmcomp/arm64/arch.ml
+++ b/asmcomp/arm64/arch.ml
@@ -19,6 +19,10 @@
 
 open Format
 
+let macosx = (Config.system = "macosx")
+
+(* Machine-specific command-line options *)
+
 let command_line_options = []
 
 (* Addressing modes *)
@@ -56,6 +60,7 @@ type specific_operation =
   | Inegmulsubf   (* floating-point negate, multiply and subtract *)
   | Isqrtf        (* floating-point square root *)
   | Ibswap of int (* endianness conversion *)
+  | Imove32       (* 32-bit integer move *)
 
 and arith_operation =
     Ishiftadd
@@ -65,7 +70,7 @@ let spacetime_node_hole_pointer_is_live_before = function
   | Ifar_alloc _ | Ifar_intop_checkbound _ | Ifar_intop_imm_checkbound _
   | Ishiftarith _ | Ishiftcheckbound _ | Ifar_shiftcheckbound _ -> false
   | Imuladd | Imulsub | Inegmulf | Imuladdf | Inegmuladdf | Imulsubf
-  | Inegmulsubf | Isqrtf | Ibswap _ -> false
+  | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 -> false
 
 (* Sizes, endianness *)
 
@@ -170,3 +175,6 @@ let print_specific_operation printreg op ppf arg =
   | Ibswap n ->
       fprintf ppf "bswap%i %a" n
         printreg arg.(0)
+  | Imove32 ->
+      fprintf ppf "move32 %a"
+        printreg arg.(0)
diff --git a/asmcomp/arm64/emit.mlp b/asmcomp/arm64/emit.mlp
index 7e789bb8e5..bccb8872e0 100644
--- a/asmcomp/arm64/emit.mlp
+++ b/asmcomp/arm64/emit.mlp
@@ -512,6 +512,7 @@ module BR = Branch_relaxation.Make (struct
     | Lop (Ispecific (Imuladd | Imulsub)) -> 1
     | Lop (Ispecific (Ibswap 16)) -> 2
     | Lop (Ispecific (Ibswap _)) -> 1
+    | Lop (Ispecific Imove32) -> 1
     | Lop (Iname_for_debugger _) -> 0
     | Lreloadretaddr -> 0
     | Lreturn -> epilogue_size ()
@@ -644,6 +645,19 @@ let emit_instr i =
           | _ ->
               assert false
         end
+    | Lop(Ispecific Imove32) ->
+        let src = i.arg.(0) and dst = i.res.(0) in
+        if src.loc <> dst.loc then begin
+          match (src, dst) with
+          | {loc = Reg _}, {loc = Reg _} ->
+              `	mov	{emit_wreg dst}, {emit_wreg src}\n`
+          | {loc = Reg _}, {loc = Stack _} ->
+              `	str	{emit_wreg src}, {emit_stack dst}\n`
+          | {loc = Stack _}, {loc = Reg _} ->
+              `	ldr	{emit_wreg dst}, {emit_stack src}\n`
+          | _ ->
+              assert false
+        end
     | Lop(Iconst_int n) ->
         emit_intconst i.res.(0) n
     | Lop(Iconst_float f) ->
diff --git a/asmcomp/arm64/proc.ml b/asmcomp/arm64/proc.ml
index a043fce178..e362b6b63b 100644
--- a/asmcomp/arm64/proc.ml
+++ b/asmcomp/arm64/proc.ml
@@ -111,6 +111,36 @@ let loc_spacetime_node_hole = Reg.dummy  (* Spacetime unsupported *)
 
 (* Calling conventions *)
 
+let loc_int last_int make_stack int ofs =
+  if !int <= last_int then begin
+    let l = phys_reg !int in
+    incr int; l
+  end else begin
+    ofs := Misc.align !ofs size_int;
+    let l = stack_slot (make_stack !ofs) Int in
+    ofs := !ofs + size_int; l
+  end
+
+let loc_float last_float make_stack float ofs =
+  if !float <= last_float then begin
+    let l = phys_reg !float in
+    incr float; l
+  end else begin
+    ofs := Misc.align !ofs size_float;
+    let l = stack_slot (make_stack !ofs) Float in
+    ofs := !ofs + size_float; l
+  end
+
+let loc_int32 last_int make_stack int ofs =
+  if !int <= last_int then begin
+    let l = phys_reg !int in
+    incr int; l
+  end else begin
+    let l = stack_slot (make_stack !ofs) Int in
+    ofs := !ofs + (if macosx then 4 else 8);
+    l
+  end
+
 let calling_conventions
     first_int last_int first_float last_float make_stack arg =
   let loc = Array.make (Array.length arg) Reg.dummy in
@@ -118,23 +148,11 @@ let calling_conventions
   let float = ref first_float in
   let ofs = ref 0 in
   for i = 0 to Array.length arg - 1 do
-    match arg.(i).typ with
-    | Val | Int | Addr as ty ->
-        if !int <= last_int then begin
-          loc.(i) <- phys_reg !int;
-          incr int
-        end else begin
-          loc.(i) <- stack_slot (make_stack !ofs) ty;
-          ofs := !ofs + size_int
-        end
+    match arg.(i) with
+    | Val | Int | Addr ->
+        loc.(i) <- loc_int last_int make_stack int ofs
     | Float ->
-        if !float <= last_float then begin
-          loc.(i) <- phys_reg !float;
-          incr float
-        end else begin
-          loc.(i) <- stack_slot (make_stack !ofs) Float;
-          ofs := !ofs + size_float
-        end
+        loc.(i) <- loc_float last_float make_stack float ofs
   done;
   (loc, Misc.align !ofs 16)  (* keep stack 16-aligned *)
 
@@ -163,14 +181,31 @@ let loc_results res =
      first integer args in x0...x7
      first float args in d0...d7
      remaining args on stack.
-   Return values in x0...x1 or d0. *)
-
-let loc_external_arguments arg =
-  let arg =
-    Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg
-  in
-  let loc, alignment = calling_conventions 0 7 100 107 outgoing arg in
-  Array.map (fun reg -> [|reg|]) loc, alignment
+   macOS/iOS peculiarity: int32 arguments passed on stack occupy 4 bytes,
+   while the AAPCS64 says 8 bytes.
+   Return values in r0...r1 or d0. *)
+
+let external_calling_conventions
+    first_int last_int first_float last_float make_stack ty_args =
+  let loc = Array.make (List.length ty_args) [| Reg.dummy |] in
+  let int = ref first_int in
+  let float = ref first_float in
+  let ofs = ref 0 in
+  List.iteri (fun i ty_arg ->
+    begin match ty_arg with
+    | XInt | XInt64 ->
+        loc.(i) <- [| loc_int last_int make_stack int ofs |]
+    | XInt32 ->
+        loc.(i) <- [| loc_int32 last_int make_stack int ofs |]
+    | XFloat ->
+        loc.(i) <- [| loc_float last_float make_stack float ofs |]
+    end)
+    ty_args;
+  (loc, Misc.align !ofs 16)  (* keep stack 16-aligned *)
+
+let loc_external_arguments ty_args =
+  external_calling_conventions 0 7 100 107 outgoing ty_args
+
 let loc_external_results res =
   let (loc, _) = calling_conventions 0 1 100 100 not_supported res in loc
 
diff --git a/asmcomp/arm64/reload.ml b/asmcomp/arm64/reload.ml
index 0c342b6448..7d27e0760a 100644
--- a/asmcomp/arm64/reload.ml
+++ b/asmcomp/arm64/reload.ml
@@ -15,5 +15,26 @@
 
 (* Reloading for the ARM 64 bits *)
 
+open Reg
+
+class reload = object (self)
+
+inherit Reloadgen.reload_generic as super
+
+method! reload_operation op arg res =
+  match op with
+  | Ispecific Imove32 ->
+      (* Like Imove: argument or result can be on stack but not both *)
+      begin match arg.(0), res.(0) with
+      | {loc = Stack s1}, {loc = Stack s2} when s1 <> s2 ->
+          ([| self#makereg arg.(0) |], res)
+      | _ ->
+          (arg, res)
+      end
+   | _ ->
+      super#reload_operation op arg res
+
+end
+
 let fundecl f num_stack_slots =
-  (new Reloadgen.reload_generic)#fundecl f num_stack_slots
+  (new reload)#fundecl f num_stack_slots
diff --git a/asmcomp/arm64/selection.ml b/asmcomp/arm64/selection.ml
index 90166141dd..8b1ce1b687 100644
--- a/asmcomp/arm64/selection.ml
+++ b/asmcomp/arm64/selection.ml
@@ -85,6 +85,11 @@ let inline_ops =
 let use_direct_addressing _symb =
   not !Clflags.dlcode
 
+let is_stack_slot rv =
+  Reg.(match rv with
+        | [| { loc = Stack _ } |] -> true
+        | _ -> false)
+
 (* Instruction selection *)
 
 class selector = object(self)
@@ -98,13 +103,13 @@ method is_immediate n =
 
 method! is_simple_expr = function
   (* inlined floating-point ops are simple if their arguments are *)
-  | Cop(Cextcall (fn, _, _, _), args, _) when List.mem fn inline_ops ->
+  | Cop(Cextcall (fn, _, _, _, _), args, _) when List.mem fn inline_ops ->
       List.for_all self#is_simple_expr args
   | e -> super#is_simple_expr e
 
 method! effects_of e =
   match e with
-  | Cop(Cextcall (fn, _, _, _), args, _) when List.mem fn inline_ops ->
+  | Cop(Cextcall (fn, _, _, _, _), args, _) when List.mem fn inline_ops ->
       Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
   | e -> super#effects_of e
 
@@ -228,15 +233,15 @@ method! select_operation op args dbg =
           super#select_operation op args dbg
       end
   (* Recognize floating-point square root *)
-  | Cextcall("sqrt", _, _, _) ->
+  | Cextcall("sqrt", _, _, _, _) ->
       (Ispecific Isqrtf, args)
   (* Recognize bswap instructions *)
-  | Cextcall("caml_bswap16_direct", _, _, _) ->
+  | Cextcall("caml_bswap16_direct", _, _, _, _) ->
       (Ispecific(Ibswap 16), args)
-  | Cextcall("caml_int32_direct_bswap", _, _, _) ->
+  | Cextcall("caml_int32_direct_bswap", _, _, _, _) ->
       (Ispecific(Ibswap 32), args)
   | Cextcall(("caml_int64_direct_bswap"|"caml_nativeint_direct_bswap"),
-              _, _, _) ->
+              _, _, _, _) ->
       (Ispecific (Ibswap 64), args)
   (* Other operations are regular *)
   | _ ->
@@ -250,6 +255,10 @@ method select_logical op = function
   | args ->
       (Iintop op, args)
 
+method! insert_move_extcall_arg env ty_arg src dst =
+  if macosx && ty_arg = XInt32 && is_stack_slot dst
+  then self#insert env (Iop (Ispecific Imove32)) src dst
+  else self#insert_moves env src dst
 end
 
 let fundecl f = (new selector)#emit_fundecl f
diff --git a/asmcomp/cmm.ml b/asmcomp/cmm.ml
index d0fc52495e..249d1853ef 100644
--- a/asmcomp/cmm.ml
+++ b/asmcomp/cmm.ml
@@ -77,6 +77,21 @@ let ge_component comp1 comp2 =
   | Float, (Int | Addr | Val) ->
     assert false
 
+type exttype =
+  | XInt
+  | XInt32
+  | XInt64
+  | XFloat
+
+let machtype_of_exttype = function
+  | XInt -> typ_int
+  | XInt32 -> typ_int
+  | XInt64 -> if Arch.size_int = 4 then [|Int;Int|] else typ_int
+  | XFloat -> typ_float
+
+let machtype_of_exttype_list xtl =
+  Array.concat (List.map machtype_of_exttype xtl)
+
 type integer_comparison = Lambda.integer_comparison =
   | Ceq | Cne | Clt | Cgt | Cle | Cge
 
@@ -124,7 +139,7 @@ type memory_chunk =
 
 and operation =
     Capply of machtype
-  | Cextcall of string * machtype * bool * label option
+  | Cextcall of string * machtype * exttype list * bool * label option
     (** If specified, the given label will be placed immediately after the
         call (at the same place as any frame descriptor would reference). *)
   | Cload of
diff --git a/asmcomp/cmm.mli b/asmcomp/cmm.mli
index 2674717c7a..2dfe24f4e6 100644
--- a/asmcomp/cmm.mli
+++ b/asmcomp/cmm.mli
@@ -68,6 +68,17 @@ val ge_component
   -> machtype_component
   -> bool
 
+type exttype =
+  | XInt                                (**r OCaml value, word-sized integer *)
+  | XInt32                              (**r 32-bit integer *)
+  | XInt64                              (**r 64-bit integer  *)
+  | XFloat                              (**r double-precision FP number  *)
+(** A variant of [machtype] used to describe arguments
+    to external C functions *)
+
+val machtype_of_exttype: exttype -> machtype
+val machtype_of_exttype_list: exttype list -> machtype
+
 type integer_comparison = Lambda.integer_comparison =
   | Ceq | Cne | Clt | Cgt | Cle | Cge
 
@@ -127,7 +138,10 @@ type memory_chunk =
 
 and operation =
     Capply of machtype
-  | Cextcall of string * machtype * bool * label option
+  | Cextcall of string * machtype * exttype list * bool * label option
+      (** The [machtype] is the machine type of the result.
+          The [exttype list] describes the unboxing types of the arguments.
+          An empty list means "all arguments are machine words [XInt]". *)
   | Cload of
       { memory_chunk: memory_chunk
       ; mutability: Asttypes.mutable_flag
diff --git a/asmcomp/cmm_helpers.ml b/asmcomp/cmm_helpers.ml
index 54f745a8fe..367446e724 100644
--- a/asmcomp/cmm_helpers.ml
+++ b/asmcomp/cmm_helpers.ml
@@ -619,8 +619,8 @@ let rec remove_unit = function
       Clet(id, c1, remove_unit c2)
   | Cop(Capply _mty, args, dbg) ->
       Cop(Capply typ_void, args, dbg)
-  | Cop(Cextcall(proc, _mty, alloc, label_after), args, dbg) ->
-      Cop(Cextcall(proc, typ_void, alloc, label_after), args, dbg)
+  | Cop(Cextcall(proc, _ty_res, ty_args, alloc, label_after), args, dbg) ->
+      Cop(Cextcall(proc, typ_void, ty_args, alloc, label_after), args, dbg)
   | Cexit (_,_) as c -> c
   | Ctuple [] as c -> c
   | c -> Csequence(c, Ctuple [])
@@ -746,10 +746,10 @@ let float_array_ref arr ofs dbg =
   box_float dbg (unboxed_float_array_ref arr ofs dbg)
 
 let addr_array_set arr ofs newval dbg =
-  Cop(Cextcall("caml_modify_field", typ_void, false, None),
+  Cop(Cextcall("caml_modify_field", typ_void, [], false, None),
       [arr; untag_int ofs dbg; newval], dbg)
 let addr_array_initialize arr ofs newval dbg =
-  Cop(Cextcall("caml_initialize_field", typ_void, false, None),
+  Cop(Cextcall("caml_initialize_field", typ_void, [], false, None),
       [arr; untag_int ofs dbg; newval], dbg)
 let int_array_set arr ofs newval dbg =
   Cop(Cstore (Word_int, Lambda.Assignment),
@@ -785,7 +785,7 @@ let bigstring_length ba dbg =
 
 let lookup_tag obj tag dbg =
   bind "tag" tag (fun tag ->
-    Cop(Cextcall("caml_get_public_method", typ_val, false, None),
+    Cop(Cextcall("caml_get_public_method", typ_val, [], false, None),
         [obj; tag],
         dbg))
 
@@ -815,14 +815,14 @@ let make_alloc_generic set_fn dbg tag wordsize args =
     | e1::el -> Csequence(set_fn (Cvar id) (Cconst_int (idx, dbg)) e1 dbg,
                           fill_fields (idx + 2) el) in
     Clet(VP.create id,
-         Cop(Cextcall("caml_alloc", typ_val, true, None),
+         Cop(Cextcall("caml_alloc", typ_val, [], true, None),
                  [Cconst_int (wordsize, dbg); Cconst_int (tag, dbg)], dbg),
          fill_fields 1 args)
   end
 
 let make_alloc dbg tag args =
   let addr_array_init arr ofs newval dbg =
-    Cop(Cextcall("caml_initialize_field", typ_void, false, None),
+    Cop(Cextcall("caml_initialize_field", typ_void, [], false, None),
         [arr; untag_int ofs dbg; newval], dbg)
   in
   make_alloc_generic addr_array_init dbg tag (List.length args) args
@@ -2159,18 +2159,18 @@ let arraylength kind arg dbg =
       Cop(Cor, [float_array_length_shifted hdr dbg; Cconst_int (1, dbg)], dbg)
 
 let bbswap bi arg dbg =
-  let prim = match (bi : Primitive.boxed_integer) with
-    | Pnativeint -> "nativeint"
-    | Pint32 -> "int32"
-    | Pint64 -> "int64"
+  let prim, tyarg = match (bi : Primitive.boxed_integer) with
+    | Pnativeint -> "nativeint", XInt
+    | Pint32 -> "int32", XInt32
+    | Pint64 -> "int64", XInt64
   in
   Cop(Cextcall(Printf.sprintf "caml_%s_direct_bswap" prim,
-               typ_int, false, None),
+               typ_int, [tyarg], false, None),
       [arg],
       dbg)
 
 let bswap16 arg dbg =
-  (Cop(Cextcall("caml_bswap16_direct", typ_int, false, None),
+  (Cop(Cextcall("caml_bswap16_direct", typ_int, [], false, None),
        [arg],
        dbg))
 
@@ -2197,12 +2197,12 @@ let setfield n ptr init arg1 arg2 dbg =
   match assignment_kind ptr init with
   | Caml_modify ->
       return_unit dbg (Cop(Cextcall("caml_modify_field",
-                                    typ_void, false, None),
+                                    typ_void, [], false, None),
                       [arg1; Cconst_int (n, dbg); arg2],
                       dbg))
   | Caml_initialize ->
       return_unit dbg (Cop(Cextcall("caml_initialize_field",
-                                    typ_void, false, None),
+                                    typ_void, [], false, None),
                       [arg1; Cconst_int (n, dbg); arg2],
                       dbg))
   | Simple ->
diff --git a/asmcomp/cmmgen.ml b/asmcomp/cmmgen.ml
index 15cfd6c0b3..bea15d0b3f 100644
--- a/asmcomp/cmmgen.ml
+++ b/asmcomp/cmmgen.ml
@@ -729,7 +729,7 @@ and transl_catch env nfail ids body handler dbg =
 and transl_make_array dbg env kind args =
   match kind with
   | Pgenarray ->
-      Cop(Cextcall("caml_make_array", typ_val, true, None),
+      Cop(Cextcall("caml_make_array", typ_val, [], true, None),
           [make_alloc dbg 0 (List.map (transl env) args)], dbg)
   | Paddrarray | Pintarray ->
       make_alloc dbg 0 (List.map (transl env) args)
@@ -740,20 +740,32 @@ and transl_make_array dbg env kind args =
 and transl_ccall env prim args dbg =
   let transl_arg native_repr arg =
     match native_repr with
-    | Same_as_ocaml_repr -> transl env arg
-    | Unboxed_float -> transl_unbox_float dbg env arg
-    | Unboxed_integer bi -> transl_unbox_int dbg env bi arg
-    | Untagged_int -> untag_int (transl env arg) dbg
+    | Same_as_ocaml_repr ->
+        (XInt, transl env arg)
+    | Unboxed_float ->
+        (XFloat, transl_unbox_float dbg env arg)
+    | Unboxed_integer bi ->
+        let xty =
+          match bi with
+          | Pnativeint -> XInt
+          | Pint32 -> XInt32
+          | Pint64 -> XInt64 in
+        (xty, transl_unbox_int dbg env bi arg)
+    | Untagged_int ->
+        (XInt, untag_int (transl env arg) dbg)
   in
   let rec transl_args native_repr_args args =
     match native_repr_args, args with
     | [], args ->
         (* We don't require the two lists to be of the same length as
            [default_prim] always sets the arity to [0]. *)
-        List.map (transl env) args
-    | _, [] -> assert false
+        (List.map (fun _ -> XInt) args, List.map (transl env) args)
+    | _, [] ->
+        assert false
     | native_repr :: native_repr_args, arg :: args ->
-        transl_arg native_repr arg :: transl_args native_repr_args args
+        let (ty1, arg') = transl_arg native_repr arg in
+        let (tys, args') = transl_args native_repr_args args in
+        (ty1 :: tys, arg' :: args')
   in
   let typ_res, wrap_result =
     match prim.prim_native_repr_res with
@@ -764,10 +776,10 @@ and transl_ccall env prim args dbg =
     | Unboxed_integer bi -> (typ_int, box_int dbg bi)
     | Untagged_int -> (typ_int, (fun i -> tag_int i dbg))
   in
-  let args = transl_args prim.prim_native_repr_args args in
+  let typ_args, args = transl_args prim.prim_native_repr_args args in
   wrap_result
     (Cop(Cextcall(Primitive.native_name prim,
-                  typ_res, prim.prim_alloc, None), args, dbg))
+                  typ_res, typ_args, prim.prim_alloc, None), args, dbg))
 
 and transl_prim_1 env p arg dbg =
   match p with
@@ -1031,10 +1043,10 @@ and transl_prim_2 env p arg1 arg2 dbg =
                      [transl_unbox_int dbg env bi arg1;
                       transl_unbox_int dbg env bi arg2], dbg)) dbg
   | Patomic_exchange ->
-     Cop (Cextcall ("caml_atomic_exchange", typ_val, false, None),
+     Cop (Cextcall ("caml_atomic_exchange", typ_val, [], false, None),
           [transl env arg1; transl env arg2], dbg)
   | Patomic_fetch_add ->
-     Cop (Cextcall ("caml_atomic_fetch_add", typ_int, false, None),
+     Cop (Cextcall ("caml_atomic_fetch_add", typ_int, [], false, None),
           [transl env arg1; transl env arg2], dbg)
   | Prunstack | Pperform | Presume | Preperform | Ppoll | Pnop
   | Patomic_cas | Patomic_load _
@@ -1090,7 +1102,7 @@ and transl_prim_3 env p arg1 arg2 arg3 dbg =
         (transl_unbox_sized size dbg env arg3) dbg
 
   | Patomic_cas ->
-     Cop (Cextcall ("caml_atomic_cas", typ_int, false, None),
+     Cop (Cextcall ("caml_atomic_cas", typ_int, [], false, None),
           [transl env arg1; transl env arg2; transl env arg3], dbg)
 
   (* Effects *)
@@ -1351,7 +1363,7 @@ and transl_letrec env bindings cont =
       bindings
   in
   let op_alloc prim args =
-    Cop(Cextcall(prim, typ_val, true, None), args, dbg) in
+    Cop(Cextcall(prim, typ_val, [], true, None), args, dbg) in
   let rec init_blocks = function
     | [] -> fill_nonrec bsz
     | (id, _exp, RHS_block sz) :: rem ->
@@ -1377,7 +1389,7 @@ and transl_letrec env bindings cont =
     | [] -> cont
     | (id, exp, (RHS_block _ | RHS_infix _ | RHS_floatblock _)) :: rem ->
         let op =
-          Cop(Cextcall("caml_update_dummy", typ_void, false, None),
+          Cop(Cextcall("caml_update_dummy", typ_void, [], false, None),
               [Cvar (VP.var id); transl env exp], dbg) in
         Csequence(op, fill_blocks rem)
     | (_id, _exp, RHS_nonrec) :: rem ->
diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml
index e3e114a688..5d3f0546cb 100644
--- a/asmcomp/i386/proc.ml
+++ b/asmcomp/i386/proc.ml
@@ -121,7 +121,7 @@ let calling_conventions first_int last_int first_float last_float make_stack
   let float = ref first_float in
   let ofs = ref (-64) in
   for i = 0 to Array.length arg - 1 do
-    match arg.(i).typ with
+    match arg.(i) with
       Val | Int | Addr as ty ->
         if !int <= last_int then begin
           loc.(i) <- phys_reg !int;
@@ -158,7 +158,7 @@ let loc_external_arguments _arg =
   fatal_error "Proc.loc_external_arguments"
 let loc_external_results res =
   match res with
-  | [|{typ=Int};{typ=Int}|] -> [|eax; edx|]
+  | [| Int; Int |] -> [|eax; edx|]
   | _ ->
       let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
 
diff --git a/asmcomp/i386/selection.ml b/asmcomp/i386/selection.ml
index 28049b7c0a..45bf40d9f3 100644
--- a/asmcomp/i386/selection.ml
+++ b/asmcomp/i386/selection.ml
@@ -89,7 +89,7 @@ let rec float_needs = function
       let n1 = float_needs arg1 in
       let n2 = float_needs arg2 in
       if n1 = n2 then 1 + n1 else if n1 > n2 then n1 else n2
-  | Cop(Cextcall(fn, _ty_res, _alloc, _label), args, _dbg)
+  | Cop(Cextcall(fn, _ty_res, _ty_args, _alloc, _label), args, _dbg)
     when !fast_math && List.mem fn inline_float_ops ->
       begin match args with
         [arg] -> float_needs arg
@@ -162,7 +162,7 @@ method is_immediate (_n : int) = true
 
 method! is_simple_expr e =
   match e with
-  | Cop(Cextcall(fn, _, _alloc, _), args, _)
+  | Cop(Cextcall(fn, _, _, _, _), args, _)
     when !fast_math && List.mem fn inline_float_ops ->
       (* inlined float ops are simple if their arguments are *)
       List.for_all self#is_simple_expr args
@@ -171,7 +171,7 @@ method! is_simple_expr e =
 
 method! effects_of e =
   match e with
-  | Cop(Cextcall(fn, _, _, _), args, _)
+  | Cop(Cextcall(fn, _, _, _, _), args, _)
     when !fast_math && List.mem fn inline_float_ops ->
       Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
   | _ ->
@@ -233,7 +233,7 @@ method! select_operation op args dbg =
           super#select_operation op args dbg
       end
   (* Recognize inlined floating point operations *)
-  | Cextcall(fn, _ty_res, false, _label)
+  | Cextcall(fn, _ty_res, _ty_args, false, _label)
     when !fast_math && List.mem fn inline_float_ops ->
       (Ispecific(Ifloatspecial fn), args)
   (* i386 does not support immediate operands for multiply high signed *)
@@ -297,7 +297,7 @@ method select_push exp =
 method! mark_c_tailcall =
   contains_calls := true
 
-method! emit_extcall_args env args =
+method! emit_extcall_args env _ty_args args =
   let rec size_pushes = function
   | [] -> 0
   | e :: el -> Selectgen.size_expr env e + size_pushes el in
diff --git a/asmcomp/mach.ml b/asmcomp/mach.ml
index acd0e306ff..2383068a31 100644
--- a/asmcomp/mach.ml
+++ b/asmcomp/mach.ml
@@ -50,7 +50,10 @@ type operation =
   | Icall_imm of { func : string; label_after : label; }
   | Itailcall_ind of { label_after : label; }
   | Itailcall_imm of { func : string; label_after : label; }
-  | Iextcall of { func : string; alloc : bool; label_after : label; stack_ofs : int; }
+  | Iextcall of { func : string;
+                  ty_res : Cmm.machtype; ty_args : Cmm.exttype list;
+                  alloc : bool; label_after : label;
+                  stack_ofs : int; }
   | Istackoffset of int
   | Iload of Cmm.memory_chunk * Arch.addressing_mode
   | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
diff --git a/asmcomp/mach.mli b/asmcomp/mach.mli
index 96a16077e4..51276e2d11 100644
--- a/asmcomp/mach.mli
+++ b/asmcomp/mach.mli
@@ -57,8 +57,10 @@ type operation =
   | Icall_imm of { func : string; label_after : label; }
   | Itailcall_ind of { label_after : label; }
   | Itailcall_imm of { func : string; label_after : label; }
-  | Iextcall of { func : string; alloc : bool; label_after : label;
-                  stack_ofs : int}
+  | Iextcall of { func : string;
+                  ty_res : Cmm.machtype; ty_args : Cmm.exttype list;
+                  alloc : bool; label_after : label;
+                  stack_ofs : int; }
   | Istackoffset of int
   | Iload of Cmm.memory_chunk * Arch.addressing_mode
   | Istore of Cmm.memory_chunk * Arch.addressing_mode * bool
diff --git a/asmcomp/power/proc.ml b/asmcomp/power/proc.ml
index 3bcd12fcbf..5ee902f6ac 100644
--- a/asmcomp/power/proc.ml
+++ b/asmcomp/power/proc.ml
@@ -95,107 +95,81 @@ let loc_spacetime_node_hole = Reg.dummy  (* Spacetime unsupported *)
 
 (* Calling conventions *)
 
-let calling_conventions
-    first_int last_int first_float last_float
-    make_stack stack_ofs reg_use_stack arg =
-  let loc = Array.make (Array.length arg) [| Reg.dummy |] in
+let loc_int last_int make_stack reg_use_stack int ofs =
+  if !int <= last_int then begin
+    let l = phys_reg !int in
+    incr int;
+    if reg_use_stack then ofs := !ofs + size_int;
+    l
+  end else begin
+    let l = stack_slot (make_stack !ofs) Int in
+    ofs := !ofs + size_int; l
+  end
+
+let loc_float last_float make_stack reg_use_stack int float ofs =
+  if !float <= last_float then begin
+    let l = phys_reg !float in
+    incr float;
+    (* On 64-bit platforms, passing a float in a float register
+       reserves a normal register as well *)
+    if size_int = 8 then incr int;
+    if reg_use_stack then ofs := !ofs + size_float;
+    l
+  end else begin
+    ofs := Misc.align !ofs size_float;
+    let l = stack_slot (make_stack !ofs) Float in
+    ofs := !ofs + size_float; l
+  end
+
+let loc_int_pair last_int make_stack int ofs =
+  (* 64-bit quantities split across two registers must either be in a
+     consecutive pair of registers where the lowest numbered is an
+     even-numbered register; or in a stack slot that is 8-byte aligned. *)
+  int := Misc.align !int 2;
+  if !int <= last_int - 1 then begin
+    let reg_lower = phys_reg !int in
+    let reg_upper = phys_reg (1 + !int) in
+    int := !int + 2;
+    [| reg_lower; reg_upper |]
+  end else begin
+    ofs := Misc.align !ofs 8;
+    let stack_lower = stack_slot (make_stack !ofs) Int in
+    let stack_upper = stack_slot (make_stack (size_int + !ofs)) Int in
+    ofs := !ofs + 8;
+    [| stack_lower; stack_upper |]
+  end
+
+let calling_conventions first_int last_int first_float last_float make_stack
+      arg =
+  let loc = Array.make (Array.length arg) Reg.dummy in
   let int = ref first_int in
   let float = ref first_float in
-  let ofs = ref stack_ofs in
+  let ofs = ref 0 in
   for i = 0 to Array.length arg - 1 do
     match arg.(i) with
-    | [| arg |] ->
-      begin match arg.typ with
-      | Val | Int | Addr as ty ->
-          if !int <= last_int then begin
-            loc.(i) <- [| phys_reg !int |];
-            incr int;
-            if reg_use_stack then ofs := !ofs + size_int
-          end else begin
-            loc.(i) <- [| stack_slot (make_stack !ofs) ty |];
-            ofs := !ofs + size_int
-          end
-      | Float ->
-          if !float <= last_float then begin
-            loc.(i) <- [| phys_reg !float |];
-            incr float;
-            (* On 64-bit platforms, passing a float in a float register
-               reserves a normal register as well *)
-            if size_int = 8 then incr int;
-            if reg_use_stack then ofs := !ofs + size_float
-          end else begin
-            ofs := Misc.align !ofs size_float;
-            loc.(i) <- [| stack_slot (make_stack !ofs) Float |];
-            ofs := !ofs + size_float
-          end
-      end
-    | [| arg1; arg2 |] ->
-      (* Passing of 64-bit quantities to external functions
-         on 32-bit platform. *)
-      assert (size_int = 4);
-      begin match arg1.typ, arg2.typ with
-      | Int, Int ->
-          (* 64-bit quantities split across two registers must either be in a
-             consecutive pair of registers where the lowest numbered is an
-             even-numbered register; or in a stack slot that is 8-byte
-             aligned. *)
-          int := Misc.align !int 2;
-          if !int <= last_int - 1 then begin
-            let reg_lower = phys_reg !int in
-            let reg_upper = phys_reg (!int + 1) in
-            loc.(i) <- [| reg_lower; reg_upper |];
-            int := !int + 2
-          end else begin
-            let size_int64 = 8 in
-            ofs := Misc.align !ofs size_int64;
-            let ofs_lower = !ofs in
-            let ofs_upper = !ofs + size_int in
-            let stack_lower = stack_slot (make_stack ofs_lower) Int in
-            let stack_upper = stack_slot (make_stack ofs_upper) Int in
-            loc.(i) <- [| stack_lower; stack_upper |];
-            ofs := !ofs + size_int64
-          end
-      | _, _ ->
-        let f = function Int -> "I" | Addr -> "A" | Val -> "V" | Float -> "F" in
-        fatal_error (Printf.sprintf "Proc.calling_conventions: bad register \
-            type(s) for multi-register argument: %s, %s"
-          (f arg1.typ) (f arg2.typ))
-      end
-    | _ ->
-      fatal_error "Proc.calling_conventions: bad number of registers for \
-                   multi-register argument"
+    | Val | Int | Addr ->
+        loc.(i) <- loc_int last_int make_stack false int ofs
+    | Float ->
+        loc.(i) <- loc_float last_float make_stack false int float ofs
   done;
-  (loc, Misc.align !ofs 16)
-  (* Keep stack 16-aligned. *)
+  (loc, Misc.align !ofs 16)  (* keep stack 16-aligned *)
 
 let incoming ofs = Incoming ofs
 let outgoing ofs = Outgoing ofs
 let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
 
-let single_regs arg = Array.map (fun arg -> [| arg |]) arg
-let ensure_single_regs res =
-  Array.map (function
-      | [| res |] -> res
-      | _ -> failwith "Proc.ensure_single_regs")
-    res
-
 let max_arguments_for_tailcalls = 8
 
 let loc_arguments arg =
-  let (loc, ofs) =
-    calling_conventions 0 7 100 112 outgoing 0 false (single_regs arg)
-  in
-  (ensure_single_regs loc, ofs)
+    calling_conventions 0 7 100 112 outgoing arg
+
 let loc_parameters arg =
-  let (loc, _ofs) =
-    calling_conventions 0 7 100 112 incoming 0 false (single_regs arg)
-  in
-  ensure_single_regs loc
+  let (loc, _ofs) = calling_conventions 0 7 100 112 incoming arg
+  in loc
+
 let loc_results res =
-  let (loc, _ofs) =
-    calling_conventions 0 7 100 112 not_supported 0 false (single_regs res)
-  in
-  ensure_single_regs loc
+  let (loc, _ofs) = calling_conventions 0 7 100 112 not_supported res
+  in loc
 
 (* C calling conventions for ELF32:
      use GPR 3-10 and FPR 1-8 just like ML calling conventions.
@@ -223,19 +197,43 @@ let loc_results res =
      and need not appear here.
 *)
 
-let loc_external_arguments =
+let external_calling_conventions
+    first_int last_int first_float last_float
+    make_stack stack_ofs reg_use_stack ty_args =
+  let loc = Array.make (List.length ty_args) [| Reg.dummy |] in
+  let int = ref first_int in
+  let float = ref first_float in
+  let ofs = ref stack_ofs in
+  List.iteri
+    (fun i ty_arg ->
+      match ty_arg with
+      | XInt | XInt32 ->
+        loc.(i) <-
+          [| loc_int last_int make_stack reg_use_stack int ofs |]
+      | XInt64 ->
+          if size_int = 4 then begin
+            assert (not reg_use_stack);
+            loc.(i) <- loc_int_pair last_int make_stack int ofs
+          end else
+            loc.(i) <-
+              [| loc_int last_int make_stack reg_use_stack int ofs |]
+      | XFloat ->
+        loc.(i) <-
+          [| loc_float last_float make_stack reg_use_stack int float ofs |])
+    ty_args;
+  (loc, Misc.align !ofs 16) (* Keep stack 16-aligned *)
+
+let loc_external_arguments ty_args =
   match abi with
   | ELF32 ->
-      calling_conventions 0 7 100 107 outgoing 8 false
+      external_calling_conventions 0 7 100 107 outgoing 8 false ty_args
   | ELF64v1 ->
-      fun args ->
       let (loc, ofs) =
-        calling_conventions 0 7 100 112 outgoing 0 true args in
+        external_calling_conventions 0 7 100 112 outgoing 0 true ty_args in
       (loc, max ofs 64)
   | ELF64v2 ->
-      fun args ->
       let (loc, ofs) =
-        calling_conventions 0 7 100 112 outgoing 0 true args in
+        external_calling_conventions 0 7 100 112 outgoing 0 true ty_args in
       if Array.fold_left
            (fun stk r ->
               assert (Array.length r = 1);
@@ -249,10 +247,8 @@ let loc_external_arguments =
 (* Results are in GPR 3 and FPR 1 *)
 
 let loc_external_results res =
-  let (loc, _ofs) =
-    calling_conventions 0 1 100 100 not_supported 0 false (single_regs res)
-  in
-  ensure_single_regs loc
+  let (loc, _ofs) = calling_conventions 0 1 100 100 not_supported res
+  in loc
 
 (* Exceptions are in GPR 3 *)
 
diff --git a/asmcomp/printcmm.ml b/asmcomp/printcmm.ml
index 3f6178ec98..9814b42e1b 100644
--- a/asmcomp/printcmm.ml
+++ b/asmcomp/printcmm.ml
@@ -39,6 +39,21 @@ let machtype ppf mty =
            fprintf ppf "*%a" machtype_component mty.(i)
          done
 
+let exttype ppf = function
+  | XInt -> fprintf ppf "int"
+  | XInt32 -> fprintf ppf "int32"
+  | XInt64 -> fprintf ppf "int64"
+  | XFloat -> fprintf ppf "float"
+
+let extcall_signature ppf (ty_res, ty_args) =
+  begin match ty_args with
+  | [] -> ()
+  | ty_arg1 :: ty_args ->
+      exttype ppf ty_arg1;
+      List.iter (fun ty -> fprintf ppf ",%a" exttype ty) ty_args
+  end;
+  fprintf ppf "->%a" machtype ty_res
+
 let integer_comparison = function
   | Ceq -> "=="
   | Cne -> "!="
@@ -101,7 +116,7 @@ let location d =
 
 let operation d = function
   | Capply _ty -> "app" ^ location d
-  | Cextcall(lbl, _ty, _alloc, _) ->
+  | Cextcall(lbl, _ty_res, _ty_args, _alloc, _) ->
       Printf.sprintf "extcall \"%s\"%s" lbl (location d)
   | Cload {memory_chunk; mutability} -> (
     match mutability with
@@ -211,7 +226,8 @@ let rec expr ppf = function
       List.iter (fun e -> fprintf ppf "@ %a" expr e) el;
       begin match op with
       | Capply mty -> fprintf ppf "@ %a" machtype mty
-      | Cextcall(_, mty, _, _) -> fprintf ppf "@ %a" machtype mty
+      | Cextcall(_, ty_res, ty_args, _, _) ->
+          fprintf ppf "@ %a" extcall_signature (ty_res, ty_args)
       | _ -> ()
       end;
       fprintf ppf ")@]"
diff --git a/asmcomp/printcmm.mli b/asmcomp/printcmm.mli
index 462239ac82..f88d8866cb 100644
--- a/asmcomp/printcmm.mli
+++ b/asmcomp/printcmm.mli
@@ -19,7 +19,9 @@ open Format
 
 val rec_flag : formatter -> Cmm.rec_flag -> unit
 val machtype_component : formatter -> Cmm.machtype_component -> unit
-val machtype : formatter -> Cmm.machtype_component array -> unit
+val machtype : formatter -> Cmm.machtype -> unit
+val exttype : formatter -> Cmm.exttype -> unit
+val extcall_signature : formatter -> Cmm.machtype * Cmm.exttype list -> unit
 val integer_comparison : Cmm.integer_comparison -> string
 val float_comparison : Cmm.float_comparison -> string
 val chunk : Cmm.memory_chunk -> string
diff --git a/asmcomp/proc.mli b/asmcomp/proc.mli
index 91b15de45c..835ca1d0a6 100644
--- a/asmcomp/proc.mli
+++ b/asmcomp/proc.mli
@@ -28,16 +28,14 @@ val phys_reg: int -> Reg.t
 val rotate_registers: bool
 
 (* Calling conventions *)
-val loc_arguments: Reg.t array -> Reg.t array * int
-val loc_results: Reg.t array -> Reg.t array
-val loc_parameters: Reg.t array -> Reg.t array
+val loc_arguments: Cmm.machtype -> Reg.t array * int
+val loc_results: Cmm.machtype -> Reg.t array
+val loc_parameters: Cmm.machtype -> Reg.t array
 (* For argument number [n] split across multiple registers, the target-specific
    implementation of [loc_external_arguments] must return [regs] such that
-   [regs.(n).(0)] is to hold the part of the value at the lowest address.
-   (All that matters for the input to [loc_external_arguments] is the pattern
-   of lengths and register types of the various supplied arrays.) *)
-val loc_external_arguments: Reg.t array array -> Reg.t array array * int
-val loc_external_results: Reg.t array -> Reg.t array
+   [regs.(n).(0)] is to hold the part of the value at the lowest address. *)
+val loc_external_arguments: Cmm.exttype list -> Reg.t array array * int
+val loc_external_results: Cmm.machtype -> Reg.t array
 val loc_exn_bucket: Reg.t
 val loc_spacetime_node_hole: Reg.t
 
diff --git a/asmcomp/reg.ml b/asmcomp/reg.ml
index 3c9c09ac1f..bc36dc9962 100644
--- a/asmcomp/reg.ml
+++ b/asmcomp/reg.ml
@@ -117,6 +117,9 @@ let at_location ty loc =
   incr currstamp;
   r
 
+let typv rv =
+  Array.map (fun r -> r.typ) rv
+
 let anonymous t =
   match Raw_name.to_string t.raw_name with
   | None -> true
diff --git a/asmcomp/reg.mli b/asmcomp/reg.mli
index d89d02ce19..89b4e4220b 100644
--- a/asmcomp/reg.mli
+++ b/asmcomp/reg.mli
@@ -49,7 +49,7 @@ val createv: Cmm.machtype -> t array
 val createv_like: t array -> t array
 val clone: t -> t
 val at_location: Cmm.machtype_component -> location -> t
-
+val typv: t array -> Cmm.machtype
 val anonymous : t -> bool
 
 (* Name for printing *)
diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml
index 4c7b586120..502cbb1588 100644
--- a/asmcomp/riscv/proc.ml
+++ b/asmcomp/riscv/proc.ml
@@ -127,7 +127,7 @@ let calling_conventions
   let float = ref first_float in
   let ofs = ref 0 in
   for i = 0 to Array.length arg - 1 do
-    match arg.(i).typ with
+    match arg.(i) with
     | Val | Int | Addr as ty ->
         if !int <= last_int then begin
           loc.(i) <- phys_reg !int;
@@ -161,13 +161,6 @@ let loc_spacetime_node_hole = Reg.dummy  (* Spacetime unsupported *)
      remaining args on stack.
    Return values in a0 .. a7, s2 .. s9 or fa0 .. fa7, fs2 .. fs9. *)
 
-let single_regs arg = Array.map (fun arg -> [| arg |]) arg
-let ensure_single_regs res =
-  Array.map (function
-      | [| res |] -> res
-      | _ -> failwith "proc.ensure_single_regs"
-    ) res
-
 let loc_arguments arg =
   calling_conventions 0 15 110 125 outgoing arg
 
@@ -199,42 +192,35 @@ let external_calling_conventions
   let ofs = ref 0 in
   for i = 0 to Array.length arg - 1 do
     match arg.(i) with
-    | [| arg |] ->
-        begin match arg.typ with
-        | Val | Int | Addr as ty ->
-            if !int <= last_int then begin
-              loc.(i) <- [| phys_reg !int |];
-              incr int
-            end else begin
-              loc.(i) <- [| stack_slot (make_stack !ofs) ty |];
-              ofs := !ofs + size_int
-            end
-        | Float ->
-            if !float <= last_float then begin
-              loc.(i) <- [| phys_reg !float |];
-              incr float
-            end else if !int <= last_int then begin
-              loc.(i) <- [| phys_reg !int |];
-              incr int
-            end else begin
-              loc.(i) <- [| stack_slot (make_stack !ofs) Float |];
-              ofs := !ofs + size_float
-            end
+    | Val | Int | Addr as ty ->
+        if !int <= last_int then begin
+          loc.(i) <- [| phys_reg !int |];
+          incr int
+        end else begin
+          loc.(i) <- [| stack_slot (make_stack !ofs) ty |];
+          ofs := !ofs + size_int
+        end
+    | Float ->
+        if !float <= last_float then begin
+          loc.(i) <- [| phys_reg !float |];
+          incr float
+        end else if !int <= last_int then begin
+          loc.(i) <- [| phys_reg !int |];
+          incr int
+        end else begin
+          loc.(i) <- [| stack_slot (make_stack !ofs) Float |];
+          ofs := !ofs + size_float
         end
-    | _ ->
-        fatal_error "Proc.calling_conventions: bad number of register for \
-                     multi-register argument"
   done;
   (loc, Misc.align !ofs 16) (* Keep stack 16-aligned. *)
 
-let loc_external_arguments arg =
+let loc_external_arguments ty_args =
+  let arg = Cmm.machtype_of_exttype_list ty_args in
   external_calling_conventions 0 7 110 117 outgoing arg
 
 let loc_external_results res =
-  let (loc, _ofs) =
-    external_calling_conventions 0 1 110 111 not_supported (single_regs res)
-  in
-  ensure_single_regs loc
+  let (loc, _ofs) = calling_conventions 0 1 110 111 not_supported res
+  in loc
 
 (* Exceptions are in a0 *)
 
diff --git a/asmcomp/s390x/proc.ml b/asmcomp/s390x/proc.ml
index 9f0dff2132..2c6c357338 100644
--- a/asmcomp/s390x/proc.ml
+++ b/asmcomp/s390x/proc.ml
@@ -105,7 +105,7 @@ let calling_conventions
   let float = ref first_float in
   let ofs = ref stack_ofs in
   for i = 0 to Array.length arg - 1 do
-    match arg.(i).typ with
+    match arg.(i) with
     | Val | Int | Addr as ty ->
         if !int <= last_int then begin
           loc.(i) <- phys_reg !int;
@@ -145,11 +145,9 @@ let loc_results res =
      Always reserve 160 bytes at bottom of stack, plus whatever is needed
      to hold the overflow arguments. *)
 
-let loc_external_arguments arg =
-  let arg =
-    Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg in
-  let (loc, ofs) =
-    calling_conventions 0 4 100 103 outgoing 160 arg in
+let loc_external_arguments ty_args =
+  let arg = Cmm.machtype_of_exttype_list ty_args in
+  let (loc, ofs) = calling_conventions 0 4 100 103 outgoing 160 arg in
   (Array.map (fun reg -> [|reg|]) loc, ofs)
 
 (* Results are in GPR 2 and FPR 0 *)
diff --git a/asmcomp/selectgen.ml b/asmcomp/selectgen.ml
index 0299aca387..da8e003a5a 100644
--- a/asmcomp/selectgen.ml
+++ b/asmcomp/selectgen.ml
@@ -66,7 +66,7 @@ let env_empty = {
 
 let oper_result_type = function
     Capply ty -> ty
-  | Cextcall(_s, ty, _alloc, _) -> ty
+  | Cextcall(_s, ty_res, _ty_args, _alloc, _) -> ty_res
   | Cload {memory_chunk} ->
       begin match memory_chunk with
       | Word_val -> typ_val
@@ -448,13 +448,13 @@ method select_operation op args _dbg =
   | (Capply _, _) ->
     let label_after = Cmm.new_label () in
     (Icall_ind { label_after; }, args)
-  | (Cextcall(func, _ty, alloc, label_after), _) ->
+  | (Cextcall(func, ty_res, ty_args, alloc, label_after), _) ->
     let label_after =
       match label_after with
       | None -> Cmm.new_label ()
       | Some label_after -> label_after
     in
-    Iextcall { func; alloc; label_after; stack_ofs = -1}, args
+    Iextcall { func; alloc; ty_res; ty_args; label_after; stack_ofs = -1}, args
   | (Cload {memory_chunk}, [arg]) ->
       let (addr, eloc) = self#select_addressing memory_chunk arg in
       (Iload(memory_chunk, addr), [eloc])
@@ -722,8 +722,8 @@ method emit_expr (env:environment) exp =
               let r1 = self#emit_tuple env new_args in
               let rarg = Array.sub r1 1 (Array.length r1 - 1) in
               let rd = self#regs_for ty in
-              let (loc_arg, stack_ofs) = Proc.loc_arguments rarg in
-              let loc_res = Proc.loc_results rd in
+              let (loc_arg, stack_ofs) = Proc.loc_arguments (Reg.typv rarg) in
+              let loc_res = Proc.loc_results (Reg.typv rd) in
               let spacetime_reg =
                 self#about_to_emit_call env (Iop new_op) [| r1.(0) |] dbg
               in
@@ -736,8 +736,8 @@ method emit_expr (env:environment) exp =
           | Icall_imm _ ->
               let r1 = self#emit_tuple env new_args in
               let rd = self#regs_for ty in
-              let (loc_arg, stack_ofs) = Proc.loc_arguments r1 in
-              let loc_res = Proc.loc_results rd in
+              let (loc_arg, stack_ofs) = Proc.loc_arguments (Reg.typv r1) in
+              let loc_res = Proc.loc_results (Reg.typv rd) in
               let spacetime_reg =
                 self#about_to_emit_call env (Iop new_op) [| |] dbg
               in
@@ -748,14 +748,14 @@ method emit_expr (env:environment) exp =
               Some rd
           | Iextcall r ->
               let spacetime_reg =
-                self#about_to_emit_call env (Iop new_op) [| |] dbg
-              in
-              let (loc_arg, stack_ofs) = self#emit_extcall_args env new_args in
+                self#about_to_emit_call env (Iop new_op) [| |] dbg in
+              let (loc_arg, stack_ofs) =
+                self#emit_extcall_args env r.ty_args new_args in
               self#maybe_emit_spacetime_move env ~spacetime_reg;
               let rd = self#regs_for ty in
               let loc_res =
                 self#insert_op_debug env (Iextcall {r with stack_ofs = stack_ofs}) dbg
-                  loc_arg (Proc.loc_external_results rd) in
+                  loc_arg (Proc.loc_external_results (Reg.typv rd)) in
               self#insert_move_results env loc_res rd stack_ofs;
               Some rd
           | Ialloc { bytes = _; spacetime_index; label_after_call_gc; } ->
@@ -894,7 +894,7 @@ method private bind_let (env:environment) v r1 =
   end
 
 method private bind_let_mut (env:environment) v k r1 =
-  let rv = Reg.createv k in
+  let rv = self#regs_for k in
   name_regs v rv;
   self#insert_moves env r1 rv;
   env_add ~mut:Mutable v rv env
@@ -1003,19 +1003,26 @@ method private emit_tuple_not_flattened env exp_list =
 method private emit_tuple env exp_list =
   Array.concat (self#emit_tuple_not_flattened env exp_list)
 
-method emit_extcall_args env args =
+method emit_extcall_args env ty_args args =
   let args = self#emit_tuple_not_flattened env args in
-  let arg_hard_regs, stack_ofs =
-    Proc.loc_external_arguments (Array.of_list args)
-  in
-  (* Flattening [args] and [arg_hard_regs] causes parts of values split
-     across multiple registers to line up correctly, by virtue of the
-     semantics of [split_int64_for_32bit_target] in cmmgen.ml, and the
-     required semantics of [loc_external_arguments] (see proc.mli). *)
-  let args = Array.concat args in
-  let arg_hard_regs = Array.concat (Array.to_list arg_hard_regs) in
-  self#insert_move_args env args arg_hard_regs stack_ofs;
-  arg_hard_regs, stack_ofs
+  let ty_args =
+    if ty_args = [] then List.map (fun _ -> XInt) args else ty_args in
+  let locs, stack_ofs = Proc.loc_external_arguments ty_args in
+  let ty_args = Array.of_list ty_args in
+  if stack_ofs <> 0 then
+    self#insert env (Iop(Istackoffset stack_ofs)) [||] [||];
+  List.iteri
+    (fun i arg ->
+      self#insert_move_extcall_arg env ty_args.(i) arg locs.(i))
+    args;
+  Array.concat (Array.to_list locs), stack_ofs
+
+method insert_move_extcall_arg env _ty_arg src dst =
+  (* The default implementation is one or two ordinary moves.
+     (Two in the case of an int64 argument on a 32-bit platform.)
+     It can be overriden to use special move instructions,
+     for example a "32-bit move" instruction for int32 arguments. *)
+  self#insert_moves env src dst
 
 method emit_stores env data regs_addr =
   let a =
@@ -1047,7 +1054,7 @@ method private emit_return (env:environment) exp =
   match self#emit_expr env exp with
     None -> ()
   | Some r ->
-      let loc = Proc.loc_results r in
+      let loc = Proc.loc_results (Reg.typv r) in
       self#insert_moves env r loc;
       self#insert env Ireturn loc [||]
 
@@ -1074,7 +1081,7 @@ method emit_tail (env:environment) exp =
             Icall_ind { label_after; } ->
               let r1 = self#emit_tuple env new_args in
               let rarg = Array.sub r1 1 (Array.length r1 - 1) in
-              let (loc_arg, stack_ofs) = Proc.loc_arguments rarg in
+              let (loc_arg, stack_ofs) = Proc.loc_arguments (Reg.typv rarg) in
               if stack_ofs = 0 then begin
                 let call = Iop (Itailcall_ind { label_after; }) in
                 let spacetime_reg =
@@ -1086,7 +1093,7 @@ method emit_tail (env:environment) exp =
                             (Array.append [|r1.(0)|] loc_arg) [||];
               end else begin
                 let rd = self#regs_for ty in
-                let loc_res = Proc.loc_results rd in
+                let loc_res = Proc.loc_results (Reg.typv rd) in
                 let spacetime_reg =
                   self#about_to_emit_call env (Iop new_op) [| r1.(0) |] dbg
                 in
@@ -1099,7 +1106,7 @@ method emit_tail (env:environment) exp =
               end
           | Icall_imm { func; label_after; } ->
               let r1 = self#emit_tuple env new_args in
-              let (loc_arg, stack_ofs) = Proc.loc_arguments r1 in
+              let (loc_arg, stack_ofs) = Proc.loc_arguments (Reg.typv r1) in
               if stack_ofs = 0 then begin
                 let call = Iop (Itailcall_imm { func; label_after; }) in
                 let spacetime_reg =
@@ -1110,7 +1117,7 @@ method emit_tail (env:environment) exp =
                 self#insert_debug env call dbg loc_arg [||];
               end else if func = !current_function_name then begin
                 let call = Iop (Itailcall_imm { func; label_after; }) in
-                let loc_arg' = Proc.loc_parameters r1 in
+                let loc_arg' = Proc.loc_parameters (Reg.typv r1) in
                 let spacetime_reg =
                   self#about_to_emit_call env call [| |] dbg
                 in
@@ -1119,7 +1126,7 @@ method emit_tail (env:environment) exp =
                 self#insert_debug env call dbg loc_arg' [||];
               end else begin
                 let rd = self#regs_for ty in
-                let loc_res = Proc.loc_results rd in
+                let loc_res = Proc.loc_results (Reg.typv rd) in
                 let spacetime_reg =
                   self#about_to_emit_call env (Iop new_op) [| |] dbg
                 in
@@ -1194,7 +1201,7 @@ method emit_tail (env:environment) exp =
       begin match opt_r1 with
         None -> ()
       | Some r1 ->
-          let loc = Proc.loc_results r1 in
+          let loc = Proc.loc_results (Reg.typv r1) in
           self#insert_moves env r1 loc;
           self#insert env Ireturn loc [||]
       end
@@ -1229,7 +1236,7 @@ method emit_fundecl f =
       (fun (id, ty) -> let r = self#regs_for ty in name_regs id r; r)
       f.Cmm.fun_args in
   let rarg = Array.concat rargs in
-  let loc_arg = Proc.loc_parameters rarg in
+  let loc_arg = Proc.loc_parameters (Reg.typv rarg) in
   (* To make it easier to add the Spacetime instrumentation code, we
      first emit the body and extract the resulting instruction sequence;
      then we emit the prologue followed by any Spacetime instrumentation.  The
@@ -1273,9 +1280,8 @@ end
 *)
 
 let is_tail_call nargs =
-  assert (Reg.dummy.typ = Int);
-  let args = Array.make (nargs + 1) Reg.dummy in
-  let (_loc_arg, stack_ofs) = Proc.loc_arguments args in
+  let ty = Array.make (nargs + 1) Int in
+  let (_loc_arg, stack_ofs) = Proc.loc_arguments ty in
   stack_ofs = 0
 
 let _ =
diff --git a/asmcomp/selectgen.mli b/asmcomp/selectgen.mli
index 3e5b294ed1..bb21b5c10a 100644
--- a/asmcomp/selectgen.mli
+++ b/asmcomp/selectgen.mli
@@ -97,8 +97,13 @@ class virtual selector_generic : object
       -> Reg.t array -> Reg.t array
     (* Can be overridden to deal with 2-address instructions
        or instructions with hardwired input/output registers *)
+  method insert_move_extcall_arg :
+    environment -> Cmm.exttype -> Reg.t array -> Reg.t array -> unit
+    (* Can be overriden to deal with unusual unboxed calling conventions,
+       e.g. on a 64-bit platform, passing unboxed 32-bit arguments
+       in 32-bit stack slots. *)
   method emit_extcall_args :
-    environment -> Cmm.expression list -> Reg.t array * int
+    environment -> Cmm.exttype list -> Cmm.expression list -> Reg.t array * int
     (* Can be overridden to deal with stack-based calling conventions *)
   method emit_stores :
     environment -> Cmm.expression list -> Reg.t array -> unit
diff --git a/asmcomp/spacetime_profiling.ml b/asmcomp/spacetime_profiling.ml
index 21a9cb4955..9b6e649058 100644
--- a/asmcomp/spacetime_profiling.ml
+++ b/asmcomp/spacetime_profiling.ml
@@ -115,7 +115,7 @@ let code_for_function_prologue ~function_name ~fun_dbg:dbg ~node_hole =
           Clet (VP.create is_new_node,
             Clet (VP.create pc, cconst_symbol function_name,
               Cop (Cextcall ("caml_spacetime_allocate_node",
-                  [| Int |], false, None),
+                  typ_int, [], false, None),
                 [cconst_int (1 (* header *) + !index_within_node);
                 Cvar pc;
                 Cvar node_hole;
@@ -155,7 +155,7 @@ let code_for_blockheader ~value's_header ~node ~dbg =
        the latter table to be used for resolving a program counter at such
        a point to a location.
     *)
-    Cop (Cextcall ("caml_spacetime_generate_profinfo", [| Int |],
+    Cop (Cextcall ("caml_spacetime_generate_profinfo", typ_int, [],
         false, Some label),
       [Cvar address_of_profinfo;
        cconst_int (index_within_node + 1)],
@@ -276,7 +276,7 @@ let code_for_call ~node ~callee ~is_tail ~label dbg =
         else cconst_int 1  (* [Val_unit] *)
       in
       Cop (Cextcall ("caml_spacetime_indirect_node_hole_ptr",
-          [| Int |], false, None),
+           typ_int, [], false, None),
         [callee; Cvar place_within_node; caller_node],
         dbg))
 
@@ -340,7 +340,7 @@ class virtual instruction_selection = object (self)
         assert (Array.length arg = 1);
         self#instrument_indirect_call ~env ~callee:arg.(0)
           ~is_tail:true ~label_after dbg
-      | M.Iop (M.Iextcall { func; alloc = true; label_after; stack_ofs = _; }) ->
+      | M.Iop (M.Iextcall { func; alloc = true; label_after; _ }) ->
         (* N.B. No need to instrument "noalloc" external calls. *)
         assert (Array.length arg = 0);
         self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg