summaryrefslogtreecommitdiff
path: root/asmcomp/power/proc.ml
diff options
context:
space:
mode:
author: Xavier Leroy <xavierleroy@users.noreply.github.com> 2021-09-09 09:33:55 +0200
committer: GitHub <noreply@github.com> 2021-09-09 09:33:55 +0200
commit: 0117428c3ea163f42f4549ff4e7f255f29f6b4d4 (patch)
tree: 0675265efc28097af8e9b1d9afd4bf68dcbf55be /asmcomp/power/proc.ml
parent: 817796733f199adff7f3f6e92d046e1777392ff1 (diff)
download: ocaml-0117428c3ea163f42f4549ff4e7f255f29f6b4d4.tar.gz
Support more arguments to tail calls by passing them through the domain state (#10595)
In 2004, commit af9b98fcb, the calling conventions for the i386 port of ocamlopt were changed: the first 6 integer arguments go into registers, like before, but the next 16 arguments go into a global array `caml_extra_params`, instead of being passed on stack like before.

The reason for this hack is that passing arguments in global memory does not preclude tail call optimization, unlike passing arguments on stack. Parameters passed via `caml_extra_params` are immediately copied on stack or in registers on function entry, before another function call, a GC, or a context switch can take place, so everything is safe in OCaml, and in Multicore OCaml as long as there is only one execution domain.

This hack was justified by the paucity of registers provided by the i386 architecture. It was believed that other architectures provide enough registers for parameter passing that most if not all reasonable tail calls can be accommodated.

Now it's 2021 and users want tail calls with more arguments than available registers on all the architectures we support.

So, biting the bullet and swallowing some pride, this commit extends the 2004 i386 hack to all the architectures supported by OCaml. Once the registers available for passing function arguments are exhausted, the next 64 arguments are passed in a memory area that is part of the domain state. This argument passing is compatible with tail calls, so we get guaranteed tail calls up to 70 arguments (in the worst case).

The domain state is used instead of a global array so that (1) this is compatible with Multicore OCaml and concurrent execution of multiple domains, and (2) we benefit from efficient addressing from the domain state register.

For i386, we don't have a domain state register, and Multicore OCaml will support only one domain on this architecture, so we keep using a global `caml_extra_params` array; only, its size was increased to support 64 arguments.
The tests for tail calls were extended to:
- Test tail calls to other functions, not just to self
- Test up to 32 arguments.
Diffstat (limited to 'asmcomp/power/proc.ml')
-rw-r--r-- asmcomp/power/proc.ml 33
1 files changed, 21 insertions, 12 deletions
diff --git a/asmcomp/power/proc.ml b/asmcomp/power/proc.ml
index 2563d841b5..1a37578111 100644
--- a/asmcomp/power/proc.ml
+++ b/asmcomp/power/proc.ml
@@ -92,6 +92,8 @@ let stack_slot slot ty =
(* Calling conventions *)
+let size_domainstate_args = 64 * size_int
+
let loc_int last_int make_stack reg_use_stack int ofs =
if !int <= last_int then begin
let l = phys_reg !int in
@@ -136,12 +138,12 @@ let loc_int_pair last_int make_stack int ofs =
[| stack_lower; stack_upper |]
end
-let calling_conventions first_int last_int first_float last_float make_stack
- arg =
+let calling_conventions first_int last_int first_float last_float
+ make_stack first_stack arg =
let loc = Array.make (Array.length arg) Reg.dummy in
let int = ref first_int in
let float = ref first_float in
- let ofs = ref 0 in
+ let ofs = ref first_stack in
for i = 0 to Array.length arg - 1 do
match arg.(i) with
| Val | Int | Addr ->
@@ -149,23 +151,30 @@ let calling_conventions first_int last_int first_float last_float make_stack
| Float ->
loc.(i) <- loc_float last_float make_stack false int float ofs
done;
- (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
-
-let incoming ofs = Incoming ofs
-let outgoing ofs = Outgoing ofs
+ (loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)
+
+let incoming ofs =
+ if ofs >= 0
+ then Incoming ofs
+ else Domainstate (ofs + size_domainstate_args)
+let outgoing ofs =
+ if ofs >= 0
+ then Outgoing ofs
+ else Domainstate (ofs + size_domainstate_args)
let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
-let max_arguments_for_tailcalls = 16
+let max_arguments_for_tailcalls = 16 (* in regs *) + 64 (* in domain state *)
let loc_arguments arg =
- calling_conventions 0 15 100 112 outgoing arg
+ calling_conventions 0 15 100 112 outgoing (- size_domainstate_args) arg
let loc_parameters arg =
- let (loc, _ofs) = calling_conventions 0 15 100 112 incoming arg
+ let (loc, _ofs) =
+ calling_conventions 0 15 100 112 incoming (- size_domainstate_args) arg
in loc
let loc_results res =
- let (loc, _ofs) = calling_conventions 0 15 100 112 not_supported res
+ let (loc, _ofs) = calling_conventions 0 15 100 112 not_supported 0 res
in loc
(* C calling conventions for ELF32:
@@ -244,7 +253,7 @@ let loc_external_arguments ty_args =
(* Results are in GPR 3 and FPR 1 *)
let loc_external_results res =
- let (loc, _ofs) = calling_conventions 0 1 100 100 not_supported res
+ let (loc, _ofs) = calling_conventions 0 1 100 100 not_supported 0 res
in loc
(* Exceptions are in GPR 3 *)