summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Benedikt Meurer <benedikt.meurer@googlemail.com> 2013-11-02 15:19:20 +0000
committer Benedikt Meurer <benedikt.meurer@googlemail.com> 2013-11-02 15:19:20 +0000
commit db057d0bebd6c4c3510bd3158558b918f568a3dc (patch)
tree 53ac735e99b79b00eb52040e5f87033cd3dacfda
parent 954f8e5605c8f65a4858aee11ee258c3cc39484f (diff)
download ocaml-db057d0bebd6c4c3510bd3158558b918f568a3dc.tar.gz
[arm] Optimize integer division and modulus by constant.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@14259 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- asmcomp/arm/emit.mlp 90
-rw-r--r-- asmcomp/arm/proc.ml 2
-rw-r--r-- asmcomp/arm/selection.ml 15
3 files changed, 81 insertions, 26 deletions
diff --git a/asmcomp/arm/emit.mlp b/asmcomp/arm/emit.mlp
index f5802270ec..3a5fa7606e 100644
--- a/asmcomp/arm/emit.mlp
+++ b/asmcomp/arm/emit.mlp
@@ -597,33 +597,79 @@ let emit_instr i =
| Lop(Iintop op) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; 1
- | Lop(Iintop_imm(Idiv, n)) -> (* n is a power of 2 *)
+ | Lop(Iintop_imm(Idiv, n)) ->
let l = Misc.log2 n in
- let r = i.res.(0) in
- ` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
- if n <= 256 then begin
- ` it lt\n`;
- ` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
+ if n = 1 lsl l then begin
+ let r = i.res.(0) in
+ ` movs {emit_reg r}, {emit_reg i.arg.(0)}\n`;
+ if n <= 256 then begin
+ ` it lt\n`;
+ ` addlt {emit_reg r}, {emit_reg r}, #{emit_int (n-1)}\n`
+ end else begin
+ ` itt lt\n`;
+ ` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
+ ` sublt {emit_reg r}, {emit_reg r}, #1\n`
+ end;
+ (* Use movs to enable 16-bit T1 encoding *)
+ ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5
end else begin
- ` itt lt\n`;
- ` addlt {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
- ` sublt {emit_reg r}, {emit_reg r}, #1\n`
- end;
- (* Use movs to enable 16-bit T1 encoding *)
- ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int l}\n`; 5
- | Lop(Iintop_imm(Imod, n)) -> (* n is a power of 2 *)
+ assert (!arch >= ARMv6);
+ let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in
+ (* Algorithm (signed division by the constant n via multiply-high):
+ t = multiply-high-signed(arg, m)
+ if m < 0, t = t + arg (smmla accumulates the dividend, not m)
+ t = shift-right-signed(t, p)
+ res = t + sign-bit(arg)
+ *)
+ let a = i.arg.(0) in
+ let r = i.res.(0) in
+ let ninstr = emit_intconst r (Nativeint.to_int32 m) in
+ if m >= 0n then
+ ` smmul {emit_reg r}, {emit_reg r}, {emit_reg a}\n`
+ else
+ ` smmla {emit_reg r}, {emit_reg r}, {emit_reg a}, {emit_reg a}\n`;
+ if p > 0 then
+ ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int p}\n`;
+ ` add {emit_reg r}, {emit_reg r}, {emit_reg a}, lsr #31\n`;
+ ninstr + 3
+ end
+ | Lop(Iintop_imm(Imod, n)) ->
let l = Misc.log2 n in
let a = i.arg.(0) in
let r = i.res.(0) in
- let lbl = new_label() in
- ` cmp {emit_reg a}, #0\n`;
- ` mov {emit_reg r}, {emit_reg a}, lsl #{emit_int (32-l)}\n`;
- ` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
- ` bpl {emit_label lbl}\n`;
- ` cmp {emit_reg r}, #0\n`;
- ` it ne\n`;
- ` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
- `{emit_label lbl}:\n`; 7
+ if n = 1 lsl l then begin
+ let lbl = new_label() in
+ ` cmp {emit_reg a}, #0\n`;
+ ` mov {emit_reg r}, {emit_reg a}, lsl #{emit_int (32-l)}\n`;
+ ` mov {emit_reg r}, {emit_reg r}, lsr #{emit_int (32-l)}\n`;
+ ` bpl {emit_label lbl}\n`;
+ ` cmp {emit_reg r}, #0\n`;
+ ` it ne\n`;
+ ` subne {emit_reg r}, {emit_reg r}, #{emit_int n}\n`;
+ `{emit_label lbl}:\n`; 7
+ end else begin
+ assert (!arch >= ARMv6);
+ let (m, p) = Selectgen.divimm_parameters (Nativeint.of_int n) in
+ (* Algorithm (remainder = arg - (arg / n) * n, quotient via multiply-high):
+ t = multiply-high-signed(arg, m)
+ if m < 0, t = t + arg (smmla accumulates the dividend, not m)
+ t = shift-right-signed(t, p)
+ t = (t + sign-bit(arg)) * n
+ res = arg - t
+ *)
+ let r12 = phys_reg 8 in
+ let ninstr = emit_intconst r (Nativeint.to_int32 m) in
+ if m >= 0n then
+ ` smmul {emit_reg r}, {emit_reg r}, {emit_reg a}\n`
+ else
+ ` smmla {emit_reg r}, {emit_reg r}, {emit_reg a}, {emit_reg a}\n`;
+ if p > 0 then
+ ` movs {emit_reg r}, {emit_reg r}, asr #{emit_int p}\n`;
+ ` add {emit_reg r}, {emit_reg r}, {emit_reg a}, lsr #31\n`;
+ let ninstr = ninstr + emit_intconst r12 (Int32.of_int n) in
+ ` mls {emit_reg r}, {emit_reg r}, r12, {emit_reg a}\n`;
+ ninstr + 4
+ end
| Lop(Iintop_imm((Ilsl | Ilsr | Iasr as op), n)) ->
let shift = name_for_shift_operation op in
(* Use movs to enable 16-bit T1 encoding *)
diff --git a/asmcomp/arm/proc.ml b/asmcomp/arm/proc.ml
index dbb13173a9..2dd573f054 100644
--- a/asmcomp/arm/proc.ml
+++ b/asmcomp/arm/proc.ml
@@ -201,6 +201,8 @@ let destroyed_at_oper = function
destroyed_at_alloc
| Iop(Iconst_symbol _) when !pic_code ->
[| phys_reg 3; phys_reg 8 |] (* r3 and r12 destroyed *)
+ | Iop(Iintop_imm(Imod, n)) when !arch >= ARMv6 && n <> 1 lsl Misc.log2 n ->
+ [| phys_reg 8 |] (* r12 destroyed: scratch for n in the non-power-of-2 case *)
| Iop(Iintoffloat | Ifloatofint | Iload(Single, _) | Istore(Single, _)) ->
[| phys_reg 107 |] (* d7 (s14-s15) destroyed *)
| _ -> [||]
diff --git a/asmcomp/arm/selection.ml b/asmcomp/arm/selection.ml
index 97f615ec78..023202e5eb 100644
--- a/asmcomp/arm/selection.ml
+++ b/asmcomp/arm/selection.ml
@@ -54,6 +54,13 @@ let pseudoregs_for_operation op arg res =
is also a result of the mul / mla operation. *)
Iintop Imul | Ispecific Imuladd when !arch < ARMv6 ->
(arg, [| res.(0); arg.(0) |])
+ (* For integer division by a constant, which is not a power of 2, on ARMv6
+ and later, the result and argument registers must be different. We deal
+ with this by pretending that the argument value is also a result of the
+ operation. *)
+ | Iintop_imm((Idiv | Imod), n) when !arch >= ARMv6
+ && n <> 1 lsl Misc.log2 n->
+ (arg, [| res.(0); arg.(0) |])
(* Soft-float Iabsf and Inegf: arg.(0) and res.(0) must be the same *)
| Iabsf | Inegf when !fpu = Soft ->
([|res.(0); arg.(1)|], res)
@@ -168,13 +175,13 @@ method! select_operation op args =
| (Cmuli, args) ->
(Iintop Imul, args)
(* Turn integer division/modulus into runtime ABI calls *)
- | (Cdivi, [arg; Cconst_int n])
- when n = 1 lsl Misc.log2 n ->
+ | (Cdivi, [arg; Cconst_int n]) when n > 0 && (!arch >= ARMv6
+ || n = 1 lsl Misc.log2 n) ->
(Iintop_imm(Idiv, n), [arg])
| (Cdivi, args) ->
(Iextcall("__aeabi_idiv", false), args)
- | (Cmodi, [arg; Cconst_int n])
- when n > 1 && n = 1 lsl Misc.log2 n ->
+ | (Cmodi, [arg; Cconst_int n]) when n > 1 && (!arch >= ARMv6
+ || n = 1 lsl Misc.log2 n) -> (* n = 1 would emit lsl #32 *)
(Iintop_imm(Imod, n), [arg])
| (Cmodi, args) ->
(* See above for fix up of return register *)