summary refs log tree commit diff
path: root/asmcomp
diff options
context:
space:
mode:
author Xavier Leroy <xavier.leroy@inria.fr> 2010-05-24 15:26:23 +0000
committer Xavier Leroy <xavier.leroy@inria.fr> 2010-05-24 15:26:23 +0000
commit 9d27a7fc8ec929763a78a46845f3deef5ee8aebe (patch)
tree 15b5b5972947afe9925e772a9464d8cb5550f955 /asmcomp
parent cd648ee2dd620d950376112033d8113e7a227e3a (diff)
download ocaml-9d27a7fc8ec929763a78a46845f3deef5ee8aebe.tar.gz
Slightly better code generated for floating-point comparisons.
Windows-MSVC port updated as well but not tested yet. The testsuite contains appropriate tests (in tests/basic/equality.ml).
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@10460 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'asmcomp')
-rw-r--r-- asmcomp/amd64/emit.mlp 74
-rw-r--r-- asmcomp/amd64/emit_nt.mlp 74
-rw-r--r-- asmcomp/amd64/reload.ml 12
3 files changed, 93 insertions, 67 deletions
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index b1f886da9c..d9dd162ebc 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -270,40 +270,50 @@ let output_test_zero arg =
(* Output a floating-point compare and branch *)
let emit_float_test cmp neg arg lbl =
- begin match cmp with
- | Ceq | Cne -> ` ucomisd `
- | _ -> ` comisd `
- end;
- `{emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
- let (branch_opcode, need_jp) =
- match (cmp, neg) with
- (Ceq, false) -> ("je", true)
- | (Ceq, true) -> ("jne", true)
- | (Cne, false) -> ("jne", true)
- | (Cne, true) -> ("je", true)
- | (Clt, false) -> ("jb", true)
- | (Clt, true) -> ("jae", true)
- | (Cle, false) -> ("jbe", true)
- | (Cle, true) -> ("ja", true)
- | (Cgt, false) -> ("ja", false)
- | (Cgt, true) -> ("jbe", false)
- | (Cge, false) -> ("jae", true)
- | (Cge, true) -> ("jb", false) in
- let branch_if_not_comparable =
- if cmp = Cne then not neg else neg in
- if need_jp then
- if branch_if_not_comparable then begin
- ` jp {emit_label lbl}\n`;
- ` {emit_string branch_opcode} {emit_label lbl}\n`
- end else begin
+ (* Effect of comisd on flags and conditional branches:
+ ZF PF CF cond. branches taken
+ unordered 1 1 1 je, jb, jbe, jp
+ > 0 0 0 jne, jae, ja
+ < 0 0 1 jne, jbe, jb
+ = 1 0 0 je, jae, jbe.
+ If FP traps are on (they are off by default),
+ comisd traps on QNaN and SNaN but ucomisd traps on SNaN only.
+ *)
+ match (cmp, neg) with
+ | (Ceq, false) | (Cne, true) ->
let next = new_label() in
- ` jp {emit_label next}\n`;
- ` {emit_string branch_opcode} {emit_label lbl}\n`;
+ ` ucomisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
+ ` jp {emit_label next}\n`; (* skip if unordered *)
+ ` je {emit_label lbl}\n`; (* branch taken if x=y *)
`{emit_label next}:\n`
- end
- else begin
- ` {emit_string branch_opcode} {emit_label lbl}\n`
- end
+ | (Cne, false) | (Ceq, true) ->
+ ` ucomisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`;
+ ` jp {emit_label lbl}\n`; (* branch taken if unordered *)
+ ` jne {emit_label lbl}\n` (* branch taken if x<y or x>y *)
+ | (Clt, _) ->
+ ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
+ if not neg then
+ ` ja {emit_label lbl}\n` (* branch taken if y>x i.e. x<y *)
+ else
+ ` jbe {emit_label lbl}\n` (* taken if unordered or y<=x i.e. !(x<y) *)
+ | (Cle, _) ->
+ ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *)
+ if not neg then
+ ` jae {emit_label lbl}\n` (* branch taken if y>=x i.e. x<=y *)
+ else
+ ` jb {emit_label lbl}\n` (* taken if unordered or y<x i.e. !(x<=y) *)
+ | (Cgt, _) ->
+ ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* no swap: compares x with y *)
+ if not neg then
+ ` ja {emit_label lbl}\n` (* branch taken if x>y *)
+ else
+ ` jbe {emit_label lbl}\n` (* taken if unordered or x<=y i.e. !(x>y) *)
+ | (Cge, _) ->
+ ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* no swap: compares x with y *)
+ if not neg then
+ ` jae {emit_label lbl}\n` (* branch taken if x>=y *)
+ else
+ ` jb {emit_label lbl}\n` (* taken if unordered or x<y i.e. !(x>=y) *)
(* Deallocate the stack frame before a return or tail call *)
diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp
index 23c5b34ec3..3374f4d36b 100644
--- a/asmcomp/amd64/emit_nt.mlp
+++ b/asmcomp/amd64/emit_nt.mlp
@@ -264,40 +264,50 @@ let output_test_zero arg =
(* Output a floating-point compare and branch *)
let emit_float_test cmp neg arg lbl =
- begin match cmp with
- | Ceq | Cne -> ` ucomisd `
- | _ -> ` comisd `
- end;
- `{emit_reg arg.(0)}, {emit_reg arg.(1)}\n`;
- let (branch_opcode, need_jp) =
- match (cmp, neg) with
- (Ceq, false) -> ("je", true)
- | (Ceq, true) -> ("jne", true)
- | (Cne, false) -> ("jne", true)
- | (Cne, true) -> ("je", true)
- | (Clt, false) -> ("jb", true)
- | (Clt, true) -> ("jae", true)
- | (Cle, false) -> ("jbe", true)
- | (Cle, true) -> ("ja", true)
- | (Cgt, false) -> ("ja", false)
- | (Cgt, true) -> ("jbe", false)
- | (Cge, false) -> ("jae", true)
- | (Cge, true) -> ("jb", false) in
- let branch_if_not_comparable =
- if cmp = Cne then not neg else neg in
- if need_jp then
- if branch_if_not_comparable then begin
- ` jp {emit_label lbl}\n`;
- ` {emit_string branch_opcode} {emit_label lbl}\n`
- end else begin
+ (* Effect of comisd on flags and conditional branches:
+ ZF PF CF cond. branches taken
+ unordered 1 1 1 je, jb, jbe, jp
+ > 0 0 0 jne, jae, ja
+ < 0 0 1 jne, jbe, jb
+ = 1 0 0 je, jae, jbe.
+ If FP traps are on (they are off by default),
+ comisd traps on QNaN and SNaN but ucomisd traps on SNaN only.
+ *)
+ match (cmp, neg) with
+ | (Ceq, false) | (Cne, true) ->
let next = new_label() in
- ` jp {emit_label next}\n`;
- ` {emit_string branch_opcode} {emit_label lbl}\n`;
+ ` ucomisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`;
+ ` jp {emit_label next}\n`; (* skip if unordered *)
+ ` je {emit_label lbl}\n`; (* branch taken if x=y *)
`{emit_label next}:\n`
- end
- else begin
- ` {emit_string branch_opcode} {emit_label lbl}\n`
- end
+ | (Cne, false) | (Ceq, true) ->
+ ` ucomisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`;
+ ` jp {emit_label lbl}\n`; (* branch taken if unordered *)
+ ` jne {emit_label lbl}\n` (* branch taken if x<y or x>y *)
+ | (Clt, _) ->
+ ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *)
+ if not neg then
+ ` ja {emit_label lbl}\n` (* branch taken if y>x i.e. x<y *)
+ else
+ ` jbe {emit_label lbl}\n` (* taken if unordered or y<=x i.e. !(x<y) *)
+ | (Cle, _) ->
+ ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *)
+ if not neg then
+ ` jae {emit_label lbl}\n` (* branch taken if y>=x i.e. x<=y *)
+ else
+ ` jb {emit_label lbl}\n` (* taken if unordered or y<x i.e. !(x<=y) *)
+ | (Cgt, _) ->
+ ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* no swap: compares x with y *)
+ if not neg then
+ ` ja {emit_label lbl}\n` (* branch taken if x>y *)
+ else
+ ` jbe {emit_label lbl}\n` (* taken if unordered or x<=y i.e. !(x>y) *)
+ | (Cge, _) ->
+ ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* no swap: compares x with y *)
+ if not neg then
+ ` jae {emit_label lbl}\n` (* branch taken if x>=y *)
+ else
+ ` jb {emit_label lbl}\n` (* taken if unordered or x<y i.e. !(x>=y) *)
(* Deallocate the stack frame before a return or tail call *)
diff --git a/asmcomp/amd64/reload.ml b/asmcomp/amd64/reload.ml
index 50b962f891..66772de97a 100644
--- a/asmcomp/amd64/reload.ml
+++ b/asmcomp/amd64/reload.ml
@@ -49,7 +49,7 @@ open Mach
Conditional branches:
Iinttest S R
or R S
- Ifloattest R S
+ Ifloattest R S (or S R if swapped test)
other tests S
*)
@@ -64,7 +64,7 @@ inherit Reloadgen.reload_generic as super
method! reload_operation op arg res =
match op with
- Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
+ | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) ->
(* One of the two arguments can reside in the stack, but not both *)
if stackp arg.(0) && stackp arg.(1)
then ([|arg.(0); self#makereg arg.(1)|], res)
@@ -106,7 +106,13 @@ method! reload_test tst arg =
if stackp arg.(0) && stackp arg.(1)
then [| self#makereg arg.(0); arg.(1) |]
else arg
- | Ifloattest(_, _) ->
+ | Ifloattest((Clt|Cle), _) ->
+ (* Cf. emit.mlp: we swap arguments in this case *)
+ (* First argument can be on stack, second must be in register *)
+ if stackp arg.(1)
+ then [| arg.(0); self#makereg arg.(1) |]
+ else arg
+ | Ifloattest((Ceq|Cne|Cgt|Cge), _) ->
(* Second argument can be on stack, first must be in register *)
if stackp arg.(0)
then [| self#makereg arg.(0); arg.(1) |]