diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 2010-05-24 15:26:23 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 2010-05-24 15:26:23 +0000 |
commit | 9d27a7fc8ec929763a78a46845f3deef5ee8aebe (patch) | |
tree | 15b5b5972947afe9925e772a9464d8cb5550f955 /asmcomp | |
parent | cd648ee2dd620d950376112033d8113e7a227e3a (diff) | |
download | ocaml-9d27a7fc8ec929763a78a46845f3deef5ee8aebe.tar.gz |
Slightly better code generated for floating-point comparisons.
Windows-MSVC port updated as well but not tested yet.
The testsuite contains appropriate tests (in tests/basic/equality.ml).
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@10460 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'asmcomp')
-rw-r--r-- | asmcomp/amd64/emit.mlp | 74 | ||||
-rw-r--r-- | asmcomp/amd64/emit_nt.mlp | 74 | ||||
-rw-r--r-- | asmcomp/amd64/reload.ml | 12 |
3 files changed, 93 insertions, 67 deletions
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp index b1f886da9c..d9dd162ebc 100644 --- a/asmcomp/amd64/emit.mlp +++ b/asmcomp/amd64/emit.mlp @@ -270,40 +270,50 @@ let output_test_zero arg = (* Output a floating-point compare and branch *) let emit_float_test cmp neg arg lbl = - begin match cmp with - | Ceq | Cne -> ` ucomisd ` - | _ -> ` comisd ` - end; - `{emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; - let (branch_opcode, need_jp) = - match (cmp, neg) with - (Ceq, false) -> ("je", true) - | (Ceq, true) -> ("jne", true) - | (Cne, false) -> ("jne", true) - | (Cne, true) -> ("je", true) - | (Clt, false) -> ("jb", true) - | (Clt, true) -> ("jae", true) - | (Cle, false) -> ("jbe", true) - | (Cle, true) -> ("ja", true) - | (Cgt, false) -> ("ja", false) - | (Cgt, true) -> ("jbe", false) - | (Cge, false) -> ("jae", true) - | (Cge, true) -> ("jb", false) in - let branch_if_not_comparable = - if cmp = Cne then not neg else neg in - if need_jp then - if branch_if_not_comparable then begin - ` jp {emit_label lbl}\n`; - ` {emit_string branch_opcode} {emit_label lbl}\n` - end else begin + (* Effect of comisd on flags and conditional branches: + ZF PF CF cond. branches taken + unordered 1 1 1 je, jb, jbe, jp + > 0 0 0 jne, jae, ja + < 0 0 1 jne, jbe, jb + = 1 0 0 je, jae, jbe. + If FP traps are on (they are off by default), + comisd traps on QNaN and SNaN but ucomisd traps on SNaN only. + *) + match (cmp, neg) with + | (Ceq, false) | (Cne, true) -> let next = new_label() in - ` jp {emit_label next}\n`; - ` {emit_string branch_opcode} {emit_label lbl}\n`; + ` ucomisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; + ` jp {emit_label next}\n`; (* skip if unordered *) + ` je {emit_label lbl}\n`; (* branch taken if x=y *) `{emit_label next}:\n` - end - else begin - ` {emit_string branch_opcode} {emit_label lbl}\n` - end + | (Cne, false) | (Ceq, true) -> + ` ucomisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; + ` jp {emit_label lbl}\n`; (* branch taken if unordered *) + ` jne {emit_label lbl}\n` (* branch taken if x<y or x>y *) + | (Clt, _) -> + ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *) + if not neg then + ` ja {emit_label lbl}\n` (* branch taken if y>x i.e. x<y *) + else + ` jbe {emit_label lbl}\n` (* taken if unordered or y<=x i.e. !(x<y) *) + | (Cle, _) -> + ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *) + if not neg then + ` jae {emit_label lbl}\n` (* branch taken if y>=x i.e. x<=y *) + else + ` jb {emit_label lbl}\n` (* taken if unordered or y<x i.e. !(x<=y) *) + | (Cgt, _) -> + ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; + if not neg then + ` ja {emit_label lbl}\n` (* branch taken if x>y *) + else + ` jbe {emit_label lbl}\n` (* taken if unordered or x<=y i.e. !(x>y) *) + | (Cge, _) -> + ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *) + if not neg then + ` jae {emit_label lbl}\n` (* branch taken if x>=y *) + else + ` jb {emit_label lbl}\n` (* taken if unordered or x<y i.e. !(x>=y) *) (* Deallocate the stack frame before a return or tail call *) diff --git a/asmcomp/amd64/emit_nt.mlp b/asmcomp/amd64/emit_nt.mlp index 23c5b34ec3..3374f4d36b 100644 --- a/asmcomp/amd64/emit_nt.mlp +++ b/asmcomp/amd64/emit_nt.mlp @@ -264,40 +264,50 @@ let output_test_zero arg = (* Output a floating-point compare and branch *) let emit_float_test cmp neg arg lbl = - begin match cmp with - | Ceq | Cne -> ` ucomisd ` - | _ -> ` comisd ` - end; - `{emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; - let (branch_opcode, need_jp) = - match (cmp, neg) with - (Ceq, false) -> ("je", true) - | (Ceq, true) -> ("jne", true) - | (Cne, false) -> ("jne", true) - | (Cne, true) -> ("je", true) - | (Clt, false) -> ("jb", true) - | (Clt, true) -> ("jae", true) - | (Cle, false) -> ("jbe", true) - | (Cle, true) -> ("ja", true) - | (Cgt, false) -> ("ja", false) - | (Cgt, true) -> ("jbe", false) - | (Cge, false) -> ("jae", true) - | (Cge, true) -> ("jb", false) in - let branch_if_not_comparable = - if cmp = Cne then not neg else neg in - if need_jp then - if branch_if_not_comparable then begin - ` jp {emit_label lbl}\n`; - ` {emit_string branch_opcode} {emit_label lbl}\n` - end else begin + (* Effect of comisd on flags and conditional branches: + ZF PF CF cond. branches taken + unordered 1 1 1 je, jb, jbe, jp + > 0 0 0 jne, jae, ja + < 0 0 1 jne, jbe, jb + = 1 0 0 je, jae, jbe. + If FP traps are on (they are off by default), + comisd traps on QNaN and SNaN but ucomisd traps on SNaN only. + *) + match (cmp, neg) with + | (Ceq, false) | (Cne, true) -> let next = new_label() in - ` jp {emit_label next}\n`; - ` {emit_string branch_opcode} {emit_label lbl}\n`; + ` ucomisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; + ` jp {emit_label next}\n`; (* skip if unordered *) + ` je {emit_label lbl}\n`; (* branch taken if x=y *) `{emit_label next}:\n` - end - else begin - ` {emit_string branch_opcode} {emit_label lbl}\n` - end + | (Cne, false) | (Ceq, true) -> + ` ucomisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; + ` jp {emit_label lbl}\n`; (* branch taken if unordered *) + ` jne {emit_label lbl}\n` (* branch taken if x<y or x>y *) + | (Clt, _) -> + ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *) + if not neg then + ` ja {emit_label lbl}\n` (* branch taken if y>x i.e. x<y *) + else + ` jbe {emit_label lbl}\n` (* taken if unordered or y<=x i.e. !(x<y) *) + | (Cle, _) -> + ` comisd {emit_reg arg.(1)}, {emit_reg arg.(0)}\n`; (* swap compare *) + if not neg then + ` jae {emit_label lbl}\n` (* branch taken if y>=x i.e. x<=y *) + else + ` jb {emit_label lbl}\n` (* taken if unordered or y<x i.e. !(x<=y) *) + | (Cgt, _) -> + ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; + if not neg then + ` ja {emit_label lbl}\n` (* branch taken if x>y *) + else + ` jbe {emit_label lbl}\n` (* taken if unordered or x<=y i.e. !(x>y) *) + | (Cge, _) -> + ` comisd {emit_reg arg.(0)}, {emit_reg arg.(1)}\n`; (* swap compare *) + if not neg then + ` jae {emit_label lbl}\n` (* branch taken if x>=y *) + else + ` jb {emit_label lbl}\n` (* taken if unordered or x<y i.e. !(x>=y) *) (* Deallocate the stack frame before a return or tail call *) diff --git a/asmcomp/amd64/reload.ml b/asmcomp/amd64/reload.ml index 50b962f891..66772de97a 100644 --- a/asmcomp/amd64/reload.ml +++ b/asmcomp/amd64/reload.ml @@ -49,7 +49,7 @@ open Mach Conditional branches: Iinttest S R or R S - Ifloattest R S + Ifloattest R S (or S R if swapped test) other tests S *) @@ -64,7 +64,7 @@ inherit Reloadgen.reload_generic as super method! reload_operation op arg res = match op with - Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) -> + | Iintop(Iadd|Isub|Iand|Ior|Ixor|Icomp _|Icheckbound) -> (* One of the two arguments can reside in the stack, but not both *) if stackp arg.(0) && stackp arg.(1) then ([|arg.(0); self#makereg arg.(1)|], res) @@ -106,7 +106,13 @@ method! reload_test tst arg = if stackp arg.(0) && stackp arg.(1) then [| self#makereg arg.(0); arg.(1) |] else arg - | Ifloattest(_, _) -> + | Ifloattest((Clt|Cle), _) -> + (* Cf. emit.mlp: we swap arguments in this case *) + (* First argument can be on stack, second must be in register *) + if stackp arg.(1) + then [| arg.(0); self#makereg arg.(1) |] + else arg + | Ifloattest((Ceq|Cne|Cgt|Cge), _) -> (* Second argument can be on stack, first must be in register *) if stackp arg.(0) then [| self#makereg arg.(0); arg.(1) |] |