abstract_asm: first step in checking args of mnemonicsabstract_x86_asm

git-svn-id: http://caml.inria.fr/svn/ocaml/branches/abstract_x86_asm@14606 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
author: Fabrice Le Fessant <Fabrice.Le_fessant@inria.fr> 2014-04-15 15:56:01 +0000
committer: Fabrice Le Fessant <Fabrice.Le_fessant@inria.fr> 2014-04-15 15:56:01 +0000
commit: 7edccb792788fe83df11e0b8e523e1984460220c (patch)
tree: 980d7af53eae29ca2c2dc80679fb6554787c3e4f
parent: 34046e68599d08a069954bb0443d5942b15a0db1 (diff)
download: ocaml-abstract_x86_asm.tar.gz
9 files changed, 1236 insertions, 1128 deletions
diff --git a/Makefile b/Makefile
index a703b51c3c..5f3e5d5490 100644
--- a/Makefile
+++ b/Makefile
@@ -81,7 +81,11 @@ BYTECOMP=bytecomp/meta.cmo bytecomp/instruct.cmo bytecomp/bytegen.cmo \
 
 ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
   asmcomp/cmm.cmo asmcomp/printcmm.cmo \
-  asmcomp/reg.cmo asmcomp/mach.cmo asmcomp/proc.cmo \
+  asmcomp/reg.cmo asmcomp/mach.cmo \
+  asmcomp/intel_proc.cmo \
+  asmcomp/intel_gas.cmo \
+  asmcomp/intel_masm.cmo \
+  asmcomp/proc.cmo \
   asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
   asmcomp/closure.cmo asmcomp/cmmgen.cmo \
   asmcomp/printmach.cmo asmcomp/selectgen.cmo asmcomp/selection.cmo \
@@ -91,9 +95,6 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
   asmcomp/reloadgen.cmo asmcomp/reload.cmo \
   asmcomp/printlinear.cmo asmcomp/linearize.cmo \
   asmcomp/schedgen.cmo asmcomp/scheduling.cmo \
-  asmcomp/intel_proc.cmo \
-  asmcomp/intel_gas.cmo \
-  asmcomp/intel_masm.cmo \
   asmcomp/emitaux.cmo \
   asmcomp/emit.cmo \
   asmcomp/asmgen.cmo \
diff --git a/Makefile.nt b/Makefile.nt
index 63988827ee..8d9df41014 100644
--- a/Makefile.nt
+++ b/Makefile.nt
@@ -77,6 +77,9 @@ BYTECOMP=bytecomp/meta.cmo bytecomp/instruct.cmo bytecomp/bytegen.cmo \
 
 ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
   asmcomp/cmm.cmo asmcomp/printcmm.cmo \
+  asmcomp/intel_proc.cmo \
+  asmcomp/intel_gas.cmo \
+  asmcomp/intel_masm.cmo \
   asmcomp/reg.cmo asmcomp/mach.cmo asmcomp/proc.cmo \
   asmcomp/clambda.cmo asmcomp/printclambda.cmo asmcomp/compilenv.cmo \
   asmcomp/closure.cmo asmcomp/cmmgen.cmo \
@@ -87,10 +90,6 @@ ASMCOMP=asmcomp/arch.cmo asmcomp/debuginfo.cmo \
   asmcomp/reloadgen.cmo asmcomp/reload.cmo \
   asmcomp/printlinear.cmo asmcomp/linearize.cmo \
   asmcomp/schedgen.cmo asmcomp/scheduling.cmo \
-  asmcomp/coff.cmo \
-  asmcomp/intel_proc.cmo \
-  asmcomp/intel_gas.cmo \
-  asmcomp/intel_masm.cmo \
   asmcomp/emitaux.cmo asmcomp/emit.cmo asmcomp/asmgen.cmo \
   asmcomp/asmlink.cmo asmcomp/asmlibrarian.cmo asmcomp/asmpackager.cmo \
   driver/opterrors.cmo driver/optcompile.cmo
diff --git a/asmcomp/amd64/emit.mlp b/asmcomp/amd64/emit.mlp
index 4a30e99794..9fc3e09ef8 100644
--- a/asmcomp/amd64/emit.mlp
+++ b/asmcomp/amd64/emit.mlp
@@ -11,6 +11,8 @@
 (*                                                                     *)
 (***********************************************************************)
 
+(* Emission of Intel x86_64 assembly code *)
+
 open Cmm
 open Arch
 open Proc
@@ -20,6 +22,14 @@ open Linearize
 open Emitaux
 open Intel_proc
 
+let _r r = Reg r
+let _label s = emit (NewLabel (s, QWORD))
+let _mem offset reg = Mem(NO, reg, 1, NoBase, offset)
+(* On win32/win64, some memory references need to specify the size of
+   the operand in 'pref' *)
+let _mem_ptr pref offset reg = Mem(pref, reg, 1, NoBase, offset)
+
+(*
 module Emitter = MakeEmitter(struct
 
     type reg = register64
@@ -30,10 +40,12 @@ module Emitter = MakeEmitter(struct
       | _ -> Intel_gas.bprint_instr
     let string_of_register = string_of_register64
     let word_size = QWORD
+    let passes = Intel_proc.register64_passes
+
   end)
 
 open Emitter
-
+*)
 
 (* Override proc.ml *)
 
@@ -44,7 +56,7 @@ let int_reg_name =
 let float_reg_name = Array.init 16 (fun i -> XMM i)
 
 let register_name r =
-  if r < 100 then Reg (int_reg_name.(r))
+  if r < 100 then _r (int_reg_name.(r))
   else Regf (float_reg_name.(r - 100))
 
 (* CFI directives *)
@@ -62,22 +74,18 @@ let cfi_endproc () =
 
 let cfi_adjust_cfa_offset n =
   if is_cfi_enabled () then
-  begin
-    _specific (Printf.sprintf ".cfi_adjust_cfa_offset\t%d" n)
-  end
+    Printf.kprintf _specific ".cfi_adjust_cfa_offset\t%d" n
 
 let emit_debug_info dbg =
   if system <> S_win64 then
   emit_debug_info_gen dbg
     (fun file_num file_name ->
-      _specific
-          (Printf.sprintf ".file\t%d\t\"%s\""
-             file_num (Intel_proc.string_of_string_literal file_name))
+      Printf.kprintf _specific ".file\t%d\t\"%s\""
+             file_num (Intel_proc.string_of_string_literal file_name)
     )
     (fun file_num line ->
-      _specific
-          (Printf.sprintf ".loc\t%d\t%d"
-             file_num line)
+          Printf.kprintf _specific ".loc\t%d\t%d"
+             file_num line
     )
 
 let fp = Config.with_frame_pointers
@@ -131,29 +139,29 @@ let add_used_symbol s =
   symbols_used := StringSet.add s !symbols_used
 
 let emit_call s =
-  _call [|
+  _call (
     if !Clflags.dlcode && (match system with
            S_macosx | S_mingw64 | S_cygwin | S_win64 -> false
          | _ -> true)
     then LabelPLT (emit_symbol s)
     else _l (emit_symbol s)
-  |]
+  )
 
 let emit_jump s =
-  _jmp [|
+  _jmp (
     if !Clflags.dlcode && (match system with
            S_macosx | S_mingw64 |S_cygwin | S_win64 -> false
          | _ -> true)
     then LabelPLT (emit_symbol s)
     else _l (emit_symbol s)
-  |]
+  )
 
 let load_symbol_addr s arg =
   if system = S_win64 then
     if !pic_code then
-          _leaq [| _l (emit_symbol s); arg |]
+          _leaq ( _l (emit_symbol s) , arg )
         else
-          _movq [| _offset (emit_symbol s); arg |]
+          _movq (_offset (emit_symbol s), arg )
   else
     let addr =
       if !Clflags.dlcode && (match system with
@@ -164,7 +172,7 @@ let load_symbol_addr s arg =
       then at_rip NO (emit_symbol s)
       else _l (emit_symbol s)
     in
-    _movq [| addr; arg |]
+    _movq (addr,  arg)
 
 (* Output a label *)
 
@@ -243,7 +251,7 @@ let emit_addressing addr pref r n =
   | Ibased(s, d) ->
     add_used_symbol s;
     if system = S_win64 then
-      LabelRel(NO, emit_symbol s, d )
+      LabelRel(pref, emit_symbol s, d )
     else
       Mem (pref, RIP, 1, BaseSymbol s, d)
   | Iindexed d ->
@@ -294,7 +302,7 @@ let emit_call_gc gc =
   _llabel (emit_label gc.gc_lbl);
   emit_call "caml_call_gc";
   _llabel (emit_label gc.gc_frame);
-  _jmp [| _l (emit_label gc.gc_return_lbl) |]
+  _jmp ( _l (emit_label gc.gc_return_lbl)  )
 
 (* Record calls to caml_ml_array_bound_error.
    In -g mode, we maintain one call to caml_ml_array_bound_error
@@ -336,7 +344,7 @@ let emit_call_bound_errors () =
 let instr_for_intop = function
     Iadd -> _addq
   | Isub -> _subq
-  | Imul -> _imulq
+  | Imul -> (fun (arg1, arg2) -> _imulq (arg1, Some arg2))
   | Iand -> _andq
   | Ior -> _orq
   | Ixor -> _xorq
@@ -370,8 +378,8 @@ let name_for_cond_branch = function
 
 let output_test_zero arg =
   match arg.loc with
-    Reg.Reg r -> _testq  [| emit_reg arg; emit_reg arg;|]
-  | _  -> _cmpq  [| _int 0; emit_reg arg;|]
+    Reg.Reg r -> _testq  ( emit_reg arg, emit_reg arg )
+  | _  -> _cmpq  ( _int 0, emit_reg arg )
 
 (* Output a floating-point compare and branch *)
 
@@ -388,48 +396,48 @@ let emit_float_test cmp neg arg lbl =
   match (cmp, neg) with
   | (Ceq, false) | (Cne, true) ->
     let next = new_label() in
-    _ucomisd [| emit_reg arg.(1); emit_reg arg.(0) |];
-    _jp [| _l (emit_label next) |]; (* skip if unordered *)
-    _je [| _l (emit_label lbl) |]; (* branch taken if x=y *)
+    _ucomisd ( emit_reg arg.(1) , emit_reg arg.(0) );
+    _jp ( _l (emit_label next)  ); (* skip if unordered *)
+    _je ( _l (emit_label lbl)  ); (* branch taken if x=y *)
     _llabel (emit_label next)
   | (Cne, false) | (Ceq, true) ->
-    _ucomisd [| emit_reg arg.(1); emit_reg arg.(0); |];
-    _jp [| _l (emit_label lbl) |];(* branch taken if unordered *)
-    _jne [| _l (emit_label lbl) |] (* branch taken if x<y or x>y *)
+    _ucomisd ( emit_reg arg.(1), emit_reg arg.(0);  );
+    _jp ( _l (emit_label lbl)  );(* branch taken if unordered *)
+    _jne ( _l (emit_label lbl)  ) (* branch taken if x<y or x>y *)
   | (Clt, _) ->
-    _comisd [| emit_reg arg.(0); emit_reg arg.(1); |];
+    _comisd ( emit_reg arg.(0), emit_reg arg.(1);  );
     if not neg then
-      _ja [| _l (emit_label lbl) |]  (* branch taken if y>x i.e. x<y *)
+      _ja ( _l (emit_label lbl)  )  (* branch taken if y>x i.e. x<y *)
     else
-      _jbe [| _l (emit_label lbl) |] (* taken if unordered or y<=x i.e. !(x<y) *)
+      _jbe ( _l (emit_label lbl)  ) (* taken if unordered or y<=x i.e. !(x<y) *)
   | (Cle, _) ->
-    _comisd [| emit_reg arg.(0); emit_reg arg.(1) |];  (* swap compare *)
+    _comisd ( emit_reg arg.(0), emit_reg arg.(1)  );  (* swap compare *)
     if not neg then
-      _jae [| _l (emit_label lbl) |]  (* branch taken if y>=x i.e. x<=y *)
+      _jae ( _l (emit_label lbl)  )  (* branch taken if y>=x i.e. x<=y *)
     else
-      _jb [| _l (emit_label lbl) |] (* taken if unordered or y<x i.e. !(x<=y) *)
+      _jb ( _l (emit_label lbl)  ) (* taken if unordered or y<x i.e. !(x<=y) *)
   | (Cgt, _) ->
-    _comisd [| emit_reg arg.(1); emit_reg arg.(0); |];
+    _comisd ( emit_reg arg.(1), emit_reg arg.(0);  );
     if not neg then
-      _ja [| _l (emit_label lbl) |]     (* branch taken if x>y *)
+      _ja ( _l (emit_label lbl)  )     (* branch taken if x>y *)
     else
-      _jbe [| _l (emit_label lbl) |] (* taken if unordered or x<=y i.e. !(x>y) *)
+      _jbe ( _l (emit_label lbl)  ) (* taken if unordered or x<=y i.e. !(x>y) *)
   | (Cge, _) ->
-    _comisd [|  emit_reg arg.(1); emit_reg arg.(0); |];  (* swap compare *)
+    _comisd (  emit_reg arg.(1), emit_reg arg.(0);  );  (* swap compare *)
     if not neg then
-      _jae [| _l (emit_label lbl) |]    (* branch taken if x>=y *)
+      _jae ( _l (emit_label lbl)  )    (* branch taken if x>=y *)
     else
-      _jb [| _l (emit_label lbl) |] (* taken if unordered or x<y i.e. !(x>=y) *)
+      _jb ( _l (emit_label lbl)  ) (* taken if unordered or x<y i.e. !(x>=y) *)
 
 (* Deallocate the stack frame before a return or tail call *)
 
 let output_epilogue f =
   if frame_required() then begin
     let n = frame_size() - 8 - (if fp then 8 else 0) in
-    _addq [| _int n; Reg RSP; |];
+    _addq ( _int n, _r RSP;  );
     cfi_adjust_cfa_offset (-n);
     if fp then begin
-       _popq [| Reg RBP |]
+       _popq ( _r RBP  )
     end;
     f ();
     (* reset CFA back cause function body may continue *)
@@ -478,38 +486,38 @@ let emit_instr fallthrough i =
     if src.loc <> dst.loc then
       begin match src.typ, src.loc, dst.loc with
           Float, Reg.Reg _, Reg.Reg _ ->
-          _movapd [| emit_reg src; emit_reg dst;  |]
+          _movapd ( emit_reg src, emit_reg dst;   )
         | Float, _, _ ->
-          _movsd [| emit_reg src; emit_reg dst;  |]
+          _movsd ( emit_reg src, emit_reg dst;   )
         | _ ->
-          _movq [| emit_reg src; emit_reg dst;  |]
+          _movq ( emit_reg src, emit_reg dst;   )
       end
   | Lop(Iconst_int n) ->
     if n = 0n then begin
       match i.res.(0).loc with
-        Reg n -> _xorq [| emit_reg i.res.(0); emit_reg i.res.(0) |]
-      | _     -> _movq [| _int 0; emit_reg i.res.(0); |]
+        Reg n -> _xorq ( emit_reg i.res.(0), emit_reg i.res.(0)  )
+      | _     -> _movq ( _int 0, emit_reg i.res.(0);  )
     end else if n <= 0x7FFFFFFFn && n >= -0x80000000n then
-      _movq [| emit_nativeint n; emit_reg i.res.(0);  |]
+      _movq ( emit_nativeint n, emit_reg i.res.(0);   )
     else
     if system = S_win64 && n >= 0x80000000n && n <= 0xFFFFFFFFn  then
       (* work around bug in ml64 *)
-      _movq [| emit_nativeint n; emit_reg32 i.res.(0);  |]
+      _movq ( emit_nativeint n, emit_reg32 i.res.(0);   )
     else
-      _movabsq [| emit_nativeint n; emit_reg i.res.(0);  |]
+      _movabsq ( emit_nativeint n, emit_reg i.res.(0);   )
   | Lop(Iconst_float s) ->
     begin match Int64.bits_of_float (float_of_string s) with
       | 0x0000_0000_0000_0000L ->       (* +0.0 *)
-        _xorpd [| emit_reg i.res.(0); emit_reg i.res.(0) |]
+        _xorpd ( emit_reg i.res.(0), emit_reg i.res.(0)  )
       | _ ->
         let lbl = add_float_constant s in
-        _movsd [| at_rip NO (emit_label lbl);  emit_reg i.res.(0); |]
+        _movsd ( at_rip NO (emit_label lbl),  emit_reg i.res.(0);  )
     end
   | Lop(Iconst_symbol s) ->
     add_used_symbol s;
     load_symbol_addr s (emit_reg i.res.(0))
   | Lop(Icall_ind) ->
-    _call [| emit_reg i.arg.(0) |];
+    _call ( emit_reg i.arg.(0)  );
     record_frame i.live i.dbg
   | Lop(Icall_imm(s)) ->
     add_used_symbol s;
@@ -517,11 +525,11 @@ let emit_instr fallthrough i =
     record_frame i.live i.dbg
   | Lop(Itailcall_ind) ->
     output_epilogue begin fun () ->
-      _jmp [| emit_reg i.arg.(0) |]
+      _jmp ( emit_reg i.arg.(0)  )
     end
   | Lop(Itailcall_imm s) ->
     if s = !function_name then
-      _jmp [|  _l(emit_label !tailrec_entry_point) |]
+      _jmp (  _l(emit_label !tailrec_entry_point)  )
     else begin
       output_epilogue begin fun () ->
         add_used_symbol s;
@@ -531,75 +539,75 @@ let emit_instr fallthrough i =
   | Lop(Iextcall(s, alloc)) ->
     add_used_symbol s;
     if alloc then begin
-      load_symbol_addr s (Reg RAX);
+      load_symbol_addr s (_r RAX);
       emit_call "caml_c_call";
       record_frame i.live i.dbg;
       if system <> S_win64 then begin (* TODO: investigate why such a diff *)
-        load_symbol_addr "caml_young_ptr" (Reg R11);
-        _movq [| _mem_ptr QWORD 0 R11; Reg R15; |]
+        load_symbol_addr "caml_young_ptr" (_r R11);
+        _movq ( _mem_ptr QWORD 0 R11, _r R15;  )
       end;
     end else begin
       emit_call s
     end
   | Lop(Istackoffset n) ->
     if n < 0
-    then _addq [| _int(-n); Reg RSP; |]
-    else _subq [| _int n; Reg RSP;  |];
+    then _addq ( _int(-n), _r RSP;  )
+    else _subq ( _int n, _r RSP;   );
     cfi_adjust_cfa_offset n;
     stack_offset := !stack_offset + n
   | Lop(Iload(chunk, addr)) ->
     let dest = i.res.(0) in
     begin match chunk with
       | Word ->
-        _movq [| emit_addressing addr QWORD i.arg 0; emit_reg dest; |]
+        _movq ( emit_addressing addr QWORD i.arg 0, emit_reg dest;  )
       | Byte_unsigned ->
-        _movzbq [| emit_addressing addr BYTE i.arg 0; emit_reg dest; |]
+        _movzbq ( emit_addressing addr BYTE i.arg 0, emit_reg dest;  )
       | Byte_signed ->
-        _movsbq [| emit_addressing addr BYTE i.arg 0; emit_reg dest; |]
+        _movsbq ( emit_addressing addr BYTE i.arg 0, emit_reg dest;  )
       | Sixteen_unsigned ->
-        _movzwq [| emit_addressing addr WORD i.arg 0; emit_reg dest; |]
+        _movzwq ( emit_addressing addr WORD i.arg 0, emit_reg dest;  )
       | Sixteen_signed ->
-        _movswq [| emit_addressing addr WORD i.arg 0; emit_reg dest; |];
+        _movswq ( emit_addressing addr WORD i.arg 0, emit_reg dest;  );
       | Thirtytwo_unsigned ->
         (* load to low 32 bits sets high 32 bits to 0. TODO: check ! *)
-        _movl [| emit_addressing addr DWORD i.arg 0; emit_reg32 dest; |]
+        _movl ( emit_addressing addr DWORD i.arg 0, emit_reg32 dest;  )
       | Thirtytwo_signed ->
-        _movslq [| emit_addressing addr DWORD i.arg 0; emit_reg dest; |]
+        _movslq ( emit_addressing addr DWORD i.arg 0, emit_reg dest;  )
       | Single ->
-        _cvtss2sd [| emit_addressing addr REAL8 i.arg 0; emit_reg dest; |]
+        _cvtss2sd ( emit_addressing addr REAL8 i.arg 0, emit_reg dest;  )
       | Double | Double_u ->
-        _movsd [| emit_addressing addr REAL8 i.arg 0; emit_reg dest; |]
+        _movsd ( emit_addressing addr REAL8 i.arg 0, emit_reg dest;  )
     end
   | Lop(Istore(chunk, addr)) ->
     begin match chunk with
       | Word ->
-        _movq [| emit_reg i.arg.(0); emit_addressing addr QWORD i.arg 1; |]
+        _movq ( emit_reg i.arg.(0), emit_addressing addr QWORD i.arg 1;  )
       | Byte_unsigned | Byte_signed ->
-        _movb [| emit_reg8 i.arg.(0); emit_addressing addr BYTE i.arg 1; |]
+        _movb ( emit_reg8 i.arg.(0), emit_addressing addr BYTE i.arg 1  )
       | Sixteen_unsigned | Sixteen_signed ->
-        _movw [| emit_reg16 i.arg.(0); emit_addressing addr WORD i.arg 1; |]
+        _movw ( emit_reg16 i.arg.(0), emit_addressing addr WORD i.arg 1  )
       | Thirtytwo_signed | Thirtytwo_unsigned ->
-        _movl [| emit_reg32 i.arg.(0); emit_addressing addr DWORD i.arg 1; |]
+        _movl ( emit_reg32 i.arg.(0), emit_addressing addr DWORD i.arg 1  )
       | Single ->
-        _cvtsd2ss [| emit_reg i.arg.(0); Regf (XMM 15);  |];
-        _movss [| Regf (XMM 15); emit_addressing addr REAL8 i.arg 1; |]
+        _cvtsd2ss ( emit_reg i.arg.(0), Regf (XMM 15);   );
+        _movss ( Regf (XMM 15), emit_addressing addr REAL8 i.arg 1  )
       | Double | Double_u ->
-        _movsd [| emit_reg i.arg.(0); emit_addressing addr REAL8 i.arg 1; |]
+        _movsd ( emit_reg i.arg.(0), emit_addressing addr REAL8 i.arg 1  )
     end
   | Lop(Ialloc n) ->
     if !fastcode_flag then begin
       let lbl_redo = new_label() in
       _llabel (emit_label lbl_redo);
-      _subq [| _int n; Reg R15; |];
+      _subq ( _int n, _r R15  );
       if !Clflags.dlcode && system <> S_win64 then begin
-        load_symbol_addr "caml_young_limit" (Reg RAX);
-        _cmpq [| _mem_ptr QWORD 0 RAX; Reg R15; |];
+        load_symbol_addr "caml_young_limit" (_r RAX);
+        _cmpq ( _mem_ptr QWORD 0 RAX, _r R15  );
       end else
-        _cmpq [| at_rip NO (emit_symbol "caml_young_limit"); Reg R15; |];
+        _cmpq ( at_rip NO (emit_symbol "caml_young_limit"), _r R15  );
       let lbl_call_gc = new_label() in
       let lbl_frame = record_frame_label i.live Debuginfo.none in
-      _jb [| _l( emit_label lbl_call_gc) |];
-      _leaq [| _mem 8 R15; emit_reg i.res.(0); |];
+      _jb ( _l( emit_label lbl_call_gc)  );
+      _leaq ( _mem 8 R15, emit_reg i.res.(0)  );
       call_gc_sites :=
         { gc_lbl = lbl_call_gc;
           gc_return_lbl = lbl_redo;
@@ -609,143 +617,143 @@ let emit_instr fallthrough i =
           16  -> emit_call "caml_alloc1"
         | 24 -> emit_call "caml_alloc2"
         | 32 -> emit_call "caml_alloc3"
-        | _  -> _movq [| _int n; Reg RAX; |];
+        | _  -> _movq ( _int n, _r RAX  );
           emit_call "caml_allocN"
       end;
       record_frame i.live Debuginfo.none;
-      _leaq [| _mem 8 R15; emit_reg i.res.(0);  |]
+      _leaq ( _mem 8 R15, emit_reg i.res.(0);   )
     end
   | Lop(Iintop(Icomp cmp)) ->
-    _cmpq [| emit_reg i.arg.(1); emit_reg i.arg.(0); |];
+    _cmpq ( emit_reg i.arg.(1), emit_reg i.arg.(0)  );
     let b = name_for_cond_branch cmp in
-    _set b [| Reg8 AL |];
-    _movzbq [| Reg8 AL; emit_reg i.res.(0); |]
+    _set b ( Reg8 AL  );
+    _movzbq ( Reg8 AL, emit_reg i.res.(0)  )
   | Lop(Iintop_imm(Icomp cmp, n)) ->
-    _cmpq [| _int n; emit_reg i.arg.(0); |];
+    _cmpq ( _int n, emit_reg i.arg.(0)  );
     let b = name_for_cond_branch cmp in
-    _set b [| Reg8 AL |];
-    _movzbq [| Reg8 AL; emit_reg i.res.(0);  |]
+    _set b ( Reg8 AL  );
+    _movzbq ( Reg8 AL, emit_reg i.res.(0);   )
   | Lop(Iintop Icheckbound) ->
     let lbl = bound_error_label i.dbg in
-    _cmpq [| emit_reg i.arg.(1); emit_reg i.arg.(0);  |];
-    _jbe [| _l ( emit_label lbl ) |]
+    _cmpq ( emit_reg i.arg.(1), emit_reg i.arg.(0);   );
+    _jbe ( _l ( emit_label lbl )  )
   | Lop(Iintop_imm(Icheckbound, n)) ->
     let lbl = bound_error_label i.dbg in
-    _cmpq [| _int n; emit_reg i.arg.(0);  |];
-    _jbe [| _l( emit_label lbl ) |]
+    _cmpq ( _int n, emit_reg i.arg.(0);   );
+    _jbe ( _l( emit_label lbl )  )
   | Lop(Iintop(Idiv | Imod)) ->
-    _cqto [||];
-    _idivq [| emit_reg i.arg.(1) |]
+    _cqto ();
+    _idivq ( emit_reg i.arg.(1)  )
   | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
     (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %rcx *)
-    instr_for_intop op [| Reg8 CL; emit_reg i.res.(0) |]
+    instr_for_intop op ( Reg8 CL, emit_reg i.res.(0)  )
   | Lop(Iintop Imulh) ->
-    _imulq [| emit_reg i.arg.(1) |]
+    _imulq ( emit_reg i.arg.(1), None  )
   | Lop(Iintop op) ->
     (* We have i.arg.(0) = i.res.(0) *)
-    instr_for_intop op [| emit_reg i.arg.(1); emit_reg i.res.(0) |]
+    instr_for_intop op ( emit_reg i.arg.(1), emit_reg i.res.(0)  )
   | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
-    _leaq [| _mem n (emit_reg64 i.arg.(0)); emit_reg i.res.(0) |]
+    _leaq ( _mem n (emit_reg64 i.arg.(0)), emit_reg i.res.(0)  )
   | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
-    _incq [| emit_reg i.res.(0) |]
+    _incq ( emit_reg i.res.(0)  )
   | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
-    _decq [| emit_reg i.res.(0) |]
+    _decq ( emit_reg i.res.(0)  )
   | Lop(Iintop_imm(op, n)) ->
     (* We have i.arg.(0) = i.res.(0) *)
-    instr_for_intop op [| _int n; emit_reg i.res.(0) |]
+    instr_for_intop op ( _int n, emit_reg i.res.(0)  )
   | Lop(Inegf) ->
-    _xorpd [| at_rip OWORD (emit_symbol "caml_negf_mask"); emit_reg i.res.(0);  |]
+    _xorpd ( at_rip OWORD (emit_symbol "caml_negf_mask"), emit_reg i.res.(0);   )
   | Lop(Iabsf) ->
-    _andpd [| at_rip OWORD (emit_symbol "caml_absf_mask"); emit_reg i.res.(0) |]
+    _andpd ( at_rip OWORD (emit_symbol "caml_absf_mask"), emit_reg i.res.(0)  )
   | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) ->
-    instr_for_floatop floatop [| emit_reg i.arg.(1); emit_reg i.res.(0);  |]
+    instr_for_floatop floatop ( emit_reg i.arg.(1), emit_reg i.res.(0);   )
   | Lop(Ifloatofint) ->
-    _cvtsi2sd  [| emit_reg i.arg.(0); emit_reg i.res.(0);  |]
+    _cvtsi2sd  ( emit_reg i.arg.(0), emit_reg i.res.(0);   )
   | Lop(Iintoffloat) ->
-    _cvttsd2si [| emit_reg i.arg.(0); emit_reg i.res.(0);  |]
+    _cvttsd2si ( emit_reg i.arg.(0), emit_reg i.res.(0);   )
   | Lop(Ispecific(Ilea addr)) ->
-    _leaq [| emit_addressing addr NO i.arg 0; emit_reg i.res.(0);  |]
+    _leaq ( emit_addressing addr NO i.arg 0, emit_reg i.res.(0);   )
   | Lop(Ispecific(Istore_int(n, addr))) ->
-    _movq [| emit_nativeint n; emit_addressing addr QWORD i.arg 0;  |]
+    _movq ( emit_nativeint n, emit_addressing addr QWORD i.arg 0;   )
   | Lop(Ispecific(Istore_symbol(s, addr))) ->
     assert (not !pic_code );
     (*     assert (not !Clflags.dlcode);  ONLY on Unix *)
     add_used_symbol s;
-    _movq [| _offset (emit_symbol s); emit_addressing addr QWORD i.arg 0; |]
+    _movq ( _offset (emit_symbol s), emit_addressing addr QWORD i.arg 0  )
   | Lop(Ispecific(Ioffset_loc(n, addr))) ->
-    _addq [| _int n; emit_addressing addr QWORD i.arg 0;  |]
+    _addq ( _int n, emit_addressing addr QWORD i.arg 0;   )
   | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
-    instr_for_floatarithmem op [| emit_addressing addr REAL8 i.arg 1; emit_reg i.res.(0); |]
+    instr_for_floatarithmem op ( emit_addressing addr REAL8 i.arg 1, emit_reg i.res.(0)  )
   | Lop(Ispecific(Ibswap size)) ->
     begin match size with
       | 16 ->
-        _xchg [| Reg8 AH; Reg8 AL; |];
-        _movzwq [| emit_reg16 i.res.(0); emit_reg i.res.(0); |]
+        _xchg ( Reg8 AH, Reg8 AL  );
+        _movzwq ( emit_reg16 i.res.(0), emit_reg i.res.(0)  )
       | 32 ->
-        _bswap [| emit_reg32 i.res.(0) |];
-        _movslq [| emit_reg32 i.res.(0); emit_reg i.res.(0); |]
+        _bswap ( emit_reg32 i.res.(0)  );
+        _movslq ( emit_reg32 i.res.(0), emit_reg i.res.(0)  )
       | 64 ->
-        _bswap [| emit_reg i.res.(0) |]
+        _bswap ( emit_reg i.res.(0)  )
       | _ -> assert false
     end
   | Lop(Ispecific Isqrtf) ->
-    _sqrtsd [| emit_reg i.arg.(0); emit_reg i.res.(0); |]
+    _sqrtsd ( emit_reg i.arg.(0), emit_reg i.res.(0)  )
 
   | Lop(Ispecific(Ifloatsqrtf addr)) ->
-    _sqrtsd [| emit_addressing addr REAL8 i.arg 0; emit_reg i.res.(0); |]
+    _sqrtsd ( emit_addressing addr REAL8 i.arg 0, emit_reg i.res.(0)  )
   | Lreloadretaddr ->
     ()
   | Lreturn ->
     output_epilogue begin fun () ->
-      _ret [||]
+      _ret ( )
     end
   | Llabel lbl ->
     _llabel (emit_Llabel fallthrough lbl)
   | Lbranch lbl ->
-    _jmp [| _l( emit_label lbl) |]
+    _jmp ( _l( emit_label lbl)  )
   | Lcondbranch(tst, lbl) ->
     begin match tst with
         Itruetest ->
         output_test_zero i.arg.(0);
-        _jne [| _l( emit_label lbl ) |]
+        _jne ( _l( emit_label lbl )  )
       | Ifalsetest ->
         output_test_zero i.arg.(0);
-        _je [| _l( emit_label lbl ) |]
+        _je ( _l( emit_label lbl )  )
       | Iinttest cmp ->
-        _cmpq [| emit_reg i.arg.(1); emit_reg i.arg.(0); |];
+        _cmpq ( emit_reg i.arg.(1), emit_reg i.arg.(0)  );
         let b = name_for_cond_branch cmp in
-        _j b [| _l( emit_label lbl ) |]
+        _j b ( _l( emit_label lbl )  )
       | Iinttest_imm((Isigned Ceq | Isigned Cne |
                       Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
         output_test_zero i.arg.(0);
         let b = name_for_cond_branch cmp in
-        _j b [| _l ( emit_label lbl ) |]
+        _j b ( _l ( emit_label lbl )  )
       | Iinttest_imm(cmp, n) ->
-        _cmpq [| _int n; emit_reg i.arg.(0); |];
+        _cmpq ( _int n, emit_reg i.arg.(0)  );
         let b = name_for_cond_branch cmp in
-        _j b [| _l( emit_label lbl ) |]
+        _j b ( _l( emit_label lbl )  )
       | Ifloattest(cmp, neg) ->
         emit_float_test cmp neg i.arg lbl
       | Ioddtest ->
-        _testb [| _int 1; emit_reg8 i.arg.(0); |];
-        _jne [| _l( emit_label lbl ) |]
+        _testb ( _int 1, emit_reg8 i.arg.(0)  );
+        _jne ( _l( emit_label lbl )  )
       | Ieventest ->
-        _testb [| _int 1; emit_reg8 i.arg.(0); |];
-        _je [| _l( emit_label lbl ) |]
+        _testb ( _int 1, emit_reg8 i.arg.(0)  );
+        _je ( _l( emit_label lbl )  )
     end
   | Lcondbranch3(lbl0, lbl1, lbl2) ->
-    _cmpq [| _int 1; emit_reg i.arg.(0); |];
+    _cmpq ( _int 1, emit_reg i.arg.(0)  );
     begin match lbl0 with
         None -> ()
-      | Some lbl -> _jb [| _l ( emit_label lbl ) |]
+      | Some lbl -> _jb ( _l ( emit_label lbl )  )
     end;
     begin match lbl1 with
         None -> ()
-      | Some lbl -> _je [| _l ( emit_label lbl ) |]
+      | Some lbl -> _je ( _l ( emit_label lbl )  )
     end;
     begin match lbl2 with
         None -> ()
-      | Some lbl -> _jg [| _l ( emit_label lbl ) |]
+      | Some lbl -> _jg ( _l ( emit_label lbl )  )
     end
   | Lswitch jumptbl ->
     let lbl = new_label() in
@@ -759,12 +767,12 @@ let emit_instr fallthrough i =
       then (phys_reg 4 (*rdx*), phys_reg 0 (*rax*))
       else (phys_reg 0 (*rax*), phys_reg 4 (*rdx*)) in
 
-    _leaq [| at_rip NO ( emit_label lbl ); emit_reg tmp1; |];
-    _movslq [|
-              Mem(DWORD, emit_reg64 i.arg.(0), 4, BaseReg (emit_reg64 tmp1), 0);
-              emit_reg tmp2; |];
-    _addq [| emit_reg tmp2; emit_reg tmp1; |];
-    _jmp [| emit_reg tmp1 |];
+    _leaq ( at_rip NO ( emit_label lbl ), emit_reg tmp1  );
+    _movslq (
+              Mem(DWORD, emit_reg64 i.arg.(0), 4, BaseReg (emit_reg64 tmp1), 0),
+              emit_reg tmp2  );
+    _addq ( emit_reg tmp2, emit_reg tmp1  );
+    _jmp ( emit_reg tmp1  );
 
     begin match system with
       | S_macosx -> _specific ".const"
@@ -774,24 +782,24 @@ let emit_instr fallthrough i =
       | _ -> _specific ".section .rodata"
     end;
     emit_align 4;
-    emit (NewLabel (emit_label lbl, DWORD)) [||];
+    emit (NewLabel (emit_label lbl, DWORD));
     for i = 0 to Array.length jumptbl - 1 do
       _long (ConstSub (ConstLabel( emit_label jumptbl.(i) ),
             ConstLabel( emit_label lbl )))
     done;
     _text ()
   | Lsetuptrap lbl ->
-    _call [| _l( emit_label lbl ) |]
+    _call ( _l( emit_label lbl )  )
   | Lpushtrap ->
     cfi_adjust_cfa_offset 8;
-    _pushq [| Reg R14 |];
+    _pushq ( _r R14  );
     cfi_adjust_cfa_offset 8;
-    _movq [| Reg RSP; Reg R14; |];
+    _movq ( _r RSP, _r R14  );
     stack_offset := !stack_offset + 16
   | Lpoptrap ->
-    _popq [| Reg R14 |];
+    _popq ( _r R14  );
     cfi_adjust_cfa_offset (-8);
-    _addq [| _int 8; Reg RSP; |];
+    _addq ( _int 8, _r RSP  );
     cfi_adjust_cfa_offset (-8);
     stack_offset := !stack_offset - 16
   | Lraise k ->
@@ -804,9 +812,9 @@ let emit_instr fallthrough i =
         record_frame Reg.Set.empty i.dbg
       | false, _
       | true, Lambda.Raise_notrace ->
-        _movq [| Reg R14; Reg RSP |];
-        _popq [| Reg R14 |];
-        _ret [||]
+        _movq ( _r R14, _r RSP  );
+        _popq ( _r R14  );
+        _ret ( )
     end
 
 (* DONE UNTIL HERE REVERSING ARGUMENTS *)
@@ -827,11 +835,11 @@ let emit_profile () =
        all the registers used for argument passing, so we don't
        need to preserve other regs.  We do need to initialize rbp
        like mcount expects it, though. *)
-    _pushq [| Reg R10 |];
+    _pushq ( _r R10  );
     if not fp then
-      _movq [| Reg RSP; Reg RBP  |];
+      _movq ( _r RSP, _r RBP   );
     emit_call "mcount";
-    _popq [| Reg R10 |]
+    _popq ( _r R10  )
   end
 
 (* Emission of a function declaration *)
@@ -851,22 +859,22 @@ let fundecl fundecl =
   && not !Clflags.output_c_object
   && is_generic_function fundecl.fun_name
   then (* PR#4690 *)
-    _specific (Printf.sprintf
-          ".private_extern %s" (emit_symbol fundecl.fun_name))
+    Printf.kprintf _specific
+          ".private_extern %s" (emit_symbol fundecl.fun_name)
   else
     _global (emit_symbol fundecl.fun_name);
   _llabel (emit_symbol fundecl.fun_name);
   emit_debug_info fundecl.fun_dbg;
   cfi_startproc ();
   if fp then begin
-       _pushq [| Reg RBP |];
+       _pushq ( _r RBP  );
        cfi_adjust_cfa_offset 8;
-       _movq [| Reg RSP; Reg RBP; |];
+       _movq ( _r RSP, _r RBP  );
   end;
   if !Clflags.gprofile then emit_profile();
   if frame_required() then begin
     let n = frame_size() - 8 - (if fp then 8 else 0) in
-    _subq [| _int n; Reg RSP |];
+    _subq ( _int n, _r RSP  );
     cfi_adjust_cfa_offset n;
   end;
   _llabel (emit_label !tailrec_entry_point);
@@ -875,11 +883,11 @@ let fundecl fundecl =
   emit_call_bound_errors ();
   cfi_endproc ();
   if system = S_gnu || system = S_linux then begin
-      _specific (Printf.sprintf
-            ".type\t%s,@function" (emit_symbol fundecl.fun_name));
-      _specific (Printf.sprintf
+      Printf.kprintf _specific
+            ".type\t%s,@function" (emit_symbol fundecl.fun_name);
+      Printf.kprintf _specific
            ".size\t%s,.-%s"
-           (emit_symbol fundecl.fun_name) (emit_symbol fundecl.fun_name))
+           (emit_symbol fundecl.fun_name) (emit_symbol fundecl.fun_name)
   end
 
 (* Emission of data *)
@@ -903,15 +911,16 @@ let emit_item = function
   | Clabel_address lbl ->
     _qword (ConstLabel (emit_data_label lbl))
   | Cstring s ->
-    emit (Bytes s) [||]
+    emit (Bytes s)
   | Cskip n ->
-    if n>0 then emit (Space n) [||]
+    if n>0 then emit (Space n)
   | Calign n ->
     emit_align n
 
   (* Beginning / end of an assembly file *)
 
 let begin_assembly() =
+  arch64 := true;
   init_segments ();
   reset_debug_info();                   (* PR#5603 *)
   float_constants := StringMap.empty;
@@ -960,7 +969,7 @@ let begin_assembly() =
   _text ();
   emit_global_label lbl_begin;
 
-  if system = S_macosx then emit NOP [||]; (* PR#4690 *)
+  if system = S_macosx then _nop (); (* PR#4690 *)
   ()
 
 
@@ -986,7 +995,7 @@ let end_assembly() =
   let lbl_end = Compilenv.make_symbol (Some "code_end") in
   add_def_symbol lbl_end;
   _text ();
-  if system = S_macosx then emit NOP [||]; (* suppress "ld warning: atom sorting error" *)
+  if system = S_macosx then _nop (); (* suppress "ld warning: atom sorting error" *)
   emit_global_label lbl_end;
   _data ();
   let lbl_end = Compilenv.make_symbol (Some "data_end") in
@@ -1010,9 +1019,9 @@ let end_assembly() =
           fun lbl ofs ->
             incr setcnt;
             let s = Printf.sprintf "L$set$%d" !setcnt in
-            emit Set   [| Direct s;
+            emit (Set ( Direct s,
                           LabelRel(NO, emit_label lbl, 4 + Int32.to_int ofs);
-                       |];
+                        ));
             _long (ConstLabel s)
         end else begin
           fun lbl ofs ->
@@ -1022,7 +1031,7 @@ let end_assembly() =
         end;
       efa_def_label = (fun l -> _label (emit_label l));
       efa_string = (fun s ->
-        emit (Bytes (s ^ "\000")) [||])
+        emit (Bytes (s ^ "\000")))
     };
 
   if system = S_linux then
@@ -1040,12 +1049,13 @@ let end_assembly() =
     symbols_defined := StringSet.empty;
   end;
 
-  emit End [||];
+  emit End;
 
   let oc = !Emitaux.output_channel in
 
-  let b = Buffer.create 10000 in
-  List.iter (arch.bprint_instr b arch) (List.rev seg.seg_instrs);
-  let s = Buffer.contents b in
-  output_string oc s
-
+    let bprint_instr =
+      match system with
+      | S_win32 | S_win64 -> Intel_masm.bprint_instr
+      | _ -> Intel_gas.bprint_instr
+    in
+  generate_code oc bprint_instr
diff --git a/asmcomp/amd64/proc.ml b/asmcomp/amd64/proc.ml
index b6e0fa94ab..b4a748b8dc 100644
--- a/asmcomp/amd64/proc.ml
+++ b/asmcomp/amd64/proc.ml
@@ -302,13 +302,7 @@ let contains_calls = ref false
 (* Calling the assembler *)
 
 let assemble_file infile outfile =
-  if masm then
-    Ccomp.command (Config.asm ^
-                   Filename.quote outfile ^ " " ^ Filename.quote infile ^
-                   (if !Clflags.verbose then "" else ">NUL"))
-  else
-    Ccomp.command (Config.asm ^ " -o " ^
-                   Filename.quote outfile ^ " " ^ Filename.quote infile)
+  Intel_proc.assemble_file infile outfile
 
 let init () =
   if fp then begin
diff --git a/asmcomp/i386/emit.mlp b/asmcomp/i386/emit.mlp
index 9c86b7159e..dcb2deeecc 100644
--- a/asmcomp/i386/emit.mlp
+++ b/asmcomp/i386/emit.mlp
@@ -11,13 +11,6 @@
 (*                                                                     *)
 (***********************************************************************)
 
-(* DONE UNTIL HERE REVERSING ARGUMENTS *)
-
-(* TODO:
-  * Check @stub on MacOSX
-
-*)
-
 (* Emission of Intel 386 assembly code *)
 
 open Misc
@@ -30,6 +23,15 @@ open Linearize
 open Emitaux
 open Intel_proc
 
+let _r r = Reg (register r)
+let _label s = emit (NewLabel (s, DWORD))
+let _mem offset reg = Mem(NO, register reg, 1, NoBase, offset)
+
+(* On win32/win64, some memory references need to specify the size of
+   the operand in 'pref' *)
+let _mem_ptr pref offset reg = Mem(pref, register reg, 1, NoBase, offset)
+
+(*
 module Emitter = MakeEmitter(struct
 
     type reg = register32
@@ -40,9 +42,12 @@ module Emitter = MakeEmitter(struct
       | _ -> Intel_gas.bprint_instr
     let string_of_register = string_of_register32
     let word_size = DWORD
+    let passes = Intel_proc.assembler_passes
+
   end)
 
 open Emitter
+*)
 
 (* CFI directives *)
 
@@ -132,7 +137,7 @@ let emit_symbol s =
   string_of_symbol symbol_prefix s
 
 let emit_call s =
-  _call [| _l (emit_symbol s) |]
+  _call (  _l (emit_symbol s)  )
 
 (* Output a label *)
 
@@ -174,12 +179,12 @@ let emit_Llabel fallthrough lbl =
 
 (* Output a pseudo-register *)
 
-let int_reg_name =  [| EAX; EBX; ECX; EDX; ESI; EDI; EBP |]
+let int_reg_name =  [| EAX; EBX; ECX; EDX; ESI; EDI; EBP  |]
 
 let float_reg_name = [| TOS |]
 
 let register_name r =
-  if r < 100 then Reg (int_reg_name.(r))
+  if r < 100 then _r (int_reg_name.(r))
   else Regf (float_reg_name.(r - 100))
 
 let emit_reg = function
@@ -188,16 +193,16 @@ let emit_reg = function
     LabelRel(NO, emit_symbol "caml_extra_params", n + 64)
   | { loc = Stack s; typ = Float } as r ->
       let ofs = slot_offset s (register_class r) in
-      Mem (REAL8, ESP, 1, NoBase, ofs)
+      _mem_ptr REAL8 ofs ESP
   | { loc = Stack s } as r ->
       let ofs = slot_offset s (register_class r) in
-      Mem (DWORD, ESP, 1, NoBase, ofs)
+      _mem_ptr DWORD ofs ESP
   | { loc = Unknown } ->
       fatal_error "Emit_i386.emit_reg"
 
 (* Output a reference to the lower 8 bits or lower 16 bits of a register *)
 
-let reg_low_byte_name = [| AL; BL; CL; DL |]
+let reg_low_byte_name = [|  AL; BL; CL; DL |]
 let reg_low_half_name = [| AX; BX; CX; DX; SI; DI; BP |]
 
 let emit_reg8 r =
@@ -220,17 +225,20 @@ let emit_addressing addr pref r n =
   match addr with
     Ibased(s, d) ->
     add_used_symbol s;
-    LabelRel( NO, emit_symbol s, d)
+    LabelRel( pref, emit_symbol s, d)
   | Iindexed d ->
-    Mem(pref, emit_reg32 r.(n), 1, NoBase, d)
+    _mem_ptr pref d (emit_reg32 r.(n))
   | Iindexed2 d ->
-    Mem(pref, emit_reg32 r.(n+1), 1, BaseReg (emit_reg32 r.(n)), d)
+    Mem(pref, register (emit_reg32 r.(n+1)), 1,
+      BaseReg (register (emit_reg32 r.(n))), d)
   | Iscaled(2, d) ->
-    Mem(pref, emit_reg32 r.(n), 1, BaseReg (emit_reg32 r.(n)), d)
+    Mem(pref, register (emit_reg32 r.(n)), 1,
+      BaseReg (register (emit_reg32 r.(n))), d)
   | Iscaled(scale, d) ->
-    Mem(pref, emit_reg32 r.(n), scale, NoBase,  d)
+    Mem(pref, register (emit_reg32 r.(n)), scale, NoBase,  d)
   | Iindexed2scaled(scale, d) ->
-    Mem(pref, emit_reg32 r.(n+1), scale, BaseReg (emit_reg32 r.(n)), d)
+    Mem(pref, register (emit_reg32 r.(n+1)), scale,
+      BaseReg (register (emit_reg32 r.(n))), d)
 
 (* Record live pointers at call points *)
 
@@ -269,7 +277,7 @@ let emit_call_gc gc =
   _llabel (emit_label gc.gc_lbl);
   emit_call "caml_call_gc";
   _llabel (emit_label gc.gc_frame);
-  _jmp [| _l (emit_label gc.gc_return_lbl)|]
+  _jmp (  _l (emit_label gc.gc_return_lbl) )
 
 (* Record calls to caml_ml_array_bound_error.
    In -g mode, we maintain one call to caml_ml_array_bound_error
@@ -311,7 +319,7 @@ let emit_call_bound_errors () =
 let instr_for_intop = function
     Iadd -> _addl
   | Isub -> _subl
-  | Imul -> _imull
+  | Imul -> (fun (arg1,arg2) ->  _imull (arg1, Some arg2))
   | Iand -> _andl
   | Ior -> _orl
   | Ixor -> _xorl
@@ -369,8 +377,8 @@ let name_for_cond_branch = function
 
 let output_test_zero arg =
   match arg.loc with
-    Reg.Reg r -> _testl  [|  emit_reg arg;emit_reg arg (* rev *) |]
-  | _  -> _cmpl  [|  emit_int 0;emit_reg arg (* rev *) |]
+    Reg.Reg r -> _testl  (   emit_reg arg, emit_reg arg (* rev *)  )
+  | _  -> _cmpl  (   emit_int 0, emit_reg arg (* rev *)  )
 
 (* Deallocate the stack frame before a return or tail call *)
 
@@ -378,7 +386,7 @@ let output_epilogue f =
   let n = frame_size() - 4 in
   if n > 0 then
     begin
-      _addl [|  emit_int n;Reg ESP (* rev *) |];
+      _addl ( _int n, _r ESP );
       cfi_adjust_cfa_offset (-n);
       f ();
     (* reset CFA back cause function body may continue *)
@@ -398,80 +406,80 @@ let emit_float_test cmp neg arg lbl =
     match (is_tos arg.(0), is_tos arg.(1)) with
       (true, true) ->
       (* both args on top of FP stack *)
-      _fcompp [||];
+      _fcompp ();
       cmp
     | (true, false) ->
       (* first arg on top of FP stack *)
-      _fcompl	[| emit_reg arg.(1) |];
+      _fcompl (  emit_reg arg.(1)  );
       cmp
     | (false, true) ->
       (* second arg on top of FP stack *)
-      _fcompl	[| emit_reg arg.(0) |];
+      _fcompl (  emit_reg arg.(0)  );
       Cmm.swap_comparison cmp
     | (false, false) ->
-      _fldl    [| emit_reg arg.(0) |];
-      _fcompl  [| emit_reg arg.(1) |];
+      _fldl    (  emit_reg arg.(0)  );
+      _fcompl  (  emit_reg arg.(1)  );
       cmp
   in
-  _fnstsw [| Reg16 AX |];
+  _fnstsw (  Reg16 AX  );
   begin match actual_cmp with
     Ceq ->
       if neg then begin
-      _andb [|  _int 68 ; Reg8 AH (* rev *) |];
-      _xorb [|  _int 64 ; Reg8 AH (* rev *) |];
+      _andb (   _int 68 , Reg8 AH  );
+      _xorb (   _int 64 , Reg8 AH   );
       _jne
       end else begin
-      _andb [|  _int 69 ; Reg8 AH (* rev *) |];
-      _cmpb [|  _int 64 ; Reg8 AH (* rev *) |];
+      _andb ( _int 69 , Reg8 AH   );
+      _cmpb ( _int 64 , Reg8 AH   );
       _je
       end
   | Cne ->
       if neg then begin
-        _andb [|  _int 69 ; Reg8 AH (* rev *) |];
-        _cmpb [|  _int 64 ; Reg8 AH (* rev *) |];
+        _andb (   _int 69 , Reg8 AH   );
+        _cmpb (   _int 64 , Reg8 AH   );
         _je
       end else begin
-        _andb [|  _int 68 ; Reg8 AH (* rev *) |];
-        _xorb [|  _int 64 ; Reg8 AH (* rev *) |];
+        _andb (   _int 68 , Reg8 AH   );
+        _xorb (   _int 64 , Reg8 AH   );
         _jne
       end
   | Cle ->
-      _andb [|  _int 69 ; Reg8 AH (* rev *) |];
-      _decb [| Reg8 AH |];
-      _cmpb [|  _int 64 ; Reg8 AH (* rev *) |];
+      _andb (   _int 69 , Reg8 AH   );
+      _decb (  Reg8 AH  );
+      _cmpb (   _int 64 , Reg8 AH   );
       if neg
       then _jae
       else _jb
   | Cge ->
-      _andb [|  _int 5 ; Reg8 AH (* rev *) |];
+      _andb (   _int 5 , Reg8 AH   );
       if neg
       then _jne
       else _je
   | Clt ->
-      _andb [|  _int 69 ; Reg8 AH (* rev *) |];
-      _cmpb [|  _int 1 ; Reg8 AH (* rev *) |];
+      _andb (   _int 69 , Reg8 AH   );
+      _cmpb (   _int 1 , Reg8 AH   );
       if neg
       then _jne
       else _je
   | Cgt ->
-      _andb [|  _int 69 ; Reg8 AH (* rev *) |];
+      _andb (   _int 69 , Reg8 AH   );
       if neg
       then _jne
       else _je
   end
-  [| _l (emit_label lbl) |]
+  (  _l (emit_label lbl)  )
 
 (* Emit a Ifloatspecial instruction *)
 
 let emit_floatspecial = function
-    "atan"  -> _fld1 [||]; _fpatan [||]
-  | "atan2" -> _fpatan [||]
-  | "cos"   -> _fcos [||]
-  | "log"   -> _fldln2 [||]; _fxch [||]; _fyl2x [||]
-  | "log10" -> _fldlg2 [||]; _fxch [||]; _fyl2x [||]
-  | "sin"   -> _fsin [||]
-  | "sqrt"  -> _fsqrt [||]
-  | "tan"   -> _fptan [||]; _fstp [| Regf (ST 0) |] (* %st(0) *)
+    "atan"  -> _fld1 (  ); _fpatan (  )
+  | "atan2" -> _fpatan (  )
+  | "cos"   -> _fcos (  )
+  | "log"   -> _fldln2 (  ); _fxch None; _fyl2x (  )
+  | "log10" -> _fldlg2 (  ); _fxch None; _fyl2x (  )
+  | "sin"   -> _fsin (  )
+  | "sqrt"  -> _fsqrt (  )
+  | "tan"   -> _fptan (  ); _fstp (  Regf (ST 0)  ) (* %st(0) *)
   | _ -> assert false
 
 (* Floating-point constants *)
@@ -520,70 +528,70 @@ let emit_instr fallthrough i =
     if src.loc <> dst.loc then begin
       if src.typ = Float then
         if is_tos src then
-          _fstpl [| emit_reg dst |]
+          _fstpl (  emit_reg dst  )
         else if is_tos dst then
-          _fldl [| emit_reg src |]
+          _fldl (  emit_reg src  )
         else begin
-          _fldl  [| emit_reg src |];
-          _fstpl [| emit_reg dst |]
+          _fldl  (  emit_reg src  );
+          _fstpl (  emit_reg dst  )
         end
       else
-        _movl [|  emit_reg src ; emit_reg dst (* rev *) |]
+        _movl (   emit_reg src , emit_reg dst   )
     end
   | Lop(Iconst_int n) ->
     if n = 0n then begin
       match i.res.(0).loc with
-        Reg n -> _xorl [|  emit_reg i.res.(0) ; emit_reg i.res.(0) (* rev *) |]
-      | _     -> _movl [|  emit_int 0 ; emit_reg i.res.(0) (* rev *) |]
+        Reg n -> _xorl (   emit_reg i.res.(0) , emit_reg i.res.(0)   )
+      | _     -> _movl (   emit_int 0 , emit_reg i.res.(0)   )
     end else
-      _movl [|  emit_nativeint n ; emit_reg i.res.(0) (* rev *) |]
+      _movl (   emit_nativeint n , emit_reg i.res.(0)   )
   | Lop(Iconst_float s) ->
     begin match Int64.bits_of_float (float_of_string s) with
       | 0x0000_0000_0000_0000L ->       (* +0.0 *)
-        _fldz [||]
+        _fldz (  )
       | 0x8000_0000_0000_0000L ->       (* -0.0 *)
-        _fldz [||]; _fchs [||]
+        _fldz (  ); _fchs None
       | 0x3FF0_0000_0000_0000L ->       (*  1.0 *)
-        _fld1 [||]
+        _fld1 (  )
       | 0xBFF0_0000_0000_0000L ->       (* -1.0 *)
-        _fld1 [||]; _fchs [||]
+        _fld1 (  ); _fchs None
       | _ ->
         let lbl = add_float_constant s in
-        _fldl [| _l (emit_label lbl) |]
+        _fldl (  _l (emit_label lbl)  )
     end
   | Lop(Iconst_symbol s) ->
     add_used_symbol s;
-    _movl [|  _offset (emit_symbol s) ; emit_reg i.res.(0) (* rev *) |]
+    _movl (   _offset (emit_symbol s) , emit_reg i.res.(0)   )
   | Lop(Icall_ind) ->
-    _call [|emit_reg i.arg.(0)|];
+    _call ( emit_reg i.arg.(0) );
     record_frame i.live i.dbg
   | Lop(Icall_imm s) ->
     add_used_symbol s;
-    _call [| _l (emit_symbol s) |];
+    _call (  _l (emit_symbol s)  );
     record_frame i.live i.dbg
   | Lop(Itailcall_ind) ->
     output_epilogue begin fun () ->
-      _jmp [| emit_reg i.arg.(0) |]
+      _jmp (  emit_reg i.arg.(0)  )
     end
   | Lop(Itailcall_imm s) ->
     if s = !function_name then
-      _jmp	[| _l (emit_label !tailrec_entry_point) |]
+      _jmp (  _l (emit_label !tailrec_entry_point)  )
     else begin
       output_epilogue begin fun () ->
         add_used_symbol s;
-        _jmp [| _l (emit_symbol s) |]
+        _jmp (  _l (emit_symbol s)  )
       end
     end
   | Lop(Iextcall(s, alloc)) ->
     add_used_symbol s;
     if alloc then begin
       if system <> S_macosx then
-        _movl [|  _offset (emit_symbol s) ; Reg EAX (* rev *) |]
+        _movl (   _offset (emit_symbol s) , _r EAX   )
       else begin
         external_symbols_indirect :=
           StringSet.add s !external_symbols_indirect;
-        _movl [|  _l (Printf.sprintf "L%s$non_lazy_ptr"
-                      (emit_symbol s)) ; Reg EAX (* rev *) |]
+        _movl (   _l (Printf.sprintf "L%s$non_lazy_ptr"
+                      (emit_symbol s)) , _r EAX   )
       end;
       emit_call "caml_c_call";
       record_frame i.live i.dbg
@@ -593,68 +601,68 @@ let emit_instr fallthrough i =
       else begin
         external_symbols_direct :=
           StringSet.add s !external_symbols_direct;
-        _call [| _l (Printf.sprintf "L%s$stub" (emit_symbol s)) |]
+        _call (  _l (Printf.sprintf "L%s$stub" (emit_symbol s))  )
       end
     end
   | Lop(Istackoffset n) ->
     if n < 0
-    then _addl [|  emit_int(-n) ; Reg ESP (* rev *) |]
-    else _subl [|  emit_int(n) ; Reg ESP (* rev *) |];
+    then _addl (   emit_int(-n) , _r ESP   )
+    else _subl (   emit_int(n) , _r ESP   );
     cfi_adjust_cfa_offset n;
     stack_offset := !stack_offset + n
   | Lop(Iload(chunk, addr)) ->
     let dest = i.res.(0) in
     begin match chunk with
       | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
-        _movl [|  emit_addressing addr DWORD i.arg 0 ; emit_reg dest (* rev *) |]
+        _movl (   emit_addressing addr DWORD i.arg 0 , emit_reg dest   )
       | Byte_unsigned ->
-        _movzbl [|  emit_addressing addr BYTE i.arg 0 ; emit_reg dest (* rev *) |]
+        _movzbl (   emit_addressing addr BYTE i.arg 0 , emit_reg dest   )
       | Byte_signed ->
-        _movsbl [|  emit_addressing addr BYTE i.arg 0 ; emit_reg dest (* rev *) |]
+        _movsbl (   emit_addressing addr BYTE i.arg 0 , emit_reg dest   )
       | Sixteen_unsigned ->
-        _movzwl [|  emit_addressing addr WORD i.arg 0 ; emit_reg dest (* rev *) |]
+        _movzwl (   emit_addressing addr WORD i.arg 0 , emit_reg dest   )
       | Sixteen_signed ->
-        _movswl [|  emit_addressing addr WORD i.arg 0 ; emit_reg dest (* rev *) |]
+        _movswl (   emit_addressing addr WORD i.arg 0 , emit_reg dest   )
       | Single ->
-        _flds [| emit_addressing addr REAL4 i.arg 0 |]
+        _flds (  emit_addressing addr REAL4 i.arg 0  )
       | Double | Double_u ->
-        _fldl [| emit_addressing addr REAL8 i.arg 0 |]
+        _fldl (  emit_addressing addr REAL8 i.arg 0  )
     end
   | Lop(Istore(chunk, addr)) ->
     begin match chunk with
       | Word | Thirtytwo_signed | Thirtytwo_unsigned ->
-        _movl [|  emit_reg i.arg.(0) ; emit_addressing addr DWORD i.arg 1 (* rev *) |]
+        _movl (   emit_reg i.arg.(0) , emit_addressing addr DWORD i.arg 1   )
       | Byte_unsigned | Byte_signed ->
-        _movb [|  emit_reg8 i.arg.(0) ; emit_addressing addr BYTE i.arg 1 (* rev *) |]
+        _movb (   emit_reg8 i.arg.(0) , emit_addressing addr BYTE i.arg 1   )
       | Sixteen_unsigned | Sixteen_signed ->
-        _movw [|  emit_reg16 i.arg.(0) ; emit_addressing addr WORD i.arg 1 (* rev *) |]
+        _movw (   emit_reg16 i.arg.(0) , emit_addressing addr WORD i.arg 1   )
       | Single ->
         if is_tos i.arg.(0) then
-          _fstps [| emit_addressing addr REAL4 i.arg 1 |]
+          _fstps (  emit_addressing addr REAL4 i.arg 1  )
         else begin
-          _fldl	[| emit_reg i.arg.(0) |];
-          _fstps	[| emit_addressing addr REAL4 i.arg 1 |]
+          _fldl (  emit_reg i.arg.(0)  );
+          _fstps (  emit_addressing addr REAL4 i.arg 1  )
         end
       | Double | Double_u ->
         if is_tos i.arg.(0) then
-          _fstpl [| emit_addressing addr REAL8 i.arg 1 |]
+          _fstpl (  emit_addressing addr REAL8 i.arg 1  )
         else begin
-          _fldl [|	emit_reg i.arg.(0) |];
-          _fstpl [| emit_addressing addr REAL8 i.arg 1 |]
+          _fldl (  emit_reg i.arg.(0)  );
+          _fstpl (  emit_addressing addr REAL8 i.arg 1  )
         end
     end
   | Lop(Ialloc n) ->
     if !fastcode_flag then begin
       let lbl_redo = new_label() in
       _llabel (emit_label lbl_redo);
-      _movl [|  _l (emit_symbol "caml_young_ptr") ; Reg EAX (* rev *) |];
-      _subl [|  emit_int n ; Reg EAX (* rev *) |];
-      _movl	[|  Reg EAX ; _l (emit_symbol "caml_young_ptr") (* rev *) |];
-      _cmpl	[|  _l (emit_symbol "caml_young_limit") ; Reg EAX (* rev *) |];
+      _movl (   _l (emit_symbol "caml_young_ptr") , _r EAX   );
+      _subl (   emit_int n , _r EAX   );
+      _movl (   _r EAX , _l (emit_symbol "caml_young_ptr")   );
+      _cmpl (   _l (emit_symbol "caml_young_limit") , _r EAX   );
       let lbl_call_gc = new_label() in
       let lbl_frame = record_frame_label i.live Debuginfo.none in
-      _jb [| _l (emit_label lbl_call_gc) |];
-      _leal	[|  Mem(NO, EAX, 1, NoBase, 4) ; emit_reg i.res.(0) (* rev *) |];
+      _jb (  _l (emit_label lbl_call_gc)  );
+      _leal  (   _mem 4 EAX , emit_reg i.res.(0)   );
       call_gc_sites :=
         { gc_lbl = lbl_call_gc;
           gc_return_lbl = lbl_redo;
@@ -665,240 +673,241 @@ let emit_instr fallthrough i =
         | 12 -> emit_call "caml_alloc2"
         | 16 -> emit_call "caml_alloc3"
         | _  ->
-          _movl [|  emit_int n ; Reg EAX (* rev *) |];
+          _movl (   emit_int n , _r EAX   );
           emit_call "caml_allocN"
       end;
       record_frame i.live Debuginfo.none;
-      _leal	[|  Mem(NO, EAX, 1, NoBase, 4) ; emit_reg i.res.(0) (* rev *) |]
+      _leal (  _mem 4 EAX , emit_reg i.res.(0)   )
     end
   | Lop(Iintop(Icomp cmp)) ->
-    _cmpl [|  emit_reg i.arg.(1) ; emit_reg i.arg.(0) (* rev *) |];
+    _cmpl (   emit_reg i.arg.(1) , emit_reg i.arg.(0)   );
     let b = name_for_cond_branch cmp in
-    _set b [| Reg8 AL |];
-    _movzbl [|  Reg8 AL ; emit_reg i.res.(0) (* rev *) |];
+    _set b (  Reg8 AL  );
+    _movzbl (   Reg8 AL , emit_reg i.res.(0)   );
   | Lop(Iintop_imm(Icomp cmp, n)) ->
-    _cmpl [|  emit_int n ; emit_reg i.arg.(0) (* rev *) |];
+    _cmpl (   emit_int n , emit_reg i.arg.(0)   );
     let b = name_for_cond_branch cmp in
-    _set b [| Reg8 AL |];
-    _movzbl [|  Reg8 AL ; emit_reg i.res.(0) (* rev *) |]
+    _set b (  Reg8 AL  );
+    _movzbl (   Reg8 AL , emit_reg i.res.(0)   )
   | Lop(Iintop Icheckbound) ->
     let lbl = bound_error_label i.dbg in
-    _cmpl [|  emit_reg i.arg.(1) ;	emit_reg i.arg.(0) (* rev *) |];
-    _jbe [| _l ( emit_label lbl ) |]
+    _cmpl (   emit_reg i.arg.(1) , emit_reg i.arg.(0)   );
+    _jbe (  _l ( emit_label lbl )  )
   | Lop(Iintop_imm(Icheckbound, n)) ->
     let lbl = bound_error_label i.dbg in
-    _cmpl [|  emit_int n ;	emit_reg i.arg.(0) (* rev *) |];
-    _jbe [| _l (emit_label lbl) |]
+    _cmpl (   emit_int n , emit_reg i.arg.(0)   );
+    _jbe (  _l (emit_label lbl)  )
   | Lop(Iintop(Idiv | Imod)) ->
-    _cltd [||];
-    _idivl [| emit_reg i.arg.(1) |]
+    _cltd (  );
+    _idivl (  emit_reg i.arg.(1)  )
   | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
     (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *)
-    instr_for_intop op [|  Reg8 CL ; emit_reg i.res.(0) (* rev *) |]
+    instr_for_intop op (   Reg8 CL , emit_reg i.res.(0)   )
   | Lop(Iintop Imulh) ->
-    _imull	[| emit_reg i.arg.(1) |]
+    _imull (  emit_reg i.arg.(1), None )
   | Lop(Iintop op) ->
     (* We have i.arg.(0) = i.res.(0) *)
-    instr_for_intop op [|  emit_reg i.arg.(1) ; emit_reg i.res.(0) (* rev *) |]
+    instr_for_intop op (   emit_reg i.arg.(1) , emit_reg i.res.(0)   )
   | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
-    _leal [|  Mem(NO, emit_reg32 i.arg.(0), 1, NoBase, n) ;	emit_reg i.res.(0) (* rev *) |]
+    _leal (  _mem n (emit_reg32 i.arg.(0)), emit_reg i.res.(0)   )
   | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
-    _incl [| emit_reg i.res.(0) |]
+    _incl (  emit_reg i.res.(0)  )
   | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
-    _decl [| emit_reg i.res.(0) |]
+    _decl (  emit_reg i.res.(0)  )
   | Lop(Iintop_imm(op, n)) ->
     (* We have i.arg.(0) = i.res.(0) *)
-    instr_for_intop op [|  emit_int n ; emit_reg i.res.(0) (* rev *) |]
+    instr_for_intop op (   emit_int n , emit_reg i.res.(0)   )
   | Lop(Inegf | Iabsf as floatop) ->
     if not (is_tos i.arg.(0)) then
-      _fldl [| emit_reg i.arg.(0) |];
-    instr_for_floatop floatop [||]
+      _fldl (  emit_reg i.arg.(0)  );
+    instr_for_floatop floatop None
   | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
       as floatop) ->
     begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
         (true, true) ->
         (* both operands on top of FP stack *)
-        instr_for_floatop_pop floatop [|  Regf (ST 0) ; Regf (ST 1) (* rev *) |]
+        instr_for_floatop_pop floatop (   Regf (ST 0) , Regf (ST 1)   )
       | (true, false) ->
         (* first operand on stack *)
-        instr_for_floatop floatop [| emit_reg i.arg.(1) |]
+        instr_for_floatop floatop (Some (  emit_reg i.arg.(1)  ))
       | (false, true) ->
         (* second operand on stack *)
-        instr_for_floatop_reversed floatop [| emit_reg i.arg.(0) |]
+        instr_for_floatop_reversed floatop (Some (  emit_reg i.arg.(0)  ))
       | (false, false) ->
         (* both operands in memory *)
-        _fldl [| emit_reg i.arg.(0) |];
-        instr_for_floatop floatop [| emit_reg i.arg.(1) |]
+        _fldl (  emit_reg i.arg.(0)  );
+        instr_for_floatop floatop (Some (  emit_reg i.arg.(1)  ))
     end
   | Lop(Ifloatofint) ->
     begin match i.arg.(0).loc with
         Stack s ->
-        _fildl [| emit_reg i.arg.(0) |]
+        _fildl (  emit_reg i.arg.(0)  )
       | _ ->
-        _pushl [| emit_reg i.arg.(0) |];
-        _fildl [| _mem_ptr DWORD 0 ESP |];
-        _addl [|  emit_int 4 ; Reg ESP (* rev *) |]
+        _pushl (  emit_reg i.arg.(0)  );
+        _fildl (  _mem_ptr DWORD 0 ESP  );
+        _addl (   emit_int 4 , _r ESP   )
     end
   | Lop(Iintoffloat) ->
     if not (is_tos i.arg.(0)) then
-      _fldl [| emit_reg i.arg.(0) |];
+      _fldl (  emit_reg i.arg.(0)  );
     stack_offset := !stack_offset - 8;
-    _subl [|  emit_int 8 ; Reg ESP (* rev *) |];
+    _subl (   emit_int 8 , _r ESP   );
     cfi_adjust_cfa_offset 8;
-    _fnstcw [| _mem 4 ESP |];
-    _movw [|  _mem 4 ESP ; Reg16 AX (* rev *) |];
-    _movb [|  emit_int 12 ; Reg8 AH (* rev *) |];
-    _movw [|  Reg16 AX ; _mem 0 ESP (* rev *) |];
-    _fldcw [| _mem 0 ESP |];
+    _fnstcw (  _mem 4 ESP  );
+    _movw (   _mem 4 ESP , Reg16 AX   );
+    _movb (   emit_int 12 , Reg8 AH   );
+    _movw (   Reg16 AX , _mem 0 ESP   );
+    _fldcw (  _mem 0 ESP  );
     begin match i.res.(0).loc with
         Stack s ->
-        _fistpl [| emit_reg i.res.(0) |]
+        _fistpl (  emit_reg i.res.(0)  )
       | _ ->
-        _fistpl [| _mem_ptr DWORD 0 ESP |];
-        _movl [|  _mem 0 ESP ; emit_reg i.res.(0) (* rev *) |]
+        _fistpl (  _mem_ptr DWORD 0 ESP  );
+        _movl (   _mem 0 ESP , emit_reg i.res.(0)   )
     end;
-    _fldcw	[| _mem 4 ESP |];
-    _addl	[|  emit_int 8 ; Reg ESP (* rev *) |];
+    _fldcw (  _mem 4 ESP  );
+    _addl (   emit_int 8 , _r ESP   );
     cfi_adjust_cfa_offset (-8);
     stack_offset := !stack_offset + 8
   | Lop(Ispecific(Ilea addr)) ->
-    _leal [|  emit_addressing addr DWORD i.arg 0 ; emit_reg i.res.(0) (* rev *) |]
+    _leal (   emit_addressing addr DWORD i.arg 0 , emit_reg i.res.(0)   )
   | Lop(Ispecific(Istore_int(n, addr))) ->
-    _movl [|  emit_nativeint n ; emit_addressing addr DWORD i.arg 0 (* rev *) |]
+    _movl (   emit_nativeint n , emit_addressing addr DWORD i.arg 0   )
   | Lop(Ispecific(Istore_symbol(s, addr))) ->
     add_used_symbol s;
-    _movl [|  _offset (emit_symbol s) ; emit_addressing addr DWORD i.arg 0 (* rev *) |]
+    _movl (   _offset (emit_symbol s) , emit_addressing addr DWORD i.arg 0   )
   | Lop(Ispecific(Ioffset_loc(n, addr))) ->
-    _addl [|   _int n ; emit_addressing addr DWORD i.arg 0 (* rev *) |]
+    _addl (    _int n , emit_addressing addr DWORD i.arg 0   )
   | Lop(Ispecific(Ipush)) ->
     (* Push arguments in reverse order *)
     for n = Array.length i.arg - 1 downto 0 do
       let r = i.arg.(n) in
       match r with
         {loc = Reg _; typ = Float} ->
-        _subl [|  _int 8 ; Reg ESP (* rev *) |];
+        _subl (   _int 8 , _r ESP   );
         cfi_adjust_cfa_offset 8;
-        _fstpl [| _mem_ptr REAL8 0 ESP |];
+        _fstpl (  _mem_ptr REAL8 0 ESP  );
         stack_offset := !stack_offset + 8
       | {loc = Stack sl; typ = Float} ->
         let ofs = slot_offset sl 1 in
-        _pushl [| _mem_ptr DWORD (ofs + 4) ESP |];
-        _pushl [| _mem_ptr DWORD (ofs + 4) ESP |];
+        _pushl (  _mem_ptr DWORD (ofs + 4) ESP  );
+        _pushl (  _mem_ptr DWORD (ofs + 4) ESP  );
         cfi_adjust_cfa_offset 8;
         stack_offset := !stack_offset + 8
       | _ ->
-        _pushl [| emit_reg r |];
+        _pushl (  emit_reg r  );
         cfi_adjust_cfa_offset 4;
         stack_offset := !stack_offset + 4
     done
   | Lop(Ispecific(Ipush_int n)) ->
-    _pushl [| emit_nativeint n |];
+    _pushl (  emit_nativeint n  );
     cfi_adjust_cfa_offset 4;
     stack_offset := !stack_offset + 4
   | Lop(Ispecific(Ipush_symbol s)) ->
     add_used_symbol s;
-    _pushl [| _offset (emit_symbol s) |];
+    _pushl (  _offset (emit_symbol s)  );
     cfi_adjust_cfa_offset 4;
     stack_offset := !stack_offset + 4
   | Lop(Ispecific(Ipush_load addr)) ->
-    _pushl [| emit_addressing addr DWORD i.arg 0 |];
+    _pushl (  emit_addressing addr DWORD i.arg 0  );
     cfi_adjust_cfa_offset 4;
     stack_offset := !stack_offset + 4
   | Lop(Ispecific(Ipush_load_float addr)) ->
-    _pushl [| emit_addressing (offset_addressing addr 4) DWORD i.arg 0 |];
-    _pushl [| emit_addressing addr DWORD i.arg 0 |];
+    _pushl (  emit_addressing (offset_addressing addr 4) DWORD i.arg 0  );
+    _pushl (  emit_addressing addr DWORD i.arg 0  );
     cfi_adjust_cfa_offset 8;
     stack_offset := !stack_offset + 8
   | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
     if not (is_tos i.arg.(0)) then
-      _fldl [| emit_reg i.arg.(0) |];
-    instr_for_floatarithmem double op [|
+      _fldl (  emit_reg i.arg.(0)  );
+    instr_for_floatarithmem double op ( Some (
       emit_addressing addr
-        (if double then REAL8 else REAL4) i.arg 1 |]
+        (if double then REAL8 else REAL4) i.arg 1  ))
   | Lop(Ispecific(Ifloatspecial s)) ->
     (* Push args on float stack if necessary *)
     for k = 0 to Array.length i.arg - 1 do
-      if not (is_tos i.arg.(k)) then _fldl [| emit_reg i.arg.(k) |]
+      if not (is_tos i.arg.(k)) then _fldl (  emit_reg i.arg.(k)  )
     done;
     (* Fix-up for binary instrs whose args were swapped *)
     if Array.length i.arg = 2 && is_tos i.arg.(1) then
-      _fxch [| _st 1 |];
+      _fxch (Some (  _st 1  ));
     emit_floatspecial s
   | Lreloadretaddr ->
     ()
   | Lreturn ->
     output_epilogue begin fun () ->
-      _ret [||]
+      _ret (  )
     end
   | Llabel lbl ->
     emit_Llabel fallthrough lbl
   | Lbranch lbl ->
-    _jmp [| _l(emit_label lbl) |]
+    _jmp (  _l(emit_label lbl)  )
   | Lcondbranch(tst, lbl) ->
     begin match tst with
         Itruetest ->
         output_test_zero i.arg.(0);
-        _jne [| _l (emit_label lbl) |];
+        _jne (  _l (emit_label lbl)  );
       | Ifalsetest ->
         output_test_zero i.arg.(0);
-        _je [| _l (emit_label lbl) |]
+        _je (  _l (emit_label lbl)  )
       | Iinttest cmp ->
-        _cmpl [| emit_reg i.arg.(1) ; emit_reg i.arg.(0)  (* rev *) |];
+        _cmpl (  emit_reg i.arg.(1) , emit_reg i.arg.(0)    );
         let b = name_for_cond_branch cmp in
-        _j b [| _l (emit_label lbl) |]
+        _j b (  _l (emit_label lbl)  )
       | Iinttest_imm((Isigned Ceq | Isigned Cne |
                       Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
         output_test_zero i.arg.(0);
         let b = name_for_cond_branch cmp in
-        _j b [| _l (emit_label lbl) |]
+        _j b (  _l (emit_label lbl)  )
       | Iinttest_imm(cmp, n) ->
-        _cmpl [|   _int n ; emit_reg i.arg.(0) (* rev *) |];
+        _cmpl (    _int n , emit_reg i.arg.(0)   );
         let b = name_for_cond_branch cmp in
-        _j b [| _l (emit_label lbl) |]
+        _j b (  _l (emit_label lbl)  )
       | Ifloattest(cmp, neg) ->
         emit_float_test cmp neg i.arg lbl
       | Ioddtest ->
-        _testl [|  _int 1 ; emit_reg i.arg.(0) (* rev *) |];
-        _jne [| _l (emit_label lbl) |]
+        _testl (   _int 1 , emit_reg i.arg.(0)   );
+        _jne (  _l (emit_label lbl)  )
       | Ieventest ->
-        _testl [|  _int 1 ; emit_reg i.arg.(0) (* rev *) |];
-        _je [| _l (emit_label lbl) |]
+        _testl (   _int 1 , emit_reg i.arg.(0)   );
+        _je (  _l (emit_label lbl)  )
     end
   | Lcondbranch3(lbl0, lbl1, lbl2) ->
-    _cmpl [|  _int 1 ; emit_reg i.arg.(0) (* rev *) |];
+    _cmpl (   _int 1 , emit_reg i.arg.(0)   );
     begin match lbl0 with
         None -> ()
-      | Some lbl -> _jb [| _l (emit_label lbl) |]
+      | Some lbl -> _jb (  _l (emit_label lbl)  )
     end;
     begin match lbl1 with
         None -> ()
-      | Some lbl -> _je [| _l (emit_label lbl) |]
+      | Some lbl -> _je (  _l (emit_label lbl)  )
     end;
     begin match lbl2 with
         None -> ()
-      | Some lbl -> _jg [| _l (emit_label lbl) |]
+      | Some lbl -> _jg (  _l (emit_label lbl)  )
     end
   | Lswitch jumptbl ->
     let lbl = new_label() in
-    _jmp [| Mem(NO, emit_reg32 i.arg.(0), 4, BaseSymbol (emit_label lbl), 0)|];
+    _jmp (  Mem(NO, register (emit_reg32 i.arg.(0)), 4,
+              BaseSymbol (emit_label lbl), 0) );
     _data ();
-    emit (NewLabel (emit_label lbl, DWORD)) [||];
+    emit (NewLabel (emit_label lbl, DWORD));
     for i = 0 to Array.length jumptbl - 1 do
       _long (ConstLabel (emit_label jumptbl.(i) ))
     done;
     _text ()
   | Lsetuptrap lbl ->
-    _call [| _l (emit_label lbl) |]
+    _call (  _l (emit_label lbl)  )
   | Lpushtrap ->
     if trap_frame_size > 8 then
-      _subl [|  _int (trap_frame_size - 8) ; Reg ESP (* rev *) |];
-    _pushl [| _l (emit_symbol "caml_exception_pointer" ) |];
+      _subl (   _int (trap_frame_size - 8) , _r ESP   );
+    _pushl (  _l (emit_symbol "caml_exception_pointer" )  );
     cfi_adjust_cfa_offset trap_frame_size;
-    _movl [|  Reg ESP ; _l (emit_symbol "caml_exception_pointer") (* rev *) |];
+    _movl (   _r ESP , _l (emit_symbol "caml_exception_pointer")   );
     stack_offset := !stack_offset + trap_frame_size
   | Lpoptrap ->
-    _popl [| _l (emit_symbol "caml_exception_pointer") |];
-    _addl [|  _int (trap_frame_size - 4) ; Reg ESP (* rev *) |];
+    _popl (  _l (emit_symbol "caml_exception_pointer")  );
+    _addl (   _int (trap_frame_size - 4) , _r ESP   );
     cfi_adjust_cfa_offset (-trap_frame_size);
     stack_offset := !stack_offset - trap_frame_size
   | Lraise k  ->
@@ -911,11 +920,11 @@ let emit_instr fallthrough i =
         record_frame Reg.Set.empty i.dbg
       | false, _
       | true, Lambda.Raise_notrace ->
-        _movl [|  _l (emit_symbol "caml_exception_pointer") ; Reg ESP (* rev *) |];
-        _popl [| _l (emit_symbol "caml_exception_pointer") |];
+        _movl (   _l (emit_symbol "caml_exception_pointer") , _r ESP   );
+        _popl (  _l (emit_symbol "caml_exception_pointer")  );
         if trap_frame_size > 8 then
-          _addl [|  _int (trap_frame_size - 8) ; Reg ESP (* rev *) |];
-        _ret [||]
+          _addl (   _int (trap_frame_size - 8) , _r ESP   );
+        _ret (  )
     end
 
 let rec emit_all fallthrough i =
@@ -932,7 +941,7 @@ let rec emit_all fallthrough i =
 let emit_external_symbol_direct s =
   _label (Printf.sprintf "L%s$stub" (emit_symbol s));
   _specific (Printf.sprintf ".indirect_symbol %s" (emit_symbol s));
-  _hlt [||]; _hlt [||]; _hlt [||]; _hlt [||] ; _hlt [||]
+  _hlt (  ); _hlt (  ); _hlt (  ); _hlt (  ) ; _hlt (  )
 
 let emit_external_symbol_indirect s =
   _label (Printf.sprintf "L%s$non_lazy_ptr" ( emit_symbol s ) );
@@ -949,7 +958,7 @@ let emit_external_symbols () =
   if !Clflags.gprofile then begin
     _label "Lmcount$stub";
     _specific ".indirect_symbol mcount";
-    _hlt [||]; _hlt [||]; _hlt [||] ; _hlt [||] ; _hlt [||]
+    _hlt (  ); _hlt (  ); _hlt (  ) ; _hlt (  ) ; _hlt (  )
   end
 
 (* Emission of the profiling prelude *)
@@ -957,33 +966,33 @@ let emit_external_symbols () =
 let emit_profile () =
   match system with
     S_linux_elf | S_gnu ->
-      _pushl [| Reg EAX |];
-      _movl  [|  Reg ESP ; Reg EBP (* rev *) |];
-      _pushl [| Reg ECX |];
-      _pushl [| Reg EDX |];
-      _call [| _l (emit_symbol "mcount") |];
-      _movl [|  Reg ESP ; Reg EBP (* rev *) |];
-      _popl [| Reg EDX |];
-      _popl [| Reg ECX |];
-      _popl [| Reg EAX |];
+      _pushl (  _r EAX  );
+      _movl  (   _r ESP , _r EBP   );
+      _pushl (  _r ECX  );
+      _pushl (  _r EDX  );
+      _call (  _l (emit_symbol "mcount")  );
+      _movl (   _r ESP , _r EBP   );
+      _popl (  _r EDX  );
+      _popl (  _r ECX  );
+      _popl (  _r EAX  );
   | S_bsd_elf ->
-      _pushl [| Reg EAX |];
-      _movl  [|  Reg ESP ; Reg EBP (* rev *) |];
-      _pushl [| Reg ECX |];
-      _pushl [| Reg EDX |];
-      _call [| _l ".mcount" |];
-      _popl [| Reg EDX |];
-      _popl [| Reg ECX |];
-      _popl [| Reg EAX |];
+      _pushl (  _r EAX  );
+      _movl  (   _r ESP , _r EBP   );
+      _pushl (  _r ECX  );
+      _pushl (  _r EDX  );
+      _call (  _l ".mcount"  );
+      _popl (  _r EDX  );
+      _popl (  _r ECX  );
+      _popl (  _r EAX  );
   | S_macosx ->
-      _pushl [| Reg EAX |];
-      _movl  [|  Reg ESP ; Reg EBP (* rev *) |];
-      _pushl [| Reg ECX |];
-      _pushl [| Reg EDX |];
-      _call [| _l "Lmcount$stub" |];
-      _popl [| Reg EDX |];
-      _popl [| Reg ECX |];
-      _popl [| Reg EAX |];
+      _pushl (  _r EAX  );
+      _movl  (   _r ESP , _r EBP   );
+      _pushl (  _r ECX  );
+      _pushl (  _r EDX  );
+      _call (  _l "Lmcount$stub"  );
+      _popl (  _r EDX  );
+      _popl (  _r ECX  );
+      _popl (  _r EAX  );
   | _ -> () (*unsupported yet*)
 
 (* Emission of a function declaration *)
@@ -1014,7 +1023,7 @@ let fundecl fundecl =
   let n = frame_size() - 4 in
   if n > 0 then
   begin
-    _subl [|  _int n ; Reg ESP (* rev *) |];
+    _subl (   _int n , _r ESP   );
     cfi_adjust_cfa_offset n;
   end;
   _llabel (emit_label !tailrec_entry_point);
@@ -1064,6 +1073,7 @@ let data l =
 (* Beginning / end of an assembly file *)
 
 let begin_assembly() =
+  arch64 := false;
   init_segments ();
   reset_debug_info();                   (* PR#5603 *)
   float_constants := StringMap.empty;
@@ -1094,7 +1104,7 @@ let begin_assembly() =
   _text ();
   _global ( emit_symbol lbl_begin );
   _label ( emit_symbol lbl_begin );
-  if system = S_macosx then _nop [||] (* PR#4690 *)
+  if system = S_macosx then _nop (  ) (* PR#4690 *)
 
 let end_assembly() =
   if !float_constants <> StringMap.empty then begin
@@ -1105,7 +1115,7 @@ let end_assembly() =
   add_def_symbol lbl_end;
   _text ();
   if system = S_macosx then
-    _nop [||]; (* suppress "ld warning: atom sorting error" *)
+    _nop (  ); (* suppress "ld warning: atom sorting error" *)
   _global ( emit_symbol lbl_end );
   _label ( emit_symbol lbl_end );
   _data ();
@@ -1147,13 +1157,14 @@ let end_assembly() =
       !symbols_used;
     symbols_used := StringSet.empty;
     symbols_defined := StringSet.empty;
-    emit End [||];
+    emit End;
   end;
 
   let oc = !Emitaux.output_channel in
-
-  let b = Buffer.create 10000 in
-  List.iter (arch.bprint_instr b arch) (List.rev seg.seg_instrs);
-  let s = Buffer.contents b in
-  output_string oc s
+  let bprint_instr =
+    match system with
+    | S_win32 | S_win64 -> Intel_masm.bprint_instr
+    | _ -> Intel_gas.bprint_instr
+  in
+  generate_code oc bprint_instr
 
diff --git a/asmcomp/i386/proc.ml b/asmcomp/i386/proc.ml
index 363af0a058..d8e945f0b7 100644
--- a/asmcomp/i386/proc.ml
+++ b/asmcomp/i386/proc.ml
@@ -190,12 +190,6 @@ let contains_calls = ref false
 (* Calling the assembler *)
 
 let assemble_file infile outfile =
-  if masm then
-    Ccomp.command (Config.asm ^
-                   Filename.quote outfile ^ " " ^ Filename.quote infile ^
-                   (if !Clflags.verbose then "" else ">NUL"))
-  else
-    Ccomp.command (Config.asm ^ " -o " ^
-                   Filename.quote outfile ^ " " ^ Filename.quote infile)
+  Intel_proc.assemble_file infile outfile
 
 let init () = ()
diff --git a/asmcomp/intel_gas.ml b/asmcomp/intel_gas.ml
index 1e9c47efc3..7099f25be7 100644
--- a/asmcomp/intel_gas.ml
+++ b/asmcomp/intel_gas.ml
@@ -12,69 +12,34 @@
 
 open Intel_proc
 
-let bprint_arg arch b ins arg =
-  match arg with
-  | Constant int ->
-    Printf.bprintf b "$%d" int
-  | ConstantNat int ->
-    Printf.bprintf b "$%nd" int
-  | LabelPLT label ->
-    Printf.bprintf b "%s@PLT" label
-  | LabelGOTPCREL label ->
-    Printf.bprintf b "%s@GOTPCREL(%%rip)" label
-  | LabelRel (_, label, 0) ->
-    Printf.bprintf b "%s" label
-  | LabelRel (_, label, i) ->
-    Printf.bprintf b "%s+%d" label i
-  | LabelDiff (l1, l2) ->
-    Printf.bprintf b "%s-%s" l1 l2
-  | LabelAbs (label, 0L) ->
-    Printf.bprintf b "%s" label
-  | LabelAbs (label, iL) ->
-    Printf.bprintf b "%s+%Ld" label iL
-  | LabelOffset l ->
-    (* only in win?? or 32 bits *)
-    Printf.bprintf b "$%s" l
-  | Direct s ->
-    Buffer.add_string b s
-
-  | Reg8 register8 ->
-    Printf.bprintf b "%%%s" (string_of_register8 register8)
-  | Reg16 register16 ->
-    Printf.bprintf b "%%%s" (string_of_register16 register16)
-  | Reg32 register32 ->
-    Printf.bprintf b "%%%s" (string_of_register32 register32)
-  | Reg register ->
-    Printf.bprintf b "%%%s" (arch.string_of_register register)
-  | Regf registerf ->
-    Printf.bprintf b "%%%s" (string_of_registerf registerf)
+let bprint_arg_mem b arch mem = match mem with
 
-  | Mem (_, reg1, 1, BaseSymbol s, 0) ->
-    Printf.bprintf b "%s(%%%s)" s (arch.string_of_register reg1)
+  |  (_, reg1, 1, BaseSymbol s, 0) ->
+    Printf.bprintf b "%s(%%%s)" s (string_of_register arch reg1)
 
-  | Mem (_, reg1, 1, BaseSymbol s, offset) ->
+  |  (_, reg1, 1, BaseSymbol s, offset) ->
     if offset < 0 then
-      Printf.bprintf b "%s%d(%%%s)" s offset (arch.string_of_register reg1)
+      Printf.bprintf b "%s%d(%%%s)" s offset (string_of_register arch reg1)
     else
-      Printf.bprintf b "%s+%d(%%%s)" s offset (arch.string_of_register reg1)
+      Printf.bprintf b "%s+%d(%%%s)" s offset (string_of_register arch reg1)
 
-  | Mem (_, reg1, scale, BaseSymbol s, offset) ->
+  |  (_, reg1, scale, BaseSymbol s, offset) ->
     if offset = 0 then
       Printf.bprintf b "%s(,%%%s,%d)" s
-        (arch.string_of_register reg1) scale
+        (string_of_register arch reg1) scale
     else    if offset < 0 then
       Printf.bprintf b "%s%d(,%%%s,%d)" s
-        offset (arch.string_of_register reg1) scale
+        offset (string_of_register arch reg1) scale
     else
       Printf.bprintf b "%s+%d(,%%%s,%d)" s
-        offset (arch.string_of_register reg1) scale
+        offset (string_of_register arch reg1) scale
 
-  | Mem (_, reg1, 1, NoBase, 0) ->
+  |  (_, reg1, 1, NoBase, 0) ->
     Buffer.add_char b '(';
-    Printf.bprintf b "%%%s" (arch.string_of_register reg1);
+    Printf.bprintf b "%%%s" (string_of_register arch reg1);
     Buffer.add_char b ')'
 
-  | Mem (_, reg1, 1, NoBase, offset) ->
+  |  (_, reg1, 1, NoBase, offset) ->
     if offset <> 0 then begin
       if offset < 0 then
         Printf.bprintf b "-%d" (-offset)
@@ -82,10 +47,10 @@ let bprint_arg arch b ins arg =
         Printf.bprintf b "%d" offset
     end;
     Buffer.add_char b '(';
-    Printf.bprintf b "%%%s" (arch.string_of_register reg1);
+    Printf.bprintf b "%%%s" (string_of_register arch reg1);
     Buffer.add_char b ')'
 
-  | Mem (_, reg1, scale, reg2, offset) ->
+  |  (_, reg1, scale, reg2, offset) ->
     if offset <> 0 then begin
       if offset < 0 then
         Printf.bprintf b "-%d" (-offset)
@@ -97,23 +62,64 @@ let bprint_arg arch b ins arg =
       match reg2 with
 	NoBase -> ()
       | BaseReg reg2 ->
-        Printf.bprintf b "%%%s" (arch.string_of_register reg2)
+        Printf.bprintf b "%%%s" (string_of_register arch reg2)
       | BaseSymbol s ->
         Printf.bprintf b "%s" s
     end;
     Buffer.add_char b ',';
-    Printf.bprintf b "%%%s" (arch.string_of_register reg1);
+    Printf.bprintf b "%%%s" (string_of_register arch reg1);
     if scale <> 1 then
       Printf.bprintf b ",%d" scale;
     Buffer.add_char b ')'
 
-let bprint_args arch b instr =
-  match instr with
-    { args = [||] } -> ()
-  | { args = [|Reg _ | Mem _ as arg|]; instr = (CALL | JMP _) } ->
+
+let bprint_arg arch b ins arg =
+  match arg with
+  | ConstantInt int ->
+    Printf.bprintf b "$%d" int
+  | ConstantNat int ->
+    Printf.bprintf b "$%nd" int
+  | LabelPLT label ->
+    Printf.bprintf b "%s@PLT" label
+  | LabelGOTPCREL label ->
+    Printf.bprintf b "%s@GOTPCREL(%%rip)" label
+  | LabelRel (_, label, 0) ->
+    Printf.bprintf b "%s" label
+  | LabelRel (_, label, i) ->
+    Printf.bprintf b "%s+%d" label i
+  | LabelDiff (l1, l2) ->
+    Printf.bprintf b "%s-%s" l1 l2
+  | LabelAbs (label, 0L) ->
+    Printf.bprintf b "%s" label
+  | LabelAbs (label, iL) ->
+    Printf.bprintf b "%s+%Ld" label iL
+  | LabelOffset l ->
+    (* only in win?? or 32 bits *)
+    Printf.bprintf b "$%s" l
+  | Direct s ->
+    Buffer.add_string b s
+
+  | Reg8 register8 ->
+    Printf.bprintf b "%%%s" (string_of_register8 register8)
+  | Reg16 register16 ->
+    Printf.bprintf b "%%%s" (string_of_register16 register16)
+  | Reg32 register32 ->
+    Printf.bprintf b "%%%s" (string_of_register32 register32)
+  | Reg register ->
+    Printf.bprintf b "%%%s" (string_of_register arch register)
+  | Regf registerf ->
+    Printf.bprintf b "%%%s" (string_of_registerf registerf)
+
+  | Mem ( ptr, r, scale, base, offset) ->
+    bprint_arg_mem b arch (ptr, r, scale, base, offset)
+
+let bprint_args arch b instr args =
+  match args, instr with
+    [], _ -> ()
+  | [ Reg _  | Mem _ as arg ],  (CALL _ | JMP _) ->
     tab b; Buffer.add_char b '*'; bprint_arg arch b instr arg
-  | { args = [|arg|] } -> tab b; bprint_arg arch b instr arg
-  | { args = [|arg1; arg2|] } ->
+  | [ arg ], _ -> tab b; bprint_arg arch b instr arg
+  | [ arg1; arg2 ], _ ->
     tab b; bprint_arg arch b instr arg1;
     Buffer.add_char b ',';
     Buffer.add_char b ' ';
@@ -149,9 +155,6 @@ and string_of_simple_constant = function
     Printf.sprintf "(%s - %s)"
       (string_of_simple_constant c1) (string_of_simple_constant c2)
 
-let suff arch ins =
-  if arch.arch64 then ins ^ "q" else ins ^ "l"
-
 let get_suffix s =
   match s with
    | B -> "b"
@@ -166,12 +169,14 @@ let suffix2 ins s1 s2 =
   ins ^ (get_suffix s1) ^ (get_suffix s2)
 
 
+let list_o arg = match arg with None -> [] | Some arg -> [arg]
+
 let bprint_instr b arch instr =
   begin
-    match instr.instr with
+    match instr with
       Global s ->
       Printf.bprintf b "\t.globl\t%s" s;
-    | Align (data,n) ->
+    | Align (_data,n) ->
       Printf.bprintf b "\t.align\t%d" n
     | NewLabel (s, _) ->
       Printf.bprintf b "%s:" s
@@ -181,168 +186,168 @@ let bprint_instr b arch instr =
       Printf.bprintf b "\t%s" s
     | End -> ()
     | _ ->
-      let ins =
-        match instr.instr with
+      let ins, args =
+        match instr with
 	  Global _ | Align _ | NewLabel _
         | Comment _ | Specific _
 	| End
         | External _ -> assert false
 
-        | Segment Text -> ".text"
-        | Segment Data -> ".data"
-        | Set -> ".set"
+        | Segment Text -> ".text", []
+        | Segment Data -> ".data", []
+        | Set (arg1, arg2) -> ".set", [ arg1; arg2 ]
         | Space n ->
           if system = S_solaris then
-            Printf.sprintf ".zero\t%d" n
+            Printf.sprintf ".zero\t%d" n, []
           else
-            Printf.sprintf ".space\t%d" n
+            Printf.sprintf ".space\t%d" n, []
         | Constant (n, BYTE) ->
-          Printf.sprintf ".byte\t%s" (string_of_constant n)
+          Printf.sprintf ".byte\t%s" (string_of_constant n), []
         | Constant (n, WORD) ->
           if system = S_solaris then
-            Printf.sprintf ".value\t%s" (string_of_constant n)
+            Printf.sprintf ".value\t%s" (string_of_constant n), []
           else
-            Printf.sprintf ".word\t%s" (string_of_constant n)
+            Printf.sprintf ".word\t%s" (string_of_constant n), []
         | Constant (n, DWORD) ->
-          Printf.sprintf ".long\t%s" (string_of_constant n)
+          Printf.sprintf ".long\t%s" (string_of_constant n), []
         | Constant (n, QWORD) ->
-	  Printf.sprintf ".quad\t%s" (string_of_constant n)
+	  Printf.sprintf ".quad\t%s" (string_of_constant n), []
         | Constant _ -> assert false
 	| Bytes s ->
           if system = S_solaris then
             assert false (* TODO *)
           else
 	    Printf.sprintf ".ascii\t\"%s\""
-              (string_of_string_literal s)
-
-        | NOP -> "nop"
-	| NEG ->  "neg"
-	| ADD s ->  suffix "add" s
-	| SUB s -> suffix "sub" s
-	| XOR s ->  suffix "xor" s
-	| OR s -> suffix "or" s
-	| AND s -> suffix "and" s
-	| CMP s -> suffix "cmp" s
-
-	| LEAVE -> "leave"
-	| SAR s -> suffix "sar" s
-	| SHR s -> suffix "shr" s
-	| SAL s -> suffix "sal" s
-
-        | MOVABSQ -> "movabsq"
-        | FISTP s -> suffix "fistp" s
-
-	| FSTP None -> "fstp"
-	| FSTP (Some s) -> suffix "fstp" s
-	| FSTPS -> "fstps"
-        | FILD s -> suffix "fild" s
-        | HLT -> "hlt"
-
-        | FCOMPP -> "fcompp"
-        | FCOMPL -> "fcompl"
-        | FLDL -> "fldl"
-        | FLDS -> "flds"
-        | FNSTSW -> "fnstsw"
-        | FNSTCW -> "fnstcw"
-        | FLDCW -> "fldcw"
-
-        | FCHS -> "fchs"
-        | FABS -> "fabs"
-
-        | FADDL -> "faddl"
-        | FSUBL -> "fsubl"
-        | FMULL -> "fmull"
-        | FDIVL -> "fdivl"
-        | FSUBRL -> "fsubrl"
-        | FDIVRL -> "fdivrl"
-
-        | FLD1 -> "fld1"
-        | FPATAN -> "fpatan"
-        | FPTAN -> "fptan"
-        | FCOS -> "fcos"
-        | FLDLN2 -> "fldln2"
-        | FLDLG2 -> "fldlg2"
-        | FXCH -> "fxch"
-        | FYL2X -> "fyl2x"
-        | FSIN -> "fsin"
-        | FSQRT -> "fsqrt"
-        | FLDZ -> "fldz"
-
-        | FADDP -> "faddp"
-        | FSUBP -> "fsubp"
-        | FMULP -> "fmulp"
-        | FDIVP -> "fdivp"
-        | FSUBRP -> "fsubrp"
-        | FDIVRP -> "fdivrp"
-
-        | FADDS -> "fadds"
-        | FSUBS -> "fsubs"
-        | FMULS -> "fmuls"
-        | FDIVS -> "fdivs"
-        | FSUBRS -> "fsubrs"
-        | FDIVRS -> "fdivrs"
-
-	| INC s ->  suffix "inc" s
-	| DEC s ->  suffix "dec" s
-
-	| IMUL s ->  suffix "imul" s
-	| IDIV s ->  suffix "idiv" s
-
-	| MOV s ->  suffix "mov" s
-	| MOVZX (s1, s2) -> suffix2 "movz" s1 s2
-	| MOVSX (s1, s2) -> suffix2 "movs" s1 s2
-	| MOVSS ->  "movss"
-	| MOVSXD ->  "movslq"
-
-	| MOVSD ->  "movsd"
-	| ADDSD ->  "addsd"
-	| SUBSD ->  "subsd"
-	| MULSD ->  "mulsd"
-	| DIVSD ->  "divsd"
-	| SQRTSD -> "sqrtsd"
+              (string_of_string_literal s), []
+
+        | NOP -> "nop", []
+	| NEG arg ->  "neg", [ arg ]
+	| ADD (s, arg1, arg2) ->  suffix "add" s, [arg1; arg2]
+	| SUB (s, arg1, arg2) -> suffix "sub" s, [arg1; arg2]
+	| XOR (s, arg1, arg2) ->  suffix "xor" s, [arg1; arg2]
+	| OR (s, arg1, arg2) -> suffix "or" s, [arg1; arg2]
+	| AND (s, arg1, arg2) -> suffix "and" s, [arg1; arg2]
+	| CMP (s, arg1, arg2) -> suffix "cmp" s, [arg1; arg2]
+
+	| LEAVE -> "leave", []
+	| SAR (s, arg1, arg2) -> suffix "sar" s, [arg1; arg2]
+	| SHR (s, arg1, arg2) -> suffix "shr" s, [arg1; arg2]
+	| SAL (s, arg1, arg2) -> suffix "sal" s, [arg1; arg2]
+
+        | MOVABSQ (arg1, arg2) -> "movabsq", [arg1; arg2]
+        | FISTP (s, arg) -> suffix "fistp" s, [ arg ]
+
+	| FSTP (None, arg) -> "fstp", [arg]
+	| FSTP (Some s, arg) -> suffix "fstp" s, [arg]
+	| FSTPS arg -> "fstps", [ arg ]
+        | FILD (s, arg) -> suffix "fild" s, [ arg ]
+        | HLT -> "hlt", []
+
+        | FCOMPP -> "fcompp", []
+        | FCOMPL arg -> "fcompl", [ arg ]
+        | FLDL arg -> "fldl", [ arg ]
+        | FLDS arg -> "flds", [ arg ]
+        | FNSTSW arg -> "fnstsw", [ arg ]
+        | FNSTCW arg -> "fnstcw", [ arg ]
+        | FLDCW arg -> "fldcw", [ arg ]
+
+        | FCHS arg -> "fchs", list_o arg
+        | FABS  arg -> "fabs", list_o arg
+
+        | FADDL arg -> "faddl", list_o arg
+        | FSUBL arg -> "fsubl", list_o arg
+        | FMULL arg -> "fmull", list_o arg
+        | FDIVL  arg-> "fdivl", list_o arg
+        | FSUBRL arg -> "fsubrl", list_o arg
+        | FDIVRL arg -> "fdivrl", list_o arg
+
+        | FLD1 -> "fld1", []
+        | FPATAN -> "fpatan", []
+        | FPTAN -> "fptan", []
+        | FCOS -> "fcos", []
+        | FLDLN2 -> "fldln2", []
+        | FLDLG2 -> "fldlg2", []
+        | FXCH arg -> "fxch", list_o arg
+        | FYL2X -> "fyl2x", []
+        | FSIN -> "fsin", []
+        | FSQRT -> "fsqrt", []
+        | FLDZ -> "fldz", []
+
+        | FADDP (arg1, arg2)  -> "faddp", [ arg1; arg2 ]
+        | FSUBP (arg1, arg2)  -> "fsubp", [ arg1; arg2 ]
+        | FMULP (arg1, arg2)  -> "fmulp", [ arg1; arg2 ]
+        | FDIVP (arg1, arg2)  -> "fdivp", [ arg1; arg2 ]
+        | FSUBRP (arg1, arg2)  -> "fsubrp", [ arg1; arg2 ]
+        | FDIVRP (arg1, arg2)  -> "fdivrp", [ arg1; arg2 ]
+
+        | FADDS arg -> "fadds", list_o arg
+        | FSUBS arg -> "fsubs", list_o arg
+        | FMULS arg -> "fmuls", list_o arg
+        | FDIVS arg -> "fdivs", list_o arg
+        | FSUBRS arg -> "fsubrs", list_o arg
+        | FDIVRS arg -> "fdivrs", list_o arg
+
+	| INC (s, arg) ->  suffix "inc" s, [ arg ]
+	| DEC (s, arg) ->  suffix "dec" s, [ arg ]
+
+	| IMUL (s, arg1, arg2) ->  suffix "imul" s, arg1 :: list_o arg2
+	| IDIV (s, arg) ->  suffix "idiv" s, [ arg ]
+
+	| MOV (s, arg1, arg2) ->  suffix "mov" s, [arg1; arg2]
+	| MOVZX (s1, s2, arg1, arg2) -> suffix2 "movz" s1 s2, [arg1; arg2]
+	| MOVSX (s1, s2, arg1, arg2) -> suffix2 "movs" s1 s2, [arg1; arg2]
+	| MOVSS (arg1, arg2) ->  "movss", [arg1; arg2]
+	| MOVSXD (arg1, arg2) ->  "movslq", [arg1; arg2]
+
+	| MOVSD (arg1, arg2) ->  "movsd", [ arg1 ; arg2 ]
+	| ADDSD (arg1, arg2) ->  "addsd", [ arg1 ; arg2 ]
+	| SUBSD (arg1, arg2) ->  "subsd", [ arg1 ; arg2 ]
+	| MULSD (arg1, arg2) ->  "mulsd", [ arg1 ; arg2 ]
+	| DIVSD (arg1, arg2) ->  "divsd", [ arg1 ; arg2 ]
+	| SQRTSD (arg1, arg2) -> "sqrtsd", [ arg1; arg2 ]
 	| ROUNDSD rounding ->
 	  Printf.sprintf "roundsd.%s" (match rounding with
 	      RoundDown -> "down"
 	    | RoundUp -> "up"
 	    | RoundTruncate -> "trunc"
-	    | RoundNearest -> "near")
-	| CVTSS2SD ->  "cvtss2sd"
-	| CVTSD2SS ->  "cvtsd2ss"
-	| CVTSI2SD ->  "cvtsi2sd"
-	| CVTSI2SDQ ->  "cvtsi2sdq"
-	| CVTSD2SI ->  "cvtsd2si"
-	| CVTTSD2SI ->  "cvttsd2si"
-	| UCOMISD ->  "ucomisd"
-	| COMISD ->  "comisd"
-
-	| CALL  ->  "call"
-	| JMP _ ->  "jmp"
-	| RET ->  "ret"
-	| PUSH s -> suffix "push" s
-	| POP s -> suffix "pop" s
-
-	| TEST s ->  suffix "test" s
-	| SET condition ->
-	  Printf.sprintf  "set%s" (string_of_condition condition)
-	| J (_,condition) ->
-	  Printf.sprintf  "j%s" (string_of_condition condition)
+	    | RoundNearest -> "near"), []
+	| CVTSS2SD (arg1, arg2) ->  "cvtss2sd", [ arg1; arg2 ]
+	| CVTSD2SS (arg1, arg2) ->  "cvtsd2ss", [ arg1; arg2 ]
+	| CVTSI2SD (arg1, arg2) ->  "cvtsi2sd", [ arg1; arg2 ]
+	| CVTSI2SDQ (arg1, arg2) ->  "cvtsi2sdq", [ arg1; arg2 ]
+	| CVTSD2SI (arg1, arg2) ->  "cvtsd2si", [ arg1; arg2 ]
+	| CVTTSD2SI (arg1, arg2) ->  "cvttsd2si", [ arg1; arg2 ]
+	| UCOMISD (arg1, arg2) ->  "ucomisd", [ arg1; arg2 ]
+	| COMISD (arg1, arg2) ->  "comisd", [ arg1; arg2 ]
+
+	| CALL arg  ->  "call", [ arg ]
+	| JMP (_, arg) ->  "jmp", [ arg ]
+	| RET ->  "ret", []
+	| PUSH (s, arg) -> suffix "push" s, [ arg ]
+	| POP (s, arg) -> suffix "pop" s, [ arg ]
+
+	| TEST (s, arg1, arg2) ->  suffix "test" s, [ arg1; arg2]
+	| SET (condition, arg) ->
+	  Printf.sprintf  "set%s" (string_of_condition condition), [ arg ]
+	| J (_,condition, arg) ->
+	  Printf.sprintf  "j%s" (string_of_condition condition), [arg]
+
 
 	| CMOV condition ->
-	  Printf.sprintf "cmov%s" (string_of_condition condition)
-	| XORPD ->  "xorpd"
-	| ANDPD ->  "andpd"
-	| MOVLPD ->  "movlpd"
-	| MOVAPD ->  "movapd"
-
-	| LEA s ->  suffix "lea" s
-	| CQTO ->  "cqto"
-        | CLTD -> "cltd"
-
-        | XCHG -> "xchg"
-        | BSWAP -> "bswap"
+	  Printf.sprintf "cmov%s" (string_of_condition condition), []
+	| XORPD (arg1, arg2) ->  "xorpd", [ arg1; arg2 ]
+	| ANDPD (arg1, arg2) ->  "andpd", [ arg1; arg2 ]
+	| MOVLPD (arg1, arg2) ->  "movlpd", [arg1; arg2]
+	| MOVAPD (arg1, arg2) ->  "movapd", [arg1; arg2]
+	| LEA (s, arg1, arg2) ->  suffix "lea" s, [ arg1; arg2 ]
+	| CQTO ->  "cqto", []
+        | CLTD -> "cltd", []
+
+        | XCHG (arg1, arg2) -> "xchg", [ arg1; arg2 ]
+        | BSWAP arg -> "bswap", [ arg ]
       in
-      bprint b ins
+      bprint b ins;
+      bprint_args arch b instr args;
   end;
-  bprint_args arch b instr;
   Buffer.add_string b "\n"
 
diff --git a/asmcomp/intel_masm.ml b/asmcomp/intel_masm.ml
index ad0a5c7b29..081f0f5d1f 100644
--- a/asmcomp/intel_masm.ml
+++ b/asmcomp/intel_masm.ml
@@ -36,17 +36,88 @@ let string_of_datatype_ptr = function
   | NEAR -> "NEAR PTR "
   | PROC -> "PROC PTR"
 
+let bprint_arg_mem b arch mem = match mem with
+
+  |  (ptr, reg1, 1, NoBase, 0) ->
+    Printf.bprintf b "%s[%s]"
+      (string_of_datatype_ptr ptr)
+      (string_of_register arch reg1);
+
+  |  (ptr, reg1, 1, NoBase, offset) ->
+    Printf.bprintf b "%s[%s%s%d]"
+      (string_of_datatype_ptr ptr)
+      (string_of_register arch reg1)
+      (if offset > 0 then "+" else "")
+      offset
+
+  |  (ptr, reg1, scale, NoBase, 0) ->
+    Printf.bprintf b "%s[%s*%d]"
+      (string_of_datatype_ptr ptr)
+      (string_of_register arch reg1)
+      scale
+  |  (ptr, reg1, scale, NoBase, offset) ->
+    Printf.bprintf b "%s[%s*%d%s%d]"
+      (string_of_datatype_ptr ptr)
+      (string_of_register arch reg1)
+      scale
+      (if offset > 0 then "+" else "")
+      offset
+  |  (ptr, reg1, 1, reg2, 0) ->
+    Printf.bprintf b "%s[%s+%s]"
+      (string_of_datatype_ptr ptr)
+      (match reg2 with
+	 NoBase -> assert false
+       | BaseReg reg2 ->
+         string_of_register arch reg2
+       | BaseSymbol s -> s)
+      (string_of_register arch reg1)
+  |  (ptr, reg1, 1, reg2, offset) ->
+    Printf.bprintf b "%s[%s+%s%s%d]"
+      (string_of_datatype_ptr ptr)
+      (match reg2 with
+	 NoBase -> assert false
+       | BaseReg reg2 ->
+         string_of_register arch reg2
+       | BaseSymbol s -> s)
+      (string_of_register arch reg1)
+      (if offset > 0 then "+" else "")
+      offset
+  |  (ptr, reg1, scale, reg2, 0) ->
+    Printf.bprintf b "%s[%s+%s*%d]"
+      (string_of_datatype_ptr ptr)
+      (match reg2 with
+	 NoBase -> assert false
+       | BaseReg reg2 ->
+         string_of_register arch reg2
+       | BaseSymbol s -> s)
+      (string_of_register arch reg1)
+      scale
+  |  (ptr, reg1, scale, reg2, offset) ->
+    Printf.bprintf b "%s[%s+%s*%d%s%d]"
+      (string_of_datatype_ptr ptr)
+      (match reg2 with
+	 NoBase -> assert false
+       | BaseReg reg2 ->
+         string_of_register arch reg2
+       | BaseSymbol s -> s)
+      (string_of_register arch reg1)
+      scale
+      (if offset > 0 then "+" else "")
+      offset
+
+
   let bprint_arg arch b ins arg =
     match arg with
-    | Constant int ->
+    | ConstantInt int ->
       Printf.bprintf b "%d" int
-    | ConstantNat int when ins.instr = MOVABSQ ->
-  (* force ml64 to use mov reg, imm64 instruction *)
-      Printf.bprintf b "0%nxH" int
     | ConstantNat int ->
-      Printf.bprintf b "%nd" int
-    | LabelPLT _ -> assert false
-    | LabelGOTPCREL _ -> assert false
+      begin match ins with
+        | MOVABSQ _ ->
+          (* force ml64 to use mov reg, imm64 instruction *)
+          Printf.bprintf b "0%nxH" int
+        | _ ->
+          Printf.bprintf b "%nd" int
+      end
     | LabelRel (ptr, string, 0) ->
       Printf.bprintf b "%s%s"  (string_of_datatype_ptr ptr) string
     | LabelRel ( ptr, string, i) ->
@@ -68,82 +139,21 @@ let string_of_datatype_ptr = function
     | Reg32 register32 ->
       Printf.bprintf b "%s" (string_of_register32 register32)
     | Reg register ->
-      Printf.bprintf b "%s" (arch.string_of_register register)
+      Printf.bprintf b "%s" (string_of_register arch register)
     | Regf registerf ->
       Printf.bprintf b "%s" (string_of_registerf registerf)
 
-    | Mem (ptr, reg1, 1, NoBase, 0) ->
-      Printf.bprintf b "%s[%s]"
-        (string_of_datatype_ptr ptr)
-        (arch.string_of_register reg1);
-
-    | Mem (ptr, reg1, 1, NoBase, offset) ->
-      Printf.bprintf b "%s[%s%s%d]"
-                  (string_of_datatype_ptr ptr)
-                  (arch.string_of_register reg1)
-                  (if offset > 0 then "+" else "")
-                  offset
-
-    | Mem (ptr, reg1, scale, NoBase, 0) ->
-      Printf.bprintf b "%s[%s*%d]"
-        (string_of_datatype_ptr ptr)
-        (arch.string_of_register reg1)
-        scale
-    | Mem (ptr, reg1, scale, NoBase, offset) ->
-      Printf.bprintf b "%s[%s*%d%s%d]"
-        (string_of_datatype_ptr ptr)
-        (arch.string_of_register reg1)
-        scale
-        (if offset > 0 then "+" else "")
-        offset
-    | Mem (ptr, reg1, 1, reg2, 0) ->
-      Printf.bprintf b "%s[%s+%s]"
-        (string_of_datatype_ptr ptr)
-        (match reg2 with
-	   NoBase -> assert false
-         | BaseReg reg2 ->
-           arch.string_of_register reg2
-        | BaseSymbol s -> s)
-        (arch.string_of_register reg1)
-    | Mem (ptr, reg1, 1, reg2, offset) ->
-      Printf.bprintf b "%s[%s+%s%s%d]"
-        (string_of_datatype_ptr ptr)
-        (match reg2 with
-	   NoBase -> assert false
-         | BaseReg reg2 ->
-           arch.string_of_register reg2
-        | BaseSymbol s -> s)
-        (arch.string_of_register reg1)
-        (if offset > 0 then "+" else "")
-        offset
-    | Mem (ptr, reg1, scale, reg2, 0) ->
-      Printf.bprintf b "%s[%s+%s*%d]"
-        (string_of_datatype_ptr ptr)
-        (match reg2 with
-	   NoBase -> assert false
-         | BaseReg reg2 ->
-           arch.string_of_register reg2
-        | BaseSymbol s -> s)
-        (arch.string_of_register reg1)
-        scale
-    | Mem (ptr, reg1, scale, reg2, offset) ->
-      Printf.bprintf b "%s[%s+%s*%d%s%d]"
-        (string_of_datatype_ptr ptr)
-        (match reg2 with
-	   NoBase -> assert false
-         | BaseReg reg2 ->
-           arch.string_of_register reg2
-        | BaseSymbol s -> s)
-        (arch.string_of_register reg1)
-        scale
-        (if offset > 0 then "+" else "")
-        offset
-
-let bprint_args arch b instr =
-  match instr with
-    { args = [||] } -> ()
-  | { args = [|arg|] } -> tab b; bprint_arg arch b instr arg
-  | { args = [|arg2; arg1|] } ->
+    | LabelPLT _ -> assert false
+    | LabelGOTPCREL _ -> assert false
+
+    | Mem ( ptr, r, scale, base, offset) ->
+      bprint_arg_mem b arch (ptr, r, scale, base, offset)
+
+let bprint_args arch b instr args =
+  match args with
+  | [] -> ()
+  | [ arg ] -> tab b; bprint_arg arch b instr arg
+  | [ arg2; arg1 ] ->
     tab b; bprint_arg arch b instr arg1;
     Buffer.add_char b ',';
     Buffer.add_char b ' ';
@@ -208,11 +218,15 @@ let buf_bytes_directive b directive s =
      if !pos >= 16 then begin pos := 0 end
    done
 
+
+
+let list_o arg = match arg with None -> [] | Some arg -> [arg]
+
 let bprint_instr_name b arch instr =
-  match instr.instr with
+  match instr with
     Global s ->
     Printf.bprintf b "\tPUBLIC\t%s" s
-  | Align (data,n) ->
+  | Align (_data,n) ->
     Printf.bprintf b "\tALIGN\t%d" n
   | NewLabel (s, NO) ->
     Printf.bprintf b "%s:" s
@@ -241,130 +255,139 @@ let bprint_instr_name b arch instr =
   | Bytes s -> buf_bytes_directive b "BYTE" s
 
   | _ ->
-    let name =
-      match instr.instr with
+    let name, args =
+      match instr with
 	Global _ | Align _
       | NewLabel _ | Comment _
       | Specific _ | End
       | Segment _ | Constant _
       | Bytes _ | Space _ | External _
         -> assert false
-      | Set -> ".set"
-      | NOP -> "nop"
-
-      | NEG ->  "neg"
-      | ADD _ ->  "add"
-      | SUB _ ->   "sub"
-      | XOR _ ->   "xor"
-      | OR _ ->    "or"
-      | AND _ ->   "and"
-      | CMP _ ->  "cmp"
-
-      | MOVABSQ -> "mov"
-
-      | LEAVE -> "leave"
-      | SAR _ ->  "sar"
-      | SHR _ ->  "shr"
-      | SAL _ ->  "sal"
-
-      | FSTP _ -> "fstp"
-      | FSTPS -> "fstps"
-      | FILD _ -> "fild"
-      | FCOMPP -> "fcompp"
-      | FCOMPL -> "fcomp"
-      | FLDL -> "fld"
-      | FLDS -> "flds"
-      | FLDCW -> "fldcw"
-      | FISTP _ -> "fistp"
-
-      | FNSTSW -> "fnstsw"
-      | FNSTCW -> "fnstcw"
-
-      | FCHS -> "fchs"
-      | FABS -> "fabs"
-      | FADDL | FADDP | FADDS -> "fadd"
-      | FSUBL | FSUBP | FSUBS -> "fsub"
-      | FMULL | FMULP | FMULS -> "fmul"
-      | FDIVL | FDIVP | FDIVS  -> "fdiv"
-      | FSUBRL | FSUBRP | FSUBRS -> "fsubr"
-      | FDIVRL | FDIVRP | FDIVRS -> "fdivr"
-
-      | INC _ ->  "inc"
-      | DEC _ ->  "dec"
-
-      | IMUL _ ->  "imul"
-      | IDIV _ ->  "idiv"
+      | Set (arg1, arg2) -> ".set", [ arg1; arg2 ]
+
+
+      | NEG arg ->  "neg", [arg]
+      | NOP -> "nop", []
+      | ADD (_s, arg1, arg2) ->  "add", [arg1; arg2]
+      | SUB (_s, arg1, arg2) ->   "sub", [arg1; arg2]
+      | XOR (_s, arg1, arg2) ->   "xor", [arg1; arg2]
+      | OR (_s, arg1, arg2) ->    "or", [arg1; arg2]
+      | AND (_s, arg1, arg2) ->   "and", [arg1; arg2]
+      | CMP (_s, arg1, arg2) ->  "cmp", [arg1; arg2]
+
+      | MOVABSQ (arg1, arg2) -> "mov", [ arg1; arg2]
+
+      | LEAVE -> "leave", []
+      | SAR (_s, arg1, arg2) ->  "sar", [arg1; arg2]
+      | SHR (_s, arg1, arg2) ->  "shr", [arg1; arg2]
+      | SAL (_s, arg1, arg2) ->  "sal", [arg1; arg2]
+
+      | FSTP (_, arg) -> "fstp", [ arg ]
+      | FSTPS arg -> "fstps", [ arg]
+      | FILD (_, arg) -> "fild", [arg]
+      | FCOMPP -> "fcompp", []
+      | FCOMPL arg -> "fcomp", [ arg ]
+      | FLDL arg -> "fld", [ arg ]
+      | FLDS arg -> "flds", [ arg]
+      | FLDCW arg -> "fldcw", [ arg ]
+      | FISTP (_, arg) -> "fistp", [ arg]
+
+      | FNSTSW arg -> "fnstsw", [ arg ]
+      | FNSTCW arg -> "fnstcw", [ arg ]
+
+      | FCHS arg -> "fchs", list_o arg
+      | FABS arg -> "fabs", list_o arg
+      | FADDL arg | FADDS arg -> "fadd", list_o arg
+      | FSUBL arg | FSUBS arg -> "fsub", list_o arg
+      | FMULL arg | FMULS arg -> "fmul", list_o arg
+      | FDIVL arg | FDIVS  arg -> "fdiv", list_o arg
+      | FSUBRL arg | FSUBRS arg -> "fsubr", list_o arg
+      | FDIVRL arg | FDIVRS arg -> "fdivr", list_o arg
+
+      | FADDP (arg1, arg2)  -> "faddp", [ arg1; arg2 ]
+      | FSUBP (arg1, arg2)  -> "fsubp", [ arg1; arg2 ]
+      | FMULP (arg1, arg2)  -> "fmulp", [ arg1; arg2 ]
+      | FDIVP (arg1, arg2)  -> "fdivp", [ arg1; arg2 ]
+      | FSUBRP (arg1, arg2)  -> "fsubrp", [ arg1; arg2 ]
+      | FDIVRP (arg1, arg2)  -> "fdivrp", [ arg1; arg2 ]
+
+      | INC (_s, arg) ->  "inc", [ arg ]
+      | DEC (_s, arg) ->  "dec", [ arg ]
+
+      | IMUL (_s, arg1, arg2) ->  "imul", arg1 :: list_o arg2
+      | IDIV (_s, arg) ->  "idiv", [ arg ]
       | HLT -> assert false
-      | MOV _ ->  "mov"
-
-      | MOVZX _ ->  "movzx"
-      | MOVSX _ ->  "movsx"
-      | MOVSS ->  "movss"
-      | MOVSXD ->  "movsxd"
-
-      | MOVSD ->  "movsd"
-      | ADDSD ->  "addsd"
-      | SUBSD ->  "subsd"
-      | MULSD ->  "mulsd"
-      | DIVSD ->  "divsd"
-      | SQRTSD -> "sqrtsd"
+      | MOV (_s, arg1, arg2) ->  "mov", [ arg1; arg2]
+
+      | MOVZX (_, _, arg1, arg2) ->  "movzx", [ arg1; arg2]
+      | MOVSX (_, _, arg1, arg2) ->  "movsx",  [ arg1; arg2]
+      | MOVSS (arg1, arg2) ->  "movss", [ arg1; arg2 ]
+      | MOVSXD (arg1, arg2) ->  "movsxd", [ arg1; arg2 ]
+
+      | MOVSD (arg1, arg2) ->  "movsd", [ arg1; arg2 ]
+      | ADDSD (arg1, arg2) ->  "addsd", [ arg1 ; arg2 ]
+      | SUBSD (arg1, arg2) ->  "subsd", [ arg1 ; arg2 ]
+      | MULSD (arg1, arg2) ->  "mulsd", [ arg1 ; arg2 ]
+      | DIVSD (arg1, arg2) ->  "divsd", [ arg1 ; arg2 ]
+      | SQRTSD (arg1, arg2) -> "sqrtsd", [ arg1; arg2]
       | ROUNDSD rounding ->
 	Printf.sprintf "roundsd.%s" (match rounding with
 	    RoundDown -> "down"
 	  | RoundUp -> "up"
 	  | RoundTruncate -> "trunc"
-	  | RoundNearest -> "near")
-      | CVTSS2SD ->  "cvtss2sd"
-      | CVTSD2SS ->  "cvtsd2ss"
-      | CVTSI2SD ->  "cvtsi2sd"
-      | CVTSD2SI ->  "cvtsd2si"
-      | CVTSI2SDQ ->  "cvtsi2sdq"
-      | CVTTSD2SI ->  "cvttsd2si"
-      | UCOMISD ->  "ucomisd"
-      | COMISD ->  "comisd"
-
-      | FLD1 -> "fld1"
-      | FPATAN -> "fpatan"
-      | FPTAN -> "fptan"
-      | FCOS -> "fcos"
-      | FLDLN2 -> "fldln2"
-      | FLDLG2 -> "fldlg2"
-      | FXCH -> "fxch"
-      | FYL2X -> "fyl2x"
-      | FSIN -> "fsin"
-      | FSQRT -> "fsqrt"
-      | FLDZ -> "fldz"
-
-      | CALL  ->  "call"
-      | JMP _ ->  "jmp"
-      | RET ->  "ret"
-      | PUSH _ ->  "push"
-      | POP _ ->  "pop"
-
-      | TEST _ ->  "test"
-      | SET condition ->
-	Printf.sprintf  "set%s" (string_of_condition condition)
-      | J (_,condition) ->
-	Printf.sprintf  "j%s" (string_of_condition condition)
+	  | RoundNearest -> "near"), []
+      | CVTSS2SD (arg1, arg2) ->  "cvtss2sd", [ arg1; arg2 ]
+      | CVTSD2SS (arg1, arg2) ->  "cvtsd2ss", [ arg1; arg2 ]
+      | CVTSI2SD (arg1, arg2) ->  "cvtsi2sd", [ arg1; arg2 ]
+      | CVTSD2SI (arg1, arg2) ->  "cvtsd2si", [ arg1; arg2 ]
+      | CVTSI2SDQ (arg1, arg2) ->  "cvtsi2sdq", [ arg1; arg2 ]
+      | CVTTSD2SI (arg1, arg2) ->  "cvttsd2si", [ arg1; arg2 ]
+      | UCOMISD (arg1, arg2) ->  "ucomisd", [ arg1; arg2]
+      | COMISD (arg1, arg2) ->  "comisd", [arg1; arg2]
+
+      | FLD1 -> "fld1", []
+      | FPATAN -> "fpatan", []
+      | FPTAN -> "fptan", []
+      | FCOS -> "fcos", []
+      | FLDLN2 -> "fldln2", []
+      | FLDLG2 -> "fldlg2", []
+      | FXCH arg -> "fxch", list_o arg
+      | FYL2X -> "fyl2x", []
+      | FSIN -> "fsin", []
+      | FSQRT -> "fsqrt", []
+      | FLDZ -> "fldz", []
+
+      | CALL arg  ->  "call", [ arg ]
+      | JMP (_, arg) ->  "jmp", [ arg]
+      | RET ->  "ret", []
+      | PUSH (_, arg) ->  "push", [arg]
+      | POP (_, arg) ->  "pop", [arg]
+
+      | TEST (_s, arg1, arg2) ->  "test", [arg1; arg2]
+      | SET (condition, arg) ->
+	Printf.sprintf  "set%s" (string_of_condition condition), [ arg ]
+      | J (_,condition, arg) ->
+	Printf.sprintf  "j%s" (string_of_condition condition), [ arg ]
 
       | CMOV condition ->
-	Printf.sprintf "cmov%s" (string_of_condition condition)
-      | XORPD ->  "xorpd"
-      | ANDPD ->  "andpd"
-      | MOVLPD ->  "movlpd"
-      | MOVAPD ->  "movapd"
-      | CLTD -> "cdq"
-
-      | LEA _ ->  "lea"
-      | CQTO ->  "cqo"
-      | XCHG -> "xchg"
-      | BSWAP -> "bswap"
+	Printf.sprintf "cmov%s" (string_of_condition condition), []
+      | XORPD (arg1, arg2) ->  "xorpd", [ arg1; arg2 ]
+      | ANDPD (arg1, arg2) ->  "andpd", [ arg1; arg2 ]
+      | MOVLPD (arg1, arg2) ->  "movlpd", [ arg1; arg2 ]
+      | MOVAPD (arg1, arg2) ->  "movapd", [ arg1; arg2 ]
+      | CLTD -> "cdq", []
+
+      | LEA (_s, arg1, arg2) ->  "lea", [arg1; arg2]
+      | CQTO ->  "cqo", []
+      | XCHG (arg1, arg2) -> "xchg", [ arg1; arg2 ]
+      | BSWAP arg -> "bswap", [ arg ]
 
     in
-    bprint b name
+    bprint b name;
+    bprint_args arch b instr args;
+()
 
 let bprint_instr b arch instr =
   bprint_instr_name b arch instr;
-  bprint_args arch b instr;
   Buffer.add_string b "\n"
 
diff --git a/asmcomp/intel_proc.ml b/asmcomp/intel_proc.ml
index 55faddd54a..bf4736bb39 100644
--- a/asmcomp/intel_proc.ml
+++ b/asmcomp/intel_proc.ml
@@ -34,7 +34,7 @@ type condition =
   | NLE | G
 
 type locality =
-    Loc_unknown
+    Loc_unknown of int (* position of instruction *)
   | Loc_near (* 8 bits offset *)
   | Loc_far  (* 32 bits offset *)
 
@@ -62,136 +62,6 @@ type data_type = (* only used for MASM *)
 
 type suffix = B | W | L | Q
 
-type instr =
-    Segment of segment_type
-  | Global of string
-  | Constant of constant * data_type
-  | Align of bool * int
-  | NewLabel of string * data_type
-  | Bytes of string
-  | Space of int
-  | Comment of string
-  | Specific of string
-  | External of string * data_type
-  | Set
-  | End
-
-  | NOP
-
-  | ADD of suffix
-  | SUB of suffix
-  | XOR of suffix
-  | OR of suffix
-  | AND of suffix
-  | CMP of suffix
-
-  | FSTP of suffix option
-  | FSTPS
-
-  | FCOMPP
-  | FCOMPL
-  | FLDL
-  | FLDS
-  | FNSTSW
-  | FNSTCW
-  | FLDCW
-
-  | FCHS
-  | FABS
-  | FADDL
-  | FSUBL
-  | FMULL
-  | FDIVL
-  | FSUBRL
-  | FDIVRL
-  | FILD of suffix
-  | FISTP of suffix
-  | HLT
-
-  | FADDP
-  | FSUBP
-  | FMULP
-  | FDIVP
-  | FSUBRP
-  | FDIVRP
-
-  | FADDS
-  | FSUBS
-  | FMULS
-  | FDIVS
-  | FSUBRS
-  | FDIVRS
-
-  | FLD1
-  | FPATAN
-  | FPTAN
-  | FCOS
-  | FLDLN2
-  | FLDLG2
-  | FXCH
-  | FYL2X
-  | FSIN
-  | FSQRT
-  | FLDZ
-
-  | SAR of suffix
-  | SHR of suffix
-  | SAL of suffix
-  | INC of suffix
-  | DEC of suffix
-  | IMUL of suffix
-  | IDIV of suffix
-  | PUSH of suffix
-  | POP of suffix
-
-  | MOV of suffix
-
-  | MOVZX of suffix * suffix
-  | MOVSX of suffix * suffix
-  | MOVSS
-  | MOVSXD (* MOVSLQ *)
-
-  | MOVSD
-  | ADDSD
-  | SUBSD
-  | MULSD
-  | DIVSD
-  | SQRTSD
-  | ROUNDSD of rounding
-  | NEG
-
-  | CVTSS2SD
-  | CVTSD2SS
-  | CVTSI2SD
-  | CVTSI2SDQ
-  | CVTSD2SI
-  | CVTTSD2SI
-  | UCOMISD
-  | COMISD
-
-  | CALL
-  | JMP of locality
-  | RET
-
-  | TEST of suffix
-  | SET of condition
-  | J of locality * condition
-
-  | CMOV of condition
-  | XORPD
-  | ANDPD
-  | MOVAPD
-  | MOVLPD
-  | MOVABSQ
-
-  | CLTD
-  | LEA of suffix
-  | CQTO
-  | LEAVE
-
-  | XCHG
-  | BSWAP
-
 type register64 =
   | RAX | RBX | RDI | RSI | RDX | RCX | RBP | RSP
   | R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15
@@ -218,8 +88,8 @@ type 'reg base =
   | BaseReg of 'reg
   | BaseSymbol of string
 
-type 'reg arg =
-  | Constant of int
+type arg =
+  | ConstantInt of int
   | ConstantNat of nativeint
   | LabelRel of data_type * string * int
   | LabelDiff of string * string (* label - label *)
@@ -232,26 +102,145 @@ type 'reg arg =
   | Reg8 of register8
   | Reg16 of register16
   | Reg32 of register32
-  | Reg of 'reg
+  | Reg of register64 (* register with architecture size *)
   | Regf of registerf
   | Mem of
       data_type *
-        'reg * (* scale *) int *
-        'reg base * (* offset *) int
+       register64 * (* scale *) int *
+        register64 base * (* offset *) int
 
-type 'reg instruction = {
-  mutable instr : instr;
-  mutable args : 'reg arg array;
-}
+type instruction =
+    Segment of segment_type
+  | Global of string
+  | Constant of constant * data_type
+  | Align of bool * int
+  | NewLabel of string * data_type
+  | Bytes of string
+  | Space of int
+  | Comment of string
+  | Specific of string
+  | External of string * data_type
+  | Set of arg * arg
+  | End
 
-type 'reg segment = {
-  mutable seg_instrs : 'reg instruction list;
-}
+  | NOP
+
+  | ADD of suffix * arg * arg
+  | SUB of suffix * arg * arg
+  | XOR of suffix * arg * arg
+  | OR of suffix * arg * arg
+  | AND of suffix * arg * arg
+  | CMP of suffix * arg * arg
+
+  | FSTP of suffix option * arg
+  | FSTPS of arg
+
+  | FCOMPP
+  | FCOMPL of arg
+  | FLDL of arg
+  | FLDS of arg
+  | FNSTSW of arg
+  | FNSTCW of arg
+  | FLDCW of arg
+
+  | FCHS of arg option
+  | FABS of arg option
+  | FADDL of arg option
+  | FSUBL of arg option
+  | FMULL of arg option
+  | FDIVL of arg option
+  | FSUBRL of arg option
+  | FDIVRL of arg option
+  | FILD of suffix * arg
+  | FISTP of suffix * arg
+  | HLT
+
+  | FADDP of arg * arg
+  | FSUBP of arg * arg
+  | FMULP of arg * arg
+  | FDIVP of arg * arg
+  | FSUBRP of arg * arg
+  | FDIVRP of arg * arg
 
-type 'reg arch = {
-  arch64 : bool;
-  string_of_register : ('reg -> string);
-  bprint_instr : (Buffer.t -> 'reg arch -> 'reg instruction -> unit);
+  | FADDS of arg option
+  | FSUBS of arg option
+  | FMULS of arg option
+  | FDIVS of arg option
+  | FSUBRS of arg option
+  | FDIVRS of arg option
+
+  | FLD1
+  | FPATAN
+  | FPTAN
+  | FCOS
+  | FLDLN2
+  | FLDLG2
+  | FXCH of arg option
+  | FYL2X
+  | FSIN
+  | FSQRT
+  | FLDZ
+
+  | SAR of suffix * arg * arg
+  | SHR of suffix * arg * arg
+  | SAL of suffix * arg * arg
+  | INC of suffix * arg
+  | DEC of suffix * arg
+  | IMUL of suffix * arg * arg option
+  | IDIV of suffix * arg
+  | PUSH of suffix * arg
+  | POP of suffix * arg
+
+  | MOV of suffix * arg * arg
+
+  | MOVZX of suffix * suffix * arg * arg
+  | MOVSX of suffix * suffix * arg * arg
+  | MOVSS of arg * arg
+  | MOVSXD (* MOVSLQ *) of arg * arg
+
+  | MOVSD of arg * arg
+  | ADDSD of arg * arg
+  | SUBSD of arg * arg
+  | MULSD of arg * arg
+  | DIVSD of arg * arg
+  | SQRTSD of arg * arg
+  | ROUNDSD of rounding
+  | NEG of arg
+
+  | CVTSS2SD of arg * arg
+  | CVTSD2SS of arg * arg
+  | CVTSI2SD of arg * arg
+  | CVTSI2SDQ of arg * arg
+  | CVTSD2SI of arg * arg
+  | CVTTSD2SI of arg * arg
+  | UCOMISD of arg * arg
+  | COMISD of arg * arg
+
+  | CALL of arg
+  | JMP of locality ref * arg
+  | RET
+
+  | TEST of suffix * arg * arg
+  | SET of condition * arg
+  | J of locality ref * condition * arg
+
+  | CMOV of condition
+  | XORPD of arg * arg
+  | ANDPD of arg * arg
+  | MOVAPD of arg * arg
+  | MOVLPD of arg * arg
+  | MOVABSQ of arg * arg
+
+  | CLTD
+  | LEA of suffix * arg * arg
+  | CQTO
+  | LEAVE
+
+  | XCHG of arg * arg
+  | BSWAP of arg
+
+type segment = {
+  mutable seg_instrs : instruction list;
 }
 
 
@@ -300,11 +289,8 @@ let new_segment () = {
 let clear_segment s =
   s.seg_instrs <- []
 
-let emit seg ins args =
-  seg.seg_instrs <- {
-    instr = ins;
-    args = args
-  } :: seg.seg_instrs
+let emit seg ins =
+  seg.seg_instrs <- ins :: seg.seg_instrs
 
 let string_of_string_literal s =
   let b = Buffer.create (String.length s + 2) in
@@ -357,6 +343,46 @@ let string_of_register64 reg64 =
     | R15 -> "r15"
     | RIP -> "rip"
 
+let string_of_register arch64 reg =
+  if arch64 then string_of_register64 reg else
+  match reg with
+    RAX -> "eax"
+  | RBX -> "ebx"
+  | RDI -> "edi"
+  | RSI -> "esi"
+  | RDX -> "edx"
+  | RCX -> "ecx"
+  | RSP -> "esp"
+  | RBP -> "ebp"
+  | R8 -> "r8d"
+  | R9 -> "r9d"
+  | R10 -> "r10d"
+  | R11 -> "r11d"
+  | R12 -> "r12d"
+  | R13 -> "r13d"
+  | R14 -> "r14d"
+  | R15 -> "r15d"
+  | RIP -> assert false
+
+let register reg32 =
+  match reg32 with
+    EAX -> RAX
+  | EBX -> RBX
+  | EDI -> RDI
+  | ESI -> RSI
+  | EDX -> RDX
+  | ECX -> RCX
+  | ESP -> RSP
+  | EBP -> RBP
+  | R8D -> R8
+  | R9D -> R9
+  | R10D -> R10
+  | R11D -> R11
+  | R12D -> R12
+  | R13D -> R13
+  | R14D -> R14
+  | R15D -> R15
+
 let string_of_register8 reg8 = match reg8 with
   | AL -> "al"
   | BL -> "bl"
@@ -459,13 +485,49 @@ let string_of_condition condition = match condition with
 let tab b = Buffer.add_char b '\t'
 let bprint b s = tab b; Buffer.add_string b s
 
-
+let arch64 = ref true
+
+(* [print_assembler] is used to decide whether assembly code
+  should be printed in the .s file or not. *)
+let print_assembler = ref true
+
+(* These hooks can be used to insert optimization passes on
+  the assembly code. *)
+let assembler_passes = ref ([] :
+      (instruction list -> instruction list) list)
+
+(* Which asm conventions to use *)
+let masm =
+  match Config.ccomp_type with
+  | "msvc" | "masm" -> true
+  | _      -> false
+
+(* Shall we use an external assembler command ?
+   If [binary_content] contains some data, we can directly
+   save it. Otherwise, we have to ask an external command.
+*)
+let binary_content = ref None
+let assemble_file infile outfile =
+  match !binary_content with
+  | None ->
+     if masm then
+      Ccomp.command (Config.asm ^
+                     Filename.quote outfile ^ " " ^ Filename.quote infile ^
+                     (if !Clflags.verbose then "" else ">NUL"))
+    else
+      Ccomp.command (Config.asm ^ " -o " ^
+                     Filename.quote outfile ^ " " ^ Filename.quote infile)
+  | Some content ->
+    let oc = open_out_bin outfile in
+    output_string oc content;
+    close_out oc;
+    binary_content := None;
+    0
+
+(*
 module MakeEmitter(M : sig
 
       type reg
-      val arch64 : bool
-      val string_of_register : reg -> string
-      val bprint_instr : Buffer.t -> reg arch -> reg instruction -> unit
       val word_size : data_type
 
     end) = struct
@@ -474,45 +536,45 @@ module MakeEmitter(M : sig
     arch64 = M.arch64;
     string_of_register = M.string_of_register;
     bprint_instr = M.bprint_instr;  }
+*)
 
 (* Override emitaux.ml *)
 
-  let emit_int n = Constant n
+  let emit_int n = ConstantInt n
   let emit_nativeint n = ConstantNat n
   let emit_float64_directive f = ConstFloat f
 
-  let (seg : M.reg segment) = new_segment ()
+  let (seg : segment) = new_segment ()
 
   let init_segments () =
     clear_segment seg;
     ()
   let emit = emit seg
 
+(* eta-expand to create ref everytime *)
+  let _jmp arg = emit (JMP (ref (Loc_unknown 0), arg))
+  let _j cond arg = emit (J (ref (Loc_unknown 0), cond, arg))
 
+  let _global s = emit (Global s)
+  let _specific s = emit (Specific s)
+  let _text () = emit (Segment Text)
+  let _data () = emit (Segment Data)
+  let _align n = emit (Align (false, n))
+  let _llabel s = emit (NewLabel (s, NO)) (* local label *)
+  let _comment s = emit (Comment s)
+  let _extrn s ptr = emit (External (s, ptr))
 
-  let _global s = emit (Global s) [||]
-  let _specific s = emit (Specific s) [||]
-  let _text () = emit (Segment Text) [||]
-  let _data () = emit (Segment Data) [||]
-  let _align n = emit (Align (false, n)) [||]
-  let _llabel s = emit (NewLabel (s, NO)) [||] (* local label *)
-  let _label s = emit (NewLabel (s, M.word_size)) [||]
-  let _comment s = emit (Comment s) [||]
-  let _extrn s ptr = emit (External (s, ptr)) [||]
-
-  let _qword cst = emit (Constant (cst, QWORD)) [||]
-  let _long cst = emit (Constant (cst, DWORD)) [||]
-  let _word cst = emit (Constant (cst, WORD)) [||]
-  let _byte n = emit (Constant (n, BYTE)) [||]
-  let _ascii s = emit (Bytes s) [||]
-  let _space n = emit (Space n) [||]
+  let _qword cst = emit (Constant (cst, QWORD))
+  let _long cst = emit (Constant (cst, DWORD))
+  let _word cst = emit (Constant (cst, WORD))
+  let _byte n = emit (Constant (n, BYTE))
+  let _ascii s = emit (Bytes s)
+  let _space n = emit (Space n)
 
 
   (* mnemonics *)
-  let _call = emit CALL
-  let _jmp = emit (JMP Loc_unknown)
-  let _j cond = emit (J (Loc_unknown, cond))
-  let _set cond = emit (SET cond)
+  let _call arg = emit (CALL arg)
+  let _set cond arg = emit (SET (cond, arg))
 
   let _je = _j E
   let _jae = _j AE
@@ -524,142 +586,143 @@ module MakeEmitter(M : sig
   let _jp = _j P
 
   (* Qword mnemonics *)
-  let _addq = emit (ADD Q)
-  let _subq = emit (SUB Q)
-  let _andq = emit (AND Q)
-  let _orq = emit (OR Q)
-  let _salq = emit (SAL Q)
-  let _sarq = emit (SAR Q)
-  let _shrq = emit (SHR Q)
-  let _imulq = emit (IMUL Q)
-  let _xorq = emit (XOR Q)
-  let _cmpq = emit (CMP Q)
-  let _popq = emit (POP Q)
-  let _pushq = emit (PUSH Q)
-  let _testq = emit (TEST Q)
-  let _movq = emit (MOV Q)
-  let _leaq = emit (LEA Q)
-  let _movzbq = emit (MOVZX (B,Q))
-  let _movsbq = emit (MOVSX (B,Q))
-  let _movzwq = emit (MOVZX (W,Q))
-  let _movswq = emit (MOVSX (W,Q))
-  let _idivq = emit (IDIV Q)
+  let _addq (arg1, arg2) = emit (ADD (Q, arg1, arg2))
+  let _subq (arg1, arg2) = emit (SUB (Q, arg1, arg2))
+  let _andq (arg1, arg2) = emit (AND (Q, arg1, arg2))
+  let _orq (arg1, arg2) = emit (OR (Q, arg1, arg2))
+  let _salq (arg1, arg2) = emit (SAL (Q, arg1, arg2))
+  let _sarq (arg1, arg2) = emit (SAR (Q, arg1, arg2))
+  let _shrq (arg1, arg2) = emit (SHR (Q, arg1, arg2))
+  let _imulq (arg1, arg2) = emit (IMUL (Q, arg1, arg2))
+  let _xorq (arg1, arg2) = emit (XOR (Q, arg1, arg2))
+  let _cmpq (arg1, arg2) = emit (CMP (Q, arg1, arg2))
+  let _popq arg = emit (POP (Q, arg))
+  let _pushq arg = emit (PUSH (Q, arg))
+  let _testq (arg1, arg2) = emit (TEST (Q, arg1, arg2))
+  let _movq (arg1, arg2) = emit (MOV (Q, arg1, arg2))
+  let _leaq (arg1, arg2) = emit (LEA (Q, arg1, arg2))
+  let _movzbq (arg1, arg2)  = emit (MOVZX (B,Q, arg1, arg2))
+  let _movsbq (arg1, arg2)  = emit (MOVSX (B,Q, arg1, arg2))
+  let _movzwq (arg1, arg2)  = emit (MOVZX (W,Q, arg1, arg2))
+  let _movswq (arg1, arg2)  = emit (MOVSX (W,Q, arg1, arg2))
+  let _idivq arg = emit (IDIV (Q, arg))
 
   (* Long-word mnemonics *)
-  let _addl = emit (ADD L)
-  let _subl = emit (SUB L)
-  let _andl = emit (AND L)
-  let _orl = emit (OR L)
-  let _sall = emit (SAL L)
-  let _sarl = emit (SAR L)
-  let _shrl = emit (SHR L)
-  let _imull = emit (IMUL L)
-  let _idivl = emit (IDIV L)
-  let _xorl = emit (XOR L)
-  let _cmpl = emit (CMP L)
-  let _popl = emit (POP L)
-  let _pushl = emit (PUSH L)
-  let _testl = emit (TEST L)
-  let _decl = emit (DEC L)
-  let _movw = emit (MOV W)
-  let _movl = emit (MOV L)
-  let _incl = emit (INC L)
-  let _leal = emit (LEA L)
-  let _fistpl = emit (FISTP L)
-  let _movzbl = emit (MOVZX (B,L))
-  let _movsbl = emit (MOVSX (B,L))
-  let _movzwl = emit (MOVZX (W,L))
-  let _movswl = emit (MOVSX (W,L))
-  let _fildl = emit (FILD L)
-  let _fstpl = emit  (FSTP (Some L))
+  let _addl (arg1, arg2) = emit (ADD (L, arg1, arg2))
+  let _subl (arg1, arg2) = emit (SUB  (L, arg1, arg2))
+  let _andl (arg1, arg2) = emit (AND  (L, arg1, arg2))
+  let _orl (arg1, arg2) = emit (OR  (L, arg1, arg2))
+  let _sall (arg1, arg2) = emit (SAL  (L, arg1, arg2))
+  let _sarl (arg1, arg2) = emit (SAR  (L, arg1, arg2))
+  let _shrl (arg1, arg2) = emit (SHR  (L, arg1, arg2))
+  let _xorl (arg1, arg2) = emit (XOR (L, arg1, arg2))
+  let _cmpl (arg1, arg2) = emit (CMP  (L, arg1, arg2))
+  let _testl (arg1, arg2) = emit (TEST (L, arg1, arg2))
+  let _movl (arg1, arg2) = emit (MOV (L, arg1, arg2))
+  let _imull (arg1, arg2) = emit (IMUL (L, arg1, arg2))
+  let _idivl arg = emit (IDIV (L, arg))
+  let _popl arg = emit (POP (L, arg))
+  let _pushl arg = emit (PUSH (L, arg))
+  let _decl arg = emit (DEC (L, arg))
+  let _incl arg = emit (INC (L, arg))
+  let _leal (arg1, arg2) = emit (LEA (L, arg1, arg2))
+  let _fistpl arg = emit (FISTP (L, arg))
+  let _movzbl (arg1, arg2) = emit (MOVZX (B,L, arg1, arg2))
+  let _movsbl (arg1, arg2) = emit (MOVSX (B,L, arg1, arg2))
+  let _movzwl (arg1, arg2) = emit (MOVZX (W,L, arg1, arg2))
+  let _movswl (arg1, arg2) = emit (MOVSX (W,L, arg1, arg2))
+  let _fildl arg = emit (FILD (L, arg))
+  let _fstpl arg = emit  (FSTP (Some L, arg))
 
 (* Word mnemonics *)
-  let _movw = emit (MOV W)
+  let _movw (arg1, arg2) = emit (MOV (W, arg1, arg2))
 
   (* Byte mnemonics *)
-  let _decb = emit (DEC B)
-  let _cmpb = emit (CMP B)
-  let _movb = emit (MOV B)
-  let _andb = emit (AND B)
-  let _xorb = emit (XOR B)
-  let _movb = emit (MOV B)
-  let _testb = emit (TEST B)
-
-
-  let _movsd = emit MOVSD
-  let _ucomisd = emit UCOMISD
-  let _comisd = emit COMISD
-  let _movapd = emit MOVAPD
-  let _xorpd = emit XORPD
-  let _movabsq = emit MOVABSQ
-
-  let _movslq = emit MOVSXD
-  let _cvtss2sd = emit CVTSS2SD
-  let _movss = emit MOVSS
-  let _cvtsd2ss = emit CVTSD2SS
-  let _cqto = emit CQTO
-  let _addsd = emit ADDSD
-  let _subsd = emit SUBSD
-  let _mulsd = emit MULSD
-  let _divsd = emit DIVSD
-  let _incq = emit (INC Q)
-  let _decq = emit (DEC Q)
-  let _andpd = emit ANDPD
-  let _cvtsi2sd = emit CVTSI2SD
-  let _cvttsd2si = emit CVTTSD2SI
-  let _xchg = emit XCHG
-  let _bswap = emit BSWAP
-  let _sqrtsd = emit SQRTSD
-  let _ret = emit RET
-  let _cltd = emit CLTD
-  let _hlt = emit HLT
-
-  let _nop = emit NOP
-  let _fchs = emit FCHS
-  let _fabs = emit FABS
-
-  let _faddl = emit  FADDL
-  let _fsubl = emit  FSUBL
-  let _fmull = emit  FMULL
-  let _fdivl = emit  FDIVL
-  let _fsubrl = emit  FSUBRL
-  let _fdivrl = emit  FDIVRL
-
-  let _faddp = emit  FADDP
-  let _fsubp = emit  FSUBP
-  let _fmulp = emit  FMULP
-  let _fdivp = emit  FDIVP
-  let _fsubrp = emit  FSUBRP
-  let _fdivrp = emit  FDIVRP
-
-  let _fadds = emit  FADDS
-  let _fsubs = emit  FSUBS
-  let _fmuls = emit  FMULS
-  let _fdivs = emit  FDIVS
-  let _fsubrs = emit  FSUBRS
-  let _fdivrs = emit  FDIVRS
-
-  let _fcompp = emit FCOMPP
-  let _fcompl = emit FCOMPL
-  let _fldl = emit FLDL
-  let _flds = emit FLDS
-  let _fnstsw = emit FNSTSW
-
-  let _fld1 = emit FLD1
-  let _fpatan = emit  FPATAN
-  let _fptan = emit  FPTAN
-  let _fcos = emit  FCOS
-  let _fldln2 = emit  FLDLN2
-  let _fldlg2 = emit  FLDLG2
-  let _fxch = emit  FXCH
-  let _fyl2x = emit  FYL2X
-  let _fsin = emit  FSIN
-  let _fsqrt = emit  FSQRT
-  let _fstp = emit  (FSTP None)
-  let _fstps = emit  FSTPS
-  let _fldz = emit FLDZ
-  let _fnstcw = emit FNSTCW
-  let _fldcw = emit FLDCW
+  let _decb arg = emit (DEC (B, arg))
+  let _cmpb (arg1, arg2) = emit (CMP (B, arg1, arg2))
+  let _movb (arg1, arg2) = emit (MOV (B, arg1, arg2))
+  let _andb (arg1, arg2) = emit (AND (B, arg1, arg2))
+  let _xorb (arg1, arg2) = emit (XOR (B, arg1, arg2))
+  let _movb (arg1, arg2) = emit (MOV (B, arg1, arg2))
+  let _testb (arg1, arg2) = emit (TEST (B, arg1, arg2))
+
+
+  let _movsd (arg1, arg2) = emit (MOVSD (arg1, arg2))
+  let _ucomisd (arg1, arg2) = emit (UCOMISD (arg1, arg2))
+  let _comisd (arg1, arg2) = emit (COMISD (arg1, arg2))
+  let _movapd (arg1, arg2) = emit (MOVAPD  (arg1, arg2))
+  let _movabsq (arg1, arg2) = emit (MOVABSQ  (arg1, arg2))
+  let _xorpd (arg1, arg2) = emit (XORPD  (arg1, arg2))
+  let _andpd (arg1, arg2) = emit (ANDPD  (arg1, arg2))
+
+  let _movslq (arg1, arg2) = emit (MOVSXD  (arg1, arg2))
+  let _movss (arg1, arg2) = emit (MOVSS (arg1, arg2))
+  let _cvtss2sd (arg1, arg2) = emit (CVTSS2SD (arg1, arg2))
+  let _cvtsd2ss (arg1, arg2) = emit (CVTSD2SS (arg1, arg2))
+  let _cvtsi2sd (arg1, arg2) = emit (CVTSI2SD (arg1, arg2))
+  let _cvttsd2si (arg1, arg2) = emit (CVTTSD2SI (arg1, arg2))
+  let _addsd (arg1, arg2) = emit (ADDSD (arg1, arg2))
+  let _subsd  (arg1, arg2) = emit (SUBSD (arg1, arg2))
+  let _mulsd (arg1, arg2) = emit (MULSD (arg1, arg2))
+  let _divsd (arg1, arg2) = emit (DIVSD (arg1, arg2))
+  let _sqrtsd (arg1, arg2) = emit (SQRTSD (arg1, arg2))
+
+let _cqto () = emit CQTO
+
+  let _incq arg = emit (INC (Q, arg))
+  let _decq arg = emit (DEC (Q, arg))
+  let _xchg (arg1, arg2) = emit (XCHG (arg1, arg2))
+  let _bswap arg = emit (BSWAP arg)
+  let _ret () = emit RET
+  let _cltd () = emit CLTD
+  let _hlt () = emit HLT
+
+  let _nop () = emit NOP
+  let _fchs arg_o = emit (FCHS arg_o)
+  let _fabs arg_o = emit (FABS arg_o)
+
+  let _faddl arg = emit  (FADDL arg)
+  let _fsubl arg = emit  (FSUBL arg)
+  let _fmull arg = emit  (FMULL arg)
+  let _fdivl arg = emit  (FDIVL arg)
+  let _fsubrl arg = emit  (FSUBRL arg)
+  let _fdivrl arg = emit  (FDIVRL arg)
+
+  let _faddp (arg1, arg2) = emit  (FADDP (arg1, arg2))
+  let _fsubp (arg1, arg2) = emit  (FSUBP (arg1, arg2))
+  let _fmulp (arg1, arg2) = emit  (FMULP (arg1, arg2))
+  let _fdivp (arg1, arg2) = emit  (FDIVP (arg1, arg2))
+  let _fsubrp (arg1, arg2) = emit  (FSUBRP (arg1, arg2))
+  let _fdivrp (arg1, arg2) = emit  (FDIVRP (arg1, arg2))
+
+  let _fadds arg = emit  (FADDS arg)
+  let _fsubs arg = emit  (FSUBS arg)
+  let _fmuls arg  = emit  (FMULS arg)
+  let _fdivs arg = emit  (FDIVS arg)
+  let _fsubrs arg = emit  (FSUBRS arg)
+  let _fdivrs arg = emit  (FDIVRS arg)
+
+  let _fcompp () = emit FCOMPP
+  let _fcompl arg = emit (FCOMPL arg)
+  let _fldl arg = emit (FLDL arg)
+  let _flds arg = emit (FLDS arg)
+  let _fnstsw arg = emit (FNSTSW arg)
+
+  let _fld1 () = emit FLD1
+  let _fpatan () = emit  FPATAN
+  let _fptan () = emit  FPTAN
+  let _fcos () = emit  FCOS
+  let _fldln2 () = emit  FLDLN2
+  let _fldlg2 () = emit  FLDLG2
+  let _fxch arg = emit  (FXCH arg)
+  let _fyl2x () = emit  FYL2X
+  let _fsin () = emit  FSIN
+  let _fsqrt () = emit  FSQRT
+  let _fstp arg = emit  (FSTP (None, arg))
+  let _fstps arg = emit  (FSTPS arg)
+  let _fldz () = emit FLDZ
+  let _fnstcw arg = emit (FNSTCW arg)
+  let _fldcw arg = emit (FLDCW arg)
 
 
  (* arguments *)
@@ -672,14 +735,22 @@ module MakeEmitter(M : sig
     if system = S_win64 then LabelRel(pref, s, 0)
     else Mem (pref, RIP, 1, BaseSymbol s, 0)
 
-  let _mem offset reg = Mem(NO, reg, 1, NoBase, offset)
-
-(* On win32/win64, some memory references need to specify the size of
-   the operand in 'pref' *)
-  let _mem_ptr pref offset reg = Mem(pref, reg, 1, NoBase, offset)
 
   let _int n = emit_int n
   let _st n = Regf (ST n)
   let _offset l = LabelOffset l
 
-end
+let generate_code oc bprint_instr =
+  let instrs = List.rev seg.seg_instrs in
+  let instrs = List.fold_left (fun instrs pass ->
+      pass instrs
+    ) instrs !assembler_passes in
+
+  if ! print_assembler then
+    let b = Buffer.create 10000 in
+    List.iter (bprint_instr b !arch64) instrs;
+    let s = Buffer.contents b in
+    output_string oc s
+
+
+
author	Fabrice Le Fessant <Fabrice.Le_fessant@inria.fr>	2014-04-15 15:56:01 +0000
committer	Fabrice Le Fessant <Fabrice.Le_fessant@inria.fr>	2014-04-15 15:56:01 +0000
commit	7edccb792788fe83df11e0b8e523e1984460220c (patch)
tree	980d7af53eae29ca2c2dc80679fb6554787c3e4f
parent	34046e68599d08a069954bb0443d5942b15a0db1 (diff)
download	ocaml-abstract_x86_asm.tar.gz