summaryrefslogtreecommitdiff
path: root/asmcomp
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@inria.fr>1995-08-13 09:31:50 +0000
committerXavier Leroy <xavier.leroy@inria.fr>1995-08-13 09:31:50 +0000
commit0b9972d50fbacf54ea3fc91794410fb43772480a (patch)
treee7757c2e72e03d9c92e17ad6236726dbde9d6a5c /asmcomp
parent937fece4e311e11edcc3f0651e7ea24ae46c6d16 (diff)
downloadocaml-0b9972d50fbacf54ea3fc91794410fb43772480a.tar.gz
Ajout du scheduler (a tester serieusement)
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@202 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'asmcomp')
-rw-r--r--asmcomp/asmgen.ml61
-rw-r--r--asmcomp/emit_sparc.mlp87
-rw-r--r--asmcomp/linearize.ml5
-rw-r--r--asmcomp/linearize.mli4
-rw-r--r--asmcomp/proc.mli4
-rw-r--r--asmcomp/proc_alpha.ml20
-rw-r--r--asmcomp/proc_i386.ml18
-rw-r--r--asmcomp/proc_mips.ml16
-rw-r--r--asmcomp/proc_sparc.ml17
-rw-r--r--asmcomp/scheduling.ml274
-rw-r--r--asmcomp/scheduling.mli16
11 files changed, 456 insertions, 66 deletions
diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml
index 10758d0772..416f2ace6e 100644
--- a/asmcomp/asmgen.ml
+++ b/asmcomp/asmgen.ml
@@ -22,43 +22,54 @@ type error = Assembler_error of string
exception Error of error
+let liveness phrase =
+ Liveness.fundecl phrase; phrase
+
+let dump_if flag message phrase =
+ if !flag then Printmach.phase message phrase;
+ phrase
+
+let dump_linear_if flag message phrase =
+ if !flag then begin
+ print_string "*** "; print_string message; print_newline();
+ Printlinear.fundecl phrase; print_newline()
+ end;
+ phrase
+
let rec regalloc fd =
- if !dump_live then Printmach.phase "Liveness analysis" fd;
+ dump_if dump_live "Liveness analysis" fd;
Interf.build_graph fd;
if !dump_interf then Printmach.interferences();
if !dump_prefer then Printmach.preferences();
Coloring.allocate_registers();
- if !dump_regalloc then
- Printmach.phase "After register allocation" fd;
+ dump_if dump_regalloc "After register allocation" fd;
let (newfd, redo_regalloc) = Reload.fundecl fd in
- if !dump_reload then
- Printmach.phase "After insertion of reloading code" newfd;
+ dump_if dump_reload "After insertion of reloading code" newfd;
if redo_regalloc
then begin Reg.reinit(); Liveness.fundecl newfd; regalloc newfd end
else newfd
+let (++) x f = f x
+
let compile_fundecl fd_cmm =
Reg.reset();
- let fd_sel = Selection.fundecl fd_cmm in
- if !dump_selection then
- Printmach.phase "After instruction selection" fd_sel;
- Liveness.fundecl fd_sel;
- if !dump_live then Printmach.phase "Liveness analysis" fd_sel;
- let fd_spill = Spill.fundecl fd_sel in
- Liveness.fundecl fd_spill;
- if !dump_spill then
- Printmach.phase "After spilling" fd_spill;
- let fd_split = Split.fundecl fd_spill in
- Liveness.fundecl fd_split;
- if !dump_split then
- Printmach.phase "After live range splitting" fd_split;
- let fd_reload = regalloc fd_split in
- let fd_linear = Linearize.fundecl fd_reload in
- if !dump_linear then begin
- print_string "*** Linearized code"; print_newline();
- Printlinear.fundecl fd_linear; print_newline()
- end;
- Emit.fundecl fd_linear
+ fd_cmm
+ ++ Selection.fundecl
+ ++ dump_if dump_selection "After instruction selection"
+ ++ liveness
+ ++ dump_if dump_live "Liveness analysis"
+ ++ Spill.fundecl
+ ++ liveness
+ ++ dump_if dump_spill "After spilling"
+ ++ Split.fundecl
+ ++ dump_if dump_split "After live range splitting"
+ ++ liveness
+ ++ regalloc
+ ++ Linearize.fundecl
+ ++ dump_linear_if dump_linear "Linearized code"
+ ++ Scheduling.fundecl
+ ++ dump_linear_if dump_scheduling "After instruction scheduling"
+ ++ Emit.fundecl
let compile_phrase p =
if !dump_cmm then begin Printcmm.phrase p; print_newline() end;
diff --git a/asmcomp/emit_sparc.mlp b/asmcomp/emit_sparc.mlp
index 59f89d134f..1b3fea88c5 100644
--- a/asmcomp/emit_sparc.mlp
+++ b/asmcomp/emit_sparc.mlp
@@ -211,6 +211,13 @@ let name_for_int_operation = function
| Iasr -> "sra"
| _ -> Misc.fatal_error "Emit.name_for_int_operation"
+let name_for_float_operation = function
+ Iaddf -> "faddd"
+ | Isbuf -> "fsubd"
+ | Imulf -> "fmuld"
+ | Idivf -> "fdivd"
+ | _ -> Misc.fatal_error "Emit.name_for_float_operation"
+
let name_for_int_comparison = function
Isigned Ceq -> "be" | Isigned Cne -> "bne"
| Isigned Cle -> "ble" | Isigned Cgt -> "bg"
@@ -234,8 +241,7 @@ let emit_instr i =
Lend -> ()
| Lop(Imove | Ispill | Ireload) ->
let src = i.arg.(0) and dst = i.res.(0) in
- if src.loc <> dst.loc then begin
- match (src, dst) with
+ begin match (src, dst) with
{loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} ->
` mov {emit_reg src}, {emit_reg dst}\n`
| {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} ->
@@ -288,35 +294,42 @@ let emit_instr i =
let n = frame_size() in
if !contains_calls then begin
` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`;
- ` andn %o7, 1, %o7\n`
- end;
- ` jmp {emit_reg i.arg.(0)}\n`;
- if n > 0 then
- ` add %sp, {emit_int n}, %sp\n`
- else
- ` nop\n`
- | Lop(Itailcall_imm s) ->
- if s = !function_name then begin
- ` b {emit_label !tailrec_entry_point}\n`;
- ` nop\n`
+ ` add %sp, {emit_int n}, %sp\n`;
+ ` jmp {emit_reg i.arg.(0)}\n`;
+ ` andn %o7, 1, %o7\n` (* in delay slot *)
end else begin
- let n = frame_size() in
+ ` jmp {emit_reg i.arg.(0)}\n`;
+ ` add %sp, {emit_int n}, %sp\n` (* in delay slot *)
+ end
+ | Lop(Itailcall_imm s) ->
+ let n = frame_size() in
+ if s = !function_name then
if !contains_calls then begin
` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`;
- ` andn %o7, 1, %o7\n`
- end;
- ` sethi %hi({emit_symbol s}), %g1\n`;
- ` jmp %g1 + %lo({emit_symbol s})\n`;
- if n > 0 then
- ` add %sp, {emit_int n}, %sp\n`
- else
+ ` andn %o7, 1, %o7\n`;
+ ` b {emit_label !tailrec_entry_point}\n`;
+ ` st %o7, [%sp + {emit_int(n - 4 + 96)}]\n`
+ end else
+ ` b {emit_label !tailrec_entry_point}\n`;
` nop\n`
- end
+ end
+ else
+ if !contains_calls then begin
+ ` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`;
+ ` add %sp, {emit_int n}, %sp\n`;
+ ` sethi %hi({emit_symbol s}), %g1\n`;
+ ` jmp %g1 + %lo({emit_symbol s})\n`;
+ ` andn %o7, 1, %o7\n` (* in delay slot *)
+ end else begin
+ ` sethi %hi({emit_symbol s}), %g1\n`;
+ ` jmp %g1 + %lo({emit_symbol s})\n`;
+ ` add %sp, {emit_int n}, %sp\n` (* in delay slot *)
+ end
| Lop(Iextcall(s, alloc)) ->
if alloc then begin
` sethi %hi({emit_symbol s}), %g4\n`;
`{record_frame i.live} call {emit_symbol "caml_c_call"}\n`;
- ` or %g4, %lo({emit_symbol s}), %g4\n`
+ ` or %g4, %lo({emit_symbol s}), %g4\n` (* in delay slot *)
end else begin
` call {emit_symbol s}\n`;
` nop\n`
@@ -358,14 +371,14 @@ let emit_instr i =
` sub %g6, {emit_int n}, %g6\n`;
` cmp %g6, %g7\n`;
` bgeu {emit_label lbl_cont}\n`;
- ` add %g6, 4, {emit_reg i.res.(0)}\n`;
+ ` add %g6, 4, {emit_reg i.res.(0)}\n`; (* in delay slot *)
`{record_frame i.live} call {emit_symbol "caml_call_gc"}\n`;
- ` mov {emit_int n}, %g4\n`;
+ ` mov {emit_int n}, %g4\n`; (* in delay slot *)
` add %g6, 4, {emit_reg i.res.(0)}\n`;
`{emit_label lbl_cont}:\n`
end else begin
`{record_frame i.live} call {emit_symbol "caml_alloc"}\n`;
- ` mov {emit_int n}, %g4\n`;
+ ` mov {emit_int n}, %g4\n`; (* in delay slot *)
` add %g6, 4, {emit_reg i.res.(0)}\n`
end
| Lop(Iintop Idiv) ->
@@ -451,14 +464,9 @@ let emit_instr i =
| Lop(Iintop_imm(op, n)) ->
let instr = name_for_int_operation op in
` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n`
- | Lop(Iaddf) ->
- ` faddd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
- | Lop(Isubf) ->
- ` fsubd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
- | Lop(Imulf) ->
- ` fmuld {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
- | Lop(Idivf) ->
- ` fdivd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
+ | Lop(Iaddf | Isubf | Imulf | Idivf) ->
+ let instr = name_for_float_operation op in
+ ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n`
| Lop(Ifloatofint) ->
` sub %sp, 4, %sp\n`;
` st {emit_reg i.arg.(0)}, [%sp + 96]\n`;
@@ -477,13 +485,10 @@ let emit_instr i =
let n = frame_size() in
if !contains_calls then begin
` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`;
- ` andn %o7, 1, %o7\n`
+ ` andn %o7, 1, %o7\n` (* poor scheduling *)
end;
` retl\n`;
- if n > 0 then
- ` add %sp, {emit_int n}, %sp\n`
- else
- ` nop\n`
+ ` add %sp, {emit_int n}, %sp\n`
| Llabel lbl ->
`{emit_label lbl}:\n`
| Lbranch lbl ->
@@ -543,7 +548,7 @@ let emit_instr i =
` or %g1, %lo({emit_label lbl_jumptbl}), %g1\n`;
` sll {emit_reg i.arg.(0)}, 2, %g4\n`;
` ld [%g1 + %g4], %g1\n`;
- ` jmp %g1\n`;
+ ` jmp %g1\n`; (* poor scheduling *)
` nop\n`;
`{emit_label lbl_jumptbl}:`;
for i = 0 to Array.length jumptbl - 1 do
@@ -564,7 +569,7 @@ let emit_instr i =
| Lraise ->
` mov %g5, %sp\n`;
` ldd [%sp + 96], %g4\n`; (* Load %g4 and %g5 *)
- ` jmp %g4 + 8\n`;
+ ` jmp %g4 + 8\n`; (* poor scheduling *)
` add %sp, 8, %sp\n`
let rec emit_all i =
diff --git a/asmcomp/linearize.ml b/asmcomp/linearize.ml
index 49095b340a..025890962b 100644
--- a/asmcomp/linearize.ml
+++ b/asmcomp/linearize.ml
@@ -72,6 +72,11 @@ let rec end_instr =
res = [||];
live = Reg.Set.empty }
+(* Cons an instruction (live empty) *)
+
+let instr_cons d a r n =
+ { desc = d; next = n; arg = a; res = r; live = Reg.Set.empty }
+
(* Cons a simple instruction (arg, res, live empty) *)
let cons_instr d n =
diff --git a/asmcomp/linearize.mli b/asmcomp/linearize.mli
index 837a1ddddd..45ccd85cb3 100644
--- a/asmcomp/linearize.mli
+++ b/asmcomp/linearize.mli
@@ -37,6 +37,10 @@ and instruction_desc =
| Lpoptrap
| Lraise
+val end_instr: instruction
+val instr_cons:
+ instruction_desc -> Reg.t array -> Reg.t array -> instruction -> instruction
+
type fundecl =
{ fun_name: string;
fun_body: instruction;
diff --git a/asmcomp/proc.mli b/asmcomp/proc.mli
index 231af20b8f..8b1d5fa3b0 100644
--- a/asmcomp/proc.mli
+++ b/asmcomp/proc.mli
@@ -63,6 +63,10 @@ val reload_operation:
(Reg.t -> Reg.t) -> Mach.operation -> Reg.t array -> Reg.t array ->
Reg.t array * Reg.t array
+(* Latency info for instruction scheduling *)
+val need_scheduling: bool
+val oper_latency: Mach.operation -> int
+
(* Info for laying out the stack frame *)
val num_stack_slots: int array
val contains_calls: bool ref
diff --git a/asmcomp/proc_alpha.ml b/asmcomp/proc_alpha.ml
index 88c52cf2c2..1f61b31cf3 100644
--- a/asmcomp/proc_alpha.ml
+++ b/asmcomp/proc_alpha.ml
@@ -237,6 +237,26 @@ let max_register_pressure = function
let reload_test makereg tst args = raise Use_default
let reload_operation makereg op args res = raise Use_default
+(* Latencies (in cycles).
+ Cf. Appendix A of the Alpha architecture handbook *)
+
+let need_scheduling = true
+
+let oper_latency = function
+ Ireload -> 3
+ | Iload(Word, _) -> 3
+ | Iload(_, _) -> 5 (* 3 for load, 2 for extension *)
+ | Iconst_symbol _ -> 3 (* turned into a load *)
+ | Iconst_float _ -> 3 (* turned into a load *)
+ | Iintop Imul -> 10
+ | Iintop_imm(Imul, _) -> 10
+ | Iintop(Ilsl | Ilsr | Iasr) -> 2
+ | Iintop_imm((Ilsl | Ilsr | Iasr), _) -> 2
+ | Iaddf | Isubf -> 4
+ | Imulf -> 5
+ | Idivf -> 10
+ | _ -> 1
+
(* Layout of the stack *)
let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/proc_i386.ml b/asmcomp/proc_i386.ml
index 04456fc69e..2ed3daf7f5 100644
--- a/asmcomp/proc_i386.ml
+++ b/asmcomp/proc_i386.ml
@@ -327,6 +327,24 @@ let reload_operation makereg op arg res =
| _ -> (* Other operations: all args and results in registers *)
raise Use_default
+(* Instruction scheduling. Only effective on the Pentium. *)
+
+let need_scheduling = true
+
+(* Wild guesses *)
+
+let oper_latency = function
+ Ireload -> 2
+ | Iload(_, _) -> 2
+ | Iintop_imm(Imul, _) -> 10
+ | Iintop(Idiv | Imod) -> 20
+ | Iintop_imm((Idiv | Imod), _) -> 20
+ | Iaddf | Isubf -> 5
+ | Imulf -> 10
+ | Idivf -> 20
+ | Ispecific(Istore_int(_, _) | Istore_symbol(_, _) | Ioffset_loc(_, _)) -> -1
+ | _ -> 1
+
(* Layout of the stack frame *)
let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/proc_mips.ml b/asmcomp/proc_mips.ml
index fc5ed7a6c6..61ae5e5252 100644
--- a/asmcomp/proc_mips.ml
+++ b/asmcomp/proc_mips.ml
@@ -215,6 +215,22 @@ let max_register_pressure = function
let reload_test makereg tst args = raise Use_default
let reload_operation makereg op args res = raise Use_default
+(* Latencies (in cycles). Use R3000 timings. *)
+
+let need_scheduling = true
+
+let oper_latency = function
+ Ireload -> 2
+ | Iload(_, _) -> 2
+ | Iintop Imul -> 10 (* wild guess *)
+ | Iintop_imm(Imul, _) -> 10
+ | Iintop(Idiv | Imod) -> 20 (* wild guess *)
+ | Iintop_imm(Idiv | Imod, _) -> 20
+ | Iaddf | Isubf -> 2
+ | Imulf -> 5
+ | Idivf -> 19
+ | _ -> 1
+
(* Layout of the stack *)
let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/proc_sparc.ml b/asmcomp/proc_sparc.ml
index 4dea3141ed..47cd732e41 100644
--- a/asmcomp/proc_sparc.ml
+++ b/asmcomp/proc_sparc.ml
@@ -230,6 +230,23 @@ let max_register_pressure = function
let reload_test makereg tst args = raise Use_default
let reload_operation makereg op args res = raise Use_default
+(* Latencies (in cycles). Wild guesses. *)
+
+let need_scheduling = true
+
+let oper_latency = function
+ Ireload -> 3
+ | Iload(_, _) -> 3 (* 3 for load, 2 for extension *)
+ | Iconst_float -> 3 (* turned into a load *)
+ | Iintop Imul -> 10
+ | Iintop_imm(Imul, _) -> 10
+ | Iintop(Idiv | Imod) -> 20
+ | Iintop_imm(Idiv | Imod, _) -> 20
+ | Iaddf | Isubf -> 3
+ | Imulf -> 5
+ | Idivf -> 15
+ | _ -> 1
+
(* Layout of the stack *)
let num_stack_slots = [| 0; 0 |]
diff --git a/asmcomp/scheduling.ml b/asmcomp/scheduling.ml
new file mode 100644
index 0000000000..746ce5a915
--- /dev/null
+++ b/asmcomp/scheduling.ml
@@ -0,0 +1,274 @@
+(***********************************************************************)
+(* *)
+(* Caml Special Light *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1995 Institut National de Recherche en Informatique et *)
+(* Automatique. Distributed only by permission. *)
+(* *)
+(***********************************************************************)
+
+(* $Id$ *)
+
+(* Instruction scheduling *)
+
+open Misc
+open Reg
+open Mach
+open Linearize
+
+(* Determine whether an operation ends a basic block or not *)
+
+let in_basic_block = function
+ Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ -> false
+ | Iextcall(_, _) -> false
+ | Istackoffset _ -> false
+ | Istore(_, _) -> false
+ | Ialloc _ -> false
+ | op -> Proc.oper_latency op >= 0
+ (* The processor description can return a latency of -1 to signal
+ a specific instruction that terminates a basic block, e.g.
+ Istore_symbol for the I386. *)
+
+(* Estimate the delay needed to evaluate an instruction. *)
+
+let instr_latency instr =
+ match instr.desc with
+ Lop op -> Proc.oper_latency op
+ | _ -> fatal_error "Scheduling.instr_latency"
+
+(* Representation of the code DAG. *)
+
+type code_dag_node =
+ { instr: instruction; (* The instruction *)
+ delay: int; (* How many cycles it needs *)
+ mutable sons: (code_dag_node * int) list;
+ (* Instructions that depend on it *)
+ mutable date: int; (* Start date *)
+ mutable length: int; (* Length of longest path to result *)
+ mutable ancestors: int; (* Number of ancestors *)
+ mutable emitted_ancestors: int } (* Number of emitted ancestors *)
+
+let dummy_node =
+ { instr = end_instr; delay = 0; sons = []; date = 0;
+ length = -1; ancestors = 0; emitted_ancestors = 0 }
+
+(* The code dag itself is represented by two tables from registers to nodes:
+ - "results" maps registers to the instructions that produced them;
+ - "uses" maps registers to the instructions that use them. *)
+
+let code_results = (Hashtbl.new 31 : (location, code_dag_node) Hashtbl.t)
+let code_uses = (Hashtbl.new 31 : (location, code_dag_node) Hashtbl.t)
+
+let clear_code_dag () =
+ Hashtbl.clear code_results;
+ Hashtbl.clear code_uses
+
+(* Add an instruction to the code DAG *)
+
+let add_edge ancestor son delay =
+ ancestor.sons <- (son, delay) :: ancestor.sons;
+ son.ancestors <- son.ancestors + 1
+
+let add_instruction ready_queue instr =
+ let delay = instr_latency instr in
+ let node =
+ { instr = instr;
+ delay = delay;
+ sons = [];
+ date = 0;
+ length = -1;
+ ancestors = 0;
+ emitted_ancestors = 0 } in
+ (* Add edges from all instructions that define one of the registers used *)
+ for i = 0 to Array.length instr.arg - 1 do
+ try
+ let ancestor = Hashtbl.find code_results instr.arg.(i).loc in
+ add_edge ancestor node ancestor.delay
+ with Not_found ->
+ ()
+ done;
+ (* Also add edges from all instructions that use one of the results
+ of this instruction, so that evaluation order is preserved. *)
+ for i = 0 to Array.length instr.res - 1 do
+ let ancestors = Hashtbl.find_all code_uses instr.res.(i).loc in
+ List.iter (fun ancestor -> add_edge ancestor node 0) ancestors
+ done;
+ (* Also add edges from all instructions that have already defined one
+ of the results of this instruction, so that evaluation order
+ is preserved. *)
+ for i = 0 to Array.length instr.res - 1 do
+ try
+ let ancestor = Hashtbl.find code_results instr.res.(i).loc in
+ add_edge ancestor node 0
+ with Not_found ->
+ ()
+ done;
+ (* Remember the registers used and produced by this instruction *)
+ for i = 0 to Array.length instr.res - 1 do
+ Hashtbl.add code_results instr.res.(i).loc node
+ done;
+ for i = 0 to Array.length instr.arg - 1 do
+ Hashtbl.add code_uses instr.arg.(i).loc node
+ done;
+ (* If this is a root instruction (all arguments already computed),
+ add it to the ready queue *)
+ if node.ancestors = 0 then node :: ready_queue else ready_queue
+
+(* Compute length of longest path to a result.
+ For leafs of the DAG, see whether their result is used in the instruction
+ immediately following the basic block (a "critical" output). *)
+
+let is_critical critical_outputs results =
+ try
+ for i = 0 to Array.length results - 1 do
+ let r = results.(i).loc in
+ for j = 0 to Array.length critical_outputs - 1 do
+ if critical_outputs.(j).loc = r then raise Exit
+ done
+ done;
+ false
+ with Exit ->
+ true
+
+let rec longest_path critical_outputs node =
+ if node.length < 0 then begin
+ match node.sons with
+ [] ->
+ node.length <-
+ if is_critical critical_outputs node.instr.res
+ then node.delay
+ else 0
+ | sons ->
+ node.length <-
+ List.fold_left
+ (fun len (son, delay) ->
+ max len (longest_path critical_outputs son + delay))
+ 0 sons
+ end;
+ node.length
+
+(* Given a list of instructions with estimated start date, choose one
+ that we can start (start date <= current date) and that has
+ maximal distance to result. If we can't find any, return None. *)
+
+let extract_ready_instr date queue =
+ let rec extract best = function
+ [] ->
+ if best == dummy_node then None else Some best
+ | instr :: rem ->
+ let new_best =
+ if instr.date <= date & instr.length > best.length
+ then instr else best in
+ extract new_best rem in
+ extract dummy_node queue
+
+(* Remove an instruction from the ready queue *)
+
+let rec remove_instr node = function
+ [] -> []
+ | instr :: rem ->
+ if instr == node then rem else instr :: remove_instr node rem
+
+(* Print the dag *)
+
+(****
+open Format
+
+let printed = ref ([] : (code_dag_node * int) list)
+let print_counter = ref 0
+
+let rec print_node n =
+ try
+ List.assq n !printed
+ with Not_found ->
+ let i = !print_counter in
+ incr print_counter;
+ printed := (n, i) :: !printed;
+ let num_sons =
+ List.map (fun (son, delay) -> (print_node son, delay)) n.sons in
+ print_int i; print_string ": ";
+ let (Lop op) = n.instr.desc in
+ Printmach.operation op n.instr.arg n.instr.res; print_newline();
+ print_string " Distance to output: ";
+ print_int n.length; print_newline();
+ print_string " Sons: ";
+ List.iter
+ (fun (son, delay) ->
+ print_int son; print_string "/"; print_int delay; print_space())
+ num_sons;
+ print_newline();
+ i
+***)
+
+(* Schedule a basic block, adding its instructions in front of the given
+ instruction sequence *)
+
+let rec reschedule ready_queue date cont =
+ match ready_queue with
+ [] -> cont
+ | _ ->
+ (* Find "most ready" instruction in queue *)
+ match extract_ready_instr date ready_queue with
+ None ->
+ (* Try again, one cycle later *)
+ reschedule ready_queue (date + 1) cont
+ | Some node ->
+ (* Update the start date and number of ancestors emitted of
+ all descendents of this node. Enter those that become ready
+ in the queue. *)
+ let new_queue = ref (remove_instr node ready_queue) in
+ List.iter
+ (fun (son, delay) ->
+ let completion_date = date + delay in
+ if son.date < completion_date then son.date <- completion_date;
+ son.emitted_ancestors <- son.emitted_ancestors + 1;
+ if son.emitted_ancestors = son.ancestors then
+ new_queue := son :: !new_queue)
+ node.sons;
+ instr_cons node.instr.desc node.instr.arg node.instr.res
+ (reschedule !new_queue (date + 1) cont)
+
+(* Schedule basic blocks in an instruction sequence *)
+
+let rec schedule i =
+ match i.desc with
+ Lend -> i
+ | Lop op when in_basic_block op ->
+ clear_code_dag();
+ schedule_block [] i
+ | _ ->
+ { desc = i.desc; arg = i.arg; res = i.res; live = i.live;
+ next = schedule i.next }
+
+and schedule_block ready_queue i =
+ match i.desc with
+ Lop op when in_basic_block op ->
+ schedule_block (add_instruction ready_queue i) i.next
+ | _ ->
+ let critical_outputs =
+ match i.desc with
+ Lop(Icall_ind | Itailcall_ind) -> [| i.arg.(0) |]
+ | Lop(Icall_imm _ | Itailcall_imm _ | Iextcall(_, _)) -> [||]
+ | _ -> i.arg in
+ List.iter (longest_path critical_outputs) ready_queue;
+(***
+ print_string "******"; print_newline();
+ printed := []; print_counter := 0;
+ List.iter print_node ready_queue;
+***)
+ reschedule ready_queue 0 (schedule i)
+
+(* Entry point *)
+(* Don't bother to schedule for initialization code and the like. *)
+
+let fundecl f =
+ if Proc.need_scheduling & f.fun_fast then begin
+ let new_body = schedule f.fun_body in
+ clear_code_dag();
+ { fun_name = f.fun_name;
+ fun_body = new_body;
+ fun_fast = f.fun_fast }
+ end else
+ f
diff --git a/asmcomp/scheduling.mli b/asmcomp/scheduling.mli
new file mode 100644
index 0000000000..c338fa1e54
--- /dev/null
+++ b/asmcomp/scheduling.mli
@@ -0,0 +1,16 @@
+(***********************************************************************)
+(* *)
+(* Caml Special Light *)
+(* *)
+(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
+(* *)
+(* Copyright 1995 Institut National de Recherche en Informatique et *)
+(* Automatique. Distributed only by permission. *)
+(* *)
+(***********************************************************************)
+
+(* $Id$ *)
+
+(* Instruction scheduling *)
+
+val fundecl: Linearize.fundecl -> Linearize.fundecl