diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 1995-08-13 09:31:50 +0000 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 1995-08-13 09:31:50 +0000 |
commit | 0b9972d50fbacf54ea3fc91794410fb43772480a (patch) | |
tree | e7757c2e72e03d9c92e17ad6236726dbde9d6a5c /asmcomp | |
parent | 937fece4e311e11edcc3f0651e7ea24ae46c6d16 (diff) | |
download | ocaml-0b9972d50fbacf54ea3fc91794410fb43772480a.tar.gz |
Ajout du scheduler (a tester serieusement)
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@202 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
Diffstat (limited to 'asmcomp')
-rw-r--r-- | asmcomp/asmgen.ml | 61 | ||||
-rw-r--r-- | asmcomp/emit_sparc.mlp | 87 | ||||
-rw-r--r-- | asmcomp/linearize.ml | 5 | ||||
-rw-r--r-- | asmcomp/linearize.mli | 4 | ||||
-rw-r--r-- | asmcomp/proc.mli | 4 | ||||
-rw-r--r-- | asmcomp/proc_alpha.ml | 20 | ||||
-rw-r--r-- | asmcomp/proc_i386.ml | 18 | ||||
-rw-r--r-- | asmcomp/proc_mips.ml | 16 | ||||
-rw-r--r-- | asmcomp/proc_sparc.ml | 17 | ||||
-rw-r--r-- | asmcomp/scheduling.ml | 274 | ||||
-rw-r--r-- | asmcomp/scheduling.mli | 16 |
11 files changed, 456 insertions, 66 deletions
diff --git a/asmcomp/asmgen.ml b/asmcomp/asmgen.ml index 10758d0772..416f2ace6e 100644 --- a/asmcomp/asmgen.ml +++ b/asmcomp/asmgen.ml @@ -22,43 +22,54 @@ type error = Assembler_error of string exception Error of error +let liveness phrase = + Liveness.fundecl phrase; phrase + +let dump_if flag message phrase = + if !flag then Printmach.phase message phrase; + phrase + +let dump_linear_if flag message phrase = + if !flag then begin + print_string "*** "; print_string message; print_newline(); + Printlinear.fundecl phrase; print_newline() + end; + phrase + let rec regalloc fd = - if !dump_live then Printmach.phase "Liveness analysis" fd; + dump_if dump_live "Liveness analysis" fd; Interf.build_graph fd; if !dump_interf then Printmach.interferences(); if !dump_prefer then Printmach.preferences(); Coloring.allocate_registers(); - if !dump_regalloc then - Printmach.phase "After register allocation" fd; + dump_if dump_regalloc "After register allocation" fd; let (newfd, redo_regalloc) = Reload.fundecl fd in - if !dump_reload then - Printmach.phase "After insertion of reloading code" newfd; + dump_if dump_reload "After insertion of reloading code" newfd; if redo_regalloc then begin Reg.reinit(); Liveness.fundecl newfd; regalloc newfd end else newfd +let (++) x f = f x + let compile_fundecl fd_cmm = Reg.reset(); - let fd_sel = Selection.fundecl fd_cmm in - if !dump_selection then - Printmach.phase "After instruction selection" fd_sel; - Liveness.fundecl fd_sel; - if !dump_live then Printmach.phase "Liveness analysis" fd_sel; - let fd_spill = Spill.fundecl fd_sel in - Liveness.fundecl fd_spill; - if !dump_spill then - Printmach.phase "After spilling" fd_spill; - let fd_split = Split.fundecl fd_spill in - Liveness.fundecl fd_split; - if !dump_split then - Printmach.phase "After live range splitting" fd_split; - let fd_reload = regalloc fd_split in - let fd_linear = Linearize.fundecl fd_reload in - if !dump_linear then begin - print_string "*** Linearized code"; print_newline(); - Printlinear.fundecl fd_linear; print_newline() - end; - Emit.fundecl fd_linear + fd_cmm + ++ Selection.fundecl + ++ dump_if dump_selection "After instruction selection" + ++ liveness + ++ dump_if dump_live "Liveness analysis" + ++ Spill.fundecl + ++ liveness + ++ dump_if dump_spill "After spilling" + ++ Split.fundecl + ++ dump_if dump_split "After live range splitting" + ++ liveness + ++ regalloc + ++ Linearize.fundecl + ++ dump_linear_if dump_linear "Linearized code" + ++ Scheduling.fundecl + ++ dump_linear_if dump_scheduling "After instruction scheduling" + ++ Emit.fundecl let compile_phrase p = if !dump_cmm then begin Printcmm.phrase p; print_newline() end; diff --git a/asmcomp/emit_sparc.mlp b/asmcomp/emit_sparc.mlp index 59f89d134f..1b3fea88c5 100644 --- a/asmcomp/emit_sparc.mlp +++ b/asmcomp/emit_sparc.mlp @@ -211,6 +211,13 @@ let name_for_int_operation = function | Iasr -> "sra" | _ -> Misc.fatal_error "Emit.name_for_int_operation" +let name_for_float_operation = function + Iaddf -> "faddd" + | Isbuf -> "fsubd" + | Imulf -> "fmuld" + | Idivf -> "fdivd" + | _ -> Misc.fatal_error "Emit.name_for_float_operation" + let name_for_int_comparison = function Isigned Ceq -> "be" | Isigned Cne -> "bne" | Isigned Cle -> "ble" | Isigned Cgt -> "bg" @@ -234,8 +241,7 @@ let emit_instr i = Lend -> () | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in - if src.loc <> dst.loc then begin - match (src, dst) with + begin match (src, dst) with {loc = Reg rs; typ = (Int | Addr)}, {loc = Reg rd} -> ` mov {emit_reg src}, {emit_reg dst}\n` | {loc = Reg rs; typ = Float}, {loc = Reg rd; typ = Float} -> @@ -288,35 +294,42 @@ let emit_instr i = let n = frame_size() in if !contains_calls then begin ` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`; - ` andn %o7, 1, %o7\n` - end; - ` jmp {emit_reg i.arg.(0)}\n`; - if n > 0 then - ` add %sp, {emit_int n}, %sp\n` - else - ` nop\n` - | Lop(Itailcall_imm s) -> - if s = !function_name then begin - ` b {emit_label !tailrec_entry_point}\n`; - ` nop\n` + ` add %sp, {emit_int n}, %sp\n`; + ` jmp {emit_reg i.arg.(0)}\n`; + ` andn %o7, 1, %o7\n` (* in delay slot *) end else begin - let n = frame_size() in + ` jmp {emit_reg i.arg.(0)}\n`; + ` add %sp, {emit_int n}, %sp\n` (* in delay slot *) + end + | Lop(Itailcall_imm s) -> + let n = frame_size() in + if s = !function_name then if !contains_calls then begin ` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`; - ` andn %o7, 1, %o7\n` - end; - ` sethi %hi({emit_symbol s}), %g1\n`; - ` jmp %g1 + %lo({emit_symbol s})\n`; - if n > 0 then - ` add %sp, {emit_int n}, %sp\n` - else + ` andn %o7, 1, %o7\n`; + ` b {emit_label !tailrec_entry_point}\n`; + ` st %o7, [%sp + {emit_int(n - 4 + 96)}]\n` + end else + ` b {emit_label !tailrec_entry_point}\n`; ` nop\n` - end + end + else + if !contains_calls then begin + ` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`; + ` add %sp, {emit_int n}, %sp\n`; + ` sethi %hi({emit_symbol s}), %g1\n`; + ` jmp %g1 + %lo({emit_symbol s})\n`; + ` andn %o7, 1, %o7\n` (* in delay slot *) + end else begin + ` sethi %hi({emit_symbol s}), %g1\n`; + ` jmp %g1 + %lo({emit_symbol s})\n`; + ` add %sp, {emit_int n}, %sp\n` (* in delay slot *) + end | Lop(Iextcall(s, alloc)) -> if alloc then begin ` sethi %hi({emit_symbol s}), %g4\n`; `{record_frame i.live} call {emit_symbol "caml_c_call"}\n`; - ` or %g4, %lo({emit_symbol s}), %g4\n` + ` or %g4, %lo({emit_symbol s}), %g4\n` (* in delay slot *) end else begin ` call {emit_symbol s}\n`; ` nop\n` @@ -358,14 +371,14 @@ let emit_instr i = ` sub %g6, {emit_int n}, %g6\n`; ` cmp %g6, %g7\n`; ` bgeu {emit_label lbl_cont}\n`; - ` add %g6, 4, {emit_reg i.res.(0)}\n`; + ` add %g6, 4, {emit_reg i.res.(0)}\n`; (* in delay slot *) `{record_frame i.live} call {emit_symbol "caml_call_gc"}\n`; - ` mov {emit_int n}, %g4\n`; + ` mov {emit_int n}, %g4\n`; (* in delay slot *) ` add %g6, 4, {emit_reg i.res.(0)}\n`; `{emit_label lbl_cont}:\n` end else begin `{record_frame i.live} call {emit_symbol "caml_alloc"}\n`; - ` mov {emit_int n}, %g4\n`; + ` mov {emit_int n}, %g4\n`; (* in delay slot *) ` add %g6, 4, {emit_reg i.res.(0)}\n` end | Lop(Iintop Idiv) -> @@ -451,14 +464,9 @@ let emit_instr i = | Lop(Iintop_imm(op, n)) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int n}, {emit_reg i.res.(0)}\n` - | Lop(Iaddf) -> - ` faddd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` - | Lop(Isubf) -> - ` fsubd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` - | Lop(Imulf) -> - ` fmuld {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` - | Lop(Idivf) -> - ` fdivd {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` + | Lop(Iaddf | Isubf | Imulf | Idivf) -> + let instr = name_for_float_operation op in + ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.res.(0)}\n` | Lop(Ifloatofint) -> ` sub %sp, 4, %sp\n`; ` st {emit_reg i.arg.(0)}, [%sp + 96]\n`; @@ -477,13 +485,10 @@ let emit_instr i = let n = frame_size() in if !contains_calls then begin ` ld [%sp + {emit_int(n - 4 + 96)}], %o7\n`; - ` andn %o7, 1, %o7\n` + ` andn %o7, 1, %o7\n` (* poor scheduling *) end; ` retl\n`; - if n > 0 then - ` add %sp, {emit_int n}, %sp\n` - else - ` nop\n` + ` add %sp, {emit_int n}, %sp\n` | Llabel lbl -> `{emit_label lbl}:\n` | Lbranch lbl -> @@ -543,7 +548,7 @@ let emit_instr i = ` or %g1, %lo({emit_label lbl_jumptbl}), %g1\n`; ` sll {emit_reg i.arg.(0)}, 2, %g4\n`; ` ld [%g1 + %g4], %g1\n`; - ` jmp %g1\n`; + ` jmp %g1\n`; (* poor scheduling *) ` nop\n`; `{emit_label lbl_jumptbl}:`; for i = 0 to Array.length jumptbl - 1 do @@ -564,7 +569,7 @@ let emit_instr i = | Lraise -> ` mov %g5, %sp\n`; ` ldd [%sp + 96], %g4\n`; (* Load %g4 and %g5 *) - ` jmp %g4 + 8\n`; + ` jmp %g4 + 8\n`; (* poor scheduling *) ` add %sp, 8, %sp\n` let rec emit_all i = diff --git a/asmcomp/linearize.ml b/asmcomp/linearize.ml index 49095b340a..025890962b 100644 --- a/asmcomp/linearize.ml +++ b/asmcomp/linearize.ml @@ -72,6 +72,11 @@ let rec end_instr = res = [||]; live = Reg.Set.empty } +(* Cons an instruction (live empty) *) + +let instr_cons d a r n = + { desc = d; next = n; arg = a; res = r; live = Reg.Set.empty } + (* Cons a simple instruction (arg, res, live empty) *) let cons_instr d n = diff --git a/asmcomp/linearize.mli b/asmcomp/linearize.mli index 837a1ddddd..45ccd85cb3 100644 --- a/asmcomp/linearize.mli +++ b/asmcomp/linearize.mli @@ -37,6 +37,10 @@ and instruction_desc = | Lpoptrap | Lraise +val end_instr: instruction +val instr_cons: + instruction_desc -> Reg.t array -> Reg.t array -> instruction -> instruction + type fundecl = { fun_name: string; fun_body: instruction; diff --git a/asmcomp/proc.mli b/asmcomp/proc.mli index 231af20b8f..8b1d5fa3b0 100644 --- a/asmcomp/proc.mli +++ b/asmcomp/proc.mli @@ -63,6 +63,10 @@ val reload_operation: (Reg.t -> Reg.t) -> Mach.operation -> Reg.t array -> Reg.t array -> Reg.t array * Reg.t array +(* Latency info for instruction scheduling *) +val need_scheduling: bool +val oper_latency: Mach.operation -> int + (* Info for laying out the stack frame *) val num_stack_slots: int array val contains_calls: bool ref diff --git a/asmcomp/proc_alpha.ml b/asmcomp/proc_alpha.ml index 88c52cf2c2..1f61b31cf3 100644 --- a/asmcomp/proc_alpha.ml +++ b/asmcomp/proc_alpha.ml @@ -237,6 +237,26 @@ let max_register_pressure = function let reload_test makereg tst args = raise Use_default let reload_operation makereg op args res = raise Use_default +(* Latencies (in cycles). + Cf. Appendix A of the Alpha architecture handbook *) + +let need_scheduling = true + +let oper_latency = function + Ireload -> 3 + | Iload(Word, _) -> 3 + | Iload(_, _) -> 5 (* 3 for load, 2 for extension *) + | Iconst_symbol _ -> 3 (* turned into a load *) + | Iconst_float _ -> 3 (* turned into a load *) + | Iintop Imul -> 10 + | Iintop_imm(Imul, _) -> 10 + | Iintop(Ilsl | Ilsr | Iasr) -> 2 + | Iintop_imm((Ilsl | Ilsr | Iasr), _) -> 2 + | Iaddf | Isubf -> 4 + | Imulf -> 5 + | Idivf -> 10 + | _ -> 1 + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/proc_i386.ml b/asmcomp/proc_i386.ml index 04456fc69e..2ed3daf7f5 100644 --- a/asmcomp/proc_i386.ml +++ b/asmcomp/proc_i386.ml @@ -327,6 +327,24 @@ let reload_operation makereg op arg res = | _ -> (* Other operations: all args and results in registers *) raise Use_default +(* Instruction scheduling. Only effective on the Pentium. *) + +let need_scheduling = true + +(* Wild guesses *) + +let oper_latency = function + Ireload -> 2 + | Iload(_, _) -> 2 + | Iintop_imm(Imul, _) -> 10 + | Iintop(Idiv | Imod) -> 20 + | Iintop_imm((Idiv | Imod), _) -> 20 + | Iaddf | Isubf -> 5 + | Imulf -> 10 + | Idivf -> 20 + | Ispecific(Istore_int(_, _) | Istore_symbol(_, _) | Ioffset_loc(_, _)) -> -1 + | _ -> 1 + (* Layout of the stack frame *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/proc_mips.ml b/asmcomp/proc_mips.ml index fc5ed7a6c6..61ae5e5252 100644 --- a/asmcomp/proc_mips.ml +++ b/asmcomp/proc_mips.ml @@ -215,6 +215,22 @@ let max_register_pressure = function let reload_test makereg tst args = raise Use_default let reload_operation makereg op args res = raise Use_default +(* Latencies (in cycles). Use R3000 timings. *) + +let need_scheduling = true + +let oper_latency = function + Ireload -> 2 + | Iload(_, _) -> 2 + | Iintop Imul -> 10 (* wild guess *) + | Iintop_imm(Imul, _) -> 10 + | Iintop(Idiv | Imod) -> 20 (* wild guess *) + | Iintop_imm(Idiv | Imod, _) -> 20 + | Iaddf | Isubf -> 2 + | Imulf -> 5 + | Idivf -> 19 + | _ -> 1 + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/proc_sparc.ml b/asmcomp/proc_sparc.ml index 4dea3141ed..47cd732e41 100644 --- a/asmcomp/proc_sparc.ml +++ b/asmcomp/proc_sparc.ml @@ -230,6 +230,23 @@ let max_register_pressure = function let reload_test makereg tst args = raise Use_default let reload_operation makereg op args res = raise Use_default +(* Latencies (in cycles). Wild guesses. *) + +let need_scheduling = true + +let oper_latency = function + Ireload -> 3 + | Iload(_, _) -> 3 (* 3 for load, 2 for extension *) + | Iconst_float -> 3 (* turned into a load *) + | Iintop Imul -> 10 + | Iintop_imm(Imul, _) -> 10 + | Iintop(Idiv | Imod) -> 20 + | Iintop_imm(Idiv | Imod, _) -> 20 + | Iaddf | Isubf -> 3 + | Imulf -> 5 + | Idivf -> 15 + | _ -> 1 + (* Layout of the stack *) let num_stack_slots = [| 0; 0 |] diff --git a/asmcomp/scheduling.ml b/asmcomp/scheduling.ml new file mode 100644 index 0000000000..746ce5a915 --- /dev/null +++ b/asmcomp/scheduling.ml @@ -0,0 +1,274 @@ +(***********************************************************************) +(* *) +(* Caml Special Light *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1995 Institut National de Recherche en Informatique et *) +(* Automatique. Distributed only by permission. *) +(* *) +(***********************************************************************) + +(* $Id$ *) + +(* Instruction scheduling *) + +open Misc +open Reg +open Mach +open Linearize + +(* Determine whether an operation ends a basic block or not *) + +let in_basic_block = function + Icall_ind | Icall_imm _ | Itailcall_ind | Itailcall_imm _ -> false + | Iextcall(_, _) -> false + | Istackoffset _ -> false + | Istore(_, _) -> false + | Ialloc _ -> false + | op -> Proc.oper_latency op >= 0 + (* The processor description can return a latency of -1 to signal + a specific instruction that terminates a basic block, e.g. + Istore_symbol for the I386. *) + +(* Estimate the delay needed to evaluate an instruction. *) + +let instr_latency instr = + match instr.desc with + Lop op -> Proc.oper_latency op + | _ -> fatal_error "Scheduling.instr_latency" + +(* Representation of the code DAG. *) + +type code_dag_node = + { instr: instruction; (* The instruction *) + delay: int; (* How many cycles it needs *) + mutable sons: (code_dag_node * int) list; + (* Instructions that depend on it *) + mutable date: int; (* Start date *) + mutable length: int; (* Length of longest path to result *) + mutable ancestors: int; (* Number of ancestors *) + mutable emitted_ancestors: int } (* Number of emitted ancestors *) + +let dummy_node = + { instr = end_instr; delay = 0; sons = []; date = 0; + length = -1; ancestors = 0; emitted_ancestors = 0 } + +(* The code dag itself is represented by two tables from registers to nodes: + - "results" maps registers to the instructions that produced them; + - "uses" maps registers to the instructions that use them. *) + +let code_results = (Hashtbl.new 31 : (location, code_dag_node) Hashtbl.t) +let code_uses = (Hashtbl.new 31 : (location, code_dag_node) Hashtbl.t) + +let clear_code_dag () = + Hashtbl.clear code_results; + Hashtbl.clear code_uses + +(* Add an instruction to the code DAG *) + +let add_edge ancestor son delay = + ancestor.sons <- (son, delay) :: ancestor.sons; + son.ancestors <- son.ancestors + 1 + +let add_instruction ready_queue instr = + let delay = instr_latency instr in + let node = + { instr = instr; + delay = delay; + sons = []; + date = 0; + length = -1; + ancestors = 0; + emitted_ancestors = 0 } in + (* Add edges from all instructions that define one of the registers used *) + for i = 0 to Array.length instr.arg - 1 do + try + let ancestor = Hashtbl.find code_results instr.arg.(i).loc in + add_edge ancestor node ancestor.delay + with Not_found -> + () + done; + (* Also add edges from all instructions that use one of the results + of this instruction, so that evaluation order is preserved. *) + for i = 0 to Array.length instr.res - 1 do + let ancestors = Hashtbl.find_all code_uses instr.res.(i).loc in + List.iter (fun ancestor -> add_edge ancestor node 0) ancestors + done; + (* Also add edges from all instructions that have already defined one + of the results of this instruction, so that evaluation order + is preserved. *) + for i = 0 to Array.length instr.res - 1 do + try + let ancestor = Hashtbl.find code_results instr.res.(i).loc in + add_edge ancestor node 0 + with Not_found -> + () + done; + (* Remember the registers used and produced by this instruction *) + for i = 0 to Array.length instr.res - 1 do + Hashtbl.add code_results instr.res.(i).loc node + done; + for i = 0 to Array.length instr.arg - 1 do + Hashtbl.add code_uses instr.arg.(i).loc node + done; + (* If this is a root instruction (all arguments already computed), + add it to the ready queue *) + if node.ancestors = 0 then node :: ready_queue else ready_queue + +(* Compute length of longest path to a result. + For leafs of the DAG, see whether their result is used in the instruction + immediately following the basic block (a "critical" output). *) + +let is_critical critical_outputs results = + try + for i = 0 to Array.length results - 1 do + let r = results.(i).loc in + for j = 0 to Array.length critical_outputs - 1 do + if critical_outputs.(j).loc = r then raise Exit + done + done; + false + with Exit -> + true + +let rec longest_path critical_outputs node = + if node.length < 0 then begin + match node.sons with + [] -> + node.length <- + if is_critical critical_outputs node.instr.res + then node.delay + else 0 + | sons -> + node.length <- + List.fold_left + (fun len (son, delay) -> + max len (longest_path critical_outputs son + delay)) + 0 sons + end; + node.length + +(* Given a list of instructions with estimated start date, choose one + that we can start (start date <= current date) and that has + maximal distance to result. If we can't find any, return None. *) + +let extract_ready_instr date queue = + let rec extract best = function + [] -> + if best == dummy_node then None else Some best + | instr :: rem -> + let new_best = + if instr.date <= date & instr.length > best.length + then instr else best in + extract new_best rem in + extract dummy_node queue + +(* Remove an instruction from the ready queue *) + +let rec remove_instr node = function + [] -> [] + | instr :: rem -> + if instr == node then rem else instr :: remove_instr node rem + +(* Print the dag *) + +(**** +open Format + +let printed = ref ([] : (code_dag_node * int) list) +let print_counter = ref 0 + +let rec print_node n = + try + List.assq n !printed + with Not_found -> + let i = !print_counter in + incr print_counter; + printed := (n, i) :: !printed; + let num_sons = + List.map (fun (son, delay) -> (print_node son, delay)) n.sons in + print_int i; print_string ": "; + let (Lop op) = n.instr.desc in + Printmach.operation op n.instr.arg n.instr.res; print_newline(); + print_string " Distance to output: "; + print_int n.length; print_newline(); + print_string " Sons: "; + List.iter + (fun (son, delay) -> + print_int son; print_string "/"; print_int delay; print_space()) + num_sons; + print_newline(); + i +***) + +(* Schedule a basic block, adding its instructions in front of the given + instruction sequence *) + +let rec reschedule ready_queue date cont = + match ready_queue with + [] -> cont + | _ -> + (* Find "most ready" instruction in queue *) + match extract_ready_instr date ready_queue with + None -> + (* Try again, one cycle later *) + reschedule ready_queue (date + 1) cont + | Some node -> + (* Update the start date and number of ancestors emitted of + all descendents of this node. Enter those that become ready + in the queue. *) + let new_queue = ref (remove_instr node ready_queue) in + List.iter + (fun (son, delay) -> + let completion_date = date + delay in + if son.date < completion_date then son.date <- completion_date; + son.emitted_ancestors <- son.emitted_ancestors + 1; + if son.emitted_ancestors = son.ancestors then + new_queue := son :: !new_queue) + node.sons; + instr_cons node.instr.desc node.instr.arg node.instr.res + (reschedule !new_queue (date + 1) cont) + +(* Schedule basic blocks in an instruction sequence *) + +let rec schedule i = + match i.desc with + Lend -> i + | Lop op when in_basic_block op -> + clear_code_dag(); + schedule_block [] i + | _ -> + { desc = i.desc; arg = i.arg; res = i.res; live = i.live; + next = schedule i.next } + +and schedule_block ready_queue i = + match i.desc with + Lop op when in_basic_block op -> + schedule_block (add_instruction ready_queue i) i.next + | _ -> + let critical_outputs = + match i.desc with + Lop(Icall_ind | Itailcall_ind) -> [| i.arg.(0) |] + | Lop(Icall_imm _ | Itailcall_imm _ | Iextcall(_, _)) -> [||] + | _ -> i.arg in + List.iter (longest_path critical_outputs) ready_queue; +(*** + print_string "******"; print_newline(); + printed := []; print_counter := 0; + List.iter print_node ready_queue; +***) + reschedule ready_queue 0 (schedule i) + +(* Entry point *) +(* Don't bother to schedule for initialization code and the like. *) + +let fundecl f = + if Proc.need_scheduling & f.fun_fast then begin + let new_body = schedule f.fun_body in + clear_code_dag(); + { fun_name = f.fun_name; + fun_body = new_body; + fun_fast = f.fun_fast } + end else + f diff --git a/asmcomp/scheduling.mli b/asmcomp/scheduling.mli new file mode 100644 index 0000000000..c338fa1e54 --- /dev/null +++ b/asmcomp/scheduling.mli @@ -0,0 +1,16 @@ +(***********************************************************************) +(* *) +(* Caml Special Light *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1995 Institut National de Recherche en Informatique et *) +(* Automatique. Distributed only by permission. *) +(* *) +(***********************************************************************) + +(* $Id$ *) + +(* Instruction scheduling *) + +val fundecl: Linearize.fundecl -> Linearize.fundecl |