(**************************************************************************) (* *) (* OCaml *) (* *) (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) (* *) (* Copyright 1996 Institut National de Recherche en Informatique et *) (* en Automatique. *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) (* special exception on linking described in the file LICENSE. *) (* *) (**************************************************************************) (* Output the DFA tables and its entry points *) open Printf open Lexgen open Common type ctx = { oc: out_channel; has_refill: bool; goto_state: (ctx -> string -> int -> unit); last_action: int option; } let pr ctx = fprintf ctx.oc let output_auto_defs ctx = if ctx.has_refill then begin pr ctx "\n"; pr ctx "let rec __ocaml_lex_refill_buf lexbuf _buf _len _curr _last \ _last_action state k =\n"; pr ctx " if lexbuf.Lexing.lex_eof_reached then\n"; pr ctx " state lexbuf _last_action _buf _len _curr _last k 256\n"; pr ctx " else begin\n"; pr ctx " lexbuf.Lexing.lex_curr_pos <- _curr;\n"; pr ctx " lexbuf.Lexing.lex_last_pos <- _last;\n"; pr ctx " __ocaml_lex_refill\n"; pr ctx " (fun lexbuf ->\n"; pr ctx " let _curr = lexbuf.Lexing.lex_curr_pos in\n"; pr ctx " let _last = lexbuf.Lexing.lex_last_pos in\n"; pr ctx " let _len = lexbuf.Lexing.lex_buffer_len in\n"; pr ctx " let _buf = lexbuf.Lexing.lex_buffer in\n"; pr ctx " if _curr < _len then\n"; pr ctx " state lexbuf _last_action _buf _len (_curr + 1) \ _last k\n"; pr ctx " (Char.code (Bytes.unsafe_get _buf _curr))\n"; pr ctx " else\n"; pr ctx " __ocaml_lex_refill_buf lexbuf _buf _len _curr _last \ _last_action\n"; pr ctx " state k\n"; pr ctx " )\n"; pr ctx " lexbuf\n"; pr ctx " end\n"; pr ctx "\n"; end else begin pr ctx "\n"; pr ctx "let rec __ocaml_lex_refill_buf lexbuf _buf _len _curr _last =\n"; pr ctx " if lexbuf.Lexing.lex_eof_reached then\n"; pr ctx " 256, _buf, _len, _curr, _last\n"; pr ctx " else begin\n"; pr ctx " lexbuf.Lexing.lex_curr_pos <- _curr;\n"; pr ctx " lexbuf.Lexing.lex_last_pos <- _last;\n"; pr ctx " lexbuf.Lexing.refill_buff lexbuf;\n"; pr ctx " let _curr = lexbuf.Lexing.lex_curr_pos in\n"; pr ctx " let _last = lexbuf.Lexing.lex_last_pos in\n"; pr ctx " let _len = lexbuf.Lexing.lex_buffer_len in\n"; pr ctx " let _buf = lexbuf.Lexing.lex_buffer in\n"; pr ctx " if _curr < _len then\n"; pr ctx " Char.code (Bytes.unsafe_get _buf _curr), _buf, _len, \ (_curr + 1), _last\n"; pr ctx " else\n"; pr ctx " __ocaml_lex_refill_buf lexbuf _buf _len _curr _last\n"; pr ctx " end\n"; pr ctx "\n"; end let output_memory_actions pref oc = function | [] -> () | mvs -> output_string oc pref; output_string oc "(* " ; fprintf oc "L=%d " (List.length mvs) ; List.iter (fun mv -> match mv with | Copy (tgt, src) -> fprintf oc "[%d] <- [%d] ;" tgt src | Set tgt -> fprintf oc "[%d] <- p ; " tgt) mvs ; output_string oc " *)\n" ; List.iter (fun mv -> match mv with | Copy (tgt, src) -> fprintf oc "%s%a <- %a ;\n" pref output_mem_access tgt output_mem_access src | Set tgt -> fprintf oc "%s%a <- _curr;\n" pref output_mem_access tgt) mvs let output_pats ctx = function | [x] -> pr ctx "| %d" x | pats -> List.iter (fun p -> pr ctx "|%d" p) pats let last_action ctx = match ctx.last_action with | None -> "_last_action" | Some i -> Printf.sprintf "%i (* = last_action *)" i let output_action ctx pref mems r = output_memory_actions pref ctx.oc mems; match r with | Backtrack -> pr ctx "%slet _curr = _last in\n\ %slexbuf.Lexing.lex_curr_pos <- _curr;\n\ %slexbuf.Lexing.lex_last_pos <- _last;\n" pref pref pref; if ctx.has_refill then pr ctx "%sk lexbuf %s\n" pref (last_action ctx) else pr ctx "%s%s\n" pref (last_action ctx) | Goto n -> ctx.goto_state ctx pref n let output_pat ctx i = if i >= 256 then pr ctx "|eof" else pr ctx "|'%s'" (Char.escaped (Char.chr i)) let output_clause ctx pref pats mems r = pr ctx "%s(* " pref; List.iter (output_pat ctx) pats; pr ctx " *)\n%s" pref; output_pats ctx pats; pr ctx " ->\n"; output_action ctx (" "^pref) mems r let output_default_clause ctx pref mems r = pr ctx "%s| _ ->\n" pref; output_action ctx (" "^pref) mems r let output_moves ctx pref moves = let t = Hashtbl.create 17 in let add_move i (m,mems) = let mems,r = try Hashtbl.find t m with Not_found -> mems,[] in Hashtbl.replace t m (mems,(i::r)) in for i = 0 to 256 do add_move i moves.(i) done ; let most_frequent = ref Backtrack and most_mems = ref [] and size = ref 0 in Hashtbl.iter (fun m (mems,pats) -> let size_m = List.length pats in if size_m > !size then begin most_frequent := m ; most_mems := mems ; size := size_m end) t ; Hashtbl.iter (fun m (mems,pats) -> if m <> !most_frequent then output_clause ctx pref (List.rev pats) mems m) t ; output_default_clause ctx pref !most_mems !most_frequent let output_tag_actions pref ctx mvs = pr ctx "%s(*" pref; List.iter (fun i -> match i with | SetTag (t,m) -> pr ctx " t%d <- [%d] ;" t m | EraseTag t -> pr ctx " t%d <- -1 ;" t) mvs ; pr ctx " *)\n" ; List.iter (fun i -> match i with | SetTag (t,m) -> pr ctx "%s%a <- %a ;\n" pref output_mem_access t output_mem_access m | EraseTag t -> pr ctx "%s%a <- -1 ;\n" pref output_mem_access t) mvs let output_trans_body pref ctx = function | Perform (n,mvs) -> output_tag_actions pref ctx mvs ; pr ctx "%slexbuf.Lexing.lex_curr_pos <- _curr;\n" pref; pr ctx "%slexbuf.Lexing.lex_last_pos <- _last;\n" pref; pr ctx "%s%s%d\n" pref (if ctx.has_refill then "k lexbuf " else "") n | Shift (trans, move) -> let ctx = match trans with | Remember (n,mvs) -> output_tag_actions pref ctx mvs ; pr ctx "%slet _last = _curr in\n" pref; begin match ctx.last_action with | Some i when i = n -> pr ctx "%s(* let _last_action = %d in*)\n" pref n; ctx | _ -> pr ctx "%slet _last_action = %d in\n" pref n; {ctx with last_action = Some n} end | No_remember -> ctx in if ctx.has_refill then begin (* TODO: bind this 'state' function at toplevel instead *) pr ctx "%slet state lexbuf _last_action _buf _len _curr _last k = function\n" pref; output_moves ctx pref move; pr ctx "%sin\n\ %sif _curr >= _len then\n\ %s __ocaml_lex_refill_buf lexbuf _buf _len _curr _last \ _last_action state k\n\ %selse\n\ %s state lexbuf _last_action _buf _len (_curr + 1) _last k\n\ %s (Char.code (Bytes.unsafe_get _buf _curr))\n" pref pref pref pref pref pref end else begin pr ctx "%slet next_char, _buf, _len, _curr, _last =\n\ %s if _curr >= _len then\n\ %s __ocaml_lex_refill_buf lexbuf _buf _len _curr _last\n\ %s else\n\ %s Char.code (Bytes.unsafe_get _buf _curr),\n\ %s _buf, _len, (_curr + 1), _last\n\ %sin\n\ %sbegin match next_char with\n" pref pref pref pref pref pref pref pref; output_moves ctx (pref ^ " ") move; pr ctx "%send\n" pref end let output_automata ctx auto inline = output_auto_defs ctx; let n = Array.length auto in let first = ref true in for i = 0 to n-1 do if not inline.(i) then begin pr ctx "%s __ocaml_lex_state%d lexbuf _last_action _buf _len _curr _last %s=\n" (if !first then "let rec" else "\nand") i (if ctx.has_refill then "k " else ""); output_trans_body " " ctx auto.(i); first := false; end done; pr ctx "\n\n" (* Output the entries *) let output_init ctx pref e init_moves = if e.auto_mem_size > 0 then pr ctx "%slexbuf.Lexing.lex_mem <- Array.make %d (-1);\n" pref e.auto_mem_size; pr ctx "%slet _curr = lexbuf.Lexing.lex_curr_pos in\n" pref; pr ctx "%slet _last = _curr in\n" pref; pr ctx "%slet _len = lexbuf.Lexing.lex_buffer_len in\n" pref; pr ctx "%slet _buf = lexbuf.Lexing.lex_buffer in\n" pref; pr ctx "%slet _last_action = -1 in\n" pref; pr ctx "%slexbuf.Lexing.lex_start_pos <- _curr;\n" pref; output_memory_actions pref ctx.oc init_moves let output_rules ic ctx pref tr e = pr ctx "%sbegin\n" pref; pr ctx "%s let _curr_p = lexbuf.Lexing.lex_curr_p in\n" pref; pr ctx "%s if _curr_p != Lexing.dummy_pos then begin\n" pref; pr ctx "%s lexbuf.Lexing.lex_start_p <- _curr_p;\n" pref; pr ctx "%s lexbuf.Lexing.lex_curr_p <-\n" pref; pr ctx "%s {_curr_p with Lexing.pos_cnum =\n" pref; pr ctx "%s lexbuf.Lexing.lex_abs_pos+lexbuf.Lexing.lex_curr_pos}\n" pref; pr ctx "%s end\n" pref; pr ctx "%send;\n" pref; pr ctx "%smatch __ocaml_lex_result with\n" pref; List.iter (fun (num, env, loc) -> pr ctx "%s| %d ->\n" pref num; output_env ic ctx.oc tr env; copy_chunk ic ctx.oc tr loc true; pr ctx "\n") e.auto_actions; pr ctx "%s| _ -> raise (Failure \"lexing: empty token\")\n" pref let output_entry ic ctx tr e = let init_num, init_moves = e.auto_initial_state in pr ctx "%s %alexbuf =\n" e.auto_name output_args e.auto_args; if ctx.has_refill then begin pr ctx " let k lexbuf __ocaml_lex_result =\n"; output_rules ic ctx " " tr e; pr ctx " in\n"; output_init ctx " " e init_moves; ctx.goto_state ctx " " init_num end else begin pr ctx " let __ocaml_lex_result =\n"; output_init ctx " " e init_moves; ctx.goto_state ctx " " init_num; pr ctx " in\n"; output_rules ic ctx " " tr e end; pr ctx "\n\n" (* Determine which states to inline *) let choose_inlining entry_points transitions = let counters = Array.make (Array.length transitions) 0 in let count i = counters.(i) <- counters.(i) + 1 in List.iter (fun e -> count (fst e.auto_initial_state)) entry_points; Array.iter (function | Shift (_, a) -> let tbl = Hashtbl.create 8 in Array.iter (function | (Goto i, _) when not (Hashtbl.mem tbl i) -> Hashtbl.add tbl i (); count i | _ -> () ) a | Perform _ -> () ) transitions; Array.mapi (fun i -> function | Perform _ -> true | Shift _ -> counters.(i) = 1 ) transitions let goto_state inline transitions ctx pref n = if inline.(n) then output_trans_body pref ctx transitions.(n) else pr ctx "%s__ocaml_lex_state%d lexbuf %s _buf _len _curr _last%s\n" pref n (last_action ctx) (if ctx.has_refill then " k" else "") (* Main output function *) let output_lexdef ic oc tr header rh entry_points transitions trailer = copy_chunk ic oc tr header false; let has_refill = output_refill_handler ic oc tr rh in let inline = choose_inlining entry_points transitions in let ctx = { has_refill; oc; goto_state = goto_state inline transitions; last_action = None; } in output_automata ctx transitions inline; begin match entry_points with [] -> () | entry1 :: entries -> output_string oc "let rec "; output_entry ic ctx tr entry1; List.iter (fun e -> output_string oc "and "; output_entry ic ctx tr e) entries; output_string oc ";;\n\n"; end; copy_chunk ic oc tr trailer false