summaryrefslogtreecommitdiff
path: root/test/Lex/output.ml
blob: 2a94efe0bc072f83cc4d5867b131070fcdb50065 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
(***********************************************************************)
(*                                                                     *)
(*                           Objective Caml                            *)
(*                                                                     *)
(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
(*                                                                     *)
(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
(*  Automatique.  Distributed only by permission.                      *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)

(* Generating a DFA as a set of mutually recursive functions *)

open Syntax

let ic = ref stdin
let oc = ref stdout

(* 1- Generating the actions *)

let copy_buffer = String.create 1024

let copy_chunk (Location(start,stop)) =
  seek_in !ic start;
  let tocopy = ref(stop - start) in
  while !tocopy > 0 do
    let m =
      input !ic copy_buffer 0 (min !tocopy (String.length copy_buffer)) in
    output !oc copy_buffer 0 m;
    tocopy := !tocopy - m
  done


let output_action (i,act) =
  output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n");
  copy_chunk act;
  output_string !oc ")\nand "


(* 2- Generating the states *)

let states = ref ([||] : automata array)

type occurrence =
  { mutable pos: int list;
    mutable freq: int }

let enumerate_vect v =
  let env = ref [] in
  for pos = 0 to Array.length v - 1 do
    try
      let occ = List.assoc v.(pos) !env in
      occ.pos <- pos :: occ.pos;
      occ.freq <- occ.freq + 1
    with Not_found ->
      env := (v.(pos), {pos = [pos]; freq = 1 }) :: !env
  done;
  Sort.list (fun (e1, occ1) (e2, occ2) -> occ1.freq >= occ2.freq) !env


let output_move = function
    Backtrack ->
      output_string !oc "lexing.backtrack lexbuf"
  | Goto dest ->
      match !states.(dest) with
        Perform act_num ->
          output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf")
      | _ ->
          output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf")


(* Cannot use standard char_for_read because the characters to escape
   are not the same in CL6 and CL1999. *)

let output_char_lit oc = function
    '\'' -> output_string oc "\\'"
  | '\\' -> output_string oc "\\\\"
  | '\n' -> output_string oc "\\n"
  | '\t' -> output_string oc "\\t"
  | c ->  if Char.code c >= 32 & Char.code c < 128 then
            output_char oc c
          else begin
            let n = Char.code c in
            output_char oc '\\';
            output_char oc (Char.chr (48 + n / 100));
            output_char oc (Char.chr (48 + (n / 10) mod 10));
            output_char oc (Char.chr (48 + n mod 10))
          end

let rec output_chars = function
    [] ->
      failwith "output_chars"
  | [c] ->
      output_string !oc "'";
      output_char_lit !oc (Char.chr c);
      output_string !oc "'"
  | c::cl ->
      output_string !oc "'";
      output_char_lit !oc (Char.chr c);
      output_string !oc "'|";
      output_chars cl

let output_one_trans (dest, occ) =
  output_chars occ.pos;
  output_string !oc " -> ";
  output_move dest;
  output_string !oc "\n |  "

let output_all_trans trans =
  output_string !oc "  match lexing.next_char lexbuf with\n    ";
  match enumerate_vect trans with
    [] ->
      failwith "output_all_trans"
  | (default, _) :: rest ->
      List.iter output_one_trans rest;
      output_string !oc "_ -> ";
      output_move default;
      output_string !oc "\nand "

let output_state state_num = function
    Perform i ->
      ()
  | Shift(what_to_do, moves) ->
      output_string !oc
        ("state_"  ^ string_of_int state_num ^ " lexbuf =\n");
      begin match what_to_do with
        No_remember -> ()
      | Remember i ->
          output_string !oc
            ("  Lexing.set_backtrack lexbuf action_" ^
             string_of_int i ^ ";\n")
      end;
      output_all_trans moves


(* 3- Generating the entry points *)
          
let rec output_entries = function
    [] -> failwith "output_entries"
  | (name,state_num) :: rest ->
      output_string !oc (name ^ " lexbuf =\n");
      output_string !oc "  Lexing.init lexbuf;\n";
      output_string !oc ("  state_" ^ string_of_int state_num ^
                        " lexbuf\n");
      match rest with
        [] -> output_string !oc "\n"; ()
      | _  -> output_string !oc "\nand "; output_entries rest


(* All together *)

let output_lexdef header (initial_st, st, actions) =
  prerr_int (Array.length st); prerr_string " states, ";
  prerr_int (List.length actions); prerr_string " actions.";
  prerr_newline();
  copy_chunk header;
  output_string !oc "\nlet rec ";
  states := st;
  List.iter output_action actions;
  for i = 0 to Array.length st - 1 do
    output_state i st.(i)
  done;
  output_entries initial_st