diff options
Diffstat (limited to 'test/Lex')
-rw-r--r-- | test/Lex/gram_aux.ml | 33 | ||||
-rw-r--r-- | test/Lex/grammar.mly | 100 | ||||
-rw-r--r-- | test/Lex/lexgen.ml | 252 | ||||
-rw-r--r-- | test/Lex/main.ml | 104 | ||||
-rw-r--r-- | test/Lex/output.ml | 155 | ||||
-rw-r--r-- | test/Lex/scan_aux.ml | 46 | ||||
-rw-r--r-- | test/Lex/scanner.mll | 118 | ||||
-rw-r--r-- | test/Lex/syntax.ml | 26 | ||||
-rw-r--r-- | test/Lex/testmain.ml | 34 | ||||
-rw-r--r-- | test/Lex/testscanner.mll | 121 |
10 files changed, 0 insertions, 989 deletions
diff --git a/test/Lex/gram_aux.ml b/test/Lex/gram_aux.ml deleted file mode 100644 index 525ee69b5e..0000000000 --- a/test/Lex/gram_aux.ml +++ /dev/null @@ -1,33 +0,0 @@ -(* Auxiliaries for the parser. *) - -open Syntax - -let regexp_for_string s = - let l = String.length s in - if l = 0 then - Epsilon - else begin - let re = ref(Characters [String.get s (l - 1)]) in - for i = l - 2 downto 0 do - re := Sequence(Characters [String.get s i], !re) - done; - !re - end - - -let char_class c1 c2 = - let class = ref [] in - for i = Char.code c2 downto Char.code c1 do - class := Char.chr i :: !class - done; - !class - - -let all_chars = char_class '\001' '\255' - - -let rec subtract l1 l2 = - match l1 with - [] -> [] - | a::l -> if List.mem a l2 then subtract l l2 else a :: subtract l l2 - diff --git a/test/Lex/grammar.mly b/test/Lex/grammar.mly deleted file mode 100644 index eb1c8cc248..0000000000 --- a/test/Lex/grammar.mly +++ /dev/null @@ -1,100 +0,0 @@ -/* The grammar for lexer definitions */ - -%{ -open Syntax -open Gram_aux -%} - -%token <string> Tident -%token <char> Tchar -%token <string> Tstring -%token <Syntax.location> Taction -%token Trule Tparse Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket -%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash - -%left Tor -%left CONCAT -%nonassoc Tmaybe -%left Tstar -%left Tplus - -%start lexer_definition -%type <Syntax.lexer_definition> lexer_definition - -%% - -lexer_definition: - header Trule definition other_definitions Tend - { Lexdef($1, $3::(List.rev $4)) } -; -header: - Taction - { $1 } - | - { Location(0,0) } -; -other_definitions: - other_definitions Tand definition - { $3::$1 } - | - { [] } -; -definition: - Tident Tequal entry - { ($1,$3) } -; -entry: - Tparse case rest_of_entry - { $2 :: List.rev $3 } -; -rest_of_entry: - rest_of_entry Tor case - { $3::$1 } - | - { [] } -; -case: - regexp Taction - { ($1,$2) } -; -regexp: - Tunderscore - { Characters all_chars } - | Teof - { Characters ['\000'] } - | Tchar - { Characters [$1] } - | Tstring - { regexp_for_string $1 } - | Tlbracket char_class Trbracket - { Characters $2 } - | regexp Tstar - { Repetition $1 } - | regexp Tmaybe - { Alternative($1, Epsilon) } - | regexp Tplus - { Sequence($1, Repetition $1) } - | regexp Tor regexp - { Alternative($1,$3) } - | regexp regexp %prec CONCAT - { Sequence($1,$2) } - | Tlparen regexp Trparen - { $2 } -; -char_class: - Tcaret char_class1 - { subtract all_chars $2 } - | char_class1 - { $1 } -; -char_class1: - Tchar Tdash Tchar - { char_class $1 $3 } - | Tchar - { [$1] } - | char_class char_class %prec CONCAT - { $1 @ $2 } -; - -%% - diff --git a/test/Lex/lexgen.ml b/test/Lex/lexgen.ml deleted file mode 100644 index 73d011577f..0000000000 --- a/test/Lex/lexgen.ml +++ /dev/null @@ -1,252 +0,0 @@ -(* Compiling a lexer definition *) - -open Syntax - -(* Deep abstract syntax for regular expressions *) - -type regexp = - Empty - | Chars of int - | Action of int - | Seq of regexp * regexp - | Alt of regexp * regexp - | Star of regexp - -(* From shallow to deep syntax *) - -(*** - -let print_char_class c = - let print_interval low high = - prerr_int low; - if high - 1 > low then begin - prerr_char '-'; - prerr_int (high-1) - end; - prerr_char ' ' in - let rec print_class first next = function - [] -> print_interval first next - | c::l -> - if char.code c = next - then print_class first (next+1) l - else begin - print_interval first next; - print_class (char.code c) (char.code c + 1) l - end in - match c with - [] -> prerr_newline() - | c::l -> print_class (char.code c) (char.code c + 1) l; prerr_newline() - - -let rec print_regexp = function - Empty -> prerr_string "Empty" - | Chars n -> prerr_string "Chars "; prerr_int n - | Action n -> prerr_string "Action "; prerr_int n - | Seq(r1,r2) -> print_regexp r1; prerr_string "; "; print_regexp r2 - | Alt(r1,r2) -> prerr_string "("; print_regexp r1; prerr_string " | "; print_regexp r2; prerr_string ")" - | Star r -> prerr_string "("; print_regexp r; prerr_string ")*" - -***) - -let chars = ref ([] : char list list) -let chars_count = ref 0 -let actions = ref ([] : (int * location) list) -let actions_count = ref 0 - -let rec encode_regexp = function - Epsilon -> Empty - | Characters cl -> - let n = !chars_count in -(*** prerr_int n; prerr_char ' '; print_char_class cl; ***) - chars := cl :: !chars; - chars_count := !chars_count + 1; - Chars(n) - | Sequence(r1,r2) -> - Seq(encode_regexp r1, encode_regexp r2) - | Alternative(r1,r2) -> - Alt(encode_regexp r1, encode_regexp r2) - | Repetition r -> - Star (encode_regexp r) - - -let encode_casedef = - List.fold_left - (fun reg (expr,act) -> - let act_num = !actions_count in - actions_count := !actions_count + 1; - actions := (act_num, act) :: !actions; - Alt(reg, Seq(encode_regexp expr, Action act_num))) - Empty - - -let encode_lexdef (Lexdef(_, ld)) = - chars := []; - chars_count := 0; - actions := []; - actions_count := 0; - let name_regexp_list = - List.map (fun (name, casedef) -> (name, encode_casedef casedef)) ld in -(* List.iter print_char_class chars; *) - let chr = Array.of_list (List.rev !chars) - and act = !actions in - chars := []; - actions := []; - (chr, name_regexp_list, act) - - -(* To generate directly a NFA from a regular expression. - Confer Aho-Sethi-Ullman, dragon book, chap. 3 *) - -type transition = - OnChars of int - | ToAction of int - - -let rec merge_trans l1 l2 = - match (l1, l2) with - ([], s2) -> s2 - | (s1, []) -> s1 - | ((OnChars n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) -> - if n1 = n2 then t1 :: merge_trans r1 r2 else - if n1 < n2 then t1 :: merge_trans r1 s2 else - t2 :: merge_trans s1 r2 - | ((ToAction n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) -> - if n1 = n2 then t1 :: merge_trans r1 r2 else - if n1 < n2 then t1 :: merge_trans r1 s2 else - t2 :: merge_trans s1 r2 - | ((OnChars n1 as t1) :: r1 as s1), ((ToAction n2 as t2) :: r2 as s2) -> - t1 :: merge_trans r1 s2 - | ((ToAction n1 as t1) :: r1 as s1), ((OnChars n2 as t2) :: r2 as s2) -> - t2 :: merge_trans s1 r2 - - -let rec nullable = function - Empty -> true - | Chars _ -> false - | Action _ -> false - | Seq(r1,r2) -> nullable r1 & nullable r2 - | Alt(r1,r2) -> nullable r1 or nullable r2 - | Star r -> true - - -let rec firstpos = function - Empty -> [] - | Chars pos -> [OnChars pos] - | Action act -> [ToAction act] - | Seq(r1,r2) -> if nullable r1 - then merge_trans (firstpos r1) (firstpos r2) - else firstpos r1 - | Alt(r1,r2) -> merge_trans (firstpos r1) (firstpos r2) - | Star r -> firstpos r - - -let rec lastpos = function - Empty -> [] - | Chars pos -> [OnChars pos] - | Action act -> [ToAction act] - | Seq(r1,r2) -> if nullable r2 - then merge_trans (lastpos r1) (lastpos r2) - else lastpos r2 - | Alt(r1,r2) -> merge_trans (lastpos r1) (lastpos r2) - | Star r -> lastpos r - - -let followpos size name_regexp_list = - let v = Array.new size [] in - let fill_pos first = function - OnChars pos -> v.(pos) <- merge_trans first v.(pos); () - | ToAction _ -> () in - let rec fill = function - Seq(r1,r2) -> - fill r1; fill r2; - List.iter (fill_pos (firstpos r2)) (lastpos r1) - | Alt(r1,r2) -> - fill r1; fill r2 - | Star r -> - fill r; - List.iter (fill_pos (firstpos r)) (lastpos r) - | _ -> () in - List.iter (fun (name, regexp) -> fill regexp) name_regexp_list; - v - - -let no_action = 0x3FFFFFFF - -let split_trans_set = - List.fold_left - (fun (act, pos_set as act_pos_set) trans -> - match trans with - OnChars pos -> (act, pos :: pos_set) - | ToAction act1 -> if act1 < act then (act1, pos_set) - else act_pos_set) - (no_action, []) - - -let memory = (Hashtbl.new 131 : (transition list, int) Hashtbl.t) -let todo = ref ([] : (transition list * int) list) -let next = ref 0 - -let get_state st = - try - Hashtbl.find memory st - with Not_found -> - let nbr = !next in - next := !next + 1; - Hashtbl.add memory st nbr; - todo := (st, nbr) :: !todo; - nbr - -let rec map_on_states f = - match !todo with - [] -> [] - | (st,i)::r -> todo := r; let res = f st in (res,i) :: map_on_states f - -let number_of_states () = !next - -let goto_state = function - [] -> Backtrack - | ps -> Goto (get_state ps) - - -let transition_from chars follow pos_set = - let tr = Array.new 256 [] - and shift = Array.new 256 Backtrack in - List.iter - (fun pos -> - List.iter - (fun c -> - tr.(Char.code c) <- - merge_trans tr.(Char.code c) follow.(pos)) - chars.(pos)) - pos_set; - for i = 0 to 255 do - shift.(i) <- goto_state tr.(i) - done; - shift - - -let translate_state chars follow state = - match split_trans_set state with - n, [] -> Perform n - | n, ps -> Shift( (if n = no_action then No_remember else Remember n), - transition_from chars follow ps) - - -let make_dfa lexdef = - let (chars, name_regexp_list, actions) = - encode_lexdef lexdef in -(** - List.iter (fun (name, regexp) -> prerr_string name; prerr_string " = "; print_regexp regexp; prerr_newline()) name_regexp_list; -**) - let follow = - followpos (Array.length chars) name_regexp_list in - let initial_states = - List.map (fun (name, regexp) -> (name, get_state(firstpos regexp))) - name_regexp_list in - let states = - map_on_states (translate_state chars follow) in - let v = - Array.new (number_of_states()) (Perform 0) in - List.iter (fun (auto, i) -> v.(i) <- auto) states; - (initial_states, v, actions) - diff --git a/test/Lex/main.ml b/test/Lex/main.ml deleted file mode 100644 index 94902ed21b..0000000000 --- a/test/Lex/main.ml +++ /dev/null @@ -1,104 +0,0 @@ -(* The lexer generator. Command-line parsing. *) - -open Syntax -open Scanner -open Grammar -open Lexgen -open Output - -let main () = - if Array.length Sys.argv <> 2 then begin - prerr_string "Usage: camllex <input file>\n"; - exit 2 - end; - let source_name = Sys.argv.(1) in - let dest_name = - if Filename.check_suffix source_name ".mll" then - Filename.chop_suffix source_name ".mll" ^ ".ml" - else - source_name ^ ".ml" in - ic := open_in source_name; - oc := open_out dest_name; - let lexbuf = Lexing.from_channel !ic in - let (Lexdef(header,_) as def) = - try - Grammar.lexer_definition Scanner.main lexbuf - with - Parsing.Parse_error -> - prerr_string "Syntax error around char "; - prerr_int (Lexing.lexeme_start lexbuf); - prerr_endline "."; - exit 2 - | Scan_aux.Lexical_error s -> - prerr_string "Lexical error around char "; - prerr_int (Lexing.lexeme_start lexbuf); - prerr_string ": "; - prerr_string s; - prerr_endline "."; - exit 2 in - let ((init, states, acts) as dfa) = make_dfa def in - output_lexdef header dfa; - close_in !ic; - close_out !oc - -let _ = main(); exit 0 - - -(***** -let main () = - ic := stdin; - oc := stdout; - let lexbuf = lexing.from_channel ic in - let (Lexdef(header,_) as def) = - try - grammar.lexer_definition scanner.main lexbuf - with - parsing.Parse_error x -> - prerr_string "Syntax error around char "; - prerr_int (lexing.lexeme_start lexbuf); - prerr_endline "."; - sys.exit 2 - | scan_aux.Lexical_error s -> - prerr_string "Lexical error around char "; - prerr_int (lexing.lexeme_start lexbuf); - prerr_string ": "; - prerr_string s; - prerr_endline "."; - sys.exit 2 in - let ((init, states, acts) as dfa) = make_dfa def in - output_lexdef header dfa - -****) - -(**** -let debug_scanner lexbuf = - let tok = scanner.main lexbuf in - begin match tok with - Tident s -> prerr_string "Tident "; prerr_string s - | Tchar c -> prerr_string "Tchar "; prerr_char c - | Tstring s -> prerr_string "Tstring "; prerr_string s - | Taction(Location(i1,i2)) -> - prerr_string "Taction "; prerr_int i1; prerr_string "-"; - prerr_int i2 - | Trule -> prerr_string "Trule" - | Tparse -> prerr_string "Tparse" - | Tand -> prerr_string "Tand" - | Tequal -> prerr_string "Tequal" - | Tend -> prerr_string "Tend" - | Tor -> prerr_string "Tor" - | Tunderscore -> prerr_string "Tunderscore" - | Teof -> prerr_string "Teof" - | Tlbracket -> prerr_string "Tlbracket" - | Trbracket -> prerr_string "Trbracket" - | Tstar -> prerr_string "Tstar" - | Tmaybe -> prerr_string "Tmaybe" - | Tplus -> prerr_string "Tplus" - | Tlparen -> prerr_string "Tlparen" - | Trparen -> prerr_string "Trparen" - | Tcaret -> prerr_string "Tcaret" - | Tdash -> prerr_string "Tdash" - end; - prerr_newline(); - tok - -****) diff --git a/test/Lex/output.ml b/test/Lex/output.ml deleted file mode 100644 index 301edcba3c..0000000000 --- a/test/Lex/output.ml +++ /dev/null @@ -1,155 +0,0 @@ -(* Generating a DFA as a set of mutually recursive functions *) - -open Syntax - -let ic = ref stdin -let oc = ref stdout - -(* 1- Generating the actions *) - -let copy_buffer = String.create 1024 - -let copy_chunk (Location(start,stop)) = - seek_in !ic start; - let tocopy = ref(stop - start) in - while !tocopy > 0 do - let m = - input !ic copy_buffer 0 (min !tocopy (String.length copy_buffer)) in - output !oc copy_buffer 0 m; - tocopy := !tocopy - m - done - - -let output_action (i,act) = - output_string !oc ("action_" ^ string_of_int i ^ " lexbuf = (\n"); - copy_chunk act; - output_string !oc ")\nand " - - -(* 2- Generating the states *) - -let states = ref ([||] : automata array) - -type occurrence = - { mutable pos: int list; - mutable freq: int } - -let enumerate_vect v = - let env = ref [] in - for pos = 0 to Array.length v - 1 do - try - let occ = List.assoc v.(pos) !env in - occ.pos <- pos :: occ.pos; - occ.freq <- occ.freq + 1 - with Not_found -> - env := (v.(pos), {pos = [pos]; freq = 1 }) :: !env - done; - Sort.list (fun (e1, occ1) (e2, occ2) -> occ1.freq >= occ2.freq) !env - - -let output_move = function - Backtrack -> - output_string !oc "lexing.backtrack lexbuf" - | Goto dest -> - match !states.(dest) with - Perform act_num -> - output_string !oc ("action_" ^ string_of_int act_num ^ " lexbuf") - | _ -> - output_string !oc ("state_" ^ string_of_int dest ^ " lexbuf") - - -(* Cannot use standard char_for_read because the characters to escape - are not the same in CL6 and CL1999. *) - -let output_char_lit oc = function - '\'' -> output_string oc "\\'" - | '\\' -> output_string oc "\\\\" - | '\n' -> output_string oc "\\n" - | '\t' -> output_string oc "\\t" - | c -> if Char.code c >= 32 & Char.code c < 128 then - output_char oc c - else begin - let n = Char.code c in - output_char oc '\\'; - output_char oc (Char.chr (48 + n / 100)); - output_char oc (Char.chr (48 + (n / 10) mod 10)); - output_char oc (Char.chr (48 + n mod 10)) - end - -let rec output_chars = function - [] -> - failwith "output_chars" - | [c] -> - output_string !oc "'"; - output_char_lit !oc (Char.chr c); - output_string !oc "'" - | c::cl -> - output_string !oc "'"; - output_char_lit !oc (Char.chr c); - output_string !oc "'|"; - output_chars cl - -let output_one_trans (dest, occ) = - output_chars occ.pos; - output_string !oc " -> "; - output_move dest; - output_string !oc "\n | " - -let output_all_trans trans = - output_string !oc " match lexing.next_char lexbuf with\n "; - match enumerate_vect trans with - [] -> - failwith "output_all_trans" - | (default, _) :: rest -> - List.iter output_one_trans rest; - output_string !oc "_ -> "; - output_move default; - output_string !oc "\nand " - -let output_state state_num = function - Perform i -> - () - | Shift(what_to_do, moves) -> - output_string !oc - ("state_" ^ string_of_int state_num ^ " lexbuf =\n"); - begin match what_to_do with - No_remember -> () - | Remember i -> - output_string !oc - (" Lexing.set_backtrack lexbuf action_" ^ - string_of_int i ^ ";\n") - end; - output_all_trans moves - - -(* 3- Generating the entry points *) - -let rec output_entries = function - [] -> failwith "output_entries" - | (name,state_num) :: rest -> - output_string !oc (name ^ " lexbuf =\n"); - output_string !oc " Lexing.init lexbuf;\n"; - output_string !oc (" state_" ^ string_of_int state_num ^ - " lexbuf\n"); - match rest with - [] -> output_string !oc "\n"; () - | _ -> output_string !oc "\nand "; output_entries rest - - -(* All together *) - -let output_lexdef header (initial_st, st, actions) = - prerr_int (Array.length st); prerr_string " states, "; - prerr_int (List.length actions); prerr_string " actions."; - prerr_newline(); - copy_chunk header; - output_string !oc "\nlet rec "; - states := st; - List.iter output_action actions; - for i = 0 to Array.length st - 1 do - output_state i st.(i) - done; - output_entries initial_st - - - diff --git a/test/Lex/scan_aux.ml b/test/Lex/scan_aux.ml deleted file mode 100644 index 8b01d63479..0000000000 --- a/test/Lex/scan_aux.ml +++ /dev/null @@ -1,46 +0,0 @@ -(* Auxiliaries for the lexical analyzer *) - -let brace_depth = ref 0 -let comment_depth = ref 0 - -exception Lexical_error of string - -let initial_string_buffer = String.create 256 -let string_buff = ref initial_string_buffer -let string_index = ref 0 - -let reset_string_buffer () = - string_buff := initial_string_buffer; - string_index := 0 - - -let store_string_char c = - begin - if !string_index >= String.length !string_buff then begin - let new_buff = String.create (String.length !string_buff * 2) in - String.blit new_buff 0 !string_buff 0 (String.length !string_buff); - string_buff := new_buff - end - end; - String.unsafe_set !string_buff !string_index c; - incr string_index - -let get_stored_string () = - let s = String.sub !string_buff 0 !string_index in - string_buff := initial_string_buffer; - s - - -let char_for_backslash = function - 'n' -> '\010' (* '\n' when bootstrapped *) - | 't' -> '\009' (* '\t' *) - | 'b' -> '\008' (* '\b' *) - | 'r' -> '\013' (* '\r' *) - | c -> c - - -let char_for_decimal_code lexbuf i = - Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + - 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + - (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) - diff --git a/test/Lex/scanner.mll b/test/Lex/scanner.mll deleted file mode 100644 index 7cb13ba70e..0000000000 --- a/test/Lex/scanner.mll +++ /dev/null @@ -1,118 +0,0 @@ -(* The lexical analyzer for lexer definitions. *) - -{ -open Syntax -open Grammar -open Scan_aux -} - -rule main = parse - [' ' '\010' '\013' '\009' ] + - { main lexbuf } - | "(*" - { comment_depth := 1; - comment lexbuf; - main lexbuf } - | (['A'-'Z' 'a'-'z'] | '_' ['A'-'Z' 'a'-'z' '\'' '0'-'9']) - ( '_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9'] ) * - { match Lexing.lexeme lexbuf with - "rule" -> Trule - | "parse" -> Tparse - | "and" -> Tand - | "eof" -> Teof - | s -> Tident s } - | '"' - { reset_string_buffer(); - string lexbuf; - Tstring(get_stored_string()) } - | "'" - { Tchar(char lexbuf) } - | '{' - { let n1 = Lexing.lexeme_end lexbuf in - brace_depth := 1; - let n2 = action lexbuf in - Taction(Location(n1, n2)) } - | '=' { Tequal } - | ";;" { Tend } - | '|' { Tor } - | '_' { Tunderscore } - | "eof" { Teof } - | '[' { Tlbracket } - | ']' { Trbracket } - | '*' { Tstar } - | '?' { Tmaybe } - | '+' { Tplus } - | '(' { Tlparen } - | ')' { Trparen } - | '^' { Tcaret } - | '-' { Tdash } - | eof - { raise(Lexical_error "unterminated lexer definition") } - | _ - { raise(Lexical_error("illegal character " ^ Lexing.lexeme lexbuf)) } - -and action = parse - '{' - { incr brace_depth; - action lexbuf } - | '}' - { decr brace_depth; - if !brace_depth = 0 then Lexing.lexeme_start lexbuf else action lexbuf } - | '"' - { reset_string_buffer(); - string lexbuf; - reset_string_buffer(); - action lexbuf } - | '\'' - { char lexbuf; action lexbuf } - | "(*" - { comment_depth := 1; - comment lexbuf; - action lexbuf } - | eof - { raise (Lexical_error "unterminated action") } - | _ - { action lexbuf } - -and string = parse - '"' - { () } - | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + - { string lexbuf } - | '\\' ['\\' '"' 'n' 't' 'b' 'r'] - { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); - string lexbuf } - | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] - { store_string_char(char_for_decimal_code lexbuf 1); - string lexbuf } - | eof - { raise(Lexical_error "unterminated string") } - | _ - { store_string_char(Lexing.lexeme_char lexbuf 0); - string lexbuf } - -and char = parse - [^ '\\'] "'" - { Lexing.lexeme_char lexbuf 0 } - | '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" - { char_for_backslash (Lexing.lexeme_char lexbuf 1) } - | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" - { char_for_decimal_code lexbuf 1 } - | _ - { raise(Lexical_error "bad character constant") } - -and comment = parse - "(*" - { incr comment_depth; comment lexbuf } - | "*)" - { decr comment_depth; - if !comment_depth = 0 then () else comment lexbuf } - | '"' - { reset_string_buffer(); - string lexbuf; - reset_string_buffer(); - comment lexbuf } - | eof - { raise(Lexical_error "unterminated comment") } - | _ - { comment lexbuf } diff --git a/test/Lex/syntax.ml b/test/Lex/syntax.ml deleted file mode 100644 index f692e6f625..0000000000 --- a/test/Lex/syntax.ml +++ /dev/null @@ -1,26 +0,0 @@ -(* The shallow abstract syntax *) - -type location = - Location of int * int - -type regular_expression = - Epsilon - | Characters of char list - | Sequence of regular_expression * regular_expression - | Alternative of regular_expression * regular_expression - | Repetition of regular_expression - -type lexer_definition = - Lexdef of location * (string * (regular_expression * location) list) list - -(* Representation of automata *) - -type automata = - Perform of int - | Shift of automata_trans * automata_move array -and automata_trans = - No_remember - | Remember of int -and automata_move = - Backtrack - | Goto of int diff --git a/test/Lex/testmain.ml b/test/Lex/testmain.ml deleted file mode 100644 index e0a914ee09..0000000000 --- a/test/Lex/testmain.ml +++ /dev/null @@ -1,34 +0,0 @@ -(* The lexer generator. Command-line parsing. *) - -#open "syntax";; -#open "testscanner";; -#open "grammar";; -#open "lexgen";; -#open "output";; - -let main () = - ic := stdin; - oc := stdout; - let lexbuf = lexing.from_channel ic in - let (Lexdef(header,_) as def) = - try - grammar.lexer_definition testscanner.main lexbuf - with - parsing.Parse_error x -> - prerr_string "Syntax error around char "; - prerr_int (lexing.lexeme_start lexbuf); - prerr_endline "."; - sys.exit 2 - | scan_aux.Lexical_error s -> - prerr_string "Lexical error around char "; - prerr_int (lexing.lexeme_start lexbuf); - prerr_string ": "; - prerr_string s; - prerr_endline "."; - sys.exit 2 in - let ((init, states, acts) as dfa) = make_dfa def in - output_lexdef header dfa -;; - -main(); sys.exit 0 -;; diff --git a/test/Lex/testscanner.mll b/test/Lex/testscanner.mll deleted file mode 100644 index 91ada299f2..0000000000 --- a/test/Lex/testscanner.mll +++ /dev/null @@ -1,121 +0,0 @@ -(* The lexical analyzer for lexer definitions. *) - -{ -#open "syntax";; -#open "grammar";; -#open "scan_aux";; -} - -rule main = parse - _ * "qwertyuiopasdfghjklzxcvbnm0123456789!@#$%^&*()" - { main lexbuf } - | [' ' '\010' '\013' '\009' ] + - { main lexbuf } - | "(*" - { comment_depth := 1; - comment lexbuf; - main lexbuf } - | (['A'-'Z' 'a'-'z'] | '_' ['A'-'Z' 'a'-'z' '\'' '0'-'9']) - ( '_' ? ['A'-'Z' 'a'-'z' ''' '0'-'9'] ) * - { match lexing.lexeme lexbuf with - "rule" -> Trule - | "parse" -> Tparse - | "and" -> Tand - | "eof" -> Teof - | s -> Tident s } - | '"' - { reset_string_buffer(); - string lexbuf; - Tstring(get_stored_string()) } - | "'" - { Tchar(char lexbuf) } - | '{' - { let n1 = lexing.lexeme_end lexbuf in - brace_depth := 1; - let n2 = action lexbuf in - Taction(Location(n1, n2)) } - | '=' { Tequal } - | ";;" { Tend } - | '|' { Tor } - | '_' { Tunderscore } - | "eof" { Teof } - | '[' { Tlbracket } - | ']' { Trbracket } - | '*' { Tstar } - | '?' { Tmaybe } - | '+' { Tplus } - | '(' { Tlparen } - | ')' { Trparen } - | '^' { Tcaret } - | '-' { Tdash } - | eof - { raise(Lexical_error "unterminated lexer definition") } - | _ - { raise(Lexical_error("illegal character " ^ lexing.lexeme lexbuf)) } - -and action = parse - '{' - { brace_depth := brace_depth + 1; - action lexbuf } - | '}' - { brace_depth := brace_depth - 1; - if brace_depth = 0 then lexing.lexeme_start lexbuf else action lexbuf } - | '"' - { reset_string_buffer(); - string lexbuf; - reset_string_buffer(); - action lexbuf } - | '\'' - { char lexbuf; action lexbuf } - | "(*" - { comment_depth := 1; - comment lexbuf; - action lexbuf } - | eof - { raise (Lexical_error "unterminated action") } - | _ - { action lexbuf } - -and string = parse - '"' - { () } - | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + - { string lexbuf } - | '\\' ['\\' '"' 'n' 't' 'b' 'r'] - { store_string_char(char_for_backslash(lexing.lexeme_char lexbuf 1)); - string lexbuf } - | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] - { store_string_char(char_for_decimal_code lexbuf 1); - string lexbuf } - | eof - { raise(Lexical_error "unterminated string") } - | _ - { store_string_char(lexing.lexeme_char lexbuf 0); - string lexbuf } - -and char = parse - [^ '\\'] "'" - { lexing.lexeme_char lexbuf 0 } - | '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'" - { char_for_backslash (lexing.lexeme_char lexbuf 1) } - | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" - { char_for_decimal_code lexbuf 1 } - | _ - { raise(Lexical_error "bad character constant") } - -and comment = parse - "(*" - { comment_depth := comment_depth + 1; comment lexbuf } - | "*)" - { comment_depth := comment_depth - 1; - if comment_depth = 0 then () else comment lexbuf } - | '"' - { reset_string_buffer(); - string lexbuf; - reset_string_buffer(); - comment lexbuf } - | eof - { raise(Lexical_error "unterminated comment") } - | _ - { comment lexbuf } -;; |