diff options
-rw-r--r-- | camlp4/CHANGES | 7 | ||||
-rw-r--r-- | camlp4/etc/Makefile | 8 | ||||
-rw-r--r-- | camlp4/etc/pa_ocamllex.ml | 236 |
3 files changed, 249 insertions, 2 deletions
diff --git a/camlp4/CHANGES b/camlp4/CHANGES index 3092c35a90..9262da9ea6 100644 --- a/camlp4/CHANGES +++ b/camlp4/CHANGES @@ -1,6 +1,13 @@ Camlp4 Version .... ------------------- +- [04 Jan 02] Alain Frisch's contribution: + Added pa_ocamllex.cma, syntax for ocamllex files. The command: + camlp4 pa_ocamllex.cmo pr_o.cmo -ocamllex -impl foo.mll > foo.ml + does the same thing as: + ocamllex foo.mll + Allow to compile directly mll files. Without option -ocamllex, allow + to insert lex rules in a ml file. - [29 Dec 01] Added variable "inter_phrases" in Pcaml, of type ref (option string) to specify the string to print between phrases in pretty printers. The default is None, meaning to copy the inter phrases from the source diff --git a/camlp4/etc/Makefile b/camlp4/etc/Makefile index 9e3205a210..c5e75ed1f5 100644 --- a/camlp4/etc/Makefile +++ b/camlp4/etc/Makefile @@ -2,9 +2,9 @@ include ../config/Makefile -INCLUDES=-I ../camlp4 -I ../boot +INCLUDES=-I ../camlp4 -I ../boot -I $(OTOP)/lex OCAMLCFLAGS=$(INCLUDES) -OBJS=q_phony.cmo pa_o.cmo pa_op.cmo pa_oop.cmo pa_ru.cmo pa_format.cmo pa_olabl.cmo pa_sml.cmo pa_lisp.cmo pa_extfun.cmo pa_fstream.cmo pr_r.cmo pr_rp.cmo pr_o.cmo pr_op.cmo pr_extend.cmo pr_extfun.cmo pr_null.cmo pr_depend.cmo +OBJS=q_phony.cmo pa_o.cmo pa_op.cmo pa_oop.cmo pa_ru.cmo pa_format.cmo pa_olabl.cmo pa_sml.cmo pa_lisp.cmo pa_extfun.cmo pa_fstream.cmo pa_ocamllex.cma pr_r.cmo pr_rp.cmo pr_o.cmo pr_op.cmo pr_extend.cmo pr_extfun.cmo pr_null.cmo pr_depend.cmo INTF=pa_o.cmi CAMLP4OM=pa_o.cmo pa_op.cmo ../meta/pr_dump.cmo CAMLP4OMX=pa_o.cmx pa_op.cmx ../meta/pr_dump.cmx @@ -26,6 +26,9 @@ camlp4o.opt: $(CAMLP4OMX) mkcamlp4.sh: mkcamlp4.sh.tpl sed -e "s'LIBDIR'$(LIBDIR)'g" mkcamlp4.sh.tpl > mkcamlp4.sh +pa_ocamllex.cma: pa_ocamllex.cmo + $(OCAMLC) -I $(OTOP)/lex syntax.cmo lexgen.cmo compact.cmo pa_ocamllex.cmo -a -o pa_ocamllex.cma + bootstrap_lisp: ../boot/camlp4 ./pa_lispr.cmo -I ../boot pa_extend.cmo q_MLast.cmo ./pr_r.cmo ./pr_extend.cmo ./pr_rp.cmo -phony_quot pa_lisp.ml | sed -e 's/^;; \(.*\)$$/(* \1 *)/' -e 's|./pa_lispr.cmo|pa_r.cmo pa_rp.cmo|' -e 's/$$Id.*\$$/File generated by pretty print; do not edit!/' > tmp mv pa_lispr.ml pa_lispr.ml.old @@ -58,6 +61,7 @@ install: chmod a+x $(BINDIR)/mkcamlp4 pa_lisp.cmo: pa_lispr.cmo +pa_ocamllex.cmo: pa_o.cmo pr_extend.cmo: pa_extfun.cmo pr_o.cmo: pa_extfun.cmo pr_op.cmo: pa_extfun.cmo diff --git a/camlp4/etc/pa_ocamllex.ml b/camlp4/etc/pa_ocamllex.ml new file mode 100644 index 0000000000..f3418588c8 --- /dev/null +++ b/camlp4/etc/pa_ocamllex.ml @@ -0,0 +1,236 @@ +(* camlp4 ./pa_o.cmo q_MLast.cmo pa_extend.cmo pr_dump.cmo *) +(* $Id$ *) +(* Alain Frisch's contribution *) + +open Syntax +open Lexgen +open Compact + +(* Adapted from output.ml *) +(**************************) + +(* Output the DFA tables and its entry points *) + +(* To output an array of short ints, encoded as a string *) + +let output_byte buf b = + Buffer.add_char buf '\\'; + Buffer.add_char buf (Char.chr(48 + b / 100)); + Buffer.add_char buf (Char.chr(48 + (b / 10) mod 10)); + Buffer.add_char buf (Char.chr(48 + b mod 10)) + +let loc = (-1,-1) + +let output_array v = + let b = Buffer.create (Array.length v * 3) in + for i = 0 to Array.length v - 1 do + output_byte b (v.(i) land 0xFF); + output_byte b ((v.(i) asr 8) land 0xFF); + if i land 7 = 7 then Buffer.add_string b "\\\n " + done; + let s = Buffer.contents b in + <:expr< $str:s$ >> + +(* Output the tables *) + +let output_tables tbl = + <:str_item< value lex_tables = { + Lexing.lex_base = $output_array tbl.tbl_base$; + Lexing.lex_backtrk = $output_array tbl.tbl_backtrk$; + Lexing.lex_default = $output_array tbl.tbl_default$; + Lexing.lex_trans = $output_array tbl.tbl_trans$; + Lexing.lex_check = $output_array tbl.tbl_check$ + } >> + +(* Output the entries *) + +let rec make_alias n = function + | [] -> [] + | h::t -> + (h, "__ocaml_lex_arg_" ^ (string_of_int n)) :: (make_alias (succ n) t) + +let abstraction = + List.fold_right (fun (p,a) e -> <:expr< fun ($p$ as $lid:a$) -> $e$ >>) + + +let application = + List.fold_left (fun f (_,a) -> <:expr< $f$ $lid:a$ >>) + +let output_entry e = + let args = make_alias 0 (<:patt< lexbuf >> :: e.auto_args) in + let f = "__ocaml_lex_rec_" ^ e.auto_name ^ "_rec" in + let call_f = application <:expr< $lid:f$ >> args in + let inistate = <:expr< $int:string_of_int e.auto_initial_state$ >> in + let cases = + List.map + (fun (num, (loc,e)) -> + <:patt< $int:string_of_int num$ >>, + None, (* when ... *) + e + ) e.auto_actions @ + [ <:patt< __ocaml_lex_n >>, + None, + <:expr< do + { lexbuf.Lexing.refill_buff lexbuf; $call_f$ __ocaml_lex_n }>> ] + in + [ + <:patt< $lid:e.auto_name$ >>, + (abstraction args <:expr< $call_f$ $inistate$ >>); + + <:patt< $lid:f$ >>, + (abstraction args <:expr< + fun state -> + match Lexing.engine lex_tables state lexbuf with + [ $list:cases$ ] >>) + ] + +(* Main output function *) + +exception Table_overflow + +let output_lexdef tables entry_points = + Printf.eprintf + "pa_ocamllex: found lexer; %d states, %d transitions, table size %d bytes\n" + (Array.length tables.tbl_base) + (Array.length tables.tbl_trans) + (2 * (Array.length tables.tbl_base + Array.length tables.tbl_backtrk + + Array.length tables.tbl_default + Array.length tables.tbl_trans + + Array.length tables.tbl_check)); + flush stderr; + if Array.length tables.tbl_trans > 0x8000 then raise Table_overflow; + + let entries = List.map output_entry entry_points in + [output_tables tables; <:str_item< value rec $list:List.flatten entries$ >> ] + + +(* Adapted from parser.mly and main.ml *) +(***************************************) + +(* Auxiliaries for the parser. *) + +let char s = Char.code (Token.eval_char s) + +let named_regexps = + (Hashtbl.create 13 : (string, regular_expression) Hashtbl.t) + +let regexp_for_string s = + let rec re_string n = + if n >= String.length s then Epsilon + else if succ n = String.length s then Characters([Char.code (s.[n])]) + else Sequence(Characters([Char.code (s.[n])]), re_string (succ n)) + in re_string 0 + +let char_class c1 c2 = + let rec cl n = + if n > c2 then [] else n :: cl(succ n) + in cl c1 + +let all_chars = char_class 0 255 + +let rec subtract l1 l2 = + match l1 with + [] -> [] + | a::r -> if List.mem a l2 then subtract r l2 else a :: subtract r l2 + +(* The parser *) + +let ocamllex = Grammar.Entry.create Pcaml.gram "ocamllex" + +EXTEND + GLOBAL: Pcaml.str_item ocamllex; + + ocamllex: [ + [ h = header; + l = [LIST0 ["let"; let_regexp]; "rule"; d = lexer_def -> (d,loc)]; + t = header; EOI -> h @ (l :: t) ,false + ] + ]; + + let_regexp: [ + [ x = LIDENT; "="; r = regexp -> + if Hashtbl.mem named_regexps x then + Printf.eprintf + "pa_ocamllex (warning): multiple definition of named regexp '%s'\n" + x; + Hashtbl.add named_regexps x r; + ] + ]; + + lexer_def: [ + [ def = LIST0 definition SEP "and" -> + (try + let (entries, transitions) = make_dfa def in + let tables = compact_tables transitions in + let output = output_lexdef tables entries in + <:str_item< declare $list: output$ end >> + with Table_overflow -> + failwith "Transition table overflow in lexer, automaton is too big") + ] + ]; + + + Pcaml.str_item: [ + [ "rule"; d = lexer_def -> d + | "let_regexp"; let_regexp -> <:str_item< declare $list: []$ end >> + ] + ]; + + definition: [ + [ x=LIDENT; pl = LIST0 Pcaml.patt; "="; "parse"; + OPT "|"; l = LIST0 [ r=regexp; a=action -> (r,a) ] SEP "|" -> ((x,pl),l) ] + ]; + + action: [ + [ "{"; e = OPT Pcaml.expr; "}" -> + let e = match e with + | Some e -> e + | None -> <:expr< () >> + in + (loc,e) + ] + ]; + + header: [ + [ "{"; e = LIST0 [ si = Pcaml.str_item; OPT ";;" -> si ]; "}" -> + [<:str_item< declare $list:e$ end>>, loc] ] + | [ -> [] ] + ]; + + regexp: [ + [ r1 = regexp; "|"; r2 = regexp -> Alternative(r1,r2) ] + | [ r1 = regexp; r2 = regexp -> Sequence(r1,r2) ] + | [ r = regexp; "*" -> Repetition r + | r = regexp; "+" -> Sequence(r, Repetition r) + | r = regexp; "?" -> Alternative(r, Epsilon) + | "("; r = regexp; ")" -> r + | "_" -> Characters all_chars + | "eof" -> Characters [256] + | c = CHAR -> Characters [char c] + | s = STRING -> regexp_for_string (Token.eval_string s) + | "["; cc = ch_class; "]" -> Characters cc + | x = LIDENT -> + try Hashtbl.find named_regexps x + with Not_found -> + failwith + ("pa_ocamllex (error): reference to unbound regexp name `"^x^"'") + ] + ]; + + ch_class: [ + [ "^"; cc = ch_class -> subtract all_chars cc] + | [ c1 = CHAR; "-"; c2 = CHAR -> char_class (char c1) (char c2) + | c = CHAR -> [char c] + | cc1 = ch_class; cc2 = ch_class -> cc1 @ cc2 + ] + ]; +END + + +let standalone () = + Printf.eprintf "pa_ocamllex: stand-alone mode\n"; + Pcaml.parse_implem := Grammar.Entry.parse ocamllex + +let () = + Pcaml.add_option "-ocamllex" (Arg.Unit standalone) + " Activate (standalone) ocamllex emulation mode." + |