diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/.depend | 7 | ||||
-rw-r--r-- | stdlib/Makefile | 2 | ||||
-rw-r--r-- | stdlib/genlex.ml | 168 | ||||
-rw-r--r-- | stdlib/genlex.mli | 62 | ||||
-rw-r--r-- | stdlib/pervasives.ml | 2 | ||||
-rw-r--r-- | stdlib/pervasives.mli | 5 | ||||
-rw-r--r-- | stdlib/stream.mli | 4 |
7 files changed, 242 insertions, 8 deletions
diff --git a/stdlib/.depend b/stdlib/.depend index 45bb423ca1..d3cb9523e0 100644 --- a/stdlib/.depend +++ b/stdlib/.depend @@ -1,3 +1,4 @@ +genlex.cmi: stream.cmi parsing.cmi: lexing.cmi obj.cmi arg.cmo: array.cmi list.cmi printf.cmi string.cmi sys.cmi arg.cmi arg.cmx: array.cmx list.cmx printf.cmx string.cmx sys.cmx arg.cmi @@ -13,6 +14,8 @@ format.cmo: queue.cmi string.cmi format.cmi format.cmx: queue.cmx string.cmx format.cmi gc.cmo: printf.cmi gc.cmi gc.cmx: printf.cmx gc.cmi +genlex.cmo: char.cmi hashtbl.cmi list.cmi stream.cmi string.cmi genlex.cmi +genlex.cmx: char.cmx hashtbl.cmx list.cmx stream.cmx string.cmx genlex.cmi hashtbl.cmo: array.cmi hashtbl.cmi hashtbl.cmx: array.cmx hashtbl.cmi lexing.cmo: string.cmi lexing.cmi @@ -23,8 +26,8 @@ map.cmo: map.cmi map.cmx: map.cmi obj.cmo: obj.cmi obj.cmx: obj.cmi -oo.cmo: hashtbl.cmi obj.cmi oo.cmi -oo.cmx: hashtbl.cmx obj.cmx oo.cmi +oo.cmo: array.cmi hashtbl.cmi list.cmi obj.cmi random.cmi sort.cmi oo.cmi +oo.cmx: array.cmx hashtbl.cmx list.cmx obj.cmx random.cmx sort.cmx oo.cmi parsing.cmo: array.cmi lexing.cmi obj.cmi parsing.cmi parsing.cmx: array.cmx lexing.cmx obj.cmx parsing.cmi pervasives.cmo: pervasives.cmi diff --git a/stdlib/Makefile b/stdlib/Makefile index c615156b91..408eefbf11 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -12,7 +12,7 @@ OBJS=pervasives.cmo list.cmo string.cmo char.cmo array.cmo sys.cmo \ hashtbl.cmo sort.cmo filename.cmo obj.cmo lexing.cmo parsing.cmo \ set.cmo map.cmo stack.cmo queue.cmo stream.cmo \ printf.cmo format.cmo arg.cmo printexc.cmo gc.cmo \ - digest.cmo random.cmo oo.cmo + digest.cmo random.cmo oo.cmo genlex.cmo all: stdlib.cma std_exit.cmo cslheader diff --git a/stdlib/genlex.ml b/stdlib/genlex.ml new file mode 100644 index 0000000000..276db124d3 --- /dev/null +++ b/stdlib/genlex.ml @@ -0,0 +1,168 @@ +(***********************************************************************) +(* *) +(* Caml Special Light *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1995 Institut National de Recherche en Informatique et *) +(* Automatique. Distributed only by permission. *) +(* *) +(***********************************************************************) + +(* $Id$ *) + +type token = + Kwd of string + | Ident of string + | Int of int + | Float of float + | String of string + | Char of char + + +(* The string buffering machinery *) + +let initial_buffer = String.create 32 + +let buffer = ref initial_buffer +let bufpos = ref 0 + +let reset_buffer () = + buffer := initial_buffer; + bufpos := 0 + +let store c = + if !bufpos >= String.length !buffer then begin + let newbuffer = String.create (2 * !bufpos) in + String.blit !buffer 0 newbuffer 0 !bufpos; + buffer := newbuffer + end; + String.set !buffer !bufpos c; + incr bufpos + +let get_string () = + let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s + +(* The lexer *) + +let make_lexer keywords = + + let kwd_table = Hashtbl.create 17 in + List.iter (fun s -> Hashtbl.add kwd_table s (Kwd s)) keywords; + + let ident_or_keyword id = + try Hashtbl.find kwd_table id with Not_found -> Ident id + + and keyword_or_error c = + let s = String.make 1 c in + try Hashtbl.find kwd_table s + with Not_found -> raise(Stream.Parse_error("Illegal character " ^ s)) in + + let rec next_token = parser + [< ' ' '|'\010'|'\013'|'\009'|'\026'|'\012'; s >] -> + next_token s + | [< ' 'A'..'Z'|'a'..'z'|'\192'..'\255' as c; s>] -> + reset_buffer(); store c; ident s + | [< ' '!'|'%'|'&'|'$'|'#'|'+'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'| + '~'|'^'|'|'|'*' as c; s >] -> + reset_buffer(); store c; ident2 s + | [< ' '0'..'9' as c; s>] -> + reset_buffer(); store c; number s + | [< ' '\''; c = char; ' '\'' >] -> + Some(Char c) + | [< ' '"' (* '"' *); s >] -> + reset_buffer(); Some(String(string s)) + | [< ' '-'; s >] -> + neg_number s + | [< ' '('; s >] -> + maybe_comment s + | [< ' c >] -> + Some(keyword_or_error c) + | [< >] -> + None + + and ident = parser + [< ' 'A'..'Z'|'a'..'z'|'\192'..'\255'|'0'..'9'|'_'|'\'' as c; s>] -> + store c; ident s + | [< >] -> + Some(ident_or_keyword(get_string())) + + and ident2 = parser + [< ' '!'|'%'|'&'|'$'|'#'|'+'|'-'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'| + '~'|'^'|'|'|'*' as c; s >] -> + store c; ident2 s + | [< >] -> + Some(ident_or_keyword(get_string())) + + and neg_number = parser + [< ' '0'..'9' as c; s >] -> + reset_buffer(); store '-'; store c; number s + | [< s >] -> + reset_buffer(); store '-'; ident2 s + + and number = parser + [< ' '0'..'9' as c; s >] -> + store c; number s + | [< ' '.'; s >] -> + store '.'; decimal_part s + | [< ' 'e'|'E'; s >] -> + store 'E'; exponent_part s + | [< >] -> + Some(Int(int_of_string(get_string()))) + + and decimal_part = parser + [< ' '0'..'9' as c; s >] -> + store c; decimal_part s + | [< ' 'e'|'E'; s >] -> + store 'E'; exponent_part s + | [< >] -> + Some(Float(float_of_string(get_string()))) + + and exponent_part = parser + [< ' '+'|'-' as c; s >] -> + store c; end_exponent_part s + | [< s >] -> + end_exponent_part s + + and end_exponent_part = parser + [< ' '0'..'9' as c; s >] -> + store c; end_exponent_part s + | [< >] -> + Some(Float(float_of_string(get_string()))) + + and string = parser + [< ' '"' (* '"' *) >] -> get_string() + | [< ' '\\'; c = escape; s >] -> store c; string s + | [< ' c; s >] -> store c; string s + + and char = parser + [< ' '\\'; c = escape >] -> c + | [< ' c >] -> c + + and escape = parser + [< ' 'n' >] -> '\n' + | [< ' 'r' >] -> '\r' + | [< ' 't' >] -> '\t' + | [< ' '0'..'9' as c1; ' '0'..'9' as c2; ' '0'..'9' as c3 >] -> + Char.chr((Char.code c1 - 48) * 100 + + (Char.code c2 - 48) * 10 + (Char.code c3)) + | [< ' c >] -> c + + and maybe_comment = parser + [< ' '*'; s >] -> comment s; next_token s + | [< >] -> Some(keyword_or_error '(') + + and comment = parser + [< ' '('; s >] -> maybe_nested_comment s + | [< ' '*'; s >] -> maybe_end_comment s + | [< ' c; s >] -> comment s + + and maybe_nested_comment = parser + [< ' '*'; s >] -> comment s; comment s + | [< ' c; s >] -> comment s + + and maybe_end_comment = parser + [< ' ')' >] -> () + | [< ' c; s >] -> comment s + + in fun input -> Stream.from (fun count -> next_token input) diff --git a/stdlib/genlex.mli b/stdlib/genlex.mli new file mode 100644 index 0000000000..54c75be5b9 --- /dev/null +++ b/stdlib/genlex.mli @@ -0,0 +1,62 @@ +(***********************************************************************) +(* *) +(* Caml Special Light *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1995 Institut National de Recherche en Informatique et *) +(* Automatique. Distributed only by permission. *) +(* *) +(***********************************************************************) + +(* $Id$ *) + +(* Module [Genlex]: a generic lexical analyzer *) + +(* This module implements a simple ``standard'' lexical analyzer, presented + as a function from character streams to token streams. It implements + roughly the lexical conventions of Caml, but is parameterized by the + set of keywords of your language. *) + +type token = + Kwd of string + | Ident of string + | Int of int + | Float of float + | String of string + | Char of char + (* The type of tokens. The lexical classes are: [Int] and [Float] + for integer and floating-point numbers; [String] for + string literals, enclosed in double quotes; [Char] for + character literals, enclosed in single quotes; [Ident] for + identifiers (either sequences of letters, digits, underscores + and quotes, or sequences of ``operator characters'' such as + [+], [*], etc); and [Kwd] for keywords (either identifiers or + single ``special characters'' such as [(], [}], etc). *) + +val make_lexer: string list -> (char Stream.t -> token Stream.t) + (* Construct the lexer function. The first argument is the list of + keywords. An identifier [s] is returned as [Kwd s] if [s] + belongs to this list, and as [Ident s] otherwise. + A special character [s] is returned as [Kwd s] if [s] + belongs to this list, and cause a lexical error (exception + [Parse_error]) otherwise. Blanks and newlines are skipped. + Comments delimited by [(*] and [*)] are skipped as well, + and can be nested. *) + + (* Example: a lexer suitable for a desk calculator is obtained by + [ + let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"] + ] + The associated parser would be a function from [token stream] + to, for instance, [int], and would have rules such as: + [ + let parse_expr = function + [< 'Int n >] -> n + | [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n + | [< n1 = parse_expr; n2 = parse_remainder n1 >] -> n2 + and parse_remainder n1 = function + [< 'Kwd "+"; n2 = parse_expr >] -> n1+n2 + | ... + ] +*) diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml index 4b17cc35aa..d2ee5a4474 100644 --- a/stdlib/pervasives.ml +++ b/stdlib/pervasives.ml @@ -155,7 +155,7 @@ let stderr = open_descriptor_out 2 type open_flag = Open_rdonly | Open_wronly | Open_append | Open_creat | Open_trunc | Open_excl - | Open_binary | Open_text + | Open_binary | Open_text | Open_nonblock external open_desc: string -> open_flag list -> int -> int = "sys_open" diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli index 027d5bffae..33c9738021 100644 --- a/stdlib/pervasives.mli +++ b/stdlib/pervasives.mli @@ -344,7 +344,7 @@ val read_float : unit -> float type open_flag = Open_rdonly | Open_wronly | Open_append | Open_creat | Open_trunc | Open_excl - | Open_binary | Open_text + | Open_binary | Open_text | Open_nonblock (* Opening modes for [open_out_gen] and [open_in_gen]. - [Open_rdonly]: open for reading. - [Open_wronly]: open for writing. @@ -353,7 +353,8 @@ type open_flag = - [Open_trunc]: empty the file if it already exists. - [Open_excl]: fail if the file already exists. - [Open_binary]: open in binary mode (no conversion). -- [Open_text]: open in text mode (may perform conversions). *) +- [Open_text]: open in text mode (may perform conversions). +- [Open_nonblock]: open in non-blocking mode. *) val open_out : string -> out_channel (* Open the named file for writing, and return a new output channel diff --git a/stdlib/stream.mli b/stdlib/stream.mli index 6d9f6b007c..18db4d7c9e 100644 --- a/stdlib/stream.mli +++ b/stdlib/stream.mli @@ -59,8 +59,8 @@ val empty : 'a t -> unit (** Useful functions *) val peek : 'a t -> 'a option - (* Return [Some] of "the first element" of the stream, or [None] if - the stream is empty. *) + (* Return [Some c] where [c] is the first element of the stream, + or [None] if the stream is empty. *) val junk : 'a t -> unit (* Remove the first element of the stream, possibly unfreezing it before. *) |