7 files changed, 242 insertions, 8 deletions
diff --git a/stdlib/.depend b/stdlib/.depend
index 45bb423ca1..d3cb9523e0 100644
--- a/stdlib/.depend
+++ b/stdlib/.depend
@@ -1,3 +1,4 @@
+genlex.cmi: stream.cmi 
 parsing.cmi: lexing.cmi obj.cmi 
 arg.cmo: array.cmi list.cmi printf.cmi string.cmi sys.cmi arg.cmi 
 arg.cmx: array.cmx list.cmx printf.cmx string.cmx sys.cmx arg.cmi 
@@ -13,6 +14,8 @@ format.cmo: queue.cmi string.cmi format.cmi
 format.cmx: queue.cmx string.cmx format.cmi 
 gc.cmo: printf.cmi gc.cmi 
 gc.cmx: printf.cmx gc.cmi 
+genlex.cmo: char.cmi hashtbl.cmi list.cmi stream.cmi string.cmi genlex.cmi 
+genlex.cmx: char.cmx hashtbl.cmx list.cmx stream.cmx string.cmx genlex.cmi 
 hashtbl.cmo: array.cmi hashtbl.cmi 
 hashtbl.cmx: array.cmx hashtbl.cmi 
 lexing.cmo: string.cmi lexing.cmi 
@@ -23,8 +26,8 @@ map.cmo: map.cmi
 map.cmx: map.cmi 
 obj.cmo: obj.cmi 
 obj.cmx: obj.cmi 
-oo.cmo: hashtbl.cmi obj.cmi oo.cmi 
-oo.cmx: hashtbl.cmx obj.cmx oo.cmi 
+oo.cmo: array.cmi hashtbl.cmi list.cmi obj.cmi random.cmi sort.cmi oo.cmi 
+oo.cmx: array.cmx hashtbl.cmx list.cmx obj.cmx random.cmx sort.cmx oo.cmi 
 parsing.cmo: array.cmi lexing.cmi obj.cmi parsing.cmi 
 parsing.cmx: array.cmx lexing.cmx obj.cmx parsing.cmi 
 pervasives.cmo: pervasives.cmi 
diff --git a/stdlib/Makefile b/stdlib/Makefile
index c615156b91..408eefbf11 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -12,7 +12,7 @@ OBJS=pervasives.cmo list.cmo string.cmo char.cmo array.cmo sys.cmo \
   hashtbl.cmo sort.cmo filename.cmo obj.cmo lexing.cmo parsing.cmo \
   set.cmo map.cmo stack.cmo queue.cmo stream.cmo \
   printf.cmo format.cmo arg.cmo printexc.cmo gc.cmo \
-  digest.cmo random.cmo oo.cmo
+  digest.cmo random.cmo oo.cmo genlex.cmo
 
 all: stdlib.cma std_exit.cmo cslheader
 
diff --git a/stdlib/genlex.ml b/stdlib/genlex.ml
new file mode 100644
index 0000000000..276db124d3
--- /dev/null
+++ b/stdlib/genlex.ml
@@ -0,0 +1,168 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                         Caml Special Light                          *)
+(*                                                                     *)
+(*              Xavier Leroy, projet Cristal, INRIA Rocquencourt       *)
+(*                                                                     *)
+(*  Copyright 1995 Institut National de Recherche en Informatique et   *)
+(*  Automatique.  Distributed only by permission.                      *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* $Id$ *)
+
+type token =
+    Kwd of string
+  | Ident of string
+  | Int of int
+  | Float of float
+  | String of string
+  | Char of char
+
+
+(* The string buffering machinery *)
+
+let initial_buffer = String.create 32
+
+let buffer = ref initial_buffer
+let bufpos = ref 0
+
+let reset_buffer () =
+  buffer := initial_buffer;
+  bufpos := 0
+
+let store c =
+  if !bufpos >= String.length !buffer then begin
+    let newbuffer = String.create (2 * !bufpos) in
+    String.blit !buffer 0 newbuffer 0 !bufpos;
+    buffer := newbuffer
+  end;
+  String.set !buffer !bufpos c;
+  incr bufpos
+
+let get_string () =
+  let s = String.sub !buffer 0 !bufpos in buffer := initial_buffer; s
+
+(* The lexer *)
+
+let make_lexer keywords =
+
+  let kwd_table = Hashtbl.create 17 in
+  List.iter (fun s -> Hashtbl.add kwd_table s (Kwd s)) keywords;
+
+  let ident_or_keyword id =
+    try Hashtbl.find kwd_table id with Not_found -> Ident id
+
+  and keyword_or_error c =
+    let s = String.make 1 c in
+      try Hashtbl.find kwd_table s
+      with Not_found -> raise(Stream.Parse_error("Illegal character " ^ s)) in
+
+  let rec next_token = parser
+    [< '  ' '|'\010'|'\013'|'\009'|'\026'|'\012'; s >] ->
+      next_token s
+  | [< '  'A'..'Z'|'a'..'z'|'\192'..'\255' as c; s>] ->
+      reset_buffer(); store c; ident s
+  | [< '  '!'|'%'|'&'|'$'|'#'|'+'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|
+              '~'|'^'|'|'|'*' as c; s >] ->
+      reset_buffer(); store c; ident2 s
+  | [< '  '0'..'9' as c; s>] ->
+      reset_buffer(); store c; number s
+  | [< '  '\''; c = char; '  '\'' >] ->
+      Some(Char c)
+  | [< '  '"' (* '"' *); s >] ->
+      reset_buffer(); Some(String(string s))
+  | [< '  '-'; s >] ->
+      neg_number s
+  | [< '  '('; s >] ->
+      maybe_comment s
+  | [< ' c >] ->
+      Some(keyword_or_error c)
+  | [< >] ->
+      None
+
+  and ident = parser
+    [< '  'A'..'Z'|'a'..'z'|'\192'..'\255'|'0'..'9'|'_'|'\'' as c; s>] ->
+      store c; ident s
+  | [< >] ->
+      Some(ident_or_keyword(get_string()))
+
+  and ident2 = parser
+    [< '  '!'|'%'|'&'|'$'|'#'|'+'|'-'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|
+              '~'|'^'|'|'|'*' as c; s >] ->
+      store c; ident2 s
+  | [< >] ->
+      Some(ident_or_keyword(get_string()))
+
+  and neg_number = parser
+    [< '  '0'..'9' as c; s >] ->
+      reset_buffer(); store '-'; store c; number s
+  | [< s >] ->
+      reset_buffer(); store '-'; ident2 s
+    
+  and number = parser
+    [< '  '0'..'9' as c; s >] ->
+      store c; number s
+  | [< '  '.'; s >] ->
+      store '.'; decimal_part s
+  | [< '  'e'|'E'; s >] ->
+      store 'E'; exponent_part s
+  | [< >] ->
+      Some(Int(int_of_string(get_string())))
+
+  and decimal_part = parser
+    [< '  '0'..'9' as c; s >] ->
+      store c; decimal_part s
+  | [< '  'e'|'E'; s >] ->
+      store 'E'; exponent_part s
+  | [< >] ->
+      Some(Float(float_of_string(get_string())))
+
+  and exponent_part = parser
+    [< '  '+'|'-' as c; s >] ->
+      store c; end_exponent_part s
+  | [< s >] ->
+      end_exponent_part s
+
+  and end_exponent_part = parser
+    [< '  '0'..'9' as c; s >] ->
+      store c; end_exponent_part s
+  | [< >] ->
+      Some(Float(float_of_string(get_string())))
+
+  and string = parser
+    [< '  '"' (* '"' *)  >] -> get_string()
+  | [< '  '\\'; c = escape; s >] -> store c; string s
+  | [< ' c; s >] -> store c; string s
+
+  and char = parser
+    [< '  '\\'; c = escape >] -> c
+  | [< ' c >] -> c
+
+  and escape = parser
+    [< '  'n' >] -> '\n'
+  | [< '  'r' >] -> '\r'
+  | [< '  't' >] -> '\t'
+  | [< '  '0'..'9' as c1; '  '0'..'9' as c2; '  '0'..'9' as c3 >] ->
+      Char.chr((Char.code c1 - 48) * 100 +
+               (Char.code c2 - 48) * 10 + (Char.code c3))
+  | [< ' c >] -> c
+
+  and maybe_comment = parser
+    [< '  '*'; s >] -> comment s; next_token s
+  | [< >] -> Some(keyword_or_error '(')
+
+  and comment = parser
+    [< '  '('; s >] -> maybe_nested_comment s
+  | [< '  '*'; s >] -> maybe_end_comment s
+  | [< ' c; s >] -> comment s
+
+  and maybe_nested_comment = parser
+    [< '  '*'; s >] -> comment s; comment s
+  | [< ' c; s >] -> comment s
+
+  and maybe_end_comment = parser
+    [< '  ')' >] -> ()
+  | [< ' c; s >] -> comment s
+
+  in fun input -> Stream.from (fun count -> next_token input)
diff --git a/stdlib/genlex.mli b/stdlib/genlex.mli
new file mode 100644
index 0000000000..54c75be5b9
--- /dev/null
+++ b/stdlib/genlex.mli
@@ -0,0 +1,62 @@
+(***********************************************************************)
+(*                                                                     *)
+(*                         Caml Special Light                          *)
+(*                                                                     *)
+(*              Xavier Leroy, projet Cristal, INRIA Rocquencourt       *)
+(*                                                                     *)
+(*  Copyright 1995 Institut National de Recherche en Informatique et   *)
+(*  Automatique.  Distributed only by permission.                      *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* $Id$ *)
+
+(* Module [Genlex]: a generic lexical analyzer *)
+
+(* This module implements a simple ``standard'' lexical analyzer, presented
+   as a function from character streams to token streams. It implements
+   roughly the lexical conventions of Caml, but is parameterized by the
+   set of keywords of your language. *)
+
+type token =
+    Kwd of string
+  | Ident of string
+  | Int of int
+  | Float of float
+  | String of string
+  | Char of char
+        (* The type of tokens. The lexical classes are: [Int] and [Float]
+           for integer and floating-point numbers; [String] for
+           string literals, enclosed in double quotes; [Char] for
+           character literals, enclosed in single quotes; [Ident] for
+           identifiers (either sequences of letters, digits, underscores
+           and quotes, or sequences of ``operator characters'' such as
+           [+], [*], etc); and [Kwd] for keywords (either identifiers or
+           single ``special characters'' such as [(], [}], etc). *)
+           
+val make_lexer: string list -> (char Stream.t -> token Stream.t)
+        (* Construct the lexer function. The first argument is the list of
+           keywords. An identifier [s] is returned as [Kwd s] if [s]
+           belongs to this list, and as [Ident s] otherwise.
+           A special character [s] is returned as [Kwd s] if [s]
+           belongs to this list, and cause a lexical error (exception
+           [Parse_error]) otherwise. Blanks and newlines are skipped.
+           Comments delimited by [(*] and [*)] are skipped as well,
+           and can be nested. *)
+
+        (* Example: a lexer suitable for a desk calculator is obtained by
+           [
+           let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"]
+           ]
+           The associated parser would be a function from [token stream]
+           to, for instance, [int], and would have rules such as:
+           [
+           let parse_expr = function
+                  [< 'Int n >] -> n
+                | [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n
+                | [< n1 = parse_expr; n2 = parse_remainder n1 >] -> n2
+           and parse_remainder n1 = function
+                  [< 'Kwd "+"; n2 = parse_expr >] -> n1+n2
+                | ...
+           ]
+*)
diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml
index 4b17cc35aa..d2ee5a4474 100644
--- a/stdlib/pervasives.ml
+++ b/stdlib/pervasives.ml
@@ -155,7 +155,7 @@ let stderr = open_descriptor_out 2
 type open_flag =
     Open_rdonly | Open_wronly | Open_append
   | Open_creat | Open_trunc | Open_excl
-  | Open_binary | Open_text
+  | Open_binary | Open_text | Open_nonblock
 
 external open_desc: string -> open_flag list -> int -> int = "sys_open"
 
diff --git a/stdlib/pervasives.mli b/stdlib/pervasives.mli
index 027d5bffae..33c9738021 100644
--- a/stdlib/pervasives.mli
+++ b/stdlib/pervasives.mli
@@ -344,7 +344,7 @@ val read_float : unit -> float
 type open_flag =
     Open_rdonly | Open_wronly | Open_append
   | Open_creat | Open_trunc | Open_excl
-  | Open_binary | Open_text
+  | Open_binary | Open_text | Open_nonblock
         (* Opening modes for [open_out_gen] and [open_in_gen].
 -          [Open_rdonly]: open for reading.
 -          [Open_wronly]: open for writing.
@@ -353,7 +353,8 @@ type open_flag =
 -          [Open_trunc]: empty the file if it already exists.
 -          [Open_excl]: fail if the file already exists.
 -          [Open_binary]: open in binary mode (no conversion).
--          [Open_text]: open in text mode (may perform conversions). *)
+-          [Open_text]: open in text mode (may perform conversions).
+-          [Open_nonblock]: open in non-blocking mode. *)
            
 val open_out : string -> out_channel
         (* Open the named file for writing, and return a new output channel
diff --git a/stdlib/stream.mli b/stdlib/stream.mli
index 6d9f6b007c..18db4d7c9e 100644
--- a/stdlib/stream.mli
+++ b/stdlib/stream.mli
@@ -59,8 +59,8 @@ val empty : 'a t -> unit
 (** Useful functions *)
 
 val peek : 'a t -> 'a option
-        (* Return [Some] of "the first element" of the stream, or [None] if
-           the stream is empty. *)
+        (* Return [Some c] where [c] is the first element of the stream,
+           or [None] if the stream is empty. *)
 val junk : 'a t -> unit
         (* Remove the first element of the stream, possibly unfreezing
            it before. *)