ocamlbuild/glob_lexer.mll


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

(***********************************************************************)
(*                             ocamlbuild                              *)
(*                                                                     *)
(*  Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *)
(*                                                                     *)
(*  Copyright 2007 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  All rights reserved.  This file is distributed    *)
(*  under the terms of the Q Public License version 1.0.               *)
(*                                                                     *)
(***********************************************************************)

(* $Id$ *)
(* Original author: Berke Durak *)
(* Glob *)
{
open Bool;;
open Glob_ast;;

type token =
| ATOM of pattern atom
| AND
| OR
| NOT
| LPAR
| RPAR
| TRUE
| FALSE
| EOF
;;

let sf = Printf.sprintf;;

let concat_patterns p1 p2 =
  match (p1,p2) with
  | (Epsilon,_) -> p2
  | (_,Epsilon) -> p1
  | (_,_)       -> Concat(p1,p2)
;;

let slash = Class(Atom('/','/'));;
let not_slash = Class(Not(Atom('/','/')));;
let any = Class True;;
}

let pattern_chars = ['a'-'z']|['A'-'Z']|'_'|'-'|['0'-'9']|'.'
let space_chars = [' ' '\t' '\n' '\r' '\012']

rule token = parse
| '<'             { ATOM(Pattern(let (p,_) = parse_pattern ['>'] Epsilon lexbuf in p)) }
| '"'             { ATOM(Constant(parse_string (Buffer.create 32) lexbuf)) }
| "and"|"AND"|"&" { AND }
| "or"|"OR"|"|"   { OR }
| "not"|"NOT"|"~" { NOT }
| "true"|"1"      { TRUE }
| "false"|"0"     { FALSE }
| "("             { LPAR }
| ")"             { RPAR }
| space_chars+    { token lexbuf }
| eof             { EOF }

and parse_pattern eof_chars p = parse
| (pattern_chars+ as u) { parse_pattern eof_chars (concat_patterns p (Word u)) lexbuf }
| '{'                   
  {
    let rec loop pl =
      let (p',c) = parse_pattern ['}';','] Epsilon lexbuf in
      let pl = p' :: pl in
      if c = ',' then
        loop pl
      else
        parse_pattern eof_chars (concat_patterns p (Union pl)) lexbuf
    in
    loop []
  }
| "[^"
  {
    let cl = Not(Or(parse_class [] lexbuf)) in
    parse_pattern eof_chars (concat_patterns p (Class cl)) lexbuf
  }
| '['                   
  {
    let cl = Or(parse_class [] lexbuf) in
    parse_pattern eof_chars (concat_patterns p (Class cl)) lexbuf
  }
(* Random thought... **/* seems to be equal to True *)
| "/**/" (* / | /\Sigma^*/ *)
  { let q = Union[slash; Concat(slash, Concat(Star any, slash)) ] in
    parse_pattern eof_chars (concat_patterns p q) lexbuf }
| "/**" (* \varepsilon | /\Sigma^* *)
  { let q = Union[Epsilon; Concat(slash, Star any)] in
    parse_pattern eof_chars (concat_patterns p q) lexbuf }
| "**/" (* \varepsilon | \Sigma^*/ *)
  { let q = Union[Epsilon; Concat(Star any, slash)] in
    parse_pattern eof_chars (concat_patterns p q) lexbuf }
| "**" { raise (Parse_error("Ambiguous ** pattern not allowed unless surrounded by one or more slashes")) }
| '*' { parse_pattern eof_chars (concat_patterns p (Star not_slash)) lexbuf }
| '/' { parse_pattern eof_chars (concat_patterns p slash) lexbuf }
| '?' { parse_pattern eof_chars (concat_patterns p not_slash) lexbuf }
| _ as c
  { if List.mem c eof_chars then 
      (p,c)
    else
      raise (Parse_error(sf "Unexpected character %C in glob pattern" c))
  }

and parse_string b = parse
| "\""                  { Buffer.contents b }
| "\\\""                { Buffer.add_char b '"'; parse_string b lexbuf }
| [^'"' '\\']+ as u     { Buffer.add_string b u; parse_string b lexbuf }
| _ as c                { raise (Parse_error(sf "Unexpected character %C in string" c)) }

and parse_class cl = parse
| ']'                     { cl }
| "-]"                    { ((Atom('-','-'))::cl) }
| (_ as c1) '-' (_ as c2) { parse_class ((Atom(c1,c2))::cl) lexbuf }
| _ as c                  { parse_class ((Atom(c,c))::cl) lexbuf }