1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
(***********************************************************************)
(* *)
(* Objective Caml *)
(* *)
(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
(* *)
(* Copyright 1996 Institut National de Recherche en Informatique et *)
(* en Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU Library General Public License. *)
(* *)
(***********************************************************************)
(* $Id$ *)
(* Module [Str]: regular expressions and high-level string processing *)
(*** Regular expressions *)
type regexp
(* The type of compiled regular expressions. *)
val regexp: string -> regexp
(* Compile a regular expression. The syntax for regular expressions
is the same as in Gnu Emacs. The special characters are
[$^.*+?[]]. The following constructs are recognized:
- [. ] matches any character except newline
- [* ] (postfix) matches the previous expression zero, one or
several times
- [+ ] (postfix) matches the previous expression one or
several times
- [? ] (postfix) matches the previous expression once or
not at all
- [[..] ] character set; ranges are denoted with [-], as in [[a-z]];
an initial [^], as in [[^0-9]], complements the set
- [^ ] matches at beginning of line
- [$ ] matches at end of line
- [\| ] (infix) alternative between two expressions
- [\(..\)] grouping and naming of the enclosed expression
- [\1 ] the text matched by the first [\(...\)] expression
([\2] for the second expression, etc)
- [\b ] matches word boundaries
- [\ ] quotes special characters. *)
val regexp_case_fold: string -> regexp
(* Same as [regexp], but the compiled expression will match text
in a case-insensitive way: uppercase and lowercase letters will
be considered equivalent. *)
val quote: string -> string
(* [Str.quote s] returns a regexp string that matches exactly
[s] and nothing else. *)
val regexp_string: string -> regexp
val regexp_string_case_fold: string -> regexp
(* [Str.regexp_string s] returns a regular expression
that matches exactly [s] and nothing else.
[Str.regexp_string_case_fold] is similar, but the regexp
matches in a case-insensitive way. *)
(*** String matching and searching *)
external string_match: regexp -> string -> int -> bool = "str_string_match"
(* [string_match r s start] tests whether the characters in [s]
starting at position [start] match the regular expression [r].
The first character of a string has position [0], as usual. *)
external search_forward: regexp -> string -> int -> int = "str_search_forward"
(* [search_forward r s start] searchs the string [s] for a substring
matching the regular expression [r]. The search starts at position
[start] and proceeds towards the end of the string.
Return the position of the first character of the matched
substring, or raise [Not_found] if no substring matches. *)
external search_backward: regexp -> string -> int -> int = "str_search_backward"
(* Same as [search_forward], but the search proceeds towards the
beginning of the string. *)
external string_partial_match: regexp -> string -> int -> bool = "str_string_partial_match"
(* Similar to [string_match], but succeeds whenever the argument
string is a prefix of a string that matches. This includes
the case of a true complete match. *)
val matched_string: string -> string
(* [matched_string s] returns the substring of [s] that was matched
by the latest [string_match], [search_forward] or [search_backward].
The user must make sure that the parameter [s] is the same string
that was passed to the matching or searching function. *)
val match_beginning: unit -> int
val match_end: unit -> int
(* [match_beginning()] returns the position of the first character
of the substring that was matched by [string_match],
[search_forward] or [search_backward]. [match_end()] returns
the position of the character following the last character of
the matched substring. *)
val matched_group: int -> string -> string
(* [matched_group n s] returns the substring of [s] that was matched
by the [n]th group [\(...\)] of the regular expression during
the latest [string_match], [search_forward] or [search_backward].
The user must make sure that the parameter [s] is the same string
that was passed to the matching or searching function. *)
val group_beginning: int -> int
val group_end: int -> int
(* [group_beginning n] returns the position of the first character
of the substring that was matched by the [n]th group of
the regular expression. [group_end n] returns
the position of the character following the last character of
the matched substring. *)
(*** Replacement *)
val global_replace: regexp -> string -> string -> string
(* [global_replace regexp repl s] returns a string identical to [s],
except that all substrings of [s] that match [regexp] have been
replaced by [repl]. The replacement text [repl] can contain
[\1], [\2], etc; these sequences will be replaced by the text
matched by the corresponding group in the regular expression.
[\0] stands for the text matched by the whole regular expression. *)
val replace_first: regexp -> string -> string -> string
(* Same as [global_replace], except that only the first substring
matching the regular expression is replaced. *)
val global_substitute: regexp -> (string -> string) -> string -> string
(* [global_substitute regexp subst s] returns a string identical
to [s], except that all substrings of [s] that match [regexp]
have been replaced by the result of function [subst]. The
function [subst] is called once for each matching substring,
and receives [s] (the whole text) as argument. *)
val substitute_first: regexp -> (string -> string) -> string -> string
(* Same as [global_substitute], except that only the first substring
matching the regular expression is replaced. *)
val replace_matched : string -> string -> string
(* [replace_matched repl s] returns the replacement text [repl]
in which [\1], [\2], etc. have been replaced by the text
matched by the corresponding groups in the most recent matching
operation. [s] must be the same string that was matched during
this matching operation. *)
(*** Splitting *)
val split: regexp -> string -> string list
(* [split r s] splits [s] into substrings, taking as delimiters
the substrings that match [r], and returns the list of substrings.
For instance, [split (regexp "[ \t]+") s] splits [s] into
blank-separated words. An occurrence of the delimiter at the
beginning and at the end of the string is ignored. *)
val bounded_split: regexp -> string -> int -> string list
(* Same as [split], but splits into at most [n] substrings,
where [n] is the extra integer parameter. *)
val split_delim: regexp -> string -> string list
val bounded_split_delim: regexp -> string -> int -> string list
(* Same as [split] and [bounded_split], but occurrences of the
delimiter at the beginning and at the end of the string are
recognized and returned as empty strings in the result.
For instance, [split_delim (regexp " ") " abc "]
returns [[""; "abc"; ""]], while [split] with the same
arguments returns [["abc"]]. *)
type split_result = Text of string | Delim of string
val full_split: regexp -> string -> split_result list
val bounded_full_split: regexp -> string -> int -> split_result list
(* Same as [split_delim] and [bounded_split_delim], but returns
the delimiters as well as the substrings contained between
delimiters. The former are tagged [Delim] in the result list;
the latter are tagged [Text]. For instance,
[full_split (regexp "[{}]") "{ab}"] returns
[[Delim "{"; Text "ab"; Delim "}"]]. *)
(*** Extracting substrings *)
val string_before: string -> int -> string
(* [string_before s n] returns the substring of all characters of [s]
that precede position [n] (excluding the character at
position [n]). *)
val string_after: string -> int -> string
(* [string_after s n] returns the substring of all characters of [s]
that follow position [n] (including the character at
position [n]). *)
val first_chars: string -> int -> string
(* [first_chars s n] returns the first [n] characters of [s].
This is the same function as [string_before]. *)
val last_chars: string -> int -> string
(* [last_chars s n] returns the last [n] characters of [s]. *)
|