1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
-----------------------------------------------------------------------------
-- |
-- Module : Text.Regex.Posix
-- Copyright : (c) The University of Glasgow 2001
-- License : BSD-style (see the file libraries/core/LICENSE)
--
-- Maintainer : libraries@haskell.org
-- Stability : experimental
-- Portability : non-portable (only on platforms that provide POSIX regexps)
--
-- Interface to the POSIX regular expression library.
-- ToDo: should have an interface using PackedStrings.
--
-----------------------------------------------------------------------------
module Text.Regex.Posix (
Regex, -- abstract
regcomp, -- :: String -> Int -> IO Regex
regexec, -- :: Regex -- pattern
-- -> String -- string to match
-- -> IO (Maybe (String, -- everything before match
-- String, -- matched portion
-- String, -- everything after match
-- [String])) -- subexpression matches
regExtended, -- (flag to regcomp) use extended regex syntax
regIgnoreCase, -- (flag to regcomp) ignore case when matching
regNewline -- (flag to regcomp) '.' doesn't match newline
) where
#include <sys/types.h>
#include "regex.h"
import Prelude
import Foreign
import Foreign.C
newtype Regex = Regex (ForeignPtr CRegex)
-- -----------------------------------------------------------------------------
-- regcomp
regcomp :: String -> Int -> IO Regex
regcomp pattern flags = do
regex_ptr <- mallocBytes (#const sizeof(regex_t))
regex_fptr <- newForeignPtr regex_ptr (regfree regex_ptr)
r <- withCString pattern $ \cstr ->
withForeignPtr regex_fptr $ \p ->
c_regcomp p cstr (fromIntegral flags)
if (r == 0)
then return (Regex regex_fptr)
else error "Text.Regex.Posix.regcomp: error in pattern" -- ToDo
regfree :: Ptr CRegex -> IO ()
regfree p_regex = do
c_regfree p_regex
free p_regex
-- -----------------------------------------------------------------------------
-- regexec
regexec :: Regex -- pattern
-> String -- string to match
-> IO (Maybe (String, -- everything before match
String, -- matched portion
String, -- everything after match
[String])) -- subexpression matches
regexec (Regex regex_fptr) str = do
withCString str $ \cstr -> do
withForeignPtr regex_fptr $ \regex_ptr -> do
nsub <- (#peek regex_t, re_nsub) regex_ptr
let nsub_int = fromIntegral (nsub :: CSize)
allocaBytes ((1 + nsub_int) * (#const sizeof(regmatch_t))) $ \p_match -> do
-- add one because index zero covers the whole match
r <- c_regexec regex_ptr cstr (1 + nsub) p_match 0{-no flags for now-}
if (r /= 0) then return Nothing else do
(before,match,after) <- matched_parts str p_match
sub_strs <-
mapM (unpack str) $ take nsub_int $ tail $
iterate (`plusPtr` (#const sizeof(regmatch_t))) p_match
return (Just (before, match, after, sub_strs))
matched_parts :: String -> Ptr CRegMatch -> IO (String, String, String)
matched_parts string p_match = do
start <- (#peek regmatch_t, rm_so) p_match :: IO CInt
end <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
let s = fromIntegral start; e = fromIntegral end
return ( take (s-1) string,
take (e-s) (drop s string),
drop e string )
unpack :: String -> Ptr CRegMatch -> IO (String)
unpack string p_match = do
start <- (#peek regmatch_t, rm_so) p_match :: IO CInt
end <- (#peek regmatch_t, rm_eo) p_match :: IO CInt
-- the subexpression may not have matched at all, perhaps because it
-- was optional. In this case, the offsets are set to -1.
if (start == -1) then return "" else do
return (take (fromIntegral (end-start)) (drop (fromIntegral start) string))
-- -----------------------------------------------------------------------------
-- The POSIX regex C interface
-- Flags for regexec
#enum Int,, \
REG_NOTBOL, \
REG_NOTEOL \
-- Return values from regexec
#enum Int,, \
REG_NOMATCH
-- REG_ESPACE
-- Flags for regcomp
#enum Int,, \
REG_EXTENDED, \
regIgnoreCase = REG_ICASE, \
REG_NOSUB, \
REG_NEWLINE
-- Error codes from regcomp
#enum Int,, \
REG_BADBR, \
REG_BADPAT, \
REG_BADRPT, \
REG_ECOLLATE, \
REG_ECTYPE, \
REG_EESCAPE, \
REG_ESUBREG, \
REG_EBRACK, \
REG_EPAREN, \
REG_EBRACE, \
REG_ERANGE, \
REG_ESPACE
type CRegex = ()
type CRegMatch = ()
foreign import ccall unsafe "regcomp"
c_regcomp :: Ptr CRegex -> CString -> CInt -> IO CInt
foreign import ccall unsafe "regfree"
c_regfree :: Ptr CRegex -> IO ()
foreign import ccall unsafe "regexec"
c_regexec :: Ptr CRegex -> CString -> CSize
-> Ptr CRegMatch -> CInt -> IO CInt
|