summaryrefslogtreecommitdiff
path: root/ice-9/regex.scm
blob: d2f7b309d19d8d25d61b9ae1f96cc00611bf225c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
;;;; 	Copyright (C) 1997 Free Software Foundation, Inc.
;;;; 
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 2, or (at your option)
;;;; any later version.
;;;; 
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;;; GNU General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU General Public License
;;;; along with this software; see the file COPYING.  If not, write to
;;;; the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;;;; 

;;;; POSIX regex support functions.

(define-module (ice-9 regex))

;;; FIXME:
;;;   It is not clear what should happen if a `match' function
;;;   is passed a `match number' which is out of bounds for the
;;;   regexp match: return #f, or throw an error?  These routines
;;;   throw an out-of-range error.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; These procedures are not defined in SCSH, but I found them useful.

(define-public (match:count match)
  (- (vector-length match) 1))

(define-public (match:string match)
  (vector-ref match 0))

(define-public (match:prefix match)
  (make-shared-substring (match:string match)
			 0
			 (match:start match 0)))

(define-public (match:suffix match)
  (make-shared-substring (match:string match)
			 (match:end match 0)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; SCSH compatibility routines.

(define-public (regexp-match? match)
  (and (vector? match)
       (string? (vector-ref match 0))
       (let loop ((i 1))
	 (cond ((>= i (vector-length match)) #t)
	       ((and (pair? (vector-ref match i))
		     (integer? (car (vector-ref match i)))
		     (integer? (cdr (vector-ref match i))))
		(loop (+ 1 i)))
	       (else #f)))))

(define-public (regexp-quote regexp)
  (call-with-output-string
   (lambda (p)
     (let loop ((i 0))
       (and (< i (string-length regexp))
	    (begin
	      (case (string-ref regexp i)
		((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\})
		 (write-char #\\ p)))
	      (write-char (string-ref regexp i) p)
	      (loop (1+ i))))))))

(define-public (match:start match . args)
  (let* ((matchnum (if (pair? args)
		       (+ 1 (car args))
		       1))
	 (start (car (vector-ref match matchnum))))
    (if (= start -1) #f start)))

(define-public (match:end match . args)
  (let* ((matchnum (if (pair? args)
		       (+ 1 (car args))
		       1))
	 (end (cdr (vector-ref match matchnum))))
    (if (= end -1) #f end)))

(define-public (match:substring match . args)
  (let* ((matchnum (if (pair? args)
		       (car args)
		       0))
	 (start (match:start match matchnum))
	 (end   (match:end match matchnum)))
    (and start end (make-shared-substring (match:string match)
					  start
					  end))))

(define-public (string-match pattern str . args)
  (let ((rx (make-regexp pattern))
	(start (if (pair? args) (car args) 0)))
    (regexp-exec rx str start)))

(define-public (regexp-substitute port match . items)
  ;; If `port' is #f, send output to a string.
  (if (not port)
      (call-with-output-string
       (lambda (p)
	 (apply regexp-substitute p match items)))

      ;; Otherwise, process each substitution argument in `items'.
      (for-each (lambda (obj)
		  (cond ((string? obj)   (display obj port))
			((integer? obj)  (display (match:substring match obj) port))
			((eq? 'pre obj)  (display (match:prefix match) port))
			((eq? 'post obj) (display (match:suffix match) port))
			(else (error 'wrong-type-arg obj))))
		items)))

(define-public (regexp-substitute/global port regexp string . items)
  ;; If `port' is #f, send output to a string.
  (if (not port)
      (call-with-output-string
       (lambda (p)
	 (apply regexp-substitute/global p regexp string items)))

      ;; Otherwise, compile the regexp and match it against the
      ;; string, looping if 'post is encountered in `items'.
      (let ((rx (make-regexp regexp)))
	(let next-match ((str string))
	  (let ((match (regexp-exec rx str)))
	    (if (not match)
		(display str port)

		;; Process all of the items for this match.
		(for-each
		 (lambda (obj)
		   (cond
		    ((string? obj)    (display obj port))
		    ((integer? obj)   (display (match:substring match obj) port))
		    ((procedure? obj) (display (obj match) port))
		    ((eq? 'pre obj)   (display (match:prefix match) port))
		    ((eq? 'post obj)  (next-match (match:suffix match)))
		    (else (error 'wrong-type-arg obj))))
		 items)))))))