summaryrefslogtreecommitdiff
path: root/module/ice-9/getopt-long.scm
blob: 14eaf8e23bf0d03e1734876e6fb3a051e355cc61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
;;; Copyright (C) 1998, 2001, 2006, 2009, 2011 Free Software Foundation, Inc.
;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;; 
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;; 
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

;;; Author: Russ McManus (rewritten by Thien-Thi Nguyen)

;;; Commentary:

;;; This module implements some complex command line option parsing, in
;;; the spirit of the GNU C library function `getopt_long'.  Both long
;;; and short options are supported.
;;;
;;; The theory is that people should be able to constrain the set of
;;; options they want to process using a grammar, rather than some arbitrary
;;; structure.  The grammar makes the option descriptions easy to read.
;;;
;;; `getopt-long' is a procedure for parsing command-line arguments in a
;;; manner consistent with other GNU programs.  `option-ref' is a procedure
;;; that facilitates processing of the `getopt-long' return value.

;;; (getopt-long ARGS GRAMMAR)
;;; Parse the arguments ARGS according to the argument list grammar GRAMMAR.
;;;
;;; ARGS should be a list of strings.  Its first element should be the
;;; name of the program; subsequent elements should be the arguments
;;; that were passed to the program on the command line.  The
;;; `program-arguments' procedure returns a list of this form.
;;;
;;; GRAMMAR is a list of the form:
;;; ((OPTION (PROPERTY VALUE) ...) ...)
;;;
;;; Each OPTION should be a symbol.  `getopt-long' will accept a
;;; command-line option named `--OPTION'.
;;; Each option can have the following (PROPERTY VALUE) pairs:
;;;
;;;   (single-char CHAR) --- Accept `-CHAR' as a single-character
;;;		equivalent to `--OPTION'.  This is how to specify traditional
;;;		Unix-style flags.
;;;   (required? BOOL) --- If BOOL is true, the option is required.
;;;		getopt-long will raise an error if it is not found in ARGS.
;;;   (value BOOL) --- If BOOL is #t, the option accepts a value; if
;;;		it is #f, it does not; and if it is the symbol
;;;		`optional', the option may appear in ARGS with or
;;;		without a value.
;;;   (predicate FUNC) --- If the option accepts a value (i.e. you
;;;		specified `(value #t)' for this option), then getopt
;;;		will apply FUNC to the value, and throw an exception
;;;		if it returns #f.  FUNC should be a procedure which
;;;		accepts a string and returns a boolean value; you may
;;;		need to use quasiquotes to get it into GRAMMAR.
;;;
;;; The (PROPERTY VALUE) pairs may occur in any order, but each
;;; property may occur only once.  By default, options do not have
;;; single-character equivalents, are not required, and do not take
;;; values.
;;;
;;; In ARGS, single-character options may be combined, in the usual
;;; Unix fashion: ("-x" "-y") is equivalent to ("-xy").  If an option
;;; accepts values, then it must be the last option in the
;;; combination; the value is the next argument.  So, for example, using
;;; the following grammar:
;;;      ((apples    (single-char #\a))
;;;       (blimps    (single-char #\b) (value #t))
;;;       (catalexis (single-char #\c) (value #t)))
;;; the following argument lists would be acceptable:
;;;    ("-a" "-b" "bang" "-c" "couth")     ("bang" and "couth" are the values
;;;                                         for "blimps" and "catalexis")
;;;    ("-ab" "bang" "-c" "couth")         (same)
;;;    ("-ac" "couth" "-b" "bang")         (same)
;;;    ("-abc" "couth" "bang")             (an error, since `-b' is not the
;;;                                         last option in its combination)
;;;
;;; If an option's value is optional, then `getopt-long' decides
;;; whether it has a value by looking at what follows it in ARGS.  If
;;; the next element is does not appear to be an option itself, then
;;; that element is the option's value.
;;;
;;; The value of a long option can appear as the next element in ARGS,
;;; or it can follow the option name, separated by an `=' character.
;;; Thus, using the same grammar as above, the following argument lists
;;; are equivalent:
;;;   ("--apples" "Braeburn" "--blimps" "Goodyear")
;;;   ("--apples=Braeburn" "--blimps" "Goodyear")
;;;   ("--blimps" "Goodyear" "--apples=Braeburn")
;;;
;;; If the option "--" appears in ARGS, argument parsing stops there;
;;; subsequent arguments are returned as ordinary arguments, even if
;;; they resemble options.  So, in the argument list:
;;;         ("--apples" "Granny Smith" "--" "--blimp" "Goodyear")
;;; `getopt-long' will recognize the `apples' option as having the
;;; value "Granny Smith", but it will not recognize the `blimp'
;;; option; it will return the strings "--blimp" and "Goodyear" as
;;; ordinary argument strings.
;;;
;;; The `getopt-long' function returns the parsed argument list as an
;;; assocation list, mapping option names --- the symbols from GRAMMAR
;;; --- onto their values, or #t if the option does not accept a value.
;;; Unused options do not appear in the alist.
;;;
;;; All arguments that are not the value of any option are returned
;;; as a list, associated with the empty list.
;;;
;;; `getopt-long' throws an exception if:
;;; - it finds an unrecognized property in GRAMMAR
;;; - the value of the `single-char' property is not a character
;;; - it finds an unrecognized option in ARGS
;;; - a required option is omitted
;;; - an option that requires an argument doesn't get one
;;; - an option that doesn't accept an argument does get one (this can
;;;   only happen using the long option `--opt=value' syntax)
;;; - an option predicate fails
;;;
;;; So, for example:
;;;
;;; (define grammar
;;;   `((lockfile-dir (required? #t)
;;;                   (value #t)
;;;                   (single-char #\k)
;;;                   (predicate ,file-is-directory?))
;;;     (verbose (required? #f)
;;;              (single-char #\v)
;;;              (value #f))
;;;     (x-includes (single-char #\x))
;;;     (rnet-server (single-char #\y)
;;;                  (predicate ,string?))))
;;;
;;; (getopt-long '("my-prog" "-vk" "/tmp" "foo1" "--x-includes=/usr/include"
;;;                "--rnet-server=lamprod" "--" "-fred" "foo2" "foo3")
;;;                grammar)
;;; => ((() "foo1" "-fred" "foo2" "foo3")
;;; 	(rnet-server . "lamprod")
;;; 	(x-includes . "/usr/include")
;;; 	(lockfile-dir . "/tmp")
;;; 	(verbose . #t))

;;; (option-ref OPTIONS KEY DEFAULT)
;;; Return value in alist OPTIONS using KEY, a symbol; or DEFAULT if not
;;; found.  The value is either a string or `#t'.
;;;
;;; For example, using the `getopt-long' return value from above:
;;;
;;; (option-ref (getopt-long ...) 'x-includes 42) => "/usr/include"
;;; (option-ref (getopt-long ...) 'not-a-key! 31) => 31

;;; Code:

(define-module (ice-9 getopt-long)
  #:use-module ((ice-9 common-list) #:select (remove-if-not))
  #:use-module (srfi srfi-9)
  #:use-module (ice-9 match)
  #:use-module (ice-9 regex)
  #:use-module (ice-9 optargs)
  #:export (getopt-long option-ref))

(define %program-name (make-fluid "guile"))
(define (program-name)
  (fluid-ref %program-name))

(define (fatal-error fmt . args)
  (format (current-error-port) "~a: " (program-name))
  (apply format (current-error-port) fmt args)
  (newline (current-error-port))
  (exit 1))

(define-record-type option-spec
  (%make-option-spec name required? option-spec->single-char predicate value-policy)
  option-spec?
  (name
   option-spec->name set-option-spec-name!)
  (required?
   option-spec->required? set-option-spec-required?!)
  (option-spec->single-char
   option-spec->single-char set-option-spec-single-char!)
  (predicate
   option-spec->predicate set-option-spec-predicate!)
  (value-policy
   option-spec->value-policy set-option-spec-value-policy!))

(define (make-option-spec name)
  (%make-option-spec name #f #f #f #f))

(define (parse-option-spec desc)
  (let ((spec (make-option-spec (symbol->string (car desc)))))
    (for-each (match-lambda
               (('required? val)
                (set-option-spec-required?! spec val))
               (('value val)
                (set-option-spec-value-policy! spec val))
               (('single-char val)
                (or (char? val)
                    (error "`single-char' value must be a char!"))
                (set-option-spec-single-char! spec val))
               (('predicate pred)
                (set-option-spec-predicate!
                 spec (lambda (name val)
                        (or (not val)
                            (pred val)
                            (fatal-error "option predicate failed: --~a"
                                         name)))))
               ((prop val)
                (error "invalid getopt-long option property:" prop)))
              (cdr desc))
    spec))

(define (split-arg-list argument-list)
  ;; Scan ARGUMENT-LIST for "--" and return (BEFORE-LS . AFTER-LS).
  ;; Discard the "--".  If no "--" is found, AFTER-LS is empty.
  (let loop ((yes '()) (no argument-list))
    (cond ((null? no)               (cons (reverse yes) no))
	  ((string=? "--" (car no)) (cons (reverse yes) (cdr no)))
	  (else (loop (cons (car no) yes) (cdr no))))))

(define short-opt-rx           (make-regexp "^-([a-zA-Z]+)(.*)"))
(define long-opt-no-value-rx   (make-regexp "^--([^=]+)$"))
(define long-opt-with-value-rx (make-regexp "^--([^=]+)=(.*)"))

(define (looks-like-an-option string)
  (or (regexp-exec short-opt-rx string)
      (regexp-exec long-opt-with-value-rx string)
      (regexp-exec long-opt-no-value-rx string)))

(define (process-options specs argument-ls stop-at-first-non-option)
  ;; Use SPECS to scan ARGUMENT-LS; return (FOUND . ETC).
  ;; FOUND is an unordered list of option specs for found options, while ETC
  ;; is an order-maintained list of elements in ARGUMENT-LS that are neither
  ;; options nor their values.
  (let ((idx (map (lambda (spec)
                    (cons (option-spec->name spec) spec))
                  specs))
        (sc-idx (map (lambda (spec)
                       (cons (make-string 1 (option-spec->single-char spec))
                             spec))
                     (remove-if-not option-spec->single-char specs))))
    (let loop ((unclumped 0) (argument-ls argument-ls) (found '()) (etc '()))
      (define (eat! spec ls)
        (cond
         ((eq? 'optional (option-spec->value-policy spec))
          (if (or (null? ls)
                  (looks-like-an-option (car ls)))
              (loop (- unclumped 1) ls (acons spec #t found) etc)
              (loop (- unclumped 2) (cdr ls) (acons spec (car ls) found) etc)))
         ((eq? #t (option-spec->value-policy spec))
          (if (or (null? ls)
                  (looks-like-an-option (car ls)))
              (fatal-error "option must be specified with argument: --~a"
                           (option-spec->name spec))
              (loop (- unclumped 2) (cdr ls) (acons spec (car ls) found) etc)))
         (else
          (loop (- unclumped 1) ls (acons spec #t found) etc))))
      
      (match argument-ls
        (()
         (cons found (reverse etc)))
        ((opt . rest)
         (cond
          ((regexp-exec short-opt-rx opt)
           => (lambda (match)
                (if (> unclumped 0)
                    ;; Next option is known not to be clumped.
                    (let* ((c (match:substring match 1))
                           (spec (or (assoc-ref sc-idx c)
                                     (fatal-error "no such option: -~a" c))))
                      (eat! spec rest))
                    ;; Expand a clumped group of short options.
                    (let* ((extra (match:substring match 2))
                           (unclumped-opts
                            (append (map (lambda (c)
                                           (string-append "-" (make-string 1 c)))
                                         (string->list
                                          (match:substring match 1)))
                                    (if (string=? "" extra) '() (list extra)))))
                      (loop (length unclumped-opts)
                            (append unclumped-opts rest)
                            found
                            etc)))))
          ((regexp-exec long-opt-no-value-rx opt)
           => (lambda (match)
                (let* ((opt (match:substring match 1))
                       (spec (or (assoc-ref idx opt)
                                 (fatal-error "no such option: --~a" opt))))
                  (eat! spec rest))))
          ((regexp-exec long-opt-with-value-rx opt)
           => (lambda (match)
                (let* ((opt (match:substring match 1))
                       (spec (or (assoc-ref idx opt)
                                 (fatal-error "no such option: --~a" opt))))
                  (if (option-spec->value-policy spec)
                      (eat! spec (cons (match:substring match 2) rest))
                      (fatal-error "option does not support argument: --~a"
                                   opt)))))
          ((and stop-at-first-non-option
                (<= unclumped 0))
           (cons found (append (reverse etc) argument-ls)))
          (else
           (loop (- unclumped 1) rest found (cons opt etc)))))))))

(define* (getopt-long program-arguments option-desc-list
                      #:key stop-at-first-non-option)
  "Process options, handling both long and short options, similar to
the glibc function 'getopt_long'.  PROGRAM-ARGUMENTS should be a value
similar to what (program-arguments) returns.  OPTION-DESC-LIST is a
list of option descriptions.  Each option description must satisfy the
following grammar:

    <option-spec>           :: (<name> . <attribute-ls>)
    <attribute-ls>          :: (<attribute> . <attribute-ls>)
                               | ()
    <attribute>             :: <required-attribute>
                               | <arg-required-attribute>
                               | <single-char-attribute>
                               | <predicate-attribute>
                               | <value-attribute>
    <required-attribute>    :: (required? <boolean>)
    <single-char-attribute> :: (single-char <char>)
    <value-attribute>       :: (value #t)
                               (value #f)
                               (value optional)
    <predicate-attribute>   :: (predicate <1-ary-function>)

    The procedure returns an alist of option names and values.  Each
option name is a symbol.  The option value will be '#t' if no value
was specified.  There is a special item in the returned alist with a
key of the empty list, (): the list of arguments that are not options
or option values.
    By default, options are not required, and option values are not
required.  By default, single character equivalents are not supported;
if you want to allow the user to use single character options, you need
to add a `single-char' clause to the option description."
  (with-fluids ((%program-name (car program-arguments)))
    (let* ((specifications (map parse-option-spec option-desc-list))
           (pair (split-arg-list (cdr program-arguments)))
           (split-ls (car pair))
           (non-split-ls (cdr pair))
           (found/etc (process-options specifications split-ls
                                       stop-at-first-non-option))
           (found (car found/etc))
           (rest-ls (append (cdr found/etc) non-split-ls)))
      (for-each (lambda (spec)
                  (let ((name (option-spec->name spec))
                        (val (assq-ref found spec)))
                    (and (option-spec->required? spec)
                         (or val
                             (fatal-error "option must be specified: --~a"
                                          name)))
                    (let ((pred (option-spec->predicate spec)))
                      (and pred (pred name val)))))
                specifications)
      (for-each (lambda (spec+val)
                  (set-car! spec+val
                            (string->symbol (option-spec->name (car spec+val)))))
                found)
      (cons (cons '() rest-ls) found))))

(define (option-ref options key default)
  "Return value in alist OPTIONS using KEY, a symbol; or DEFAULT if not found.
The value is either a string or `#t'."
  (or (assq-ref options key) default))

;;; getopt-long.scm ends here