summaryrefslogtreecommitdiff
path: root/ext/File-Glob/Glob.pm
blob: f9f0eddd2e5c59ebc497474bde447e93c65dc461 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
package File::Glob;

use strict;
our($VERSION, @ISA, @EXPORT_OK, @EXPORT_FAIL, %EXPORT_TAGS, $DEFAULT_FLAGS);

require XSLoader;
use feature 'switch';

@ISA = qw(Exporter);

# NOTE: The glob() export is only here for compatibility with 5.6.0.
# csh_glob() should not be used directly, unless you know what you're doing.

%EXPORT_TAGS = (
    'glob' => [ qw(
        GLOB_ABEND
	GLOB_ALPHASORT
        GLOB_ALTDIRFUNC
        GLOB_BRACE
        GLOB_CSH
        GLOB_ERR
        GLOB_ERROR
        GLOB_LIMIT
        GLOB_MARK
        GLOB_NOCASE
        GLOB_NOCHECK
        GLOB_NOMAGIC
        GLOB_NOSORT
        GLOB_NOSPACE
        GLOB_QUOTE
        GLOB_TILDE
        glob
        bsd_glob
    ) ],
);

@EXPORT_OK   = (@{$EXPORT_TAGS{'glob'}}, 'csh_glob');

$VERSION = '1.14';

sub import {
    require Exporter;
    local $Exporter::ExportLevel = $Exporter::ExportLevel + 1;
    Exporter::import(grep {
	my $passthrough;
	given ($_) {
	    $DEFAULT_FLAGS &= ~GLOB_NOCASE() when ':case';
	    $DEFAULT_FLAGS |= GLOB_NOCASE() when ':nocase';
	    when (':globally') {
		no warnings 'redefine';
		*CORE::GLOBAL::glob = \&File::Glob::csh_glob;
	    }
	    $passthrough = 1;
	}
	$passthrough;
    } @_);
}

XSLoader::load();

$DEFAULT_FLAGS = GLOB_CSH();
if ($^O =~ /^(?:MSWin32|VMS|os2|dos|riscos)$/) {
    $DEFAULT_FLAGS |= GLOB_NOCASE();
}

# File::Glob::glob() is deprecated because its prototype is different from
# CORE::glob() (use bsd_glob() instead)
sub glob {
    splice @_, 1; # don't pass PL_glob_index as flags!
    goto &bsd_glob;
}

## borrowed heavily from gsar's File::DosGlob
my %iter;
my %entries;

sub csh_glob {
    my $pat = shift;
    my $cxix = shift;

    # assume global context if not provided one
    $cxix = '_G_' unless defined $cxix;
    $iter{$cxix} = 0 unless exists $iter{$cxix};

    # if we're just beginning, do it all first
    if ($iter{$cxix} == 0) {
	my @pat;

	# glob without args defaults to $_
	$pat = $_ unless defined $pat;

	# extract patterns
	$pat =~ s/^\s+//;	# Protect against empty elements in
				# things like < *.c>, which alone
				# shouldn't trigger ParseWords.  Patterns
				# with a trailing space must be passed
				# to ParseWords, in case it is escaped,
				# as in glob('\ ').
	if ($pat =~ /[\s"']/) {
	    # XXX this is needed for compatibility with the csh
	    # implementation in Perl.  Need to support a flag
	    # to disable this behavior.
	    require Text::ParseWords;
	    for (@pat = Text::ParseWords::parse_line('\s+',1,$pat)) {
		s/^['"]// and chop;
	    }
	}
	if (@pat) {
	    $entries{$cxix} = [ map { doglob($_, $DEFAULT_FLAGS) } @pat ];
	}
	else {
	    $entries{$cxix} = [ doglob($pat, $DEFAULT_FLAGS) ];
	}
    }

    # chuck it all out, quick or slow
    if (wantarray) {
        delete $iter{$cxix};
        return @{delete $entries{$cxix}};
    }
    else {
        if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
            return shift @{$entries{$cxix}};
        }
        else {
            # return undef for EOL
            delete $iter{$cxix};
            delete $entries{$cxix};
            return undef;
        }
    }
}

1;
__END__

=head1 NAME

File::Glob - Perl extension for BSD glob routine

=head1 SYNOPSIS

  use File::Glob ':glob';

  @list = bsd_glob('*.[ch]');
  $homedir = bsd_glob('~gnat', GLOB_TILDE | GLOB_ERR);

  if (GLOB_ERROR) {
    # an error occurred reading $homedir
  }

  ## override the core glob (CORE::glob() does this automatically
  ## by default anyway, since v5.6.0)
  use File::Glob ':globally';
  my @sources = <*.{c,h,y}>;

  ## override the core glob, forcing case sensitivity
  use File::Glob qw(:globally :case);
  my @sources = <*.{c,h,y}>;

  ## override the core glob forcing case insensitivity
  use File::Glob qw(:globally :nocase);
  my @sources = <*.{c,h,y}>;

  ## glob on all files in home directory
  use File::Glob ':globally';
  my @sources = <~gnat/*>;

=head1 DESCRIPTION

The glob angle-bracket operator C<< <> >> is a pathname generator that
implements the rules for file name pattern matching used by Unix-like shells
such as the Bourne shell or C shell.

File::Glob::bsd_glob() implements the FreeBSD glob(3) routine, which is
a superset of the POSIX glob() (described in IEEE Std 1003.2 "POSIX.2").
bsd_glob() takes a mandatory C<pattern> argument, and an optional
C<flags> argument, and returns a list of filenames matching the
pattern, with interpretation of the pattern modified by the C<flags>
variable.

Since v5.6.0, Perl's CORE::glob() is implemented in terms of bsd_glob().
Note that they don't share the same prototype--CORE::glob() only accepts
a single argument.  Due to historical reasons, CORE::glob() will also
split its argument on whitespace, treating it as multiple patterns,
whereas bsd_glob() considers them as one pattern.

=head2 META CHARACTERS

  \       Quote the next metacharacter
  []      Character class
  {}      Multiple pattern
  *       Match any string of characters
  ?       Match any single character
  ~       User name home directory

The metanotation C<a{b,c,d}e> is a shorthand for C<abe ace ade>.  Left to
right order is preserved, with results of matches being sorted separately
at a low level to preserve this order. As a special case C<{>, C<}>, and
C<{}> are passed undisturbed.

=head2 POSIX FLAGS

The POSIX defined flags for bsd_glob() are:

=over 4

=item C<GLOB_ERR>

Force bsd_glob() to return an error when it encounters a directory it
cannot open or read.  Ordinarily bsd_glob() continues to find matches.

=item C<GLOB_LIMIT>

Make bsd_glob() return an error (GLOB_NOSPACE) when the pattern expands
to a size bigger than the system constant C<ARG_MAX> (usually found in
limits.h).  If your system does not define this constant, bsd_glob() uses
C<sysconf(_SC_ARG_MAX)> or C<_POSIX_ARG_MAX> where available (in that
order).  You can inspect these values using the standard C<POSIX>
extension.

=item C<GLOB_MARK>

Each pathname that is a directory that matches the pattern has a slash
appended.

=item C<GLOB_NOCASE>

By default, file names are assumed to be case sensitive; this flag
makes bsd_glob() treat case differences as not significant.

=item C<GLOB_NOCHECK>

If the pattern does not match any pathname, then bsd_glob() returns a list
consisting of only the pattern.  If C<GLOB_QUOTE> is set, its effect
is present in the pattern returned.

=item C<GLOB_NOSORT>

By default, the pathnames are sorted in ascending ASCII order; this
flag prevents that sorting (speeding up bsd_glob()).

=back

The FreeBSD extensions to the POSIX standard are the following flags:

=over 4

=item C<GLOB_BRACE>

Pre-process the string to expand C<{pat,pat,...}> strings like csh(1).
The pattern '{}' is left unexpanded for historical reasons (and csh(1)
does the same thing to ease typing of find(1) patterns).

=item C<GLOB_NOMAGIC>

Same as C<GLOB_NOCHECK> but it only returns the pattern if it does not
contain any of the special characters "*", "?" or "[".  C<NOMAGIC> is
provided to simplify implementing the historic csh(1) globbing
behaviour and should probably not be used anywhere else.

=item C<GLOB_QUOTE>

Use the backslash ('\') character for quoting: every occurrence of a
backslash followed by a character in the pattern is replaced by that
character, avoiding any special interpretation of the character.
(But see below for exceptions on DOSISH systems).

=item C<GLOB_TILDE>

Expand patterns that start with '~' to user name home directories.

=item C<GLOB_CSH>

For convenience, C<GLOB_CSH> is a synonym for
C<GLOB_BRACE | GLOB_NOMAGIC | GLOB_QUOTE | GLOB_TILDE | GLOB_ALPHASORT>.

=back

The POSIX provided C<GLOB_APPEND>, C<GLOB_DOOFFS>, and the FreeBSD
extensions C<GLOB_ALTDIRFUNC>, and C<GLOB_MAGCHAR> flags have not been
implemented in the Perl version because they involve more complex
interaction with the underlying C structures.

The following flag has been added in the Perl implementation for
csh compatibility:

=over 4

=item C<GLOB_ALPHASORT>

If C<GLOB_NOSORT> is not in effect, sort filenames is alphabetical
order (case does not matter) rather than in ASCII order.

=back

=head1 DIAGNOSTICS

bsd_glob() returns a list of matching paths, possibly zero length.  If an
error occurred, &File::Glob::GLOB_ERROR will be non-zero and C<$!> will be
set.  &File::Glob::GLOB_ERROR is guaranteed to be zero if no error occurred,
or one of the following values otherwise:

=over 4

=item C<GLOB_NOSPACE>

An attempt to allocate memory failed.

=item C<GLOB_ABEND>

The glob was stopped because an error was encountered.

=back

In the case where bsd_glob() has found some matching paths, but is
interrupted by an error, it will return a list of filenames B<and>
set &File::Glob::ERROR.

Note that bsd_glob() deviates from POSIX and FreeBSD glob(3) behaviour
by not considering C<ENOENT> and C<ENOTDIR> as errors - bsd_glob() will
continue processing despite those errors, unless the C<GLOB_ERR> flag is
set.

Be aware that all filenames returned from File::Glob are tainted.

=head1 NOTES

=over 4

=item *

If you want to use multiple patterns, e.g. C<bsd_glob("a* b*")>, you should
probably throw them in a set as in C<bsd_glob("{a*,b*}")>.  This is because
the argument to bsd_glob() isn't subjected to parsing by the C shell.
Remember that you can use a backslash to escape things.

=item *

On DOSISH systems, backslash is a valid directory separator character.
In this case, use of backslash as a quoting character (via GLOB_QUOTE)
interferes with the use of backslash as a directory separator. The
best (simplest, most portable) solution is to use forward slashes for
directory separators, and backslashes for quoting. However, this does
not match "normal practice" on these systems. As a concession to user
expectation, therefore, backslashes (under GLOB_QUOTE) only quote the
glob metacharacters '[', ']', '{', '}', '-', '~', and backslash itself.
All other backslashes are passed through unchanged.

=item *

Win32 users should use the real slash.  If you really want to use
backslashes, consider using Sarathy's File::DosGlob, which comes with
the standard Perl distribution.

=item *

Mac OS (Classic) users should note a few differences. Since
Mac OS is not Unix, when the glob code encounters a tilde glob (e.g.
~user) and the C<GLOB_TILDE> flag is used, it simply returns that
pattern without doing any expansion.

Glob on Mac OS is case-insensitive by default (if you don't use any
flags). If you specify any flags at all and still want glob
to be case-insensitive, you must include C<GLOB_NOCASE> in the flags.

The path separator is ':' (aka colon), not '/' (aka slash). Mac OS users
should be careful about specifying relative pathnames. While a full path
always begins with a volume name, a relative pathname should always
begin with a ':'.  If specifying a volume name only, a trailing ':' is
required.

The specification of pathnames in glob patterns adheres to the usual Mac
OS conventions: The path separator is a colon ':', not a slash '/'. A
full path always begins with a volume name. A relative pathname on Mac
OS must always begin with a ':', except when specifying a file or
directory name in the current working directory, where the leading colon
is optional. If specifying a volume name only, a trailing ':' is
required. Due to these rules, a glob like E<lt>*:E<gt> will find all
mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
all files and directories in the current directory.

Note that updirs in the glob pattern are resolved before the matching begins,
i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
that a single trailing ':' in the pattern is ignored (unless it's a volume
name pattern like "*HD:"), i.e. a glob like E<lt>:*:E<gt> will find both
directories I<and> files (and not, as one might expect, only directories).
You can, however, use the C<GLOB_MARK> flag to distinguish (without a file
test) directory names from file names.

If the C<GLOB_MARK> flag is set, all directory paths will have a ':' appended.
Since a directory like 'lib:' is I<not> a valid I<relative> path on Mac OS,
both a leading and a trailing colon will be added, when the directory name in
question doesn't contain any colons (e.g. 'lib' becomes ':lib:').

=back

=head1 SEE ALSO

L<perlfunc/glob>, glob(3)

=head1 AUTHOR

The Perl interface was written by Nathan Torkington E<lt>gnat@frii.comE<gt>,
and is released under the artistic license.  Further modifications were
made by Greg Bacon E<lt>gbacon@cs.uah.eduE<gt>, Gurusamy Sarathy
E<lt>gsar@activestate.comE<gt>, and Thomas Wegner
E<lt>wegner_thomas@yahoo.comE<gt>.  The C glob code has the
following copyright:

    Copyright (c) 1989, 1993 The Regents of the University of California.
    All rights reserved.

    This code is derived from software contributed to Berkeley by
    Guido van Rossum.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
    3. Neither the name of the University nor the names of its contributors
       may be used to endorse or promote products derived from this software
       without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE.

=cut