diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 1999-10-12 16:53:31 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 1999-10-12 16:53:31 +0000 |
commit | 72b166521443a1b89b0fed156fa8334cfab6e61b (patch) | |
tree | 1b861dc3472acd4a3bf3b2f7a4ed71e073c143f9 /ext/File | |
parent | 883d36a60f1dbc0dec153bce822eebaa5172be5d (diff) | |
download | perl-72b166521443a1b89b0fed156fa8334cfab6e61b.tar.gz |
add File::BSDGlob as File::Glob and load it at compile-time
if perl was built with -DPERL_INTERNAL_GLOB
TODO: we currently get a compile-time failure if File/Glob.pm
can't be found; such failure needs to be made to emit a warning
and use the csh implementation instead
p4raw-id: //depot/perl@4356
Diffstat (limited to 'ext/File')
-rw-r--r-- | ext/File/Glob/Changes | 38 | ||||
-rw-r--r-- | ext/File/Glob/Glob.pm | 310 | ||||
-rw-r--r-- | ext/File/Glob/Glob.xs | 203 | ||||
-rw-r--r-- | ext/File/Glob/Makefile.PL | 11 | ||||
-rw-r--r-- | ext/File/Glob/TODO | 21 | ||||
-rw-r--r-- | ext/File/Glob/bsd_glob.c | 856 | ||||
-rw-r--r-- | ext/File/Glob/bsd_glob.h | 91 |
7 files changed, 1530 insertions, 0 deletions
diff --git a/ext/File/Glob/Changes b/ext/File/Glob/Changes new file mode 100644 index 0000000000..7b8ef7d706 --- /dev/null +++ b/ext/File/Glob/Changes @@ -0,0 +1,38 @@ +Revision history for Perl extension File::Glob + +0.00 Tue Dec 17 10:51:33 1996 + - original version; created by h2xs 1.16 + +0.90 Tue Dec 17 13:58:32 MST 1996 + - implemented first pass access to glob(3), + but it's clumsy and it looks like it leaks + memory. + +0.91 Thu Sep 4 08:43:55 CDT 1997 + - included CORE/config.h portability macros + - s/glob/bsd_glob/ to avoid calling and including the + system's glob stuff + - added GLOB_DEBUG for (surprise!) glob debugging + - tainted all filenames returned from &Glob::BSD::glob + +0.92 Tue Sep 30 08:31:57 CDT 1997 + - only use lstat if HAS_LSTAT is defined + - renamed the glob flags to GLOB_* + - added GLOB_CSH convenience macro for csh(1) globbing + These changes thanks to Hans Mulder <hansm@icgned.nl> + - fixed an incompatibility with csh(1) globbing where a + pattern like {A*,b,c} wouldn't expand properly + - various compatibility changes + - fixed and added tests + +0.93 Wed Jul 1 10:39:47 CDT 1998 + - renamed module to File::BSDGlob + - enabled 'globally' import directive to override the core + glob + - added Sarathy's tests for File::DosGlob +0.99 Tue Oct 12 06:42:02 PDT 1999 + - renamed module to File::Glob for incorporation into the + Perl source distribution + - ansified prototypes + - s/struct stat/Stat_t/ + - split on spaces to make <*.c *.h> work (for compatibility) diff --git a/ext/File/Glob/Glob.pm b/ext/File/Glob/Glob.pm new file mode 100644 index 0000000000..a4531a105c --- /dev/null +++ b/ext/File/Glob/Glob.pm @@ -0,0 +1,310 @@ +package File::Glob; + +use strict; +use Carp; +use vars qw($VERSION @ISA @EXPORT_OK @EXPORT_FAIL %EXPORT_TAGS $AUTOLOAD); + +require Exporter; +require DynaLoader; +require AutoLoader; + +@ISA = qw(Exporter DynaLoader AutoLoader); + +@EXPORT_OK = qw( + globally + csh_glob + glob + GLOB_ABEND + GLOB_ALTDIRFUNC + GLOB_BRACE + GLOB_ERR + GLOB_ERROR + GLOB_MARK + GLOB_NOCHECK + GLOB_NOMAGIC + GLOB_NOSORT + GLOB_NOSPACE + GLOB_QUOTE + GLOB_TILDE +); + +@EXPORT_FAIL = ( 'globally' ); + +%EXPORT_TAGS = ( + 'glob' => [ qw( + GLOB_ABEND + GLOB_ALTDIRFUNC + GLOB_BRACE + GLOB_ERR + GLOB_ERROR + GLOB_MARK + GLOB_NOCHECK + GLOB_NOMAGIC + GLOB_NOSORT + GLOB_NOSPACE + GLOB_QUOTE + GLOB_TILDE + glob + ) ], +); + +$VERSION = '0.99'; + +sub export_fail { + shift; + + if ($_[0] eq 'globally') { + local $^W; + *CORE::GLOBAL::glob = \&File::Glob::csh_glob; + shift; + } + + @_; +} + +sub AUTOLOAD { + # This AUTOLOAD is used to 'autoload' constants from the constant() + # XS function. If a constant is not found then control is passed + # to the AUTOLOAD in AutoLoader. + + my $constname; + ($constname = $AUTOLOAD) =~ s/.*:://; + my $val = constant($constname, @_ ? $_[0] : 0); + if ($! != 0) { + if ($! =~ /Invalid/) { + $AutoLoader::AUTOLOAD = $AUTOLOAD; + goto &AutoLoader::AUTOLOAD; + } + else { + croak "Your vendor has not defined File::Glob macro $constname"; + } + } + eval "sub $AUTOLOAD { $val }"; + goto &$AUTOLOAD; +} + +bootstrap File::Glob $VERSION; + +# Preloaded methods go here. + +sub GLOB_ERROR { + return constant('GLOB_ERROR', 0); +} + +sub GLOB_CSH () { GLOB_BRACE() | GLOB_NOMAGIC() | GLOB_QUOTE() | GLOB_TILDE() } + +# Autoload methods go after =cut, and are processed by the autosplit program. + +sub glob { + return doglob(@_); +} + +## borrowed heavily from gsar's File::DosGlob +my %iter; +my %entries; + +sub csh_glob { + my $pat = shift; + my $cxix = shift; + my @pat; + + # glob without args defaults to $_ + $pat = $_ unless defined $pat; + + # extract patterns + if ($pat =~ /\s/) { + # XXX this is needed for compatibility with the csh + # implementation in Perl. Need to support a flag + # to disable this behavior. + require Text::ParseWords; + @pat = Text::ParseWords::parse_line('\s+',0,$pat); + } + + # assume global context if not provided one + $cxix = '_G_' unless defined $cxix; + $iter{$cxix} = 0 unless exists $iter{$cxix}; + + # if we're just beginning, do it all first + if ($iter{$cxix} == 0) { + if (@pat) { + $entries{$cxix} = [ map { doglob($_, GLOB_CSH) } @pat ]; + } + else { + $entries{$cxix} = [ doglob($pat, GLOB_CSH) ]; + } + } + + # chuck it all out, quick or slow + if (wantarray) { + delete $iter{$cxix}; + return @{delete $entries{$cxix}}; + } + else { + if ($iter{$cxix} = scalar @{$entries{$cxix}}) { + return shift @{$entries{$cxix}}; + } + else { + # return undef for EOL + delete $iter{$cxix}; + delete $entries{$cxix}; + return undef; + } + } +} + +1; +__END__ + +=head1 NAME + +File::Glob - Perl extension for BSD glob routine + +=head1 SYNOPSIS + + use File::Glob ':glob'; + @list = glob('*.[ch]'); + $homedir = glob('~gnat', GLOB_TILDE | GLOB_ERR); + if (GLOB_ERROR) { + # an error occurred reading $homedir + } + + ## override the core glob (even with -T) + use File::Glob 'globally'; + my @sources = <*.{c,h,y}> + +=head1 DESCRIPTION + +File::Glob implements the FreeBSD glob(3) routine, which is a superset +of the POSIX glob() (described in IEEE Std 1003.2 "POSIX.2"). The +glob() routine takes a mandatory C<pattern> argument, and an optional +C<flags> argument, and returns a list of filenames matching the +pattern, with interpretation of the pattern modified by the C<flags> +variable. The POSIX defined flags are: + +=over 4 + +=item C<GLOB_ERR> + +Force glob() to return an error when it encounters a directory it +cannot open or read. Ordinarily glob() continues to find matches. + +=item C<GLOB_MARK> + +Each pathname that is a directory that matches the pattern has a slash +appended. + +=item C<GLOB_NOCHECK> + +If the pattern does not match any pathname, then glob() returns a list +consisting of only the pattern. If C<GLOB_QUOTE> is set, its effect +is present in the pattern returned. + +=item C<GLOB_NOSORT> + +By default, the pathnames are sorted in ascending ASCII order; this +flag prevents that sorting (speeding up glob()). + +=back + +The FreeBSD extensions to the POSIX standard are the following flags: + +=over 4 + +=item C<GLOB_BRACE> + +Pre-process the string to expand C<{pat,pat,...} strings like csh(1). +The pattern '{}' is left unexpanded for historical reasons (and csh(1) +does the same thing to ease typing of find(1) patterns). + +=item C<GLOB_NOMAGIC> + +Same as C<GLOB_NOCHECK> but it only returns the pattern if it does not +contain any of the special characters "*", "?" or "[". C<NOMAGIC> is +provided to simplify implementing the historic csh(1) globbing +behaviour and should probably not be used anywhere else. + +=item C<GLOB_QUOTE> + +Use the backslash ('\') character for quoting: every occurrence of a +backslash followed by a character in the pattern is replaced by that +character, avoiding any special interpretation of the character. + +=item C<GLOB_TILDE> + +Expand patterns that start with '~' to user name home directories. + +=item C<GLOB_CSH> + +For convenience, C<GLOB_CSH> is a synonym for +C<GLOB_BRACE | GLOB_NOMAGIC | GLOB_QUOTE | GLOB_TILDE>. + +=back + +The POSIX provided C<GLOB_APPEND>, C<GLOB_DOOFFS>, and the FreeBSD +extensions C<GLOB_ALTDIRFUNC>, and C<GLOB_MAGCHAR> flags have not been +implemented in the Perl version because they involve more complex +interaction with the underlying C structures. + +=head1 DIAGNOSTICS + +glob() returns a list of matching paths, possibly zero length. If an +error occurred, &File::Glob::GLOB_ERROR will be non-zero and C<$!> will be +set. &File::Glob::GLOB_ERROR is guaranteed to be zero if no error occurred, +or one of the following values otherwise: + +=over 4 + +=item C<GLOB_NOSPACE> + +An attempt to allocate memory failed. + +=item C<GLOB_ABEND> + +The glob was stopped because an error was encountered. + +=back + +In the case where glob() has found some matching paths, but is +interrupted by an error, glob() will return a list of filenames B<and> +set &File::Glob::ERROR. + +Note that glob() deviates from POSIX and FreeBSD glob(3) behaviour by +not considering C<ENOENT> and C<ENOTDIR> as errors - glob() will +continue processing despite those errors, unless the C<GLOB_ERR> flag is +set. + +Be aware that all filenames returned from File::Glob are tainted. + +=head1 NOTES + +=over 4 + +=item * + +If you want to use multiple patterns, e.g. C<glob "a* b*">, you should +probably throw them in a set as in C<glob "{a*,b*}>. This is because +the argument to glob isn't subjected to parsing by the C shell. Remember +that you can use a backslash to escape things. + +=item * + +Win32 users should use the real slash. If you really want to use +backslashes, consider using Sarathy's File::DosGlob, which comes with +the standard Perl distribution. + +=head1 AUTHOR + +The Perl interface was written by Nathan Torkington (gnat@frii.com), +and is released under the artistic license. Further modifications were +made by Greg Bacon E<lt>gbacon@cs.uah.eduE<gt> and Gurusamy Sarathy +E<lt>gsar@activestate.comE<gt>. The C glob code has the +following copyright: + + Copyright (c) 1989, 1993 The Regents of the University of California. + All rights reserved. This code is derived from software contributed + to Berkeley by Guido van Rossum. + +For redistribution of the C glob code, read the copyright notice in +the file bsd_glob.c, which is part of the File::Glob source distribution. + +=cut diff --git a/ext/File/Glob/Glob.xs b/ext/File/Glob/Glob.xs new file mode 100644 index 0000000000..98e366c401 --- /dev/null +++ b/ext/File/Glob/Glob.xs @@ -0,0 +1,203 @@ +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +#include "bsd_glob.h" + +static int GLOB_ERROR = 0; + +static int +not_here(char *s) +{ + croak("%s not implemented on this architecture", s); + return -1; +} + + +static double +constant(char *name, int arg) +{ + errno = 0; + if (strlen(name) <= 5) + goto not_there; + switch (*(name+5)) { + case 'A': + if (strEQ(name, "GLOB_ABEND")) +#ifdef GLOB_ABEND + return GLOB_ABEND; +#else + goto not_there; +#endif + if (strEQ(name, "GLOB_ALTDIRFUNC")) +#ifdef GLOB_ALTDIRFUNC + return GLOB_ALTDIRFUNC; +#else + goto not_there; +#endif + break; + case 'B': + if (strEQ(name, "GLOB_BRACE")) +#ifdef GLOB_BRACE + return GLOB_BRACE; +#else + goto not_there; +#endif + break; + case 'C': + break; + case 'D': + break; + case 'E': + if (strEQ(name, "GLOB_ERR")) +#ifdef GLOB_ERR + return GLOB_ERR; +#else + goto not_there; +#endif + if (strEQ(name, "GLOB_ERROR")) + return GLOB_ERROR; + break; + case 'F': + break; + case 'G': + break; + case 'H': + break; + case 'I': + break; + case 'J': + break; + case 'K': + break; + case 'L': + break; + case 'M': + if (strEQ(name, "GLOB_MARK")) +#ifdef GLOB_MARK + return GLOB_MARK; +#else + goto not_there; +#endif + break; + case 'N': + if (strEQ(name, "GLOB_NOCHECK")) +#ifdef GLOB_NOCHECK + return GLOB_NOCHECK; +#else + goto not_there; +#endif + if (strEQ(name, "GLOB_NOMAGIC")) +#ifdef GLOB_NOMAGIC + return GLOB_NOMAGIC; +#else + goto not_there; +#endif + if (strEQ(name, "GLOB_NOSORT")) +#ifdef GLOB_NOSORT + return GLOB_NOSORT; +#else + goto not_there; +#endif + if (strEQ(name, "GLOB_NOSPACE")) +#ifdef GLOB_NOSPACE + return GLOB_NOSPACE; +#else + goto not_there; +#endif + break; + case 'O': + break; + case 'P': + break; + case 'Q': + if (strEQ(name, "GLOB_QUOTE")) +#ifdef GLOB_QUOTE + return GLOB_QUOTE; +#else + goto not_there; +#endif + break; + case 'R': + break; + case 'S': + break; + case 'T': + if (strEQ(name, "GLOB_TILDE")) +#ifdef GLOB_TILDE + return GLOB_TILDE; +#else + goto not_there; +#endif + break; + case 'U': + break; + case 'V': + break; + case 'W': + break; + case 'X': + break; + case 'Y': + break; + case 'Z': + break; + } + errno = EINVAL; + return 0; + +not_there: + errno = ENOENT; + return 0; +} + +#ifdef WIN32 +#define errfunc NULL +#else +int +errfunc(const char *foo, int bar) { + return !(bar == ENOENT || bar == ENOTDIR); +} +#endif + +MODULE = File::Glob PACKAGE = File::Glob + +void +doglob(pattern,...) + char *pattern +PROTOTYPE: +PREINIT: + glob_t pglob; + int i; + int retval; + int flags = 0; + SV *tmp; +PPCODE: + { + /* allow for optional flags argument */ + if (items > 1) { + flags = (int) SvIV(ST(1)); + } + + /* call glob */ + retval = bsd_glob(pattern, flags, errfunc, &pglob); + GLOB_ERROR = retval; + + /* return any matches found */ + EXTEND(sp, pglob.gl_pathc); + for (i = 0; i < pglob.gl_pathc; i++) { + /* printf("# bsd_glob: %s\n", pglob.gl_pathv[i]); */ + tmp = sv_2mortal(newSVpvn(pglob.gl_pathv[i], + strlen(pglob.gl_pathv[i]))); + TAINT; + SvTAINT(tmp); + PUSHs(tmp); + } + + bsd_globfree(&pglob); + } + +double +constant(name,arg) + char *name + int arg +PROTOTYPE: diff --git a/ext/File/Glob/Makefile.PL b/ext/File/Glob/Makefile.PL new file mode 100644 index 0000000000..c82988fdb1 --- /dev/null +++ b/ext/File/Glob/Makefile.PL @@ -0,0 +1,11 @@ +use ExtUtils::MakeMaker; +WriteMakefile( + NAME => 'File::Glob', + VERSION_FROM => 'Glob.pm', + MAN3PODS => {}, # Pods will be built by installman. + OBJECT => 'bsd_glob$(OBJ_EXT) Glob$(OBJ_EXT)', + +## uncomment for glob debugging (will cause make test to fail) +# DEFINE => '-DGLOB_DEBUG', +# OPTIMIZE => '-g', +); diff --git a/ext/File/Glob/TODO b/ext/File/Glob/TODO new file mode 100644 index 0000000000..ef2547fa41 --- /dev/null +++ b/ext/File/Glob/TODO @@ -0,0 +1,21 @@ +Some issues left to take care of: + + o sane ~ handling on non-Unix platforms + + Currently on non-Unix, when the glob code encounters a tilde glob + (.e.g ~user/foo or ~/.cshrc), it simply returns that pattern + without doing any expansion (meaning perl will weed it out since a + file of that name isn't likely to exist). + + Please, if you have strong feelings about how tilde expansion + should be done on your favorite non-Unix platform(s), submit a + patch. + + o path separator handling + + Guido's code contains the assumption that the path separator is one + character (byte, probably) in length. Win32 doesn't object to the + true slash as a separator. I imagine MacPerl could change the SEP + cpp #define to ":". I have no idea what it is for VMS. Again, if + you have ideas and especially patches, please feel free to share + them. diff --git a/ext/File/Glob/bsd_glob.c b/ext/File/Glob/bsd_glob.c new file mode 100644 index 0000000000..38ace47ca5 --- /dev/null +++ b/ext/File/Glob/bsd_glob.c @@ -0,0 +1,856 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Clause 3 above should be considered "deleted in its entirety". + * For the actual notice of withdrawal, see: + * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93"; +#endif /* LIBC_SCCS and not lint */ + +/* + * glob(3) -- a superset of the one defined in POSIX 1003.2. + * + * The [!...] convention to negate a range is supported (SysV, Posix, ksh). + * + * Optional extra services, controlled by flags not defined by POSIX: + * + * GLOB_QUOTE: + * Escaping convention: \ inhibits any special meaning the following + * character might have (except \ at end of string is retained). + * GLOB_MAGCHAR: + * Set in gl_flags if pattern contained a globbing character. + * GLOB_NOMAGIC: + * Same as GLOB_NOCHECK, but it will only append pattern if it did + * not contain any magic characters. [Used in csh style globbing] + * GLOB_ALTDIRFUNC: + * Use alternately specified directory access functions. + * GLOB_TILDE: + * expand ~user/foo to the /home/dir/of/user/foo + * GLOB_BRACE: + * expand {1,2}{a,b} to 1a 1b 2a 2b + * gl_matchc: + * Number of matches in the current invocation of glob. + */ + +#include <EXTERN.h> +#include <perl.h> +#include "bsd_glob.h" +#ifdef I_PWD +# include <pwd.h> +#else +#ifdef HAS_PASSWD + struct passwd *getpwnam(char *); + struct passwd *getpwuid(Uid_t); +#endif +#endif + +#ifndef MAXPATHLEN +# ifdef PATH_MAX +# define MAXPATHLEN PATH_MAX +# else +# define MAXPATHLEN 1024 +# endif +#endif + +#define DOLLAR '$' +#define DOT '.' +#define EOS '\0' +#define LBRACKET '[' +#define NOT '!' +#define QUESTION '?' +#define QUOTE '\\' +#define RANGE '-' +#define RBRACKET ']' +#define SEP '/' +#define STAR '*' +#define TILDE '~' +#define UNDERSCORE '_' +#define LBRACE '{' +#define RBRACE '}' +#define SLASH '/' +#define COMMA ',' + +#ifndef GLOB_DEBUG + +#define M_QUOTE 0x8000 +#define M_PROTECT 0x4000 +#define M_MASK 0xffff +#define M_ASCII 0x00ff + +typedef U16 Char; + +#else + +#define M_QUOTE 0x80 +#define M_PROTECT 0x40 +#define M_MASK 0xff +#define M_ASCII 0x7f + +typedef U8 Char; + +#endif /* !GLOB_DEBUG */ + + +#define CHAR(c) ((Char)((c)&M_ASCII)) +#define META(c) ((Char)((c)|M_QUOTE)) +#define M_ALL META('*') +#define M_END META(']') +#define M_NOT META('!') +#define M_ONE META('?') +#define M_RNG META('-') +#define M_SET META('[') +#define ismeta(c) (((c)&M_QUOTE) != 0) + + +static int compare(const void *, const void *); +static void g_Ctoc(const Char *, char *); +static int g_lstat(Char *, Stat_t *, glob_t *); +static DIR *g_opendir(Char *, glob_t *); +static Char *g_strchr(Char *, int); +#ifdef notdef +static Char *g_strcat(Char *, const Char *); +#endif +static int g_stat(Char *, Stat_t *, glob_t *); +static int glob0(const Char *, glob_t *); +static int glob1(Char *, glob_t *); +static int glob2(Char *, Char *, Char *, glob_t *); +static int glob3(Char *, Char *, Char *, Char *, glob_t *); +static int globextend(const Char *, glob_t *); +static const Char * globtilde(const Char *, Char *, glob_t *); +static int globexp1(const Char *, glob_t *); +static int globexp2(const Char *, const Char *, glob_t *, int *); +static int match(Char *, Char *, Char *); +#ifdef GLOB_DEBUG +static void qprintf(const char *, Char *); +#endif /* GLOB_DEBUG */ + +int +bsd_glob(const char *pattern, int flags, + int (*errfunc)(const char *, int), glob_t *pglob) +{ + const U8 *patnext; + int c; + Char *bufnext, *bufend, patbuf[MAXPATHLEN+1]; + + patnext = (U8 *) pattern; + if (!(flags & GLOB_APPEND)) { + pglob->gl_pathc = 0; + pglob->gl_pathv = NULL; + if (!(flags & GLOB_DOOFFS)) + pglob->gl_offs = 0; + } + pglob->gl_flags = flags & ~GLOB_MAGCHAR; + pglob->gl_errfunc = errfunc; + pglob->gl_matchc = 0; + + bufnext = patbuf; + bufend = bufnext + MAXPATHLEN; + if (flags & GLOB_QUOTE) { + /* Protect the quoted characters. */ + while (bufnext < bufend && (c = *patnext++) != EOS) + if (c == QUOTE) { + if ((c = *patnext++) == EOS) { + c = QUOTE; + --patnext; + } + *bufnext++ = c | M_PROTECT; + } + else + *bufnext++ = c; + } + else + while (bufnext < bufend && (c = *patnext++) != EOS) + *bufnext++ = c; + *bufnext = EOS; + + if (flags & GLOB_BRACE) + return globexp1(patbuf, pglob); + else + return glob0(patbuf, pglob); +} + +/* + * Expand recursively a glob {} pattern. When there is no more expansion + * invoke the standard globbing routine to glob the rest of the magic + * characters + */ +static int globexp1(const Char *pattern, glob_t *pglob) +{ + const Char* ptr = pattern; + int rv; + + /* Protect a single {}, for find(1), like csh */ + if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) + return glob0(pattern, pglob); + + while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL) + if (!globexp2(ptr, pattern, pglob, &rv)) + return rv; + + return glob0(pattern, pglob); +} + + +/* + * Recursive brace globbing helper. Tries to expand a single brace. + * If it succeeds then it invokes globexp1 with the new pattern. + * If it fails then it tries to glob the rest of the pattern and returns. + */ +static int globexp2(const Char *ptr, const Char *pattern, + glob_t *pglob, int *rv) +{ + int i; + Char *lm, *ls; + const Char *pe, *pm, *pl; + Char patbuf[MAXPATHLEN + 1]; + + /* copy part up to the brace */ + for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) + continue; + ls = lm; + + /* Find the balanced brace */ + for (i = 0, pe = ++ptr; *pe; pe++) + if (*pe == LBRACKET) { + /* Ignore everything between [] */ + for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) + continue; + if (*pe == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pe = pm; + } + } + else if (*pe == LBRACE) + i++; + else if (*pe == RBRACE) { + if (i == 0) + break; + i--; + } + + /* Non matching braces; just glob the pattern */ + if (i != 0 || *pe == EOS) { + *rv = glob0(patbuf, pglob); + return 0; + } + + for (i = 0, pl = pm = ptr; pm <= pe; pm++) + switch (*pm) { + case LBRACKET: + /* Ignore everything between [] */ + for (pl = pm++; *pm != RBRACKET && *pm != EOS; pm++) + continue; + if (*pm == EOS) { + /* + * We could not find a matching RBRACKET. + * Ignore and just look for RBRACE + */ + pm = pl; + } + break; + + case LBRACE: + i++; + break; + + case RBRACE: + if (i) { + i--; + break; + } + /* FALLTHROUGH */ + case COMMA: + if (i && *pm == COMMA) + break; + else { + /* Append the current string */ + for (lm = ls; (pl < pm); *lm++ = *pl++) + continue; + /* + * Append the rest of the pattern after the + * closing brace + */ + for (pl = pe + 1; (*lm++ = *pl++) != EOS;) + continue; + + /* Expand the current pattern */ +#ifdef GLOB_DEBUG + qprintf("globexp2:", patbuf); +#endif /* GLOB_DEBUG */ + *rv = globexp1(patbuf, pglob); + + /* move after the comma, to the next string */ + pl = pm + 1; + } + break; + + default: + break; + } + *rv = 0; + return 0; +} + + + +/* + * expand tilde from the passwd file. + */ +static const Char * +globtilde(const Char *pattern, Char *patbuf, glob_t *pglob) +{ + struct passwd *pwd; + char *h; + const Char *p; + Char *b; + + if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) + return pattern; + + /* Copy up to the end of the string or / */ + for (p = pattern + 1, h = (char *) patbuf; *p && *p != SLASH; + *h++ = *p++) + continue; + + *h = EOS; + + if (((char *) patbuf)[0] == EOS) { + /* + * handle a plain ~ or ~/ by expanding $HOME + * first and then trying the password file + */ + if ((h = getenv("HOME")) == NULL) { +#ifdef HAS_PASSWD + if ((pwd = getpwuid(getuid())) == NULL) + return pattern; + else + h = pwd->pw_dir; +#else + return pattern; +#endif + } + } + else { + /* + * Expand a ~user + */ +#ifdef HAS_PASSWD + if ((pwd = getpwnam((char*) patbuf)) == NULL) + return pattern; + else + h = pwd->pw_dir; +#else + return pattern; +#endif + } + + /* Copy the home directory */ + for (b = patbuf; *h; *b++ = *h++) + continue; + + /* Append the rest of the pattern */ + while ((*b++ = *p++) != EOS) + continue; + + return patbuf; +} + + +/* + * The main glob() routine: compiles the pattern (optionally processing + * quotes), calls glob1() to do the real pattern matching, and finally + * sorts the list (unless unsorted operation is requested). Returns 0 + * if things went well, nonzero if errors occurred. It is not an error + * to find no matches. + */ +static int +glob0(const Char *pattern, glob_t *pglob) +{ + const Char *qpat, *qpatnext; + int c, err, oldflags, oldpathc; + Char *bufnext, patbuf[MAXPATHLEN+1]; + + qpat = globtilde(pattern, patbuf, pglob); + qpatnext = qpat; + oldflags = pglob->gl_flags; + oldpathc = pglob->gl_pathc; + bufnext = patbuf; + + /* We don't need to check for buffer overflow any more. */ + while ((c = *qpatnext++) != EOS) { + switch (c) { + case LBRACKET: + c = *qpatnext; + if (c == NOT) + ++qpatnext; + if (*qpatnext == EOS || + g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) { + *bufnext++ = LBRACKET; + if (c == NOT) + --qpatnext; + break; + } + *bufnext++ = M_SET; + if (c == NOT) + *bufnext++ = M_NOT; + c = *qpatnext++; + do { + *bufnext++ = CHAR(c); + if (*qpatnext == RANGE && + (c = qpatnext[1]) != RBRACKET) { + *bufnext++ = M_RNG; + *bufnext++ = CHAR(c); + qpatnext += 2; + } + } while ((c = *qpatnext++) != RBRACKET); + pglob->gl_flags |= GLOB_MAGCHAR; + *bufnext++ = M_END; + break; + case QUESTION: + pglob->gl_flags |= GLOB_MAGCHAR; + *bufnext++ = M_ONE; + break; + case STAR: + pglob->gl_flags |= GLOB_MAGCHAR; + /* collapse adjacent stars to one, + * to avoid exponential behavior + */ + if (bufnext == patbuf || bufnext[-1] != M_ALL) + *bufnext++ = M_ALL; + break; + default: + *bufnext++ = CHAR(c); + break; + } + } + *bufnext = EOS; +#ifdef GLOB_DEBUG + qprintf("glob0:", patbuf); +#endif /* GLOB_DEBUG */ + + if ((err = glob1(patbuf, pglob)) != 0) { + pglob->gl_flags = oldflags; + return(err); + } + + /* + * If there was no match we are going to append the pattern + * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified + * and the pattern did not contain any magic characters + * GLOB_NOMAGIC is there just for compatibility with csh. + */ + if (pglob->gl_pathc == oldpathc && + ((pglob->gl_flags & GLOB_NOCHECK) || + ((pglob->gl_flags & GLOB_NOMAGIC) && + !(pglob->gl_flags & GLOB_MAGCHAR)))) + { +#ifdef GLOB_DEBUG + printf("calling globextend from glob0\n"); +#endif /* GLOB_DEBUG */ + pglob->gl_flags = oldflags; + return(globextend(qpat, pglob)); + } + else if (!(pglob->gl_flags & GLOB_NOSORT)) + qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, + pglob->gl_pathc - oldpathc, sizeof(char *), compare); + pglob->gl_flags = oldflags; + return(0); +} + +static int +compare(const void *p, const void *q) +{ + return(strcmp(*(char **)p, *(char **)q)); +} + +static int +glob1(Char *pattern, glob_t *pglob) +{ + Char pathbuf[MAXPATHLEN+1]; + + /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ + if (*pattern == EOS) + return(0); + return(glob2(pathbuf, pathbuf, pattern, pglob)); +} + +/* + * The functions glob2 and glob3 are mutually recursive; there is one level + * of recursion for each segment in the pattern that contains one or more + * meta characters. + */ +static int +glob2(Char *pathbuf, Char *pathend, Char *pattern, glob_t *pglob) +{ + Stat_t sb; + Char *p, *q; + int anymeta; + + /* + * Loop over pattern segments until end of pattern or until + * segment with meta character found. + */ + for (anymeta = 0;;) { + if (*pattern == EOS) { /* End of pattern? */ + *pathend = EOS; + +#ifdef HAS_LSTAT + if (g_lstat(pathbuf, &sb, pglob)) + return(0); +#endif /* HAS_LSTAT */ + + if (((pglob->gl_flags & GLOB_MARK) && + pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) + || (S_ISLNK(sb.st_mode) && + (g_stat(pathbuf, &sb, pglob) == 0) && + S_ISDIR(sb.st_mode)))) { + *pathend++ = SEP; + *pathend = EOS; + } + ++pglob->gl_matchc; +#ifdef GLOB_DEBUG + printf("calling globextend from glob2\n"); +#endif /* GLOB_DEBUG */ + return(globextend(pathbuf, pglob)); + } + + /* Find end of next segment, copy tentatively to pathend. */ + q = pathend; + p = pattern; + while (*p != EOS && *p != SEP) { + if (ismeta(*p)) + anymeta = 1; + *q++ = *p++; + } + + if (!anymeta) { /* No expansion, do next segment. */ + pathend = q; + pattern = p; + while (*pattern == SEP) + *pathend++ = *pattern++; + } else /* Need expansion, recurse. */ + return(glob3(pathbuf, pathend, pattern, p, pglob)); + } + /* NOTREACHED */ +} + +static int +glob3(Char *pathbuf, Char *pathend, Char *pattern, + Char *restpattern, glob_t *pglob) +{ + register Direntry_t *dp; + DIR *dirp; + int err; + char buf[MAXPATHLEN]; + + /* + * The readdirfunc declaration can't be prototyped, because it is + * assigned, below, to two functions which are prototyped in glob.h + * and dirent.h as taking pointers to differently typed opaque + * structures. + */ + Direntry_t *(*readdirfunc)(); + + *pathend = EOS; + errno = 0; + + if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { + /* TODO: don't call for ENOENT or ENOTDIR? */ + if (pglob->gl_errfunc) { + g_Ctoc(pathbuf, buf); + if (pglob->gl_errfunc(buf, errno) || + (pglob->gl_flags & GLOB_ERR)) + return (GLOB_ABEND); + } + return(0); + } + + err = 0; + + /* Search directory for matching names. */ + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + readdirfunc = pglob->gl_readdir; + else + readdirfunc = readdir; + while ((dp = (*readdirfunc)(dirp))) { + register U8 *sc; + register Char *dc; + + /* Initial DOT must be matched literally. */ + if (dp->d_name[0] == DOT && *pattern != DOT) + continue; + for (sc = (U8 *) dp->d_name, dc = pathend; + (*dc++ = *sc++) != EOS;) + continue; + if (!match(pathend, pattern, restpattern)) { + *pathend = EOS; + continue; + } + err = glob2(pathbuf, --dc, restpattern, pglob); + if (err) + break; + } + + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + (*pglob->gl_closedir)(dirp); + else + closedir(dirp); + return(err); +} + + +/* + * Extend the gl_pathv member of a glob_t structure to accomodate a new item, + * add the new item, and update gl_pathc. + * + * This assumes the BSD realloc, which only copies the block when its size + * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic + * behavior. + * + * Return 0 if new item added, error code if memory couldn't be allocated. + * + * Invariant of the glob_t structure: + * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and + * gl_pathv points to (gl_offs + gl_pathc + 1) items. + */ +static int +globextend(const Char *path, glob_t *pglob) +{ + register char **pathv; + register int i; + Size_t newsize; + char *copy; + const Char *p; + +#ifdef GLOB_DEBUG + printf("Adding "); + for (p = path; *p; p++) + (void)printf("%c", CHAR(*p)); + printf("\n"); +#endif GLOB_DEBUG + + newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); + pathv = pglob->gl_pathv ? + realloc((char *)pglob->gl_pathv, newsize) : + malloc(newsize); + if (pathv == NULL) + return(GLOB_NOSPACE); + + if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { + /* first time around -- clear initial gl_offs items */ + pathv += pglob->gl_offs; + for (i = pglob->gl_offs; --i >= 0; ) + *--pathv = NULL; + } + pglob->gl_pathv = pathv; + + for (p = path; *p++;) + continue; + if ((copy = malloc(p - path)) != NULL) { + g_Ctoc(path, copy); + pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; + } + pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; + return(copy == NULL ? GLOB_NOSPACE : 0); +} + + +/* + * pattern matching function for filenames. Each occurrence of the * + * pattern causes a recursion level. + */ +static int +match(register Char *name, register Char *pat, register Char *patend) +{ + int ok, negate_range; + Char c, k; + + while (pat < patend) { + c = *pat++; + switch (c & M_MASK) { + case M_ALL: + if (pat == patend) + return(1); + do + if (match(name, pat, patend)) + return(1); + while (*name++ != EOS); + return(0); + case M_ONE: + if (*name++ == EOS) + return(0); + break; + case M_SET: + ok = 0; + if ((k = *name++) == EOS) + return(0); + if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS) + ++pat; + while (((c = *pat++) & M_MASK) != M_END) + if ((*pat & M_MASK) == M_RNG) { + if (c <= k && k <= pat[1]) + ok = 1; + pat += 2; + } else if (c == k) + ok = 1; + if (ok == negate_range) + return(0); + break; + default: + if (*name++ != c) + return(0); + break; + } + } + return(*name == EOS); +} + +/* Free allocated data belonging to a glob_t structure. */ +void +bsd_globfree(glob_t *pglob) +{ + register int i; + register char **pp; + + if (pglob->gl_pathv != NULL) { + pp = pglob->gl_pathv + pglob->gl_offs; + for (i = pglob->gl_pathc; i--; ++pp) + if (*pp) + free(*pp); + free(pglob->gl_pathv); + } +} + +static DIR * +g_opendir(register Char *str, glob_t *pglob) +{ + char buf[MAXPATHLEN]; + + if (!*str) + strcpy(buf, "."); + else + g_Ctoc(str, buf); + + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_opendir)(buf)); + + return(opendir(buf)); +} + +#ifdef HAS_LSTAT +static int +g_lstat(register Char *fn, Stat_t *sb, glob_t *pglob) +{ + char buf[MAXPATHLEN]; + + g_Ctoc(fn, buf); + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_lstat)(buf, sb)); + return(lstat(buf, sb)); +} +#endif /* HAS_LSTAT */ + +static int +g_stat(register Char *fn, Stat_t *sb, glob_t *pglob) +{ + char buf[MAXPATHLEN]; + + g_Ctoc(fn, buf); + if (pglob->gl_flags & GLOB_ALTDIRFUNC) + return((*pglob->gl_stat)(buf, sb)); + return(stat(buf, sb)); +} + +static Char * +g_strchr(Char *str, int ch) +{ + do { + if (*str == ch) + return (str); + } while (*str++); + return (NULL); +} + +#ifdef notdef +static Char * +g_strcat(Char *dst, const Char *src) +{ + Char *sdst = dst; + + while (*dst++) + continue; + --dst; + while((*dst++ = *src++) != EOS) + continue; + + return (sdst); +} +#endif + +static void +g_Ctoc(register const Char *str, char *buf) +{ + register char *dc; + + for (dc = buf; (*dc++ = *str++) != EOS;) + continue; +} + +#ifdef GLOB_DEBUG +static void +qprintf(const char *str, register Char *s) +{ + register Char *p; + + (void)printf("%s:\n", str); + for (p = s; *p; p++) + (void)printf("%c", CHAR(*p)); + (void)printf("\n"); + for (p = s; *p; p++) + (void)printf("%c", *p & M_PROTECT ? '"' : ' '); + (void)printf("\n"); + for (p = s; *p; p++) + (void)printf("%c", ismeta(*p) ? '_' : ' '); + (void)printf("\n"); +} +#endif /* GLOB_DEBUG */ diff --git a/ext/File/Glob/bsd_glob.h b/ext/File/Glob/bsd_glob.h new file mode 100644 index 0000000000..625adfdc64 --- /dev/null +++ b/ext/File/Glob/bsd_glob.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Guido van Rossum. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)glob.h 8.1 (Berkeley) 6/2/93 + */ + +/* + * Clause 3 above should be considered "deleted in its entirety". + * For the actual notice of withdrawal, see: + * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change + */ + +#ifndef _BSD_GLOB_H_ +#define _BSD_GLOB_H_ + +/* #include <sys/cdefs.h> */ + +typedef struct { + int gl_pathc; /* Count of total paths so far. */ + int gl_matchc; /* Count of paths matching pattern. */ + int gl_offs; /* Reserved at beginning of gl_pathv. */ + int gl_flags; /* Copy of flags parameter to glob. */ + char **gl_pathv; /* List of paths matching pattern. */ + /* Copy of errfunc parameter to glob. */ + int (*gl_errfunc)(const char *, int); + + /* + * Alternate filesystem access methods for glob; replacement + * versions of closedir(3), readdir(3), opendir(3), stat(2) + * and lstat(2). + */ + void (*gl_closedir)(void *); + Direntry_t *(*gl_readdir)(void *); + void *(*gl_opendir)(const char *); + int (*gl_lstat)(const char *, Stat_t *); + int (*gl_stat)(const char *, Stat_t *); +} glob_t; + +#define GLOB_APPEND 0x0001 /* Append to output from previous call. */ +#define GLOB_DOOFFS 0x0002 /* Use gl_offs. */ +#define GLOB_ERR 0x0004 /* Return on error. */ +#define GLOB_MARK 0x0008 /* Append / to matching directories. */ +#define GLOB_NOCHECK 0x0010 /* Return pattern itself if nothing matches. */ +#define GLOB_NOSORT 0x0020 /* Don't sort. */ + +#define GLOB_ALTDIRFUNC 0x0040 /* Use alternately specified directory funcs. */ +#define GLOB_BRACE 0x0080 /* Expand braces ala csh. */ +#define GLOB_MAGCHAR 0x0100 /* Pattern had globbing characters. */ +#define GLOB_NOMAGIC 0x0200 /* GLOB_NOCHECK without magic chars (csh). */ +#define GLOB_QUOTE 0x0400 /* Quote special chars with \. */ +#define GLOB_TILDE 0x0800 /* Expand tilde names from the passwd file. */ + +#define GLOB_NOSPACE (-1) /* Malloc call failed. */ +#define GLOB_ABEND (-2) /* Unignored error. */ + +int bsd_glob(const char *, int, int (*)(const char *, int), glob_t *); +void bsd_globfree(glob_t *); + +#endif /* !_BSD_GLOB_H_ */ |