diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 1999-12-09 00:36:24 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 1999-12-09 00:36:24 +0000 |
commit | 81793b9077171abb50d56e2bdf5d4208e13a783d (patch) | |
tree | e2beb7ba49cc916d7f886e228522d86158ce557e /lib/File | |
parent | 2d6d9f7a85258c4d9bd37d884754ead5775be3b9 (diff) | |
download | perl-81793b9077171abb50d56e2bdf5d4208e13a783d.tar.gz |
newer version of File::Find with support for following symlinks and
other features, from Helmut Jarausch <jarausch@igpm.rwth-aachen.de>
p4raw-id: //depot/perl@4671
Diffstat (limited to 'lib/File')
-rw-r--r-- | lib/File/Find.pm | 706 |
1 files changed, 576 insertions, 130 deletions
diff --git a/lib/File/Find.pm b/lib/File/Find.pm index 28e2e90e44..56ab7980ec 100644 --- a/lib/File/Find.pm +++ b/lib/File/Find.pm @@ -12,70 +12,159 @@ finddepth - traverse a directory structure depth-first =head1 SYNOPSIS use File::Find; - find(\&wanted, '/foo','/bar'); + find(\&wanted, '/foo', '/bar'); sub wanted { ... } use File::Find; - finddepth(\&wanted, '/foo','/bar'); + finddepth(\&wanted, '/foo', '/bar'); sub wanted { ... } + + use File::Find; + find({ wanted => \&process, follow => 1 }, '.'); =head1 DESCRIPTION The first argument to find() is either a hash reference describing the -operations to be performed for each file, a code reference, or a string -that contains a subroutine name. If it is a hash reference, then the -value for the key C<wanted> should be a code reference. This code -reference is called I<the wanted() function> below. +operations to be performed for each file, or a code reference. -Currently the only other supported key for the above hash is -C<bydepth>, in presense of which the walk over directories is -performed depth-first. Entry point finddepth() is a shortcut for -specifying C<{ bydepth => 1}> in the first argument of find(). +Here are the possible keys for the hash: + +=over 3 + +=item C<wanted> + +The value should be a code reference. This code reference is called +I<the wanted() function> below. + +=item C<bydepth> + +Reports the name of a directory only AFTER all its entries +have been reported. Entry point finddepth() is a shortcut for +specifying C<{ bydepth => 1 }> in the first argument of find(). + +=item C<follow> + +Causes symbolic links to be followed. Since directory trees with symbolic +links (followed) may contain files more than once and may even have +cycles, a hash has to be built up with an entry for each file. +This might be expensive both in space and time for a large +directory tree. See I<follow_fast> and I<follow_skip> below. +If either I<follow> or I<follow_fast> is in effect: + +=over 6 + +=item + +It is guarantueed that an I<lstat> has been called before the user's +I<wanted()> function is called. This enables fast file checks involving S< _>. + +=item + +There is a variable C<$File::Find::fullname> which holds the absolute +pathname of the file with all symbolic links resolved + +=back + +=item C<follow_fast> + +This is similar to I<follow> except that it may report some files +more than once. It does detect cycles however. +Since only symbolic links have to be hashed, this is +much cheaper both in space and time. +If processing a file more than once (by the user's I<wanted()> function) +is worse than just taking time, the option I<follow> should be used. + +=item C<follow_skip> + +C<follow_skip==1>, which is the default, causes all files which are +neither directories nor symbolic links to be ignored if they are about +to be processed a second time. If a directory or a symbolic link +are about to be processed a second time, File::Find dies. +C<follow_skip==0> causes File::Find to die if any file is about to be +processed a second time. +C<follow_skip==2> causes File::Find to ignore any duplicate files and +dirctories but to proceed normally otherwise. -The wanted() function does whatever verifications you want. -$File::Find::dir contains the current directory name, and $_ the -current filename within that directory. $File::Find::name contains -C<"$File::Find::dir/$_">. You are chdir()'d to $File::Find::dir when -the function is called. The function may set $File::Find::prune to -prune the tree. -File::Find assumes that you don't alter the $_ variable. If you do then -make sure you return it to its original value before exiting your function. +=item C<no_chdir> + +Does not C<chdir()> to each directory as it recurses. The wanted() +function will need to be aware of this, of course. In this case, +C<$_> will be the same as C<$File::Find::name>. + +=item C<untaint> + +If find is used in taint-mode (-T command line switch or if EUID != UID +or if EGID != GID) then internally directory names have to be untainted +before they can be cd'ed to. Therefore they are checked against a regular +expression I<untaint_pattern>. Note, that all names passed to the +user's I<wanted()> function are still tainted. + +=item C<untaint_pattern> + +See above. This should be set using the C<qr> quoting operator. +The default is set to C<qr|^([-+@\w./]+)$|>. +Note that the paranthesis which are vital. + +=item C<untaint_skip> + +If set, directories (subtrees) which fail the I<untaint_pattern> +are skipped. The default is to 'die' in such a case. + +=back + +The wanted() function does whatever verifications you want. +C<$File::Find::dir> contains the current directory name, and C<$_> the +current filename within that directory. C<$File::Find::name> contains +the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when +the function is called, unless C<no_chdir> was specified. +When <follow> or <follow_fast> are in effect there is also a +C<$File::Find::fullname>. +The function may set C<$File::Find::prune> to prune the tree +unless C<bydepth> was specified. This library is useful for the C<find2perl> tool, which when fed, find2perl / -name .nfs\* -mtime +7 \ - -exec rm -f {} \; -o -fstype nfs -prune + -exec rm -f {} \; -o -fstype nfs -prune produces something like: sub wanted { /^\.nfs.*$/ && - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && + (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && int(-M _) > 7 && unlink($_) || - ($nlink || (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_))) && + ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && $dev < 0 && ($File::Find::prune = 1); } -Set the variable $File::Find::dont_use_nlink if you're using AFS, +Set the variable C<$File::Find::dont_use_nlink> if you're using AFS, since AFS cheats. -C<finddepth> is just like C<find>, except that it does a depth-first -search. Here's another interesting wanted function. It will find all symlinks that don't resolve: sub wanted { - -l && !-e && print "bogus link: $File::Find::name\n"; + -l && !-e && print "bogus link: $File::Find::name\n"; } -=head1 BUGS +See also the script C<pfind> on CPAN for a nice application of this +module. + +=head1 CAVEAT + +Be aware that the option to follow symblic links can be dangerous. +Depending on the structure of the directory tree (including symbolic +links to directories) you might traverse a given (physical) directory +more than once (only if C<follow_fast> is in effect). +Furthermore, deleting or changing files in a symbolically linked directory +might cause very unpleasant surprises, since you delete or change files +in an unknown directory. -There is no way to make find or finddepth follow symlinks. =cut @@ -83,151 +172,508 @@ There is no way to make find or finddepth follow symlinks. @EXPORT = qw(find finddepth); -sub find_opt { - my $wanted = shift; - my $bydepth = $wanted->{bydepth}; - my $cwd = $bydepth ? Cwd::fastcwd() : Cwd::cwd(); - # Localize these rather than lexicalizing them for backwards - # compatibility. - local($topdir,$topdev,$topino,$topmode,$topnlink); - foreach $topdir (@_) { - (($topdev,$topino,$topmode,$topnlink) = - ($Is_VMS ? stat($topdir) : lstat($topdir))) - || (warn("Can't stat $topdir: $!\n"), next); - if (-d _) { - if (chdir($topdir)) { - $prune = 0; - unless ($bydepth) { - ($dir,$_) = ($topdir,'.'); - $name = $topdir; - $wanted->{wanted}->(); - } - next if $prune; - my $fixtopdir = $topdir; - $fixtopdir =~ s,/$,, ; - $fixtopdir =~ s/\.dir$// if $Is_VMS; - &finddir($wanted,$fixtopdir,$topnlink, $bydepth); - if ($bydepth) { - ($dir,$_) = ($fixtopdir,'.'); - $name = $fixtopdir; - $wanted->{wanted}->(); - } +use strict; +my $Is_VMS; + +require File::Basename; + +my %SLnkSeen; +my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, + $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat); + +sub contract_name { + my ($cdir,$fn) = @_; + + return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.'; + + $cdir = substr($cdir,0,rindex($cdir,'/')+1); + + $fn =~ s|^\./||; + + my $abs_name= $cdir . $fn; + + if (substr($fn,0,3) eq '../') { + do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|); + } + + return $abs_name; +} + + +sub PathCombine($$) { + my ($Base,$Name) = @_; + my $AbsName; + + if (substr($Name,0,1) eq '/') { + $AbsName= $Name; + } + else { + $AbsName= contract_name($Base,$Name); + } + + # (simple) check for recursion + my $newlen= length($AbsName); + if ($newlen <= length($Base)) { + if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') + && $AbsName eq substr($Base,0,$newlen)) + { + return undef; + } + } + return $AbsName; +} + +sub Follow_SymLink($) { + my ($AbsName) = @_; + + my ($NewName,$DEV, $INO); + ($DEV, $INO)= lstat $AbsName; + + while (-l _) { + if ($SLnkSeen{$DEV, $INO}++) { + if ($follow_skip < 2) { + die "$AbsName is encountered a second time"; } else { - warn "Can't cd to $topdir: $!\n"; + return undef; } } - else { - require File::Basename; - unless (($_,$dir) = File::Basename::fileparse($topdir)) { - ($dir,$_) = ('.', $topdir); + $NewName= PathCombine($AbsName, readlink($AbsName)); + unless(defined $NewName) { + if ($follow_skip < 2) { + die "$AbsName is a recursive symbolic link"; + } + else { + return undef; } - if (chdir($dir)) { - $name = $topdir; - $wanted->{wanted}->(); + } + else { + $AbsName= $NewName; + } + ($DEV, $INO) = lstat($AbsName); + return undef unless defined $DEV; # dangling symbolic link + } + + if ($full_check && $SLnkSeen{$DEV, $INO}++) { + if ($follow_skip < 1) { + die "$AbsName encountered a second time"; + } + else { + return undef; + } + } + + return $AbsName; +} + +use vars qw/ $dir $name $fullname $prune /; +sub _find_dir_symlnk($$$); +sub _find_dir($$$); + +sub _find_opt { + my $wanted = shift; + die "invalid top directory" unless defined $_[0]; + + my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd(); + my $cwd_untainted = $cwd; + $wanted_callback = $wanted->{wanted}; + $bydepth = $wanted->{bydepth}; + $no_chdir = $wanted->{no_chdir}; + $full_check = $wanted->{follow}; + $follow = $full_check || $wanted->{follow_fast}; + $follow_skip = $wanted->{follow_skip}; + $untaint = $wanted->{untaint}; + $untaint_pat = $wanted->{untaint_pattern}; + $untaint_skip = $wanted->{untaint_skip}; + + + # a symbolic link to a directory doesn't increase the link count + $avoid_nlink = $follow || $File::Find::dont_use_nlink; + + if ( $untaint ) { + $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|; + die "insecure cwd in find(depth)" unless defined($cwd_untainted); + } + + my ($abs_dir, $nlink, $Is_Dir); + + Proc_Top_Item: + foreach my $TOP (@_) { + my $top_item = $TOP; + $top_item =~ s|/$||; + $Is_Dir= 0; + + if ($follow) { + if (substr($top_item,0,1) eq '/') { + $abs_dir = $top_item; + } + elsif ($top_item eq '.') { + $abs_dir = $cwd; } + else { # care about any ../ + $abs_dir = contract_name("$cwd/",$top_item); + } + $abs_dir= Follow_SymLink($abs_dir); + unless (defined $abs_dir) { + warn "$top_item is a dangling symbolic link\n"; + next Proc_Top_Item; + } + if (-d _) { + _find_dir_symlnk($wanted, $abs_dir, $top_item); + $Is_Dir= 1; + } + } + else { # no follow + $nlink = (lstat $top_item)[3]; + unless (defined $nlink) { + warn "Can't stat $top_item: $!\n"; + next Proc_Top_Item; + } + if (-d _) { + $top_item =~ s/\.dir$// if $Is_VMS; + _find_dir($wanted, $top_item, $nlink); + $Is_Dir= 1; + } else { - warn "Can't cd to $dir: $!\n"; + $abs_dir= $top_item; + } + } + + unless ($Is_Dir) { + unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { + ($dir,$_) = ('.', $top_item); + } + + $abs_dir = $dir; + if ($untaint) { + my $abs_dir_save = $abs_dir; + $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|; + unless (defined $abs_dir) { + if ($untaint_skip == 0) { + die "directory $abs_dir_save is still tainted"; + } + else { + next Proc_Top_Item; + } + } + } + + unless ($no_chdir or chdir $abs_dir) { + warn "Couldn't chdir $abs_dir: $!\n"; + next Proc_Top_Item; + } + + $name = $abs_dir; + + &$wanted_callback; + + } + + $no_chdir or chdir $cwd_untainted; + } +} + +# API: +# $wanted +# $p_dir : "parent directory" +# $nlink : what came back from the stat +# preconditions: +# chdir (if not no_chdir) to dir + +sub _find_dir($$$) { + my ($wanted, $p_dir, $nlink) = @_; + my ($CdLvl,$Level) = (0,0); + my @Stack; + my @filenames; + my ($subcount,$sub_nlink); + my $SE= []; + my $dir_name= $p_dir; + my $dir_rel= '.'; # directory name relative to current directory + + local ($dir, $name, $prune, *DIR); + + unless ($no_chdir or $p_dir eq '.') { + my $udir = $p_dir; + if ($untaint) { + $udir = $1 if $p_dir =~ m|$untaint_pat|; + unless (defined $udir) { + if ($untaint_skip == 0) { + die "directory $p_dir is still tainted"; + } + else { + return; + } } } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + return; + } + } + + while (defined $SE) { + unless ($bydepth) { + $dir= $p_dir; + $name= $dir_name; + $_= ($no_chdir ? $dir_name : $dir_rel ); + # prune may happen here + $prune= 0; + &$wanted_callback; + next if $prune; + } + + # change to that directory + unless ($no_chdir or $dir_rel eq '.') { + my $udir= $dir_rel; + if ($untaint) { + $udir = $1 if $dir_rel =~ m|$untaint_pat|; + unless (defined $udir) { + if ($untaint_skip == 0) { + die "directory ($p_dir/) $dir_rel is still tainted"; + } + } + } + unless (chdir $udir) { + warn "Can't cd to ($p_dir/) $udir : $!\n"; + next; + } + $CdLvl++; + } + + $dir= $dir_name; + + # Get the list of files in the current directory. + unless (opendir DIR, ($no_chdir ? $dir_name : '.')) { + warn "Can't opendir($dir_name): $!\n"; + next; + } + @filenames = readdir DIR; + closedir(DIR); + + if ($nlink == 2 && !$avoid_nlink) { + # This dir has no subdirectories. + for my $FN (@filenames) { + next if $FN =~ /^\.{1,2}$/; + + $name = "$dir_name/$FN"; + $_ = ($no_chdir ? $name : $FN); + &$wanted_callback; + } + + } + else { + # This dir has subdirectories. + $subcount = $nlink - 2; + + for my $FN (@filenames) { + next if $FN =~ /^\.{1,2}$/; + if ($subcount > 0 || $avoid_nlink) { + # Seen all the subdirs? + # check for directoriness. + # stat is faster for a file in the current directory + $sub_nlink = (lstat ($no_chdir ? "$dir_name/$FN" : $FN))[3]; + + if (-d _) { + --$subcount; + $FN =~ s/\.dir$// if $Is_VMS; + push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; + } + else { + $name = "$dir_name/$FN"; + $_= ($no_chdir ? $name : $FN); + &$wanted_callback; + } + } + else { $name = "$dir_name/$FN"; + $_= ($no_chdir ? $name : $FN); + &$wanted_callback; + } + } + } + if ($bydepth) { + $name = $dir_name; + $dir = $p_dir; + $_ = ($no_chdir ? $dir_name : $dir_rel ); + &$wanted_callback; + } } continue { - chdir $cwd; + if ( defined ($SE = pop @Stack) ) { + ($Level, $p_dir, $dir_rel, $nlink) = @$SE; + if ($CdLvl > $Level && !$no_chdir) { + die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level) + unless chdir '../' x ($CdLvl-$Level); + $CdLvl = $Level; + } + $dir_name = "$p_dir/$dir_rel"; + } } } -sub finddir { - my($wanted, $nlink, $bydepth); - local($dir, $name); - ($wanted, $dir, $nlink, $bydepth) = @_; - - my($dev, $ino, $mode, $subcount); - - # Get the list of files in the current directory. - opendir(DIR,'.') || (warn("Can't open $dir: $!\n"), $bydepth || return); - my(@filenames) = readdir(DIR); - closedir(DIR); - - if ($nlink == 2 && !$dont_use_nlink) { # This dir has no subdirectories. - for (@filenames) { - next if $_ eq '.'; - next if $_ eq '..'; - $name = "$dir/$_"; - $nlink = 0; - $wanted->{wanted}->(); - } - } - else { # This dir has subdirectories. - $subcount = $nlink - 2; - for (@filenames) { - next if $_ eq '.'; - next if $_ eq '..'; - $nlink = 0; - $prune = 0 unless $bydepth; - $name = "$dir/$_"; - $wanted->{wanted}->() unless $bydepth; - if ($subcount > 0 || $dont_use_nlink) { # Seen all the subdirs? - - # Get link count and check for directoriness. - - $_ = "" if (!defined($_)); - ($dev,$ino,$mode,$nlink) = ($Is_VMS ? stat($_) : lstat($_)); - # unless ($nlink || $dont_use_nlink); - - if (-d _) { - - # It really is a directory, so do it recursively. - - --$subcount; - next if $prune; - if (chdir $_) { - $name =~ s/\.dir$// if $Is_VMS; - &finddir($wanted,$name,$nlink, $bydepth); - chdir '..'; + +# API: +# $wanted +# $dir_loc : absolute location of a dir +# $p_dir : "parent directory" +# preconditions: +# chdir (if not no_chdir) to dir + +sub _find_dir_symlnk($$$) { + my ($wanted, $dir_loc, $p_dir) = @_; + my @Stack; + my @filenames; + my $new_loc; + my $SE = []; + my $dir_name = $p_dir; + my $dir_rel = '.'; # directory name relative to current directory + + local ($dir, $name, $fullname, $prune, *DIR); + + unless ($no_chdir or $p_dir eq '.') { + my $udir = $dir_loc; + if ($untaint) { + $udir = $1 if $dir_loc =~ m|$untaint_pat|; + unless (defined $udir) { + if ($untaint_skip == 0) { + die "directory $dir_loc is still tainted"; + } + else { + return; + } + } + } + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + return; + } + } + + while (defined $SE) { + + unless ($bydepth) { + $dir= $p_dir; + $name= $dir_name; + $_= ($no_chdir ? $dir_name : $dir_rel ); + $fullname= $dir_loc; + # prune may happen here + $prune= 0; + &$wanted_callback; + next if $prune; + } + + # change to that directory + unless ($no_chdir or $dir_rel eq '.') { + my $udir = $dir_loc; + if ($untaint) { + $udir = $1 if $dir_loc =~ m|$untaint_pat|; + unless (defined $udir ) { + if ($untaint_skip == 0) { + die "directory $dir_loc is still tainted"; } else { - warn "Can't cd to $_: $!\n"; + next; } } } - $wanted->{wanted}->() if $bydepth; + unless (chdir $udir) { + warn "Can't cd to $udir: $!\n"; + next; + } + } + + $dir = $dir_name; + + # Get the list of files in the current directory. + unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) { + warn "Can't opendir($dir_loc): $!\n"; + next; + } + @filenames = readdir DIR; + closedir(DIR); + + for my $FN (@filenames) { + next if $FN =~ /^\.{1,2}$/; + + # follow symbolic links / do an lstat + $new_loc= Follow_SymLink("$dir_loc/$FN"); + + # ignore if invalid symlink + next unless defined $new_loc; + + if (-d _) { + push @Stack,[$new_loc,$dir_name,$FN]; + } + else { + $fullname = $new_loc; + $name = "$dir_name/$FN"; + $_ = ($no_chdir ? $name : $FN); + &$wanted_callback; + } + } + + if ($bydepth) { + $fullname = $dir_loc; + $name = $dir_name; + $_ = ($no_chdir ? $dir_name : $dir_rel); + &$wanted_callback; + } + } + continue { + if (defined($SE = pop @Stack)) { + ($dir_loc, $p_dir, $dir_rel) = @$SE; + $dir_name = "$p_dir/$dir_rel"; } } } + sub wrap_wanted { - my $wanted = shift; - ref($wanted) eq 'HASH' ? $wanted : { wanted => $wanted }; + my $wanted = shift; + if ( ref($wanted) eq 'HASH' ) { + if ( $wanted->{follow} || $wanted->{follow_fast}) { + $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; + } + if ( $wanted->{untaint} ) { + $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$| + unless defined $wanted->{untaint_pattern}; + $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; + } + return $wanted; + } + else { + return { wanted => $wanted }; + } } sub find { - my $wanted = shift; - find_opt(wrap_wanted($wanted), @_); + my $wanted = shift; + _find_opt(wrap_wanted($wanted), @_); + %SLnkSeen= (); # free memory } sub finddepth { - my $wanted = wrap_wanted(shift); - $wanted->{bydepth} = 1; - find_opt($wanted, @_); + my $wanted = wrap_wanted(shift); + $wanted->{bydepth} = 1; + _find_opt($wanted, @_); + %SLnkSeen= (); # free memory } # These are hard-coded for now, but may move to hint files. if ($^O eq 'VMS') { - $Is_VMS = 1; - $dont_use_nlink = 1; + $Is_VMS = 1; + $File::Find::dont_use_nlink = 1; } -$dont_use_nlink = 1 +$File::Find::dont_use_nlink = 1 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32'; # Set dont_use_nlink in your hint file if your system's stat doesn't # report the number of links in a directory as an indication # of the number of files. # See, e.g. hints/machten.sh for MachTen 2.2. -unless ($dont_use_nlink) { - require Config; - $dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); +unless ($File::Find::dont_use_nlink) { + require Config; + $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); } 1; - |