diff options
author | Michael G. Schwern <schwern@pobox.com> | 2005-07-05 13:11:27 -0700 |
---|---|---|
committer | Steve Hay <SteveHay@planit.com> | 2005-07-06 09:36:08 +0000 |
commit | 767010ca49e6a0dff07d97842a8341decbed33d9 (patch) | |
tree | d2ea5a0130269e6f9eaa28dc63dc028a2d343a8f /lib/File | |
parent | c08507ee64e0d5a7fcd83abd77804ef8a1cd8098 (diff) | |
download | perl-767010ca49e6a0dff07d97842a8341decbed33d9.tar.gz |
File::Basename doc overhaul
Message-ID: <20050706031127.GL9227@windhund.schwern.org>
(with some minor changes)
p4raw-id: //depot/perl@25086
Diffstat (limited to 'lib/File')
-rw-r--r-- | lib/File/Basename.pm | 299 |
1 files changed, 156 insertions, 143 deletions
diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm index 887c7bae4a..e6b16e7503 100644 --- a/lib/File/Basename.pm +++ b/lib/File/Basename.pm @@ -1,12 +1,6 @@ -package File::Basename; - =head1 NAME -fileparse - split a pathname into pieces - -basename - extract just the filename from a path - -dirname - extract just the directory from a path +File::Basename - Parse file paths into directory, filename and suffix. =head1 SYNOPSIS @@ -14,120 +8,25 @@ dirname - extract just the directory from a path ($name,$path,$suffix) = fileparse($fullname,@suffixlist); $name = fileparse($fullname,@suffixlist); - fileparse_set_fstype($os_string); + $basename = basename($fullname,@suffixlist); - $dirname = dirname($fullname); + $dirname = dirname($fullname); - ($name,$path,$suffix) = fileparse("lib/File/Basename.pm",qr{\.pm}); - fileparse_set_fstype("VMS"); - $basename = basename("lib/File/Basename.pm",".pm"); - $dirname = dirname("lib/File/Basename.pm"); =head1 DESCRIPTION -These routines allow you to parse file specifications into useful -pieces using the syntax of different operating systems. - -=over 4 - -=item fileparse_set_fstype - -You select the syntax via the routine fileparse_set_fstype(). - -If the argument passed to it contains one of the substrings -"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification -syntax of that operating system is used in future calls to -fileparse(), basename(), and dirname(). If it contains none of -these substrings, Unix syntax is used. This pattern matching is -case-insensitive. If you've selected VMS syntax, and the file -specification you pass to one of these routines contains a "/", -they assume you are using Unix emulation and apply the Unix syntax -rules instead, for that function call only. 
- -If the argument passed to it contains one of the substrings "VMS", -"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern -matching for suffix removal is performed without regard for case, -since those systems are not case-sensitive when opening existing files -(though some of them preserve case on file creation). - -If you haven't called fileparse_set_fstype(), the syntax is chosen -by examining the builtin variable C<$^O> according to these rules. - -=item fileparse - -The fileparse() routine divides a file specification into three -parts: a leading B<path>, a file B<name>, and a B<suffix>. The -B<path> contains everything up to and including the last directory -separator in the input file specification. The remainder of the input -file specification is then divided into B<name> and B<suffix> based on -the optional patterns you specify in C<@suffixlist>. Each element of -this list can be a qr-quoted pattern (or a string which is interpreted -as a regular expression), and is matched -against the end of B<name>. If this succeeds, the matching portion of -B<name> is removed and prepended to B<suffix>. By proper use of -C<@suffixlist>, you can remove file types or versions for examination. - -You are guaranteed that if you concatenate B<path>, B<name>, and -B<suffix> together in that order, the result will denote the same -file as the input file specification. - -In scalar context, fileparse() returns only the B<name> part of the filename. 
- -=back - -=head1 EXAMPLES - -Using Unix file syntax: - - ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7', - qr{\.book\d+}); - -would yield - - $base eq 'draft' - $path eq '/virgil/aeneid/', - $type eq '.book7' - -Similarly, using VMS syntax: - - ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh', - qr{\..*}); - -would yield - - $name eq 'Rhetoric' - $dir eq 'Doc_Root:[Help]' - $type eq '.Rnh' - -=over - -=item C<basename> - -The basename() routine returns the first element of the list produced -by calling fileparse() with the same arguments, except that it always -quotes metacharacters in the given suffixes. It is provided for -programmer compatibility with the Unix shell command basename(1). - -=item C<dirname> - -The dirname() routine returns the directory portion of the input file -specification. When using VMS or MacOS syntax, this is identical to the -second element of the list produced by calling fileparse() with the same -input file specification. (Under VMS, if there is no directory information -in the input file specification, then the current default device and -directory are returned.) When using Unix or MSDOS syntax, the return -value conforms to the behavior of the Unix shell command dirname(1). This -is usually the same as the behavior of fileparse(), but differs in some -cases. For example, for the input file specification F<lib/>, fileparse() -considers the directory name to be F<lib/>, while dirname() considers the -directory name to be F<.>). +These routines allow you to parse file paths into their directory, filename +and suffix. -=back +B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and quirks, of +the shell and C functions of the same name. See each function's documentation +for details. =cut -## use strict; +package File::Basename; + # A bit of juggling to insure that C<use re 'taint';> always works, since # File::Basename is used during the Perl build, when the re extension may # not be available. 
@@ -138,7 +37,7 @@ BEGIN { } - +use strict; use 5.006; use warnings; our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase); @@ -147,25 +46,49 @@ require Exporter; @EXPORT = qw(fileparse fileparse_set_fstype basename dirname); $VERSION = "2.73"; +fileparse_set_fstype($^O); -# fileparse_set_fstype() - specify OS-based rules used in future -# calls to routines in this package -# -# Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS -# Any other name uses Unix-style rules and is case-sensitive -sub fileparse_set_fstype { - my @old = ($Fileparse_fstype, $Fileparse_igncase); - if (@_) { - $Fileparse_fstype = $_[0]; - $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); - } - wantarray ? @old : $old[0]; -} +=over 4 + +=item C<fileparse> + + my($filename, $directories, $suffix) = fileparse($path); + my($filename, $directories, $suffix) = fileparse($path, @suffixes); + my $filename = fileparse($path, @suffixes); + +The C<fileparse()> routine divides a file path into its $directories, $filename +and (optionally) the filename $suffix. + +$directories contains everything up to and including the last +directory separator in the $path including the volume (if applicable). +The remainder of the $path is the $filename. + + # On Unix returns ("baz", "/foo/bar/", "") + fileparse("/foo/bar/baz"); + + # On Windows returns ("baz", "C:\foo\bar\", "") + fileparse("C:\foo\bar\baz"); + + # On Unix returns ("", "/foo/bar/baz/", "") + fileparse("/foo/bar/baz/"); + +If @suffixes are given each element is a pattern (either a string or a +C<qr//>) matched against the end of the $filename. The matching +portion is removed and becomes the $suffix. 
+ + # On Unix returns ("baz", "/foo/bar/", ".txt") + fileparse("/foo/bar/baz.txt", qr/\.[^.]*/); + +If type is one of "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "MSWin32" +or "RISCOS" (see C<fileparse_set_fstype()>) then the pattern matching +for suffix removal is performed case-insensitively, since those +systems are not case-sensitive when opening existing files. + +You are guaranteed that C<$directories . $filename . $suffix> will +denote the same location as the original $path. -# fileparse() - parse file specification -# -# Version 2.4 27-Sep-1996 Charles Bailey bailey@genetics.upenn.edu +=cut sub fileparse { @@ -227,13 +150,31 @@ sub fileparse { } } + # Ensure taint is propagated from the path to its pieces. $tail .= $taint if defined $tail; # avoid warning if $tail == undef wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail) : ($basename .= $taint); } -# basename() - returns first element of list returned by fileparse() + +=item C<basename> + + my $filename = basename($path); + my $filename = basename($path, @suffixes); + +C<basename()> works just like C<fileparse()> in scalar context - you only get +the $filename - except that it always quotes metacharacters in the @suffixes. + + # These two function calls are equivalent. + my $filename = basename("/foo/bar/baz.txt", ".txt"); + my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/); + +This function is provided for compatibility with the Unix shell command +C<basename(1)>. + +=cut + sub basename { my($name) = shift; @@ -241,22 +182,64 @@ sub basename { } -# dirname() - returns device and directory portion of file specification -# Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS -# filespecs except for names ending with a separator, e.g., "/xx/yy/". -# This differs from the second element of the list returned -# by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and -# the last directory name if the filespec ends in a '/' or '\'), is lost. 
+ +=item C<dirname> + +This function is provided for compatibility with the Unix shell +command C<dirname(1)> and has inherited some of its quirks. In spite of +its name it does B<NOT> always return the directory name as you might +expect. To be safe, if you want the directory name of a path use +C<fileparse()>. + + # On all but Unix and MSDOS + my $directories = dirname($path); + +On all system types but Unix and MSDOS this works just like +C<fileparse($path)> but returning just the $directories. + + # On Unix and MSDOS + my $path_one_level_up = dirname($path); + +When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function +which is subtly different from how C<fileparse()> works. It returns all but +the last level of a file path even if the last level is clearly a directory. +In effect, it is not returning the directory portion but simply the path one +level up acting like C<chop()> for file paths. + +Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on +its returned path. + + # returns /foo/bar. fileparse() would return /foo/bar/ + dirname("/foo/bar/baz"); + + # also returns /foo/bar despite the fact that baz is clearly a + # directory. fileparse() would return /foo/bar/baz/ + dirname("/foo/bar/baz/"); + + # returns '.'. fileparse() would return 'foo/' + dirname("foo/"); + +Under VMS, if there is no directory information in the $path, then the +current default device and directory is used. 
+ +=cut + sub dirname { - my($basename,$dirname) = fileparse($_[0]); my($fstype) = $Fileparse_fstype; + if( $fstype =~ /VMS/i and $_[0] =~ m{/} ) { + # Parse as Unix + local($File::Basename::Fileparse_fstype) = ''; + return dirname(@_); + } + + my($basename,$dirname) = fileparse($_[0]); + if ($fstype =~ /VMS/i) { - if ($_[0] =~ m#/#) { $fstype = '' } - else { return $dirname || $ENV{DEFAULT} } + $dirname ||= $ENV{DEFAULT}; } - if ($fstype =~ /MacOS/i) { + elsif ($fstype =~ /MacOS/i) { if( !length($basename) && $dirname !~ /^[^:]+:\z/) { $dirname =~ s/([^:]):\z/$1/s; ($basename,$dirname) = fileparse $dirname; @@ -276,17 +259,47 @@ sub dirname { $dirname =~ s#[^:/]+\z## unless length($basename); } else { - $dirname =~ s:(.)/*\z:$1:s; + $dirname =~ s{(.)/*\z}{$1}s; unless( length($basename) ) { - local($File::Basename::Fileparse_fstype) = $fstype; ($basename,$dirname) = fileparse $dirname; - $dirname =~ s:(.)/*\z:$1:s; + $dirname =~ s{(.)/*\z}{$1}s; } } $dirname; } -fileparse_set_fstype $^O; + +=item C<fileparse_set_fstype> + + my $previous_fstype = fileparse_set_fstype($type); + +Normally File::Basename will assume a file path type native to your current +operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...). +With this function you can override that assumption. + +Valid $types are "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "RISCOS", +"MSWin32" and "Unix" (case-insensitive). If an unrecognized $type is +given Unix semantics will be assumed. + +If you've selected VMS syntax, and the file specification you pass to +one of these routines contains a "/", they assume you are using Unix +emulation and apply the Unix syntax rules instead, for that function +call only. + +=back + +=cut + + +sub fileparse_set_fstype { + my @old = ($Fileparse_fstype, $Fileparse_igncase); + if (@_) { + $Fileparse_fstype = $_[0]; + $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); + } + wantarray ? @old : $old[0]; +} + 1; |