summaryrefslogtreecommitdiff
path: root/lib/File/Basename.pm
diff options
context:
space:
mode:
authorMichael G. Schwern <schwern@pobox.com>2005-07-05 13:11:27 -0700
committerSteve Hay <SteveHay@planit.com>2005-07-06 09:36:08 +0000
commit767010ca49e6a0dff07d97842a8341decbed33d9 (patch)
treed2ea5a0130269e6f9eaa28dc63dc028a2d343a8f /lib/File/Basename.pm
parentc08507ee64e0d5a7fcd83abd77804ef8a1cd8098 (diff)
downloadperl-767010ca49e6a0dff07d97842a8341decbed33d9.tar.gz
File::Basename doc overhaul
Message-ID: <20050706031127.GL9227@windhund.schwern.org> (with some minor changes) p4raw-id: //depot/perl@25086
Diffstat (limited to 'lib/File/Basename.pm')
-rw-r--r--lib/File/Basename.pm299
1 files changed, 156 insertions, 143 deletions
diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm
index 887c7bae4a..e6b16e7503 100644
--- a/lib/File/Basename.pm
+++ b/lib/File/Basename.pm
@@ -1,12 +1,6 @@
-package File::Basename;
-
=head1 NAME
-fileparse - split a pathname into pieces
-
-basename - extract just the filename from a path
-
-dirname - extract just the directory from a path
+File::Basename - Parse file paths into directory, filename and suffix.
=head1 SYNOPSIS
@@ -14,120 +8,25 @@ dirname - extract just the directory from a path
($name,$path,$suffix) = fileparse($fullname,@suffixlist);
$name = fileparse($fullname,@suffixlist);
- fileparse_set_fstype($os_string);
+
$basename = basename($fullname,@suffixlist);
- $dirname = dirname($fullname);
+ $dirname = dirname($fullname);
- ($name,$path,$suffix) = fileparse("lib/File/Basename.pm",qr{\.pm});
- fileparse_set_fstype("VMS");
- $basename = basename("lib/File/Basename.pm",".pm");
- $dirname = dirname("lib/File/Basename.pm");
=head1 DESCRIPTION
-These routines allow you to parse file specifications into useful
-pieces using the syntax of different operating systems.
-
-=over 4
-
-=item fileparse_set_fstype
-
-You select the syntax via the routine fileparse_set_fstype().
-
-If the argument passed to it contains one of the substrings
-"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification
-syntax of that operating system is used in future calls to
-fileparse(), basename(), and dirname(). If it contains none of
-these substrings, Unix syntax is used. This pattern matching is
-case-insensitive. If you've selected VMS syntax, and the file
-specification you pass to one of these routines contains a "/",
-they assume you are using Unix emulation and apply the Unix syntax
-rules instead, for that function call only.
-
-If the argument passed to it contains one of the substrings "VMS",
-"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern
-matching for suffix removal is performed without regard for case,
-since those systems are not case-sensitive when opening existing files
-(though some of them preserve case on file creation).
-
-If you haven't called fileparse_set_fstype(), the syntax is chosen
-by examining the builtin variable C<$^O> according to these rules.
-
-=item fileparse
-
-The fileparse() routine divides a file specification into three
-parts: a leading B<path>, a file B<name>, and a B<suffix>. The
-B<path> contains everything up to and including the last directory
-separator in the input file specification. The remainder of the input
-file specification is then divided into B<name> and B<suffix> based on
-the optional patterns you specify in C<@suffixlist>. Each element of
-this list can be a qr-quoted pattern (or a string which is interpreted
-as a regular expression), and is matched
-against the end of B<name>. If this succeeds, the matching portion of
-B<name> is removed and prepended to B<suffix>. By proper use of
-C<@suffixlist>, you can remove file types or versions for examination.
-
-You are guaranteed that if you concatenate B<path>, B<name>, and
-B<suffix> together in that order, the result will denote the same
-file as the input file specification.
-
-In scalar context, fileparse() returns only the B<name> part of the filename.
-
-=back
-
-=head1 EXAMPLES
-
-Using Unix file syntax:
-
- ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7',
- qr{\.book\d+});
-
-would yield
-
- $base eq 'draft'
- $path eq '/virgil/aeneid/',
- $type eq '.book7'
-
-Similarly, using VMS syntax:
-
- ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh',
- qr{\..*});
-
-would yield
-
- $name eq 'Rhetoric'
- $dir eq 'Doc_Root:[Help]'
- $type eq '.Rnh'
-
-=over
-
-=item C<basename>
-
-The basename() routine returns the first element of the list produced
-by calling fileparse() with the same arguments, except that it always
-quotes metacharacters in the given suffixes. It is provided for
-programmer compatibility with the Unix shell command basename(1).
-
-=item C<dirname>
-
-The dirname() routine returns the directory portion of the input file
-specification. When using VMS or MacOS syntax, this is identical to the
-second element of the list produced by calling fileparse() with the same
-input file specification. (Under VMS, if there is no directory information
-in the input file specification, then the current default device and
-directory are returned.) When using Unix or MSDOS syntax, the return
-value conforms to the behavior of the Unix shell command dirname(1). This
-is usually the same as the behavior of fileparse(), but differs in some
-cases. For example, for the input file specification F<lib/>, fileparse()
-considers the directory name to be F<lib/>, while dirname() considers the
-directory name to be F<.>).
+These routines allow you to parse file paths into their directory, filename
+and suffix.
-=back
+B<NOTE>: C<dirname()> and C<basename()> emulate the behaviours, and quirks, of
+the shell and C functions of the same name. See each function's documentation
+for details.
=cut
-## use strict;
+package File::Basename;
+
# A bit of juggling to insure that C<use re 'taint';> always works, since
# File::Basename is used during the Perl build, when the re extension may
# not be available.
@@ -138,7 +37,7 @@ BEGIN {
}
-
+use strict;
use 5.006;
use warnings;
our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
@@ -147,25 +46,49 @@ require Exporter;
@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
$VERSION = "2.73";
+fileparse_set_fstype($^O);
-# fileparse_set_fstype() - specify OS-based rules used in future
-# calls to routines in this package
-#
-# Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS
-# Any other name uses Unix-style rules and is case-sensitive
-sub fileparse_set_fstype {
- my @old = ($Fileparse_fstype, $Fileparse_igncase);
- if (@_) {
- $Fileparse_fstype = $_[0];
- $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i);
- }
- wantarray ? @old : $old[0];
-}
+=over 4
+
+=item C<fileparse>
+
+ my($filename, $directories, $suffix) = fileparse($path);
+ my($filename, $directories, $suffix) = fileparse($path, @suffixes);
+ my $filename = fileparse($path, @suffixes);
+
+The C<fileparse()> routine divides a file path into its $directories, $filename
+and (optionally) the filename $suffix.
+
+$directories contains everything up to and including the last
+directory separator in the $path including the volume (if applicable).
+The remainder of the $path is the $filename.
+
+ # On Unix returns ("baz", "/foo/bar/", "")
+ fileparse("/foo/bar/baz");
+
+ # On Windows returns ("baz", 'C:\\foo\\bar\\', "")
+ fileparse('C:\foo\bar\baz');
+
+ # On Unix returns ("", "/foo/bar/baz/", "")
+ fileparse("/foo/bar/baz/");
+
+If @suffixes are given each element is a pattern (either a string or a
+C<qr//>) matched against the end of the $filename. The matching
+portion is removed and becomes the $suffix.
+
+ # On Unix returns ("baz", "/foo/bar/", ".txt")
+ fileparse("/foo/bar/baz.txt", qr/\.[^.]*/);
+
+If the file system type is one of "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "MSWin32"
+or "RISCOS" (see C<fileparse_set_fstype()>) then the pattern matching
+for suffix removal is performed case-insensitively, since those
+systems are not case-sensitive when opening existing files.
+
+You are guaranteed that C<$directories . $filename . $suffix> will
+denote the same location as the original $path.
-# fileparse() - parse file specification
-#
-# Version 2.4 27-Sep-1996 Charles Bailey bailey@genetics.upenn.edu
+=cut
sub fileparse {
@@ -227,13 +150,31 @@ sub fileparse {
}
}
+ # Ensure taint is propagated from the path to its pieces.
$tail .= $taint if defined $tail; # avoid warning if $tail == undef
wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
: ($basename .= $taint);
}
-# basename() - returns first element of list returned by fileparse()
+
+=item C<basename>
+
+ my $filename = basename($path);
+ my $filename = basename($path, @suffixes);
+
+C<basename()> works just like C<fileparse()> in scalar context - you only get
+the $filename - except that it always quotes metacharacters in the @suffixes.
+
+ # These two function calls are equivalent.
+ my $filename = basename("/foo/bar/baz.txt", ".txt");
+ my $filename = fileparse("/foo/bar/baz.txt", qr/\Q.txt\E/);
+
+This function is provided for compatibility with the Unix shell command
+C<basename(1)>.
+
+=cut
+
sub basename {
my($name) = shift;
@@ -241,22 +182,64 @@ sub basename {
}
-# dirname() - returns device and directory portion of file specification
-# Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS
-# filespecs except for names ending with a separator, e.g., "/xx/yy/".
-# This differs from the second element of the list returned
-# by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and
-# the last directory name if the filespec ends in a '/' or '\'), is lost.
+
+=item C<dirname>
+
+This function is provided for compatibility with the Unix shell
+command C<dirname(1)> and has inherited some of its quirks. In spite of
+its name it does B<NOT> always return the directory name as you might
+expect. To be safe, if you want the directory name of a path use
+C<fileparse()>.
+
+ # On all but Unix and MSDOS
+ my $directories = dirname($path);
+
+On all system types but Unix and MSDOS this works just like
+C<fileparse($path)> but returning just the $directories.
+
+ # On Unix and MSDOS
+ my $path_one_level_up = dirname($path);
+
+When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function
+which is subtly different from how C<fileparse()> works. It returns all but
+the last level of a file path even if the last level is clearly a directory.
+In effect, it is not returning the directory portion but simply the path one
+level up acting like C<chop()> for file paths.
+
+Also unlike C<fileparse()>, C<dirname()> does not include a trailing slash on
+its returned path.
+
+ # returns /foo/bar. fileparse() would return /foo/bar/
+ dirname("/foo/bar/baz");
+
+ # also returns /foo/bar despite the fact that baz is clearly a
+ # directory. fileparse() would return /foo/bar/baz/
+ dirname("/foo/bar/baz/");
+
+ # returns '.'. fileparse() would return 'foo/'
+ dirname("foo/");
+
+Under VMS, if there is no directory information in the $path, then the
+current default device and directory are used.
+
+=cut
+
sub dirname {
- my($basename,$dirname) = fileparse($_[0]);
my($fstype) = $Fileparse_fstype;
+ if( $fstype =~ /VMS/i and $_[0] =~ m{/} ) {
+ # Parse as Unix
+ local($File::Basename::Fileparse_fstype) = '';
+ return dirname(@_);
+ }
+
+ my($basename,$dirname) = fileparse($_[0]);
+
if ($fstype =~ /VMS/i) {
- if ($_[0] =~ m#/#) { $fstype = '' }
- else { return $dirname || $ENV{DEFAULT} }
+ $dirname ||= $ENV{DEFAULT};
}
- if ($fstype =~ /MacOS/i) {
+ elsif ($fstype =~ /MacOS/i) {
if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
$dirname =~ s/([^:]):\z/$1/s;
($basename,$dirname) = fileparse $dirname;
@@ -276,17 +259,47 @@ sub dirname {
$dirname =~ s#[^:/]+\z## unless length($basename);
}
else {
- $dirname =~ s:(.)/*\z:$1:s;
+ $dirname =~ s{(.)/*\z}{$1}s;
unless( length($basename) ) {
- local($File::Basename::Fileparse_fstype) = $fstype;
($basename,$dirname) = fileparse $dirname;
- $dirname =~ s:(.)/*\z:$1:s;
+ $dirname =~ s{(.)/*\z}{$1}s;
}
}
$dirname;
}
-fileparse_set_fstype $^O;
+
+=item C<fileparse_set_fstype>
+
+ my $previous_fstype = fileparse_set_fstype($type);
+
+Normally File::Basename will assume a file path type native to your current
+operating system (i.e. /foo/bar style on Unix, \foo\bar on Windows, etc.).
+With this function you can override that assumption.
+
+Valid $types are "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "RISCOS",
+"MSWin32" and "Unix" (case-insensitive). If an unrecognized $type is
+given Unix semantics will be assumed.
+
+If you've selected VMS syntax, and the file specification you pass to
+one of these routines contains a "/", they assume you are using Unix
+emulation and apply the Unix syntax rules instead, for that function
+call only.
+
+=back
+
+=cut
+
+
+sub fileparse_set_fstype {
+ my @old = ($Fileparse_fstype, $Fileparse_igncase);
+ if (@_) {
+ $Fileparse_fstype = $_[0];
+ $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i);
+ }
+ wantarray ? @old : $old[0];
+}
+
1;