summaryrefslogtreecommitdiff
path: root/lib/File/Basename.pm
blob: ed59e9a7a05c2b247cb796e3edeb3bacb36f4446 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
package File::Basename;

=head1 NAME

fileparse - split a pathname into pieces

basename - extract just the filename from a path

dirname - extract just the directory from a path

=head1 SYNOPSIS

    use File::Basename;

    ($name,$path,$suffix) = fileparse($fullname,@suffixlist)
    fileparse_set_fstype($os_string);
    $basename = basename($fullname,@suffixlist);
    $dirname = dirname($fullname);

    ($name,$path,$suffix) = fileparse("lib/File/Basename.pm",qr{\.pm});
    fileparse_set_fstype("VMS");
    $basename = basename("lib/File/Basename.pm",qr{\.pm});
    $dirname = dirname("lib/File/Basename.pm");

=head1 DESCRIPTION

These routines allow you to parse file specifications into useful
pieces using the syntax of different operating systems.

=over 4

=item fileparse_set_fstype

You select the syntax via the routine fileparse_set_fstype().

If the argument passed to it contains one of the substrings
"VMS", "MSDOS", "MacOS", "AmigaOS" or "MSWin32", the file specification 
syntax of that operating system is used in future calls to 
fileparse(), basename(), and dirname().  If it contains none of
these substrings, Unix syntax is used.  This pattern matching is
case-insensitive.  If you've selected VMS syntax, and the file
specification you pass to one of these routines contains a "/",
they assume you are using Unix emulation and apply the Unix syntax
rules instead, for that function call only.

If the argument passed to it contains one of the substrings "VMS",
"MSDOS", "MacOS", "AmigaOS", "os2", "MSWin32" or "RISCOS", then the pattern
matching for suffix removal is performed without regard for case,
since those systems are not case-sensitive when opening existing files
(though some of them preserve case on file creation).

If you haven't called fileparse_set_fstype(), the syntax is chosen
by examining the builtin variable C<$^O> according to these rules.

=item fileparse

The fileparse() routine divides a file specification into three
parts: a leading B<path>, a file B<name>, and a B<suffix>.  The
B<path> contains everything up to and including the last directory
separator in the input file specification.  The remainder of the input
file specification is then divided into B<name> and B<suffix> based on
the optional patterns you specify in C<@suffixlist>.  Each element of
this list can be a qr-quoted pattern (or a string which is interpreted
as a regular expression), and is matched
against the end of B<name>.  If this succeeds, the matching portion of
B<name> is removed and prepended to B<suffix>.  By proper use of
C<@suffixlist>, you can remove file types or versions for examination.

You are guaranteed that if you concatenate B<path>, B<name>, and
B<suffix> together in that order, the result will denote the same
file as the input file specification.

=back

=head1 EXAMPLES

Using Unix file syntax:

    ($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7',
				    qr{\.book\d+});

would yield

    $base eq 'draft'
    $path eq '/virgil/aeneid/',
    $type eq '.book7'

Similarly, using VMS syntax:

    ($name,$dir,$type) = fileparse('Doc_Root:[Help]Rhetoric.Rnh',
				   qr{\..*});

would yield

    $name eq 'Rhetoric'
    $dir  eq 'Doc_Root:[Help]'
    $type eq '.Rnh'

=over

=item C<basename>

The basename() routine returns the first element of the list produced
by calling fileparse() with the same arguments, except that it always
quotes metacharacters in the given suffixes.  It is provided for
programmer compatibility with the Unix shell command basename(1).

=item C<dirname>

The dirname() routine returns the directory portion of the input file
specification.  When using VMS or MacOS syntax, this is identical to the
second element of the list produced by calling fileparse() with the same
input file specification.  (Under VMS, if there is no directory information
in the input file specification, then the current default device and
directory are returned.)  When using Unix or MSDOS syntax, the return
value conforms to the behavior of the Unix shell command dirname(1).  This
is usually the same as the behavior of fileparse(), but differs in some
cases.  For example, for the input file specification F<lib/>, fileparse()
considers the directory name to be F<lib/>, while dirname() considers the
directory name to be F<.>).

=back

=cut


## use strict;
# A bit of juggling to insure that C<use re 'taint';> always works, since
# File::Basename is used during the Perl build, when the re extension may
# not be available.
BEGIN {
  unless (eval { require re; })
    { eval ' sub re::import { $^H |= 0x00100000; } ' } # HINT_RE_TAINT
  import re 'taint';
}



use 5.006;
use warnings;
our(@ISA, @EXPORT, $VERSION, $Fileparse_fstype, $Fileparse_igncase);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(fileparse fileparse_set_fstype basename dirname);
$VERSION = "2.71";


#   fileparse_set_fstype() - specify OS-based rules used in future
#                            calls to routines in this package
#
#   Currently recognized values: VMS, MSDOS, MacOS, AmigaOS, os2, RISCOS
#       Any other name uses Unix-style rules and is case-sensitive

sub fileparse_set_fstype {
  my @old = ($Fileparse_fstype, $Fileparse_igncase);
  if (@_) {
    $Fileparse_fstype = $_[0];
    $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i);
  }
  wantarray ? @old : $old[0];
}

#   fileparse() - parse file specification
#
#   Version 2.4  27-Sep-1996  Charles Bailey  bailey@genetics.upenn.edu


sub fileparse {
  my($fullname,@suffices) = @_;
  unless (defined $fullname) {
      require Carp;
      Carp::croak("fileparse(): need a valid pathname");
  }
  my($fstype,$igncase) = ($Fileparse_fstype, $Fileparse_igncase);
  my($dirpath,$tail,$suffix,$basename);
  my($taint) = substr($fullname,0,0);  # Is $fullname tainted?

  if ($fstype =~ /^VMS/i) {
    if ($fullname =~ m#/#) { $fstype = '' }  # We're doing Unix emulation
    else {
      ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s);
      $dirpath ||= '';  # should always be defined
    }
  }
  if ($fstype =~ /^MS(DOS|Win32)|epoc/i) {
    ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s);
    $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/;
  }
  elsif ($fstype =~ /^os2/i) {
    ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s);
    $dirpath = './' unless $dirpath;	# Can't be 0
    $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#;
  }
  elsif ($fstype =~ /^MacOS/si) {
    ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s);
    $dirpath = ':' unless $dirpath;
  }
  elsif ($fstype =~ /^AmigaOS/i) {
    ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s);
    $dirpath = './' unless $dirpath;
  }
  elsif ($fstype !~ /^VMS/i) {  # default to Unix
    ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s);
    if ($^O eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) {
      # dev:[000000] is top of VMS tree, similar to Unix '/'
      # so strip it off and treat the rest as "normal"
      my $devspec  = $1;
      my $remainder = $3;
      ($dirpath,$basename) = ($remainder =~ m#^(.*/)?(.*)#s);
      $dirpath ||= '';  # should always be defined
      $dirpath = $devspec.$dirpath;
    }
    $dirpath = './' unless $dirpath;
  }

  if (@suffices) {
    $tail = '';
    foreach $suffix (@suffices) {
      my $pat = ($igncase ? '(?i)' : '') . "($suffix)\$";
      if ($basename =~ s/$pat//s) {
        $taint .= substr($suffix,0,0);
        $tail = $1 . $tail;
      }
    }
  }

  $tail .= $taint if defined $tail; # avoid warning if $tail == undef
  wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail)
            : ($basename .= $taint);
}


#   basename() - returns first element of list returned by fileparse()

sub basename {
  my($name) = shift;
  (fileparse($name, map("\Q$_\E",@_)))[0];
}


#    dirname() - returns device and directory portion of file specification
#        Behavior matches that of Unix dirname(1) exactly for Unix and MSDOS
#        filespecs except for names ending with a separator, e.g., "/xx/yy/".
#        This differs from the second element of the list returned
#        by fileparse() in that the trailing '/' (Unix) or '\' (MSDOS) (and
#        the last directory name if the filespec ends in a '/' or '\'), is lost.

sub dirname {
    my($basename,$dirname) = fileparse($_[0]);
    my($fstype) = $Fileparse_fstype;

    if ($fstype =~ /VMS/i) { 
        if ($_[0] =~ m#/#) { $fstype = '' }
        else { return $dirname || $ENV{DEFAULT} }
    }
    if ($fstype =~ /MacOS/i) {
	if( !length($basename) && $dirname !~ /^[^:]+:\z/) {
	    $dirname =~ s/([^:]):\z/$1/s;
	    ($basename,$dirname) = fileparse $dirname;
	}
	$dirname .= ":" unless $dirname =~ /:\z/;
    }
    elsif ($fstype =~ /MS(DOS|Win32)|os2/i) { 
        $dirname =~ s/([^:])[\\\/]*\z/$1/;
        unless( length($basename) ) {
	    ($basename,$dirname) = fileparse $dirname;
	    $dirname =~ s/([^:])[\\\/]*\z/$1/;
	}
    }
    elsif ($fstype =~ /AmigaOS/i) {
        if ( $dirname =~ /:\z/) { return $dirname }
        chop $dirname;
        $dirname =~ s#[^:/]+\z## unless length($basename);
    }
    else {
        $dirname =~ s:(.)/*\z:$1:s;
        unless( length($basename) ) {
	    local($File::Basename::Fileparse_fstype) = $fstype;
	    ($basename,$dirname) = fileparse $dirname;
	    $dirname =~ s:(.)/*\z:$1:s;
	}
    }

    $dirname;
}

fileparse_set_fstype $^O;

1;