diff options
author | Barrie Slaymaker <barries@slaysys.com> | 1999-02-11 11:29:24 -0500 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 1999-02-15 04:23:29 +0000 |
commit | 29f227c9ad9c7325fbd0ac33567c35e06a70acb5 (patch) | |
tree | 7926613b9917c9f8fe242e4e17af0b40272ec0e2 /lib | |
parent | c27914c9eca8e82f17c1981f2a8473db3b90ad36 (diff) | |
download | perl-29f227c9ad9c7325fbd0ac33567c35e06a70acb5.tar.gz |
backout change#2811 and add newer version based on File::Spec
Message-ID: <36C34BB4.A62090E0@telerama.com>
Subject: [PATCH]5.005_54 (pod2html) Relative URLs using new File::Spec
p4raw-link: @2811 on //depot/cfgperl: 5a039dd3f529422cb070070772502cedaf09ae20
p4raw-id: //depot/perl@2931
Diffstat (limited to 'lib')
-rw-r--r-- | lib/File/PathConvert.pm | 1119 | ||||
-rw-r--r-- | lib/Pod/Html.pm | 181 |
2 files changed, 149 insertions, 1151 deletions
diff --git a/lib/File/PathConvert.pm b/lib/File/PathConvert.pm deleted file mode 100644 index a709601d5b..0000000000 --- a/lib/File/PathConvert.pm +++ /dev/null @@ -1,1119 +0,0 @@ -# -# Copyright (c) 1996, 1997, 1998 Shigio Yamaguchi. All rights reserved. -# This program is free software; you can redistribute it and/or -# modify it under the same terms as Perl itself. -# -# File::PathConvert.pm -# - -package File::PathConvert; -require 5.002; - -use strict ; - -BEGIN { - use Exporter (); - use vars qw($VERSION @ISA @EXPORT_OK); - $VERSION = 0.85; - @ISA = qw(Exporter); - @EXPORT_OK = qw(setfstype splitpath joinpath splitdirs joindirs realpat - abs2rel rel2abs $maxsymlinks $verbose $SL $resolved ); -} - -use vars qw( $maxsymlinks $verbose $SL $resolved ) ; -use Cwd; - -# -# Initialize @EXPORT_OK vars -# -$maxsymlinks = 32; # allowed symlink number in a path -$verbose = 0; # 1: verbose on, 0: verbose off -$SL = '' ; # Separator char export -$resolved = '' ; # realpath() intermediate value export - -############################################################################# -# -# Package Globals -# - -my $fstype ; # A name indicating the type of filesystem currently in us - -my $sep ; # separator -my $sepRE ; # RE to match spearator -my $notsepRE ; # RE to match anything else -my $volumeRE ; # RE to match the volume name -my $directoryRE ; # RE to match the directory name -my $isrootRE ; # RE to match root path: applied to directory portion only -my $thisDir ; # Name of this directory -my $thisDirRE ; # Name of this directory -my $parentDir ; # Name of parent directory -my $parentDirRE ; # RE to match parent dir name -my $casesensitive ; # Set to non-zero for case sensitive name comprisions. On -y - # affects names, not any other REs, so $isrootRE for Win32 - # must be case insensitive -my $idempotent ; # Set to non-zero if '//' is equivalent to '/'. This - # does not affect leading '//' and '\\' under Win32, - # but will fold '///' and '////', etc, in to '//' on this - # Win32 - - - -########### -# -# The following globals are regexs used in the indicated routines. These -# are initialized by setfstype, so they don't need to be rebuilt each time -# the routine that uses them is called. - -my $basenamesplitRE ; # Used in realpath() to split filenames. - - -########### -# -# This RE matches (and saves) the portion of the string that is just before -# the beginning of a name -# -my $beginning_of_name ; - -# -# This whopper of an RE looks for the pattern "name/.." if it occurs -# after the beginning of the string or after the root RE, or after a separator - -# We don't assume that the isrootRE has a trailing separator. -# It also makes sure that we aren't eliminating '../..' and './..' patterns -# by using the negative lookahead assertion '(?!' ... ')' construct. It also -# ignores 'name/..name'. -# -my $name_sep_parentRE ; - -# -# Matches '..$', '../' after a root -my $leading_parentRE ; - -# -# Matches things like '/(./)+' and '^(./)+' -# -my $dot_sep_etcRE ; - -# -# Matches trailing '/' or '/.' -# -my $trailing_sepRE ; - - -############################################################################# -# -# Functions -# - - -# -# setfstype: takes the name of an operating system and sets up globals that -# allow the other functions to operate on multiple OSs. See -# %fsconfig for the sets of settings. -# -# This is run once on module load to configure for the OS named -# in $^O. -# -# Interface: -# i) $osname, as in $^O or plain english: "MacOS", "DOS, etc. -# This is _not_ usually case sensitive. -# r) Name of recognized name on success else undef. Note that, as -# shipped, 'unix' is the default is nothing else matches. -# go) $fstype and lots of internal parameters and regexs. -# x) Dies if a parameter required in @fsconfig is missing. -# -# -# There are some things I couldn't figure a way to parameterize by setting -# globals. $fstype is checked for filesystem type-specific logic, like -# VMS directory syntax. -# -# Setting up for a particular OS type takes two steps: identify the OS and -# set all of the 'atomic' global variables, then take some of the atomic -# globals which are regexps and build composite values from them. -# -# The atomic regexp terms are generally used to build the larger composite -# regexps that recognize and break apart paths. This leads to -# two important rules for the atomic regexp terms: -# -# (1) Do not use '(' ... ')' in the regex terms, since they are used to build -# regexs that use '(' ... ')' to parse paths. -# -# (2) They must be built so that a '?' or other quantifier may be appended. -# This generally means using the '(?:' ... ')' or '[' ... ']' to group -# multicharacter patterns. Other '(?' ... ')' may also do. -# -# The routines herein strive to preserve the -# original separator and root settings, and, it turns out, never need to -# prepend root to a string (although they do need to insert separators on -# occasion). This is good, since the Win32 root expressions can be like -# '/', '\', 'A:/', 'a:/', or even '\\' or '//' for UNC style names. -# -# Note that the default root and default notsep are not used, and so are -# undefined. -# -# For DOS, MacOS, and VMS, we assume that all paths handed in are on the same -# volume. This is not a significant limitation except for abs2rel, since the -# absolute path is assumed to be on the same volume as the base path. -# -sub setfstype($;) { - my( $osname ) = @_ ; - - # Find the best match for OS and set up our atomic globals accordingly - if ( $osname =~ /^(?:(ms)?(dos|win(32|nt)?))/i ) - { - $fstype = 'Win32' ; - $sep = '/' ; - $sepRE = '[\\\\/]' ; - $notsepRE = '[^\\\\/]' ; - $volumeRE = '(?:^(?:[a-zA-Z]:|(?:\\\\\\\\|//)[^\\\\/]+[\\\\/][^\ -\\/]+)?)' ; - $directoryRE = '(?:(?:.*[\\\\/](?:\.\.?$)?)?)' ; - $isrootRE = '(?:^[\\\\/])' ; - $thisDir = '.' ; - $thisDirRE = '\.' ; - $parentDir = '..' ; - $parentDirRE = '(?:\.\.)' ; - $casesensitive = 0 ; - $idempotent = 1 ; - } - elsif ( $osname =~ /^MacOS$/i ) - { - $fstype = 'MacOS' ; - $sep = ':' ; - $sepRE = '\:' ; - $notsepRE = '[^:]' ; - $volumeRE = '(?:^(?:.*::)?)' ; - $directoryRE = '(?:(?:.*:)?)' ; - $isrootRE = '(?:^:)' ; - $thisDir = '.' ; - $thisDirRE = '\.' ; - $parentDir = '..' ; - $parentDirRE = '(?:\.\.)' ; - $casesensitive = 0 ; - $idempotent = 1 ; - } - elsif ( $osname =~ /^VMS$/i ) - { - $fstype = 'VMS' ; - $sep = '.' ; - $sepRE = '[\.\]]' ; - $notsepRE = '[^\.\]]' ; - # volume is node::volume:, where node:: and volume: are optional - # and node:: cannot be present without volume. node can include - # an access control string in double quotes. - # Not supported: - # quoted full node names - # embedding a double quote in a string ("" to put " in) - # support ':' in node names - # foreign file specifications - # task specifications - # UIC Directory format (use the 6 digit name for it, instead) - $volumeRE = '(?:^(?:(?:[\w\$-]+(?:"[^"]*")?::)?[\w\$-]+:)?)' ; - $directoryRE = '(?:(?:\[.*\])?)' ; - - # Root is the lack of a leading '.', unless string is empty, which - # means 'cwd', which is relative. - $isrootRE = '(?:^[^\.])' ; - $thisDir = '' ; - $thisDirRE = '\[\]' ; - $parentDir = '-' ; - $parentDirRE = '-' ; - $casesensitive = 0 ; - $idempotent = 0 ; - } - elsif ( $osname =~ /^URL$/i ) - { - # URL spec based on RFC2396 (ftp://ftp.isi.edu/in-notes/rfc2396.txt) - $fstype = 'URL' ; - $sep = '/' ; - $sepRE = '/' ; - $notsepRE = '[^/]' ; - # Volume= scheme + authority, both optional - $volumeRE = '(?:^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?(?://[^/?]*)?)' ; - - # Directories do _not_ include the query component: we pretend that - # anything after a "?" is the filename or part of it. So a '/' - # terminates and is part of the directory spec, while a '?' or '#' - # terminate and are not part of the directory spec. - # - # We pretend that ";param" syntax does not exist - # - $directoryRE = '(?:(?:[^?#]*/(?:\.\.?(?:$|(?=[?#])))?)?)' ; - $isrootRE = '(?:^/)' ; - $thisDir = '.' ; - $thisDirRE = '\.' ; - $parentDir = '..' ; - $parentDirRE = '(?:\.\.)' ; - # Assume case sensitive, since many (most?) are. The user can override - # this if they so desire. - $casesensitive = 1 ; - $idempotent = 1 ; - } - else - { - $fstype = 'Unix' ; - $sep = '/' ; - $sepRE = '/' ; - $notsepRE = '[^/]' ; - $volumeRE = '' ; - $directoryRE = '(?:(?:.*/(?:\.\.?$)?)?)' ; - $isrootRE = '(?:^/)' ; - $thisDir = '.' ; - $thisDirRE = '\.' ; - $parentDir = '..' ; - $parentDirRE = '(?:\.\.)' ; - $casesensitive = 1 ; - $idempotent = 1 ; - } - - # Now set our composite regexps. - - # Maintain old name for backward compatibility - $SL= $sep ; - - # Build lots of REs used below, so they don't need to be built every time - # the routines that use them are called. - $basenamesplitRE = '^(.*)' . $sepRE . '(' . $notsepRE . '*)$' ; - - $leading_parentRE = '(' . $isrootRE . '?)(?:' . $parentDirRE . $sepRE . ') -(?:' . $parentDirRE . '$)?' ; - $trailing_sepRE = '(.)' . $sepRE . $thisDirRE . '?$' ; - - $beginning_of_name = '(?:^|' . $isrootRE . '|' . $sepRE . ')' ; - - $dot_sep_etcRE = - '(' . $beginning_of_name . ')(?:' . $thisDirRE . $sepRE . ')+'; - - $name_sep_parentRE = - '(' . $beginning_of_name . ')' - . '(?!(?:' . $thisDirRE . '|' . $parentDirRE . ')' . $sepRE . ')' - . $notsepRE . '+' - . $sepRE . $parentDirRE - . '(?:' . $sepRE . '|$)' - ; - - if ( $verbose ) { - print( <<TOHERE ) ; -fstype = "$fstype" -sep = "$sep" -sepRE = /$sepRE/ -notsepRE = /$notsepRE/ -volumeRE = /$volumeRE/ -directoryRE = /$directoryRE/ -isrootRE = /$isrootRE/ -thisDir = "$thisDir" -thisDirRE = /$thisDirRE/ -parentDir = "$parentDir" -parentDirRE = /$parentDirRE/ -casesensitive = "$casesensitive" -TOHERE - } - - return $fstype ; -} - - -setfstype( $^O ) ; - - -# -# splitpath: Splits a path into component parts: volume, dirpath, and filename - -# -# Very much like File::Basename::fileparse(), but doesn't concern -# itself with extensions and knows about volume names. -# -# Returns ($volume, $directory, $filename ). -# -# The contents of the returned list varies by operating system. -# -# Unix: -# $volume: always '' -# $directory: up to, and including, final '/' -# $filename: after final '/' -# -# Win32: -# $volume: drive letter and ':', if present -# $directory and $filename are like on Unix, but '\' and '/' are -# equivalent and the $volume is not in $directory.. -# -# VMS: -# $volume: up to and including first ":" -# $directory: "[...]" component -# $filename: the rest. -# $nofile is ignored -# -# URL: -# $volume: up to ':', then '//stuff/morestuff'. No trailing '/'. -# $directory: after $volume, up to last '/' -# $filename: the rest. -# $nofile is ignored -# -# Interface: -# i) $path -# i) $nofile: if true, then any trailing filename is assumed to -# belong to the directory for non-VMS systems. -# r) list of ( $volume, $directory, $filename ). -# -sub splitpath { - my( $path, $nofile )= @_ ; - my( $volume, $directory, $file ) ; - if ( $fstype ne 'VMS' && $fstype ne 'URL' && $nofile ) { - $path =~ m/($volumeRE)(.*)$/ ; - $volume = $1 ; - $directory= $2 ; - $file = '' ; - } - else { - $path =~ m/($volumeRE)($directoryRE)(.*)$/ ; - $volume = $1 ; - $directory= $2 ; - $file = $3 ; - } - - # For Win32 UNC, force the directory portion to be non-empty. This is - # because all UNC names are absolute, even if there's no trailing separator - # after the sharename. - # - # This is a bit of a hack, necesitated by the implementation of $isrootRE, - # which is only applied to the directory portion. - # - # A better long term solution might be to make the isroot test a member - # function in the future, object-oriented version of this. - # - $directory = $1 - if ( $fstype eq 'Win32' && $volume =~ /^($sepRE)$sepRE/ && $directory eq -' ) ; - - return ( $volume, $directory, $file ) ; -} - - -# -# joinpath: joins the results of splitpath(). Not really necessary now, but -# good to have: -# -# - API completeness -# - Self documenting code -# - Future handling of other filesystems -# -# For instance, if you leave the ':' or the '[' and ']' out of VMS $volume -# and $directory strings, this patches it up. If you leave out the '[' -# and provide the ']', or vice versa, it is not cleaned up. This is -# because it's useful to automatically insert both '[' and ']', but if you -# leave off only one, it's likely that there's a bug elsewhere that needs -# looking in to. -# -# Automatically inserts a separator between directory and filename if needed -# for non-VMS OSs. -# -# Automatically inserts a separator between volume and directory or file -# if needed for Win32 UNC names. -# -sub joinpath($;$;$;) { - my( $volume, $directory, $filename )= @_ ; - - # Fix up delimiters for $volume and $directory as needed for various OSs - if ( $fstype eq 'VMS' ) { - $volume .= ':' - if ( $volume ne '' && $volume !~ m/:$/ ) ; - - $directory = join( '', ( '[', $directory, ']' ) ) - if ( $directory ne '' && $directory !~ m/^\[.*\]$/ ) ; - } - else { - # Add trailing separator to directory names that require it and - # need it. URLs always require it if there are any directory - # components. - $directory .= $sep - if ( $directory ne '' - && ( $fstype eq 'URL' || $filename ne '' ) - && $directory !~ m/$sepRE$/ - ) ; - - # Add trailing separator to volume for UNC and HTML volume - # names that lack it and need it. - # Note that if a URL volume is a scheme only (ends in ':'), - # we don't require a separator: it's a relative URL. - $volume .= $sep - if ( ( ( $fstype eq 'Win32' && $volume =~ m#^$sepRE{2}# ) - || ( $fstype eq 'URL' && $volume =~ m#[^:/]$# ) - ) - && $volume !~ m#$sepRE$# - && $directory !~ m#^$sepRE# - && ( $directory ne '' || $filename ne '' ) - ) ; - } - - return join( '', $volume, $directory, $filename ) ; -} - - -# -# splitdirs: Splits a string containing directory portion of a path -# in to component parts. Preserves trailing null entries, unlike split(). -# -# "a/b" should get you [ 'a', 'b' ] -# -# "a/b/" should get you [ 'a', 'b', '' ] -# -# "/a/b/" should get you [ '', 'a', 'b', '' ] -# -# "a/b" returns the same array as 'a/////b' for those OSs where -# the seperator is idempotent (Unix and DOS, at least, but not VMS). -# -# Interface: -# i) directory path string -# -sub splitdirs($;) { - my( $directorypath )= @_ ; - - $directorypath =~ s/^\[(.*)\]$/$1/ - if ( $fstype eq 'VMS' ) ; - - # - # split() likes to forget about trailing null fields, so here we - # check to be sure that there will not be any before handling the - # simple case. - # - return split( $sepRE, $directorypath ) - if ( $directorypath !~ m/$sepRE$/ ) ; - - # - # since there was a trailing separator, add a file name to the end, then - # do the split, then replace it with ''. - # - $directorypath.= "file" ; - my( @directories )= split( $sepRE, $directorypath ) ; - $directories[ $#directories ]= '' ; - - return @directories ; -} - -# -# joindirs: Joins an array of directory names in to a string, adding -# OS-specific delimiters, like '[' and ']' for VMS. -# -# Note that empty strings '' are no different then non-empty strings, -# but that undefined strings are skipped by this algorithm. -# -# This is done the hard way to preserve separators that are already -# present in any of the directory names. -# -# Could this be made faster by using a join() followed -# by s/($sepRE)$sepRE+/$1/g? -# -# Interface: -# i) array of directory names -# o) string representation of directory path -# -sub joindirs { - my $directory_path ; - - $directory_path = shift - while ( ! defined( $directory_path ) && @_ ) ; - - if ( ! defined( $directory_path ) ) { - $directory_path = '' ; - } - else { - local $_ ; - - for ( @_ ) { - next if ( ! defined( $_ ) ) ; - - $directory_path .= $sep - if ( $directory_path !~ /$sepRE$/ && ! /^$sepRE/ ) ; - - $directory_path .= $_ ; - } - } - - $directory_path = join( '', '[', $directory_path, ']' ) - if ( $fstype eq 'VMS' ) ; - - return $directory_path ; -} - - -# -# realpath: returns the canonicalized absolute path name -# -# Interface: -# i) $path path -# r) resolved name on success else undef -# go) $resolved -# resolved name on success else the path name which -# caused the problem. -$resolved = ''; -# -# Note: this implementation is based 4.4BSD version realpath(3). -# -# TODO: Speed up by using Cwd::abs_path()? -# -sub realpath($;) { - ($resolved) = @_; - my($backdir) = cwd(); - my($dirname, $basename, $links, $reg); - - $resolved = regularize($resolved); -LOOP: - { - # - # Find the dirname and basename. - # Change directory to the dirname component. - # - if ($resolved =~ /$sepRE/) { - ($dirname, $basename) = $resolved =~ /$basenamesplitRE/ ; - $dirname = $sep if ( $dirname eq '' ); - $resolved = $dirname; - unless (chdir($dirname)) { - warn("realpath: chdir($dirname) failed: $! (in ${\cwd()}).") i - $verbose; - chdir($backdir); - return undef; - } - } else { - $dirname = ''; - $basename = $resolved; - } - # - # If it is a symlink, read in the value and loop. - # If it is a directory, then change to that directory. - # - if ( $basename ne '' ) { - if (-l $basename) { - unless ($resolved = readlink($basename)) { - warn("realpath: readlink($basename) failed: $! (in ${\cwd( -}).") if $verbose; - chdir($backdir); - return undef; - } - $basename = ''; - if (++$links > $maxsymlinks) { - warn("realpath: too many symbolic links: $links.") if $ver -ose; - chdir($backdir); - return undef; - } - redo LOOP; - } elsif (-d _) { - unless (chdir($basename)) { - warn("realpath: chdir($basename) failed: $! (in ${\cwd()}) -") if $verbose; - chdir($backdir); - return undef; - } - $basename = ''; - } - } - } - # - # Get the current directory name and append the basename. - # - $resolved = cwd(); - if ( $basename ne '' ) { - $resolved .= $sep if ($resolved ne $sep); - $resolved .= $basename - } - chdir($backdir); - return $resolved; -} # end sub realpath - - -# -# abs2rel: make a relative pathname from an absolute pathname -# -# Interface: -# i) $path absolute path(needed) -# i) $base base directory(optional) -# r) relative path of $path -# -# Note: abs2rel doesn't check whether the specified path exist or not. -# -sub abs2rel($;$;) { - my($path, $base) = @_; - my($reg ); - - my( $path_volume, $path_directory, $path_file )= splitpath( $path,'nofile' -; - if ( $path_directory !~ /$isrootRE/ ) { - warn("abs2rel: nothing to do: '$path' is relative.") if $verbose; - return $path; - } - - $base = cwd() - if ( $base eq '' ) ; - - my( $base_volume, $base_directory, $base_file )= splitpath( $base,'nofile' -; - # check for a filename, since the nofile parameter does not work for OSs - # like VMS that have explicit delimiters between the dir and file portions - warn( "abs2rel: filename '$base_file' passed in \$base" ) - if ( $base_file ne '' && $verbose ) ; - - if ( $base_directory !~ /$isrootRE/ ) { - # Make $base absolute - my( $cw_volume, $cw_directory, $dummy ) = splitpath( cwd(), 'nofile' ) -; - # maybe we should warn if $cw_volume ne $base_volume and both are not -' - $base_volume= $cw_volume - if ( $base_volume eq '' && $cw_volume ne '' ) ; - $base_directory = join( '', $cw_directory, $sep, $base_directory ) ; - } - -#print( "[$path_directory,$base_directory]\n" ) ; - $path_directory = regularize( $path_directory ); - $base_directory = regularize( $base_directory ); -#print( "[$path_directory,$base_directory]\n" ) ; - # Now, remove all leading components that are the same, so 'name/a' - # 'name/b' become 'a' and 'b'. - my @pathchunks = split($sepRE, $path_directory); - my @basechunks = split($sepRE, $base_directory); - - if ( $casesensitive ) - { - while (@pathchunks && @basechunks && $pathchunks[0] eq $basechunks[0]) -+ { - shift @pathchunks ; - shift @basechunks ; - } - } - else { - while ( @pathchunks - && @basechunks - && lc( $pathchunks[0] ) eq lc( $basechunks[0] ) - ) - { - shift @pathchunks ; - shift @basechunks ; - } - } - - # No need to use joindirs() here, since we know that the arrays - # are well formed. - $path_directory= join( $sep, @pathchunks ); - $base_directory= join( $sep, @basechunks ); -#print( "[$path_directory,$base_directory]\n" ) ; - - # Convert $base_directory from absolute to relative - if ( $fstype eq 'VMS' ) { - $base_directory= $sep . $base_directory - if ( $base_directory ne '' ) ; - } - else { - $base_directory=~ s/^$sepRE// ; - } - -#print( "[$base_directory]\n" ) ; - # $base_directory now contains the directories the resulting relative path -+ # must ascend out of before it can descend to $path_directory. So, - # replace all names with $parentDir - $base_directory =~ s/$notsepRE+/$parentDir/g ; -#print( "[$base_directory]\n" ) ; - - # Glue the two together, using a separator if necessary, and preventing an - # empty result. - if ( $path_directory ne '' && $base_directory ne '' ) { - $path_directory = "$base_directory$sep$path_directory" ; - } else { - $path_directory = "$base_directory$path_directory" ; - } - - $path_directory = regularize( $path_directory ) ; - - # relative URLs should have no name in the volume, only a scheme. - $path_volume=~ s#/.*## - if ( $fstype eq 'URL' ) ; - return joinpath( $path_volume, $path_directory, $path_file ) ; -} - -# -# rel2abs: make an absolute pathname from a relative pathname -# -# Assumes no trailing file name on $base. Ignores it if present on an OS -# like $VMS. -# -# Interface: -# i) $path relative path (needed) -# i) $base base directory (optional) -# r) absolute path of $path -# -# Note: rel2abs doesn't check if the paths exist. -# -sub rel2abs($;$;) { - my( $path, $base ) = @_; - my( $reg ); - - my( $path_volume, $path_directory, $path_file )= splitpath( $path, 'nofile - ) ; - if ( $path_directory =~ /$isrootRE/ ) { - warn( "rel2abs: nothing to do: '$path' is absolute" ) - if $verbose; - return $path; - } - - warn( "rel2abs: volume '$path_volume' passed in relative path: \$path" ) - if ( $path_volume ne '' && $verbose ) ; - - $base = cwd() - if ( !defined( $base ) || $base eq '' ) ; - - my( $base_volume, $base_directory, $base_file )= splitpath( $base, 'nofile - ) ; - # check for a filename, since the nofile parameter does not work for OSs - # like VMS that have explicit delimiters between the dir and file portions - warn( "rel2abs: filename '$base_file' passed in \$base" ) - if ( $base_file ne '' && $verbose ) ; - - if ( $base_directory !~ /$isrootRE/ ) { - # Make $base absolute - my( $cw_volume, $cw_directory, $dummy ) = splitpath( cwd(), 'nofile' ) -; - # maybe we should warn if $cw_volume ne $base_volume and both are not -' - $base_volume= $cw_volume - if ( $base_volume eq '' && $cw_volume ne '' ) ; - $base_directory = join( '', $cw_directory, $sep, $base_directory ) ; - } - - $path_directory = regularize( $path_directory ); - $base_directory = regularize( $base_directory ); - - my $result_directory ; - # Avoid using a separator if either directory component is empty. - if ( $base_directory ne '' && $path_directory ne '' ) { - $result_directory= joindirs( $base_directory, $path_directory ) ; - } - else { - $result_directory= "$base_directory$path_directory" ; - } - - $result_directory = regularize( $result_directory ); - - return joinpath( $base_volume, $result_directory, $path_file ) ; -} - -# -# regularize a path. -# -# Removes dubious and redundant information. -# should only be called on directory portion on OSs -# with volumes and with delimeters that separate dir names from file names, -# since the separators can take on different semantics, like "\\" for UNC -# under Win32, or '.' in filenames under VMS. -# -sub regularize { - my( $in )= $_[ 0 ] ; - - # Combine idempotent separators. Do this first so all other REs only - # need to match one separator. Use the first sep found instead of - # sepRE to preserve slashes on Win32. - $in =~ s/($sepRE)$sepRE+/$1/g - if ( $idempotent ) ; - - # We do this after deleting redundant separators in order to be consistent - - # If a Win32 path ended in \/, we want to be sure that the \ is returned, - # no the /. - $in =~ /($sepRE)$sepRE*$/ ; - my $trailing_sep = defined( $1 ) ? $1 : '' ; - - # Delete all occurences of 'name/..(/|$)'. This is done with a while - # loop to get rid of things like 'name1/name2/../..'. We chose the pattern - # name/../ as the target instead of /name/.. so as to preserve 'rootness'. - while ($in =~ s/$name_sep_parentRE/$1/g) {} - - # Get rid of ./ in '^./' and '/./' - $in =~ s/$dot_sep_etcRE/$1/g ; - - # Get rid of trailing '/' and '/.' unless it would leave an empty string - $in =~ s/$trailing_sepRE/$1/ ; - - # Get rid of '../' constructs from absolute paths - $in =~ s/$leading_parentRE/$1/ - if ( $in =~ /$isrootRE/ ) ; - -# # Default to current directory if it's now empty. -# $in = $thisDir if $_[0] eq '' ; -# - # Restore trailing separator if it was lost. We do this to preserve - # the 'dir-ness' of the path: paths that ended in a separator on entry - # should leave with one in case the caller is using trailing slashes to - # indicate paths to directories. - $in .= $trailing_sep - if ( $trailing_sep ne '' && $in !~ /$sepRE$/ ) ; - - return $in ; -} - -1; - -__END__ - -=head1 NAME - -abs2rel - convert an absolute path to a relative path - -rel2abs - convert a relative path to an absolute path - -realpath - convert a logical path to a physical path (resolve symlinks) - -splitpath - split a path in to volume, directory and filename components - -joinpath - join volume, directory, and filename components to form a path - -splitdirs - split directory specification in to component names - -joindirs - join component names in to a directory specification - -setfstype - set the file system type - - -=head1 SYNOPSIS - - use File::PathConvert qw(realpath abs2rel rel2abs setfstype splitpath - joinpath splitdirs joindirs $resolved); - - $relpath = abs2rel($abspath); - $abspath = abs2rel($abspath, $base); - - $abspath = rel2abs($relpath); - $abspath = rel2abs($relpath, $base); - - $path = realpath($logpath) || die "resolution stopped at $resolved"; - - ( $volume, $directory, $filename )= splitpath( $path ) ; - ( $volume, $directory, $filename )= splitpath( $path, 'nofile' ) ; - - $path= joinpath( $volume, $directory, $filename ) ; - - @directories= splitdirs( $directory ) ; - $directory= joindirs( @directories ) ; - -=head1 DESCRIPTION - -File::PathConvert provides functions to convert between absolute and -relative paths, and from logical paths to physical paths on a variety of -filesystems, including the URL 'filesystem'. - -Paths are decomposed internally in to volume, directory, and, sometimes -filename portions as appropriate to the operation and filesystem, then -recombined. This preserves the volume and filename portions so that they may -be returned, and prevents them from interfering with the path conversions. - -Here are some examples of path decomposition. A '****' in a column indicates -the column is not used in C<abs2rel> and C<rel2abs> functions for that -filesystem type. - - - FS VOLUME Directory filename - ======= ======================= =============== ============= - URL http: /a/b/ c?query - http://fubar.com /a/b/ c?query - //p.d.q.com /a/b/c/ ?query - - VMS Server::Volume: [a.b] c - Server"access spec":: [a.b] c - Volume: [a.b] c - - Win32 A: \a\b\c **** - \\server\Volume \a\b\c **** - \\server\Volume \a/b/c **** - - Unix **** \a\b\c **** - - MacOS Volume:: a:b:c **** - -Many more examples abound in the test.pl included with this module. - -Only the VMS and URL filesystems indicate if the last name in a path is a -directory or file. For other filesystems, all non-volume names are assumed to -be directory names. For URLs, the last name in a path is assumed to be a -filename unless it ends in '/', '/.', or '/..'. - -Other assumptions are made as well, especially MacOS and VMS. THESE MAY CHANGE -BASED ON PROGRAMMER FEEDBACK! - -The conversion routines C<abs2rel>, C<rel2abs>, and C<realpath> are the -main focus of this package. C<splitpath> and C<joinpath> are provided to -allow volume oriented filesystems (almost anything non-unixian, actually) -to be accomodated. C<splitdirs> and C<joindirs> provide directory path -grammar parsing and encoding, which is especially useful for VMS. - -=over 4 - -=item setfstype - -This is called automatically on module load to set the filesystem type -according to $^O. The user can call this later set the filesystem type -manually. If the name is not recognized, unix defaults are used. Names -matching /^URL$/i, /^VMS$/i, /^MacOS$/i, or /^(ms)?(win|dos)/32|nt)?$/i yield -the appropriate (hopefully) filesystem settings. These strings may be -generalized in the future. - -Examples: - - File::PathConvert::setfstype( 'url' ) ; - File::PathConvert::setfstype( 'Win32' ) ; - File::PathConvert::setfstype( 'HAL9000' ) ; # Results in Unix default - -=item abs2rel - -C<abs2rel> converts an absolute path name to a relative path: -converting /1/2/3/a/b/c relative to /1/2/3 returns a/b/c - - $relpath= abs2rel( $abspath ) ; - $relpath= abs2rel( $abspath, $base ) ; - -If $abspath is already relative, it is returned unchanged. Otherwise the -relative path from $base to $abspath is returned. If $base is undefined the -current directory is used. - -The volume and filename portions of $base are ignored if present. -If $abspath and $base are on different volumes, the volume from $abspath is -used. - -No filesystem calls are made except for getting the current working directory -if $base is undefined, so symbolic links are not checked for or resolved, and -no check is done for existance. - -Examples - - # Unix - 'a/b/c' == abs2rel( 'a/b/c', $anything ) - 'a/b/c' == abs2rel( '/1/2/3/a/b/c', '/1/2/3' ) - - # DOS - 'a\\b/c' == abs2rel( 'a\\b/c', $anything ) - 'a\\b/c' == abs2rel( '/1\\2/3/a\\b/c', '/1/2/3' ) - - # URL - 'http:a/b/c' == abs2rel( 'http:a/b/c', $anything ) - 'http:a/b/c' == abs2rel( 'http:/1/2/3/a/b/c', - 'ftp://t.org/1/2/3/?z' ) - 'http:a/b/c?q' == abs2rel( 'http:/1/2/3/a/b/c/?q', - 'ftp://t.org/1/2/3?z' ) - 'http://s.com/a/b/c?q' == abs2rel( 'http://s.com/1/2/3/a/b/c?q', - 'ftp://t.org/1/2/3/?z') - -=item rel2abs - -C<rel2abs> makes converts a relative path name to an absolute path: -converting a/b/c relative to /1/2/3 returns /1/2/3/a/b/c. - - $abspath= rel2abs( $relpath ) ; - $abspath= rel2abs( $relpath, $base ) ; - -If $relpath is already absolute, it is returned unchanged. Otherwise $relpath -is taken to be relative to $base and the resulting absolute path is returned. -If $base is not supplied, the current working directory is used. - -The volume portion of $relpath is ignored. The filename portion of $base is -also ignored. The volume from $base is returned if present. The filename -portion of $abspath is returned if present. - -No filesystem calls are made except for getting the current working directory -if $base is undefined, so symbolic links are not checked for or resolved, and -no check is done for existance. - -C<rel2abs> will not return a path of the form "./file". - -Examples - - # Unix - '/a/b/c' == rel2abs( '/a/b/c', $anything ) - '/1/2/3/a/b/c' == rel2abs( 'a/b/c', '/1/2/3' ) - - # DOS - '\\a\\b/c' == rel2abs( '\\a\\b/c', $anything ) - '/1\\2/3\\a\\b/c' == rel2abs( 'a\\b/c', '/1\\2/3' ) - 'C:/1\\2/3\\a\\b/c' == rel2abs( 'D:a\\b/c', 'C:/1\\2/3' ) - '\\\\s\\v/1\\2/3\\a\\b/c' == rel2abs( 'D:a\\b/c', '\\\\s\\v/1\\2/3' ) - - # URL - 'http:/a/b/c?q' == rel2abs( 'http:/a/b/c?q', $anything ) - 'ftp://t.org/1/2/3/a/b/c?q'== rel2abs( 'http:a/b/c?q', - 'ftp://t.org/1/2/3?z' ) - - -=item realpath - -C<realpath> makes a canonicalized absolute pathname and -resolves all symbolic links, extra ``/'' characters, and references -to /./ and /../ in the path. -C<realpath> resolves both absolute and relative paths. -It returns the resolved name on success, otherwise it returns undef -and sets the valiable C<$File::PathConvert::resolved> to the pathname -that caused the problem. - -All but the last component of the path must exist. - -This implementation is based on 4.4BSD realpath(3). It is not tested under -other operating systems at this time. - -If '/sys' is a symbolic link to '/usr/src/sys': - - chdir('/usr'); - '/usr/src/sys/kern' == realpath('../sys/kern'); - '/usr/src/sys/kern' == realpath('/sys/kern'); - -=item splitpath - -To be written... - -=item joinpath - -To be written... - -Note that joinpath( splitpath( $path ) ) usually yields path. URLs -with directory components ending in '/.' or '/..' will be fixed -up to end in '/./' and '/../'. - -=item splitdirs - -To be written... - -=item joindirs - - -=back - -=head1 BUGS - -C<realpath> is not fully multiplatform. - - -=head1 LIMITATIONS - -=over 4 - -=item * - -In URLs, paths not ending in '/' are split such that the last name in the -path is a filename. This is not intuitive: many people use such URLs for -directories, and most servers send a redirect. This may cause programers -using this package to code in bugs, it may be more pragmatic to always assume -all names are directory names. (Note that the query portion is always part -of the filename). - -=item * - -If the relative and base paths are on different volumes, no error is -returned. A silent, hopefully reasonable assumption is made. - -=item * - -No detection of unix style paths is done when other filesystems are -selected, like File::Basename does. - -=back - -=head1 AUTHORS - -Barrie Slaymaker <rbs@telerama.com> -Shigio Yamaguchi <shigio@wafu.netgate.net> - -=cut diff --git a/lib/Pod/Html.pm b/lib/Pod/Html.pm index 3176e4fdcd..9245315dbd 100644 --- a/lib/Pod/Html.pm +++ b/lib/Pod/Html.pm @@ -2,10 +2,10 @@ package Pod::Html; use Pod::Functions; use Getopt::Long; # package for handling command-line parameters -use File::PathConvert 0.84 ; # Used to do relative URLs +use File::Spec::Unix; require Exporter; use vars qw($VERSION); -$VERSION = 1.01; +$VERSION = 1.02; @ISA = Exporter; @EXPORT = qw(pod2html htmlify); use Cwd; @@ -50,7 +50,9 @@ Displays the usage message. --htmldir=name Sets the directory in which the resulting HTML file is placed. This -is used to generate relative links to other files. +is used to generate relative links to other files. Not passing this +causes all links to be absolute, since this is the value that tells +Pod::Html the root of the documentation tree. =item htmlroot @@ -177,13 +179,13 @@ my $itemcache = "pod2html-itemcache"; my @begin_stack = (); # begin/end stack -my @libpods = (); # files to search for links from C<> directives -my $htmlroot = "/"; # http-server base directory from which all +my @libpods = (); # files to search for links from C<> directives +my $htmlroot = "/"; # http-server base directory from which all # relative paths in $podpath stem. my $htmldir = ""; # The directory to which the html pages # will (eventually) be written. my $htmlfile = ""; # write to stdout by default -my $htmlfileurl = ""; # The url that other files would use to +my $htmlfileurl = "" ; # The url that other files would use to # refer to this file. This is only used # to make relative urls that point to # other files. @@ -297,14 +299,19 @@ sub pod2html { } $htmlfile = "-" unless $htmlfile; # stdout $htmlroot = "" if $htmlroot eq "/"; # so we don't get a // - $htmldir =~ s#/$## ; # so we don't get a // - if ( $htmldir ne '' - && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir - ) + $htmldir =~ s#/$## ; # so we don't get a // + if ( $htmlroot eq '' + && defined( $htmldir ) + && $htmldir ne '' + && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir + ) { - $htmlfileurl= "$htmlroot/" . substr( $htmlfile, length( $htmldir ) + 1 ); + # Set the 'base' url for this file, so that we can use it + # as the location from which to calculate relative links + # to other files. If this is '', then absolute links will + # be used throughout. + $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1); } - File::PathConvert::setfstype( 'URL' ) ; # read the pod a paragraph at a time warn "Scanning for sections in input file(s)\n" if $verbose; @@ -487,15 +494,13 @@ Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> END_OF_USAGE sub parse_command_line { - my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile -,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecur -se,$opt_recurse,$opt_title,$opt_verbose); + my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose); my $result = GetOptions( - 'flush' => \$opt_flush, - 'help' => \$opt_help, - 'htmldir=s' => \$opt_htmldir, + 'flush' => \$opt_flush, + 'help' => \$opt_help, + 'htmldir=s' => \$opt_htmldir, 'htmlroot=s' => \$opt_htmlroot, - 'index!' => \$opt_index, + 'index!' => \$opt_index, 'infile=s' => \$opt_infile, 'libpods=s' => \$opt_libpods, 'netscape!' => \$opt_netscape, @@ -568,7 +573,7 @@ sub get_cache { sub cache_key { my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; return join('!', $dircache, $itemcache, $recurse, - @$podpath, $podroot, stat($dircache), stat($itemcache)); + @$podpath, $podroot, stat($dircache), stat($itemcache)); } # @@ -674,7 +679,9 @@ sub scan_podpath { next unless defined $pages{$libpod} && $pages{$libpod}; # if there is a directory then use the .pod and .pm files within it. - if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { + # NOTE: Only finds the first so-named directory in the tree. +# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { + if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { # find all the .pod and .pm files within the directory $dirname = $1; opendir(DIR, $dirname) || @@ -1126,11 +1133,25 @@ sub process_text { }xeg; # $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g; $rest =~ s{ - (<A\ HREF="?)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)? - }{ - my $url= - File::PathConvert::abs2rel( "$3.html", $htmlfileurl ); -# print( " $htmlfileurl $3.html [$url]\n" ) ; + (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)? + }{ + my $url ; + if ( $htmlfileurl ne '' ) { + # Here, we take advantage of the knowledge + # that $htmlfileurl ne '' implies $htmlroot eq ''. + # Since $htmlroot eq '', we need to prepend $htmldir + # on the fron of the link to get the absolute path + # of the link's target. We check for a leading '/' + # to avoid corrupting links that are #, file:, etc. + my $old_url = $3 ; + $old_url = "$htmldir$old_url" + if ( $old_url =~ m{^\/} ) ; + $url = relativize_url( "$old_url.html", $htmlfileurl ); +# print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ; + } + else { + $url = "$3.html" ; + } "$1$url" ; }xeg; @@ -1156,7 +1177,8 @@ sub process_text { $rest =~ s{ \b # start at word boundary ( # begin $1 { - $urls :[^:] # need resource and a colon + $urls : # need resource and a colon + (?!:) # Ignore File::, among others. [$any] +? # followed by on or more # of any valid character, but # be conservative and take only @@ -1428,6 +1450,9 @@ sub process_L { $section = $page; $page = ""; } + + # remove trailing punctuation, like () + $section =~ s/\W*$// ; } $page83=dosify($page); @@ -1438,6 +1463,29 @@ sub process_L { } elsif ( $page =~ /::/ ) { $linktext = ($section ? "$section" : "$page"); $page =~ s,::,/,g; + # Search page cache for an entry keyed under the html page name, + # then look to see what directory that page might be in. NOTE: + # this will only find one page. A better solution might be to produce + # an intermediate page that is an index to all such pages. + my $page_name = $page ; + $page_name =~ s,^.*/,, ; + if ( defined( $pages{ $page_name } ) && + $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/ + ) { + $page = $1 ; + } + else { + # NOTE: This branch assumes that all A::B pages are located in + # $htmlroot/A/B.html . This is often incorrect, since they are + # often in $htmlroot/lib/A/B.html or such like. Perhaps we could + # analyze the contents of %pages and figure out where any + # cousins of A::B are, then assume that. So, if A::B isn't found, + # but A::C is found in lib/A/C.pm, then A::B is assumed to be in + # lib/A/B.pm. This is also limited, but it's an improvement. + # Maybe a hints file so that the links point to the correct places + # non-theless? + # Also, maybe put a warn "$0: cannot resolve..." here. + } $link = "$htmlroot/$page.html"; $link .= "#" . htmlify(0,$section) if ($section); } elsif (!defined $pages{$page}) { @@ -1450,7 +1498,8 @@ sub process_L { # if there is a directory by the name of the page, then assume that an # appropriate section will exist in the subdirectory - if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { +# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { + if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { $link = "$htmlroot/$1/$section.html"; # since there is no directory by the name of the page, the section will @@ -1474,8 +1523,23 @@ sub process_L { process_text(\$linktext, 0); if ($link) { - my $url= File::PathConvert::abs2rel( $link, $htmlfileurl ) ; -# print( " $htmlfileurl $link [$url]\n" ) ; + # Here, we take advantage of the knowledge that $htmlfileurl ne '' + # implies $htmlroot eq ''. This means that the link in question + # needs a prefix of $htmldir if it begins with '/'. The test for + # the initial '/' is done to avoid '#'-only links, and to allow + # for other kinds of links, like file:, ftp:, etc. + my $url ; + if ( $htmlfileurl ne '' ) { + $link = "$htmldir$link" + if ( $link =~ m{^/} ) ; + + $url = relativize_url( $link, $htmlfileurl ) ; +# print( " b: [$link,$htmlfileurl,$url]\n" ) ; + } + else { + $url = $link ; + } + $s1 = "<A HREF=\"$url\">$linktext</A>"; } else { $s1 = "<EM>$linktext</EM>"; @@ -1484,6 +1548,39 @@ sub process_L { } # +# relativize_url - convert an absolute URL to one relative to a base URL. +# Assumes both end in a filename. +# +sub relativize_url { + my ($dest,$source) = @_ ; + + my ($dest_volume,$dest_directory,$dest_file) = + File::Spec::Unix->splitpath( $dest ) ; + $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ; + + my ($source_volume,$source_directory,$source_file) = + File::Spec::Unix->splitpath( $source ) ; + $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ; + + my $rel_path = '' ; + if ( $dest ne '' ) { + $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ; + } + + if ( $rel_path ne '' && + substr( $rel_path, -1 ) ne '/' && + substr( $dest_file, 0, 1 ) ne '#' + ) { + $rel_path .= "/$dest_file" ; + } + else { + $rel_path .= "$dest_file" ; + } + + return $rel_path ; +} + +# # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and # convert them to corresponding HTML directives. # @@ -1517,8 +1614,16 @@ sub process_C { if ($doref && defined $items{$s1}) { if ( $items{$s1} ) { my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ; - my $url = File::PathConvert::abs2rel( $link, $htmlfileurl ) ; -# print( " $htmlfileurl $link [$url]\n" ) ; + # Here, we take advantage of the knowledge that $htmlfileurl ne '' + # implies $htmlroot eq ''. + my $url ; + if ( $htmlfileurl ne '' ) { + $link = "$htmldir$link" ; + $url = relativize_url( $link, $htmlfileurl ) ; + } + else { + $url = $link ; + } $s1 = "<A HREF=\"$url\">$str</A>" ; } else { @@ -1582,6 +1687,18 @@ sub process_X { # +# Adapted from Nick Ing-Simmons' PodToHtml package. +sub relative_url { + my $source_file = shift ; + my $destination_file = shift; + + my $source = URI::file->new_abs($source_file); + my $uo = URI::file->new($destination_file,$source)->abs; + return $uo->rel->as_string; +} + + +# # finish_list - finish off any pending HTML lists. this should be called # after the entire pod file has been read and converted. # |