diff options
Diffstat (limited to 'installhtml')
-rwxr-xr-x | installhtml | 562 |
1 files changed, 562 insertions, 0 deletions
diff --git a/installhtml b/installhtml new file mode 100755 index 0000000000..d42faa17d6 --- /dev/null +++ b/installhtml @@ -0,0 +1,562 @@ +#!/usr/bin/perl -w + +use Config; # for config options in the makefile +use Getopt::Long; # for command-line parsing +use Cwd; +use Pod::Html; + +umask 022; + +=head1 NAME + +installhtml - converts a collection of POD pages to HTML format. + +=head1 SYNOPSIS + + installhtml [--help] [--podpath=<name>:...:<name>] [--podroot=<name>] + [--htmldir=<name>] [--htmlroot=<name>] [--norecurse] [--recurse] + [--splithead=<name>,...,<name>] [--splititem=<name>,...,<name>] + [--libpods=<name>,...,<name>] [--verbose] + +=head1 DESCRIPTION + +I<installhtml> converts a collection of POD pages to a corresponding +collection of HTML pages. This is primarily used to convert the pod +pages found in the perl distribution. + +=head1 OPTIONS + +=over 4 + +=item B<--help> help + +Displays the usage. + +=item B<--podpath> POD search path + +The list of directories to search for .pod and .pm files to be converted. +Default is `podroot/.'. + +=item B<--podroot> POD search path base directory + +The base directory to search for all .pod and .pm files to be converted. +Default is current directory. + +=item B<--htmldir> HTML destination directory + +The base directory which all HTML files will be written to. This should +be a path relative to the filesystem, not the resulting URL. + +=item B<--htmlroot> URL base directory + +The base directory which all resulting HTML files will be visible at in +a URL. The default is `/'. + +=item B<--recurse> recurse on subdirectories + +Whether or not to convert all .pm and .pod files found in subdirectories +too. Default is to not recurse. + +=item B<--splithead> POD files to split on =head directive + +Colon-separated list of pod files to split by the =head directive. These +files should have names specified relative to podroot. + +=item B<--splititem> POD files to split on =item directive + +Colon-separated list of all pod files to split by the =item directive. +I<installhtml> does not do the actual split, rather it invokes I<splitpod> +to do the dirty work. As with --splithead, these files should have names +specified relative to podroot. + +=item B<--libpods> library PODs for LE<lt>E<gt> links + +Colon-separated list of "library" pod files. This is the same list that +will be passed to pod2html when any pod is converted. + +=item B<--verbose> verbose output + +Self-explanatory. + +=back + +=head1 EXAMPLE + +The following command-line is an example of the one we use to convert +perl documentation: + + ./installhtml --podpath=lib:ext:pod:vms \ + --podroot=/usr/src/perl \ + --htmldir=/perl/nmanual \ + --htmlroot=/perl/nmanual \ + --splithead=pod/perlipc.pod \ + --splititem=pod/perlfunc \ + --libpods=perlfunc:perlguts:perlvar:perlrun:perlop \ + --recurse \ + --verbose + +=head1 AUTHOR + +Chris Hall E<lt>hallc@cs.colorado.eduE<gt> + +=head1 TODO + +=cut + +$usage =<<END_OF_USAGE; +Usage: $0 --help --podpath=<name>:...:<name> --podroot=<name> + --htmldir=<name> --htmlroot=<name> --norecurse --recurse + --splithead=<name>,...,<name> --splititem=<name>,...,<name> + --libpods=<name>,...,<name> --verbose + + --help - this message + --podpath - colon-separated list of directories containing .pod and + .pm files to be converted (. by default). + --podroot - filesystem base directory from which all relative paths in + podpath stem (default is .). + --htmldir - directory to store resulting html files in relative + to the filesystem (\$podroot/html by default). + --htmlroot - http-server base directory from which all relative paths + in podpath stem (default is /). + --libpods - comma-separated list of files to search for =item pod + directives in as targets of C<> and implicit links (empty + by default). + --norecurse - don't recurse on those subdirectories listed in podpath. + (default behavior). + --recurse - recurse on those subdirectories listed in podpath + --splithead - comma-separated list of .pod or .pm files to split. will + split each file into several smaller files at every occurrence + of a pod =head[1-6] directive. + --splititem - comma-separated list of .pod or .pm files to split using + splitpod. + --splitpod - where the program splitpod can be found (\$podroot/pod by + default). + --verbose - self-explanatory. + +END_OF_USAGE + +@libpods = (); +@podpath = ( "." ); # colon-separated list of directories containing .pod + # and .pm files to be converted. +$podroot = "."; # assume the pods we want are here +$htmldir = ""; # nothing for now... +$htmlroot = "/"; # default value +$recurse = 0; # default behavior +@splithead = (); # don't split any files by default +@splititem = (); # don't split any files by default +$splitpod = ""; # nothing for now. + +$verbose = 0; # whether or not to print debugging info + +$pod2html = "pod/pod2html"; + + +# parse the command-line +$result = GetOptions( qw( + help + podpath=s + podroot=s + htmldir=s + htmlroot=s + libpods=s + recurse! + splithead=s + splititem=s + splitpod=s + verbose +)); +usage("invalid parameters") unless $result; +parse_command_line(); + + +# set these variables to appropriate values if the user didn't specify +# values for them. +$htmldir = "$htmlroot/html" unless $htmldir; +$splitpod = "$podroot/pod" unless $splitpod; + + +# make sure that the destination directory exists +(mkdir($htmldir, 0755) || + die "$0: cannot make directory $htmldir: $!\n") if ! -d $htmldir; + + +# the following array will eventually contain files that are to be +# ignored in the conversion process. these are files that have been +# process by splititem or splithead and should not be converted as a +# result. +@ignore = (); + + +# split pods. its important to do this before convert ANY pods because +# it may effect some of the links +@splitdirs = (); # files in these directories won't get an index +split_on_head($podroot, $htmldir, \@splitdirs, \@ignore, @splithead); +split_on_item($podroot, \@splitdirs, \@ignore, @splititem); + + +# convert the pod pages found in @poddirs +#warn "converting files\n" if $verbose; +#warn "\@ignore\t= @ignore\n" if $verbose; +foreach $dir (@podpath) { + installdir($dir, $recurse, $podroot, \@splitdirs, \@ignore); +} + + +# now go through and create master indices for each pod we split +foreach $dir (@splititem) { + print "creating index $htmldir/$dir.html\n" if $verbose; + create_index("$htmldir/$dir.html", "$htmldir/$dir"); +} + +foreach $dir (@splithead) { + $dir .= ".pod" unless $dir =~ /(\.pod|\.pm)$/; + # let pod2html create the file + runpod2html($dir, 1); + + # now go through and truncate after the index + $dir =~ /^(.*?)(\.pod|\.pm)?$/sm; + $file = "$htmldir/$1"; + print "creating index $file.html\n" if $verbose; + + # read in everything until what would have been the first =head + # directive, patching the index as we go. + open(H, "<$file.html") || + die "$0: error opening $file.html for input: $!\n"; + $/ = ""; + @data = (); + while (<H>) { + last if /NAME=/; + s,HREF="#(.*)">,HREF="$file/$1.html">,g; + push @data, $_; + } + close(H); + + # now rewrite the file + open(H, ">$file.html") || + die "$0: error opening $file.html for output: $!\n"; + print H "@data\n"; + close(H); +} + +############################################################################## + + +sub usage { + warn "$0: @_\n" if @_; + die $usage; +} + + +sub parse_command_line { + usage() if defined $opt_help; + $opt_help = ""; # make -w shut up + + # list of directories + @podpath = split(":", $opt_podpath) if defined $opt_podpath; + + # lists of files + @splithead = split(",", $opt_splithead) if defined $opt_splithead; + @splititem = split(",", $opt_splititem) if defined $opt_splititem; + @libpods = split(",", $opt_libpods) if defined $opt_libpods; + + $htmldir = $opt_htmldir if defined $opt_htmldir; + $htmlroot = $opt_htmlroot if defined $opt_htmlroot; + $podroot = $opt_podroot if defined $opt_podroot; + $splitpod = $opt_splitpod if defined $opt_splitpod; + + $recurse = $opt_recurse if defined $opt_recurse; + $verbose = $opt_verbose if defined $opt_verbose; +} + + +sub create_index { + my($html, $dir) = @_; + my(@files, @filedata, @index, $file); + + # get the list of .html files in this directory + opendir(DIR, $dir) || + die "$0: error opening directory $dir for reading: $!\n"; + @files = sort(grep(/\.html$/, readdir(DIR))); + closedir(DIR); + + open(HTML, ">$html") || + die "$0: error opening $html for output: $!\n"; + + # for each .html file in the directory, extract the index + # embedded in the file and throw it into the big index. + print HTML "<DL COMPACT>\n"; + foreach $file (@files) { + $/ = ""; + + open(IN, "<$dir/$file") || + die "$0: error opening $dir/$file for input: $!\n"; + @filedata = <IN>; + close(IN); + + # pull out the NAME section + ($name) = grep(/NAME=/, @filedata); + $name =~ m,/H1>\s(\S+)\s[\s-]*(.*?)\s*$,sm; + print HTML qq(<A HREF="$dir/$file">); + print HTML "<DT>$1</A><DD>$2\n" if defined $1; +# print HTML qq(<A HREF="$dir/$file">$1</A><BR>\n") if defined $1; + + next; + + @index = grep(/<!-- INDEX BEGIN -->.*<!-- INDEX END -->/s, + @filedata); + for (@index) { + s/<!-- INDEX BEGIN -->(\s*<!--)(.*)(-->\s*)<!-- INDEX END -->/$2/s; + s,#,$dir/$file#,g; + # print HTML "$_\n"; + print HTML "$_\n<P><HR><P>\n"; + } + } + print HTML "</DL>\n"; + + close(HTML); +} + + +sub split_on_head { + my($podroot, $htmldir, $splitdirs, $ignore, @splithead) = @_; + my($pod, $dirname, $filename); + + # split the files specified in @splithead on =head[1-6] pod directives + print "splitting files by head.\n" if $verbose && $#splithead >= 0; + foreach $pod (@splithead) { + # figure out the directory name and filename + $pod =~ s,^([^/]*)$,/$1,; + $pod =~ m,(.*?)/(.*?)(\.pod)?$,; + $dirname = $1; + $filename = "$2.pod"; + + # since we are splitting this file it shouldn't be converted. + push(@$ignore, "$podroot/$dirname/$filename"); + + # split the pod + splitpod("$podroot/$dirname/$filename", "$podroot/$dirname", $htmldir, + $splitdirs); + } +} + + +sub split_on_item { + my($podroot, $splitdirs, $ignore, @splititem) = @_; + my($pwd, $dirname, $filename); + + print "splitting files by item.\n" if $verbose && $#splititem >= 0; + $pwd = getcwd(); + foreach $pod (@splititem) { + # figure out the directory to split into + $pod =~ s,^([^/]*)$,/$1,; + $pod =~ m,(.*?)/(.*?)(\.pod)?$,; + $dirname = "$1/$2"; + $filename = "$2.pod"; + + # since we are splitting this file it shouldn't be converted. + push(@$ignore, "$podroot/$dirname.pod"); + + # split the pod + push(@$splitdirs, "$podroot/$dirname"); + if (! -d "$podroot/$dirname") { + mkdir("$podroot/$dirname", 0755) || + die "$0: error creating directory $podroot/$dirname: $!\n"; + } + chdir("$podroot/$dirname") || + die "$0: error changing to directory $podroot/$dirname: $!\n"; + system("../splitpod", "../$filename") && + warn "$0: error running '../splitpod ../$filename'" + ." from $podroot/$dirname"; + } + chdir($pwd); +} + + +# +# splitpod - splits a .pod file into several smaller .pod files +# where a new file is started each time a =head[1-6] pod directive +# is encountered in the input file. +# +sub splitpod { + my($pod, $poddir, $htmldir, $splitdirs) = @_; + my(@poddata, @filedata, @heads); + my($file, $i, $j, $prevsec, $section, $nextsec); + + print "splitting $pod\n" if $verbose; + + # read the file in paragraphs + $/ = ""; + open(SPLITIN, "<$pod") || + die "$0: error opening $pod for input: $!\n"; + @filedata = <SPLITIN>; + close(SPLITIN) || + die "$0: error closing $pod: $!\n"; + + # restore the file internally by =head[1-6] sections + @poddata = (); + for ($i = 0, $j = -1; $i <= $#filedata; $i++) { + $j++ if ($filedata[$i] =~ /^\s*=head[1-6]/); + if ($j >= 0) { + $poddata[$j] = "" unless defined $poddata[$j]; + $poddata[$j] .= "\n$filedata[$i]" if $j >= 0; + } + } + + # create list of =head[1-6] sections so that we can rewrite + # L<> links as necessary. + %heads = (); + foreach $i (0..$#poddata) { + $heads{htmlize($1)} = 1 if $poddata[$i] =~ /=head[1-6]\s+(.*)/; + } + + # create a directory of a similar name and store all the + # files in there + $pod =~ s,.*/(.*),$1,; # get the last part of the name + $dir = $pod; + $dir =~ s/\.pod//g; + push(@$splitdirs, "$poddir/$dir"); + mkdir("$poddir/$dir", 0755) || + die "$0: could not create directory $poddir/$dir: $!\n" + unless -d "$poddir/$dir"; + + $poddata[0] =~ /^\s*=head[1-6]\s+(.*)/; + $section = ""; + $nextsec = $1; + + # for each section of the file create a separate pod file + for ($i = 0; $i <= $#poddata; $i++) { + # determine the "prev" and "next" links + $prevsec = $section; + $section = $nextsec; + if ($i < $#poddata) { + $poddata[$i+1] =~ /^\s*=head[1-6]\s+(.*)/; + $nextsec = $1; + } else { + $nextsec = ""; + } + + # determine an appropriate filename (this must correspond with + # what pod2html will try and guess) + # $poddata[$i] =~ /^\s*=head[1-6]\s+(.*)/; + $file = "$dir/" . htmlize($section) . ".pod"; + + # create the new .pod file + print "\tcreating $poddir/$file\n" if $verbose; + open(SPLITOUT, ">$poddir/$file") || + die "$0: error opening $poddir/$file for output: $!\n"; + $poddata[$i] =~ s,L<([^<>]*)>, + defined $heads{htmlize($1)} ? "L<$dir/$1>" : "L<$1>" + ,ge; + print SPLITOUT $poddata[$i]."\n\n"; + print SPLITOUT "=over 4\n\n"; + print SPLITOUT "=item *\n\nBack to L<$dir/\"$prevsec\">\n\n" if $prevsec; + print SPLITOUT "=item *\n\nForward to L<$dir/\"$nextsec\">\n\n" if $nextsec; + print SPLITOUT "=item *\n\nUp to L<$dir>\n\n"; + print SPLITOUT "=back\n\n"; + close(SPLITOUT) || + die "$0: error closing $poddir/$file: $!\n"; + } +} + + +# +# installdir - takes care of converting the .pod and .pm files in the +# current directory to .html files and then installing those. +# +sub installdir { + my($dir, $recurse, $podroot, $splitdirs, $ignore) = @_; + my(@dirlist, @podlist, @pmlist, $doindex); + + @dirlist = (); # directories to recurse on + @podlist = (); # .pod files to install + @pmlist = (); # .pm files to install + + # should files in this directory get an index? + $doindex = (grep($_ eq "$podroot/$dir", @$splitdirs) ? 0 : 1); + + opendir(DIR, "$podroot/$dir") + || die "$0: error opening directory $podroot/$dir: $!\n"; + + # find the directories to recurse on + @dirlist = map { "$dir/$_" } + grep(-d "$podroot/$dir/$_" && !/^\.{1,2}/, readdir(DIR)) if $recurse; + rewinddir(DIR); + + # find all the .pod files within the directory + @podlist = map { /^(.*)\.pod$/; "$dir/$1" } + grep(! -d "$podroot/$dir/$_" && /\.pod$/, readdir(DIR)); + rewinddir(DIR); + + # find all the .pm files within the directory + @pmlist = map { /^(.*)\.pm$/; "$dir/$1" } + grep(! -d "$podroot/$dir/$_" && /\.pm$/, readdir(DIR)); + + closedir(DIR); + + # recurse on all subdirectories we kept track of + foreach $dir (@dirlist) { + installdir($dir, $recurse, $podroot, $splitdirs, $ignore); + } + + # install all the pods we found + foreach $pod (@podlist) { + # check if we should ignore it. + next if grep($_ eq "$podroot/$pod.pod", @$ignore); + + # check if a .pm files exists too + if (grep($_ eq "$pod.pm", @pmlist)) { + print "$0: Warning both `$podroot/$pod.pod' and " + . "`$podroot/$pod.pm' exist, using pod\n"; + push(@ignore, "$pod.pm"); + } + runpod2html("$pod.pod", $doindex); + } + + # install all the .pm files we found + foreach $pm (@pmlist) { + # check if we should ignore it. + next if grep($_ eq "$pm.pm", @ignore); + + runpod2html("$pm.pm", $doindex); + } +} + + +# +# runpod2html - invokes pod2html to convert a .pod or .pm file to a .html +# file. +# +sub runpod2html { + my($pod, $doindex) = @_; + my($html, $i, $dir, @dirs); + + $html = $pod; + $html =~ s/\.(pod|pm)$/.html/g; + + # make sure the destination directories exist + @dirs = split("/", $html); + $dir = "$htmldir/"; + for ($i = 0; $i < $#dirs; $i++) { + if (! -d "$dir$dirs[$i]") { + mkdir("$dir$dirs[$i]", 0755) || + die "$0: error creating directory $dir$dirs[$i]: $!\n"; + } + $dir .= "$dirs[$i]/"; + } + + # invoke pod2html + print "$podroot/$pod => $htmldir/$html\n" if $verbose; +#system("./pod2html", + Pod::Html'pod2html( + #Pod::Html'pod2html($pod2html, + "--htmlroot=$htmlroot", + "--podpath=".join(":", @podpath), + "--podroot=$podroot", "--netscape", + ($doindex ? "--index" : "--noindex"), + "--" . ($recurse ? "" : "no") . "recurse", + ($#libpods >= 0) ? "--libpods=" . join(":", @libpods) : "", + "--infile=$podroot/$pod", "--outfile=$htmldir/$html"); + die "$0: error running $pod2html: $!\n" if $?; +} + +sub htmlize { htmlify(0, @_) } |