diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-02-02 09:55:45 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-02-02 09:55:45 +0000 |
commit | e2c3adefd8c31a020997b83179ab5ab417e7e4ac (patch) | |
tree | 513a832cef954f8cff19ef5c4930701caa7ec664 /lib/Pod/ParseUtils.pm | |
parent | f2545c0771a68cccebc3e62a37fb91f07e98f87c (diff) | |
download | perl-e2c3adefd8c31a020997b83179ab5ab417e7e4ac.tar.gz |
PodParser-1.092 update via CPAN (from Brad Appleton)
p4raw-id: //depot/perl@4961
Diffstat (limited to 'lib/Pod/ParseUtils.pm')
-rw-r--r-- | lib/Pod/ParseUtils.pm | 792 |
1 files changed, 792 insertions, 0 deletions
diff --git a/lib/Pod/ParseUtils.pm b/lib/Pod/ParseUtils.pm new file mode 100644 index 0000000000..a66e8f5e8b --- /dev/null +++ b/lib/Pod/ParseUtils.pm @@ -0,0 +1,792 @@ +############################################################################# +# Pod/ParseUtils.pm -- helpers for POD parsing and conversion +# +# Copyright (C) 1999 by Marek Rouchal. All rights reserved. +# This file is part of "PodParser". PodParser is free software; +# you can redistribute it and/or modify it under the same terms +# as Perl itself. +############################################################################# + +package Pod::ParseUtils; + +use vars qw($VERSION); +$VERSION = 0.2; ## Current version of this package +require 5.004; ## requires this Perl version or later + +=head1 NAME + +Pod::ParseUtils - helpers for POD parsing and conversion + +=head1 SYNOPSIS + + use Pod::ParseUtils; + + my $list = new Pod::List; + my $link = Pod::Hyperlink->new('Pod::Parser'); + +=head1 DESCRIPTION + +B<Pod::ParseUtils> contains a few object-oriented helper packages for +POD parsing and processing (i.e. in POD formatters and translators). + +=cut + +#----------------------------------------------------------------------------- +# Pod::List +# +# class to hold POD list info (=over, =item, =back) +#----------------------------------------------------------------------------- + +package Pod::List; + +use Carp; + +=head2 Pod::List + +B<Pod::List> can be used to hold information about POD lists +(written as =over ... =item ... =back) for further processing. +The following methods are available: + +=over 4 + +=item new() + +Create a new list object. Properties may be specified through a hash +reference like this: + + my $list = Pod::List->new({ -start => $., -indent => 4 }); + +See the individual methods/properties for details. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub initialize { + my $self = shift; + $self->{-file} ||= 'unknown'; + $self->{-start} ||= 'unknown'; + $self->{-indent} ||= 4; # perlpod: "should be the default" + $self->{_items} = []; + $self->{-type} ||= ''; +} + +=item file() + +Without argument, retrieves the file name the list is in. This must +have been set before by either specifying B<-file> in the B<new()> +method or by calling the B<file()> method with a scalar argument. + +=cut + +# The POD file name the list appears in +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item start() + +Without argument, retrieves the line number where the list started. +This must have been set before by either specifying B<-start> in the +B<new()> method or by calling the B<start()> method with a scalar +argument. + +=cut + +# The line in the file the node appears +sub start { + return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start}; +} + +=item indent() + +Without argument, retrieves the indent level of the list as specified +in C<=over n>. This must have been set before by either specifying +B<-indent> in the B<new()> method or by calling the B<indent()> method +with a scalar argument. + +=cut + +# indent level +sub indent { + return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent}; +} + +=item type() + +Without argument, retrieves the list type, which can be an arbitrary value, +e.g. C<OL>, C<UL>, ... when thinking the HTML way. +This must have been set before by either specifying +B<-type> in the B<new()> method or by calling the B<type()> method +with a scalar argument. + +=cut + +# The type of the list (UL, OL, ...) +sub type { + return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; +} + +=item rx() + +Without argument, retrieves a regular expression for simplifying the +individual item strings once the list type has been determined. Usage: +E.g. when converting to HTML, one might strip the leading number in +an ordered list as C<E<lt>OLE<gt>> already prints numbers itself. +This must have been set before by either specifying +B<-rx> in the B<new()> method or by calling the B<rx()> method +with a scalar argument. + +=cut + +# The regular expression to simplify the items +sub rx { + return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx}; +} + +=item item() + +Without argument, retrieves the array of the items in this list. +The items may be represented by any scalar. +If an argument has been given, it is pushed on the list of items. + +=cut + +# The individual =items of this list +sub item { + my ($self,$item) = @_; + if(defined $item) { + push(@{$self->{_items}}, $item); + return $item; + } + else { + return @{$self->{_items}}; + } +} + +=item parent() + +Without argument, retrieves information about the parent holding this +list, which is represented as an arbitrary scalar. +This must have been set before by either specifying +B<-parent> in the B<new()> method or by calling the B<parent()> method +with a scalar argument. + +=cut + +# possibility for parsers/translators to store information about the +# lists's parent object +sub parent { + return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent}; +} + +=item tag() + +Without argument, retrieves information about the list tag, which can be +any scalar. +This must have been set before by either specifying +B<-tag> in the B<new()> method or by calling the B<tag()> method +with a scalar argument. + +=back + +=cut + +# possibility for parsers/translators to store information about the +# list's object +sub tag { + return (@_ > 1) ? ($_[0]->{-tag} = $_[1]) : $_[0]->{-tag}; +} + +#----------------------------------------------------------------------------- +# Pod::Hyperlink +# +# class to manipulate POD hyperlinks (L<>) +#----------------------------------------------------------------------------- + +package Pod::Hyperlink; + +=head2 Pod::Hyperlink + +B<Pod::Hyperlink> is a class for manipulation of POD hyperlinks. Usage: + + my $link = Pod::Hyperlink->new('alternative text|page/"section in page"'); + +The B<Pod::Hyperlink> class is mainly designed to parse the contents of the +C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the +different parts of a POD hyperlink for further processing. It can also be +used to construct hyperlinks. + +=over 4 + +=item new() + +The B<new()> method can either be passed a set of key/value pairs or a single +scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object +of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a +failure, the error message is stored in C<$@>. + +=cut + +use Carp; + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = +{}; + bless $self, $class; + $self->initialize(); + if(defined $_[0]) { + if(ref($_[0])) { + # called with a list of parameters + %$self = %{$_[0]}; + $self->_construct_text(); + } + else { + # called with L<> contents + return undef unless($self->parse($_[0])); + } + } + return $self; +} + +sub initialize { + my $self = shift; + $self->{-line} ||= 'undef'; + $self->{-file} ||= 'undef'; + $self->{-page} ||= ''; + $self->{-node} ||= ''; + $self->{-alttext} ||= ''; + $self->{-type} ||= 'undef'; + $self->{_warnings} = []; +} + +=item parse($string) + +This method can be used to (re)parse a (new) hyperlink, i.e. the contents +of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object. + +=cut + +sub parse { + my $self = shift; + local($_) = $_[0]; + # syntax check the link and extract destination + my ($alttext,$page,$node,$type) = ('','','',''); + + $self->{_warnings} = []; + + # collapse newlines with whitespace + if(s/\s*\n+\s*/ /g) { + $self->warning("collapsing newlines to blanks"); + } + # strip leading/trailing whitespace + if(s/^[\s\n]+//) { + $self->warning("ignoring leading whitespace in link"); + } + if(s/[\s\n]+$//) { + $self->warning("ignoring trailing whitespace in link"); + } + unless(length($_)) { + _invalid_link("empty link"); + return undef; + } + + ## Check for different possibilities. This is tedious and error-prone + # we match all possibilities (alttext, page, section/item) + #warn "DEBUG: link=$_\n"; + + # only page + if(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) { + $page = $1 . $2; + $type = 'page'; + } + # alttext, page and section + elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) { + ($alttext, $page, $node) = ($1, $2 . $3, $4); + $type = 'section'; + } + # page and section + elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) { + ($page, $node) = ($1 . $2, $3); + $type = 'section'; + } + # page and item + elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) { + ($page, $node) = ($1 . $2, $3); + $type = 'item'; + } + # only section + elsif(m!^(?:/\s*|)"(.+)"$!) { + $node = $1; + $type = 'section'; + } + # only item + elsif(m!^/(.+)$!) { + $node = $1; + $type = 'item'; + } + # non-standard: Hyperlink + elsif(m!^((?:http|ftp|mailto|news):.+)$!i) { + $node = $1; + $type = 'hyperlink'; + } + # alttext, page and item + elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) { + ($alttext, $page, $node) = ($1, $2 . $3, $4); + $type = 'item'; + } + # alttext and page + elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) { + ($alttext, $page) = ($1, $2 . $3); + $type = 'page'; + } + # alttext and section + elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) { + ($alttext, $node) = ($1,$2); + $type = 'section'; + } + # alttext and item + elsif(m!^(.+?)\s*[|]\s*/(.+)$!) { + ($alttext, $node) = ($1,$2); + } + # nonstandard: alttext and hyperlink + elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) { + ($alttext, $node) = ($1,$2); + $type = 'hyperlink'; + } + # must be an item or a "malformed" section (without "") + else { + $node = $_; + $type = 'item'; + } + + if($page =~ /[(]\w*[)]$/) { + $self->warning("section in `$page' deprecated"); + } + $self->{-page} = $page; + $self->{-node} = $node; + $self->{-alttext} = $alttext; + #warn "DEBUG: page=$page section=$section item=$item alttext=$alttext\n"; + $self->{-type} = $type; + $self->_construct_text(); + 1; +} + +sub _construct_text { + my $self = shift; + my $alttext = $self->alttext(); + my $type = $self->type(); + my $section = $self->node(); + my $page = $self->page(); + my $page_ext = ''; + $page =~ s/([(]\w*[)])$// && ($page_ext = $1); + if($alttext) { + $self->{_text} = $alttext; + } + elsif($type eq 'hyperlink') { + $self->{_text} = $section; + } + else { + $self->{_text} = (!$section ? '' : + $type eq 'item' ? "the $section entry" : + "the section on $section" ) . + ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" : + ' elsewhere in this document'); + } + # for being marked up later + # use the non-standard markers P<> and Q<>, so that the resulting + # text can be parsed by the translators. It's their job to put + # the correct hypertext around the linktext + if($alttext) { + $self->{_markup} = "Q<$alttext>"; + } + elsif($type eq 'hyperlink') { + $self->{_markup} = "Q<$section>"; + } + else { + $self->{_markup} = (!$section ? '' : + $type eq 'item' ? "the Q<$section> entry" : + "the section on Q<$section>" ) . + ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" : + ' elsewhere in this document'); + } +} + +=item markup($string) + +Set/retrieve the textual value of the link. This string contains special +markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the +translator's interior sequence expansion engine to the +formatter-specific code to highlight/activate the hyperlink. The details +have to be implemented in the translator. + +=cut + +#' retrieve/set markuped text +sub markup { + return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup}; +} + +=item text() + +This method returns the textual representation of the hyperlink as above, +but without markers (read only). Depending on the link type this is one of +the following alternatives (the + and * denote the portions of the text +that are marked up): + + the +perl+ manpage + the *$|* entry in the +perlvar+ manpage + the section on *OPTIONS* in the +perldoc+ manpage + the section on *DESCRIPTION* elsewhere in this document + +=cut + +# The complete link's text +sub text { + $_[0]->{_text}; +} + +=item warning() + +After parsing, this method returns any warnings encountered during the +parsing process. + +=cut + +# Set/retrieve warnings +sub warning { + my $self = shift; + if(@_) { + push(@{$self->{_warnings}}, @_); + return @_; + } + return @{$self->{_warnings}}; +} + +=item line(), file() + +Just simple slots for storing information about the line and the file +the link was encountered in. Has to be filled in manually. + +=cut + +# The line in the file the link appears +sub line { + return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line}; +} + +# The POD file name the link appears in +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item page() + +This method sets or returns the POD page this link points to. + +=cut + +# The POD page the link appears on +sub page { + if (@_ > 1) { + $_[0]->{-page} = $_[1]; + $_[0]->_construct_text(); + } + $_[0]->{-page}; +} + +=item node() + +As above, but the destination node text of the link. + +=cut + +# The link destination +sub node { + if (@_ > 1) { + $_[0]->{-node} = $_[1]; + $_[0]->_construct_text(); + } + $_[0]->{-node}; +} + +=item alttext() + +Sets or returns an alternative text specified in the link. + +=cut + +# Potential alternative text +sub alttext { + if (@_ > 1) { + $_[0]->{-alttext} = $_[1]; + $_[0]->_construct_text(); + } + $_[0]->{-alttext}; +} + +=item type() + +The node type, either C<section> or C<item>. As an unofficial type, +there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>> + +=cut + +# The type: item or headn +sub type { + return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type}; +} + +=item link() + +Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>. + +=back + +=cut + +# The link itself +sub link { + my $self = shift; + my $link = $self->page() || ''; + if($self->node()) { + if($self->type() eq 'section') { + $link .= ($link ? '/' : '') . '"' . $self->node() . '"'; + } + elsif($self->type() eq 'hyperlink') { + $link = $self->node(); + } + else { # item + $link .= '/' . $self->node(); + } + } + if($self->alttext()) { + $link = $self->alttext() . '|' . $link; + } + $link; +} + +sub _invalid_link { + my ($msg) = @_; + # this sets @_ + #eval { die "$msg\n" }; + #chomp $@; + $@ = $msg; # this seems to work, too! + undef; +} + +#----------------------------------------------------------------------------- +# Pod::Cache +# +# class to hold POD page details +#----------------------------------------------------------------------------- + +package Pod::Cache; + +=head2 Pod::Cache + +B<Pod::Cache> holds information about a set of POD documents, +especially the nodes for hyperlinks. +The following methods are available: + +=over 4 + +=item new() + +Create a new cache object. This object can hold an arbitrary number of +POD documents of class Pod::Cache::Item. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my $self = []; + bless $self, $class; + return $self; +} + +=item item() + +Add a new item to the cache. Without arguments, this method returns a +list of all cache elements. + +=cut + +sub item { + my ($self,%param) = @_; + if(%param) { + my $item = Pod::Cache::Item->new(%param); + push(@$self, $item); + return $item; + } + else { + return @{$self}; + } +} + +=item find_page($name) + +Look for a POD document named C<$name> in the cache. Returns the +reference to the corresponding Pod::Cache::Item object or undef if +not found. + +=back + +=cut + +sub find_page { + my ($self,$page) = @_; + foreach(@$self) { + if($_->page() eq $page) { + return $_; + } + } + undef; +} + +package Pod::Cache::Item; + +=head2 Pod::Cache::Item + +B<Pod::Cache::Item> holds information about individual POD documents, +that can be grouped in a Pod::Cache object. +It is intended to hold information about the hyperlink nodes of POD +documents. +The following methods are available: + +=over 4 + +=item new() + +Create a new object. + +=cut + +sub new { + my $this = shift; + my $class = ref($this) || $this; + my %params = @_; + my $self = {%params}; + bless $self, $class; + $self->initialize(); + return $self; +} + +sub initialize { + my $self = shift; + $self->{-nodes} = [] unless(defined $self->{-nodes}); +} + +=item page() + +Set/retrieve the POD document name (e.g. "Pod::Parser"). + +=cut + +# The POD page +sub page { + return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page}; +} + +=item description() + +Set/retrieve the POD short description as found in the C<=head1 NAME> +section. + +=cut + +# The POD description, taken out of NAME if present +sub description { + return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description}; +} + +=item path() + +Set/retrieve the POD file storage path. + +=cut + +# The file path +sub path { + return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path}; +} + +=item file() + +Set/retrieve the POD file name. + +=cut + +# The POD file name +sub file { + return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file}; +} + +=item nodes() + +Add a node (or a list of nodes) to the document's node list. Note that +the order is kept, i.e. start with the first node and end with the last. +If no argument is given, the current list of nodes is returned in the +same order the nodes have been added. +A node can be any scalar, but usually is a pair of node string and +unique id for the C<find_node> method to work correctly. + +=cut + +# The POD nodes +sub nodes { + my ($self,@nodes) = @_; + if(@nodes) { + push(@{$self->{-nodes}}, @nodes); + return @nodes; + } + else { + return @{$self->{-nodes}}; + } +} + +=item find_node($name) + +Look for a node named C<$name> in the object's node list. Returns the +unique id of the node (i.e. the second element of the array stored in +the node arry) or undef if not found. + +=back + +=cut + +sub find_node { + my ($self,$node) = @_; + foreach(@{$self->{-nodes}}) { + if($_->[0] eq $node) { + return $_->[1]; # id + } + } + undef; +} + + +=head1 AUTHOR + +Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing +a lot of things from L<pod2man> and L<pod2roff> as well as other POD +processing tools by Tom Christiansen, Brad Appleton and Russ Allbery. + +=head1 SEE ALSO + +L<pod2man>, L<pod2roff>, L<Pod::Parser>, L<Pod::Checker>, +L<pod2html> + +=cut + +1; |