From a2d12bc84fb2af87dd1c0c6e5bc854554902cd67 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Sun, 26 Jan 2003 19:35:03 +0000 Subject: Imported from /home/lorry/working-area/delta_perl-xml-xpath/XML-XPath-1.13.tar.gz. --- XPath.pm | 553 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 XPath.pm (limited to 'XPath.pm') diff --git a/XPath.pm b/XPath.pm new file mode 100644 index 0000000..579d2cd --- /dev/null +++ b/XPath.pm @@ -0,0 +1,553 @@ +# $Id: XPath.pm,v 1.56 2003/01/26 19:33:17 matt Exp $ + +package XML::XPath; + +use strict; +use vars qw($VERSION $AUTOLOAD $revision); + +$VERSION = '1.13'; + +$XML::XPath::Namespaces = 1; +$XML::XPath::Debug = 0; + +use XML::XPath::XMLParser; +use XML::XPath::Parser; +use IO::File; + +# For testing +#use Data::Dumper; +#$Data::Dumper::Indent = 1; + +# Parameters for new() +my @options = qw( + filename + parser + xml + ioref + context + ); + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + + my(%args); + # Try to figure out what the user passed + if ($#_ == 0) { # passed a scalar + my $string = $_[0]; + if ($string =~ m{<.*?>}s) { # it's an XML string + $args{'xml'} = $string; + } elsif (ref($string)) { # read XML from file handle + $args{'ioref'} = $string; + } elsif ($string eq '-') { # read XML from stdin + $args{'ioref'} = IO::File->new($string); + } else { # read XML from a file + $args{'filename'} = $string; + } + } else { # passed a hash or hash reference + # just pass the parameters on to the XPath constructor + %args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_); + } + + if ($args{filename} && (!-e $args{filename} || !-r $args{filename})) { + die "Cannot open file '$args{filename}'"; + } + my %hash = map(( "_$_" => $args{$_} ), @options); + $hash{path_parser} = XML::XPath::Parser->new(); + return bless \%hash, $class; +} + +sub find { + my $self = shift; + my $path = shift; + my $context = shift; + die "No path to find" unless $path; + + if (!defined $context) { + $context = $self->get_context; + } + if (!defined $context) { + # Still no context? Need to parse... + my $parser = XML::XPath::XMLParser->new( + filename => $self->get_filename, + xml => $self->get_xml, + ioref => $self->get_ioref, + parser => $self->get_parser, + ); + $context = $parser->parse; + $self->set_context($context); +# warn "CONTEXT:\n", Data::Dumper->Dumpxs([$context], ['context']); + } + + my $parsed_path = $self->{path_parser}->parse($path); +# warn "\n\nPATH: ", $parsed_path->as_string, "\n\n"; + +# warn "evaluating path\n"; + return $parsed_path->evaluate($context); +} + +# sub memsize { +# print STDERR @_, "\t"; +# open(FH, '/proc/self/status'); +# while() { +# print STDERR $_ if /^VmSize/; +# } +# close FH; +# } +# +sub findnodes { + my $self = shift; + my ($path, $context) = @_; + + my $results = $self->find($path, $context); + + if ($results->isa('XML::XPath::NodeSet')) { + return wantarray ? $results->get_nodelist : $results; +# return $results->get_nodelist; + } + +# warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug; + return wantarray ? () : XML::XPath::NodeSet->new(); +} + +sub matches { + my $self = shift; + my ($node, $path, $context) = @_; + + my @nodes = $self->findnodes($path, $context); + + if (grep { "$node" eq "$_" } @nodes) { + return 1; + } + return; +} + +sub findnodes_as_string { + my $self = shift; + my ($path, $context) = @_; + + my $results = $self->find($path, $context); + + if ($results->isa('XML::XPath::NodeSet')) { + return join('', map { $_->toString } $results->get_nodelist); + } + elsif ($results->isa('XML::XPath::Node')) { + return $results->toString; + } + else { + return XML::XPath::Node::XMLescape($results->value); + } +} + +sub findvalue { + my $self = shift; + my ($path, $context) = @_; + + my $results = $self->find($path, $context); + + if ($results->isa('XML::XPath::NodeSet')) { + return $results->to_literal; + } + + return $results; +} + +sub exists +{ + my $self = shift; + my ($path, $context) = @_; + $path = '/' if (!defined $path); + my @nodeset = $self->findnodes($path, $context); + return 1 if (scalar( @nodeset )); + return 0; +} + +sub getNodeAsXML { + my $self = shift; + my $node_path = shift; + $node_path = '/' if (!defined $node_path); + if (ref($node_path)) { + return $node_path->as_string(); + } else { + return $self->findnodes_as_string($node_path); + } +} + +sub getNodeText { + my $self = shift; + my $node_path = shift; + if (ref($node_path)) { + return $node_path->string_value(); + } else { + return $self->findvalue($node_path); + } +} + +sub setNodeText { + my $self = shift; + my($node_path, $new_text) = @_; + my $nodeset = $self->findnodes($node_path); + return undef if (!defined $nodeset); # could not find node + my @nodes = $nodeset->get_nodelist; + if ($#nodes < 0) { + if ($node_path =~ m|/@([^/]+)$|) { + # attribute not found, so try to create it + my $parent_path = $`; + my $attr = $1; + $nodeset = $self->findnodes($parent_path); + return undef if (!defined $nodeset); # could not find node + foreach my $node ($nodeset->get_nodelist) { + my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text); + return undef if (!defined $newnode); # could not create new node + $node->appendAttribute($newnode); + } + } else { + return undef; # could not find node + } + } + foreach my $node (@nodes) { + if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) { + $node->setNodeValue($new_text); + } else { + foreach my $delnode ($node->getChildNodes()) { + $node->removeChild($delnode); + } + my $newnode = XML::XPath::Node::Text->new($new_text); + return undef if (!defined $newnode); # could not create new node + $node->appendChild($newnode); + } + } + return 1; +} + +sub createNode { + my $self = shift; + my($node_path) = @_; + my $path_steps = $self->{path_parser}->parse($node_path); + my @path_steps = (); + foreach my $step (@{$path_steps->get_lhs()}) { + my $string = $step->as_string(); + push(@path_steps, $string) if (defined $string && $string ne ""); + } + my $prev_node = undef; + my $nodeset = undef; + my $nodes = undef; + my $p = undef; + my $test_path = ""; + # Start with the deepest node, working up the path (right to left), + # trying to find a node that exists. + for ($p = $#path_steps; $p >= 0; $p--) { + my $path = $path_steps[$p]; + $test_path = "(/" . join("/", @path_steps[0..$p]) . ")"; + $nodeset = $self->findnodes($test_path); + return undef if (!defined $nodeset); # error looking for node + $nodes = $nodeset->size; + return undef if ($nodes > 1); # too many paths - path not specific enough + if ($nodes == 1) { # found a node -- need to create nodes below it + $prev_node = $nodeset->get_node(1); + last; + } + } + if (!defined $prev_node) { + my @root_nodes = $self->findnodes('/')->get_nodelist(); + $prev_node = $root_nodes[0]; + } + # We found a node that exists, or we'll start at the root. + # Create all lower nodes working left to right along the path. + for ($p++ ; $p <= $#path_steps; $p++) { + my $path = $path_steps[$p]; + my $newnode = undef; + my($axis,$name) = ($path =~ /^(.*?)::(.*)$/); + if ($axis =~ /^child$/i) { + $newnode = XML::XPath::Node::Element->new($name); + return undef if (!defined $newnode); # could not create new node + $prev_node->appendChild($newnode); + } elsif ($axis =~ /^attribute$/i) { + $newnode = XML::XPath::Node::Attribute->new($name, ""); + return undef if (!defined $newnode); # could not create new node + $prev_node->appendAttribute($newnode); + } + $prev_node = $newnode; + } + return $prev_node; +} + +sub get_filename { + my $self = shift; + $self->{_filename}; +} + +sub set_filename { + my $self = shift; + $self->{_filename} = shift; +} + +sub get_parser { + my $self = shift; + $self->{_parser}; +} + +sub set_parser { + my $self = shift; + $self->{_parser} = shift; +} + +sub get_xml { + my $self = shift; + $self->{_xml}; +} + +sub set_xml { + my $self = shift; + $self->{_xml} = shift; +} + +sub get_ioref { + my $self = shift; + $self->{_ioref}; +} + +sub set_ioref { + my $self = shift; + $self->{_ioref} = shift; +} + +sub get_context { + my $self = shift; + $self->{_context}; +} + +sub set_context { + my $self = shift; + $self->{_context} = shift; +} + +sub cleanup { + my $self = shift; + if ($XML::XPath::SafeMode) { + my $context = $self->get_context; + return unless $context; + $context->dispose; + } +} + +sub set_namespace { + my $self = shift; + my ($prefix, $expanded) = @_; + $self->{path_parser}->set_namespace($prefix, $expanded); +} + +sub clear_namespaces { + my $self = shift; + $self->{path_parser}->clear_namespaces(); +} + +1; +__END__ + +=head1 NAME + +XML::XPath - a set of modules for parsing and evaluating XPath statements + +=head1 DESCRIPTION + +This module aims to comply exactly to the XPath specification at +http://www.w3.org/TR/xpath and yet allow extensions to be added in the +form of functions. Modules such as XSLT and XPointer may need to do +this as they support functionality beyond XPath. + +=head1 SYNOPSIS + + use XML::XPath; + use XML::XPath::XMLParser; + + my $xp = XML::XPath->new(filename => 'test.xhtml'); + + my $nodeset = $xp->find('/html/body/p'); # find all paragraphs + + foreach my $node ($nodeset->get_nodelist) { + print "FOUND\n\n", + XML::XPath::XMLParser::as_string($node), + "\n\n"; + } + +=head1 DETAILS + +There's an awful lot to all of this, so bear with it - if you stick it +out it should be worth it. Please get a good understanding of XPath +by reading the spec before asking me questions. All of the classes +and parts herein are named to be synonimous with the names in the +specification, so consult that if you don't understand why I'm doing +something in the code. + +=head1 API + +The API of XML::XPath itself is extremely simple to allow you to get +going almost immediately. The deeper API's are more complex, but you +shouldn't have to touch most of that. + +=head2 new() + +This constructor follows the often seen named parameter method call. +Parameters you can use are: filename, parser, xml, ioref and context. +The filename parameter specifies an XML file to parse. The xml +parameter specifies a string to parse, and the ioref parameter +specifies an ioref to parse. The context option allows you to +specify a context node. The context node has to be in the format +of a node as specified in L. The 4 parameters +filename, xml, ioref and context are mutually exclusive - you should +only specify one (if you specify anything other than context, the +context node is the root of your document). +The parser option allows you to pass in an already prepared +XML::Parser object, to save you having to create more than one +in your application (if, for example, you're doing more than just XPath). + + my $xp = XML::XPath->new( context => $node ); + +It is very much recommended that you use only 1 XPath object throughout +the life of your application. This is because the object (and it's sub-objects) +maintain certain bits of state information that will be useful (such +as XPath variables) to later calls to find(). It's also a good idea because +you'll use less memory this way. + +=head2 I = find($path, [$context]) + +The find function takes an XPath expression (a string) and returns either an +XML::XPath::NodeSet object containing the nodes it found (or empty if +no nodes matched the path), or one of XML::XPath::Literal (a string), +XML::XPath::Number, or XML::XPath::Boolean. It should always return +something - and you can use ->isa() to find out what it returned. If you +need to check how many nodes it found you should check $nodeset->size. +See L. An optional second parameter of a context +node allows you to use this method repeatedly, for example XSLT needs +to do this. + +=head2 findnodes($path, [$context]) + +Returns a list of nodes found by $path, optionally in context $context. +In scalar context returns an XML::XPath::NodeSet object. + +=head2 findnodes_as_string($path, [$context]) + +Returns the nodes found reproduced as XML. The result is not guaranteed +to be valid XML though. + +=head2 findvalue($path, [$context]) + +Returns either a C, a C or a +C object. If the path returns a NodeSet, +$nodeset->to_literal is called automatically for you (and thus a +C is returned). Note that +for each of the objects stringification is overloaded, so you can just +print the value found, or manipulate it in the ways you would a normal +perl value (e.g. using regular expressions). + +=head2 exists($path, [$context]) + +Returns true if the given path exists. + +=head2 matches($node, $path, [$context]) + +Returns true if the node matches the path (optionally in context $context). + +=head2 getNodeText($path) + +Returns the text string for a particular XML node. Returns a string, +or undef if the node doesn't exist. + +=head2 setNodeText($path, $text) + +Sets the text string for a particular XML node. The node can be an +element or an attribute. If the node to be set is an attribute, and +the attribute node does not exist, it will be created automatically. + +=head2 createNode($path) + +Creates the node matching the path given. If part of the path given, or +all of the path do not exist, the necessary nodes will be created +automatically. + +=head2 set_namespace($prefix, $uri) + +Sets the namespace prefix mapping to the uri. + +Normally in XML::XPath the prefixes in XPath node tests take their +context from the current node. This means that foo:bar will always +match an element regardless of the namespace that the prefix +foo is mapped to (which might even change within the document, resulting +in unexpected results). In order to make prefixes in XPath node tests +actually map to a real URI, you need to enable that via a call +to the set_namespace method of your XML::XPath object. + +=head2 clear_namespaces() + +Clears all previously set namespace mappings. + +=head2 $XML::XPath::Namespaces + +Set this to 0 if you I want namespace processing to occur. This +will make everything a little (tiny) bit faster, but you'll suffer for it, +probably. + +=head1 Node Object Model + +See L, L, +L, L, +L, L, +and L. + +=head1 On Garbage Collection + +XPath nodes work in a special way that allows circular references, and +yet still lets Perl's reference counting garbage collector to clean up +the nodes after use. This should be totally transparent to the user, +with one caveat: B. What does this +mean to the average user? Not much. Provided you don't free (or let go +out of scope) either the tree you passed to XML::XPath->new, or if you +didn't pass a tree, and passed a filename or IO-ref, then provided you +don't let the XML::XPath object go out of scope before you let results +of find() and its friends go out of scope, then you'll be fine. Even if +you B let the tree go out of scope before results, you'll probably +still be fine. The only case where you may get stung is when the last +part of your path/query is either an ancestor or parent axis. In that +case the worst that will happen is you'll end up with a circular reference +that won't get cleared until interpreter destruction time. You can get +around that by explicitly calling $node->DESTROY on each of your result +nodes, if you really need to do that. + +Mail me direct if that's not clear. Note that it's not doom and gloom. It's +by no means perfect, but the worst that will happen is a long running process +could leak memory. Most long running processes will therefore be able to +explicitly be careful not to free the tree (or XML::XPath object) before +freeing results. AxKit, an application that uses XML::XPath, does this and +I didn't have to make any changes to the code - it's already sensible +programming. + +If you I don't want all this to happen, then set the variable +$XML::XPath::SafeMode, and call $xp->cleanup() on the XML::XPath object +when you're finished, or $tree->dispose() if you have a tree instead. + +=head1 Example + +Please see the test files in t/ for examples on how to use XPath. + +=head1 Support/Author + +This module is copyright 2000 AxKit.com Ltd. This is free +software, and as such comes with NO WARRANTY. No dates are used in this +module. You may distribute this module under the terms of either the +Gnu GPL, or the Artistic License (the same terms as Perl itself). + +For support, please subscribe to the Perl-XML mailing list at the URL +http://listserv.activestate.com/mailman/listinfo/perl-xml + +Matt Sergeant, matt@sergeant.org + +=head1 SEE ALSO + +L, L, L, +L, L, L, +L. + +=cut -- cgit v1.2.1