From a2d12bc84fb2af87dd1c0c6e5bc854554902cd67 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Sun, 26 Jan 2003 19:35:03 +0000 Subject: Imported from /home/lorry/working-area/delta_perl-xml-xpath/XML-XPath-1.13.tar.gz. --- MANIFEST | 63 ++++ Makefile.PL | 16 + README | 193 +++++++++++ TODO | 8 + XPath.pm | 553 +++++++++++++++++++++++++++++++ XPath/Boolean.pm | 73 +++++ XPath/Builder.pm | 198 +++++++++++ XPath/Expr.pm | 619 ++++++++++++++++++++++++++++++++++ XPath/Function.pm | 392 ++++++++++++++++++++++ XPath/Literal.pm | 99 ++++++ XPath/LocationPath.pm | 61 ++++ XPath/Node.pm | 592 +++++++++++++++++++++++++++++++++ XPath/Node/Attribute.pm | 135 ++++++++ XPath/Node/Comment.pm | 91 +++++ XPath/Node/Element.pm | 503 ++++++++++++++++++++++++++++ XPath/Node/Namespace.pm | 99 ++++++ XPath/Node/PI.pm | 81 +++++ XPath/Node/Text.pm | 96 ++++++ XPath/NodeSet.pm | 184 +++++++++++ XPath/Number.pm | 87 +++++ XPath/Parser.pm | 821 ++++++++++++++++++++++++++++++++++++++++++++++ XPath/PerlSAX.pm | 166 ++++++++++ XPath/Root.pm | 36 ++ XPath/Step.pm | 519 +++++++++++++++++++++++++++++ XPath/Variable.pm | 43 +++ XPath/XMLParser.pm | 385 ++++++++++++++++++++++ examples/test.xml | 34 ++ examples/xpath | 83 +++++ t/01basic.t | 33 ++ t/02descendant.t | 23 ++ t/03star.t | 26 ++ t/04pos.t | 22 ++ t/05attrib.t | 28 ++ t/06attrib_val.t | 25 ++ t/07count.t | 27 ++ t/08name.t | 25 ++ t/09a_string_length.t | 30 ++ t/09string_length.t | 28 ++ t/10pipe.t | 27 ++ t/11axischild.t | 18 + t/12axisdescendant.t | 27 ++ t/13axisparent.t | 19 ++ t/14axisancestor.t | 22 ++ t/15axisfol_sib.t | 24 ++ t/16axisprec_sib.t | 44 +++ t/17axisfollowing.t | 45 +++ t/18axispreceding.t | 39 +++ t/19axisd_or_s.t | 22 ++ t/20axisa_or_s.t | 22 ++ t/21allnodes.t | 60 ++++ t/22name_select.t | 23 ++ t/23func.t | 41 +++ t/24namespaces.t | 56 ++++ t/25scope.t | 27 ++ t/26predicate.t | 26 ++ t/27asxml.t | 13 + t/28ancestor2.t | 37 +++ t/29desc_with_predicate.t | 21 ++ t/30lang.t | 20 ++ t/insert.t | 53 +++ t/rdf.t | 59 ++++ t/remove.t | 44 +++ t/stress.t | 57 ++++ 63 files changed, 7343 insertions(+) create mode 100644 MANIFEST create mode 100644 Makefile.PL create mode 100644 README create mode 100644 TODO create mode 100644 XPath.pm create mode 100644 XPath/Boolean.pm create mode 100644 XPath/Builder.pm create mode 100644 XPath/Expr.pm create mode 100644 XPath/Function.pm create mode 100644 XPath/Literal.pm create mode 100644 XPath/LocationPath.pm create mode 100644 XPath/Node.pm create mode 100644 XPath/Node/Attribute.pm create mode 100644 XPath/Node/Comment.pm create mode 100644 XPath/Node/Element.pm create mode 100644 XPath/Node/Namespace.pm create mode 100644 XPath/Node/PI.pm create mode 100644 XPath/Node/Text.pm create mode 100644 XPath/NodeSet.pm create mode 100644 XPath/Number.pm create mode 100644 XPath/Parser.pm create mode 100644 XPath/PerlSAX.pm create mode 100644 XPath/Root.pm create mode 100644 XPath/Step.pm create mode 100644 XPath/Variable.pm create mode 100644 XPath/XMLParser.pm create mode 100644 examples/test.xml create mode 100755 examples/xpath create mode 100644 t/01basic.t create mode 100644 t/02descendant.t create mode 100644 t/03star.t create mode 100644 t/04pos.t create mode 100644 t/05attrib.t create mode 100644 t/06attrib_val.t create mode 100644 t/07count.t create mode 100644 t/08name.t create mode 100644 t/09a_string_length.t create mode 100644 t/09string_length.t create mode 100644 t/10pipe.t create mode 100644 t/11axischild.t create mode 100644 t/12axisdescendant.t create mode 100644 t/13axisparent.t create mode 100644 t/14axisancestor.t create mode 100644 t/15axisfol_sib.t create mode 100644 t/16axisprec_sib.t create mode 100644 t/17axisfollowing.t create mode 100644 t/18axispreceding.t create mode 100644 t/19axisd_or_s.t create mode 100644 t/20axisa_or_s.t create mode 100644 t/21allnodes.t create mode 100644 t/22name_select.t create mode 100644 t/23func.t create mode 100644 t/24namespaces.t create mode 100644 t/25scope.t create mode 100644 t/26predicate.t create mode 100644 t/27asxml.t create mode 100644 t/28ancestor2.t create mode 100644 t/29desc_with_predicate.t create mode 100644 t/30lang.t create mode 100644 t/insert.t create mode 100644 t/rdf.t create mode 100644 t/remove.t create mode 100644 t/stress.t diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..48945df --- /dev/null +++ b/MANIFEST @@ -0,0 +1,63 @@ +MANIFEST +Makefile.PL +TODO +README +XPath.pm +XPath/XMLParser.pm +XPath/Parser.pm +XPath/Expr.pm +XPath/Function.pm +XPath/Literal.pm +XPath/LocationPath.pm +XPath/Number.pm +XPath/Node.pm +XPath/Node/Element.pm +XPath/Node/Attribute.pm +XPath/Node/Text.pm +XPath/Node/Namespace.pm +XPath/Node/PI.pm +XPath/Node/Comment.pm +XPath/Step.pm +XPath/Variable.pm +XPath/NodeSet.pm +XPath/Boolean.pm +XPath/Root.pm +XPath/PerlSAX.pm +XPath/Builder.pm +t/01basic.t +t/02descendant.t +t/03star.t +t/04pos.t +t/05attrib.t +t/06attrib_val.t +t/07count.t +t/08name.t +t/09string_length.t +t/09a_string_length.t +t/10pipe.t +t/11axischild.t +t/12axisdescendant.t +t/13axisparent.t +t/14axisancestor.t +t/15axisfol_sib.t +t/16axisprec_sib.t +t/17axisfollowing.t +t/18axispreceding.t +t/19axisd_or_s.t +t/20axisa_or_s.t +t/21allnodes.t +t/22name_select.t +t/23func.t +t/24namespaces.t +t/25scope.t +t/26predicate.t +t/27asxml.t +t/28ancestor2.t +t/29desc_with_predicate.t +t/30lang.t +t/rdf.t +t/remove.t +t/insert.t +t/stress.t +examples/test.xml +examples/xpath diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..0821dde --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,16 @@ +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. + +require 5.005; + +WriteMakefile( + 'NAME' => 'XML::XPath', + 'VERSION_FROM' => 'XPath.pm', # finds $VERSION + 'AUTHOR' => 'Matt Sergeant, AxKit.com Ltd', + 'ABSTRACT_FROM' => 'XPath.pm', + 'PREREQ_PM' => { + 'XML::Parser' => '2.23', + }, + 'EXE_FILES' => [ 'examples/xpath' ], +); diff --git a/README b/README new file mode 100644 index 0000000..154e79f --- /dev/null +++ b/README @@ -0,0 +1,193 @@ +NAME + XML::XPath - a set of modules for parsing and evaluating XPath + statements + +DESCRIPTION + This module aims to comply exactly to the XPath specification at + http://www.w3.org/TR/xpath and yet allow extensions to be added + in the form of functions. Modules such as XSLT and XPointer may + need to do this as they support functionality beyond XPath. + +SYNOPSIS + use XML::XPath; + use XML::XPath::XMLParser; + + my $xp = XML::XPath->new(filename => 'test.xhtml'); + + my $nodeset = $xp->find('/html/body/p'); # find all paragraphs + + foreach my $node ($nodeset->get_nodelist) { + print "FOUND\n\n", + XML::XPath::XMLParser::as_string($node), + "\n\n"; + } + +DETAILS + There's an awful lot to all of this, so bear with it - if you + stick it out it should be worth it. Please get a good + understanding of XPath by reading the spec before asking me + questions. All of the classes and parts herein are named to be + synonimous with the names in the specification, so consult that + if you don't understand why I'm doing something in the code. + +API + The API of XML::XPath itself is extremely simple to allow you to + get going almost immediately. The deeper API's are more complex, + but you shouldn't have to touch most of that. + + new() + + This constructor follows the often seen named parameter method + call. Parameters you can use are: filename, parser, xml, ioref + and context. The filename parameter specifies an XML file to + parse. The xml parameter specifies a string to parse, and the + ioref parameter specifies an ioref to parse. The context option + allows you to specify a context node. The context node has to be + in the format of a node as specified in the + XML::XPath::XMLParser manpage. The 4 parameters filename, xml, + ioref and context are mutually exclusive - you should only + specify one (if you specify anything other than context, the + context node is the root of your document). The parser option + allows you to pass in an already prepared XML::Parser object, to + save you having to create more than one in your application (if, + for example, you're doing more than just XPath). + + my $xp = XML::XPath->new( context => $node ); + + It is very much recommended that you use only 1 XPath object + throughout the life of your application. This is because the + object (and it's sub-objects) maintain certain bits of state + information that will be useful (such as XPath variables) to + later calls to find(). It's also a good idea because you'll use + less memory this way. + + *nodeset* = find($path, [$context]) + + The find function takes an XPath expression (a string) and + returns either an XML::XPath::NodeSet object containing the + nodes it found (or empty if no nodes matched the path), or one + of XML::XPath::Literal (a string), XML::XPath::Number, or + XML::XPath::Boolean. It should always return something - and you + can use ->isa() to find out what it returned. If you need to + check how many nodes it found you should check $nodeset->size. + See the XML::XPath::NodeSet manpage. An optional second + parameter of a context node allows you to use this method + repeatedly, for example XSLT needs to do this. + + findnodes($path, [$context]) + + Returns a list of nodes found by $path, optionally in context + $context. In scalar context returns an XML::XPath::NodeSet + object. + + findnodes_as_string($path, [$context]) + + Returns the nodes found reproduced as XML. The result is not + guaranteed to be valid XML though. + + findvalue($path, [$context]) + + Returns either a `XML::XPath::Literal', a `XML::XPath::Boolean' + or a `XML::XPath::Number' object. If the path returns a NodeSet, + $nodeset->to_literal is called automatically for you (and thus a + `XML::XPath::Literal' is returned). Note that for each of the + objects stringification is overloaded, so you can just print the + value found, or manipulate it in the ways you would a normal + perl value (e.g. using regular expressions). + + matches($node, $path, [$context]) + + Returns true if the node matches the path (optionally in context + $context). + + set_namespace($prefix, $uri) + + Sets the namespace prefix mapping to the uri. + + Normally in XML::XPath the prefixes in XPath node tests take + their context from the current node. This means that foo:bar + will always match an element regardless of the + namespace that the prefix foo is mapped to (which might even + change within the document, resulting in unexpected results). In + order to make prefixes in XPath node tests actually map to a + real URI, you need to enable that via a call to the + set_namespace method of your XML::XPath object. + + clear_namespaces() + + Clears all previously set namespace mappings. + + $XML::XPath::Namespaces + + Set this to 0 if you *don't* want namespace processing to occur. + This will make everything a little (tiny) bit faster, but you'll + suffer for it, probably. + +Node Object Model + See the XML::XPath::Node manpage, the XML::XPath::Node::Element + manpage, the XML::XPath::Node::Text manpage, the + XML::XPath::Node::Comment manpage, the + XML::XPath::Node::Attribute manpage, the + XML::XPath::Node::Namespace manpage, and the + XML::XPath::Node::PI manpage. + +On Garbage Collection + XPath nodes work in a special way that allows circular + references, and yet still lets Perl's reference counting garbage + collector to clean up the nodes after use. This should be + totally transparent to the user, with one caveat: If you free + your tree before letting go of a sub-tree, consider that playing + with fire and you may get burned. What does this mean to the + average user? Not much. Provided you don't free (or let go out + of scope) either the tree you passed to XML::XPath->new, or if + you didn't pass a tree, and passed a filename or IO-ref, then + provided you don't let the XML::XPath object go out of scope + before you let results of find() and its friends go out of + scope, then you'll be fine. Even if you do let the tree go out + of scope before results, you'll probably still be fine. The only + case where you may get stung is when the last part of your + path/query is either an ancestor or parent axis. In that case + the worst that will happen is you'll end up with a circular + reference that won't get cleared until interpreter destruction + time. You can get around that by explicitly calling $node- + >DESTROY on each of your result nodes, if you really need to do + that. + + Mail me direct if that's not clear. Note that it's not doom and + gloom. It's by no means perfect, but the worst that will happen + is a long running process could leak memory. Most long running + processes will therefore be able to explicitly be careful not to + free the tree (or XML::XPath object) before freeing results. + AxKit, an application that uses XML::XPath, does this and I + didn't have to make any changes to the code - it's already + sensible programming. + + If you *really* don't want all this to happen, then set the + variable $XML::XPath::SafeMode, and call $xp->cleanup() on the + XML::XPath object when you're finished, or $tree->dispose() if + you have a tree instead. + +Example + Please see the test files in t/ for examples on how to use + XPath. + +Support/Author + This module is copyright 2000 AxKit.com Ltd. This is free + software, and as such comes with NO WARRANTY. No dates are used + in this module. You may distribute this module under the terms + of either the Gnu GPL, or the Artistic License (the same terms + as Perl itself). + + For support, please subscribe to the Perl-XML mailing list at + the URL http://listserv.activestate.com/mailman/listinfo/perl- + xml + + Matt Sergeant, matt@sergeant.org + +SEE ALSO + the XML::XPath::Literal manpage, the XML::XPath::Boolean + manpage, the XML::XPath::Number manpage, the + XML::XPath::XMLParser manpage, the XML::XPath::NodeSet manpage, + the XML::XPath::PerlSAX manpage, the XML::XPath::Builder + manpage. + diff --git a/TODO b/TODO new file mode 100644 index 0000000..0044f87 --- /dev/null +++ b/TODO @@ -0,0 +1,8 @@ +$Id: TODO,v 1.5 2001/01/19 16:00:39 matt Exp $ + +TODO List for XML::XPath + +- Mostly None. Bug fix cycle now. +- Somehow to allow namespaced extension functions +- Make SAX parser a SAX2 parser + diff --git a/XPath.pm b/XPath.pm new file mode 100644 index 0000000..579d2cd --- /dev/null +++ b/XPath.pm @@ -0,0 +1,553 @@ +# $Id: XPath.pm,v 1.56 2003/01/26 19:33:17 matt Exp $ + +package XML::XPath; + +use strict; +use vars qw($VERSION $AUTOLOAD $revision); + +$VERSION = '1.13'; + +$XML::XPath::Namespaces = 1; +$XML::XPath::Debug = 0; + +use XML::XPath::XMLParser; +use XML::XPath::Parser; +use IO::File; + +# For testing +#use Data::Dumper; +#$Data::Dumper::Indent = 1; + +# Parameters for new() +my @options = qw( + filename + parser + xml + ioref + context + ); + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + + my(%args); + # Try to figure out what the user passed + if ($#_ == 0) { # passed a scalar + my $string = $_[0]; + if ($string =~ m{<.*?>}s) { # it's an XML string + $args{'xml'} = $string; + } elsif (ref($string)) { # read XML from file handle + $args{'ioref'} = $string; + } elsif ($string eq '-') { # read XML from stdin + $args{'ioref'} = IO::File->new($string); + } else { # read XML from a file + $args{'filename'} = $string; + } + } else { # passed a hash or hash reference + # just pass the parameters on to the XPath constructor + %args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_); + } + + if ($args{filename} && (!-e $args{filename} || !-r $args{filename})) { + die "Cannot open file '$args{filename}'"; + } + my %hash = map(( "_$_" => $args{$_} ), @options); + $hash{path_parser} = XML::XPath::Parser->new(); + return bless \%hash, $class; +} + +sub find { + my $self = shift; + my $path = shift; + my $context = shift; + die "No path to find" unless $path; + + if (!defined $context) { + $context = $self->get_context; + } + if (!defined $context) { + # Still no context? Need to parse... + my $parser = XML::XPath::XMLParser->new( + filename => $self->get_filename, + xml => $self->get_xml, + ioref => $self->get_ioref, + parser => $self->get_parser, + ); + $context = $parser->parse; + $self->set_context($context); +# warn "CONTEXT:\n", Data::Dumper->Dumpxs([$context], ['context']); + } + + my $parsed_path = $self->{path_parser}->parse($path); +# warn "\n\nPATH: ", $parsed_path->as_string, "\n\n"; + +# warn "evaluating path\n"; + return $parsed_path->evaluate($context); +} + +# sub memsize { +# print STDERR @_, "\t"; +# open(FH, '/proc/self/status'); +# while() { +# print STDERR $_ if /^VmSize/; +# } +# close FH; +# } +# +sub findnodes { + my $self = shift; + my ($path, $context) = @_; + + my $results = $self->find($path, $context); + + if ($results->isa('XML::XPath::NodeSet')) { + return wantarray ? $results->get_nodelist : $results; +# return $results->get_nodelist; + } + +# warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug; + return wantarray ? () : XML::XPath::NodeSet->new(); +} + +sub matches { + my $self = shift; + my ($node, $path, $context) = @_; + + my @nodes = $self->findnodes($path, $context); + + if (grep { "$node" eq "$_" } @nodes) { + return 1; + } + return; +} + +sub findnodes_as_string { + my $self = shift; + my ($path, $context) = @_; + + my $results = $self->find($path, $context); + + if ($results->isa('XML::XPath::NodeSet')) { + return join('', map { $_->toString } $results->get_nodelist); + } + elsif ($results->isa('XML::XPath::Node')) { + return $results->toString; + } + else { + return XML::XPath::Node::XMLescape($results->value); + } +} + +sub findvalue { + my $self = shift; + my ($path, $context) = @_; + + my $results = $self->find($path, $context); + + if ($results->isa('XML::XPath::NodeSet')) { + return $results->to_literal; + } + + return $results; +} + +sub exists +{ + my $self = shift; + my ($path, $context) = @_; + $path = '/' if (!defined $path); + my @nodeset = $self->findnodes($path, $context); + return 1 if (scalar( @nodeset )); + return 0; +} + +sub getNodeAsXML { + my $self = shift; + my $node_path = shift; + $node_path = '/' if (!defined $node_path); + if (ref($node_path)) { + return $node_path->as_string(); + } else { + return $self->findnodes_as_string($node_path); + } +} + +sub getNodeText { + my $self = shift; + my $node_path = shift; + if (ref($node_path)) { + return $node_path->string_value(); + } else { + return $self->findvalue($node_path); + } +} + +sub setNodeText { + my $self = shift; + my($node_path, $new_text) = @_; + my $nodeset = $self->findnodes($node_path); + return undef if (!defined $nodeset); # could not find node + my @nodes = $nodeset->get_nodelist; + if ($#nodes < 0) { + if ($node_path =~ m|/@([^/]+)$|) { + # attribute not found, so try to create it + my $parent_path = $`; + my $attr = $1; + $nodeset = $self->findnodes($parent_path); + return undef if (!defined $nodeset); # could not find node + foreach my $node ($nodeset->get_nodelist) { + my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text); + return undef if (!defined $newnode); # could not create new node + $node->appendAttribute($newnode); + } + } else { + return undef; # could not find node + } + } + foreach my $node (@nodes) { + if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) { + $node->setNodeValue($new_text); + } else { + foreach my $delnode ($node->getChildNodes()) { + $node->removeChild($delnode); + } + my $newnode = XML::XPath::Node::Text->new($new_text); + return undef if (!defined $newnode); # could not create new node + $node->appendChild($newnode); + } + } + return 1; +} + +sub createNode { + my $self = shift; + my($node_path) = @_; + my $path_steps = $self->{path_parser}->parse($node_path); + my @path_steps = (); + foreach my $step (@{$path_steps->get_lhs()}) { + my $string = $step->as_string(); + push(@path_steps, $string) if (defined $string && $string ne ""); + } + my $prev_node = undef; + my $nodeset = undef; + my $nodes = undef; + my $p = undef; + my $test_path = ""; + # Start with the deepest node, working up the path (right to left), + # trying to find a node that exists. + for ($p = $#path_steps; $p >= 0; $p--) { + my $path = $path_steps[$p]; + $test_path = "(/" . join("/", @path_steps[0..$p]) . ")"; + $nodeset = $self->findnodes($test_path); + return undef if (!defined $nodeset); # error looking for node + $nodes = $nodeset->size; + return undef if ($nodes > 1); # too many paths - path not specific enough + if ($nodes == 1) { # found a node -- need to create nodes below it + $prev_node = $nodeset->get_node(1); + last; + } + } + if (!defined $prev_node) { + my @root_nodes = $self->findnodes('/')->get_nodelist(); + $prev_node = $root_nodes[0]; + } + # We found a node that exists, or we'll start at the root. + # Create all lower nodes working left to right along the path. + for ($p++ ; $p <= $#path_steps; $p++) { + my $path = $path_steps[$p]; + my $newnode = undef; + my($axis,$name) = ($path =~ /^(.*?)::(.*)$/); + if ($axis =~ /^child$/i) { + $newnode = XML::XPath::Node::Element->new($name); + return undef if (!defined $newnode); # could not create new node + $prev_node->appendChild($newnode); + } elsif ($axis =~ /^attribute$/i) { + $newnode = XML::XPath::Node::Attribute->new($name, ""); + return undef if (!defined $newnode); # could not create new node + $prev_node->appendAttribute($newnode); + } + $prev_node = $newnode; + } + return $prev_node; +} + +sub get_filename { + my $self = shift; + $self->{_filename}; +} + +sub set_filename { + my $self = shift; + $self->{_filename} = shift; +} + +sub get_parser { + my $self = shift; + $self->{_parser}; +} + +sub set_parser { + my $self = shift; + $self->{_parser} = shift; +} + +sub get_xml { + my $self = shift; + $self->{_xml}; +} + +sub set_xml { + my $self = shift; + $self->{_xml} = shift; +} + +sub get_ioref { + my $self = shift; + $self->{_ioref}; +} + +sub set_ioref { + my $self = shift; + $self->{_ioref} = shift; +} + +sub get_context { + my $self = shift; + $self->{_context}; +} + +sub set_context { + my $self = shift; + $self->{_context} = shift; +} + +sub cleanup { + my $self = shift; + if ($XML::XPath::SafeMode) { + my $context = $self->get_context; + return unless $context; + $context->dispose; + } +} + +sub set_namespace { + my $self = shift; + my ($prefix, $expanded) = @_; + $self->{path_parser}->set_namespace($prefix, $expanded); +} + +sub clear_namespaces { + my $self = shift; + $self->{path_parser}->clear_namespaces(); +} + +1; +__END__ + +=head1 NAME + +XML::XPath - a set of modules for parsing and evaluating XPath statements + +=head1 DESCRIPTION + +This module aims to comply exactly to the XPath specification at +http://www.w3.org/TR/xpath and yet allow extensions to be added in the +form of functions. Modules such as XSLT and XPointer may need to do +this as they support functionality beyond XPath. + +=head1 SYNOPSIS + + use XML::XPath; + use XML::XPath::XMLParser; + + my $xp = XML::XPath->new(filename => 'test.xhtml'); + + my $nodeset = $xp->find('/html/body/p'); # find all paragraphs + + foreach my $node ($nodeset->get_nodelist) { + print "FOUND\n\n", + XML::XPath::XMLParser::as_string($node), + "\n\n"; + } + +=head1 DETAILS + +There's an awful lot to all of this, so bear with it - if you stick it +out it should be worth it. Please get a good understanding of XPath +by reading the spec before asking me questions. All of the classes +and parts herein are named to be synonimous with the names in the +specification, so consult that if you don't understand why I'm doing +something in the code. + +=head1 API + +The API of XML::XPath itself is extremely simple to allow you to get +going almost immediately. The deeper API's are more complex, but you +shouldn't have to touch most of that. + +=head2 new() + +This constructor follows the often seen named parameter method call. +Parameters you can use are: filename, parser, xml, ioref and context. +The filename parameter specifies an XML file to parse. The xml +parameter specifies a string to parse, and the ioref parameter +specifies an ioref to parse. The context option allows you to +specify a context node. The context node has to be in the format +of a node as specified in L. The 4 parameters +filename, xml, ioref and context are mutually exclusive - you should +only specify one (if you specify anything other than context, the +context node is the root of your document). +The parser option allows you to pass in an already prepared +XML::Parser object, to save you having to create more than one +in your application (if, for example, you're doing more than just XPath). + + my $xp = XML::XPath->new( context => $node ); + +It is very much recommended that you use only 1 XPath object throughout +the life of your application. This is because the object (and it's sub-objects) +maintain certain bits of state information that will be useful (such +as XPath variables) to later calls to find(). It's also a good idea because +you'll use less memory this way. + +=head2 I = find($path, [$context]) + +The find function takes an XPath expression (a string) and returns either an +XML::XPath::NodeSet object containing the nodes it found (or empty if +no nodes matched the path), or one of XML::XPath::Literal (a string), +XML::XPath::Number, or XML::XPath::Boolean. It should always return +something - and you can use ->isa() to find out what it returned. If you +need to check how many nodes it found you should check $nodeset->size. +See L. An optional second parameter of a context +node allows you to use this method repeatedly, for example XSLT needs +to do this. + +=head2 findnodes($path, [$context]) + +Returns a list of nodes found by $path, optionally in context $context. +In scalar context returns an XML::XPath::NodeSet object. + +=head2 findnodes_as_string($path, [$context]) + +Returns the nodes found reproduced as XML. The result is not guaranteed +to be valid XML though. + +=head2 findvalue($path, [$context]) + +Returns either a C, a C or a +C object. If the path returns a NodeSet, +$nodeset->to_literal is called automatically for you (and thus a +C is returned). Note that +for each of the objects stringification is overloaded, so you can just +print the value found, or manipulate it in the ways you would a normal +perl value (e.g. using regular expressions). + +=head2 exists($path, [$context]) + +Returns true if the given path exists. + +=head2 matches($node, $path, [$context]) + +Returns true if the node matches the path (optionally in context $context). + +=head2 getNodeText($path) + +Returns the text string for a particular XML node. Returns a string, +or undef if the node doesn't exist. + +=head2 setNodeText($path, $text) + +Sets the text string for a particular XML node. The node can be an +element or an attribute. If the node to be set is an attribute, and +the attribute node does not exist, it will be created automatically. + +=head2 createNode($path) + +Creates the node matching the path given. If part of the path given, or +all of the path do not exist, the necessary nodes will be created +automatically. + +=head2 set_namespace($prefix, $uri) + +Sets the namespace prefix mapping to the uri. + +Normally in XML::XPath the prefixes in XPath node tests take their +context from the current node. This means that foo:bar will always +match an element regardless of the namespace that the prefix +foo is mapped to (which might even change within the document, resulting +in unexpected results). In order to make prefixes in XPath node tests +actually map to a real URI, you need to enable that via a call +to the set_namespace method of your XML::XPath object. + +=head2 clear_namespaces() + +Clears all previously set namespace mappings. + +=head2 $XML::XPath::Namespaces + +Set this to 0 if you I want namespace processing to occur. This +will make everything a little (tiny) bit faster, but you'll suffer for it, +probably. + +=head1 Node Object Model + +See L, L, +L, L, +L, L, +and L. + +=head1 On Garbage Collection + +XPath nodes work in a special way that allows circular references, and +yet still lets Perl's reference counting garbage collector to clean up +the nodes after use. This should be totally transparent to the user, +with one caveat: B. What does this +mean to the average user? Not much. Provided you don't free (or let go +out of scope) either the tree you passed to XML::XPath->new, or if you +didn't pass a tree, and passed a filename or IO-ref, then provided you +don't let the XML::XPath object go out of scope before you let results +of find() and its friends go out of scope, then you'll be fine. Even if +you B let the tree go out of scope before results, you'll probably +still be fine. The only case where you may get stung is when the last +part of your path/query is either an ancestor or parent axis. In that +case the worst that will happen is you'll end up with a circular reference +that won't get cleared until interpreter destruction time. You can get +around that by explicitly calling $node->DESTROY on each of your result +nodes, if you really need to do that. + +Mail me direct if that's not clear. Note that it's not doom and gloom. It's +by no means perfect, but the worst that will happen is a long running process +could leak memory. Most long running processes will therefore be able to +explicitly be careful not to free the tree (or XML::XPath object) before +freeing results. AxKit, an application that uses XML::XPath, does this and +I didn't have to make any changes to the code - it's already sensible +programming. + +If you I don't want all this to happen, then set the variable +$XML::XPath::SafeMode, and call $xp->cleanup() on the XML::XPath object +when you're finished, or $tree->dispose() if you have a tree instead. + +=head1 Example + +Please see the test files in t/ for examples on how to use XPath. + +=head1 Support/Author + +This module is copyright 2000 AxKit.com Ltd. This is free +software, and as such comes with NO WARRANTY. No dates are used in this +module. You may distribute this module under the terms of either the +Gnu GPL, or the Artistic License (the same terms as Perl itself). + +For support, please subscribe to the Perl-XML mailing list at the URL +http://listserv.activestate.com/mailman/listinfo/perl-xml + +Matt Sergeant, matt@sergeant.org + +=head1 SEE ALSO + +L, L, L, +L, L, L, +L. + +=cut diff --git a/XPath/Boolean.pm b/XPath/Boolean.pm new file mode 100644 index 0000000..e0910d5 --- /dev/null +++ b/XPath/Boolean.pm @@ -0,0 +1,73 @@ +# $Id: Boolean.pm,v 1.7 2000/07/03 08:54:47 matt Exp $ + +package XML::XPath::Boolean; +use XML::XPath::Number; +use XML::XPath::Literal; +use strict; + +use overload + '""' => \&value, + '<=>' => \&cmp; + +sub True { + my $class = shift; + my $val = 1; + bless \$val, $class; +} + +sub False { + my $class = shift; + my $val = 0; + bless \$val, $class; +} + +sub value { + my $self = shift; + $$self; +} + +sub cmp { + my $self = shift; + my ($other, $swap) = @_; + if ($swap) { + return $other <=> $$self; + } + return $$self <=> $other; +} + +sub to_number { XML::XPath::Number->new($_[0]->value); } +sub to_boolean { $_[0]; } +sub to_literal { XML::XPath::Literal->new($_[0]->value ? "true" : "false"); } + +sub string_value { return $_[0]->to_literal->value; } + +1; +__END__ + +=head1 NAME + +XML::XPath::Boolean - Boolean true/false values + +=head1 DESCRIPTION + +XML::XPath::Boolean objects implement simple boolean true/false objects. + +=head1 API + +=head2 XML::XPath::Boolean->True + +Creates a new Boolean object with a true value. + +=head2 XML::XPath::Boolean->False + +Creates a new Boolean object with a false value. + +=head2 value() + +Returns true or false. + +=head2 to_literal() + +Returns the string "true" or "false". + +=cut diff --git a/XPath/Builder.pm b/XPath/Builder.pm new file mode 100644 index 0000000..cc37a9d --- /dev/null +++ b/XPath/Builder.pm @@ -0,0 +1,198 @@ +# $Id: Builder.pm,v 1.10 2001/06/12 20:56:56 matt Exp $ + +package XML::XPath::Builder; + +use strict; + +# to get array index constants +use XML::XPath::Node; +use XML::XPath::Node::Element; +use XML::XPath::Node::Attribute; +use XML::XPath::Node::Namespace; +use XML::XPath::Node::Text; +use XML::XPath::Node::PI; +use XML::XPath::Node::Comment; + +use vars qw/$xmlns_ns $xml_ns/; + +$xmlns_ns = "http://www.w3.org/2000/xmlns/"; +$xml_ns = "http://www.w3.org/XML/1998/namespace"; + +sub new { + my $class = shift; + my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; + + bless $self, $class; +} + +sub start_document { + my $self = shift; + + $self->{IdNames} = {}; + $self->{InScopeNamespaceStack} = [ { + '_Default' => undef, + 'xmlns' => $xmlns_ns, + 'xml' => $xml_ns, + } ]; + + $self->{NodeStack} = [ ]; + + my $document = XML::XPath::Node::Element->new(); + my $newns = XML::XPath::Node::Namespace->new('xml', $xml_ns); + $document->appendNamespace($newns); + $self->{current} = $self->{DOC_Node} = $document; +} + +sub end_document { + my $self = shift; + + return $self->{DOC_Node}; +} + +sub characters { + my $self = shift; + my $sarg = shift; + my $text = $sarg->{Data}; + + my $parent = $self->{current}; + + my $last = $parent->getLastChild; + if ($last && $last->isTextNode) { + # append to previous text node + $last->appendText($text); + return; + } + + my $node = XML::XPath::Node::Text->new($text); + $parent->appendChild($node, 1); +} + +sub start_element { + my $self = shift; + my $sarg = shift; + my $tag = $sarg->{'Name'}; + my $attr = $sarg->{'Attributes'}; + + push @{ $self->{InScopeNamespaceStack} }, + { %{ $self->{InScopeNamespaceStack}[-1] } }; + $self->_scan_namespaces(@_); + + my ($prefix, $namespace) = $self->_namespace($tag); + + my $node = XML::XPath::Node::Element->new($tag, $prefix); + + foreach my $name (keys %$attr) { + my $value = $attr->{$name}; + + if ($name =~ /^xmlns(:(.*))?$/) { + # namespace node + my $prefix = $2 || '#default'; +# warn "Creating NS node: $prefix = $value\n"; + my $newns = XML::XPath::Node::Namespace->new($prefix, $value); + $node->appendNamespace($newns); + } + else { + my ($prefix, $namespace) = $self->_namespace($name); + undef $namespace unless $prefix; + + my $newattr = XML::XPath::Node::Attribute->new($name, $value, $prefix); + $node->appendAttribute($newattr, 1); + if (exists($self->{IdNames}{$tag}) && ($self->{IdNames}{$tag} eq $name)) { + # warn "appending Id Element: $val for ", $node->getName, "\n"; + $self->{DOC_Node}->appendIdElement($value, $node); + } + } + } + + $self->{current}->appendChild($node, 1); + $self->{current} = $node; +} + +sub end_element { + my $self = shift; + $self->{current} = $self->{current}->getParentNode; +} + +sub processing_instruction { + my $self = shift; + my $pi = shift; + my $node = XML::XPath::Node::PI->new($pi->{Target}, $pi->{Data}); + $self->{current}->appendChild($node, 1); +} + +sub comment { + my $self = shift; + my $comment = shift; + my $node = XML::XPath::Node::Comment->new($comment->{Data}); + $self->{current}->appendChild($node, 1); +} + +sub _scan_namespaces { + my ($self, %attributes) = @_; + + while (my ($attr_name, $value) = each %attributes) { + if ($attr_name eq 'xmlns') { + $self->{InScopeNamespaceStack}[-1]{'_Default'} = $value; + } elsif ($attr_name =~ /^xmlns:(.*)$/) { + my $prefix = $1; + $self->{InScopeNamespaceStack}[-1]{$prefix} = $value; + } + } +} + +sub _namespace { + my ($self, $name) = @_; + + my ($prefix, $localname) = split(/:/, $name); + if (!defined($localname)) { + if ($prefix eq 'xmlns') { + return '', undef; + } else { + return '', $self->{InScopeNamespaceStack}[-1]{'_Default'}; + } + } else { + return $prefix, $self->{InScopeNamespaceStack}[-1]{$prefix}; + } +} + +1; + +__END__ + +=head1 NAME + +XML::XPath::Builder - SAX handler for building an XPath tree + +=head1 SYNOPSIS + + use AnySAXParser; + use XML::XPath::Builder; + + $builder = XML::XPath::Builder->new(); + $parser = AnySAXParser->new( Handler => $builder ); + + $root_node = $parser->parse( Source => [SOURCE] ); + +=head1 DESCRIPTION + +C is a SAX handler for building an XML::XPath +tree. + +C is used by creating a new instance of +C and providing it as the Handler for a SAX +parser. Calling `C' on the SAX parser will return the +root node of the tree built from that parse. + +=head1 AUTHOR + +Ken MacLeod, + +=head1 SEE ALSO + +perl(1), XML::XPath(3) + +PerlSAX.pod in libxml-perl + +Extensible Markup Language (XML) + +=cut diff --git a/XPath/Expr.pm b/XPath/Expr.pm new file mode 100644 index 0000000..3cb4714 --- /dev/null +++ b/XPath/Expr.pm @@ -0,0 +1,619 @@ +# $Id: Expr.pm,v 1.20 2003/01/26 19:33:24 matt Exp $ + +package XML::XPath::Expr; +use strict; + +sub new { + my $class = shift; + my ($pp) = @_; + bless { predicates => [], pp => $pp }, $class; +} + +sub as_string { + my $self = shift; + local $^W; # Use of uninitialized value! grrr + my $string = "(" . $self->{lhs}->as_string; + $string .= " " . $self->{op} . " " if defined $self->{op}; + $string .= $self->{rhs}->as_string if defined $self->{rhs}; + $string .= ")"; + foreach my $predicate (@{$self->{predicates}}) { + $string .= "[" . $predicate->as_string . "]"; + } + return $string; +} + +sub as_xml { + my $self = shift; + local $^W; # Use of uninitialized value! grrr + my $string; + if (defined $self->{op}) { + $string .= $self->op_xml(); + } + else { + $string .= $self->{lhs}->as_xml(); + } + foreach my $predicate (@{$self->{predicates}}) { + $string .= "\n" . $predicate->as_xml() . "\n"; + } + return $string; +} + +sub op_xml { + my $self = shift; + my $op = $self->{op}; + + my $tag; + for ($op) { + /^or$/ && do { + $tag = "Or"; + }; + /^and$/ && do { + $tag = "And"; + }; + /^=$/ && do { + $tag = "Equals"; + }; + /^!=$/ && do { + $tag = "NotEquals"; + }; + /^<=$/ && do { + $tag = "LessThanOrEquals"; + }; + /^>=$/ && do { + $tag = "GreaterThanOrEquals"; + }; + /^>$/ && do { + $tag = "GreaterThan"; + }; + /^<$/ && do { + $tag = "LessThan"; + }; + /^\+$/ && do { + $tag = "Plus"; + }; + /^-$/ && do { + $tag = "Minus"; + }; + /^div$/ && do { + $tag = "Div"; + }; + /^mod$/ && do { + $tag = "Mod"; + }; + /^\*$/ && do { + $tag = "Multiply"; + }; + /^\|$/ && do { + $tag = "Union"; + }; + } + + return "<$tag>\n" . $self->{lhs}->as_xml() . $self->{rhs}->as_xml() . "\n"; +} + +sub set_lhs { + my $self = shift; + $self->{lhs} = $_[0]; +} + +sub set_op { + my $self = shift; + $self->{op} = $_[0]; +} + +sub set_rhs { + my $self = shift; + $self->{rhs} = $_[0]; +} + +sub push_predicate { + my $self = shift; + + die "Only 1 predicate allowed on FilterExpr in W3C XPath 1.0" + if @{$self->{predicates}}; + + push @{$self->{predicates}}, $_[0]; +} + +sub get_lhs { $_[0]->{lhs}; } +sub get_rhs { $_[0]->{rhs}; } +sub get_op { $_[0]->{op}; } + +sub evaluate { + my $self = shift; + my $node = shift; + + # If there's an op, result is result of that op. + # If no op, just resolve Expr + +# warn "Evaluate Expr: ", $self->as_string, "\n"; + + my $results; + + if ($self->{op}) { + die ("No RHS of ", $self->as_string) unless $self->{rhs}; + $results = $self->op_eval($node); + } + else { + $results = $self->{lhs}->evaluate($node); + } + + if (my @predicates = @{$self->{predicates}}) { + if (!$results->isa('XML::XPath::NodeSet')) { + die "Can't have predicates execute on object type: " . ref($results); + } + + # filter initial nodeset by each predicate + foreach my $predicate (@{$self->{predicates}}) { + $results = $self->filter_by_predicate($results, $predicate); + } + } + + return $results; +} + +sub op_eval { + my $self = shift; + my $node = shift; + + my $op = $self->{op}; + + for ($op) { + /^or$/ && do { + return op_or($node, $self->{lhs}, $self->{rhs}); + }; + /^and$/ && do { + return op_and($node, $self->{lhs}, $self->{rhs}); + }; + /^=$/ && do { + return op_equals($node, $self->{lhs}, $self->{rhs}); + }; + /^!=$/ && do { + return op_nequals($node, $self->{lhs}, $self->{rhs}); + }; + /^<=$/ && do { + return op_le($node, $self->{lhs}, $self->{rhs}); + }; + /^>=$/ && do { + return op_ge($node, $self->{lhs}, $self->{rhs}); + }; + /^>$/ && do { + return op_gt($node, $self->{lhs}, $self->{rhs}); + }; + /^<$/ && do { + return op_lt($node, $self->{lhs}, $self->{rhs}); + }; + /^\+$/ && do { + return op_plus($node, $self->{lhs}, $self->{rhs}); + }; + /^-$/ && do { + return op_minus($node, $self->{lhs}, $self->{rhs}); + }; + /^div$/ && do { + return op_div($node, $self->{lhs}, $self->{rhs}); + }; + /^mod$/ && do { + return op_mod($node, $self->{lhs}, $self->{rhs}); + }; + /^\*$/ && do { + return op_mult($node, $self->{lhs}, $self->{rhs}); + }; + /^\|$/ && do { + return op_union($node, $self->{lhs}, $self->{rhs}); + }; + + die "No such operator, or operator unimplemented in ", $self->as_string, "\n"; + } +} + +# Operators + +use XML::XPath::Boolean; + +sub op_or { + my ($node, $lhs, $rhs) = @_; + if($lhs->evaluate($node)->to_boolean->value) { + return XML::XPath::Boolean->True; + } + else { + return $rhs->evaluate($node)->to_boolean; + } +} + +sub op_and { + my ($node, $lhs, $rhs) = @_; + if( ! $lhs->evaluate($node)->to_boolean->value ) { + return XML::XPath::Boolean->False; + } + else { + return $rhs->evaluate($node)->to_boolean; + } +} + +sub op_equals { + my ($node, $lhs, $rhs) = @_; + + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + if ($lh_results->isa('XML::XPath::NodeSet') && + $rh_results->isa('XML::XPath::NodeSet')) { + # True if and only if there is a node in the + # first set and a node in the second set such + # that the result of performing the comparison + # on the string-values of the two nodes is true. + foreach my $lhnode ($lh_results->get_nodelist) { + foreach my $rhnode ($rh_results->get_nodelist) { + if ($lhnode->string_value eq $rhnode->string_value) { + return XML::XPath::Boolean->True; + } + } + } + return XML::XPath::Boolean->False; + } + elsif (($lh_results->isa('XML::XPath::NodeSet') || + $rh_results->isa('XML::XPath::NodeSet')) && + (!$lh_results->isa('XML::XPath::NodeSet') || + !$rh_results->isa('XML::XPath::NodeSet'))) { + # (that says: one is a nodeset, and one is not a nodeset) + + my ($nodeset, $other); + if ($lh_results->isa('XML::XPath::NodeSet')) { + $nodeset = $lh_results; + $other = $rh_results; + } + else { + $nodeset = $rh_results; + $other = $lh_results; + } + + # True if and only if there is a node in the + # nodeset such that the result of performing + # the comparison on (string_value($node)) + # is true. + if ($other->isa('XML::XPath::Number')) { + foreach my $node ($nodeset->get_nodelist) { + if ($node->string_value == $other->value) { + return XML::XPath::Boolean->True; + } + } + } + elsif ($other->isa('XML::XPath::Literal')) { + foreach my $node ($nodeset->get_nodelist) { + if ($node->string_value eq $other->value) { + return XML::XPath::Boolean->True; + } + } + } + elsif ($other->isa('XML::XPath::Boolean')) { + if ($nodeset->to_boolean->value == $other->value) { + return XML::XPath::Boolean->True; + } + } + + return XML::XPath::Boolean->False; + } + else { # Neither is a nodeset + if ($lh_results->isa('XML::XPath::Boolean') || + $rh_results->isa('XML::XPath::Boolean')) { + # if either is a boolean + if ($lh_results->to_boolean->value == $rh_results->to_boolean->value) { + return XML::XPath::Boolean->True; + } + return XML::XPath::Boolean->False; + } + elsif ($lh_results->isa('XML::XPath::Number') || + $rh_results->isa('XML::XPath::Number')) { + # if either is a number + local $^W; # 'number' might result in undef + if ($lh_results->to_number->value == $rh_results->to_number->value) { + return XML::XPath::Boolean->True; + } + return XML::XPath::Boolean->False; + } + else { + if ($lh_results->to_literal->value eq $rh_results->to_literal->value) { + return XML::XPath::Boolean->True; + } + return XML::XPath::Boolean->False; + } + } +} + +sub op_nequals { + my ($node, $lhs, $rhs) = @_; + if (op_equals($node, $lhs, $rhs)->value) { + return XML::XPath::Boolean->False; + } + return XML::XPath::Boolean->True; +} + +sub op_le { + my ($node, $lhs, $rhs) = @_; + op_gt($node, $rhs, $lhs); +} + +sub op_ge { + my ($node, $lhs, $rhs) = @_; + + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + if ($lh_results->isa('XML::XPath::NodeSet') && + $rh_results->isa('XML::XPath::NodeSet')) { + + foreach my $lhnode ($lh_results->get_nodelist) { + foreach my $rhnode ($rh_results->get_nodelist) { + my $lhNum = XML::XPath::Number->new($lhnode->string_value); + my $rhNum = XML::XPath::Number->new($rhnode->string_value); + if ($lhNum->value >= $rhNum->value) { + return XML::XPath::Boolean->True; + } + } + } + return XML::XPath::Boolean->False; + } + elsif (($lh_results->isa('XML::XPath::NodeSet') || + $rh_results->isa('XML::XPath::NodeSet')) && + (!$lh_results->isa('XML::XPath::NodeSet') || + !$rh_results->isa('XML::XPath::NodeSet'))) { + # (that says: one is a nodeset, and one is not a nodeset) + + my ($nodeset, $other); + my ($true, $false); + if ($lh_results->isa('XML::XPath::NodeSet')) { + $nodeset = $lh_results; + $other = $rh_results; + # we do this because unlike ==, these ops are direction dependant + ($false, $true) = (XML::XPath::Boolean->False, XML::XPath::Boolean->True); + } + else { + $nodeset = $rh_results; + $other = $lh_results; + # ditto above comment + ($true, $false) = (XML::XPath::Boolean->False, XML::XPath::Boolean->True); + } + + # True if and only if there is a node in the + # nodeset such that the result of performing + # the comparison on (string_value($node)) + # is true. + foreach my $node ($nodeset->get_nodelist) { + if ($node->to_number->value >= $other->to_number->value) { + return $true; + } + } + return $false; + } + else { # Neither is a nodeset + if ($lh_results->isa('XML::XPath::Boolean') || + $rh_results->isa('XML::XPath::Boolean')) { + # if either is a boolean + if ($lh_results->to_boolean->to_number->value + >= $rh_results->to_boolean->to_number->value) { + return XML::XPath::Boolean->True; + } + } + else { + if ($lh_results->to_number->value >= $rh_results->to_number->value) { + return XML::XPath::Boolean->True; + } + } + return XML::XPath::Boolean->False; + } +} + +sub op_gt { + my ($node, $lhs, $rhs) = @_; + + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + if ($lh_results->isa('XML::XPath::NodeSet') && + $rh_results->isa('XML::XPath::NodeSet')) { + + foreach my $lhnode ($lh_results->get_nodelist) { + foreach my $rhnode ($rh_results->get_nodelist) { + my $lhNum = XML::XPath::Number->new($lhnode->string_value); + my $rhNum = XML::XPath::Number->new($rhnode->string_value); + if ($lhNum->value > $rhNum->value) { + return XML::XPath::Boolean->True; + } + } + } + return XML::XPath::Boolean->False; + } + elsif (($lh_results->isa('XML::XPath::NodeSet') || + $rh_results->isa('XML::XPath::NodeSet')) && + (!$lh_results->isa('XML::XPath::NodeSet') || + !$rh_results->isa('XML::XPath::NodeSet'))) { + # (that says: one is a nodeset, and one is not a nodeset) + + my ($nodeset, $other); + my ($true, $false); + if ($lh_results->isa('XML::XPath::NodeSet')) { + $nodeset = $lh_results; + $other = $rh_results; + # we do this because unlike ==, these ops are direction dependant + ($false, $true) = (XML::XPath::Boolean->False, XML::XPath::Boolean->True); + } + else { + $nodeset = $rh_results; + $other = $lh_results; + # ditto above comment + ($true, $false) = (XML::XPath::Boolean->False, XML::XPath::Boolean->True); + } + + # True if and only if there is a node in the + # nodeset such that the result of performing + # the comparison on (string_value($node)) + # is true. + foreach my $node ($nodeset->get_nodelist) { + if ($node->to_number->value > $other->to_number->value) { + return $true; + } + } + return $false; + } + else { # Neither is a nodeset + if ($lh_results->isa('XML::XPath::Boolean') || + $rh_results->isa('XML::XPath::Boolean')) { + # if either is a boolean + if ($lh_results->to_boolean->value > $rh_results->to_boolean->value) { + return XML::XPath::Boolean->True; + } + } + else { + if ($lh_results->to_number->value > $rh_results->to_number->value) { + return XML::XPath::Boolean->True; + } + } + return XML::XPath::Boolean->False; + } +} + +sub op_lt { + my ($node, $lhs, $rhs) = @_; + op_gt($node, $rhs, $lhs); +} + +sub op_plus { + my ($node, $lhs, $rhs) = @_; + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + my $result = + $lh_results->to_number->value + + + $rh_results->to_number->value + ; + return XML::XPath::Number->new($result); +} + +sub op_minus { + my ($node, $lhs, $rhs) = @_; + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + my $result = + $lh_results->to_number->value + - + $rh_results->to_number->value + ; + return XML::XPath::Number->new($result); +} + +sub op_div { + my ($node, $lhs, $rhs) = @_; + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + my $result = eval { + $lh_results->to_number->value + / + $rh_results->to_number->value + ; + }; + if ($@) { + # assume divide by zero + # This is probably a terrible way to handle this! + # Ah well... who wants to live forever... + return XML::XPath::Literal->new('Infinity'); + } + return XML::XPath::Number->new($result); +} + +sub op_mod { + my ($node, $lhs, $rhs) = @_; + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + my $result = + $lh_results->to_number->value + % + $rh_results->to_number->value + ; + return XML::XPath::Number->new($result); +} + +sub op_mult { + my ($node, $lhs, $rhs) = @_; + my $lh_results = $lhs->evaluate($node); + my $rh_results = $rhs->evaluate($node); + + my $result = + $lh_results->to_number->value + * + $rh_results->to_number->value + ; + return XML::XPath::Number->new($result); +} + +sub op_union { + my ($node, $lhs, $rhs) = @_; + my $lh_result = $lhs->evaluate($node); + my $rh_result = $rhs->evaluate($node); + + if ($lh_result->isa('XML::XPath::NodeSet') && + $rh_result->isa('XML::XPath::NodeSet')) { + my %found; + my $results = XML::XPath::NodeSet->new; + foreach my $lhnode ($lh_result->get_nodelist) { + $found{"$lhnode"}++; + $results->push($lhnode); + } + foreach my $rhnode ($rh_result->get_nodelist) { + $results->push($rhnode) + unless exists $found{"$rhnode"}; + } + $results->sort; + return $results; + } + die "Both sides of a union must be Node Sets\n"; +} + +sub filter_by_predicate { + my $self = shift; + my ($nodeset, $predicate) = @_; + + # See spec section 2.4, paragraphs 2 & 3: + # For each node in the node-set to be filtered, the predicate Expr + # is evaluated with that node as the context node, with the number + # of nodes in the node set as the context size, and with the + # proximity position of the node in the node set with respect to + # the axis as the context position. + + if (!ref($nodeset)) { # use ref because nodeset has a bool context + die "No nodeset!!!"; + } + +# warn "Filter by predicate: $predicate\n"; + + my $newset = XML::XPath::NodeSet->new(); + + for(my $i = 1; $i <= $nodeset->size; $i++) { + # set context set each time 'cos a loc-path in the expr could change it + $self->{pp}->set_context_set($nodeset); + $self->{pp}->set_context_pos($i); + my $result = $predicate->evaluate($nodeset->get_node($i)); + if ($result->isa('XML::XPath::Boolean')) { + if ($result->value) { + $newset->push($nodeset->get_node($i)); + } + } + elsif ($result->isa('XML::XPath::Number')) { + if ($result->value == $i) { + $newset->push($nodeset->get_node($i)); + } + } + else { + if ($result->to_boolean->value) { + $newset->push($nodeset->get_node($i)); + } + } + } + + return $newset; +} + +1; diff --git a/XPath/Function.pm b/XPath/Function.pm new file mode 100644 index 0000000..e29b379 --- /dev/null +++ b/XPath/Function.pm @@ -0,0 +1,392 @@ +# $Id: Function.pm,v 1.26 2002/12/26 17:24:50 matt Exp $ + +package XML::XPath::Function; +use XML::XPath::Number; +use XML::XPath::Literal; +use XML::XPath::Boolean; +use XML::XPath::NodeSet; +use XML::XPath::Node::Attribute; +use strict; + +sub new { + my $class = shift; + my ($pp, $name, $params) = @_; + bless { + pp => $pp, + name => $name, + params => $params + }, $class; +} + +sub as_string { + my $self = shift; + my $string = $self->{name} . "("; + my $second; + foreach (@{$self->{params}}) { + $string .= "," if $second++; + $string .= $_->as_string; + } + $string .= ")"; + return $string; +} + +sub as_xml { + my $self = shift; + my $string = "{name}\""; + my $params = ""; + foreach (@{$self->{params}}) { + $params .= "" . $_->as_string . "\n"; + } + if ($params) { + $string .= ">\n$params\n"; + } + else { + $string .= " />\n"; + } + + return $string; +} + +sub evaluate { + my $self = shift; + my $node = shift; + if ($node->isa('XML::XPath::NodeSet')) { + $node = $node->get_node(1); + } + my @params; + foreach my $param (@{$self->{params}}) { + my $results = $param->evaluate($node); + push @params, $results; + } + $self->_execute($self->{name}, $node, @params); +} + +sub _execute { + my $self = shift; + my ($name, $node, @params) = @_; + $name =~ s/-/_/g; + no strict 'refs'; + $self->$name($node, @params); +} + +# All functions should return one of: +# XML::XPath::Number +# XML::XPath::Literal (string) +# XML::XPath::NodeSet +# XML::XPath::Boolean + +### NODESET FUNCTIONS ### + +sub last { + my $self = shift; + my ($node, @params) = @_; + die "last: function doesn't take parameters\n" if (@params); + return XML::XPath::Number->new($self->{pp}->get_context_size); +} + +sub position { + my $self = shift; + my ($node, @params) = @_; + if (@params) { + die "position: function doesn't take parameters [ ", @params, " ]\n"; + } + # return pos relative to axis direction + return XML::XPath::Number->new($self->{pp}->get_context_pos); +} + +sub count { + my $self = shift; + my ($node, @params) = @_; + die "count: Parameter must be a NodeSet\n" unless $params[0]->isa('XML::XPath::NodeSet'); + return XML::XPath::Number->new($params[0]->size); +} + +sub id { + my $self = shift; + my ($node, @params) = @_; + die "id: Function takes 1 parameter\n" unless @params == 1; + my $results = XML::XPath::NodeSet->new(); + if ($params[0]->isa('XML::XPath::NodeSet')) { + # result is the union of applying id() to the + # string value of each node in the nodeset. + foreach my $node ($params[0]->get_nodelist) { + my $string = $node->string_value; + $results->append($self->id($node, XML::XPath::Literal->new($string))); + } + } + else { # The actual id() function... + my $string = $self->string($node, $params[0]); + $_ = $string->value; # get perl scalar + my @ids = split; # splits $_ + foreach my $id (@ids) { + if (my $found = $node->getElementById($id)) { + $results->push($found); + } + } + } + return $results; +} + +sub local_name { + my $self = shift; + my ($node, @params) = @_; + if (@params > 1) { + die "name() function takes one or no parameters\n"; + } + elsif (@params) { + my $nodeset = shift(@params); + $node = $nodeset->get_node(1); + } + + return XML::XPath::Literal->new($node->getLocalName); +} + +sub namespace_uri { + my $self = shift; + my ($node, @params) = @_; + die "namespace-uri: Function not supported\n"; +} + +sub name { + my $self = shift; + my ($node, @params) = @_; + if (@params > 1) { + die "name() function takes one or no parameters\n"; + } + elsif (@params) { + my $nodeset = shift(@params); + $node = $nodeset->get_node(1); + } + + return XML::XPath::Literal->new($node->getName); +} + +### STRING FUNCTIONS ### + +sub string { + my $self = shift; + my ($node, @params) = @_; + die "string: Too many parameters\n" if @params > 1; + if (@params) { + return XML::XPath::Literal->new($params[0]->string_value); + } + + # TODO - this MUST be wrong! - not sure now. -matt + return XML::XPath::Literal->new($node->string_value); + # default to nodeset with just $node in. +} + +sub concat { + my $self = shift; + my ($node, @params) = @_; + die "concat: Too few parameters\n" if @params < 2; + my $string = join('', map {$_->string_value} @params); + return XML::XPath::Literal->new($string); +} + +sub starts_with { + my $self = shift; + my ($node, @params) = @_; + die "starts-with: incorrect number of params\n" unless @params == 2; + my ($string1, $string2) = ($params[0]->string_value, $params[1]->string_value); + if (substr($string1, 0, length($string2)) eq $string2) { + return XML::XPath::Boolean->True; + } + return XML::XPath::Boolean->False; +} + +sub contains { + my $self = shift; + my ($node, @params) = @_; + die "starts-with: incorrect number of params\n" unless @params == 2; + my $value = $params[1]->string_value; + if ($params[0]->string_value =~ /(.*?)\Q$value\E(.*)/) { + # $1 and $2 stored for substring funcs below + # TODO: Fix this nasty implementation! + return XML::XPath::Boolean->True; + } + return XML::XPath::Boolean->False; +} + +sub substring_before { + my $self = shift; + my ($node, @params) = @_; + die "starts-with: incorrect number of params\n" unless @params == 2; + if ($self->contains($node, @params)->value) { + return XML::XPath::Literal->new($1); # hope that works! + } + else { + return XML::XPath::Literal->new(''); + } +} + +sub substring_after { + my $self = shift; + my ($node, @params) = @_; + die "starts-with: incorrect number of params\n" unless @params == 2; + if ($self->contains($node, @params)->value) { + return XML::XPath::Literal->new($2); + } + else { + return XML::XPath::Literal->new(''); + } +} + +sub substring { + my $self = shift; + my ($node, @params) = @_; + die "substring: Wrong number of parameters\n" if (@params < 2 || @params > 3); + my ($str, $offset, $len); + $str = $params[0]->string_value; + $offset = $params[1]->value; + $offset--; # uses 1 based offsets + if (@params == 3) { + $len = $params[2]->value; + } + return XML::XPath::Literal->new(substr($str, $offset, $len)); +} + +sub string_length { + my $self = shift; + my ($node, @params) = @_; + die "string-length: Wrong number of params\n" if @params > 1; + if (@params) { + return XML::XPath::Number->new(length($params[0]->string_value)); + } + else { + return XML::XPath::Number->new( + length($node->string_value) + ); + } +} + +sub normalize_space { + my $self = shift; + my ($node, @params) = @_; + die "normalize-space: Wrong number of params\n" if @params > 1; + my $str; + if (@params) { + $str = $params[0]->string_value; + } + else { + $str = $node->string_value; + } + $str =~ s/^\s*//; + $str =~ s/\s*$//; + $str =~ s/\s+/ /g; + return XML::XPath::Literal->new($str); +} + +sub translate { + my $self = shift; + my ($node, @params) = @_; + die "translate: Wrong number of params\n" if @params != 3; + local $_ = $params[0]->string_value; + my $find = $params[1]->string_value; + my $repl = $params[2]->string_value; + eval "tr/\\Q$find\\E/\\Q$repl\\E/d, 1" or die $@; + return XML::XPath::Literal->new($_); +} + +### BOOLEAN FUNCTIONS ### + +sub boolean { + my $self = shift; + my ($node, @params) = @_; + die "boolean: Incorrect number of parameters\n" if @params != 1; + return $params[0]->to_boolean; +} + +sub not { + my $self = shift; + my ($node, @params) = @_; + $params[0] = $params[0]->to_boolean unless $params[0]->isa('XML::XPath::Boolean'); + $params[0]->value ? XML::XPath::Boolean->False : XML::XPath::Boolean->True; +} + +sub true { + my $self = shift; + my ($node, @params) = @_; + die "true: function takes no parameters\n" if @params > 0; + XML::XPath::Boolean->True; +} + +sub false { + my $self = shift; + my ($node, @params) = @_; + die "true: function takes no parameters\n" if @params > 0; + XML::XPath::Boolean->False; +} + +sub lang { + my $self = shift; + my ($node, @params) = @_; + die "lang: function takes 1 parameter\n" if @params != 1; + my $lang = $node->findvalue('(ancestor-or-self::*[@xml:lang]/@xml:lang)[last()]'); + my $lclang = lc($params[0]->string_value); + # warn("Looking for lang($lclang) in $lang\n"); + if (substr(lc($lang), 0, length($lclang)) eq $lclang) { + return XML::XPath::Boolean->True; + } + else { + return XML::XPath::Boolean->False; + } +} + +### NUMBER FUNCTIONS ### + +sub number { + my $self = shift; + my ($node, @params) = @_; + die "number: Too many parameters\n" if @params > 1; + if (@params) { + if ($params[0]->isa('XML::XPath::Node')) { + return XML::XPath::Number->new( + $params[0]->string_value + ); + } + return $params[0]->to_number; + } + + return XML::XPath::Number->new( $node->string_value ); +} + +sub sum { + my $self = shift; + my ($node, @params) = @_; + die "sum: Parameter must be a NodeSet\n" unless $params[0]->isa('XML::XPath::NodeSet'); + my $sum = 0; + foreach my $node ($params[0]->get_nodelist) { + $sum += $self->number($node)->value; + } + return XML::XPath::Number->new($sum); +} + +sub floor { + my $self = shift; + my ($node, @params) = @_; + require POSIX; + my $num = $self->number($node, @params); + return XML::XPath::Number->new( + POSIX::floor($num->value)); +} + +sub ceiling { + my $self = shift; + my ($node, @params) = @_; + require POSIX; + my $num = $self->number($node, @params); + return XML::XPath::Number->new( + POSIX::ceil($num->value)); +} + +sub round { + my $self = shift; + my ($node, @params) = @_; + my $num = $self->number($node, @params); + require POSIX; + return XML::XPath::Number->new( + POSIX::floor($num->value + 0.5)); # Yes, I know the spec says don't do this... +} + +1; diff --git a/XPath/Literal.pm b/XPath/Literal.pm new file mode 100644 index 0000000..5dfee18 --- /dev/null +++ b/XPath/Literal.pm @@ -0,0 +1,99 @@ +# $Id: Literal.pm,v 1.11 2001/03/16 11:10:08 matt Exp $ + +package XML::XPath::Literal; +use XML::XPath::Boolean; +use XML::XPath::Number; +use strict; + +use overload + '""' => \&value, + 'cmp' => \&cmp; + +sub new { + my $class = shift; + my ($string) = @_; + +# $string =~ s/"/"/g; +# $string =~ s/'/'/g; + + bless \$string, $class; +} + +sub as_string { + my $self = shift; + my $string = $$self; + $string =~ s/'/'/g; + return "'$string'"; +} + +sub as_xml { + my $self = shift; + my $string = $$self; + return "$string\n"; +} + +sub value { + my $self = shift; + $$self; +} + +sub cmp { + my $self = shift; + my ($cmp, $swap) = @_; + if ($swap) { + return $cmp cmp $$self; + } + return $$self cmp $cmp; +} + +sub evaluate { + my $self = shift; + $self; +} + +sub to_boolean { + my $self = shift; + return (length($$self) > 0) ? XML::XPath::Boolean->True : XML::XPath::Boolean->False; +} + +sub to_number { return XML::XPath::Number->new($_[0]->value); } +sub to_literal { return $_[0]; } + +sub string_value { return $_[0]->value; } + +1; +__END__ + +=head1 NAME + +XML::XPath::Literal - Simple string values. + +=head1 DESCRIPTION + +In XPath terms a Literal is what we know as a string. + +=head1 API + +=head2 new($string) + +Create a new Literal object with the value in $string. Note that " and +' will be converted to " and ' respectively. That is not part of the XPath +specification, but I consider it useful. Note though that you have to go +to extraordinary lengths in an XML template file (be it XSLT or whatever) to +make use of this: + + + +Which produces a Literal of: + + I'm feeling "sad" + +=head2 value() + +Also overloaded as stringification, simply returns the literal string value. + +=head2 cmp($literal) + +Returns the equivalent of perl's cmp operator against the given $literal. + +=cut diff --git a/XPath/LocationPath.pm b/XPath/LocationPath.pm new file mode 100644 index 0000000..eb483d6 --- /dev/null +++ b/XPath/LocationPath.pm @@ -0,0 +1,61 @@ +# $Id: LocationPath.pm,v 1.8 2001/03/16 11:10:08 matt Exp $ + +package XML::XPath::LocationPath; +use XML::XPath::Root; +use strict; + +sub new { + my $class = shift; + my $self = []; + bless $self, $class; +} + +sub as_string { + my $self = shift; + my $string; + for (my $i = 0; $i < @$self; $i++) { + $string .= $self->[$i]->as_string; + $string .= "/" if $self->[$i+1]; + } + return $string; +} + +sub as_xml { + my $self = shift; + my $string = "\n"; + + for (my $i = 0; $i < @$self; $i++) { + $string .= $self->[$i]->as_xml; + } + + $string .= "\n"; + return $string; +} + +sub set_root { + my $self = shift; + unshift @$self, XML::XPath::Root->new(); +} + +sub evaluate { + my $self = shift; + # context _MUST_ be a single node + my $context = shift; + die "No context" unless $context; + + # I _think_ this is how it should work :) + + my $nodeset = XML::XPath::NodeSet->new(); + $nodeset->push($context); + + foreach my $step (@$self) { + # For each step + # evaluate the step with the nodeset + my $pos = 1; + $nodeset = $step->evaluate($nodeset); + } + + return $nodeset; +} + +1; diff --git a/XPath/Node.pm b/XPath/Node.pm new file mode 100644 index 0000000..e86db0a --- /dev/null +++ b/XPath/Node.pm @@ -0,0 +1,592 @@ +# $Id: Node.pm,v 1.13 2002/12/26 17:24:50 matt Exp $ + +package XML::XPath::Node; + +use strict; +use vars qw(@ISA @EXPORT $AUTOLOAD %EXPORT_TAGS @EXPORT_OK); +use Exporter; +use Carp; +@ISA = ('Exporter'); + +sub UNKNOWN_NODE () {0;} +sub ELEMENT_NODE () {1;} +sub ATTRIBUTE_NODE () {2;} +sub TEXT_NODE () {3;} +sub CDATA_SECTION_NODE () {4;} +sub ENTITY_REFERENCE_NODE () {5;} +sub ENTITY_NODE () {6;} +sub PROCESSING_INSTRUCTION_NODE () {7;} +sub COMMENT_NODE () {8;} +sub DOCUMENT_NODE () {9;} +sub DOCUMENT_TYPE_NODE () {10;} +sub DOCUMENT_FRAGMENT_NODE () {11;} +sub NOTATION_NODE () {12;} + +# Non core DOM stuff +sub ELEMENT_DECL_NODE () {13;} +sub ATT_DEF_NODE () {14;} +sub XML_DECL_NODE () {15;} +sub ATTLIST_DECL_NODE () {16;} +sub NAMESPACE_NODE () {17;} + +# per-node constants + +# All +sub node_parent () { 0; } +sub node_pos () { 1; } +sub node_global_pos () { 2; } + +# Element +sub node_prefix () { 3; } +sub node_children () { 4; } +sub node_name () { 5; } +sub node_attribs () { 6; } +sub node_namespaces () { 7; } +sub node_ids () { 8; } + +# Char +sub node_text () { 3; } + +# PI +sub node_target () { 3; } +sub node_data () { 4; } + +# Comment +sub node_comment () { 3; } + +# Attribute +# sub node_prefix () { 3; } +sub node_key () { 4; } +sub node_value () { 5; } + +# Namespaces +# sub node_prefix () { 3; } +sub node_expanded () { 4; } + +@EXPORT = qw( + UNKNOWN_NODE + ELEMENT_NODE + ATTRIBUTE_NODE + TEXT_NODE + CDATA_SECTION_NODE + ENTITY_REFERENCE_NODE + ENTITY_NODE + PROCESSING_INSTRUCTION_NODE + COMMENT_NODE + DOCUMENT_NODE + DOCUMENT_TYPE_NODE + DOCUMENT_FRAGMENT_NODE + NOTATION_NODE + ELEMENT_DECL_NODE + ATT_DEF_NODE + XML_DECL_NODE + ATTLIST_DECL_NODE + NAMESPACE_NODE + ); + +@EXPORT_OK = qw( + node_parent + node_pos + node_global_pos + node_prefix + node_children + node_name + node_attribs + node_namespaces + node_text + node_target + node_data + node_comment + node_key + node_value + node_expanded + node_ids + ); + +%EXPORT_TAGS = ( + 'node_keys' => [ + qw( + node_parent + node_pos + node_global_pos + node_prefix + node_children + node_name + node_attribs + node_namespaces + node_text + node_target + node_data + node_comment + node_key + node_value + node_expanded + node_ids + ), @EXPORT, + ], +); + + +my $global_pos = 0; + +sub nextPos { + my $class = shift; + return $global_pos += 5; +} + +sub resetPos { + $global_pos = 0; +} + +my %DecodeDefaultEntity = +( + '"' => """, + ">" => ">", + "<" => "<", + "'" => "'", + "&" => "&" +); + +sub XMLescape { + my ($str, $default) = @_; + return undef unless defined $str; + $default ||= ''; + + if ($XML::XPath::EncodeUtf8AsEntity) { + $str =~ s/([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)|([$default])|(]]>)/ + defined($1) ? XmlUtf8Decode ($1) : + defined ($2) ? $DecodeDefaultEntity{$2} : "]]>" /egsx; + } + else { + $str =~ s/([$default])|(]]>)/ + defined ($1) ? $DecodeDefaultEntity{$1} : ']]>' /gsex; + } + +#?? could there be references that should not be expanded? +# e.g. should not replace &#nn; ¯ and &abc; +# $str =~ s/&(?!($ReName|#[0-9]+|#x[0-9a-fA-F]+);)/&/go; + + $str; +} + +# +# Opposite of XmlUtf8Decode plus it adds prefix "&#" or "&#x" and suffix ";" +# The 2nd parameter ($hex) indicates whether the result is hex encoded or not. +# +sub XmlUtf8Decode +{ + my ($str, $hex) = @_; + my $len = length ($str); + my $n; + + if ($len == 2) { + my @n = unpack "C2", $str; + $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); + } + elsif ($len == 3) { + my @n = unpack "C3", $str; + $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + + ($n[2] & 0x3f); + } + elsif ($len == 4) { + my @n = unpack "C4", $str; + $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); + } + elsif ($len == 1) { # just to be complete... + $n = ord ($str); + } + else { + die "bad value [$str] for XmlUtf8Decode"; + } + $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; +} + +sub new { + my $class = shift; + no strict 'refs'; + my $impl = $class . "Impl"; + my $this = $impl->new(@_); + if ($XML::XPath::SafeMode) { + return $this; + } + my $self = \$this; + return bless $self, $class; +} + +sub AUTOLOAD { + my $method = $AUTOLOAD; + $method =~ s/.*:://; +# warn "AUTOLOAD $method!\n"; + no strict 'refs'; + *{$AUTOLOAD} = sub { + my $self = shift; + my $olderror = $@; # store previous exceptions + my $obj = eval { $$self }; + if ($@) { + if ($@ =~ /Not a SCALAR reference/) { + croak("No such method $method in " . ref($self)); + } + croak $@; + } + if ($obj) { + # make sure $@ propogates if this method call was the result + # of losing scope because of a die(). + if ($method =~ /^(DESTROY|del_parent_link)$/) { + $obj->$method(@_); + $@ = $olderror if $olderror; + return; + } + return $obj->$method(@_); + } + }; + goto &$AUTOLOAD; +} + +package XML::XPath::NodeImpl; + +use vars qw/@ISA $AUTOLOAD/; +@ISA = ('XML::XPath::Node'); + +sub new { + die "Virtual base method"; +} + +sub getNodeType { + my $self = shift; + return XML::XPath::Node::UNKNOWN_NODE; +} + +sub isElementNode {} +sub isAttributeNode {} +sub isNamespaceNode {} +sub isTextNode {} +sub isProcessingInstructionNode {} +sub isPINode {} +sub isCommentNode {} + +sub getNodeValue { + return; +} + +sub getValue { + shift->getNodeValue(@_); +} + +sub setNodeValue { + return; +} + +sub setValue { + shift->setNodeValue(@_); +} + +sub getParentNode { + my $self = shift; + return $self->[XML::XPath::Node::node_parent]; +} + +sub getRootNode { + my $self = shift; + while (my $parent = $self->getParentNode) { + $self = $parent; + } + return $self; +} + +sub getElementById { + my $self = shift; + my ($id) = @_; +# warn "getElementById: $id\n"; + my $root = $self->getRootNode; + my $node = $root->[XML::XPath::Node::node_ids]{$id}; +# warn "returning node: ", $node->getName, "\n"; + return $node; +} + +sub getName { } +sub getData { } + +sub getChildNodes { + return wantarray ? () : []; +} + +sub getChildNode { + return; +} + +sub getAttribute { + return; +} + +sub getAttributes { + return wantarray ? () : []; +} + +sub getAttributeNodes { + shift->getAttributes(@_); +} + +sub getNamespaceNodes { + return wantarray ? () : []; +} + +sub getNamespace { + return; +} + +sub getLocalName { + return; +} + +sub string_value { return; } + +sub get_pos { + my $self = shift; + return $self->[XML::XPath::Node::node_pos]; +} + +sub set_pos { + my $self = shift; + $self->[XML::XPath::Node::node_pos] = shift; +} + +sub get_global_pos { + my $self = shift; + return $self->[XML::XPath::Node::node_global_pos]; +} + +sub set_global_pos { + my $self = shift; + $self->[XML::XPath::Node::node_global_pos] = shift; +} + +sub renumber { + my $self = shift; + my $search = shift; + my $diff = shift; + + foreach my $node ($self->findnodes($search)) { + $node->set_global_pos( + $node->get_global_pos + $diff + ); + } +} + +sub insertAfter { + my $self = shift; + my $newnode = shift; + my $posnode = shift; + + my $pos_number = eval { $posnode->[XML::XPath::Node::node_children][-1]->get_global_pos() + 1; }; + if (!defined $pos_number) { + $pos_number = $posnode->get_global_pos() + 1; + } + + eval { + if ($pos_number == + $posnode->findnodes( + 'following::node()' + )->get_node(1)->get_global_pos()) { + $posnode->renumber('following::node()', +5); + } + }; + + my $pos = $posnode->get_pos; + + $newnode->setParentNode($self); + splice @{$self->[XML::XPath::Node::node_children]}, $pos + 1, 0, $newnode; + + for (my $i = $pos + 1; $i < @{$self->[XML::XPath::Node::node_children]}; $i++) { + $self->[XML::XPath::Node::node_children][$i]->set_pos($i); + } + + $newnode->set_global_pos($pos_number); +} + +sub insertBefore { + my $self = shift; + my $newnode = shift; + my $posnode = shift; + + my $pos_number = ($posnode->getPreviousSibling() || $posnode->getParentNode)->get_global_pos(); + if ($pos_number == $posnode->get_global_pos()) { + $posnode->renumber('self::node() | descendant::node() | following::node()', +5); + } + + my $pos = $posnode->get_pos; + + $newnode->setParentNode($self); + splice @{$self->[XML::XPath::Node::node_children]}, $pos, 0, $newnode; + + for (my $i = $pos; $i < @{$self->[XML::XPath::Node::node_children]}; $i++) { + $self->[XML::XPath::Node::node_children][$i]->set_pos($i); + } + + $newnode->set_global_pos($pos_number); +} + +sub getPreviousSibling { + my $self = shift; + my $pos = $self->[XML::XPath::Node::node_pos]; + return unless $self->[XML::XPath::Node::node_parent]; + return $self->[XML::XPath::Node::node_parent]->getChildNode($pos); +} + +sub getNextSibling { + my $self = shift; + my $pos = $self->[XML::XPath::Node::node_pos]; + return unless $self->[XML::XPath::Node::node_parent]; + return $self->[XML::XPath::Node::node_parent]->getChildNode($pos + 2); +} + +sub setParentNode { + my $self = shift; + my $parent = shift; +# warn "SetParent of ", ref($self), " to ", $parent->[XML::XPath::Node::node_name], "\n"; + $self->[XML::XPath::Node::node_parent] = $parent; +} + +sub del_parent_link { + my $self = shift; + $self->[XML::XPath::Node::node_parent] = undef; +} + +sub dispose { + my $self = shift; + foreach my $kid ($self->getChildNodes) { + $kid->dispose; + } + foreach my $kid ($self->getAttributeNodes) { + $kid->dispose; + } + foreach my $kid ($self->getNamespaceNodes) { + $kid->dispose; + } + $self->[XML::XPath::Node::node_parent] = undef; +} + +sub to_number { + my $num = shift->string_value; + return XML::XPath::Number->new($num); +} + +sub find { + my $node = shift; + my ($path) = @_; + my $xp = XML::XPath->new(); # new is v. lightweight + return $xp->find($path, $node); +} + +sub findvalue { + my $node = shift; + my ($path) = @_; + my $xp = XML::XPath->new(); + return $xp->findvalue($path, $node); +} + +sub findnodes { + my $node = shift; + my ($path) = @_; + my $xp = XML::XPath->new(); + return $xp->findnodes($path, $node); +} + +sub matches { + my $node = shift; + my ($path, $context) = @_; + my $xp = XML::XPath->new(); + return $xp->matches($node, $path, $context); +} + +sub to_sax { + my $self = shift; + unshift @_, 'Handler' if @_ == 1; + my %handlers = @_; + + my $doch = $handlers{DocumentHandler} || $handlers{Handler}; + my $dtdh = $handlers{DTDHandler} || $handlers{Handler}; + my $enth = $handlers{EntityResolver} || $handlers{Handler}; + + $self->_to_sax ($doch, $dtdh, $enth); +} + +sub DESTROY {} + +use Carp; + +sub _to_sax { + carp "_to_sax not implemented in ", ref($_[0]); +} + +1; +__END__ + +=head1 NAME + +XML::XPath::Node - internal representation of a node + +=head1 API + +The Node API aims to emulate DOM to some extent, however the API +isn't quite compatible with DOM. This is to ease transition from +XML::DOM programming to XML::XPath. Compatibility with DOM may +arise once XML::DOM gets namespace support. + +=head2 new + +Creates a new node. See the sub-classes for parameters to pass to new(). + +=head2 getNodeType + +Returns one of ELEMENT_NODE, TEXT_NODE, COMMENT_NODE, ATTRIBUTE_NODE, +PROCESSING_INSTRUCTION_NODE or NAMESPACE_NODE. UNKNOWN_NODE is returned +if the sub-class doesn't implement getNodeType - but that means +something is broken! The constants are exported by default from +XML::XPath::Node. The constants have the same numeric value as the +XML::DOM versions. + +=head2 getParentNode + +Returns the parent of this node, or undef if this is the root node. Note +that the root node is the root node in terms of XPath - not the root +element node. + +=head2 to_sax ( $handler | %handlers ) + +Generates sax calls to the handler or handlers. See the PerlSAX docs for +details (not yet implemented correctly). + +=head1 MORE INFO + +See the sub-classes for the meaning of the rest of the API: + +=over 4 + +=item * + +L + +=item * + +L + +=item * + +L + +=item * + +L + +=item * + +L + +=item * + +L + +=back + +=cut diff --git a/XPath/Node/Attribute.pm b/XPath/Node/Attribute.pm new file mode 100644 index 0000000..3e7a6b6 --- /dev/null +++ b/XPath/Node/Attribute.pm @@ -0,0 +1,135 @@ +# $Id: Attribute.pm,v 1.9 2001/11/05 19:57:47 matt Exp $ + +package XML::XPath::Node::Attribute; + +use strict; +use vars qw/@ISA/; + +@ISA = ('XML::XPath::Node'); + +package XML::XPath::Node::AttributeImpl; + +use vars qw/@ISA/; +@ISA = ('XML::XPath::NodeImpl', 'XML::XPath::Node::Attribute'); +use XML::XPath::Node ':node_keys'; + +sub new { + my $class = shift; + my ($key, $val, $prefix) = @_; + + my $pos = XML::XPath::Node->nextPos; + + my @vals; + @vals[node_global_pos, node_prefix, node_key, node_value] = + ($pos, $prefix, $key, $val); + my $self = \@vals; + + bless $self, $class; + +} + +sub getNodeType { ATTRIBUTE_NODE } + +sub isAttributeNode { 1; } + +sub getName { + my $self = shift; + $self->[node_key]; +} + +sub getLocalName { + my $self = shift; + my $local = $self->[node_key]; + $local =~ s/.*://; + return $local; +} + +sub getNodeValue { + my $self = shift; + $self->[node_value]; +} + +sub getData { + shift->getNodeValue(@_); +} + +sub setNodeValue { + my $self = shift; + $self->[node_value] = shift; +} + +sub getPrefix { + my $self = shift; + $self->[node_prefix]; +} + +sub string_value { + my $self = shift; + return $self->[node_value]; +} + +sub toString { + my $self = shift; + my $string = ' '; +# if ($self->[node_prefix]) { +# $string .= $self->[node_prefix] . ':'; +# } + $string .= join('', + $self->[node_key], + '="', + XML::XPath::Node::XMLescape($self->[node_value], '"&><'), + '"'); + return $string; +} + +sub getNamespace { + my $self = shift; + my ($prefix) = @_; + $prefix ||= $self->getPrefix; + if (my $parent = $self->getParentNode) { + return $parent->getNamespace($prefix); + } +} + +1; +__END__ + +=head1 NAME + +Attribute - a single attribute + +=head1 API + +=head2 new ( key, value, prefix ) + +Create a new attribute node. + +=head2 getName + +Returns the key for the attribute + +=head2 getLocalName + +As getName above, but without namespace information + +=head2 getNodeValue / getData + +Returns the value + +=head2 setNodeValue + +Sets the value of the attribute node. + +=head2 getPrefix + +Returns the prefix + +=head2 getNamespace + +Return the namespace. + +=head2 toString + +Generates key="value", encoded correctly. + +=cut diff --git a/XPath/Node/Comment.pm b/XPath/Node/Comment.pm new file mode 100644 index 0000000..e110710 --- /dev/null +++ b/XPath/Node/Comment.pm @@ -0,0 +1,91 @@ +# $Id: Comment.pm,v 1.5 2000/09/05 13:05:46 matt Exp $ + +package XML::XPath::Node::Comment; + +use strict; +use vars qw/@ISA/; + +@ISA = ('XML::XPath::Node'); + +package XML::XPath::Node::CommentImpl; + +use vars qw/@ISA/; +@ISA = ('XML::XPath::NodeImpl', 'XML::XPath::Node::Comment'); +use XML::XPath::Node ':node_keys'; + +sub new { + my $class = shift; + my ($comment) = @_; + + my $pos = XML::XPath::Node->nextPos; + + my @vals; + @vals[node_global_pos, node_comment] = + ($pos, $comment); + my $self = \@vals; + + bless $self, $class; +} + +sub getNodeType { COMMENT_NODE } + +sub isCommentNode { 1; } + +sub getNodeValue { + return shift->[node_comment]; +} + +sub getData { + shift->getNodeValue; +} + +sub setNodeValue { + shift->[node_comment] = shift; +} + +sub _to_sax { + my $self = shift; + my ($doch, $dtdh, $enth) = @_; + + $doch->comment( { Data => $self->getValue } ); +} + +sub comment_escape { + my $data = shift; + $data =~ s/--/--/g; + return $data; +} + +sub string_value { + my $self = shift; + return $self->[node_comment]; +} + +sub toString { + my $self = shift; + return ''; +} + +1; +__END__ + +=head1 NAME + +Comment - an XML comment: + +=head1 API + +=head2 new ( data ) + +Create a new comment node. + +=head2 getValue / getData + +Returns the value in the comment + +=head2 toString + +Returns the comment with -- encoded as a numeric entity (if it +exists in the comment text). + +=cut diff --git a/XPath/Node/Element.pm b/XPath/Node/Element.pm new file mode 100644 index 0000000..5dbafac --- /dev/null +++ b/XPath/Node/Element.pm @@ -0,0 +1,503 @@ +# $Id: Element.pm,v 1.14 2002/12/26 17:24:50 matt Exp $ + +package XML::XPath::Node::Element; + +use strict; +use vars qw/@ISA/; + +@ISA = ('XML::XPath::Node'); + +package XML::XPath::Node::ElementImpl; + +use vars qw/@ISA/; +@ISA = ('XML::XPath::NodeImpl', 'XML::XPath::Node::Element'); +use XML::XPath::Node ':node_keys'; + +sub new { + my $class = shift; + my ($tag, $prefix) = @_; + + my $pos = XML::XPath::Node->nextPos; + + my @vals; + @vals[node_global_pos, node_prefix, node_children, node_name, node_attribs] = + ($pos, $prefix, [], $tag, []); + + my $self = \@vals; + bless $self, $class; +} + +sub getNodeType { ELEMENT_NODE } + +sub isElementNode { 1; } + +sub appendChild { + my $self = shift; + my $newnode = shift; + if (shift) { # called from internal to XML::XPath +# warn "AppendChild $newnode to $self\n"; + push @{$self->[node_children]}, $newnode; + $newnode->setParentNode($self); + $newnode->set_pos($#{$self->[node_children]}); + } + else { + if (@{$self->[node_children]}) { + $self->insertAfter($newnode, $self->[node_children][-1]); + } + else { + my $pos_number = $self->get_global_pos() + 1; + + if (my $brother = $self->getNextSibling()) { # optimisation + if ($pos_number == $brother->get_global_pos()) { + $self->renumber('following::node()', +5); + } + } + else { + eval { + if ($pos_number == + $self->findnodes( + 'following::node()' + )->get_node(1)->get_global_pos()) { + $self->renumber('following::node()', +5); + } + }; + } + + push @{$self->[node_children]}, $newnode; + $newnode->setParentNode($self); + $newnode->set_pos($#{$self->[node_children]}); + $newnode->set_global_pos($pos_number); + } + } +} + +sub removeChild { + my $self = shift; + my $delnode = shift; + + my $pos = $delnode->get_pos; + +# warn "removeChild: $pos\n"; + +# warn "children: ", scalar @{$self->[node_children]}, "\n"; + +# my $node = $self->[node_children][$pos]; +# warn "child at $pos is: $node\n"; + + splice @{$self->[node_children]}, $pos, 1; + +# warn "children now: ", scalar @{$self->[node_children]}, "\n"; + + for (my $i = $pos; $i < @{$self->[node_children]}; $i++) { +# warn "Changing pos of child: $i\n"; + $self->[node_children][$i]->set_pos($i); + } + + $delnode->del_parent_link; + +} + +sub appendIdElement { + my $self = shift; + my ($val, $element) = @_; +# warn "Adding '$val' to ID hash\n"; + $self->[node_ids]{$val} = $element; +} + +sub DESTROY { + my $self = shift; +# warn "DESTROY ELEMENT: ", $self->[node_name], "\n"; +# warn "DESTROY ROOT\n" unless $self->[node_name]; + + foreach my $kid ($self->getChildNodes) { + $kid && $kid->del_parent_link; + } + foreach my $attr ($self->getAttributeNodes) { + $attr && $attr->del_parent_link; + } + foreach my $ns ($self->getNamespaceNodes) { + $ns && $ns->del_parent_link; + } +# $self->[node_children] = undef; +# $self->[node_attribs] = undef; +# $self->[node_namespaces] = undef; +} + +sub getName { + my $self = shift; + $self->[node_name]; +} + +sub getTagName { + shift->getName(@_); +} + +sub getLocalName { + my $self = shift; + my $local = $self->[node_name]; + $local =~ s/.*://; + return $local; +} + +sub getChildNodes { + my $self = shift; + return wantarray ? @{$self->[node_children]} : $self->[node_children]; +} + +sub getChildNode { + my $self = shift; + my ($pos) = @_; + if ($pos < 1 || $pos > @{$self->[node_children]}) { + return; + } + return $self->[node_children][$pos - 1]; +} + +sub getFirstChild { + my $self = shift; + return unless @{$self->[node_children]}; + return $self->[node_children][0]; +} + +sub getLastChild { + my $self = shift; + return unless @{$self->[node_children]}; + return $self->[node_children][-1]; +} + +sub getAttributeNode { + my $self = shift; + my ($name) = @_; + my $attribs = $self->[node_attribs]; + foreach my $attr (@$attribs) { + return $attr if $attr->getName eq $name; + } +} + +sub getAttribute { + my $self = shift; + my $attr = $self->getAttributeNode(@_); + if ($attr) { + return $attr->getValue; + } +} + +sub getAttributes { + my $self = shift; + if ($self->[node_attribs]) { + return wantarray ? @{$self->[node_attribs]} : $self->[node_attribs]; + } + return wantarray ? () : []; +} + +sub appendAttribute { + my $self = shift; + my $attribute = shift; + + if (shift) { # internal call + push @{$self->[node_attribs]}, $attribute; + $attribute->setParentNode($self); + $attribute->set_pos($#{$self->[node_attribs]}); + } + else { + my $node_num; + if (@{$self->[node_attribs]}) { + $node_num = $self->[node_attribs][-1]->get_global_pos() + 1; + } + else { + $node_num = $self->get_global_pos() + 1; + } + + eval { + if (@{$self->[node_children]}) { + if ($node_num == $self->[node_children][-1]->get_global_pos()) { + $self->renumber('descendant::node() | following::node()', +5); + } + } + elsif ($node_num == + $self->findnodes('following::node()')->get_node(1)->get_global_pos()) { + $self->renumber('following::node()', +5); + } + }; + + push @{$self->[node_attribs]}, $attribute; + $attribute->setParentNode($self); + $attribute->set_pos($#{$self->[node_attribs]}); + $attribute->set_global_pos($node_num); + + } +} + +sub removeAttribute { + my $self = shift; + my $attrib = shift; + + if (!ref($attrib)) { + $attrib = $self->getAttributeNode($attrib); + } + + my $pos = $attrib->get_pos; + + splice @{$self->[node_attribs]}, $pos, 1; + + for (my $i = $pos; $i < @{$self->[node_attribs]}; $i++) { + $self->[node_attribs][$i]->set_pos($i); + } + + $attrib->del_parent_link; +} + +sub setAttribute { + my $self = shift; + my ($name, $value) = @_; + + if (my $attrib = $self->getAttributeNode($name)) { + $attrib->setNodeValue($value); + return $attrib; + } + + my ($nsprefix) = ($name =~ /^($XML::XPath::Parser::NCName):($XML::XPath::Parser::NCName)$/o); + + if ($nsprefix && !$self->getNamespace($nsprefix)) { + die "No namespace matches prefix: $nsprefix"; + } + + my $newnode = XML::XPath::Node::Attribute->new($name, $value, $nsprefix); + $self->appendAttribute($newnode); +} + +sub setAttributeNode { + my $self = shift; + my ($node) = @_; + + if (my $attrib = $self->getAttributeNode($node->getName)) { + $attrib->setNodeValue($node->getValue); + return $attrib; + } + + my ($nsprefix) = ($node->getName() =~ /^($XML::XPath::Parser::NCName):($XML::XPath::Parser::NCName)$/o); + + if ($nsprefix && !$self->getNamespace($nsprefix)) { + die "No namespace matches prefix: $nsprefix"; + } + + $self->appendAttribute($node); +} + +sub getNamespace { + my $self = shift; + my ($prefix) = @_; + $prefix ||= $self->getPrefix || '#default'; + my $namespaces = $self->[node_namespaces] || []; + foreach my $ns (@$namespaces) { + return $ns if $ns->getPrefix eq $prefix; + } + my $parent = $self->getParentNode; + + return $parent->getNamespace($prefix) if $parent; +} + +sub getNamespaces { + my $self = shift; + if ($self->[node_namespaces]) { + return wantarray ? @{$self->[node_namespaces]} : $self->[node_namespaces]; + } + return wantarray ? () : []; +} + +sub getNamespaceNodes { goto &getNamespaces } + +sub appendNamespace { + my $self = shift; + my ($ns) = @_; + push @{$self->[node_namespaces]}, $ns; + $ns->setParentNode($self); + $ns->set_pos($#{$self->[node_namespaces]}); +} + +sub getPrefix { + my $self = shift; + $self->[node_prefix]; +} + +sub getExpandedName { + my $self = shift; + warn "Expanded name not implemented for ", ref($self), "\n"; + return; +} + +sub _to_sax { + my $self = shift; + my ($doch, $dtdh, $enth) = @_; + + my $tag = $self->getName; + my @attr; + + for my $attr ($self->getAttributes) { + push @attr, $attr->getName, $attr->getValue; + } + + my $ns = $self->getNamespace($self->[node_prefix]); + if ($ns) { + $doch->start_element( + { + Name => $tag, + Attributes => { @attr }, + NamespaceURI => $ns->getExpanded, + Prefix => $ns->getPrefix, + LocalName => $self->getLocalName, + } + ); + } + else { + $doch->start_element( + { + Name => $tag, + Attributes => { @attr }, + } + ); + } + + for my $kid ($self->getChildNodes) { + $kid->_to_sax($doch, $dtdh, $enth); + } + + if ($ns) { + $doch->end_element( + { + Name => $tag, + NamespaceURI => $ns->getExpanded, + Prefix => $ns->getPrefix, + LocalName => $self->getLocalName + } + ); + } + else { + $doch->end_element( { Name => $tag } ); + } +} + +sub string_value { + my $self = shift; + my $string = ''; + foreach my $kid (@{$self->[node_children]}) { + if ($kid->getNodeType == ELEMENT_NODE + || $kid->getNodeType == TEXT_NODE) { + $string .= $kid->string_value; + } + } + return $string; +} + +sub toString { + my $self = shift; + my $norecurse = shift; + my $string = ''; + if (! $self->[node_name] ) { + # root node + return join('', map { $_->toString($norecurse) } @{$self->[node_children]}); + } + $string .= "<" . $self->[node_name]; + + $string .= join('', map { $_->toString } @{$self->[node_namespaces]}); + + $string .= join('', map { $_->toString } @{$self->[node_attribs]}); + + if (@{$self->[node_children]}) { + $string .= ">"; + + if (!$norecurse) { + $string .= join('', map { $_->toString($norecurse) } @{$self->[node_children]}); + } + + $string .= "[node_name] . ">"; + } + else { + $string .= " />"; + } + + return $string; +} + +1; +__END__ + +=head1 NAME + +Element - an + +=head1 API + +=head2 new ( name, prefix ) + +Create a new Element node with name "name" and prefix "prefix". The name +be "prefix:local" if prefix is defined. I know that sounds wierd, but it +works ;-) + +=head2 getName + +Returns the name (including "prefix:" if defined) of this element. + +=head2 getLocalName + +Returns just the local part of the name (the bit after "prefix:"). + +=head2 getChildNodes + +Returns the children of this element. In list context returns a list. In +scalar context returns an array ref. + +=head2 getChildNode ( pos ) + +Returns the child at position pos. + +=head2 appendChild ( childnode ) + +Appends the child node to the list of current child nodes. + +=head2 getAttribute ( name ) + +Returns the attribute node with key name. + +=head2 getAttributes / getAttributeNodes + +Returns the attribute nodes. In list context returns a list. In scalar +context returns an array ref. + +=head2 appendAttribute ( attrib_node) + +Appends the attribute node to the list of attributes (XML::XPath stores +attributes in order). + +=head2 getNamespace ( prefix ) + +Returns the namespace node by the given prefix + +=head2 getNamespaces / getNamespaceNodes + +Returns the namespace nodes. In list context returns a list. In scalar +context returns an array ref. + +=head2 appendNamespace ( ns_node ) + +Appends the namespace node to the list of namespaces. + +=head2 getPrefix + +Returns the prefix of this element + +=head2 getExpandedName + +Returns the expanded name of this element (not yet implemented right). + +=head2 string_value + +For elements, the string_value is the concatenation of all string_values +of all text-descendants of the element node in document order. + +=head2 toString ( [ norecurse ] ) + +Output (and all children) the node to a string. Doesn't process children +if the norecurse option is a true value. + +=cut diff --git a/XPath/Node/Namespace.pm b/XPath/Node/Namespace.pm new file mode 100644 index 0000000..736d9a3 --- /dev/null +++ b/XPath/Node/Namespace.pm @@ -0,0 +1,99 @@ +# $Id: Namespace.pm,v 1.4 2000/08/24 16:23:02 matt Exp $ + +package XML::XPath::Node::Namespace; + +use strict; +use vars qw/@ISA/; + +@ISA = ('XML::XPath::Node'); + +package XML::XPath::Node::NamespaceImpl; + +use vars qw/@ISA/; +@ISA = ('XML::XPath::NodeImpl', 'XML::XPath::Node::Namespace'); +use XML::XPath::Node ':node_keys'; + +sub new { + my $class = shift; + my ($prefix, $expanded) = @_; + + my $pos = XML::XPath::Node->nextPos; + + my @vals; + @vals[node_global_pos, node_prefix, node_expanded] = + ($pos, $prefix, $expanded); + my $self = \@vals; + + bless $self, $class; +} + +sub getNodeType { NAMESPACE_NODE } + +sub isNamespaceNode { 1; } + +sub getPrefix { + my $self = shift; + $self->[node_prefix]; +} + +sub getExpanded { + my $self = shift; + $self->[node_expanded]; +} + +sub getValue { + my $self = shift; + $self->[node_expanded]; +} + +sub getData { + my $self = shift; + $self->[node_expanded]; +} + +sub string_value { + my $self = shift; + $self->[node_expanded]; +} + +sub toString { + my $self = shift; + my $string = ''; + return '' unless defined $self->[node_expanded]; + if ($self->[node_prefix] eq '#default') { + $string .= ' xmlns="'; + } + else { + $string .= ' xmlns:' . $self->[node_prefix] . '="'; + } + $string .= XML::XPath::Node::XMLescape($self->[node_expanded], '"&<'); + $string .= '"'; +} + +1; +__END__ + +=head1 NAME + +Namespace - an XML namespace node + +=head1 API + +=head2 new ( prefix, expanded ) + +Create a new namespace node, expanded is the expanded namespace URI. + +=head2 getPrefix + +Returns the prefix + +=head2 getExpanded + +Returns the expanded URI + +=head2 toString + +Returns a string that you can add to the list +of attributes of an element: xmlns:prefix="expanded" + +=cut diff --git a/XPath/Node/PI.pm b/XPath/Node/PI.pm new file mode 100644 index 0000000..bf2eb25 --- /dev/null +++ b/XPath/Node/PI.pm @@ -0,0 +1,81 @@ +# $Id: PI.pm,v 1.4 2000/08/24 16:23:02 matt Exp $ + +package XML::XPath::Node::PI; + +use strict; +use vars qw/@ISA/; + +@ISA = ('XML::XPath::Node'); + +package XML::XPath::Node::PIImpl; + +use vars qw/@ISA/; +@ISA = ('XML::XPath::NodeImpl', 'XML::XPath::Node::PI'); +use XML::XPath::Node ':node_keys'; + +sub new { + my $class = shift; + my ($target, $data) = @_; + + my $pos = XML::XPath::Node->nextPos; + + my @vals; + @vals[node_global_pos, node_target, node_data] = + ($pos, $target, $data); + my $self = \@vals; + bless $self, $class; +} + +sub getNodeType { PROCESSING_INSTRUCTION_NODE } + +sub isPINode { 1; } +sub isProcessingInstructionNode { 1; } + +sub getTarget { + my $self = shift; + $self->[node_target]; +} + +sub getData { + my $self = shift; + $self->[node_data]; +} + +sub _to_sax { + my $self = shift; + my ($doch, $dtdh, $enth) = @_; + # PI's not supported in PerlSAX 1 +} + +sub string_value { + my $self = shift; + return $self->[node_data]; +} + +sub toString { + my $self = shift; + return "[node_target] . " " . XML::XPath::Node::XMLescape($self->[node_data], ">") . "?>"; +} + +1; +__END__ + +=head1 NAME + +PI - an XML processing instruction node + +=head1 API + +=head2 new ( target, data ) + +Create a new PI node. + +=head2 getTarget + +Returns the target + +=head2 getData + +Returns the data + +=cut diff --git a/XPath/Node/Text.pm b/XPath/Node/Text.pm new file mode 100644 index 0000000..dad3c04 --- /dev/null +++ b/XPath/Node/Text.pm @@ -0,0 +1,96 @@ +# $Id: Text.pm,v 1.5 2000/09/05 13:05:47 matt Exp $ + +package XML::XPath::Node::Text; + +use strict; +use vars qw/@ISA/; + +@ISA = ('XML::XPath::Node'); + +package XML::XPath::Node::TextImpl; + +use vars qw/@ISA/; +@ISA = ('XML::XPath::NodeImpl', 'XML::XPath::Node::Text'); +use XML::XPath::Node ':node_keys'; + +sub new { + my $class = shift; + my ($text) = @_; + + my $pos = XML::XPath::Node->nextPos; + + my @vals; + @vals[node_global_pos, node_text] = ($pos, $text); + my $self = \@vals; + + bless $self, $class; +} + +sub getNodeType { TEXT_NODE } + +sub isTextNode { 1; } + +sub appendText { + my $self = shift; + my ($text) = @_; + $self->[node_text] .= $text; +} + +sub getNodeValue { + my $self = shift; + $self->[node_text]; +} + +sub getData { + my $self = shift; + $self->[node_text]; +} + +sub setNodeValue { + my $self = shift; + $self->[node_text] = shift; +} + +sub _to_sax { + my $self = shift; + my ($doch, $dtdh, $enth) = @_; + + $doch->characters( { Data => $self->getValue } ); +} + +sub string_value { + my $self = shift; + $self->[node_text]; +} + +sub toString { + my $self = shift; + XML::XPath::Node::XMLescape($self->[node_text], "<&"); +} + +1; +__END__ + +=head1 NAME + +Text - an XML text node + +=head1 API + +=head2 new ( text ) + +Create a new text node. + +=head2 getValue / getData + +Returns the text + +=head2 string_value + +Returns the text + +=head2 appendText ( text ) + +Adds the given text string to this node. + +=cut diff --git a/XPath/NodeSet.pm b/XPath/NodeSet.pm new file mode 100644 index 0000000..f1955b2 --- /dev/null +++ b/XPath/NodeSet.pm @@ -0,0 +1,184 @@ +# $Id: NodeSet.pm,v 1.17 2002/04/24 13:06:08 matt Exp $ + +package XML::XPath::NodeSet; +use strict; + +use XML::XPath::Boolean; + +use overload + '""' => \&to_literal, + 'bool' => \&to_boolean, + ; + +sub new { + my $class = shift; + bless [], $class; +} + +sub sort { + my $self = CORE::shift; + @$self = CORE::sort { $a->get_global_pos <=> $b->get_global_pos } @$self; + return $self; +} + +sub pop { + my $self = CORE::shift; + CORE::pop @$self; +} + +sub push { + my $self = CORE::shift; + my (@nodes) = @_; + CORE::push @$self, @nodes; +} + +sub append { + my $self = CORE::shift; + my ($nodeset) = @_; + CORE::push @$self, $nodeset->get_nodelist; +} + +sub shift { + my $self = CORE::shift; + CORE::shift @$self; +} + +sub unshift { + my $self = CORE::shift; + my (@nodes) = @_; + CORE::unshift @$self, @nodes; +} + +sub prepend { + my $self = CORE::shift; + my ($nodeset) = @_; + CORE::unshift @$self, $nodeset->get_nodelist; +} + +sub size { + my $self = CORE::shift; + scalar @$self; +} + +sub get_node { # uses array index starting at 1, not 0 + my $self = CORE::shift; + my ($pos) = @_; + $self->[$pos - 1]; +} + +sub getRootNode { + my $self = CORE::shift; + return $self->[0]->getRootNode; +} + +sub get_nodelist { + my $self = CORE::shift; + @$self; +} + +sub to_boolean { + my $self = CORE::shift; + return (@$self > 0) ? XML::XPath::Boolean->True : XML::XPath::Boolean->False; +} + +sub string_value { + my $self = CORE::shift; + return '' unless @$self; + return $self->[0]->string_value; +} + +sub to_literal { + my $self = CORE::shift; + return XML::XPath::Literal->new( + join('', map { $_->string_value } @$self) + ); +} + +sub to_number { + my $self = CORE::shift; + return XML::XPath::Number->new( + $self->to_literal + ); +} + +1; +__END__ + +=head1 NAME + +XML::XPath::NodeSet - a list of XML document nodes + +=head1 DESCRIPTION + +An XML::XPath::NodeSet object contains an ordered list of nodes. The nodes +each take the same format as described in L. + +=head1 SYNOPSIS + + my $results = $xp->find('//someelement'); + if (!$results->isa('XML::XPath::NodeSet')) { + print "Found $results\n"; + exit; + } + foreach my $context ($results->get_nodelist) { + my $newresults = $xp->find('./other/element', $context); + ... + } + +=head1 API + +=head2 new() + +You will almost never have to create a new NodeSet object, as it is all +done for you by XPath. + +=head2 get_nodelist() + +Returns a list of nodes. See L for the format of +the nodes. + +=head2 string_value() + +Returns the string-value of the first node in the list. +See the XPath specification for what "string-value" means. + +=head2 to_literal() + +Returns the concatenation of all the string-values of all +the nodes in the list. + +=head2 get_node($pos) + +Returns the node at $pos. The node position in XPath is based at 1, not 0. + +=head2 size() + +Returns the number of nodes in the NodeSet. + +=head2 pop() + +Equivalent to perl's pop function. + +=head2 push(@nodes) + +Equivalent to perl's push function. + +=head2 append($nodeset) + +Given a nodeset, appends the list of nodes in $nodeset to the end of the +current list. + +=head2 shift() + +Equivalent to perl's shift function. + +=head2 unshift(@nodes) + +Equivalent to perl's unshift function. + +=head2 prepend($nodeset) + +Given a nodeset, prepends the list of nodes in $nodeset to the front of +the current list. + +=cut diff --git a/XPath/Number.pm b/XPath/Number.pm new file mode 100644 index 0000000..74fff2e --- /dev/null +++ b/XPath/Number.pm @@ -0,0 +1,87 @@ +# $Id: Number.pm,v 1.14 2002/12/26 17:57:09 matt Exp $ + +package XML::XPath::Number; +use XML::XPath::Boolean; +use XML::XPath::Literal; +use strict; + +use overload + '""' => \&value, + '0+' => \&value, + '<=>' => \&cmp; + +sub new { + my $class = shift; + my $number = shift; + if ($number !~ /^\s*[+-]?(\d+(\.\d*)?|\.\d+)\s*$/) { + $number = undef; + } + else { + $number =~ s/^\s*(.*)\s*$/$1/; + } + bless \$number, $class; +} + +sub as_string { + my $self = shift; + defined $$self ? $$self : 'NaN'; +} + +sub as_xml { + my $self = shift; + return "" . (defined($$self) ? $$self : 'NaN') . "\n"; +} + +sub value { + my $self = shift; + $$self; +} + +sub cmp { + my $self = shift; + my ($other, $swap) = @_; + if ($swap) { + return $other <=> $$self; + } + return $$self <=> $other; +} + +sub evaluate { + my $self = shift; + $self; +} + +sub to_boolean { + my $self = shift; + return $$self ? XML::XPath::Boolean->True : XML::XPath::Boolean->False; +} + +sub to_literal { XML::XPath::Literal->new($_[0]->as_string); } +sub to_number { $_[0]; } + +sub string_value { return $_[0]->value } + +1; +__END__ + +=head1 NAME + +XML::XPath::Number - Simple numeric values. + +=head1 DESCRIPTION + +This class holds simple numeric values. It doesn't support -0, +/- Infinity, +or NaN, as the XPath spec says it should, but I'm not hurting anyone I don't think. + +=head1 API + +=head2 new($num) + +Creates a new XML::XPath::Number object, with the value in $num. Does some +rudimentary numeric checking on $num to ensure it actually is a number. + +=head2 value() + +Also as overloaded stringification. Returns the numeric value held. + +=cut diff --git a/XPath/Parser.pm b/XPath/Parser.pm new file mode 100644 index 0000000..ec69b0d --- /dev/null +++ b/XPath/Parser.pm @@ -0,0 +1,821 @@ +# $Id: Parser.pm,v 1.33 2001/11/26 17:41:18 matt Exp $ + +package XML::XPath::Parser; + +use strict; +use vars qw/ + $NCName + $QName + $NCWild + $QNWild + $NUMBER_RE + $NODE_TYPE + $AXIS_NAME + %AXES + $LITERAL + %CACHE/; + +use XML::XPath::XMLParser; +use XML::XPath::Step; +use XML::XPath::Expr; +use XML::XPath::Function; +use XML::XPath::LocationPath; +use XML::XPath::Variable; +use XML::XPath::Literal; +use XML::XPath::Number; +use XML::XPath::NodeSet; + +# Axis name to principal node type mapping +%AXES = ( + 'ancestor' => 'element', + 'ancestor-or-self' => 'element', + 'attribute' => 'attribute', + 'namespace' => 'namespace', + 'child' => 'element', + 'descendant' => 'element', + 'descendant-or-self' => 'element', + 'following' => 'element', + 'following-sibling' => 'element', + 'parent' => 'element', + 'preceding' => 'element', + 'preceding-sibling' => 'element', + 'self' => 'element', + ); + +$NCName = '([A-Za-z_][\w\\.\\-]*)'; +$QName = "($NCName:)?$NCName"; +$NCWild = "${NCName}:\\*"; +$QNWild = "\\*"; +$NODE_TYPE = '((text|comment|processing-instruction|node)\\(\\))'; +$AXIS_NAME = '(' . join('|', keys %AXES) . ')::'; +$NUMBER_RE = '\d+(\\.\d*)?|\\.\d+'; +$LITERAL = '\\"[^\\"]*\\"|\\\'[^\\\']*\\\''; + +sub new { + my $class = shift; + my $self = bless {}, $class; + debug("New Parser being created.\n"); + $self->{context_set} = XML::XPath::NodeSet->new(); + $self->{context_pos} = undef; # 1 based position in array context + $self->{context_size} = 0; # total size of context + $self->clear_namespaces(); + $self->{vars} = {}; + $self->{direction} = 'forward'; + $self->{cache} = {}; + return $self; +} + +sub get_var { + my $self = shift; + my $var = shift; + $self->{vars}->{$var}; +} + +sub set_var { + my $self = shift; + my $var = shift; + my $val = shift; + $self->{vars}->{$var} = $val; +} + +sub set_namespace { + my $self = shift; + my ($prefix, $expanded) = @_; + $self->{namespaces}{$prefix} = $expanded; +} + +sub clear_namespaces { + my $self = shift; + $self->{namespaces} = {}; +} + +sub get_namespace { + my $self = shift; + my ($prefix, $node) = @_; + if (my $ns = $self->{namespaces}{$prefix}) { + return $ns; + } + if (my $nsnode = $node->getNamespace($prefix)) { + return $nsnode->getValue(); + } +} + +sub get_context_set { $_[0]->{context_set}; } +sub set_context_set { $_[0]->{context_set} = $_[1]; } +sub get_context_pos { $_[0]->{context_pos}; } +sub set_context_pos { $_[0]->{context_pos} = $_[1]; } +sub get_context_size { $_[0]->{context_set}->size; } +sub get_context_node { $_[0]->{context_set}->get_node($_[0]->{context_pos}); } + +sub my_sub { + return (caller(1))[3]; +} + +sub parse { + my $self = shift; + my $path = shift; + if ($CACHE{$path}) { + return $CACHE{$path}; + } + my $tokens = $self->tokenize($path); + + $self->{_tokpos} = 0; + my $tree = $self->analyze($tokens); + + if ($self->{_tokpos} < scalar(@$tokens)) { + # didn't manage to parse entire expression - throw an exception + die "Parse of expression $path failed - junk after end of expression: $tokens->[$self->{_tokpos}]"; + } + + $CACHE{$path} = $tree; + + debug("PARSED Expr to:\n", $tree->as_string, "\n") if $XML::XPath::Debug; + + return $tree; +} + +sub tokenize { + my $self = shift; + my $path = shift; + study $path; + + my @tokens; + + debug("Parsing: $path\n"); + + # Bug: We don't allow "'@' NodeType" which is in the grammar, but I think is just plain stupid. + + while($path =~ m/\G + \s* # ignore all whitespace + ( # tokens + $LITERAL| + $NUMBER_RE| # Match digits + \.\.| # match parent + \.| # match current + ($AXIS_NAME)?$NODE_TYPE| # match tests + processing-instruction| + \@($NCWild|$QName|$QNWild)| # match attrib + \$$QName| # match variable reference + ($AXIS_NAME)?($NCWild|$QName|$QNWild)| # match NCName,NodeType,Axis::Test + \!=|<=|\-|>=|\/\/|and|or|mod|div| # multi-char seps + [,\+=\|<>\/\(\[\]\)]| # single char seps + (?{_curr_match} = ''; + return 0 unless $self->{_tokpos} < @$tokens; + + local $^W; + +# debug ("match: $match\n"); + + if ($tokens->[$self->{_tokpos}] =~ /^$match$/) { + $self->{_curr_match} = $tokens->[$self->{_tokpos}]; + $self->{_tokpos}++; + return 1; + } + else { + if ($fatal) { + die "Invalid token: ", $tokens->[$self->{_tokpos}], "\n"; + } + else { + return 0; + } + } +} + +sub Expr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + return OrExpr($self, $tokens); +} + +sub OrExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = AndExpr($self, $tokens); + while (match($self, $tokens, 'or')) { + my $or_expr = XML::XPath::Expr->new($self); + $or_expr->set_lhs($expr); + $or_expr->set_op('or'); + + my $rhs = AndExpr($self, $tokens); + + $or_expr->set_rhs($rhs); + $expr = $or_expr; + } + + return $expr; +} + +sub AndExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = EqualityExpr($self, $tokens); + while (match($self, $tokens, 'and')) { + my $and_expr = XML::XPath::Expr->new($self); + $and_expr->set_lhs($expr); + $and_expr->set_op('and'); + + my $rhs = EqualityExpr($self, $tokens); + + $and_expr->set_rhs($rhs); + $expr = $and_expr; + } + + return $expr; +} + +sub EqualityExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = RelationalExpr($self, $tokens); + while (match($self, $tokens, '!?=')) { + my $eq_expr = XML::XPath::Expr->new($self); + $eq_expr->set_lhs($expr); + $eq_expr->set_op($self->{_curr_match}); + + my $rhs = RelationalExpr($self, $tokens); + + $eq_expr->set_rhs($rhs); + $expr = $eq_expr; + } + + return $expr; +} + +sub RelationalExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = AdditiveExpr($self, $tokens); + while (match($self, $tokens, '(<|>|<=|>=)')) { + my $rel_expr = XML::XPath::Expr->new($self); + $rel_expr->set_lhs($expr); + $rel_expr->set_op($self->{_curr_match}); + + my $rhs = AdditiveExpr($self, $tokens); + + $rel_expr->set_rhs($rhs); + $expr = $rel_expr; + } + + return $expr; +} + +sub AdditiveExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = MultiplicativeExpr($self, $tokens); + while (match($self, $tokens, '[\\+\\-]')) { + my $add_expr = XML::XPath::Expr->new($self); + $add_expr->set_lhs($expr); + $add_expr->set_op($self->{_curr_match}); + + my $rhs = MultiplicativeExpr($self, $tokens); + + $add_expr->set_rhs($rhs); + $expr = $add_expr; + } + + return $expr; +} + +sub MultiplicativeExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = UnaryExpr($self, $tokens); + while (match($self, $tokens, '(\\*|div|mod)')) { + my $mult_expr = XML::XPath::Expr->new($self); + $mult_expr->set_lhs($expr); + $mult_expr->set_op($self->{_curr_match}); + + my $rhs = UnaryExpr($self, $tokens); + + $mult_expr->set_rhs($rhs); + $expr = $mult_expr; + } + + return $expr; +} + +sub UnaryExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + if (match($self, $tokens, '-')) { + my $expr = XML::XPath::Expr->new($self); + $expr->set_lhs(XML::XPath::Number->new(0)); + $expr->set_op('-'); + $expr->set_rhs(UnaryExpr($self, $tokens)); + return $expr; + } + else { + return UnionExpr($self, $tokens); + } +} + +sub UnionExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = PathExpr($self, $tokens); + while (match($self, $tokens, '\\|')) { + my $un_expr = XML::XPath::Expr->new($self); + $un_expr->set_lhs($expr); + $un_expr->set_op('|'); + + my $rhs = PathExpr($self, $tokens); + + $un_expr->set_rhs($rhs); + $expr = $un_expr; + } + + return $expr; +} + +sub PathExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + # PathExpr is LocationPath | FilterExpr | FilterExpr '//?' RelativeLocationPath + + # Since we are being predictive we need to find out which function to call next, then. + + # LocationPath either starts with "/", "//", ".", ".." or a proper Step. + + my $expr = XML::XPath::Expr->new($self); + + my $test = $tokens->[$self->{_tokpos}]; + + # Test for AbsoluteLocationPath and AbbreviatedRelativeLocationPath + if ($test =~ /^(\/\/?|\.\.?)$/) { + # LocationPath + $expr->set_lhs(LocationPath($self, $tokens)); + } + # Test for AxisName::... + elsif (is_step($self, $tokens)) { + $expr->set_lhs(LocationPath($self, $tokens)); + } + else { + # Not a LocationPath + # Use FilterExpr instead: + + $expr = FilterExpr($self, $tokens); + if (match($self, $tokens, '//?')) { + my $loc_path = XML::XPath::LocationPath->new(); + push @$loc_path, $expr; + if ($self->{_curr_match} eq '//') { + push @$loc_path, XML::XPath::Step->new($self, 'descendant-or-self', + XML::XPath::Step::test_nt_node); + } + push @$loc_path, RelativeLocationPath($self, $tokens); + my $new_expr = XML::XPath::Expr->new($self); + $new_expr->set_lhs($loc_path); + return $new_expr; + } + } + + return $expr; +} + +sub FilterExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = PrimaryExpr($self, $tokens); + while (match($self, $tokens, '\\[')) { + # really PredicateExpr... + $expr->push_predicate(Expr($self, $tokens)); + match($self, $tokens, '\\]', 1); + } + + return $expr; +} + +sub PrimaryExpr { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $expr = XML::XPath::Expr->new($self); + + if (match($self, $tokens, $LITERAL)) { + # new Literal with $self->{_curr_match}... + $self->{_curr_match} =~ m/^(["'])(.*)\1$/; + $expr->set_lhs(XML::XPath::Literal->new($2)); + } + elsif (match($self, $tokens, $NUMBER_RE)) { + # new Number with $self->{_curr_match}... + $expr->set_lhs(XML::XPath::Number->new($self->{_curr_match})); + } + elsif (match($self, $tokens, '\\(')) { + $expr->set_lhs(Expr($self, $tokens)); + match($self, $tokens, '\\)', 1); + } + elsif (match($self, $tokens, "\\\$$QName")) { + # new Variable with $self->{_curr_match}... + $self->{_curr_match} =~ /^\$(.*)$/; + $expr->set_lhs(XML::XPath::Variable->new($self, $1)); + } + elsif (match($self, $tokens, $QName)) { + # check match not Node_Type - done in lexer... + # new Function + my $func_name = $self->{_curr_match}; + match($self, $tokens, '\\(', 1); + $expr->set_lhs( + XML::XPath::Function->new( + $self, + $func_name, + Arguments($self, $tokens) + ) + ); + match($self, $tokens, '\\)', 1); + } + else { + die "Not a PrimaryExpr at ", $tokens->[$self->{_tokpos}], "\n"; + } + + return $expr; +} + +sub Arguments { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my @args; + + if($tokens->[$self->{_tokpos}] eq ')') { + return \@args; + } + + push @args, Expr($self, $tokens); + while (match($self, $tokens, ',')) { + push @args, Expr($self, $tokens); + } + + return \@args; +} + +sub LocationPath { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $loc_path = XML::XPath::LocationPath->new(); + + if (match($self, $tokens, '/')) { + # root + debug("SUB: Matched root\n"); + push @$loc_path, XML::XPath::Root->new(); + if (is_step($self, $tokens)) { + debug("Next is step\n"); + push @$loc_path, RelativeLocationPath($self, $tokens); + } + } + elsif (match($self, $tokens, '//')) { + # root + push @$loc_path, XML::XPath::Root->new(); + my $optimised = optimise_descendant_or_self($self, $tokens); + if (!$optimised) { + push @$loc_path, XML::XPath::Step->new($self, 'descendant-or-self', + XML::XPath::Step::test_nt_node); + push @$loc_path, RelativeLocationPath($self, $tokens); + } + else { + push @$loc_path, $optimised, RelativeLocationPath($self, $tokens); + } + } + else { + push @$loc_path, RelativeLocationPath($self, $tokens); + } + + return $loc_path; +} + +sub optimise_descendant_or_self { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my $tokpos = $self->{_tokpos}; + + # // must be followed by a Step. + if ($tokens->[$tokpos+1] && $tokens->[$tokpos+1] eq '[') { + # next token is a predicate + return; + } + elsif ($tokens->[$tokpos] =~ /^\.\.?$/) { + # abbreviatedStep - can't optimise. + return; + } + else { + debug("Trying to optimise //\n"); + my $step = Step($self, $tokens); + if ($step->{axis} ne 'child') { + # can't optimise axes other than child for now... + $self->{_tokpos} = $tokpos; + return; + } + $step->{axis} = 'descendant'; + $step->{axis_method} = 'axis_descendant'; + $self->{_tokpos}--; + $tokens->[$self->{_tokpos}] = '.'; + return $step; + } +} + +sub RelativeLocationPath { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + my @steps; + + push @steps, Step($self, $tokens); + while (match($self, $tokens, '//?')) { + if ($self->{_curr_match} eq '//') { + my $optimised = optimise_descendant_or_self($self, $tokens); + if (!$optimised) { + push @steps, XML::XPath::Step->new($self, 'descendant-or-self', + XML::XPath::Step::test_nt_node); + } + else { + push @steps, $optimised; + } + } + push @steps, Step($self, $tokens); + if (@steps > 1 && + $steps[-1]->{axis} eq 'self' && + $steps[-1]->{test} == XML::XPath::Step::test_nt_node) { + pop @steps; + } + } + + return @steps; +} + +sub Step { + my ($self, $tokens) = @_; + + debug("in SUB\n"); + + if (match($self, $tokens, '\\.')) { + # self::node() + return XML::XPath::Step->new($self, 'self', XML::XPath::Step::test_nt_node); + } + elsif (match($self, $tokens, '\\.\\.')) { + # parent::node() + return XML::XPath::Step->new($self, 'parent', XML::XPath::Step::test_nt_node); + } + else { + # AxisSpecifier NodeTest Predicate(s?) + my $token = $tokens->[$self->{_tokpos}]; + + debug("SUB: Checking $token\n"); + + my $step; + if ($token eq 'processing-instruction') { + $self->{_tokpos}++; + match($self, $tokens, '\\(', 1); + match($self, $tokens, $LITERAL); + $self->{_curr_match} =~ /^["'](.*)["']$/; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_nt_pi, + XML::XPath::Literal->new($1)); + match($self, $tokens, '\\)', 1); + } + elsif ($token =~ /^\@($NCWild|$QName|$QNWild)$/o) { + $self->{_tokpos}++; + if ($token eq '@*') { + $step = XML::XPath::Step->new($self, + 'attribute', + XML::XPath::Step::test_attr_any, + '*'); + } + elsif ($token =~ /^\@($NCName):\*$/o) { + $step = XML::XPath::Step->new($self, + 'attribute', + XML::XPath::Step::test_attr_ncwild, + $1); + } + elsif ($token =~ /^\@($QName)$/o) { + $step = XML::XPath::Step->new($self, + 'attribute', + XML::XPath::Step::test_attr_qname, + $1); + } + } + elsif ($token =~ /^($NCName):\*$/o) { # ns:* + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_ncwild, + $1); + } + elsif ($token =~ /^$QNWild$/o) { # * + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_any, + $token); + } + elsif ($token =~ /^$QName$/o) { # name:name + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_qname, + $token); + } + elsif ($token eq 'comment()') { + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_nt_comment); + } + elsif ($token eq 'text()') { + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_nt_text); + } + elsif ($token eq 'node()') { + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_nt_node); + } + elsif ($token eq 'processing-instruction()') { + $self->{_tokpos}++; + $step = XML::XPath::Step->new($self, 'child', + XML::XPath::Step::test_nt_pi); + } + elsif ($token =~ /^$AXIS_NAME($NCWild|$QName|$QNWild|$NODE_TYPE)$/o) { + my $axis = $1; + $self->{_tokpos}++; + $token = $2; + if ($token eq 'processing-instruction') { + match($self, $tokens, '\\(', 1); + match($self, $tokens, $LITERAL); + $self->{_curr_match} =~ /^["'](.*)["']$/; + $step = XML::XPath::Step->new($self, $axis, + XML::XPath::Step::test_nt_pi, + XML::XPath::Literal->new($1)); + match($self, $tokens, '\\)', 1); + } + elsif ($token =~ /^($NCName):\*$/o) { # ns:* + $step = XML::XPath::Step->new($self, $axis, + (($axis eq 'attribute') ? + XML::XPath::Step::test_attr_ncwild + : + XML::XPath::Step::test_ncwild), + $1); + } + elsif ($token =~ /^$QNWild$/o) { # * + $step = XML::XPath::Step->new($self, $axis, + (($axis eq 'attribute') ? + XML::XPath::Step::test_attr_any + : + XML::XPath::Step::test_any), + $token); + } + elsif ($token =~ /^$QName$/o) { # name:name + $step = XML::XPath::Step->new($self, $axis, + (($axis eq 'attribute') ? + XML::XPath::Step::test_attr_qname + : + XML::XPath::Step::test_qname), + $token); + } + elsif ($token eq 'comment()') { + $step = XML::XPath::Step->new($self, $axis, + XML::XPath::Step::test_nt_comment); + } + elsif ($token eq 'text()') { + $step = XML::XPath::Step->new($self, $axis, + XML::XPath::Step::test_nt_text); + } + elsif ($token eq 'node()') { + $step = XML::XPath::Step->new($self, $axis, + XML::XPath::Step::test_nt_node); + } + elsif ($token eq 'processing-instruction()') { + $step = XML::XPath::Step->new($self, $axis, + XML::XPath::Step::test_nt_pi); + } + else { + die "Shouldn't get here"; + } + } + else { + die "token $token doesn't match format of a 'Step'\n"; + } + + while (match($self, $tokens, '\\[')) { + push @{$step->{predicates}}, Expr($self, $tokens); + match($self, $tokens, '\\]', 1); + } + + return $step; + } +} + +sub is_step { + my ($self, $tokens) = @_; + + my $token = $tokens->[$self->{_tokpos}]; + + return unless defined $token; + + debug("SUB: Checking if '$token' is a step\n"); + + local $^W; + + if ($token eq 'processing-instruction') { + return 1; + } + elsif ($token =~ /^\@($NCWild|$QName|$QNWild)$/o) { + return 1; + } + elsif ($token =~ /^($NCWild|$QName|$QNWild)$/o && $tokens->[$self->{_tokpos}+1] ne '(') { + return 1; + } + elsif ($token =~ /^$NODE_TYPE$/o) { + return 1; + } + elsif ($token =~ /^$AXIS_NAME($NCWild|$QName|$QNWild|$NODE_TYPE)$/o) { + return 1; + } + + debug("SUB: '$token' not a step\n"); + + return; +} + +sub debug { + return unless $XML::XPath::Debug; + + my ($pkg, $file, $line, $sub) = caller(1); + + $sub =~ s/^$pkg\:://; + + while (@_) { + my $x = shift; + $x =~ s/\bPKG\b/$pkg/g; + $x =~ s/\bLINE\b/$line/g; + $x =~ s/\bSUB\b/$sub/g; + print STDERR $x; + } +} + +1; diff --git a/XPath/PerlSAX.pm b/XPath/PerlSAX.pm new file mode 100644 index 0000000..669f101 --- /dev/null +++ b/XPath/PerlSAX.pm @@ -0,0 +1,166 @@ +# $Id: PerlSAX.pm,v 1.6 2000/02/28 10:40:21 matt Exp $ + +package XML::XPath::PerlSAX; +use XML::XPath::XMLParser; +use strict; + +sub new { + my $class = shift; + my %args = @_; + bless \%args, $class; +} + +sub parse { + my $self = shift; + + die "XML::XPath::PerlSAX: parser instance ($self) already parsing\n" + if (defined $self->{ParseOptions}); + + # If there's one arg and it's an array ref, assume it's a node we're parsing + my $args; + if (@_ == 1 && ref($_[0]) =~ /^(text|comment|element|namespace|attribute|pi)$/) { +# warn "Parsing node\n"; + my $node = shift; +# warn "PARSING: $node ", XML::XPath::XMLParser::as_string($node), "\n\n"; + $args = { Source => { Node => $node } }; + } + else { + $args = (@_ == 1) ? shift : { @_ }; + } + + my $parse_options = { %$self, %$args }; + $self->{ParseOptions} = $parse_options; + + # ensure that we have at least one source + if (!defined $parse_options->{Source} || + !defined $parse_options->{Source}{Node}) { + die "XML::XPath::PerlSAX: no source defined for parse\n"; + } + + # assign default Handler to any undefined handlers + if (defined $parse_options->{Handler}) { + $parse_options->{DocumentHandler} = $parse_options->{Handler} + if (!defined $parse_options->{DocumentHandler}); + } + + # ensure that we have a DocumentHandler + if (!defined $parse_options->{DocumentHandler}) { + die "XML::XPath::PerlSAX: no Handler or DocumentHandler defined for parse\n"; + } + + # cache DocumentHandler in self for callbacks + $self->{DocumentHandler} = $parse_options->{DocumentHandler}; + + if ((ref($parse_options->{Source}{Node}) eq 'element') && + !($parse_options->{Source}{Node}->[node_parent])) { + # Got root node + $self->{DocumentHandler}->start_document( { } ); + $self->parse_node($parse_options->{Source}{Node}); + return $self->{DocumentHandler}->end_document( { } ); + } + else { + $self->parse_node($parse_options->{Source}{Node}); + } + + # clean up parser instance + delete $self->{ParseOptions}; + delete $self->{DocumentHandler}; + +} + +sub parse_node { + my $self = shift; + my $node = shift; +# warn "parse_node $node\n"; + if (ref($node) eq 'element' && $node->[node_parent]) { + # bundle up attributes + my @attribs; + foreach my $attr (@{$node->[node_attribs]}) { + if ($attr->[node_prefix]) { + push @attribs, $attr->[node_prefix] . ":" . $attr->[node_key]; + } + else { + push @attribs, $attr->[node_key]; + } + push @attribs, $attr->[node_value]; + } + + $self->{DocumentHandler}->start_element( + { Name => $node->[node_name], + Attributes => \@attribs, + } + ); + foreach my $kid (@{$node->[node_children]}) { + $self->parse_node($kid); + } + $self->{DocumentHandler}->end_element( + { + Name => $node->[node_name], + } + ); + } + elsif (ref($node) eq 'text') { + $self->{DocumentHandler}->characters($node->[node_text]); + } + elsif (ref($node) eq 'comment') { + $self->{DocumentHandler}->comment($node->[node_comment]); + } + elsif (ref($node) eq 'pi') { + $self->{DocumentHandler}->processing_instruction( + { + Target => $node->[node_target], + Data => $node->[node_data] + } + ); + } + elsif (ref($node) eq 'element') { # root node + # just do kids + foreach my $kid (@{$node->[node_children]}) { + $self->parse_node($kid); + } + } + else { + die "Unknown node type: '", ref($node), "' ", scalar(@$node), "\n"; + } +} + +1; + +__END__ + +=head1 NAME + +XML::XPath::PerlSAX - A PerlSAX event generator for my wierd node structure + +=head1 SYNOPSIS + + use XML::XPath; + use XML::XPath::PerlSAX; + use XML::DOM::PerlSAX; + + my $xp = XML::XPath->new(filename => 'test.xhtml'); + my $paras = $xp->find('/html/body/p'); + + my $handler = XML::DOM::PerlSAX->new(); + my $generator = XML::XPath::PerlSAX->new( Handler => $handler ); + + foreach my $node ($paras->get_nodelist) { + my $domtree = $generator->parse($node); + # do something with $domtree + } + +=head1 DESCRIPTION + +This module generates PerlSAX events to pass to a PerlSAX handler such +as XML::DOM::PerlSAX. It operates specifically on my wierd tree format. + +Unfortunately SAX doesn't seem to cope with namespaces, so these are +lost completely. I believe SAX2 is doing namespaces. + +=head1 Other + +The XML::DOM::PerlSAX handler I tried was completely broken (didn't even +compile before I patched it a bit), so I don't know how correct this +is or how far it will work. + +This software may only be distributed as part of the XML::XPath package. diff --git a/XPath/Root.pm b/XPath/Root.pm new file mode 100644 index 0000000..ee5914b --- /dev/null +++ b/XPath/Root.pm @@ -0,0 +1,36 @@ +# $Id: Root.pm,v 1.6 2001/03/16 11:10:08 matt Exp $ + +package XML::XPath::Root; +use strict; +use XML::XPath::XMLParser; +use XML::XPath::NodeSet; + +sub new { + my $class = shift; + my $self; # actually don't need anything here - just a placeholder + bless \$self, $class; +} + +sub as_string { + # do nothing +} + +sub as_xml { + return "\n"; +} + +sub evaluate { + my $self = shift; + my $nodeset = shift; + +# warn "Eval ROOT\n"; + + # must only ever occur on 1 node + die "Can't go to root on > 1 node!" unless $nodeset->size == 1; + + my $newset = XML::XPath::NodeSet->new(); + $newset->push($nodeset->get_node(1)->getRootNode()); + return $newset; +} + +1; diff --git a/XPath/Step.pm b/XPath/Step.pm new file mode 100644 index 0000000..cff64a3 --- /dev/null +++ b/XPath/Step.pm @@ -0,0 +1,519 @@ +# $Id: Step.pm,v 1.35 2001/04/01 16:56:40 matt Exp $ + +package XML::XPath::Step; +use XML::XPath::Parser; +use XML::XPath::Node; +use strict; + +# the beginnings of using XS for this file... +# require DynaLoader; +# use vars qw/$VERSION @ISA/; +# $VERSION = '1.0'; +# @ISA = qw(DynaLoader); +# +# bootstrap XML::XPath::Step $VERSION; + +sub test_qname () { 0; } # Full name +sub test_ncwild () { 1; } # NCName:* +sub test_any () { 2; } # * + +sub test_attr_qname () { 3; } # @ns:attrib +sub test_attr_ncwild () { 4; } # @nc:* +sub test_attr_any () { 5; } # @* + +sub test_nt_comment () { 6; } # comment() +sub test_nt_text () { 7; } # text() +sub test_nt_pi () { 8; } # processing-instruction() +sub test_nt_node () { 9; } # node() + +sub new { + my $class = shift; + my ($pp, $axis, $test, $literal) = @_; + my $axis_method = "axis_$axis"; + $axis_method =~ tr/-/_/; + my $self = { + pp => $pp, # the XML::XPath::Parser class + axis => $axis, + axis_method => $axis_method, + test => $test, + literal => $literal, + predicates => [], + }; + bless $self, $class; +} + +sub as_string { + my $self = shift; + my $string = $self->{axis} . "::"; + + my $test = $self->{test}; + + if ($test == test_nt_pi) { + $string .= 'processing-instruction('; + if ($self->{literal}->value) { + $string .= $self->{literal}->as_string; + } + $string .= ")"; + } + elsif ($test == test_nt_comment) { + $string .= 'comment()'; + } + elsif ($test == test_nt_text) { + $string .= 'text()'; + } + elsif ($test == test_nt_node) { + $string .= 'node()'; + } + elsif ($test == test_ncwild || $test == test_attr_ncwild) { + $string .= $self->{literal} . ':*'; + } + else { + $string .= $self->{literal}; + } + + foreach (@{$self->{predicates}}) { + next unless defined $_; + $string .= "[" . $_->as_string . "]"; + } + return $string; +} + +sub as_xml { + my $self = shift; + my $string = "\n"; + $string .= "" . $self->{axis} . "\n"; + my $test = $self->{test}; + + $string .= ""; + + if ($test == test_nt_pi) { + $string .= '{literal}->value) { + $string .= '>'; + $string .= $self->{literal}->as_string; + $string .= ''; + } + else { + $string .= '/>'; + } + } + elsif ($test == test_nt_comment) { + $string .= ''; + } + elsif ($test == test_nt_text) { + $string .= ''; + } + elsif ($test == test_nt_node) { + $string .= ''; + } + elsif ($test == test_ncwild || $test == test_attr_ncwild) { + $string .= '' . $self->{literal} . ''; + } + else { + $string .= '' . $self->{literal} . ''; + } + + $string .= "\n"; + + foreach (@{$self->{predicates}}) { + next unless defined $_; + $string .= "\n" . $_->as_xml() . "\n"; + } + + $string .= "\n"; + + return $string; +} + +sub evaluate { + my $self = shift; + my $from = shift; # context nodeset + +# warn "Step::evaluate called with ", $from->size, " length nodeset\n"; + + $self->{pp}->set_context_set($from); + + my $initial_nodeset = XML::XPath::NodeSet->new(); + + # See spec section 2.1, paragraphs 3,4,5: + # The node-set selected by the location step is the node-set + # that results from generating an initial node set from the + # axis and node-test, and then filtering that node-set by + # each of the predicates in turn. + + # Make each node in the nodeset be the context node, one by one + for(my $i = 1; $i <= $from->size; $i++) { + $self->{pp}->set_context_pos($i); + $initial_nodeset->append($self->evaluate_node($from->get_node($i))); + } + +# warn "Step::evaluate initial nodeset size: ", $initial_nodeset->size, "\n"; + + $self->{pp}->set_context_set(undef); + + $initial_nodeset->sort; + + return $initial_nodeset; +} + +# Evaluate the step against a particular node +sub evaluate_node { + my $self = shift; + my $context = shift; + +# warn "Evaluate node: $self->{axis}\n"; + +# warn "Node: ", $context->[node_name], "\n"; + + my $method = $self->{axis_method}; + + my $results = XML::XPath::NodeSet->new(); + no strict 'refs'; + eval { + $method->($self, $context, $results); + }; + if ($@) { + die "axis $method not implemented [$@]\n"; + } + +# warn("results: ", join('><', map {$_->string_value} @$results), "\n"); + # filter initial nodeset by each predicate + foreach my $predicate (@{$self->{predicates}}) { + $results = $self->filter_by_predicate($results, $predicate); + } + + return $results; +} + +sub axis_ancestor { + my $self = shift; + my ($context, $results) = @_; + + my $parent = $context->getParentNode; + + START: + return $results unless $parent; + if (node_test($self, $parent)) { + $results->push($parent); + } + $parent = $parent->getParentNode; + goto START; +} + +sub axis_ancestor_or_self { + my $self = shift; + my ($context, $results) = @_; + + START: + return $results unless $context; + if (node_test($self, $context)) { + $results->push($context); + } + $context = $context->getParentNode; + goto START; +} + +sub axis_attribute { + my $self = shift; + my ($context, $results) = @_; + + foreach my $attrib (@{$context->getAttributes}) { + if ($self->test_attribute($attrib)) { + $results->push($attrib); + } + } +} + +sub axis_child { + my $self = shift; + my ($context, $results) = @_; + + foreach my $node (@{$context->getChildNodes}) { + if (node_test($self, $node)) { + $results->push($node); + } + } +} + +sub axis_descendant { + my $self = shift; + my ($context, $results) = @_; + + my @stack = $context->getChildNodes; + + while (@stack) { + my $node = pop @stack; + if (node_test($self, $node)) { + $results->unshift($node); + } + push @stack, $node->getChildNodes; + } +} + +sub axis_descendant_or_self { + my $self = shift; + my ($context, $results) = @_; + + my @stack = ($context); + + while (@stack) { + my $node = pop @stack; + if (node_test($self, $node)) { + $results->unshift($node); + } + push @stack, $node->getChildNodes; + } +} + +sub axis_following { + my $self = shift; + my ($context, $results) = @_; + + START: + + my $parent = $context->getParentNode; + return $results unless $parent; + + while ($context = $context->getNextSibling) { + axis_descendant_or_self($self, $context, $results); + } + + $context = $parent; + goto START; +} + +sub axis_following_sibling { + my $self = shift; + my ($context, $results) = @_; + + while ($context = $context->getNextSibling) { + if (node_test($self, $context)) { + $results->push($context); + } + } +} + +sub axis_namespace { + my $self = shift; + my ($context, $results) = @_; + + return $results unless $context->isElementNode; + foreach my $ns (@{$context->getNamespaces}) { + if ($self->test_namespace($ns)) { + $results->push($ns); + } + } +} + +sub axis_parent { + my $self = shift; + my ($context, $results) = @_; + + my $parent = $context->getParentNode; + return $results unless $parent; + if (node_test($self, $parent)) { + $results->push($parent); + } +} + +sub axis_preceding { + my $self = shift; + my ($context, $results) = @_; + + # all preceding nodes in document order, except ancestors + + START: + + my $parent = $context->getParentNode; + return $results unless $parent; + + while ($context = $context->getPreviousSibling) { + axis_descendant_or_self($self, $context, $results); + } + + $context = $parent; + goto START; +} + +sub axis_preceding_sibling { + my $self = shift; + my ($context, $results) = @_; + + while ($context = $context->getPreviousSibling) { + if (node_test($self, $context)) { + $results->push($context); + } + } +} + +sub axis_self { + my $self = shift; + my ($context, $results) = @_; + + if (node_test($self, $context)) { + $results->push($context); + } +} + +sub node_test { + my $self = shift; + my $node = shift; + + # if node passes test, return true + + my $test = $self->{test}; + + return 1 if $test == test_nt_node; + + if ($test == test_any) { + return 1 if $node->isElementNode && defined $node->getName; + } + + local $^W; + + if ($test == test_ncwild) { + return unless $node->isElementNode; + my $match_ns = $self->{pp}->get_namespace($self->{literal}, $node); + if (my $node_nsnode = $node->getNamespace()) { + return 1 if $match_ns eq $node_nsnode->getValue; + } + } + elsif ($test == test_qname) { + return unless $node->isElementNode; + if ($self->{literal} =~ /:/) { + my ($prefix, $name) = split(':', $self->{literal}, 2); + my $match_ns = $self->{pp}->get_namespace($prefix, $node); + if (my $node_nsnode = $node->getNamespace()) { +# warn "match: '$self->{literal}' match NS: '$match_ns' got NS: '", $node_nsnode->getValue, "'\n"; + return 1 if ($match_ns eq $node_nsnode->getValue) && + ($name eq $node->getLocalName); + } + } + else { +# warn "Node test: ", $node->getName, "\n"; + return 1 if $node->getName eq $self->{literal}; + } + } + elsif ($test == test_nt_text) { + return 1 if $node->isTextNode; + } + elsif ($test == test_nt_comment) { + return 1 if $node->isCommentNode; + } +# elsif ($test == test_nt_pi && !$self->{literal}) { +# warn "Unreachable code???"; +# return 1 if $node->isPINode; +# } + elsif ($test == test_nt_pi) { + return unless $node->isPINode; + if (my $val = $self->{literal}->value) { + return 1 if $node->getTarget eq $val; + } + else { + return 1; + } + } + + return; # fallthrough returns false +} + +sub test_attribute { + my $self = shift; + my $node = shift; + +# warn "test_attrib: '$self->{test}' against: ", $node->getName, "\n"; +# warn "node type: $node->[node_type]\n"; + + my $test = $self->{test}; + + return 1 if ($test == test_attr_any) || ($test == test_nt_node); + + if ($test == test_attr_ncwild) { + my $match_ns = $self->{pp}->get_namespace($self->{literal}, $node); + if (my $node_nsnode = $node->getNamespace()) { + return 1 if $match_ns eq $node_nsnode->getValue; + } + } + elsif ($test == test_attr_qname) { + if ($self->{literal} =~ /:/) { + my ($prefix, $name) = split(':', $self->{literal}, 2); + my $match_ns = $self->{pp}->get_namespace($prefix, $node); + if (my $node_nsnode = $node->getNamespace()) { + return 1 if ($match_ns eq $node_nsnode->getValue) && + ($name eq $node->getLocalName); + } + } + else { + return 1 if $node->getName eq $self->{literal}; + } + } + + return; # fallthrough returns false +} + +sub test_namespace { + my $self = shift; + my $node = shift; + + # Not sure if this is correct. The spec seems very unclear on what + # constitutes a namespace test... bah! + + my $test = $self->{test}; + + return 1 if $test == test_any; # True for all nodes of principal type + + if ($test == test_any) { + return 1; + } + elsif ($self->{literal} eq $node->getExpanded) { + return 1; + } + + return; +} + +sub filter_by_predicate { + my $self = shift; + my ($nodeset, $predicate) = @_; + + # See spec section 2.4, paragraphs 2 & 3: + # For each node in the node-set to be filtered, the predicate Expr + # is evaluated with that node as the context node, with the number + # of nodes in the node set as the context size, and with the + # proximity position of the node in the node set with respect to + # the axis as the context position. + + if (!ref($nodeset)) { # use ref because nodeset has a bool context + die "No nodeset!!!"; + } + +# warn "Filter by predicate: $predicate\n"; + + my $newset = XML::XPath::NodeSet->new(); + + for(my $i = 1; $i <= $nodeset->size; $i++) { + # set context set each time 'cos a loc-path in the expr could change it + $self->{pp}->set_context_set($nodeset); + $self->{pp}->set_context_pos($i); + my $result = $predicate->evaluate($nodeset->get_node($i)); + if ($result->isa('XML::XPath::Boolean')) { + if ($result->value) { + $newset->push($nodeset->get_node($i)); + } + } + elsif ($result->isa('XML::XPath::Number')) { + if ($result->value == $i) { + $newset->push($nodeset->get_node($i)); + } + } + else { + if ($result->to_boolean->value) { + $newset->push($nodeset->get_node($i)); + } + } + } + + return $newset; +} + +1; diff --git a/XPath/Variable.pm b/XPath/Variable.pm new file mode 100644 index 0000000..9c8b59e --- /dev/null +++ b/XPath/Variable.pm @@ -0,0 +1,43 @@ +# $Id: Variable.pm,v 1.5 2001/03/16 11:10:08 matt Exp $ + +package XML::XPath::Variable; +use strict; + +# This class does NOT contain 1 instance of a variable +# see the XML::XPath::Parser class for the instances +# This class simply holds the name of the var + +sub new { + my $class = shift; + my ($pp, $name) = @_; + bless { name => $name, path_parser => $pp }, $class; +} + +sub as_string { + my $self = shift; + '\$' . $self->{name}; +} + +sub as_xml { + my $self = shift; + return "" . $self->{name} . "\n"; +} + +sub get_value { + my $self = shift; + $self->{path_parser}->get_var($self->{name}); +} + +sub set_value { + my $self = shift; + my ($val) = @_; + $self->{path_parser}->set_var($self->{name}, $val); +} + +sub evaluate { + my $self = shift; + my $val = $self->get_value; + return $val; +} + +1; diff --git a/XPath/XMLParser.pm b/XPath/XMLParser.pm new file mode 100644 index 0000000..78664d3 --- /dev/null +++ b/XPath/XMLParser.pm @@ -0,0 +1,385 @@ +# $Id: XMLParser.pm,v 1.49 2001/03/14 17:13:57 matt Exp $ + +package XML::XPath::XMLParser; + +use strict; + +use XML::Parser; +#use XML::XPath; +use XML::XPath::Node; +use XML::XPath::Node::Element; +use XML::XPath::Node::Text; +use XML::XPath::Node::Comment; +use XML::XPath::Node::PI; +use XML::XPath::Node::Attribute; +use XML::XPath::Node::Namespace; + +my @options = qw( + filename + xml + parser + ioref + ); + +my ($_current, $_namespaces_on); +my %IdNames; + +use vars qw/$xmlns_ns $xml_ns/; + +$xmlns_ns = "http://www.w3.org/2000/xmlns/"; +$xml_ns = "http://www.w3.org/XML/1998/namespace"; + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my %args = @_; + my %hash = map(( "_$_" => $args{$_} ), @options); + bless \%hash, $class; +} + +sub parse { + my $self = shift; + + $self->{IdNames} = {}; + $self->{InScopeNamespaceStack} = [ { + '_Default' => undef, + 'xmlns' => $xmlns_ns, + 'xml' => $xml_ns, + } ]; + + $self->{NodeStack} = [ ]; + + $self->set_xml($_[0]) if $_[0]; + + my $parser = $self->get_parser || XML::Parser->new( + ErrorContext => 2, + ParseParamEnt => 1, + ); + + $parser->setHandlers( + Init => sub { $self->parse_init(@_) }, + Char => sub { $self->parse_char(@_) }, + Start => sub { $self->parse_start(@_) }, + End => sub { $self->parse_end(@_) }, + Final => sub { $self->parse_final(@_) }, + Proc => sub { $self->parse_pi(@_) }, + Comment => sub { $self->parse_comment(@_) }, + Attlist => sub { $self->parse_attlist(@_) }, + ); + + my $toparse; + if ($toparse = $self->get_filename) { + return $parser->parsefile($toparse); + } + else { + return $parser->parse($self->get_xml || $self->get_ioref); + } +} + +sub parsefile { + my $self = shift; + my ($filename) = @_; + $self->set_filename($filename); + $self->parse; +} + +sub parse_init { + my $self = shift; + my $e = shift; + my $document = XML::XPath::Node::Element->new(); + my $newns = XML::XPath::Node::Namespace->new('xml', $xml_ns); + $document->appendNamespace($newns); + $self->{current} = $self->{DOC_Node} = $document; +} + +sub parse_final { + my $self = shift; + return $self->{DOC_Node}; +} + +sub parse_char { + my $self = shift; + my $e = shift; + my $text = shift; + + my $parent = $self->{current}; + + my $last = $parent->getLastChild; + if ($last && $last->isTextNode) { + # append to previous text node + $last->appendText($text); + return; + } + + my $node = XML::XPath::Node::Text->new($text); + $parent->appendChild($node, 1); +} + +sub parse_start { + my $self = shift; + my $e = shift; + my $tag = shift; + + push @{ $self->{InScopeNamespaceStack} }, + { %{ $self->{InScopeNamespaceStack}[-1] } }; + $self->_scan_namespaces(@_); + + my ($prefix, $namespace) = $self->_namespace($tag); + + my $node = XML::XPath::Node::Element->new($tag, $prefix); + + my @attributes; + for (my $ii = 0; $ii < $#_; $ii += 2) { + my ($name, $value) = ($_[$ii], $_[$ii+1]); + if ($name =~ /^xmlns(:(.*))?$/) { + # namespace node + my $prefix = $2 || '#default'; +# warn "Creating NS node: $prefix = $value\n"; + my $newns = XML::XPath::Node::Namespace->new($prefix, $value); + $node->appendNamespace($newns); + } + else { + my ($prefix, $namespace) = $self->_namespace($name); + undef $namespace unless $prefix; + + my $newattr = XML::XPath::Node::Attribute->new($name, $value, $prefix); + $node->appendAttribute($newattr, 1); + if (exists($self->{IdNames}{$tag}) && ($self->{IdNames}{$tag} eq $name)) { + # warn "appending Id Element: $val for ", $node->getName, "\n"; + $self->{DOC_Node}->appendIdElement($value, $node); + } + } + } + + $self->{current}->appendChild($node, 1); + $self->{current} = $node; +} + +sub parse_end { + my $self = shift; + my $e = shift; + $self->{current} = $self->{current}->getParentNode; +} + +sub parse_pi { + my $self = shift; + my $e = shift; + my ($target, $data) = @_; + my $node = XML::XPath::Node::PI->new($target, $data); + $self->{current}->appendChild($node, 1); +} + +sub parse_comment { + my $self = shift; + my $e = shift; + my ($data) = @_; + my $node = XML::XPath::Node::Comment->new($data); + $self->{current}->appendChild($node, 1); +} + +sub parse_attlist { + my $self = shift; + my $e = shift; + my ($elname, $attname, $type, $default, $fixed) = @_; + if ($type eq 'ID') { + $self->{IdNames}{$elname} = $attname; + } +} + +sub _scan_namespaces { + my ($self, %attributes) = @_; + + while (my ($attr_name, $value) = each %attributes) { + if ($attr_name eq 'xmlns') { + $self->{InScopeNamespaceStack}[-1]{'_Default'} = $value; + } elsif ($attr_name =~ /^xmlns:(.*)$/) { + my $prefix = $1; + $self->{InScopeNamespaceStack}[-1]{$prefix} = $value; + } + } +} + +sub _namespace { + my ($self, $name) = @_; + + my ($prefix, $localname) = split(/:/, $name); + if (!defined($localname)) { + if ($prefix eq 'xmlns') { + return '', undef; + } else { + return '', $self->{InScopeNamespaceStack}[-1]{'_Default'}; + } + } else { + return $prefix, $self->{InScopeNamespaceStack}[-1]{$prefix}; + } +} + +sub as_string { + my $node = shift; + $node->toString; +} + +sub get_parser { shift->{_parser}; } +sub get_filename { shift->{_filename}; } +sub get_xml { shift->{_xml}; } +sub get_ioref { shift->{_ioref}; } + +sub set_parser { $_[0]->{_parser} = $_[1]; } +sub set_filename { $_[0]->{_filename} = $_[1]; } +sub set_xml { $_[0]->{_xml} = $_[1]; } +sub set_ioref { $_[0]->{_ioref} = $_[1]; } + +1; + +__END__ + +=head1 NAME + +XML::XPath::XMLParser - The default XML parsing class that produces a node tree + +=head1 SYNOPSIS + + my $parser = XML::XPath::XMLParser->new( + filename => $self->get_filename, + xml => $self->get_xml, + ioref => $self->get_ioref, + parser => $self->get_parser, + ); + my $root_node = $parser->parse; + +=head1 DESCRIPTION + +This module generates a node tree for use as the context node for XPath processing. +It aims to be a quick parser, nothing fancy, and yet has to store more information +than most parsers. To achieve this I've used array refs everywhere - no hashes. +I don't have any performance figures for the speedups achieved, so I make no +appologies for anyone not used to using arrays instead of hashes. I think they +make good sense here where we know the attributes of each type of node. + +=head1 Node Structure + +All nodes have the same first 2 entries in the array: node_parent +and node_pos. The type of the node is determined using the ref() function. +The node_parent always contains an entry for the parent of the current +node - except for the root node which has undef in there. And node_pos is the +position of this node in the array that it is in (think: +$node == $node->[node_parent]->[node_children]->[$node->[node_pos]] ) + +Nodes are structured as follows: + +=head2 Root Node + +The root node is just an element node with no parent. + + [ + undef, # node_parent - check for undef to identify root node + undef, # node_pos + undef, # node_prefix + [ ... ], # node_children (see below) + ] + +=head2 Element Node + + [ + $parent, # node_parent + , # node_pos + 'xxx', # node_prefix - namespace prefix on this element + [ ... ], # node_children + 'yyy', # node_name - element tag name + [ ... ], # node_attribs - attributes on this element + [ ... ], # node_namespaces - namespaces currently in scope + ] + +=head2 Attribute Node + + [ + $parent, # node_parent - the element node + , # node_pos + 'xxx', # node_prefix - namespace prefix on this element + 'href', # node_key - attribute name + 'ftp://ftp.com/', # node_value - value in the node + ] + +=head2 Namespace Nodes + +Each element has an associated set of namespace nodes that are currently +in scope. Each namespace node stores a prefix and the expanded name (retrieved +from the xmlns:prefix="..." attribute). + + [ + $parent, + , + 'a', # node_prefix - the namespace as it was written as a prefix + 'http://my.namespace.com', # node_expanded - the expanded name. + ] + +=head2 Text Nodes + + [ + $parent, + , + 'This is some text' # node_text - the text in the node + ] + +=head2 Comment Nodes + + [ + $parent, + , + 'This is a comment' # node_comment + ] + +=head2 Processing Instruction Nodes + + [ + $parent, + , + 'target', # node_target + 'data', # node_data + ] + +=head1 Usage + +If you feel the need to use this module outside of XML::XPath (for example +you might use this module directly so that you can cache parsed trees), you +can follow the following API: + +=head2 new + +The new method takes either no parameters, or any of the following parameters: + + filename + xml + parser + ioref + +This uses the familiar hash syntax, so an example might be: + + use XML::XPath::XMLParser; + + my $parser = XML::XPath::XMLParser->new(filename => 'example.xml'); + +The parameters represent a filename, a string containing XML, an XML::Parser +instance and an open filehandle ref respectively. You can also set or get all +of these properties using the get_ and set_ functions that have the same +name as the property: e.g. get_filename, set_ioref, etc. + +=head2 parse + +The parse method generally takes no parameters, however you are free to +pass either an open filehandle reference or an XML string if you so require. +The return value is a tree that XML::XPath can use. The parse method will +die if there is an error in your XML, so be sure to use perl's exception +handling mechanism (eval{};) if you want to avoid this. + +=head2 parsefile + +The parsefile method is identical to parse() except it expects a single +parameter that is a string naming a file to open and parse. Again it +returns a tree and also dies if there are XML errors. + +=head1 NOTICES + +This file is distributed as part of the XML::XPath module, and is copyright +2000 Fastnet Software Ltd. Please see the documentation for the module as a +whole for licencing information. diff --git a/examples/test.xml b/examples/test.xml new file mode 100644 index 0000000..7a26967 --- /dev/null +++ b/examples/test.xml @@ -0,0 +1,34 @@ + + + + + Matt + Sergeant + + Development IT + + + NextRule1 + NextRule2 + + + + 0.00 + 0.00 + 7.75 + 8.75 + 7.75 + 6.5 + 0.00 + + + 0.00 + 7.75 + 0.00 + 0.00 + 0.00 + 0.00 + 0.00 + + + diff --git a/examples/xpath b/examples/xpath new file mode 100755 index 0000000..bb7f0fc --- /dev/null +++ b/examples/xpath @@ -0,0 +1,83 @@ +#!/usr/bin/perl -w +use strict; + +$| = 1; + +unless (@ARGV >= 1) { + print STDERR qq(Usage: +$0 [filename] query + + If no filename is given, supply XML on STDIN. +); + exit; +} + +use XML::XPath; + +my $xpath; + +my $pipeline; + +if ($ARGV[0] eq '-p') { + # pipeline mode + $pipeline = 1; + shift @ARGV; +} +if (@ARGV >= 2) { + $xpath = XML::XPath->new(filename => shift(@ARGV)); +} +else { + $xpath = XML::XPath->new(ioref => \*STDIN); +} + +my $nodes = $xpath->find(shift @ARGV); + +unless ($nodes->isa('XML::XPath::NodeSet')) { +NOTNODES: + print STDERR "Query didn't return a nodeset. Value: "; + print $nodes->value, "\n"; + exit; +} + +if ($pipeline) { + $nodes = find_more($nodes); + goto NOTNODES unless $nodes->isa('XML::XPath::NodeSet'); +} + +if ($nodes->size) { + print STDERR "Found ", $nodes->size, " nodes:\n"; + foreach my $node ($nodes->get_nodelist) { + print STDERR "-- NODE --\n"; + print $node->toString; + } +} +else { + print STDERR "No nodes found"; +} + +print STDERR "\n"; + +exit; + +sub find_more { + my ($nodes) = @_; + if (!@ARGV) { + return $nodes; + } + + my $newnodes = XML::XPath::NodeSet->new; + + my $find = shift @ARGV; + + foreach my $node ($nodes->get_nodelist) { + my $new = $xpath->find($find, $node); + if ($new->isa('XML::XPath::NodeSet')) { + $newnodes->append($new); + } + else { + warn "Not a nodeset: ", $new->value, "\n"; + } + } + + return find_more($newnodes); +} diff --git a/t/01basic.t b/t/01basic.t new file mode 100644 index 0000000..1c68c12 --- /dev/null +++ b/t/01basic.t @@ -0,0 +1,33 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; + +ok(1); +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @root = $xp->findnodes('/AAA'); +ok(@root, 1); + +my @ccc = $xp->findnodes('/AAA/CCC'); +ok(@ccc, 3); + +my @bbb = $xp->findnodes('/AAA/DDD/BBB'); +ok(@bbb, 2); + +__DATA__ + + + + + + + + + + Text + + + + diff --git a/t/02descendant.t b/t/02descendant.t new file mode 100644 index 0000000..6c83e46 --- /dev/null +++ b/t/02descendant.t @@ -0,0 +1,23 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @bbb = $xp->findnodes('//BBB'); +ok(@bbb, 5); + +my @subbbb = $xp->findnodes('//DDD/BBB'); +ok(@subbbb, 3); + +__DATA__ + + + + + + + diff --git a/t/03star.t b/t/03star.t new file mode 100644 index 0000000..8fc6a4e --- /dev/null +++ b/t/03star.t @@ -0,0 +1,26 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; + +@nodes = $xp->findnodes('/AAA/CCC/DDD/*'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('/*/*/*/BBB'); +ok(@nodes, 5); + +@nodes = $xp->findnodes('//*'); +ok(@nodes, 17); + +__DATA__ + + + + + diff --git a/t/04pos.t b/t/04pos.t new file mode 100644 index 0000000..8552ab2 --- /dev/null +++ b/t/04pos.t @@ -0,0 +1,22 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my $first = $xp->findvalue('/AAA/BBB[1]/@id'); +ok($first, "first"); + +my $last = $xp->findvalue('/AAA/BBB[last()]/@id'); +ok($last, "last"); + +__DATA__ + + + + + + diff --git a/t/05attrib.t b/t/05attrib.t new file mode 100644 index 0000000..62c7370 --- /dev/null +++ b/t/05attrib.t @@ -0,0 +1,28 @@ +use Test; +BEGIN { plan tests => 6 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @ids = $xp->findnodes('//BBB[@id]'); +ok(@ids, 2); + +my @names = $xp->findnodes('//BBB[@name]'); +ok(@names, 1); + +my @attribs = $xp->findnodes('//BBB[@*]'); +ok(@attribs, 3); + +my @noattribs = $xp->findnodes('//BBB[not(@*)]'); +ok(@noattribs, 1); + +__DATA__ + + + + + + diff --git a/t/06attrib_val.t b/t/06attrib_val.t new file mode 100644 index 0000000..b659bf5 --- /dev/null +++ b/t/06attrib_val.t @@ -0,0 +1,25 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//BBB[@id = "b1"]'); +ok(@nodes, 1); + +@nodes = $xp->findnodes('//BBB[@name = "bbb"]'); +ok(@nodes, 1); + +@nodes = $xp->findnodes('//BBB[normalize-space(@name) = "bbb"]'); +ok(@nodes, 2); + +__DATA__ + + + + + diff --git a/t/07count.t b/t/07count.t new file mode 100644 index 0000000..6c0cae9 --- /dev/null +++ b/t/07count.t @@ -0,0 +1,27 @@ +use Test; +BEGIN { plan tests => 7 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//*[count(BBB) = 2]'); +ok($nodes[0]->getName, "DDD"); + +@nodes = $xp->findnodes('//*[count(*) = 2]'); +ok(@nodes, 2); + +@nodes = $xp->findnodes('//*[count(*) = 3]'); +ok(@nodes, 2); +ok($nodes[0]->getName, "AAA"); +ok($nodes[1]->getName, "CCC"); + +__DATA__ + + + + + diff --git a/t/08name.t b/t/08name.t new file mode 100644 index 0000000..15eb7fb --- /dev/null +++ b/t/08name.t @@ -0,0 +1,25 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//*[name() = "BBB"]'); +ok(@nodes, 5); + +@nodes = $xp->findnodes('//*[starts-with(name(), "B")]'); +ok(@nodes, 7); + +@nodes = $xp->findnodes('//*[contains(name(), "C")]'); +ok(@nodes, 3); + +__DATA__ + + + + + diff --git a/t/09a_string_length.t b/t/09a_string_length.t new file mode 100644 index 0000000..0a1b806 --- /dev/null +++ b/t/09a_string_length.t @@ -0,0 +1,30 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; + +my $doc_one = qq|para one|; + +my $xp = XML::XPath->new(xml => $doc_one); +ok($xp); + +my $doc_one_chars = $xp->find('string-length(/doc/text())'); +ok($doc_one_chars == 0, 1); + +my $doc_two = qq| + + para one has bold text + +|; + +$xp = undef; + +$xp = XML::XPath->new(xml => $doc_two); +ok($xp); + +my $doc_two_chars = $xp->find('string-length(/doc/text())'); +ok($doc_two_chars == 3, 1); + +my $doc_two_para_chars = $xp->find('string-length(/doc/para/text())'); +ok($doc_two_para_chars == 13, 1); + diff --git a/t/09string_length.t b/t/09string_length.t new file mode 100644 index 0000000..b08bee4 --- /dev/null +++ b/t/09string_length.t @@ -0,0 +1,28 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//*[string-length(name()) = 3]'); +ok(@nodes, 2); + +@nodes = $xp->findnodes('//*[string-length(name()) < 3]'); +ok(@nodes, 2); + +@nodes = $xp->findnodes('//*[string-length(name()) > 3]'); +ok(@nodes, 3); + +__DATA__ + + + + + + + + diff --git a/t/10pipe.t b/t/10pipe.t new file mode 100644 index 0000000..85a2677 --- /dev/null +++ b/t/10pipe.t @@ -0,0 +1,27 @@ +use Test; +BEGIN { plan tests => 6, todo => [] } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//CCC | //BBB'); +ok(@nodes, 3); +ok($nodes[0]->getName, "BBB"); # test document order + +@nodes = $xp->findnodes('/AAA/EEE | //BBB'); +ok(@nodes, 2); + +@nodes = $xp->findnodes('/AAA/EEE | //DDD/CCC | /AAA | //BBB'); +ok(@nodes, 4); + +__DATA__ + + + + + + diff --git a/t/11axischild.t b/t/11axischild.t new file mode 100644 index 0000000..58058a1 --- /dev/null +++ b/t/11axischild.t @@ -0,0 +1,18 @@ +use Test; +BEGIN { plan tests => 6 } + +use XML::XPath::Parser; + +ok(1); + +my $xp = XML::XPath::Parser->new(); +ok($xp); + +ok($xp->parse('/AAA')->as_string, "(/child::AAA)"); + +ok($xp->parse('/AAA/BBB')->as_string, "(/child::AAA/child::BBB)"); + +ok($xp->parse('/child::AAA/child::BBB')->as_string, + "(/child::AAA/child::BBB)"); + +ok($xp->parse('/child::AAA/BBB')->as_string, "(/child::AAA/child::BBB)"); diff --git a/t/12axisdescendant.t b/t/12axisdescendant.t new file mode 100644 index 0000000..e08992b --- /dev/null +++ b/t/12axisdescendant.t @@ -0,0 +1,27 @@ +use Test; +BEGIN { plan tests => 6 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/descendant::*'); +ok(@nodes, 11); + +@nodes = $xp->findnodes('/AAA/BBB/descendant::*'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('//CCC/descendant::*'); +ok(@nodes, 6); + +@nodes = $xp->findnodes('//CCC/descendant::DDD'); +ok(@nodes, 3); + +__DATA__ + + + + diff --git a/t/13axisparent.t b/t/13axisparent.t new file mode 100644 index 0000000..21d7706 --- /dev/null +++ b/t/13axisparent.t @@ -0,0 +1,19 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//DDD/parent::*'); +ok(@nodes, 4); +ok($nodes[3]->getName, "EEE"); + +__DATA__ + + + + diff --git a/t/14axisancestor.t b/t/14axisancestor.t new file mode 100644 index 0000000..c10e5b5 --- /dev/null +++ b/t/14axisancestor.t @@ -0,0 +1,22 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/BBB/DDD/CCC/EEE/ancestor::*'); +ok(@nodes, 4); +ok($nodes[1]->getName, "BBB"); # test document order + +@nodes = $xp->findnodes('//FFF/ancestor::*'); +ok(@nodes, 5); + +__DATA__ + + + + diff --git a/t/15axisfol_sib.t b/t/15axisfol_sib.t new file mode 100644 index 0000000..494f326 --- /dev/null +++ b/t/15axisfol_sib.t @@ -0,0 +1,24 @@ +use Test; +BEGIN { plan tests => 6 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/BBB/following-sibling::*'); +ok(@nodes, 2); +ok($nodes[1]->getName, "CCC"); # test document order + +@nodes = $xp->findnodes('//CCC/following-sibling::*'); +ok(@nodes, 3); +ok($nodes[1]->getName, "FFF"); + +__DATA__ + + + + + diff --git a/t/16axisprec_sib.t b/t/16axisprec_sib.t new file mode 100644 index 0000000..b1bbc6e --- /dev/null +++ b/t/16axisprec_sib.t @@ -0,0 +1,44 @@ +use Test; +BEGIN { plan tests => 7 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/XXX/preceding-sibling::*'); +ok(@nodes, 1); +ok($nodes[0]->getName, "BBB"); + +@nodes = $xp->findnodes('//CCC/preceding-sibling::*'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('/AAA/CCC/preceding-sibling::*[1]'); +ok($nodes[0]->getName, "XXX"); + +@nodes = $xp->findnodes('/AAA/CCC/preceding-sibling::*[2]'); +ok($nodes[0]->getName, "BBB"); + +__DATA__ + + + + + + + + + + + + + + + + + + + + diff --git a/t/17axisfollowing.t b/t/17axisfollowing.t new file mode 100644 index 0000000..e85322c --- /dev/null +++ b/t/17axisfollowing.t @@ -0,0 +1,45 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/XXX/following::*'); +ok(@nodes, 2); + +@nodes = $xp->findnodes('//ZZZ/following::*'); +ok(@nodes, 12); + +__DATA__ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/t/18axispreceding.t b/t/18axispreceding.t new file mode 100644 index 0000000..97e4e18 --- /dev/null +++ b/t/18axispreceding.t @@ -0,0 +1,39 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/XXX/preceding::*'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('//GGG/preceding::*'); +ok(@nodes, 8); + +__DATA__ + + + + + + + + + + + + + + + + + + + + + + diff --git a/t/19axisd_or_s.t b/t/19axisd_or_s.t new file mode 100644 index 0000000..8fc455d --- /dev/null +++ b/t/19axisd_or_s.t @@ -0,0 +1,22 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/XXX/descendant-or-self::*'); +ok(@nodes, 8); + +@nodes = $xp->findnodes('//CCC/descendant-or-self::*'); +ok(@nodes, 4); + +__DATA__ + + + + + diff --git a/t/20axisa_or_s.t b/t/20axisa_or_s.t new file mode 100644 index 0000000..d5e767a --- /dev/null +++ b/t/20axisa_or_s.t @@ -0,0 +1,22 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('/AAA/XXX/DDD/EEE/ancestor-or-self::*'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('//GGG/ancestor-or-self::*'); +ok(@nodes, 5); + +__DATA__ + + + + + diff --git a/t/21allnodes.t b/t/21allnodes.t new file mode 100644 index 0000000..1010538 --- /dev/null +++ b/t/21allnodes.t @@ -0,0 +1,60 @@ +use Test; +BEGIN { plan tests => 11 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//GGG/ancestor::*'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('//GGG/descendant::*'); +ok(@nodes, 3); + +@nodes = $xp->findnodes('//GGG/following::*'); +ok(@nodes, 3); +ok($nodes[0]->getName, "VVV"); + +@nodes = $xp->findnodes('//GGG/preceding::*'); +ok(@nodes, 5); +ok($nodes[0]->getName, "BBB"); # document order, not HHH + +@nodes = $xp->findnodes('//GGG/self::*'); +ok(@nodes, 1); +ok($nodes[0]->getName, "GGG"); + +@nodes = $xp->findnodes('//GGG/ancestor::* | + //GGG/descendant::* | + //GGG/following::* | + //GGG/preceding::* | + //GGG/self::*'); +ok(@nodes, 16); + +__DATA__ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/t/22name_select.t b/t/22name_select.t new file mode 100644 index 0000000..5977540 --- /dev/null +++ b/t/22name_select.t @@ -0,0 +1,23 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//*[name() = /AAA/SELECT]'); +ok(@nodes, 2); +ok($nodes[0]->getName, "BBB"); + +__DATA__ + + + + + + + + diff --git a/t/23func.t b/t/23func.t new file mode 100644 index 0000000..e2514c3 --- /dev/null +++ b/t/23func.t @@ -0,0 +1,41 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//BBB[position() mod 2 = 0 ]'); +ok(@nodes, 4); + +@nodes = $xp->findnodes('//BBB + [ position() = floor(last() div 2 + 0.5) + or + position() = ceiling(last() div 2 + 0.5) ]'); + +ok(@nodes, 2); + +@nodes = $xp->findnodes('//CCC + [ position() = floor(last() div 2 + 0.5) + or + position() = ceiling(last() div 2 + 0.5) ]'); + +ok(@nodes, 1); + +__DATA__ + + + + + + + + + + + + + diff --git a/t/24namespaces.t b/t/24namespaces.t new file mode 100644 index 0000000..b77f9dc --- /dev/null +++ b/t/24namespaces.t @@ -0,0 +1,56 @@ +use Test; +BEGIN { plan tests => 9 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; + +# Don't set namespace prefixes - uses element context namespaces + +@nodes = $xp->findnodes('//foo:foo'); # should find foobar.com foos +ok(@nodes, 3); + +@nodes = $xp->findnodes('//goo:foo'); # should find no foos +ok(@nodes, 0); + +@nodes = $xp->findnodes('//foo'); # should find default NS foos +ok(@nodes, 2); + +# Set namespace mappings. + +$xp->set_namespace("foo" => "flubber.example.com"); +$xp->set_namespace("goo" => "foobar.example.com"); + +# warn "TEST 6\n"; +@nodes = $xp->findnodes('//foo:foo'); # should find flubber.com foos +# warn "found: ", scalar @nodes, "\n"; +ok(@nodes, 2); + +@nodes = $xp->findnodes('//goo:foo'); # should find foobar.com foos +ok(@nodes, 3); + +@nodes = $xp->findnodes('//foo'); # should find default NS foos +ok(@nodes, 2); + +ok($xp->findvalue('//attr:node/@attr:findme'), 'someval'); + +__DATA__ + + + + + + + + + + + + + diff --git a/t/25scope.t b/t/25scope.t new file mode 100644 index 0000000..df58f0b --- /dev/null +++ b/t/25scope.t @@ -0,0 +1,27 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +eval +{ + # Removing the 'my' makes this work?!? + my $xp = XML::XPath->new(xml => ''); + ok($xp); + + $xp->findnodes('/test'); + + ok(1); + + die "This should be caught\n"; + +}; + +if ($@) +{ + ok(1); +} +else { + ok(0); +} diff --git a/t/26predicate.t b/t/26predicate.t new file mode 100644 index 0000000..8312c0a --- /dev/null +++ b/t/26predicate.t @@ -0,0 +1,26 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @bbb = $xp->findnodes('//a/b[2]'); +ok(@bbb, 2); + +@bbb = $xp->findnodes('(//a/b)[2]'); +ok(@bbb, 1); + +__DATA__ + + + some 1 + value 1 + + + some 2 + value 2 + + diff --git a/t/27asxml.t b/t/27asxml.t new file mode 100644 index 0000000..d2a887c --- /dev/null +++ b/t/27asxml.t @@ -0,0 +1,13 @@ +use Test; +BEGIN { plan tests => 3 } + +use XML::XPath; +ok(1); + +my $parser = XML::XPath::Parser->new(); +ok($parser); + +my $path = $parser->parse('/foo[position() < 1]/bar[$variable = 3]'); +ok($path); + +# warn("Path: ", $path->as_xml(), "\n"); diff --git a/t/28ancestor2.t b/t/28ancestor2.t new file mode 100644 index 0000000..1b90f15 --- /dev/null +++ b/t/28ancestor2.t @@ -0,0 +1,37 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @nodes; +@nodes = $xp->findnodes('//Footnote'); +ok(@nodes, 1); + +my $footnote = $nodes[0]; + +@nodes = $footnote->findnodes('ancestor::*'); +ok(@nodes, 3); + +@nodes = $footnote->findnodes('ancestor::text:footnote'); +ok(@nodes, 1); + +__DATA__ + + +2 + +AxKit +is very flexible in how it lets you transform the XML on the +server, and there are many modules you can plug in to AxKit to +allow you to do these transformations. For this reason, the AxKit +installation does not mandate any particular modules to use, +instead it will simply suggest modules that might help when you +install AxKit. + + + diff --git a/t/29desc_with_predicate.t b/t/29desc_with_predicate.t new file mode 100644 index 0000000..b6675ba --- /dev/null +++ b/t/29desc_with_predicate.t @@ -0,0 +1,21 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @bbb = $xp->findnodes('/descendant::BBB[1]'); +ok(@bbb, 1); +ok($bbb[0]->string_value, "OK"); + +__DATA__ + +OK + + + +NOT OK + diff --git a/t/30lang.t b/t/30lang.t new file mode 100644 index 0000000..a8c5dc6 --- /dev/null +++ b/t/30lang.t @@ -0,0 +1,20 @@ +use Test; +BEGIN { plan tests => 4 } + +use XML::XPath; +ok(1); + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my @en = $xp->findnodes('//*[lang("en")]'); +ok(@en, 2); + +my @de = $xp->findnodes('//content[lang("de")]'); +ok(@de, 1); + +__DATA__ + + Here we go... + und hier deutschsprachiger Text :-) + diff --git a/t/insert.t b/t/insert.t new file mode 100644 index 0000000..d9b0e59 --- /dev/null +++ b/t/insert.t @@ -0,0 +1,53 @@ +#!/usr/bin/perl + +use Test; +BEGIN { plan tests => 8 } + +use XML::XPath; +use XML::XPath::Node::Comment; +#$XML::XPath::SafeMode = 1; + +ok(1); +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my ($root) = $xp->findnodes('/'); + +ok($root); + +($root) = $root->getChildNodes; +my @nodes = $root->findnodes('//Cart'); + +ok(@nodes, 2); + +my $comment = XML::XPath::Node::Comment->new("Before Comment"); + +$root->insertBefore($comment, $nodes[0]); + +my $other_comment = XML::XPath::Node::Comment->new("After Comment"); + +$root->insertAfter($other_comment, $nodes[0]); + +@nodes = $xp->findnodes('/Shop/node()'); + +# foreach (@nodes) { +# print STDERR $_->toString; +# } + +ok($nodes[1]->isCommentNode); +ok($nodes[3]->isCommentNode); + +my ($before) = $xp->findnodes('/Shop/comment()[contains( string() , "Before")]'); +ok($before->get_pos, 1); + +my ($after) = $xp->findnodes('/Shop/comment()[contains( string() , "After")]'); +ok($after->get_pos, 3); + + +__DATA__ + + + + + + diff --git a/t/rdf.t b/t/rdf.t new file mode 100644 index 0000000..a477a03 --- /dev/null +++ b/t/rdf.t @@ -0,0 +1,59 @@ +use Test; +BEGIN { plan tests => 5 } + +use XML::XPath; + +#$XML::XPath::Debug = 1; +#$XML::XPath::SafeMode = 1; + +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my $nodeset = $xp->find('/rdf:RDF/channel//@rdf:*'); +ok($nodeset); + +ok($nodeset->size); + +ok(4); +ok(5); + +__DATA__ + + + + + Meerkat + http://meerkat.oreillynet.com + Meerkat: An Open Wire Service + + + + + Meerkat Powered! + http://meerkat.oreillynet.com/icons/meerkat-powered.jpg + http://meerkat.oreillynet.com + + + + + XML: A Disruptive Technology + http://c.moreover.com/click/here.pl?r123 + + XML is placing increasingly heavy loads on the existing technical + infrastructure of the Internet. + + + + + + Search XML.com + Search XML.com's XML collection + s + http://search.xml.com + + + diff --git a/t/remove.t b/t/remove.t new file mode 100644 index 0000000..ac09453 --- /dev/null +++ b/t/remove.t @@ -0,0 +1,44 @@ +#!/usr/bin/perl + +use Test; +BEGIN { plan tests => 7 } + +use XML::XPath; +use XML::XPath::XMLParser; +$XML::XPath::SafeMode = 1; + +ok(1); +my $xp = XML::XPath->new(ioref => *DATA); +ok($xp); + +my ($root) = $xp->findnodes('/'); + +ok($root); + +($root) = $root->getChildNodes; +my @nodes = $xp->findnodes('//Cart',$root); + +ok(@nodes, 2); + +$root->removeChild($nodes[0]); + +@nodes = $xp->findnodes('//Cart', $root); +ok(@nodes, 1); + +my $cart = $nodes[0]; + +@nodes = $xp->findnodes('//Cart/@*', $root); +ok(@nodes, 2); + +$cart->removeAttribute('crap'); +@nodes = $xp->findnodes('//Cart/@*', $root); + +ok(@nodes, 1); + +__DATA__ + + + + + + diff --git a/t/stress.t b/t/stress.t new file mode 100644 index 0000000..1f9b351 --- /dev/null +++ b/t/stress.t @@ -0,0 +1,57 @@ +# $Id: stress.t,v 1.3 2000/04/17 17:08:58 matt Exp $ + +print "1..7\n"; +my $x; $x++; +use XML::XPath; +use XML::XPath::Parser; + +my $xp = XML::XPath->new( filename => 'examples/test.xml' ); + +print "ok $x\n" if $xp; +print "not ok $x\n" unless $xp; +$x++; + +my $pp = XML::XPath::Parser->new(); + +print "ok $x\n" if $pp; +print "not ok $x\n" unless $pp; +$x++; + +# test path parse time +for (1..5000) { + $pp->parse('//project/wednesday'); +} + +print "ok $x\n" if $pp; +print "not ok $x\n" unless $pp; +$x++; + +my $parser = XML::XPath::XMLParser->new( + filename => 'examples/test.xml' + ); + +print "ok $x\n" if $parser; +print "not ok $x\n" unless $parser; +$x++; + +my $root = $parser->parse; + +print "ok $x\n" if $root; +print "not ok $x\n" unless $root; +$x++; + +# test evaluation time +my $path = $pp->parse('/timesheet/projects/project/wednesday'); + +print "ok $x\n" if $path; +print "not ok $x\n" unless $path; +$x++; + +for (1..1000) { + $path->evaluate($root); +} + +print "ok $x\n"; +$x++; + + -- cgit v1.2.1