summaryrefslogtreecommitdiff
path: root/XPath/PerlSAX.pm
diff options
context:
space:
mode:
Diffstat (limited to 'XPath/PerlSAX.pm')
-rw-r--r--XPath/PerlSAX.pm166
1 files changed, 166 insertions, 0 deletions
diff --git a/XPath/PerlSAX.pm b/XPath/PerlSAX.pm
new file mode 100644
index 0000000..669f101
--- /dev/null
+++ b/XPath/PerlSAX.pm
@@ -0,0 +1,166 @@
+# $Id: PerlSAX.pm,v 1.6 2000/02/28 10:40:21 matt Exp $
+
+package XML::XPath::PerlSAX;
+use XML::XPath::XMLParser;
+use strict;
+
+sub new {
+ my $class = shift;
+ my %args = @_;
+ bless \%args, $class;
+}
+
+sub parse {
+ my $self = shift;
+
+ die "XML::XPath::PerlSAX: parser instance ($self) already parsing\n"
+ if (defined $self->{ParseOptions});
+
+ # If there's one arg and it's an array ref, assume it's a node we're parsing
+ my $args;
+ if (@_ == 1 && ref($_[0]) =~ /^(text|comment|element|namespace|attribute|pi)$/) {
+# warn "Parsing node\n";
+ my $node = shift;
+# warn "PARSING: $node ", XML::XPath::XMLParser::as_string($node), "\n\n";
+ $args = { Source => { Node => $node } };
+ }
+ else {
+ $args = (@_ == 1) ? shift : { @_ };
+ }
+
+ my $parse_options = { %$self, %$args };
+ $self->{ParseOptions} = $parse_options;
+
+ # ensure that we have at least one source
+ if (!defined $parse_options->{Source} ||
+ !defined $parse_options->{Source}{Node}) {
+ die "XML::XPath::PerlSAX: no source defined for parse\n";
+ }
+
+ # assign default Handler to any undefined handlers
+ if (defined $parse_options->{Handler}) {
+ $parse_options->{DocumentHandler} = $parse_options->{Handler}
+ if (!defined $parse_options->{DocumentHandler});
+ }
+
+ # ensure that we have a DocumentHandler
+ if (!defined $parse_options->{DocumentHandler}) {
+ die "XML::XPath::PerlSAX: no Handler or DocumentHandler defined for parse\n";
+ }
+
+ # cache DocumentHandler in self for callbacks
+ $self->{DocumentHandler} = $parse_options->{DocumentHandler};
+
+ if ((ref($parse_options->{Source}{Node}) eq 'element') &&
+ !($parse_options->{Source}{Node}->[node_parent])) {
+ # Got root node
+ $self->{DocumentHandler}->start_document( { } );
+ $self->parse_node($parse_options->{Source}{Node});
+ return $self->{DocumentHandler}->end_document( { } );
+ }
+ else {
+ $self->parse_node($parse_options->{Source}{Node});
+ }
+
+ # clean up parser instance
+ delete $self->{ParseOptions};
+ delete $self->{DocumentHandler};
+
+}
+
+sub parse_node {
+ my $self = shift;
+ my $node = shift;
+# warn "parse_node $node\n";
+ if (ref($node) eq 'element' && $node->[node_parent]) {
+ # bundle up attributes
+ my @attribs;
+ foreach my $attr (@{$node->[node_attribs]}) {
+ if ($attr->[node_prefix]) {
+ push @attribs, $attr->[node_prefix] . ":" . $attr->[node_key];
+ }
+ else {
+ push @attribs, $attr->[node_key];
+ }
+ push @attribs, $attr->[node_value];
+ }
+
+ $self->{DocumentHandler}->start_element(
+ { Name => $node->[node_name],
+ Attributes => \@attribs,
+ }
+ );
+ foreach my $kid (@{$node->[node_children]}) {
+ $self->parse_node($kid);
+ }
+ $self->{DocumentHandler}->end_element(
+ {
+ Name => $node->[node_name],
+ }
+ );
+ }
+ elsif (ref($node) eq 'text') {
+ $self->{DocumentHandler}->characters($node->[node_text]);
+ }
+ elsif (ref($node) eq 'comment') {
+ $self->{DocumentHandler}->comment($node->[node_comment]);
+ }
+ elsif (ref($node) eq 'pi') {
+ $self->{DocumentHandler}->processing_instruction(
+ {
+ Target => $node->[node_target],
+ Data => $node->[node_data]
+ }
+ );
+ }
+ elsif (ref($node) eq 'element') { # root node
+ # just do kids
+ foreach my $kid (@{$node->[node_children]}) {
+ $self->parse_node($kid);
+ }
+ }
+ else {
+ die "Unknown node type: '", ref($node), "' ", scalar(@$node), "\n";
+ }
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::XPath::PerlSAX - A PerlSAX event generator for my wierd node structure
+
+=head1 SYNOPSIS
+
+ use XML::XPath;
+ use XML::XPath::PerlSAX;
+ use XML::DOM::PerlSAX;
+
+ my $xp = XML::XPath->new(filename => 'test.xhtml');
+ my $paras = $xp->find('/html/body/p');
+
+ my $handler = XML::DOM::PerlSAX->new();
+ my $generator = XML::XPath::PerlSAX->new( Handler => $handler );
+
+ foreach my $node ($paras->get_nodelist) {
+ my $domtree = $generator->parse($node);
+ # do something with $domtree
+ }
+
+=head1 DESCRIPTION
+
+This module generates PerlSAX events to pass to a PerlSAX handler such
+as XML::DOM::PerlSAX. It operates specifically on my wierd tree format.
+
+Unfortunately SAX doesn't seem to cope with namespaces, so these are
+lost completely. I believe SAX2 is doing namespaces.
+
+=head1 Other
+
+The XML::DOM::PerlSAX handler I tried was completely broken (didn't even
+compile before I patched it a bit), so I don't know how correct this
+is or how far it will work.
+
+This software may only be distributed as part of the XML::XPath package.