summaryrefslogtreecommitdiff
path: root/Expat
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-05-21 16:44:15 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-05-21 16:44:15 +0100
commit891c29af147fcbe6c4dd5d8ffbbb426665d4b558 (patch)
treecd26f770e9f8dc426e40761fc50da03ac1a18921 /Expat
downloadXML-Parser-891c29af147fcbe6c4dd5d8ffbbb426665d4b558.tar.gz
Tarball conversion
Diffstat (limited to 'Expat')
-rw-r--r--Expat/Expat.pm1234
-rw-r--r--Expat/Expat.xs2214
-rw-r--r--Expat/Makefile.PL29
-rw-r--r--Expat/encoding.h91
-rw-r--r--Expat/typemap24
5 files changed, 3592 insertions, 0 deletions
diff --git a/Expat/Expat.pm b/Expat/Expat.pm
new file mode 100644
index 0000000..b1fd920
--- /dev/null
+++ b/Expat/Expat.pm
@@ -0,0 +1,1234 @@
+package XML::Parser::Expat;
+
+require 5.004;
+
+use strict;
+use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path
+ $have_File_Spec);
+use Carp;
+
+require DynaLoader;
+
+@ISA = qw(DynaLoader);
+$VERSION = "2.41";
+
+$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';
+
+%Encoding_Table = ();
+if ($have_File_Spec) {
+ @Encoding_Path = (grep(-d $_,
+ map(File::Spec->catdir($_, qw(XML Parser Encodings)),
+ @INC)),
+ File::Spec->curdir);
+}
+else {
+ @Encoding_Path = (grep(-d $_, map($_ . '/XML/Parser/Encodings', @INC)), '.');
+}
+
+
+bootstrap XML::Parser::Expat $VERSION;
+
+%Handler_Setters = (
+ Start => \&SetStartElementHandler,
+ End => \&SetEndElementHandler,
+ Char => \&SetCharacterDataHandler,
+ Proc => \&SetProcessingInstructionHandler,
+ Comment => \&SetCommentHandler,
+ CdataStart => \&SetStartCdataHandler,
+ CdataEnd => \&SetEndCdataHandler,
+ Default => \&SetDefaultHandler,
+ Unparsed => \&SetUnparsedEntityDeclHandler,
+ Notation => \&SetNotationDeclHandler,
+ ExternEnt => \&SetExternalEntityRefHandler,
+ ExternEntFin => \&SetExtEntFinishHandler,
+ Entity => \&SetEntityDeclHandler,
+ Element => \&SetElementDeclHandler,
+ Attlist => \&SetAttListDeclHandler,
+ Doctype => \&SetDoctypeHandler,
+ DoctypeFin => \&SetEndDoctypeHandler,
+ XMLDecl => \&SetXMLDeclHandler
+ );
+
+sub new {
+ my ($class, %args) = @_;
+ my $self = bless \%args, $_[0];
+ $args{_State_} = 0;
+ $args{Context} = [];
+ $args{Namespaces} ||= 0;
+ $args{ErrorMessage} ||= '';
+ if ($args{Namespaces}) {
+ $args{Namespace_Table} = {};
+ $args{Namespace_List} = [undef];
+ $args{Prefix_Table} = {};
+ $args{New_Prefixes} = [];
+ }
+ $args{_Setters} = \%Handler_Setters;
+ $args{Parser} = ParserCreate($self, $args{ProtocolEncoding},
+ $args{Namespaces});
+ $self;
+}
+
+sub load_encoding {
+ my ($file) = @_;
+
+ $file =~ s!([^/]+)$!\L$1\E!;
+ $file .= '.enc' unless $file =~ /\.enc$/;
+ unless ($file =~ m!^/!) {
+ foreach (@Encoding_Path) {
+ my $tmp = ($have_File_Spec
+ ? File::Spec->catfile($_, $file)
+ : "$_/$file");
+ if (-e $tmp) {
+ $file = $tmp;
+ last;
+ }
+ }
+ }
+
+ local(*ENC);
+ open(ENC, $file) or croak("Couldn't open encmap $file:\n$!\n");
+ binmode(ENC);
+ my $data;
+ my $br = sysread(ENC, $data, -s $file);
+ croak("Trouble reading $file:\n$!\n")
+ unless defined($br);
+ close(ENC);
+
+ my $name = LoadEncoding($data, $br);
+ croak("$file isn't an encmap file")
+ unless defined($name);
+
+ $name;
+} # End load_encoding
+
+sub setHandlers {
+ my ($self, @handler_pairs) = @_;
+
+ croak("Uneven number of arguments to setHandlers method")
+ if (int(@handler_pairs) & 1);
+
+ my @ret;
+
+ while (@handler_pairs) {
+ my $type = shift @handler_pairs;
+ my $handler = shift @handler_pairs;
+ croak "Handler for $type not a Code ref"
+ unless (! defined($handler) or ! $handler or ref($handler) eq 'CODE');
+
+ my $hndl = $self->{_Setters}->{$type};
+
+ unless (defined($hndl)) {
+ my @types = sort keys %{$self->{_Setters}};
+ croak("Unknown Expat handler type: $type\n Valid types: @types");
+ }
+
+ my $old = &$hndl($self->{Parser}, $handler);
+ push (@ret, $type, $old);
+ }
+
+ return @ret;
+}
+
+sub xpcroak
+ {
+ my ($self, $message) = @_;
+
+ my $eclines = $self->{ErrorContext};
+ my $line = GetCurrentLineNumber($_[0]->{Parser});
+ $message .= " at line $line";
+ $message .= ":\n" . $self->position_in_context($eclines)
+ if defined($eclines);
+ croak $message;
+}
+
+sub xpcarp {
+ my ($self, $message) = @_;
+
+ my $eclines = $self->{ErrorContext};
+ my $line = GetCurrentLineNumber($_[0]->{Parser});
+ $message .= " at line $line";
+ $message .= ":\n" . $self->position_in_context($eclines)
+ if defined($eclines);
+ carp $message;
+}
+
+sub default_current {
+ my $self = shift;
+ if ($self->{_State_} == 1) {
+ return DefaultCurrent($self->{Parser});
+ }
+}
+
+sub recognized_string {
+ my $self = shift;
+ if ($self->{_State_} == 1) {
+ return RecognizedString($self->{Parser});
+ }
+}
+
+sub original_string {
+ my $self = shift;
+ if ($self->{_State_} == 1) {
+ return OriginalString($self->{Parser});
+ }
+}
+
+sub current_line {
+ my $self = shift;
+ if ($self->{_State_} == 1) {
+ return GetCurrentLineNumber($self->{Parser});
+ }
+}
+
+sub current_column {
+ my $self = shift;
+ if ($self->{_State_} == 1) {
+ return GetCurrentColumnNumber($self->{Parser});
+ }
+}
+
+sub current_byte {
+ my $self = shift;
+ if ($self->{_State_} == 1) {
+ return GetCurrentByteIndex($self->{Parser});
+ }
+}
+
+sub base {
+ my ($self, $newbase) = @_;
+ my $p = $self->{Parser};
+ my $oldbase = GetBase($p);
+ SetBase($p, $newbase) if @_ > 1;
+ return $oldbase;
+}
+
+sub context {
+ my $ctx = $_[0]->{Context};
+ @$ctx;
+}
+
+sub current_element {
+ my ($self) = @_;
+ @{$self->{Context}} ? $self->{Context}->[-1] : undef;
+}
+
+sub in_element {
+ my ($self, $element) = @_;
+ @{$self->{Context}} ? $self->eq_name($self->{Context}->[-1], $element)
+ : undef;
+}
+
+sub within_element {
+ my ($self, $element) = @_;
+ my $cnt = 0;
+ foreach (@{$self->{Context}}) {
+ $cnt++ if $self->eq_name($_, $element);
+ }
+ return $cnt;
+}
+
+sub depth {
+ my ($self) = @_;
+ int(@{$self->{Context}});
+}
+
+sub element_index {
+ my ($self) = @_;
+
+ if ($self->{_State_} == 1) {
+ return ElementIndex($self->{Parser});
+ }
+}
+
+################
+# Namespace methods
+
+sub namespace {
+ my ($self, $name) = @_;
+ local($^W) = 0;
+ $self->{Namespace_List}->[int($name)];
+}
+
+sub eq_name {
+ my ($self, $nm1, $nm2) = @_;
+ local($^W) = 0;
+
+ int($nm1) == int($nm2) and $nm1 eq $nm2;
+}
+
+sub generate_ns_name {
+ my ($self, $name, $namespace) = @_;
+
+ $namespace ?
+ GenerateNSName($name, $namespace, $self->{Namespace_Table},
+ $self->{Namespace_List})
+ : $name;
+}
+
+sub new_ns_prefixes {
+ my ($self) = @_;
+ if ($self->{Namespaces}) {
+ return @{$self->{New_Prefixes}};
+ }
+ return ();
+}
+
+sub expand_ns_prefix {
+ my ($self, $prefix) = @_;
+
+ if ($self->{Namespaces}) {
+ my $stack = $self->{Prefix_Table}->{$prefix};
+ return (defined($stack) and @$stack) ? $stack->[-1] : undef;
+ }
+
+ return undef;
+}
+
+sub current_ns_prefixes {
+ my ($self) = @_;
+
+ if ($self->{Namespaces}) {
+ my %set = %{$self->{Prefix_Table}};
+
+ if (exists $set{'#default'} and not defined($set{'#default'}->[-1])) {
+ delete $set{'#default'};
+ }
+
+ return keys %set;
+ }
+
+ return ();
+}
+
+
+################################################################
+# Namespace declaration handlers
+#
+
+sub NamespaceStart {
+ my ($self, $prefix, $uri) = @_;
+
+ $prefix = '#default' unless defined $prefix;
+ my $stack = $self->{Prefix_Table}->{$prefix};
+
+ if (defined $stack) {
+ push(@$stack, $uri);
+ }
+ else {
+ $self->{Prefix_Table}->{$prefix} = [$uri];
+ }
+
+ # The New_Prefixes list gets emptied at end of startElement function
+ # in Expat.xs
+
+ push(@{$self->{New_Prefixes}}, $prefix);
+}
+
+sub NamespaceEnd {
+ my ($self, $prefix) = @_;
+
+ $prefix = '#default' unless defined $prefix;
+
+ my $stack = $self->{Prefix_Table}->{$prefix};
+ if (@$stack > 1) {
+ pop(@$stack);
+ }
+ else {
+ delete $self->{Prefix_Table}->{$prefix};
+ }
+}
+
+################
+
+sub specified_attr {
+ my $self = shift;
+
+ if ($self->{_State_} == 1) {
+ return GetSpecifiedAttributeCount($self->{Parser});
+ }
+}
+
+sub finish {
+ my ($self) = @_;
+ if ($self->{_State_} == 1) {
+ my $parser = $self->{Parser};
+ UnsetAllHandlers($parser);
+ }
+}
+
+sub position_in_context {
+ my ($self, $lines) = @_;
+ if ($self->{_State_} == 1) {
+ my $parser = $self->{Parser};
+ my ($string, $linepos) = PositionContext($parser, $lines);
+
+ return '' unless defined($string);
+
+ my $col = GetCurrentColumnNumber($parser);
+ my $ptr = ('=' x ($col - 1)) . '^' . "\n";
+ my $ret;
+ my $dosplit = $linepos < length($string);
+
+ $string .= "\n" unless $string =~ /\n$/;
+
+ if ($dosplit) {
+ $ret = substr($string, 0, $linepos) . $ptr
+ . substr($string, $linepos);
+ } else {
+ $ret = $string . $ptr;
+ }
+
+ return $ret;
+ }
+}
+
+sub xml_escape {
+ my $self = shift;
+ my $text = shift;
+
+ study $text;
+ $text =~ s/\&/\&amp;/g;
+ $text =~ s/</\&lt;/g;
+ foreach (@_) {
+ croak "xml_escape: '$_' isn't a single character" if length($_) > 1;
+
+ if ($_ eq '>') {
+ $text =~ s/>/\&gt;/g;
+ }
+ elsif ($_ eq '"') {
+ $text =~ s/\"/\&quot;/;
+ }
+ elsif ($_ eq "'") {
+ $text =~ s/\'/\&apos;/;
+ }
+ else {
+ my $rep = '&#' . sprintf('x%X', ord($_)) . ';';
+ if (/\W/) {
+ my $ptrn = "\\$_";
+ $text =~ s/$ptrn/$rep/g;
+ }
+ else {
+ $text =~ s/$_/$rep/g;
+ }
+ }
+ }
+ $text;
+}
+
+sub skip_until {
+ my $self = shift;
+ if ($self->{_State_} <= 1) {
+ SkipUntil($self->{Parser}, $_[0]);
+ }
+}
+
+sub release {
+ my $self = shift;
+ ParserRelease($self->{Parser});
+}
+
+sub DESTROY {
+ my $self = shift;
+ ParserFree($self->{Parser});
+}
+
+sub parse {
+ my $self = shift;
+ my $arg = shift;
+ croak "Parse already in progress (Expat)" if $self->{_State_};
+ $self->{_State_} = 1;
+ my $parser = $self->{Parser};
+ my $ioref;
+ my $result = 0;
+
+ if (defined $arg) {
+ local *@;
+ if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
+ $ioref = $arg;
+ } elsif ($] < 5.008 and defined tied($arg)) {
+ require IO::Handle;
+ $ioref = $arg;
+ }
+ else {
+ require IO::Handle;
+ eval {
+ no strict 'refs';
+ $ioref = *{$arg}{IO} if defined *{$arg};
+ };
+ }
+ }
+
+ if (defined($ioref)) {
+ my $delim = $self->{Stream_Delimiter};
+ my $prev_rs;
+ my $ioclass = ref $ioref;
+ $ioclass = "IO::Handle" if !length $ioclass;
+
+ $prev_rs = $ioclass->input_record_separator("\n$delim\n")
+ if defined($delim);
+
+ $result = ParseStream($parser, $ioref, $delim);
+
+ $ioclass->input_record_separator($prev_rs)
+ if defined($delim);
+ } else {
+ $result = ParseString($parser, $arg);
+ }
+
+ $self->{_State_} = 2;
+ $result or croak $self->{ErrorMessage};
+}
+
+sub parsestring {
+ my $self = shift;
+ $self->parse(@_);
+}
+
+sub parsefile {
+ my $self = shift;
+ croak "Parser has already been used" if $self->{_State_};
+ local(*FILE);
+ open(FILE, $_[0]) or croak "Couldn't open $_[0]:\n$!";
+ binmode(FILE);
+ my $ret = $self->parse(*FILE);
+ close(FILE);
+ $ret;
+}
+
+################################################################
+package #hide from PAUSE
+ XML::Parser::ContentModel;
+use overload '""' => \&asString, 'eq' => \&thiseq;
+
+sub EMPTY () {1}
+sub ANY () {2}
+sub MIXED () {3}
+sub NAME () {4}
+sub CHOICE () {5}
+sub SEQ () {6}
+
+
+sub isempty {
+ return $_[0]->{Type} == EMPTY;
+}
+
+sub isany {
+ return $_[0]->{Type} == ANY;
+}
+
+sub ismixed {
+ return $_[0]->{Type} == MIXED;
+}
+
+sub isname {
+ return $_[0]->{Type} == NAME;
+}
+
+sub name {
+ return $_[0]->{Tag};
+}
+
+sub ischoice {
+ return $_[0]->{Type} == CHOICE;
+}
+
+sub isseq {
+ return $_[0]->{Type} == SEQ;
+}
+
+sub quant {
+ return $_[0]->{Quant};
+}
+
+sub children {
+ my $children = $_[0]->{Children};
+ if (defined $children) {
+ return @$children;
+ }
+ return undef;
+}
+
+sub asString {
+ my ($self) = @_;
+ my $ret;
+
+ if ($self->{Type} == NAME) {
+ $ret = $self->{Tag};
+ }
+ elsif ($self->{Type} == EMPTY) {
+ return "EMPTY";
+ }
+ elsif ($self->{Type} == ANY) {
+ return "ANY";
+ }
+ elsif ($self->{Type} == MIXED) {
+ $ret = '(#PCDATA';
+ foreach (@{$self->{Children}}) {
+ $ret .= '|' . $_;
+ }
+ $ret .= ')';
+ }
+ else {
+ my $sep = $self->{Type} == CHOICE ? '|' : ',';
+ $ret = '(' . join($sep, map { $_->asString } @{$self->{Children}}) . ')';
+ }
+
+ $ret .= $self->{Quant} if $self->{Quant};
+ return $ret;
+}
+
+sub thiseq {
+ my $self = shift;
+
+ return $self->asString eq $_[0];
+}
+
+################################################################
+package #hide from PAUSE
+ XML::Parser::ExpatNB;
+
+use vars qw(@ISA);
+use Carp;
+
+@ISA = qw(XML::Parser::Expat);
+
+sub parse {
+ my $self = shift;
+ my $class = ref($self);
+ croak "parse method not supported in $class";
+}
+
+sub parsestring {
+ my $self = shift;
+ my $class = ref($self);
+ croak "parsestring method not supported in $class";
+}
+
+sub parsefile {
+ my $self = shift;
+ my $class = ref($self);
+ croak "parsefile method not supported in $class";
+}
+
+sub parse_more {
+ my ($self, $data) = @_;
+
+ $self->{_State_} = 1;
+ my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);
+
+ croak $self->{ErrorMessage} unless $ret;
+}
+
+sub parse_done {
+ my $self = shift;
+
+ my $ret = XML::Parser::Expat::ParseDone($self->{Parser});
+ unless ($ret) {
+ my $msg = $self->{ErrorMessage};
+ $self->release;
+ croak $msg;
+ }
+
+ $self->{_State_} = 2;
+
+ my $result = $ret;
+ my @result = ();
+ my $final = $self->{FinalHandler};
+ if (defined $final) {
+ if (wantarray) {
+ @result = &$final($self);
+ }
+ else {
+ $result = &$final($self);
+ }
+ }
+
+ $self->release;
+
+ return unless defined wantarray;
+ return wantarray ? @result : $result;
+}
+
+################################################################
+
+package #hide from PAUSE
+ XML::Parser::Encinfo;
+
+sub DESTROY {
+ my $self = shift;
+ XML::Parser::Expat::FreeEncoding($self);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser
+
+=head1 SYNOPSIS
+
+ use XML::Parser::Expat;
+
+ $parser = XML::Parser::Expat->new;
+ $parser->setHandlers('Start' => \&sh,
+ 'End' => \&eh,
+ 'Char' => \&ch);
+ open(FOO, '<', 'info.xml') or die "Couldn't open";
+ $parser->parse(*FOO);
+ close(FOO);
+ # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');
+
+ sub sh
+ {
+ my ($p, $el, %atts) = @_;
+ $p->setHandlers('Char' => \&spec)
+ if ($el eq 'special');
+ ...
+ }
+
+ sub eh
+ {
+ my ($p, $el) = @_;
+ $p->setHandlers('Char' => \&ch) # Special elements won't contain
+ if ($el eq 'special'); # other special elements
+ ...
+ }
+
+=head1 DESCRIPTION
+
+This module provides an interface to James Clark's XML parser, expat. As in
+expat, a single instance of the parser can only parse one document. Calls
+to parsestring after the first for a given instance will die.
+
+Expat (and XML::Parser::Expat) are event based. As the parser recognizes
+parts of the document (say the start or end of an XML element), then any
+handlers registered for that type of an event are called with suitable
+parameters.
+
+=head1 METHODS
+
+=over 4
+
+=item new
+
+This is a class method, the constructor for XML::Parser::Expat. Options are
+passed as keyword value pairs. The recognized options are:
+
+=over 4
+
+=item * ProtocolEncoding
+
+The protocol encoding name. The default is none. The expat built-in
+encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
+Other encodings may be used if they have encoding maps in one of the
+directories in the @Encoding_Path list. Setting the protocol encoding
+overrides any encoding in the XML declaration.
+
+=item * Namespaces
+
+When this option is given with a true value, then the parser does namespace
+processing. By default, namespace processing is turned off. When it is
+turned on, the parser consumes I<xmlns> attributes and strips off prefixes
+from element and attributes names where those prefixes have a defined
+namespace. A name's namespace can be found using the L<"namespace"> method
+and two names can be checked for absolute equality with the L<"eq_name">
+method.
+
+=item * NoExpand
+
+Normally, the parser will try to expand references to entities defined in
+the internal subset. If this option is set to a true value, and a default
+handler is also set, then the default handler will be called when an
+entity reference is seen in text. This has no effect if a default handler
+has not been registered, and it has no effect on the expansion of entity
+references inside attribute values.
+
+=item * Stream_Delimiter
+
+This option takes a string value. When this string is found alone on a line
+while parsing from a stream, then the parse is ended as if it saw an end of
+file. The intended use is with a stream of xml documents in a MIME multipart
+format. The string should not contain a trailing newline.
+
+=item * ErrorContext
+
+When this option is defined, errors are reported in context. The value
+of ErrorContext should be the number of lines to show on either side of
+the line in which the error occurred.
+
+=item * ParseParamEnt
+
+Unless standalone is set to "yes" in the XML declaration, setting this to
+a true value allows the external DTD to be read, and parameter entities
+to be parsed and expanded.
+
+=item * Base
+
+The base to use for relative pathnames or URLs. This can also be done by
+using the base method.
+
+=back
+
+=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
+
+This method registers handlers for the various events. If no handlers are
+registered, then a call to parsestring or parsefile will only determine if
+the corresponding XML document is well formed (by returning without error.)
+This may be called from within a handler, after the parse has started.
+
+Setting a handler to something that evaluates to false unsets that
+handler.
+
+This method returns a list of type, handler pairs corresponding to the
+input. The handlers returned are the ones that were in effect before the
+call to setHandlers.
+
+The recognized events and the parameters passed to the corresponding
+handlers are:
+
+=over 4
+
+=item * Start (Parser, Element [, Attr, Val [,...]])
+
+This event is generated when an XML start tag is recognized. Parser is
+an XML::Parser::Expat instance. Element is the name of the XML element that
+is opened with the start tag. The Attr & Val pairs are generated for each
+attribute in the start tag.
+
+=item * End (Parser, Element)
+
+This event is generated when an XML end tag is recognized. Note that
+an XML empty tag (<foo/>) generates both a start and an end event.
+
+There is always a lower level start and end handler installed that wrap
+the corresponding callbacks. This is to handle the context mechanism.
+A consequence of this is that the default handler (see below) will not
+see a start tag or end tag unless the default_current method is called.
+
+=item * Char (Parser, String)
+
+This event is generated when non-markup is recognized. The non-markup
+sequence of characters is in String. A single non-markup sequence of
+characters may generate multiple calls to this handler. Whatever the
+encoding of the string in the original document, this is given to the
+handler in UTF-8.
+
+=item * Proc (Parser, Target, Data)
+
+This event is generated when a processing instruction is recognized.
+
+=item * Comment (Parser, String)
+
+This event is generated when a comment is recognized.
+
+=item * CdataStart (Parser)
+
+This is called at the start of a CDATA section.
+
+=item * CdataEnd (Parser)
+
+This is called at the end of a CDATA section.
+
+=item * Default (Parser, String)
+
+This is called for any characters that don't have a registered handler.
+This includes both characters that are part of markup for which no
+events are generated (markup declarations) and characters that
+could generate events, but for which no handler has been registered.
+
+Whatever the encoding in the original document, the string is returned to
+the handler in UTF-8.
+
+=item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation)
+
+This is called for a declaration of an unparsed entity. Entity is the name
+of the entity. Base is the base to be used for resolving a relative URI.
+Sysid is the system id. Pubid is the public id. Notation is the notation
+name. Base and Pubid may be undefined.
+
+=item * Notation (Parser, Notation, Base, Sysid, Pubid)
+
+This is called for a declaration of notation. Notation is the notation name.
+Base is the base to be used for resolving a relative URI. Sysid is the system
+id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined.
+
+=item * ExternEnt (Parser, Base, Sysid, Pubid)
+
+This is called when an external entity is referenced. Base is the base to be
+used for resolving a relative URI. Sysid is the system id. Pubid is the public
+id. Base, and Pubid may be undefined.
+
+This handler should either return a string, which represents the contents of
+the external entity, or return an open filehandle that can be read to obtain
+the contents of the external entity, or return undef, which indicates the
+external entity couldn't be found and will generate a parse error.
+
+If an open filehandle is returned, it must be returned as either a glob
+(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle).
+
+=item * ExternEntFin (Parser)
+
+This is called after an external entity has been parsed. It allows
+applications to perform cleanup on actions performed in the above
+ExternEnt handler.
+
+=item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam)
+
+This is called when an entity is declared. For internal entities, the Val
+parameter will contain the value and the remaining three parameters will
+be undefined. For external entities, the Val parameter
+will be undefined, the Sysid parameter will have the system id, the Pubid
+parameter will have the public id if it was provided (it will be undefined
+otherwise), the Ndata parameter will contain the notation for unparsed
+entities. If this is a parameter entity declaration, then the IsParam
+parameter is true.
+
+Note that this handler and the Unparsed handler above overlap. If both are
+set, then this handler will not be called for unparsed entities.
+
+=item * Element (Parser, Name, Model)
+
+The element handler is called when an element declaration is found. Name is
+the element name, and Model is the content model as an
+XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods">
+for methods available for this class.
+
+=item * Attlist (Parser, Elname, Attname, Type, Default, Fixed)
+
+This handler is called for each attribute in an ATTLIST declaration.
+So an ATTLIST declaration that has multiple attributes
+will generate multiple calls to this handler. The Elname parameter is the
+name of the element with which the attribute is being associated. The Attname
+parameter is the name of the attribute. Type is the attribute type, given as
+a string. Default is the default value, which will either be "#REQUIRED",
+"#IMPLIED" or a quoted string (i.e. the returned string will begin and end
+with a quote character). If Fixed is true, then this is a fixed attribute.
+
+=item * Doctype (Parser, Name, Sysid, Pubid, Internal)
+
+This handler is called for DOCTYPE declarations. Name is the document type
+name. Sysid is the system id of the document type, if it was provided,
+otherwise it's undefined. Pubid is the public id of the document type,
+which will be undefined if no public id was given. Internal will be
+true or false, indicating whether or not the doctype declaration contains
+an internal subset.
+
+=item * DoctypeFin (Parser)
+
+This handler is called after parsing of the DOCTYPE declaration has finished,
+including any internal or external DTD declarations.
+
+=item * XMLDecl (Parser, Version, Encoding, Standalone)
+
+This handler is called for XML declarations. Version is a string containg
+the version. Encoding is either undefined or contains an encoding string.
+Standalone is either undefined, or true or false. Undefined indicates
+that no standalone parameter was given in the XML declaration. True or
+false indicates "yes" or "no" respectively.
+
+=back
+
+=item namespace(name)
+
+Return the URI of the namespace that the name belongs to. If the name doesn't
+belong to any namespace, an undef is returned. This is only valid on names
+received through the Start or End handlers from a single document, or through
+a call to the generate_ns_name method. In other words, don't use names
+generated from one instance of XML::Parser::Expat with other instances.
+
+=item eq_name(name1, name2)
+
+Return true if name1 and name2 are identical (i.e. same name and from
+the same namespace.) This is only meaningful if both names were obtained
+through the Start or End handlers from a single document, or through
+a call to the generate_ns_name method.
+
+=item generate_ns_name(name, namespace)
+
+Return a name, associated with a given namespace, good for using with the
+above 2 methods. The namespace argument should be the namespace URI, not
+a prefix.
+
+=item new_ns_prefixes
+
+When called from a start tag handler, returns namespace prefixes declared
+with this start tag. If called elsewere (or if there were no namespace
+prefixes declared), it returns an empty list. Setting of the default
+namespace is indicated with '#default' as a prefix.
+
+=item expand_ns_prefix(prefix)
+
+Return the uri to which the given prefix is currently bound. Returns
+undef if the prefix isn't currently bound. Use '#default' to find the
+current binding of the default namespace (if any).
+
+=item current_ns_prefixes
+
+Return a list of currently bound namespace prefixes. The order of the
+the prefixes in the list has no meaning. If the default namespace is
+currently bound, '#default' appears in the list.
+
+=item recognized_string
+
+Returns the string from the document that was recognized in order to call
+the current handler. For instance, when called from a start handler, it
+will give us the the start-tag string. The string is encoded in UTF-8.
+This method doesn't return a meaningful string inside declaration handlers.
+
+=item original_string
+
+Returns the verbatim string from the document that was recognized in
+order to call the current handler. The string is in the original document
+encoding. This method doesn't return a meaningful string inside declaration
+handlers.
+
+=item default_current
+
+When called from a handler, causes the sequence of characters that generated
+the corresponding event to be sent to the default handler (if one is
+registered). Use of this method is deprecated in favor the recognized_string
+method, which you can use without installing a default handler. This
+method doesn't deliver a meaningful string to the default handler when
+called from inside declaration handlers.
+
+=item xpcroak(message)
+
+Concatenate onto the given message the current line number within the
+XML document plus the message implied by ErrorContext. Then croak with
+the formed message.
+
+=item xpcarp(message)
+
+Concatenate onto the given message the current line number within the
+XML document plus the message implied by ErrorContext. Then carp with
+the formed message.
+
+=item current_line
+
+Returns the line number of the current position of the parse.
+
+=item current_column
+
+Returns the column number of the current position of the parse.
+
+=item current_byte
+
+Returns the current position of the parse.
+
+=item base([NEWBASE]);
+
+Returns the current value of the base for resolving relative URIs. If
+NEWBASE is supplied, changes the base to that value.
+
+=item context
+
+Returns a list of element names that represent open elements, with the
+last one being the innermost. Inside start and end tag handlers, this
+will be the tag of the parent element.
+
+=item current_element
+
+Returns the name of the innermost currently opened element. Inside
+start or end handlers, returns the parent of the element associated
+with those tags.
+
+=item in_element(NAME)
+
+Returns true if NAME is equal to the name of the innermost currently opened
+element. If namespace processing is being used and you want to check
+against a name that may be in a namespace, then use the generate_ns_name
+method to create the NAME argument.
+
+=item within_element(NAME)
+
+Returns the number of times the given name appears in the context list.
+If namespace processing is being used and you want to check
+against a name that may be in a namespace, then use the generate_ns_name
+method to create the NAME argument.
+
+=item depth
+
+Returns the size of the context list.
+
+=item element_index
+
+Returns an integer that is the depth-first visit order of the current
+element. This will be zero outside of the root element. For example,
+this will return 1 when called from the start handler for the root element
+start tag.
+
+=item skip_until(INDEX)
+
+INDEX is an integer that represents an element index. When this method
+is called, all handlers are suspended until the start tag for an element
+that has an index number equal to INDEX is seen. If a start handler has
+been set, then this is the first tag that the start handler will see
+after skip_until has been called.
+
+
+=item position_in_context(LINES)
+
+Returns a string that shows the current parse position. LINES should be
+an integer >= 0 that represents the number of lines on either side of the
+current parse line to place into the returned string.
+
+=item xml_escape(TEXT [, CHAR [, CHAR ...]])
+
+Returns TEXT with markup characters turned into character entities. Any
+additional characters provided as arguments are also turned into character
+references where found in TEXT.
+
+=item parse (SOURCE)
+
+The SOURCE parameter should either be a string containing the whole XML
+document, or it should be an open IO::Handle. Only a single document
+may be parsed for a given instance of XML::Parser::Expat, so this will croak
+if it's been called previously for this instance.
+
+=item parsestring(XML_DOC_STRING)
+
+Parses the given string as an XML document. Only a single document may be
+parsed for a given instance of XML::Parser::Expat, so this will die if either
+parsestring or parsefile has been called for this instance previously.
+
+This method is deprecated in favor of the parse method.
+
+=item parsefile(FILENAME)
+
+Parses the XML document in the given file. Will die if parsestring or
+parsefile has been called previously for this instance.
+
+=item is_defaulted(ATTNAME)
+
+NO LONGER WORKS. To find out if an attribute is defaulted please use
+the specified_attr method.
+
+=item specified_attr
+
+When the start handler receives lists of attributes and values, the
+non-defaulted (i.e. explicitly specified) attributes occur in the list
+first. This method returns the number of specified items in the list.
+So if this number is equal to the length of the list, there were no
+defaulted values. Otherwise the number points to the index of the
+first defaulted attribute name.
+
+=item finish
+
+Unsets all handlers (including internal ones that set context), but expat
+continues parsing to the end of the document or until it finds an error.
+It should finish up a lot faster than with the handlers set.
+
+=item release
+
+There are data structures used by XML::Parser::Expat that have circular
+references. This means that these structures will never be garbage
+collected unless these references are explicitly broken. Calling this
+method breaks those references (and makes the instance unusable.)
+
+Normally, higher level calls handle this for you, but if you are using
+XML::Parser::Expat directly, then it's your responsibility to call it.
+
+=back
+
+=head2 XML::Parser::ContentModel Methods
+
+The element declaration handlers are passed objects of this class as the
+content model of the element declaration. They also represent content
+particles, components of a content model.
+
+When referred to as a string, these objects are automagicly converted to a
+string representation of the model (or content particle).
+
+=over 4
+
+=item isempty
+
+This method returns true if the object is "EMPTY", false otherwise.
+
+=item isany
+
+This method returns true if the object is "ANY", false otherwise.
+
+=item ismixed
+
+This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*",
+false otherwise.
+
+=item isname
+
+This method returns if the object is an element name.
+
+=item ischoice
+
+This method returns true if the object is a choice of content particles.
+
+
+=item isseq
+
+This method returns true if the object is a sequence of content particles.
+
+=item quant
+
+This method returns undef or a string representing the quantifier
+('?', '*', '+') associated with the model or particle.
+
+=item children
+
+This method returns undef or (for mixed, choice, and sequence types)
+an array of component content particles. There will always be at least
+one component for choices and sequences, but for a mixed content model
+of pure PCDATA, "(#PCDATA)", then an undef is returned.
+
+=back
+
+=head2 XML::Parser::ExpatNB Methods
+
+The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used
+for non-blocking access to the expat library. It does not support the parse,
+parsestring, or parsefile methods, but it does have these additional methods:
+
+=over 4
+
+=item parse_more(DATA)
+
+Feed expat more text to munch on.
+
+=item parse_done
+
+Tell expat that it's gotten the whole document.
+
+=back
+
+=head1 FUNCTIONS
+
+=over 4
+
+=item XML::Parser::Expat::load_encoding(ENCODING)
+
+Load an external encoding. ENCODING is either the name of an encoding or
+the name of a file. The basename is converted to lowercase and a '.enc'
+extension is appended unless there's one already there. Then, unless
+it's an absolute pathname (i.e. begins with '/'), the first file by that
+name discovered in the @Encoding_Path path list is used.
+
+The encoding in the file is loaded and kept in the %Encoding_Table
+table. Earlier encodings of the same name are replaced.
+
+This function is automatically called by expat when it encounters an encoding
+it doesn't know about. Expat shouldn't call this twice for the same
+encoding name. The only reason users should use this function is to
+explicitly load an encoding not contained in the @Encoding_Path list.
+
+=back
+
+=head1 AUTHORS
+
+Larry Wall <F<larry@wall.org>> wrote version 1.0.
+
+Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API
+for this version (2.x), provided documentation, and added some standard
+package features.
+
+=cut
diff --git a/Expat/Expat.xs b/Expat/Expat.xs
new file mode 100644
index 0000000..497b64f
--- /dev/null
+++ b/Expat/Expat.xs
@@ -0,0 +1,2214 @@
+/*****************************************************************
+** Expat.xs
+**
+** Copyright 1998 Larry Wall and Clark Cooper
+** All rights reserved.
+**
+** This program is free software; you can redistribute it and/or
+** modify it under the same terms as Perl itself.
+**
+*/
+
+#include <expat.h>
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#undef convert
+
+#include "patchlevel.h"
+#include "encoding.h"
+
+
+/* Version 5.005_5x (Development version for 5.006) doesn't like sv_...
+ anymore, but 5.004 doesn't know about PL_sv..
+ Don't want to push up required version just for this. */
+
+#if PATCHLEVEL < 5
+#define PL_sv_undef sv_undef
+#define PL_sv_no sv_no
+#define PL_sv_yes sv_yes
+#define PL_na na
+#endif
+
+#define BUFSIZE 32768
+
+#define NSDELIM '|'
+
+/* Macro to update handler fields. Used in the various handler setting
+ XSUBS */
+
+#define XMLP_UPD(fld) \
+ RETVAL = cbv->fld ? newSVsv(cbv->fld) : &PL_sv_undef;\
+ if (cbv->fld) {\
+ if (cbv->fld != fld)\
+ sv_setsv(cbv->fld, fld);\
+ }\
+ else\
+ cbv->fld = newSVsv(fld)
+
+/* Macro to push old handler value onto return stack. This is done here
+ to get around a bug in 5.004 sv_2mortal function. */
+
+#define PUSHRET \
+ ST(0) = RETVAL;\
+ if (RETVAL != &PL_sv_undef && SvREFCNT(RETVAL)) sv_2mortal(RETVAL)
+
+typedef struct {
+ SV* self_sv;
+ XML_Parser p;
+
+ AV* context;
+ AV* new_prefix_list;
+ HV *nstab;
+ AV *nslst;
+
+ unsigned int st_serial;
+ unsigned int st_serial_stackptr;
+ unsigned int st_serial_stacksize;
+ unsigned int * st_serial_stack;
+
+ unsigned int skip_until;
+
+ SV *recstring;
+ char * delim;
+ STRLEN delimlen;
+
+ unsigned ns:1;
+ unsigned no_expand:1;
+ unsigned parseparam:1;
+
+ /* Callback handlers */
+
+ SV* start_sv;
+ SV* end_sv;
+ SV* char_sv;
+ SV* proc_sv;
+ SV* cmnt_sv;
+ SV* dflt_sv;
+
+ SV* entdcl_sv;
+ SV* eledcl_sv;
+ SV* attdcl_sv;
+ SV* doctyp_sv;
+ SV* doctypfin_sv;
+ SV* xmldec_sv;
+
+ SV* unprsd_sv;
+ SV* notation_sv;
+
+ SV* extent_sv;
+ SV* extfin_sv;
+
+ SV* startcd_sv;
+ SV* endcd_sv;
+} CallbackVector;
+
+
+static HV* EncodingTable = NULL;
+
+static XML_Char nsdelim[] = {NSDELIM, '\0'};
+
+static char *QuantChar[] = {"", "?", "*", "+"};
+
+/* Forward declarations */
+
+static void suspend_callbacks(CallbackVector *);
+static void resume_callbacks(CallbackVector *);
+
+#if PATCHLEVEL < 5 && SUBVERSION < 5
+
+/* ================================================================
+** This is needed where the length is explicitly given. The expat
+** library may sometimes give us zero-length strings. Perl's newSVpv
+** interprets a zero length as a directive to do a strlen. This
+** function is used when we want to force length to mean length, even
+** if zero.
+*/
+
+static SV *
+newSVpvn(char *s, STRLEN len)
+{
+ register SV *sv;
+
+ sv = newSV(0);
+ sv_setpvn(sv, s, len);
+ return sv;
+} /* End newSVpvn */
+
+#define ERRSV GvSV(errgv)
+#endif
+
+#ifdef SvUTF8_on
+
+static SV *
+newUTF8SVpv(char *s, STRLEN len) {
+ register SV *sv;
+
+ sv = newSVpv(s, len);
+ SvUTF8_on(sv);
+ return sv;
+} /* End new UTF8SVpv */
+
+static SV *
+newUTF8SVpvn(char *s, STRLEN len) {
+ register SV *sv;
+
+ sv = newSV(0);
+ sv_setpvn(sv, s, len);
+ SvUTF8_on(sv);
+ return sv;
+}
+
+#else /* SvUTF8_on not defined */
+
+#define newUTF8SVpv newSVpv
+#define newUTF8SVpvn newSVpvn
+
+#endif
+
+static void*
+mymalloc(size_t size) {
+#ifndef LEAKTEST
+ return safemalloc(size);
+#else
+ return safexmalloc(328,size);
+#endif
+}
+
+static void*
+myrealloc(void *p, size_t s) {
+#ifndef LEAKTEST
+ return saferealloc(p, s);
+#else
+ return safexrealloc(p, s);
+#endif
+}
+
+static void
+myfree(void *p) {
+ Safefree(p);
+}
+
+static XML_Memory_Handling_Suite ms = {mymalloc, myrealloc, myfree};
+
+static void
+append_error(XML_Parser parser, char * err)
+{
+ dSP;
+ CallbackVector * cbv;
+ SV ** errstr;
+
+ cbv = (CallbackVector*) XML_GetUserData(parser);
+ errstr = hv_fetch((HV*)SvRV(cbv->self_sv),
+ "ErrorMessage", 12, 0);
+
+ if (errstr && SvPOK(*errstr)) {
+ SV ** errctx = hv_fetch((HV*) SvRV(cbv->self_sv),
+ "ErrorContext", 12, 0);
+ int dopos = !err && errctx && SvOK(*errctx);
+
+ if (! err)
+ err = (char *) XML_ErrorString(XML_GetErrorCode(parser));
+
+ sv_catpvf(*errstr, "\n%s at line %d, column %d, byte %d%s",
+ err,
+ XML_GetCurrentLineNumber(parser),
+ XML_GetCurrentColumnNumber(parser),
+ XML_GetCurrentByteIndex(parser),
+ dopos ? ":\n" : "");
+
+ if (dopos)
+ {
+ int count;
+
+ ENTER ;
+ SAVETMPS ;
+ PUSHMARK(sp);
+ XPUSHs(cbv->self_sv);
+ XPUSHs(*errctx);
+ PUTBACK ;
+
+ count = perl_call_method("position_in_context", G_SCALAR);
+
+ SPAGAIN ;
+
+ if (count >= 1) {
+ sv_catsv(*errstr, POPs);
+ }
+
+ PUTBACK ;
+ FREETMPS ;
+ LEAVE ;
+ }
+ }
+} /* End append_error */
+
+static SV *
+generate_model(XML_Content *model) {
+ HV * hash = newHV();
+ SV * obj = newRV_noinc((SV *) hash);
+
+ sv_bless(obj, gv_stashpv("XML::Parser::ContentModel", 1));
+
+ hv_store(hash, "Type", 4, newSViv(model->type), 0);
+ if (model->quant != XML_CQUANT_NONE) {
+ hv_store(hash, "Quant", 5, newSVpv(QuantChar[model->quant], 1), 0);
+ }
+
+ switch(model->type) {
+ case XML_CTYPE_NAME:
+ hv_store(hash, "Tag", 3, newUTF8SVpv((char *)model->name, 0), 0);
+ break;
+
+ case XML_CTYPE_MIXED:
+ case XML_CTYPE_CHOICE:
+ case XML_CTYPE_SEQ:
+ if (model->children && model->numchildren)
+ {
+ AV * children = newAV();
+ int i;
+
+ for (i = 0; i < model->numchildren; i++) {
+ av_push(children, generate_model(&model->children[i]));
+ }
+
+ hv_store(hash, "Children", 8, newRV_noinc((SV *) children), 0);
+ }
+ break;
+ }
+
+ return obj;
+} /* End generate_model */
+
+static int
+parse_stream(XML_Parser parser, SV * ioref)
+{
+ dSP;
+ SV * tbuff;
+ SV * tsiz;
+ char * linebuff;
+ STRLEN lblen;
+ STRLEN br = 0;
+ int buffsize;
+ int done = 0;
+ int ret = 1;
+ char * msg = NULL;
+ CallbackVector * cbv;
+ char *buff = (char *) 0;
+
+ cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ ENTER;
+ SAVETMPS;
+
+ if (cbv->delim) {
+ int cnt;
+ SV * tline;
+
+ PUSHMARK(SP);
+ XPUSHs(ioref);
+ PUTBACK ;
+
+ cnt = perl_call_method("getline", G_SCALAR);
+
+ SPAGAIN;
+
+ if (cnt != 1)
+ croak("getline method call failed");
+
+ tline = POPs;
+
+ if (! SvOK(tline)) {
+ lblen = 0;
+ }
+ else {
+ char * chk;
+ linebuff = SvPV(tline, lblen);
+ chk = &linebuff[lblen - cbv->delimlen - 1];
+
+ if (lblen > cbv->delimlen + 1
+ && *chk == *cbv->delim
+ && chk[cbv->delimlen] == '\n'
+ && strnEQ(++chk, cbv->delim + 1, cbv->delimlen - 1))
+ lblen -= cbv->delimlen + 1;
+ }
+
+ PUTBACK ;
+ buffsize = lblen;
+ done = lblen == 0;
+ }
+ else {
+ tbuff = newSV(0);
+ tsiz = newSViv(BUFSIZE);
+ buffsize = BUFSIZE;
+ }
+
+ while (! done)
+ {
+ char *buffer = XML_GetBuffer(parser, buffsize);
+
+ if (! buffer)
+ croak("Ran out of memory for input buffer");
+
+ SAVETMPS;
+
+ if (cbv->delim) {
+ Copy(linebuff, buffer, lblen, char);
+ br = lblen;
+ done = 1;
+ }
+ else {
+ int cnt;
+ SV * rdres;
+ char * tb;
+
+ PUSHMARK(SP);
+ EXTEND(SP, 3);
+ PUSHs(ioref);
+ PUSHs(tbuff);
+ PUSHs(tsiz);
+ PUTBACK ;
+
+ cnt = perl_call_method("read", G_SCALAR);
+
+ SPAGAIN ;
+
+ if (cnt != 1)
+ croak("read method call failed");
+
+ rdres = POPs;
+
+ if (! SvOK(rdres))
+ croak("read error");
+
+ tb = SvPV(tbuff, br);
+ if (br > 0)
+ Copy(tb, buffer, br, char);
+ else
+ done = 1;
+
+ PUTBACK ;
+ }
+
+ ret = XML_ParseBuffer(parser, br, done);
+
+ SPAGAIN; /* resync local SP in case callbacks changed global stack */
+
+ if (! ret)
+ break;
+
+ FREETMPS;
+ }
+
+ if (! ret)
+ append_error(parser, msg);
+
+ if (! cbv->delim) {
+ SvREFCNT_dec(tsiz);
+ SvREFCNT_dec(tbuff);
+ }
+
+ FREETMPS;
+ LEAVE;
+
+ return ret;
+} /* End parse_stream */
+
+static SV *
+gen_ns_name(const char * name, HV * ns_table, AV * ns_list)
+{
+ char *pos = strchr(name, NSDELIM);
+ SV * ret;
+
+ if (pos && pos > name)
+ {
+ SV ** name_ent = hv_fetch(ns_table, (char *) name,
+ pos - name, TRUE);
+ ret = newUTF8SVpv(&pos[1], 0);
+
+ if (name_ent)
+ {
+ int index;
+
+ if (SvOK(*name_ent))
+ {
+ index = SvIV(*name_ent);
+ }
+ else
+ {
+ av_push(ns_list, newUTF8SVpv((char *) name, pos - name));
+ index = av_len(ns_list);
+ sv_setiv(*name_ent, (IV) index);
+ }
+
+ sv_setiv(ret, (IV) index);
+ SvPOK_on(ret);
+ }
+ }
+ else
+ ret = newUTF8SVpv((char *) name, 0);
+
+ return ret;
+} /* End gen_ns_name */
+
+static void
+characterData(void *userData, const char *s, int len)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 2);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpvn((char*)s,len)));
+ PUTBACK;
+ perl_call_sv(cbv->char_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End characterData */
+
+static void
+startElement(void *userData, const char *name, const char **atts)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+ SV ** pcontext;
+ unsigned do_ns = cbv->ns;
+ unsigned skipping = 0;
+ SV ** pnstab;
+ SV ** pnslst;
+ SV * elname;
+
+ cbv->st_serial++;
+
+ if (cbv->skip_until) {
+ skipping = cbv->st_serial < cbv->skip_until;
+ if (! skipping) {
+ resume_callbacks(cbv);
+ cbv->skip_until = 0;
+ }
+ }
+
+ if (cbv->st_serial_stackptr >= cbv->st_serial_stacksize) {
+ unsigned int newsize = cbv->st_serial_stacksize + 512;
+
+ Renew(cbv->st_serial_stack, newsize, unsigned int);
+ cbv->st_serial_stacksize = newsize;
+ }
+
+ cbv->st_serial_stack[++cbv->st_serial_stackptr] = cbv->st_serial;
+
+ if (do_ns)
+ elname = gen_ns_name(name, cbv->nstab, cbv->nslst);
+ else
+ elname = newUTF8SVpv((char *)name, 0);
+
+ if (! skipping && SvTRUE(cbv->start_sv))
+ {
+ const char **attlim = atts;
+
+ while (*attlim)
+ attlim++;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, attlim - atts + 2);
+ PUSHs(cbv->self_sv);
+ PUSHs(elname);
+ while (*atts)
+ {
+ SV * attname;
+
+ attname = (do_ns ? gen_ns_name(*atts, cbv->nstab, cbv->nslst)
+ : newUTF8SVpv((char *) *atts, 0));
+
+ atts++;
+ PUSHs(sv_2mortal(attname));
+ if (*atts)
+ PUSHs(sv_2mortal(newUTF8SVpv((char*)*atts++,0)));
+ }
+ PUTBACK;
+ perl_call_sv(cbv->start_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+ }
+
+ av_push(cbv->context, elname);
+
+ if (cbv->ns) {
+ av_clear(cbv->new_prefix_list);
+ }
+} /* End startElement */
+
+static void
+endElement(void *userData, const char *name)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+ SV *elname;
+
+ elname = av_pop(cbv->context);
+
+ if (! cbv->st_serial_stackptr) {
+ croak("endElement: Start tag serial number stack underflow");
+ }
+
+ if (! cbv->skip_until && SvTRUE(cbv->end_sv))
+ {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 2);
+ PUSHs(cbv->self_sv);
+ PUSHs(elname);
+ PUTBACK;
+ perl_call_sv(cbv->end_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+ }
+
+ cbv->st_serial_stackptr--;
+
+ SvREFCNT_dec(elname);
+} /* End endElement */
+
+static void
+processingInstruction(void *userData, const char *target, const char *data)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 3);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*)target,0)));
+ PUSHs(sv_2mortal(newUTF8SVpv((char*)data,0)));
+ PUTBACK;
+ perl_call_sv(cbv->proc_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End processingInstruction */
+
+static void
+commenthandle(void *userData, const char *string)
+{
+ dSP;
+ CallbackVector * cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 2);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*) string, 0)));
+ PUTBACK;
+ perl_call_sv(cbv->cmnt_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End commenthandler */
+
+static void
+startCdata(void *userData)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ if (cbv->startcd_sv) {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ XPUSHs(cbv->self_sv);
+ PUTBACK;
+ perl_call_sv(cbv->startcd_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+ }
+} /* End startCdata */
+
+static void
+endCdata(void *userData)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ if (cbv->endcd_sv) {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ XPUSHs(cbv->self_sv);
+ PUTBACK;
+ perl_call_sv(cbv->endcd_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+ }
+} /* End endCdata */
+
+static void
+nsStart(void *userdata, const XML_Char *prefix, const XML_Char *uri){
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userdata;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 3);
+ PUSHs(cbv->self_sv);
+ PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef);
+ PUSHs(uri ? sv_2mortal(newUTF8SVpv((char *)uri, 0)) : &PL_sv_undef);
+ PUTBACK;
+ perl_call_method("NamespaceStart", G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End nsStart */
+
+static void
+nsEnd(void *userdata, const XML_Char *prefix) {
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userdata;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 2);
+ PUSHs(cbv->self_sv);
+ PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef);
+ PUTBACK;
+ perl_call_method("NamespaceEnd", G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End nsEnd */
+
+static void
+defaulthandle(void *userData, const char *string, int len)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 2);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpvn((char*)string, len)));
+ PUTBACK;
+ perl_call_sv(cbv->dflt_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End defaulthandle */
+
+static void
+elementDecl(void *data,
+ const char *name,
+ XML_Content *model) {
+ dSP;
+ CallbackVector *cbv = (CallbackVector*) data;
+ SV *cmod;
+
+ ENTER;
+ SAVETMPS;
+
+
+ cmod = generate_model(model);
+
+ Safefree(model);
+ PUSHMARK(sp);
+ EXTEND(sp, 3);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char *)name, 0)));
+ PUSHs(sv_2mortal(cmod));
+ PUTBACK;
+ perl_call_sv(cbv->eledcl_sv, G_DISCARD);
+ FREETMPS;
+ LEAVE;
+
+} /* End elementDecl */
+
+static void
+attributeDecl(void *data,
+ const char * elname,
+ const char * attname,
+ const char * att_type,
+ const char * dflt,
+ int reqorfix) {
+ dSP;
+ CallbackVector *cbv = (CallbackVector*) data;
+ SV * dfltsv;
+
+ if (dflt) {
+ dfltsv = newUTF8SVpv("'", 1);
+ sv_catpv(dfltsv, (char *) dflt);
+ sv_catpv(dfltsv, "'");
+ }
+ else {
+ dfltsv = newUTF8SVpv(reqorfix ? "#REQUIRED" : "#IMPLIED", 0);
+ }
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ EXTEND(sp, 5);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char *)elname, 0)));
+ PUSHs(sv_2mortal(newUTF8SVpv((char *)attname, 0)));
+ PUSHs(sv_2mortal(newUTF8SVpv((char *)att_type, 0)));
+ PUSHs(sv_2mortal(dfltsv));
+ if (dflt && reqorfix)
+ XPUSHs(&PL_sv_yes);
+ PUTBACK;
+ perl_call_sv(cbv->attdcl_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End attributeDecl */
+
+static void
+entityDecl(void *data,
+ const char *name,
+ int isparam,
+ const char *value,
+ int vlen,
+ const char *base,
+ const char *sysid,
+ const char *pubid,
+ const char *notation) {
+ dSP;
+ CallbackVector *cbv = (CallbackVector*) data;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 6);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0)));
+ PUSHs(value ? sv_2mortal(newUTF8SVpvn((char*)value, vlen)) : &PL_sv_undef);
+ PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char *)sysid, 0)) : &PL_sv_undef);
+ PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char *)pubid, 0)) : &PL_sv_undef);
+ PUSHs(notation ? sv_2mortal(newUTF8SVpv((char *)notation, 0)) : &PL_sv_undef);
+ if (isparam)
+ XPUSHs(&PL_sv_yes);
+ PUTBACK;
+ perl_call_sv(cbv->entdcl_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End entityDecl */
+
+static void
+doctypeStart(void *userData,
+ const char* name,
+ const char* sysid,
+ const char* pubid,
+ int hasinternal) {
+ dSP;
+ CallbackVector *cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 5);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0)));
+ PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char*)sysid, 0)) : &PL_sv_undef);
+ PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*)pubid, 0)) : &PL_sv_undef);
+ PUSHs(hasinternal ? &PL_sv_yes : &PL_sv_no);
+ PUTBACK;
+ perl_call_sv(cbv->doctyp_sv, G_DISCARD);
+ FREETMPS;
+ LEAVE;
+} /* End doctypeStart */
+
+static void
+doctypeEnd(void *userData) {
+ dSP;
+ CallbackVector *cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 1);
+ PUSHs(cbv->self_sv);
+ PUTBACK;
+ perl_call_sv(cbv->doctypfin_sv, G_DISCARD);
+ FREETMPS;
+ LEAVE;
+} /* End doctypeEnd */
+
+static void
+xmlDecl(void *userData,
+ const char *version,
+ const char *encoding,
+ int standalone) {
+ dSP;
+ CallbackVector *cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 4);
+ PUSHs(cbv->self_sv);
+ PUSHs(version ? sv_2mortal(newUTF8SVpv((char *)version, 0))
+ : &PL_sv_undef);
+ PUSHs(encoding ? sv_2mortal(newUTF8SVpv((char *)encoding, 0))
+ : &PL_sv_undef);
+ PUSHs(standalone == -1 ? &PL_sv_undef
+ : (standalone ? &PL_sv_yes : &PL_sv_no));
+ PUTBACK;
+ perl_call_sv(cbv->xmldec_sv, G_DISCARD);
+ FREETMPS;
+ LEAVE;
+} /* End xmlDecl */
+
+static void
+unparsedEntityDecl(void *userData,
+ const char* entity,
+ const char* base,
+ const char* sysid,
+ const char* pubid,
+ const char* notation)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 6);
+ PUSHs(cbv->self_sv);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*) entity, 0)));
+ PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*) sysid, 0)));
+ PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*) pubid, 0)) : &PL_sv_undef);
+ PUSHs(sv_2mortal(newUTF8SVpv((char*) notation, 0)));
+ PUTBACK;
+ perl_call_sv(cbv->unprsd_sv, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+} /* End unparsedEntityDecl */
+
+static void
+notationDecl(void *userData,
+ const char *name,
+ const char *base,
+ const char *sysid,
+ const char *pubid)
+{
+ dSP;
+ CallbackVector* cbv = (CallbackVector*) userData;
+
+ PUSHMARK(sp);
+ XPUSHs(cbv->self_sv);
+ XPUSHs(sv_2mortal(newUTF8SVpv((char*) name, 0)));
+ if (base)
+ {
+ XPUSHs(sv_2mortal(newUTF8SVpv((char *) base, 0)));
+ }
+ else if (sysid || pubid)
+ {
+ XPUSHs(&PL_sv_undef);
+ }
+
+ if (sysid)
+ {
+ XPUSHs(sv_2mortal(newUTF8SVpv((char *) sysid, 0)));
+ }
+ else if (pubid)
+ {
+ XPUSHs(&PL_sv_undef);
+ }
+
+ if (pubid)
+ XPUSHs(sv_2mortal(newUTF8SVpv((char *) pubid, 0)));
+
+ PUTBACK;
+ perl_call_sv(cbv->notation_sv, G_DISCARD);
+} /* End notationDecl */
+
+static int
+externalEntityRef(XML_Parser parser,
+ const char* open,
+ const char* base,
+ const char* sysid,
+ const char* pubid)
+{
+ dSP;
+#if defined(USE_THREADS) && PATCHLEVEL==6
+ dTHX;
+#endif
+
+ int count;
+ int ret = 0;
+ int parse_done = 0;
+
+ CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ if (! cbv->extent_sv)
+ return 0;
+
+ ENTER ;
+ SAVETMPS ;
+ PUSHMARK(sp);
+ EXTEND(sp, pubid ? 4 : 3);
+ PUSHs(cbv->self_sv);
+ PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef);
+ PUSHs(sv_2mortal(newSVpv((char*) sysid, 0)));
+ if (pubid)
+ PUSHs(sv_2mortal(newUTF8SVpv((char*) pubid, 0)));
+ PUTBACK ;
+ count = perl_call_sv(cbv->extent_sv, G_SCALAR);
+
+ SPAGAIN ;
+
+ if (count >= 1) {
+ SV * result = POPs;
+ int type;
+
+ if (result && (type = SvTYPE(result)) > 0) {
+ SV **pval = hv_fetch((HV*) SvRV(cbv->self_sv), "Parser", 6, 0);
+
+ if (! pval || ! SvIOK(*pval))
+ append_error(parser, "Can't find parser entry in XML::Parser object");
+ else {
+ XML_Parser entpar;
+ char *errmsg = (char *) 0;
+
+ entpar = XML_ExternalEntityParserCreate(parser, open, 0);
+
+ XML_SetBase(entpar, XML_GetBase(parser));
+
+ sv_setiv(*pval, (IV) entpar);
+
+ cbv->p = entpar;
+
+ PUSHMARK(sp);
+ EXTEND(sp, 2);
+ PUSHs(*pval);
+ PUSHs(result);
+ PUTBACK;
+ count = perl_call_pv("XML::Parser::Expat::Do_External_Parse",
+ G_SCALAR | G_EVAL);
+ SPAGAIN;
+
+ if (SvTRUE(ERRSV)) {
+ char *hold;
+ STRLEN len;
+
+ POPs;
+ hold = SvPV(ERRSV, len);
+ New(326, errmsg, len + 1, char);
+ if (len)
+ Copy(hold, errmsg, len, char);
+ goto Extparse_Cleanup;
+ }
+
+ if (count > 0)
+ ret = POPi;
+
+ parse_done = 1;
+
+ Extparse_Cleanup:
+ cbv->p = parser;
+ sv_setiv(*pval, (IV) parser);
+ XML_ParserFree(entpar);
+
+ if (cbv->extfin_sv) {
+ PUSHMARK(sp);
+ PUSHs(cbv->self_sv);
+ PUTBACK;
+ perl_call_sv(cbv->extfin_sv, G_DISCARD);
+ SPAGAIN;
+ }
+
+ if (SvTRUE(ERRSV))
+ append_error(parser, SvPV(ERRSV, PL_na));
+ }
+ }
+ }
+
+ if (! ret && ! parse_done)
+ append_error(parser, "Handler couldn't resolve external entity");
+
+ PUTBACK ;
+ FREETMPS ;
+ LEAVE ;
+
+ return ret;
+} /* End externalEntityRef */
+
+/*================================================================
+** This is the function that expat calls to convert multi-byte sequences
+** for external encodings. Each byte in the sequence is used to index
+** into the current map to either set the next map or, in the case of
+** the final byte, to get the corresponding Unicode scalar, which is
+** returned.
+*/
+
+static int
+convert_to_unicode(void *data, const char *seq) {
+ Encinfo *enc = (Encinfo *) data;
+ PrefixMap *curpfx;
+ int count;
+ int index = 0;
+
+ for (count = 0; count < 4; count++) {
+ unsigned char byte = (unsigned char) seq[count];
+ unsigned char bndx;
+ unsigned char bmsk;
+ int offset;
+
+ curpfx = &enc->prefixes[index];
+ offset = ((int) byte) - curpfx->min;
+ if (offset < 0)
+ break;
+ if (offset >= curpfx->len && curpfx->len != 0)
+ break;
+
+ bndx = byte >> 3;
+ bmsk = 1 << (byte & 0x7);
+
+ if (curpfx->ispfx[bndx] & bmsk) {
+ index = enc->bytemap[curpfx->bmap_start + offset];
+ }
+ else if (curpfx->ischar[bndx] & bmsk) {
+ return enc->bytemap[curpfx->bmap_start + offset];
+ }
+ else
+ break;
+ }
+
+ return -1;
+} /* End convert_to_unicode */
+
+static int
+unknownEncoding(void *unused, const char *name, XML_Encoding *info)
+{
+ SV ** encinfptr;
+ Encinfo *enc;
+ int namelen;
+ int i;
+ char buff[42];
+
+ namelen = strlen(name);
+ if (namelen > 40)
+ return 0;
+
+ /* Make uppercase */
+ for (i = 0; i < namelen; i++) {
+ char c = name[i];
+ if (c >= 'a' && c <= 'z')
+ c -= 'a' - 'A';
+ buff[i] = c;
+ }
+
+ if (! EncodingTable) {
+ EncodingTable = perl_get_hv("XML::Parser::Expat::Encoding_Table", FALSE);
+ if (! EncodingTable)
+ croak("Can't find XML::Parser::Expat::Encoding_Table");
+ }
+
+ encinfptr = hv_fetch(EncodingTable, buff, namelen, 0);
+
+ if (! encinfptr || ! SvOK(*encinfptr)) {
+ /* Not found, so try to autoload */
+ dSP;
+ int count;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(sv_2mortal(newSVpvn(buff,namelen)));
+ PUTBACK;
+ perl_call_pv("XML::Parser::Expat::load_encoding", G_DISCARD);
+
+ encinfptr = hv_fetch(EncodingTable, buff, namelen, 0);
+ FREETMPS;
+ LEAVE;
+
+ if (! encinfptr || ! SvOK(*encinfptr))
+ return 0;
+ }
+
+ if (! sv_derived_from(*encinfptr, "XML::Parser::Encinfo"))
+ croak("Entry in XML::Parser::Expat::Encoding_Table not an Encinfo object");
+
+ enc = (Encinfo *) SvIV((SV*)SvRV(*encinfptr));
+ Copy(enc->firstmap, info->map, 256, int);
+ info->release = NULL;
+ if (enc->prefixes_size) {
+ info->data = (void *) enc;
+ info->convert = convert_to_unicode;
+ }
+ else {
+ info->data = NULL;
+ info->convert = NULL;
+ }
+
+ return 1;
+} /* End unknownEncoding */
+
+
+static void
+recString(void *userData, const char *string, int len)
+{
+ CallbackVector *cbv = (CallbackVector*) userData;
+
+ if (cbv->recstring) {
+ sv_catpvn(cbv->recstring, (char *) string, len);
+ }
+ else {
+ cbv->recstring = newUTF8SVpvn((char *) string, len);
+ }
+} /* End recString */
+
+static void
+suspend_callbacks(CallbackVector *cbv) {
+ if (SvTRUE(cbv->char_sv)) {
+ XML_SetCharacterDataHandler(cbv->p,
+ (XML_CharacterDataHandler) 0);
+ }
+
+ if (SvTRUE(cbv->proc_sv)) {
+ XML_SetProcessingInstructionHandler(cbv->p,
+ (XML_ProcessingInstructionHandler) 0);
+ }
+
+ if (SvTRUE(cbv->cmnt_sv)) {
+ XML_SetCommentHandler(cbv->p,
+ (XML_CommentHandler) 0);
+ }
+
+ if (SvTRUE(cbv->startcd_sv)
+ || SvTRUE(cbv->endcd_sv)) {
+ XML_SetCdataSectionHandler(cbv->p,
+ (XML_StartCdataSectionHandler) 0,
+ (XML_EndCdataSectionHandler) 0);
+ }
+
+ if (SvTRUE(cbv->unprsd_sv)) {
+ XML_SetUnparsedEntityDeclHandler(cbv->p,
+ (XML_UnparsedEntityDeclHandler) 0);
+ }
+
+ if (SvTRUE(cbv->notation_sv)) {
+ XML_SetNotationDeclHandler(cbv->p,
+ (XML_NotationDeclHandler) 0);
+ }
+
+ if (SvTRUE(cbv->extent_sv)) {
+ XML_SetExternalEntityRefHandler(cbv->p,
+ (XML_ExternalEntityRefHandler) 0);
+ }
+
+} /* End suspend_callbacks */
+
+static void
+resume_callbacks(CallbackVector *cbv) {
+ if (SvTRUE(cbv->char_sv)) {
+ XML_SetCharacterDataHandler(cbv->p, characterData);
+ }
+
+ if (SvTRUE(cbv->proc_sv)) {
+ XML_SetProcessingInstructionHandler(cbv->p, processingInstruction);
+ }
+
+ if (SvTRUE(cbv->cmnt_sv)) {
+ XML_SetCommentHandler(cbv->p, commenthandle);
+ }
+
+ if (SvTRUE(cbv->startcd_sv)
+ || SvTRUE(cbv->endcd_sv)) {
+ XML_SetCdataSectionHandler(cbv->p, startCdata, endCdata);
+ }
+
+ if (SvTRUE(cbv->unprsd_sv)) {
+ XML_SetUnparsedEntityDeclHandler(cbv->p, unparsedEntityDecl);
+ }
+
+ if (SvTRUE(cbv->notation_sv)) {
+ XML_SetNotationDeclHandler(cbv->p, notationDecl);
+ }
+
+ if (SvTRUE(cbv->extent_sv)) {
+ XML_SetExternalEntityRefHandler(cbv->p, externalEntityRef);
+ }
+
+} /* End resume_callbacks */
+
+
+MODULE = XML::Parser::Expat PACKAGE = XML::Parser::Expat PREFIX = XML_
+
+XML_Parser
+XML_ParserCreate(self_sv, enc_sv, namespaces)
+ SV * self_sv
+ SV * enc_sv
+ int namespaces
+ CODE:
+ {
+ CallbackVector *cbv;
+ enum XML_ParamEntityParsing pep = XML_PARAM_ENTITY_PARSING_NEVER;
+ char *enc = (char *) (SvTRUE(enc_sv) ? SvPV(enc_sv,PL_na) : 0);
+ SV ** spp;
+
+ Newz(320, cbv, 1, CallbackVector);
+ cbv->self_sv = SvREFCNT_inc(self_sv);
+ Newz(325, cbv->st_serial_stack, 1024, unsigned int);
+ spp = hv_fetch((HV*)SvRV(cbv->self_sv), "NoExpand", 8, 0);
+ if (spp && SvTRUE(*spp))
+ cbv->no_expand = 1;
+
+ spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Context", 7, 0);
+ if (! spp || ! *spp || !SvROK(*spp))
+ croak("XML::Parser instance missing Context");
+
+ cbv->context = (AV*) SvRV(*spp);
+
+ cbv->ns = (unsigned) namespaces;
+ if (namespaces)
+ {
+ spp = hv_fetch((HV*)SvRV(cbv->self_sv), "New_Prefixes", 12, 0);
+ if (! spp || ! *spp || !SvROK(*spp))
+ croak("XML::Parser instance missing New_Prefixes");
+
+ cbv->new_prefix_list = (AV *) SvRV(*spp);
+
+ spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Namespace_Table",
+ 15, FALSE);
+ if (! spp || ! *spp || !SvROK(*spp))
+ croak("XML::Parser instance missing Namespace_Table");
+
+ cbv->nstab = (HV *) SvRV(*spp);
+
+ spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Namespace_List",
+ 14, FALSE);
+ if (! spp || ! *spp || !SvROK(*spp))
+ croak("XML::Parser instance missing Namespace_List");
+
+ cbv->nslst = (AV *) SvRV(*spp);
+
+ RETVAL = XML_ParserCreate_MM(enc, &ms, nsdelim);
+ XML_SetNamespaceDeclHandler(RETVAL,nsStart, nsEnd);
+ }
+ else
+ {
+ RETVAL = XML_ParserCreate_MM(enc, &ms, NULL);
+ }
+
+ cbv->p = RETVAL;
+ XML_SetUserData(RETVAL, (void *) cbv);
+ XML_SetElementHandler(RETVAL, startElement, endElement);
+ XML_SetUnknownEncodingHandler(RETVAL, unknownEncoding, 0);
+
+ spp = hv_fetch((HV*)SvRV(cbv->self_sv), "ParseParamEnt",
+ 13, FALSE);
+
+ if (spp && SvTRUE(*spp)) {
+ pep = XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE;
+ cbv->parseparam = 1;
+ }
+
+ XML_SetParamEntityParsing(RETVAL, pep);
+ }
+ OUTPUT:
+ RETVAL
+
+void
+XML_ParserRelease(parser)
+ XML_Parser parser
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ SvREFCNT_dec(cbv->self_sv);
+ }
+
+void
+XML_ParserFree(parser)
+ XML_Parser parser
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ Safefree(cbv->st_serial_stack);
+
+
+ /* Clean up any SVs that we have */
+ /* (Note that self_sv must already be taken care of
+ or we couldn't be here */
+
+ if (cbv->recstring)
+ SvREFCNT_dec(cbv->recstring);
+
+ if (cbv->start_sv)
+ SvREFCNT_dec(cbv->start_sv);
+
+ if (cbv->end_sv)
+ SvREFCNT_dec(cbv->end_sv);
+
+ if (cbv->char_sv)
+ SvREFCNT_dec(cbv->char_sv);
+
+ if (cbv->proc_sv)
+ SvREFCNT_dec(cbv->proc_sv);
+
+ if (cbv->cmnt_sv)
+ SvREFCNT_dec(cbv->cmnt_sv);
+
+ if (cbv->dflt_sv)
+ SvREFCNT_dec(cbv->dflt_sv);
+
+ if (cbv->entdcl_sv)
+ SvREFCNT_dec(cbv->entdcl_sv);
+
+ if (cbv->eledcl_sv)
+ SvREFCNT_dec(cbv->eledcl_sv);
+
+ if (cbv->attdcl_sv)
+ SvREFCNT_dec(cbv->attdcl_sv);
+
+ if (cbv->doctyp_sv)
+ SvREFCNT_dec(cbv->doctyp_sv);
+
+ if (cbv->doctypfin_sv)
+ SvREFCNT_dec(cbv->doctypfin_sv);
+
+ if (cbv->xmldec_sv)
+ SvREFCNT_dec(cbv->xmldec_sv);
+
+ if (cbv->unprsd_sv)
+ SvREFCNT_dec(cbv->unprsd_sv);
+
+ if (cbv->notation_sv)
+ SvREFCNT_dec(cbv->notation_sv);
+
+ if (cbv->extent_sv)
+ SvREFCNT_dec(cbv->extent_sv);
+
+ if (cbv->extfin_sv)
+ SvREFCNT_dec(cbv->extfin_sv);
+
+ if (cbv->startcd_sv)
+ SvREFCNT_dec(cbv->startcd_sv);
+
+ if (cbv->endcd_sv)
+ SvREFCNT_dec(cbv->endcd_sv);
+
+ /* ================ */
+
+ Safefree(cbv);
+ XML_ParserFree(parser);
+ }
+
+int
+XML_ParseString(parser, sv)
+ XML_Parser parser
+ SV * sv
+ CODE:
+ {
+ CallbackVector * cbv;
+ STRLEN len;
+ char *s = SvPV(sv, len);
+
+ cbv = (CallbackVector *) XML_GetUserData(parser);
+
+
+ RETVAL = XML_Parse(parser, s, len, 1);
+ SPAGAIN; /* XML_Parse might have changed stack pointer */
+ if (! RETVAL)
+ append_error(parser, NULL);
+ }
+
+ OUTPUT:
+ RETVAL
+
+int
+XML_ParseStream(parser, ioref, delim)
+ XML_Parser parser
+ SV * ioref
+ SV * delim
+ CODE:
+ {
+ SV **delimsv;
+ CallbackVector * cbv;
+
+ cbv = (CallbackVector *) XML_GetUserData(parser);
+ if (SvOK(delim)) {
+ cbv->delim = SvPV(delim, cbv->delimlen);
+ }
+ else {
+ cbv->delim = (char *) 0;
+ }
+
+ RETVAL = parse_stream(parser, ioref);
+ SPAGAIN; /* parse_stream might have changed stack pointer */
+ }
+
+ OUTPUT:
+ RETVAL
+
+int
+XML_ParsePartial(parser, sv)
+ XML_Parser parser
+ SV * sv
+ CODE:
+ {
+ STRLEN len;
+ char *s = SvPV(sv, len);
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ RETVAL = XML_Parse(parser, s, len, 0);
+ if (! RETVAL)
+ append_error(parser, NULL);
+ }
+
+ OUTPUT:
+ RETVAL
+
+
+int
+XML_ParseDone(parser)
+ XML_Parser parser
+ CODE:
+ {
+ RETVAL = XML_Parse(parser, "", 0, 1);
+ if (! RETVAL)
+ append_error(parser, NULL);
+ }
+
+ OUTPUT:
+ RETVAL
+
+SV *
+XML_SetStartElementHandler(parser, start_sv)
+ XML_Parser parser
+ SV * start_sv
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+ XMLP_UPD(start_sv);
+ PUSHRET;
+ }
+
+SV *
+XML_SetEndElementHandler(parser, end_sv)
+ XML_Parser parser
+ SV * end_sv
+ CODE:
+ {
+ CallbackVector *cbv = (CallbackVector*) XML_GetUserData(parser);
+ XMLP_UPD(end_sv);
+ PUSHRET;
+ }
+
+SV *
+XML_SetCharacterDataHandler(parser, char_sv)
+ XML_Parser parser
+ SV * char_sv
+ CODE:
+ {
+ XML_CharacterDataHandler charhndl = (XML_CharacterDataHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(char_sv);
+ if (SvTRUE(char_sv))
+ charhndl = characterData;
+
+ XML_SetCharacterDataHandler(parser, charhndl);
+ PUSHRET;
+ }
+
+SV *
+XML_SetProcessingInstructionHandler(parser, proc_sv)
+ XML_Parser parser
+ SV * proc_sv
+ CODE:
+ {
+ XML_ProcessingInstructionHandler prochndl =
+ (XML_ProcessingInstructionHandler) 0;
+ CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(proc_sv);
+ if (SvTRUE(proc_sv))
+ prochndl = processingInstruction;
+
+ XML_SetProcessingInstructionHandler(parser, prochndl);
+ PUSHRET;
+ }
+
+SV *
+XML_SetCommentHandler(parser, cmnt_sv)
+ XML_Parser parser
+ SV * cmnt_sv
+ CODE:
+ {
+ XML_CommentHandler cmnthndl = (XML_CommentHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(cmnt_sv);
+ if (SvTRUE(cmnt_sv))
+ cmnthndl = commenthandle;
+
+ XML_SetCommentHandler(parser, cmnthndl);
+ PUSHRET;
+ }
+
+SV *
+XML_SetDefaultHandler(parser, dflt_sv)
+ XML_Parser parser
+ SV * dflt_sv
+ CODE:
+ {
+ XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(dflt_sv);
+ if (SvTRUE(dflt_sv))
+ dflthndl = defaulthandle;
+
+ if (cbv->no_expand)
+ XML_SetDefaultHandler(parser, dflthndl);
+ else
+ XML_SetDefaultHandlerExpand(parser, dflthndl);
+
+ PUSHRET;
+ }
+
+SV *
+XML_SetUnparsedEntityDeclHandler(parser, unprsd_sv)
+ XML_Parser parser
+ SV * unprsd_sv
+ CODE:
+ {
+ XML_UnparsedEntityDeclHandler unprsdhndl =
+ (XML_UnparsedEntityDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(unprsd_sv);
+ if (SvTRUE(unprsd_sv))
+ unprsdhndl = unparsedEntityDecl;
+
+ XML_SetUnparsedEntityDeclHandler(parser, unprsdhndl);
+ PUSHRET;
+ }
+
+SV *
+XML_SetNotationDeclHandler(parser, notation_sv)
+ XML_Parser parser
+ SV * notation_sv
+ CODE:
+ {
+ XML_NotationDeclHandler nothndlr = (XML_NotationDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(notation_sv);
+ if (SvTRUE(notation_sv))
+ nothndlr = notationDecl;
+
+ XML_SetNotationDeclHandler(parser, nothndlr);
+ PUSHRET;
+ }
+
+SV *
+XML_SetExternalEntityRefHandler(parser, extent_sv)
+ XML_Parser parser
+ SV * extent_sv
+ CODE:
+ {
+ XML_ExternalEntityRefHandler exthndlr =
+ (XML_ExternalEntityRefHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(extent_sv);
+ if (SvTRUE(extent_sv))
+ exthndlr = externalEntityRef;
+
+ XML_SetExternalEntityRefHandler(parser, exthndlr);
+ PUSHRET;
+ }
+
+SV *
+XML_SetExtEntFinishHandler(parser, extfin_sv)
+ XML_Parser parser
+ SV * extfin_sv
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ /* There is no corresponding handler for this in expat. This is
+ called from the externalEntityRef function above after parsing
+ the external entity. */
+
+ XMLP_UPD(extfin_sv);
+ PUSHRET;
+ }
+
+
+SV *
+XML_SetEntityDeclHandler(parser, entdcl_sv)
+ XML_Parser parser
+ SV * entdcl_sv
+ CODE:
+ {
+ XML_EntityDeclHandler enthndlr =
+ (XML_EntityDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(entdcl_sv);
+ if (SvTRUE(entdcl_sv))
+ enthndlr = entityDecl;
+
+ XML_SetEntityDeclHandler(parser, enthndlr);
+ PUSHRET;
+ }
+
+SV *
+XML_SetElementDeclHandler(parser, eledcl_sv)
+ XML_Parser parser
+ SV * eledcl_sv
+ CODE:
+ {
+ XML_ElementDeclHandler eldeclhndlr =
+ (XML_ElementDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(eledcl_sv);
+ if (SvTRUE(eledcl_sv))
+ eldeclhndlr = elementDecl;
+
+ XML_SetElementDeclHandler(parser, eldeclhndlr);
+ PUSHRET;
+ }
+
+SV *
+XML_SetAttListDeclHandler(parser, attdcl_sv)
+ XML_Parser parser
+ SV * attdcl_sv
+ CODE:
+ {
+ XML_AttlistDeclHandler attdeclhndlr =
+ (XML_AttlistDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(attdcl_sv);
+ if (SvTRUE(attdcl_sv))
+ attdeclhndlr = attributeDecl;
+
+ XML_SetAttlistDeclHandler(parser, attdeclhndlr);
+ PUSHRET;
+ }
+
+SV *
+XML_SetDoctypeHandler(parser, doctyp_sv)
+ XML_Parser parser
+ SV * doctyp_sv
+ CODE:
+ {
+ XML_StartDoctypeDeclHandler dtsthndlr =
+ (XML_StartDoctypeDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+ int set = 0;
+
+ XMLP_UPD(doctyp_sv);
+ if (SvTRUE(doctyp_sv))
+ dtsthndlr = doctypeStart;
+
+ XML_SetStartDoctypeDeclHandler(parser, dtsthndlr);
+ PUSHRET;
+ }
+
+SV *
+XML_SetEndDoctypeHandler(parser, doctypfin_sv)
+ XML_Parser parser
+ SV * doctypfin_sv
+ CODE:
+ {
+ XML_EndDoctypeDeclHandler dtendhndlr =
+ (XML_EndDoctypeDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
+
+ XMLP_UPD(doctypfin_sv);
+ if (SvTRUE(doctypfin_sv))
+ dtendhndlr = doctypeEnd;
+
+ XML_SetEndDoctypeDeclHandler(parser, dtendhndlr);
+ PUSHRET;
+ }
+
+
+SV *
+XML_SetXMLDeclHandler(parser, xmldec_sv)
+ XML_Parser parser
+ SV * xmldec_sv
+ CODE:
+ {
+ XML_XmlDeclHandler xmldechndlr =
+ (XML_XmlDeclHandler) 0;
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ XMLP_UPD(xmldec_sv);
+ if (SvTRUE(xmldec_sv))
+ xmldechndlr = xmlDecl;
+
+ XML_SetXmlDeclHandler(parser, xmldechndlr);
+ PUSHRET;
+ }
+
+
+void
+XML_SetBase(parser, base)
+ XML_Parser parser
+ SV * base
+ CODE:
+ {
+ char * b;
+
+ if (! SvOK(base)) {
+ b = (char *) 0;
+ }
+ else {
+ b = SvPV(base, PL_na);
+ }
+
+ XML_SetBase(parser, b);
+ }
+
+
+SV *
+XML_GetBase(parser)
+ XML_Parser parser
+ CODE:
+ {
+ const char *ret = XML_GetBase(parser);
+ if (ret) {
+ ST(0) = sv_newmortal();
+ sv_setpv(ST(0), ret);
+ }
+ else {
+ ST(0) = &PL_sv_undef;
+ }
+ }
+
+void
+XML_PositionContext(parser, lines)
+ XML_Parser parser
+ int lines
+ PREINIT:
+ int parsepos;
+ int size;
+ const char *pos = XML_GetInputContext(parser, &parsepos, &size);
+ const char *markbeg;
+ const char *limit;
+ const char *markend;
+ int length, relpos;
+ int cnt;
+
+ PPCODE:
+ if (! pos)
+ return;
+
+ for (markbeg = &pos[parsepos], cnt = 0; markbeg >= pos; markbeg--)
+ {
+ if (*markbeg == '\n')
+ {
+ cnt++;
+ if (cnt > lines)
+ break;
+ }
+ }
+
+ markbeg++;
+
+ relpos = 0;
+ limit = &pos[size];
+ for (markend = &pos[parsepos + 1], cnt = 0;
+ markend < limit;
+ markend++)
+ {
+ if (*markend == '\n')
+ {
+ if (cnt == 0)
+ relpos = (markend - markbeg) + 1;
+ cnt++;
+ if (cnt > lines)
+ {
+ markend++;
+ break;
+ }
+ }
+ }
+
+ length = markend - markbeg;
+ if (relpos == 0)
+ relpos = length;
+
+ EXTEND(sp, 2);
+ PUSHs(sv_2mortal(newSVpvn((char *) markbeg, length)));
+ PUSHs(sv_2mortal(newSViv(relpos)));
+
+SV *
+GenerateNSName(name, xml_namespace, table, list)
+ SV * name
+ SV * xml_namespace
+ SV * table
+ SV * list
+ CODE:
+ {
+ STRLEN nmlen, nslen;
+ char * nmstr;
+ char * nsstr;
+ char * buff;
+ char * bp;
+ char * blim;
+
+ nmstr = SvPV(name, nmlen);
+ nsstr = SvPV(xml_namespace, nslen);
+
+ /* Form a namespace-name string that looks like expat's */
+ New(321, buff, nmlen + nslen + 2, char);
+ bp = buff;
+ blim = bp + nslen;
+ while (bp < blim)
+ *bp++ = *nsstr++;
+ *bp++ = NSDELIM;
+ blim = bp + nmlen;
+ while (bp < blim)
+ *bp++ = *nmstr++;
+ *bp = '\0';
+
+ RETVAL = gen_ns_name(buff, (HV *) SvRV(table), (AV *) SvRV(list));
+ Safefree(buff);
+ }
+ OUTPUT:
+ RETVAL
+
+void
+XML_DefaultCurrent(parser)
+ XML_Parser parser
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ XML_DefaultCurrent(parser);
+ }
+
+SV *
+XML_RecognizedString(parser)
+ XML_Parser parser
+ CODE:
+ {
+ XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0;
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ if (cbv->dflt_sv) {
+ dflthndl = defaulthandle;
+ }
+
+ if (cbv->recstring) {
+ sv_setpvn(cbv->recstring, "", 0);
+ }
+
+ if (cbv->no_expand)
+ XML_SetDefaultHandler(parser, recString);
+ else
+ XML_SetDefaultHandlerExpand(parser, recString);
+
+ XML_DefaultCurrent(parser);
+
+ if (cbv->no_expand)
+ XML_SetDefaultHandler(parser, dflthndl);
+ else
+ XML_SetDefaultHandlerExpand(parser, dflthndl);
+
+ RETVAL = newSVsv(cbv->recstring);
+ }
+ OUTPUT:
+ RETVAL
+
+int
+XML_GetErrorCode(parser)
+ XML_Parser parser
+
+int
+XML_GetCurrentLineNumber(parser)
+ XML_Parser parser
+
+
+int
+XML_GetCurrentColumnNumber(parser)
+ XML_Parser parser
+
+long
+XML_GetCurrentByteIndex(parser)
+ XML_Parser parser
+
+int
+XML_GetSpecifiedAttributeCount(parser)
+ XML_Parser parser
+
+char *
+XML_ErrorString(code)
+ int code
+ CODE:
+ const char *ret = XML_ErrorString(code);
+ ST(0) = sv_newmortal();
+ sv_setpv((SV*)ST(0), ret);
+
+SV *
+XML_LoadEncoding(data, size)
+ char * data
+ int size
+ CODE:
+ {
+ Encmap_Header *emh = (Encmap_Header *) data;
+ unsigned pfxsize, bmsize;
+
+ if (size < sizeof(Encmap_Header)
+ || ntohl(emh->magic) != ENCMAP_MAGIC) {
+ RETVAL = &PL_sv_undef;
+ }
+ else {
+ Encinfo *entry;
+ SV *sv;
+ PrefixMap *pfx;
+ unsigned short *bm;
+ int namelen;
+ int i;
+
+ pfxsize = ntohs(emh->pfsize);
+ bmsize = ntohs(emh->bmsize);
+
+ if (size != (sizeof(Encmap_Header)
+ + pfxsize * sizeof(PrefixMap)
+ + bmsize * sizeof(unsigned short))) {
+ RETVAL = &PL_sv_undef;
+ }
+ else {
+ /* Convert to uppercase and get name length */
+
+ for (i = 0; i < sizeof(emh->name); i++) {
+ char c = emh->name[i];
+
+ if (c == (char) 0)
+ break;
+
+ if (c >= 'a' && c <= 'z')
+ emh->name[i] -= 'a' - 'A';
+ }
+ namelen = i;
+
+ RETVAL = newSVpvn(emh->name, namelen);
+
+ New(322, entry, 1, Encinfo);
+ entry->prefixes_size = pfxsize;
+ entry->bytemap_size = bmsize;
+ for (i = 0; i < 256; i++) {
+ entry->firstmap[i] = ntohl(emh->map[i]);
+ }
+
+ pfx = (PrefixMap *) &data[sizeof(Encmap_Header)];
+ bm = (unsigned short *) (((char *) pfx)
+ + sizeof(PrefixMap) * pfxsize);
+
+ New(323, entry->prefixes, pfxsize, PrefixMap);
+ New(324, entry->bytemap, bmsize, unsigned short);
+
+ for (i = 0; i < pfxsize; i++, pfx++) {
+ PrefixMap *dest = &entry->prefixes[i];
+
+ dest->min = pfx->min;
+ dest->len = pfx->len;
+ dest->bmap_start = ntohs(pfx->bmap_start);
+ Copy(pfx->ispfx, dest->ispfx,
+ sizeof(pfx->ispfx) + sizeof(pfx->ischar), unsigned char);
+ }
+
+ for (i = 0; i < bmsize; i++)
+ entry->bytemap[i] = ntohs(bm[i]);
+
+ sv = newSViv(0);
+ sv_setref_pv(sv, "XML::Parser::Encinfo", (void *) entry);
+
+ if (! EncodingTable) {
+ EncodingTable
+ = perl_get_hv("XML::Parser::Expat::Encoding_Table",
+ FALSE);
+ if (! EncodingTable)
+ croak("Can't find XML::Parser::Expat::Encoding_Table");
+ }
+
+ hv_store(EncodingTable, emh->name, namelen, sv, 0);
+ }
+ }
+ }
+ OUTPUT:
+ RETVAL
+
+void
+XML_FreeEncoding(enc)
+ Encinfo * enc
+ CODE:
+ Safefree(enc->bytemap);
+ Safefree(enc->prefixes);
+ Safefree(enc);
+
+SV *
+XML_OriginalString(parser)
+ XML_Parser parser
+ CODE:
+ {
+ int parsepos, size;
+ const char *buff = XML_GetInputContext(parser, &parsepos, &size);
+ if (buff) {
+ RETVAL = newSVpvn((char *) &buff[parsepos],
+ XML_GetCurrentByteCount(parser));
+ }
+ else {
+ RETVAL = newSVpv("", 0);
+ }
+ }
+ OUTPUT:
+ RETVAL
+
+SV *
+XML_SetStartCdataHandler(parser, startcd_sv)
+ XML_Parser parser
+ SV * startcd_sv
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+ XML_StartCdataSectionHandler scdhndl =
+ (XML_StartCdataSectionHandler) 0;
+
+ XMLP_UPD(startcd_sv);
+ if (SvTRUE(startcd_sv))
+ scdhndl = startCdata;
+
+ XML_SetStartCdataSectionHandler(parser, scdhndl);
+ PUSHRET;
+ }
+
+SV *
+XML_SetEndCdataHandler(parser, endcd_sv)
+ XML_Parser parser
+ SV * endcd_sv
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+ XML_EndCdataSectionHandler ecdhndl =
+ (XML_EndCdataSectionHandler) 0;
+
+ XMLP_UPD(endcd_sv);
+ if (SvTRUE(endcd_sv))
+ ecdhndl = endCdata;
+
+ XML_SetEndCdataSectionHandler(parser, ecdhndl);
+ PUSHRET;
+ }
+
+void
+XML_UnsetAllHandlers(parser)
+ XML_Parser parser
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ suspend_callbacks(cbv);
+ if (cbv->ns) {
+ XML_SetNamespaceDeclHandler(cbv->p,
+ (XML_StartNamespaceDeclHandler) 0,
+ (XML_EndNamespaceDeclHandler) 0);
+ }
+
+ XML_SetElementHandler(parser,
+ (XML_StartElementHandler) 0,
+ (XML_EndElementHandler) 0);
+
+ XML_SetUnknownEncodingHandler(parser,
+ (XML_UnknownEncodingHandler) 0,
+ (void *) 0);
+ }
+
+int
+XML_ElementIndex(parser)
+ XML_Parser parser
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+ RETVAL = cbv->st_serial_stack[cbv->st_serial_stackptr];
+ }
+ OUTPUT:
+ RETVAL
+
+void
+XML_SkipUntil(parser, index)
+ XML_Parser parser
+ unsigned int index
+ CODE:
+ {
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+ if (index <= cbv->st_serial)
+ return;
+ cbv->skip_until = index;
+ suspend_callbacks(cbv);
+ }
+
+int
+XML_Do_External_Parse(parser, result)
+ XML_Parser parser
+ SV * result
+ CODE:
+ {
+ int type;
+
+ CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
+
+ if (SvROK(result) && SvOBJECT(SvRV(result))) {
+ RETVAL = parse_stream(parser, result);
+ }
+ else if (isGV(result)) {
+ RETVAL = parse_stream(parser,
+ sv_2mortal(newRV((SV*) GvIOp(result))));
+ }
+ else if (SvPOK(result)) {
+ STRLEN eslen;
+ int pret;
+ char *entstr = SvPV(result, eslen);
+
+ RETVAL = XML_Parse(parser, entstr, eslen, 1);
+ }
+ }
+ OUTPUT:
+ RETVAL
+
+
diff --git a/Expat/Makefile.PL b/Expat/Makefile.PL
new file mode 100644
index 0000000..6d5111c
--- /dev/null
+++ b/Expat/Makefile.PL
@@ -0,0 +1,29 @@
+use ExtUtils::MakeMaker;
+use Config;
+use English;
+
+my $libs = "-lexpat";
+my @extras = ();
+
+push(@extras, INC => "-I$expat_incpath") if $expat_incpath;
+
+$libs = "-L$expat_libpath $libs" if $expat_libpath;
+
+push(@extras, CAPI => 'TRUE')
+ if (($PERL_VERSION >= 5.005) and ($OSNAME eq 'MSWin32')
+ and ($Config{archname} =~ /-object\b/i));
+
+push(@extras,
+ ABSTRACT => "Lowlevel access to James Clark's expat XML parser",
+ AUTHOR => 'Matt Sergeant (matt@sergeant.org)')
+ if ($ExtUtils::MakeMaker::VERSION >= 5.4301);
+
+WriteMakefile(
+ NAME => 'XML::Parser::Expat',
+ C => ['Expat.c'],
+ LIBS => $libs,
+ XSPROTOARG => '-noprototypes',
+ VERSION_FROM => 'Expat.pm',
+ @extras
+);
+
diff --git a/Expat/encoding.h b/Expat/encoding.h
new file mode 100644
index 0000000..4e0374b
--- /dev/null
+++ b/Expat/encoding.h
@@ -0,0 +1,91 @@
+/*****************************************************************
+** encoding.h
+**
+** Copyright 1998 Clark Cooper
+** All rights reserved.
+**
+** This program is free software; you can redistribute it and/or
+** modify it under the same terms as Perl itself.
+*/
+
+#ifndef ENCODING_H
+#define ENCODING_H 1
+
+#define ENCMAP_MAGIC 0xfeebface
+
+typedef struct prefixmap {
+ unsigned char min;
+ unsigned char len; /* 0 => 256 */
+ unsigned short bmap_start;
+ unsigned char ispfx[32];
+ unsigned char ischar[32];
+} PrefixMap;
+
+typedef struct encinf
+{
+ unsigned short prefixes_size;
+ unsigned short bytemap_size;
+ int firstmap[256];
+ PrefixMap *prefixes;
+ unsigned short *bytemap;
+} Encinfo;
+
+typedef struct encmaphdr
+{
+ unsigned int magic;
+ char name[40];
+ unsigned short pfsize;
+ unsigned short bmsize;
+ int map[256];
+} Encmap_Header;
+
+/*================================================================
+** Structure of Encoding map binary encoding
+**
+** Note that all shorts and ints are in network order,
+** so when packing or unpacking with perl, use 'n' and 'N' respectively.
+** In C, use the htonl family of functions.
+**
+** The basic structure is:
+**
+** _______________________
+** |Header (including map expat needs for 1st byte)
+** |PrefixMap * pfsize
+** | This section isn't included for single-byte encodings.
+** | For multiple byte encodings, when a byte represents a prefix
+** | then it indexes into this vector instead of mapping to a
+** | Unicode character. The PrefixMap type is declared above. The
+** | ispfx and ischar fields are bitvectors indicating whether
+** | the byte being mapped is a prefix or character respectively.
+** | If neither is set, then the character is not mapped to Unicode.
+** |
+** | The min field is the 1st byte mapped for this prefix; the
+** | len field is the number of bytes mapped; and bmap_start is
+** | the starting index of the map for this prefix in the overall
+** | map (next section).
+** |unsigned short * bmsize
+** | This section also is omitted for single-byte encodings.
+** | Each short is either a Unicode scalar or an index into the
+** | PrefixMap vector.
+**
+** The header for these files is declared above as the Encmap_Header type.
+** The magic field is a magic number which should match the ENCMAP_MAGIC
+** macro above. The next 40 bytes stores IANA registered name for the
+** encoding. The pfsize field holds the number of PrefixMaps, which should
+** be zero for single byte encodings. The bmsize field holds the number of
+** shorts used for the overall map.
+**
+** The map field contains either the Unicode scalar encoded by the 1st byte
+** or -n where n is the number of bytes that such a 1st byte implies (Expat
+** requires that the number of bytes to encode a character is indicated by
+** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
+**
+** If the encoding is a multiple byte encoding, then there will be PrefixMap
+** and character map sections. The 1st PrefixMap (index 0), covers a range
+** of bytes that includes all 1st byte prefixes.
+**
+** Look at convert_to_unicode in Expat.xs to see how this data structure
+** is used.
+*/
+
+#endif /* ndef ENCODING_H */
diff --git a/Expat/typemap b/Expat/typemap
new file mode 100644
index 0000000..47d7dc5
--- /dev/null
+++ b/Expat/typemap
@@ -0,0 +1,24 @@
+#
+##### XML::Parser::Expat typemap
+#
+
+XML_Parser T_PTR
+Encinfo * T_ENCOBJ
+
+################################################################
+INPUT
+T_ENCOBJ
+ if (sv_derived_from($arg, \"XML::Parser::Encinfo\")) {
+ IV tmp = SvIV((SV*)SvRV($arg));
+ $var = ($type) tmp;
+ }
+ else
+ croak(\"$var is not of type XML::Parser::Encinfo\")
+################################################################
+OUTPUT
+T_ENCOBJ
+ if ($var) {
+ sv_setref_pv($arg, \"XML::Parser::Encinfo\", (void*)$var);
+ }
+ else
+ $arg = &PL_sv_undef;