From 891c29af147fcbe6c4dd5d8ffbbb426665d4b558 Mon Sep 17 00:00:00 2001 From: Lorry Date: Mon, 21 May 2012 16:44:15 +0100 Subject: Tarball conversion --- Expat/Expat.pm | 1234 +++++++++++++++++++++++++++++ Expat/Expat.xs | 2214 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Expat/Makefile.PL | 29 + Expat/encoding.h | 91 +++ Expat/typemap | 24 + 5 files changed, 3592 insertions(+) create mode 100644 Expat/Expat.pm create mode 100644 Expat/Expat.xs create mode 100644 Expat/Makefile.PL create mode 100644 Expat/encoding.h create mode 100644 Expat/typemap (limited to 'Expat') diff --git a/Expat/Expat.pm b/Expat/Expat.pm new file mode 100644 index 0000000..b1fd920 --- /dev/null +++ b/Expat/Expat.pm @@ -0,0 +1,1234 @@ +package XML::Parser::Expat; + +require 5.004; + +use strict; +use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path + $have_File_Spec); +use Carp; + +require DynaLoader; + +@ISA = qw(DynaLoader); +$VERSION = "2.41"; + +$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm'; + +%Encoding_Table = (); +if ($have_File_Spec) { + @Encoding_Path = (grep(-d $_, + map(File::Spec->catdir($_, qw(XML Parser Encodings)), + @INC)), + File::Spec->curdir); +} +else { + @Encoding_Path = (grep(-d $_, map($_ . '/XML/Parser/Encodings', @INC)), '.'); +} + + +bootstrap XML::Parser::Expat $VERSION; + +%Handler_Setters = ( + Start => \&SetStartElementHandler, + End => \&SetEndElementHandler, + Char => \&SetCharacterDataHandler, + Proc => \&SetProcessingInstructionHandler, + Comment => \&SetCommentHandler, + CdataStart => \&SetStartCdataHandler, + CdataEnd => \&SetEndCdataHandler, + Default => \&SetDefaultHandler, + Unparsed => \&SetUnparsedEntityDeclHandler, + Notation => \&SetNotationDeclHandler, + ExternEnt => \&SetExternalEntityRefHandler, + ExternEntFin => \&SetExtEntFinishHandler, + Entity => \&SetEntityDeclHandler, + Element => \&SetElementDeclHandler, + Attlist => \&SetAttListDeclHandler, + Doctype => \&SetDoctypeHandler, + DoctypeFin => \&SetEndDoctypeHandler, + XMLDecl => \&SetXMLDeclHandler + ); + +sub new { + my ($class, %args) = @_; + my $self = bless \%args, $_[0]; + $args{_State_} = 0; + $args{Context} = []; + $args{Namespaces} ||= 0; + $args{ErrorMessage} ||= ''; + if ($args{Namespaces}) { + $args{Namespace_Table} = {}; + $args{Namespace_List} = [undef]; + $args{Prefix_Table} = {}; + $args{New_Prefixes} = []; + } + $args{_Setters} = \%Handler_Setters; + $args{Parser} = ParserCreate($self, $args{ProtocolEncoding}, + $args{Namespaces}); + $self; +} + +sub load_encoding { + my ($file) = @_; + + $file =~ s!([^/]+)$!\L$1\E!; + $file .= '.enc' unless $file =~ /\.enc$/; + unless ($file =~ m!^/!) { + foreach (@Encoding_Path) { + my $tmp = ($have_File_Spec + ? File::Spec->catfile($_, $file) + : "$_/$file"); + if (-e $tmp) { + $file = $tmp; + last; + } + } + } + + local(*ENC); + open(ENC, $file) or croak("Couldn't open encmap $file:\n$!\n"); + binmode(ENC); + my $data; + my $br = sysread(ENC, $data, -s $file); + croak("Trouble reading $file:\n$!\n") + unless defined($br); + close(ENC); + + my $name = LoadEncoding($data, $br); + croak("$file isn't an encmap file") + unless defined($name); + + $name; +} # End load_encoding + +sub setHandlers { + my ($self, @handler_pairs) = @_; + + croak("Uneven number of arguments to setHandlers method") + if (int(@handler_pairs) & 1); + + my @ret; + + while (@handler_pairs) { + my $type = shift @handler_pairs; + my $handler = shift @handler_pairs; + croak "Handler for $type not a Code ref" + unless (! defined($handler) or ! $handler or ref($handler) eq 'CODE'); + + my $hndl = $self->{_Setters}->{$type}; + + unless (defined($hndl)) { + my @types = sort keys %{$self->{_Setters}}; + croak("Unknown Expat handler type: $type\n Valid types: @types"); + } + + my $old = &$hndl($self->{Parser}, $handler); + push (@ret, $type, $old); + } + + return @ret; +} + +sub xpcroak + { + my ($self, $message) = @_; + + my $eclines = $self->{ErrorContext}; + my $line = GetCurrentLineNumber($_[0]->{Parser}); + $message .= " at line $line"; + $message .= ":\n" . $self->position_in_context($eclines) + if defined($eclines); + croak $message; +} + +sub xpcarp { + my ($self, $message) = @_; + + my $eclines = $self->{ErrorContext}; + my $line = GetCurrentLineNumber($_[0]->{Parser}); + $message .= " at line $line"; + $message .= ":\n" . $self->position_in_context($eclines) + if defined($eclines); + carp $message; +} + +sub default_current { + my $self = shift; + if ($self->{_State_} == 1) { + return DefaultCurrent($self->{Parser}); + } +} + +sub recognized_string { + my $self = shift; + if ($self->{_State_} == 1) { + return RecognizedString($self->{Parser}); + } +} + +sub original_string { + my $self = shift; + if ($self->{_State_} == 1) { + return OriginalString($self->{Parser}); + } +} + +sub current_line { + my $self = shift; + if ($self->{_State_} == 1) { + return GetCurrentLineNumber($self->{Parser}); + } +} + +sub current_column { + my $self = shift; + if ($self->{_State_} == 1) { + return GetCurrentColumnNumber($self->{Parser}); + } +} + +sub current_byte { + my $self = shift; + if ($self->{_State_} == 1) { + return GetCurrentByteIndex($self->{Parser}); + } +} + +sub base { + my ($self, $newbase) = @_; + my $p = $self->{Parser}; + my $oldbase = GetBase($p); + SetBase($p, $newbase) if @_ > 1; + return $oldbase; +} + +sub context { + my $ctx = $_[0]->{Context}; + @$ctx; +} + +sub current_element { + my ($self) = @_; + @{$self->{Context}} ? $self->{Context}->[-1] : undef; +} + +sub in_element { + my ($self, $element) = @_; + @{$self->{Context}} ? $self->eq_name($self->{Context}->[-1], $element) + : undef; +} + +sub within_element { + my ($self, $element) = @_; + my $cnt = 0; + foreach (@{$self->{Context}}) { + $cnt++ if $self->eq_name($_, $element); + } + return $cnt; +} + +sub depth { + my ($self) = @_; + int(@{$self->{Context}}); +} + +sub element_index { + my ($self) = @_; + + if ($self->{_State_} == 1) { + return ElementIndex($self->{Parser}); + } +} + +################ +# Namespace methods + +sub namespace { + my ($self, $name) = @_; + local($^W) = 0; + $self->{Namespace_List}->[int($name)]; +} + +sub eq_name { + my ($self, $nm1, $nm2) = @_; + local($^W) = 0; + + int($nm1) == int($nm2) and $nm1 eq $nm2; +} + +sub generate_ns_name { + my ($self, $name, $namespace) = @_; + + $namespace ? + GenerateNSName($name, $namespace, $self->{Namespace_Table}, + $self->{Namespace_List}) + : $name; +} + +sub new_ns_prefixes { + my ($self) = @_; + if ($self->{Namespaces}) { + return @{$self->{New_Prefixes}}; + } + return (); +} + +sub expand_ns_prefix { + my ($self, $prefix) = @_; + + if ($self->{Namespaces}) { + my $stack = $self->{Prefix_Table}->{$prefix}; + return (defined($stack) and @$stack) ? $stack->[-1] : undef; + } + + return undef; +} + +sub current_ns_prefixes { + my ($self) = @_; + + if ($self->{Namespaces}) { + my %set = %{$self->{Prefix_Table}}; + + if (exists $set{'#default'} and not defined($set{'#default'}->[-1])) { + delete $set{'#default'}; + } + + return keys %set; + } + + return (); +} + + +################################################################ +# Namespace declaration handlers +# + +sub NamespaceStart { + my ($self, $prefix, $uri) = @_; + + $prefix = '#default' unless defined $prefix; + my $stack = $self->{Prefix_Table}->{$prefix}; + + if (defined $stack) { + push(@$stack, $uri); + } + else { + $self->{Prefix_Table}->{$prefix} = [$uri]; + } + + # The New_Prefixes list gets emptied at end of startElement function + # in Expat.xs + + push(@{$self->{New_Prefixes}}, $prefix); +} + +sub NamespaceEnd { + my ($self, $prefix) = @_; + + $prefix = '#default' unless defined $prefix; + + my $stack = $self->{Prefix_Table}->{$prefix}; + if (@$stack > 1) { + pop(@$stack); + } + else { + delete $self->{Prefix_Table}->{$prefix}; + } +} + +################ + +sub specified_attr { + my $self = shift; + + if ($self->{_State_} == 1) { + return GetSpecifiedAttributeCount($self->{Parser}); + } +} + +sub finish { + my ($self) = @_; + if ($self->{_State_} == 1) { + my $parser = $self->{Parser}; + UnsetAllHandlers($parser); + } +} + +sub position_in_context { + my ($self, $lines) = @_; + if ($self->{_State_} == 1) { + my $parser = $self->{Parser}; + my ($string, $linepos) = PositionContext($parser, $lines); + + return '' unless defined($string); + + my $col = GetCurrentColumnNumber($parser); + my $ptr = ('=' x ($col - 1)) . '^' . "\n"; + my $ret; + my $dosplit = $linepos < length($string); + + $string .= "\n" unless $string =~ /\n$/; + + if ($dosplit) { + $ret = substr($string, 0, $linepos) . $ptr + . substr($string, $linepos); + } else { + $ret = $string . $ptr; + } + + return $ret; + } +} + +sub xml_escape { + my $self = shift; + my $text = shift; + + study $text; + $text =~ s/\&/\&/g; + $text =~ s/ 1; + + if ($_ eq '>') { + $text =~ s/>/\>/g; + } + elsif ($_ eq '"') { + $text =~ s/\"/\"/; + } + elsif ($_ eq "'") { + $text =~ s/\'/\'/; + } + else { + my $rep = '&#' . sprintf('x%X', ord($_)) . ';'; + if (/\W/) { + my $ptrn = "\\$_"; + $text =~ s/$ptrn/$rep/g; + } + else { + $text =~ s/$_/$rep/g; + } + } + } + $text; +} + +sub skip_until { + my $self = shift; + if ($self->{_State_} <= 1) { + SkipUntil($self->{Parser}, $_[0]); + } +} + +sub release { + my $self = shift; + ParserRelease($self->{Parser}); +} + +sub DESTROY { + my $self = shift; + ParserFree($self->{Parser}); +} + +sub parse { + my $self = shift; + my $arg = shift; + croak "Parse already in progress (Expat)" if $self->{_State_}; + $self->{_State_} = 1; + my $parser = $self->{Parser}; + my $ioref; + my $result = 0; + + if (defined $arg) { + local *@; + if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) { + $ioref = $arg; + } elsif ($] < 5.008 and defined tied($arg)) { + require IO::Handle; + $ioref = $arg; + } + else { + require IO::Handle; + eval { + no strict 'refs'; + $ioref = *{$arg}{IO} if defined *{$arg}; + }; + } + } + + if (defined($ioref)) { + my $delim = $self->{Stream_Delimiter}; + my $prev_rs; + my $ioclass = ref $ioref; + $ioclass = "IO::Handle" if !length $ioclass; + + $prev_rs = $ioclass->input_record_separator("\n$delim\n") + if defined($delim); + + $result = ParseStream($parser, $ioref, $delim); + + $ioclass->input_record_separator($prev_rs) + if defined($delim); + } else { + $result = ParseString($parser, $arg); + } + + $self->{_State_} = 2; + $result or croak $self->{ErrorMessage}; +} + +sub parsestring { + my $self = shift; + $self->parse(@_); +} + +sub parsefile { + my $self = shift; + croak "Parser has already been used" if $self->{_State_}; + local(*FILE); + open(FILE, $_[0]) or croak "Couldn't open $_[0]:\n$!"; + binmode(FILE); + my $ret = $self->parse(*FILE); + close(FILE); + $ret; +} + +################################################################ +package #hide from PAUSE + XML::Parser::ContentModel; +use overload '""' => \&asString, 'eq' => \&thiseq; + +sub EMPTY () {1} +sub ANY () {2} +sub MIXED () {3} +sub NAME () {4} +sub CHOICE () {5} +sub SEQ () {6} + + +sub isempty { + return $_[0]->{Type} == EMPTY; +} + +sub isany { + return $_[0]->{Type} == ANY; +} + +sub ismixed { + return $_[0]->{Type} == MIXED; +} + +sub isname { + return $_[0]->{Type} == NAME; +} + +sub name { + return $_[0]->{Tag}; +} + +sub ischoice { + return $_[0]->{Type} == CHOICE; +} + +sub isseq { + return $_[0]->{Type} == SEQ; +} + +sub quant { + return $_[0]->{Quant}; +} + +sub children { + my $children = $_[0]->{Children}; + if (defined $children) { + return @$children; + } + return undef; +} + +sub asString { + my ($self) = @_; + my $ret; + + if ($self->{Type} == NAME) { + $ret = $self->{Tag}; + } + elsif ($self->{Type} == EMPTY) { + return "EMPTY"; + } + elsif ($self->{Type} == ANY) { + return "ANY"; + } + elsif ($self->{Type} == MIXED) { + $ret = '(#PCDATA'; + foreach (@{$self->{Children}}) { + $ret .= '|' . $_; + } + $ret .= ')'; + } + else { + my $sep = $self->{Type} == CHOICE ? '|' : ','; + $ret = '(' . join($sep, map { $_->asString } @{$self->{Children}}) . ')'; + } + + $ret .= $self->{Quant} if $self->{Quant}; + return $ret; +} + +sub thiseq { + my $self = shift; + + return $self->asString eq $_[0]; +} + +################################################################ +package #hide from PAUSE + XML::Parser::ExpatNB; + +use vars qw(@ISA); +use Carp; + +@ISA = qw(XML::Parser::Expat); + +sub parse { + my $self = shift; + my $class = ref($self); + croak "parse method not supported in $class"; +} + +sub parsestring { + my $self = shift; + my $class = ref($self); + croak "parsestring method not supported in $class"; +} + +sub parsefile { + my $self = shift; + my $class = ref($self); + croak "parsefile method not supported in $class"; +} + +sub parse_more { + my ($self, $data) = @_; + + $self->{_State_} = 1; + my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data); + + croak $self->{ErrorMessage} unless $ret; +} + +sub parse_done { + my $self = shift; + + my $ret = XML::Parser::Expat::ParseDone($self->{Parser}); + unless ($ret) { + my $msg = $self->{ErrorMessage}; + $self->release; + croak $msg; + } + + $self->{_State_} = 2; + + my $result = $ret; + my @result = (); + my $final = $self->{FinalHandler}; + if (defined $final) { + if (wantarray) { + @result = &$final($self); + } + else { + $result = &$final($self); + } + } + + $self->release; + + return unless defined wantarray; + return wantarray ? @result : $result; +} + +################################################################ + +package #hide from PAUSE + XML::Parser::Encinfo; + +sub DESTROY { + my $self = shift; + XML::Parser::Expat::FreeEncoding($self); +} + +1; + +__END__ + +=head1 NAME + +XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser + +=head1 SYNOPSIS + + use XML::Parser::Expat; + + $parser = XML::Parser::Expat->new; + $parser->setHandlers('Start' => \&sh, + 'End' => \&eh, + 'Char' => \&ch); + open(FOO, '<', 'info.xml') or die "Couldn't open"; + $parser->parse(*FOO); + close(FOO); + # $parser->parse(' here we go '); + + sub sh + { + my ($p, $el, %atts) = @_; + $p->setHandlers('Char' => \&spec) + if ($el eq 'special'); + ... + } + + sub eh + { + my ($p, $el) = @_; + $p->setHandlers('Char' => \&ch) # Special elements won't contain + if ($el eq 'special'); # other special elements + ... + } + +=head1 DESCRIPTION + +This module provides an interface to James Clark's XML parser, expat. As in +expat, a single instance of the parser can only parse one document. Calls +to parsestring after the first for a given instance will die. + +Expat (and XML::Parser::Expat) are event based. As the parser recognizes +parts of the document (say the start or end of an XML element), then any +handlers registered for that type of an event are called with suitable +parameters. + +=head1 METHODS + +=over 4 + +=item new + +This is a class method, the constructor for XML::Parser::Expat. Options are +passed as keyword value pairs. The recognized options are: + +=over 4 + +=item * ProtocolEncoding + +The protocol encoding name. The default is none. The expat built-in +encodings are: C, C, C, and C. +Other encodings may be used if they have encoding maps in one of the +directories in the @Encoding_Path list. Setting the protocol encoding +overrides any encoding in the XML declaration. + +=item * Namespaces + +When this option is given with a true value, then the parser does namespace +processing. By default, namespace processing is turned off. When it is +turned on, the parser consumes I attributes and strips off prefixes +from element and attributes names where those prefixes have a defined +namespace. A name's namespace can be found using the L<"namespace"> method +and two names can be checked for absolute equality with the L<"eq_name"> +method. + +=item * NoExpand + +Normally, the parser will try to expand references to entities defined in +the internal subset. If this option is set to a true value, and a default +handler is also set, then the default handler will be called when an +entity reference is seen in text. This has no effect if a default handler +has not been registered, and it has no effect on the expansion of entity +references inside attribute values. + +=item * Stream_Delimiter + +This option takes a string value. When this string is found alone on a line +while parsing from a stream, then the parse is ended as if it saw an end of +file. The intended use is with a stream of xml documents in a MIME multipart +format. The string should not contain a trailing newline. + +=item * ErrorContext + +When this option is defined, errors are reported in context. The value +of ErrorContext should be the number of lines to show on either side of +the line in which the error occurred. + +=item * ParseParamEnt + +Unless standalone is set to "yes" in the XML declaration, setting this to +a true value allows the external DTD to be read, and parameter entities +to be parsed and expanded. + +=item * Base + +The base to use for relative pathnames or URLs. This can also be done by +using the base method. + +=back + +=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) + +This method registers handlers for the various events. If no handlers are +registered, then a call to parsestring or parsefile will only determine if +the corresponding XML document is well formed (by returning without error.) +This may be called from within a handler, after the parse has started. + +Setting a handler to something that evaluates to false unsets that +handler. + +This method returns a list of type, handler pairs corresponding to the +input. The handlers returned are the ones that were in effect before the +call to setHandlers. + +The recognized events and the parameters passed to the corresponding +handlers are: + +=over 4 + +=item * Start (Parser, Element [, Attr, Val [,...]]) + +This event is generated when an XML start tag is recognized. Parser is +an XML::Parser::Expat instance. Element is the name of the XML element that +is opened with the start tag. The Attr & Val pairs are generated for each +attribute in the start tag. + +=item * End (Parser, Element) + +This event is generated when an XML end tag is recognized. Note that +an XML empty tag () generates both a start and an end event. + +There is always a lower level start and end handler installed that wrap +the corresponding callbacks. This is to handle the context mechanism. +A consequence of this is that the default handler (see below) will not +see a start tag or end tag unless the default_current method is called. + +=item * Char (Parser, String) + +This event is generated when non-markup is recognized. The non-markup +sequence of characters is in String. A single non-markup sequence of +characters may generate multiple calls to this handler. Whatever the +encoding of the string in the original document, this is given to the +handler in UTF-8. + +=item * Proc (Parser, Target, Data) + +This event is generated when a processing instruction is recognized. + +=item * Comment (Parser, String) + +This event is generated when a comment is recognized. + +=item * CdataStart (Parser) + +This is called at the start of a CDATA section. + +=item * CdataEnd (Parser) + +This is called at the end of a CDATA section. + +=item * Default (Parser, String) + +This is called for any characters that don't have a registered handler. +This includes both characters that are part of markup for which no +events are generated (markup declarations) and characters that +could generate events, but for which no handler has been registered. + +Whatever the encoding in the original document, the string is returned to +the handler in UTF-8. + +=item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) + +This is called for a declaration of an unparsed entity. Entity is the name +of the entity. Base is the base to be used for resolving a relative URI. +Sysid is the system id. Pubid is the public id. Notation is the notation +name. Base and Pubid may be undefined. + +=item * Notation (Parser, Notation, Base, Sysid, Pubid) + +This is called for a declaration of notation. Notation is the notation name. +Base is the base to be used for resolving a relative URI. Sysid is the system +id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. + +=item * ExternEnt (Parser, Base, Sysid, Pubid) + +This is called when an external entity is referenced. Base is the base to be +used for resolving a relative URI. Sysid is the system id. Pubid is the public +id. Base, and Pubid may be undefined. + +This handler should either return a string, which represents the contents of +the external entity, or return an open filehandle that can be read to obtain +the contents of the external entity, or return undef, which indicates the +external entity couldn't be found and will generate a parse error. + +If an open filehandle is returned, it must be returned as either a glob +(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). + +=item * ExternEntFin (Parser) + +This is called after an external entity has been parsed. It allows +applications to perform cleanup on actions performed in the above +ExternEnt handler. + +=item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) + +This is called when an entity is declared. For internal entities, the Val +parameter will contain the value and the remaining three parameters will +be undefined. For external entities, the Val parameter +will be undefined, the Sysid parameter will have the system id, the Pubid +parameter will have the public id if it was provided (it will be undefined +otherwise), the Ndata parameter will contain the notation for unparsed +entities. If this is a parameter entity declaration, then the IsParam +parameter is true. + +Note that this handler and the Unparsed handler above overlap. If both are +set, then this handler will not be called for unparsed entities. + +=item * Element (Parser, Name, Model) + +The element handler is called when an element declaration is found. Name is +the element name, and Model is the content model as an +XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods"> +for methods available for this class. + +=item * Attlist (Parser, Elname, Attname, Type, Default, Fixed) + +This handler is called for each attribute in an ATTLIST declaration. +So an ATTLIST declaration that has multiple attributes +will generate multiple calls to this handler. The Elname parameter is the +name of the element with which the attribute is being associated. The Attname +parameter is the name of the attribute. Type is the attribute type, given as +a string. Default is the default value, which will either be "#REQUIRED", +"#IMPLIED" or a quoted string (i.e. the returned string will begin and end +with a quote character). If Fixed is true, then this is a fixed attribute. + +=item * Doctype (Parser, Name, Sysid, Pubid, Internal) + +This handler is called for DOCTYPE declarations. Name is the document type +name. Sysid is the system id of the document type, if it was provided, +otherwise it's undefined. Pubid is the public id of the document type, +which will be undefined if no public id was given. Internal will be +true or false, indicating whether or not the doctype declaration contains +an internal subset. + +=item * DoctypeFin (Parser) + +This handler is called after parsing of the DOCTYPE declaration has finished, +including any internal or external DTD declarations. + +=item * XMLDecl (Parser, Version, Encoding, Standalone) + +This handler is called for XML declarations. Version is a string containg +the version. Encoding is either undefined or contains an encoding string. +Standalone is either undefined, or true or false. Undefined indicates +that no standalone parameter was given in the XML declaration. True or +false indicates "yes" or "no" respectively. + +=back + +=item namespace(name) + +Return the URI of the namespace that the name belongs to. If the name doesn't +belong to any namespace, an undef is returned. This is only valid on names +received through the Start or End handlers from a single document, or through +a call to the generate_ns_name method. In other words, don't use names +generated from one instance of XML::Parser::Expat with other instances. + +=item eq_name(name1, name2) + +Return true if name1 and name2 are identical (i.e. same name and from +the same namespace.) This is only meaningful if both names were obtained +through the Start or End handlers from a single document, or through +a call to the generate_ns_name method. + +=item generate_ns_name(name, namespace) + +Return a name, associated with a given namespace, good for using with the +above 2 methods. The namespace argument should be the namespace URI, not +a prefix. + +=item new_ns_prefixes + +When called from a start tag handler, returns namespace prefixes declared +with this start tag. If called elsewere (or if there were no namespace +prefixes declared), it returns an empty list. Setting of the default +namespace is indicated with '#default' as a prefix. + +=item expand_ns_prefix(prefix) + +Return the uri to which the given prefix is currently bound. Returns +undef if the prefix isn't currently bound. Use '#default' to find the +current binding of the default namespace (if any). + +=item current_ns_prefixes + +Return a list of currently bound namespace prefixes. The order of the +the prefixes in the list has no meaning. If the default namespace is +currently bound, '#default' appears in the list. + +=item recognized_string + +Returns the string from the document that was recognized in order to call +the current handler. For instance, when called from a start handler, it +will give us the the start-tag string. The string is encoded in UTF-8. +This method doesn't return a meaningful string inside declaration handlers. + +=item original_string + +Returns the verbatim string from the document that was recognized in +order to call the current handler. The string is in the original document +encoding. This method doesn't return a meaningful string inside declaration +handlers. + +=item default_current + +When called from a handler, causes the sequence of characters that generated +the corresponding event to be sent to the default handler (if one is +registered). Use of this method is deprecated in favor the recognized_string +method, which you can use without installing a default handler. This +method doesn't deliver a meaningful string to the default handler when +called from inside declaration handlers. + +=item xpcroak(message) + +Concatenate onto the given message the current line number within the +XML document plus the message implied by ErrorContext. Then croak with +the formed message. + +=item xpcarp(message) + +Concatenate onto the given message the current line number within the +XML document plus the message implied by ErrorContext. Then carp with +the formed message. + +=item current_line + +Returns the line number of the current position of the parse. + +=item current_column + +Returns the column number of the current position of the parse. + +=item current_byte + +Returns the current position of the parse. + +=item base([NEWBASE]); + +Returns the current value of the base for resolving relative URIs. If +NEWBASE is supplied, changes the base to that value. + +=item context + +Returns a list of element names that represent open elements, with the +last one being the innermost. Inside start and end tag handlers, this +will be the tag of the parent element. + +=item current_element + +Returns the name of the innermost currently opened element. Inside +start or end handlers, returns the parent of the element associated +with those tags. + +=item in_element(NAME) + +Returns true if NAME is equal to the name of the innermost currently opened +element. If namespace processing is being used and you want to check +against a name that may be in a namespace, then use the generate_ns_name +method to create the NAME argument. + +=item within_element(NAME) + +Returns the number of times the given name appears in the context list. +If namespace processing is being used and you want to check +against a name that may be in a namespace, then use the generate_ns_name +method to create the NAME argument. + +=item depth + +Returns the size of the context list. + +=item element_index + +Returns an integer that is the depth-first visit order of the current +element. This will be zero outside of the root element. For example, +this will return 1 when called from the start handler for the root element +start tag. + +=item skip_until(INDEX) + +INDEX is an integer that represents an element index. When this method +is called, all handlers are suspended until the start tag for an element +that has an index number equal to INDEX is seen. If a start handler has +been set, then this is the first tag that the start handler will see +after skip_until has been called. + + +=item position_in_context(LINES) + +Returns a string that shows the current parse position. LINES should be +an integer >= 0 that represents the number of lines on either side of the +current parse line to place into the returned string. + +=item xml_escape(TEXT [, CHAR [, CHAR ...]]) + +Returns TEXT with markup characters turned into character entities. Any +additional characters provided as arguments are also turned into character +references where found in TEXT. + +=item parse (SOURCE) + +The SOURCE parameter should either be a string containing the whole XML +document, or it should be an open IO::Handle. Only a single document +may be parsed for a given instance of XML::Parser::Expat, so this will croak +if it's been called previously for this instance. + +=item parsestring(XML_DOC_STRING) + +Parses the given string as an XML document. Only a single document may be +parsed for a given instance of XML::Parser::Expat, so this will die if either +parsestring or parsefile has been called for this instance previously. + +This method is deprecated in favor of the parse method. + +=item parsefile(FILENAME) + +Parses the XML document in the given file. Will die if parsestring or +parsefile has been called previously for this instance. + +=item is_defaulted(ATTNAME) + +NO LONGER WORKS. To find out if an attribute is defaulted please use +the specified_attr method. + +=item specified_attr + +When the start handler receives lists of attributes and values, the +non-defaulted (i.e. explicitly specified) attributes occur in the list +first. This method returns the number of specified items in the list. +So if this number is equal to the length of the list, there were no +defaulted values. Otherwise the number points to the index of the +first defaulted attribute name. + +=item finish + +Unsets all handlers (including internal ones that set context), but expat +continues parsing to the end of the document or until it finds an error. +It should finish up a lot faster than with the handlers set. + +=item release + +There are data structures used by XML::Parser::Expat that have circular +references. This means that these structures will never be garbage +collected unless these references are explicitly broken. Calling this +method breaks those references (and makes the instance unusable.) + +Normally, higher level calls handle this for you, but if you are using +XML::Parser::Expat directly, then it's your responsibility to call it. + +=back + +=head2 XML::Parser::ContentModel Methods + +The element declaration handlers are passed objects of this class as the +content model of the element declaration. They also represent content +particles, components of a content model. + +When referred to as a string, these objects are automagicly converted to a +string representation of the model (or content particle). + +=over 4 + +=item isempty + +This method returns true if the object is "EMPTY", false otherwise. + +=item isany + +This method returns true if the object is "ANY", false otherwise. + +=item ismixed + +This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", +false otherwise. + +=item isname + +This method returns if the object is an element name. + +=item ischoice + +This method returns true if the object is a choice of content particles. + + +=item isseq + +This method returns true if the object is a sequence of content particles. + +=item quant + +This method returns undef or a string representing the quantifier +('?', '*', '+') associated with the model or particle. + +=item children + +This method returns undef or (for mixed, choice, and sequence types) +an array of component content particles. There will always be at least +one component for choices and sequences, but for a mixed content model +of pure PCDATA, "(#PCDATA)", then an undef is returned. + +=back + +=head2 XML::Parser::ExpatNB Methods + +The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used +for non-blocking access to the expat library. It does not support the parse, +parsestring, or parsefile methods, but it does have these additional methods: + +=over 4 + +=item parse_more(DATA) + +Feed expat more text to munch on. + +=item parse_done + +Tell expat that it's gotten the whole document. + +=back + +=head1 FUNCTIONS + +=over 4 + +=item XML::Parser::Expat::load_encoding(ENCODING) + +Load an external encoding. ENCODING is either the name of an encoding or +the name of a file. The basename is converted to lowercase and a '.enc' +extension is appended unless there's one already there. Then, unless +it's an absolute pathname (i.e. begins with '/'), the first file by that +name discovered in the @Encoding_Path path list is used. + +The encoding in the file is loaded and kept in the %Encoding_Table +table. Earlier encodings of the same name are replaced. + +This function is automatically called by expat when it encounters an encoding +it doesn't know about. Expat shouldn't call this twice for the same +encoding name. The only reason users should use this function is to +explicitly load an encoding not contained in the @Encoding_Path list. + +=back + +=head1 AUTHORS + +Larry Wall > wrote version 1.0. + +Clark Cooper > picked up support, changed the API +for this version (2.x), provided documentation, and added some standard +package features. + +=cut diff --git a/Expat/Expat.xs b/Expat/Expat.xs new file mode 100644 index 0000000..497b64f --- /dev/null +++ b/Expat/Expat.xs @@ -0,0 +1,2214 @@ +/***************************************************************** +** Expat.xs +** +** Copyright 1998 Larry Wall and Clark Cooper +** All rights reserved. +** +** This program is free software; you can redistribute it and/or +** modify it under the same terms as Perl itself. +** +*/ + +#include + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +#undef convert + +#include "patchlevel.h" +#include "encoding.h" + + +/* Version 5.005_5x (Development version for 5.006) doesn't like sv_... + anymore, but 5.004 doesn't know about PL_sv.. + Don't want to push up required version just for this. */ + +#if PATCHLEVEL < 5 +#define PL_sv_undef sv_undef +#define PL_sv_no sv_no +#define PL_sv_yes sv_yes +#define PL_na na +#endif + +#define BUFSIZE 32768 + +#define NSDELIM '|' + +/* Macro to update handler fields. Used in the various handler setting + XSUBS */ + +#define XMLP_UPD(fld) \ + RETVAL = cbv->fld ? newSVsv(cbv->fld) : &PL_sv_undef;\ + if (cbv->fld) {\ + if (cbv->fld != fld)\ + sv_setsv(cbv->fld, fld);\ + }\ + else\ + cbv->fld = newSVsv(fld) + +/* Macro to push old handler value onto return stack. This is done here + to get around a bug in 5.004 sv_2mortal function. */ + +#define PUSHRET \ + ST(0) = RETVAL;\ + if (RETVAL != &PL_sv_undef && SvREFCNT(RETVAL)) sv_2mortal(RETVAL) + +typedef struct { + SV* self_sv; + XML_Parser p; + + AV* context; + AV* new_prefix_list; + HV *nstab; + AV *nslst; + + unsigned int st_serial; + unsigned int st_serial_stackptr; + unsigned int st_serial_stacksize; + unsigned int * st_serial_stack; + + unsigned int skip_until; + + SV *recstring; + char * delim; + STRLEN delimlen; + + unsigned ns:1; + unsigned no_expand:1; + unsigned parseparam:1; + + /* Callback handlers */ + + SV* start_sv; + SV* end_sv; + SV* char_sv; + SV* proc_sv; + SV* cmnt_sv; + SV* dflt_sv; + + SV* entdcl_sv; + SV* eledcl_sv; + SV* attdcl_sv; + SV* doctyp_sv; + SV* doctypfin_sv; + SV* xmldec_sv; + + SV* unprsd_sv; + SV* notation_sv; + + SV* extent_sv; + SV* extfin_sv; + + SV* startcd_sv; + SV* endcd_sv; +} CallbackVector; + + +static HV* EncodingTable = NULL; + +static XML_Char nsdelim[] = {NSDELIM, '\0'}; + +static char *QuantChar[] = {"", "?", "*", "+"}; + +/* Forward declarations */ + +static void suspend_callbacks(CallbackVector *); +static void resume_callbacks(CallbackVector *); + +#if PATCHLEVEL < 5 && SUBVERSION < 5 + +/* ================================================================ +** This is needed where the length is explicitly given. The expat +** library may sometimes give us zero-length strings. Perl's newSVpv +** interprets a zero length as a directive to do a strlen. This +** function is used when we want to force length to mean length, even +** if zero. +*/ + +static SV * +newSVpvn(char *s, STRLEN len) +{ + register SV *sv; + + sv = newSV(0); + sv_setpvn(sv, s, len); + return sv; +} /* End newSVpvn */ + +#define ERRSV GvSV(errgv) +#endif + +#ifdef SvUTF8_on + +static SV * +newUTF8SVpv(char *s, STRLEN len) { + register SV *sv; + + sv = newSVpv(s, len); + SvUTF8_on(sv); + return sv; +} /* End new UTF8SVpv */ + +static SV * +newUTF8SVpvn(char *s, STRLEN len) { + register SV *sv; + + sv = newSV(0); + sv_setpvn(sv, s, len); + SvUTF8_on(sv); + return sv; +} + +#else /* SvUTF8_on not defined */ + +#define newUTF8SVpv newSVpv +#define newUTF8SVpvn newSVpvn + +#endif + +static void* +mymalloc(size_t size) { +#ifndef LEAKTEST + return safemalloc(size); +#else + return safexmalloc(328,size); +#endif +} + +static void* +myrealloc(void *p, size_t s) { +#ifndef LEAKTEST + return saferealloc(p, s); +#else + return safexrealloc(p, s); +#endif +} + +static void +myfree(void *p) { + Safefree(p); +} + +static XML_Memory_Handling_Suite ms = {mymalloc, myrealloc, myfree}; + +static void +append_error(XML_Parser parser, char * err) +{ + dSP; + CallbackVector * cbv; + SV ** errstr; + + cbv = (CallbackVector*) XML_GetUserData(parser); + errstr = hv_fetch((HV*)SvRV(cbv->self_sv), + "ErrorMessage", 12, 0); + + if (errstr && SvPOK(*errstr)) { + SV ** errctx = hv_fetch((HV*) SvRV(cbv->self_sv), + "ErrorContext", 12, 0); + int dopos = !err && errctx && SvOK(*errctx); + + if (! err) + err = (char *) XML_ErrorString(XML_GetErrorCode(parser)); + + sv_catpvf(*errstr, "\n%s at line %d, column %d, byte %d%s", + err, + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), + XML_GetCurrentByteIndex(parser), + dopos ? ":\n" : ""); + + if (dopos) + { + int count; + + ENTER ; + SAVETMPS ; + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + XPUSHs(*errctx); + PUTBACK ; + + count = perl_call_method("position_in_context", G_SCALAR); + + SPAGAIN ; + + if (count >= 1) { + sv_catsv(*errstr, POPs); + } + + PUTBACK ; + FREETMPS ; + LEAVE ; + } + } +} /* End append_error */ + +static SV * +generate_model(XML_Content *model) { + HV * hash = newHV(); + SV * obj = newRV_noinc((SV *) hash); + + sv_bless(obj, gv_stashpv("XML::Parser::ContentModel", 1)); + + hv_store(hash, "Type", 4, newSViv(model->type), 0); + if (model->quant != XML_CQUANT_NONE) { + hv_store(hash, "Quant", 5, newSVpv(QuantChar[model->quant], 1), 0); + } + + switch(model->type) { + case XML_CTYPE_NAME: + hv_store(hash, "Tag", 3, newUTF8SVpv((char *)model->name, 0), 0); + break; + + case XML_CTYPE_MIXED: + case XML_CTYPE_CHOICE: + case XML_CTYPE_SEQ: + if (model->children && model->numchildren) + { + AV * children = newAV(); + int i; + + for (i = 0; i < model->numchildren; i++) { + av_push(children, generate_model(&model->children[i])); + } + + hv_store(hash, "Children", 8, newRV_noinc((SV *) children), 0); + } + break; + } + + return obj; +} /* End generate_model */ + +static int +parse_stream(XML_Parser parser, SV * ioref) +{ + dSP; + SV * tbuff; + SV * tsiz; + char * linebuff; + STRLEN lblen; + STRLEN br = 0; + int buffsize; + int done = 0; + int ret = 1; + char * msg = NULL; + CallbackVector * cbv; + char *buff = (char *) 0; + + cbv = (CallbackVector*) XML_GetUserData(parser); + + ENTER; + SAVETMPS; + + if (cbv->delim) { + int cnt; + SV * tline; + + PUSHMARK(SP); + XPUSHs(ioref); + PUTBACK ; + + cnt = perl_call_method("getline", G_SCALAR); + + SPAGAIN; + + if (cnt != 1) + croak("getline method call failed"); + + tline = POPs; + + if (! SvOK(tline)) { + lblen = 0; + } + else { + char * chk; + linebuff = SvPV(tline, lblen); + chk = &linebuff[lblen - cbv->delimlen - 1]; + + if (lblen > cbv->delimlen + 1 + && *chk == *cbv->delim + && chk[cbv->delimlen] == '\n' + && strnEQ(++chk, cbv->delim + 1, cbv->delimlen - 1)) + lblen -= cbv->delimlen + 1; + } + + PUTBACK ; + buffsize = lblen; + done = lblen == 0; + } + else { + tbuff = newSV(0); + tsiz = newSViv(BUFSIZE); + buffsize = BUFSIZE; + } + + while (! done) + { + char *buffer = XML_GetBuffer(parser, buffsize); + + if (! buffer) + croak("Ran out of memory for input buffer"); + + SAVETMPS; + + if (cbv->delim) { + Copy(linebuff, buffer, lblen, char); + br = lblen; + done = 1; + } + else { + int cnt; + SV * rdres; + char * tb; + + PUSHMARK(SP); + EXTEND(SP, 3); + PUSHs(ioref); + PUSHs(tbuff); + PUSHs(tsiz); + PUTBACK ; + + cnt = perl_call_method("read", G_SCALAR); + + SPAGAIN ; + + if (cnt != 1) + croak("read method call failed"); + + rdres = POPs; + + if (! SvOK(rdres)) + croak("read error"); + + tb = SvPV(tbuff, br); + if (br > 0) + Copy(tb, buffer, br, char); + else + done = 1; + + PUTBACK ; + } + + ret = XML_ParseBuffer(parser, br, done); + + SPAGAIN; /* resync local SP in case callbacks changed global stack */ + + if (! ret) + break; + + FREETMPS; + } + + if (! ret) + append_error(parser, msg); + + if (! cbv->delim) { + SvREFCNT_dec(tsiz); + SvREFCNT_dec(tbuff); + } + + FREETMPS; + LEAVE; + + return ret; +} /* End parse_stream */ + +static SV * +gen_ns_name(const char * name, HV * ns_table, AV * ns_list) +{ + char *pos = strchr(name, NSDELIM); + SV * ret; + + if (pos && pos > name) + { + SV ** name_ent = hv_fetch(ns_table, (char *) name, + pos - name, TRUE); + ret = newUTF8SVpv(&pos[1], 0); + + if (name_ent) + { + int index; + + if (SvOK(*name_ent)) + { + index = SvIV(*name_ent); + } + else + { + av_push(ns_list, newUTF8SVpv((char *) name, pos - name)); + index = av_len(ns_list); + sv_setiv(*name_ent, (IV) index); + } + + sv_setiv(ret, (IV) index); + SvPOK_on(ret); + } + } + else + ret = newUTF8SVpv((char *) name, 0); + + return ret; +} /* End gen_ns_name */ + +static void +characterData(void *userData, const char *s, int len) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpvn((char*)s,len))); + PUTBACK; + perl_call_sv(cbv->char_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End characterData */ + +static void +startElement(void *userData, const char *name, const char **atts) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + SV ** pcontext; + unsigned do_ns = cbv->ns; + unsigned skipping = 0; + SV ** pnstab; + SV ** pnslst; + SV * elname; + + cbv->st_serial++; + + if (cbv->skip_until) { + skipping = cbv->st_serial < cbv->skip_until; + if (! skipping) { + resume_callbacks(cbv); + cbv->skip_until = 0; + } + } + + if (cbv->st_serial_stackptr >= cbv->st_serial_stacksize) { + unsigned int newsize = cbv->st_serial_stacksize + 512; + + Renew(cbv->st_serial_stack, newsize, unsigned int); + cbv->st_serial_stacksize = newsize; + } + + cbv->st_serial_stack[++cbv->st_serial_stackptr] = cbv->st_serial; + + if (do_ns) + elname = gen_ns_name(name, cbv->nstab, cbv->nslst); + else + elname = newUTF8SVpv((char *)name, 0); + + if (! skipping && SvTRUE(cbv->start_sv)) + { + const char **attlim = atts; + + while (*attlim) + attlim++; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, attlim - atts + 2); + PUSHs(cbv->self_sv); + PUSHs(elname); + while (*atts) + { + SV * attname; + + attname = (do_ns ? gen_ns_name(*atts, cbv->nstab, cbv->nslst) + : newUTF8SVpv((char *) *atts, 0)); + + atts++; + PUSHs(sv_2mortal(attname)); + if (*atts) + PUSHs(sv_2mortal(newUTF8SVpv((char*)*atts++,0))); + } + PUTBACK; + perl_call_sv(cbv->start_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } + + av_push(cbv->context, elname); + + if (cbv->ns) { + av_clear(cbv->new_prefix_list); + } +} /* End startElement */ + +static void +endElement(void *userData, const char *name) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + SV *elname; + + elname = av_pop(cbv->context); + + if (! cbv->st_serial_stackptr) { + croak("endElement: Start tag serial number stack underflow"); + } + + if (! cbv->skip_until && SvTRUE(cbv->end_sv)) + { + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(elname); + PUTBACK; + perl_call_sv(cbv->end_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } + + cbv->st_serial_stackptr--; + + SvREFCNT_dec(elname); +} /* End endElement */ + +static void +processingInstruction(void *userData, const char *target, const char *data) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 3); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*)target,0))); + PUSHs(sv_2mortal(newUTF8SVpv((char*)data,0))); + PUTBACK; + perl_call_sv(cbv->proc_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End processingInstruction */ + +static void +commenthandle(void *userData, const char *string) +{ + dSP; + CallbackVector * cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*) string, 0))); + PUTBACK; + perl_call_sv(cbv->cmnt_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End commenthandler */ + +static void +startCdata(void *userData) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + if (cbv->startcd_sv) { + ENTER; + SAVETMPS; + + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->startcd_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } +} /* End startCdata */ + +static void +endCdata(void *userData) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + if (cbv->endcd_sv) { + ENTER; + SAVETMPS; + + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->endcd_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } +} /* End endCdata */ + +static void +nsStart(void *userdata, const XML_Char *prefix, const XML_Char *uri){ + dSP; + CallbackVector* cbv = (CallbackVector*) userdata; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 3); + PUSHs(cbv->self_sv); + PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef); + PUSHs(uri ? sv_2mortal(newUTF8SVpv((char *)uri, 0)) : &PL_sv_undef); + PUTBACK; + perl_call_method("NamespaceStart", G_DISCARD); + + FREETMPS; + LEAVE; +} /* End nsStart */ + +static void +nsEnd(void *userdata, const XML_Char *prefix) { + dSP; + CallbackVector* cbv = (CallbackVector*) userdata; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef); + PUTBACK; + perl_call_method("NamespaceEnd", G_DISCARD); + + FREETMPS; + LEAVE; +} /* End nsEnd */ + +static void +defaulthandle(void *userData, const char *string, int len) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpvn((char*)string, len))); + PUTBACK; + perl_call_sv(cbv->dflt_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End defaulthandle */ + +static void +elementDecl(void *data, + const char *name, + XML_Content *model) { + dSP; + CallbackVector *cbv = (CallbackVector*) data; + SV *cmod; + + ENTER; + SAVETMPS; + + + cmod = generate_model(model); + + Safefree(model); + PUSHMARK(sp); + EXTEND(sp, 3); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char *)name, 0))); + PUSHs(sv_2mortal(cmod)); + PUTBACK; + perl_call_sv(cbv->eledcl_sv, G_DISCARD); + FREETMPS; + LEAVE; + +} /* End elementDecl */ + +static void +attributeDecl(void *data, + const char * elname, + const char * attname, + const char * att_type, + const char * dflt, + int reqorfix) { + dSP; + CallbackVector *cbv = (CallbackVector*) data; + SV * dfltsv; + + if (dflt) { + dfltsv = newUTF8SVpv("'", 1); + sv_catpv(dfltsv, (char *) dflt); + sv_catpv(dfltsv, "'"); + } + else { + dfltsv = newUTF8SVpv(reqorfix ? "#REQUIRED" : "#IMPLIED", 0); + } + + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(sp, 5); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char *)elname, 0))); + PUSHs(sv_2mortal(newUTF8SVpv((char *)attname, 0))); + PUSHs(sv_2mortal(newUTF8SVpv((char *)att_type, 0))); + PUSHs(sv_2mortal(dfltsv)); + if (dflt && reqorfix) + XPUSHs(&PL_sv_yes); + PUTBACK; + perl_call_sv(cbv->attdcl_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End attributeDecl */ + +static void +entityDecl(void *data, + const char *name, + int isparam, + const char *value, + int vlen, + const char *base, + const char *sysid, + const char *pubid, + const char *notation) { + dSP; + CallbackVector *cbv = (CallbackVector*) data; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 6); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0))); + PUSHs(value ? sv_2mortal(newUTF8SVpvn((char*)value, vlen)) : &PL_sv_undef); + PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char *)sysid, 0)) : &PL_sv_undef); + PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char *)pubid, 0)) : &PL_sv_undef); + PUSHs(notation ? sv_2mortal(newUTF8SVpv((char *)notation, 0)) : &PL_sv_undef); + if (isparam) + XPUSHs(&PL_sv_yes); + PUTBACK; + perl_call_sv(cbv->entdcl_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End entityDecl */ + +static void +doctypeStart(void *userData, + const char* name, + const char* sysid, + const char* pubid, + int hasinternal) { + dSP; + CallbackVector *cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 5); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0))); + PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char*)sysid, 0)) : &PL_sv_undef); + PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*)pubid, 0)) : &PL_sv_undef); + PUSHs(hasinternal ? &PL_sv_yes : &PL_sv_no); + PUTBACK; + perl_call_sv(cbv->doctyp_sv, G_DISCARD); + FREETMPS; + LEAVE; +} /* End doctypeStart */ + +static void +doctypeEnd(void *userData) { + dSP; + CallbackVector *cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 1); + PUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->doctypfin_sv, G_DISCARD); + FREETMPS; + LEAVE; +} /* End doctypeEnd */ + +static void +xmlDecl(void *userData, + const char *version, + const char *encoding, + int standalone) { + dSP; + CallbackVector *cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 4); + PUSHs(cbv->self_sv); + PUSHs(version ? sv_2mortal(newUTF8SVpv((char *)version, 0)) + : &PL_sv_undef); + PUSHs(encoding ? sv_2mortal(newUTF8SVpv((char *)encoding, 0)) + : &PL_sv_undef); + PUSHs(standalone == -1 ? &PL_sv_undef + : (standalone ? &PL_sv_yes : &PL_sv_no)); + PUTBACK; + perl_call_sv(cbv->xmldec_sv, G_DISCARD); + FREETMPS; + LEAVE; +} /* End xmlDecl */ + +static void +unparsedEntityDecl(void *userData, + const char* entity, + const char* base, + const char* sysid, + const char* pubid, + const char* notation) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 6); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*) entity, 0))); + PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef); + PUSHs(sv_2mortal(newUTF8SVpv((char*) sysid, 0))); + PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*) pubid, 0)) : &PL_sv_undef); + PUSHs(sv_2mortal(newUTF8SVpv((char*) notation, 0))); + PUTBACK; + perl_call_sv(cbv->unprsd_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End unparsedEntityDecl */ + +static void +notationDecl(void *userData, + const char *name, + const char *base, + const char *sysid, + const char *pubid) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + XPUSHs(sv_2mortal(newUTF8SVpv((char*) name, 0))); + if (base) + { + XPUSHs(sv_2mortal(newUTF8SVpv((char *) base, 0))); + } + else if (sysid || pubid) + { + XPUSHs(&PL_sv_undef); + } + + if (sysid) + { + XPUSHs(sv_2mortal(newUTF8SVpv((char *) sysid, 0))); + } + else if (pubid) + { + XPUSHs(&PL_sv_undef); + } + + if (pubid) + XPUSHs(sv_2mortal(newUTF8SVpv((char *) pubid, 0))); + + PUTBACK; + perl_call_sv(cbv->notation_sv, G_DISCARD); +} /* End notationDecl */ + +static int +externalEntityRef(XML_Parser parser, + const char* open, + const char* base, + const char* sysid, + const char* pubid) +{ + dSP; +#if defined(USE_THREADS) && PATCHLEVEL==6 + dTHX; +#endif + + int count; + int ret = 0; + int parse_done = 0; + + CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser); + + if (! cbv->extent_sv) + return 0; + + ENTER ; + SAVETMPS ; + PUSHMARK(sp); + EXTEND(sp, pubid ? 4 : 3); + PUSHs(cbv->self_sv); + PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef); + PUSHs(sv_2mortal(newSVpv((char*) sysid, 0))); + if (pubid) + PUSHs(sv_2mortal(newUTF8SVpv((char*) pubid, 0))); + PUTBACK ; + count = perl_call_sv(cbv->extent_sv, G_SCALAR); + + SPAGAIN ; + + if (count >= 1) { + SV * result = POPs; + int type; + + if (result && (type = SvTYPE(result)) > 0) { + SV **pval = hv_fetch((HV*) SvRV(cbv->self_sv), "Parser", 6, 0); + + if (! pval || ! SvIOK(*pval)) + append_error(parser, "Can't find parser entry in XML::Parser object"); + else { + XML_Parser entpar; + char *errmsg = (char *) 0; + + entpar = XML_ExternalEntityParserCreate(parser, open, 0); + + XML_SetBase(entpar, XML_GetBase(parser)); + + sv_setiv(*pval, (IV) entpar); + + cbv->p = entpar; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(*pval); + PUSHs(result); + PUTBACK; + count = perl_call_pv("XML::Parser::Expat::Do_External_Parse", + G_SCALAR | G_EVAL); + SPAGAIN; + + if (SvTRUE(ERRSV)) { + char *hold; + STRLEN len; + + POPs; + hold = SvPV(ERRSV, len); + New(326, errmsg, len + 1, char); + if (len) + Copy(hold, errmsg, len, char); + goto Extparse_Cleanup; + } + + if (count > 0) + ret = POPi; + + parse_done = 1; + + Extparse_Cleanup: + cbv->p = parser; + sv_setiv(*pval, (IV) parser); + XML_ParserFree(entpar); + + if (cbv->extfin_sv) { + PUSHMARK(sp); + PUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->extfin_sv, G_DISCARD); + SPAGAIN; + } + + if (SvTRUE(ERRSV)) + append_error(parser, SvPV(ERRSV, PL_na)); + } + } + } + + if (! ret && ! parse_done) + append_error(parser, "Handler couldn't resolve external entity"); + + PUTBACK ; + FREETMPS ; + LEAVE ; + + return ret; +} /* End externalEntityRef */ + +/*================================================================ +** This is the function that expat calls to convert multi-byte sequences +** for external encodings. Each byte in the sequence is used to index +** into the current map to either set the next map or, in the case of +** the final byte, to get the corresponding Unicode scalar, which is +** returned. +*/ + +static int +convert_to_unicode(void *data, const char *seq) { + Encinfo *enc = (Encinfo *) data; + PrefixMap *curpfx; + int count; + int index = 0; + + for (count = 0; count < 4; count++) { + unsigned char byte = (unsigned char) seq[count]; + unsigned char bndx; + unsigned char bmsk; + int offset; + + curpfx = &enc->prefixes[index]; + offset = ((int) byte) - curpfx->min; + if (offset < 0) + break; + if (offset >= curpfx->len && curpfx->len != 0) + break; + + bndx = byte >> 3; + bmsk = 1 << (byte & 0x7); + + if (curpfx->ispfx[bndx] & bmsk) { + index = enc->bytemap[curpfx->bmap_start + offset]; + } + else if (curpfx->ischar[bndx] & bmsk) { + return enc->bytemap[curpfx->bmap_start + offset]; + } + else + break; + } + + return -1; +} /* End convert_to_unicode */ + +static int +unknownEncoding(void *unused, const char *name, XML_Encoding *info) +{ + SV ** encinfptr; + Encinfo *enc; + int namelen; + int i; + char buff[42]; + + namelen = strlen(name); + if (namelen > 40) + return 0; + + /* Make uppercase */ + for (i = 0; i < namelen; i++) { + char c = name[i]; + if (c >= 'a' && c <= 'z') + c -= 'a' - 'A'; + buff[i] = c; + } + + if (! EncodingTable) { + EncodingTable = perl_get_hv("XML::Parser::Expat::Encoding_Table", FALSE); + if (! EncodingTable) + croak("Can't find XML::Parser::Expat::Encoding_Table"); + } + + encinfptr = hv_fetch(EncodingTable, buff, namelen, 0); + + if (! encinfptr || ! SvOK(*encinfptr)) { + /* Not found, so try to autoload */ + dSP; + int count; + + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpvn(buff,namelen))); + PUTBACK; + perl_call_pv("XML::Parser::Expat::load_encoding", G_DISCARD); + + encinfptr = hv_fetch(EncodingTable, buff, namelen, 0); + FREETMPS; + LEAVE; + + if (! encinfptr || ! SvOK(*encinfptr)) + return 0; + } + + if (! sv_derived_from(*encinfptr, "XML::Parser::Encinfo")) + croak("Entry in XML::Parser::Expat::Encoding_Table not an Encinfo object"); + + enc = (Encinfo *) SvIV((SV*)SvRV(*encinfptr)); + Copy(enc->firstmap, info->map, 256, int); + info->release = NULL; + if (enc->prefixes_size) { + info->data = (void *) enc; + info->convert = convert_to_unicode; + } + else { + info->data = NULL; + info->convert = NULL; + } + + return 1; +} /* End unknownEncoding */ + + +static void +recString(void *userData, const char *string, int len) +{ + CallbackVector *cbv = (CallbackVector*) userData; + + if (cbv->recstring) { + sv_catpvn(cbv->recstring, (char *) string, len); + } + else { + cbv->recstring = newUTF8SVpvn((char *) string, len); + } +} /* End recString */ + +static void +suspend_callbacks(CallbackVector *cbv) { + if (SvTRUE(cbv->char_sv)) { + XML_SetCharacterDataHandler(cbv->p, + (XML_CharacterDataHandler) 0); + } + + if (SvTRUE(cbv->proc_sv)) { + XML_SetProcessingInstructionHandler(cbv->p, + (XML_ProcessingInstructionHandler) 0); + } + + if (SvTRUE(cbv->cmnt_sv)) { + XML_SetCommentHandler(cbv->p, + (XML_CommentHandler) 0); + } + + if (SvTRUE(cbv->startcd_sv) + || SvTRUE(cbv->endcd_sv)) { + XML_SetCdataSectionHandler(cbv->p, + (XML_StartCdataSectionHandler) 0, + (XML_EndCdataSectionHandler) 0); + } + + if (SvTRUE(cbv->unprsd_sv)) { + XML_SetUnparsedEntityDeclHandler(cbv->p, + (XML_UnparsedEntityDeclHandler) 0); + } + + if (SvTRUE(cbv->notation_sv)) { + XML_SetNotationDeclHandler(cbv->p, + (XML_NotationDeclHandler) 0); + } + + if (SvTRUE(cbv->extent_sv)) { + XML_SetExternalEntityRefHandler(cbv->p, + (XML_ExternalEntityRefHandler) 0); + } + +} /* End suspend_callbacks */ + +static void +resume_callbacks(CallbackVector *cbv) { + if (SvTRUE(cbv->char_sv)) { + XML_SetCharacterDataHandler(cbv->p, characterData); + } + + if (SvTRUE(cbv->proc_sv)) { + XML_SetProcessingInstructionHandler(cbv->p, processingInstruction); + } + + if (SvTRUE(cbv->cmnt_sv)) { + XML_SetCommentHandler(cbv->p, commenthandle); + } + + if (SvTRUE(cbv->startcd_sv) + || SvTRUE(cbv->endcd_sv)) { + XML_SetCdataSectionHandler(cbv->p, startCdata, endCdata); + } + + if (SvTRUE(cbv->unprsd_sv)) { + XML_SetUnparsedEntityDeclHandler(cbv->p, unparsedEntityDecl); + } + + if (SvTRUE(cbv->notation_sv)) { + XML_SetNotationDeclHandler(cbv->p, notationDecl); + } + + if (SvTRUE(cbv->extent_sv)) { + XML_SetExternalEntityRefHandler(cbv->p, externalEntityRef); + } + +} /* End resume_callbacks */ + + +MODULE = XML::Parser::Expat PACKAGE = XML::Parser::Expat PREFIX = XML_ + +XML_Parser +XML_ParserCreate(self_sv, enc_sv, namespaces) + SV * self_sv + SV * enc_sv + int namespaces + CODE: + { + CallbackVector *cbv; + enum XML_ParamEntityParsing pep = XML_PARAM_ENTITY_PARSING_NEVER; + char *enc = (char *) (SvTRUE(enc_sv) ? SvPV(enc_sv,PL_na) : 0); + SV ** spp; + + Newz(320, cbv, 1, CallbackVector); + cbv->self_sv = SvREFCNT_inc(self_sv); + Newz(325, cbv->st_serial_stack, 1024, unsigned int); + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "NoExpand", 8, 0); + if (spp && SvTRUE(*spp)) + cbv->no_expand = 1; + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Context", 7, 0); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing Context"); + + cbv->context = (AV*) SvRV(*spp); + + cbv->ns = (unsigned) namespaces; + if (namespaces) + { + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "New_Prefixes", 12, 0); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing New_Prefixes"); + + cbv->new_prefix_list = (AV *) SvRV(*spp); + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Namespace_Table", + 15, FALSE); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing Namespace_Table"); + + cbv->nstab = (HV *) SvRV(*spp); + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Namespace_List", + 14, FALSE); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing Namespace_List"); + + cbv->nslst = (AV *) SvRV(*spp); + + RETVAL = XML_ParserCreate_MM(enc, &ms, nsdelim); + XML_SetNamespaceDeclHandler(RETVAL,nsStart, nsEnd); + } + else + { + RETVAL = XML_ParserCreate_MM(enc, &ms, NULL); + } + + cbv->p = RETVAL; + XML_SetUserData(RETVAL, (void *) cbv); + XML_SetElementHandler(RETVAL, startElement, endElement); + XML_SetUnknownEncodingHandler(RETVAL, unknownEncoding, 0); + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "ParseParamEnt", + 13, FALSE); + + if (spp && SvTRUE(*spp)) { + pep = XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE; + cbv->parseparam = 1; + } + + XML_SetParamEntityParsing(RETVAL, pep); + } + OUTPUT: + RETVAL + +void +XML_ParserRelease(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + SvREFCNT_dec(cbv->self_sv); + } + +void +XML_ParserFree(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + Safefree(cbv->st_serial_stack); + + + /* Clean up any SVs that we have */ + /* (Note that self_sv must already be taken care of + or we couldn't be here */ + + if (cbv->recstring) + SvREFCNT_dec(cbv->recstring); + + if (cbv->start_sv) + SvREFCNT_dec(cbv->start_sv); + + if (cbv->end_sv) + SvREFCNT_dec(cbv->end_sv); + + if (cbv->char_sv) + SvREFCNT_dec(cbv->char_sv); + + if (cbv->proc_sv) + SvREFCNT_dec(cbv->proc_sv); + + if (cbv->cmnt_sv) + SvREFCNT_dec(cbv->cmnt_sv); + + if (cbv->dflt_sv) + SvREFCNT_dec(cbv->dflt_sv); + + if (cbv->entdcl_sv) + SvREFCNT_dec(cbv->entdcl_sv); + + if (cbv->eledcl_sv) + SvREFCNT_dec(cbv->eledcl_sv); + + if (cbv->attdcl_sv) + SvREFCNT_dec(cbv->attdcl_sv); + + if (cbv->doctyp_sv) + SvREFCNT_dec(cbv->doctyp_sv); + + if (cbv->doctypfin_sv) + SvREFCNT_dec(cbv->doctypfin_sv); + + if (cbv->xmldec_sv) + SvREFCNT_dec(cbv->xmldec_sv); + + if (cbv->unprsd_sv) + SvREFCNT_dec(cbv->unprsd_sv); + + if (cbv->notation_sv) + SvREFCNT_dec(cbv->notation_sv); + + if (cbv->extent_sv) + SvREFCNT_dec(cbv->extent_sv); + + if (cbv->extfin_sv) + SvREFCNT_dec(cbv->extfin_sv); + + if (cbv->startcd_sv) + SvREFCNT_dec(cbv->startcd_sv); + + if (cbv->endcd_sv) + SvREFCNT_dec(cbv->endcd_sv); + + /* ================ */ + + Safefree(cbv); + XML_ParserFree(parser); + } + +int +XML_ParseString(parser, sv) + XML_Parser parser + SV * sv + CODE: + { + CallbackVector * cbv; + STRLEN len; + char *s = SvPV(sv, len); + + cbv = (CallbackVector *) XML_GetUserData(parser); + + + RETVAL = XML_Parse(parser, s, len, 1); + SPAGAIN; /* XML_Parse might have changed stack pointer */ + if (! RETVAL) + append_error(parser, NULL); + } + + OUTPUT: + RETVAL + +int +XML_ParseStream(parser, ioref, delim) + XML_Parser parser + SV * ioref + SV * delim + CODE: + { + SV **delimsv; + CallbackVector * cbv; + + cbv = (CallbackVector *) XML_GetUserData(parser); + if (SvOK(delim)) { + cbv->delim = SvPV(delim, cbv->delimlen); + } + else { + cbv->delim = (char *) 0; + } + + RETVAL = parse_stream(parser, ioref); + SPAGAIN; /* parse_stream might have changed stack pointer */ + } + + OUTPUT: + RETVAL + +int +XML_ParsePartial(parser, sv) + XML_Parser parser + SV * sv + CODE: + { + STRLEN len; + char *s = SvPV(sv, len); + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + RETVAL = XML_Parse(parser, s, len, 0); + if (! RETVAL) + append_error(parser, NULL); + } + + OUTPUT: + RETVAL + + +int +XML_ParseDone(parser) + XML_Parser parser + CODE: + { + RETVAL = XML_Parse(parser, "", 0, 1); + if (! RETVAL) + append_error(parser, NULL); + } + + OUTPUT: + RETVAL + +SV * +XML_SetStartElementHandler(parser, start_sv) + XML_Parser parser + SV * start_sv + CODE: + { + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + XMLP_UPD(start_sv); + PUSHRET; + } + +SV * +XML_SetEndElementHandler(parser, end_sv) + XML_Parser parser + SV * end_sv + CODE: + { + CallbackVector *cbv = (CallbackVector*) XML_GetUserData(parser); + XMLP_UPD(end_sv); + PUSHRET; + } + +SV * +XML_SetCharacterDataHandler(parser, char_sv) + XML_Parser parser + SV * char_sv + CODE: + { + XML_CharacterDataHandler charhndl = (XML_CharacterDataHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(char_sv); + if (SvTRUE(char_sv)) + charhndl = characterData; + + XML_SetCharacterDataHandler(parser, charhndl); + PUSHRET; + } + +SV * +XML_SetProcessingInstructionHandler(parser, proc_sv) + XML_Parser parser + SV * proc_sv + CODE: + { + XML_ProcessingInstructionHandler prochndl = + (XML_ProcessingInstructionHandler) 0; + CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(proc_sv); + if (SvTRUE(proc_sv)) + prochndl = processingInstruction; + + XML_SetProcessingInstructionHandler(parser, prochndl); + PUSHRET; + } + +SV * +XML_SetCommentHandler(parser, cmnt_sv) + XML_Parser parser + SV * cmnt_sv + CODE: + { + XML_CommentHandler cmnthndl = (XML_CommentHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(cmnt_sv); + if (SvTRUE(cmnt_sv)) + cmnthndl = commenthandle; + + XML_SetCommentHandler(parser, cmnthndl); + PUSHRET; + } + +SV * +XML_SetDefaultHandler(parser, dflt_sv) + XML_Parser parser + SV * dflt_sv + CODE: + { + XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(dflt_sv); + if (SvTRUE(dflt_sv)) + dflthndl = defaulthandle; + + if (cbv->no_expand) + XML_SetDefaultHandler(parser, dflthndl); + else + XML_SetDefaultHandlerExpand(parser, dflthndl); + + PUSHRET; + } + +SV * +XML_SetUnparsedEntityDeclHandler(parser, unprsd_sv) + XML_Parser parser + SV * unprsd_sv + CODE: + { + XML_UnparsedEntityDeclHandler unprsdhndl = + (XML_UnparsedEntityDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(unprsd_sv); + if (SvTRUE(unprsd_sv)) + unprsdhndl = unparsedEntityDecl; + + XML_SetUnparsedEntityDeclHandler(parser, unprsdhndl); + PUSHRET; + } + +SV * +XML_SetNotationDeclHandler(parser, notation_sv) + XML_Parser parser + SV * notation_sv + CODE: + { + XML_NotationDeclHandler nothndlr = (XML_NotationDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(notation_sv); + if (SvTRUE(notation_sv)) + nothndlr = notationDecl; + + XML_SetNotationDeclHandler(parser, nothndlr); + PUSHRET; + } + +SV * +XML_SetExternalEntityRefHandler(parser, extent_sv) + XML_Parser parser + SV * extent_sv + CODE: + { + XML_ExternalEntityRefHandler exthndlr = + (XML_ExternalEntityRefHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(extent_sv); + if (SvTRUE(extent_sv)) + exthndlr = externalEntityRef; + + XML_SetExternalEntityRefHandler(parser, exthndlr); + PUSHRET; + } + +SV * +XML_SetExtEntFinishHandler(parser, extfin_sv) + XML_Parser parser + SV * extfin_sv + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + /* There is no corresponding handler for this in expat. This is + called from the externalEntityRef function above after parsing + the external entity. */ + + XMLP_UPD(extfin_sv); + PUSHRET; + } + + +SV * +XML_SetEntityDeclHandler(parser, entdcl_sv) + XML_Parser parser + SV * entdcl_sv + CODE: + { + XML_EntityDeclHandler enthndlr = + (XML_EntityDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(entdcl_sv); + if (SvTRUE(entdcl_sv)) + enthndlr = entityDecl; + + XML_SetEntityDeclHandler(parser, enthndlr); + PUSHRET; + } + +SV * +XML_SetElementDeclHandler(parser, eledcl_sv) + XML_Parser parser + SV * eledcl_sv + CODE: + { + XML_ElementDeclHandler eldeclhndlr = + (XML_ElementDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(eledcl_sv); + if (SvTRUE(eledcl_sv)) + eldeclhndlr = elementDecl; + + XML_SetElementDeclHandler(parser, eldeclhndlr); + PUSHRET; + } + +SV * +XML_SetAttListDeclHandler(parser, attdcl_sv) + XML_Parser parser + SV * attdcl_sv + CODE: + { + XML_AttlistDeclHandler attdeclhndlr = + (XML_AttlistDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(attdcl_sv); + if (SvTRUE(attdcl_sv)) + attdeclhndlr = attributeDecl; + + XML_SetAttlistDeclHandler(parser, attdeclhndlr); + PUSHRET; + } + +SV * +XML_SetDoctypeHandler(parser, doctyp_sv) + XML_Parser parser + SV * doctyp_sv + CODE: + { + XML_StartDoctypeDeclHandler dtsthndlr = + (XML_StartDoctypeDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + int set = 0; + + XMLP_UPD(doctyp_sv); + if (SvTRUE(doctyp_sv)) + dtsthndlr = doctypeStart; + + XML_SetStartDoctypeDeclHandler(parser, dtsthndlr); + PUSHRET; + } + +SV * +XML_SetEndDoctypeHandler(parser, doctypfin_sv) + XML_Parser parser + SV * doctypfin_sv + CODE: + { + XML_EndDoctypeDeclHandler dtendhndlr = + (XML_EndDoctypeDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(doctypfin_sv); + if (SvTRUE(doctypfin_sv)) + dtendhndlr = doctypeEnd; + + XML_SetEndDoctypeDeclHandler(parser, dtendhndlr); + PUSHRET; + } + + +SV * +XML_SetXMLDeclHandler(parser, xmldec_sv) + XML_Parser parser + SV * xmldec_sv + CODE: + { + XML_XmlDeclHandler xmldechndlr = + (XML_XmlDeclHandler) 0; + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + XMLP_UPD(xmldec_sv); + if (SvTRUE(xmldec_sv)) + xmldechndlr = xmlDecl; + + XML_SetXmlDeclHandler(parser, xmldechndlr); + PUSHRET; + } + + +void +XML_SetBase(parser, base) + XML_Parser parser + SV * base + CODE: + { + char * b; + + if (! SvOK(base)) { + b = (char *) 0; + } + else { + b = SvPV(base, PL_na); + } + + XML_SetBase(parser, b); + } + + +SV * +XML_GetBase(parser) + XML_Parser parser + CODE: + { + const char *ret = XML_GetBase(parser); + if (ret) { + ST(0) = sv_newmortal(); + sv_setpv(ST(0), ret); + } + else { + ST(0) = &PL_sv_undef; + } + } + +void +XML_PositionContext(parser, lines) + XML_Parser parser + int lines + PREINIT: + int parsepos; + int size; + const char *pos = XML_GetInputContext(parser, &parsepos, &size); + const char *markbeg; + const char *limit; + const char *markend; + int length, relpos; + int cnt; + + PPCODE: + if (! pos) + return; + + for (markbeg = &pos[parsepos], cnt = 0; markbeg >= pos; markbeg--) + { + if (*markbeg == '\n') + { + cnt++; + if (cnt > lines) + break; + } + } + + markbeg++; + + relpos = 0; + limit = &pos[size]; + for (markend = &pos[parsepos + 1], cnt = 0; + markend < limit; + markend++) + { + if (*markend == '\n') + { + if (cnt == 0) + relpos = (markend - markbeg) + 1; + cnt++; + if (cnt > lines) + { + markend++; + break; + } + } + } + + length = markend - markbeg; + if (relpos == 0) + relpos = length; + + EXTEND(sp, 2); + PUSHs(sv_2mortal(newSVpvn((char *) markbeg, length))); + PUSHs(sv_2mortal(newSViv(relpos))); + +SV * +GenerateNSName(name, xml_namespace, table, list) + SV * name + SV * xml_namespace + SV * table + SV * list + CODE: + { + STRLEN nmlen, nslen; + char * nmstr; + char * nsstr; + char * buff; + char * bp; + char * blim; + + nmstr = SvPV(name, nmlen); + nsstr = SvPV(xml_namespace, nslen); + + /* Form a namespace-name string that looks like expat's */ + New(321, buff, nmlen + nslen + 2, char); + bp = buff; + blim = bp + nslen; + while (bp < blim) + *bp++ = *nsstr++; + *bp++ = NSDELIM; + blim = bp + nmlen; + while (bp < blim) + *bp++ = *nmstr++; + *bp = '\0'; + + RETVAL = gen_ns_name(buff, (HV *) SvRV(table), (AV *) SvRV(list)); + Safefree(buff); + } + OUTPUT: + RETVAL + +void +XML_DefaultCurrent(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + XML_DefaultCurrent(parser); + } + +SV * +XML_RecognizedString(parser) + XML_Parser parser + CODE: + { + XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0; + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + if (cbv->dflt_sv) { + dflthndl = defaulthandle; + } + + if (cbv->recstring) { + sv_setpvn(cbv->recstring, "", 0); + } + + if (cbv->no_expand) + XML_SetDefaultHandler(parser, recString); + else + XML_SetDefaultHandlerExpand(parser, recString); + + XML_DefaultCurrent(parser); + + if (cbv->no_expand) + XML_SetDefaultHandler(parser, dflthndl); + else + XML_SetDefaultHandlerExpand(parser, dflthndl); + + RETVAL = newSVsv(cbv->recstring); + } + OUTPUT: + RETVAL + +int +XML_GetErrorCode(parser) + XML_Parser parser + +int +XML_GetCurrentLineNumber(parser) + XML_Parser parser + + +int +XML_GetCurrentColumnNumber(parser) + XML_Parser parser + +long +XML_GetCurrentByteIndex(parser) + XML_Parser parser + +int +XML_GetSpecifiedAttributeCount(parser) + XML_Parser parser + +char * +XML_ErrorString(code) + int code + CODE: + const char *ret = XML_ErrorString(code); + ST(0) = sv_newmortal(); + sv_setpv((SV*)ST(0), ret); + +SV * +XML_LoadEncoding(data, size) + char * data + int size + CODE: + { + Encmap_Header *emh = (Encmap_Header *) data; + unsigned pfxsize, bmsize; + + if (size < sizeof(Encmap_Header) + || ntohl(emh->magic) != ENCMAP_MAGIC) { + RETVAL = &PL_sv_undef; + } + else { + Encinfo *entry; + SV *sv; + PrefixMap *pfx; + unsigned short *bm; + int namelen; + int i; + + pfxsize = ntohs(emh->pfsize); + bmsize = ntohs(emh->bmsize); + + if (size != (sizeof(Encmap_Header) + + pfxsize * sizeof(PrefixMap) + + bmsize * sizeof(unsigned short))) { + RETVAL = &PL_sv_undef; + } + else { + /* Convert to uppercase and get name length */ + + for (i = 0; i < sizeof(emh->name); i++) { + char c = emh->name[i]; + + if (c == (char) 0) + break; + + if (c >= 'a' && c <= 'z') + emh->name[i] -= 'a' - 'A'; + } + namelen = i; + + RETVAL = newSVpvn(emh->name, namelen); + + New(322, entry, 1, Encinfo); + entry->prefixes_size = pfxsize; + entry->bytemap_size = bmsize; + for (i = 0; i < 256; i++) { + entry->firstmap[i] = ntohl(emh->map[i]); + } + + pfx = (PrefixMap *) &data[sizeof(Encmap_Header)]; + bm = (unsigned short *) (((char *) pfx) + + sizeof(PrefixMap) * pfxsize); + + New(323, entry->prefixes, pfxsize, PrefixMap); + New(324, entry->bytemap, bmsize, unsigned short); + + for (i = 0; i < pfxsize; i++, pfx++) { + PrefixMap *dest = &entry->prefixes[i]; + + dest->min = pfx->min; + dest->len = pfx->len; + dest->bmap_start = ntohs(pfx->bmap_start); + Copy(pfx->ispfx, dest->ispfx, + sizeof(pfx->ispfx) + sizeof(pfx->ischar), unsigned char); + } + + for (i = 0; i < bmsize; i++) + entry->bytemap[i] = ntohs(bm[i]); + + sv = newSViv(0); + sv_setref_pv(sv, "XML::Parser::Encinfo", (void *) entry); + + if (! EncodingTable) { + EncodingTable + = perl_get_hv("XML::Parser::Expat::Encoding_Table", + FALSE); + if (! EncodingTable) + croak("Can't find XML::Parser::Expat::Encoding_Table"); + } + + hv_store(EncodingTable, emh->name, namelen, sv, 0); + } + } + } + OUTPUT: + RETVAL + +void +XML_FreeEncoding(enc) + Encinfo * enc + CODE: + Safefree(enc->bytemap); + Safefree(enc->prefixes); + Safefree(enc); + +SV * +XML_OriginalString(parser) + XML_Parser parser + CODE: + { + int parsepos, size; + const char *buff = XML_GetInputContext(parser, &parsepos, &size); + if (buff) { + RETVAL = newSVpvn((char *) &buff[parsepos], + XML_GetCurrentByteCount(parser)); + } + else { + RETVAL = newSVpv("", 0); + } + } + OUTPUT: + RETVAL + +SV * +XML_SetStartCdataHandler(parser, startcd_sv) + XML_Parser parser + SV * startcd_sv + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + XML_StartCdataSectionHandler scdhndl = + (XML_StartCdataSectionHandler) 0; + + XMLP_UPD(startcd_sv); + if (SvTRUE(startcd_sv)) + scdhndl = startCdata; + + XML_SetStartCdataSectionHandler(parser, scdhndl); + PUSHRET; + } + +SV * +XML_SetEndCdataHandler(parser, endcd_sv) + XML_Parser parser + SV * endcd_sv + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + XML_EndCdataSectionHandler ecdhndl = + (XML_EndCdataSectionHandler) 0; + + XMLP_UPD(endcd_sv); + if (SvTRUE(endcd_sv)) + ecdhndl = endCdata; + + XML_SetEndCdataSectionHandler(parser, ecdhndl); + PUSHRET; + } + +void +XML_UnsetAllHandlers(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + suspend_callbacks(cbv); + if (cbv->ns) { + XML_SetNamespaceDeclHandler(cbv->p, + (XML_StartNamespaceDeclHandler) 0, + (XML_EndNamespaceDeclHandler) 0); + } + + XML_SetElementHandler(parser, + (XML_StartElementHandler) 0, + (XML_EndElementHandler) 0); + + XML_SetUnknownEncodingHandler(parser, + (XML_UnknownEncodingHandler) 0, + (void *) 0); + } + +int +XML_ElementIndex(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + RETVAL = cbv->st_serial_stack[cbv->st_serial_stackptr]; + } + OUTPUT: + RETVAL + +void +XML_SkipUntil(parser, index) + XML_Parser parser + unsigned int index + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + if (index <= cbv->st_serial) + return; + cbv->skip_until = index; + suspend_callbacks(cbv); + } + +int +XML_Do_External_Parse(parser, result) + XML_Parser parser + SV * result + CODE: + { + int type; + + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + if (SvROK(result) && SvOBJECT(SvRV(result))) { + RETVAL = parse_stream(parser, result); + } + else if (isGV(result)) { + RETVAL = parse_stream(parser, + sv_2mortal(newRV((SV*) GvIOp(result)))); + } + else if (SvPOK(result)) { + STRLEN eslen; + int pret; + char *entstr = SvPV(result, eslen); + + RETVAL = XML_Parse(parser, entstr, eslen, 1); + } + } + OUTPUT: + RETVAL + + diff --git a/Expat/Makefile.PL b/Expat/Makefile.PL new file mode 100644 index 0000000..6d5111c --- /dev/null +++ b/Expat/Makefile.PL @@ -0,0 +1,29 @@ +use ExtUtils::MakeMaker; +use Config; +use English; + +my $libs = "-lexpat"; +my @extras = (); + +push(@extras, INC => "-I$expat_incpath") if $expat_incpath; + +$libs = "-L$expat_libpath $libs" if $expat_libpath; + +push(@extras, CAPI => 'TRUE') + if (($PERL_VERSION >= 5.005) and ($OSNAME eq 'MSWin32') + and ($Config{archname} =~ /-object\b/i)); + +push(@extras, + ABSTRACT => "Lowlevel access to James Clark's expat XML parser", + AUTHOR => 'Matt Sergeant (matt@sergeant.org)') + if ($ExtUtils::MakeMaker::VERSION >= 5.4301); + +WriteMakefile( + NAME => 'XML::Parser::Expat', + C => ['Expat.c'], + LIBS => $libs, + XSPROTOARG => '-noprototypes', + VERSION_FROM => 'Expat.pm', + @extras +); + diff --git a/Expat/encoding.h b/Expat/encoding.h new file mode 100644 index 0000000..4e0374b --- /dev/null +++ b/Expat/encoding.h @@ -0,0 +1,91 @@ +/***************************************************************** +** encoding.h +** +** Copyright 1998 Clark Cooper +** All rights reserved. +** +** This program is free software; you can redistribute it and/or +** modify it under the same terms as Perl itself. +*/ + +#ifndef ENCODING_H +#define ENCODING_H 1 + +#define ENCMAP_MAGIC 0xfeebface + +typedef struct prefixmap { + unsigned char min; + unsigned char len; /* 0 => 256 */ + unsigned short bmap_start; + unsigned char ispfx[32]; + unsigned char ischar[32]; +} PrefixMap; + +typedef struct encinf +{ + unsigned short prefixes_size; + unsigned short bytemap_size; + int firstmap[256]; + PrefixMap *prefixes; + unsigned short *bytemap; +} Encinfo; + +typedef struct encmaphdr +{ + unsigned int magic; + char name[40]; + unsigned short pfsize; + unsigned short bmsize; + int map[256]; +} Encmap_Header; + +/*================================================================ +** Structure of Encoding map binary encoding +** +** Note that all shorts and ints are in network order, +** so when packing or unpacking with perl, use 'n' and 'N' respectively. +** In C, use the htonl family of functions. +** +** The basic structure is: +** +** _______________________ +** |Header (including map expat needs for 1st byte) +** |PrefixMap * pfsize +** | This section isn't included for single-byte encodings. +** | For multiple byte encodings, when a byte represents a prefix +** | then it indexes into this vector instead of mapping to a +** | Unicode character. The PrefixMap type is declared above. The +** | ispfx and ischar fields are bitvectors indicating whether +** | the byte being mapped is a prefix or character respectively. +** | If neither is set, then the character is not mapped to Unicode. +** | +** | The min field is the 1st byte mapped for this prefix; the +** | len field is the number of bytes mapped; and bmap_start is +** | the starting index of the map for this prefix in the overall +** | map (next section). +** |unsigned short * bmsize +** | This section also is omitted for single-byte encodings. +** | Each short is either a Unicode scalar or an index into the +** | PrefixMap vector. +** +** The header for these files is declared above as the Encmap_Header type. +** The magic field is a magic number which should match the ENCMAP_MAGIC +** macro above. The next 40 bytes stores IANA registered name for the +** encoding. The pfsize field holds the number of PrefixMaps, which should +** be zero for single byte encodings. The bmsize field holds the number of +** shorts used for the overall map. +** +** The map field contains either the Unicode scalar encoded by the 1st byte +** or -n where n is the number of bytes that such a 1st byte implies (Expat +** requires that the number of bytes to encode a character is indicated by +** the 1st byte) or -1 if the byte doesn't map to any Unicode character. +** +** If the encoding is a multiple byte encoding, then there will be PrefixMap +** and character map sections. The 1st PrefixMap (index 0), covers a range +** of bytes that includes all 1st byte prefixes. +** +** Look at convert_to_unicode in Expat.xs to see how this data structure +** is used. +*/ + +#endif /* ndef ENCODING_H */ diff --git a/Expat/typemap b/Expat/typemap new file mode 100644 index 0000000..47d7dc5 --- /dev/null +++ b/Expat/typemap @@ -0,0 +1,24 @@ +# +##### XML::Parser::Expat typemap +# + +XML_Parser T_PTR +Encinfo * T_ENCOBJ + +################################################################ +INPUT +T_ENCOBJ + if (sv_derived_from($arg, \"XML::Parser::Encinfo\")) { + IV tmp = SvIV((SV*)SvRV($arg)); + $var = ($type) tmp; + } + else + croak(\"$var is not of type XML::Parser::Encinfo\") +################################################################ +OUTPUT +T_ENCOBJ + if ($var) { + sv_setref_pv($arg, \"XML::Parser::Encinfo\", (void*)$var); + } + else + $arg = &PL_sv_undef; -- cgit v1.2.1