From c97631728ce7d6d3f4692a56c3cda7476b42a968 Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Tue, 20 Nov 2007 14:28:05 +0000 Subject: Imported from /home/lorry/working-area/delta_perl-xml-parser/XML-Parser-2.36.tar.gz. --- Changes | 494 ++++ Expat/Expat.pm | 1230 +++++++++ Expat/Expat.xs | 2214 ++++++++++++++++ Expat/Makefile.PL | 29 + Expat/encoding.h | 91 + Expat/typemap | 24 + MANIFEST | 60 + META.yml | 11 + Makefile.PL | 114 + Parser.pm | 840 +++++++ Parser/Encodings/Japanese_Encodings.msg | 117 + Parser/Encodings/README | 51 + Parser/Encodings/big5.enc | Bin 0 -> 40706 bytes Parser/Encodings/euc-kr.enc | Bin 0 -> 45802 bytes Parser/Encodings/iso-8859-2.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-3.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-4.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-5.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-7.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-8.enc | Bin 0 -> 1072 bytes Parser/Encodings/iso-8859-9.enc | Bin 0 -> 1072 bytes Parser/Encodings/windows-1250.enc | Bin 0 -> 1072 bytes Parser/Encodings/windows-1252.enc | Bin 0 -> 1072 bytes Parser/Encodings/x-euc-jp-jisx0221.enc | Bin 0 -> 37890 bytes Parser/Encodings/x-euc-jp-unicode.enc | Bin 0 -> 37890 bytes Parser/Encodings/x-sjis-cp932.enc | Bin 0 -> 20368 bytes Parser/Encodings/x-sjis-jdk117.enc | Bin 0 -> 18202 bytes Parser/Encodings/x-sjis-jisx0221.enc | Bin 0 -> 18202 bytes Parser/Encodings/x-sjis-unicode.enc | Bin 0 -> 18202 bytes Parser/LWPExternEnt.pl | 71 + Parser/Style/Debug.pm | 52 + Parser/Style/Objects.pm | 78 + Parser/Style/Stream.pm | 184 ++ Parser/Style/Subs.pm | 58 + Parser/Style/Tree.pm | 90 + README | 86 + samples/REC-xml-19980210.xml | 4197 +++++++++++++++++++++++++++++++ samples/canonical | 124 + samples/canontst.xml | 20 + samples/ctest.dtd | 2 + samples/xmlcomments | 44 + samples/xmlfilter | 329 +++ samples/xmlstats | 186 ++ t/astress.t | 264 ++ t/cdata.t | 40 + t/decl.t | 166 ++ t/defaulted.t | 50 + t/encoding.t | 110 + t/ext.ent | 1 + t/ext2.ent | 1 + t/external_ent.t | 70 + t/file.t | 15 + t/finish.t | 32 + t/foo.dtd | 20 + t/namespaces.t | 133 + t/parament.t | 117 + t/partial.t | 40 + t/skip.t | 53 + t/stream.t | 50 + t/styles.t | 62 + 60 files changed, 12020 insertions(+) create mode 100644 Changes create mode 100644 Expat/Expat.pm create mode 100644 Expat/Expat.xs create mode 100644 Expat/Makefile.PL create mode 100644 Expat/encoding.h create mode 100644 Expat/typemap create mode 100644 MANIFEST create mode 100644 META.yml create mode 100644 Makefile.PL create mode 100644 Parser.pm create mode 100644 Parser/Encodings/Japanese_Encodings.msg create mode 100644 Parser/Encodings/README create mode 100644 Parser/Encodings/big5.enc create mode 100644 Parser/Encodings/euc-kr.enc create mode 100644 Parser/Encodings/iso-8859-2.enc create mode 100644 Parser/Encodings/iso-8859-3.enc create mode 100644 Parser/Encodings/iso-8859-4.enc create mode 100644 Parser/Encodings/iso-8859-5.enc create mode 100644 Parser/Encodings/iso-8859-7.enc create mode 100644 Parser/Encodings/iso-8859-8.enc create mode 100644 Parser/Encodings/iso-8859-9.enc create mode 100644 Parser/Encodings/windows-1250.enc create mode 100644 Parser/Encodings/windows-1252.enc create mode 100644 Parser/Encodings/x-euc-jp-jisx0221.enc create mode 100644 Parser/Encodings/x-euc-jp-unicode.enc create mode 100644 Parser/Encodings/x-sjis-cp932.enc create mode 100644 Parser/Encodings/x-sjis-jdk117.enc create mode 100644 Parser/Encodings/x-sjis-jisx0221.enc create mode 100644 Parser/Encodings/x-sjis-unicode.enc create mode 100644 Parser/LWPExternEnt.pl create mode 100644 Parser/Style/Debug.pm create mode 100644 Parser/Style/Objects.pm create mode 100644 Parser/Style/Stream.pm create mode 100644 Parser/Style/Subs.pm create mode 100644 Parser/Style/Tree.pm create mode 100644 README create mode 100644 samples/REC-xml-19980210.xml create mode 100755 samples/canonical create mode 100644 samples/canontst.xml create mode 100644 samples/ctest.dtd create mode 100755 samples/xmlcomments create mode 100755 samples/xmlfilter create mode 100755 samples/xmlstats create mode 100644 t/astress.t create mode 100644 t/cdata.t create mode 100644 t/decl.t create mode 100644 t/defaulted.t create mode 100644 t/encoding.t create mode 100644 t/ext.ent create mode 100644 t/ext2.ent create mode 100644 t/external_ent.t create mode 100644 t/file.t create mode 100644 t/finish.t create mode 100644 t/foo.dtd create mode 100644 t/namespaces.t create mode 100644 t/parament.t create mode 100644 t/partial.t create mode 100644 t/skip.t create mode 100644 t/stream.t create mode 100644 t/styles.t diff --git a/Changes b/Changes new file mode 100644 index 0000000..f4b235f --- /dev/null +++ b/Changes @@ -0,0 +1,494 @@ +Revision history for Perl extension XML::Parser. + +2.36 + - Fix for Carp::Heavy bugs + +2.35 + - Works in 5.10 (Andreas J. Koenig) + - Added license in Makefile.PL (Alexandr Ciornii) + - Makefile.PL also searches for expat in C:/lib/Expat-2.0.0 (Alexandr Ciornii) + - No longer uses variable named 'namespace' in Expat.xs (Jeff Hunter) + +2.33 + - Fixed Tree style (grantm) + - Fixed some non-utf8 stuff in DTDs (patch in XML::DOM tarball) + +2.32 + - Memory leak fix (Juerd Waalboer). + - Added windows-1252 encoding + - Styles moved to separate .pm files to make loading faster and + ease maintainence + - Don't load IO::Handle unless we really need to + +2.31 Tue Apr 2 13:39:51 EST 2002 + - Ilya Zakharevich and + Dave Mitchell both provided patches to + fix problems module had with 5.8.0 + - Dave Mitchell also made some UTF-8 related fixes to the test suite. +2.30 Thu Oct 5 12:47:36 EDT 2000 + - Get rid of ContentStash global. Not that big a deal looking it up + everytime and gets rid of a potential threading problem. + - Switch to shareable library version of expat from sourceforge + (i.e. no longer include expat source and require that libexpat + be installed) + - Bob Tribit demonstrated a fix for problems + in compiling under perl 5.6.0 with 5.005 threading. + - Matt Sergeant discovered a typo ('IO::Handler' + instead of 'IO::Handle') in Expat.pm that caused IO::Handle objects + to be treated as strings instead of handles. + - Matt Sergeant also provided a patch to allow tied handles to work + properly in calls to parse. + - Eric Bohlman reported a failure when + incremental parsing and external parsing were used together. + Need to give explicit package when calling Do_External_Parse + from externalEntityRef otherwise fails when called through ExpatNB. +2.29 Sun May 21 21:19:45 EDT 2000 + - In expat, notation declaration handler registration wasn't + surviving through external entity references. + - Chase Tingley discovered that text + accumulation in the Stream style wasn't working across processing + instructions and recommended the appropriate fix. + - Jochen Wiedmann , noted that + you couldn't use ExpatNB directly because it wasn't setting + the protective _State_ variable. Now doing this in the + parse_more method of ExpatNB. + - At the suggestion of Grant Hopwood , now + calling the env_proxy method on the LWP::UserAgent in the LWP + external entity handler when it's created to set any proxies + from environment variables. + - Grant McLean, Matt Sergeant (& others I may have missed) noted that + loading the LWP & URI modules slowed startup of the module, even + if the application didn't need it. The default LWP handler is now + dynamicly loaded (along with LWP & URI modules) the first time an + external entity is referenced. Also provided a NoLWP option to + XML::Parser that forces the file based external entity handler. + - Fixed allocation errors in element declaration patches in expat + - The Expat base method now works, even before expat starts parsing. + - Changed the canonical script to take an optional file argument. + - Enno Derksen reported that the attlist handler + was not returning NOTATION type attlist information. + - Michel Rodriguez , noted that the constructor + for XML::Parser objects no longer checked for the existence of + applications installed external entity handlers before installing + the default ones. + - Burkhard Meier sent in a fix for + compiler directives in Expat/Makefile.PL for Win32 machines. + A change in 5.6.0 caused the old conditional to fail. + - Forgot to document changes to the Entity declaration handler: + there is an additional "IsParam" argument that indicates whether + or not the entity is a parameter entity. This information is + no longer passed on in the name. + - Ben Low reported an undefined macro with + version 5.004_04. +2.28 Mon Mar 27 21:21:50 EST 2000 + - Junked local (Expat.xs) declaration parsing and patched expat to + handle XML declarations, element declarations, attlist declarations, + and all entity declarations. By eliminating both shadow buffers and + local declaration parsing in Expat.xs, I've eliminated the two most + common sources of serious bugs in the expat interface. + o thus fixed the segfault and parse position bugs reported by + Ivan Kurmanov + o and the doctype bug reported by Kevin Lund + + o The element declaration handler no longer receives a string, + but an XML::Parser::ContentModel object that represents the + parsed model, but still looks like a string if referred to as + a string. This class is documented in the XML::Parser::Expat + pod under "XML::Parser::ContentModel Methods". + o The doctype declaration handler no longer receives the internal + subset as a string, but in its place a true or undef value + indicating whether or not there is an internal subset. Also, + it's called prior to processing either the internal or external + DTD subset (as suggested by Enno Derksen .) + o There is a new DoctypeFin handler that's called after finishing + parsing all of the DOCTYPE declaration, including any internal + or external DTD declarations. + o One bit of lossage is that recognized_string, original_string, + and default_current no longer work inside declaration handlers. + - Added a handler that gets called after parsing external entities: + ExternEntFin. Suggested by Jeff Horner . + - parsefile, file_ext_ent_handler, & lwp_ext_ent_handler now all + set the base path. This problem has been raised more than once + and I'm not sure to whom credit should be given. + - The file_ext_ent_handler now opens a file handle instead of + reading the entire entity at once. + - Merged patches supplied by Larry Wall to (for perl 5.6 and beyond) + tag generated strings as UTF-8, where appropriate. + - Fixed a bug in xml_escape reported by Jerry Geiger . + It failed when requesting escaping of perl regex meta-characters. + - Laurent Caprani reported a bug in the + Proc handler for the Debug style. + - sent in a patch for the element index + mechanism. I was popping the stack too soon in the endElement fcn. + - Jim Miner sent in a patch to fix a warning in + Expat.pm. + - Kurt Starsinic pointed out that the eval used to check for string + versus IO handle was leaving $@ dirty, thereby foiling higher + level exception handlers + - An expat question by Paul Prescod helped me + see that exeptions in the parse call bypass the Expat release method, + causing memory leaks. + - Mark D. Anderson noted that calling + recognized_string from the Final method caused a dump. There are + a bunch of methods that should not be called after parsing has + finished. These now have protective if statements around them. + - Updated canonical utility to conform to newer version of Canonical + XML working draft. +2.27 Sat Sep 25 18:26:44 EDT 1999 + - Corrected documentation in Parser.pm + - Deal with XML_NS and XML_BYTE_ORDER macros in Expat/Makefile.PL + - Chris Thorman noted that "require 'URI::URL.pm'" + in Parser.pm was in error (should be "require 'URI/URL.pm'") + - Andrew McNaughton noted "use English" and + use of '$&' slowed down regex handling for whole application, so + they were excised from XML::Parser::Expat. + - Work around "modification of read-only value" bug in perl 5.004 + - Enno Derksen reported that the Doctype handler + wasn't being called when ParseParamEnt was set. + - Now using Version 19990728 of expat, with local patches. + - Got rid of shadow buffer + o thus fixed the error reported by Ashley Sanders + + o and removed ExpatNB limitations that Peter Billam + noted. + - Vadim Konovalov had a problem compiling + for multi-threading that was fixed by changing Perl_sv_setsv to + sv_setsv. + - Added new Expat method: skip_until(index) + - Backward incompatible change to method xml_escape: to get former + behavior use $xp->xml_escape($string, '>', ...) + - Added utility, canonical, to samples +2.26 Sun Jul 25 19:06:41 EDT 1999 + - Ken Beesley discovered that + declarations in the external subset are not sent to registered + handlers when there is no internal subset. + - Fixed parse_dtd to work when entity values or attribute defaults + are so large that they might be broken across multiple calls to + the default handler. + - For lwp_ext_ent_handler, use URI::URL instead of URI so that old + 5.004 installations will work with it. +2.25 Fri Jul 23 06:23:43 EDT 1999 + - Now using Version 1990709 of expat. No local patches. + - Numerous people reported a SEGV problem when running t/cdata + on various platforms and versions of perl. The problem was + introduced with the setHandlers change. In some cases an + un-initialized value was being returned. + - Added an additional external entity handler, lwp_ext_ent_handler, + that deals with general URIs. It is installed instead of the + "file only" handler if the LWP package is installed. +2.24 Thu Jul 8 23:05:50 EDT 1999 + - KangChan Lee supplied the + EUC-KR encoding map. + - Enno Derksen forwarded reports by Jon Eisenzopf + and Stefaan Onderbeke + about a core dump using XML::DOM. This was due to a bug in the + prolog parsing part of XML::Parser. + - Loic Dachary discovered that changing G_DISCARD to + G_VOID introduced a small memory leak. Changed G_VOID back to + G_DISCARD. + - As suggested by Ben Holzman , the + setHandlers methods of both Parser and Expat now return lists that + consist of type, handler pairs that correspond to the input, but + the handlers returned are the ones that were in effect prior to + the call. + - Now using Version 19990626 of expat with a local patch (provided + by James Clark.) + - Added option ParseParamEnt. When set to a true value, parameter + entities are parsed and the external DTD is read (unless standalone + set to "Yes" in document). +2.23 Mon Apr 26 21:30:28 EDT 1999 + - Fixed a bug in the ExpatNB class reported by Gabe Beged-Dov + . The ErrorMessage attribute wasn't + being initialized for ExpatNB. This should have been done in + the Expat constructor. + - Applied patch provided by Nathan Kurz to + fix more perl stack manipulation errors in Expat.xs. + - Applied another patch by Nathan to change perl_call_sv flag + from G_DISCARD to G_VOID for callbacks, which helps performance. + - Murata Makoto reported a + problem on Win32 platforms that only showed up when UTF-16 was + being used. The needed call to binmode was added to the parsefile + methods. + - Added documentation for release method that was added in release + 2.20 to Expat pod. (Point raised by ) + - Now using Version 19990425 of expat. No local patches. + - Added specified_attr method and made ineffective the is_defaulted + method. +2.22 Sun Apr 4 11:47:25 EDT 1999 + - Loic Dachary reported a core dump with a small + file with a comment that wasn't properly closed. Fixed in expat + by updating positionPtr properly in final call of XML_Parse. + (Reported to & acknowledged by James Clark.) + - Made more fixes to Expat.xs position calculation. + - Loic Dachary provided patches for fixing a + memory growth problem with large documents. (Garbage collection + wasn't happening frequently enough.) + - As suggested by Gabe Beged-Dov , added + a non-blocking parse mechanism: + - Added parse_start method to XML::Parser, which returns a + XML::Parser::ExpatNB object. + - Added XML::Parser::ExpatNB class, which is a subclass of + Expat and has the additional methods parse_more & parse_done + - Made some performance tweaks as suggested by performance thread + on perl-xml discussion list. [With negligible results] + - Tried to clarify Tree style structure in Parser pod +2.21 Sun Mar 21 17:42:04 EST 1999 + - Warren Vik provided patches for a bug + introduced with the is_defaulted method. It manifested itself + by bogusly reporting duplicate attributes. + - Now using latest expat from ftp://ftp.jclark.com/pub/test/expat.zip, + Version 19990307. (Plus any patches in Expat/expat.patches.) + - As suggested by Tim Bray, added an xml_escape method to + Expat. + - Murray Nesbitt had build problems + on Win32 that were solved by swapping 2 include files in + Expat.xs + - Added following Expat namespace methods: + new_ns_prefixes + expand_ns_prefix + current_ns_prefixes + - Fixed memory handling in recognized_string method to get rid + of "Attempt to free unreferenced scalar" bug. +2.20 Sun Feb 28 15:35:52 EST 1999 + - Fixed miscellaneous bugs in xmlfilter. + - In the default external entity handler, prepend the base only + for relative URLs. + - Chris Nandor provided patches for building + on Macintosh. + - As suggested by Matt Sergeant , + added the finish method to Expat. + - Matt also provided a fix to a bug he discovered in the Streams + style. + - Fixed a parse position bug reported by Enno Derksen + that was affecting both original_string and position_in_context. + - Fixed a gross memory leak reported by David Megginson, + : there was a circular reference to the Expat + object and the internal end handler for context was not freeing + element names after they were removed from the context stack. + - Now using expat Version 19990109 + (Plus any patches in Expat/expat.patches) + - Added is_defaulted method to Expat to tell if an attribute + was defaulted. (Requested by Enno Derksen for XML::DOM.) + - Matt Sergeant reported that + the XML::Parser parse methods weren't propagating array context + to the Final handler. Now they are. + - Fixed more memory leaks (again reported by David Megginson). + The SVs pointing to the handlers weren't being reclaimed when + the callback vector was freed. + - Added the element_index method to Expat. +2.19 Sun Jan 3 11:23:45 EST 1999 + - When the recognized string is long enough, expat uses multiple + calls to reportDefault. Fixed recString handler in Expat.xs to + deal with this properly. + - Added original_string method to Expat. This returns the untranslated + string (i.e. original encoding) that caused current event. + - Alberto Accomazzi sent in more patches + for perl5.005_54 incompatibilities. + - Alberto also fingered a nasty memory bug in Expat.xs that arose + sometimes when you registered a declaration handler but no + default handler. It would give you a "Not a CODE reference" + error in a place that wasn't using any CODE references. + - reported a problem with compiling expat + on a Sun 4 due to non-exsitance of memmove on that OS. Provided + a workaround in Makefile.PL + - Now using expat Version 19981231 from James Clark's test directory. + - Made patch to this version in order to support original_string + (see Expat/expat.patches.) + - Added CdataStart and CdataEnd handlers to expat. +2.18 Sun Dec 27 07:39:23 EST 1998 + - Alberto Accomazzi pointed out that + the DESTROY sub in the new XML::Parser::Encinfo package was + pointing to the wrong package for calling FreeEncoding. + - Tarang Kumar Patel reported + the mis-declaration of an integer as unsigned in the + convert_to_unicode function in Expat.xs. + - Glenn R. Kronschnabl reported a problem + with ExternEnt handlers when using parsefile. Turned out to be + an unmatched ENTER; SAVETMPS pair that screwed up the Perl stack. + - Tom Hughes reported that the fix I put + in for the swith to PL_sv.. names failed with 5.0005_54, since + these became real variables instead of macros. Switched to just + checking the PATCHLEVEL macro. + - Yoshida Masato provided the EUC-JP encodings + (the corresponding XML files are in XML::Encoding 1.01 or later.) + - With the advice of MURATA Makoto , + removed the Shift_JIS encoding and replaced it with 4 variations + he provided. He also provided an explanatory message. + - Added the recognized_string method to Expat, deprecating + default_current. + - Now using expat Version 19981122 from James Clark's test directory + (this fixes another bug with external entity reference handlers) + - Added a default external entity handler that only accesses file: + based URLs. +2.17 Sun Dec 13 17:39:58 EST 1998 + - Replaced uses of malloc, realloc, and free with New, Renew, + and Safefree respectively + - In Expat.pm, fixed methods in_element and within_element to + work correctly with namespaces. + - xmlfilter - Substitute quoted equivalents for special characters + in attribute values. + - position_in_context was off by one line when position was at + the end of line. + - For the context methods in Expat.pm, do the right thing when + the context list is empty. + - Added methods xpcroak and xpcarp to Expat. + - Alberto Accomazzi noted that perl + releases 5.005_5* (the pre 5.006 development versions) won't + accept sv_undef (and related constants) anymore and we have + to switch to PL_sv_... + - Alberto also reported a warning in the newer versions of + IO::Handle about input_record_separator not being treated on + a per-handle basis. + - Fixed bug that Jon Udell reported in + Stream style: Text handler most of the time didn't see proper + context. + - Added XML::Parser::Expat::load_encoding function and support + for external encodings. +2.16 Tue Oct 27 22:27:33 EST 1998 + - Fixed bug reported by Enno Derksen : + Now treats parameter entity declarations correctly. The entity + handler sees the name beginning with '%' if it's a parameter + entity declaration. + - Nigel Hutchison pointed out that stream.t + wasn't portable off Unix systems. Replaced with portable version. + - Fixed bug reported by Enno Derksen : + XML Declaration was firing off both XMLDecl handler *and* Default + handler. + - Added option NoExpand to Expat to turn off expansion of entity + references when a default handler is set. +2.15 Tue Oct 20 14:50:11 EDT 1998 + - In Expat's parse method, account for undefined previous + record separators. + - Simplify a couple of Expat methods. + - Re-ordered Changes entries to put latest changes first. + - In XML::Parser::new, set Handlers if not already set + - New Handler (XMLDecl) for handling XML declarations + - New Handler (Doctype) for handling DOCTYPE declarations + - New Handler (Entity) for handling ENTITY declarations in + the internal subset. + - New Handler (Element) for handling ELEMENT declarations in + the internal subset. + - New Handler (Attlist) for handling ATTLIST declarations in + the internal subset. + - Documented new handlers + - Added t/decl.t to test new handlers +2.14 Sun Oct 11 22:17:15 EDT 1998 + - Always use method calls for streams. + - Use perl's input_record_separator to find delimiter (i.e. each + "line" is an entire XML doc with delimiter appended) + - Deal with line being longer than buffer. +2.13 Thu Oct 8 16:58:39 EDT 1998 + - Fixed a major oops in Expat.xs where I was trying to decrement + a refcnt on an unallocated SV, leading to a segment violation. + (Why did this show up on HPUX but not Linux?) +2.12 Thu Oct 8 00:05:10 EDT 1998 + - Incorporated fix to t/astress.t from (Mike + Fletcher). + - Change to xmlstats from (David + Alan Black) + - Access Handlers_Setters in Expat and Handler_Types in Parser + through object reference (following admonition in perltoot + about class data.) + - Added Stream_Delimiter option to Expat. + - In the parse_stream function in Expat.xs, if we either have a + Stream_Delimiter or if there's no file descriptor, use method + calls instead. For Stream_Delimiter in particular, the function + now uses the getline method so it can check for the delimiter + without consuming stuff past the delimiter from the stream. +2.11 Sun Oct 4 22:15:53 EDT 1998 + - Swapped out local patch for expat and swapped in James Clark's + patch. + - Pass on all Parser attributes (other than those excluded by + Non_Expat_Options) to the instance of Expat created at parse time. + - New method for Expat: generate_ns_name + - Split test.pl into t/*.t and change Makefile.PL so we don't do a + useless descent into Expat subdir for testing. + - Stop the numeric warning for eq_name and namespace method. +2.10 Fri Sep 25 18:36:46 EDT 1998 + - Uses expat Version 19980924 + (with local patch - see Expat/expat/xmlparse/xmlparse.c.diff) + - Use newSVpvn when PERL_VERSION >= 5.005 + - Completed xmlfilter + - Added support for namespace processing: + o Namespaces option to XML::Parser and XML::Parser::Expat + o Two new methods in Expat: + namespace - to return namespace associated with name + eq_name - compare 2 names for equality across namespaces. + - Use expat's new SetDefaultHandlerExpand instead of SetDefaultHandler + so that entity expansion may continue even if the default handler + is set. + - Moved test.pl back up main level and changed to work with XML::Parser + - Added tests for namespaces +2.09 Fri Sep 18 10:33:38 EDT 1998 + - Fixed errors that caused -w to fret in XML::Parser. + - Fixed depth method in XML::Parser::Expat + - There were a few places in Expat.xs where garbage strings may + have been returned due to the expat library giving us zero-length + strings. Fixed by using a local version of newSVpv where length + means length, even when zero. + - The default handler setter in Expat.xs, was inappropriately setting + cbv->dflt_sv when there was a null handler. +2.08 Thu Sep 17 11:47:13 EDT 1998 + - Make XML::Parser higher-level re-usable parser objects. Old object + now becomes XML::Parser::Expat. + - The XML::Parser object now supports the style mechanism very close + to that in the 1.0 version. +2.07 Wed Sep 9 11:03:43 EDT 1998 + - Added some samples (xmlcomments & xmlstats) + - Now requires 5.004 (due to sv_catpvf) + - Changed Makefile.PL to allow automatic manification + - Added a test that reads xml spec (to check buffer boundary errors) +2.06 Tue Sep 1 10:40:41 EDT 1998 + - Fixed the methods current_line, current_byte, and current_column + - Added some tests +2.05 Mon Aug 31 15:29:42 EDT 1998 + - Made Makefile.PL changes suggested by Murray Nesbitt + to support building on Win32 + and for making PPM binaries. + - Added method parse + - Changed parsestring and parsefile to use new parse method + - Deprecated parsestring method + - Improved error handling in the ExternEnt handler +2.04 Wed Aug 26 13:25:01 EDT 1998 + - Uses expat Version 1.0 of August 14, 1998 + - Some document changes + - Changed dist section in Makefile.PL + - Added ExternEnt handler + - Added tests for ExternEnt +2.03 Fri Aug 21 17:19:26 EDT 1998 + - Changed InitEncoding to ProtocolEncoding. Default to none. + Pass null string to expat's ParserCreate when there is no + ProtocolEncoding. + - Fixed bug in parsefile & parsestring where they were referring + to an ErrorContext *method* instead of a field. + - Fixed position_in_context bugs: + -- 'last' in do {} while (); + -- insert newline before pointer when no following newline + in buffer. + - Added some additional tests +2.02 Thu Aug 20 14:05:08 EDT 1998 + - Fixed parsefile problem reported by + "Robert Hanson" , using a modification of + his suggested fix. + - Responded to problem reported by + Bart Schuller + by pre-expanding parts of the XML_UPD macro to avoid confusing + some versions of gcc. + - Changed the constructor to take the option InitEncoding, which + gets passed to the ParserCreate call. When not given, defaults + to UTF-8. + - Added method position_in_context + - Added Constructor option ErrorContext and added reporting of + errors in context. +2.01 Wed Aug 19 11:42:42 EDT 1998 + - Added methods: + default_current, base, current_line, current_column, + current_byte, context + - Added some tests + - parsestring and parsefile now croak if they're re-used + - Filled in some documentation +2.00 Mon Aug 17 12:01:33 EDT 1998 + - repackaged with James Clark's most recent expat + - changed to an API closer to expat +1.00 March 1998 + - Larry Wall's original version diff --git a/Expat/Expat.pm b/Expat/Expat.pm new file mode 100644 index 0000000..9413d80 --- /dev/null +++ b/Expat/Expat.pm @@ -0,0 +1,1230 @@ +package XML::Parser::Expat; + +require 5.004; + +use strict; +use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path + $have_File_Spec); +use Carp; + +require DynaLoader; + +@ISA = qw(DynaLoader); +$VERSION = "2.36" ; + +$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm'; + +%Encoding_Table = (); +if ($have_File_Spec) { + @Encoding_Path = (grep(-d $_, + map(File::Spec->catdir($_, qw(XML Parser Encodings)), + @INC)), + File::Spec->curdir); +} +else { + @Encoding_Path = (grep(-d $_, map($_ . '/XML/Parser/Encodings', @INC)), '.'); +} + + +bootstrap XML::Parser::Expat $VERSION; + +%Handler_Setters = ( + Start => \&SetStartElementHandler, + End => \&SetEndElementHandler, + Char => \&SetCharacterDataHandler, + Proc => \&SetProcessingInstructionHandler, + Comment => \&SetCommentHandler, + CdataStart => \&SetStartCdataHandler, + CdataEnd => \&SetEndCdataHandler, + Default => \&SetDefaultHandler, + Unparsed => \&SetUnparsedEntityDeclHandler, + Notation => \&SetNotationDeclHandler, + ExternEnt => \&SetExternalEntityRefHandler, + ExternEntFin => \&SetExtEntFinishHandler, + Entity => \&SetEntityDeclHandler, + Element => \&SetElementDeclHandler, + Attlist => \&SetAttListDeclHandler, + Doctype => \&SetDoctypeHandler, + DoctypeFin => \&SetEndDoctypeHandler, + XMLDecl => \&SetXMLDeclHandler + ); + +sub new { + my ($class, %args) = @_; + my $self = bless \%args, $_[0]; + $args{_State_} = 0; + $args{Context} = []; + $args{Namespaces} ||= 0; + $args{ErrorMessage} ||= ''; + if ($args{Namespaces}) { + $args{Namespace_Table} = {}; + $args{Namespace_List} = [undef]; + $args{Prefix_Table} = {}; + $args{New_Prefixes} = []; + } + $args{_Setters} = \%Handler_Setters; + $args{Parser} = ParserCreate($self, $args{ProtocolEncoding}, + $args{Namespaces}); + $self; +} + +sub load_encoding { + my ($file) = @_; + + $file =~ s!([^/]+)$!\L$1\E!; + $file .= '.enc' unless $file =~ /\.enc$/; + unless ($file =~ m!^/!) { + foreach (@Encoding_Path) { + my $tmp = ($have_File_Spec + ? File::Spec->catfile($_, $file) + : "$_/$file"); + if (-e $tmp) { + $file = $tmp; + last; + } + } + } + + local(*ENC); + open(ENC, $file) or croak("Couldn't open encmap $file:\n$!\n"); + binmode(ENC); + my $data; + my $br = sysread(ENC, $data, -s $file); + croak("Trouble reading $file:\n$!\n") + unless defined($br); + close(ENC); + + my $name = LoadEncoding($data, $br); + croak("$file isn't an encmap file") + unless defined($name); + + $name; +} # End load_encoding + +sub setHandlers { + my ($self, @handler_pairs) = @_; + + croak("Uneven number of arguments to setHandlers method") + if (int(@handler_pairs) & 1); + + my @ret; + + while (@handler_pairs) { + my $type = shift @handler_pairs; + my $handler = shift @handler_pairs; + croak "Handler for $type not a Code ref" + unless (! defined($handler) or ! $handler or ref($handler) eq 'CODE'); + + my $hndl = $self->{_Setters}->{$type}; + + unless (defined($hndl)) { + my @types = sort keys %{$self->{_Setters}}; + croak("Unknown Expat handler type: $type\n Valid types: @types"); + } + + my $old = &$hndl($self->{Parser}, $handler); + push (@ret, $type, $old); + } + + return @ret; +} + +sub xpcroak + { + my ($self, $message) = @_; + + my $eclines = $self->{ErrorContext}; + my $line = GetCurrentLineNumber($_[0]->{Parser}); + $message .= " at line $line"; + $message .= ":\n" . $self->position_in_context($eclines) + if defined($eclines); + croak $message; +} + +sub xpcarp { + my ($self, $message) = @_; + + my $eclines = $self->{ErrorContext}; + my $line = GetCurrentLineNumber($_[0]->{Parser}); + $message .= " at line $line"; + $message .= ":\n" . $self->position_in_context($eclines) + if defined($eclines); + carp $message; +} + +sub default_current { + my $self = shift; + if ($self->{_State_} == 1) { + return DefaultCurrent($self->{Parser}); + } +} + +sub recognized_string { + my $self = shift; + if ($self->{_State_} == 1) { + return RecognizedString($self->{Parser}); + } +} + +sub original_string { + my $self = shift; + if ($self->{_State_} == 1) { + return OriginalString($self->{Parser}); + } +} + +sub current_line { + my $self = shift; + if ($self->{_State_} == 1) { + return GetCurrentLineNumber($self->{Parser}); + } +} + +sub current_column { + my $self = shift; + if ($self->{_State_} == 1) { + return GetCurrentColumnNumber($self->{Parser}); + } +} + +sub current_byte { + my $self = shift; + if ($self->{_State_} == 1) { + return GetCurrentByteIndex($self->{Parser}); + } +} + +sub base { + my ($self, $newbase) = @_; + my $p = $self->{Parser}; + my $oldbase = GetBase($p); + SetBase($p, $newbase) if @_ > 1; + return $oldbase; +} + +sub context { + my $ctx = $_[0]->{Context}; + @$ctx; +} + +sub current_element { + my ($self) = @_; + @{$self->{Context}} ? $self->{Context}->[-1] : undef; +} + +sub in_element { + my ($self, $element) = @_; + @{$self->{Context}} ? $self->eq_name($self->{Context}->[-1], $element) + : undef; +} + +sub within_element { + my ($self, $element) = @_; + my $cnt = 0; + foreach (@{$self->{Context}}) { + $cnt++ if $self->eq_name($_, $element); + } + return $cnt; +} + +sub depth { + my ($self) = @_; + int(@{$self->{Context}}); +} + +sub element_index { + my ($self) = @_; + + if ($self->{_State_} == 1) { + return ElementIndex($self->{Parser}); + } +} + +################ +# Namespace methods + +sub namespace { + my ($self, $name) = @_; + local($^W) = 0; + $self->{Namespace_List}->[int($name)]; +} + +sub eq_name { + my ($self, $nm1, $nm2) = @_; + local($^W) = 0; + + int($nm1) == int($nm2) and $nm1 eq $nm2; +} + +sub generate_ns_name { + my ($self, $name, $namespace) = @_; + + $namespace ? + GenerateNSName($name, $namespace, $self->{Namespace_Table}, + $self->{Namespace_List}) + : $name; +} + +sub new_ns_prefixes { + my ($self) = @_; + if ($self->{Namespaces}) { + return @{$self->{New_Prefixes}}; + } + return (); +} + +sub expand_ns_prefix { + my ($self, $prefix) = @_; + + if ($self->{Namespaces}) { + my $stack = $self->{Prefix_Table}->{$prefix}; + return (defined($stack) and @$stack) ? $stack->[-1] : undef; + } + + return undef; +} + +sub current_ns_prefixes { + my ($self) = @_; + + if ($self->{Namespaces}) { + my %set = %{$self->{Prefix_Table}}; + + if (exists $set{'#default'} and not defined($set{'#default'}->[-1])) { + delete $set{'#default'}; + } + + return keys %set; + } + + return (); +} + + +################################################################ +# Namespace declaration handlers +# + +sub NamespaceStart { + my ($self, $prefix, $uri) = @_; + + $prefix = '#default' unless defined $prefix; + my $stack = $self->{Prefix_Table}->{$prefix}; + + if (defined $stack) { + push(@$stack, $uri); + } + else { + $self->{Prefix_Table}->{$prefix} = [$uri]; + } + + # The New_Prefixes list gets emptied at end of startElement function + # in Expat.xs + + push(@{$self->{New_Prefixes}}, $prefix); +} + +sub NamespaceEnd { + my ($self, $prefix) = @_; + + $prefix = '#default' unless defined $prefix; + + my $stack = $self->{Prefix_Table}->{$prefix}; + if (@$stack > 1) { + pop(@$stack); + } + else { + delete $self->{Prefix_Table}->{$prefix}; + } +} + +################ + +sub specified_attr { + my $self = shift; + + if ($self->{_State_} == 1) { + return GetSpecifiedAttributeCount($self->{Parser}); + } +} + +sub finish { + my ($self) = @_; + if ($self->{_State_} == 1) { + my $parser = $self->{Parser}; + UnsetAllHandlers($parser); + } +} + +sub position_in_context { + my ($self, $lines) = @_; + if ($self->{_State_} == 1) { + my $parser = $self->{Parser}; + my ($string, $linepos) = PositionContext($parser, $lines); + + return '' unless defined($string); + + my $col = GetCurrentColumnNumber($parser); + my $ptr = ('=' x ($col - 1)) . '^' . "\n"; + my $ret; + my $dosplit = $linepos < length($string); + + $string .= "\n" unless $string =~ /\n$/; + + if ($dosplit) { + $ret = substr($string, 0, $linepos) . $ptr + . substr($string, $linepos); + } else { + $ret = $string . $ptr; + } + + return $ret; + } +} + +sub xml_escape { + my $self = shift; + my $text = shift; + + study $text; + $text =~ s/\&/\&/g; + $text =~ s/ 1; + + if ($_ eq '>') { + $text =~ s/>/\>/g; + } + elsif ($_ eq '"') { + $text =~ s/\"/\"/; + } + elsif ($_ eq "'") { + $text =~ s/\'/\'/; + } + else { + my $rep = '&#' . sprintf('x%X', ord($_)) . ';'; + if (/\W/) { + my $ptrn = "\\$_"; + $text =~ s/$ptrn/$rep/g; + } + else { + $text =~ s/$_/$rep/g; + } + } + } + $text; +} + +sub skip_until { + my $self = shift; + if ($self->{_State_} <= 1) { + SkipUntil($self->{Parser}, $_[0]); + } +} + +sub release { + my $self = shift; + ParserRelease($self->{Parser}); +} + +sub DESTROY { + my $self = shift; + ParserFree($self->{Parser}); +} + +sub parse { + my $self = shift; + my $arg = shift; + croak "Parse already in progress (Expat)" if $self->{_State_}; + $self->{_State_} = 1; + my $parser = $self->{Parser}; + my $ioref; + my $result = 0; + + if (defined $arg) { + if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) { + $ioref = $arg; + } elsif (tied($arg)) { + my $class = ref($arg); + no strict 'refs'; + $ioref = $arg if defined &{"${class}::TIEHANDLE"}; + } + else { + require IO::Handle; + eval { + no strict 'refs'; + $ioref = *{$arg}{IO} if defined *{$arg}; + }; + undef $@; + } + } + + if (defined($ioref)) { + my $delim = $self->{Stream_Delimiter}; + my $prev_rs; + + $prev_rs = ref($ioref)->input_record_separator("\n$delim\n") + if defined($delim); + + $result = ParseStream($parser, $ioref, $delim); + + ref($ioref)->input_record_separator($prev_rs) + if defined($delim); + } else { + $result = ParseString($parser, $arg); + } + + $self->{_State_} = 2; + $result or croak $self->{ErrorMessage}; +} + +sub parsestring { + my $self = shift; + $self->parse(@_); +} + +sub parsefile { + my $self = shift; + croak "Parser has already been used" if $self->{_State_}; + local(*FILE); + open(FILE, $_[0]) or croak "Couldn't open $_[0]:\n$!"; + binmode(FILE); + my $ret = $self->parse(*FILE); + close(FILE); + $ret; +} + +################################################################ +package XML::Parser::ContentModel; +use overload '""' => \&asString, 'eq' => \&thiseq; + +sub EMPTY () {1} +sub ANY () {2} +sub MIXED () {3} +sub NAME () {4} +sub CHOICE () {5} +sub SEQ () {6} + + +sub isempty { + return $_[0]->{Type} == EMPTY; +} + +sub isany { + return $_[0]->{Type} == ANY; +} + +sub ismixed { + return $_[0]->{Type} == MIXED; +} + +sub isname { + return $_[0]->{Type} == NAME; +} + +sub name { + return $_[0]->{Tag}; +} + +sub ischoice { + return $_[0]->{Type} == CHOICE; +} + +sub isseq { + return $_[0]->{Type} == SEQ; +} + +sub quant { + return $_[0]->{Quant}; +} + +sub children { + my $children = $_[0]->{Children}; + if (defined $children) { + return @$children; + } + return undef; +} + +sub asString { + my ($self) = @_; + my $ret; + + if ($self->{Type} == NAME) { + $ret = $self->{Tag}; + } + elsif ($self->{Type} == EMPTY) { + return "EMPTY"; + } + elsif ($self->{Type} == ANY) { + return "ANY"; + } + elsif ($self->{Type} == MIXED) { + $ret = '(#PCDATA'; + foreach (@{$self->{Children}}) { + $ret .= '|' . $_; + } + $ret .= ')'; + } + else { + my $sep = $self->{Type} == CHOICE ? '|' : ','; + $ret = '(' . join($sep, map { $_->asString } @{$self->{Children}}) . ')'; + } + + $ret .= $self->{Quant} if $self->{Quant}; + return $ret; +} + +sub thiseq { + my $self = shift; + + return $self->asString eq $_[0]; +} + +################################################################ +package XML::Parser::ExpatNB; + +use vars qw(@ISA); +use Carp; + +@ISA = qw(XML::Parser::Expat); + +sub parse { + my $self = shift; + my $class = ref($self); + croak "parse method not supported in $class"; +} + +sub parsestring { + my $self = shift; + my $class = ref($self); + croak "parsestring method not supported in $class"; +} + +sub parsefile { + my $self = shift; + my $class = ref($self); + croak "parsefile method not supported in $class"; +} + +sub parse_more { + my ($self, $data) = @_; + + $self->{_State_} = 1; + my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data); + + croak $self->{ErrorMessage} unless $ret; +} + +sub parse_done { + my $self = shift; + + my $ret = XML::Parser::Expat::ParseDone($self->{Parser}); + unless ($ret) { + my $msg = $self->{ErrorMessage}; + $self->release; + croak $msg; + } + + $self->{_State_} = 2; + + my $result = $ret; + my @result = (); + my $final = $self->{FinalHandler}; + if (defined $final) { + if (wantarray) { + @result = &$final($self); + } + else { + $result = &$final($self); + } + } + + $self->release; + + return unless defined wantarray; + return wantarray ? @result : $result; +} + +################################################################ + +package XML::Parser::Encinfo; + +sub DESTROY { + my $self = shift; + XML::Parser::Expat::FreeEncoding($self); +} + +1; + +__END__ + +=head1 NAME + +XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser + +=head1 SYNOPSIS + + use XML::Parser::Expat; + + $parser = new XML::Parser::Expat; + $parser->setHandlers('Start' => \&sh, + 'End' => \&eh, + 'Char' => \&ch); + open(FOO, 'info.xml') or die "Couldn't open"; + $parser->parse(*FOO); + close(FOO); + # $parser->parse(' here we go '); + + sub sh + { + my ($p, $el, %atts) = @_; + $p->setHandlers('Char' => \&spec) + if ($el eq 'special'); + ... + } + + sub eh + { + my ($p, $el) = @_; + $p->setHandlers('Char' => \&ch) # Special elements won't contain + if ($el eq 'special'); # other special elements + ... + } + +=head1 DESCRIPTION + +This module provides an interface to James Clark's XML parser, expat. As in +expat, a single instance of the parser can only parse one document. Calls +to parsestring after the first for a given instance will die. + +Expat (and XML::Parser::Expat) are event based. As the parser recognizes +parts of the document (say the start or end of an XML element), then any +handlers registered for that type of an event are called with suitable +parameters. + +=head1 METHODS + +=over 4 + +=item new + +This is a class method, the constructor for XML::Parser::Expat. Options are +passed as keyword value pairs. The recognized options are: + +=over 4 + +=item * ProtocolEncoding + +The protocol encoding name. The default is none. The expat built-in +encodings are: C, C, C, and C. +Other encodings may be used if they have encoding maps in one of the +directories in the @Encoding_Path list. Setting the protocol encoding +overrides any encoding in the XML declaration. + +=item * Namespaces + +When this option is given with a true value, then the parser does namespace +processing. By default, namespace processing is turned off. When it is +turned on, the parser consumes I attributes and strips off prefixes +from element and attributes names where those prefixes have a defined +namespace. A name's namespace can be found using the L<"namespace"> method +and two names can be checked for absolute equality with the L<"eq_name"> +method. + +=item * NoExpand + +Normally, the parser will try to expand references to entities defined in +the internal subset. If this option is set to a true value, and a default +handler is also set, then the default handler will be called when an +entity reference is seen in text. This has no effect if a default handler +has not been registered, and it has no effect on the expansion of entity +references inside attribute values. + +=item * Stream_Delimiter + +This option takes a string value. When this string is found alone on a line +while parsing from a stream, then the parse is ended as if it saw an end of +file. The intended use is with a stream of xml documents in a MIME multipart +format. The string should not contain a trailing newline. + +=item * ErrorContext + +When this option is defined, errors are reported in context. The value +of ErrorContext should be the number of lines to show on either side of +the line in which the error occurred. + +=item * ParseParamEnt + +Unless standalone is set to "yes" in the XML declaration, setting this to +a true value allows the external DTD to be read, and parameter entities +to be parsed and expanded. + +=item * Base + +The base to use for relative pathnames or URLs. This can also be done by +using the base method. + +=back + +=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) + +This method registers handlers for the various events. If no handlers are +registered, then a call to parsestring or parsefile will only determine if +the corresponding XML document is well formed (by returning without error.) +This may be called from within a handler, after the parse has started. + +Setting a handler to something that evaluates to false unsets that +handler. + +This method returns a list of type, handler pairs corresponding to the +input. The handlers returned are the ones that were in effect before the +call to setHandlers. + +The recognized events and the parameters passed to the corresponding +handlers are: + +=over 4 + +=item * Start (Parser, Element [, Attr, Val [,...]]) + +This event is generated when an XML start tag is recognized. Parser is +an XML::Parser::Expat instance. Element is the name of the XML element that +is opened with the start tag. The Attr & Val pairs are generated for each +attribute in the start tag. + +=item * End (Parser, Element) + +This event is generated when an XML end tag is recognized. Note that +an XML empty tag () generates both a start and an end event. + +There is always a lower level start and end handler installed that wrap +the corresponding callbacks. This is to handle the context mechanism. +A consequence of this is that the default handler (see below) will not +see a start tag or end tag unless the default_current method is called. + +=item * Char (Parser, String) + +This event is generated when non-markup is recognized. The non-markup +sequence of characters is in String. A single non-markup sequence of +characters may generate multiple calls to this handler. Whatever the +encoding of the string in the original document, this is given to the +handler in UTF-8. + +=item * Proc (Parser, Target, Data) + +This event is generated when a processing instruction is recognized. + +=item * Comment (Parser, String) + +This event is generated when a comment is recognized. + +=item * CdataStart (Parser) + +This is called at the start of a CDATA section. + +=item * CdataEnd (Parser) + +This is called at the end of a CDATA section. + +=item * Default (Parser, String) + +This is called for any characters that don't have a registered handler. +This includes both characters that are part of markup for which no +events are generated (markup declarations) and characters that +could generate events, but for which no handler has been registered. + +Whatever the encoding in the original document, the string is returned to +the handler in UTF-8. + +=item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) + +This is called for a declaration of an unparsed entity. Entity is the name +of the entity. Base is the base to be used for resolving a relative URI. +Sysid is the system id. Pubid is the public id. Notation is the notation +name. Base and Pubid may be undefined. + +=item * Notation (Parser, Notation, Base, Sysid, Pubid) + +This is called for a declaration of notation. Notation is the notation name. +Base is the base to be used for resolving a relative URI. Sysid is the system +id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. + +=item * ExternEnt (Parser, Base, Sysid, Pubid) + +This is called when an external entity is referenced. Base is the base to be +used for resolving a relative URI. Sysid is the system id. Pubid is the public +id. Base, and Pubid may be undefined. + +This handler should either return a string, which represents the contents of +the external entity, or return an open filehandle that can be read to obtain +the contents of the external entity, or return undef, which indicates the +external entity couldn't be found and will generate a parse error. + +If an open filehandle is returned, it must be returned as either a glob +(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). + +=item * ExternEntFin (Parser) + +This is called after an external entity has been parsed. It allows +applications to perform cleanup on actions performed in the above +ExternEnt handler. + +=item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) + +This is called when an entity is declared. For internal entities, the Val +parameter will contain the value and the remaining three parameters will +be undefined. For external entities, the Val parameter +will be undefined, the Sysid parameter will have the system id, the Pubid +parameter will have the public id if it was provided (it will be undefined +otherwise), the Ndata parameter will contain the notation for unparsed +entities. If this is a parameter entity declaration, then the IsParam +parameter is true. + +Note that this handler and the Unparsed handler above overlap. If both are +set, then this handler will not be called for unparsed entities. + +=item * Element (Parser, Name, Model) + +The element handler is called when an element declaration is found. Name is +the element name, and Model is the content model as an +XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods"> +for methods available for this class. + +=item * Attlist (Parser, Elname, Attname, Type, Default, Fixed) + +This handler is called for each attribute in an ATTLIST declaration. +So an ATTLIST declaration that has multiple attributes +will generate multiple calls to this handler. The Elname parameter is the +name of the element with which the attribute is being associated. The Attname +parameter is the name of the attribute. Type is the attribute type, given as +a string. Default is the default value, which will either be "#REQUIRED", +"#IMPLIED" or a quoted string (i.e. the returned string will begin and end +with a quote character). If Fixed is true, then this is a fixed attribute. + +=item * Doctype (Parser, Name, Sysid, Pubid, Internal) + +This handler is called for DOCTYPE declarations. Name is the document type +name. Sysid is the system id of the document type, if it was provided, +otherwise it's undefined. Pubid is the public id of the document type, +which will be undefined if no public id was given. Internal will be +true or false, indicating whether or not the doctype declaration contains +an internal subset. + +=item * DoctypeFin (Parser) + +This handler is called after parsing of the DOCTYPE declaration has finished, +including any internal or external DTD declarations. + +=item * XMLDecl (Parser, Version, Encoding, Standalone) + +This handler is called for XML declarations. Version is a string containg +the version. Encoding is either undefined or contains an encoding string. +Standalone is either undefined, or true or false. Undefined indicates +that no standalone parameter was given in the XML declaration. True or +false indicates "yes" or "no" respectively. + +=back + +=item namespace(name) + +Return the URI of the namespace that the name belongs to. If the name doesn't +belong to any namespace, an undef is returned. This is only valid on names +received through the Start or End handlers from a single document, or through +a call to the generate_ns_name method. In other words, don't use names +generated from one instance of XML::Parser::Expat with other instances. + +=item eq_name(name1, name2) + +Return true if name1 and name2 are identical (i.e. same name and from +the same namespace.) This is only meaningful if both names were obtained +through the Start or End handlers from a single document, or through +a call to the generate_ns_name method. + +=item generate_ns_name(name, namespace) + +Return a name, associated with a given namespace, good for using with the +above 2 methods. The namespace argument should be the namespace URI, not +a prefix. + +=item new_ns_prefixes + +When called from a start tag handler, returns namespace prefixes declared +with this start tag. If called elsewere (or if there were no namespace +prefixes declared), it returns an empty list. Setting of the default +namespace is indicated with '#default' as a prefix. + +=item expand_ns_prefix(prefix) + +Return the uri to which the given prefix is currently bound. Returns +undef if the prefix isn't currently bound. Use '#default' to find the +current binding of the default namespace (if any). + +=item current_ns_prefixes + +Return a list of currently bound namespace prefixes. The order of the +the prefixes in the list has no meaning. If the default namespace is +currently bound, '#default' appears in the list. + +=item recognized_string + +Returns the string from the document that was recognized in order to call +the current handler. For instance, when called from a start handler, it +will give us the the start-tag string. The string is encoded in UTF-8. +This method doesn't return a meaningful string inside declaration handlers. + +=item original_string + +Returns the verbatim string from the document that was recognized in +order to call the current handler. The string is in the original document +encoding. This method doesn't return a meaningful string inside declaration +handlers. + +=item default_current + +When called from a handler, causes the sequence of characters that generated +the corresponding event to be sent to the default handler (if one is +registered). Use of this method is deprecated in favor the recognized_string +method, which you can use without installing a default handler. This +method doesn't deliver a meaningful string to the default handler when +called from inside declaration handlers. + +=item xpcroak(message) + +Concatenate onto the given message the current line number within the +XML document plus the message implied by ErrorContext. Then croak with +the formed message. + +=item xpcarp(message) + +Concatenate onto the given message the current line number within the +XML document plus the message implied by ErrorContext. Then carp with +the formed message. + +=item current_line + +Returns the line number of the current position of the parse. + +=item current_column + +Returns the column number of the current position of the parse. + +=item current_byte + +Returns the current position of the parse. + +=item base([NEWBASE]); + +Returns the current value of the base for resolving relative URIs. If +NEWBASE is supplied, changes the base to that value. + +=item context + +Returns a list of element names that represent open elements, with the +last one being the innermost. Inside start and end tag handlers, this +will be the tag of the parent element. + +=item current_element + +Returns the name of the innermost currently opened element. Inside +start or end handlers, returns the parent of the element associated +with those tags. + +=item in_element(NAME) + +Returns true if NAME is equal to the name of the innermost currently opened +element. If namespace processing is being used and you want to check +against a name that may be in a namespace, then use the generate_ns_name +method to create the NAME argument. + +=item within_element(NAME) + +Returns the number of times the given name appears in the context list. +If namespace processing is being used and you want to check +against a name that may be in a namespace, then use the generate_ns_name +method to create the NAME argument. + +=item depth + +Returns the size of the context list. + +=item element_index + +Returns an integer that is the depth-first visit order of the current +element. This will be zero outside of the root element. For example, +this will return 1 when called from the start handler for the root element +start tag. + +=item skip_until(INDEX) + +INDEX is an integer that represents an element index. When this method +is called, all handlers are suspended until the start tag for an element +that has an index number equal to INDEX is seen. If a start handler has +been set, then this is the first tag that the start handler will see +after skip_until has been called. + + +=item position_in_context(LINES) + +Returns a string that shows the current parse position. LINES should be +an integer >= 0 that represents the number of lines on either side of the +current parse line to place into the returned string. + +=item xml_escape(TEXT [, CHAR [, CHAR ...]]) + +Returns TEXT with markup characters turned into character entities. Any +additional characters provided as arguments are also turned into character +references where found in TEXT. + +=item parse (SOURCE) + +The SOURCE parameter should either be a string containing the whole XML +document, or it should be an open IO::Handle. Only a single document +may be parsed for a given instance of XML::Parser::Expat, so this will croak +if it's been called previously for this instance. + +=item parsestring(XML_DOC_STRING) + +Parses the given string as an XML document. Only a single document may be +parsed for a given instance of XML::Parser::Expat, so this will die if either +parsestring or parsefile has been called for this instance previously. + +This method is deprecated in favor of the parse method. + +=item parsefile(FILENAME) + +Parses the XML document in the given file. Will die if parsestring or +parsefile has been called previously for this instance. + +=item is_defaulted(ATTNAME) + +NO LONGER WORKS. To find out if an attribute is defaulted please use +the specified_attr method. + +=item specified_attr + +When the start handler receives lists of attributes and values, the +non-defaulted (i.e. explicitly specified) attributes occur in the list +first. This method returns the number of specified items in the list. +So if this number is equal to the length of the list, there were no +defaulted values. Otherwise the number points to the index of the +first defaulted attribute name. + +=item finish + +Unsets all handlers (including internal ones that set context), but expat +continues parsing to the end of the document or until it finds an error. +It should finish up a lot faster than with the handlers set. + +=item release + +There are data structures used by XML::Parser::Expat that have circular +references. This means that these structures will never be garbage +collected unless these references are explicitly broken. Calling this +method breaks those references (and makes the instance unusable.) + +Normally, higher level calls handle this for you, but if you are using +XML::Parser::Expat directly, then it's your responsibility to call it. + +=back + +=head2 XML::Parser::ContentModel Methods + +The element declaration handlers are passed objects of this class as the +content model of the element declaration. They also represent content +particles, components of a content model. + +When referred to as a string, these objects are automagicly converted to a +string representation of the model (or content particle). + +=over 4 + +=item isempty + +This method returns true if the object is "EMPTY", false otherwise. + +=item isany + +This method returns true if the object is "ANY", false otherwise. + +=item ismixed + +This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", +false otherwise. + +=item isname + +This method returns if the object is an element name. + +=item ischoice + +This method returns true if the object is a choice of content particles. + + +=item isseq + +This method returns true if the object is a sequence of content particles. + +=item quant + +This method returns undef or a string representing the quantifier +('?', '*', '+') associated with the model or particle. + +=item children + +This method returns undef or (for mixed, choice, and sequence types) +an array of component content particles. There will always be at least +one component for choices and sequences, but for a mixed content model +of pure PCDATA, "(#PCDATA)", then an undef is returned. + +=back + +=head2 XML::Parser::ExpatNB Methods + +The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used +for non-blocking access to the expat library. It does not support the parse, +parsestring, or parsefile methods, but it does have these additional methods: + +=over 4 + +=item parse_more(DATA) + +Feed expat more text to munch on. + +=item parse_done + +Tell expat that it's gotten the whole document. + +=back + +=head1 FUNCTIONS + +=over 4 + +=item XML::Parser::Expat::load_encoding(ENCODING) + +Load an external encoding. ENCODING is either the name of an encoding or +the name of a file. The basename is converted to lowercase and a '.enc' +extension is appended unless there's one already there. Then, unless +it's an absolute pathname (i.e. begins with '/'), the first file by that +name discovered in the @Encoding_Path path list is used. + +The encoding in the file is loaded and kept in the %Encoding_Table +table. Earlier encodings of the same name are replaced. + +This function is automaticly called by expat when it encounters an encoding +it doesn't know about. Expat shouldn't call this twice for the same +encoding name. The only reason users should use this function is to +explicitly load an encoding not contained in the @Encoding_Path list. + +=back + +=head1 AUTHORS + +Larry Wall > wrote version 1.0. + +Clark Cooper > picked up support, changed the API +for this version (2.x), provided documentation, and added some standard +package features. + +=cut diff --git a/Expat/Expat.xs b/Expat/Expat.xs new file mode 100644 index 0000000..497b64f --- /dev/null +++ b/Expat/Expat.xs @@ -0,0 +1,2214 @@ +/***************************************************************** +** Expat.xs +** +** Copyright 1998 Larry Wall and Clark Cooper +** All rights reserved. +** +** This program is free software; you can redistribute it and/or +** modify it under the same terms as Perl itself. +** +*/ + +#include + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +#undef convert + +#include "patchlevel.h" +#include "encoding.h" + + +/* Version 5.005_5x (Development version for 5.006) doesn't like sv_... + anymore, but 5.004 doesn't know about PL_sv.. + Don't want to push up required version just for this. */ + +#if PATCHLEVEL < 5 +#define PL_sv_undef sv_undef +#define PL_sv_no sv_no +#define PL_sv_yes sv_yes +#define PL_na na +#endif + +#define BUFSIZE 32768 + +#define NSDELIM '|' + +/* Macro to update handler fields. Used in the various handler setting + XSUBS */ + +#define XMLP_UPD(fld) \ + RETVAL = cbv->fld ? newSVsv(cbv->fld) : &PL_sv_undef;\ + if (cbv->fld) {\ + if (cbv->fld != fld)\ + sv_setsv(cbv->fld, fld);\ + }\ + else\ + cbv->fld = newSVsv(fld) + +/* Macro to push old handler value onto return stack. This is done here + to get around a bug in 5.004 sv_2mortal function. */ + +#define PUSHRET \ + ST(0) = RETVAL;\ + if (RETVAL != &PL_sv_undef && SvREFCNT(RETVAL)) sv_2mortal(RETVAL) + +typedef struct { + SV* self_sv; + XML_Parser p; + + AV* context; + AV* new_prefix_list; + HV *nstab; + AV *nslst; + + unsigned int st_serial; + unsigned int st_serial_stackptr; + unsigned int st_serial_stacksize; + unsigned int * st_serial_stack; + + unsigned int skip_until; + + SV *recstring; + char * delim; + STRLEN delimlen; + + unsigned ns:1; + unsigned no_expand:1; + unsigned parseparam:1; + + /* Callback handlers */ + + SV* start_sv; + SV* end_sv; + SV* char_sv; + SV* proc_sv; + SV* cmnt_sv; + SV* dflt_sv; + + SV* entdcl_sv; + SV* eledcl_sv; + SV* attdcl_sv; + SV* doctyp_sv; + SV* doctypfin_sv; + SV* xmldec_sv; + + SV* unprsd_sv; + SV* notation_sv; + + SV* extent_sv; + SV* extfin_sv; + + SV* startcd_sv; + SV* endcd_sv; +} CallbackVector; + + +static HV* EncodingTable = NULL; + +static XML_Char nsdelim[] = {NSDELIM, '\0'}; + +static char *QuantChar[] = {"", "?", "*", "+"}; + +/* Forward declarations */ + +static void suspend_callbacks(CallbackVector *); +static void resume_callbacks(CallbackVector *); + +#if PATCHLEVEL < 5 && SUBVERSION < 5 + +/* ================================================================ +** This is needed where the length is explicitly given. The expat +** library may sometimes give us zero-length strings. Perl's newSVpv +** interprets a zero length as a directive to do a strlen. This +** function is used when we want to force length to mean length, even +** if zero. +*/ + +static SV * +newSVpvn(char *s, STRLEN len) +{ + register SV *sv; + + sv = newSV(0); + sv_setpvn(sv, s, len); + return sv; +} /* End newSVpvn */ + +#define ERRSV GvSV(errgv) +#endif + +#ifdef SvUTF8_on + +static SV * +newUTF8SVpv(char *s, STRLEN len) { + register SV *sv; + + sv = newSVpv(s, len); + SvUTF8_on(sv); + return sv; +} /* End new UTF8SVpv */ + +static SV * +newUTF8SVpvn(char *s, STRLEN len) { + register SV *sv; + + sv = newSV(0); + sv_setpvn(sv, s, len); + SvUTF8_on(sv); + return sv; +} + +#else /* SvUTF8_on not defined */ + +#define newUTF8SVpv newSVpv +#define newUTF8SVpvn newSVpvn + +#endif + +static void* +mymalloc(size_t size) { +#ifndef LEAKTEST + return safemalloc(size); +#else + return safexmalloc(328,size); +#endif +} + +static void* +myrealloc(void *p, size_t s) { +#ifndef LEAKTEST + return saferealloc(p, s); +#else + return safexrealloc(p, s); +#endif +} + +static void +myfree(void *p) { + Safefree(p); +} + +static XML_Memory_Handling_Suite ms = {mymalloc, myrealloc, myfree}; + +static void +append_error(XML_Parser parser, char * err) +{ + dSP; + CallbackVector * cbv; + SV ** errstr; + + cbv = (CallbackVector*) XML_GetUserData(parser); + errstr = hv_fetch((HV*)SvRV(cbv->self_sv), + "ErrorMessage", 12, 0); + + if (errstr && SvPOK(*errstr)) { + SV ** errctx = hv_fetch((HV*) SvRV(cbv->self_sv), + "ErrorContext", 12, 0); + int dopos = !err && errctx && SvOK(*errctx); + + if (! err) + err = (char *) XML_ErrorString(XML_GetErrorCode(parser)); + + sv_catpvf(*errstr, "\n%s at line %d, column %d, byte %d%s", + err, + XML_GetCurrentLineNumber(parser), + XML_GetCurrentColumnNumber(parser), + XML_GetCurrentByteIndex(parser), + dopos ? ":\n" : ""); + + if (dopos) + { + int count; + + ENTER ; + SAVETMPS ; + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + XPUSHs(*errctx); + PUTBACK ; + + count = perl_call_method("position_in_context", G_SCALAR); + + SPAGAIN ; + + if (count >= 1) { + sv_catsv(*errstr, POPs); + } + + PUTBACK ; + FREETMPS ; + LEAVE ; + } + } +} /* End append_error */ + +static SV * +generate_model(XML_Content *model) { + HV * hash = newHV(); + SV * obj = newRV_noinc((SV *) hash); + + sv_bless(obj, gv_stashpv("XML::Parser::ContentModel", 1)); + + hv_store(hash, "Type", 4, newSViv(model->type), 0); + if (model->quant != XML_CQUANT_NONE) { + hv_store(hash, "Quant", 5, newSVpv(QuantChar[model->quant], 1), 0); + } + + switch(model->type) { + case XML_CTYPE_NAME: + hv_store(hash, "Tag", 3, newUTF8SVpv((char *)model->name, 0), 0); + break; + + case XML_CTYPE_MIXED: + case XML_CTYPE_CHOICE: + case XML_CTYPE_SEQ: + if (model->children && model->numchildren) + { + AV * children = newAV(); + int i; + + for (i = 0; i < model->numchildren; i++) { + av_push(children, generate_model(&model->children[i])); + } + + hv_store(hash, "Children", 8, newRV_noinc((SV *) children), 0); + } + break; + } + + return obj; +} /* End generate_model */ + +static int +parse_stream(XML_Parser parser, SV * ioref) +{ + dSP; + SV * tbuff; + SV * tsiz; + char * linebuff; + STRLEN lblen; + STRLEN br = 0; + int buffsize; + int done = 0; + int ret = 1; + char * msg = NULL; + CallbackVector * cbv; + char *buff = (char *) 0; + + cbv = (CallbackVector*) XML_GetUserData(parser); + + ENTER; + SAVETMPS; + + if (cbv->delim) { + int cnt; + SV * tline; + + PUSHMARK(SP); + XPUSHs(ioref); + PUTBACK ; + + cnt = perl_call_method("getline", G_SCALAR); + + SPAGAIN; + + if (cnt != 1) + croak("getline method call failed"); + + tline = POPs; + + if (! SvOK(tline)) { + lblen = 0; + } + else { + char * chk; + linebuff = SvPV(tline, lblen); + chk = &linebuff[lblen - cbv->delimlen - 1]; + + if (lblen > cbv->delimlen + 1 + && *chk == *cbv->delim + && chk[cbv->delimlen] == '\n' + && strnEQ(++chk, cbv->delim + 1, cbv->delimlen - 1)) + lblen -= cbv->delimlen + 1; + } + + PUTBACK ; + buffsize = lblen; + done = lblen == 0; + } + else { + tbuff = newSV(0); + tsiz = newSViv(BUFSIZE); + buffsize = BUFSIZE; + } + + while (! done) + { + char *buffer = XML_GetBuffer(parser, buffsize); + + if (! buffer) + croak("Ran out of memory for input buffer"); + + SAVETMPS; + + if (cbv->delim) { + Copy(linebuff, buffer, lblen, char); + br = lblen; + done = 1; + } + else { + int cnt; + SV * rdres; + char * tb; + + PUSHMARK(SP); + EXTEND(SP, 3); + PUSHs(ioref); + PUSHs(tbuff); + PUSHs(tsiz); + PUTBACK ; + + cnt = perl_call_method("read", G_SCALAR); + + SPAGAIN ; + + if (cnt != 1) + croak("read method call failed"); + + rdres = POPs; + + if (! SvOK(rdres)) + croak("read error"); + + tb = SvPV(tbuff, br); + if (br > 0) + Copy(tb, buffer, br, char); + else + done = 1; + + PUTBACK ; + } + + ret = XML_ParseBuffer(parser, br, done); + + SPAGAIN; /* resync local SP in case callbacks changed global stack */ + + if (! ret) + break; + + FREETMPS; + } + + if (! ret) + append_error(parser, msg); + + if (! cbv->delim) { + SvREFCNT_dec(tsiz); + SvREFCNT_dec(tbuff); + } + + FREETMPS; + LEAVE; + + return ret; +} /* End parse_stream */ + +static SV * +gen_ns_name(const char * name, HV * ns_table, AV * ns_list) +{ + char *pos = strchr(name, NSDELIM); + SV * ret; + + if (pos && pos > name) + { + SV ** name_ent = hv_fetch(ns_table, (char *) name, + pos - name, TRUE); + ret = newUTF8SVpv(&pos[1], 0); + + if (name_ent) + { + int index; + + if (SvOK(*name_ent)) + { + index = SvIV(*name_ent); + } + else + { + av_push(ns_list, newUTF8SVpv((char *) name, pos - name)); + index = av_len(ns_list); + sv_setiv(*name_ent, (IV) index); + } + + sv_setiv(ret, (IV) index); + SvPOK_on(ret); + } + } + else + ret = newUTF8SVpv((char *) name, 0); + + return ret; +} /* End gen_ns_name */ + +static void +characterData(void *userData, const char *s, int len) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpvn((char*)s,len))); + PUTBACK; + perl_call_sv(cbv->char_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End characterData */ + +static void +startElement(void *userData, const char *name, const char **atts) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + SV ** pcontext; + unsigned do_ns = cbv->ns; + unsigned skipping = 0; + SV ** pnstab; + SV ** pnslst; + SV * elname; + + cbv->st_serial++; + + if (cbv->skip_until) { + skipping = cbv->st_serial < cbv->skip_until; + if (! skipping) { + resume_callbacks(cbv); + cbv->skip_until = 0; + } + } + + if (cbv->st_serial_stackptr >= cbv->st_serial_stacksize) { + unsigned int newsize = cbv->st_serial_stacksize + 512; + + Renew(cbv->st_serial_stack, newsize, unsigned int); + cbv->st_serial_stacksize = newsize; + } + + cbv->st_serial_stack[++cbv->st_serial_stackptr] = cbv->st_serial; + + if (do_ns) + elname = gen_ns_name(name, cbv->nstab, cbv->nslst); + else + elname = newUTF8SVpv((char *)name, 0); + + if (! skipping && SvTRUE(cbv->start_sv)) + { + const char **attlim = atts; + + while (*attlim) + attlim++; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, attlim - atts + 2); + PUSHs(cbv->self_sv); + PUSHs(elname); + while (*atts) + { + SV * attname; + + attname = (do_ns ? gen_ns_name(*atts, cbv->nstab, cbv->nslst) + : newUTF8SVpv((char *) *atts, 0)); + + atts++; + PUSHs(sv_2mortal(attname)); + if (*atts) + PUSHs(sv_2mortal(newUTF8SVpv((char*)*atts++,0))); + } + PUTBACK; + perl_call_sv(cbv->start_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } + + av_push(cbv->context, elname); + + if (cbv->ns) { + av_clear(cbv->new_prefix_list); + } +} /* End startElement */ + +static void +endElement(void *userData, const char *name) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + SV *elname; + + elname = av_pop(cbv->context); + + if (! cbv->st_serial_stackptr) { + croak("endElement: Start tag serial number stack underflow"); + } + + if (! cbv->skip_until && SvTRUE(cbv->end_sv)) + { + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(elname); + PUTBACK; + perl_call_sv(cbv->end_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } + + cbv->st_serial_stackptr--; + + SvREFCNT_dec(elname); +} /* End endElement */ + +static void +processingInstruction(void *userData, const char *target, const char *data) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 3); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*)target,0))); + PUSHs(sv_2mortal(newUTF8SVpv((char*)data,0))); + PUTBACK; + perl_call_sv(cbv->proc_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End processingInstruction */ + +static void +commenthandle(void *userData, const char *string) +{ + dSP; + CallbackVector * cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*) string, 0))); + PUTBACK; + perl_call_sv(cbv->cmnt_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End commenthandler */ + +static void +startCdata(void *userData) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + if (cbv->startcd_sv) { + ENTER; + SAVETMPS; + + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->startcd_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } +} /* End startCdata */ + +static void +endCdata(void *userData) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + if (cbv->endcd_sv) { + ENTER; + SAVETMPS; + + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->endcd_sv, G_DISCARD); + + FREETMPS; + LEAVE; + } +} /* End endCdata */ + +static void +nsStart(void *userdata, const XML_Char *prefix, const XML_Char *uri){ + dSP; + CallbackVector* cbv = (CallbackVector*) userdata; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 3); + PUSHs(cbv->self_sv); + PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef); + PUSHs(uri ? sv_2mortal(newUTF8SVpv((char *)uri, 0)) : &PL_sv_undef); + PUTBACK; + perl_call_method("NamespaceStart", G_DISCARD); + + FREETMPS; + LEAVE; +} /* End nsStart */ + +static void +nsEnd(void *userdata, const XML_Char *prefix) { + dSP; + CallbackVector* cbv = (CallbackVector*) userdata; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef); + PUTBACK; + perl_call_method("NamespaceEnd", G_DISCARD); + + FREETMPS; + LEAVE; +} /* End nsEnd */ + +static void +defaulthandle(void *userData, const char *string, int len) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpvn((char*)string, len))); + PUTBACK; + perl_call_sv(cbv->dflt_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End defaulthandle */ + +static void +elementDecl(void *data, + const char *name, + XML_Content *model) { + dSP; + CallbackVector *cbv = (CallbackVector*) data; + SV *cmod; + + ENTER; + SAVETMPS; + + + cmod = generate_model(model); + + Safefree(model); + PUSHMARK(sp); + EXTEND(sp, 3); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char *)name, 0))); + PUSHs(sv_2mortal(cmod)); + PUTBACK; + perl_call_sv(cbv->eledcl_sv, G_DISCARD); + FREETMPS; + LEAVE; + +} /* End elementDecl */ + +static void +attributeDecl(void *data, + const char * elname, + const char * attname, + const char * att_type, + const char * dflt, + int reqorfix) { + dSP; + CallbackVector *cbv = (CallbackVector*) data; + SV * dfltsv; + + if (dflt) { + dfltsv = newUTF8SVpv("'", 1); + sv_catpv(dfltsv, (char *) dflt); + sv_catpv(dfltsv, "'"); + } + else { + dfltsv = newUTF8SVpv(reqorfix ? "#REQUIRED" : "#IMPLIED", 0); + } + + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(sp, 5); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char *)elname, 0))); + PUSHs(sv_2mortal(newUTF8SVpv((char *)attname, 0))); + PUSHs(sv_2mortal(newUTF8SVpv((char *)att_type, 0))); + PUSHs(sv_2mortal(dfltsv)); + if (dflt && reqorfix) + XPUSHs(&PL_sv_yes); + PUTBACK; + perl_call_sv(cbv->attdcl_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End attributeDecl */ + +static void +entityDecl(void *data, + const char *name, + int isparam, + const char *value, + int vlen, + const char *base, + const char *sysid, + const char *pubid, + const char *notation) { + dSP; + CallbackVector *cbv = (CallbackVector*) data; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 6); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0))); + PUSHs(value ? sv_2mortal(newUTF8SVpvn((char*)value, vlen)) : &PL_sv_undef); + PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char *)sysid, 0)) : &PL_sv_undef); + PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char *)pubid, 0)) : &PL_sv_undef); + PUSHs(notation ? sv_2mortal(newUTF8SVpv((char *)notation, 0)) : &PL_sv_undef); + if (isparam) + XPUSHs(&PL_sv_yes); + PUTBACK; + perl_call_sv(cbv->entdcl_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End entityDecl */ + +static void +doctypeStart(void *userData, + const char* name, + const char* sysid, + const char* pubid, + int hasinternal) { + dSP; + CallbackVector *cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 5); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0))); + PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char*)sysid, 0)) : &PL_sv_undef); + PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*)pubid, 0)) : &PL_sv_undef); + PUSHs(hasinternal ? &PL_sv_yes : &PL_sv_no); + PUTBACK; + perl_call_sv(cbv->doctyp_sv, G_DISCARD); + FREETMPS; + LEAVE; +} /* End doctypeStart */ + +static void +doctypeEnd(void *userData) { + dSP; + CallbackVector *cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 1); + PUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->doctypfin_sv, G_DISCARD); + FREETMPS; + LEAVE; +} /* End doctypeEnd */ + +static void +xmlDecl(void *userData, + const char *version, + const char *encoding, + int standalone) { + dSP; + CallbackVector *cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 4); + PUSHs(cbv->self_sv); + PUSHs(version ? sv_2mortal(newUTF8SVpv((char *)version, 0)) + : &PL_sv_undef); + PUSHs(encoding ? sv_2mortal(newUTF8SVpv((char *)encoding, 0)) + : &PL_sv_undef); + PUSHs(standalone == -1 ? &PL_sv_undef + : (standalone ? &PL_sv_yes : &PL_sv_no)); + PUTBACK; + perl_call_sv(cbv->xmldec_sv, G_DISCARD); + FREETMPS; + LEAVE; +} /* End xmlDecl */ + +static void +unparsedEntityDecl(void *userData, + const char* entity, + const char* base, + const char* sysid, + const char* pubid, + const char* notation) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + ENTER; + SAVETMPS; + + PUSHMARK(sp); + EXTEND(sp, 6); + PUSHs(cbv->self_sv); + PUSHs(sv_2mortal(newUTF8SVpv((char*) entity, 0))); + PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef); + PUSHs(sv_2mortal(newUTF8SVpv((char*) sysid, 0))); + PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*) pubid, 0)) : &PL_sv_undef); + PUSHs(sv_2mortal(newUTF8SVpv((char*) notation, 0))); + PUTBACK; + perl_call_sv(cbv->unprsd_sv, G_DISCARD); + + FREETMPS; + LEAVE; +} /* End unparsedEntityDecl */ + +static void +notationDecl(void *userData, + const char *name, + const char *base, + const char *sysid, + const char *pubid) +{ + dSP; + CallbackVector* cbv = (CallbackVector*) userData; + + PUSHMARK(sp); + XPUSHs(cbv->self_sv); + XPUSHs(sv_2mortal(newUTF8SVpv((char*) name, 0))); + if (base) + { + XPUSHs(sv_2mortal(newUTF8SVpv((char *) base, 0))); + } + else if (sysid || pubid) + { + XPUSHs(&PL_sv_undef); + } + + if (sysid) + { + XPUSHs(sv_2mortal(newUTF8SVpv((char *) sysid, 0))); + } + else if (pubid) + { + XPUSHs(&PL_sv_undef); + } + + if (pubid) + XPUSHs(sv_2mortal(newUTF8SVpv((char *) pubid, 0))); + + PUTBACK; + perl_call_sv(cbv->notation_sv, G_DISCARD); +} /* End notationDecl */ + +static int +externalEntityRef(XML_Parser parser, + const char* open, + const char* base, + const char* sysid, + const char* pubid) +{ + dSP; +#if defined(USE_THREADS) && PATCHLEVEL==6 + dTHX; +#endif + + int count; + int ret = 0; + int parse_done = 0; + + CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser); + + if (! cbv->extent_sv) + return 0; + + ENTER ; + SAVETMPS ; + PUSHMARK(sp); + EXTEND(sp, pubid ? 4 : 3); + PUSHs(cbv->self_sv); + PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef); + PUSHs(sv_2mortal(newSVpv((char*) sysid, 0))); + if (pubid) + PUSHs(sv_2mortal(newUTF8SVpv((char*) pubid, 0))); + PUTBACK ; + count = perl_call_sv(cbv->extent_sv, G_SCALAR); + + SPAGAIN ; + + if (count >= 1) { + SV * result = POPs; + int type; + + if (result && (type = SvTYPE(result)) > 0) { + SV **pval = hv_fetch((HV*) SvRV(cbv->self_sv), "Parser", 6, 0); + + if (! pval || ! SvIOK(*pval)) + append_error(parser, "Can't find parser entry in XML::Parser object"); + else { + XML_Parser entpar; + char *errmsg = (char *) 0; + + entpar = XML_ExternalEntityParserCreate(parser, open, 0); + + XML_SetBase(entpar, XML_GetBase(parser)); + + sv_setiv(*pval, (IV) entpar); + + cbv->p = entpar; + + PUSHMARK(sp); + EXTEND(sp, 2); + PUSHs(*pval); + PUSHs(result); + PUTBACK; + count = perl_call_pv("XML::Parser::Expat::Do_External_Parse", + G_SCALAR | G_EVAL); + SPAGAIN; + + if (SvTRUE(ERRSV)) { + char *hold; + STRLEN len; + + POPs; + hold = SvPV(ERRSV, len); + New(326, errmsg, len + 1, char); + if (len) + Copy(hold, errmsg, len, char); + goto Extparse_Cleanup; + } + + if (count > 0) + ret = POPi; + + parse_done = 1; + + Extparse_Cleanup: + cbv->p = parser; + sv_setiv(*pval, (IV) parser); + XML_ParserFree(entpar); + + if (cbv->extfin_sv) { + PUSHMARK(sp); + PUSHs(cbv->self_sv); + PUTBACK; + perl_call_sv(cbv->extfin_sv, G_DISCARD); + SPAGAIN; + } + + if (SvTRUE(ERRSV)) + append_error(parser, SvPV(ERRSV, PL_na)); + } + } + } + + if (! ret && ! parse_done) + append_error(parser, "Handler couldn't resolve external entity"); + + PUTBACK ; + FREETMPS ; + LEAVE ; + + return ret; +} /* End externalEntityRef */ + +/*================================================================ +** This is the function that expat calls to convert multi-byte sequences +** for external encodings. Each byte in the sequence is used to index +** into the current map to either set the next map or, in the case of +** the final byte, to get the corresponding Unicode scalar, which is +** returned. +*/ + +static int +convert_to_unicode(void *data, const char *seq) { + Encinfo *enc = (Encinfo *) data; + PrefixMap *curpfx; + int count; + int index = 0; + + for (count = 0; count < 4; count++) { + unsigned char byte = (unsigned char) seq[count]; + unsigned char bndx; + unsigned char bmsk; + int offset; + + curpfx = &enc->prefixes[index]; + offset = ((int) byte) - curpfx->min; + if (offset < 0) + break; + if (offset >= curpfx->len && curpfx->len != 0) + break; + + bndx = byte >> 3; + bmsk = 1 << (byte & 0x7); + + if (curpfx->ispfx[bndx] & bmsk) { + index = enc->bytemap[curpfx->bmap_start + offset]; + } + else if (curpfx->ischar[bndx] & bmsk) { + return enc->bytemap[curpfx->bmap_start + offset]; + } + else + break; + } + + return -1; +} /* End convert_to_unicode */ + +static int +unknownEncoding(void *unused, const char *name, XML_Encoding *info) +{ + SV ** encinfptr; + Encinfo *enc; + int namelen; + int i; + char buff[42]; + + namelen = strlen(name); + if (namelen > 40) + return 0; + + /* Make uppercase */ + for (i = 0; i < namelen; i++) { + char c = name[i]; + if (c >= 'a' && c <= 'z') + c -= 'a' - 'A'; + buff[i] = c; + } + + if (! EncodingTable) { + EncodingTable = perl_get_hv("XML::Parser::Expat::Encoding_Table", FALSE); + if (! EncodingTable) + croak("Can't find XML::Parser::Expat::Encoding_Table"); + } + + encinfptr = hv_fetch(EncodingTable, buff, namelen, 0); + + if (! encinfptr || ! SvOK(*encinfptr)) { + /* Not found, so try to autoload */ + dSP; + int count; + + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpvn(buff,namelen))); + PUTBACK; + perl_call_pv("XML::Parser::Expat::load_encoding", G_DISCARD); + + encinfptr = hv_fetch(EncodingTable, buff, namelen, 0); + FREETMPS; + LEAVE; + + if (! encinfptr || ! SvOK(*encinfptr)) + return 0; + } + + if (! sv_derived_from(*encinfptr, "XML::Parser::Encinfo")) + croak("Entry in XML::Parser::Expat::Encoding_Table not an Encinfo object"); + + enc = (Encinfo *) SvIV((SV*)SvRV(*encinfptr)); + Copy(enc->firstmap, info->map, 256, int); + info->release = NULL; + if (enc->prefixes_size) { + info->data = (void *) enc; + info->convert = convert_to_unicode; + } + else { + info->data = NULL; + info->convert = NULL; + } + + return 1; +} /* End unknownEncoding */ + + +static void +recString(void *userData, const char *string, int len) +{ + CallbackVector *cbv = (CallbackVector*) userData; + + if (cbv->recstring) { + sv_catpvn(cbv->recstring, (char *) string, len); + } + else { + cbv->recstring = newUTF8SVpvn((char *) string, len); + } +} /* End recString */ + +static void +suspend_callbacks(CallbackVector *cbv) { + if (SvTRUE(cbv->char_sv)) { + XML_SetCharacterDataHandler(cbv->p, + (XML_CharacterDataHandler) 0); + } + + if (SvTRUE(cbv->proc_sv)) { + XML_SetProcessingInstructionHandler(cbv->p, + (XML_ProcessingInstructionHandler) 0); + } + + if (SvTRUE(cbv->cmnt_sv)) { + XML_SetCommentHandler(cbv->p, + (XML_CommentHandler) 0); + } + + if (SvTRUE(cbv->startcd_sv) + || SvTRUE(cbv->endcd_sv)) { + XML_SetCdataSectionHandler(cbv->p, + (XML_StartCdataSectionHandler) 0, + (XML_EndCdataSectionHandler) 0); + } + + if (SvTRUE(cbv->unprsd_sv)) { + XML_SetUnparsedEntityDeclHandler(cbv->p, + (XML_UnparsedEntityDeclHandler) 0); + } + + if (SvTRUE(cbv->notation_sv)) { + XML_SetNotationDeclHandler(cbv->p, + (XML_NotationDeclHandler) 0); + } + + if (SvTRUE(cbv->extent_sv)) { + XML_SetExternalEntityRefHandler(cbv->p, + (XML_ExternalEntityRefHandler) 0); + } + +} /* End suspend_callbacks */ + +static void +resume_callbacks(CallbackVector *cbv) { + if (SvTRUE(cbv->char_sv)) { + XML_SetCharacterDataHandler(cbv->p, characterData); + } + + if (SvTRUE(cbv->proc_sv)) { + XML_SetProcessingInstructionHandler(cbv->p, processingInstruction); + } + + if (SvTRUE(cbv->cmnt_sv)) { + XML_SetCommentHandler(cbv->p, commenthandle); + } + + if (SvTRUE(cbv->startcd_sv) + || SvTRUE(cbv->endcd_sv)) { + XML_SetCdataSectionHandler(cbv->p, startCdata, endCdata); + } + + if (SvTRUE(cbv->unprsd_sv)) { + XML_SetUnparsedEntityDeclHandler(cbv->p, unparsedEntityDecl); + } + + if (SvTRUE(cbv->notation_sv)) { + XML_SetNotationDeclHandler(cbv->p, notationDecl); + } + + if (SvTRUE(cbv->extent_sv)) { + XML_SetExternalEntityRefHandler(cbv->p, externalEntityRef); + } + +} /* End resume_callbacks */ + + +MODULE = XML::Parser::Expat PACKAGE = XML::Parser::Expat PREFIX = XML_ + +XML_Parser +XML_ParserCreate(self_sv, enc_sv, namespaces) + SV * self_sv + SV * enc_sv + int namespaces + CODE: + { + CallbackVector *cbv; + enum XML_ParamEntityParsing pep = XML_PARAM_ENTITY_PARSING_NEVER; + char *enc = (char *) (SvTRUE(enc_sv) ? SvPV(enc_sv,PL_na) : 0); + SV ** spp; + + Newz(320, cbv, 1, CallbackVector); + cbv->self_sv = SvREFCNT_inc(self_sv); + Newz(325, cbv->st_serial_stack, 1024, unsigned int); + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "NoExpand", 8, 0); + if (spp && SvTRUE(*spp)) + cbv->no_expand = 1; + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Context", 7, 0); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing Context"); + + cbv->context = (AV*) SvRV(*spp); + + cbv->ns = (unsigned) namespaces; + if (namespaces) + { + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "New_Prefixes", 12, 0); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing New_Prefixes"); + + cbv->new_prefix_list = (AV *) SvRV(*spp); + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Namespace_Table", + 15, FALSE); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing Namespace_Table"); + + cbv->nstab = (HV *) SvRV(*spp); + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "Namespace_List", + 14, FALSE); + if (! spp || ! *spp || !SvROK(*spp)) + croak("XML::Parser instance missing Namespace_List"); + + cbv->nslst = (AV *) SvRV(*spp); + + RETVAL = XML_ParserCreate_MM(enc, &ms, nsdelim); + XML_SetNamespaceDeclHandler(RETVAL,nsStart, nsEnd); + } + else + { + RETVAL = XML_ParserCreate_MM(enc, &ms, NULL); + } + + cbv->p = RETVAL; + XML_SetUserData(RETVAL, (void *) cbv); + XML_SetElementHandler(RETVAL, startElement, endElement); + XML_SetUnknownEncodingHandler(RETVAL, unknownEncoding, 0); + + spp = hv_fetch((HV*)SvRV(cbv->self_sv), "ParseParamEnt", + 13, FALSE); + + if (spp && SvTRUE(*spp)) { + pep = XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE; + cbv->parseparam = 1; + } + + XML_SetParamEntityParsing(RETVAL, pep); + } + OUTPUT: + RETVAL + +void +XML_ParserRelease(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + SvREFCNT_dec(cbv->self_sv); + } + +void +XML_ParserFree(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + Safefree(cbv->st_serial_stack); + + + /* Clean up any SVs that we have */ + /* (Note that self_sv must already be taken care of + or we couldn't be here */ + + if (cbv->recstring) + SvREFCNT_dec(cbv->recstring); + + if (cbv->start_sv) + SvREFCNT_dec(cbv->start_sv); + + if (cbv->end_sv) + SvREFCNT_dec(cbv->end_sv); + + if (cbv->char_sv) + SvREFCNT_dec(cbv->char_sv); + + if (cbv->proc_sv) + SvREFCNT_dec(cbv->proc_sv); + + if (cbv->cmnt_sv) + SvREFCNT_dec(cbv->cmnt_sv); + + if (cbv->dflt_sv) + SvREFCNT_dec(cbv->dflt_sv); + + if (cbv->entdcl_sv) + SvREFCNT_dec(cbv->entdcl_sv); + + if (cbv->eledcl_sv) + SvREFCNT_dec(cbv->eledcl_sv); + + if (cbv->attdcl_sv) + SvREFCNT_dec(cbv->attdcl_sv); + + if (cbv->doctyp_sv) + SvREFCNT_dec(cbv->doctyp_sv); + + if (cbv->doctypfin_sv) + SvREFCNT_dec(cbv->doctypfin_sv); + + if (cbv->xmldec_sv) + SvREFCNT_dec(cbv->xmldec_sv); + + if (cbv->unprsd_sv) + SvREFCNT_dec(cbv->unprsd_sv); + + if (cbv->notation_sv) + SvREFCNT_dec(cbv->notation_sv); + + if (cbv->extent_sv) + SvREFCNT_dec(cbv->extent_sv); + + if (cbv->extfin_sv) + SvREFCNT_dec(cbv->extfin_sv); + + if (cbv->startcd_sv) + SvREFCNT_dec(cbv->startcd_sv); + + if (cbv->endcd_sv) + SvREFCNT_dec(cbv->endcd_sv); + + /* ================ */ + + Safefree(cbv); + XML_ParserFree(parser); + } + +int +XML_ParseString(parser, sv) + XML_Parser parser + SV * sv + CODE: + { + CallbackVector * cbv; + STRLEN len; + char *s = SvPV(sv, len); + + cbv = (CallbackVector *) XML_GetUserData(parser); + + + RETVAL = XML_Parse(parser, s, len, 1); + SPAGAIN; /* XML_Parse might have changed stack pointer */ + if (! RETVAL) + append_error(parser, NULL); + } + + OUTPUT: + RETVAL + +int +XML_ParseStream(parser, ioref, delim) + XML_Parser parser + SV * ioref + SV * delim + CODE: + { + SV **delimsv; + CallbackVector * cbv; + + cbv = (CallbackVector *) XML_GetUserData(parser); + if (SvOK(delim)) { + cbv->delim = SvPV(delim, cbv->delimlen); + } + else { + cbv->delim = (char *) 0; + } + + RETVAL = parse_stream(parser, ioref); + SPAGAIN; /* parse_stream might have changed stack pointer */ + } + + OUTPUT: + RETVAL + +int +XML_ParsePartial(parser, sv) + XML_Parser parser + SV * sv + CODE: + { + STRLEN len; + char *s = SvPV(sv, len); + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + RETVAL = XML_Parse(parser, s, len, 0); + if (! RETVAL) + append_error(parser, NULL); + } + + OUTPUT: + RETVAL + + +int +XML_ParseDone(parser) + XML_Parser parser + CODE: + { + RETVAL = XML_Parse(parser, "", 0, 1); + if (! RETVAL) + append_error(parser, NULL); + } + + OUTPUT: + RETVAL + +SV * +XML_SetStartElementHandler(parser, start_sv) + XML_Parser parser + SV * start_sv + CODE: + { + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + XMLP_UPD(start_sv); + PUSHRET; + } + +SV * +XML_SetEndElementHandler(parser, end_sv) + XML_Parser parser + SV * end_sv + CODE: + { + CallbackVector *cbv = (CallbackVector*) XML_GetUserData(parser); + XMLP_UPD(end_sv); + PUSHRET; + } + +SV * +XML_SetCharacterDataHandler(parser, char_sv) + XML_Parser parser + SV * char_sv + CODE: + { + XML_CharacterDataHandler charhndl = (XML_CharacterDataHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(char_sv); + if (SvTRUE(char_sv)) + charhndl = characterData; + + XML_SetCharacterDataHandler(parser, charhndl); + PUSHRET; + } + +SV * +XML_SetProcessingInstructionHandler(parser, proc_sv) + XML_Parser parser + SV * proc_sv + CODE: + { + XML_ProcessingInstructionHandler prochndl = + (XML_ProcessingInstructionHandler) 0; + CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(proc_sv); + if (SvTRUE(proc_sv)) + prochndl = processingInstruction; + + XML_SetProcessingInstructionHandler(parser, prochndl); + PUSHRET; + } + +SV * +XML_SetCommentHandler(parser, cmnt_sv) + XML_Parser parser + SV * cmnt_sv + CODE: + { + XML_CommentHandler cmnthndl = (XML_CommentHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(cmnt_sv); + if (SvTRUE(cmnt_sv)) + cmnthndl = commenthandle; + + XML_SetCommentHandler(parser, cmnthndl); + PUSHRET; + } + +SV * +XML_SetDefaultHandler(parser, dflt_sv) + XML_Parser parser + SV * dflt_sv + CODE: + { + XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(dflt_sv); + if (SvTRUE(dflt_sv)) + dflthndl = defaulthandle; + + if (cbv->no_expand) + XML_SetDefaultHandler(parser, dflthndl); + else + XML_SetDefaultHandlerExpand(parser, dflthndl); + + PUSHRET; + } + +SV * +XML_SetUnparsedEntityDeclHandler(parser, unprsd_sv) + XML_Parser parser + SV * unprsd_sv + CODE: + { + XML_UnparsedEntityDeclHandler unprsdhndl = + (XML_UnparsedEntityDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(unprsd_sv); + if (SvTRUE(unprsd_sv)) + unprsdhndl = unparsedEntityDecl; + + XML_SetUnparsedEntityDeclHandler(parser, unprsdhndl); + PUSHRET; + } + +SV * +XML_SetNotationDeclHandler(parser, notation_sv) + XML_Parser parser + SV * notation_sv + CODE: + { + XML_NotationDeclHandler nothndlr = (XML_NotationDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(notation_sv); + if (SvTRUE(notation_sv)) + nothndlr = notationDecl; + + XML_SetNotationDeclHandler(parser, nothndlr); + PUSHRET; + } + +SV * +XML_SetExternalEntityRefHandler(parser, extent_sv) + XML_Parser parser + SV * extent_sv + CODE: + { + XML_ExternalEntityRefHandler exthndlr = + (XML_ExternalEntityRefHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(extent_sv); + if (SvTRUE(extent_sv)) + exthndlr = externalEntityRef; + + XML_SetExternalEntityRefHandler(parser, exthndlr); + PUSHRET; + } + +SV * +XML_SetExtEntFinishHandler(parser, extfin_sv) + XML_Parser parser + SV * extfin_sv + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + /* There is no corresponding handler for this in expat. This is + called from the externalEntityRef function above after parsing + the external entity. */ + + XMLP_UPD(extfin_sv); + PUSHRET; + } + + +SV * +XML_SetEntityDeclHandler(parser, entdcl_sv) + XML_Parser parser + SV * entdcl_sv + CODE: + { + XML_EntityDeclHandler enthndlr = + (XML_EntityDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(entdcl_sv); + if (SvTRUE(entdcl_sv)) + enthndlr = entityDecl; + + XML_SetEntityDeclHandler(parser, enthndlr); + PUSHRET; + } + +SV * +XML_SetElementDeclHandler(parser, eledcl_sv) + XML_Parser parser + SV * eledcl_sv + CODE: + { + XML_ElementDeclHandler eldeclhndlr = + (XML_ElementDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(eledcl_sv); + if (SvTRUE(eledcl_sv)) + eldeclhndlr = elementDecl; + + XML_SetElementDeclHandler(parser, eldeclhndlr); + PUSHRET; + } + +SV * +XML_SetAttListDeclHandler(parser, attdcl_sv) + XML_Parser parser + SV * attdcl_sv + CODE: + { + XML_AttlistDeclHandler attdeclhndlr = + (XML_AttlistDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(attdcl_sv); + if (SvTRUE(attdcl_sv)) + attdeclhndlr = attributeDecl; + + XML_SetAttlistDeclHandler(parser, attdeclhndlr); + PUSHRET; + } + +SV * +XML_SetDoctypeHandler(parser, doctyp_sv) + XML_Parser parser + SV * doctyp_sv + CODE: + { + XML_StartDoctypeDeclHandler dtsthndlr = + (XML_StartDoctypeDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + int set = 0; + + XMLP_UPD(doctyp_sv); + if (SvTRUE(doctyp_sv)) + dtsthndlr = doctypeStart; + + XML_SetStartDoctypeDeclHandler(parser, dtsthndlr); + PUSHRET; + } + +SV * +XML_SetEndDoctypeHandler(parser, doctypfin_sv) + XML_Parser parser + SV * doctypfin_sv + CODE: + { + XML_EndDoctypeDeclHandler dtendhndlr = + (XML_EndDoctypeDeclHandler) 0; + CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser); + + XMLP_UPD(doctypfin_sv); + if (SvTRUE(doctypfin_sv)) + dtendhndlr = doctypeEnd; + + XML_SetEndDoctypeDeclHandler(parser, dtendhndlr); + PUSHRET; + } + + +SV * +XML_SetXMLDeclHandler(parser, xmldec_sv) + XML_Parser parser + SV * xmldec_sv + CODE: + { + XML_XmlDeclHandler xmldechndlr = + (XML_XmlDeclHandler) 0; + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + XMLP_UPD(xmldec_sv); + if (SvTRUE(xmldec_sv)) + xmldechndlr = xmlDecl; + + XML_SetXmlDeclHandler(parser, xmldechndlr); + PUSHRET; + } + + +void +XML_SetBase(parser, base) + XML_Parser parser + SV * base + CODE: + { + char * b; + + if (! SvOK(base)) { + b = (char *) 0; + } + else { + b = SvPV(base, PL_na); + } + + XML_SetBase(parser, b); + } + + +SV * +XML_GetBase(parser) + XML_Parser parser + CODE: + { + const char *ret = XML_GetBase(parser); + if (ret) { + ST(0) = sv_newmortal(); + sv_setpv(ST(0), ret); + } + else { + ST(0) = &PL_sv_undef; + } + } + +void +XML_PositionContext(parser, lines) + XML_Parser parser + int lines + PREINIT: + int parsepos; + int size; + const char *pos = XML_GetInputContext(parser, &parsepos, &size); + const char *markbeg; + const char *limit; + const char *markend; + int length, relpos; + int cnt; + + PPCODE: + if (! pos) + return; + + for (markbeg = &pos[parsepos], cnt = 0; markbeg >= pos; markbeg--) + { + if (*markbeg == '\n') + { + cnt++; + if (cnt > lines) + break; + } + } + + markbeg++; + + relpos = 0; + limit = &pos[size]; + for (markend = &pos[parsepos + 1], cnt = 0; + markend < limit; + markend++) + { + if (*markend == '\n') + { + if (cnt == 0) + relpos = (markend - markbeg) + 1; + cnt++; + if (cnt > lines) + { + markend++; + break; + } + } + } + + length = markend - markbeg; + if (relpos == 0) + relpos = length; + + EXTEND(sp, 2); + PUSHs(sv_2mortal(newSVpvn((char *) markbeg, length))); + PUSHs(sv_2mortal(newSViv(relpos))); + +SV * +GenerateNSName(name, xml_namespace, table, list) + SV * name + SV * xml_namespace + SV * table + SV * list + CODE: + { + STRLEN nmlen, nslen; + char * nmstr; + char * nsstr; + char * buff; + char * bp; + char * blim; + + nmstr = SvPV(name, nmlen); + nsstr = SvPV(xml_namespace, nslen); + + /* Form a namespace-name string that looks like expat's */ + New(321, buff, nmlen + nslen + 2, char); + bp = buff; + blim = bp + nslen; + while (bp < blim) + *bp++ = *nsstr++; + *bp++ = NSDELIM; + blim = bp + nmlen; + while (bp < blim) + *bp++ = *nmstr++; + *bp = '\0'; + + RETVAL = gen_ns_name(buff, (HV *) SvRV(table), (AV *) SvRV(list)); + Safefree(buff); + } + OUTPUT: + RETVAL + +void +XML_DefaultCurrent(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + XML_DefaultCurrent(parser); + } + +SV * +XML_RecognizedString(parser) + XML_Parser parser + CODE: + { + XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0; + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + if (cbv->dflt_sv) { + dflthndl = defaulthandle; + } + + if (cbv->recstring) { + sv_setpvn(cbv->recstring, "", 0); + } + + if (cbv->no_expand) + XML_SetDefaultHandler(parser, recString); + else + XML_SetDefaultHandlerExpand(parser, recString); + + XML_DefaultCurrent(parser); + + if (cbv->no_expand) + XML_SetDefaultHandler(parser, dflthndl); + else + XML_SetDefaultHandlerExpand(parser, dflthndl); + + RETVAL = newSVsv(cbv->recstring); + } + OUTPUT: + RETVAL + +int +XML_GetErrorCode(parser) + XML_Parser parser + +int +XML_GetCurrentLineNumber(parser) + XML_Parser parser + + +int +XML_GetCurrentColumnNumber(parser) + XML_Parser parser + +long +XML_GetCurrentByteIndex(parser) + XML_Parser parser + +int +XML_GetSpecifiedAttributeCount(parser) + XML_Parser parser + +char * +XML_ErrorString(code) + int code + CODE: + const char *ret = XML_ErrorString(code); + ST(0) = sv_newmortal(); + sv_setpv((SV*)ST(0), ret); + +SV * +XML_LoadEncoding(data, size) + char * data + int size + CODE: + { + Encmap_Header *emh = (Encmap_Header *) data; + unsigned pfxsize, bmsize; + + if (size < sizeof(Encmap_Header) + || ntohl(emh->magic) != ENCMAP_MAGIC) { + RETVAL = &PL_sv_undef; + } + else { + Encinfo *entry; + SV *sv; + PrefixMap *pfx; + unsigned short *bm; + int namelen; + int i; + + pfxsize = ntohs(emh->pfsize); + bmsize = ntohs(emh->bmsize); + + if (size != (sizeof(Encmap_Header) + + pfxsize * sizeof(PrefixMap) + + bmsize * sizeof(unsigned short))) { + RETVAL = &PL_sv_undef; + } + else { + /* Convert to uppercase and get name length */ + + for (i = 0; i < sizeof(emh->name); i++) { + char c = emh->name[i]; + + if (c == (char) 0) + break; + + if (c >= 'a' && c <= 'z') + emh->name[i] -= 'a' - 'A'; + } + namelen = i; + + RETVAL = newSVpvn(emh->name, namelen); + + New(322, entry, 1, Encinfo); + entry->prefixes_size = pfxsize; + entry->bytemap_size = bmsize; + for (i = 0; i < 256; i++) { + entry->firstmap[i] = ntohl(emh->map[i]); + } + + pfx = (PrefixMap *) &data[sizeof(Encmap_Header)]; + bm = (unsigned short *) (((char *) pfx) + + sizeof(PrefixMap) * pfxsize); + + New(323, entry->prefixes, pfxsize, PrefixMap); + New(324, entry->bytemap, bmsize, unsigned short); + + for (i = 0; i < pfxsize; i++, pfx++) { + PrefixMap *dest = &entry->prefixes[i]; + + dest->min = pfx->min; + dest->len = pfx->len; + dest->bmap_start = ntohs(pfx->bmap_start); + Copy(pfx->ispfx, dest->ispfx, + sizeof(pfx->ispfx) + sizeof(pfx->ischar), unsigned char); + } + + for (i = 0; i < bmsize; i++) + entry->bytemap[i] = ntohs(bm[i]); + + sv = newSViv(0); + sv_setref_pv(sv, "XML::Parser::Encinfo", (void *) entry); + + if (! EncodingTable) { + EncodingTable + = perl_get_hv("XML::Parser::Expat::Encoding_Table", + FALSE); + if (! EncodingTable) + croak("Can't find XML::Parser::Expat::Encoding_Table"); + } + + hv_store(EncodingTable, emh->name, namelen, sv, 0); + } + } + } + OUTPUT: + RETVAL + +void +XML_FreeEncoding(enc) + Encinfo * enc + CODE: + Safefree(enc->bytemap); + Safefree(enc->prefixes); + Safefree(enc); + +SV * +XML_OriginalString(parser) + XML_Parser parser + CODE: + { + int parsepos, size; + const char *buff = XML_GetInputContext(parser, &parsepos, &size); + if (buff) { + RETVAL = newSVpvn((char *) &buff[parsepos], + XML_GetCurrentByteCount(parser)); + } + else { + RETVAL = newSVpv("", 0); + } + } + OUTPUT: + RETVAL + +SV * +XML_SetStartCdataHandler(parser, startcd_sv) + XML_Parser parser + SV * startcd_sv + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + XML_StartCdataSectionHandler scdhndl = + (XML_StartCdataSectionHandler) 0; + + XMLP_UPD(startcd_sv); + if (SvTRUE(startcd_sv)) + scdhndl = startCdata; + + XML_SetStartCdataSectionHandler(parser, scdhndl); + PUSHRET; + } + +SV * +XML_SetEndCdataHandler(parser, endcd_sv) + XML_Parser parser + SV * endcd_sv + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + XML_EndCdataSectionHandler ecdhndl = + (XML_EndCdataSectionHandler) 0; + + XMLP_UPD(endcd_sv); + if (SvTRUE(endcd_sv)) + ecdhndl = endCdata; + + XML_SetEndCdataSectionHandler(parser, ecdhndl); + PUSHRET; + } + +void +XML_UnsetAllHandlers(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + suspend_callbacks(cbv); + if (cbv->ns) { + XML_SetNamespaceDeclHandler(cbv->p, + (XML_StartNamespaceDeclHandler) 0, + (XML_EndNamespaceDeclHandler) 0); + } + + XML_SetElementHandler(parser, + (XML_StartElementHandler) 0, + (XML_EndElementHandler) 0); + + XML_SetUnknownEncodingHandler(parser, + (XML_UnknownEncodingHandler) 0, + (void *) 0); + } + +int +XML_ElementIndex(parser) + XML_Parser parser + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + RETVAL = cbv->st_serial_stack[cbv->st_serial_stackptr]; + } + OUTPUT: + RETVAL + +void +XML_SkipUntil(parser, index) + XML_Parser parser + unsigned int index + CODE: + { + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + if (index <= cbv->st_serial) + return; + cbv->skip_until = index; + suspend_callbacks(cbv); + } + +int +XML_Do_External_Parse(parser, result) + XML_Parser parser + SV * result + CODE: + { + int type; + + CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser); + + if (SvROK(result) && SvOBJECT(SvRV(result))) { + RETVAL = parse_stream(parser, result); + } + else if (isGV(result)) { + RETVAL = parse_stream(parser, + sv_2mortal(newRV((SV*) GvIOp(result)))); + } + else if (SvPOK(result)) { + STRLEN eslen; + int pret; + char *entstr = SvPV(result, eslen); + + RETVAL = XML_Parse(parser, entstr, eslen, 1); + } + } + OUTPUT: + RETVAL + + diff --git a/Expat/Makefile.PL b/Expat/Makefile.PL new file mode 100644 index 0000000..6d5111c --- /dev/null +++ b/Expat/Makefile.PL @@ -0,0 +1,29 @@ +use ExtUtils::MakeMaker; +use Config; +use English; + +my $libs = "-lexpat"; +my @extras = (); + +push(@extras, INC => "-I$expat_incpath") if $expat_incpath; + +$libs = "-L$expat_libpath $libs" if $expat_libpath; + +push(@extras, CAPI => 'TRUE') + if (($PERL_VERSION >= 5.005) and ($OSNAME eq 'MSWin32') + and ($Config{archname} =~ /-object\b/i)); + +push(@extras, + ABSTRACT => "Lowlevel access to James Clark's expat XML parser", + AUTHOR => 'Matt Sergeant (matt@sergeant.org)') + if ($ExtUtils::MakeMaker::VERSION >= 5.4301); + +WriteMakefile( + NAME => 'XML::Parser::Expat', + C => ['Expat.c'], + LIBS => $libs, + XSPROTOARG => '-noprototypes', + VERSION_FROM => 'Expat.pm', + @extras +); + diff --git a/Expat/encoding.h b/Expat/encoding.h new file mode 100644 index 0000000..4e0374b --- /dev/null +++ b/Expat/encoding.h @@ -0,0 +1,91 @@ +/***************************************************************** +** encoding.h +** +** Copyright 1998 Clark Cooper +** All rights reserved. +** +** This program is free software; you can redistribute it and/or +** modify it under the same terms as Perl itself. +*/ + +#ifndef ENCODING_H +#define ENCODING_H 1 + +#define ENCMAP_MAGIC 0xfeebface + +typedef struct prefixmap { + unsigned char min; + unsigned char len; /* 0 => 256 */ + unsigned short bmap_start; + unsigned char ispfx[32]; + unsigned char ischar[32]; +} PrefixMap; + +typedef struct encinf +{ + unsigned short prefixes_size; + unsigned short bytemap_size; + int firstmap[256]; + PrefixMap *prefixes; + unsigned short *bytemap; +} Encinfo; + +typedef struct encmaphdr +{ + unsigned int magic; + char name[40]; + unsigned short pfsize; + unsigned short bmsize; + int map[256]; +} Encmap_Header; + +/*================================================================ +** Structure of Encoding map binary encoding +** +** Note that all shorts and ints are in network order, +** so when packing or unpacking with perl, use 'n' and 'N' respectively. +** In C, use the htonl family of functions. +** +** The basic structure is: +** +** _______________________ +** |Header (including map expat needs for 1st byte) +** |PrefixMap * pfsize +** | This section isn't included for single-byte encodings. +** | For multiple byte encodings, when a byte represents a prefix +** | then it indexes into this vector instead of mapping to a +** | Unicode character. The PrefixMap type is declared above. The +** | ispfx and ischar fields are bitvectors indicating whether +** | the byte being mapped is a prefix or character respectively. +** | If neither is set, then the character is not mapped to Unicode. +** | +** | The min field is the 1st byte mapped for this prefix; the +** | len field is the number of bytes mapped; and bmap_start is +** | the starting index of the map for this prefix in the overall +** | map (next section). +** |unsigned short * bmsize +** | This section also is omitted for single-byte encodings. +** | Each short is either a Unicode scalar or an index into the +** | PrefixMap vector. +** +** The header for these files is declared above as the Encmap_Header type. +** The magic field is a magic number which should match the ENCMAP_MAGIC +** macro above. The next 40 bytes stores IANA registered name for the +** encoding. The pfsize field holds the number of PrefixMaps, which should +** be zero for single byte encodings. The bmsize field holds the number of +** shorts used for the overall map. +** +** The map field contains either the Unicode scalar encoded by the 1st byte +** or -n where n is the number of bytes that such a 1st byte implies (Expat +** requires that the number of bytes to encode a character is indicated by +** the 1st byte) or -1 if the byte doesn't map to any Unicode character. +** +** If the encoding is a multiple byte encoding, then there will be PrefixMap +** and character map sections. The 1st PrefixMap (index 0), covers a range +** of bytes that includes all 1st byte prefixes. +** +** Look at convert_to_unicode in Expat.xs to see how this data structure +** is used. +*/ + +#endif /* ndef ENCODING_H */ diff --git a/Expat/typemap b/Expat/typemap new file mode 100644 index 0000000..47d7dc5 --- /dev/null +++ b/Expat/typemap @@ -0,0 +1,24 @@ +# +##### XML::Parser::Expat typemap +# + +XML_Parser T_PTR +Encinfo * T_ENCOBJ + +################################################################ +INPUT +T_ENCOBJ + if (sv_derived_from($arg, \"XML::Parser::Encinfo\")) { + IV tmp = SvIV((SV*)SvRV($arg)); + $var = ($type) tmp; + } + else + croak(\"$var is not of type XML::Parser::Encinfo\") +################################################################ +OUTPUT +T_ENCOBJ + if ($var) { + sv_setref_pv($arg, \"XML::Parser::Encinfo\", (void*)$var); + } + else + $arg = &PL_sv_undef; diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..6fd6a32 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,60 @@ +Changes Change log +Expat/Expat.pm XML::Parser::Expat module +Expat/Expat.xs Extension library +Expat/Makefile.PL perl MakeMaker script for XML::Parser::Expat +Expat/encoding.h Header file; describes *.enc structure +Expat/typemap XS typemap +MANIFEST This file +Makefile.PL perl MakeMaker script for XML::Parser +Parser.pm XML::Parser module +Parser/LWPExternEnt.pl LWP based external entity handler +Parser/Encodings/Japanese_Encodings.msg Message about Japanese encodings. +Parser/Encodings/README Info about encoding maps +Parser/Encodings/big5.enc Big5 binary encoding map +Parser/Encodings/euc-kr.enc EUC-KR binary encoding map +Parser/Encodings/iso-8859-2.enc ISO-8859-2 binary encoding map +Parser/Encodings/iso-8859-3.enc ISO-8859-3 binary encoding map +Parser/Encodings/iso-8859-4.enc ISO-8859-4 binary encoding map +Parser/Encodings/iso-8859-5.enc ISO-8859-5 binary encoding map +Parser/Encodings/iso-8859-7.enc ISO-8859-7 binary encoding map +Parser/Encodings/iso-8859-8.enc ISO-8859-8 binary encoding map +Parser/Encodings/iso-8859-9.enc ISO-8859-9 binary encoding map +Parser/Encodings/windows-1250.enc cp1250-WinLatin2 binary encoding map +Parser/Encodings/windows-1252.enc cp1252-WinLatin1 binary encoding map +Parser/Encodings/x-euc-jp-jisx0221.enc X-euc-jp-jisx0221 encoding map +Parser/Encodings/x-euc-jp-unicode.enc X-euc-jp-unicde encoding map +Parser/Encodings/x-sjis-cp932.enc x-sjis-cp932 encoding map +Parser/Encodings/x-sjis-jdk117.enc x-sjis-jdk117 encoding map +Parser/Encodings/x-sjis-jisx0221.enc x-sjis-jisx0221 encoding map +Parser/Encodings/x-sjis-unicode.enc x-sjis-unicode encoding map +Parser/Style/Debug.pm Debug style parser +Parser/Style/Objects.pm Objects style parser +Parser/Style/Stream.pm Stream style parser +Parser/Style/Subs.pm Subs style parser +Parser/Style/Tree.pm Tree style parser +README Short explanation +samples/canonical A utility to generate canonical XML +samples/canontst.xml An xml document to demonstrate canonical +samples/ctest.dtd An external DTD used by canontst.xml +samples/REC-xml-19980210.xml The XML spec in xml form +samples/xmlcomments A utility to extract comments +samples/xmlfilter A utility to filter elements +samples/xmlstats A utility to report on element statistics +t/astress.t Test script +t/cdata.t Test script +t/decl.t Test script +t/defaulted.t Test script +t/encoding.t Test script +t/external_ent.t Test script +t/file.t Test script +t/finish.t Test script +t/ext.ent External entity for parament.t test +t/ext2.ent External entity for parament.t test +t/foo.dtd External DTD for parament.t test +t/namespaces.t Test script +t/parament.t Test script +t/partial.t Test script +t/skip.t Test script +t/stream.t Test script +t/styles.t Test script +META.yml Module meta-data (added by MakeMaker) diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..6bd7583 --- /dev/null +++ b/META.yml @@ -0,0 +1,11 @@ +# http://module-build.sourceforge.net/META-spec.html +#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# +name: XML-Parser +version: 2.36 +version_from: Parser.pm +installdirs: site +requires: + LWP: 0 + +distribution_type: module +generated_by: ExtUtils::MakeMaker version 6.17 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..33af549 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,114 @@ +use 5.004; +use ExtUtils::MakeMaker; +use Config; + +$expat_libpath = ''; +$expat_incpath = ''; + +my @replacement_args; + +foreach (@ARGV) { + if (/^EXPAT(LIB|INC)PATH=(.+)/) { + if ($1 eq 'LIB') { + $expat_libpath = $2; + } + else { + $expat_incpath = $2; + } + } + else { + push(@replacement_args, $_); + } +} + +@ARGV = @replacement_args; +if (not $expat_libpath and $] >= 5.006001 and $^O ne 'MSWin32') { + require ExtUtils::Liblist; # Buggy before this + ($expat_libpath) = ExtUtils::Liblist->ext('-lexpat'); +} + +unless ($expat_libpath) { + # Test for existence of libexpat + my $found = 0; + foreach (split(/\s+/, $Config{libpth})) { + if (-f "$_/libexpat." . $Config{so}) { + $found = 1; + last; + } + } + + if (!$found and $^O eq 'MSWin32') { + if (-f 'C:/lib/Expat-2.0.0/Libs/libexpat.dll') { + $expat_libpath = 'C:/lib/Expat-2.0.0/Libs'; + $expat_incpath = 'C:/lib/Expat-2.0.0/Source/lib'; + $found = 1; + } + + } + + unless ($found) { + die <<'Expat_Not_Installed;'; + +Expat must be installed prior to building XML::Parser and I can't find +it in the standard library directories. You can download expat from: + +http://sourceforge.net/projects/expat/ + +If expat is installed, but in a non-standard directory, then use the +following options to Makefile.PL: + + EXPATLIBPATH=... To set the directory in which to find libexpat + + EXPATINCPATH=... To set the directory in which to find expat.h + +For example: + + perl Makefile.PL EXPATLIBPATH=/home/me/lib EXPATINCPATH=/home/me/include + +Note that if you build against a shareable library in a non-standard location +you may (on some platforms) also have to set your LD_LIBRARY_PATH environment +variable at run time for perl to find the library. + +Expat_Not_Installed; + } +} + +# Don't try to descend into Expat directory for testing + +sub MY::test +{ + my $self = shift; + + my $hold = delete $self->{DIR}; + my $ret = $self->MM::test(@_); + $self->{DIR} = $hold if defined($hold); + $ret; +} + +my @extras = (); + +push(@extras, + CAPI => 'TRUE') + if ($PERL_VERSION >= 5.005 and $OSNAME eq 'MSWin32' + and $Config{archname} =~ /-object\b/i); + +push(@extras, + ABSTRACT_FROM => 'Parser.pm', + AUTHOR => 'Clark Cooper (coopercc@netheaven.com)') + if ($ExtUtils::MakeMaker::VERSION >= 5.4301); + +push(@extras, + LICENSE => 'perl') + if ($ExtUtils::MakeMaker::VERSION gt '6.30'); + +WriteMakefile( + NAME => 'XML::Parser', + DIR => [qw(Expat)], + dist => {COMPRESS => 'gzip', SUFFIX => '.gz'}, + VERSION_FROM => 'Parser.pm', + PREREQ_PM => { + LWP => 0, #for tests + }, + @extras + ); + diff --git a/Parser.pm b/Parser.pm new file mode 100644 index 0000000..064b021 --- /dev/null +++ b/Parser.pm @@ -0,0 +1,840 @@ +# XML::Parser +# +# Copyright (c) 1998-2000 Larry Wall and Clark Cooper +# All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package XML::Parser; + +use Carp; + +BEGIN { + require XML::Parser::Expat; + $VERSION = '2.36'; + die "Parser.pm and Expat.pm versions don't match" + unless $VERSION eq $XML::Parser::Expat::VERSION; +} + +use strict; + +use vars qw($VERSION $LWP_load_failed); + +$LWP_load_failed = 0; + +sub new { + my ($class, %args) = @_; + my $style = $args{Style}; + + my $nonexopt = $args{Non_Expat_Options} ||= {}; + + $nonexopt->{Style} = 1; + $nonexopt->{Non_Expat_Options} = 1; + $nonexopt->{Handlers} = 1; + $nonexopt->{_HNDL_TYPES} = 1; + $nonexopt->{NoLWP} = 1; + + $args{_HNDL_TYPES} = {%XML::Parser::Expat::Handler_Setters}; + $args{_HNDL_TYPES}->{Init} = 1; + $args{_HNDL_TYPES}->{Final} = 1; + + $args{Handlers} ||= {}; + my $handlers = $args{Handlers}; + + if (defined($style)) { + my $stylepkg = $style; + + if ($stylepkg !~ /::/) { + $stylepkg = "\u$style"; + + eval { + my $fullpkg = 'XML::Parser::Style::' . $stylepkg; + my $stylefile = $fullpkg; + $stylefile =~ s/::/\//g; + require "$stylefile.pm"; + $stylepkg = $fullpkg; + }; + if ($@) { + # fallback to old behaviour + $stylepkg = 'XML::Parser::' . $stylepkg; + } + } + + my $htype; + foreach $htype (keys %{$args{_HNDL_TYPES}}) { + # Handlers explicity given override + # handlers from the Style package + unless (defined($handlers->{$htype})) { + + # A handler in the style package must either have + # exactly the right case as the type name or a + # completely lower case version of it. + + my $hname = "${stylepkg}::$htype"; + if (defined(&$hname)) { + $handlers->{$htype} = \&$hname; + next; + } + + $hname = "${stylepkg}::\L$htype"; + if (defined(&$hname)) { + $handlers->{$htype} = \&$hname; + next; + } + } + } + } + + unless (defined($handlers->{ExternEnt}) + or defined ($handlers->{ExternEntFin})) { + + if ($args{NoLWP} or $LWP_load_failed) { + $handlers->{ExternEnt} = \&file_ext_ent_handler; + $handlers->{ExternEntFin} = \&file_ext_ent_cleanup; + } + else { + # The following just bootstraps the real LWP external entity + # handler + + $handlers->{ExternEnt} = \&initial_ext_ent_handler; + + # No cleanup function available until LWPExternEnt.pl loaded + } + } + + $args{Pkg} ||= caller; + bless \%args, $class; +} # End of new + +sub setHandlers { + my ($self, @handler_pairs) = @_; + + croak("Uneven number of arguments to setHandlers method") + if (int(@handler_pairs) & 1); + + my @ret; + while (@handler_pairs) { + my $type = shift @handler_pairs; + my $handler = shift @handler_pairs; + unless (defined($self->{_HNDL_TYPES}->{$type})) { + my @types = sort keys %{$self->{_HNDL_TYPES}}; + + croak("Unknown Parser handler type: $type\n Valid types: @types"); + } + push(@ret, $type, $self->{Handlers}->{$type}); + $self->{Handlers}->{$type} = $handler; + } + + return @ret; +} + +sub parse_start { + my $self = shift; + my @expat_options = (); + + my ($key, $val); + while (($key, $val) = each %{$self}) { + push (@expat_options, $key, $val) + unless exists $self->{Non_Expat_Options}->{$key}; + } + + my %handlers = %{$self->{Handlers}}; + my $init = delete $handlers{Init}; + my $final = delete $handlers{Final}; + + my $expatnb = new XML::Parser::ExpatNB(@expat_options, @_); + $expatnb->setHandlers(%handlers); + + &$init($expatnb) + if defined($init); + + $expatnb->{_State_} = 1; + + $expatnb->{FinalHandler} = $final + if defined($final); + + return $expatnb; +} + +sub parse { + my $self = shift; + my $arg = shift; + my @expat_options = (); + my ($key, $val); + while (($key, $val) = each %{$self}) { + push(@expat_options, $key, $val) + unless exists $self->{Non_Expat_Options}->{$key}; + } + + my $expat = new XML::Parser::Expat(@expat_options, @_); + my %handlers = %{$self->{Handlers}}; + my $init = delete $handlers{Init}; + my $final = delete $handlers{Final}; + + $expat->setHandlers(%handlers); + + if ($self->{Base}) { + $expat->base($self->{Base}); + } + + &$init($expat) + if defined($init); + + my @result = (); + my $result; + eval { + $result = $expat->parse($arg); + }; + my $err = $@; + if ($err) { + $expat->release; + die $err; + } + + if ($result and defined($final)) { + if (wantarray) { + @result = &$final($expat); + } + else { + $result = &$final($expat); + } + } + + $expat->release; + + return unless defined wantarray; + return wantarray ? @result : $result; +} + +sub parsestring { + my $self = shift; + $self->parse(@_); +} + +sub parsefile { + my $self = shift; + my $file = shift; + local(*FILE); + open(FILE, $file) or croak "Couldn't open $file:\n$!"; + binmode(FILE); + my @ret; + my $ret; + + $self->{Base} = $file; + + if (wantarray) { + eval { + @ret = $self->parse(*FILE, @_); + }; + } + else { + eval { + $ret = $self->parse(*FILE, @_); + }; + } + my $err = $@; + close(FILE); + die $err if $err; + + return unless defined wantarray; + return wantarray ? @ret : $ret; +} + +sub initial_ext_ent_handler { + # This just bootstraps in the real lwp_ext_ent_handler which + # also loads the URI and LWP modules. + + unless ($LWP_load_failed) { + local($^W) = 0; + + my $stat = + eval { + require('XML/Parser/LWPExternEnt.pl'); + }; + + if ($stat) { + $_[0]->setHandlers(ExternEnt => \&lwp_ext_ent_handler, + ExternEntFin => \&lwp_ext_ent_cleanup); + + goto &lwp_ext_ent_handler; + } + + # Failed to load lwp handler, act as if NoLWP + + $LWP_load_failed = 1; + + my $cmsg = "Couldn't load LWP based external entity handler\n"; + $cmsg .= "Switching to file-based external entity handler\n"; + $cmsg .= " (To avoid this message, use NoLWP option to XML::Parser)\n"; + warn($cmsg); + } + + $_[0]->setHandlers(ExternEnt => \&file_ext_ent_handler, + ExternEntFin => \&file_ext_ent_cleanup); + goto &file_ext_ent_handler; + +} + +sub file_ext_ent_handler { + my ($xp, $base, $path) = @_; + + # Prepend base only for relative paths + + if (defined($base) + and not ($path =~ m!^(?:[\\/]|\w+:)!)) + { + my $newpath = $base; + $newpath =~ s![^\\/:]*$!$path!; + $path = $newpath; + } + + if ($path =~ /^\s*[|>+]/ + or $path =~ /\|\s*$/) { + $xp->{ErrorMessage} + .= "System ID ($path) contains Perl IO control characters"; + return undef; + } + + require IO::File; + my $fh = new IO::File($path); + unless (defined $fh) { + $xp->{ErrorMessage} + .= "Failed to open $path:\n$!"; + return undef; + } + + $xp->{_BaseStack} ||= []; + $xp->{_FhStack} ||= []; + + push(@{$xp->{_BaseStack}}, $base); + push(@{$xp->{_FhStack}}, $fh); + + $xp->base($path); + + return $fh; +} + +sub file_ext_ent_cleanup { + my ($xp) = @_; + + my $fh = pop(@{$xp->{_FhStack}}); + $fh->close; + + my $base = pop(@{$xp->{_BaseStack}}); + $xp->base($base); +} + +1; + +__END__ + +=head1 NAME + +XML::Parser - A perl module for parsing XML documents + +=head1 SYNOPSIS + + use XML::Parser; + + $p1 = new XML::Parser(Style => 'Debug'); + $p1->parsefile('REC-xml-19980210.xml'); + $p1->parse('Hello World'); + + # Alternative + $p2 = new XML::Parser(Handlers => {Start => \&handle_start, + End => \&handle_end, + Char => \&handle_char}); + $p2->parse($socket); + + # Another alternative + $p3 = new XML::Parser(ErrorContext => 2); + + $p3->setHandlers(Char => \&text, + Default => \&other); + + open(FOO, 'xmlgenerator |'); + $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1'); + close(FOO); + + $p3->parsefile('junk.xml', ErrorContext => 3); + +=begin man +.ds PI PI + +=end man + +=head1 DESCRIPTION + +This module provides ways to parse XML documents. It is built on top of +L, which is a lower level interface to James Clark's +expat library. Each call to one of the parsing methods creates a new +instance of XML::Parser::Expat which is then used to parse the document. +Expat options may be provided when the XML::Parser object is created. +These options are then passed on to the Expat object on each parse call. +They can also be given as extra arguments to the parse methods, in which +case they override options given at XML::Parser creation time. + +The behavior of the parser is controlled either by C> and/or +C> options, or by L method. These all provide +mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. +If neither C