diff options
author | Thies C. Arntzen <thies@php.net> | 2001-05-11 17:57:37 +0000 |
---|---|---|
committer | Thies C. Arntzen <thies@php.net> | 2001-05-11 17:57:37 +0000 |
commit | cd2f057f1a5bac5ec724c2602b2c18899fe7a7fa (patch) | |
tree | 4c03ea5a142ce17aae80e375ac2d78e577896448 /ext/xml | |
parent | 814fa2c71ca92d23ab515e9d6873631dfcf44ae1 (diff) | |
download | php-git-cd2f057f1a5bac5ec724c2602b2c18899fe7a7fa.tar.gz |
bump bundled expat to 1.95.1
win32 project _needs_ updating and I don't have Windows
Diffstat (limited to 'ext/xml')
-rw-r--r-- | ext/xml/config.m4 | 16 | ||||
-rw-r--r-- | ext/xml/expat/Changes | 34 | ||||
-rw-r--r-- | ext/xml/expat/Makefile.in | 6 | ||||
-rw-r--r-- | ext/xml/expat/README | 57 | ||||
-rw-r--r-- | ext/xml/expat/README.php | 11 | ||||
-rw-r--r-- | ext/xml/expat/ascii.h | 86 | ||||
-rw-r--r-- | ext/xml/expat/asciitab.h (renamed from ext/xml/expat/xmltok/asciitab.h) | 29 | ||||
-rw-r--r-- | ext/xml/expat/expat.html | 73 | ||||
-rw-r--r-- | ext/xml/expat/iasciitab.h (renamed from ext/xml/expat/xmltok/iasciitab.h) | 29 | ||||
-rw-r--r-- | ext/xml/expat/latin1tab.h (renamed from ext/xml/expat/xmltok/latin1tab.h) | 29 | ||||
-rw-r--r-- | ext/xml/expat/nametab.h (renamed from ext/xml/expat/xmltok/nametab.h) | 0 | ||||
-rw-r--r-- | ext/xml/expat/utf8tab.h (renamed from ext/xml/expat/xmltok/utf8tab.h) | 29 | ||||
-rw-r--r-- | ext/xml/expat/xmlparse.c (renamed from ext/xml/expat/xmlparse/xmlparse.c) | 1981 | ||||
-rw-r--r-- | ext/xml/expat/xmlparse/Makefile.in | 7 | ||||
-rw-r--r-- | ext/xml/expat/xmlparse/expat_hashtable.h | 69 | ||||
-rw-r--r-- | ext/xml/expat/xmlparse/hashtable.c | 153 | ||||
-rw-r--r-- | ext/xml/expat/xmlparse/xmlparse.dsp | 202 | ||||
-rw-r--r-- | ext/xml/expat/xmlparse/xmlparse.h | 482 | ||||
-rw-r--r-- | ext/xml/expat/xmlrole.c (renamed from ext/xml/expat/xmltok/xmlrole.c) | 398 | ||||
-rw-r--r-- | ext/xml/expat/xmlrole.h (renamed from ext/xml/expat/xmltok/xmlrole.h) | 45 | ||||
-rw-r--r-- | ext/xml/expat/xmltok.c (renamed from ext/xml/expat/xmltok/xmltok.c) | 169 | ||||
-rw-r--r-- | ext/xml/expat/xmltok.h (renamed from ext/xml/expat/xmltok/xmltok.h) | 86 | ||||
-rw-r--r-- | ext/xml/expat/xmltok/Makefile.in | 5 | ||||
-rw-r--r-- | ext/xml/expat/xmltok/xmldef.h | 63 | ||||
-rw-r--r-- | ext/xml/expat/xmltok/xmltok.dsp | 163 | ||||
-rw-r--r-- | ext/xml/expat/xmltok/xmltok_impl.h | 71 | ||||
-rw-r--r-- | ext/xml/expat/xmltok_impl.c (renamed from ext/xml/expat/xmltok/xmltok_impl.c) | 244 | ||||
-rw-r--r-- | ext/xml/expat/xmltok_impl.h | 46 | ||||
-rw-r--r-- | ext/xml/expat/xmltok_ns.c (renamed from ext/xml/expat/xmltok/xmltok_ns.c) | 10 | ||||
-rw-r--r-- | ext/xml/php_xml.h | 13 | ||||
-rw-r--r-- | ext/xml/xml.c | 16 |
31 files changed, 2505 insertions, 2117 deletions
diff --git a/ext/xml/config.m4 b/ext/xml/config.m4 index ec85a84d73..1cbc39146e 100644 --- a/ext/xml/config.m4 +++ b/ext/xml/config.m4 @@ -16,10 +16,9 @@ PHP_ARG_ENABLE(xml,for XML support, [ --disable-xml Disable XML support using bundled expat lib], yes) if test "$PHP_XML" != "no"; then - - AC_DEFINE(HAVE_LIBEXPAT, 1, [ ]) - if test "$PHP_XML" = "yes"; then + AC_DEFINE(HAVE_LIBEXPAT, 1, [ ]) + AC_DEFINE(HAVE_LIBEXPAT_BUNDLED, 1, [ ]) CPPFLAGS="$CPPFLAGS -DXML_BYTE_ORDER=$order" EXPAT_INTERNAL_LIBADD="expat/libexpat.la" PHP_SUBST(EXPAT_INTERNAL_LIBADD) @@ -28,17 +27,12 @@ if test "$PHP_XML" != "no"; then PHP_SUBST(EXPAT_SHARED_LIBADD) PHP_EXTENSION(xml, $ext_shared) LIB_BUILD($ext_builddir/expat,$ext_shared,yes) - LIB_BUILD($ext_builddir/expat/xmlparse,$ext_shared,yes) - LIB_BUILD($ext_builddir/expat/xmltok,$ext_shared,yes) - PHP_ADD_INCLUDE($ext_srcdir/expat/xmltok) - PHP_ADD_INCLUDE($ext_srcdir/expat/xmlparse) - PHP_FAST_OUTPUT($ext_builddir/expat/Makefile $ext_builddir/expat/xmlparse/Makefile $ext_builddir/expat/xmltok/Makefile) - + PHP_ADD_INCLUDE($ext_srcdir/expat) + PHP_FAST_OUTPUT($ext_builddir/expat/Makefile) else - EXPAT_DIR="$withval" if test -f $EXPAT_DIR/lib/libexpat.a -o -f $EXPAT_DIR/lib/libexpat.so ; then - AC_DEFINE(HAVE_LIBEXPAT2, 1, [ ]) + AC_DEFINE(HAVE_LIBEXPAT, 1, [ ]) PHP_ADD_INCLUDE($EXPAT_DIR/include) else AC_MSG_RESULT(not found) diff --git a/ext/xml/expat/Changes b/ext/xml/expat/Changes new file mode 100644 index 0000000000..5b78b862a7 --- /dev/null +++ b/ext/xml/expat/Changes @@ -0,0 +1,34 @@ +Release 1.95.1 Sun Oct 22 15:11:36 EDT 2000 + - Changes to get expat to build under Microsoft compiler + - Removed all aborts and instead return an UNEXPECTED_STATE error. + - Fixed a bug where a stray '%' in an entity value would cause an + abort. + - Defined XML_SetEndNamespaceDeclHandler. Thanks to Darryl Miles for + finding this oversight. 
+ - Changed default patterns in lib/Makefile.in to fit non-GNU makes + Thanks to robin@unrated.net for reporting and providing an + account to test on. + - The reference had the wrong label for XML_SetStartNamespaceDecl. + Reported by an anonymous user. +Release 1.95.0 Fri Sep 29 2000 + - XML_ParserCreate_MM + Allows you to set a memory management suite to replace the + standard malloc,realloc, and free. + - XML_SetReturnNSTriplet + If you turn this feature on when namespace processing is in + effect, then qualified, prefixed element and attribute names + are returned as "uri|name|prefix" where '|' is whatever + separator character is used in namespace processing. + - Merged in features from perl-expat + o XML_SetElementDeclHandler + o XML_SetAttlistDeclHandler + o XML_SetXmlDeclHandler + o XML_SetEntityDeclHandler + o StartDoctypeDeclHandler takes 3 additional parameters: + sysid, pubid, has_internal_subset + o Many paired handler setters (like XML_SetElementHandler) + now have corresponding individual handler setters + o XML_GetInputContext for getting the input context of + the current parse position. + - Added reference material + - Packaged into a distribution that builds a sharable library
\ No newline at end of file diff --git a/ext/xml/expat/Makefile.in b/ext/xml/expat/Makefile.in index c7daf05c0c..a7173241a1 100644 --- a/ext/xml/expat/Makefile.in +++ b/ext/xml/expat/Makefile.in @@ -1,9 +1,5 @@ LTLIBRARY_NAME = libexpat.la -LTLIBRARY_DEPENDENCIES = xmltok/libexpat_tok.la xmlparse/libexpat_parse.la -LTLIBRARY_LIBADD = $(LTLIBRARY_DEPENDENCIES) -LTLIBRARY_SHARED_LIBADD = $(LTLIBRARY_LIBADD) - -SUBDIRS = xmltok xmlparse +LTLIBRARY_SOURCES = xmlparse.c xmlrole.c xmltok.c include $(top_srcdir)/build/dynlib.mk diff --git a/ext/xml/expat/README b/ext/xml/expat/README new file mode 100644 index 0000000000..d1c6874190 --- /dev/null +++ b/ext/xml/expat/README @@ -0,0 +1,57 @@ + + Expat, Release 1.95.1 + +This is expat, the C library for parsing XML, written by James Clark. Expat +is a stream oriented XML parser. This means that you register handlers with +the parser prior to starting the parse. These handlers are called when +the parser discovers the associated structures in the document being parsed. +A start tag is an example of the kind of structures for which you may +register handlers. + +Expat is free software. You may copy, distribute, and modify it under the +terms of the License contained in the file, COPYING, distributed with this +package. This license is the same as the MIT/X Consortium license. + +Versions of expat that have an odd minor version (the middle number in the +release above), are development releases and should be considered as +beta software. Releases with even minor version numbers are intended to be +production grade software. + +To build expat, you first run the configuration shell script in the top +level distribution directory: + + ./configure + +There are many options which you may provide to configure (which you can +discover by running configure with the --help option.) But the one of most +interest is the one that sets the installation directory. 
By default, +the configure script will set things up to install libexpat into +/usr/local/lib and expat.h into /usr/local/include. If, for example, you'd +prefer to install into /home/me/mystuff/lib and /home/me/mystuff/include, +you can tell configure about that with: + + ./configure --prefix=/home/me/mystuff + +After running the configure script, the "make" command will build things and +"make install" will install things into their proper location. Note that +you need to have write permission into the directories into which things +will be installed. + +Alternatively, on Win32 systems with Microsoft's Developer's Studio installed, +you can simply double-click on lib/expat.dsp from Windows Explorer and build +and install in the usual way from within DevStudio. + +As a third alternative you may choose to download expat_win32bin which has +a pre-compiled dll in it. + +A reference manual is available in the doc/reference.html in this +distribution. + +The homepage for this project is http://expat.sourceforge.net. There are +links there to connect you to the bug reports page. If you need to report +a bug when you don't have access to a browser, you may also send a bug +report by email to expat-bugs@lists.sourceforge.net. + +Discussion related to the direction of future expat development takes place +on expat-discuss@lists.sourceforge.net. Archives of this list may be found +at http://www.geocrawler.com/redir-sf.php3?list=expat-discuss. 
diff --git a/ext/xml/expat/README.php b/ext/xml/expat/README.php new file mode 100644 index 0000000000..187ef60be8 --- /dev/null +++ b/ext/xml/expat/README.php @@ -0,0 +1,11 @@ +this is the PHP-bundled version of expat 1.95.1 + +changes from the original version: +- include <php_config.h> instead of <config.h> +- include "php_compat.h" for namespace protection +- hardcode version in xmlparse.c +- stripped off all unneeded files + + +thies@thieso.net, 11th May, 2001 + diff --git a/ext/xml/expat/ascii.h b/ext/xml/expat/ascii.h new file mode 100644 index 0000000000..6376b1f311 --- /dev/null +++ b/ext/xml/expat/ascii.h @@ -0,0 +1,86 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. +*/ + +#define ASCII_A 0x41 +#define ASCII_B 0x42 +#define ASCII_C 0x43 +#define ASCII_D 0x44 +#define ASCII_E 0x45 +#define ASCII_F 0x46 +#define ASCII_G 0x47 +#define ASCII_H 0x48 +#define ASCII_I 0x49 +#define ASCII_J 0x4A +#define ASCII_K 0x4B +#define ASCII_L 0x4C +#define ASCII_M 0x4D +#define ASCII_N 0x4E +#define ASCII_O 0x4F +#define ASCII_P 0x50 +#define ASCII_Q 0x51 +#define ASCII_R 0x52 +#define ASCII_S 0x53 +#define ASCII_T 0x54 +#define ASCII_U 0x55 +#define ASCII_V 0x56 +#define ASCII_W 0x57 +#define ASCII_X 0x58 +#define ASCII_Y 0x59 +#define ASCII_Z 0x5A + +#define ASCII_a 0x61 +#define ASCII_b 0x62 +#define ASCII_c 0x63 +#define ASCII_d 0x64 +#define ASCII_e 0x65 +#define ASCII_f 0x66 +#define ASCII_g 0x67 +#define ASCII_h 0x68 +#define ASCII_i 0x69 +#define ASCII_j 0x6A +#define ASCII_k 0x6B +#define ASCII_l 0x6C +#define ASCII_m 0x6D +#define ASCII_n 0x6E +#define ASCII_o 0x6F +#define ASCII_p 0x70 +#define ASCII_q 0x71 +#define ASCII_r 0x72 +#define ASCII_s 0x73 +#define ASCII_t 0x74 +#define ASCII_u 0x75 +#define ASCII_v 0x76 +#define ASCII_w 0x77 +#define ASCII_x 0x78 +#define ASCII_y 0x79 +#define ASCII_z 0x7A + +#define ASCII_0 0x30 +#define ASCII_1 0x31 +#define ASCII_2 0x32 +#define ASCII_3 0x33 
+#define ASCII_4 0x34 +#define ASCII_5 0x35 +#define ASCII_6 0x36 +#define ASCII_7 0x37 +#define ASCII_8 0x38 +#define ASCII_9 0x39 + +#define ASCII_TAB 0x09 +#define ASCII_SPACE 0x20 +#define ASCII_EXCL 0x21 +#define ASCII_QUOT 0x22 +#define ASCII_AMP 0x26 +#define ASCII_APOS 0x27 +#define ASCII_MINUS 0x2D +#define ASCII_PERIOD 0x2E +#define ASCII_COLON 0x3A +#define ASCII_SEMI 0x3B +#define ASCII_LT 0x3C +#define ASCII_EQUALS 0x3D +#define ASCII_GT 0x3E +#define ASCII_LSQB 0x5B +#define ASCII_RSQB 0x5D +#define ASCII_UNDERSCORE 0x5F diff --git a/ext/xml/expat/xmltok/asciitab.h b/ext/xml/expat/asciitab.h index 8a8a2dd388..eb445cc52c 100644 --- a/ext/xml/expat/xmltok/asciitab.h +++ b/ext/xml/expat/asciitab.h @@ -1,31 +1,6 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. 
If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, diff --git a/ext/xml/expat/expat.html b/ext/xml/expat/expat.html deleted file mode 100644 index 3806ca8d0e..0000000000 --- a/ext/xml/expat/expat.html +++ /dev/null @@ -1,73 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" -"http://www.w3.org/TR/REC-html40/loose.dtd"> - -<HTML> - -<TITLE>expat</TITLE> - -<BODY> - -<H1>expat - XML Parser Toolkit</H1> - -<H3>Version 1.1</H3> - -<P>Copyright (c) 1998, 1999 James Clark. Expat is subject to the <A -HREF="http://www.mozilla.org/NPL/NPL-1_1Final.html">Mozilla Public -License Version 1.1</A>. Alternatively you may use expat under the <A -href="http://www.gnu.org/copyleft/gpl.html">GNU General Public -License</A> instead. Please contact me if you wish to negotiate an -alternative license.</P> - -<P>Expat is an <A -HREF="http://www.w3.org/TR/1998/REC-xml-19980210">XML 1.0</A> parser -written in C. It aims to be fully conforming. It is currently not a -validating XML processor. The current production version of expat can -be downloaded from <A href = "ftp://ftp.jclark.com/pub/xml/expat.zip" ->ftp://ftp.jclark.com/pub/xml/expat.zip</A>.</P> - -<P>The directory <SAMP>xmltok</SAMP> contains a low-level library for -tokenizing XML. The interface is documented in -<SAMP>xmltok/xmltok.h</SAMP>.</P> - -<P>The directory <SAMP>xmlparse</SAMP> contains an XML parser library -which is built on top of the <SAMP>xmltok</SAMP> library. The -interface is documented in <SAMP>xmlparse/xmlparse.h</SAMP>. 
The -directory <SAMP>sample</SAMP> contains a simple example program using -this interface; <SAMP>sample/build.bat</SAMP> is a batch file to build -the example using Visual C++.</P> - -<P>The directory <SAMP>xmlwf</SAMP> contains the <SAMP>xmlwf</SAMP> -application, which uses the <SAMP>xmlparse</SAMP> library. The -arguments to <SAMP>xmlwf</SAMP> are one or more files which are each -to be checked for well-formedness. An option <SAMP>-d -<VAR>dir</VAR></SAMP> can be specified; for each well-formed input -file the corresponding <A -href="http://www.jclark.com/xml/canonxml.html">canonical XML</A> will -be written to <SAMP>dir/<VAR>f</VAR></SAMP>, where -<SAMP><VAR>f</VAR></SAMP> is the filename (without any path) of the -input file. A <CODE>-x</CODE> option will cause references to -external general entities to be processed. A <CODE>-s</CODE> option -will make documents that are not standalone cause an error (a document -is considered standalone if either it is intrinsically standalone -because it has no external subset and no references to parameter -entities in the internal subset or it is declared as standalone in the -XML declaration).</P> - -<P>The <SAMP>bin</SAMP> directory contains Win32 executables. The -<SAMP>lib</SAMP> directory contains Win32 import libraries.</P> - -<P>Answers to some frequently asked questions about expat can be found -in the <A HREF="http://www.jclark.com/xml/expatfaq.html">expat -FAQ</A>.</P> - -<P></P> - -<ADDRESS> - -<A HREF="mailto:jjc@jclark.com">James Clark</A> - -</ADDRESS> - -</BODY> - -</HTML> diff --git a/ext/xml/expat/xmltok/iasciitab.h b/ext/xml/expat/iasciitab.h index 333d6bb779..55dbc398b8 100644 --- a/ext/xml/expat/xmltok/iasciitab.h +++ b/ext/xml/expat/iasciitab.h @@ -1,31 +1,6 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. 
You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ diff --git a/ext/xml/expat/xmltok/latin1tab.h b/ext/xml/expat/latin1tab.h index 48609aa8f9..178b1d186d 100644 --- a/ext/xml/expat/xmltok/latin1tab.h +++ b/ext/xml/expat/latin1tab.h @@ -1,31 +1,6 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. 
- -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, diff --git a/ext/xml/expat/xmltok/nametab.h b/ext/xml/expat/nametab.h index b05e62c77a..b05e62c77a 100644 --- a/ext/xml/expat/xmltok/nametab.h +++ b/ext/xml/expat/nametab.h diff --git a/ext/xml/expat/xmltok/utf8tab.h b/ext/xml/expat/utf8tab.h index a38fe624e8..9e3b6b83eb 100644 --- a/ext/xml/expat/xmltok/utf8tab.h +++ b/ext/xml/expat/utf8tab.h @@ -1,31 +1,6 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. 
All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ diff --git a/ext/xml/expat/xmlparse/xmlparse.c b/ext/xml/expat/xmlparse.c index faae9829ce..2749d51c56 100644 --- a/ext/xml/expat/xmlparse/xmlparse.c +++ b/ext/xml/expat/xmlparse.c @@ -1,37 +1,33 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. 
If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ +static char RCSId[] + = "$Header$"; + #include "php_compat.h" -#include "xmldef.h" -#include "xmlparse.h" +#ifdef COMPILED_FROM_DSP +# include "winconfig.h" +# define XMLPARSEAPI __declspec(dllexport) +# include "expat.h" +# undef XMLPARSEAPI +#else +#include <php_config.h> + +#ifdef __declspec +# define XMLPARSEAPI __declspec(dllexport) +#endif + +#include "expat.h" + +#ifdef __declspec +# undef XMLPARSEAPI +#endif +#endif /* ndef COMPILED_FROM_DSP */ + +#include <stddef.h> #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX @@ -62,7 +58,6 @@ typedef char ICHAR; #endif - #ifdef XML_UNICODE_WCHAR_T #define XML_T(x) L ## x #else @@ -74,7 +69,25 @@ typedef char ICHAR; #include "xmltok.h" #include "xmlrole.h" -#include "expat_hashtable.h" + +typedef const XML_Char *KEY; + +typedef struct { + KEY name; +} NAMED; + +typedef struct { + NAMED **v; + size_t size; + size_t used; + size_t usedLim; + XML_Memory_Handling_Suite *mem; +} HASH_TABLE; + +typedef struct { + NAMED **p; + NAMED **end; +} HASH_TABLE_ITER; #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 @@ -124,8 +137,19 @@ typedef struct { const XML_Char *publicId; const XML_Char *notation; char open; + char is_param; } ENTITY; +typedef struct { + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char * name; + int firstchild; + int lastchild; + int childcnt; + int nextsib; +} CONTENT_SCAFFOLD; + 
typedef struct block { struct block *next; int size; @@ -138,6 +162,7 @@ typedef struct { const XML_Char *end; XML_Char *ptr; XML_Char *start; + XML_Memory_Handling_Suite *mem; } STRING_POOL; /* The XML_Char before the name is used to determine whether @@ -158,6 +183,7 @@ typedef struct { typedef struct { const XML_Char *name; PREFIX *prefix; + const ATTRIBUTE_ID *idAtt; int nDefaultAtts; int allocDefaultAtts; DEFAULT_ATTRIBUTE *defaultAtts; @@ -171,8 +197,18 @@ typedef struct { STRING_POOL pool; int complete; int standalone; - const XML_Char *base; +#ifdef XML_DTD + HASH_TABLE paramEntities; +#endif /* XML_DTD */ PREFIX defaultPrefix; + /* === scaffolding for building content model === */ + int in_eldecl; + CONTENT_SCAFFOLD *scaffold; + unsigned contentStringLen; + unsigned scaffSize; + unsigned scaffCount; + int scaffLevel; + int *scaffIndex; } DTD; typedef struct open_internal_entity { @@ -191,10 +227,11 @@ static Processor prologProcessor; static Processor prologInitProcessor; static Processor contentProcessor; static Processor cdataSectionProcessor; +#ifdef XML_DTD +static Processor ignoreSectionProcessor; +#endif /* XML_DTD */ static Processor epilogProcessor; -#if 0 static Processor errorProcessor; -#endif static Processor externalEntityInitProcessor; static Processor externalEntityInitProcessor2; static Processor externalEntityInitProcessor3; @@ -207,16 +244,29 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const c static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error +doProlog(XML_Parser parser, const ENCODING *enc, const char *s, + const char *end, int tok, const char *next, const char **nextPtr); +static enum XML_Error +processInternalParamEntity(XML_Parser parser, ENTITY *entity); +static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *start, const char *end, const char **endPtr); static enum XML_Error doCdataSection(XML_Parser parser, const 
ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +#ifdef XML_DTD +static enum XML_Error +doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); +#endif /* XML_DTD */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, BINDING **bindingsPtr); static int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); + static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue); +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, + int isCdata, int isId, const XML_Char *dfltValue, + XML_Parser parser); + static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, STRING_POOL *); @@ -227,7 +277,7 @@ static ATTRIBUTE_ID * getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error -storeEntityValue(XML_Parser parser, const char *start, const char *end); +storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int @@ -238,19 +288,46 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const c static const XML_Char *getContext(XML_Parser parser); static int setContext(XML_Parser parser, const XML_Char *context); static void normalizePublicId(XML_Char *s); -static int dtdInit(DTD *); -static void dtdDestroy(DTD *); -static int dtdCopy(DTD *newDtd, const DTD *oldDtd); -static void poolInit(STRING_POOL *); +static int dtdInit(DTD *, XML_Parser parser); + +static void dtdDestroy(DTD *, XML_Parser parser); + +static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser); + 
+static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *, + XML_Parser parser); + +#ifdef XML_DTD +static void dtdSwap(DTD *, DTD *); +#endif /* XML_DTD */ + +static NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize); + +static void hashTableInit(HASH_TABLE *, XML_Memory_Handling_Suite *ms); + +static void hashTableDestroy(HASH_TABLE *); +static void hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); +static NAMED *hashTableIterNext(HASH_TABLE_ITER *); +static void poolInit(STRING_POOL *, XML_Memory_Handling_Suite *ms); static void poolClear(STRING_POOL *); static void poolDestroy(STRING_POOL *); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); + static int poolGrow(STRING_POOL *pool); + +static int nextScaffoldPart(XML_Parser parser); +static XML_Content *build_model(XML_Parser parser); + static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); +static const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s); +static ELEMENT_TYPE * getElementType(XML_Parser Paraser, + const ENCODING *enc, + const char *ptr, + const char *end); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) @@ -269,6 +346,7 @@ typedef struct { void *m_userData; void *m_handlerArg; char *m_buffer; + XML_Memory_Handling_Suite m_mem; /* first character to be parsed */ const char *m_bufferPtr; /* past last character to be parsed */ @@ -287,6 +365,8 @@ typedef struct { XML_StartCdataSectionHandler m_startCdataSectionHandler; XML_EndCdataSectionHandler m_endCdataSectionHandler; XML_DefaultHandler m_defaultHandler; + XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; + XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; XML_UnparsedEntityDeclHandler 
m_unparsedEntityDeclHandler; XML_NotationDeclHandler m_notationDeclHandler; XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; @@ -295,10 +375,16 @@ typedef struct { XML_ExternalEntityRefHandler m_externalEntityRefHandler; void *m_externalEntityRefHandlerArg; XML_UnknownEncodingHandler m_unknownEncodingHandler; + XML_ElementDeclHandler m_elementDeclHandler; + XML_AttlistDeclHandler m_attlistDeclHandler; + XML_EntityDeclHandler m_entityDeclHandler; + XML_XmlDeclHandler m_xmlDeclHandler; const ENCODING *m_encoding; INIT_ENCODING m_initEncoding; + const ENCODING *m_internalEncoding; const XML_Char *m_protocolEncodingName; int m_ns; + int m_ns_triplets; void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; @@ -313,18 +399,25 @@ typedef struct { int m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; + const XML_Char *m_doctypeName; + const XML_Char *m_doctypeSysid; + const XML_Char *m_doctypePubid; + const XML_Char *m_declAttributeType; const XML_Char *m_declNotationName; const XML_Char *m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; ATTRIBUTE_ID *m_declAttributeId; char m_declAttributeIsCdata; + char m_declAttributeIsId; DTD m_dtd; + const XML_Char *m_curBase; TAG *m_tagStack; TAG *m_freeTagList; BINDING *m_inheritedBindings; BINDING *m_freeBindingList; int m_attsSize; int m_nSpecifiedAtts; + int m_idAttIndex; ATTRIBUTE *m_atts; POSITION m_position; STRING_POOL m_tempPool; @@ -333,8 +426,16 @@ typedef struct { unsigned m_groupSize; int m_hadExternalDoctype; XML_Char m_namespaceSeparator; +#ifdef XML_DTD + enum XML_ParamEntityParsing m_paramEntityParsing; + XML_Parser m_parentParser; +#endif } Parser; +#define MALLOC(s) (((Parser *)parser)->m_mem.malloc_fcn((s))) +#define REALLOC(p,s) (((Parser *)parser)->m_mem.realloc_fcn((p),(s))) +#define FREE(p) (((Parser *)parser)->m_mem.free_fcn((p))) + #define userData (((Parser *)parser)->m_userData) #define handlerArg (((Parser *)parser)->m_handlerArg) 
#define startElementHandler (((Parser *)parser)->m_startElementHandler) @@ -345,6 +446,8 @@ typedef struct { #define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler) #define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler) #define defaultHandler (((Parser *)parser)->m_defaultHandler) +#define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler) +#define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler) #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler) #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler) #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler) @@ -353,8 +456,13 @@ typedef struct { #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler) #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg) #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler) +#define elementDeclHandler (((Parser *)parser)->m_elementDeclHandler) +#define attlistDeclHandler (((Parser *)parser)->m_attlistDeclHandler) +#define entityDeclHandler (((Parser *)parser)->m_entityDeclHandler) +#define xmlDeclHandler (((Parser *)parser)->m_xmlDeclHandler) #define encoding (((Parser *)parser)->m_encoding) #define initEncoding (((Parser *)parser)->m_initEncoding) +#define internalEncoding (((Parser *)parser)->m_internalEncoding) #define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem) #define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData) #define unknownEncodingHandlerData \ @@ -362,6 +470,7 @@ typedef struct { #define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease) #define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName) #define ns (((Parser *)parser)->m_ns) +#define ns_triplets (((Parser *)parser)->m_ns_triplets) #define prologState (((Parser *)parser)->m_prologState) #define 
processor (((Parser *)parser)->m_processor) #define errorCode (((Parser *)parser)->m_errorCode) @@ -381,12 +490,18 @@ typedef struct { #define dataBuf (((Parser *)parser)->m_dataBuf) #define dataBufEnd (((Parser *)parser)->m_dataBufEnd) #define dtd (((Parser *)parser)->m_dtd) +#define curBase (((Parser *)parser)->m_curBase) #define declEntity (((Parser *)parser)->m_declEntity) +#define doctypeName (((Parser *)parser)->m_doctypeName) +#define doctypeSysid (((Parser *)parser)->m_doctypeSysid) +#define doctypePubid (((Parser *)parser)->m_doctypePubid) +#define declAttributeType (((Parser *)parser)->m_declAttributeType) #define declNotationName (((Parser *)parser)->m_declNotationName) #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId) #define declElementType (((Parser *)parser)->m_declElementType) #define declAttributeId (((Parser *)parser)->m_declAttributeId) #define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata) +#define declAttributeIsId (((Parser *)parser)->m_declAttributeIsId) #define freeTagList (((Parser *)parser)->m_freeTagList) #define freeBindingList (((Parser *)parser)->m_freeBindingList) #define inheritedBindings (((Parser *)parser)->m_inheritedBindings) @@ -394,12 +509,23 @@ typedef struct { #define atts (((Parser *)parser)->m_atts) #define attsSize (((Parser *)parser)->m_attsSize) #define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts) +#define idAttIndex (((Parser *)parser)->m_idAttIndex) #define tempPool (((Parser *)parser)->m_tempPool) #define temp2Pool (((Parser *)parser)->m_temp2Pool) #define groupConnector (((Parser *)parser)->m_groupConnector) #define groupSize (((Parser *)parser)->m_groupSize) #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype) #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator) +#ifdef XML_DTD +#define parentParser (((Parser *)parser)->m_parentParser) +#define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing) +#endif /* XML_DTD */ + 
+#ifdef COMPILED_FROM_DSP +BOOL WINAPI DllMain(HINSTANCE h, DWORD r, LPVOID p) { + return TRUE; +} +#endif /* def COMPILED_FROM_DSP */ #ifdef _MSC_VER #ifdef _DEBUG @@ -412,7 +538,54 @@ Parser *asParser(XML_Parser parser) XML_Parser XML_ParserCreate(const XML_Char *encodingName) { - XML_Parser parser = malloc(sizeof(Parser)); + return XML_ParserCreate_MM(encodingName, NULL, NULL); +} + +XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) +{ + XML_Char tmp[2]; + *tmp = nsSep; + return XML_ParserCreate_MM(encodingName, NULL, tmp); +} + +XML_Parser +XML_ParserCreate_MM(const XML_Char *encodingName, + const XML_Memory_Handling_Suite *memsuite, + const XML_Char *nameSep) { + + XML_Parser parser; + static + const XML_Char implicitContext[] = { + XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='), + XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'), + XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'), + XML_T('.'), XML_T('w'), XML_T('3'), + XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'), + XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'), + XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'), + XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'), + XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'), + XML_T('\0') + }; + + + if (memsuite) { + XML_Memory_Handling_Suite *mtemp; + parser = memsuite->malloc_fcn(sizeof(Parser)); + mtemp = &(((Parser *) parser)->m_mem); + mtemp->malloc_fcn = memsuite->malloc_fcn; + mtemp->realloc_fcn = memsuite->realloc_fcn; + mtemp->free_fcn = memsuite->free_fcn; + } + else { + XML_Memory_Handling_Suite *mtemp; + parser = malloc(sizeof(Parser)); + mtemp = &(((Parser *) parser)->m_mem); + mtemp->malloc_fcn = malloc; + mtemp->realloc_fcn = realloc; + mtemp->free_fcn = free; + } + if (!parser) return parser; processor = prologInitProcessor; @@ -427,6 +600,8 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) startCdataSectionHandler = 0; endCdataSectionHandler = 0; defaultHandler = 
0; + startDoctypeDeclHandler = 0; + endDoctypeDeclHandler = 0; unparsedEntityDeclHandler = 0; notationDeclHandler = 0; startNamespaceDeclHandler = 0; @@ -435,6 +610,10 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) externalEntityRefHandler = 0; externalEntityRefHandlerArg = parser; unknownEncodingHandler = 0; + elementDeclHandler = 0; + attlistDeclHandler = 0; + entityDeclHandler = 0; + xmlDeclHandler = 0; buffer = 0; bufferPtr = 0; bufferEnd = 0; @@ -444,6 +623,10 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) declElementType = 0; declAttributeId = 0; declEntity = 0; + doctypeName = 0; + doctypeSysid = 0; + doctypePubid = 0; + declAttributeType = 0; declNotationName = 0; declNotationPublicId = 0; memset(&position, 0, sizeof(POSITION)); @@ -458,9 +641,9 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) freeBindingList = 0; inheritedBindings = 0; attsSize = INIT_ATTS_SIZE; - atts = malloc(attsSize * sizeof(ATTRIBUTE)); + atts = MALLOC(attsSize * sizeof(ATTRIBUTE)); nSpecifiedAtts = 0; - dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); + dataBuf = MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); groupSize = 0; groupConnector = 0; hadExternalDoctype = 0; @@ -469,48 +652,41 @@ XML_Parser XML_ParserCreate(const XML_Char *encodingName) unknownEncodingData = 0; unknownEncodingHandlerData = 0; namespaceSeparator = '!'; +#ifdef XML_DTD + parentParser = 0; + paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif ns = 0; - poolInit(&tempPool); - poolInit(&temp2Pool); + ns_triplets = 0; + poolInit(&tempPool, &(((Parser *) parser)->m_mem)); + poolInit(&temp2Pool, &(((Parser *) parser)->m_mem)); protocolEncodingName = encodingName ? 
poolCopyString(&tempPool, encodingName) : 0; - if (!dtdInit(&dtd) || !atts || !dataBuf + curBase = 0; + if (!dtdInit(&dtd, parser) || !atts || !dataBuf || (encodingName && !protocolEncodingName)) { XML_ParserFree(parser); return 0; } dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; - XmlInitEncoding(&initEncoding, &encoding, 0); - return parser; -} -XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) -{ - static - const XML_Char implicitContext[] = { - XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='), - XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'), - XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'), - XML_T('.'), XML_T('w'), XML_T('3'), - XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'), - XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'), - XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'), - XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'), - XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'), - XML_T('\0') - }; - - XML_Parser parser = XML_ParserCreate(encodingName); - if (parser) { + if (nameSep) { XmlInitEncodingNS(&initEncoding, &encoding, 0); ns = 1; - namespaceSeparator = nsSep; + internalEncoding = XmlGetInternalEncodingNS(); + namespaceSeparator = *nameSep; + + if (! 
setContext(parser, implicitContext)) { + XML_ParserFree(parser); + return 0; + } } - if (!setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return 0; + else { + XmlInitEncoding(&initEncoding, &encoding, 0); + internalEncoding = XmlGetInternalEncoding(); } + return parser; -} +} /* End XML_ParserCreate_MM */ int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { @@ -538,21 +714,43 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler; XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler; XML_DefaultHandler oldDefaultHandler = defaultHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler; + XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler; XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler; XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; + ELEMENT_TYPE * oldDeclElementType = declElementType; + void *oldUserData = userData; void *oldHandlerArg = handlerArg; int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; - - parser = (ns - ? 
XML_ParserCreateNS(encodingName, namespaceSeparator) - : XML_ParserCreate(encodingName)); +#ifdef XML_DTD + int oldParamEntityParsing = paramEntityParsing; +#endif + int oldns_triplets = ns_triplets; + + if (ns) { + XML_Char tmp[2]; + + *tmp = namespaceSeparator; + parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem, + tmp); + } + else { + parser = XML_ParserCreate_MM(encodingName, &((Parser *)parser)->m_mem, + NULL); + } + if (!parser) return 0; + startElementHandler = oldStartElementHandler; endElementHandler = oldEndElementHandler; characterDataHandler = oldCharacterDataHandler; @@ -561,11 +759,18 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, startCdataSectionHandler = oldStartCdataSectionHandler; endCdataSectionHandler = oldEndCdataSectionHandler; defaultHandler = oldDefaultHandler; + unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; + notationDeclHandler = oldNotationDeclHandler; startNamespaceDeclHandler = oldStartNamespaceDeclHandler; endNamespaceDeclHandler = oldEndNamespaceDeclHandler; notStandaloneHandler = oldNotStandaloneHandler; externalEntityRefHandler = oldExternalEntityRefHandler; unknownEncodingHandler = oldUnknownEncodingHandler; + elementDeclHandler = oldElementDeclHandler; + attlistDeclHandler = oldAttlistDeclHandler; + entityDeclHandler = oldEntityDeclHandler; + xmlDeclHandler = oldXmlDeclHandler; + declElementType = oldDeclElementType; userData = oldUserData; if (oldUserData == oldHandlerArg) handlerArg = userData; @@ -574,24 +779,39 @@ XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, if (oldExternalEntityRefHandlerArg != oldParser) externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; - if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) { - XML_ParserFree(parser); - return 0; + ns_triplets = oldns_triplets; +#ifdef XML_DTD + paramEntityParsing = oldParamEntityParsing; + if (context) { +#endif /* 
XML_DTD */ + if (!dtdCopy(&dtd, oldDtd, parser) || !setContext(parser, context)) { + XML_ParserFree(parser); + return 0; + } + processor = externalEntityInitProcessor; +#ifdef XML_DTD + } + else { + dtdSwap(&dtd, oldDtd); + parentParser = oldParser; + XmlPrologStateInitExternalEntity(&prologState); + dtd.complete = 1; + hadExternalDoctype = 1; } - processor = externalEntityInitProcessor; +#endif /* XML_DTD */ return parser; } static -void destroyBindings(BINDING *bindings) +void destroyBindings(BINDING *bindings, XML_Parser parser) { for (;;) { BINDING *b = bindings; if (!b) break; bindings = b->nextTagBinding; - free(b->uri); - free(b); + FREE(b->uri); + FREE(b); } } @@ -607,23 +827,33 @@ void XML_ParserFree(XML_Parser parser) } p = tagStack; tagStack = tagStack->parent; - free(p->buf); - destroyBindings(p->bindings); - free(p); + FREE(p->buf); + destroyBindings(p->bindings, parser); + FREE(p); } - destroyBindings(freeBindingList); - destroyBindings(inheritedBindings); + destroyBindings(freeBindingList, parser); + destroyBindings(inheritedBindings, parser); poolDestroy(&tempPool); poolDestroy(&temp2Pool); - dtdDestroy(&dtd); - free((void *)atts); - free(groupConnector); - free(buffer); - free(dataBuf); - free(unknownEncodingMem); +#ifdef XML_DTD + if (parentParser) { + if (hadExternalDoctype) + dtd.complete = 0; + dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd); + } +#endif /* XML_DTD */ + dtdDestroy(&dtd, parser); + FREE((void *)atts); + if (groupConnector) + FREE(groupConnector); + if (buffer) + FREE(buffer); + FREE(dataBuf); + if (unknownEncodingMem) + FREE(unknownEncodingMem); if (unknownEncodingRelease) unknownEncodingRelease(unknownEncodingData); - free(parser); + FREE(parser); } void XML_UseParserAsHandlerArg(XML_Parser parser) @@ -631,6 +861,11 @@ void XML_UseParserAsHandlerArg(XML_Parser parser) handlerArg = parser; } +void +XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { + ns_triplets = do_nst; +} + void XML_SetUserData(XML_Parser parser, void 
*p) { if (handlerArg == userData) @@ -645,16 +880,16 @@ int XML_SetBase(XML_Parser parser, const XML_Char *p) p = poolCopyString(&dtd.pool, p); if (!p) return 0; - dtd.base = p; + curBase = p; } else - dtd.base = 0; + curBase = 0; return 1; } const XML_Char *XML_GetBase(XML_Parser parser) { - return dtd.base; + return curBase; } int XML_GetSpecifiedAttributeCount(XML_Parser parser) @@ -662,6 +897,11 @@ int XML_GetSpecifiedAttributeCount(XML_Parser parser) return nSpecifiedAtts; } +int XML_GetIdAttributeIndex(XML_Parser parser) +{ + return idAttIndex; +} + void XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end) @@ -670,6 +910,16 @@ void XML_SetElementHandler(XML_Parser parser, endElementHandler = end; } +void XML_SetStartElementHandler(XML_Parser parser, + XML_StartElementHandler start) { + startElementHandler = start; +} + +void XML_SetEndElementHandler(XML_Parser parser, + XML_EndElementHandler end) { + endElementHandler = end; +} + void XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { @@ -696,6 +946,16 @@ void XML_SetCdataSectionHandler(XML_Parser parser, endCdataSectionHandler = end; } +void XML_SetStartCdataSectionHandler(XML_Parser parser, + XML_StartCdataSectionHandler start) { + startCdataSectionHandler = start; +} + +void XML_SetEndCdataSectionHandler(XML_Parser parser, + XML_EndCdataSectionHandler end) { + endCdataSectionHandler = end; +} + void XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { @@ -710,6 +970,24 @@ void XML_SetDefaultHandlerExpand(XML_Parser parser, defaultExpandInternalEntities = 1; } +void XML_SetDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end) +{ + startDoctypeDeclHandler = start; + endDoctypeDeclHandler = end; +} + +void XML_SetStartDoctypeDeclHandler(XML_Parser parser, + XML_StartDoctypeDeclHandler start) { + startDoctypeDeclHandler = start; +} + +void 
XML_SetEndDoctypeDeclHandler(XML_Parser parser, + XML_EndDoctypeDeclHandler end) { + endDoctypeDeclHandler = end; +} + void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { @@ -730,6 +1008,17 @@ void XML_SetNamespaceDeclHandler(XML_Parser parser, endNamespaceDeclHandler = end; } +void XML_SetStartNamespaceDeclHandler(XML_Parser parser, + XML_StartNamespaceDeclHandler start) { + startNamespaceDeclHandler = start; +} + +void XML_SetEndNamespaceDeclHandler(XML_Parser parser, + XML_EndNamespaceDeclHandler end) { + endNamespaceDeclHandler = end; +} + + void XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { @@ -758,6 +1047,40 @@ void XML_SetUnknownEncodingHandler(XML_Parser parser, unknownEncodingHandlerData = data; } +void XML_SetElementDeclHandler(XML_Parser parser, + XML_ElementDeclHandler eldecl) +{ + elementDeclHandler = eldecl; +} + +void XML_SetAttlistDeclHandler(XML_Parser parser, + XML_AttlistDeclHandler attdecl) +{ + attlistDeclHandler = attdecl; +} + +void XML_SetEntityDeclHandler(XML_Parser parser, + XML_EntityDeclHandler handler) +{ + entityDeclHandler = handler; +} + +void XML_SetXmlDeclHandler(XML_Parser parser, + XML_XmlDeclHandler handler) { + xmlDeclHandler = handler; +} + +int XML_SetParamEntityParsing(XML_Parser parser, + enum XML_ParamEntityParsing parsing) +{ +#ifdef XML_DTD + paramEntityParsing = parsing; + return 1; +#else + return parsing == XML_PARAM_ENTITY_PARSING_NEVER; +#endif +} + int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { if (len == 0) { @@ -768,8 +1091,10 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) if (errorCode == XML_ERROR_NONE) return 1; eventEndPtr = eventPtr; + processor = errorProcessor; return 0; } +#ifndef XML_CONTEXT_BYTES else if (bufferPtr == bufferEnd) { const char *end; int nLeftOver; @@ -780,11 +1105,13 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) if (errorCode == 
XML_ERROR_NONE) return 1; eventEndPtr = eventPtr; + processor = errorProcessor; return 0; } errorCode = processor(parser, s, parseEndPtr = s + len, &end); if (errorCode != XML_ERROR_NONE) { eventEndPtr = eventPtr; + processor = errorProcessor; return 0; } XmlUpdatePosition(encoding, positionPtr, end, &position); @@ -792,10 +1119,12 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) if (nLeftOver) { if (buffer == 0 || nLeftOver > bufferLim - buffer) { /* FIXME avoid integer overflow */ - buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2); + buffer = buffer == 0 ? MALLOC(len * 2) : REALLOC(buffer, len * 2); + /* FIXME storage leak if realloc fails */ if (!buffer) { errorCode = XML_ERROR_NO_MEMORY; eventPtr = eventEndPtr = 0; + processor = errorProcessor; return 0; } bufferLim = buffer + len * 2; @@ -806,6 +1135,7 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) } return 1; } +#endif /* not defined XML_CONTEXT_BYTES */ else { memcpy(XML_GetBuffer(parser, len), s, len); return XML_ParseBuffer(parser, len, isFinal); @@ -827,6 +1157,7 @@ int XML_ParseBuffer(XML_Parser parser, int len, int isFinal) } else { eventEndPtr = eventPtr; + processor = errorProcessor; return 0; } } @@ -836,10 +1167,26 @@ void *XML_GetBuffer(XML_Parser parser, int len) if (len > bufferLim - bufferEnd) { /* FIXME avoid integer overflow */ int neededSize = len + (bufferEnd - bufferPtr); +#ifdef XML_CONTEXT_BYTES + int keep = bufferPtr - buffer; + + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + neededSize += keep; +#endif /* defined XML_CONTEXT_BYTES */ if (neededSize <= bufferLim - buffer) { +#ifdef XML_CONTEXT_BYTES + if (keep < bufferPtr - buffer) { + int offset = (bufferPtr - buffer) - keep; + memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); + bufferEnd -= offset; + bufferPtr -= offset; + } +#else memmove(buffer, bufferPtr, bufferEnd - bufferPtr); bufferEnd = buffer + (bufferEnd - bufferPtr); bufferPtr = 
buffer; +#endif /* not defined XML_CONTEXT_BYTES */ } else { char *newBuf; @@ -849,18 +1196,35 @@ void *XML_GetBuffer(XML_Parser parser, int len) do { bufferSize *= 2; } while (bufferSize < neededSize); - newBuf = malloc(bufferSize); + newBuf = MALLOC(bufferSize); if (newBuf == 0) { errorCode = XML_ERROR_NO_MEMORY; return 0; } bufferLim = newBuf + bufferSize; +#ifdef XML_CONTEXT_BYTES + if (bufferPtr) { + int keep = bufferPtr - buffer; + if (keep > XML_CONTEXT_BYTES) + keep = XML_CONTEXT_BYTES; + memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); + FREE(buffer); + buffer = newBuf; + bufferEnd = buffer + (bufferEnd - bufferPtr) + keep; + bufferPtr = buffer + keep; + } + else { + bufferEnd = newBuf + (bufferEnd - bufferPtr); + bufferPtr = buffer = newBuf; + } +#else if (bufferPtr) { memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr); - free(buffer); + FREE(buffer); } bufferEnd = newBuf + (bufferEnd - bufferPtr); bufferPtr = buffer = newBuf; +#endif /* not defined XML_CONTEXT_BYTES */ } } return bufferEnd; @@ -885,6 +1249,18 @@ int XML_GetCurrentByteCount(XML_Parser parser) return 0; } +const char * XML_GetInputContext(XML_Parser parser, int *offset, int *size) +{ +#ifdef XML_CONTEXT_BYTES + if (eventPtr && buffer) { + *offset = eventPtr - buffer; + *size = bufferEnd - buffer; + return buffer; + } +#endif /* defined XML_CONTEXT_BYTES */ + return (char *) 0; +} + int XML_GetCurrentLineNumber(XML_Parser parser) { if (eventPtr) { @@ -908,7 +1284,7 @@ void XML_DefaultCurrent(XML_Parser parser) if (defaultHandler) { if (openInternalEntities) reportDefault(parser, - ns ? 
XmlGetInternalEncodingNS() : XmlGetInternalEncoding(), + internalEncoding, openInternalEntities->internalEventPtr, openInternalEntities->internalEventEndPtr); else @@ -923,7 +1299,7 @@ const XML_LChar *XML_ErrorString(int code) XML_T("out of memory"), XML_T("syntax error"), XML_T("no element found"), - XML_T("not well-formed"), + XML_T("not well-formed (invalid token)"), XML_T("unclosed token"), XML_T("unclosed token"), XML_T("mismatched tag"), @@ -941,13 +1317,19 @@ const XML_LChar *XML_ErrorString(int code) XML_T("encoding specified in XML declaration is incorrect"), XML_T("unclosed CDATA section"), XML_T("error in processing external entity reference"), - XML_T("document is not standalone") + XML_T("document is not standalone"), + XML_T("unexpected parser state - please send a bug report") }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; return 0; } +const XML_LChar * +XML_ExpatVersion() { + return "1.95.1"; /*VERSION; thies@thieso.net*/ +} + static enum XML_Error contentProcessor(XML_Parser parser, const char *start, @@ -1055,7 +1437,6 @@ doContent(XML_Parser parser, const char *end, const char **nextPtr) { - const ENCODING *internalEnc = ns ? 
XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -1163,7 +1544,7 @@ doContent(XML_Parser parser, openEntity.internalEventEndPtr = 0; result = doContent(parser, tagLevel, - internalEnc, + internalEncoding, (char *)entity->textPtr, (char *)(entity->textPtr + entity->textLen), 0); @@ -1181,7 +1562,7 @@ doContent(XML_Parser parser, return XML_ERROR_NO_MEMORY; if (!externalEntityRefHandler(externalEntityRefHandlerArg, context, - dtd.base, + entity->base, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; @@ -1207,10 +1588,10 @@ doContent(XML_Parser parser, freeTagList = freeTagList->parent; } else { - tag = malloc(sizeof(TAG)); + tag = MALLOC(sizeof(TAG)); if (!tag) return XML_ERROR_NO_MEMORY; - tag->buf = malloc(INIT_TAG_BUF_SIZE); + tag->buf = MALLOC(INIT_TAG_BUF_SIZE); if (!tag->buf) return XML_ERROR_NO_MEMORY; tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; @@ -1227,7 +1608,7 @@ doContent(XML_Parser parser, if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) { int bufSize = tag->rawNameLength * 4; bufSize = ROUND_UP(bufSize, sizeof(XML_Char)); - tag->buf = realloc(tag->buf, bufSize); + tag->buf = REALLOC(tag->buf, bufSize); if (!tag->buf) return XML_ERROR_NO_MEMORY; tag->bufEnd = tag->buf + bufSize; @@ -1254,7 +1635,7 @@ doContent(XML_Parser parser, if (fromPtr == rawNameEnd) break; bufSize = (tag->bufEnd - tag->buf) << 1; - tag->buf = realloc(tag->buf, bufSize); + tag->buf = REALLOC(tag->buf, bufSize); if (!tag->buf) return XML_ERROR_NO_MEMORY; tag->bufEnd = tag->buf + bufSize; @@ -1484,12 +1865,12 @@ doContent(XML_Parser parser, otherwise just check the attributes for well-formedness. 
*/ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, - const char *s, TAG_NAME *tagNamePtr, + const char *attStr, TAG_NAME *tagNamePtr, BINDING **bindingsPtr) { ELEMENT_TYPE *elementType = 0; int nDefaultAtts = 0; - const XML_Char **appAtts; + const XML_Char **appAtts; /* the attribute list to pass to the application */ int attIndex = 0; int i; int n; @@ -1497,8 +1878,9 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, BINDING *binding; const XML_Char *localPart; + /* lookup the element type name */ if (tagNamePtr) { - elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0); + elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str,0); if (!elementType) { tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str); if (!tagNamePtr->str) @@ -1511,23 +1893,26 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, } nDefaultAtts = elementType->nDefaultAtts; } - n = XmlGetAttributes(enc, s, attsSize, atts); + /* get the attributes from the tokenizer */ + n = XmlGetAttributes(enc, attStr, attsSize, atts); if (n + nDefaultAtts > attsSize) { int oldAttsSize = attsSize; attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; - atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE)); + atts = REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); if (!atts) return XML_ERROR_NO_MEMORY; if (n > oldAttsSize) - XmlGetAttributes(enc, s, n, atts); + XmlGetAttributes(enc, attStr, n, atts); } appAtts = (const XML_Char **)atts; for (i = 0; i < n; i++) { + /* add the name and value to the attribute list */ ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, atts[i].name + XmlNameLength(enc, atts[i].name)); if (!attId) return XML_ERROR_NO_MEMORY; + /* detect duplicate attributes */ if ((attId->name)[-1]) { if (enc == encoding) eventPtr = atts[i].name; @@ -1539,6 +1924,7 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, enum XML_Error result; int isCdata = 
1; + /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { int j; for (j = 0; j < nDefaultAtts; j++) { @@ -1549,6 +1935,7 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, } } + /* normalize the attribute value */ result = storeAttributeValue(parser, enc, isCdata, atts[i].valuePtr, atts[i].valueEnd, &tempPool); @@ -1562,18 +1949,22 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, poolDiscard(&tempPool); } else if (tagNamePtr) { + /* the value did not need normalizing */ appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); } + /* handle prefixed attribute names */ if (attId->prefix && tagNamePtr) { if (attId->xmlns) { + /* deal with namespace declarations here */ if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr)) return XML_ERROR_NO_MEMORY; --attIndex; } else { + /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; @@ -1582,9 +1973,19 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, else attIndex++; } - nSpecifiedAtts = attIndex; if (tagNamePtr) { int j; + nSpecifiedAtts = attIndex; + if (elementType->idAtt && (elementType->idAtt->name)[-1]) { + for (i = 0; i < attIndex; i += 2) + if (appAtts[i] == elementType->idAtt->name) { + idAttIndex = i; + break; + } + } + else + idAttIndex = -1; + /* do attribute defaulting */ for (j = 0; j < nDefaultAtts; j++) { const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j; if (!(da->id->name)[-1] && da->value) { @@ -1611,6 +2012,7 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, } i = 0; if (nPrefixes) { + /* expand prefixed attribute names */ for (; i < attIndex; i += 2) { if (appAtts[i][-1] == 2) { ATTRIBUTE_ID *id; @@ -1630,6 +2032,15 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, if 
(!poolAppendChar(&tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); + if (ns_triplets) { + tempPool.ptr[-1] = namespaceSeparator; + s = b->prefix->name; + do { + if (!poolAppendChar(&tempPool, *s)) + return XML_ERROR_NO_MEMORY; + } while (*s++); + } + appAtts[i] = poolStart(&tempPool); poolFinish(&tempPool); } @@ -1640,12 +2051,14 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, ((XML_Char *)(appAtts[i]))[-1] = 0; } } + /* clear the flags that say whether attributes were specified */ for (; i < attIndex; i += 2) ((XML_Char *)(appAtts[i]))[-1] = 0; if (!tagNamePtr) return XML_ERROR_NONE; for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; + /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; if (!binding) @@ -1662,15 +2075,23 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, return XML_ERROR_NONE; tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; - i = binding->uriLen; - do { - if (i == binding->uriAlloc) { - binding->uri = realloc(binding->uri, binding->uriAlloc *= 2); - if (!binding->uri) - return XML_ERROR_NO_MEMORY; - } - binding->uri[i++] = *localPart; - } while (*localPart++); + for (i = 0; localPart[i++];) + ; + n = i + binding->uriLen; + if (n > binding->uriAlloc) { + TAG *p; + XML_Char *uri = MALLOC((n + EXPAND_SPARE) * sizeof(XML_Char)); + if (!uri) + return XML_ERROR_NO_MEMORY; + binding->uriAlloc = n + EXPAND_SPARE; + memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); + for (p = tagStack; p; p = p->parent) + if (p->name.str == binding->uri) + p->name.str = uri; + FREE(binding->uri); + binding->uri = uri; + } + memcpy(binding->uri + binding->uriLen, localPart, i * sizeof(XML_Char)); tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } @@ -1687,7 +2108,7 @@ int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, con if (freeBindingList) { 
b = freeBindingList; if (len > b->uriAlloc) { - b->uri = realloc(b->uri, len + EXPAND_SPARE); + b->uri = REALLOC(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (!b->uri) return 0; b->uriAlloc = len + EXPAND_SPARE; @@ -1695,15 +2116,15 @@ int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, con freeBindingList = b->nextTagBinding; } else { - b = malloc(sizeof(BINDING)); + b = MALLOC(sizeof(BINDING)); if (!b) return 0; - b->uri = malloc(sizeof(XML_Char) * len + EXPAND_SPARE); + b->uri = MALLOC(sizeof(XML_Char) * (len + EXPAND_SPARE)); if (!b->uri) { - free(b); + FREE(b); return 0; } - b->uriAlloc = len; + b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); @@ -1828,13 +2249,92 @@ enum XML_Error doCdataSection(XML_Parser parser, } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: - abort(); + *eventPP = next; + return XML_ERROR_UNEXPECTED_STATE; } *eventPP = s = next; } /* not reached */ } +#ifdef XML_DTD + +/* The idea here is to avoid using stack for each IGNORE section when +the whole file is parsed with one call. */ + +static +enum XML_Error ignoreSectionProcessor(XML_Parser parser, + const char *start, + const char *end, + const char **endPtr) +{ + enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr); + if (start) { + processor = prologProcessor; + return prologProcessor(parser, start, end, endPtr); + } + return result; +} + +/* startPtr gets set to non-null is the section is closed, and to null if +the section is not yet closed. 
*/ + +static +enum XML_Error doIgnoreSection(XML_Parser parser, + const ENCODING *enc, + const char **startPtr, + const char *end, + const char **nextPtr) +{ + const char *next; + int tok; + const char *s = *startPtr; + const char **eventPP; + const char **eventEndPP; + if (enc == encoding) { + eventPP = &eventPtr; + *eventPP = s; + eventEndPP = &eventEndPtr; + } + else { + eventPP = &(openInternalEntities->internalEventPtr); + eventEndPP = &(openInternalEntities->internalEventEndPtr); + } + *eventPP = s; + *startPtr = 0; + tok = XmlIgnoreSectionTok(enc, s, end, &next); + *eventEndPP = next; + switch (tok) { + case XML_TOK_IGNORE_SECT: + if (defaultHandler) + reportDefault(parser, enc, s, next); + *startPtr = next; + return XML_ERROR_NONE; + case XML_TOK_INVALID: + *eventPP = next; + return XML_ERROR_INVALID_TOKEN; + case XML_TOK_PARTIAL_CHAR: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + return XML_ERROR_PARTIAL_CHAR; + case XML_TOK_PARTIAL: + case XML_TOK_NONE: + if (nextPtr) { + *nextPtr = s; + return XML_ERROR_NONE; + } + return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ + default: + *eventPP = next; + return XML_ERROR_UNEXPECTED_STATE; + } + /* not reached */ +} + +#endif /* XML_DTD */ + static enum XML_Error initializeEncoding(XML_Parser parser) { @@ -1847,8 +2347,7 @@ initializeEncoding(XML_Parser parser) int i; for (i = 0; protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 - || protocolEncodingName[i] >= 0x80 - || protocolEncodingName[i] < 0) { + || (protocolEncodingName[i] & ~0x7f) != 0) { encodingBuf[0] = '\0'; break; } @@ -1870,8 +2369,11 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const char *next) { const char *encodingName = 0; + const char *storedEncName = 0; const ENCODING *newEncoding = 0; - const char *version; + const char *version = 0; + const char *versionend; + const char *storedversion = 0; int standalone = -1; if (!(ns ? 
XmlParseXmlDeclNS @@ -1881,13 +2383,40 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, next, &eventPtr, &version, + &versionend, &encodingName, &newEncoding, &standalone)) return XML_ERROR_SYNTAX; - if (!isGeneralTextEntity && standalone == 1) + if (!isGeneralTextEntity && standalone == 1) { dtd.standalone = 1; - if (defaultHandler) +#ifdef XML_DTD + if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; +#endif /* XML_DTD */ + } + if (xmlDeclHandler) { + if (encodingName) { + storedEncName = poolStoreString(&temp2Pool, + encoding, + encodingName, + encodingName + + XmlNameLength(encoding, encodingName)); + if (! storedEncName) + return XML_ERROR_NO_MEMORY; + poolFinish(&temp2Pool); + } + if (version) { + storedversion = poolStoreString(&temp2Pool, + encoding, + version, + versionend - encoding->minBytesPerChar); + if (! storedversion) + return XML_ERROR_NO_MEMORY; + } + xmlDeclHandler(handlerArg, storedversion, storedEncName, standalone); + } + else if (defaultHandler) reportDefault(parser, encoding, s, next); if (!protocolEncodingName) { if (newEncoding) { @@ -1899,20 +2428,26 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, } else if (encodingName) { enum XML_Error result; - const XML_Char *s = poolStoreString(&tempPool, - encoding, - encodingName, - encodingName - + XmlNameLength(encoding, encodingName)); - if (!s) - return XML_ERROR_NO_MEMORY; - result = handleUnknownEncoding(parser, s); - poolDiscard(&tempPool); + if (! storedEncName) { + storedEncName = poolStoreString(&temp2Pool, + encoding, + encodingName, + encodingName + + XmlNameLength(encoding, encodingName)); + if (! 
storedEncName) + return XML_ERROR_NO_MEMORY; + } + result = handleUnknownEncoding(parser, storedEncName); + poolClear(&tempPool); if (result == XML_ERROR_UNKNOWN_ENCODING) eventPtr = encodingName; return result; } } + + if (storedEncName || storedversion) + poolClear(&temp2Pool); + return XML_ERROR_NONE; } @@ -1929,7 +2464,7 @@ handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) info.release = 0; if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { ENCODING *enc; - unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding()); + unknownEncodingMem = MALLOC(XmlSizeOfUnknownEncoding()); if (!unknownEncodingMem) { if (info.release) info.release(info.data); @@ -1973,9 +2508,40 @@ prologProcessor(XML_Parser parser, const char *end, const char **nextPtr) { + const char *next; + int tok = XmlPrologTok(encoding, s, end, &next); + return doProlog(parser, encoding, s, end, tok, next, nextPtr); +} + +static enum XML_Error +doProlog(XML_Parser parser, + const ENCODING *enc, + const char *s, + const char *end, + int tok, + const char *next, + const char **nextPtr) +{ +#ifdef XML_DTD + static const XML_Char externalSubsetName[] = { '#' , '\0' }; +#endif /* XML_DTD */ + + const char **eventPP; + const char **eventEndPP; + enum XML_Content_Quant quant; + + if (enc == encoding) { + eventPP = &eventPtr; + eventEndPP = &eventEndPtr; + } + else { + eventPP = &(openInternalEntities->internalEventPtr); + eventEndPP = &(openInternalEntities->internalEventEndPtr); + } for (;;) { - const char *next; - int tok = XmlPrologTok(encoding, s, end, &next); + int role; + *eventPP = s; + *eventEndPP = next; if (tok <= 0) { if (nextPtr != 0 && tok != XML_TOK_INVALID) { *nextPtr = s; @@ -1983,45 +2549,92 @@ prologProcessor(XML_Parser parser, } switch (tok) { case XML_TOK_INVALID: - eventPtr = next; + *eventPP = next; return XML_ERROR_INVALID_TOKEN; - case XML_TOK_NONE: - return XML_ERROR_NO_ELEMENTS; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case 
XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; - case XML_TOK_TRAILING_CR: - eventPtr = s + encoding->minBytesPerChar; + case XML_TOK_NONE: +#ifdef XML_DTD + if (enc != encoding) + return XML_ERROR_NONE; + if (parentParser) { + if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) + == XML_ROLE_ERROR) + return XML_ERROR_SYNTAX; + hadExternalDoctype = 0; + return XML_ERROR_NONE; + } +#endif /* XML_DTD */ return XML_ERROR_NO_ELEMENTS; default: - abort(); + tok = -tok; + next = end; + break; } } - switch (XmlTokenRole(&prologState, tok, s, next, encoding)) { + role = XmlTokenRole(&prologState, tok, s, next, enc); + switch (role) { case XML_ROLE_XML_DECL: { enum XML_Error result = processXmlDecl(parser, 0, s, next); if (result != XML_ERROR_NONE) return result; + enc = encoding; } break; - case XML_ROLE_DOCTYPE_SYSTEM_ID: - if (!dtd.standalone - && notStandaloneHandler - && !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; - hadExternalDoctype = 1; + case XML_ROLE_DOCTYPE_NAME: + if (startDoctypeDeclHandler) { + doctypeName = poolStoreString(&tempPool, enc, s, next); + if (! doctypeName) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + doctypeSysid = 0; + doctypePubid = 0; + } + break; + case XML_ROLE_DOCTYPE_INTERNAL_SUBSET: + if (startDoctypeDeclHandler) { + startDoctypeDeclHandler(handlerArg, doctypeName, doctypeSysid, + doctypePubid, 1); + doctypeName = 0; + poolClear(&tempPool); + } + break; +#ifdef XML_DTD + case XML_ROLE_TEXT_DECL: + { + enum XML_Error result = processXmlDecl(parser, 1, s, next); + if (result != XML_ERROR_NONE) + return result; + enc = encoding; + } break; +#endif /* XML_DTD */ case XML_ROLE_DOCTYPE_PUBLIC_ID: + if (startDoctypeDeclHandler) { + doctypePubid = poolStoreString(&tempPool, enc, s + 1, next - 1); + if (! 
doctypePubid) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + } +#ifdef XML_DTD + declEntity = (ENTITY *)lookup(&dtd.paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; +#endif /* XML_DTD */ + /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: - if (!XmlIsPublicId(encoding, s, next, &eventPtr)) + if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_SYNTAX; if (declEntity) { XML_Char *tem = poolStoreString(&dtd.pool, - encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); + enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); if (!tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); @@ -2029,87 +2642,243 @@ prologProcessor(XML_Parser parser, poolFinish(&dtd.pool); } break; + case XML_ROLE_DOCTYPE_CLOSE: + if (doctypeName) { + startDoctypeDeclHandler(handlerArg, doctypeName, + doctypeSysid, doctypePubid, 0); + poolClear(&tempPool); + } + if (dtd.complete && hadExternalDoctype) { + dtd.complete = 0; +#ifdef XML_DTD + if (paramEntityParsing && externalEntityRefHandler) { + ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities, + externalSubsetName, + 0); + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } +#endif /* XML_DTD */ + if (!dtd.complete + && !dtd.standalone + && notStandaloneHandler + && !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + if (endDoctypeDeclHandler) + endDoctypeDeclHandler(handlerArg); + break; case XML_ROLE_INSTANCE_START: processor = contentProcessor; - if (hadExternalDoctype) - dtd.complete = 0; return contentProcessor(parser, s, end, nextPtr); case XML_ROLE_ATTLIST_ELEMENT_NAME: - { - const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next); - if (!name) - return XML_ERROR_NO_MEMORY; - declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); - if 
(!declElementType) - return XML_ERROR_NO_MEMORY; - if (declElementType->name != name) - poolDiscard(&dtd.pool); - else { - poolFinish(&dtd.pool); - if (!setElementTypePrefix(parser, declElementType)) - return XML_ERROR_NO_MEMORY; - } - break; - } + declElementType = getElementType(parser, enc, s, next); + if (!declElementType) + return XML_ERROR_NO_MEMORY; + break; case XML_ROLE_ATTRIBUTE_NAME: - declAttributeId = getAttributeId(parser, encoding, s, next); + declAttributeId = getAttributeId(parser, enc, s, next); if (!declAttributeId) return XML_ERROR_NO_MEMORY; declAttributeIsCdata = 0; + declAttributeType = 0; + declAttributeIsId = 0; break; case XML_ROLE_ATTRIBUTE_TYPE_CDATA: declAttributeIsCdata = 1; + declAttributeType = "CDATA"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_ID: + declAttributeIsId = 1; + declAttributeType = "ID"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_IDREF: + declAttributeType = "IDREF"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_IDREFS: + declAttributeType = "IDREFS"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_ENTITY: + declAttributeType = "ENTITY"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_ENTITIES: + declAttributeType = "ENTITIES"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN: + declAttributeType = "NMTOKEN"; + break; + case XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS: + declAttributeType = "NMTOKENS"; + break; + + case XML_ROLE_ATTRIBUTE_ENUM_VALUE: + case XML_ROLE_ATTRIBUTE_NOTATION_VALUE: + if (attlistDeclHandler) + { + char *prefix; + if (declAttributeType) { + prefix = "|"; + } + else { + prefix = (role == XML_ROLE_ATTRIBUTE_NOTATION_VALUE + ? "NOTATION(" + : "("); + } + if (! poolAppendString(&tempPool, prefix)) + return XML_ERROR_NO_MEMORY; + if (! 
poolAppend(&tempPool, enc, s, next)) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + } break; case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0)) + && !defineAttribute(declElementType, declAttributeId, + declAttributeIsCdata, declAttributeIsId, 0, + parser)) return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == '(' + || *declAttributeType == 'N' && declAttributeType[1] == 'O') { + /* Enumerated or Notation type */ + if (! poolAppendChar(&tempPool, ')') + || ! poolAppendChar(&tempPool, '\0')) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + } break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: { const XML_Char *attVal; enum XML_Error result - = storeAttributeValue(parser, encoding, declAttributeIsCdata, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar, + = storeAttributeValue(parser, enc, declAttributeIsCdata, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar, &dtd.pool); if (result) return result; attVal = poolStart(&dtd.pool); poolFinish(&dtd.pool); if (dtd.complete - && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal)) + /* ID attributes aren't allowed to have a default */ + && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0, attVal, parser)) return XML_ERROR_NO_MEMORY; + if (attlistDeclHandler && declAttributeType) { + if (*declAttributeType == '(' + || *declAttributeType == 'N' && declAttributeType[1] == 'O') { + /* Enumerated or Notation type */ + if (! poolAppendChar(&tempPool, ')') + || ! 
poolAppendChar(&tempPool, '\0')) + return XML_ERROR_NO_MEMORY; + declAttributeType = tempPool.start; + poolFinish(&tempPool); + } + *eventEndPP = s; + attlistDeclHandler(handlerArg, declElementType->name, + declAttributeId->name, declAttributeType, + attVal, + role == XML_ROLE_FIXED_ATTRIBUTE_VALUE); + poolClear(&tempPool); + } break; } case XML_ROLE_ENTITY_VALUE: { - enum XML_Error result = storeEntityValue(parser, s, next); + enum XML_Error result = storeEntityValue(parser, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (declEntity) { + declEntity->textPtr = poolStart(&dtd.pool); + declEntity->textLen = poolLength(&dtd.pool); + poolFinish(&dtd.pool); + if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + declEntity->is_param, + declEntity->textPtr, + declEntity->textLen, + curBase, 0, 0, 0); + } + } + else + poolDiscard(&dtd.pool); if (result != XML_ERROR_NONE) return result; } break; + case XML_ROLE_DOCTYPE_SYSTEM_ID: + if (startDoctypeDeclHandler) { + doctypeSysid = poolStoreString(&tempPool, enc, s + 1, next - 1); + if (! 
doctypeSysid) + return XML_ERROR_NO_MEMORY; + poolFinish(&tempPool); + } + if (!dtd.standalone +#ifdef XML_DTD + && !paramEntityParsing +#endif /* XML_DTD */ + && notStandaloneHandler + && !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + hadExternalDoctype = 1; +#ifndef XML_DTD + break; +#else /* XML_DTD */ + if (!declEntity) { + declEntity = (ENTITY *)lookup(&dtd.paramEntities, + externalSubsetName, + sizeof(ENTITY)); + declEntity->publicId = 0; + if (!declEntity) + return XML_ERROR_NO_MEMORY; + } + /* fall through */ +#endif /* XML_DTD */ case XML_ROLE_ENTITY_SYSTEM_ID: if (declEntity) { - declEntity->systemId = poolStoreString(&dtd.pool, encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); + declEntity->systemId = poolStoreString(&dtd.pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); if (!declEntity->systemId) return XML_ERROR_NO_MEMORY; - declEntity->base = dtd.base; + declEntity->base = curBase; poolFinish(&dtd.pool); } break; + case XML_ROLE_ENTITY_COMPLETE: + if (declEntity && entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + 0,0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + 0); + } + break; case XML_ROLE_ENTITY_NOTATION_NAME: if (declEntity) { - declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next); + declEntity->notation = poolStoreString(&dtd.pool, enc, s, next); if (!declEntity->notation) return XML_ERROR_NO_MEMORY; poolFinish(&dtd.pool); if (unparsedEntityDeclHandler) { - eventPtr = eventEndPtr = s; + *eventEndPP = s; unparsedEntityDeclHandler(handlerArg, declEntity->name, declEntity->base, @@ -2117,17 +2886,26 @@ prologProcessor(XML_Parser parser, declEntity->publicId, declEntity->notation); } - + else if (entityDeclHandler) { + *eventEndPP = s; + entityDeclHandler(handlerArg, + declEntity->name, + 0,0,0, + declEntity->base, + declEntity->systemId, + declEntity->publicId, + declEntity->notation); + 
} } break; case XML_ROLE_GENERAL_ENTITY_NAME: { const XML_Char *name; - if (XmlPredefinedEntityName(encoding, s, next)) { + if (XmlPredefinedEntityName(enc, s, next)) { declEntity = 0; break; } - name = poolStoreString(&dtd.pool, encoding, s, next); + name = poolStoreString(&dtd.pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; if (dtd.complete) { @@ -2138,8 +2916,11 @@ prologProcessor(XML_Parser parser, poolDiscard(&dtd.pool); declEntity = 0; } - else + else { poolFinish(&dtd.pool); + declEntity->publicId = 0; + declEntity->is_param = 0; + } } else { poolDiscard(&dtd.pool); @@ -2148,26 +2929,47 @@ prologProcessor(XML_Parser parser, } break; case XML_ROLE_PARAM_ENTITY_NAME: +#ifdef XML_DTD + if (dtd.complete) { + const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next); + if (!name) + return XML_ERROR_NO_MEMORY; + declEntity = (ENTITY *)lookup(&dtd.paramEntities, + name, sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; + if (declEntity->name != name) { + poolDiscard(&dtd.pool); + declEntity = 0; + } + else { + poolFinish(&dtd.pool); + declEntity->publicId = 0; + declEntity->is_param = 1; + } + } +#else /* not XML_DTD */ declEntity = 0; +#endif /* not XML_DTD */ break; case XML_ROLE_NOTATION_NAME: declNotationPublicId = 0; declNotationName = 0; if (notationDeclHandler) { - declNotationName = poolStoreString(&tempPool, encoding, s, next); + declNotationName = poolStoreString(&tempPool, enc, s, next); if (!declNotationName) return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); } break; case XML_ROLE_NOTATION_PUBLIC_ID: - if (!XmlIsPublicId(encoding, s, next, &eventPtr)) + if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_SYNTAX; if (declNotationName) { XML_Char *tem = poolStoreString(&tempPool, - encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); + enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); if (!tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); @@ -2178,15 +2980,15 @@ 
prologProcessor(XML_Parser parser, case XML_ROLE_NOTATION_SYSTEM_ID: if (declNotationName && notationDeclHandler) { const XML_Char *systemId - = poolStoreString(&tempPool, encoding, - s + encoding->minBytesPerChar, - next - encoding->minBytesPerChar); + = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); if (!systemId) return XML_ERROR_NO_MEMORY; - eventPtr = eventEndPtr = s; + *eventEndPP = s; notationDeclHandler(handlerArg, declNotationName, - dtd.base, + curBase, systemId, declNotationPublicId); } @@ -2194,17 +2996,16 @@ prologProcessor(XML_Parser parser, break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: if (declNotationPublicId && notationDeclHandler) { - eventPtr = eventEndPtr = s; + *eventEndPP = s; notationDeclHandler(handlerArg, declNotationName, - dtd.base, + curBase, 0, declNotationPublicId); } poolClear(&tempPool); break; case XML_ROLE_ERROR: - eventPtr = s; switch (tok) { case XML_TOK_PARAM_ENTITY_REF: return XML_ERROR_PARAM_ENTITY_REF; @@ -2213,50 +3014,220 @@ prologProcessor(XML_Parser parser, default: return XML_ERROR_SYNTAX; } +#ifdef XML_DTD + case XML_ROLE_IGNORE_SECT: + { + enum XML_Error result; + if (defaultHandler) + reportDefault(parser, enc, s, next); + result = doIgnoreSection(parser, enc, &next, end, nextPtr); + if (!next) { + processor = ignoreSectionProcessor; + return result; + } + } + break; +#endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (prologState.level >= groupSize) { - if (groupSize) - groupConnector = realloc(groupConnector, groupSize *= 2); + if (groupSize) { + groupConnector = REALLOC(groupConnector, groupSize *= 2); + if (dtd.scaffIndex) + dtd.scaffIndex = REALLOC(dtd.scaffIndex, groupSize * sizeof(int)); + } else - groupConnector = malloc(groupSize = 32); + groupConnector = MALLOC(groupSize = 32); if (!groupConnector) return XML_ERROR_NO_MEMORY; } groupConnector[prologState.level] = 0; + if (dtd.in_eldecl) { + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return 
XML_ERROR_NO_MEMORY; + dtd.scaffIndex[dtd.scaffLevel] = myindex; + dtd.scaffLevel++; + dtd.scaffold[myindex].type = XML_CTYPE_SEQ; + } break; case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == '|') { - eventPtr = s; + if (groupConnector[prologState.level] == '|') return XML_ERROR_SYNTAX; - } groupConnector[prologState.level] = ','; break; case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ',') { - eventPtr = s; + if (groupConnector[prologState.level] == ',') return XML_ERROR_SYNTAX; + if (dtd.in_eldecl + && ! groupConnector[prologState.level] + && dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type != XML_CTYPE_MIXED + ) { + dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_CHOICE; } groupConnector[prologState.level] = '|'; break; case XML_ROLE_PARAM_ENTITY_REF: +#ifdef XML_DTD + case XML_ROLE_INNER_PARAM_ENTITY_REF: + if (paramEntityParsing + && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) { + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&dtd.pool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); + poolDiscard(&dtd.pool); + if (!entity) { + /* FIXME what to do if !dtd.complete? 
*/ + return XML_ERROR_UNDEFINED_ENTITY; + } + if (entity->open) + return XML_ERROR_RECURSIVE_ENTITY_REF; + if (entity->textPtr) { + enum XML_Error result; + result = processInternalParamEntity(parser, entity); + if (result != XML_ERROR_NONE) + return result; + break; + } + if (role == XML_ROLE_INNER_PARAM_ENTITY_REF) + return XML_ERROR_PARAM_ENTITY_REF; + if (externalEntityRefHandler) { + dtd.complete = 0; + entity->open = 1; + if (!externalEntityRefHandler(externalEntityRefHandlerArg, + 0, + entity->base, + entity->systemId, + entity->publicId)) { + entity->open = 0; + return XML_ERROR_EXTERNAL_ENTITY_HANDLING; + } + entity->open = 0; + if (dtd.complete) + break; + } + } +#endif /* XML_DTD */ if (!dtd.standalone && notStandaloneHandler && !notStandaloneHandler(handlerArg)) return XML_ERROR_NOT_STANDALONE; dtd.complete = 0; + if (defaultHandler) + reportDefault(parser, enc, s, next); + break; + + /* Element declaration stuff */ + + case XML_ROLE_ELEMENT_NAME: + if (elementDeclHandler) { + declElementType = getElementType(parser, enc, s, next); + if (! declElementType) + return XML_ERROR_NO_MEMORY; + dtd.scaffLevel = 0; + dtd.scaffCount = 0; + dtd.in_eldecl = 1; + } + break; + + case XML_ROLE_CONTENT_ANY: + case XML_ROLE_CONTENT_EMPTY: + if (dtd.in_eldecl) { + if (elementDeclHandler) { + XML_Content * content = (XML_Content *) MALLOC(sizeof(XML_Content)); + if (! content) + return XML_ERROR_NO_MEMORY; + content->quant = XML_CQUANT_NONE; + content->name = 0; + content->numchildren = 0; + content->children = 0; + content->type = ((role == XML_ROLE_CONTENT_ANY) ? 
+ XML_CTYPE_ANY : + XML_CTYPE_EMPTY); + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, content); + } + dtd.in_eldecl = 0; + } + break; + + case XML_ROLE_CONTENT_PCDATA: + if (dtd.in_eldecl) { + dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]].type = XML_CTYPE_MIXED; + } + break; + + case XML_ROLE_CONTENT_ELEMENT: + quant = XML_CQUANT_NONE; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_OPT: + quant = XML_CQUANT_OPT; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_REP: + quant = XML_CQUANT_REP; + goto elementContent; + case XML_ROLE_CONTENT_ELEMENT_PLUS: + quant = XML_CQUANT_PLUS; + elementContent: + if (dtd.in_eldecl) + { + ELEMENT_TYPE *el; + const char *nxt = quant == XML_CQUANT_NONE ? next : next - 1; + int myindex = nextScaffoldPart(parser); + if (myindex < 0) + return XML_ERROR_NO_MEMORY; + dtd.scaffold[myindex].type = XML_CTYPE_NAME; + dtd.scaffold[myindex].quant = quant; + el = getElementType(parser, enc, s, nxt); + if (! el) + return XML_ERROR_NO_MEMORY; + dtd.scaffold[myindex].name = el->name; + dtd.contentStringLen += nxt - s + 1; + } break; + + case XML_ROLE_GROUP_CLOSE: + quant = XML_CQUANT_NONE; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_OPT: + quant = XML_CQUANT_OPT; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_REP: + quant = XML_CQUANT_REP; + goto closeGroup; + case XML_ROLE_GROUP_CLOSE_PLUS: + quant = XML_CQUANT_PLUS; + closeGroup: + if (dtd.in_eldecl) { + dtd.scaffLevel--; + dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel]].quant = quant; + if (dtd.scaffLevel == 0) { + if (elementDeclHandler) { + XML_Content *model = build_model(parser); + if (! 
model) + return XML_ERROR_NO_MEMORY; + *eventEndPP = s; + elementDeclHandler(handlerArg, declElementType->name, model); + } + dtd.in_eldecl = 0; + dtd.contentStringLen = 0; + } + } + break; + /* End element declaration stuff */ + case XML_ROLE_NONE: switch (tok) { case XML_TOK_PI: - eventPtr = s; - eventEndPtr = next; - if (!reportProcessingInstruction(parser, encoding, s, next)) + if (!reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: - eventPtr = s; - eventEndPtr = next; - if (!reportComment(parser, encoding, s, next)) + if (!reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; } @@ -2268,14 +3239,20 @@ prologProcessor(XML_Parser parser, case XML_TOK_COMMENT: case XML_TOK_BOM: case XML_TOK_XML_DECL: +#ifdef XML_DTD + case XML_TOK_IGNORE_SECT: +#endif /* XML_DTD */ + case XML_TOK_PARAM_ENTITY_REF: break; default: - eventPtr = s; - eventEndPtr = next; - reportDefault(parser, encoding, s, next); +#ifdef XML_DTD + if (role != XML_ROLE_IGNORE_SECT) +#endif /* XML_DTD */ + reportDefault(parser, enc, s, next); } } s = next; + tok = XmlPrologTok(enc, s, end, &next); } /* not reached */ } @@ -2293,7 +3270,7 @@ enum XML_Error epilogProcessor(XML_Parser parser, int tok = XmlPrologTok(encoding, s, end, &next); eventEndPtr = next; switch (tok) { - case XML_TOK_TRAILING_CR: + case -XML_TOK_PROLOG_S: if (defaultHandler) { eventEndPtr = end; reportDefault(parser, encoding, s, end); @@ -2337,7 +3314,32 @@ enum XML_Error epilogProcessor(XML_Parser parser, } } -#if 0 +#ifdef XML_DTD + +static enum XML_Error +processInternalParamEntity(XML_Parser parser, ENTITY *entity) +{ + const char *s, *end, *next; + int tok; + enum XML_Error result; + OPEN_INTERNAL_ENTITY openEntity; + entity->open = 1; + openEntity.next = openInternalEntities; + openInternalEntities = &openEntity; + openEntity.entity = entity; + openEntity.internalEventPtr = 0; + openEntity.internalEventEndPtr = 0; + s = (char *)entity->textPtr; + end 
= (char *)(entity->textPtr + entity->textLen); + tok = XmlPrologTok(internalEncoding, s, end, &next); + result = doProlog(parser, internalEncoding, s, end, tok, next, 0); + entity->open = 0; + openInternalEntities = openEntity.next; + return result; +} + +#endif /* XML_DTD */ + static enum XML_Error errorProcessor(XML_Parser parser, const char *s, @@ -2346,7 +3348,6 @@ enum XML_Error errorProcessor(XML_Parser parser, { return errorCode; } -#endif static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, @@ -2368,7 +3369,6 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, const char *ptr, const char *end, STRING_POOL *pool) { - const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); for (;;) { const char *next; int tok = XmlAttributeValueTok(enc, ptr, end, &next); @@ -2469,7 +3469,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = 1; - result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); + result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); entity->open = 0; if (result) return result; @@ -2477,7 +3477,9 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, } break; default: - abort(); + if (enc == encoding) + eventPtr = ptr; + return XML_ERROR_UNEXPECTED_STATE; } ptr = next; } @@ -2486,38 +3488,65 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, static enum XML_Error storeEntityValue(XML_Parser parser, + const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd) { -#if 0 - const ENCODING *internalEnc = ns ? 
XmlGetInternalEncodingNS() : XmlGetInternalEncoding(); -#endif STRING_POOL *pool = &(dtd.pool); - entityTextPtr += encoding->minBytesPerChar; - entityTextEnd -= encoding->minBytesPerChar; for (;;) { const char *next; - int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next); + int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); switch (tok) { case XML_TOK_PARAM_ENTITY_REF: +#ifdef XML_DTD + if (parentParser || enc != encoding) { + enum XML_Error result; + const XML_Char *name; + ENTITY *entity; + name = poolStoreString(&tempPool, enc, + entityTextPtr + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!name) + return XML_ERROR_NO_MEMORY; + entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); + poolDiscard(&tempPool); + if (!entity) { + if (enc == encoding) + eventPtr = entityTextPtr; + return XML_ERROR_UNDEFINED_ENTITY; + } + if (entity->open) { + if (enc == encoding) + eventPtr = entityTextPtr; + return XML_ERROR_RECURSIVE_ENTITY_REF; + } + if (entity->systemId) { + if (enc == encoding) + eventPtr = entityTextPtr; + return XML_ERROR_PARAM_ENTITY_REF; + } + entity->open = 1; + result = storeEntityValue(parser, + internalEncoding, + (char *)entity->textPtr, + (char *)(entity->textPtr + entity->textLen)); + entity->open = 0; + if (result) + return result; + break; + } +#endif /* XML_DTD */ eventPtr = entityTextPtr; return XML_ERROR_SYNTAX; case XML_TOK_NONE: - if (declEntity) { - declEntity->textPtr = pool->start; - declEntity->textLen = pool->ptr - pool->start; - poolFinish(pool); - } - else - poolDiscard(pool); return XML_ERROR_NONE; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: - if (!poolAppend(pool, encoding, entityTextPtr, next)) + if (!poolAppend(pool, enc, entityTextPtr, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: - next = entityTextPtr + encoding->minBytesPerChar; + next = entityTextPtr + enc->minBytesPerChar; /* fall through */ case XML_TOK_DATA_NEWLINE: if (pool->end == 
pool->ptr && !poolGrow(pool)) @@ -2528,14 +3557,16 @@ enum XML_Error storeEntityValue(XML_Parser parser, { XML_Char buf[XML_ENCODE_MAX]; int i; - int n = XmlCharRefNumber(encoding, entityTextPtr); + int n = XmlCharRefNumber(enc, entityTextPtr); if (n < 0) { - eventPtr = entityTextPtr; + if (enc == encoding) + eventPtr = entityTextPtr; return XML_ERROR_BAD_CHAR_REF; } n = XmlEncode(n, (ICHAR *)buf); if (!n) { - eventPtr = entityTextPtr; + if (enc == encoding) + eventPtr = entityTextPtr; return XML_ERROR_BAD_CHAR_REF; } for (i = 0; i < n; i++) { @@ -2546,13 +3577,17 @@ enum XML_Error storeEntityValue(XML_Parser parser, } break; case XML_TOK_PARTIAL: - eventPtr = entityTextPtr; + if (enc == encoding) + eventPtr = entityTextPtr; return XML_ERROR_INVALID_TOKEN; case XML_TOK_INVALID: - eventPtr = next; + if (enc == encoding) + eventPtr = next; return XML_ERROR_INVALID_TOKEN; default: - abort(); + if (enc == encoding) + eventPtr = entityTextPtr; + return XML_ERROR_UNEXPECTED_STATE; } entityTextPtr = next; } @@ -2659,17 +3694,28 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value) +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, + int isId, const XML_Char *value, XML_Parser parser) { DEFAULT_ATTRIBUTE *att; + if (value || isId) { + /* The handling of default attributes gets messed up if we have + a default which duplicates a non-default. 
*/ + int i; + for (i = 0; i < type->nDefaultAtts; i++) + if (attId == type->defaultAtts[i].id) + return 1; + if (isId && !type->idAtt && !attId->xmlns) + type->idAtt = attId; + } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); + type->defaultAtts = MALLOC(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); } else { type->allocDefaultAtts *= 2; - type->defaultAtts = realloc(type->defaultAtts, + type->defaultAtts = REALLOC(type->defaultAtts, type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); } if (!type->defaultAtts) @@ -2866,10 +3912,12 @@ int setContext(XML_Parser parser, const XML_Char *context) prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX)); if (!prefix) return 0; - if (prefix->name == poolStart(&tempPool)) - poolFinish(&tempPool); - else - poolDiscard(&tempPool); + if (prefix->name == poolStart(&tempPool)) { + prefix->name = poolCopyString(&dtd.pool, prefix->name); + if (!prefix->name) + return 0; + } + poolDiscard(&tempPool); } for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) if (!poolAppendChar(&tempPool, *context)) @@ -2915,22 +3963,46 @@ void normalizePublicId(XML_Char *publicId) *p = XML_T('\0'); } -static int dtdInit(DTD *p) +static int dtdInit(DTD *p, XML_Parser parser) { - poolInit(&(p->pool)); - hashTableInit(&(p->generalEntities)); - hashTableInit(&(p->elementTypes)); - hashTableInit(&(p->attributeIds)); - hashTableInit(&(p->prefixes)); + XML_Memory_Handling_Suite *ms = &((Parser *) parser)->m_mem; + poolInit(&(p->pool), ms); + hashTableInit(&(p->generalEntities), ms); + hashTableInit(&(p->elementTypes), ms); + hashTableInit(&(p->attributeIds), ms); + hashTableInit(&(p->prefixes), ms); p->complete = 1; p->standalone = 0; - p->base = 0; +#ifdef XML_DTD + hashTableInit(&(p->paramEntities), ms); +#endif /* XML_DTD */ p->defaultPrefix.name = 0; 
p->defaultPrefix.binding = 0; + + p->in_eldecl = 0; + p->scaffIndex = 0; + p->scaffLevel = 0; + p->scaffold = 0; + p->contentStringLen = 0; + p->scaffSize = 0; + p->scaffCount = 0; + return 1; } -static void dtdDestroy(DTD *p) +#ifdef XML_DTD + +static void dtdSwap(DTD *p1, DTD *p2) +{ + DTD tem; + memcpy(&tem, p1, sizeof(DTD)); + memcpy(p1, p2, sizeof(DTD)); + memcpy(p2, &tem, sizeof(DTD)); +} + +#endif /* XML_DTD */ + +static void dtdDestroy(DTD *p, XML_Parser parser) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); @@ -2939,29 +4011,29 @@ static void dtdDestroy(DTD *p) if (!e) break; if (e->allocDefaultAtts != 0) - free(e->defaultAtts); + FREE(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); +#ifdef XML_DTD + hashTableDestroy(&(p->paramEntities)); +#endif /* XML_DTD */ hashTableDestroy(&(p->elementTypes)); hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); + if (p->scaffIndex) + FREE(p->scaffIndex); + if (p->scaffold) + FREE(p->scaffold); } /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. The new DTD has already been initialized. */ -static int dtdCopy(DTD *newDtd, const DTD *oldDtd) +static int dtdCopy(DTD *newDtd, const DTD *oldDtd, XML_Parser parser) { HASH_TABLE_ITER iter; - if (oldDtd->base) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base); - if (!tem) - return 0; - newDtd->base = tem; - } - /* Copy the prefix table. 
*/ hashTableIterInit(&iter, &(oldDtd->prefixes)); @@ -3026,10 +4098,12 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) if (!newE) return 0; if (oldE->nDefaultAtts) { - newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); + newE->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (!newE->defaultAtts) return 0; } + if (oldE->idAtt) + newE->idAtt = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0); @@ -3046,9 +4120,43 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) } } - /* Copy the entity table. */ + /* Copy the entity tables. */ + if (!copyEntityTable(&(newDtd->generalEntities), + &(newDtd->pool), + &(oldDtd->generalEntities), parser)) + return 0; + +#ifdef XML_DTD + if (!copyEntityTable(&(newDtd->paramEntities), + &(newDtd->pool), + &(oldDtd->paramEntities), parser)) + return 0; +#endif /* XML_DTD */ + + newDtd->complete = oldDtd->complete; + newDtd->standalone = oldDtd->standalone; - hashTableIterInit(&iter, &(oldDtd->generalEntities)); + /* Don't want deep copying for scaffolding */ + newDtd->in_eldecl = oldDtd->in_eldecl; + newDtd->scaffold = oldDtd->scaffold; + newDtd->contentStringLen = oldDtd->contentStringLen; + newDtd->scaffSize = oldDtd->scaffSize; + newDtd->scaffLevel = oldDtd->scaffLevel; + newDtd->scaffIndex = oldDtd->scaffIndex; + + return 1; +} /* End dtdCopy */ + +static int copyEntityTable(HASH_TABLE *newTable, + STRING_POOL *newPool, + const HASH_TABLE *oldTable, + XML_Parser parser) +{ + HASH_TABLE_ITER iter; + const XML_Char *cachedOldBase = 0; + const XML_Char *cachedNewBase = 0; + + hashTableIterInit(&iter, oldTable); for (;;) { ENTITY *newE; @@ -3056,54 +4164,181 @@ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) const ENTITY *oldE = (ENTITY 
*)hashTableIterNext(&iter); if (!oldE) break; - name = poolCopyString(&(newDtd->pool), oldE->name); + name = poolCopyString(newPool, oldE->name); if (!name) return 0; - newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY)); + newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId); + const XML_Char *tem = poolCopyString(newPool, oldE->systemId); if (!tem) return 0; newE->systemId = tem; if (oldE->base) { - if (oldE->base == oldDtd->base) - newE->base = newDtd->base; - tem = poolCopyString(&(newDtd->pool), oldE->base); - if (!tem) - return 0; - newE->base = tem; + if (oldE->base == cachedOldBase) + newE->base = cachedNewBase; + else { + cachedOldBase = oldE->base; + tem = poolCopyString(newPool, cachedOldBase); + if (!tem) + return 0; + cachedNewBase = newE->base = tem; + } } } else { - const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen); + const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); if (!tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { - const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation); + const XML_Char *tem = poolCopyString(newPool, oldE->notation); if (!tem) return 0; newE->notation = tem; } } - - newDtd->complete = oldDtd->complete; - newDtd->standalone = oldDtd->standalone; return 1; } +#define INIT_SIZE 64 + +static +int keyeq(KEY s1, KEY s2) +{ + for (; *s1 == *s2; s1++, s2++) + if (*s1 == 0) + return 1; + return 0; +} + +static +unsigned long hash(KEY s) +{ + unsigned long h = 0; + while (*s) + h = (h << 5) + h + (unsigned char)*s++; + return h; +} + +static +NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) +{ + size_t i; + if (table->size == 0) { + size_t tsize; + + if (!createSize) + return 0; + tsize = INIT_SIZE * sizeof(NAMED *); + table->v = table->mem->malloc_fcn(tsize); + if 
(!table->v) + return 0; + memset(table->v, 0, tsize); + table->size = INIT_SIZE; + table->usedLim = INIT_SIZE / 2; + i = hash(name) & (table->size - 1); + } + else { + unsigned long h = hash(name); + for (i = h & (table->size - 1); + table->v[i]; + i == 0 ? i = table->size - 1 : --i) { + if (keyeq(name, table->v[i]->name)) + return table->v[i]; + } + if (!createSize) + return 0; + if (table->used == table->usedLim) { + /* check for overflow */ + size_t newSize = table->size * 2; + size_t tsize = newSize * sizeof(NAMED *); + NAMED **newV = table->mem->malloc_fcn(tsize); + if (!newV) + return 0; + memset(newV, 0, tsize); + for (i = 0; i < table->size; i++) + if (table->v[i]) { + size_t j; + for (j = hash(table->v[i]->name) & (newSize - 1); + newV[j]; + j == 0 ? j = newSize - 1 : --j) + ; + newV[j] = table->v[i]; + } + table->mem->free_fcn(table->v); + table->v = newV; + table->size = newSize; + table->usedLim = newSize/2; + for (i = h & (table->size - 1); + table->v[i]; + i == 0 ? i = table->size - 1 : --i) + ; + } + } + table->v[i] = table->mem->malloc_fcn(createSize); + if (!table->v[i]) + return 0; + memset(table->v[i], 0, createSize); + table->v[i]->name = name; + (table->used)++; + return table->v[i]; +} + +static +void hashTableDestroy(HASH_TABLE *table) +{ + size_t i; + for (i = 0; i < table->size; i++) { + NAMED *p = table->v[i]; + if (p) + table->mem->free_fcn(p); + } + if (table->v) + table->mem->free_fcn(table->v); +} + +static +void hashTableInit(HASH_TABLE *p, XML_Memory_Handling_Suite *ms) +{ + p->size = 0; + p->usedLim = 0; + p->used = 0; + p->v = 0; + p->mem = ms; +} + static -void poolInit(STRING_POOL *pool) +void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) +{ + iter->p = table->v; + iter->end = iter->p + table->size; +} + +static +NAMED *hashTableIterNext(HASH_TABLE_ITER *iter) +{ + while (iter->p != iter->end) { + NAMED *tem = *(iter->p)++; + if (tem) + return tem; + } + return 0; +} + + +static +void poolInit(STRING_POOL 
*pool, XML_Memory_Handling_Suite *ms) { pool->blocks = 0; pool->freeBlocks = 0; pool->start = 0; pool->ptr = 0; pool->end = 0; + pool->mem = ms; } static @@ -3132,14 +4367,14 @@ void poolDestroy(STRING_POOL *pool) BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; - free(p); + pool->mem->free_fcn(p); p = tem; } pool->blocks = 0; p = pool->freeBlocks; while (p) { BLOCK *tem = p->next; - free(p); + pool->mem->free_fcn(p); p = tem; } pool->freeBlocks = 0; @@ -3190,6 +4425,17 @@ static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int } static +const XML_Char *poolAppendString(STRING_POOL *pool, const XML_Char *s) +{ + while (*s) { + if (!poolAppendChar(pool, *s)) + return 0; + s++; + } + return pool->start; +} /* End poolAppendString */ + +static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { @@ -3228,7 +4474,7 @@ int poolGrow(STRING_POOL *pool) } if (pool->blocks && pool->start == pool->blocks->s) { int blockSize = (pool->end - pool->start)*2; - pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); + pool->blocks = pool->mem->realloc_fcn(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); if (!pool->blocks) return 0; pool->blocks->size = blockSize; @@ -3243,16 +4489,139 @@ int poolGrow(STRING_POOL *pool) blockSize = INIT_BLOCK_SIZE; else blockSize *= 2; - tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); + tem = pool->mem->malloc_fcn(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); if (!tem) return 0; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; - memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); + if (pool->ptr != pool->start) + memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; } return 1; } + +static int +nextScaffoldPart(XML_Parser parser) 
+{ + CONTENT_SCAFFOLD * me; + int next; + + if (! dtd.scaffIndex) { + dtd.scaffIndex = MALLOC(groupSize * sizeof(int)); + if (! dtd.scaffIndex) + return -1; + dtd.scaffIndex[0] = 0; + } + + if (dtd.scaffCount >= dtd.scaffSize) { + if (dtd.scaffold) { + dtd.scaffSize *= 2; + dtd.scaffold = (CONTENT_SCAFFOLD *) REALLOC(dtd.scaffold, + dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + } + else { + dtd.scaffSize = 32; + dtd.scaffold = (CONTENT_SCAFFOLD *) MALLOC(dtd.scaffSize * sizeof(CONTENT_SCAFFOLD)); + } + if (! dtd.scaffold) + return -1; + } + next = dtd.scaffCount++; + me = &dtd.scaffold[next]; + if (dtd.scaffLevel) { + CONTENT_SCAFFOLD *parent = &dtd.scaffold[dtd.scaffIndex[dtd.scaffLevel - 1]]; + if (parent->lastchild) { + dtd.scaffold[parent->lastchild].nextsib = next; + } + if (! parent->childcnt) + parent->firstchild = next; + parent->lastchild = next; + parent->childcnt++; + } + me->firstchild = me->lastchild = me->childcnt = me->nextsib = 0; + return next; +} /* End nextScaffoldPart */ + +static void +build_node (XML_Parser parser, + int src_node, + XML_Content *dest, + XML_Content **contpos, + char **strpos) +{ + dest->type = dtd.scaffold[src_node].type; + dest->quant = dtd.scaffold[src_node].quant; + if (dest->type == XML_CTYPE_NAME) { + const char *src; + dest->name = *strpos; + src = dtd.scaffold[src_node].name; + for (;;) { + *(*strpos)++ = *src; + if (! 
*src) + break; + src++; + } + dest->numchildren = 0; + dest->children = 0; + } + else { + unsigned int i; + int cn; + dest->numchildren = dtd.scaffold[src_node].childcnt; + dest->children = *contpos; + *contpos += dest->numchildren; + for (i = 0, cn = dtd.scaffold[src_node].firstchild; + i < dest->numchildren; + i++, cn = dtd.scaffold[cn].nextsib) { + build_node(parser, cn, &(dest->children[i]), contpos, strpos); + } + dest->name = 0; + } +} /* End build_node */ + +static XML_Content * +build_model (XML_Parser parser) +{ + XML_Content *ret; + XML_Content *cpos; + char * str; + int allocsize = dtd.scaffCount * sizeof(XML_Content) + dtd.contentStringLen; + + ret = MALLOC(allocsize); + if (! ret) + return 0; + + str = (char *) (&ret[dtd.scaffCount]); + cpos = &ret[1]; + + build_node(parser, 0, ret, &cpos, &str); + return ret; +} /* End build_model */ + +static ELEMENT_TYPE * +getElementType(XML_Parser parser, + const ENCODING *enc, + const char *ptr, + const char *end) +{ + const XML_Char *name = poolStoreString(&dtd.pool, enc, ptr, end); + ELEMENT_TYPE *ret; + + if (! name) + return 0; + ret = (ELEMENT_TYPE *) lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE)); + if (! 
ret) + return 0; + if (ret->name != name) + poolDiscard(&dtd.pool); + else { + poolFinish(&dtd.pool); + if (!setElementTypePrefix(parser, ret)) + return 0; + } + return ret; +} /* End getElementType */ diff --git a/ext/xml/expat/xmlparse/Makefile.in b/ext/xml/expat/xmlparse/Makefile.in deleted file mode 100644 index 77df7598d0..0000000000 --- a/ext/xml/expat/xmlparse/Makefile.in +++ /dev/null @@ -1,7 +0,0 @@ - -LTLIBRARY_NAME = libexpat_parse.la -LTLIBRARY_SOURCES = xmlparse.c hashtable.c - -EXTRA_INCLUDES = -I$(srcdir)/../xmltok - -include $(top_srcdir)/build/dynlib.mk diff --git a/ext/xml/expat/xmlparse/expat_hashtable.h b/ext/xml/expat/xmlparse/expat_hashtable.h deleted file mode 100644 index df8ab8a4c8..0000000000 --- a/ext/xml/expat/xmlparse/expat_hashtable.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. 
If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. -*/ - - -#include <stddef.h> - -#ifdef XML_UNICODE - -#ifdef XML_UNICODE_WCHAR_T -typedef const wchar_t *KEY; -#else /* not XML_UNICODE_WCHAR_T */ -typedef const unsigned short *KEY; -#endif /* not XML_UNICODE_WCHAR_T */ - -#else /* not XML_UNICODE */ - -typedef const char *KEY; - -#endif /* not XML_UNICODE */ - -typedef struct { - KEY name; -} NAMED; - -typedef struct { - NAMED **v; - size_t size; - size_t used; - size_t usedLim; -} HASH_TABLE; - -NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize); -void hashTableInit(HASH_TABLE *); -void hashTableDestroy(HASH_TABLE *); - -typedef struct { - NAMED **p; - NAMED **end; -} HASH_TABLE_ITER; - -void hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); -NAMED *hashTableIterNext(HASH_TABLE_ITER *); diff --git a/ext/xml/expat/xmlparse/hashtable.c b/ext/xml/expat/xmlparse/hashtable.c deleted file mode 100644 index fc99ca7c44..0000000000 --- a/ext/xml/expat/xmlparse/hashtable.c +++ /dev/null @@ -1,153 +0,0 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -csompliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. 
If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. -*/ - -#include "php_compat.h" - -#include "xmldef.h" - -#ifdef XML_UNICODE_WCHAR_T -#ifndef XML_UNICODE -#define XML_UNICODE -#endif -#endif - -#include "expat_hashtable.h" - -#define INIT_SIZE 64 - -static -int keyeq(KEY s1, KEY s2) -{ - for (; *s1 == *s2; s1++, s2++) - if (*s1 == 0) - return 1; - return 0; -} - -static -unsigned long hash(KEY s) -{ - unsigned long h = 0; - while (*s) - h = (h << 5) + h + (unsigned char)*s++; - return h; -} - -NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) -{ - size_t i; - if (table->size == 0) { - if (!createSize) - return 0; - table->v = calloc(INIT_SIZE, sizeof(NAMED *)); - if (!table->v) - return 0; - table->size = INIT_SIZE; - table->usedLim = INIT_SIZE / 2; - i = hash(name) & (table->size - 1); - } - else { - unsigned long h = hash(name); - for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? i = table->size - 1 : --i) { - if (keyeq(name, table->v[i]->name)) - return table->v[i]; - } - if (!createSize) - return 0; - if (table->used == table->usedLim) { - /* check for overflow */ - size_t newSize = table->size * 2; - NAMED **newV = calloc(newSize, sizeof(NAMED *)); - if (!newV) - return 0; - for (i = 0; i < table->size; i++) - if (table->v[i]) { - size_t j; - for (j = hash(table->v[i]->name) & (newSize - 1); - newV[j]; - j == 0 ? j = newSize - 1 : --j) - ; - newV[j] = table->v[i]; - } - free(table->v); - table->v = newV; - table->size = newSize; - table->usedLim = newSize/2; - for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? 
i = table->size - 1 : --i) - ; - } - } - table->v[i] = calloc(1, createSize); - if (!table->v[i]) - return 0; - table->v[i]->name = name; - (table->used)++; - return table->v[i]; -} - -void hashTableDestroy(HASH_TABLE *table) -{ - size_t i; - for (i = 0; i < table->size; i++) { - NAMED *p = table->v[i]; - if (p) - free(p); - } - free(table->v); -} - -void hashTableInit(HASH_TABLE *p) -{ - p->size = 0; - p->usedLim = 0; - p->used = 0; - p->v = 0; -} - -void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) -{ - iter->p = table->v; - iter->end = iter->p + table->size; -} - -NAMED *hashTableIterNext(HASH_TABLE_ITER *iter) -{ - while (iter->p != iter->end) { - NAMED *tem = *(iter->p)++; - if (tem) - return tem; - } - return 0; -} - diff --git a/ext/xml/expat/xmlparse/xmlparse.dsp b/ext/xml/expat/xmlparse/xmlparse.dsp deleted file mode 100644 index 1c019dae4b..0000000000 --- a/ext/xml/expat/xmlparse/xmlparse.dsp +++ /dev/null @@ -1,202 +0,0 @@ -# Microsoft Developer Studio Project File - Name="xmlparse" - Package Owner=<4>
-# Microsoft Developer Studio Generated Build File, Format Version 6.00
-# ** DO NOT EDIT **
-
-# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
-
-CFG=xmlparse - Win32 Release
-!MESSAGE This is not a valid makefile. To build this project using NMAKE,
-!MESSAGE use the Export Makefile command and run
-!MESSAGE
-!MESSAGE NMAKE /f "xmlparse.mak".
-!MESSAGE
-!MESSAGE You can specify a configuration when running NMAKE
-!MESSAGE by defining the macro CFG on the command line. For example:
-!MESSAGE
-!MESSAGE NMAKE /f "xmlparse.mak" CFG="xmlparse - Win32 Release"
-!MESSAGE
-!MESSAGE Possible choices for configuration are:
-!MESSAGE
-!MESSAGE "xmlparse - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library")
-!MESSAGE "xmlparse - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library")
-!MESSAGE "xmlparse - Win32 MinSize" (based on "Win32 (x86) Dynamic-Link Library")
-!MESSAGE
-
-# Begin Project
-# PROP AllowPerConfigDependencies 0
-# PROP Scc_ProjName ""
-# PROP Scc_LocalPath ""
-CPP=cl.exe
-MTL=midl.exe
-RSC=rc.exe
-
-!IF "$(CFG)" == "xmlparse - Win32 Release"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir ".\Release"
-# PROP BASE Intermediate_Dir ".\Release"
-# PROP BASE Target_Dir "."
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir ".\Release"
-# PROP Intermediate_Dir ".\Release"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir "."
-# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /I "..\xmltok" /I "..\xmlwf" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllexport) /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "XML_NS" /YX /FD /c
-# ADD BASE MTL /nologo /D "NDEBUG" /win32
-# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
-# ADD BASE RSC /l 0x809 /d "NDEBUG"
-# ADD RSC /l 0x809 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmlparse.dll" /link50compat
-# SUBTRACT LINK32 /pdb:none
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 Debug"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 1
-# PROP BASE Output_Dir ".\Debug"
-# PROP BASE Intermediate_Dir ".\Debug"
-# PROP BASE Target_Dir "."
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 1
-# PROP Output_Dir ".\Debug"
-# PROP Intermediate_Dir ".\Debug"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir "."
-# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /MDd /W3 /Gm /GX /ZI /Od /I "..\xmltok" /I "..\xmlwf" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllexport) /YX /FD /c
-# ADD BASE MTL /nologo /D "_DEBUG" /win32
-# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
-# ADD BASE RSC /l 0x809 /d "_DEBUG"
-# ADD RSC /l 0x809 /d "_DEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /subsystem:windows /dll /debug /machine:I386 /out:"..\dbgbin\xmlparse.dll"
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "MinSize"
-# PROP BASE Intermediate_Dir "MinSize"
-# PROP BASE Ignore_Export_Lib 0
-# PROP BASE Target_Dir ""
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "MinSize"
-# PROP Intermediate_Dir "MinSize"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir ""
-# ADD BASE CPP /nologo /MD /W3 /GX /O2 /I "..\xmltok" /I "..\xmlwf" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllexport) /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "XML_NS" /YX /FD /c
-# ADD CPP /nologo /W3 /GX /O1 /I "..\xmltok" /I "..\xmlwf" /D "XML_MIN_SIZE" /D "XML_WINLIB" /D XMLPARSEAPI=__declspec(dllexport) /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /YX /FD /c
-# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
-# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
-# ADD BASE RSC /l 0x809 /d "NDEBUG"
-# ADD RSC /l 0x809 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmlparse.dll"
-# SUBTRACT BASE LINK32 /profile
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /entry:"DllMain" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmlparse.dll"
-# SUBTRACT LINK32 /profile /nodefaultlib
-
-!ENDIF
-
-# Begin Target
-
-# Name "xmlparse - Win32 Release"
-# Name "xmlparse - Win32 Debug"
-# Name "xmlparse - Win32 MinSize"
-# Begin Group "Source Files"
-
-# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90"
-# Begin Source File
-
-SOURCE=..\xmltok\dllmain.c
-
-!IF "$(CFG)" == "xmlparse - Win32 Release"
-
-# PROP Exclude_From_Build 1
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 Debug"
-
-# PROP Exclude_From_Build 1
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize"
-
-!ENDIF
-
-# End Source File
-# Begin Source File
-
-SOURCE=.\hashtable.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmlparse.c
-# End Source File
-# Begin Source File
-
-SOURCE=..\xmltok\xmlrole.c
-
-!IF "$(CFG)" == "xmlparse - Win32 Release"
-
-# PROP Exclude_From_Build 1
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 Debug"
-
-# PROP Exclude_From_Build 1
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize"
-
-!ENDIF
-
-# End Source File
-# Begin Source File
-
-SOURCE=..\xmltok\xmltok.c
-
-!IF "$(CFG)" == "xmlparse - Win32 Release"
-
-# PROP Exclude_From_Build 1
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 Debug"
-
-# PROP Exclude_From_Build 1
-
-!ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize"
-
-!ENDIF
-
-# End Source File
-# End Group
-# Begin Group "Header Files"
-
-# PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd"
-# Begin Source File
-
-SOURCE=.\hashtable.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmlparse.h
-# End Source File
-# End Group
-# Begin Group "Resource Files"
-
-# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe"
-# End Group
-# End Target
-# End Project
diff --git a/ext/xml/expat/xmlparse/xmlparse.h b/ext/xml/expat/xmlparse/xmlparse.h deleted file mode 100644 index f2f9c9be1c..0000000000 --- a/ext/xml/expat/xmlparse/xmlparse.h +++ /dev/null @@ -1,482 +0,0 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. -*/ - -#ifndef XmlParse_INCLUDED -#define XmlParse_INCLUDED 1 - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef XMLPARSEAPI -#define XMLPARSEAPI /* as nothing */ -#endif - -typedef void *XML_Parser; - -#ifdef XML_UNICODE_WCHAR_T - -/* XML_UNICODE_WCHAR_T will work only if sizeof(wchar_t) == 2 and wchar_t -uses Unicode. 
*/ -/* Information is UTF-16 encoded as wchar_ts */ - -#ifndef XML_UNICODE -#define XML_UNICODE -#endif - -#include <stddef.h> -typedef wchar_t XML_Char; -typedef wchar_t XML_LChar; - -#else /* not XML_UNICODE_WCHAR_T */ - -#ifdef XML_UNICODE - -/* Information is UTF-16 encoded as unsigned shorts */ -typedef unsigned short XML_Char; -typedef char XML_LChar; - -#else /* not XML_UNICODE */ - -/* Information is UTF-8 encoded. */ -typedef char XML_Char; -typedef char XML_LChar; - -#endif /* not XML_UNICODE */ - -#endif /* not XML_UNICODE_WCHAR_T */ - - -/* Constructs a new parser; encoding is the encoding specified by the external -protocol or null if there is none specified. */ - -XML_Parser XMLPARSEAPI -XML_ParserCreate(const XML_Char *encoding); - -/* Constructs a new parser and namespace processor. Element type names -and attribute names that belong to a namespace will be expanded; -unprefixed attribute names are never expanded; unprefixed element type -names are expanded only if there is a default namespace. The expanded -name is the concatenation of the namespace URI, the namespace separator character, -and the local part of the name. If the namespace separator is '\0' then -the namespace URI and the local part will be concatenated without any -separator. When a namespace is not declared, the name and prefix will be -passed through without expansion. */ - -XML_Parser XMLPARSEAPI -XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); - - -/* atts is array of name/value pairs, terminated by 0; - names and values are 0 terminated. */ - -typedef void (*XML_StartElementHandler)(void *userData, - const XML_Char *name, - const XML_Char **atts); - -typedef void (*XML_EndElementHandler)(void *userData, - const XML_Char *name); - -/* s is not 0 terminated. 
*/ -typedef void (*XML_CharacterDataHandler)(void *userData, - const XML_Char *s, - int len); - -/* target and data are 0 terminated */ -typedef void (*XML_ProcessingInstructionHandler)(void *userData, - const XML_Char *target, - const XML_Char *data); - -/* data is 0 terminated */ -typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data); - -typedef void (*XML_StartCdataSectionHandler)(void *userData); -typedef void (*XML_EndCdataSectionHandler)(void *userData); - -/* This is called for any characters in the XML document for -which there is no applicable handler. This includes both -characters that are part of markup which is of a kind that is -not reported (comments, markup declarations), or characters -that are part of a construct which could be reported but -for which no handler has been supplied. The characters are passed -exactly as they were in the XML document except that -they will be encoded in UTF-8. Line boundaries are not normalized. -Note that a byte order mark character is not passed to the default handler. -There are no guarantees about how characters are divided between calls -to the default handler: for example, a comment might be split between -multiple calls. */ - -typedef void (*XML_DefaultHandler)(void *userData, - const XML_Char *s, - int len); - -/* This is called for a declaration of an unparsed (NDATA) -entity. The base argument is whatever was set by XML_SetBase. -The entityName, systemId and notationName arguments will never be null. -The other arguments may be. */ - -typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, - const XML_Char *entityName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId, - const XML_Char *notationName); - -/* This is called for a declaration of notation. -The base argument is whatever was set by XML_SetBase. -The notationName will never be null. The other arguments can be. 
*/ - -typedef void (*XML_NotationDeclHandler)(void *userData, - const XML_Char *notationName, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); - -/* When namespace processing is enabled, these are called once for -each namespace declaration. The call to the start and end element -handlers occur between the calls to the start and end namespace -declaration handlers. For an xmlns attribute, prefix will be null. -For an xmlns="" attribute, uri will be null. */ - -typedef void (*XML_StartNamespaceDeclHandler)(void *userData, - const XML_Char *prefix, - const XML_Char *uri); - -typedef void (*XML_EndNamespaceDeclHandler)(void *userData, - const XML_Char *prefix); - -/* This is called if the document is not standalone (it has an -external subset or a reference to a parameter entity, but does not -have standalone="yes"). If this handler returns 0, then processing -will not continue, and the parser will return a -XML_ERROR_NOT_STANDALONE error. */ - -typedef int (*XML_NotStandaloneHandler)(void *userData); - -/* This is called for a reference to an external parsed general entity. -The referenced entity is not automatically parsed. -The application can parse it immediately or later using -XML_ExternalEntityParserCreate. -The parser argument is the parser parsing the entity containing the reference; -it can be passed as the parser argument to XML_ExternalEntityParserCreate. -The systemId argument is the system identifier as specified in the entity declaration; -it will not be null. -The base argument is the system identifier that should be used as the base for -resolving systemId if systemId was relative; this is set by XML_SetBase; -it may be null. -The publicId argument is the public identifier as specified in the entity declaration, -or null if none was specified; the whitespace in the public identifier -will have been normalized as required by the XML spec. 
-The context argument specifies the parsing context in the format -expected by the context argument to -XML_ExternalEntityParserCreate; context is valid only until the handler -returns, so if the referenced entity is to be parsed later, it must be copied. -The handler should return 0 if processing should not continue because of -a fatal error in the handling of the external entity. -In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING -error. -Note that unlike other handlers the first argument is the parser, not userData. */ - -typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, - const XML_Char *context, - const XML_Char *base, - const XML_Char *systemId, - const XML_Char *publicId); - -/* This structure is filled in by the XML_UnknownEncodingHandler -to provide information to the parser about encodings that are unknown -to the parser. -The map[b] member gives information about byte sequences -whose first byte is b. -If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c. -If map[b] is -1, then the byte sequence is malformed. -If map[b] is -n, where n >= 2, then b is the first byte of an n-byte -sequence that encodes a single Unicode scalar value. -The data member will be passed as the first argument to the convert function. -The convert function is used to convert multibyte sequences; -s will point to a n-byte sequence where map[(unsigned char)*s] == -n. -The convert function must return the Unicode scalar value -represented by this byte sequence or -1 if the byte sequence is malformed. -The convert function may be null if the encoding is a single-byte encoding, -that is if map[b] >= -1 for all bytes b. -When the parser is finished with the encoding, then if release is not null, -it will call release passing it the data member; -once release has been called, the convert function will not be called again. 
- -Expat places certain restrictions on the encodings that are supported -using this mechanism. - -1. Every ASCII character that can appear in a well-formed XML document, -other than the characters - - $@\^`{}~ - -must be represented by a single byte, and that byte must be the -same byte that represents that character in ASCII. - -2. No character may require more than 4 bytes to encode. - -3. All characters encoded must have Unicode scalar values <= 0xFFFF, -(ie characters that would be encoded by surrogates in UTF-16 -are not allowed). Note that this restriction doesn't apply to -the built-in support for UTF-8 and UTF-16. - -4. No Unicode character may be encoded by more than one distinct sequence -of bytes. */ - -typedef struct { - int map[256]; - void *data; - int (*convert)(void *data, const char *s); - void (*release)(void *data); -} XML_Encoding; - -/* This is called for an encoding that is unknown to the parser. -The encodingHandlerData argument is that which was passed as the -second argument to XML_SetUnknownEncodingHandler. -The name argument gives the name of the encoding as specified in -the encoding declaration. -If the callback can provide information about the encoding, -it must fill in the XML_Encoding structure, and return 1. -Otherwise it must return 0. -If info does not describe a suitable encoding, -then the parser will return an XML_UNKNOWN_ENCODING error. 
*/ - -typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData, - const XML_Char *name, - XML_Encoding *info); - -void XMLPARSEAPI -XML_SetElementHandler(XML_Parser parser, - XML_StartElementHandler start, - XML_EndElementHandler end); - -void XMLPARSEAPI -XML_SetCharacterDataHandler(XML_Parser parser, - XML_CharacterDataHandler handler); - -void XMLPARSEAPI -XML_SetProcessingInstructionHandler(XML_Parser parser, - XML_ProcessingInstructionHandler handler); -void XMLPARSEAPI -XML_SetCommentHandler(XML_Parser parser, - XML_CommentHandler handler); - -void XMLPARSEAPI -XML_SetCdataSectionHandler(XML_Parser parser, - XML_StartCdataSectionHandler start, - XML_EndCdataSectionHandler end); - -/* This sets the default handler and also inhibits expansion of internal entities. -The entity reference will be passed to the default handler. */ - -void XMLPARSEAPI -XML_SetDefaultHandler(XML_Parser parser, - XML_DefaultHandler handler); - -/* This sets the default handler but does not inhibit expansion of internal entities. -The entity reference will not be passed to the default handler. */ - -void XMLPARSEAPI -XML_SetDefaultHandlerExpand(XML_Parser parser, - XML_DefaultHandler handler); - -void XMLPARSEAPI -XML_SetUnparsedEntityDeclHandler(XML_Parser parser, - XML_UnparsedEntityDeclHandler handler); - -void XMLPARSEAPI -XML_SetNotationDeclHandler(XML_Parser parser, - XML_NotationDeclHandler handler); - -void XMLPARSEAPI -XML_SetNamespaceDeclHandler(XML_Parser parser, - XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end); - -void XMLPARSEAPI -XML_SetNotStandaloneHandler(XML_Parser parser, - XML_NotStandaloneHandler handler); - -void XMLPARSEAPI -XML_SetExternalEntityRefHandler(XML_Parser parser, - XML_ExternalEntityRefHandler handler); - -/* If a non-null value for arg is specified here, then it will be passed -as the first argument to the external entity ref handler instead -of the parser object. 
*/ -void XMLPARSEAPI -XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg); - -void XMLPARSEAPI -XML_SetUnknownEncodingHandler(XML_Parser parser, - XML_UnknownEncodingHandler handler, - void *encodingHandlerData); - -/* This can be called within a handler for a start element, end element, -processing instruction or character data. It causes the corresponding -markup to be passed to the default handler. */ -void XMLPARSEAPI XML_DefaultCurrent(XML_Parser parser); - -/* This value is passed as the userData argument to callbacks. */ -void XMLPARSEAPI -XML_SetUserData(XML_Parser parser, void *userData); - -/* Returns the last value set by XML_SetUserData or null. */ -#define XML_GetUserData(parser) (*(void **)(parser)) - -/* This is equivalent to supplying an encoding argument -to XML_CreateParser. It must not be called after XML_Parse -or XML_ParseBuffer. */ - -int XMLPARSEAPI -XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); - -/* If this function is called, then the parser will be passed -as the first argument to callbacks instead of userData. -The userData will still be accessible using XML_GetUserData. */ - -void XMLPARSEAPI -XML_UseParserAsHandlerArg(XML_Parser parser); - -/* Sets the base to be used for resolving relative URIs in system identifiers in -declarations. Resolving relative identifiers is left to the application: -this value will be passed through as the base argument to the -XML_ExternalEntityRefHandler, XML_NotationDeclHandler -and XML_UnparsedEntityDeclHandler. The base argument will be copied. -Returns zero if out of memory, non-zero otherwise. */ - -int XMLPARSEAPI -XML_SetBase(XML_Parser parser, const XML_Char *base); - -const XML_Char XMLPARSEAPI * -XML_GetBase(XML_Parser parser); - -/* Returns the number of the attributes passed in last call to the -XML_StartElementHandler that were specified in the start-tag rather -than defaulted. 
*/ - -int XMLPARSEAPI XML_GetSpecifiedAttributeCount(XML_Parser parser); - -/* Parses some input. Returns 0 if a fatal error is detected. -The last call to XML_Parse must have isFinal true; -len may be zero for this call (or any other). */ -int XMLPARSEAPI -XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); - -void XMLPARSEAPI * -XML_GetBuffer(XML_Parser parser, int len); - -int XMLPARSEAPI -XML_ParseBuffer(XML_Parser parser, int len, int isFinal); - -/* Creates an XML_Parser object that can parse an external general entity; -context is a '\0'-terminated string specifying the parse context; -encoding is a '\0'-terminated string giving the name of the externally specified encoding, -or null if there is no externally specified encoding. -The context string consists of a sequence of tokens separated by formfeeds (\f); -a token consisting of a name specifies that the general entity of the name -is open; a token of the form prefix=uri specifies the namespace for a particular -prefix; a token of the form =uri specifies the default namespace. -This can be called at any point after the first call to an ExternalEntityRefHandler -so longer as the parser has not yet been freed. -The new parser is completely independent and may safely be used in a separate thread. -The handlers and userData are initialized from the parser argument. -Returns 0 if out of memory. Otherwise returns a new XML_Parser object. 
*/ -XML_Parser XMLPARSEAPI -XML_ExternalEntityParserCreate(XML_Parser parser, - const XML_Char *context, - const XML_Char *encoding); - -enum XML_Error { - XML_ERROR_NONE, - XML_ERROR_NO_MEMORY, - XML_ERROR_SYNTAX, - XML_ERROR_NO_ELEMENTS, - XML_ERROR_INVALID_TOKEN, - XML_ERROR_UNCLOSED_TOKEN, - XML_ERROR_PARTIAL_CHAR, - XML_ERROR_TAG_MISMATCH, - XML_ERROR_DUPLICATE_ATTRIBUTE, - XML_ERROR_JUNK_AFTER_DOC_ELEMENT, - XML_ERROR_PARAM_ENTITY_REF, - XML_ERROR_UNDEFINED_ENTITY, - XML_ERROR_RECURSIVE_ENTITY_REF, - XML_ERROR_ASYNC_ENTITY, - XML_ERROR_BAD_CHAR_REF, - XML_ERROR_BINARY_ENTITY_REF, - XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, - XML_ERROR_MISPLACED_XML_PI, - XML_ERROR_UNKNOWN_ENCODING, - XML_ERROR_INCORRECT_ENCODING, - XML_ERROR_UNCLOSED_CDATA_SECTION, - XML_ERROR_EXTERNAL_ENTITY_HANDLING, - XML_ERROR_NOT_STANDALONE -}; - -/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode -returns information about the error. */ - -enum XML_Error XMLPARSEAPI XML_GetErrorCode(XML_Parser parser); - -/* These functions return information about the current parse location. -They may be called when XML_Parse or XML_ParseBuffer return 0; -in this case the location is the location of the character at which -the error was detected. -They may also be called from any other callback called to report -some parse event; in this the location is the location of the first -of the sequence of characters that generated the event. */ - -int XMLPARSEAPI XML_GetCurrentLineNumber(XML_Parser parser); -int XMLPARSEAPI XML_GetCurrentColumnNumber(XML_Parser parser); -long XMLPARSEAPI XML_GetCurrentByteIndex(XML_Parser parser); - -/* Return the number of bytes in the current event. -Returns 0 if the event is in an internal entity. */ - -int XMLPARSEAPI XML_GetCurrentByteCount(XML_Parser parser); - -/* For backwards compatibility with previous versions. 
*/ -#define XML_GetErrorLineNumber XML_GetCurrentLineNumber -#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber -#define XML_GetErrorByteIndex XML_GetCurrentByteIndex - -/* Frees memory used by the parser. */ -void XMLPARSEAPI -XML_ParserFree(XML_Parser parser); - -/* Returns a string describing the error. */ -const XML_LChar XMLPARSEAPI *XML_ErrorString(int code); - -#ifdef __cplusplus -} -#endif - -#endif /* not XmlParse_INCLUDED */ diff --git a/ext/xml/expat/xmltok/xmlrole.c b/ext/xml/expat/xmlrole.c index 1a49e5342a..51046b72d6 100644 --- a/ext/xml/expat/xmltok/xmlrole.c +++ b/ext/xml/expat/xmlrole.c @@ -1,37 +1,21 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. 
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ +static char RCSId[] + = "$Header$"; + #include "php_compat.h" -#include "xmldef.h" +#ifdef COMPILED_FROM_DSP +# include "winconfig.h" +#else +# include <php_config.h> +#endif /* ndef COMPILED_FROM_DSP */ + #include "xmlrole.h" +#include "ascii.h" /* Doesn't check: @@ -40,11 +24,44 @@ your version of this file under either the MPL or the GPL. */ +static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' }; +static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; +static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; +static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; +static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; +static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; +static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; +static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; +static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' }; +static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; +static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; +static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; +static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; +static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; +static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_NMTOKEN[] = { 
ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; +static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; +static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' }; +static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; +static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; +static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' }; +static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; + #ifndef MIN_BYTES_PER_CHAR #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif -typedef int PROLOG_HANDLER(struct prolog_state *state, +#ifdef XML_DTD +#define setTopLevel(state) \ + ((state)->handler = ((state)->documentEntity \ + ? internalSubset \ + : externalSubset1)) +#else /* not XML_DTD */ +#define setTopLevel(state) ((state)->handler = internalSubset) +#endif /* not XML_DTD */ + +typedef int PROLOG_HANDLER(PROLOG_STATE *state, int tok, const char *ptr, const char *end, @@ -61,11 +78,15 @@ static PROLOG_HANDLER attlist7, attlist8, attlist9, element0, element1, element2, element3, element4, element5, element6, element7, +#ifdef XML_DTD + externalSubset0, externalSubset1, + condSect0, condSect1, condSect2, +#endif /* XML_DTD */ declClose, error; static -int syntaxError(PROLOG_STATE *); +int common(PROLOG_STATE *state, int tok); static int prolog0(PROLOG_STATE *state, @@ -91,7 +112,8 @@ int prolog0(PROLOG_STATE *state, case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "DOCTYPE")) + end, + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_NONE; @@ -99,7 +121,7 @@ int prolog0(PROLOG_STATE *state, state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return 
common(state, tok); } static @@ -119,7 +141,8 @@ int prolog1(PROLOG_STATE *state, case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "DOCTYPE")) + end, + KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_NONE; @@ -127,7 +150,7 @@ int prolog1(PROLOG_STATE *state, state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } static @@ -147,7 +170,7 @@ int prolog2(PROLOG_STATE *state, state->handler = error; return XML_ROLE_INSTANCE_START; } - return syntaxError(state); + return common(state, tok); } static @@ -165,7 +188,7 @@ int doctype0(PROLOG_STATE *state, state->handler = doctype1; return XML_ROLE_DOCTYPE_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -180,22 +203,22 @@ int doctype1(PROLOG_STATE *state, return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; - return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = doctype3; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = doctype2; return XML_ROLE_NONE; } break; } - return syntaxError(state); + return common(state, tok); } static @@ -212,7 +235,7 @@ int doctype2(PROLOG_STATE *state, state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -229,7 +252,7 @@ int doctype3(PROLOG_STATE *state, state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -244,12 +267,12 @@ int doctype4(PROLOG_STATE *state, return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; 
- return XML_ROLE_NONE; + return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } - return syntaxError(state); + return common(state, tok); } static @@ -266,7 +289,7 @@ int doctype5(PROLOG_STATE *state, state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } - return syntaxError(state); + return common(state, tok); } static @@ -282,25 +305,29 @@ int internalSubset(PROLOG_STATE *state, case XML_TOK_DECL_OPEN: if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "ENTITY")) { + end, + KW_ENTITY)) { state->handler = entity0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "ATTLIST")) { + end, + KW_ATTLIST)) { state->handler = attlist0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "ELEMENT")) { + end, + KW_ELEMENT)) { state->handler = element0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), - "NOTATION")) { + end, + KW_NOTATION)) { state->handler = notation0; return XML_ROLE_NONE; } @@ -314,9 +341,56 @@ int internalSubset(PROLOG_STATE *state, state->handler = doctype5; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); +} + +#ifdef XML_DTD + +static +int externalSubset0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + state->handler = externalSubset1; + if (tok == XML_TOK_XML_DECL) + return XML_ROLE_TEXT_DECL; + return externalSubset1(state, tok, ptr, end, enc); +} + +static +int externalSubset1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_COND_SECT_OPEN: + state->handler = condSect0; + return XML_ROLE_NONE; + case XML_TOK_COND_SECT_CLOSE: + if (state->includeLevel == 0) + break; + state->includeLevel -= 1; + return XML_ROLE_NONE; + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case 
XML_TOK_CLOSE_BRACKET: + break; + case XML_TOK_NONE: + if (state->includeLevel) + break; + return XML_ROLE_NONE; + default: + return internalSubset(state, tok, ptr, end, enc); + } + return common(state, tok); } +#endif /* XML_DTD */ + static int entity0(PROLOG_STATE *state, int tok, @@ -334,7 +408,7 @@ int entity0(PROLOG_STATE *state, state->handler = entity2; return XML_ROLE_GENERAL_ENTITY_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -351,7 +425,7 @@ int entity1(PROLOG_STATE *state, state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -365,11 +439,11 @@ int entity2(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity4; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity3; return XML_ROLE_NONE; } @@ -378,7 +452,7 @@ int entity2(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -395,7 +469,7 @@ int entity3(PROLOG_STATE *state, state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } @@ -413,7 +487,7 @@ int entity4(PROLOG_STATE *state, state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -427,16 +501,16 @@ int entity5(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; - return XML_ROLE_NONE; + setTopLevel(state); + return XML_ROLE_ENTITY_COMPLETE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "NDATA")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = 
entity6; return XML_ROLE_NONE; } break; } - return syntaxError(state); + return common(state, tok); } static @@ -453,7 +527,7 @@ int entity6(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_NOTATION_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -467,11 +541,11 @@ int entity7(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity9; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity8; return XML_ROLE_NONE; } @@ -480,7 +554,7 @@ int entity7(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -497,7 +571,7 @@ int entity8(PROLOG_STATE *state, state->handler = entity9; return XML_ROLE_ENTITY_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -514,7 +588,7 @@ int entity9(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_ENTITY_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -531,7 +605,7 @@ int notation0(PROLOG_STATE *state, state->handler = notation1; return XML_ROLE_NOTATION_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -545,17 +619,17 @@ int notation1(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = notation3; return XML_ROLE_NONE; } - if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = notation2; return XML_ROLE_NONE; } break; } - return syntaxError(state); + return common(state, tok); } static @@ -572,7 +646,7 @@ int 
notation2(PROLOG_STATE *state, state->handler = notation4; return XML_ROLE_NOTATION_PUBLIC_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -589,7 +663,7 @@ int notation3(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_NOTATION_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -606,10 +680,10 @@ int notation4(PROLOG_STATE *state, state->handler = declClose; return XML_ROLE_NOTATION_SYSTEM_ID; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NOTATION_NO_SYSTEM_ID; } - return syntaxError(state); + return common(state, tok); } static @@ -627,7 +701,7 @@ int attlist0(PROLOG_STATE *state, state->handler = attlist1; return XML_ROLE_ATTLIST_ELEMENT_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -641,14 +715,14 @@ int attlist1(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = attlist2; return XML_ROLE_ATTRIBUTE_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -664,23 +738,23 @@ int attlist2(PROLOG_STATE *state, case XML_TOK_NAME: { static const char *types[] = { - "CDATA", - "ID", - "IDREF", - "IDREFS", - "ENTITY", - "ENTITIES", - "NMTOKEN", - "NMTOKENS", + KW_CDATA, + KW_ID, + KW_IDREF, + KW_IDREFS, + KW_ENTITY, + KW_ENTITIES, + KW_NMTOKEN, + KW_NMTOKENS, }; int i; for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++) - if (XmlNameMatchesAscii(enc, ptr, types[i])) { + if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { state->handler = attlist8; return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; } } - if (XmlNameMatchesAscii(enc, ptr, "NOTATION")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { state->handler = attlist5; return XML_ROLE_NONE; } @@ -689,7 +763,7 @@ int attlist2(PROLOG_STATE *state, 
state->handler = attlist3; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -708,7 +782,7 @@ int attlist3(PROLOG_STATE *state, state->handler = attlist4; return XML_ROLE_ATTRIBUTE_ENUM_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -728,7 +802,7 @@ int attlist4(PROLOG_STATE *state, state->handler = attlist3; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -745,7 +819,7 @@ int attlist5(PROLOG_STATE *state, state->handler = attlist6; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } @@ -763,7 +837,7 @@ int attlist6(PROLOG_STATE *state, state->handler = attlist7; return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -783,7 +857,7 @@ int attlist7(PROLOG_STATE *state, state->handler = attlist6; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } /* default value */ @@ -800,19 +874,22 @@ int attlist8(PROLOG_STATE *state, case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), - "IMPLIED")) { + end, + KW_IMPLIED)) { state->handler = attlist1; return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), - "REQUIRED")) { + end, + KW_REQUIRED)) { state->handler = attlist1; return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; } if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), - "FIXED")) { + end, + KW_FIXED)) { state->handler = attlist9; return XML_ROLE_NONE; } @@ -821,7 +898,7 @@ int attlist8(PROLOG_STATE *state, state->handler = attlist1; return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -838,7 +915,7 @@ int attlist9(PROLOG_STATE *state, state->handler = attlist1; return XML_ROLE_FIXED_ATTRIBUTE_VALUE; } - return syntaxError(state); + return common(state, tok); } static @@ -856,7 +933,7 @@ int 
element0(PROLOG_STATE *state, state->handler = element1; return XML_ROLE_ELEMENT_NAME; } - return syntaxError(state); + return common(state, tok); } static @@ -870,11 +947,11 @@ int element1(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: - if (XmlNameMatchesAscii(enc, ptr, "EMPTY")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { state->handler = declClose; return XML_ROLE_CONTENT_EMPTY; } - if (XmlNameMatchesAscii(enc, ptr, "ANY")) { + if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { state->handler = declClose; return XML_ROLE_CONTENT_ANY; } @@ -884,7 +961,7 @@ int element1(PROLOG_STATE *state, state->level = 1; return XML_ROLE_GROUP_OPEN; } - return syntaxError(state); + return common(state, tok); } static @@ -900,7 +977,8 @@ int element2(PROLOG_STATE *state, case XML_TOK_POUND_NAME: if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), - "PCDATA")) { + end, + KW_PCDATA)) { state->handler = element3; return XML_ROLE_CONTENT_PCDATA; } @@ -923,7 +1001,7 @@ int element2(PROLOG_STATE *state, state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } - return syntaxError(state); + return common(state, tok); } static @@ -937,6 +1015,8 @@ int element3(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_CLOSE_PAREN: + state->handler = declClose; + return XML_ROLE_GROUP_CLOSE; case XML_TOK_CLOSE_PAREN_ASTERISK: state->handler = declClose; return XML_ROLE_GROUP_CLOSE_REP; @@ -944,7 +1024,7 @@ int element3(PROLOG_STATE *state, state->handler = element4; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } static @@ -962,7 +1042,7 @@ int element4(PROLOG_STATE *state, state->handler = element5; return XML_ROLE_CONTENT_ELEMENT; } - return syntaxError(state); + return common(state, tok); } static @@ -982,7 +1062,7 @@ int element5(PROLOG_STATE *state, state->handler = element4; return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } 
static @@ -1012,7 +1092,7 @@ int element6(PROLOG_STATE *state, state->handler = element7; return XML_ROLE_CONTENT_ELEMENT_PLUS; } - return syntaxError(state); + return common(state, tok); } static @@ -1052,9 +1132,72 @@ int element7(PROLOG_STATE *state, state->handler = element6; return XML_ROLE_GROUP_CHOICE; } - return syntaxError(state); + return common(state, tok); +} + +#ifdef XML_DTD + +static +int condSect0(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_NAME: + if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { + state->handler = condSect1; + return XML_ROLE_NONE; + } + if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { + state->handler = condSect2; + return XML_ROLE_NONE; + } + break; + } + return common(state, tok); +} + +static +int condSect1(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_OPEN_BRACKET: + state->handler = externalSubset1; + state->includeLevel += 1; + return XML_ROLE_NONE; + } + return common(state, tok); +} + +static +int condSect2(PROLOG_STATE *state, + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) +{ + switch (tok) { + case XML_TOK_PROLOG_S: + return XML_ROLE_NONE; + case XML_TOK_OPEN_BRACKET: + state->handler = externalSubset1; + return XML_ROLE_IGNORE_SECT; + } + return common(state, tok); } +#endif /* XML_DTD */ + static int declClose(PROLOG_STATE *state, int tok, @@ -1066,10 +1209,10 @@ int declClose(PROLOG_STATE *state, case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: - state->handler = internalSubset; + setTopLevel(state); return XML_ROLE_NONE; } - return syntaxError(state); + return common(state, tok); } #if 0 @@ -1088,23 +1231,27 @@ int ignore(PROLOG_STATE *state, default: return XML_ROLE_NONE; } - return syntaxError(state); 
+ return common(state, tok); } #endif static int error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + int tok, + const char *ptr, + const char *end, + const ENCODING *enc) { return XML_ROLE_NONE; } static -int syntaxError(PROLOG_STATE *state) +int common(PROLOG_STATE *state, int tok) { +#ifdef XML_DTD + if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) + return XML_ROLE_INNER_PARAM_ENTITY_REF; +#endif state->handler = error; return XML_ROLE_ERROR; } @@ -1112,4 +1259,19 @@ int syntaxError(PROLOG_STATE *state) void XmlPrologStateInit(PROLOG_STATE *state) { state->handler = prolog0; +#ifdef XML_DTD + state->documentEntity = 1; + state->includeLevel = 0; +#endif /* XML_DTD */ } + +#ifdef XML_DTD + +void XmlPrologStateInitExternalEntity(PROLOG_STATE *state) +{ + state->handler = externalSubset0; + state->documentEntity = 0; + state->includeLevel = 0; +} + +#endif /* XML_DTD */ diff --git a/ext/xml/expat/xmltok/xmlrole.h b/ext/xml/expat/xmlrole.h index 877c40ba1f..db3ebc8484 100644 --- a/ext/xml/expat/xmltok/xmlrole.h +++ b/ext/xml/expat/xmlrole.h @@ -1,31 +1,6 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. 
- -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ #ifndef XmlRole_INCLUDED @@ -45,12 +20,14 @@ enum { XML_ROLE_DOCTYPE_NAME, XML_ROLE_DOCTYPE_SYSTEM_ID, XML_ROLE_DOCTYPE_PUBLIC_ID, + XML_ROLE_DOCTYPE_INTERNAL_SUBSET, XML_ROLE_DOCTYPE_CLOSE, XML_ROLE_GENERAL_ENTITY_NAME, XML_ROLE_PARAM_ENTITY_NAME, XML_ROLE_ENTITY_VALUE, XML_ROLE_ENTITY_SYSTEM_ID, XML_ROLE_ENTITY_PUBLIC_ID, + XML_ROLE_ENTITY_COMPLETE, XML_ROLE_ENTITY_NOTATION_NAME, XML_ROLE_NOTATION_NAME, XML_ROLE_NOTATION_SYSTEM_ID, @@ -87,6 +64,11 @@ enum { XML_ROLE_CONTENT_ELEMENT_REP, XML_ROLE_CONTENT_ELEMENT_OPT, XML_ROLE_CONTENT_ELEMENT_PLUS, +#ifdef XML_DTD + XML_ROLE_TEXT_DECL, + XML_ROLE_IGNORE_SECT, + XML_ROLE_INNER_PARAM_ENTITY_REF, +#endif /* XML_DTD */ XML_ROLE_PARAM_ENTITY_REF }; @@ -97,9 +79,16 @@ typedef struct prolog_state { const char *end, const ENCODING *enc); unsigned level; +#ifdef XML_DTD + unsigned includeLevel; + int documentEntity; +#endif /* XML_DTD */ } PROLOG_STATE; -void XMLTOKAPI XmlPrologStateInit(PROLOG_STATE *); +void XmlPrologStateInit(PROLOG_STATE *); +#ifdef XML_DTD +void XmlPrologStateInitExternalEntity(PROLOG_STATE *); +#endif /* XML_DTD */ #define XmlTokenRole(state, tok, ptr, end, enc) \ (((state)->handler)(state, tok, ptr, end, enc)) diff --git a/ext/xml/expat/xmltok/xmltok.c 
b/ext/xml/expat/xmltok.c index 487659e94b..78a845c831 100644 --- a/ext/xml/expat/xmltok/xmltok.c +++ b/ext/xml/expat/xmltok.c @@ -1,41 +1,31 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. 
*/ +static char RCSId[] + = "$Header$"; + #include "php_compat.h" -#include "xmldef.h" +#ifdef COMPILED_FROM_DSP +# include "winconfig.h" +#else +# include <php_config.h> +#endif /* ndef COMPILED_FROM_DSP */ + #include "xmltok.h" #include "nametab.h" +#ifdef XML_DTD +#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) +#else +#define IGNORE_SECTION_TOK_VTABLE /* as nothing */ +#endif + #define VTABLE1 \ - { PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \ + { PREFIX(prologTok), PREFIX(contentTok), \ + PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ PREFIX(sameName), \ PREFIX(nameMatchesAscii), \ @@ -188,6 +178,7 @@ struct normal_encoding { static int checkCharRefNumber(int); #include "xmltok_impl.h" +#include "ascii.h" #ifdef XML_MIN_SIZE #define sb_isNameMin isNever @@ -225,7 +216,7 @@ int sb_byteToAscii(const ENCODING *enc, const char *p) return *p; } #else -#define BYTE_TO_ASCII(enc, p) (*p) +#define BYTE_TO_ASCII(enc, p) (*(p)) #endif #define IS_NAME_CHAR(enc, p, n) \ @@ -877,10 +868,10 @@ int streqci(const char *s1, const char *s2) for (;;) { char c1 = *s1++; char c2 = *s2++; - if ('a' <= c1 && c1 <= 'z') - c1 += 'A' - 'a'; - if ('a' <= c2 && c2 <= 'z') - c2 += 'A' - 'a'; + if (ASCII_a <= c1 && c1 <= ASCII_z) + c1 += ASCII_A - ASCII_a; + if (ASCII_a <= c2 && c2 <= ASCII_z) + c2 += ASCII_A - ASCII_a; if (c1 != c2) return 0; if (!c1) @@ -928,6 +919,7 @@ int parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, + const char **nameEndPtr, const char **valPtr, const char **nextTokPtr) { @@ -955,13 +947,16 @@ int parsePseudoAttribute(const ENCODING *enc, *nextTokPtr = ptr; return 0; } - if (c == '=') + if (c == ASCII_EQUALS) { + *nameEndPtr = ptr; break; + } if (isSpace(c)) { + *nameEndPtr = ptr; do { ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); - if (c != '=') { + if (c != ASCII_EQUALS) { *nextTokPtr 
= ptr; return 0; } @@ -979,7 +974,7 @@ int parsePseudoAttribute(const ENCODING *enc, ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); } - if (c != '"' && c != '\'') { + if (c != ASCII_QUOT && c != ASCII_APOS) { *nextTokPtr = ptr; return 0; } @@ -990,12 +985,12 @@ int parsePseudoAttribute(const ENCODING *enc, c = toAscii(enc, ptr, end); if (c == open) break; - if (!('a' <= c && c <= 'z') - && !('A' <= c && c <= 'Z') - && !('0' <= c && c <= '9') - && c != '.' - && c != '-' - && c != '_') { + if (!(ASCII_a <= c && c <= ASCII_z) + && !(ASCII_A <= c && c <= ASCII_Z) + && !(ASCII_0 <= c && c <= ASCII_9) + && c != ASCII_PERIOD + && c != ASCII_MINUS + && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } @@ -1004,6 +999,26 @@ int parsePseudoAttribute(const ENCODING *enc, return 1; } +static const char KW_version[] = { + ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' +}; + +static const char KW_encoding[] = { + ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' +}; + +static const char KW_standalone[] = { + ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0' +}; + +static const char KW_yes[] = { + ASCII_y, ASCII_e, ASCII_s, '\0' +}; + +static const char KW_no[] = { + ASCII_n, ASCII_o, '\0' +}; + static int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, @@ -1014,19 +1029,21 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone) { const char *val = 0; const char *name = 0; + const char *nameEnd = 0; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; - if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) { + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) { *badPtr = ptr; return 0; } - if 
(!XmlNameMatchesAscii(enc, name, "version")) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { if (!isGeneralTextEntity) { *badPtr = name; return 0; @@ -1035,7 +1052,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, else { if (versionPtr) *versionPtr = val; - if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { + if (versionEndPtr) + *versionEndPtr = ptr; + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } @@ -1048,9 +1067,9 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, return 1; } } - if (XmlNameMatchesAscii(enc, name, "encoding")) { + if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); - if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) { + if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } @@ -1058,22 +1077,22 @@ int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, *encodingName = val; if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); - if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { + if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) return 1; } - if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) { + if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { *badPtr = name; return 0; } - if (XmlNameMatchesAscii(enc, val, "yes")) { + if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; } - else if (XmlNameMatchesAscii(enc, val, "no")) { + else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 0; } @@ -1170,7 +1189,7 @@ struct unknown_encoding { char utf8[256][4]; }; -int XmlSizeOfUnknownEncoding() +int XmlSizeOfUnknownEncoding(void) { return sizeof(struct 
unknown_encoding); } @@ -1265,7 +1284,7 @@ XmlInitUnknownEncoding(void *mem, { int i; struct unknown_encoding *e = mem; - for (i = 0; i < sizeof(struct normal_encoding); i++) + for (i = 0; i < (int)sizeof(struct normal_encoding); i++) ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER @@ -1350,21 +1369,40 @@ enum { NO_ENC }; +static const char KW_ISO_8859_1[] = { + ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0' +}; +static const char KW_US_ASCII[] = { + ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0' +}; +static const char KW_UTF_8[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' +}; +static const char KW_UTF_16[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' +}; +static const char KW_UTF_16BE[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0' +}; +static const char KW_UTF_16LE[] = { + ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0' +}; + static int getEncodingIndex(const char *name) { static const char *encodingNames[] = { - "ISO-8859-1", - "US-ASCII", - "UTF-8", - "UTF-16", - "UTF-16BE" - "UTF-16LE", + KW_ISO_8859_1, + KW_US_ASCII, + KW_UTF_8, + KW_UTF_16, + KW_UTF_16BE, + KW_UTF_16LE, }; int i; if (name == 0) return NO_ENC; - for (i = 0; i < sizeof(encodingNames)/sizeof(encodingNames[0]); i++) + for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) return i; return UNKNOWN_ENC; @@ -1373,7 +1411,8 @@ int getEncodingIndex(const char *name) /* For binary compatibility, we store the index of the encoding specified at initialization in the isUtf16 member. 
*/ -#define INIT_ENC_INDEX(enc) ((enc)->initEnc.isUtf16) +#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) +#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) /* This is what detects the encoding. encodingTable maps from encoding indices to encodings; @@ -1398,9 +1437,11 @@ int initScan(const ENCODING **encodingTable, encPtr = enc->encPtr; if (ptr + 1 == end) { /* only a single byte available for auto-detection */ +#ifndef XML_DTD /* FIXME */ /* a well-formed document entity must have more than one byte */ if (state != XML_CONTENT_STATE) return XML_TOK_PARTIAL; +#endif /* so we're parsing an external text entity... */ /* if UTF-16 was externally specified, then we need at least 2 bytes */ switch (INIT_ENC_INDEX(enc)) { @@ -1493,7 +1534,7 @@ int initScan(const ENCODING **encodingTable, break; } } - *encPtr = encodingTable[(int)INIT_ENC_INDEX(enc)]; + *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } @@ -1522,7 +1563,7 @@ XmlInitUnknownEncodingNS(void *mem, { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) - ((struct normal_encoding *)enc)->type[':'] = BT_COLON; + ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; return enc; } diff --git a/ext/xml/expat/xmltok/xmltok.h b/ext/xml/expat/xmltok.h index 06544d15cb..8b02324c38 100644 --- a/ext/xml/expat/xmltok/xmltok.h +++ b/ext/xml/expat/xmltok.h @@ -1,31 +1,6 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. 
- -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ #ifndef XmlTok_INCLUDED @@ -35,10 +10,6 @@ your version of this file under either the MPL or the GPL. extern "C" { #endif -#ifndef XMLTOKAPI -#define XMLTOKAPI /* as nothing */ -#endif - /* The following token may be returned by XmlContentTok */ #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of illegal ]]> sequence */ @@ -108,10 +79,22 @@ extern "C" { for a name with a colon. 
*/ #define XML_TOK_PREFIXED_NAME 41 +#ifdef XML_DTD +#define XML_TOK_IGNORE_SECT 42 +#endif /* XML_DTD */ + +#ifdef XML_DTD +#define XML_N_STATES 4 +#else /* not XML_DTD */ #define XML_N_STATES 3 +#endif /* not XML_DTD */ + #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 +#ifdef XML_DTD +#define XML_IGNORE_SECTION_STATE 3 +#endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 #define XML_ATTRIBUTE_VALUE_LITERAL 0 @@ -150,7 +133,7 @@ struct encoding { int (*sameName)(const ENCODING *, const char *, const char *); int (*nameMatchesAscii)(const ENCODING *, - const char *, const char *); + const char *, const char *, const char *); int (*nameLength)(const ENCODING *, const char *); const char *(*skipS)(const ENCODING *, const char *); int (*getAtts)(const ENCODING *enc, const char *ptr, @@ -211,6 +194,13 @@ literals, comments and processing instructions. #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) +#ifdef XML_DTD + +#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ + XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) + +#endif /* XML_DTD */ + /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. */ @@ -225,8 +215,8 @@ the content of a literal that has already been returned by XmlTok. 
*/ #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) -#define XmlNameMatchesAscii(enc, ptr1, ptr2) \ - (((enc)->nameMatchesAscii)(enc, ptr1, ptr2)) +#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ + (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) #define XmlNameLength(enc, ptr) \ (((enc)->nameLength)(enc, ptr)) @@ -260,42 +250,44 @@ typedef struct { const ENCODING **encPtr; } INIT_ENCODING; -int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity, +int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); -int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); -const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding(); -const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding(); -int XMLTOKAPI XmlUtf8Encode(int charNumber, char *buf); -int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf); +int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncoding(void); +const ENCODING *XmlGetUtf16InternalEncoding(void); +int XmlUtf8Encode(int charNumber, char *buf); +int XmlUtf16Encode(int charNumber, unsigned short *buf); -int XMLTOKAPI XmlSizeOfUnknownEncoding(); -ENCODING XMLTOKAPI * +int XmlSizeOfUnknownEncoding(void); +ENCODING * XmlInitUnknownEncoding(void *mem, int *table, int (*conv)(void *userData, const char *p), void *userData); -int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity, +int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); -int XMLTOKAPI XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); -const 
ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS(); -const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS(); -ENCODING XMLTOKAPI * +int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); +const ENCODING *XmlGetUtf8InternalEncodingNS(void); +const ENCODING *XmlGetUtf16InternalEncodingNS(void); +ENCODING * XmlInitUnknownEncodingNS(void *mem, int *table, int (*conv)(void *userData, const char *p), diff --git a/ext/xml/expat/xmltok/Makefile.in b/ext/xml/expat/xmltok/Makefile.in deleted file mode 100644 index 73d1cab428..0000000000 --- a/ext/xml/expat/xmltok/Makefile.in +++ /dev/null @@ -1,5 +0,0 @@ - -LTLIBRARY_NAME = libexpat_tok.la -LTLIBRARY_SOURCES = xmltok.c xmlrole.c - -include $(top_srcdir)/build/dynlib.mk diff --git a/ext/xml/expat/xmltok/xmldef.h b/ext/xml/expat/xmltok/xmldef.h deleted file mode 100644 index 49ce9ed636..0000000000 --- a/ext/xml/expat/xmltok/xmldef.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. 
If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. -*/ - -#include <string.h> - -#ifdef XML_WINLIB - -#define WIN32_LEAN_AND_MEAN -#define STRICT -#include <windows.h> - -#define malloc(x) HeapAlloc(GetProcessHeap(), 0, (x)) -#define calloc(x, y) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (x)*(y)) -#define free(x) HeapFree(GetProcessHeap(), 0, (x)) -#define realloc(x, y) HeapReAlloc(GetProcessHeap(), 0, x, y) -#define abort() /* as nothing */ - -#else /* not XML_WINLIB */ - -#include <stdlib.h> - -#endif /* not XML_WINLIB */ - -/* This file can be used for any definitions needed in -particular environments. */ - -#ifdef MOZILLA - -#include "nspr.h" -#define malloc(x) PR_Malloc(x) -#define realloc(x, y) PR_Realloc((x), (y)) -#define calloc(x, y) PR_Calloc((x),(y)) -#define free(x) PR_Free(x) -#define int int32 - -#endif /* MOZILLA */ diff --git a/ext/xml/expat/xmltok/xmltok.dsp b/ext/xml/expat/xmltok/xmltok.dsp deleted file mode 100644 index 2be3f90253..0000000000 --- a/ext/xml/expat/xmltok/xmltok.dsp +++ /dev/null @@ -1,163 +0,0 @@ -# Microsoft Developer Studio Project File - Name="xmltok" - Package Owner=<4>
-# Microsoft Developer Studio Generated Build File, Format Version 6.00
-# ** DO NOT EDIT **
-
-# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
-
-CFG=xmltok - Win32 Release
-!MESSAGE This is not a valid makefile. To build this project using NMAKE,
-!MESSAGE use the Export Makefile command and run
-!MESSAGE
-!MESSAGE NMAKE /f "xmltok.mak".
-!MESSAGE
-!MESSAGE You can specify a configuration when running NMAKE
-!MESSAGE by defining the macro CFG on the command line. For example:
-!MESSAGE
-!MESSAGE NMAKE /f "xmltok.mak" CFG="xmltok - Win32 Release"
-!MESSAGE
-!MESSAGE Possible choices for configuration are:
-!MESSAGE
-!MESSAGE "xmltok - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library")
-!MESSAGE "xmltok - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library")
-!MESSAGE
-
-# Begin Project
-# PROP AllowPerConfigDependencies 0
-# PROP Scc_ProjName ""
-# PROP Scc_LocalPath ""
-CPP=cl.exe
-MTL=midl.exe
-RSC=rc.exe
-
-!IF "$(CFG)" == "xmltok - Win32 Release"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir ".\Release"
-# PROP BASE Intermediate_Dir ".\Release"
-# PROP BASE Target_Dir "."
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir ".\Release"
-# PROP Intermediate_Dir ".\Release"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir "."
-# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /D XMLTOKAPI=__declspec(dllexport) /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "XML_NS" /YX /FD /c
-# ADD BASE MTL /nologo /D "NDEBUG" /win32
-# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
-# ADD BASE RSC /l 0x809 /d "NDEBUG"
-# ADD RSC /l 0x809 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /entry:"DllMain" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmltok.dll" /link50compat
-# SUBTRACT LINK32 /pdb:none
-
-!ELSEIF "$(CFG)" == "xmltok - Win32 Debug"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 1
-# PROP BASE Output_Dir ".\Debug"
-# PROP BASE Intermediate_Dir ".\Debug"
-# PROP BASE Target_Dir "."
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 1
-# PROP Output_Dir ".\Debug"
-# PROP Intermediate_Dir ".\Debug"
-# PROP Ignore_Export_Lib 0
-# PROP Target_Dir "."
-# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D XMLTOKAPI=__declspec(dllexport) /YX /FD /c
-# ADD BASE MTL /nologo /D "_DEBUG" /win32
-# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
-# ADD BASE RSC /l 0x809 /d "_DEBUG"
-# ADD RSC /l 0x809 /d "_DEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386
-# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /out:"..\dbgbin\xmltok.dll"
-
-!ENDIF
-
-# Begin Target
-
-# Name "xmltok - Win32 Release"
-# Name "xmltok - Win32 Debug"
-# Begin Group "Source Files"
-
-# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90"
-# Begin Source File
-
-SOURCE=.\dllmain.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmlrole.c
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmltok.c
-# End Source File
-# End Group
-# Begin Group "Header Files"
-
-# PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd"
-# Begin Source File
-
-SOURCE=.\asciitab.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\iasciitab.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\latin1tab.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\nametab.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\utf8tab.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmldef.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmlrole.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmltok.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmltok_impl.c
-# PROP BASE Exclude_From_Build 1
-# PROP Exclude_From_Build 1
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmltok_impl.h
-# End Source File
-# Begin Source File
-
-SOURCE=.\xmltok_ns.c
-# PROP Exclude_From_Build 1
-# End Source File
-# End Group
-# Begin Group "Resource Files"
-
-# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe"
-# End Group
-# End Target
-# End Project
diff --git a/ext/xml/expat/xmltok/xmltok_impl.h b/ext/xml/expat/xmltok/xmltok_impl.h deleted file mode 100644 index e72b225c83..0000000000 --- a/ext/xml/expat/xmltok/xmltok_impl.h +++ /dev/null @@ -1,71 +0,0 @@ -/* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. 
-*/ - -enum { - BT_NONXML, - BT_MALFORM, - BT_LT, - BT_AMP, - BT_RSQB, - BT_LEAD2, - BT_LEAD3, - BT_LEAD4, - BT_TRAIL, - BT_CR, - BT_LF, - BT_GT, - BT_QUOT, - BT_APOS, - BT_EQUALS, - BT_QUEST, - BT_EXCL, - BT_SOL, - BT_SEMI, - BT_NUM, - BT_LSQB, - BT_S, - BT_NMSTRT, - BT_COLON, - BT_HEX, - BT_DIGIT, - BT_NAME, - BT_MINUS, - BT_OTHER, /* known not to be a name or name start character */ - BT_NONASCII, /* might be a name or name start character */ - BT_PERCNT, - BT_LPAR, - BT_RPAR, - BT_AST, - BT_PLUS, - BT_COMMA, - BT_VERBAR -}; - -#include <stddef.h> diff --git a/ext/xml/expat/xmltok/xmltok_impl.c b/ext/xml/expat/xmltok_impl.c index c52539be8a..70131c0fcc 100644 --- a/ext/xml/expat/xmltok/xmltok_impl.c +++ b/ext/xml/expat/xmltok_impl.c @@ -1,33 +1,10 @@ /* -The contents of this file are subject to the Mozilla Public License -Version 1.1 (the "License"); you may not use this file except in -compliance with the License. You may obtain a copy of the License at -http://www.mozilla.org/MPL/ - -Software distributed under the License is distributed on an "AS IS" -basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -License for the specific language governing rights and limitations -under the License. - -The Original Code is expat. - -The Initial Developer of the Original Code is James Clark. -Portions created by James Clark are Copyright (C) 1998, 1999 -James Clark. All Rights Reserved. - -Contributor(s): - -Alternatively, the contents of this file may be used under the terms -of the GNU General Public License (the "GPL"), in which case the -provisions of the GPL are applicable instead of those above. If you -wish to allow use of your version of this file only under the terms of -the GPL and not to allow others to use your version of this file under -the MPL, indicate your decision by deleting the provisions above and -replace them with the notice and other provisions required by the -GPL. 
If you do not delete the provisions above, a recipient may use -your version of this file under either the MPL or the GPL. +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. */ +#include "php_compat.h" + #ifndef IS_INVALID_CHAR #define IS_INVALID_CHAR(enc, ptr, n) (0) #endif @@ -117,7 +94,7 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr != end) { - if (!CHAR_MATCHES(enc, ptr, '-')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -128,10 +105,10 @@ int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, case BT_MINUS: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '-')) { + if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -205,9 +182,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { - case 'x': + case ASCII_x: break; - case 'X': + case ASCII_X: upper = 1; break; default: @@ -215,9 +192,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { - case 'm': + case ASCII_m: break; - case 'M': + case ASCII_M: upper = 1; break; default: @@ -225,9 +202,9 @@ int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { - case 'l': + case ASCII_l: break; - case 'L': + case ASCII_L: upper = 1; break; default: @@ -271,7 +248,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '>')) { 
+ if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } @@ -290,7 +267,7 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, '>')) { + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } @@ -308,12 +285,13 @@ static int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { + static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { - if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { + if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -342,12 +320,12 @@ int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *en ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ']')) + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -485,7 +463,7 @@ int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr != end) { - if (CHAR_MATCHES(enc, ptr, 'x')) + if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: @@ -678,7 +656,7 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -787,7 +765,7 @@ int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 
ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } @@ -836,12 +814,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ']')) + if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, '>')) { + if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } @@ -866,12 +844,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, #undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) { + if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) { + if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; } @@ -953,7 +931,7 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - return XML_TOK_PARTIAL; + return -XML_TOK_POUND_NAME; } static @@ -971,7 +949,7 @@ int PREFIX(scanLit)(int open, const ENCODING *enc, if (t != open) break; if (ptr == end) - return XML_TOK_PARTIAL; + return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: @@ -1033,7 +1011,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, } case BT_CR: if (ptr + MINBPC(enc) == end) - return XML_TOK_TRAILING_CR; + return -XML_TOK_PROLOG_S; /* fall through */ case BT_S: case BT_LF: for (;;) { @@ -1066,11 +1044,11 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ']')) 
{ + return -XML_TOK_CLOSE_BRACKET; + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (ptr + MINBPC(enc) == end) return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) { + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } @@ -1083,7 +1061,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_RPAR: ptr += MINBPC(enc); if (ptr == end) - return XML_TOK_PARTIAL; + return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: *nextTokPtr = ptr + MINBPC(enc); @@ -1213,7 +1191,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - return XML_TOK_PARTIAL; + return -tok; } static @@ -1294,8 +1272,11 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: - if (ptr == start) - return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); + if (ptr == start) { + int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), + end, nextTokPtr); + return (tok == XML_TOK_PERCENT) ? 
XML_TOK_INVALID : tok; + } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: @@ -1326,6 +1307,61 @@ int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end return XML_TOK_DATA_CHARS; } +#ifdef XML_DTD + +static +int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, + const char **nextTokPtr) +{ + int level = 0; + if (MINBPC(enc) > 1) { + size_t n = end - ptr; + if (n & (MINBPC(enc) - 1)) { + n &= ~(MINBPC(enc) - 1); + end = ptr + n; + } + } + while (ptr != end) { + switch (BYTE_TYPE(enc, ptr)) { + INVALID_CASES(ptr, nextTokPtr) + case BT_LT: + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { + ++level; + ptr += MINBPC(enc); + } + } + break; + case BT_RSQB: + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { + if ((ptr += MINBPC(enc)) == end) + return XML_TOK_PARTIAL; + if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { + ptr += MINBPC(enc); + if (level == 0) { + *nextTokPtr = ptr; + return XML_TOK_IGNORE_SECT; + } + --level; + } + } + break; + default: + ptr += MINBPC(enc); + break; + } + } + return XML_TOK_PARTIAL; +} + +#endif /* XML_DTD */ + static int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr) @@ -1357,7 +1393,7 @@ int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, #endif break; case BT_S: - if (CHAR_MATCHES(enc, ptr, '\t')) { + if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { *badPtr = ptr; return 0; } @@ -1391,7 +1427,8 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, { enum { other, inName, inValue } state = inName; int nAtts = 0; - int open = 0; + int open = 0; /* defined when state == inValue; + initialization just to shut up compilers */ for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { 
@@ -1452,8 +1489,8 @@ int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr - || BYTE_TO_ASCII(enc, ptr) != ' ' - || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' ' + || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE + || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; @@ -1483,22 +1520,22 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'x')) { - for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { + if (CHAR_MATCHES(enc, ptr, ASCII_x)) { + for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: + case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: result <<= 4; - result |= (c - '0'); + result |= (c - ASCII_0); break; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: result <<= 4; - result += 10 + (c - 'A'); + result += 10 + (c - ASCII_A); break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: result <<= 4; - result += 10 + (c - 'a'); + result += 10 + (c - ASCII_a); break; } if (result >= 0x110000) @@ -1506,10 +1543,10 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) } } else { - for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { + for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; - result += (c - '0'); + result += (c - ASCII_0); if (result >= 
0x110000) return -1; } @@ -1522,46 +1559,46 @@ int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const cha { switch ((end - ptr)/MINBPC(enc)) { case 2: - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) { + if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { - case 'l': - return '<'; - case 'g': - return '>'; + case ASCII_l: + return ASCII_LT; + case ASCII_g: + return ASCII_GT; } } break; case 3: - if (CHAR_MATCHES(enc, ptr, 'a')) { + if (CHAR_MATCHES(enc, ptr, ASCII_a)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'm')) { + if (CHAR_MATCHES(enc, ptr, ASCII_m)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'p')) - return '&'; + if (CHAR_MATCHES(enc, ptr, ASCII_p)) + return ASCII_AMP; } } break; case 4: switch (BYTE_TO_ASCII(enc, ptr)) { - case 'q': + case ASCII_q: ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'u')) { + if (CHAR_MATCHES(enc, ptr, ASCII_u)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'o')) { + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 't')) - return '"'; + if (CHAR_MATCHES(enc, ptr, ASCII_t)) + return ASCII_QUOT; } } break; - case 'a': + case ASCII_a: ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'p')) { + if (CHAR_MATCHES(enc, ptr, ASCII_p)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 'o')) { + if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, 's')) - return '\''; + if (CHAR_MATCHES(enc, ptr, ASCII_s)) + return ASCII_APOS; } } break; @@ -1635,29 +1672,16 @@ int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) } static -int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2) +int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, + const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { + if (ptr1 == end1) + return 0; if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } - switch (BYTE_TYPE(enc, 
ptr1)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } + return ptr1 == end1; } static diff --git a/ext/xml/expat/xmltok_impl.h b/ext/xml/expat/xmltok_impl.h new file mode 100644 index 0000000000..da0ea60a65 --- /dev/null +++ b/ext/xml/expat/xmltok_impl.h @@ -0,0 +1,46 @@ +/* +Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd +See the file COPYING for copying permission. +*/ + +enum { + BT_NONXML, + BT_MALFORM, + BT_LT, + BT_AMP, + BT_RSQB, + BT_LEAD2, + BT_LEAD3, + BT_LEAD4, + BT_TRAIL, + BT_CR, + BT_LF, + BT_GT, + BT_QUOT, + BT_APOS, + BT_EQUALS, + BT_QUEST, + BT_EXCL, + BT_SOL, + BT_SEMI, + BT_NUM, + BT_LSQB, + BT_S, + BT_NMSTRT, + BT_COLON, + BT_HEX, + BT_DIGIT, + BT_NAME, + BT_MINUS, + BT_OTHER, /* known not to be a name or name start character */ + BT_NONASCII, /* might be a name or name start character */ + BT_PERCNT, + BT_LPAR, + BT_RPAR, + BT_AST, + BT_PLUS, + BT_COMMA, + BT_VERBAR +}; + +#include <stddef.h> diff --git a/ext/xml/expat/xmltok/xmltok_ns.c b/ext/xml/expat/xmltok_ns.c index f2541616a5..21859738ac 100644 --- a/ext/xml/expat/xmltok/xmltok_ns.c +++ b/ext/xml/expat/xmltok_ns.c @@ -1,9 +1,9 @@ -const ENCODING *NS(XmlGetUtf8InternalEncoding)() +const ENCODING *NS(XmlGetUtf8InternalEncoding)(void) { return &ns(internal_utf8_encoding).enc; } -const ENCODING *NS(XmlGetUtf16InternalEncoding)() +const ENCODING *NS(XmlGetUtf16InternalEncoding)(void) { #if XML_BYTE_ORDER == 12 return &ns(internal_little2_encoding).enc; @@ -45,7 +45,7 @@ int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *n int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) return 0; - INIT_ENC_INDEX(p) = (char)i; + SET_INIT_ENC_INDEX(p, i); p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog); p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent); 
p->initEnc.updatePosition = initUpdatePosition; @@ -65,7 +65,7 @@ const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const cha if (ptr != end) return 0; *p = 0; - if (streqci(buf, "UTF-16") && enc->minBytesPerChar == 2) + if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) @@ -79,6 +79,7 @@ int NS(XmlParseXmlDecl)(int isGeneralTextEntity, const char *end, const char **badPtr, const char **versionPtr, + const char **versionEndPtr, const char **encodingName, const ENCODING **encoding, int *standalone) @@ -90,6 +91,7 @@ int NS(XmlParseXmlDecl)(int isGeneralTextEntity, end, badPtr, versionPtr, + versionEndPtr, encodingName, encoding, standalone); diff --git a/ext/xml/php_xml.h b/ext/xml/php_xml.h index 6738aeb9b6..ed2f709793 100644 --- a/ext/xml/php_xml.h +++ b/ext/xml/php_xml.h @@ -33,14 +33,9 @@ extern zend_module_entry xml_module_entry; #endif -#if defined(HAVE_LIBEXPAT) && defined(PHP_XML_INTERNAL) +#if defined(PHP_XML_INTERNAL) -#ifdef HAVE_LIBEXPAT2 #include <expat.h> -#else -#include <xmltok.h> -#include <xmlparse.h> -#endif #ifdef PHP_WIN32 #define PHP_XML_API __declspec(dllexport) @@ -71,10 +66,8 @@ typedef struct { zval *notationDeclHandler; zval *externalEntityRefHandler; zval *unknownEncodingHandler; -#ifdef HAVE_LIBEXPAT2 zval *startNamespaceDeclHandler; zval *endNamespaceDeclHandler; -#endif zval *object; zval *data; @@ -114,9 +107,7 @@ enum php_xml_option { #define XML_MAXLEVEL 255 /* XXX this should be dynamic */ PHP_FUNCTION(xml_parser_create); -#ifdef HAVE_LIBEXPAT2 PHP_FUNCTION(xml_parser_create_ns); -#endif PHP_FUNCTION(xml_set_object); PHP_FUNCTION(xml_set_element_handler); PHP_FUNCTION(xml_set_character_data_handler); @@ -125,10 +116,8 @@ PHP_FUNCTION(xml_set_default_handler); PHP_FUNCTION(xml_set_unparsed_entity_decl_handler); PHP_FUNCTION(xml_set_notation_decl_handler); PHP_FUNCTION(xml_set_external_entity_ref_handler); -#ifdef HAVE_LIBEXPAT2 
PHP_FUNCTION(xml_set_start_namespace_decl_handler); PHP_FUNCTION(xml_set_end_namespace_decl_handler); -#endif PHP_FUNCTION(xml_parse); PHP_FUNCTION(xml_get_error_code); PHP_FUNCTION(xml_error_string); diff --git a/ext/xml/xml.c b/ext/xml/xml.c index ec65dece2e..31e3a5c2ca 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -92,10 +92,8 @@ void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); int _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); -#ifdef HAVE_LIBEXPAT2 void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *); void _xml_endNamespaceDeclHandler(void *, const XML_Char *); -#endif /* }}} */ /* {{{ extension definition structures */ @@ -103,9 +101,7 @@ static unsigned char third_and_fourth_args_force_ref[] = { 4, BYREF_NONE, BYREF_ function_entry xml_functions[] = { PHP_FE(xml_parser_create, NULL) -#ifdef HAVE_LIBEXPAT2 PHP_FE(xml_parser_create_ns, NULL) -#endif PHP_FE(xml_set_object, second_arg_force_ref) PHP_FE(xml_set_element_handler, NULL) PHP_FE(xml_set_character_data_handler, NULL) @@ -114,10 +110,8 @@ function_entry xml_functions[] = { PHP_FE(xml_set_unparsed_entity_decl_handler, NULL) PHP_FE(xml_set_notation_decl_handler, NULL) PHP_FE(xml_set_external_entity_ref_handler, NULL) -#ifdef HAVE_LIBEXPAT2 PHP_FE(xml_set_start_namespace_decl_handler, NULL) PHP_FE(xml_set_end_namespace_decl_handler, NULL) -#endif PHP_FE(xml_parse, NULL) PHP_FE(xml_parse_into_struct, third_and_fourth_args_force_ref) PHP_FE(xml_get_error_code, NULL) @@ -234,9 +228,7 @@ PHP_MINFO_FUNCTION(xml) php_info_print_table_start(); php_info_print_table_row(2, "XML Support", "active"); -#if HAVE_LIBEXPAT2 php_info_print_table_row(2, "XML Namespace Support", "active"); -#endif php_info_print_table_end(); } @@ -336,14 +328,12 @@ xml_parser_dtor(zend_rsrc_list_entry 
*rsrc) if (parser->unknownEncodingHandler) { zval_ptr_dtor(&parser->unknownEncodingHandler); } -#ifdef HAVE_LIBEXPAT2 if (parser->startNamespaceDeclHandler) { zval_ptr_dtor(&parser->startNamespaceDeclHandler); } if (parser->endNamespaceDeclHandler) { zval_ptr_dtor(&parser->endNamespaceDeclHandler); } -#endif if (parser->baseURI) { efree(parser->baseURI); } @@ -997,7 +987,6 @@ _xml_externalEntityRefHandler(XML_Parser parserPtr, /* }}} */ -#ifdef HAVE_LIBEXPAT2 /* {{{ _xml_startNamespaceDeclHandler() */ void _xml_startNamespaceDeclHandler(void *userData, @@ -1041,7 +1030,6 @@ void _xml_endNamespaceDeclHandler(void *userData, } /* }}} */ -#endif /* }}} */ @@ -1099,7 +1087,6 @@ PHP_FUNCTION(xml_parser_create) } /* }}} */ -#ifdef HAVE_LIBEXPAT2 /* {{{ proto int xml_parser_create_ns([string encoding][, string sep]) Create an XML parser */ PHP_FUNCTION(xml_parser_create_ns) @@ -1158,7 +1145,6 @@ PHP_FUNCTION(xml_parser_create_ns) parser->index = return_value->value.lval; } /* }}} */ -#endif /* {{{ proto int xml_set_object(int pind, object &obj) Set up object which should be used for callbacks */ @@ -1327,7 +1313,6 @@ PHP_FUNCTION(xml_set_external_entity_ref_handler) } /* }}} */ -#ifdef HAVE_LIBEXPAT2 /* {{{ proto int xml_set_start_namespace_decl_handler(int pind, string hdl) Set up character data handler */ @@ -1366,7 +1351,6 @@ PHP_FUNCTION(xml_set_end_namespace_decl_handler) RETVAL_TRUE; } /* }}} */ -#endif /* {{{ proto int xml_parse(int pind, string data [, int isFinal]) Start parsing an XML document */ |